import os
import sys

import requests
from lxml import etree
# Crawl the 51hao.cc home page, follow every city link found in each province
# block, and print one line per entry listed on the city page:
#     province, city name, number prefix, label, advertised total, entry text
# The meaning of the scraped fields is inferred from the original variable
# names and comments (carrier / first 3 digits / mobile) -- TODO confirm
# against the live page layout.

root_url = 'http://www.51hao.cc/'

# Seconds to wait for each HTTP request.  Without a timeout, requests.get()
# may block indefinitely on a stalled connection and hang the whole crawl.
REQUEST_TIMEOUT = 10


def _fetch_tree(url):
    """Download *url*, decode the body as gb2312 and return the parsed tree.

    Raises requests.RequestException (including timeouts) on network failure.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.encoding = 'gb2312'
    return etree.HTML(response.text)


def _warn(message):
    """Report a skipped page or section on stderr, keeping stdout data-only."""
    print(message, file=sys.stderr)


# A failure on the home page is fatal: there is nothing to crawl without it.
selector = _fetch_tree(root_url)

# One "fkt" div per province; the XPath deliberately skips the first one.
infos = selector.xpath('//div[@class="fkce"]/div[@class="fkt"][position()>1]')
print(len(infos))

for info in infos:
    province_names = info.xpath('div[@class="fkbj"]/p/a/text()')
    if not province_names:
        # Previously an IndexError here aborted the entire crawl.
        _warn('skipping a province block without a name link')
        continue
    province = province_names[0]

    citys = info.xpath('div[@class="fklk"]/p/a/text()')
    citys_url = info.xpath('div[@class="fklk"]/p/a/@href')
    # Pair each city name with its link.  This assumes every <a> contributes
    # exactly one text node and one href -- TODO confirm.
    for city_name, city_url in zip(citys, citys_url):
        print(province, city_name, city_url)

        try:
            selector2 = _fetch_tree(city_url)
        except requests.RequestException as exc:
            # One unreachable city page should not end the whole run.
            _warn('skipping %s (%s): %s' % (city_name, city_url, exc))
            continue

        # Carrier (operator) headers; only their count is used below.
        infos2 = selector2.xpath('//div[@class="all"]//div[@class="num_bg"]')
        for i in range(len(infos2)):
            # Parameterized XPath: section i is looked up as child div
            # number i + 2 of div.all.
            section = '//div[@class="all"]/div[%s]' % (i + 2)
            prefixes = selector2.xpath(section + '//span[@class="nums"]/text()')
            labels = selector2.xpath(section + '/div[1]/text()')
            if not prefixes or not labels:
                _warn('skipping section %d of %s: prefix or label missing'
                      % (i + 2, city_url))
                continue
            first_3 = prefixes[0]
            types = labels[0]

            # The label is expected to look like "<name>(共<N>个)": keep the
            # name, and strip the wrapper characters to get the bare count.
            label_parts = types.split('(')
            types1 = label_parts[0]
            if len(label_parts) > 1:
                total = (label_parts[1].split(')')[0]
                         .replace('共', '').replace('个', ''))
            else:
                # No "(...)" part; previously this raised IndexError.
                total = ''

            # NOTE(review): li[i + 1] matches every <li> that is the
            # (i + 1)-th <li> child of its parent anywhere under div.all; it
            # is not scoped to the section queried above.  Kept as in the
            # original -- confirm this matches the page layout.
            mobiles = selector2.xpath(
                '//div[@class="all"]//li[%s]/a/text()' % (i + 1))
            for mobile in mobiles:
                print(province, city_name, first_3, types1, total, mobile)
# "Reader comments" -- stray web-page footer text captured with the script; not code.