from bs4 import BeautifulSoup
import time, requests, re
# Browser-like request headers; the User-Agent value here is a minimal assumption.
headers = {'User-Agent': 'Mozilla/5.0'}
# Sample item page, referenced by the commented-out test calls at the bottom.
url = 'http://wx.58.com/pingbandiannao/25892738648911x.shtml'
def get_links_from(who_sells):
    urls = []
    list_view = 'http://wx.58.com/pbdn/{}/pn2/'.format(str(who_sells))
    wb_data = requests.get(list_view, headers=headers)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    for link in soup.select('td.t > a.t'):
        url_one = link.get('href').split('?')[0]
        if 'zhuanzhuan' not in url_one:  # skip Zhuanzhuan pages, which this parser cannot handle
            urls.append(url_one)
    return urls
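# Sketch: the list URL above hard-codes page 2 ('pn2'); a variant that walks several
# pages could look like this. The page count is an assumption, not taken from the site.
def get_links_from_pages(who_sells, pages=3):
    all_urls = []
    for pn in range(1, pages + 1):
        list_view = 'http://wx.58.com/pbdn/{}/pn{}/'.format(who_sells, pn)
        wb_data = requests.get(list_view, headers=headers)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        for link in soup.select('td.t > a.t'):
            url_one = link.get('href').split('?')[0]
            if 'zhuanzhuan' not in url_one:
                all_urls.append(url_one)
        time.sleep(1)  # pause between list pages to stay polite
    return all_urls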
def get_view(url):
    # Extract the numeric listing ID from the item URL with a regex.
    info_id = re.findall('http.*?nao/(.*?)x.shtml', url, re.S)
    api = 'http://jst1.58.com/counter?infoid={}'.format(info_id[0])
    js = requests.get(api, headers=headers)
    # The counter endpoint returns a 'name=value' snippet, so the count sits after the last '='.
    views = js.text.split('=')[-1]
    return views
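# Quick offline check of the ID regex used above, against the sample URL:
# re.findall('http.*?nao/(.*?)x.shtml', url, re.S)  ->  ['25892738648911']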
def get_item_info(who_sells=0):
    urls = get_links_from(who_sells)
    for url in urls:
        wb_data = requests.get(url, headers=headers)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        data = {
            'title': soup.title.text,
            'price': soup.select('div.su_con > span.c_f50')[0].text,
            'date': soup.select('li.time')[0].text,
            # Some listings omit the area; fall back to None so an empty selection doesn't raise.
            'area': list(soup.select('span.c_25d')[0].stripped_strings) if soup.find_all('span', 'c_25d') else None,
            'url': url,
            'cate': '个人' if who_sells == 0 else '商家',  # '个人' = individual seller, '商家' = merchant
            'views': get_view(url),
        }
        print(data)
        time.sleep(1)  # pause between item requests to stay polite
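# Sketch of persisting results instead of printing them; it assumes the caller first
# collects the 'data' dicts from get_item_info (e.g. by appending them to a list),
# and the output filename is an arbitrary choice.
import csv

def save_rows(rows, path='58_pbdn.csv'):
    fields = ['title', 'price', 'date', 'area', 'url', 'cate', 'views']
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fields)
        writer.writeheader()
        writer.writerows(rows)  # each row is one scraped 'data' dict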
# get_links_from(0)
# get_view(url)
# get_item_info(1)  # merchants ('商家')
get_item_info()