from bs4 import BeautifulSoup
import requests
import time
# Search-result pages 1 through 9 of the bj.xiaozhu.com short-term rental listing.
urls = [
    'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(page_number)
    for page_number in range(1, 10)
]
def get_rent_info(url, data=None):
    """Fetch one listing page and print the details scraped from it.

    Downloads ``url``, waits three seconds, parses the HTML with the lxml
    parser and pulls the title, address, price, house image URL, owner
    picture URL, owner gender and owner nickname out of the page with CSS
    selectors. Each complete set of fields is printed as a dict.

    Args:
        url: Address of the listing page to scrape.
        data: Unused; kept only so callers that pass it keep working.

    Raises:
        requests.exceptions.RequestException: If the page cannot be
            fetched, including when the server does not respond within
            the timeout.
    """
    # A timeout keeps an unresponsive server from blocking the crawl forever.
    wb_data = requests.get(url, timeout=10)
    # Wait 3 s after every fetch (presumably to rate-limit the crawl).
    time.sleep(3)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    addresss = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span.pr5')
    prices = soup.select('#pricePart > div.day_l > span')
    house_imgs = soup.select('#curBigImage')
    personal_imgs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
    genders = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > span')
    nicknames = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a.lorder_name')

    # zip() stops at the shortest list, so a page on which any one selector
    # matches nothing prints no record at all.
    for title, address, price, house_img, personal_img, gender_tag, nickname in zip(
            titles, addresss, prices, house_imgs, personal_imgs, genders, nicknames):
        # Test for membership instead of equality with the whole class list,
        # so an extra CSS class on the icon (or a missing class attribute)
        # does not change the outcome.
        gender_classes = gender_tag.get("class") or []
        gender = '女' if "member_girl_ico" in gender_classes else '男'

        # Use a separate local name so the ``data`` parameter is not shadowed.
        info = {
            'title': title.get_text(),
            'address': address.get_text(),
            'prices': price.get_text(),
            'house_imgs': house_img.get('src'),
            'personal_imgs': personal_img.get('src'),
            'gender': gender,
            'nickname': nickname.get_text(),
        }
        print(info)
# Walk every search-result page and scrape each listing it links to.
for page_url in urls:
    # A timeout keeps one stalled page from hanging the whole crawl.
    wb_data = requests.get(page_url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    # Selector spelling ('resule') is kept exactly as written; presumably it
    # matches the class name used in the site's markup.
    link_datas = soup.select('a.resule_img_a')
    for links in link_datas:
        link = links.get("href")
        if not link:
            # An anchor without an href gives nothing to fetch; passing None
            # on would make the request fail on an invalid URL.
            continue
        get_rent_info(link)
运行结果:
Paste_Image.png
网友评论