美文网首页
python 实战计划学习:爬取租房信息

python 实战计划学习:爬取租房信息

作者: jianjianxin2011 | 来源:发表于2016-06-01 00:42 被阅读0次

    from bs4 import BeautifulSoup
    import requests
    import time

    # Sample listing detail page; the module-level code visible here never reads it.
    url = 'http://bj.xiaozhu.com/fangzi/1339353835.html'

    # Search-result pages 1 through 9 (the upper bound of range() is exclusive).
    urls = [
        'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(page)
        for page in range(1, 10)
    ]

    def get_rent_info(url, data=None):
        """Fetch one listing detail page and print the fields scraped from it.

        Args:
            url: Address of the listing detail page to download.
            data: Ignored on entry; the name is rebound to the dict built for
                each matched listing. Kept for backward compatibility.

        Returns:
            None. Each scraped record is written to stdout with ``print``.
        """
        wb_data = requests.get(url)
        # Pause after every download so successive calls are spaced out.
        time.sleep(3)
        soup = BeautifulSoup(wb_data.text, 'lxml')

        titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
        addresss = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span.pr5')
        prices = soup.select('#pricePart > div.day_l > span')
        house_imgs = soup.select('#curBigImage')
        personal_imgs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
        genders = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > span')
        nicknames = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a.lorder_name')

        # zip() stops at the shortest list, so a page where any selector
        # matches nothing produces no output at all.
        for title, address, price, house_img, personal_img, gender, nickname in zip(
                titles, addresss, prices, house_imgs, personal_imgs, genders, nicknames):
            # Only an exact class list of ["member_girl_ico"] maps to '女'
            # (female); every other value, including a missing class
            # attribute, is reported as '男' (male).
            is_female = gender.get("class") == ["member_girl_ico"]
            data = {
                'title': title.get_text(),
                'address': address.get_text(),
                'prices': price.get_text(),
                'house_imgs': house_img.get('src'),
                'personal_imgs': personal_img.get('src'),
                'gender': '女' if is_female else '男',
                'nickname': nickname.get_text()
            }
            # NOTE(review): the original indentation was lost; printing once
            # per matched record is the reconstruction assumed here.
            print(data)

    # Walk every search-result page and scrape each listing it links to.
    for page_url in urls:
        wb_data = requests.get(page_url)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        # Selector kept verbatim from the original (including the spelling
        # 'resule'); presumably it matches the site's markup - verify.
        for anchor in soup.select('a.resule_img_a'):
            link = anchor.get("href")
            # Tag.get() returns None when the attribute is absent; skip such
            # anchors instead of passing a non-URL to requests.
            if link:
                get_rent_info(link)

    运行结果:

    Paste_Image.png

    相关文章

      网友评论

          本文标题:python 实战计划学习:爬取租房信息

          本文链接:https://www.haomeiwen.com/subject/slzcdttx.html