美文网首页
05-动手练习爬网站信息

05-动手练习爬网站信息

作者: PlayPython | 来源:发表于2016-06-26 20:40 被阅读0次

第五天

学习延时功能

练习爬取安居客租房信息(注:下面的示例代码实际爬取的是小猪短租 xiaozhu.com 的房源详情页)


import time

import requests
from bs4 import BeautifulSoup


def sexual(n):
    """Return the landlord's gender label derived from an element's CSS classes.

    Args:
        n: Any object exposing ``get('class')``, such as a BeautifulSoup tag
            (a plain attribute dict works as well).

    Returns:
        '女' when ``member_girl_ico`` is among the element's classes,
        otherwise '男' (including when the element has no class at all).
    """
    classes = n.get('class') or []
    if isinstance(classes, str):
        # A raw attribute string is split into tokens so the membership test
        # below matches whole class names rather than substrings.
        classes = classes.split()
    # Membership instead of exact list equality: an extra class on the
    # element must not flip the result to '男'.
    return '女' if 'member_girl_ico' in classes else '男'


def get_rentinfo(url):
    """Download one listing page and print the details scraped from it.

    Args:
        url: Address of the listing detail page to fetch.

    Returns:
        None. Every extracted record is printed as a dict.

    Raises:
        requests.exceptions.RequestException: If the download fails or the
            server does not answer within the timeout.
    """
    # Without a timeout requests waits indefinitely, so one stalled server
    # would freeze the whole crawl.
    web_content = requests.get(url, timeout=10)
    soup = BeautifulSoup(web_content.text, 'lxml')

    # One CSS selector per field; each call returns a list of matches.
    titles = soup.select('div.pho_info > h4 > em')
    addresses = soup.select('div.pho_info > p')
    prices = soup.select('div.day_l > span')
    images = soup.select('#curBigImage')
    landlord_pics = soup.select('div.member_pic > a > img')
    landlord_sexuals = soup.select('div.w_240 > h6 > span')
    landlord_names = soup.select('div.w_240 > h6 > a')

    # zip stops at the shortest list, so nothing is printed when any one of
    # the selectors above matches no element.
    records = zip(
        titles,
        addresses,
        prices,
        images,
        landlord_pics,
        landlord_sexuals,
        landlord_names,
    )
    for title, address, price, image, landlord_pic, landlord_sexual, landlord_name in records:
        data = {
            '房源:': title.get_text(),
            '地址:': address.get_text(),
            '价格:': price.get_text(),
            '图片:': image.get('src'),
            '房东图片:': landlord_pic.get('src'),
            '房东性别:': sexual(landlord_sexual),
            '房东名字:': landlord_name.get_text(),
        }
        print(data)


# Fetch the search-results page and collect the link of every listing on it.
# The timeout keeps a stalled server from blocking the script forever.
search_page = requests.get(
    'http://hz.xiaozhu.com/?startDate=2016-06-27&endDate=2016-07-31',
    timeout=10,
)
list_soup = BeautifulSoup(search_page.text, 'lxml')
house_list = list_soup.select('#page_list > ul > li > a')

for link in house_list:
    rent_url = link.get('href')
    if not rent_url:
        # An anchor without an href has no detail page to fetch.
        continue
    get_rentinfo(rent_url)
    # Pause between detail-page requests so the site is not hit in a tight loop.
    time.sleep(1)


相关文章

网友评论

      本文标题:05-动手练习爬网站信息

      本文链接:https://www.haomeiwen.com/subject/jcczdttx.html