美文网首页
Python爬虫--爬取租房信息

Python爬虫--爬取租房信息

作者: LineWay | 来源:发表于2016-05-27 17:38 被阅读0次
    # _*_ coding:utf-8 _*_
    
    # 抓取小猪短租西安的300个短租房资料,网址是:http://xa.xiaozhu.com/search-duanzufang-p1-0/
    # 爬取内容:
    
    # 住房信息
    # 1.标题
    # 2.图片链接
    # 3.房屋地址
    # 4.房屋日租金
    
    # 房主信息
    # 1.性别
    # 2.昵称
    # 3.头像
    
    
    from bs4 import BeautifulSoup
    import requests
    import time
    
    # HTTP headers attached to every request the crawler makes; the User-Agent
    # is a desktop Chrome browser string.
    headers = {
        'Content-Type': 'text/html; charset=utf-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36',
    }

    # Detail-page URLs collected so far, in the order they were found.
    crawl_list = list()

    # The crawl stops once this many listings have been collected.
    limit_count = 300

    # Number of the last search-result page requested; it is incremented
    # before each request, so the first page fetched is page 1.
    page = 0
    
    def parse_info(crawl_url):
        """Fetch one listing detail page and print its key fields.

        Downloads ``crawl_url`` with the module-level ``headers``, extracts the
        listing title, address, nightly price and main image URL together with
        the landlord's nickname, avatar URL and gender, prints everything on a
        single line, then sleeps for one second.

        Args:
            crawl_url: URL of a listing detail page.

        Raises:
            IndexError: if one of the expected elements is missing from the page.
            requests.exceptions.RequestException: if the request fails or the
                server does not respond within the timeout.
        """
        # A timeout keeps one stalled connection from hanging the whole crawl.
        content = requests.get(crawl_url, headers=headers, timeout=10).text
        soup = BeautifulSoup(content, 'lxml')

        # Listing details.
        title = soup.select('.pho_info h4 em')[0].text
        address = soup.select('.pr5')[0].text.rstrip()
        price = soup.select('.bg_box .day_l span')[0].text.lstrip()
        img_uri = soup.select('#curBigImage')[0].get('src')

        # Landlord details.
        landlord_name = soup.select('.lorder_name')[0].text
        landlord_img_uri = soup.select('.member_pic img')[0].get('src')
        # Reported as female when a `.member_ico1` element is present on the
        # page, male otherwise.
        landlord_role = u'女' if soup.select('.member_ico1') else u'男'

        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(u'标题: %s, 地址: %s, 每晚价格: %s, 房屋图片: %s, 房东昵称: %s, 房东头像: %s, 房东性别: %s' % (
            title, address, price, img_uri,
            landlord_name, landlord_img_uri, landlord_role))

        # Pause between detail-page requests.
        time.sleep(1)
    
    
    
    # Walk the search-result pages in order, parsing each listing found, until
    # `limit_count` listings have been collected.
    while len(crawl_list) < limit_count:
        page += 1
        start_url = 'http://xa.xiaozhu.com/search-duanzufang-p%s-0/' % page
        # A timeout keeps one stalled connection from hanging the whole crawl.
        resp = requests.get(start_url, headers=headers, timeout=10)
        content = resp.text
        soup = BeautifulSoup(content, 'lxml')
        info_tags = soup.select('#page_list .pic_list li')
        if not info_tags:
            # A page with no listings means the results ran out (or the
            # request was not served normally). Without this stop,
            # `crawl_list` would never grow and the loop would keep
            # requesting higher page numbers forever.
            break
        for info_tag in info_tags:
            if len(crawl_list) >= limit_count:
                # Limit reached part-way through a page; the outer loop
                # condition then ends the crawl.
                break
            # The listing's link element carries the detail-page URL.
            href_tag = info_tag.select('.resule_img_a')[0]
            info_url = href_tag.get('href')
            crawl_list.append(info_url)
            parse_info(info_url)
    
    

    相关文章

      网友评论

          本文标题:Python爬虫--爬取租房信息

          本文链接:https://www.haomeiwen.com/subject/cikfdttx.html