美文网首页我爱编程
Selenium+PhantomJS实战Airbnb

Selenium+PhantomJS实战Airbnb

作者: ilililillililil | 来源:发表于2018-02-02 16:28 被阅读40次

    一,先看结果

    二,思路

    三,源码

    
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support.wait import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    import time
    from bs4 import BeautifulSoup
    import re
    
    class Airbnb(object):
        """Scrape listing names, prices and image styles from Airbnb pages.

        A Firefox session is driven through Selenium; the HTML of each loaded
        page is parsed with BeautifulSoup and the extracted fields are printed
        to stdout.
        """

        # CSS selector of the "next page" arrow icon.
        # NOTE(review): these look like generated class names -- confirm they
        # still match the live page before running.
        NEXT_PAGE_SELECTOR = (
            'li._b8vexar > a._1hjqg6h > div._1yofwd5 > div._1rltvky > svg'
        )

        def __init__(self):
            # Search-results URL (the path encodes "台北--台湾", i.e. Taipei,
            # Taiwan). Not read by get_airbnb; kept for callers that use it.
            self.airbnb_urla ='https://zh.airbnb.com/s/%E5%8F%B0%E5%8C%97--%E5%8F%B0%E6%B9%BE/homes?checkin=2018-01-11&checkout=2018-01-28&allow_override%5B%5D=&s_tag=5T-o2wsE&section_offset=1'
            # Landing-page URL that get_airbnb opens. The attribute name keeps
            # its original spelling ("aribnb") for backward compatibility.
            self.aribnb_urlb = 'https://zh.airbnb.com/?af=43896654&c=%24pi%3A9.pk%3Abaidu_brd_brandzone_demand_title_p1&src=Baidu&medium=PPC&ag_kwid=2299-36-57701246c0b98773.6a0cc0f87b49337e'

        def get_airbnb(self, max_pages=None):
            """Open ``self.aribnb_urlb`` and print the listings of each page.

            For every page: scroll down, wait two seconds, print the listings
            found in the page source, then click the "next page" arrow.

            Args:
                max_pages: Optional upper bound on the number of pages to
                    scrape. ``None`` (the default) continues until no
                    "next page" arrow can be found.
            """
            # Imported here so this class does not depend on changes to the
            # module-level import list.
            from selenium.common.exceptions import NoSuchElementException
            from selenium.webdriver.common.by import By

            browser = webdriver.Firefox()
            try:
                browser.get(self.aribnb_urlb)

                pages_done = 0
                while max_pages is None or pages_done < max_pages:
                    # Scroll the window down (presumably to bring the
                    # pagination bar into view) and give the page time to
                    # settle.
                    browser.execute_script('window.scrollBy(0,10000)')
                    time.sleep(2)

                    # Parse before navigating away, so the page that was just
                    # loaded -- including the last one -- is the one scraped.
                    self._print_listings(browser.page_source)
                    pages_done += 1

                    try:
                        next_arrow = browser.find_element(
                            By.CSS_SELECTOR, self.NEXT_PAGE_SELECTOR
                        )
                    except NoSuchElementException:
                        # No "next page" arrow: treat this as the last page.
                        break
                    next_arrow.click()
            finally:
                # Always release the browser process, even on errors.
                browser.quit()

        @staticmethod
        def _print_listings(html):
            """Print name, image style and price of each listing card in *html*."""
            soup = BeautifulSoup(html, 'lxml')
            container = soup.find('div', class_='_fhph4u')
            if container is None:
                return

            # Only direct child *tags*: iterating the Tag itself also yields
            # text nodes, on which ``.find`` is ``str.find`` and fails.
            for card in container.find_all(True, recursive=False):
                name_tag = card.find('div', class_='_saba1yg')
                price_tag = card.find('div', class_='_59f9ic')
                image_tag = card.find('div', attrs={'role': 'img'})
                if name_tag is None or price_tag is None or image_tag is None:
                    # Not a complete listing card; skip instead of crashing.
                    continue

                name = name_tag.get_text()
                # Drop the first two space-separated tokens of the price text.
                price = price_tag.get_text().split(' ')[2:]
                # The inline style attribute is printed as the image URL field.
                urls = image_tag.get('style', '')

                print('name:{}\njpgurl:{}\nprice:{}\n'.format(name, urls, price))
    
    # Script entry point: build the scraper and start crawling when this file
    # is executed directly rather than imported.
    if __name__ == '__main__':
        scraper = Airbnb()
        scraper.get_airbnb()
    
    

    相关文章

      网友评论

        本文标题:Selenium+PhantomJS实战Airbnb

        本文链接:https://www.haomeiwen.com/subject/risezxtx.html