美文网首页
Python(七十三)selenium基础

Python(七十三)selenium基础

作者: Lonelyroots | 来源:发表于2022-03-04 22:38 被阅读0次

    12_selenium基础/01_链家网.py:

    """
    https://cs.lianjia.com/ershoufang/pg3/
    """
    import aiohttp
    import asyncio
    from lxml import etree
    import time
    
    async def requestDef(url):
        """Fetch *url* asynchronously and return the response body as HTML text.

        Uses ``async with`` for both the session and the response so they are
        closed even when the request raises — the original created the session
        manually and would leak it if ``session.get`` failed before
        ``session.close()`` was reached.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(url=url) as response:
                # text() decodes the body into a str (the page's HTML source)
                return await response.text()
    
    def parse(html):
        """Extract second-hand-house listing fields from a Lianjia result page
        and print one tuple per listing.

        *html* is the raw page source; each listing card is a
        ``div.info.clear`` element.
        """
        tree = etree.HTML(html)
        for div in tree.xpath('//div[@class="info clear"]'):
            # './' scopes every sub-query to the current listing card
            title = div.xpath('./div[@class="title"]/a/text()')[0]
            # e.g. community "和美星城", district "暮云" -> "和美星城-暮云"
            position_Small = div.xpath('.//div[@class="positionInfo"]/a[1]/text()')[0].strip()
            position_Big = div.xpath('.//div[@class="positionInfo"]/a[2]/text()')[0]
            position = '{}-{}'.format(position_Small, position_Big)
            house = div.xpath('.//div[@class="houseInfo"]/text()')[0]
            # e.g. "0人关注 / 7天以前发布" -> ['0人关注 ', ' 7天以前发布']
            follow = div.xpath('./div[@class="followInfo"]/text()')[0]
            followinfo = follow.split('/')
            amount_of_attention = followinfo[0]     # follower count
            release_time = followinfo[1]            # time since listed
            # BUG FIX: the second branch of this XPath union was absolute
            # ("//div[...]"), so it matched price nodes from the WHOLE document
            # instead of the current card; both branches are now relative.
            house_price = div.xpath('.//div[@class="totalPrice totalPrice2"]/span/text()'
                                    '|.//div[@class="totalPrice totalPrice2"]/i[2]/text()')
            # join instead of [0] + [1]: no IndexError when a node is missing
            house_price = ''.join(house_price)  # e.g. ['121', '万'] -> '121万'
            per_yuan = div.xpath('.//div[@class="unitPrice"]/span/text()')[0]
            data = (title, position, house, amount_of_attention, release_time, house_price, per_yuan)
            print(data)
    
    async def get_html(url):
        """Download one listing page and hand its HTML straight to the parser."""
        print("采集请求:", url)
        parse(await requestDef(url))
    
    startTime = time.time()
    
    ReqUrl = "https://cs.lianjia.com/ershoufang/pg%s/"
    
    async def main():
        """Download and parse pages 1..99 concurrently."""
        # asyncio.gather replaces the old get_event_loop()/ensure_future()
        # pattern: ensure_future() was called here before any loop existed,
        # which is deprecated and raises on Python 3.10+.
        await asyncio.gather(*(get_html(ReqUrl % page) for page in range(1, 100)))
    
    asyncio.run(main())
    print(time.time() - startTime)      # total elapsed seconds
    

    12_selenium基础/02_selenium的基本使用.py:

    from selenium import webdriver
    
    driver = webdriver.Chrome()
    
    # driver.maximize_window()      # maximise the browser window
    # driver.minimize_window()      # minimise the browser window
    driver.set_window_size(400, 800)        # open the window at width 400, height 800
    
    # visit two pages so back/forward have history to move through
    driver.get("http://www.baidu.com/")
    driver.get("http://www.douban.com/")
    
    driver.back()       # history: go back
    driver.forward()        # history: go forward
    
    # driver.quit()      # end the session and close every window
    # driver.close()      # close only the current window
    

    12_selenium基础/03_selenium元素定位.py:

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    
    driver = webdriver.Chrome()
    
    driver.get("https://www.baidu.com/")
    
    # Locating the Baidu search box: one element, several locator strategies.
    # # by id
    # driver.find_element(By.ID,'kw').send_keys("selenium")
    
    # # by class name (if several elements match, index from 0)
    # driver.find_element(By.CLASS_NAME,'s_ipt').send_keys("selenium")
    
    # # by name attribute
    # driver.find_element(By.NAME,'wd').send_keys("selenium")
    
    # # by CSS selector
    # driver.find_element(By.CSS_SELECTOR,'#kw').send_keys("selenium")
    
    # by XPath
    driver.find_element(By.XPATH,'//input[@id="kw"]').send_keys("selenium")
    
    driver.find_element(By.ID,'su').click()        # click the search button
    

    12_selenium基础/04_京东.py:

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from lxml import etree
    import time
    
    driver = webdriver.Chrome()
    
    driver.get("https://www.jd.com/")
    
    # type the query into the search box and submit it
    driver.find_element(By.ID,'key').send_keys('笔记本')
    driver.find_element(By.XPATH,'//button[@class="button"]').click()
    
    # crude wait for the results page to render; without it page_source
    # would still hold the previous page
    time.sleep(2)
    
    page_html = driver.page_source      # HTML of the page currently loaded
    
    dom = etree.HTML(page_html)
    # print(dom.xpath('//title/text()'))     # page title, needs the delay above
    for item in dom.xpath('//ul[@class="gl-warp clearfix"]/li'):
        print(item.xpath('.//div[@class="p-price"]//i/text()'),end='---')       # price
        print(item.xpath('.//div[@class="p-name p-name-type-2"]//em//text()'),end='---')  # name
        print(item.xpath('.//div[@class="p-commit"]//a/text()'),end='---')      # comments
        print(item.xpath('.//div[@class="p-shop"]//a/text()'))      # shop
    
    # JD renders only 30 of the 60 items per page up front, so paging
    # (clicking "next") is needed to reach the rest
    driver.find_element(By.XPATH,'//a[@class="pn-next"]').click()
    

    文章到这里就结束了!希望大家能多多支持Python(系列)!六个月带大家学会Python,私聊我,可以问关于本文章的问题!以后每天都会发布新的文章,喜欢的点点关注!一个陪伴你学习Python的新青年!不管多忙都会更新下去,一起加油!

    Editor:Lonelyroots

    相关文章

      网友评论

          本文标题:Python(七十三)selenium基础

          本文链接:https://www.haomeiwen.com/subject/ywfvrrtx.html