美文网首页
Python(七十三)selenium基础

Python(七十三)selenium基础

作者: Lonelyroots | 来源:发表于2022-03-04 22:38 被阅读0次

12_selenium基础/01_链家网.py:

"""
https://cs.lianjia.com/ershoufang/pg3/
"""
import aiohttp
import asyncio
from lxml import etree
import time

async def requestDef(url):
    """Fetch *url* asynchronously and return the response body as text (HTML).

    Uses ``async with`` for both the session and the response so they are
    closed even when the request or decoding raises — the original created
    the session manually and leaked it on any exception before ``close()``.
    A session per call is wasteful for bulk scraping, but it keeps this
    function's public interface unchanged.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url=url) as response:
            # text() decodes the body using the response's charset.
            return await response.text()

def parse(html):
    """Parse one Lianjia second-hand-listings page and print one tuple per
    listing: (title, position, house info, follower count, release time,
    total price, unit price).
    """
    tree = etree.HTML(html)
    for div in tree.xpath('//div[@class="info clear"]'):
        # './' / './/' keep every query inside the current listing's subtree.
        title = div.xpath('./div[@class="title"]/a/text()')[0]      # listing title
        position_Small = div.xpath('.//div[@class="positionInfo"]/a[1]/text()')[0].strip()  # estate name
        position_Big = div.xpath('.//div[@class="positionInfo"]/a[2]/text()')[0]            # district
        position = '{}-{}'.format(position_Small, position_Big)
        house = div.xpath('.//div[@class="houseInfo"]/text()')[0]        # layout / area / orientation
        follow = div.xpath('./div[@class="followInfo"]/text()')[0]  # e.g. "0人关注 / 7天以前发布"
        followinfo = follow.split('/')
        amount_of_attention = followinfo[0]     # follower count
        release_time = followinfo[1]            # time since posted
        # BUG FIX: the second alternative previously used an absolute '//' path,
        # which searches the WHOLE document from the root rather than the current
        # <div>, so every listing picked up the first listing's price unit.
        # Both alternatives now start with './/' to stay inside this listing.
        house_price = div.xpath(
            './/div[@class="totalPrice totalPrice2"]/span/text()'
            '|.//div[@class="totalPrice totalPrice2"]/i[2]/text()')  # e.g. ['121', '万']
        # join() tolerates a missing part; the original [0] + [1] raised
        # IndexError whenever the markup variant lacked one of the nodes.
        house_price = ''.join(house_price)  # e.g. '121万'
        per_yuan = div.xpath('.//div[@class="unitPrice"]/span/text()')[0]  # price per square metre
        data = (title, position, house, amount_of_attention, release_time, house_price, per_yuan)
        print(data)

async def get_html(url):
    """Download a single listing page and feed its HTML to the parser."""
    print("采集请求:", url)
    page_source = await requestDef(url)
    parse(page_source)

startTime = time.time()

ReqUrl = "https://cs.lianjia.com/ershoufang/pg%s/"


async def main():
    """Fan out requests for pages 1-99 concurrently and wait for them all."""
    # asyncio.gather replaces the old ensure_future()+wait() pattern:
    # ensure_future() with no running event loop raises RuntimeError on
    # Python 3.10+, and get_event_loop() outside a running loop is
    # deprecated since 3.10. asyncio.run() owns loop creation/teardown.
    await asyncio.gather(*(get_html(ReqUrl % page) for page in range(1, 100)))


asyncio.run(main())
print(time.time() - startTime)  # total elapsed scraping time in seconds

12_selenium基础/02_selenium的基本使用.py:

from selenium import webdriver

# Launch a local Chrome browser controlled by Selenium WebDriver.
browser = webdriver.Chrome()

# browser.maximize_window()       # maximize the browser window
# browser.minimize_window()      # minimize the browser window
browser.set_window_size(400,800)        # set the window to 400px wide by 800px tall

# Navigate twice so back()/forward() below have history to move through.
browser.get("http://www.baidu.com/")
browser.get("http://www.douban.com/")

browser.back()      # history back (returns to baidu)
browser.forward()       # history forward (returns to douban)

# browser.quit()      # close every window and shut down the driver process
# browser.close()      # close only the current window

12_selenium基础/03_selenium元素定位.py:

from selenium import webdriver
from selenium.webdriver.common.by import By

# Demonstrates the element-location strategies selenium offers, using the
# Baidu search box (id="kw", name="wd", class="s_ipt") as the target.
browser = webdriver.Chrome()

browser.get("https://www.baidu.com/")

"""百度输入框的定位方式 元素定位"""
# # Locate by element id
# browser.find_element(By.ID,'kw').send_keys("selenium")

# # Locate by class name; if several elements match, index from 0
# browser.find_element(By.CLASS_NAME,'s_ipt').send_keys("selenium")

# # Locate by the name attribute
# browser.find_element(By.NAME,'wd').send_keys("selenium")

# # Locate by CSS selector
# browser.find_element(By.CSS_SELECTOR,'#kw').send_keys("selenium")

# Locate by XPath and type the query into the search box
browser.find_element(By.XPATH,'//input[@id="kw"]').send_keys("selenium")

browser.find_element(By.ID,'su').click()        # click the "Baidu Search" button

12_selenium基础/04_京东.py:

from selenium import webdriver
from selenium.webdriver.common.by import By
from lxml import etree
import time

# Drive Chrome to search JD.com for '笔记本' (laptop) and scrape the
# first page of results with lxml.
browser = webdriver.Chrome()

browser.get("https://www.jd.com/")

# Type the query into the search box and submit.
browser.find_element(By.ID,'key').send_keys('笔记本')
browser.find_element(By.XPATH,'//button[@class="button"]').click()

time.sleep(2)  # give the result page time to render before grabbing its source

html = browser.page_source      # HTML source of the current (post-search) page

tree = etree.HTML(html)
# print(tree.xpath('//title/text()'))     # page title; needs the sleep above, otherwise you get the previous page
for li in tree.xpath('//ul[@class="gl-warp clearfix"]/li'):
    print(li.xpath('.//div[@class="p-price"]//i/text()'),end='---')       # price
    print(li.xpath('.//div[@class="p-name p-name-type-2"]//em//text()'),end='---')  # product name fragments
    print(li.xpath('.//div[@class="p-commit"]//a/text()'),end='---')      # review count
    print(li.xpath('.//div[@class="p-shop"]//a/text()'))                  # shop name

browser.find_element(By.XPATH,'//a[@class="pn-next"]').click()      # click "next page"
# NOTE: JD lazy-loads results — only 30 of the page's 60 items are present
# initially, hence the pagination click above. (Original author's note kept
# verbatim below.)
"""
京东一次只刷新30条数据,而页面总共是60条,所以需要有翻页操作
"""

文章到这里就结束了!希望大家能多多支持Python(系列)!六个月带大家学会Python,私聊我,可以问关于本文章的问题!以后每天都会发布新的文章,喜欢的点点关注!一个陪伴你学习Python的新青年!不管多忙都会更新下去,一起加油!

Editor:Lonelyroots

相关文章

网友评论

      本文标题:Python(七十三)selenium基础

      本文链接:https://www.haomeiwen.com/subject/ywfvrrtx.html