selenium 使用

作者: 半大人 | 来源:发表于2019-06-20 11:57 被阅读0次

使用selenium模拟淘宝秒杀(python)
Python_scrapy—selenium模拟浏览器进行简单操
python+selenium小米商城红米K40手机抢购！
网络爬虫-Selenium
selenium 使用
selenium 源码分析之port的设置问题
Python模拟登录与cookie保持
selenium 笔记 --- 元素定位【java】
在Selenium中发出POST请求而不填写表单
爬虫：selenium、pyppeteer、playwright

# -*- coding: utf-8 -*-

"""
selenium 使用
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
'''
# Demo: type a keyword into Baidu, wait for the results, then dump page info.
browser = webdriver.Chrome()
try:
    browser.get("https://www.baidu.com")
    # Look the element up by its id "kw" (presumably the search box). The
    # local is not called `input`, so the built-in stays usable.
    search_box = browser.find_element(By.ID, 'kw')
    search_box.send_keys(u'星游记')
    search_box.send_keys(Keys.ENTER)
    # Poll for at most 10 s until the node with id "content_left" is present.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_all_elements_located((By.ID, 'content_left')))
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)
finally:
    # Runs even when a step above raised. quit() ends the whole WebDriver
    # session, whereas close() only closes the current window.
    browser.quit()

声明浏览器对象

from selenium import webdriver

# Start a Chrome instance controlled through WebDriver.
browser = webdriver.Chrome()

谷歌无头浏览器

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Build an options object that makes Chrome start without a visible window.
chrome_options = Options()
# The usual pair of switches for headless runs: no UI, GPU acceleration off.
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# Where the chromedriver executable is stored.
path = r'C:\pacong_data\day3\chromedriver.exe'
# Create the browser object. The driver path is passed explicitly (it was
# defined but unused before), using the same executable_path=/options=
# keywords as the supplementary snippet later in this file.
browser = webdriver.Chrome(executable_path=path, options=chrome_options)
# Load the page and keep its HTML.
url = 'https://www.baidu.com'
browser.get(url)
source = browser.page_source

查单个节点

browser.get('https://www.taobao.com')
# Locate the same element (id "q") three ways: by id, CSS selector and XPath.
# NOTE(review): the find_element_by_* helpers belong to older Selenium
# releases - confirm the installed version still provides them.
input_first = browser.find_element_by_id('q')
input_second = browser.find_element_by_css_selector('#q')
input_third = browser.find_element_by_xpath('//*[@id="q"]')
print(input_first, input_second, input_third)

或通过By定位

在使用过程中可以根据实际情况选择对应的定位方式，我们可以用By来设置定位策略，具体语法如下：
# One call per locator strategy, each selected through the By class.
browser.find_element(By.ID, "kw")
browser.find_element(By.NAME, "wd")
browser.find_element(By.CLASS_NAME, "s_ipt")
browser.find_element(By.TAG_NAME, "input")
browser.find_element(By.LINK_TEXT, u"新闻")
browser.find_element(By.PARTIAL_LINK_TEXT, u"新")
browser.find_element(By.XPATH, "//*[@class='bg s_btn']")
# Compound classes are chained with dots. The previous "span.bg s_btn_wr"
# would have searched for an <s_btn_wr> tag nested inside span.bg.
browser.find_element(By.CSS_SELECTOR, "span.bg.s_btn_wr>input#su")

查多个节点

# The plural find_elements_* form collects every node matching the XPath.
lis = browser.find_elements_by_xpath('//div/div/div/ul/li/a')
print(lis)

节点交互

import time

# Demo: type into a field, clear it, type again, then click a button.
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
# Named search_box rather than `input` so the built-in is not shadowed.
search_box = browser.find_element_by_id('q')
search_box.send_keys('VR')
time.sleep(2)  # pause so the first keyword stays visible before clearing
search_box.clear()
search_box.send_keys('红米note7pro')
button = browser.find_element_by_class_name('btn-search')
button.click()

获取网页的源码

# page_source holds the HTML of the page currently loaded in the browser.
html = browser.page_source

获取节点信息

# Open a fresh Chrome session on the page used by the snippets that follow.
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')

获取属性

logo = browser.find_element_by_id('zh-top-link-logo')
print(logo)
# Read the value of the tag's "class" attribute.
print(logo.get_attribute('class'))

获取文本值

# NOTE: `input` shadows the built-in; the name is kept because the next
# snippet in this file reads the same variable.
input = browser.find_element_by_xpath('//*[@id="zh-top-nav-explore"]/a')
# .text is the text content of the node.
print(input.text)

获取id、位置、标签名和大小

# Print four properties of the element found in the previous snippet.
print(input.id)        # id of the element
print(input.location)  # position of the element
print(input.tag_name)  # tag name of the element
print(input.size)      # size of the element

延时等待

隐式等待

browser = webdriver.Chrome()
# Implicit wait: if the target node is not found straight away, keep looking
# for up to 10 s, and raise an error only if it is still missing.
browser.implicitly_wait(10)
browser.get('https://www.zhihu.com/explore')
input = browser.find_element_by_class_name('zu-top-add-question')
print(input)

显式等待

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

browser = webdriver.Chrome()
browser.get('https://www.taobao.com/')
# Explicit wait with a 10 s ceiling; each until() returns as soon as its
# condition is met.
wait = WebDriverWait(browser, 10)
# The result was previously bound to the misspelled name `inpu`, so the
# final print showed an unrelated `input` object instead of this element.
search_input = wait.until(EC.presence_of_element_located((By.ID, 'q')))
# "btn-search" is a class name, so the CSS selector needs the leading dot;
# without it the selector looks for a <btn-search> tag.
button = wait.until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(search_input, button)

cookies处理

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
# Cookies as they are right after the page load.
print(browser.get_cookies())
# Add one cookie and list them again.
browser.add_cookie({'name':'name','domain':'www.zhihu.com','value':'germey'})
print(browser.get_cookies())
# Remove every cookie and list them one last time.
browser.delete_all_cookies()
print(browser.get_cookies())
'''

补充：

import os
import random

# NOTE: `ua` (a sequence of user-agent strings) and `ip` (the proxy address)
# are not defined in this snippet; the surrounding script must supply them.
options = webdriver.ChromeOptions()

# 1. Put the disk cache in a "cache" folder under the current directory
#    (os.getcwd() returns that directory). os.path.join replaces the
#    hand-written '\cache', whose "\c" is an invalid escape sequence.
options.add_argument('--disk-cache-dir=' + os.path.join(os.getcwd(), 'cache'))

# 2. Start Chrome with a randomly chosen user agent.
options.add_argument('--user-agent='+random.choice(ua))

# 3. Route traffic through a proxy.
options.add_argument('--proxy-server={0}'.format(ip))

# 4. Exclude the "enable-automation" switch (the original note calls this
#    "developer mode").
options.add_experimental_option('excludeSwitches', ['enable-automation'])

# 5. Create the driver.
driver = webdriver.Chrome(executable_path='chromedriver',options=options)

# 6. Maximize the browser window.
driver.maximize_window()

使用原浏览器用户数据

selenium用法详解

网友评论

本文标题：selenium 使用

本文链接：https://www.haomeiwen.com/subject/irydqctx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！