方法
本文实现的是 https://www.jb51.net/article/141305.htm 一文中介绍的第 1、4 两种方法。
一、直接使用已知的cookie访问
import sys
import io
from urllib import request
# Fetch a login-protected page by sending a cookie copied from a browser session.

# Re-wrap standard output so its encoding is UTF-8 instead of the default.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

# A page that can only be viewed after logging in.
url = 'https://bbs.pku.edu.cn/v2/home.php'
# Cookie string for the logged-in session; each xxx is a placeholder to fill in.
cookie_str = r'skey=xxx;uid=xxx;UM_distinctid=xxx'

req = request.Request(url)
# Attach the session cookie.
req.add_header('cookie', cookie_str)
# Send a browser-like User-Agent request header.
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36')

# The context manager closes the HTTP response even if read/decode raises.
with request.urlopen(req) as resp:
    print(resp.read().decode('utf-8'))
cookie_str 中的 xxx 需要替换为登录后在浏览器里查看到的实际 cookie 值。
二、使用无头浏览器访问
import requests
import sys
import io
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
# Log in to the BBS through a headless Firefox instance and print the resulting page source.

# Re-wrap standard output so its encoding is UTF-8 instead of the default.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

# Start Firefox with the headless flag.
options = Options()
options.add_argument('-headless')
# NOTE(review): executable_path / firefox_options and the find_element_by_* helpers
# below look like Selenium 3-era APIs -- confirm against the installed Selenium version.
browser = Firefox(executable_path='D:\\python\\geckodriver.exe', firefox_options=options)
try:
    browser.get('https://bbs.pku.edu.cn/v2/login.php')

    # Fill in the login form; the two strings are placeholders for real credentials.
    username = browser.find_element_by_name('username')
    username.send_keys('your username')
    password = browser.find_element_by_name('password')
    password.send_keys('your password')

    # The login control is located as an <a> element by its class attribute.
    login_button = browser.find_element_by_xpath("//a[@class='button red']")
    login_button.click()

    # NOTE(review): page_source is read right after the click with no explicit
    # wait -- verify that the post-login page has loaded by this point.
    print(browser.page_source)
finally:
    # quit() ends the WebDriver session; placing it in finally ensures the
    # browser is shut down even when one of the steps above raises.
    browser.quit()
运行前需先下载 geckodriver.exe:https://github.com/mozilla/geckodriver/releases
该 BBS 的登录是点击(click)后触发的 JS 事件,登录控件不是 button 元素(因此代码中按 a 标签定位)。
网友评论