1.urllib方式验证用户名和密码
#-*- coding:utf-8 -*-
from urllib.request import HTTPPasswordMgrWithDefaultRealm,HTTPBasicAuthHandler,build_opener
from urllib.error import URLError
# Fetch a page protected by HTTP Basic authentication using urllib only.
# Replace the three placeholders below before running.
username = 'change the username to yours'
password = 'change the password to yours'
url = 'change to the url that you want to visit'

# Passing None as the realm registers the credentials as the default for
# whatever realm the server at `url` announces.
p = HTTPPasswordMgrWithDefaultRealm()
p.add_password(None, url, username, password)
auth_handler = HTTPBasicAuthHandler(p)
opener = build_opener(auth_handler)

try:
    # The context manager closes the HTTP response once it has been read.
    with opener.open(url) as result:
        html = result.read().decode('utf-8')
    print(html)
except URLError as e:
    print(e.reason)
2.Selenium方式验证用户名和密码
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
def login(page_url=None):
    """Open the login page, fill in the credentials and submit the form.

    Uses the module-level ``driver`` (the WebDriver instance) and ``WAIT``
    (the ``WebDriverWait`` bound to it).

    Args:
        page_url: Address of the login page. When omitted, the module-level
            ``url`` is used, so a zero-argument call behaves as before.

    Returns:
        The module-level ``driver``, so that a caller writing
        ``driver = login(url)`` keeps a usable WebDriver reference.
    """
    driver.get(url if page_url is None else page_url)  # load the page
    # Locate the user-name input of the form.
    username = WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#exampleInputUser")))
    # Locate the password input of the form.
    password = WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#exampleInputPassword")))
    # Locate the login button and wait until it is clickable.
    submit = WAIT.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="login-button"]')))
    username.send_keys("XXXX")  # replace with the real user name
    password.send_keys("XXXX")  # replace with the real password
    submit.click()  # simulate a mouse click on the button
    driver.refresh()  # reload the page
    return driver
if __name__ == '__main__':
    url = 'change to the url that you want to visit '
    # Headless Chrome alternative to the Firefox driver below:
    # options = webdriver.ChromeOptions()
    # options.add_argument('headless')
    # options.add_argument('disable-gpu')
    # driver = webdriver.Chrome(options=options)
    driver = webdriver.Firefox()
    WAIT = WebDriverWait(driver, 10)  # wait up to 10 seconds for elements
    # login() reads url, driver and WAIT from the module globals, so it is
    # called without arguments and its result is not assigned to `driver`
    # (rebinding `driver` here could discard the WebDriver reference).
    login()
3.urllib使用代理ProxyHandler
#-*- coding:utf-8 -*-
from fake_useragent import UserAgent
import urllib
from urllib.error import URLError
from urllib.request import ProxyHandler,build_opener
# Fetch a page through a proxy with urllib's ProxyHandler.
headers = {
    # Baidu's anti-crawler measure: requests without a User-Agent get redirected.
    'User-Agent': UserAgent().random,
}
url = 'https://www.baidu.com'  # replace with the address to visit
proxy_handler = ProxyHandler({
    'http': 'http://127.0.0.1:8080',    # replace with your own proxy
    'https': 'https://127.0.0.1:8080',  # replace with your own proxy
})
opener = build_opener(proxy_handler)
# The headers are attached to the Request object rather than to open().
request = urllib.request.Request(url, headers=headers)
try:
    response = opener.open(request)
    print(response.read().decode('utf-8'))
except URLError as e:
    print(e.reason)
4.urlencode构造url
#-*- coding:utf-8 -*-
from urllib.parse import urlencode
# Build a search URL by appending url-encoded query parameters to the base.
parms = dict(wd='kanshan', age=23)
base_url = 'https://www.baidu.com/s?'
url = f'{base_url}{urlencode(parms)}'
print(url)
5.quote构造带有中文参数的url
#-*- coding:utf-8 -*-
from urllib.parse import quote #url带有中文参数时使用
# Percent-encode the non-ASCII keyword before placing it in the query string.
keyword = '刘看山'
encoded_keyword = quote(keyword)
url = 'https://www.baidu.com/s?wd=' + encoded_keyword
print(url)
6.requests下的代理proxies设置
#-*- coding:utf-8 -*-
import requests
# Send a request through a proxy with requests. Plain HTTP proxy used for
# both schemes; replace the address with your own proxy.
proxies = {
    "http": "http://127.0.0.1:1080",
    "https": "http://127.0.0.1:1080",
}
# Alternative configurations are shown as separate examples: a dict literal
# that repeats a key silently keeps only the last value for that key, so
# they cannot share one dict with the entries above.
#
# Proxy that requires HTTP Basic Auth:
#     proxies = {"http": "http://user:password@host:port/"}
#
# SOCKS proxy (install the extra first: pip3 install 'requests[socks]'):
#     proxies = {
#         "http": "socks5://user:password@host:port",
#         "https": "socks5://user:password@host:port",
#     }
response = requests.get("https://www.baidu.com", proxies=proxies)
print(response)
7.requests提供的身份认证
#-*- coding:utf-8 -*-
import requests
from requests.auth import HTTPBasicAuth
# Credentials are passed through the `auth` keyword; requests.get() does not
# accept an `author` keyword and would raise TypeError.
r = requests.get('http://localhost:1080', auth=HTTPBasicAuth('username', 'password'))
print(r.status_code)
# Shorthand: a (username, password) tuple passed as `auth` is handled as
# HTTP Basic authentication, so HTTPBasicAuth need not be built explicitly.
import requests
credentials = ('username', 'password')
r = requests.get('http://localhost:1080', auth=credentials)
print(r.status_code)
# requests can also use other authentication schemes such as OAuth; this
# needs the requests_oauthlib package: pip3 install requests_oauthlib
import requests
from requests_oauthlib import OAuth1
url = 'https://api.twitter.com/1.1/search/tweets.json'
auth = OAuth1(
    client_key='your_app_key',
    client_secret='your_app_secret',
    resource_owner_key='user_oauth_token',
    resource_owner_secret='user_oauth_token_secret',
)
requests.get(url, auth=auth)
网友评论