handler和opener.py
import urllib
from urllib import request
# urlopen() is just a special, pre-built opener:
#     urllib.request.urlopen(url)
# Handling cookies or proxies requires a custom opener assembled from handlers.

# Handler object: the plain HTTP handler.
# Alternative: urllib.request.HTTPHandler(debuglevel=1) -- debuglevel=1 sets
# the debug level so the exchange is logged to the console.
http = urllib.request.HTTPHandler()
# print(http)

# Opener object; build_opener() must be given the handler object(s).
opener = urllib.request.build_opener(http)

# Install the opener globally: later urlopen() calls will also use it.
urllib.request.install_opener(opener)

# Open the URL. The response is used as a context manager so that the
# underlying connection is closed as soon as the body has been printed.
with opener.open("http://www.baidu.com") as response:
    print(response)
    print(response.read().decode())

# The same request through urlopen():
#     response = request.urlopen('http://www.baidu.com')
#     print(response.read().decode())
cookie.py
from http import cookiejar
from urllib import request
# Fetch the cookies a server sets.

# Cookie jar that the handler below fills in.
cookies = cookiejar.CookieJar()
# print(cookies)

# Handler object wired to the jar.
cookie_handler = request.HTTPCookieProcessor(cookies)

# Opener object built from the cookie handler.
opener = request.build_opener(cookie_handler)

# The body is not needed here (only the cookies are), so the response is
# closed right away instead of being left open.
response = opener.open("http://www.baidu.com")
response.close()

# The jar now holds Baidu's cookies.
print(cookies)
for cookie in cookies:
    # print(cookie.__dict__)  # uncomment to dump every attribute of the cookie
    print(cookie.name, ":", cookie.value)
下载cookie.py
import urllib
from http import cookiejar
from urllib import request
# LWPCookieJar is a subclass of FileCookieJar, which is a subclass of CookieJar.
filename = "baiducookie.txt"  # file the cookies are stored in
cookies = cookiejar.LWPCookieJar(filename=filename)

# Handler
cookie_handler = request.HTTPCookieProcessor(cookies)

# Opener
opener = request.build_opener(cookie_handler)

# The request is made only to populate the jar; close the response right away
# instead of leaving the connection open.
response = opener.open("http://www.baidu.com")
response.close()
# print(cookies)

# To make the opener global (later urlopen() calls then use it as well):
#     request.install_opener(opener)
#     response = request.urlopen("http://www.baidu.com")

# Save the cookies to `filename`.
cookies.save()
# If that fails, try:
#     cookies.save(ignore_discard=True, ignore_expires=True)
重复使用cookie.py
import urllib
from http import cookiejar
from urllib import request
# Reuse cookies that an earlier run stored on disk.
cookies = cookiejar.LWPCookieJar()

# Load the locally saved cookies into the jar.
cookies.load(filename="baiducookie.txt")

# Handler and opener that send the loaded cookies along with the request.
cookie_handler = request.HTTPCookieProcessor(cookies)
opener = request.build_opener(cookie_handler)

# Context manager guarantees the response is closed after the body is read.
with opener.open("http://www.baidu.com") as response:
    print(response.read())
代理IP和IP代理池和UA池.py
import random
import urllib
from urllib import request
# Default request headers; the User-Agent is replaced further down by a
# randomly chosen entry from the UA pool.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
}

# Using a proxy
# proxy = {'http': "61.135.155.82:443"}
# Using CCProxy (proxy URL carries user name and password)
proxy = {'http': 'http://user1:123456@10.20.154.59:808'}
# proxy = {'http': 'http://10.20.154.59:808'}

# Proxy IP pool
proxy_list = [
    {'http': "61.135.155.82:443"},
    {'http': "61.183.233.6:54896"},
    {'https': "218.249.45.162:35586"},
    {'https': "14.118.135.10:808"},
    # ...
]

# UA pool (User-Agent pool)
user_agent_list = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
    "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Mobile Safari/537.36",
]

# Set up the proxy.
# To pick a random proxy from the pool instead of the fixed one above:
#     proxy = random.choice(proxy_list)
#     print(proxy)
proxy_handler = request.ProxyHandler(proxies=proxy)
opener = request.build_opener(proxy_handler)

url = "http://www.ifeng.com/"
req = request.Request(url, headers=headers)
# Pick a random User-Agent from the UA pool for this request.
req.add_header("User-Agent", random.choice(user_agent_list))

# Context manager closes the response once the page has been printed.
with opener.open(req) as res:
    print(res.read().decode())
requests的基本使用.py
import json
import requests
# ---------------------------------------------------------------------------
# GET request
# ---------------------------------------------------------------------------
# Generic form: requests.request('get', 'url')
response = requests.get('http://www.baidu.com')
print(response)  # response object, e.g. <Response [200]>

# Things worth inspecting on the response object:
#     response.__dict__
#     response.status_code      # 200
#     response.url              # http://www.baidu.com/
#     response.cookies          # cookies
#     response.encoding         # ISO-8859-1
# Response body as a string:
#     response.text             # type: <class 'str'>
# Response body as bytes:
#     response.content
#     response.content.decode()

# Passing parameters with GET
#   option 1: append them directly to the URL
#       response = requests.get('http://www.baidu.com/s?wd=DG')
#   option 2: hand them over through `params`
response = requests.get(
    'http://www.baidu.com/s',
    params={'wd': 'DG'},
)
# print(response.text)

# ---------------------------------------------------------------------------
# POST request
# ---------------------------------------------------------------------------
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.102 Safari/537.36"
    ),
}

# Youdao translation: ask the user for the Chinese text to translate.
wd = input('请输入要翻译的中文:')

url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"

# Form fields posted to the endpoint; "i" carries the user's input.
data = {
    "i": wd,
    "from": "zh-CHS",
    "to": "en",
    "smartresult": "dict",
    "client": "fanyideskweb",
    "salt": "1543306416677",
    "sign": "68fcd812e1290ca9154edd145acffec0",
    "doctype": "json",
    "version": "2.1",
    "keyfrom": "fanyi.web",
    "action": "FY_BY_CLICKBUTTION",
    "typoResult": "false",
}

response = requests.post(url, headers=headers, data=data)
# print(response.text)

# JSON parsing with the parser built into requests
# (manual alternative: dic = json.loads(response.text)).
dic = response.json()
# print(dic)

# Pull the 'tgt' field out of the first entry of the first group.
first_group = dic['translateResult'][0]
result = first_group[0]['tgt']
print(result)
requests使用代理IP.py
import requests
# Request headers sent with the proxied request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.102 Safari/537.36"
    ),
}

# Proxy mapping: scheme -> proxy URL (this one embeds user name and password).
proxy = {'http': 'http://user1:123456@10.20.154.59:808'}

# Fetch the page through the proxy and print its text.
response = requests.get(
    'http://www.ifeng.com',
    headers=headers,
    proxies=proxy,
)
print(response.text)
requests使用cookie.py
import requests
# Request headers sent along with the cookies.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.102 Safari/537.36"
    ),
}

# Cookies submitted to the server with the request.
cookies = {
    "QZ_FE_WEBP_SUPPORT": "1",
    "pgv_pvid": "5307205432",
}
response = requests.get(
    'http://www.baidu.com',
    cookies=cookies,
    headers=headers,
)

# Cookies received back from the server: first the jar itself, then the same
# data converted to a plain dict.
res_cookies = response.cookies
print(res_cookies)
print(requests.utils.dict_from_cookiejar(res_cookies))
# Sample output of the dict form:
# {'H_PS_PSSID': '1452_21091_26350_20718', 'delPer': '0', 'BDSVRTM': '0', 'BD_HOME': '0'}
requests使用session.py
import requests
# Request headers used for both the login and the follow-up request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
}

# Biquge: login endpoint and the form fields posted to it.
url = "https://www.biquge5200.cc/u/login.htm"
data = {
    "name": "niejeff",
    "password": "E10ADC3949BA59ABBE56E057F20F883E",
    "autoLogin": 1,
    "autologin": 1,
}

# A session keeps cookies between requests. It is used as a context manager
# so its pooled connections are released when the script is done with it.
with requests.Session() as session:
    # After this request completes, the session automatically holds the
    # cookies set by the login response.
    res = session.post(url, data=data, headers=headers)
    # print(res.text)  # {"flag":"success","data":""}

    # NOTE: the banner is printed unconditionally; the login result is not
    # checked here.
    print("============= 登录成功后 ==============")

    # Visit the personal centre ("my bookshelf" page) that requires login:
    # https://www.biquge5200.cc/home/
    url = "https://www.biquge5200.cc/home/"
    res2 = session.get(url, headers=headers)
    print(res2.text)
requests忽略证书.py
import requests
# Request headers for the certificate-verification example.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.102 Safari/537.36"
    ),
}

# verify:
#   verify=True   the certificate must be verified (this is the default)
#   verify=False  certificate verification is skipped; a warning is shown
res = requests.get(
    'https://www.baidu.com',
    verify=False,
    headers=headers,
)
# print(res.text)
# Related standard-library helper: ssl._create_unverified_context()

# auth
# Request against GitHub using a (user name, password) tuple.
auth = ('name', '123456')  # fill in your own user name and password
response = requests.get("https://api.github.com/user", auth=auth)
print(response.text)
网友评论