urllib.request
1.urllib.request.urlopen(url, data=None, [timeout, ]*)
1.1 url:网址
1.2data:post提交的数据,默认为None,当data不为None时,urlopen()提交方式为POST
1.3.一些方法:
import urllib.request  # fix: bare "import urllib" does not import the request submodule

# Fetch a page and demonstrate the HTTPResponse inspection API.
response = urllib.request.urlopen('https://python.org')  # fix: was misspelled "respone"
html = response.read().decode('utf-8')  # page body decoded as text
print('===================')
print(response.getheader('server'))  # value of the "Server" response header
print('===================')
print(response.getheaders())  # all response headers as a list of (name, value) tuples
print('===================')
print(response.fileno())  # underlying socket's file descriptor
print('===================')
print(response.version)  # HTTP protocol version (10 or 11)
print('===================')
print(response.status)  # status code: 200 OK, 404 not found, etc.
print('===================')
print(response.debuglevel)  # debug output level
print('===================')
print(response.closed)  # whether the response object is closed (bool)
print('===================')
print(response.geturl())  # URL actually retrieved (after redirects)
print('===================')
print(response.info())  # headers of the retrieved page
print('===================')
print(response.getcode())  # HTTP status code of the response
print('===================')
print(response.msg)  # reason phrase, e.g. "OK" on success
print('===================')
print(response.reason)  # status reason text
2.urllib.request.Request(url, data=None, headers={}, origin_req_host=None, unverifiable=False, method=None)
2.1 构建Request类,对请求进行完整包装,如加Headers伪装成浏览器
import urllib.request

# Wrap the URL in a Request so custom headers (browser-like User-Agent,
# Referer) can be attached before opening it.
url = "https://www.lagou.com/zhaopin/Python/?labelWords=label"
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
'Referer': 'https://www.lagou.com/zhaopin/Python/?labelWords=label',
'Connection': 'keep-alive'
}
# fix: original called request.Request/request.urlopen, but only
# "urllib.request" was imported, so the bare name "request" was undefined.
req = urllib.request.Request(url, headers=headers)
page = urllib.request.urlopen(req).read()
page = page.decode('utf-8')
print(page)
3.设置代理
#! /usr/bin/env python3
import urllib.request

# Route requests through explicit proxies, one dict entry per URL scheme.
# NOTE(review): urllib.request.ProxyHandler has no SOCKS support, so the
# 'socks5' entry is never used by http/https requests — confirm intent.
scheme_proxies = {
'socks5': 'localhost:1080',
'http': 'https://58.240.60.10:81',
'https': 'https://222.170.17.74:3128'
}
handler = urllib.request.ProxyHandler(scheme_proxies)
opener = urllib.request.build_opener(handler)
reply = opener.open('https://www.baidu.com')
print(reply.read())
4.cookies相关操作
4.1获取百度 Cookie 实例:
#! /usr/bin/env python3
import http.cookiejar, urllib.request

# Capture the cookies a server sets during a request, then list them.
jar = http.cookiejar.CookieJar()
cookie_processor = urllib.request.HTTPCookieProcessor(jar)
opener = urllib.request.build_opener(cookie_processor)
opener.open('https://www.baidu.com')
for c in jar:
    print(f"{c.name}={c.value}")
4.2用MozillaCookieJar处理cookie,保存cookies
import http.cookiejar, urllib.request

# Collect cookies from a request and persist them to disk in
# Mozilla/Netscape cookies.txt format.
filename = 'cookie.txt'
jar = http.cookiejar.MozillaCookieJar(filename)
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar))
opener.open('https://www.baidu.com')
# Keep session-only and already-expired cookies too, so the whole jar is written.
jar.save(ignore_discard=True, ignore_expires=True)
4.3用LWPCookieJar处理Cookie,加载cookie
import http.cookiejar, urllib.request

# Load previously saved cookies from disk and reuse them for a request.
# NOTE(review): LWPCookieJar reads the libwww-perl "Set-Cookie3" format;
# a file written by MozillaCookieJar cannot be loaded here — confirm that
# 'cookie.txt' was saved in LWP format.
jar = http.cookiejar.LWPCookieJar()
jar.load('cookie.txt', ignore_discard=True, ignore_expires=True)
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar))
reply = opener.open('https://www.baidu.com')
print(reply.read().decode('utf-8'))
5.必须备注的各个名词
1.urlopen(url,data,timeout)
2.Request(url,data,headers={},method=None)
3.ProxyHandler({})
4.http.cookiejar.CookieJar()
5.http.cookiejar.MozillaCookieJar(filepath)
http.cookiejar.LWPCookieJar()
6.http.cookiejar
7.HTTPCookieProcessor(cookieJar)
8.build_opener(handler)
9.install_opener(opener)
10.pathname2url(path)
11.BaseHandler
12.FileHandler
13.DataHandler
14.FTPHandler
15.CacheFTPHandler
16.UnknownHandler
17.HTTPError
网友评论