python内置的http请求库
urllib.request 请求模块
urllib.error 异常处理模块
urllib.parse url解析模块
urllib.robotparser robots.txt解析模块
相比python2的变化
python2
import urllib2
response = urllib2.urlopen('http://www.baidu.com')
python3
import urllib.request
response = urllib.request.urlopen('http://www.baidu.com')
用法:
urllib.request.urlopen(url, data=None, [timeout, ]*, cafile=None, capath=None, cadefault=False, context=None)
例子1:
import urllib.request
response = urllib.request.urlopen('http://www.baidu.com')
print(response.read().decode('utf-8'))
例子2:
# http://httpbin.org/post 是一个HTTP测试用网址,以后可用于测试
import urllib.parse
import urllib.request
data = bytes(urllib.parse.urlencode({'word':'hello'}),encoding='utf-8')
response = urllib.request.urlopen('http://httpbin.org/post',data=data)
print(response.read())
例子3:
import urllib.request
response = urllib.request.urlopen('http://httpbin.org/get',timeout=1)
print(response.read())
例子4:
import socket
import urllib.request
import urllib.error
try:
    response = urllib.request.urlopen('http://httpbin.org/get',timeout = 0.1)
except urllib.error.URLError as e:
    if isinstance(e.reason,socket.timeout):
        print('TIME OUT')
例子5:
import urllib.parse
import urllib.request
data = bytes(urllib.parse.urlencode({'word':'hello'}),encoding='utf-8')
response = urllib.request.urlopen('http://httpbin.org/post',data=data)
print(response.read())
执行结果:
b'{\n "args": {}, \n "data": "", \n "files": {}, \n "form": {\n "word": "hello"\n }, \n "headers": {\n "Accept-Encoding": "identity", \n "Content-Length": "10", \n "Content-Type": "application/x-www-form-urlencoded", \n "Host": "httpbin.org", \n "User-Agent": "Python-urllib/3.7"\n }, \n "json": null, \n "origin": "106.114.219.48, 106.114.219.48", \n "url": "https://httpbin.org/post"\n}\n'
例子6:
import urllib.request
response = urllib.request.urlopen('http://httpbin.org/get',timeout=1)
print(response.read())
执行结果:
b'{\n "args": {}, \n "headers": {\n "Accept-Encoding": "identity", \n "Host": "httpbin.org", \n "User-Agent": "Python-urllib/3.7"\n }, \n "origin": "106.114.219.48, 106.114.219.48", \n "url": "https://httpbin.org/get"\n}\n'
例子7:
import socket
import urllib.request
import urllib.error
try:
    response = urllib.request.urlopen('http://httpbin.org/get',timeout = 0.1)
except urllib.error.URLError as e:
    if isinstance(e.reason,socket.timeout):
        print('TIME OUT')
执行结果:
TIME OUT
响应
响应类型
import urllib.request
response=urllib.request.urlopen('https://www.python.org')
print(type(response))
运行结果
<class 'http.client.HTTPResponse'>
状态码 响应头
import urllib.request
response = urllib.request.urlopen('https://www.python.org')
print(response.status)
print(response.getheaders())
print(response.getheader('Server'))
运行结果:
200
[('Server', 'nginx'), ('Content-Type', 'text/html; charset=utf-8'), ('X-Frame-Options', 'DENY'), ('Via', '1.1 vegur'), ('Via', '1.1 varnish'), ('Content-Length', '48280'), ('Accept-Ranges', 'bytes'), ('Date', 'Fri, 26 Apr 2019 07:14:38 GMT'), ('Via', '1.1 varnish'), ('Age', '3146'), ('Connection', 'close'), ('X-Served-By', 'cache-iad2136-IAD, cache-tyo19931-TYO'), ('X-Cache', 'HIT, HIT'), ('X-Cache-Hits', '2, 5565'), ('X-Timer', 'S1556262878.244237,VS0,VE0'), ('Vary', 'Cookie'), ('Strict-Transport-Security', 'max-age=63072000; includeSubDomains')]
nginx
import urllib.request
response = urllib.request.urlopen('https://www.python.org')
print(response.read().decode())
网友评论