1.requests_post请求
import requests

# POST request demo: submit form data to an Ajax endpoint and decode the JSON
# reply.
#
# Target URL. The query string has to follow the "?" directly: a space in that
# position would be percent-encoded by requests and become part of the query
# parameter name.
url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'

# Form fields uploaded in the POST body (handed over through ``data=``).
form_data = {
    'first': 'true',
    'pn': 1,
    'kd': 'python',
}

# Request headers sent along with the form.
req_header = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Referer': 'https://www.lagou.com/jobs/list_python?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput=',
}

response = requests.post(url, data=form_data, headers=req_header)
print(response.status_code)
print(response.text)

# response.json() converts the returned JSON string into Python data types.
data = response.json()
print(type(data))
2.requests_post_file文件上传
import requests

# File upload demo against a public test endpoint.
url = 'https://httpbin.org/post'

# Open the file inside a ``with`` block so the handle is closed as soon as the
# upload has finished. Binary mode is what the requests documentation
# recommends for uploaded files.
with open('cookies.txt', 'rb') as upload_file:
    files = {
        'file': upload_file,
    }
    response = requests.post(url, files=files)

print(response.status_code)
print(response.text)
3.requests_auth客户端验证
# Web client authentication demo.
import requests

# Address of the server that asks for credentials.
url = 'http://192.168.1.110'

# Authentication info as a (username, password) pair, passed via ``auth=``.
auth = ('username', 'password')

response = requests.get(
    url,
    auth=auth,
)
print(response.status_code)
4.requests_cookie模拟登录
# Using cookies with requests: simulate a login, then reuse the returned
# cookies for a follow-up request.
import requests

# Login endpoint, found by inspecting the site's login request:
#   https://www.douban.com/accounts/login
# Form fields submitted when no captcha is shown:
#   source, form_email, form_password
# Additional fields submitted when a captcha is shown:
#   captcha-solution, captcha-id
# NOTE(review): the captcha values presumably have to be refreshed for every
# login attempt -- confirm against the live login form.
# NOTE(review): the account credentials below are hard-coded in the source;
# they should be loaded from configuration or the environment instead.
url = 'https://www.douban.com/accounts/login'
form_data = {
    'source': 'index_nav',
    'form_email': '18518753265',
    'form_password': 'ljh12345678',
    'captcha-solution': 'violent',
    'captcha-id': 'AuKNJ1FIktyrmpljJ6WAzXo3:en',
}

# Request headers.
req_header = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}

# Send the login request.
response = requests.post(url, headers=req_header, data=form_data)

# response.cookies exposes the cookies returned with the response.
print('模拟登录后的cookies信息', response.cookies)
print(type(response.cookies))
print(response.headers)

# Write with an explicit encoding so saving the page does not depend on the
# platform's default text encoding.
with open('douban.html', 'w', encoding='utf-8') as html_file:
    html_file.write(response.text)

# requests.utils.cookiejar_from_dict(): convert a dict into a CookieJar.
# requests.utils.dict_from_cookiejar(): convert a CookieJar into a dict.
cookies_dict = requests.utils.dict_from_cookiejar(response.cookies)
print(cookies_dict)

# After logging in, request the personal profile page. Getting the profile
# content back indicates that the login cookies were captured and sent along
# with this request.
url = 'https://www.douban.com/people/175417123/'

# Pass the cookies explicitly to make the request on behalf of the user.
response = requests.get(url, headers=req_header, cookies=cookies_dict)
if response.status_code == 200:
    with open('douban1.html', 'w', encoding='utf-8') as html_file:
        html_file.write(response.text)
5.requests_proxies代理
# Configuring proxies with the requests module.
import requests

# Map each URL scheme to the proxy used for it. A dict literal keeps only the
# last value given for a repeated key, so exactly one entry per scheme may be
# active at a time.
proxies = {
    'http': '219.238.186.188:8118',
    'https': '222.76.204.110:808',
    # Form for proxies that require authentication; fill in the placeholders
    # and use these entries INSTEAD of the ones above:
    # 'https': 'https://username:password@ip:port',
    # 'http': 'http://username:password@ip:port',
}

url = 'https://httpbin.org/get'
response = requests.get(url, proxies=proxies, timeout=10)
print(response.text)
6.requests_session会话
# requests.session(): keeps a session alive so that certain parameters are
# preserved across requests.
import requests

# Create the session object.
session = requests.session()

# Login endpoint.
# NOTE(review): the account credentials below are hard-coded in the source;
# they should be loaded from configuration or the environment instead.
url = 'https://www.douban.com/accounts/login'
form_data = {
    'source': 'index_nav',
    'form_email': '18518753265',
    'form_password': 'ljh12345678',
    'captcha-solution': 'stamp',
    'captcha-id': 'b3dssX515MsmNaklBX8uh5Ab:en',
}

# Request headers.
req_header = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}

# Send the login request through the session.
response = session.post(url, headers=req_header, data=form_data)
if response.status_code == 200:
    # Visit the personal profile page with the same session; no cookies are
    # passed by hand here.
    url = 'https://www.douban.com/people/175417123/'
    response = session.get(url, headers=req_header)
    if response.status_code == 200:
        # Explicit encoding so saving the page does not depend on the
        # platform's default text encoding.
        with open('douban3.html', 'w', encoding='utf-8') as html_file:
            html_file.write(response.text)
7.requests使用
# Install with: pip3 install requests
#
# requests is a higher-level HTTP library that covers what urllib offers while
# exposing a simpler, more convenient API.
import requests

# Parameters accepted by requests.request() and the get()/post() helpers:
#   method          -- type of request to send
#   url             -- target URL of the request
#   params          -- parameters appended to the URL of a GET request
#   data            -- dictionary with the form data of a POST request
#   json            -- JSON payload, used similarly to ``data``
#   headers         -- (optional) dictionary of request headers
#   cookies         -- (optional) dict or CookieJar used to act as a user
#   files           -- files to upload
#   auth            -- credentials (account and password) the site requires
#   timeout         -- timeout of the request
#   allow_redirects -- bool, whether redirects are followed
#   proxies         -- (optional) dictionary configuring proxies
#   verify          -- defaults to ``True`` (certificate verification on);
#                      set to False to skip certificate verification

# url = 'http://www.baidu.com/'
url = 'http://www.sina.com'

req_header = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}

# Query parameters for the commented-out GET variant below.
parmars = {
    'wd': '豆瓣',
}

# response = requests.get(url,params=parmars,headers=req_header)
response = requests.get(url, headers=req_header)

# If response.text comes out garbled, either
#   1. decode the raw bytes yourself: response.content.decode('<encoding>'), or
#   2. set response.encoding = '<encoding>' before reading response.text.
response.encoding = 'utf-8'

# Decoded body as a string.
html = response.text

# Raw body as bytes.
b_html = response.content

# Status code.
code = response.status_code

# Response headers.
response_headers = response.headers

# Headers of the request that was sent.
req_headers = response.request.headers

# URL of the current request.
current_url = response.url

# response.json() can convert a JSON string into Python data types.
print(code)
print(html)
8.requests证书认证问题
import requests

url = 'https://www.baidu.com/'

# ``verify`` defaults to True, meaning the CA certificate of the site is
# verified. When a request fails because of an SSL certificate problem,
# passing verify=False skips that verification.
response = requests.get(
    url,
    verify=False,
)
print(response.status_code)
网友评论