_post

作者: 錦魚 | 来源:发表于2018-11-16 23:13 被阅读0次
post大致逻辑
  • 构建表单(-->伪装请求头)
  • 传递表单 req
  • 获得结果 response
from urllib import request
from urllib.parse import urlencode
from fake_useragent import UserAgent


# Send a POST request with urllib.
web = UserAgent()
webr = web.random
# Target URL: a public test endpoint.
req_url = 'https://httpbin.org/post'


formdata = {
    'name':'崔',
    'age':12,
    'gender':'男',
    'class':'5081',
}


# The form data is processed in two steps:
# 1. urlencode() turns the dict into a URL-encoded string;
# 2. encode() turns that string into bytes, as required for a POST body.
data_tranfrom = urlencode(formdata).encode()

print(data_tranfrom)

# headers must be a mapping of header name -> value. Passing the bare
# user-agent string makes Request() fail when it iterates headers.items().
req = request.Request(url=req_url, headers={'User-Agent': webr})
# NOTE(review): timeout is in seconds; 1s is tight for a remote HTTPS call.
# The with-block closes the connection once the body has been read.
with request.urlopen(req, data=data_tranfrom, timeout=1) as response:
    # HTTP status code, e.g. 200, 404, 500
    print(response.status)

    p = response.read().decode()
print(p)

  • 作业
  • 有道翻译取数据
from urllib import request
from urllib.parse import urlencode
from fake_useragent import  UserAgent
import  json
#分析网页
#http://fanyi.youdao.com/


def youdao():
    """Translate a line of user input via the legacy Youdao web endpoint.

    Prompts for text on stdin, POSTs it as form data, then prints the HTTP
    status, the raw response text, the types of the raw and parsed
    responses, and finally the first translated segment.

    Raises:
        urllib.error.URLError: if the request fails.
        json.JSONDecodeError: if the response body is not valid JSON.
        KeyError / IndexError: if the JSON lacks ``translateResult[0][0]['tgt']``.
    """
    browser = UserAgent()
    # Target URL for the POST request. This is the older endpoint, which
    # does not require the encrypted `sign` parameter used by newer versions.
    req_url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null'
    inf = input('请输入信息')
    # Form fields observed in the browser's request.
    form_data = {
        'i':inf,
        'from':'AUTO',
        'to':'AUTO',
        'smartresult':'dict',
        'client':'fanyideskweb',
        'doctype':'json',
        'version':'2.1',
        'keyfrom':'fanyi.web',
        'action':'FY_BY_CLICKBUTTION',
        'typeResult':'false',
    }
    # URL-encode the form first, then turn it into bytes for the POST body.
    req_data = urlencode(form_data).encode('utf-8')

    # Disguise the request as a browser. The header name is 'User-Agent'
    # (hyphen); with an underscore the random user agent is sent under a
    # different header name and urllib's default User-Agent is used.
    req_header = {
        'User-Agent':browser.random,
    }
    # Build the request object.
    req = request.Request(url=req_url,data=req_data,headers=req_header)
    # Fetch the response; the with-block closes the connection afterwards.
    with request.urlopen(req) as response:
        # Status code
        rsp_num = response.status
        print(rsp_num)
        # Body
        rsp_inf = response.read().decode('utf-8').strip()
    print(rsp_inf)
    print(type(rsp_inf))
    # Parse the JSON text into Python objects.
    result_j = json.loads(rsp_inf)
    print(type(result_j))

    print('翻译结果结果是')
    print(result_j['translateResult'][0][0]['tgt'])

# Run the translator only when this file is executed as a script.
if __name__ == "__main__":
    youdao()

作业
  • 豆瓣登录
from urllib.parse import urlencode
from urllib import  request
import json

def douban():
    """POST the Douban login form and save the returned HTML to page.html.

    Prints the HTTP status code, then writes the decoded response body to
    ``page.html`` in the current working directory.

    Raises:
        urllib.error.URLError: if the request fails.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    req_url = 'https://accounts.douban.com/login'

    # NOTE(review): credentials are hard-coded here; consider loading them
    # from the environment instead of committing them to source.
    form_data = {
        'source':'movie',
        'redir':'https://movie.douban.com/',
        'form_email':'2332256766@qq.com',
        'form_password':'密码',
        'login':'登录',
    }

    req_header = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.7 Safari/537.36',
    }

    # URL-encode the form, then convert it to bytes for the POST body.
    req_data = urlencode(form_data).encode('utf-8')

    req = request.Request(url=req_url,data=req_data,headers=req_header)

    # The with-block closes the connection once the body has been read.
    with request.urlopen(req) as response:
        print(response.status)
        # Read the response body.
        html_content = response.read().decode('utf-8')

    # Write with an explicit encoding: the platform default (e.g. GBK or
    # cp1252 on Windows) may be unable to represent the page's characters.
    with open('page.html','w',encoding='utf-8') as file:
        file.write(html_content)

# Run the login request only when this file is executed as a script.
if __name__ == "__main__":
    douban()

作业
  • 拉勾网取数据
from urllib import request
from urllib.parse import urlencode
from fake_useragent import UserAgent
import json
#https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false
#通过url确定为post请求
# first:false
# pn:2
# kd:Java
# first:false
# pn:3
# kd:Java
# first:false
# pn:4
# kd:Java
def lagoSpider():
    """Query the Lagou position-search endpoint for pages 1 through 30.

    Prompts for a search keyword on stdin, POSTs one form per page, prints
    each HTTP status and raw response body, and passes every 200 response
    body to ``parse_data``.

    Raises:
        urllib.error.URLError: if a request fails.
    """
    browers = UserAgent()
    keyword = input('请输入查询内容')
    # Endpoint URL.
    req_url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'
    # Build the POST form data, one request per page number.
    for pg in range(1,31):
        form_data = {
            'first':'false',
            'pn': pg,
            'kd':keyword,
        }
        # URL-encode the form and convert it to bytes.
        req_data = urlencode(form_data).encode('utf-8')
        # Build the headers: a fresh random user agent per request, plus a
        # Referer header.
        req_header = {
            'User-Agent':browers.random,
            'Referer':'https://www.lagou.com/jobs/list_Java?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput='
        }
        req = request.Request(url=req_url,data=req_data,headers=req_header)
        # Close each response when done so the loop does not accumulate
        # open connections.
        with request.urlopen(req) as response:
            status = response.status
            print(status)

            if status != 200:
                continue
            print('请求成功')
            js_file = response.read().decode('utf-8')

        print(js_file)

        parse_data(js_file)


def parse_data(text):
    """Extract positions from a search-result JSON string and append them to jsontext.json.

    Each entry of ``content.positionResult.result`` is reduced to three
    fields and appended to ``jsontext.json`` (current working directory) as
    one JSON object per line. Also prints the type of the parsed result and
    the number of positions found.

    Args:
        text: JSON string containing ``content -> positionResult -> result``,
            a list of dicts with ``positionName``, ``firstType`` and
            ``companyFullName`` keys.

    Raises:
        json.JSONDecodeError: if ``text`` is not valid JSON.
        KeyError: if an expected key is missing.
    """
    # The response is a JSON string; convert it to Python data first.
    result = json.loads(text)
    print(type(result))

    positionResult = result['content']['positionResult']['result']

    print(len(positionResult))

    # NOTE(review): the 'postionName' output key looks like a typo for
    # 'positionName'; kept as-is so existing output files stay consistent.
    lines = [
        json.dumps(
            {
                'postionName': item['positionName'],
                'publishName': item['firstType'],
                'companyName': item['companyFullName'],
            },
            ensure_ascii=False,
        ) + '\n'  # real newline separator (was the literal character 'n')
        for item in positionResult
    ]

    # Mode 'a' appends to the end of the file, creating it if needed.
    # ensure_ascii=False emits raw non-ASCII text, so pin the encoding
    # rather than relying on the platform default.
    with open('jsontext.json','a',encoding='utf-8') as file:
        file.writelines(lines)

# Start the spider only when this file is executed as a script.
if __name__ == "__main__":
    lagoSpider()
  • 注意

反爬

  • 'Referer':'https://www.lagou.com/jobs/list_Java?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput='
  • cookie

相关文章

网友评论

      本文标题:_post

      本文链接:https://www.haomeiwen.com/subject/pojpfqtx.html