美文网首页
Python-爬取带有验证码的网站

Python-爬取带有验证码的网站

作者: 大森森_ | 来源:发表于2020-05-14 17:35 被阅读0次

    爬取古诗文网

    import requests
    from bs4 import BeautifulSoup
    import urllib.request

    # Request headers passed to the HTTP calls in this script; the value
    # identifies the client as desktop Chrome on macOS.
    # The key must be spelled "User-Agent" for it to replace the HTTP
    # library's default User-Agent header.
    headers={
        "User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
    }

    def download_code(s):
        """Fetch the login page, save its captcha image, and return the form tokens.

        Args:
            s: Session object providing ``get`` (``main`` passes a
                ``requests.Session``).

        Returns:
            A ``(viewstate, viewstategenerator)`` tuple holding the ``value``
            attributes of the hidden ``__VIEWSTATE`` and
            ``__VIEWSTATEGENERATOR`` inputs on the login page.

        Raises:
            ValueError: If the captcha image or either hidden input is not
                found in the downloaded page.
        """
        url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
        r = s.get(url=url, headers=headers)
        soup = BeautifulSoup(r.text, 'lxml')

        # Look up every required element first so a changed page layout fails
        # with a clear message instead of a TypeError on ``None[...]``.
        img = soup.find('img', id="imgCode")
        view_input = soup.find('input', id="__VIEWSTATE")
        viewg_input = soup.find('input', id="__VIEWSTATEGENERATOR")
        if img is None or view_input is None or viewg_input is None:
            raise ValueError('login page is missing the captcha image or hidden form fields')

        # Build the captcha image URL (the src attribute is appended to the site root).
        image_src = 'https://so.gushiwen.org' + img['src']
        print(image_src)

        # Download the captcha through the same session object and save it
        # locally so the user can read it.
        r_image = s.get(image_src, headers=headers)
        with open('/Users/marine/Desktop/python/code.png', 'wb') as fp:
            fp.write(r_image.content)

        # The two hidden parameters the login form requires.
        return view_input['value'], viewg_input['value']

    def login(view,viewg,s):
        """Submit the login form and save the response page to disk.

        Prompts on stdin for the captcha text, POSTs the form through ``s``,
        and writes the response body to ``gushi.html``.

        Args:
            view: Value for the ``__VIEWSTATE`` form field.
            viewg: Value for the ``__VIEWSTATEGENERATOR`` form field.
            s: Session object providing ``post`` (``main`` passes the same
                ``requests.Session`` used to fetch the captcha).

        Returns:
            None.
        """
        # The original URL ended with " HTTP/1.1", a fragment of the captured
        # request line; it is not part of the URL and would have been sent as
        # the ``from`` query value.
        post_url = 'https://so.gushiwen.org/user/login.aspx?from='

        # Ask the user to type the captcha shown in the downloaded image.
        code = input('请输入验证码:')

        # NOTE(review): credentials are hard-coded here; consider reading them
        # from the environment or a prompt instead of committing them to source.
        formdata = {
            '__VIEWSTATE': view,
            '__VIEWSTATEGENERATOR': viewg,
            'from': '',
            'email': '13522759641',
            'pwd': '123456',
            'code': code,
            'denglu': '登录',
        }
        r = s.post(url=post_url, headers=headers, data=formdata)

        # Save the returned page so the login result can be inspected.
        with open('/Users/marine/Desktop/python/gushi.html', 'w', encoding='utf-8') as fp:
            fp.write(r.text)

    def main():
        """Run the captcha login flow: fetch the captcha, then submit the form."""
        # One session is shared by both steps; the ``with`` block closes it
        # (and its pooled connections) when the flow finishes or fails.
        with requests.Session() as s:
            # Download the captcha image locally and collect the hidden form fields.
            view, viewg = download_code(s)
            # Send the login POST (target URL found with a packet-capture tool).
            login(view, viewg, s)

    # Run the flow only when executed as a script. The dunder names are
    # required: a bare ``name`` is undefined and raises NameError.
    if __name__=='__main__':
        main()

    相关文章

      网友评论

          本文标题:Python-爬取带有验证码的网站

          本文链接:https://www.haomeiwen.com/subject/rehxohtx.html