# Scrape gushiwen.org (古诗文网): log in through the site's captcha-protected form.
import requests
from bs4 import BeautifulSoup
import urllib.request
# Request headers passed to the HTTP calls in this script. The key must be
# spelled "User-Agent" exactly; a misspelled key is sent as an unrelated header
# and the client's default User-Agent goes out instead.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
}
def download_code(s, image_path='/Users/marine/Desktop/python/code.png'):
    """Fetch the login page, save its captcha image, and return the form tokens.

    Args:
        s: Session object used for both GET requests (``main`` passes a
            ``requests.Session``). Presumably the same session must be reused
            for the later login POST so the captcha matches -- confirm
            against the site.
        image_path: Where the captcha image is written. Defaults to the
            path the script originally hard-coded.

    Returns:
        A ``(viewstate, viewstate_generator)`` tuple holding the ``value``
        attributes of the ``__VIEWSTATE`` and ``__VIEWSTATEGENERATOR`` inputs
        found on the login page.

    Raises:
        requests.HTTPError: If the login page or the captcha image request
            returns an error status.
        RuntimeError: If the captcha image or one of the hidden inputs is
            missing from the page.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urljoin

    url = 'https://so.gushiwen.org/user/login.aspx?from=http://so.gushiwen.org/user/collect.aspx'
    r = s.get(url=url, headers=headers)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'lxml')

    # Locate the captcha image link.
    img = soup.find('img', id="imgCode")
    if img is None or not img.get('src'):
        raise RuntimeError('captcha image (img#imgCode) not found on the login page')
    # urljoin resolves root-relative, page-relative and absolute src values
    # alike; plain string concatenation only worked for root-relative ones.
    image_src = urljoin(url, img['src'])
    print(image_src)

    r_image = s.get(image_src, headers=headers)
    r_image.raise_for_status()
    with open(image_path, 'wb') as fp:
        fp.write(r_image.content)

    # Collect the two hidden inputs that the login form posts back.
    tokens = []
    for field_id in ('__VIEWSTATE', '__VIEWSTATEGENERATOR'):
        field = soup.find('input', id=field_id)
        if field is None or field.get('value') is None:
            raise RuntimeError('hidden input %r not found on the login page' % field_id)
        tokens.append(field['value'])
    return tokens[0], tokens[1]
def login(view, viewg, s, output_path='/Users/marine/Desktop/python/gushi.html'):
    """Prompt for the captcha, submit the login form, and save the response page.

    Args:
        view: Value posted as the ``__VIEWSTATE`` form field.
        viewg: Value posted as the ``__VIEWSTATEGENERATOR`` form field.
        s: Session object used to send the POST (``main`` passes the same
            ``requests.Session`` that downloaded the captcha).
        output_path: Where the response HTML is written. Defaults to the
            path the script originally hard-coded.

    Returns:
        None. The response body is written to ``output_path`` as UTF-8.
    """
    # The original URL ended in " HTTP/1.1", which looks like the protocol
    # suffix of a captured request line pasted in by mistake; it is not part
    # of the address.
    post_url = 'https://so.gushiwen.org/user/login.aspx?from='

    # Ask the user to type the captcha shown in the downloaded image.
    code = input('请输入验证码:')

    # NOTE(review): the account credentials are hard-coded in source; consider
    # reading them from the environment or a prompt instead.
    formdata = {
        '__VIEWSTATE': view,
        '__VIEWSTATEGENERATOR': viewg,
        'from': '',
        'email': '13522759641',
        'pwd': '123456',
        'code': code,
        'denglu': '登录',
    }
    r = s.post(url=post_url, headers=headers, data=formdata)

    # Save whatever page came back so the result can be inspected by hand.
    with open(output_path, 'w', encoding='utf-8') as fp:
        fp.write(r.text)
def main():
    """Run the login flow: download the captcha, then submit the login form.

    One session is shared by both steps and is closed when the flow finishes,
    including when a step raises.
    """
    # Create the session; the context manager closes it on exit.
    with requests.Session() as session:
        # Download the captcha image locally and collect the hidden form fields.
        view, viewg = download_code(session)
        # Send the login POST (address found with a packet-capture tool).
        login(view, viewg, session)
# Run the login flow only when this file is executed as a script.
if __name__ == '__main__':
    main()
# (Leftover heading from the source web page: "网友评论" -- reader comments.)