GET 请求原理
- 拼接 URL(并伪装请求头)
- 将 req(Request 对象)传给 urlopen
- 得到结果 response
作业
from urllib import request
from urllib.parse import urlencode
from fake_useragent import UserAgent
def beauty_girl(keyword='美女吧', max_pages=None, output_dir='./html/', timeout=10):
    """Download consecutive Baidu Tieba list pages and save each one as HTML.

    Every page is requested from ``https://tieba.baidu.com/f`` with the query
    parameters ``kw`` (the forum name) and ``pn`` (page index * 50), decoded
    as UTF-8 and written to ``<output_dir>/<keyword><page number>页.html``.

    Args:
        keyword: Forum name sent as the ``kw`` query parameter; also used as
            the prefix of every saved file name.
        max_pages: Maximum number of pages to download. ``None`` (default)
            keeps going until a request/write fails or the user presses
            Ctrl+C, matching the original open-ended loop. Pass an integer
            to bound the crawl.
        output_dir: Directory the HTML files are written to. It is created
            if it does not exist.
        timeout: Per-request timeout in seconds handed to ``urlopen`` so a
            stalled connection cannot block forever.

    Returns:
        None. Progress messages are printed to stdout.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import os

    base_url = 'https://tieba.baidu.com/f?'
    # One randomly chosen browser User-Agent is reused for every request.
    req_header = {
        'User-Agent': UserAgent().random,
    }
    os.makedirs(output_dir, exist_ok=True)

    # The original referenced an undefined counter `i`; the resulting
    # NameError was swallowed by a bare `except`, so nothing was downloaded.
    page = 0
    try:
        while max_pages is None or page < max_pages:
            print('正在下载...')
            query = urlencode({
                'kw': keyword,
                'pn': page * 50,
            })
            req = request.Request(url=base_url + query, headers=req_header)
            # Context manager guarantees the HTTP response is closed.
            with request.urlopen(req, timeout=timeout) as response:
                html_str = response.read().decode('utf-8')

            filename = '{}{}页.html'.format(keyword, page + 1)
            # Explicit encoding: the platform default may be unable to
            # represent the decoded page text.
            with open(os.path.join(output_dir, filename), 'w',
                      encoding='utf-8') as file:
                print('正在写入...')
                file.write(html_str)
            page += 1
    except KeyboardInterrupt:
        # Ctrl+C is the expected way to stop an unbounded crawl.
        pass
    except (OSError, UnicodeDecodeError) as err:
        # OSError covers URLError/HTTPError, socket timeouts and file-system
        # errors; report the cause instead of hiding it.
        print('下载中断: {}'.format(err))
    print('已结束')
# Script entry point: start the download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    beauty_girl()
网友评论