使用requests库和正则表达式爬取段子并保存到.txt文件
链接：https://github.com/Spacewe/python
import re
import requests
import sys
# Script body: download one page, pull the text of every <p>...</p> element out
# of it with a regular expression, echo each match to stdout and save all of
# them to neihan.txt.

# Python 2 only: make UTF-8 the interpreter-wide default codec so implicit
# unicode-to-bytes conversions (e.g. printing to a pipe) use UTF-8 rather than
# ASCII. Skipped on Python 3, where reload() is not a builtin and
# sys.setdefaultencoding() does not exist.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding("utf-8")

# Page to scrape.
url = "http://hahahahhaahah.com/"

# Browser-like User-Agent header sent with the request.
header = {'User-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}

# A timeout keeps the script from hanging forever on an unresponsive server.
haha = requests.get(url, headers=header, timeout=10)
# Decode the response body as UTF-8 when reading haha.text.
haha.encoding = 'utf-8'

# Non-greedy capture of each paragraph body; re.S lets '.' match newlines so
# paragraphs that span several lines are captured too.
heihei = re.findall(r'<p>(.*?)</p>', haha.text, re.S)

# 'wb' truncates any existing neihan.txt; the with-block closes the file even
# if printing or writing raises part-way through.
with open('neihan.txt', 'wb') as fp:
    for each in heihei:
        print(each)
        print('-' * 100)
        # The file is opened in binary mode, so encode explicitly instead of
        # relying on the default-encoding hack above.
        fp.write(each.encode('utf-8'))
        # Blank line between entries (the original note on this line reads
        # "prevent being overwritten").
        fp.write(b"\n\n")
网友评论