python学习笔记
声明：本学习笔记主要根据廖雪峰官方网站的 Python 教程及相关博客整理。
#糗百提取一页内容
# -*- coding=utf-8 -*-
import urllib
import urllib2
import re
import sys
# Python 2 only: re-expose sys.setdefaultencoding (removed from the module at
# interpreter start-up) and switch the implicit str/unicode conversion codec
# to UTF-8. Presumably here so that printing the decoded page text does not
# fail on non-ASCII characters -- confirm before removing.
reload(sys)
sys.setdefaultencoding('utf8')

# NOTE: `page` is not referenced below; the page number is hard-coded in `url`.
page = 1
url = 'https://www.qiushibaike.com/hot/page/1/'
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
# Send a browser-like User-Agent header with the request.
headers = {'User-Agent': user_agent}

# NOTE(review): in the source this literal was split across two physical
# lines (an unterminated string). It looks like an HTML line-break tag that
# was stripped when the snippet was extracted; '<br/>' is assumed here --
# TODO confirm against the actual page markup.
line_break_tag = '<br/>'

try:
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    try:
        content = response.read().decode("utf-8")
    finally:
        # Always release the connection, even if reading/decoding fails.
        response.close()
except urllib2.URLError as e:
    # HTTPError (a URLError subclass) carries `code`; a plain URLError
    # carries `reason`. Print whichever attributes are present.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)
else:
    # re.S lets '.' span newlines so a match can cover several lines.
    # NOTE(review): this pattern may also have lost HTML tag anchors during
    # extraction; it is kept exactly as found -- TODO confirm.
    # Earlier pattern attempt: '\n\n+(.*?)\n\n+'
    items = re.findall(r'\n+\s+(.*?)\s+\n+', content, re.S)
    for item in items:
        # Turn line-break tags into real newlines; items without the tag
        # are printed unchanged.
        print(item.replace(line_break_tag, '\n'))
网友评论