Python搜房网的抓取
记录者:zff
时间:12/11/2016 12:21:09 PM
import urllib
import urllib.request
import gzip
# Fetch one listing page from fang.com and save its decoded HTML to a text file.

# Browser-like User-Agent, plus a request for a gzip-compressed response body.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
    'Accept-Encoding': 'gzip',
}
url = 'http://newhouse.cs.fang.com/house/web/newhouse_sumall.php?page=1'

request = urllib.request.Request(url, headers=headers)
# The context manager closes the HTTP connection even if reading fails;
# the timeout keeps a stalled server from hanging the script indefinitely.
with urllib.request.urlopen(request, timeout=30) as response:
    raw = response.read()

# A server may ignore Accept-Encoding and reply uncompressed, in which case
# gzip.decompress() would raise. Only decompress when the payload actually
# starts with the gzip magic number (0x1f 0x8b).
if raw[:2] == b'\x1f\x8b':
    raw = gzip.decompress(raw)

# Decode as GB2312, silently dropping any bytes that are invalid in that charset.
html = raw.decode('gb2312', 'ignore')

# NOTE(review): the file is opened with the platform's default text encoding,
# as in the original; on a locale that cannot represent Chinese characters
# the write may raise UnicodeEncodeError -- confirm the intended encoding.
with open("C:\\Users\\Administrator\\Desktop\\hello.txt", "w") as f:
    f.write(html)
网友评论