简单记录一下 bs4(BeautifulSoup)的使用。
# -*- coding: utf-8 -*-
import io

import bs4
import requests
# Default page to scrape: a Baidu Baike entry. The percent-encoded path
# segment is the UTF-8 URL encoding of the entry title "舌尖上的中国".
goal_url = (
    "https://baike.baidu.com/item/"
    "%E8%88%8C%E5%B0%96%E4%B8%8A%E7%9A%84%E4%B8%AD%E5%9B%BD"
    "/9081375"
)
def start_parse(url, timeout=10, output_path='archives.txt'):
    """Download *url* and save the text of every ``<dd>`` element.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``lxml`` parser. The text of each ``<dd>`` tag is written to
    *output_path* as UTF-8 and echoed to stdout.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.
        output_path: File that receives the extracted text; it is
            overwritten on every call.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # print(...) with a single argument is valid on both Python 2 and 3.
    print("开始获取(%s)内容" % url)
    # Without a timeout, requests may block forever on a silent server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than scraping the body of an error page.
    response.raise_for_status()
    print("获取网页内容完毕")

    soup = bs4.BeautifulSoup(response.content.decode("utf-8"), 'lxml')

    # The with-statement guarantees the file is closed. io.open accepts an
    # explicit encoding on Python 2 and 3 alike, so text is written directly
    # as UTF-8 instead of hand-encoding to bytes (which a text-mode file
    # rejects on Python 3).
    with io.open(output_path, 'w', encoding='utf-8') as f:
        for archive in soup.select("dd"):
            text = archive.get_text()
            f.write(text)
            print(text)
if __name__ == '__main__':
    # Scrape the default page only when run as a script, not on import.
    start_parse(goal_url)
网友评论