1.网页数据的采集与urllib库
(图片:image.png)
urllib 是 Python 自带的标准库,无需额外安装。
#coding=utf-8
from urllib import request
# Fetch the Baidu home page and print its HTML source as text.
url = "http://www.baidu.com"

# The object returned by urlopen() works as a context manager; the
# with-block guarantees the underlying connection is closed even when
# read() or decode() raises. timeout=1 is the timeout in seconds for
# blocking operations such as the connection attempt.
with request.urlopen(url, timeout=1) as response:
    # read() returns raw bytes; decode them as UTF-8 before printing.
    print(response.read().decode('utf-8'))
bs4 库(BeautifulSoup,第三方库;下面的示例使用 lxml 解析器,需要先安装 beautifulsoup4 和 lxml)
from bs4 import BeautifulSoup
# Minimal HTML document used as input for the parsing demo below.
html_doc = """
<html>
<head>
<meta charset="utf-8">
<title>呵呵</title>
</head>
<body>
<!-- 网页控件元素,类似按钮/图片/文章什么的都写在这里 -->
</body>
</html>
"""

# Build the parse tree with the lxml parser.
soup = BeautifulSoup(html_doc, 'lxml')

# Show the whole parsed document in its pretty-printed form.
pretty_html = soup.prettify()
print(pretty_html)

# Look up the <title> tag once, then show the tag itself followed by
# only the text it contains.
title_tag = soup.title
print(title_tag)
print(title_tag.string)

# Show the <body> tag together with everything nested inside it.
body_tag = soup.body
print(body_tag)
网友评论