- 使用python包管理工具安装BeautifulSoup
pip install beautifulsoup4 lxml
- 新建2.py文件,将以下代码拷贝到文件中
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Fetch the page. The context manager closes the HTTP response as soon as the
# body has been read and parsed, even if parsing raises.
with urlopen("http://www.toutiao.com") as html:
    bsObj = BeautifulSoup(html.read(), 'lxml')
# Prints the whole <title> tag, e.g. "<title>...</title>".
print(bsObj.title)
- 运行python 2.py,可以看到打印出了 “<title>今日头条</title>”,这样就成功拿到了页面的标题
- 添加异常处理
将2.py中的代码替换为以下代码
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from urllib.request import urlopen
from urllib.error import HTTPError
from bs4 import BeautifulSoup
def getTitle(url):
    """Fetch ``url`` and return the page's ``<title>`` tag.

    Args:
        url: Address of the page to download.

    Returns:
        The string "http异常" if the page could not be fetched, the string
        "标签异常" if looking up the tag raised ``AttributeError``, otherwise
        whatever ``bsObj.title`` evaluates to for the parsed page.
    """
    # Function-scope import so this block does not depend on the file header.
    # URLError is the base class of HTTPError, so catching it handles both bad
    # HTTP statuses and connection-level failures (DNS error, refused
    # connection), which previously escaped as an uncaught exception.
    from urllib.error import URLError

    try:
        html = urlopen(url)
    except URLError:  # HTTP / connection error handling
        return "http异常"
    try:
        # Close the response once its body has been read and parsed.
        with html:
            bsObj = BeautifulSoup(html.read(), 'lxml')
        title = bsObj.title
    except AttributeError:  # tag lookup error handling
        return "标签异常"
    return title
# Fetch the title, then report the "not found" case separately: getTitle
# hands back None when the parsed page has no <title> tag.
title = getTitle('http://www.toutiao.com')
if title is None:  # identity check is the correct way to test for None
    print("title 没有找到")
else:
    print(title)
喜欢就点个赞吧!!!
网友评论