from bs4 import BeautifulSoup
from urllib.request import urlopen
# Download the practice page and decode the body as UTF-8 text.
# The `with` block closes the HTTP response as soon as the body has been read,
# instead of leaving the connection open until garbage collection.
with urlopen("https://morvanzhou.github.io/static/scraping/list.html") as response:
    html = response.read().decode('utf-8')
print(html)
# <!DOCTYPE html>
# <html lang="cn">
# <head>
# <meta charset="UTF-8">
# <title>爬虫练习 列表 class | 莫烦 Python</title>
# <style>
# .jan {
# background-color: yellow;
# }
# .feb {
# font-size: 25px;
# }
# .month {
# color: red;
# }
# </style>
# </head>
#
# <body>
#
# <h1>列表 爬虫练习</h1>
#
# <p>这是一个在 <a href="https://morvanzhou.github.io/" >莫烦 Python</a> 的 <a href="https://morvanzhou.github.io/tutorials/data-manipulation/scraping/" >爬虫教程</a>
# 里无敌简单的网页, 所有的 code 让你一目了然, 清晰无比.</p>
#
# <ul>
# <li class="month">一月</li>
# <ul class="jan">
# <li>一月一号</li>
# <li>一月二号</li>
# <li>一月三号</li>
# </ul>
# <li class="feb month">二月</li>
# <li class="month">三月</li>
# <li class="month">四月</li>
# <li class="month">五月</li>
# </ul>
#
# </body>
# </html>
# Parse the downloaded markup with the lxml parser.
soup = BeautifulSoup(html, features='lxml')
# Select every <li> carrying the "month" class. An element with several
# classes (e.g. class="feb month") is matched as well, which is why the
# second month appears in the output below.
month = soup.find_all('li', {"class": "month"})
print("--------")
for m in month:
    # The loop body must be indented; get_text() yields the tag's visible text.
    print(m.get_text())
# 一月
# 二月
# 三月
# 四月
# 五月
# Locate the nested <ul class="jan"> list, then search only inside it so that
# the month-level <li> elements are not picked up.
jan = soup.find('ul', {"class": "jan"})
# find() returns None when nothing matches; fall back to an empty list rather
# than failing with AttributeError on `None.find_all`.
d_jan = jan.find_all('li') if jan is not None else []
print("--------")
for d in d_jan:
    # The loop body must be indented; prints one day entry per line.
    print(d.get_text())
# 一月一号
# 一月二号
# 一月三号
# 网友评论 ("reader comments" -- residue from the source web page, not part of the script)