from bs4 import BeautifulSoup
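# Standard selectors
# find(): actually just calls find_all() with the limit parameter set to 1, so it returns only the first match
# find_all(name, attrs, text): looks up whole tags by tag name, attributes, or text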
# Sample HTML document that every query below runs against.
# Each <li> must be closed with </li>; otherwise the parser keeps the stray
# characters as part of the item text and the text/.string lookups stop matching.
html = '''
<div class="panel">
<div class="panel-heading">
<h4>Hello</h4>
</div>
<div class="panel-body">
<ul class="list" id="list-1">
<li class="element">Foo</li>
<li class="element">Bar</li>
<li class="element">Jay</li>
</ul>
<ul class="list list-small" id="list-2">
<li class="element">Foo</li>
<li class="element">Bar</li>
</ul>
</div>
</div>
'''
# Parse the document with the lxml backend (the lxml package must be installed).
soup = BeautifulSoup(html, 'lxml')
# Query by tag name: find_all() returns a list of every matching tag.
print(soup.find_all('ul'))
print(soup.find_all('li'))
print('----------------------------------------')
# Query by attribute value, passing the attributes as a dict via attrs=...
print(soup.find_all(attrs={'id': 'list-2'}))
print(soup.find_all(attrs={'class': 'list-small'}))
print('----------------------------------------')
# ...or as keyword arguments. `class` is a reserved word in Python, so
# Beautiful Soup spells that keyword `class_`.
print(soup.find_all(class_='list-small'))
print(soup.find_all(id='list-2'))
print('----------------------------------------')
# Query by text content: this returns the matching strings themselves,
# not the tags that contain them.
# `string=` is the current name of this filter (Beautiful Soup >= 4.4.0);
# the older `text=` spelling is deprecated.
print(soup.find_all(string='Foo'))
print('----------------------------------------')
# CSS selectors
# Select by tag name: every <li> that is a descendant of a <ul>.
print(soup.select('ul li'))
print('---------------------------------')
# Select by id (#...) and by class (....): the <li> items of the second list.
print(soup.select('#list-2 li'))
print(soup.select('.list-small li'))
# Print only the text of each selected item.
for item in soup.select('.list-small li'):
    print(item.string)
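# Reader comments (appears to be a page-footer heading left over from the web page this script was copied from)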