import requests
from bs4 import BeautifulSoup
def get_one_page(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text when the server answers with HTTP 200; ``None``
        for any other status code or when the request itself fails
        (connection error, timeout, invalid URL, ...).
    """
    # Browser-like User-Agent sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Report transport-level failures the same way as a non-200 status.
        return None
    if response.status_code == 200:
        return response.text
    return None
def page_parser(html):
    """Parse a board page, print each movie's score and return the records.

    Every ``<dd>`` element in the document is treated as one movie entry.

    Args:
        html: Markup of the page as a string. ``None`` or an empty string
            (e.g. the result of a failed download) yields an empty list
            instead of an error.

    Returns:
        A list with one dict per ``<dd>`` element, holding the keys
        ``index``, ``title``, ``image``, ``actor``, ``time`` and ``score``.

    Raises:
        IndexError, AttributeError, KeyError, TypeError: if a ``<dd>``
            element lacks one of the expected child elements/attributes.
    """
    if not html:
        # Nothing to parse; avoid handing None to BeautifulSoup.
        return []

    soup = BeautifulSoup(html, 'lxml')
    movies = []
    for dd in soup.select('dd'):
        # Ranking position
        num = dd.select('.board-index')[0].get_text()
        # Title
        name = dd.find(attrs={'class': 'name'}).a.string
        # Poster image URL (read from the data-src attribute)
        pic_src = dd.find(attrs={'class': 'board-img'}).attrs['data-src']
        # Cast, with surrounding whitespace removed
        star = dd.select('.star')[0].get_text().strip()
        # Release time
        releasetime = dd.select('.releasetime')[0].string
        # Score: the page splits it into an integer and a fraction element
        integer = dd.select('.integer')[0].string
        fraction = dd.select('.fraction')[0].string
        score = integer + fraction

        movies.append({
            'index': num,
            'title': name,
            'image': pic_src,
            'actor': star,
            'time': releasetime,
            'score': score,
        })
        print(score)
    return movies
def main():
    """Fetch the Maoyan board page and hand it to the parser."""
    import sys

    url = 'http://maoyan.com/board/4'
    html = get_one_page(url)
    if html is None:
        # get_one_page returns None when the page could not be retrieved;
        # the parser cannot work without markup, so stop here.
        print('Failed to fetch ' + url, file=sys.stderr)
        return
    page_parser(html)
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
# Reader comments (web-page footer text captured along with the script; not code)