from bs4 import BeautifulSoup
import requests
def main(base_url, headers=None, timeout=10):
    """Fetch one listing page, print every movie on it, and return the next page URL.

    The page is expected to contain an ``<ol class="grid_view">`` whose
    ``<li>`` children each describe one movie, and a ``<span class="next">``
    holding the link to the following page.

    Args:
        base_url: Full URL of the listing page to fetch, e.g.
            ``https://movie.douban.com/top250?start=0``.
        headers: Optional dict of HTTP headers passed to ``requests.get``
            (for example a browser-like ``User-Agent``).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The absolute URL of the next page, or ``None`` when the page has no
        movie list or no "next" link (i.e. the caller should stop paging).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    req = requests.get(base_url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    req.raise_for_status()
    soup = BeautifulSoup(req.text, 'lxml')

    ol = soup.find("ol", class_="grid_view")
    if ol is None:
        # No movie list on this page: nothing to print and nowhere to go next.
        return None

    for li in ol.find_all('li'):
        img_src = li.find('img')['src']
        title = li.find('span', class_="title").text.strip()
        actor = li.find('div', class_="bd").p.get_text().strip()
        # The spans inside div.star are addressed by position: index 1 is
        # printed as the score and index 2 as the comment count.
        star_info_all = li.find('div', class_='star').find_all('span')
        mv_score = star_info_all[1].text.strip()
        comment_num = star_info_all[2].text.strip()

        print(title)
        print(img_src)
        print(actor)
        print(mv_score)
        print(comment_num)
        print('-' * 50)

    # The "next" span only contains an <a> when another page exists.
    next_span = soup.find('span', class_='next')
    next_a = next_span.find('a') if next_span is not None else None
    if next_a is None:
        return None
    next_url = next_a.get('href')
    if not next_url:
        return None
    # The href is a bare query string, so it is appended to the listing path.
    return 'https://movie.douban.com/top250' + next_url
if __name__ == '__main__':
    # Start at the first page and keep following the "next" link. The URL
    # must be reassigned on every iteration; otherwise the loop would fetch
    # the same page forever. main() returns a falsy value when there is no
    # further page, which ends the loop.
    n_url = 'https://movie.douban.com/top250?start=0'
    while n_url:
        n_url = main(n_url)
# 网友评论 ("User comments" -- stray text copied from the web page, not part of the script)