我的第一个爬虫作品,一天完成,纪念一下!
代码如下:
import requests
from bs4 import BeautifulSoup
# One listing URL per page: the offsets 0, 25, ..., 225 fill the `start`
# query parameter, giving ten URLs in total.
urls = [
    'https://movie.douban.com/top250?start={}&filter='.format(offset)
    for offset in range(0, 250, 25)
]
def get_movieinfo(urls, data=None, *, timeout=10):
    """Download one listing page and print one line per `.item` entry on it.

    For every element matching the CSS selector `.item`, six text fields are
    extracted and printed space-separated on a single line.

    Args:
        urls: Despite the plural name, a single page URL; it is passed
            straight to `requests.get`.
        data: Unused. Kept only so the signature stays compatible with
            existing callers.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        IndexError: If an entry lacks one of the expected elements or its
            text has fewer lines than expected.
    """
    # Without a timeout, requests waits indefinitely on a stalled connection.
    web_data = requests.get(urls, timeout=timeout)
    # Fail loudly on an error response instead of silently printing nothing.
    # NOTE(review): some sites reject the default requests User-Agent --
    # confirm whether a `headers=` argument is needed for this host.
    web_data.raise_for_status()
    soup = BeautifulSoup(web_data.text, 'lxml')

    # The loop variable is `item` so it does not clobber the `data` parameter.
    for item in soup.select('.item'):
        rank = item.select('em')[0].text
        # Second line of the first link's text inside `.info`.
        name = item.select('.info')[0].select('a')[0].text.split('\n')[1]
        score = item.select('.rating_num')[0].text
        link = item.select('a')[0]['href']
        # Split the `.bd` text once; both remaining fields are lines of it.
        # Judging by the variable names these are the director/cast line and
        # the year/country line -- depends on the page markup, TODO confirm.
        bd_lines = item.select('.bd')[0].text.split('\n')
        director_actor = bd_lines[2].lstrip(' ')
        time_country = bd_lines[3].lstrip(' ')
        print(rank, name, score, link, director_actor, time_country)
# Scrape every listing page in order, printing its entries as we go.
for page_url in urls:
    get_movieinfo(page_url)
结果截图如下:
网友评论