一、题目
还记得我们在第三关爬取了豆瓣 TOP250 的电影名、评分、推荐语和链接吗?现在,我们要把它们存储下来。请用今天课上学的 csv 和 excel 两种方式,分别存储下来哦~
import requests, bs4, csv
# Scrape the Douban Top 250 movie list (10 pages x 25 entries) and store each
# movie's rank, title, rating, one-line recommendation and detail-page link
# as one row of movie_top250.csv.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}

# `with` guarantees the file is flushed and closed even if a request or a
# parsing step raises part-way through.
# newline="" lets the csv module control line endings itself.
# An explicit encoding keeps the Chinese text from depending on the platform's
# locale default; the BOM written by utf-8-sig lets Excel detect UTF-8.
with open("movie_top250.csv", 'w', newline="", encoding="utf-8-sig") as file:
    writer = csv.writer(file)
    writer.writerow(["序号", "电影名", "评分", "推荐语", "链接"])

    for x in range(10):
        # Each page shows 25 movies; `start` is the offset of the first one.
        url = 'https://movie.douban.com/top250?start=' + str(x * 25) + '&filter='
        res = requests.get(url, headers=headers, timeout=10)
        # Fail loudly on an HTTP error instead of parsing an error page.
        res.raise_for_status()

        page = bs4.BeautifulSoup(res.text, 'html.parser')
        movie_list = page.find('ol', class_="grid_view")
        if movie_list is None:
            raise RuntimeError('movie list <ol class="grid_view"> not found at ' + url)

        for item in movie_list.find_all('li'):
            num = item.find('em', class_="").text
            title = item.find('span', class_="title").text
            rating = item.find('span', class_="rating_num").text
            url_movie = item.find('a')['href']
            # Not every entry has a recommendation line; write an empty cell then.
            inq = item.find('span', class_="inq")
            tes = inq.text if inq is not None else ""
            writer.writerow([num, title, rating, tes, url_movie])
网友评论