方法一、写入Excel文件中
import requests,random,bs4,openpyxl
# Scrape the Douban Top 250 movie list (10 pages, 25 movies per page) and
# store rank, title, rating and detail-page link in an Excel workbook.
# Each movie is also echoed to stdout together with its one-line
# recommendation blurb when the page provides one.

# A browser-like user agent is sent with every request.
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}

wb = openpyxl.Workbook()  # create a new workbook
sheet = wb.active  # the workbook's active worksheet
sheet.title = 'douban'  # rename the worksheet
# Header row. NOTE: the 'comment' column receives the rating score
# (the text of <span class="rating_num">), not a textual comment.
sheet['A1'] = 'rank'
sheet['B1'] = 'name'
sheet['C1'] = 'comment'
sheet['D1'] = 'link'

for x in range(10):  # one iteration per result page
    url = 'https://movie.douban.com/top250?start=' + str(x*25) + '&filter='
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status() turns an HTTP error response into an
    # explicit exception instead of a confusing parse failure further down.
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()
    bs = bs4.BeautifulSoup(res.text, 'html.parser')
    bs = bs.find('ol', class_='grid_view')
    if bs is None:
        # The expected list container is missing from the returned page.
        raise RuntimeError('movie list <ol class="grid_view"> not found in ' + url)
    for titles in bs.find_all('li'):  # one <li> per movie
        num = titles.find('em', class_='').text
        title = titles.find('span', class_='title').text
        comment = titles.find('span', class_='rating_num').text
        url_movie = titles.find('a')['href']
        sheet.append([num, title, comment, url_movie])
        # The recommendation blurb is optional, so look it up once and
        # branch on whether it exists.
        inq = titles.find('span', class_="inq")
        if inq is not None:
            tes = inq.text
            print(num + '.' + title + '——' + comment + '\n' + '推荐语:' + tes + '\n' + url_movie)
        else:
            print(num + '.' + title + '——' + comment + '\n' + '\n' + url_movie)

wb.save('douban.xlsx')  # write the workbook to disk once all pages are done
方法二、存为.csv格式
import requests, random, bs4,csv
# Scrape the Douban Top 250 movie list (10 pages, 25 movies per page) and
# store rank, title, rating and detail-page link in a CSV file.
# Each movie is also echoed to stdout together with its one-line
# recommendation blurb when the page provides one.

# A browser-like user agent is sent with every request.
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}

# newline='' lets the csv module control line endings (avoids blank rows on
# Windows). The encoding is explicit so the output does not depend on the
# platform locale; 'utf-8-sig' writes a BOM so spreadsheet programs detect
# UTF-8 and display the Chinese text correctly. The with-statement closes
# the file even if a request or a parse step raises.
with open('movieTop250.csv', 'w', newline='', encoding='utf-8-sig') as csv_file:
    writer = csv.writer(csv_file)
    # Header row. The third column holds the rating score (the text of
    # <span class="rating_num">), so it is labelled '评分' (rating).
    writer.writerow(['排名', '电影', '评分', '电影链接'])
    for x in range(10):  # one iteration per result page
        url = 'https://movie.douban.com/top250?start=' + str(x*25) + '&filter='
        # A timeout keeps the script from hanging forever on a stalled
        # connection; raise_for_status() turns an HTTP error response into
        # an explicit exception instead of a confusing parse failure later.
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()
        bs = bs4.BeautifulSoup(res.text, 'html.parser')  # parse the page
        bs = bs.find('ol', class_="grid_view")  # the <ol> holding the movies
        if bs is None:
            # The expected list container is missing from the returned page.
            raise RuntimeError('movie list <ol class="grid_view"> not found in ' + url)
        for titles in bs.find_all('li'):  # one <li> per movie
            num = titles.find('em', class_="").text
            title = titles.find('span', class_="title").text
            comment = titles.find('span', class_="rating_num").text
            url_movie = titles.find('a')['href']  # link is the 'href' attribute
            writer.writerow([num, title, comment, url_movie])
            # The recommendation blurb is optional, so look it up once and
            # branch on whether it exists.
            inq = titles.find('span', class_="inq")
            if inq is not None:
                tes = inq.text
                print(num + '.' + title + '——' + comment + '\n' + '推荐语:' + tes + '\n' + url_movie)
            else:
                # No blurb: print an empty line in its place.
                print(num + '.' + title + '——' + comment + '\n' + '\n' + url_movie)
网友评论