import csv
import threading
import time
from io import BytesIO
from queue import Queue
from threading import Thread

import requests
import requests as req
from bs4 import BeautifulSoup
from PIL import Image
#使用生产者消费者模式,生产者产生的id链接传给消费者执行
def producer(q, url, timeout=10):
    """Download one playlist listing page and enqueue its playlist links.

    Args:
        q: Queue shared with the consumers. Exactly one item is put per call:
            the result of ``soup.select('.dec a')`` for the page.
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server. Without it a stalled
            connection would block this thread (and whoever joins it) forever.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including a timeout); nothing is enqueued in that case.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    response = requests.get(url=url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Anchor tags that carry the playlist detail-page URLs.
    ids = soup.select('.dec a')
    q.put(ids)
_CSV_PATH = 'data.csv'
_CSV_HEADER = ['id', 'title', 'nickname', 'img', 'description', 'count',
               'number of song', 'number of adding list', 'share', 'comment']
# consumer() runs in many threads at once; every access to data.csv goes
# through this lock so rows are never interleaved or overwritten.
_CSV_LOCK = threading.Lock()
# True once this process has recreated data.csv and written its header.
_csv_started = False


def _write_csv_row(row=None):
    """Write *row* to data.csv; the first call recreates the file with its header.

    Passing ``None`` only makes sure the file and its header exist.
    """
    global _csv_started
    with _CSV_LOCK:
        mode = 'a' if _csv_started else 'w'
        # newline='' is what the csv module asks for; without it Windows
        # output gets an extra blank line after every row.
        with open(_CSV_PATH, mode, encoding='utf-8', newline='') as file:
            csv_writer = csv.writer(file)
            if not _csv_started:
                csv_writer.writerow(_CSV_HEADER)
                _csv_started = True
            if row is not None:
                csv_writer.writerow(row)


def _save_cover(img_url):
    """Download the cover image and save it under a timestamp-based file name."""
    res = req.get(img_url, timeout=10)
    image = Image.open(BytesIO(res.content))
    try:
        image.save(str(time.time()) + '.jpg')
    except OSError:
        # Pillow raises OSError when the image mode cannot be written as
        # JPEG; fall back to PNG as the original code did.
        image.save(str(time.time()) + '.png')


def _scrape_playlist(link, headers):
    """Fetch one playlist detail page, save its cover, and return its CSV row."""
    href = link['href']
    url = 'https://music.163.com/' + href  # link handed over by the producer
    response = requests.get(url=url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    idd = href.split('=')[-1]
    img = soup.select('img')[0]['data-src']  # cover image URL
    _save_cover(img)

    title = soup.select('title')[0].get_text()
    nickname = soup.select('.s-fc7')[0].get_text()
    description = soup.select('p')[1].get_text()
    count = soup.select('strong')[0].get_text()  # play count
    song_number = soup.select('span span')[0].get_text()  # number of songs
    counters = soup.select('a i')
    add_lis = counters[1].get_text()  # times added to a list
    share = counters[2].get_text()  # share count
    comment = counters[4].get_text()  # comment count
    return [idd, title, nickname, img, description, count,
            song_number, add_lis, share, comment]


def consumer(q):
    """Scrape playlist detail pages for every link batch taken from *q*.

    Each queue item is expected to be an iterable of anchor tags (as put by
    the producer); ``None`` is the stop signal. For every link the playlist
    page is fetched, its cover image is saved to the working directory and
    one row is appended to ``data.csv``. A playlist that cannot be fetched
    or parsed is reported on stdout and skipped.

    Args:
        q: Queue of link batches, terminated by one ``None`` per consumer.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }  # request headers

    # Make sure data.csv exists with its header even if nothing is scraped.
    _write_csv_row()

    while True:
        ids = q.get()
        if ids is None:
            return
        for link in ids:
            try:
                row = _scrape_playlist(link, headers)
            except (requests.RequestException, OSError, LookupError) as exc:
                # Network failure, unreadable image, or a page whose layout
                # lacks the expected elements: skip this playlist only.
                print(f'skip {link!r}: {exc!r}')
                continue
            _write_csv_row(row)
def main():
    """Run the scrape: producers fill the queue, consumers drain it.

    One producer thread is started per listing page and all of them are
    joined before 40 consumer threads are started. One ``None`` is queued
    per consumer as its stop signal, the consumers are joined, and the
    total elapsed time is printed.
    """
    start_time = time.time()  # for the elapsed-time report
    q = Queue()

    # The listing is paged via the offset parameter, 35 playlists per page
    # (limit=35); cat is the URL-encoded category name.
    url_list = [
        f'https://music.163.com/discover/playlist/?order=hot&cat=%E8%AF%B4%E5%94%B1&limit=35&offset={n}'
        for n in range(0, 1300, 35)
    ]

    plist = [Thread(target=producer, args=(q, url)) for url in url_list]
    for p in plist:
        p.start()
    # Join every producer, not just the last one started.
    for p in plist:
        p.join()

    clist = [Thread(target=consumer, args=(q,)) for _ in range(40)]
    for c in clist:
        c.start()
    for _ in clist:
        q.put(None)
    # Wait for the consumers so the reported time covers the actual work.
    for c in clist:
        c.join()

    print('time = %f' % (time.time() - start_time))
# Only start scraping when executed as a script, not when imported.
if __name__ == '__main__':
    main()
data.csv
文件示例
id,title,nickname,img,description,count,number of song,number of adding list,share,comment
6674856328,温柔说唱|最怕Rapper唱情歌 - 歌单 - 网易云音乐,酷洛米大人的飞行笔记,http://p4.music.126.net/0IRSPK1AJKoMUv6ny60TyQ==/109951165827550046.jpg,同步歌单,随时畅听320k好音乐,492226,30,(4141),(54),(13)
5369148099,【精选】Boombap Old school说唱伴奏 Rap - 歌单 - 网易云音乐,西蒙的孩子,http://p3.music.126.net/S2Q9d31EHs3F7XzXGj_wSw==/109951165527651425.jpg,同步歌单,随时畅听320k好音乐,41427,12,(505),(228),评论
6718652523,Rap | 说唱热门推荐 - 歌单 - 网易云音乐,Hiphop嘻哈站,http://p4.music.126.net/Q2KDDI2x3ALKyqMkowDF9w==/109951165928324394.jpg,同步歌单,随时畅听320k好音乐,414320,54,(3145),(41),(8)
5137119649,说唱那可得来我这儿听~ - 歌单 - 网易云音乐,阿辰不会说唱,http://p3.music.126.net/9wHhFVyFsgGVs5Efgbhfag==/109951165162227958.jpg,"
【纯音乐】情绪氛围系列‖钢琴
",186306,71,(427),(8),(7)
保存的图片示例
网友评论