任务需求:
- 网站地址:https://music.163.com/playlist?id=5085967930
- 使用的库 requests,bs4
- 技术点:
- 1.输入的网址(https://music.163.com/#/playlist?id=5058285471)需要删掉 /# 字符(绕过反爬机制)
- 1.输入的网址(https://music.163.com/#/playlist?id=5058285471
# -*- coding: utf-8 -*-
# @Time : 2020/7/29 6:05 下午
# @Author : livein80
# @Email : 12985594@qq.com
# @File : ssyer.py
# @Software : PyCharm
import requests
import os
from bs4 import BeautifulSoup
# Browser-like request headers sent with every HTTP request.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}

# Directory the downloaded tracks are written to.
SAVE_DIR = './网易云音乐/'

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 30

# Prefix of the direct-download address; the "?id=..." query of a song link
# is appended to it.
OUTER_URL_PREFIX = 'http://music.163.com/song/media/outer/url'

# Maps characters that are invalid in file names on common file systems to
# an underscore, so a song title can be used as a file name safely.
FILENAME_TABLE = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def normalize_playlist_url(play_url):
    """Return the playlist URL with the '/#/' fragment marker removed.

    Example: 'https://music.163.com/#/playlist?id=5085967930' becomes
    'https://music.163.com/playlist?id=5085967930'. Surrounding whitespace
    from the interactive prompt is stripped as well.
    """
    return play_url.strip().replace('/#/', '/')


def safe_filename(name):
    """Return *name* with path-hostile characters replaced by '_'.

    Falls back to 'untitled' when nothing usable is left.
    """
    cleaned = name.translate(FILENAME_TABLE).strip()
    return cleaned or 'untitled'


def song_download_url(href):
    """Build the download URL for a song link such as '/song?id=123'.

    Returns None when *href* does not look like a song link, so callers
    can skip unrelated anchors.
    """
    if not href.startswith('/song?'):
        return None
    # Keep the "?id=..." part and drop the leading "/song".
    return OUTER_URL_PREFIX + href[len('/song'):] + '.mp3'


def parse_playlist(html):
    """Extract [name, link] pairs from the playlist page markup.

    The song anchors are read from the 'ul' element with class 'f-hide'.
    Returns an empty list when that element is missing.
    """
    soup = BeautifulSoup(html, 'lxml')
    main = soup.find('ul', {'class': 'f-hide'})
    if main is None:
        return []

    songs = []
    for music in main.find_all('a'):
        href = music.get('href', '')
        link = song_download_url(href)
        if link is None:
            continue
        print('{}:{}'.format(music.text, href))
        songs.append([music.text, link])
    return songs


def download_songs(session, songs, save_dir=SAVE_DIR):
    """Download every [name, link] pair in *songs* into *save_dir*.

    A failed download is reported and skipped so that one bad track does
    not abort the remaining ones.
    """
    os.makedirs(save_dir, exist_ok=True)
    for name, url in songs:
        try:
            reply = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            reply.raise_for_status()
        except requests.RequestException as err:
            print('%s 下载失败: %s' % (name, err))
            continue
        target = os.path.join(save_dir, '%s.mp3' % safe_filename(name))
        with open(target, 'wb') as file:
            file.write(reply.content)
        print('%s 下载完成' % name)
    print('全部下载完成!')


def main():
    """Ask for a playlist URL, list its songs and download them all."""
    play_url = input('输入你要爬取的歌单地址 : ')
    filter_url = normalize_playlist_url(play_url)

    # One session for the whole run so cookies and connections are reused.
    s = requests.session()
    try:
        response = s.get(filter_url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print('歌单页面请求失败: %s' % err)
        return

    lists = parse_playlist(response.content)
    print(lists)
    if not lists:
        print('未找到任何歌曲,请检查歌单地址是否正确')
        return

    download_songs(s, lists)


if __name__ == '__main__':
    main()
网友评论