网易云消息推送歌曲收集

作者: 冰_Angus | 来源:发表于2020-04-11 16:28 被阅读0次

网易云消息推送歌曲收集
2018·03·01网易能听SHE的歌了
消息推送收集的
【文字收集】网易云音乐歌曲评论区收集
消息推送平台高可用实践（上）
Android基础-Notification
【day18】K4 刘圆圆推送文案分析
Python爬虫小白——（二）爬虫基础——Selenium Ph
认识不一样的邓丽君
最近特别喜欢的歌好听，歌词浪漫

项目介绍

通过 Chrome 与 Selenium，
将网易云私信中的歌曲收藏到一个歌单中，方便及时收听新歌。
我们可以直接用已登录的Chrome（保存有网易云网站登录态），模拟用户操作，收藏新歌。

image.png

关键代码

初始化selenium

def init_driver():
    """Start a Chrome WebDriver that reuses the local user's Chrome profile.

    When ``chromedriver.exe`` is not present in the current working
    directory, the driver binary is obtained in one of two ways:

    * if this file lives inside a ``crawler_set`` checkout, the copy at
      ``<crawler_set>/driver/chromedriver.exe`` is used;
    * otherwise the binary is downloaded from GitHub through the local
      SOCKS5 proxy at 127.0.0.1:1080 and saved as ``chromedriver.exe``.

    Returns:
        The started ``webdriver.Chrome`` instance (implicit wait set to 5).

    Raises:
        SystemExit: with code -2 when Chrome cannot be started.
        requests.HTTPError: when the driver download returns an error status.
    """
    executable_path = "chromedriver"
    if not os.path.exists("chromedriver.exe"):  # no driver next to the script yet
        _path = os.path.abspath(__file__)
        # The original test was inverted (`not ... in`), which made
        # `_path.index` raise ValueError whenever this branch was taken.
        if "crawler_set" in _path:
            # Running from the project checkout: use the bundled driver.
            root_path = _path[:_path.index("crawler_set") + len("crawler_set")]
            executable_path = os.path.join(root_path, "driver", "chromedriver.exe")
        else:  # stand-alone file: download the driver
            url = "https://raw.githubusercontent.com/AngusWG/crawler_set/master/driver/chromedriver.exe"
            proxies = {
                "http": "socks5://127.0.0.1:1080",
                "https": "socks5://127.0.0.1:1080"
            }
            chunk_size = 1024
            # stream=True so the body is fetched chunk by chunk and the
            # progress bar reflects the actual transfer.
            r = requests.get(url, proxies=proxies, stream=True)
            # Fail before creating the file so an error page is never saved
            # as the driver executable.
            r.raise_for_status()
            content_size = r.headers.get('content-length')
            # tqdm counts iterations (chunks), not bytes; unknown size -> None.
            total_chunks = None
            if content_size is not None:
                total_chunks = (int(content_size) + chunk_size - 1) // chunk_size
            with open("chromedriver.exe", "wb") as code:
                for data in tqdm(iterable=r.iter_content(chunk_size), total=total_chunks,
                                 unit="k", desc="下载驱动"):
                    code.write(data)

    # Default Chrome profile location on Windows, relative to the home dir.
    user_cookies = "".join([os.path.expanduser('~'), r"\AppData\Local\Google\Chrome\User Data"])

    option = webdriver.ChromeOptions()
    # Point Chrome at the user's own data directory so the saved login is reused.
    option.add_argument("--user-data-dir={}".format(user_cookies))

    try:
        driver = webdriver.Chrome(executable_path, options=option)
        driver.implicitly_wait(5)
        return driver
    except WebDriverException:
        print("请先关掉所有的Chrome")
        # Same exit code as before; SystemExit does not rely on the `site`
        # module's interactive `exit` helper being installed.
        raise SystemExit(-2)

收藏歌曲

def save_song(url):
    """Open a song page and add the song to the playlist named ``song_dir``.

    The song is skipped (with a printed notice) when its title contains any
    entry of ``shielding_words``.

    Args:
        url: address of the song page to open.
    """
    print("[{}] start".format(url), end=" ")
    driver.get(url)
    # Element lookups below are performed inside the "contentFrame" frame.
    driver.switch_to.frame("contentFrame")
    song_title = driver.find_element_by_xpath('//div[contains(@class, "tit")]').text

    # First blocked keyword found in the title, if any.
    blocked = next((w for w in shielding_words if w in song_title), None)
    if blocked is not None:
        print("{} 因 {} 已忽略".format(song_title, blocked))
        return

    # Click the element labelled "收藏", then the entry matching the playlist name.
    collect_button = driver.find_element_by_xpath('//*[contains(text(), "收藏")]')
    collect_button.click()
    playlist_xpath = '//*[contains(text(), "{}")]'.format(song_dir)
    driver.find_element_by_xpath(playlist_xpath).click()
    print(song_title)

全部代码

#!/usr/bin/python3
# encoding: utf-8 
# @Time    : 2019/12/19 9:36
# @author  : zza
# @Email   : 740713651@qq.com
# @File    : 将网易云的私信音乐整理.py
"""
该脚本会保存每天新私信里最后几首歌曲到tmp_save_dir
1.在Chrome上登录自己的网易云帐号
2.创建tmp_save_dir
然后运行脚本
"""
import os

import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException, NoSuchElementException
from tqdm import tqdm

song_dir = "tmp_save_dir"
# 屏蔽关键词
shielding_words = ['伴奏']


def init_driver():
    """Start a Chrome WebDriver that reuses the local user's Chrome profile.

    When ``chromedriver.exe`` is not present in the current working
    directory, the driver binary is obtained in one of two ways:

    * if this file lives inside a ``crawler_set`` checkout, the copy at
      ``<crawler_set>/driver/chromedriver.exe`` is used;
    * otherwise the binary is downloaded from GitHub through the local
      SOCKS5 proxy at 127.0.0.1:1080 and saved as ``chromedriver.exe``.

    Returns:
        The started ``webdriver.Chrome`` instance (implicit wait set to 5).

    Raises:
        SystemExit: with code -2 when Chrome cannot be started.
        requests.HTTPError: when the driver download returns an error status.
    """
    executable_path = "chromedriver"
    if not os.path.exists("chromedriver.exe"):  # no driver next to the script yet
        _path = os.path.abspath(__file__)
        # The original test was inverted (`not ... in`), which made
        # `_path.index` raise ValueError whenever this branch was taken.
        if "crawler_set" in _path:
            # Running from the project checkout: use the bundled driver.
            root_path = _path[:_path.index("crawler_set") + len("crawler_set")]
            executable_path = os.path.join(root_path, "driver", "chromedriver.exe")
        else:  # stand-alone file: download the driver
            url = "https://raw.githubusercontent.com/AngusWG/crawler_set/master/driver/chromedriver.exe"
            proxies = {
                "http": "socks5://127.0.0.1:1080",
                "https": "socks5://127.0.0.1:1080"
            }
            chunk_size = 1024
            # stream=True so the body is fetched chunk by chunk and the
            # progress bar reflects the actual transfer.
            r = requests.get(url, proxies=proxies, stream=True)
            # Fail before creating the file so an error page is never saved
            # as the driver executable.
            r.raise_for_status()
            content_size = r.headers.get('content-length')
            # tqdm counts iterations (chunks), not bytes; unknown size -> None.
            total_chunks = None
            if content_size is not None:
                total_chunks = (int(content_size) + chunk_size - 1) // chunk_size
            with open("chromedriver.exe", "wb") as code:
                for data in tqdm(iterable=r.iter_content(chunk_size), total=total_chunks,
                                 unit="k", desc="下载驱动"):
                    code.write(data)

    # Default Chrome profile location on Windows, relative to the home dir.
    user_cookies = "".join([os.path.expanduser('~'), r"\AppData\Local\Google\Chrome\User Data"])

    option = webdriver.ChromeOptions()
    # Point Chrome at the user's own data directory so the saved login is reused.
    option.add_argument("--user-data-dir={}".format(user_cookies))

    try:
        driver = webdriver.Chrome(executable_path, options=option)
        driver.implicitly_wait(5)
        return driver
    except WebDriverException:
        print("请先关掉所有的Chrome")
        # Same exit code as before; SystemExit does not rely on the `site`
        # module's interactive `exit` helper being installed.
        raise SystemExit(-2)


def get_private_detail():
    """Collect the private-message threads that display a new-message bubble.

    Returns:
        dict mapping each thread's ``private_detail`` URL to the integer
        shown in its bubble (presumably the unread-message count — confirm
        against the page markup).
    """
    # Open the private-message list and work inside its content frame.
    driver.get("https://music.163.com/#/msg/m/private")
    driver.switch_to.frame("contentFrame")

    bubble_links_xpath = '//i[@class="u-bub"]/b[@class="f-alpha"]/..//parent::*//a'
    detail_prefix = "https://music.163.com/#/msg/m/private_detail?"

    counts_by_url = {}
    for link in driver.find_elements_by_xpath(bubble_links_xpath):
        # Keep only the query string of the link; it identifies the sender.
        _, query = link.get_attribute("href").split("?")
        detail_url = detail_prefix + query
        bubble_text = link.find_element_by_xpath("../i/em").text
        counts_by_url[detail_url] = int(bubble_text)
    return counts_by_url


def get_song_url_from_album_set(url):
    """Collect the song page URLs linked from an album page.

    Links whose text contains any entry of ``shielding_words`` are skipped.
    The title of every collected song is printed as progress output.

    Args:
        url: address of the album page to open.

    Returns:
        set of ``https://music.163.com/#/song?id=...`` URLs.
    """
    song_set = set()
    # Open the album page and work inside its content frame.
    driver.get(url)
    driver.switch_to.frame("contentFrame")
    url_list = driver.find_elements_by_xpath('//a[contains(@href, "/song?id")]')
    for item in url_list:
        link_text = item.text
        # Use the shared block list instead of a hard-coded keyword, and skip
        # only the matching track: the original `break` also discarded every
        # track listed after the first filtered one.
        if any(word in link_text for word in shielding_words):
            continue
        _, song_id = item.get_attribute("href").split("?id=")
        song_set.add("https://music.163.com/#/song?id=" + song_id)
        song_name = item.find_element_by_xpath("./b").get_attribute("title")
        print(song_name, end=" ")
    return song_set


def get_song_url_from_private_detail(url, msg_num):
    """Collect song URLs shared in the last ``msg_num`` messages of a thread.

    Each of the inspected messages may link to a song or to an album.
    Song links are collected directly; album links are expanded into their
    songs via ``get_song_url_from_album_set``.

    Args:
        url: address of the private-message thread page.
        msg_num: how many trailing messages to inspect.

    Returns:
        set of ``https://music.163.com/#/song?id=...`` URLs; empty when
        ``msg_num`` is not positive.
    """
    # Guard the slice below: `items[-0:]` is the WHOLE list, so a count of
    # zero would otherwise re-process every message in the thread.
    if msg_num <= 0:
        return set()

    album_set = set()
    song_set = set()
    # Open the thread page and work inside its content frame.
    driver.get(url)
    driver.switch_to.frame("contentFrame")
    url_list = driver.find_elements_by_xpath('//div[contains(@class,"itemleft")]')[-msg_num:]
    for item in url_list:
        try:
            link = item.find_element_by_xpath(
                './/a[contains(@href,"album?id") or contains(@href, "song?id")]')
        except NoSuchElementException:
            # Message without a song/album link: nothing to collect.
            continue
        href = link.get_attribute("href")
        _, _id = href.split("?id=")
        if "song?id" in href:
            song_set.add("https://music.163.com/#/song?id=" + _id)
        else:  # "album?id"
            album_set.add("https://music.163.com/#/album?id=" + _id)

    # Albums are expanded after the scan because doing so navigates the driver.
    for album_url in album_set:
        song_set.update(get_song_url_from_album_set(album_url))
    return song_set


def save_song(url):
    """Open a song page and add the song to the playlist named ``song_dir``.

    The song is skipped (with a printed notice) when its title contains any
    entry of ``shielding_words``.

    Args:
        url: address of the song page to open.
    """
    print("[{}] start".format(url), end=" ")
    driver.get(url)
    # Element lookups below are performed inside the "contentFrame" frame.
    driver.switch_to.frame("contentFrame")
    song_title = driver.find_element_by_xpath('//div[contains(@class, "tit")]').text

    # First blocked keyword found in the title, if any.
    blocked = next((w for w in shielding_words if w in song_title), None)
    if blocked is not None:
        print("{} 因 {} 已忽略".format(song_title, blocked))
        return

    # Click the element labelled "收藏", then the entry matching the playlist name.
    collect_button = driver.find_element_by_xpath('//*[contains(text(), "收藏")]')
    collect_button.click()
    playlist_xpath = '//*[contains(text(), "{}")]'.format(song_dir)
    driver.find_element_by_xpath(playlist_xpath).click()
    print(song_title)


# Script entry: gather song URLs from new private messages (or resume from a
# cached list), save each song to the playlist, then shut the browser down.
driver = init_driver()
try:
    if not os.path.exists("tmp.txt"):
        # Threads that show a new-message bubble.
        private_detail_url_dict = get_private_detail()
        print("private_detail_url_set len={}".format(len(private_detail_url_dict)))
        # Song URLs found in those threads.
        song_url_set = set()
        for private_detail_url, msg_num in private_detail_url_dict.items():
            song_url_set.update(get_song_url_from_private_detail(private_detail_url, msg_num))
            print("song_url_set len={}".format(len(song_url_set)))
        # tmp.txt is a checkpoint: it is deleted only after every song has
        # been saved, so a failed run resumes from this list next time.
        with open("tmp.txt", "w", encoding="utf8") as f:
            f.write("\n".join(song_url_set))
    else:
        with open("tmp.txt", "r", encoding="utf8") as f:
            data = f.read()
        # Drop blank lines so an empty or newline-terminated file never
        # yields an empty URL.
        song_url_set = [line for line in data.split("\n") if line]
    # Save the songs.
    for song_url in song_url_set:
        save_song(song_url)
    os.remove("tmp.txt")
finally:
    # quit() ends the whole browser session and the chromedriver process,
    # even when a step above raised; close() only closed the current window.
    driver.quit()

网易云消息推送歌曲收集
项目介绍通过Chrome与selenium将网易云私信中歌曲放到一个歌单中，方便实时收听新歌。我们可以直接用已登...
2018·03·01网易能听SHE的歌了
今早到了公司，无意翻一下手机的推送消息，看见了网易云音乐的一条推送瞬间醒目，点进去发现消息内容是网易云音乐和华研...
消息推送收集的
1. 可以用的Demo 无法收到点击消息 iOS - 收到远程推送后的页面跳转 http://blog.csdn....
【文字收集】网易云音乐歌曲评论区收集
❤ 很喜欢的一段话：“我做好了要与你过一辈子的打算，也做好了你随时要走的准备。这大概是最好的爱情观，深情而不纠缠。...
消息推送平台高可用实践（上）
本文来自网易云社区作者：李弈远消息推送平台为公司内部和第三方应用提供统一消息推送服务，支持广播、私信、组播、附...
Android基础-Notification
在Android手机上，基本上每天都能看到各种各样的推送，如网易云的推荐，新闻的推荐，QQ，WeChat的消息推送...
【day18】K4 刘圆圆推送文案分析
第一个是网易云课堂的推送。因为在网易云课堂报了一个课程，所以每周都会登陆云课堂。网易云课堂算是推送比较频繁的，基本...
Python爬虫小白——（二）爬虫基础——Selenium Ph
前段时间尝试爬取了网易云音乐的歌曲，这次打算爬取QQ音乐的歌曲信息。网易云音乐歌曲列表是通过iframe展示的，可...
认识不一样的邓丽君
【日更之路的第5篇】邓丽君《Beat It（Live）》也许是因为最近在收集经典的翻唱歌曲，网易云的私人FM给...
最近特别喜欢的歌好听，歌词浪漫
网易云推送的歌曲，直觉听了一遍后，一直单曲循环春天需要这么甜的歌曲我喜欢你的眼睛你的睫毛你的侧脸喜欢你嘟着...