import os
import re
import threading  # multithreading
import time
from multiprocessing import Process  # multiprocessing

import requests
from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from kaisha import str2url
# Configure the browser options.
chrome_options = webdriver.ChromeOptions()
# Uncomment the next line to run Chrome headless (no visible window).
# chrome_options.add_argument('--headless')
# NOTE(review): the driver is created at import time, so merely importing this
# module starts a Chrome instance. Under the "spawn" multiprocessing start
# method each worker process re-imports the main module, which presumably
# starts an extra browser per worker -- confirm on the target platform.
browser = webdriver.Chrome(options=chrome_options)
# Shared explicit wait bound to the browser, with a 5-second timeout.
wait = WebDriverWait(browser, 5)
def get_page():
    """Load the Xiami chart page in the shared browser and return its HTML.

    Instead of sleeping for a fixed interval, this blocks on the shared
    ``wait`` until at least one ``<tr class="songwrapper">`` row is present,
    which is the element the rest of the script extracts data from.

    Returns:
        str: ``browser.page_source`` once a song row is present, or whatever
        has been rendered so far if the wait times out.
    """
    url = 'https://www.xiami.com/chart'
    browser.get(url)
    try:
        wait.until(EC.presence_of_element_located(
            (By.XPATH, '//tr[@class="songwrapper"]')))
    except TimeoutException:
        # Best effort: still hand back the page so the caller can decide what
        # to do with a document that has no song rows.
        print('Timed out waiting for song rows; returning the page as rendered so far')
    return browser.page_source
def save_mp3_with_threads(data_title, data_mp3):
    """Save the music files using one thread per song.

    Args:
        data_title: Sequence of song titles.
        data_mp3: Sequence of ``data-mp3`` attribute values, parallel to
            ``data_title``; each one is passed through ``str2url`` to obtain
            the URL handed to ``save_mp3``.

    The two sequences are paired with ``zip``, so a length mismatch no longer
    raises ``IndexError``; surplus items in the longer sequence are ignored.
    The call returns only after every started thread has finished.
    """
    threads = []
    for title, mp3 in zip(data_title, data_mp3):
        mp3_url = str2url(mp3)
        thread = threading.Thread(target=save_mp3, args=(mp3_url, title))
        thread.start()
        threads.append(thread)
    # Wait for the workers so that returning means the downloads are done.
    for thread in threads:
        thread.join()
def save_mp3_with_process(data_title, data_mp3):
    """Save the music files using one process per song.

    Args:
        data_title: Sequence of song titles.
        data_mp3: Sequence of ``data-mp3`` attribute values, parallel to
            ``data_title``; each one is passed through ``str2url`` to obtain
            the URL handed to ``save_mp3``.

    The two sequences are paired with ``zip``, so a length mismatch no longer
    raises ``IndexError``; surplus items in the longer sequence are ignored.
    The call returns only after every started process has exited.
    """
    processes = []
    for title, mp3 in zip(data_title, data_mp3):
        mp3_url = str2url(mp3)
        process = Process(target=save_mp3, args=(mp3_url, title))
        process.start()
        processes.append(process)
    # Reap the children here rather than leaving them for interpreter exit.
    for process in processes:
        process.join()
def parse_page(html):
    """Extract the title and mp3 data of every song row and download them.

    Args:
        html: HTML source of the chart page.

    Both attributes are read from the same ``<tr class="songwrapper">``
    element, so a row that lacks ``data-title`` or ``data-mp3`` is skipped
    instead of shifting every later title onto the wrong URL (which is what
    two independent attribute queries would do).

    Downloads are dispatched through ``save_mp3_with_process``;
    ``save_mp3_with_threads`` takes the same arguments and can be swapped in.
    """
    etree_html = etree.HTML(html)
    data_title = []
    data_mp3 = []
    for row in etree_html.xpath('//tr[@class="songwrapper"]'):
        title = row.get('data-title')
        mp3 = row.get('data-mp3')
        if not title or not mp3:
            # Incomplete row: nothing usable to download.
            continue
        data_title.append(title)
        data_mp3.append(mp3)
    save_mp3_with_process(data_title, data_mp3)
def save_mp3(mp3_url, title, timeout=30):
    """Download one mp3 file and save it under ``./mp3``.

    Args:
        mp3_url: URL of the audio file.
        title: Song title, used as the file name. Characters that are not
            safe in a file name (path separators, ``:*?"<>|``, control
            characters) are replaced with ``_``.
        timeout: Seconds to wait for the server before giving up.

    Network errors and non-200 responses are reported on stdout and the song
    is skipped; nothing is raised to the caller for those cases.
    """
    print(title)
    try:
        response = requests.get(mp3_url, timeout=timeout)
    except requests.RequestException as exc:
        print('Failed to download %s: %s' % (title, exc))
        return
    if response.status_code != 200:
        print('Skipping %s: HTTP %s' % (title, response.status_code))
        return

    # The title comes from a scraped page, so it must not be able to add
    # directories to the path or escape ./mp3.
    safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title).strip(' .')
    if not safe_title:
        safe_title = 'untitled'

    # Create the output directory on first use instead of failing on open().
    os.makedirs('./mp3', exist_ok=True)
    with open(os.path.join('./mp3', safe_title + '.mp3'), 'wb') as f:
        f.write(response.content)
def main():
    """Fetch the chart page, then download every song found on it."""
    try:
        html = get_page()
    finally:
        # The browser is only needed to render the page; quit it as soon as
        # the HTML is captured (or loading failed) so Chrome is not left
        # running after the script ends.
        browser.quit()
    parse_page(html)


if __name__ == '__main__':
    main()
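# (Page residue: "Reader comments" heading from the web page this script was copied from; not part of the script.)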