今天在360浏览器里看到了一个可以在电脑上看抖音的插件,和手机app相比,页面格式相当简单,正好学习一波如何抓取音频。
直接上代码:
from pyquery import PyQuery as pq
import requests
from requests.exceptions import ConnectionError
import os
from hashlib import md5
def get_html(url, header, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        header: Dict of HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests waits indefinitely on a stalled server.

    Returns:
        The decoded response body when the server answers with status 200,
        otherwise None (non-200 status or any request failure).
    """
    try:
        response = requests.get(url, headers=header, timeout=timeout)
    except ConnectionError:
        print('connect error')
        return None
    except requests.exceptions.RequestException as exc:
        # Read timeouts, redirect loops, invalid URLs, etc. are reported
        # through other RequestException subclasses; treat them as a failed
        # fetch instead of letting them abort the whole crawl.
        print('request failed: {0}'.format(exc))
        return None

    if response.status_code != 200:
        return None

    # Decode with the encoding detected from the body rather than the one
    # announced in the response headers.
    response.encoding = response.apparent_encoding
    return response.text
def get_links(html):
    """Yield ``(audio, title)`` pairs scraped from a hot-music page.

    Args:
        html: Page markup as returned by ``get_html``; may be None or empty
            when the download failed, in which case nothing is yielded.

    Yields:
        Tuples of the ``data-audio`` attribute of each ``.rankbox ul li``
        element and the text of the matching ``.leftbox-bd .tit a`` element.
        The two selections are paired by position, and pairing stops at the
        shorter one. The first element is whatever ``attr`` returns, so
        callers should be prepared for a missing attribute.
    """
    if not html:
        # A failed fetch hands us None; there is nothing to parse.
        return

    doc = pq(html)
    audio_items = doc('.rankbox ul li').items()
    title_items = doc('.leftbox-bd .tit a').items()
    # items() gives one-shot generators: they can be walked only once and
    # must be created again if another pass is needed.
    for audio_item, title_item in zip(audio_items, title_items):
        yield audio_item.attr('data-audio'), title_item.text()
def get_music(url, timeout=30):
    """Download an audio file and return its raw bytes.

    Args:
        url: Address of the audio file.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests waits indefinitely on a stalled server.

    Returns:
        The response body as bytes when the server answers with status 200,
        otherwise None (non-200 status or any request failure).
    """
    print("Downing" + url)
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Covers connection errors as well as timeouts, bad URLs and
        # redirect loops, so one broken link cannot abort the crawl.
        return None

    if response.status_code == 200:
        return response.content
    return None
def save_music(content, name):
    """Write downloaded audio bytes to ``<cwd>/<name>.mp3``.

    Args:
        content: Raw bytes of the audio file. None or empty content (a
            failed download) is ignored so no empty stub file is left
            behind to block a later retry.
        name: Track title used as the file name. Characters that cannot
            appear in a file name are replaced with underscores.

    An existing file with the same name is left untouched.
    """
    if not content:
        return

    # Titles come from scraped page text and may contain path separators or
    # other characters that file systems reject.
    forbidden = '\\/:*?"<>|'
    safe_name = ''.join('_' if ch in forbidden else ch for ch in name)

    # The current working directory is the directory the script is run
    # from, not necessarily the directory that contains the script.
    file_path = os.path.join(os.getcwd(), '{0}.{1}'.format(safe_name, 'mp3'))
    print(file_path)

    # Skip tracks that were already downloaded.
    if os.path.exists(file_path):
        return
    with open(file_path, 'wb') as f:
        f.write(content)
def main():
    """Crawl pages 1 and 2 of the hot-music list and save every track.

    Pages that fail to download, entries without an audio address and
    tracks whose download fails are skipped instead of stopping the run.
    """
    base_url = "https://kuaiyinshi.com/hot/music/?source=dou-yin&page="
    for i in range(1, 3):
        header = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
            # The referer points at the page before the one being requested.
            'referer': base_url + str(i - 1),
            'cookie': 'Hm_lvt_67cfc45e6393b98852546ccd940217ac=1533194039; Hm_lpvt_67cfc45e6393b98852546ccd940217ac=1533201706; Hm_lvt_cdce8cda34e84469b1c8015204129522=1533194039; Hm_lpvt_cdce8cda34e84469b1c8015204129522=1533201706',
        }
        html = get_html(base_url + str(i), header)
        if not html:
            # get_html returns None when the page could not be fetched.
            continue

        for audio, title in get_links(html):
            if not audio:
                # The list item carries no data-audio attribute.
                continue
            if audio.startswith(('http://', 'https://')):
                link = audio
            else:
                # Drop the first two characters and prepend the scheme;
                # presumably the site uses protocol-relative "//host/..."
                # addresses - verify against the live markup.
                link = 'http://' + audio[2:]

            content = get_music(link)
            if content is None:
                continue
            save_music(content, title)
# Start the crawl only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
学到了如何同时遍历两个数组,路还很长啊……
网友评论