美文网首页上海Python
爬虫作业2(酷狗音乐)

爬虫作业2(酷狗音乐)

作者: 56f82a501045 | 来源:发表于2019-07-25 22:26 被阅读0次

    import csv
    import json
    import re

    import requests
    from lxml import etree

    # HTTP headers passed to every requests.get() call in this script.
    # NOTE(review): the cookie value looks like a redacted placeholder --
    # presumably a real kg_mid value has to be supplied; confirm before running.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/70.0.3538.25 Safari/537.36 '
            'Core/1.70.3719.400 QQBrowser/10.5.3715.400'
        ),
        'cookie': 'kg_mid=***',
    }

    def _split_title(title):
        """Split a list-entry title into ``(singer, song)``.

        The spaced separator ``' - '`` is preferred so that names which contain
        a bare dash themselves stay intact; a bare ``'-'`` is the fallback.
        If no separator is present the whole title is returned as the singer
        and the song is an empty string.
        """
        separator = ' - ' if ' - ' in title else '-'
        singer, _, song = title.partition(separator)
        return singer.strip(), song.strip()


    def _fetch_play_url(page_text, timeout):
        """Return the ``play_url`` for a song page, or ``''`` if unavailable.

        The song page is scanned for the line containing ``dataFromSmarty``;
        the ``hash`` and ``album_id`` found on that line are used to query the
        ``play/getdata`` endpoint, whose response wraps a JSON object in a
        callback call.
        """
        for line in page_text.split('\r'):
            if 'dataFromSmarty' not in line:
                continue
            hash_match = re.search(r'"hash":"(.*?)",', line, re.S)
            album_match = re.search(r'"album_id":(.*?)}', line, re.S)
            if not (hash_match and album_match):
                continue
            url_index = 'https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery19106328788476737324_1563785427610&hash={}&album_id={}'.format(
                hash_match.group(1), album_match.group(1))
            res2 = requests.get(url_index, headers=headers, timeout=timeout)
            # Strip the callback wrapper: keep only the outermost {...} object.
            wrapped = re.match(".*?({.*}).*", res2.text)
            if wrapped is None:
                return ''
            try:
                payload = json.loads(wrapped.group(1))
            except ValueError:
                return ''
            data = payload.get('data')
            # 'data' is only usable when it is a JSON object; anything else
            # (missing, list, null) means no play URL is available.
            if isinstance(data, dict):
                return data.get('play_url', '')
            return ''
        return ''


    def get_info(url, writer, timeout=10):
        """Scrape one ranking page and write one row per listed song.

        For every ``<li>`` in the page's song list this extracts the rank, the
        singer and song name (from the link text), the duration and the link
        to the song page, then resolves the playable URL through the song page.
        Each song produces exactly one row
        ``[rank, singer, song, time, play_url]``; ``play_url`` is an empty
        string when it cannot be resolved.

        Args:
            url: Address of the ranking page to download.
            writer: Object exposing ``writerow(list)``, e.g. a ``csv.writer``.
            timeout: Seconds passed as ``timeout`` to every ``requests.get``
                call so that a stalled connection cannot hang the run.

        Raises:
            IndexError: If a list entry lacks one of the expected elements
                (rank span, link text, duration span or link href).
        """
        res = requests.get(url, headers=headers, timeout=timeout)
        html = etree.HTML(res.text)
        infos = html.xpath('//div[@class="pc_temp_songlist  pc_rank_songlist_short"]/ul/li')
        for info in infos:
            rank = info.xpath('span[3]')[0].xpath('string(.)').strip()
            singer, song = _split_title(info.xpath('a/text()')[0])
            duration = info.xpath('span[5]/span/text()')[0].strip()
            url_link = info.xpath('a/@href')[0]
            res1 = requests.get(url_link, headers=headers, timeout=timeout)
            play_url = _fetch_play_url(res1.text, timeout)
            print(rank, singer, song, duration, play_url)
            writer.writerow([rank, singer, song, duration, play_url])

    if __name__ == '__main__':
        # Ranking pages 1 through 5 of the same chart.
        urls = ['https://www.kugou.com/yy/rank/home/{}-6666.html?from=rank'.format(str(i)) for i in range(1, 6)]
        # The context manager guarantees song.csv is flushed and closed even
        # if a request or a parse step raises part-way through the run.
        # newline='' is what the csv module expects for files it writes to.
        with open('song.csv', 'w+', encoding='utf-8', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['rank', 'singer', 'song', 'time', 'play_url'])
            for url in urls:
                get_info(url, writer)

    相关文章

      网友评论

        本文标题:爬虫作业2(酷狗音乐)

        本文链接:https://www.haomeiwen.com/subject/grtlrctx.html