美文网首页上海Python
python培训第二讲第三讲作业

python培训第二讲第三讲作业

作者: Zhaiyx | 来源:发表于2019-05-31 14:32 被阅读0次

    爬取目标:B站各类视频30日排行

    代码

    import requests
    from lxml import etree
    import xlwt
    import time
    
    # Request headers sent with every page fetch: a desktop Chrome User-Agent.
    headers = {
      'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
      ),
    }

    # One entry per ranking category. 'type' is the label used as the sheet
    # name and 'code' is the value substituted into the ranking URL.
    params = [
      {'type': label, 'code': code}
      for label, code in (
        ('全部', '0'),
        ('动画', '1'),
        ('国创相关', '168'),
        ('音乐', '3'),
        ('舞蹈', '129'),
        ('游戏', '4'),
        ('科技', '36'),
        ('数码', '188'),
        ('生活', '160'),
        ('鬼畜', '119'),
        ('时尚', '155'),
        ('娱乐', '5'),
        ('影视', '181'),
      )
    ]
    
    # Shared buffer: get_info() appends scraped rows here and the main script
    # writes them out and empties it after each category.
    all_info_list = []
    
    def _first_text(node, path):
      """Return the first result of ``node.xpath(path)``, or '' if there is none."""
      matches = node.xpath(path)
      return matches[0] if matches else ''

    def get_info(url, type):
      """Download one ranking page and append its entries to ``all_info_list``.

      Args:
        url: Address of the ranking page to request.
        type: Category label supplied by the caller. It is not used in the
          body; the name (which shadows the builtin) is kept so existing
          positional and keyword callers keep working.

      Each ``<li>`` under ``<ul class="rank-list">`` becomes one row
      ``[rank, name, players, comments, author, score]``. A field whose
      XPath matches nothing is stored as '' instead of raising IndexError,
      so a single malformed entry no longer aborts the whole run.
      """
      # XPaths relative to each <li>, listed in output column order.
      field_paths = (
        'div[1]/text()',                        # rank
        'div[2]/div[2]/a/text()',               # name
        'div[2]/div[2]/div[1]/span[1]/text()',  # players
        'div[2]/div[2]/div[1]/span[2]/text()',  # comments
        'div[2]/div[2]/div[1]/a/span/text()',   # author
        'div[2]/div[2]/div[2]/div/text()',      # score
      )
      # Without a timeout a stalled connection would block the script forever.
      res = requests.get(url, headers=headers, timeout=10)
      html = etree.HTML(res.text)
      for info in html.xpath('//ul[@class="rank-list"]/li'):
        all_info_list.append([_first_text(info, path) for path in field_paths])
    
    if __name__ == '__main__':
      # Build one workbook with a sheet per category, then save it once at
      # the end.
      book = xlwt.Workbook(encoding='utf-8')
      # Column titles are the same for every sheet, so build them once.
      header = ['排名', '视频', '播放量', '弹幕量', '作者', '综合得分']
      for param in params:
        sheet = book.add_sheet(param['type'])
        for col, title in enumerate(header):
          sheet.write(0, col, title)
    
        url = 'https://www.bilibili.com/ranking/all/{}/0/30'.format(param['code'])
        # get_info() appends this category's rows to the shared all_info_list.
        get_info(url, param['type'])
        # Row 0 holds the header, so data rows start at 1.
        for row_idx, row in enumerate(all_info_list, start=1):
          for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)
        # Empty the shared buffer in place so the next category starts clean.
        all_info_list.clear()
        # Pause between requests.
        time.sleep(2)
    
      book.save('C:/Users/user/Desktop/B站30日排行.xls')
    
    

    结果

    全部排行
    动画类排行
    国创类排行
    音乐类排行
    其他省略...

    相关文章

      网友评论

        本文标题:python培训第二讲第三讲作业

        本文链接:https://www.haomeiwen.com/subject/bwgwtctx.html