首先我们通过一个主题词进行视频搜索，然后把找到的视频链接保存下来。链接里面有视频的 id，通过这个 id 调用 B 站 API 接口可以获取视频的信息，其中包含视频的播放量。查询的视频数量尽量少一点，给 B 站造成服务压力可不好。
# -*- coding: utf-8 -*-
import requests
import json
import urllib.request
import zlib
import os
import re
from bs4 import BeautifulSoup
from urllib.parse import quote
import time
#<iframe src="//player.bilibili.com/player.html?aid=66494272&page=1" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen="true"> </iframe>
# Request headers carrying a mobile Safari (iPhone) User-Agent string.
# NOTE(review): none of the functions visible in this file reference this
# dict; confirm whether it is used elsewhere before removing it.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) "
        "AppleWebKit/604.1.38 (KHTML, like Gecko) "
        "Version/11.0 Mobile/15A372 Safari/604.1"
    ),
}
def require_video(video_id):
    """Return the view count reported for a video, or -1 on any failure.

    Sends a GET request to the ``archive_stat/stat`` endpoint with
    ``video_id`` as the ``aid`` query parameter and reads ``data.view`` from
    the JSON response.

    Args:
        video_id: The "av" id of the video (a digit string or an int).

    Returns:
        The ``view`` value from the response, or ``-1`` when the request
        fails, the body is not the expected JSON object, ``message`` is not
        ``"0"``, or the view value is missing or the placeholder ``"--"``.
    """
    # The id travels only through ``params``. Embedding ``?aid=`` in the URL
    # as well made requests append a second copy, producing
    # ``...stat?aid=&aid=<id>``.
    stat_url = "http://api.bilibili.com/archive_stat/stat"
    try:
        response = requests.get(
            url=stat_url,
            params={"aid": video_id},
            timeout=10,  # do not hang forever on a stalled connection
        )
        stat = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers JSON decode errors raised by older requests
        # versions; callers already treat -1 as "no usable view count".
        print("failed to fetch stats for av%s: %s" % (video_id, exc))
        return -1

    print(stat)

    if not isinstance(stat, dict) or stat.get("message") != "0":
        return -1

    data = stat.get("data")
    if not isinstance(data, dict):
        return -1

    views = data.get("view")
    if views is None or views == "--":
        return -1
    return views
def _extract_avid(link):
    """Return the digits that follow ``/av`` in *link*, or None if absent."""
    match = re.search(r"/av(\d+)", link)
    return match.group(1) if match else None


def get_aid(Keyword, *, max_pages=1, min_views=100000):
    """Search videos by keyword and record the ids of the popular ones.

    For each result page, every ``<a class="title">`` link is inspected, the
    "av" id is extracted from its ``href``, the view count is fetched with
    ``require_video``, and ids whose view count reaches ``min_views`` are
    appended (one per line) to ``hot_video.txt``.

    Args:
        Keyword: Search term; it is percent-encoded before being placed in
            the query string.
        max_pages: Number of result pages to fetch, starting at page 1.
            Defaults to 1 to keep the load on the remote site low.
        min_views: Minimum view count for an id to be written to the file.

    Returns:
        None. Results are appended to ``hot_video.txt``.
    """
    print('searching, please wait......')
    user_agent = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE"
    )
    # Encode only the keyword so that '&', '=' or '?' inside it cannot
    # corrupt the query string.
    encoded_keyword = quote(Keyword, safe="", encoding="utf-8")

    # ``with`` guarantees the file is closed even if a request raises.
    with open("hot_video.txt", "a", encoding="utf-8") as out_file:
        for page_num in range(1, max_pages + 1):
            time.sleep(1)  # throttle page requests
            search_url = (
                "https://search.bilibili.com/all?keyword="
                + encoded_keyword
                + "&page="
                + str(page_num)
            )
            print(search_url)

            req = urllib.request.Request(url=search_url)
            req.add_header("User-Agent", user_agent)
            with urllib.request.urlopen(req, timeout=10) as resp:
                search_html = resp.read().decode("utf-8", 'ignore')

            search_bsObj = BeautifulSoup(search_html, 'html.parser')
            search_linkList = search_bsObj.findAll("a", {"class": "title"})
            print('found %s in this page' % len(search_linkList))

            # enumerate supplies the index that the original loop body used
            # without ever defining it.
            for i, item in enumerate(search_linkList):
                time.sleep(1)  # throttle per-video API calls
                print('%s:%s' % (i, item.get('title')))

                href = item.get('href')
                if not href:
                    continue
                # Prefix "http:" exactly as the original did; presumably
                # the hrefs are protocol-relative ("//host/...") - TODO confirm.
                search_link = quote(
                    "http:" + href, safe='/:?=&', encoding="utf-8"
                )
                print(search_link)

                avid = _extract_avid(search_link)
                if avid is None:
                    continue
                print(avid)

                try:
                    view_count = int(require_video(avid))
                except (TypeError, ValueError):
                    # Non-numeric view value: treat as not popular.
                    continue
                if view_count >= min_views:
                    out_file.write(avid + "\n")
def main():
    """Run the video search for the hard-coded keyword '舞蹈'."""
    get_aid('舞蹈')
# Start the search only when this file is executed as a script.
if __name__ == "__main__":
    main()
网友评论