前言
千里之行,始于足下。随着爬取的深入,我发现了一些问题,而且上次还少爬了一项数据,因此花了几个小时完善上次的 bilibili 爬取任务。
修改内容
1.增加一个字段,用于记录新番的开播日期
2.增加把数据写入本地文件的功能
3.修复编码问题的bug
4.规整翻页等方法
import requests
import re
import time
def bilibili_score(page_number, numbers):
    """Fetch one page of bilibili bangumi search results and store its records.

    Downloads result page ``page_number``, extracts
    ``(play, pub_date, score, title)`` tuples from the response text with a
    regular expression, and passes them to ``bilibili_Animation``.

    Args:
        page_number: Value substituted into the ``page`` query parameter.
        numbers: Forwarded unchanged to ``bilibili_Animation``.

    Returns:
        Whatever ``bilibili_Animation`` returns.
    """
    # Adjacent literals are concatenated without separators, so the URL
    # contains no stray whitespace. The query uses ``&copyright=-1``.
    url = (
        "https://bangumi.bilibili.com/media/web_api/search/result?season_version=-1&"
        "area=-1&is_finish=-1&copyright=-1&season_status=-1&season_month=-1&pub_date=-1&"
        f"style_id=-1&order=4&st=1&sort=0&page={page_number}&season_type=1&pagesize=20"
    )
    # Send a browser-like User-Agent with the request.
    header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, headers=header, timeout=10)
    bilibili_text = response.text
    # Capture groups, in order: play count, broadcast date, score, title.
    record_pattern = re.compile(
        r'play":"(.*?)","pub_date":(.*?),".*?,"score":"(.*?)".*?,"title":"(.*?)"}'
    )
    bilibili_hot = record_pattern.findall(bilibili_text)
    return bilibili_Animation(bilibili_hot, numbers)
def page(numbers):
    """Fetch result pages 1 through ``numbers - 1``, in order.

    Every page is handled by ``bilibili_score``, which also receives
    ``numbers`` unchanged as its second argument.
    """
    # NOTE(review): the upper bound is exclusive, so page(5) covers pages
    # 1-4 only — confirm this is the intended page count.
    for current_page in range(1, numbers):
        bilibili_score(current_page, numbers)
def bilibili_Animation(bilibili_hot, numbers):
    """Append scraped records to a local text file.

    Every truthy item of ``bilibili_hot`` is written as ``str(item)`` on its
    own line to the file ``bilibilihot{numbers}``, opened in append mode
    relative to the current working directory. The file is not opened at all
    when there is no truthy item.

    Args:
        bilibili_hot: Iterable of records to store.
        numbers: Suffix used in the output file name.
    """
    lines = [str(item) + "\n" for item in bilibili_hot if item]
    if not lines:
        return
    # UTF-8 is used directly: "gbk2312" is not a registered codec name, so
    # attempting it first could only raise LookupError and never write data.
    # Opening once for the whole batch also avoids a reopen per record.
    with open(f"bilibilihot{numbers}", "a", encoding="utf-8") as output_file:
        output_file.writelines(lines)
def datetime(bilibili_hot):
    """Convert a record's broadcast timestamp to a local date-time string.

    Not yet called from the crawl loop.

    Args:
        bilibili_hot: One scraped record. The element at index 1 is read as a
            Unix timestamp in seconds; this matches the ``pub_date`` capture
            group position in the regex used by ``bilibili_score``.

    Returns:
        The local time formatted as ``YYYY年MM月DD日 HH:MM:SS``. The same
        string is also printed.
    """
    # Index 1 is pub_date; index 2 holds the score and is not a timestamp.
    timestamp = int(str(bilibili_hot[1]))
    local = time.localtime(timestamp)
    # Built from struct_time fields so the non-ASCII separators never pass
    # through the platform's locale-dependent strftime.
    formatted = (
        f"{local.tm_year}年{local.tm_mon:02d}月{local.tm_mday:02d}日 "
        f"{local.tm_hour:02d}:{local.tm_min:02d}:{local.tm_sec:02d}"
    )
    print(formatted)
    return formatted
if __name__ == "__main__":
    # Script entry point: start the crawl by calling page() with 5.
    page(5)
在记事本中是这个样子的
本地数据
网友评论