import requests
import json
from pprint import pp, pprint
from datetime import datetime
import time
import os
import os.path
from pathlib import Path
import random
from retrying import retry
def _is_transient_request_error(exc):
    """Return True when *exc* is a ``requests`` failure worth retrying.

    Used as the ``retry_on_exception`` predicate so that data-shape or
    programming errors (``KeyError``, ``IndexError``, ...) surface
    immediately instead of being retried.
    """
    return isinstance(exc, requests.RequestException)


class Discovery:
    """Mirror part of the local novel API to JSON files under ``novel/``.

    The crawl is a chain: ``doRequest`` fetches the discovery feed and hands
    one book id to ``doRequestDetail``, which calls ``doRequestChapters``,
    which calls ``doRequestContent`` for each of the first two chapters.
    Every HTTP call is preceded by ``checkTime`` (a fixed pause) and goes
    through ``_fetch_json``.

    Retry policy: only ``requests`` errors (connection failures, timeouts,
    non-2xx statuses) are retried, at most ``MAX_ATTEMPTS`` times and
    ``REQUEST_DELAY_SECONDS`` apart. After the last attempt the original
    exception propagates to the caller. Any other exception is never retried.
    """

    API_ROOT = "http://localhost:3000/api"
    USER_AGENT = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/55.0.2883.95 Safari/537.36')
    REQUEST_DELAY_SECONDS = 12    # pause before each request / between retries
    REQUEST_TIMEOUT_SECONDS = 30  # without a timeout a stalled server blocks forever
    MAX_ATTEMPTS = 5              # upper bound on attempts per HTTP request

    def checkTime(self):
        """Sleep for ``REQUEST_DELAY_SECONDS`` to space out requests."""
        time.sleep(self.REQUEST_DELAY_SECONDS)

    def createRootDir(self):
        """Create the ``novel`` directory, or report that it already exists."""
        root = Path('novel')
        if root.exists():
            pprint("novel is exists...")
        else:
            root.mkdir()

    def createDirs(self, b_path):
        """Create directory *b_path* and any missing parents.

        Does nothing when the directory already exists.
        """
        # exist_ok avoids the race between an existence check and the creation.
        os.makedirs(b_path, exist_ok=True)

    @retry(stop_max_attempt_number=MAX_ATTEMPTS,
           wait_fixed=REQUEST_DELAY_SECONDS * 1000,  # retrying expects milliseconds
           retry_on_exception=_is_transient_request_error)
    def _fetch_json(self, method, url, data=None, extra_headers=None):
        """Send one HTTP request and return the decoded JSON body.

        Args:
            method: HTTP verb, e.g. ``'GET'`` or ``'POST'``.
            url: Absolute URL to request.
            data: Optional request body, passed to ``requests`` unchanged.
            extra_headers: Optional headers merged over the default
                ``User-Agent`` header.

        Raises:
            requests.RequestException: once ``MAX_ATTEMPTS`` attempts failed.
        """
        headers = {'User-Agent': self.USER_AGENT}
        if extra_headers:
            headers.update(extra_headers)
        response = requests.request(method, url, data=data, headers=headers,
                                    timeout=self.REQUEST_TIMEOUT_SECONDS)
        # Turn HTTP error statuses into exceptions so they are retried here
        # rather than failing later on a missing key.
        response.raise_for_status()
        return response.json()

    def _write_json(self, payload, file_name):
        """Serialize *payload* as JSON to *file_name*, overwriting it."""
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # must be pinned rather than left to the platform default.
        with open(file_name, 'w', encoding='utf-8') as file_obj:
            json.dump(payload, file_obj, ensure_ascii=False)

    def doRequest(self):
        """Fetch the discovery feed, save it, and crawl one book from it.

        The full response is written to ``novel/home.json``. Only the first
        book of the first category is then passed to ``doRequestDetail``.
        """
        self.checkTime()
        pprint("get home data")
        url = f"{self.API_ROOT}/category/discovery?pageNum=1&pageSize=20"
        pprint(url)
        res = self._fetch_json('GET', url)
        categories = res['data']['list']

        self.createDirs('novel')
        self._write_json(res, 'novel/home.json')

        # NOTE(review): the original loops broke out unconditionally after the
        # first iteration, so only one book is crawled. This looks like a
        # deliberate sampling limit -- confirm before widening it.
        if not categories:
            return
        books = categories[0]['bookList']
        if books:
            self.doRequestDetail(books[0]['bookId'])

    def doRequestDetail(self, bookId):
        """Fetch and save the detail record of *bookId*, then its chapters.

        The response's ``data`` member is written to
        ``novel/books/<bookId>/detail_<bookId>.json``.
        """
        self.checkTime()
        pprint(f"get detail: {bookId}")
        url = f"{self.API_ROOT}/book/getDetail?bookId={bookId}"
        pprint(url)
        data = self._fetch_json('GET', url)['data']

        book_dir = f"novel/books/{bookId}"
        self.createDirs(book_dir)
        self._write_json(data, f"{book_dir}/detail_{bookId}.json")
        self.doRequestChapters(bookId)

    def doRequestChapters(self, bookId):
        """Fetch the chapter list of *bookId* and the content of its first two.

        The response's ``data`` member, with ``chapters`` truncated to at most
        the first two entries, is written to
        ``novel/books/<bookId>/chapters_<bookId>.json``.
        """
        self.checkTime()
        pprint(f"get chapters: {bookId}")
        url = f"{self.API_ROOT}/chapter/getByBookId?bookId={bookId}&chapterId=0"
        pprint(url)
        data = self._fetch_json('GET', url)['data']

        # Slicing tolerates books that have fewer than two chapters.
        first_chapters = data['chapters'][:2]
        data['chapters'] = first_chapters

        book_dir = f"novel/books/{bookId}"
        self.createDirs(book_dir)  # allows calling this method on its own
        self._write_json(data, f"{book_dir}/chapters_{bookId}.json")

        for chapter in first_chapters:
            self.doRequestContent(bookId, [chapter['id']])

    def doRequestContent(self, bookId, chapters):
        """Fetch chapter content for *bookId* and save the whole response.

        Args:
            bookId: Identifier of the book.
            chapters: Non-empty list of chapter ids, sent as ``chapterIdList``.
                The output file is named after the first id only:
                ``novel/books/<bookId>/content/<bookId>_<chapters[0]>.json``.

        Raises:
            ValueError: if *chapters* is empty.
        """
        if not chapters:
            raise ValueError("chapters must contain at least one chapter id")
        self.checkTime()
        pprint(f"get content: {bookId}")
        pprint(chapters)
        payload = {"bookId": bookId, "chapterIdList": chapters}
        url = f"{self.API_ROOT}/chapter/get"
        res = self._fetch_json('POST', url, data=json.dumps(payload),
                               extra_headers={'Content-Type': 'application/json'})

        content_dir = f"novel/books/{bookId}/content"
        self.createDirs(content_dir)
        self._write_json(res, f"{content_dir}/{bookId}_{chapters[0]}.json")
# Script entry: build the crawler, make sure the output tree exists
# (creating "novel/books" also creates "novel"), then start the crawl
# from the discovery feed.
discovery = Discovery()
discovery.createDirs("novel/books")
discovery.doRequest()
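# "网友评论" (reader comments) -- stray text that appears to come from the web page hosting this script; kept as a comment so it is not executed as a bare name.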