# 环境:python3.6
# 目标:下载整本小说。(使用多线程)
import requests
import re
import threading
# Download one chapter of the novel.
class MyThread(threading.Thread):
    """Worker thread that fetches a single chapter page and saves it to disk.

    Args:
        threadID: Numeric identifier stored on the instance for the caller's
            bookkeeping; the thread itself does not use it.
        threadName: Label stored on the instance; not used by the thread
            itself.
        url: Link to the chapter *without* the ``.html`` suffix. It is
            appended to the module-level ``baselink`` to build the request
            URL, and also used (with ``.html`` re-added) as the output path.
    """

    def __init__(self, threadID, threadName, url):
        super().__init__()
        self.threadID = threadID
        self.threadName = threadName
        self.url = url

    def run(self):
        """Fetch the chapter and write its HTML to ``<url>.html``.

        Reads the module-level ``baselink`` when the thread runs, so it must
        be defined before ``start()`` is called.
        """
        itemurl = self.url + '.html'
        itemreq = requests.get(baselink + itemurl)
        # Force UTF-8 decoding instead of relying on requests' guess.
        itemreq.encoding = 'utf-8'
        # Write with an explicit encoding: the platform default (e.g. a
        # legacy code page on Windows) may be unable to encode the text.
        # The ``with`` block closes the file even if the write fails.
        with open(itemurl, 'w', encoding='utf-8') as itemf:
            itemf.write(itemreq.text)
# Find every link in the page that points at an .html file.
def findLinks(htmlstring):
    """Return the ``href`` targets in *htmlstring* that end in ``.html``.

    Args:
        htmlstring: HTML text to scan.

    Returns:
        A list of the matched href values with the trailing ``.html``
        removed, in document order. Duplicates are kept. The list is empty
        when nothing matches.
    """
    # ``[^"]`` keeps a match inside a single attribute value, so an earlier
    # non-.html href cannot swallow the markup up to a later .html one.
    # ``\.`` matches a literal dot instead of any character.
    linkPattern = re.compile(r'href="([^"]+?)\.html"')
    return linkPattern.findall(htmlstring)
# Fetch the index page of the novel.
baselink = 'http://chuanyuezhe.yetianzi.com'
req = requests.get(baselink)
req.encoding = 'utf-8'
# Collect the chapter links from the index page.
alllink = findLinks(req.text)
# Download every chapter, one thread per distinct link.
threads = []
# dict.fromkeys drops repeated links while keeping first-seen order, so two
# threads never write the same output file at the same time.
for index, item in enumerate(dict.fromkeys(alllink)):
    thread = MyThread(index, f"{item}{index}", item)
    thread.start()
    threads.append(thread)
# Join only after every thread has been started; joining right after each
# start() would make the downloads run one at a time.
for thread in threads:
    thread.join()
# 网友评论