import urllib2
import os
import re
# Comic downloader: for every chapter page on jj.xxdm.org, find the image
# paths embedded in the HTML and save each image into a per-chapter folder.
# Targets Python 2 (it relies on the urllib2 module imported above).

CHAPTER_PAGE_PREFIX = 'http://jj.xxdm.org/manhua/jjdjrmh/'
CHAPTER_PAGE_SUFFIX = '.shtml'
# No trailing slash: every matched image path already starts with '/'.
IMAGE_HOST = 'http://pic.xxdm.com'
OUTPUT_ROOT = 'G:\\python data\\'
FIRST_CHAPTER = 1
LAST_CHAPTER = 69
# Compiled once instead of being looked up again for every chapter.
IMAGE_PATH_RE = re.compile(r'/uploads/mh/jj/\d{3,5}/\d{2,4}\.\w{3}')


def chapter_url(chapter):
    """Return the URL of the HTML page for chapter number *chapter*."""
    return CHAPTER_PAGE_PREFIX + str(chapter) + CHAPTER_PAGE_SUFFIX


def find_image_paths(html):
    """Return every image path found in *html*, in order of appearance."""
    return IMAGE_PATH_RE.findall(html)


def image_url(path):
    """Return the absolute URL for an image *path* that starts with '/'."""
    return IMAGE_HOST + path


def ensure_dir(path):
    """Create directory *path* if needed; an existing directory is fine.

    Raises OSError when the directory is missing and cannot be created.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Only swallow the error when the directory really is there, so a
        # partially completed run can be restarted.
        if not os.path.isdir(path):
            raise


def fetch(url):
    """Return the raw body served at *url*, always closing the response."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def download_chapter(chapter):
    """Save all images of one chapter as 1.jpg, 2.jpg, ... in its folder.

    Files are written by full path, so the process working directory is
    left untouched. Network and file errors propagate to the caller.
    """
    chapter_dir = os.path.join(OUTPUT_ROOT, str(chapter))
    ensure_dir(chapter_dir)
    html = fetch(chapter_url(chapter))
    for page, path in enumerate(find_image_paths(html), 1):
        data = fetch(image_url(path))
        print(path)
        # The file is always named .jpg, whatever extension the path has.
        target = os.path.join(chapter_dir, str(page) + '.jpg')
        with open(target, 'wb') as f:
            f.write(data)


def main(first=FIRST_CHAPTER, last=LAST_CHAPTER):
    """Download chapters *first* through *last* (inclusive), one by one."""
    for chapter in range(first, last + 1):
        download_chapter(chapter)


if __name__ == '__main__':
    main()
用不来 Scrapy。最后没有用这个框架,反而简单很多。
但是很慢,真的特别慢。
网友评论