美文网首页
Python爬虫(Python3.6)

Python爬虫(Python3.6)

作者: Ucan先生 | 来源:发表于2018-05-05 17:57 被阅读0次
    import urllib.request
    import urllib.error
    import os
    import re
    import imageio
    # Crawl the course pages chapter by chapter: save every section page as
    # "<title>.htm" and download the GIF images each page references, keeping
    # the images' relative paths under the output directory.
    first_chapter_id = 5301
    last_chapter_id = 5312    # inclusive (the original loop ran while id < 5313)
    first_section_id = 1
    last_section_id = 19      # inclusive (the original loop ran while id < 20)
    output_dir = 'C:/Users/zybang/Desktop/gaoshu'
    base_url = "http://netedu.xauat.edu.cn/jpkc/netedu/jpkc/gdsx/homepage/5jxsd/51/513/"

    # Patterns are compiled once, outside the loops.
    image_pattern = re.compile(r'<img.*?src="(.*?/.*?\.gif)"')
    title_pattern = re.compile(r'<title>(.*?)</title>')
    # Characters that are not allowed in Windows file names.
    invalid_name_chars = re.compile(r'[\\/:*?"<>|]')

    os.makedirs(output_dir, exist_ok=True)

    for chapter_id in range(first_chapter_id, last_chapter_id + 1):
        # Build the chapter URL from the fixed base every time; appending to a
        # running URL would nest the chapter directories (".../5301/5302/").
        chapter_url = base_url + str(chapter_id) + "/"

        # A `for` loop restarts the section counter for every chapter and
        # advances it exactly once per page, even when a request fails.
        for section_id in range(first_section_id, last_section_id + 1):
            # Page name is the chapter id followed by a two-digit section id.
            section_name = "{}{:02d}".format(chapter_id, section_id)
            request_url = chapter_url + section_name + '.htm'
            try:
                with urllib.request.urlopen(request_url) as response:
                    data = response.read()
            except urllib.error.HTTPError as e:
                print(request_url)
                print(e.code)
                continue

            # Decode once and run both regexes on the decoded text. Undecodable
            # bytes are replaced instead of aborting the whole crawl.
            text = data.decode('gbk', errors='replace')

            titles = title_pattern.findall(text)
            # Fall back to the section name when the page has no usable title.
            title = titles[0].strip() if titles else ''
            file_stem = invalid_name_chars.sub('_', title) or section_name

            # Save the raw bytes so the page keeps its original encoding.
            with open(output_dir + '/' + file_stem + '.htm', 'wb') as page_file:
                page_file.write(data)

            for image in image_pattern.findall(text):
                image_url = chapter_url + image
                try:
                    with urllib.request.urlopen(image_url) as img_response:
                        img_bytes = img_response.read()
                except urllib.error.URLError as e:
                    print(image_url)
                    print(e.reason)
                    continue

                image_path = output_dir + "/" + image
                # Create every intermediate directory of the relative path,
                # not just its first component.
                os.makedirs(os.path.dirname(image_path), exist_ok=True)
                with open(image_path, "wb") as img_file:
                    img_file.write(img_bytes)
    
    

    相关文章

      网友评论

          本文标题:Python爬虫(Python3.6)

          本文链接:https://www.haomeiwen.com/subject/zijerftx.html