美文网首页
爬和卓照片

爬和卓照片

作者: SameX_4869 | 来源:发表于2018-08-26 15:48 被阅读7次
    #!/usr/bin/env python
    #coding:utf-8
    import urllib.request
    import urllib.parse
    import http.cookiejar
    import re
    import random
    import time
    import os
    
    # Pattern for the `"url":"http:...jpg"` fragments in an album page listing.
    rex = r'"url":"http:.*?jpg"'
    # Pattern for the `'albumName':'...',` fragment in an album page.
    rex2 = r"'albumName':'.*?',"
    # Pagination state; pachong() updates both through `global`.
    page, isLoadEnd = 1, False
    # Ids of the albums to download.
    albumIds = [
        "225325744",
    ]
    
    def login(email="same4869@163.com", password="wangxun0902"):
        """Log in to renren.com and install a cookie-aware global opener.

        POSTs the credentials to the ajax login endpoint through an opener
        that has an ``HTTPCookieProcessor`` attached, and installs that opener
        globally so later ``urllib.request.urlopen`` calls use the same
        cookie jar.

        Args:
            email: Account e-mail sent in the login form.
            password: Account password sent in the login form.

        Raises:
            urllib.error.URLError: If the login request fails.
        """
        # NOTE(review): the default credentials are hard-coded in source;
        # consider loading them from the environment or a config file.

        # The real form-submission address, as observed in the browser's
        # network monitor.
        url = "http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=20171011315479"
        postdata = urllib.parse.urlencode({
            "email": email,
            "password": password,
        }).encode('utf-8')
        req = urllib.request.Request(url, postdata)

        # Build an opener whose cookie processor stores cookies in `cjar`.
        cjar = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cjar))
        # Install it as the global opener used by urllib.request.urlopen().
        urllib.request.install_opener(opener)

        # The response body is not needed; the request is made for its
        # cookies. Close the response explicitly instead of leaking it.
        with opener.open(req):
            pass
    
    def pachong(albumId,upage,picFileDir):
        """Download every photo listed on one page of an album.

        Fetches the listing for page ``upage`` of album ``albumId``, extracts
        the image URLs with the module-level ``rex`` pattern, updates the
        module-level pagination state, and saves each image into
        ``picFileDir`` as ``s_<page>_<index>_<url-fragment>.jpg``.

        Side effects:
            * Sets global ``isLoadEnd`` to True when the page holds fewer
              URLs than the requested page size; otherwise increments global
              ``page``.
            * Replaces the globally installed urllib opener with a plain one
              that sends a browser User-Agent header.

        Args:
            albumId: Album id as a string.
            upage: 1-based page number to fetch.
            picFileDir: Existing directory the images are written into.

        Raises:
            urllib.error.URLError: If the listing or an image request fails.
        """
        global isLoadEnd
        global page

        # Page size requested from the server; a shorter page means the
        # album has no further pages.
        page_size = 100
        url2 = "http://photo.renren.com/photo/235602297/album-" + albumId + "/bypage/ajax/v7?page=" + str(upage) + "&pageSize=" + str(page_size) + "&requestToken=2078225833&_rtk=47a03403;"
        with urllib.request.urlopen(url2) as response:
            data2 = response.read().decode('utf-8')
        print(url2)

        imglist = re.findall(rex, data2)
        size = len(imglist)
        print(size)
        print(page)
        if size != page_size:
            isLoadEnd = True
        else:
            page = page + 1
        print(isLoadEnd)

        # Image downloads go through urlretrieve(), which uses the global
        # opener, so install one carrying a browser User-Agent.
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-Agent','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36')]
        urllib.request.install_opener(opener)

        for y, imgurl in enumerate(imglist, start=1):
            # Drop the leading `"url":"` (7 chars) and the closing quote,
            # then remove the backslash escapes from the URL.
            rimageurl = imgurl[7:-1].replace('\\','')
            # The 16 characters before the ".jpg" suffix make the file name
            # distinctive; a short basename could pull in a "/" from the URL
            # path, which would produce an invalid output path.
            z = rimageurl[-20:-4].replace('/', '_')
            # Label with `upage` (the page actually fetched): global `page`
            # may already have been advanced to the next page above.
            print("rimageurl --> " + rimageurl + " y --> " + str(y) + " z --> " + str(z) + " page --> " + str(upage))
            urllib.request.urlretrieve(rimageurl, picFileDir + '/s_%s_%s_%s.jpg' % (str(upage),str(y),str(z)))
    
    def makePicFileDir(albumId):
        """Create (if needed) and return the download directory for an album.

        Fetches the album page, extracts the album name with the module-level
        ``rex2`` pattern, and creates a directory named after it under the
        hard-coded image root.

        Args:
            albumId: Album id as a string.

        Returns:
            The path of the album's image directory.

        Raises:
            IndexError: If no album name is found in the page.
            urllib.error.URLError: If the page request fails.
        """
        url3 = "http://photo.renren.com/photo/235602297/album-" + albumId + "/v7"
        with urllib.request.urlopen(url3) as response:
            data3 = response.read().decode('utf-8')
        imgre3 = re.compile(rex2)
        print("imgre3:" + str(imgre3) + "  data3:" + data3)

        found = imgre3.search(data3)
        if found is None:
            # Stay with IndexError (what indexing the empty match list used
            # to raise) but say what actually went wrong.
            raise IndexError("no albumName field found in page for album " + albumId)
        print(found.group(0))
        # Strip the leading `'albumName':'` (13 chars) and the trailing `',`.
        albumName = found.group(0)[13:-2]
        print(albumName)

        albumPicPath = "/Users/wangxun/Desktop/python/imgs/" + albumName
        # exist_ok lets the script be re-run for an album whose directory
        # was already created by an earlier run.
        os.makedirs(albumPicPath, exist_ok=True)
        return albumPicPath
    
    if __name__ == "__main__":
        # Download each configured album, starting from its first page.
        for album_id in albumIds:
            page = 1
            isLoadEnd = False
            login()
            target_dir = makePicFileDir(album_id)
            # pachong() sets isLoadEnd once a page comes back short.
            while not isLoadEnd:
                # pachong() installs a plain opener in place of the one
                # login() installed, so log in again before every page
                # (presumably to restore the session cookies).
                login()
                pachong(album_id, page, target_dir)
    

    相关文章

      网友评论

          本文标题:爬和卓照片

          本文链接:https://www.haomeiwen.com/subject/suyoiftx.html