美文网首页
爬人人网相册照片

爬人人网相册照片

作者: SameX_4869 | 来源:发表于2017-11-13 16:08 被阅读16次
    #!/usr/bin/env python
    #coding:utf-8
    import urllib.request
    import urllib.parse
    import http.cookiejar
    import re
    import random
    import time
    import os
    
    # Pattern matching each `"url":"http:...jpg"` fragment in a paged album
    # listing response.
    rex = '"url":"http:.*?jpg"'
    # Pattern matching the `'albumName':'...',` fragment of an album page.
    rex2 = "'albumName':'.*?',"

    # Paging state; pachong() updates both names through `global` statements.
    isLoadEnd = False
    page = 1

    # Album IDs to download, processed in the order listed.
    albumIds = [
        "1068447791",
        "1030171698",
        "267052848",
        "980916250",
        "493183895",
        "437218980",
        "822089791",
        "847001367",
        "636228162",
        "650307179",
        "628910214",
        "619085272",
        "592578174",
        "587627042",
        "583327356",
        "535841977",
        "528197608",
        "395222675",
        "477161703",
        "422816238",
        "422815880",
        "390560603",
        "389004270",
        "388995122",
        "388155364",
        "328765563",
        "362982341",
        "362980631",
        "355853058",
        "350794325",
        "315650606",
        "315648771",
        "315647264",
        "315645161",
        "295175666",
        "304383681",
        "235179777",
        "294407425",
        "294392538",
        "289200620",
        "289323292",
        "283312171",
        "283310606",
        "281707910",
        "277538581",
        "272557523",
        "274160390",
        "268267415",
        "266014788",
        "227414025",
    ]
    
    def login(email="", password=""):
        """Log in to renren.com and install a cookie-aware global opener.

        Posts the credentials to the AJAX login endpoint through an opener
        backed by an in-memory ``CookieJar`` and installs that opener as the
        global one, so later ``urllib.request.urlopen`` calls reuse any cookies
        set by the login response.

        Args:
            email: Account e-mail sent in the login form. Defaults to the
                empty string that was previously hard-coded.
            password: Account password sent in the login form. Defaults to the
                empty string that was previously hard-coded.
        """
        # This address is the real form-submission endpoint observed in the
        # browser's network monitor.
        url = "http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=20171011315479"
        postdata = urllib.parse.urlencode({
            "email": email,
            "password": password,
        }).encode('utf-8')
        req = urllib.request.Request(url, postdata)

        # Build an opener whose cookie processor stores cookies in the jar.
        cjar = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cjar))
        # Install it as the global opener used by urllib.request.urlopen().
        urllib.request.install_opener(opener)
        # The response body is not needed; close the response right away so
        # the underlying connection is not leaked.
        opener.open(req).close()
    
    def pachong(albumId,upage,picFileDir):
        """Download one page of an album's photos into ``picFileDir``.

        Requests page ``upage`` of the album listing (page size 100), collects
        every fragment matching the module-level ``rex`` pattern and saves each
        image as ``s_<upage>_<index>_<suffix>.jpg`` inside ``picFileDir``.

        Args:
            albumId: Album ID as a string; spliced into the listing URL.
            upage: Page number to request; also used in the saved file names.
            picFileDir: Existing directory that receives the images.

        Side effects:
            * Updates the module globals: ``isLoadEnd`` becomes True when the
              page holds anything other than exactly 100 matches, otherwise
              ``page`` is incremented.
            * Installs a global opener that only carries a browser User-Agent,
              replacing whichever opener was installed before.
        """
        global isLoadEnd
        global page

        url2 = (
            "http://photo.renren.com/photo/240205043/album-" + albumId
            + "/bypage/ajax/v7?page=" + str(upage)
            + "&pageSize=100&requestToken=2078225833&_rtk=47a03403;"
        )
        # Close the listing response as soon as its body has been read.
        with urllib.request.urlopen(url2) as response:
            data2 = response.read().decode('utf-8')
        print(url2)

        imglist = re.findall(rex, data2)
        size = len(imglist)
        print(size)
        print(page)
        # A page that is not completely full is treated as the last one.
        if size != 100:
            isLoadEnd = True
        else:
            page = page + 1
        print(isLoadEnd)

        # urlretrieve() goes through the global opener, so the User-Agent has
        # to be installed globally for the image requests.
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-Agent','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36')]
        urllib.request.install_opener(opener)

        for y, imgurl in enumerate(imglist, 1):
            # Strip the leading '"url":"' and the trailing quote, then drop
            # the backslashes left in the matched text.
            rimageurl = imgurl[7:-1].replace('\\','')
            # Use the tail of the URL (without ".jpg") to tell files apart; a
            # "/" in it would otherwise point into a non-existent directory.
            z = rimageurl[-20:-4].replace('/', '_')
            # Label with `upage`, the page actually fetched: the global `page`
            # may already have been advanced above.
            print("rimageurl --> " + rimageurl + " y --> " + str(y) + " z --> " + str(z) + " page --> " + str(upage))
            urllib.request.urlretrieve(rimageurl, picFileDir + '/s_%s_%s_%s.jpg' % (str(upage),str(y),str(z)))
    
    def makePicFileDir(albumId):
        """Create the local download directory for an album and return its path.

        Fetches the album page, extracts the album name with the module-level
        ``rex2`` pattern and creates a directory of that name under
        ``/Users/xunwang/Desktop/python/imgs/``. An already existing directory
        is reused, so the script can be run again.

        Args:
            albumId: Album ID as a string; spliced into the album page URL.

        Returns:
            The path of the album's directory.

        Raises:
            IndexError: If the page contains no ``'albumName':'...',`` fragment.
        """
        url3 = "http://photo.renren.com/photo/240205043/album-" + albumId + "/v7"
        # Close the response as soon as its body has been read.
        with urllib.request.urlopen(url3) as response:
            data3 = response.read().decode('utf-8')

        found = re.search(rex2, data3)
        if found is None:
            # Same exception type as indexing an empty match list, but with a
            # message that says which album failed.
            raise IndexError("album name not found in page of album %s" % albumId)
        # Drop the leading "'albumName':'" and the trailing "',".
        albumName = found.group(0)[13:-2]
        print(albumName)

        albumPicPath = "/Users/xunwang/Desktop/python/imgs/" + albumName
        # exist_ok avoids FileExistsError on reruns and duplicate album names.
        os.makedirs(albumPicPath, exist_ok=True)
        return albumPicPath
    
    if __name__ == "__main__":
        # Process the configured albums one after another.
        for currentAlbum in albumIds:
            # Reset the module-level paging state that pachong() updates.
            page = 1
            isLoadEnd = False

            login()
            downloadDir = makePicFileDir(currentAlbum)

            # Keep fetching pages until pachong() flags the last one; a fresh
            # login precedes every page request.
            while not isLoadEnd:
                login()
                pachong(currentAlbum, page, downloadDir)
    

    相关文章

      网友评论

          本文标题:爬人人网相册照片

          本文链接:https://www.haomeiwen.com/subject/fkmzmxtx.html