美文网首页
下载二更的视频

下载二更的视频

作者: Do_More | 来源:发表于2017-08-16 11:18 被阅读0次
    image.png
    #coding=utf-8
    import re,urllib2,os,urllib,requests,json,cPickle,time
    
    def getHtmlCode(url):
        """Fetch *url* and return the raw response body.

        The response is always closed, even if reading fails, so crawling many
        pages in a loop does not leak open connections.

        :param url: absolute URL to request.
        :return: the body exactly as returned by ``response.read()``.
        :raises: whatever ``urllib2.urlopen`` / ``read`` raise on network errors.
        """
        response = urllib2.urlopen(url)
        # Python 2 urlopen results are not context managers, hence try/finally.
        try:
            return response.read()
        finally:
            response.close()
    
    def getUrls(htmlString):
        """Extract the video identifiers linked from a listing page.

        Looks for ``href="//www.ergengtv.com/video/<id>.html`` and returns every
        ``<id>`` in document order (duplicates included).

        :param htmlString: page source to scan.
        :return: list of captured identifier strings; empty if nothing matches.
        """
        # Dots are escaped so they only match a literal '.', and the id may not
        # contain a quote or whitespace, so a match can never run past the end of
        # the href attribute into unrelated markup.
        regUrl = re.compile(r'href="//www\.ergengtv\.com/video/([^"\s]+?)\.html')
        return regUrl.findall(htmlString)
    
    def getTitle(htmlString):
        """Return every value that follows ``title": "`` in the page source.

        Each match is the shortest non-empty run of characters up to the next
        ``",`` sequence.

        :param htmlString: page source to scan.
        :return: list of captured strings; empty if nothing matches.
        """
        titlePattern = r'title": "(.+?)",'
        return re.findall(titlePattern, htmlString)
    
    def getMediaId(htmlString):
        """Return every value that follows ``media_id": `` in the page source.

        Each match is the shortest non-empty run of characters up to the next
        comma; the text is returned as-is, without any conversion.

        :param htmlString: page source to scan.
        :return: list of captured strings; empty if nothing matches.
        """
        found = re.finditer(r'media_id": (.+?),', htmlString)
        return [hit.group(1) for hit in found]
    
    def getCreateTime(htmlString):
        """Return every value that follows ``create_at": `` in the page source.

        Each match is the shortest non-empty run of characters up to the next
        comma, returned as text (the caller converts it to a number).

        :param htmlString: page source to scan.
        :return: list of captured strings; empty if nothing matches.
        """
        return re.compile(r'create_at": (.+?),').findall(htmlString)
    
    if __name__ == '__main__':
        # Entry point: walk listing pages 27..118, open every linked video page,
        # and download the 1080p stream of each video whose media id is not yet
        # recorded in mediaIds.pkl. The id list is re-saved after every successful
        # download so an interrupted run can be resumed.
        pklPath = 'mediaIds.pkl'
        videoDir = 'videos'

        # NOTE(security): unpickling runs arbitrary code; this is acceptable only
        # because mediaIds.pkl is a file this script writes itself.
        if os.path.exists(pklPath):
            with open(pklPath, 'rb') as fileMediaId:
                mediaIdSaved = cPickle.load(fileMediaId)
        else:
            # First run: nothing has been downloaded yet.
            mediaIdSaved = []
        print(len(mediaIdSaved))

        # urlretrieve does not create missing directories.
        if not os.path.isdir(videoDir):
            os.makedirs(videoDir)

        for i in range(27, 119):
            url = 'https://www.ergengtv.com/video/list/0_' + str(i) + '.html'
            try:
                htmlCode = getHtmlCode(url)
            except Exception as e:
                # One unreachable listing page must not abort the whole crawl.
                print('error: %s' % e)
                continue

            # The same video can be linked several times on one listing page.
            for urlId in set(getUrls(htmlCode)):
                # Catch Exception (not a bare except) so Ctrl-C still stops the
                # script, and cover the page parsing too: a page without the
                # expected fields is skipped instead of crashing the run.
                try:
                    url2 = 'https://www.ergengtv.com/video/' + urlId + '.html'
                    htmlCode2 = getHtmlCode(url2)
                    createTimes = getCreateTime(htmlCode2)
                    timeString = time.localtime(float(createTimes[0]))
                    createTime = time.strftime('%Y-%m-%d', timeString)
                    titles = getTitle(htmlCode2)
                    mediaId = getMediaId(htmlCode2)[0]
                    fileName = createTime + '--' + titles[0]
                    print(fileName)

                    if mediaId in mediaIdSaved:
                        print('exsied-------------->  ')
                        continue

                    apiUrl = 'https://member.ergengtv.com/api/video/vod/?id=' + mediaId
                    decodeJson = json.loads(getHtmlCode(apiUrl))
                    downloadUrl = decodeJson["msg"]["segs"]["1080p"][0]["url"]
                    # Upgrade only a leading "http://" scheme. A blind
                    # replace('http', 'https') would turn "https" into "httpss"
                    # and rewrite any later "http" inside the URL.
                    if downloadUrl.startswith('http://'):
                        downloadUrl = 'https://' + downloadUrl[len('http://'):]

                    urllib.urlretrieve(downloadUrl, os.path.join(videoDir, fileName + '.mp4'))
                    print('done')

                    # Persist right away so progress survives an interruption.
                    mediaIdSaved.append(mediaId)
                    with open(pklPath, 'wb') as fileMediaId:
                        cPickle.dump(mediaIdSaved, fileMediaId, protocol=1)
                except Exception as e:
                    print('error: %s' % e)
    

    相关文章

      网友评论

          本文标题:下载二更的视频

          本文链接:https://www.haomeiwen.com/subject/uzfirxtx.html