美文网首页
Python采集美拍视频

Python采集美拍视频

作者: 乂尤先生 | 来源:发表于2020-08-19 14:44 被阅读0次

    首先我们利用开发者工具进行抓包分析:

    页面抓包.png
    在开发者工具左侧可以看到本次页面动态加载的请求,其中这个链接是以GET方式请求的。接下来可以查看并分析它的响应内容。
    分析.png
    将该响应的json美化后,可以基本断定这个请求就是美拍的视频列表接口,下面对返回的json进行分析处理:
    {
        "medias":[
            {
                "id":1224285358,
                "client_id":1089857302,
                "caption":"当老爸变成了灯神.....",
                "entry_info":null,
                "weibo_share_caption":"#美拍#当老爸变成了灯神.....",
                "facebook_share_caption":"",
                "weixin_share_caption":"分享@祝晓晗🌻 的美拍",
                "weixin_friendfeed_share_caption":"当老爸变成了灯神.....",
                "qzone_share_caption":"当老爸变成了灯神.....",
                "qq_share_caption":"当老爸变成了灯神.....",
                "instagram_share_caption":"分享祝晓晗🌻的美拍“当老爸变成了灯神.....”,快来看看!",
                "weixin_share_sub_caption":"来跟我一起玩美拍吧~",
                "weixin_friendfeed_share_sub_caption":"来跟我一起玩美拍吧~",
                "qzone_share_sub_caption":"来跟我一起玩美拍吧~",
                "qq_share_sub_caption":"来跟我一起玩美拍吧~",
                "geo":null,
                "video":"0c02aHR0cHM635RLy9tdnZpZGVvMTEubWVpdHVkYXRhLmNvbS81ZjI1NzY0MmE1MmJkMTV1bThhOGFwODk0MV9IMjY0XzRfMjJlNTFmNTM0MGRm2mSmMjcubXA0",
                "url":"https://www.meipai.com/media/1224285358?client_id=1089857306&utm_media_id=1224285358&utm_source=meipai_share&gid=",
                "cover_pic":"https://mvimg11.meitudata.com/5f257642c12ec6a1eqs8673689.jpg",
                "pic_size":"720*1280",
                "category":3,
                "time":49,
                "is_long":true,
                "show_controls":false,
                "created_at":"08-01 22:05",
                "comments_count":114,
                "likes_count":3100,
                "reposts_count":3,
                "user":Object{...},
                "cur_lives_id":"",
                "cur_lives_type":0,
                "cur_lives_stream_type":0,
                "cur_lives_scheme":"",
                "cur_yy_actid":"",
                "feed_id":"6856016371960585390",
                "locked":false,
                "type":5,
                "caption_url_params":[
    
                ],
                "privacy_config":Object{...},
                "has_watermark":0,
                "refuse_gift":true,
                "refuse_gift_reason":"送礼功能已下线",
                "hide_gift_btn":true,
                "new_music":Object{...},
                "convert_cover_pic":"",
                "ar_magic_info":null,
                "aside_info":null,
                "convert_pic_size":"",
                "cover_pic_resize":null,
                "m_plan":false,
                "ad_level":"N",
                "first_frame_pic":"http://mvimg10.meitudata.com/5f257aaf2bc6c3332.jpg",
                "first_frame_pic_size":"720*1280",
                "is_safe":1,
                "is_safe2":1,
                "miniprogram_data":Array[0],
                "cover_title":"",
                "dangerous_action":false,
                "category_tag_id":0,
                "recommend_caption":null,
                "recommend_cover_pic":null,
                "recommend_cover_pic_size":null,
                "user_recommend_cover_pic":null,
                "user_recommend_cover_pic_size":null,
                "is_prefer":0,
                "is_ad":0,
                "left_bottom_tip":Array[0],
                "display_source":125536,
                "trace_id":"ke0yujgv-30xvb4-4chv",
                "item_info":"{"id":1224285358,"code":125536,"codeDetail":"125536,125541","trace_id":"ke0yujgv-30xvb4-4chv"}",
                "caption_origin":"当老爸变成了灯神.....",
                "campaign":"",
                "created_at_origin":1596290704,
                "caption_complete":"当老爸变成了灯神.....",
                "caption_all":"当老爸变成了灯神....."
            },
            Object{...},
            Object{...},
            Object{...},
            Object{...},
            Object{...},
            Object{...},
            Object{...},
            Object{...}
        ],
        "total":1000,
        "current_page":2
    }
    
    

    使用找到的接口地址:

        def main(self, i):
            """Fetch one page of the Meipai timeline feed and process its videos.

            Args:
                i: Page number substituted into the ``page`` query parameter.

            Nothing is returned. When ``get_req`` yields a falsy result the page
            is skipped; any exception raised by ``get_video`` is printed rather
            than propagated.
            """
            # Feed endpoint found by inspecting the site's network traffic.
            page_url = "https://www.meipai.com/squares/new_timeline?page={i}&count=24&tid=13".format(i=i)
            print(page_url)

            body = self.get_req(page_url)
            if not body:
                return

            try:
                self.get_video(body)
            except Exception as err:
                print('获取视频出错,错误代码:', err)
    

    解析抓包文件,获取视频信息

        def get_video(self, response):
            """Parse one feed response and download every video it lists.

            Args:
                response: JSON text of the feed endpoint. It must contain a
                    ``medias`` list whose items carry the ``caption``,
                    ``weibo_share_caption`` and ``video`` keys.

            A failure on a single video (address decoding or download) is
            printed and appended to ``meipai/spider.txt``; the loop then moves
            on to the next item.
            """
            for media in json.loads(response)['medias']:
                # Prefer the caption as the file name; fall back to the share text.
                video_name = media['caption'] or media['weibo_share_caption']
                video_name = video_name.replace(' ', '')
                # Replace characters that are not allowed in file names.
                video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name)
                print(video_name)
                video_url = media['video']

                # Step 1: turn the obfuscated field into the real address.
                try:
                    videourl = self.video_decode(video_url).decode('utf8')
                except Exception as e:
                    print(r'视频地址解密出错,错误代码:', e)
                    with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                        f.write('视频解密出错,错误代码:{e}---采集{video_url}|{video_name}内容失败\n'.format(e=e, video_url=video_url, video_name=video_name))
                    continue
                print(videourl)

                # Step 2: download. Kept in its own try block so a download
                # failure is never reported as a decoding failure.
                try:
                    self.download(video_name, videourl)
                except Exception as e:
                    print('视频下载出错,错误代码:', e)
                    with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                        # The template's {videourl} placeholder needs a matching
                        # keyword argument, otherwise format() raises KeyError.
                        f.write('视频下载出错,错误代码:{e}---采集{videourl}|{video_name}内容失败\n'.format(e=e, videourl=videourl, video_name=video_name))
                    
    

    解析视频地址信息

        def video_decode(self, encoded_string):
            """Decode the obfuscated ``video`` field into the raw URL bytes.

            Layout handled by this routine:

            * The first four characters, read in reverse, form a hexadecimal
              number. Its decimal digits are used as ``d0 d1 d2 d3``.
            * In the remaining text, ``d1`` junk characters start at offset
              ``d0`` and are removed first.
            * In the result, ``d3`` junk characters start at
              ``len - d2 - d3`` and are removed next.
            * What is left is standard base64.

            Args:
                encoded_string: The obfuscated value as delivered by the feed.

            Returns:
                bytes: The base64-decoded payload (callers decode it as UTF-8).

            Raises:
                ValueError: If the header is not hexadecimal, or if the cleaned
                    text is not valid base64 (``binascii.Error`` is a subclass).
                IndexError: If the header yields fewer than four decimal digits.
            """
            def strip_run(text, start, length):
                # Remove the junk run text[start:start + length].
                junk = text[start:start + length]
                # count=1 matters: only the run at ``start`` is junk. Without it
                # every later occurrence of the same characters would also be
                # deleted, corrupting the base64 payload.
                return text[:start] + text[start:].replace(junk, "", 1)

            header, payload = encoded_string[:4], encoded_string[4:]
            digits = str(int(header[::-1], 16))

            payload = strip_run(payload, int(digits[0]), int(digits[1]))

            tail_offset, tail_length = int(digits[2]), int(digits[3])
            # The second run is located relative to the END of the text.
            tail_start = len(payload) - tail_offset - tail_length
            payload = strip_run(payload, tail_start, tail_length)

            return base64.b64decode(payload)
    

    下载视频,附带进度显示

        def download(self, name, videourl):
            """Stream one video to ``meipai/<name>.mp4`` while printing progress.

            Args:
                name: File name stem; the caller has already replaced characters
                    that are illegal in file names.
                videourl: Direct URL of the video file.

            Raises:
                requests.RequestException: On connection problems, timeouts or
                    an HTTP error status.
                OSError: If the target file cannot be opened (for example when
                    the ``meipai`` directory does not exist).
            """
            print("准备下载!")
            file_path = 'meipai/{name}.mp4'.format(name=name)
            # timeout: without one a stalled server blocks this call forever.
            with closing(requests.get(videourl, stream=True, timeout=30)) as response:
                chunk_size = 1024  # bytes pulled from the stream per iteration
                print(response.status_code)
                # Fail before writing so an error page is never saved as a video.
                response.raise_for_status()
                # Content-Length may be absent (e.g. chunked transfer);
                # 0 is used here to mean "total size unknown".
                content_size = int(response.headers.get('content-length', 0))
                print(content_size)
                data_count = 0
                with open(file_path, "wb") as file:
                    for data in response.iter_content(chunk_size=chunk_size):
                        file.write(data)
                        data_count = data_count + len(data)
                        if content_size:
                            now_jd = (data_count / content_size) * 100
                            print("\r 文件下载进度:%d%%(%d/%d) - %s" % (now_jd, data_count, content_size, file_path), end=" ")
                        else:
                            # No total available: report the byte count only
                            # instead of dividing by zero.
                            print("\r 文件下载进度:%d - %s" % (data_count, file_path), end=" ")
                    print("\n>>> 获取视频成功了!")
            # Short pause between downloads.
            time.sleep(2)
    

    完整代码

    import requests
    from fake_useragent import UserAgent
    import base64
    import json
    import re
    import time
    from contextlib import closing
    import threading
    
    class MP():
        """Scraper for the Meipai timeline feed.

        ``main`` fetches one feed page, ``get_video`` walks the listed items,
        ``video_decode`` recovers each real video address and ``download``
        stores the file under ``meipai/``. Per-video failures are appended to
        ``meipai/spider.txt``. The ``meipai`` directory must already exist.
        """

        # Seconds to wait on connect/read before a request is abandoned.
        REQUEST_TIMEOUT = 30
        # Proxy mapping used for video downloads; set to None to connect directly.
        DOWNLOAD_PROXIES = {'https': 'https://127.0.0.1:1080', 'http': 'http://127.0.0.1:1080'}

        def video_decode(self, encoded_string):
            """Decode the obfuscated ``video`` field into the raw URL bytes.

            Layout handled by this routine:

            * The first four characters, read in reverse, form a hexadecimal
              number. Its decimal digits are used as ``d0 d1 d2 d3``.
            * In the remaining text, ``d1`` junk characters start at offset
              ``d0`` and are removed first.
            * In the result, ``d3`` junk characters start at
              ``len - d2 - d3`` and are removed next.
            * What is left is standard base64.

            Args:
                encoded_string: The obfuscated value as delivered by the feed.

            Returns:
                bytes: The base64-decoded payload (callers decode it as UTF-8).

            Raises:
                ValueError: If the header is not hexadecimal, or if the cleaned
                    text is not valid base64 (``binascii.Error`` is a subclass).
                IndexError: If the header yields fewer than four decimal digits.
            """
            def strip_run(text, start, length):
                # Remove the junk run text[start:start + length].
                junk = text[start:start + length]
                # count=1 matters: only the run at ``start`` is junk. Without it
                # every later occurrence of the same characters would also be
                # deleted, corrupting the base64 payload.
                return text[:start] + text[start:].replace(junk, "", 1)

            header, payload = encoded_string[:4], encoded_string[4:]
            digits = str(int(header[::-1], 16))

            payload = strip_run(payload, int(digits[0]), int(digits[1]))

            tail_offset, tail_length = int(digits[2]), int(digits[3])
            # The second run is located relative to the END of the text.
            tail_start = len(payload) - tail_offset - tail_length
            payload = strip_run(payload, tail_start, tail_length)

            return base64.b64decode(payload)

        def ua(self):
            """Return the HTTP headers sent with every feed request.

            Returns:
                dict: Cookie, Host, Referer and a fixed desktop User-Agent.
            """
            # A fixed User-Agent is sent. To rotate agents instead, create one
            # fake_useragent.UserAgent() and use its ``random`` value here.
            headers = {
                'Cookie': 'MUSID=kdhd1o131g536r6shisl5rdcg7; MP_WEB_GID=266934702254632; virtual_device_id=433ced9ee7d2b137b89ae37d40df50e9; pvid=UdlW5diAfeJPUaHLK1j3vMC7xVBOnB9c; sid=kdhd1o131g536r6shisl5rdcg7; UM_distinctid=174006923bd1a2-0ad676043abc8a-581b3318-1fa400-174006923beff; CNZZDATA1256786412=1978692496-1597731334-%7C1597731334',
                'Host': 'www.meipai.com',
                'Referer': 'https://www.meipai.com/square/13',
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
            }
            return headers

        def get_req(self, url):
            """GET ``url`` and return the body as text.

            Args:
                url: Address to request.

            Returns:
                str | None: The UTF-8 decoded body for a 200 response, ``None``
                for any other status or when the request itself fails.
            """
            try:
                response = requests.get(url, headers=self.ua(), timeout=self.REQUEST_TIMEOUT)
            except requests.RequestException as e:
                # Report and signal failure the same way as a non-200 status,
                # so a network error does not terminate the calling thread.
                print('请求出错,错误代码:', e)
                return None
            if response.status_code != 200:
                return None
            return response.content.decode('utf-8')

        def get_video(self, response):
            """Parse one feed response and download every video it lists.

            Args:
                response: JSON text of the feed endpoint. It must contain a
                    ``medias`` list whose items carry the ``caption``,
                    ``weibo_share_caption`` and ``video`` keys.

            A failure on a single video (address decoding or download) is
            printed and appended to ``meipai/spider.txt``; the loop then moves
            on to the next item.
            """
            for media in json.loads(response)['medias']:
                # Prefer the caption as the file name; fall back to the share text.
                video_name = media['caption'] or media['weibo_share_caption']
                video_name = video_name.replace(' ', '')
                # Replace characters that are not allowed in file names.
                video_name = re.sub(r'[\|\/\<\>\:\*\?\\\"]', "_", video_name)
                print(video_name)
                video_url = media['video']

                # Step 1: turn the obfuscated field into the real address.
                try:
                    videourl = self.video_decode(video_url).decode('utf8')
                except Exception as e:
                    print(r'视频地址解密出错,错误代码:', e)
                    with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                        f.write('视频解密出错,错误代码:{e}---采集{video_url}|{video_name}内容失败\n'.format(e=e, video_url=video_url, video_name=video_name))
                    continue
                print(videourl)

                # Step 2: download. Kept in its own try block so a download
                # failure is never reported as a decoding failure.
                try:
                    self.download(video_name, videourl)
                except Exception as e:
                    print('视频下载出错,错误代码:', e)
                    with open(r'meipai/spider.txt', 'a+', encoding='utf-8') as f:
                        # The template's {videourl} placeholder needs a matching
                        # keyword argument, otherwise format() raises KeyError.
                        f.write('视频下载出错,错误代码:{e}---采集{videourl}|{video_name}内容失败\n'.format(e=e, videourl=videourl, video_name=video_name))

        def download(self, name, videourl):
            """Stream one video to ``meipai/<name>.mp4`` while printing progress.

            Args:
                name: File name stem; the caller has already replaced characters
                    that are illegal in file names.
                videourl: Direct URL of the video file.

            Raises:
                requests.RequestException: On connection problems, timeouts or
                    an HTTP error status.
                OSError: If the target file cannot be opened (for example when
                    the ``meipai`` directory does not exist).
            """
            print("准备下载!")
            file_path = 'meipai/{name}.mp4'.format(name=name)
            request = requests.get(
                videourl,
                proxies=self.DOWNLOAD_PROXIES,
                stream=True,
                timeout=self.REQUEST_TIMEOUT,  # a stalled server must not block forever
            )
            with closing(request) as response:
                chunk_size = 1024  # bytes pulled from the stream per iteration
                print(response.status_code)
                # Fail before writing so an error page is never saved as a video.
                response.raise_for_status()
                # Content-Length may be absent (e.g. chunked transfer);
                # 0 is used here to mean "total size unknown".
                content_size = int(response.headers.get('content-length', 0))
                print(content_size)
                data_count = 0
                with open(file_path, "wb") as file:
                    for data in response.iter_content(chunk_size=chunk_size):
                        file.write(data)
                        data_count = data_count + len(data)
                        if content_size:
                            now_jd = (data_count / content_size) * 100
                            print("\r 文件下载进度:%d%%(%d/%d) - %s" % (now_jd, data_count, content_size, file_path), end=" ")
                        else:
                            # No total available: report the byte count only
                            # instead of dividing by zero.
                            print("\r 文件下载进度:%d - %s" % (data_count, file_path), end=" ")
                    print("\n>>> 获取视频成功了!")
            # Short pause between downloads.
            time.sleep(2)

        def main(self, i):
            """Fetch one page of the Meipai timeline feed and process its videos.

            Args:
                i: Page number substituted into the ``page`` query parameter.

            Nothing is returned. When ``get_req`` yields a falsy result the page
            is skipped; any exception raised by ``get_video`` is printed rather
            than propagated.
            """
            # Feed endpoint found by inspecting the site's network traffic.
            url = "https://www.meipai.com/squares/new_timeline?page={i}&count=24&tid=13".format(i=i)
            print(url)
            response = self.get_req(url)
            if not response:
                return
            try:
                self.get_video(response)
            except Exception as e:
                print('获取视频出错,错误代码:', e)
    
    
    if __name__ == "__main__":
        # One scraper instance is shared by all worker threads.
        video_download = MP()
        # For a single-page trial run, call video_download.main(1) instead.
        # Start one thread per feed page for page numbers 0 through 99.
        for page in range(100):
            threading.Thread(target=video_download.main, args=(page,)).start()
    

    相关文章

      网友评论

          本文标题:Python采集美拍视频

          本文链接:https://www.haomeiwen.com/subject/mpwrjktx.html