美文网首页
python采集快手视频及评论数据并作自动点赞操作~

python采集快手视频及评论数据并作自动点赞操作~

作者: 颜狗一只 | 来源:发表于2022-07-19 18:50 被阅读0次

    前言

    嗨喽,大家好呀~这里是爱看美女的茜茜呐

    今天我们要采集的网站呢,它是一款国民级短视频App。

    在它那里,了解真实的世界,认识有趣的人,也可以记录真实而有趣的自己。 🛫

    现在,话不多说,让我们开始叭 ⛱

    本篇代码提供者:青灯教育-巳月

    知识点:

    • 动态数据抓包
    • requests发送请求
    • json数据解析

    准备工作

    • python 3.8 运行代码
    • pycharm 2021.2 辅助敲代码
    • requests 第三方模块

    代码实现:

    1. 发送请求
    2. 获取数据
    3. 解析数据
    4. 保存数据

    代码

    采集视频

    导入模块

    import os
    import re

    import requests         # HTTP client used to send the requests
    

    加入伪装

    # Request headers that make the server treat this script as a normal
    # browser user rather than an automated client.
    # NOTE(review): the Cookie embeds a userId and session tokens
    # (kuaishou.server.web_st / web_ph) copied from a browser session;
    # presumably each user must substitute their own and the value expires —
    # confirm before relying on it.
    headers = {
        'content-type': 'application/json',
        'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_d3f9d8c2cbebafd126b80eb0b1c13360; client_key=65890b29; userId=270932146; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABH5pXBjDW6tX4vO3hi0mofxvSjVsHg6LatW99bMWA3KrRx_WY1RxCgq6BtAVnGDjqgm8RsWPrOJHukiYbXTmH_oFeSlNsOVIz4Ymg2x_wybN0bT-G-soz4woc62rMc5g-QkghtJkGFnl2dLiX7vfScrR1k2eequu1Sb75AelVzJHMI-6112E_BA8UXRmBpDcA7fzWbzJnEH0p7I6OiAJn1BoSf061Kc3w5Nem7YdpVBmH39ceIiBdj1aQWaSUYM-4rqViwshbJuwtgX0NJfI_52HdjZ6_MygFMAE; kuaishou.server.web_ph=8c6a5419c9f21478baa04f79fbf6c7283c13',
        'Host': 'www.kuaishou.com',
        'Origin': 'https://www.kuaishou.com',
        'Referer': 'https://www.kuaishou.com/search/video?searchKey=%E8%BE%A3%E5%A6%B9',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    }
    
    # Build one search payload per result page; ``page`` runs from 1 to 10 and
    # is sent as the string ``pcursor``. The request itself is issued further
    # down inside this loop body.
    for page in range(1, 11):
        # NOTE: the variable is named ``json`` to mirror the ``json=`` keyword
        # of requests.post; it would shadow the stdlib module of the same name
        # if that were ever imported here.
        json = {
            'operationName': "visionSearchPhoto",
            'query': "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  likeCount\n  viewCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  __typename\n}\n\nquery visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n  visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    searchSessionId\n    pcursor\n    aladdinBanner {\n      imgUrl\n      link\n      __typename\n    }\n    __typename\n  }\n}\n",
            # Search keyword, page cursor and the session id captured from the browser.
            'variables': {'keyword': "辣妹", 'pcursor': str(page), 'page': "search", 'searchSessionId': "MTRfMjcwOTMyMTQ2XzE2NTc3MTQ0MDUwMTZf6L6j5aa5Xzk1NDU"}
        }
    

    发送请求

        # GraphQL endpoint that answers the visionSearchPhoto query.
        url = 'https://www.kuaishou.com/graphql'
        # POST the payload as JSON. requests has no default timeout, so pass
        # one explicitly to keep a stalled server from hanging the script.
        response = requests.post(url=url, headers=headers, json=json, timeout=10)
    

    <Response [200]>: 请求成功

    Python 里的字典可以转换成 JSON 格式的字符串

    前后端数据交互

    response.json() 会把响应体中的 JSON 解析成 Python 的字典格式

    获取数据

        # Decode the JSON response body into Python objects.
        json_data = response.json()
    

    解析数据

        # The search hits are stored under data -> visionSearchPhoto -> feeds.
        feeds = json_data['data']['visionSearchPhoto']['feeds']
        # Iterate over the feed entries directly instead of indexing by position.
        for feed in feeds:
            photo = feed['photo']
            photoUrl = photo['photoUrl']
            # The caption is used as the file name when the video is saved, so
            # remove characters that are invalid in Windows file names
            # (\ / : * ? " < > |) as well as newlines. The raw string makes
            # ``\\`` match a literal backslash; the previous non-raw pattern
            # collapsed to ``\/`` and left backslashes in the caption.
            caption = re.sub(r'[\\/:*?"<>|\n]', '', photo['caption'])
            print(caption, photoUrl)
    

    保存数据

            # Download the complete video into memory; the timeout prevents a
            # stalled connection from blocking the whole run.
            video_data = requests.get(photoUrl, timeout=30).content
            # open() does not create missing directories, so make sure the
            # output folder exists before writing into it.
            os.makedirs('video', exist_ok=True)
            # The context manager closes the file handle even if the write fails.
            with open(f'video/{caption}.mp4', mode='wb') as video_file:
                video_file.write(video_data)
    

    采集评论(注:下面的代码发送的是 visionAddComment 请求,实际效果是在指定视频下发布一条评论)

    import requests
    
    # Request headers that make the server treat this script as a normal
    # browser user.
    # NOTE(review): the Cookie embeds a userId and session tokens copied from a
    # browser session; presumably each user must substitute their own — confirm.
    headers = {
        'content-type': 'application/json',
        'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_d3f9d8c2cbebafd126b80eb0b1c13360; client_key=65890b29; userId=270932146; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABoMUaybC2UKr2MOuXtaCMlS0my2AJe2dE2crt0WKNd6n2_hWQlTHuEfB_l5yq5WEV-kbR4SSKebmVxd__N3y98rvOpZtEtTssidKptKitcRQA3_VuIfXKYDrOvmsKrLDo0l8u5-GXzDzbLrjLm2E14U65DCDSweVNdOqSCgDQh9gjxOCKWMG4sCZO-rhYHYi1pN4c-kn1SZcgSh8ItP6WOxoS8JByODRPv5hk-B95zTquvFHcIiD38YeNZzgktz_RU3mo3dDlFag5UaRViIBk-xbucAuJDSgFMAE; kuaishou.server.web_ph=21ef9689a064ff74dfbc5e3f00c440696d0d',
        'Host': 'www.kuaishou.com',
        'Origin': 'https://www.kuaishou.com',
        'Referer': 'https://www.kuaishou.com/short-video/3xk7fvrbbqektni?authorId=3xeb7adrir6iny9&streamSource=search&area=searchxxnull&searchKey=%E8%BE%A3%E5%A6%B9&currentPcursor=1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    }
    # GraphQL mutation that posts ``content`` as a comment on the video
    # identified by ``photoId`` / ``photoAuthorId``. Named ``payload`` rather
    # than ``json`` so it is not confused with the ``json=`` keyword below.
    payload = {
        'operationName': "visionAddComment",
        'query': "mutation visionAddComment($photoId: String, $photoAuthorId: String, $content: String, $replyToCommentId: ID, $replyTo: ID, $expTag: String) {\n  visionAddComment(photoId: $photoId, photoAuthorId: $photoAuthorId, content: $content, replyToCommentId: $replyToCommentId, replyTo: $replyTo, expTag: $expTag) {\n    result\n    commentId\n    content\n    timestamp\n    status\n    __typename\n  }\n}\n",
        'variables': {
            'content': "不好",
            'expTag': "1_i/2005259440785868865_xpcwebsearchxxnull0",
            'photoAuthorId': "3xeb7adrir6iny9",
            'photoId': "3xk7fvrbbqektni"
        }
    }
    url = 'https://www.kuaishou.com/graphql'
    # requests has no default timeout; set one so a stalled server cannot hang
    # the script.
    response = requests.post(url=url, headers=headers, json=payload, timeout=10)
    

    实现自动点赞

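    # Every action on the web page (liking, posting a comment, visiting a
    # site, placing an order, booking, ...) comes down to sending a request.
    # Work out where the data comes from; here it is the same endpoint:
    # https://www.kuaishou.com/graphql
    # Only the JSON request body differs between operations.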
    import requests
    
    # Request headers that make the server treat this script as a normal
    # browser user.
    # NOTE(review): the Cookie embeds a userId and session tokens copied from a
    # browser session; presumably each user must substitute their own — confirm.
    headers = {
        'content-type': 'application/json',
        'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_d3f9d8c2cbebafd126b80eb0b1c13360; client_key=65890b29; userId=270932146; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqAB8HEt1O6Ahi5XgrkkTFeYhp0xHyR0XW_-_xE7ccfDb3W_gPVA53sBXEk0qMFYounhic9pa97jqNrN12ll8dFi0aTKu-esFbPDlxwZrJMaugua7_C-TU4MiQs9boYD4uXCs2qiDmeCxyfpi2sxN-htiwYZJcyomKDMitAApaX6-HwgCqJ3LXlTnwg9hC9K3sQK4c3wjmKJKV9ABJILoQQdVxoSg3ZkWJHNsQvvc8vsvUksYr6BIiDWS1rGE8d_0kelcNUomvyiQjxk2bKONmeM5GDJSz0f4SgFMAE; kuaishou.server.web_ph=61e920d14e1e0abfae19521be99015ba22df',
        'Host': 'www.kuaishou.com',
        'Origin': 'https://www.kuaishou.com',
        'Referer': 'https://www.kuaishou.com/short-video/3xk7fvrbbqektni?authorId=3xeb7adrir6iny9&streamSource=search&area=searchxxnull&searchKey=%E8%BE%A3%E5%A6%B9&currentPcursor=1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    }
    # GraphQL mutation that likes the video identified by ``photoId`` /
    # ``photoAuthorId``. Named ``payload`` rather than ``json`` so it is not
    # confused with the ``json=`` keyword below.
    payload = {
        'operationName': "visionVideoLike",
        'query': "mutation visionVideoLike($photoId: String, $photoAuthorId: String, $cancel: Int, $expTag: String) {\n  visionVideoLike(photoId: $photoId, photoAuthorId: $photoAuthorId, cancel: $cancel, expTag: $expTag) {\n    result\n    __typename\n  }\n}\n",
        'variables': {
            # presumably 0 means "like" and a non-zero value cancels it — TODO confirm
            'cancel': 0,
            'expTag': "1_i/2001449839044035650_xpcwebsearchxxnull0",
            'photoAuthorId': "3xeb7adrir6iny9",
            'photoId': "3xk7fvrbbqektni"
        }
    }
    url = 'https://www.kuaishou.com/graphql'
    # requests has no default timeout; set one so a stalled server cannot hang
    # the script.
    response = requests.post(url=url, headers=headers, json=payload, timeout=10)
    

    尾语 💝

    感谢你观看我的文章呐~本次航班到这里就结束啦 🛬

    希望本篇文章对你有所帮助 🎉,也能让你学到一点知识~

    躲起来的星星🍥也在努力发光,你也要努力加油(让我们一起努力叭)。

    最后,博主要一下你们的三连呀(点赞、评论、收藏),不要钱的还是可以搞一搞的嘛~

    不知道评论啥的,即使扣个6666也是对博主的鼓舞吖 💞 感谢 💐

    相关文章

      网友评论

          本文标题:python采集快手视频及评论数据并作自动点赞操作~

          本文链接:https://www.haomeiwen.com/subject/tqcsirtx.html