美文网首页
腾讯视频

腾讯视频

作者: 无量儿 | 来源:发表于2022-03-21 09:40 被阅读0次

    # -*- coding: utf-8 -*-

    from bs4 import BeautifulSoup
    import requests
    from urllib.parse import quote, unquote
    import re
    import time

    # Scrape the metadata of a single Tencent Video page and print it.
    #
    # Earlier assignments to other sample pages were dead stores (each one was
    # overwritten before use), so only the effective URL is assigned.  The other
    # sample pages were:
    #   https://v.qq.com/x/cover/mzc00200pfr3hmt.html
    #   https://v.qq.com/x/cover/mzc00200zsrmfna/c3217q6uk3c.html
    url = 'https://v.qq.com/x/cover/mzc00200qhoftwk.html'

    # Parenthesised so that both literals are concatenated; previously the
    # second literal was a separate no-op statement and the header was truncated.
    user_agent = (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/94.0.4606.81 Safari/537.36'
    )
    headers = {'User-Agent': user_agent}

    # Seconds to wait for the server before giving up instead of hanging forever.
    REQUEST_TIMEOUT = 10


    def _first_text(page, selector, default=''):
        """Return the text of the first element matching *selector*.

        *default* is returned when nothing on *page* matches, so a missing
        element does not raise IndexError.
        """
        matches = page.select(selector)
        return matches[0].get_text() if matches else default


    def _leading_digits(value):
        """Return the digits that start the first whitespace-separated token.

        Returns '' when the token does not start with a digit or *value* is
        empty.  Unlike the pattern r'(\\d*)\\S+', an all-digit token keeps its
        last digit.
        """
        return re.match(r'\s*(\d*)', value).group(1)


    wb_data = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    wb_data.raise_for_status()  # fail early on HTTP errors instead of parsing an error page
    soup = BeautifulSoup(wb_data.text, 'html.parser')

    # Debug output: the search box element and the whole parsed document.
    print(soup.select('div[class="search-video"]'))

    print(soup)

    taskName = ''  # task name
    platform_name = '腾讯视频'  # channel name
    item_id = url.rsplit('/', 1)[-1].split('.')[0]  # page ID: last path segment without extension
    title = _first_text(soup, 'h1[class="video_title _video_title"]').strip()  # content title
    article_url = url  # content link

    # Body text; the summary paragraph may be absent, in which case '无' is used.
    # NOTE(review): a removed dead store also tried to read the description from
    # a '"desc":"..."' field in the page text -- confirm whether that fallback is wanted.
    content = _first_text(soup, 'p[class="summary _video_summary"]', default='无')

    media_name = _first_text(soup, 'span[class="user_name"]')  # creator account name

    keyword = ''  # search keyword

    read_count = _first_text(soup, 'em[class="num"]')  # view (play) count

    # Total number of comments, taken from the leading digits of the element's
    # "title" attribute.  The placeholder is used when the element or the
    # attribute is missing.
    comments_count_raw = soup.select('div[class="txp_btn_text"]')
    comments_title = comments_count_raw[0].attrs.get('title') if comments_count_raw else None
    if comments_title is None:
        comments_count = '总评论数为空'
    else:
        comments_count = _leading_digits(comments_title)

    # share_count = ''  # share count: not available

    like_count_raw = ''  # like count: not available

    # Upload time: the first 11 characters of the date element's text.
    datetime_raw = soup.select('span[class="date _date"]')
    if datetime_raw:
        datetime = datetime_raw[0].get_text()[0:11]
    else:
        datetime = '内容上传时间获取异常!!!'

    id_str = ''

    user_id = ''  # commenter ID

    # Commenter account name.
    # NOTE(review): this reads the same element as media_name -- confirm intent.
    user_name = media_name

    text = ''  # comment text (loaded asynchronously)

    comment_like_count = ''  # comment like count (loaded asynchronously)

    comment_time = ''  # comment time (loaded asynchronously)

    create_time = time.strftime('%Y-%m-%d %H:%M:%S')  # local time at which the data was collected

    print(item_id, title, content, media_name, read_count, comments_count, datetime, create_time, sep='\n')

    相关文章

      网友评论

          本文标题:腾讯视频

          本文链接:https://www.haomeiwen.com/subject/hzpaoltx.html