美文网首页
爬取某内培训班vip课程并保存

爬取某内培训班vip课程并保存

作者: 羽天驿 | 来源:发表于2021-11-02 17:21 被阅读0次
    # author: xiaomge
    # 达内视频下载(此代码用于学习)
    import time
    import shutil
    
    import lxml.etree as etree
    import requests
    from Crypto.Cipher import AES
    import os
    
    # Request headers passed to the static.key and .ts segment requests made in
    # DaNei.dow_lowd.
    # NOTE(review): the right-hand side of every assignment below is missing
    # from this listing (presumably blanked out before publishing), so these
    # statements do not parse as written. Each one needs a string value filled
    # in before the script can run.
    header3 = dict()
    header3[
        'User-Agent'] = 
    # DaNei.dow_lowd calls .format(menu_id) on this value, so it is expected to
    # be a template string -- presumably containing a '{}' placeholder for the
    # course menu id; TODO confirm against the site.
    header3['Referer'] = 
    header3['Origin'] = 
    header3['Host'] = 
    header3['Accept'] = 
    header3['Accept-Encoding'] =
    header3['Sec-Fetch-Dest'] = 
    header3['Sec-Fetch-Mode'] =
    header3['Sec-Fetch-Site'] = 
    
    
    class DaNei:
        """Download, decrypt and merge the course videos listed on the student centre page.

        For every video id found on a course page, the encrypted ``.ts``
        segments are fetched and AES-decrypted into ``dow_lowd_path``, then
        concatenated in numeric segment order into one ``.mp4`` file under
        ``dow_hb_path``.
        """

        def __init__(self, user_agent='', cookie=''):
            """Set up request headers, the entry URL and the working directories.

            Args:
                user_agent: Value for the ``User-Agent`` header of page requests.
                    When empty the header is omitted.
                cookie: Value for the ``Cookie`` header of page requests. When
                    empty the header is omitted.

            NOTE(review): the published listing had both header values blanked
            out (which made the class unparsable), so they are constructor
            arguments here. Presumably the student centre page only lists
            courses for a logged-in session cookie -- confirm.
            """
            self.headers2 = {}
            if user_agent:
                self.headers2['User-Agent'] = user_agent
            if cookie:
                self.headers2['Cookie'] = cookie
            self.studentscenter_url = 'https://tts.tmooc.cn/studentCenter/toMyttsPage'
            # NOTE(review): TLS certificate verification is switched off for
            # every request made by this class.
            self.verify = False
            # Seconds to wait on any single HTTP request before giving up, so a
            # stalled connection cannot hang the whole run.
            self.timeout = 30
            self.dow_lowd_path = './tes'
            self.dow_hb_path = './hb'

        @staticmethod
        def _segment_index(file_name):
            """Return a sort key that orders '<n>.ts' names by their integer n."""
            stem = os.path.splitext(file_name)[0]
            if stem.isdigit():
                return (0, int(stem), file_name)
            # Names that are not plain numbers sort after all numbered segments.
            return (1, 0, file_name)

        @staticmethod
        def _safe_filename(text):
            """Return text stripped and with path-hostile characters replaced by '_'."""
            forbidden = '\\/:*?"<>|\r\n\t'
            return ''.join('_' if ch in forbidden else ch for ch in text.strip())

        def _fetch(self, url, headers):
            """Return the body of a GET request, raising on an HTTP error status."""
            response = requests.get(url, headers=headers, verify=self.verify,
                                    timeout=self.timeout)
            response.raise_for_status()
            return response.content

        def _merge_segments(self, save_path):
            """Concatenate every file in dow_lowd_path, in segment order, into save_path."""
            segment_names = sorted(os.listdir(self.dow_lowd_path), key=self._segment_index)
            with open(save_path, 'wb') as merged:
                for segment_name in segment_names:
                    segment_path = os.path.join(self.dow_lowd_path, segment_name)
                    with open(segment_path, 'rb') as segment:
                        shutil.copyfileobj(segment, merged)

        def dow_lowd(self, key_url_pipei, menu_id, max_segments=500):
            """Download and decrypt the segments of one video into dow_lowd_path.

            Segments are requested as ``<key_url_pipei>-0.ts``,
            ``<key_url_pipei>-1.ts``, ... until a request or decryption fails
            or ``max_segments`` have been fetched.

            Args:
                key_url_pipei: Video identifier used in the key and segment
                    URLs (the original comment gives ``nsd21050727am`` as an
                    example).
                menu_id: Value substituted into the ``Referer`` header template.
                max_segments: Upper bound on the number of segments to try.

            Returns:
                True when at least one segment was saved, False otherwise.
            """
            for folder in (self.dow_lowd_path, self.dow_hb_path):
                if os.path.exists(folder):
                    print('exist')
                else:
                    os.makedirs(folder)

            # Format the Referer into a per-call copy. Writing the formatted
            # value back into the shared header3 dict would destroy the
            # template after the first call, so later menu ids were ignored.
            request_headers = dict(header3)
            request_headers['Referer'] = header3['Referer'].format(menu_id)

            base_url = 'https://c.it211.com.cn/{}'.format(key_url_pipei)
            saved = 0
            # Any failure (HTTP error status, network error, undecryptable
            # body) ends the download; this is how the end of the segment list
            # is detected, so the broad catch is deliberate.
            try:
                # The key is the same for every segment of a video: fetch it once.
                key = self._fetch(base_url + '/static.key', request_headers)
                for i in range(max_segments):
                    ts_url = '{}/{}-{}.ts'.format(base_url, key_url_pipei, i)
                    encrypted = self._fetch(ts_url, request_headers)
                    # Decrypt before opening the output file so that a failed
                    # segment does not leave an empty file behind. The key is
                    # also used as the CBC initialisation vector.
                    cryptor = AES.new(key, AES.MODE_CBC, key)
                    decrypted = cryptor.decrypt(encrypted)
                    down_path = self.dow_lowd_path + '/{}.ts'.format(i)
                    with open(down_path, 'wb') as ts:
                        ts.write(decrypted)
                    saved += 1
                    print('第{}个下载完成'.format(i))
            except Exception as e:
                print('错误:', e)

            if saved == 0:
                # Nothing to merge: tell the caller instead of producing an
                # empty video file.
                print('{}没有下载到任何分片,跳过合并'.format(key_url_pipei))
                return False
            print('{}视频下载完成了,马上去合并'.format(key_url_pipei))
            return True

        def has_id(self):
            """Walk every course on the student centre page and save its videos.

            For each course link the course page is fetched, the video ids are
            read from it, and every video is downloaded, merged into
            ``dow_hb_path`` and its temporary segments removed. The method
            sleeps 60 seconds after each video.
            """
            index_html = requests.get(
                self.studentscenter_url, headers=self.headers2,
                verify=self.verify, timeout=self.timeout).content.decode('utf-8')
            selector = etree.HTML(index_html)
            course_list = selector.xpath('.//div[@class="course-menu"]//li[@class="sp"]//@href')
            for one_url in course_list:
                # First query-string parameter of the course link; empty when
                # the link has no query string.
                menu_id = one_url.partition('?')[2].split('&')[0]
                course_html = requests.get(
                    one_url, headers=self.headers2,
                    verify=self.verify, timeout=self.timeout).content.decode('utf-8')
                selector2 = etree.HTML(course_html)
                data_time = selector2.xpath('.//div[@class="video-list"]//p//@id')
                title_nodes = selector2.xpath('.//div[@id="video_stage_lty"]//text()')
                # The title becomes part of a file name, so it is sanitised;
                # a page without a title falls back to an empty prefix.
                course_title = self._safe_filename(str(title_nodes[0])) if title_nodes else ''
                print('*' * 10)
                for one_pa_am in data_time:
                    true_time_contents = one_pa_am.split('_')[1].split('.')[0]
                    if self.dow_lowd(true_time_contents, menu_id) is True:
                        # Merge the downloaded segments into one video file.
                        save_path = self.dow_hb_path + '/' + course_title + str(true_time_contents) + '.mp4'
                        self._merge_segments(save_path)
                        print("合并完成!!删除所有")
                        # Best effort: a failed cleanup is reported but does
                        # not stop the run.
                        try:
                            shutil.rmtree(self.dow_lowd_path)
                        except OSError as e:
                            print(e)
                    print('{}的视频下载合并完成了,下一个开始了!休息一分钟'.format(true_time_contents))
                    time.sleep(60)
                print('*' * 10)
    
    
    if __name__ == '__main__':
        # Script entry point: build the downloader and process every course
        # found on the student centre page.
        DaNei().has_id()
    
    

    相关文章

      网友评论

          本文标题:爬取某内培训班vip课程并保存

          本文链接:https://www.haomeiwen.com/subject/xpqvaltx.html