美文网首页
视频&音频转换成文字 v1.0版(再也不用敲字幕了)

视频&音频转换成文字 v1.0版(再也不用敲字幕了)

作者: 夜妖黑猫 | 来源:发表于2017-03-06 14:51 被阅读801次

    用百度的语音识别胡搞出了个坑爹的1.0版,一个小程序。可以把一段文字转换成语音,也可以把一堆视频、音频文件转换成文字,功能如图:

    [图片:程序功能说明截图(Paste_Image.png)]

    不知道简书会不会把代码给替换了,总之欢迎来喷,东拼西凑,加载一堆,代码混乱,没有多线程,没有异常处理,暴力修bug,写的什么玩意233

    程序依赖 ffmpeg:需要先下载 ffmpeg.exe,并把它放到与本脚本相同的目录下;另外代码还用到了第三方库 tinytag,需要提前安装。

    以下是代码:

    import base64
    import configparser
    import http.client
    import json
    import math
    import os
    import shutil
    import subprocess
    import sys
    import time
    import urllib
    import urllib.parse
    import urllib.request
    from subprocess import run

    from tinytag import TinyTag
    
    class BaiduRest:
        """Small client for the Baidu speech REST endpoints used by this script.

        It obtains an OAuth access token on construction and then offers
        text-to-speech (``getVoice``) and speech recognition (``getText``).
        """

        # How many times getText tries to upload one segment before giving up.
        MAX_UPLOAD_ATTEMPTS = 10
        # Pause between two upload attempts, in seconds.
        RETRY_DELAY_SECONDS = 1

        def __init__(self, cu_id, api_key, api_secert):
            """Store endpoints and credentials, then fetch an access token.

            Args:
                cu_id: Value sent as ``cuid`` with every request.
                api_key: Sent as ``client_id`` to the token endpoint.
                api_secert: Sent as ``client_secret`` to the token endpoint.
            """
            # URL template of the token endpoint.
            self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
            # URL template of the speech synthesis endpoint.
            self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
            # URL of the speech recognition endpoint.
            self.upvoice_url = 'http://vop.baidu.com/server_api'

            self.cu_id = cu_id
            # Remembered so a failed upload can request a fresh token later.
            self.api_key = api_key
            self.api_secert = api_secert
            self.getToken(api_key, api_secert)

        def getToken(self, api_key, api_secert):
            """Request an access token and store it in ``self.token_str``.

            Raises:
                OSError: If the token endpoint cannot be reached.
                KeyError: If the response carries no ``access_token``.
            """
            token_url = self.token_url % (api_key, api_secert)
            with urllib.request.urlopen(token_url) as response:
                token_data = json.loads(response.read())
            self.token_str = token_data['access_token']
            print ("token获取完成...")

        def getVoice(self, text, filename):
            """Synthesize ``text`` and save the returned bytes to ``output/<filename>``.

            The response body is written as-is; it is not checked for an
            error payload.
            """
            get_url = self.getvoice_url % (urllib.parse.quote(text), self.cu_id, self.token_str)
            with urllib.request.urlopen(get_url) as response:
                voice_data = response.read()
            with open(os.path.join("output", filename), 'wb') as voice_fp:
                voice_fp.write(voice_data)

        def _build_payload(self, voice_data):
            """Return the recognition request body (a dict) for raw audio bytes."""
            return {
                'format': 'amr',
                'rate': 16000,
                'channel': 1,
                'cuid': self.cu_id,
                'token': self.token_str,
                # 'len' is the size of the raw audio, not of the base64 text.
                'len': len(voice_data),
                'speech': base64.b64encode(voice_data).decode('utf-8'),
            }

        def _refresh_token(self):
            """Try to replace the current token; keep the old one on failure."""
            try:
                self.getToken(self.api_key, self.api_secert)
            except (OSError, http.client.HTTPException, KeyError, ValueError):
                # Best effort only: the next upload attempt decides whether
                # the existing token is still usable.
                pass

        def getText(self, filename):
            """Upload ``temp/<filename>`` for recognition and return the raw reply.

            The audio is read once; the upload is retried up to
            ``MAX_UPLOAD_ATTEMPTS`` times on network errors, refreshing the
            token between attempts.

            Returns:
                The undecoded response body (bytes) of the recognition endpoint.

            Raises:
                OSError: If the audio file cannot be read.
                RuntimeError: If every upload attempt failed.
            """
            # A missing or unreadable file is not a transient error, so it is
            # deliberately read outside the retry loop.
            with open(os.path.join("temp", filename), 'rb') as wav_fp:
                voice_data = wav_fp.read()

            last_error = None
            for attempt in range(self.MAX_UPLOAD_ATTEMPTS):
                print("正在上传:"+filename)
                # Rebuilt on every attempt because the token may have changed.
                post_data = json.dumps(self._build_payload(voice_data)).encode("utf-8")
                try:
                    with urllib.request.urlopen(self.upvoice_url, data=post_data) as response:
                        return response.read()
                except (OSError, http.client.HTTPException) as err:
                    last_error = err
                    if attempt + 1 < self.MAX_UPLOAD_ATTEMPTS:
                        print ("正在重试...")
                        time.sleep(self.RETRY_DELAY_SECONDS)
                        self._refresh_token()
            raise RuntimeError("upload of %s failed after %d attempts"
                               % (filename, self.MAX_UPLOAD_ATTEMPTS)) from last_error
    
    
    class VoiceProcessing:
        """Measure a media file from ``input`` and cut it into AMR segments in ``temp``."""

        # Length of one uploaded segment in seconds. The main script computes
        # the number of segments as ceil(duration / 59), so keep this at 59.
        SEGMENT_SECONDS = 59

        def __init__(self, filename, durtime):
            """Remember the file name (relative to ``input``) and a duration value."""
            self.filename = filename
            self.durtime = durtime

        @staticmethod
        def _segments(total, step=59):
            """Return ``[(start, length), ...]`` covering ``total`` seconds.

            Every segment is ``step`` seconds long except possibly the last.
            The number of segments is ``ceil(total / step)``; a total of 0
            yields an empty list.
            """
            segments = []
            for index in range(math.ceil(total / step)):
                start = index * step
                # Rounded so float noise such as 11.129999999 does not end up
                # on the ffmpeg command line.
                segments.append((start, round(min(step, total - start), 2)))
            return segments

        @staticmethod
        def _ffmpeg(arguments):
            """Run the ffmpeg.exe from the working directory with ``arguments``.

            Arguments are passed as a list (no shell), so file names containing
            spaces are handled correctly.
            """
            return run([os.path.abspath("ffmpeg.exe"), "-n"] + list(arguments))

        def preprocess(self, filename, durtime):
            """Return the duration in seconds of ``input/<self.filename>``.

            The file is transcoded to a temporary mono 16 kHz MP3 whose
            duration is read with TinyTag; the temporary file is removed again.
            The ``filename`` and ``durtime`` arguments are ignored, the
            instance's file name is used instead.

            Returns:
                The duration rounded to two decimals, or ``None`` when
                ``ffmpeg.exe`` is not present in the working directory.
            """
            source_name = self.filename
            if not os.path.exists("ffmpeg.exe"):
                print ("请把ffmpeg放到本目录后重试...")
                return None

            print ("音频文件预处理中...")
            # splitext keeps the full stem even when the name has no extension.
            probe_path = os.path.join("temp", os.path.splitext(source_name)[0] + ".mp3")
            self._ffmpeg(["-i", os.path.join("input", source_name),
                          "-acodec", "mp3", "-ac", "1", "-ar", "16000", "-vn",
                          probe_path])
            wavetag = TinyTag.get(probe_path)
            durtime = round(wavetag.duration, 2)
            sepparts = math.ceil(durtime/59)
            timeleft = durtime%59
            print("总时长:"+str(durtime)+"上传分段为:"+str(sepparts)+"末段时长:"+str(timeleft))
            os.remove(probe_path)
            return durtime

        def transvoid(self, filename, durtime):
            """Cut ``input/<self.filename>`` into ``temp/<stem><index>.amr`` segments.

            The ``filename`` and ``durtime`` arguments are ignored, the
            instance's file name is used instead.

            Returns:
                The total duration reported by ``preprocess``, or ``None`` when
                ``ffmpeg.exe`` is missing.
            """
            source_name = self.filename
            total = self.preprocess(source_name, 0)
            if total is None:
                return None

            stem = os.path.splitext(source_name)[0]
            segments = self._segments(total, self.SEGMENT_SECONDS)
            for index, (start, length) in enumerate(segments):
                endname = stem + str(index) + ".amr"
                remaining = round(total - start - length, 2)
                print("正在处理:"+endname+"剩余时长:"+str(remaining)+"分段:"+str(index + 1))
                # "-t" is a duration, not an end position, so it gets the
                # segment length rather than an ever-growing end time.
                self._ffmpeg(["-i", os.path.join("input", source_name),
                              "-ss", str(start), "-t", str(length),
                              "-acodec", "amr_wb", "-ac", "1", "-ar", "16000", "-vn",
                              os.path.join("temp", endname)])
            print ("音频文件转换完成...")
            return total
    
    
    class filesmanage:
        """File-handling helpers."""

        def __init__(self, filename):
            """Accept a file name for interface compatibility; nothing is stored."""
            pass

        @staticmethod
        def comptxt(filename):
            """Concatenate every file in ``temp`` into ``output/<stem>.txt``.

            ``<stem>`` is ``filename`` without its extension. The files are
            read as text in the order ``os.listdir`` returns them, and a
            newline is written after each one. Both directories are resolved
            against the current working directory.

            Declared as a static method so it works both as
            ``filesmanage.comptxt(name)`` and on an instance.
            """
            merge_dir = os.path.join(os.getcwd(), "temp")
            output_dir = os.path.join(os.getcwd(), "output")
            # splitext keeps the full stem even when the name has no extension.
            txtname = os.path.splitext(filename)[0] + ".txt"

            with open(os.path.join(output_dir, txtname), "w") as merged:
                for part_name in os.listdir(merge_dir):
                    with open(os.path.join(merge_dir, part_name)) as part:
                        merged.writelines(part)
                    merged.write('\n')
    
    
    
    def _strip_ext(name):
        """Return ``name`` without its extension (unchanged if it has none)."""
        return os.path.splitext(name)[0]


    def _load_api_credentials(config_path="ApiConfig.cfg"):
        """Return ``(api_key, api_secert)``, prompting and saving them if needed.

        When ``config_path`` exists the values are read from its ``[api]``
        section; otherwise the user is asked for both values and they are
        written to ``config_path``.
        """
        config = configparser.ConfigParser()
        # The same path is used for the existence check, the read and the
        # write, so the lookup also works on case-sensitive file systems.
        if os.path.exists(config_path):
            config.read(config_path)
            api_key = config.get("api","api_key")
            api_secert = config.get("api","api_secert")
            print("API加载成功!")
        else:
            api_key = input("请输入Api_Key:\n")
            api_secert = input("请输入Secrect_Key:\n")
            config.add_section("api")
            config.set("api","api_key",api_key)
            config.set("api","api_secert",api_secert)
            with open(config_path,"w") as config_file:
                config.write(config_file)
        return api_key, api_secert


    if __name__ == "__main__":
        # Load (or ask for) the API credentials.
        api_key, api_secert = _load_api_credentials()

        # Create the working folders.
        for folder in ("input", "output", "temp"):
            os.makedirs(folder, exist_ok=True)

        # Set up the API client and show the usage banner.
        bdr = BaiduRest("Black", api_key, api_secert)
        print ("  =======================================")
        print ("  | 视频&语音转成文字 v1.0版  by:Black |")
        print ("  | ——————————————    |")
        print ("  | 将文本文件放入“input”,      |")
        print ("  | 程序会自动将文字转变成语音。    |")
        print ("  |                                    |")
        print ("  | 将视频或音频文件放入“input”,   |")
        print ("  | 程序会将其中的语音转换成文字。   |")
        print ("  |                                    |")
        print ("  | 转换完成的文件会出现在“output”。 |")
        print ("  |                                    |")
        print ("  | 懒得修bug,出错就改文件名不要空格  |")
        print ("  =======================================")
        input ("放入文件后,按回车键继续..")
        print ("  ----------------------------")

        # Step 1: text-to-speech for every .txt file in "input".
        for txtname in [n for n in os.listdir("input") if n.endswith(".txt")]:
            # The names come from os.listdir("input"), so the folder has to be
            # part of the path when opening them.
            with open(os.path.join("input", txtname),'r') as readtxt:
                texts = readtxt.read()
            bdr.getVoice(texts, _strip_ext(txtname)+".mp3")

        # Step 2: speech recognition for every other file in "input".
        # count = [segments attempted, segments succeeded, segments failed]
        count = [0,0,0]
        wavlist = [n for n in os.listdir("input") if not n.endswith(".txt")]
        for wavename in wavlist:
            waveinfo = VoiceProcessing(wavename,0).transvoid(wavename,0)
            if waveinfo is None:
                # No duration available, so there is nothing to upload.
                count[0] +=1
                count[2] +=1
                continue

            stem = _strip_ext(wavename)
            txttemp = stem+".txt"
            # Upload the segments one by one.
            for i in range(math.ceil(waveinfo/59)):
                wavetemp = stem+str(i)+".amr"
                segment_path = os.path.join("temp", wavetemp)
                count[0] +=1
                try:
                    # The reply is JSON; json.loads replaces the former eval(),
                    # which executed server-supplied text and failed on
                    # null/true/false.
                    f_data = json.loads(bdr.getText(wavetemp).decode('utf-8'))
                except (OSError, RuntimeError, ValueError):
                    print ("未知错误,请重试...")
                    count[2] +=1
                    continue
                finally:
                    if os.path.exists(segment_path):
                        os.remove(segment_path)

                err_msg = f_data.get('err_msg', '')
                print ("返回结果:"+str(err_msg))
                # Store the recognized text.
                if err_msg=='success.':
                    results = f_data.get('result') or ['']
                    word = results[0]
                    if word:
                        # NOTE(review): the original compared UTF-8 bytes with a
                        # str, so this warning never fired. It is interpreted
                        # here as "the result is nothing but a comma" - confirm.
                        if not word.strip(',,'):
                            print (wavename+"转换成功!但内容可能为空.")
                        with open(os.path.join("output", txttemp),"a") as f:
                            f.write(str(word))
                        count[1] +=1
                    else:
                        print ("音频文件不存在或格式错误...")
                        count[2] +=1
                else:
                    print ("未知错误,请重试...")
                    count[2] +=1
        shutil.rmtree(r"temp")
        print ("--------------转换完成!---------------")
        print ("总计转换文件数:"+str(count[0])+"成功数"+str(count[1])+"失败数"+str(count[2]))
    
    

    相关文章

      网友评论

          本文标题:视频&音频转换成文字 v1.0版(再也不用敲字幕了)

          本文链接:https://www.haomeiwen.com/subject/lacugttx.html