用百度的语音识别胡搞出了个坑爹的1.0版,一个小程序。可以把一段文字转换成语音,也可以把一堆视频、音频文件转换成文字,功能如图:
(图:Paste_Image.png)不知道简书会不会把代码给替换了。总之欢迎来喷:东拼西凑,加载一堆,代码混乱,没有多线程,没有异常处理,暴力修bug,写的什么玩意233
用到ffmpeg,需要把ffmpeg.exe下载后放到程序所在的同一目录下;另外还需要安装第三方库tinytag。
以下是代码:
import base64
import configparser
import json
import math
import os
import shutil
import subprocess
import sys
import time
import urllib
import urllib.parse
import urllib.request
from subprocess import run

from tinytag import TinyTag
class BaiduRest:
    """Small client for the Baidu speech REST endpoints used by this script.

    It fetches an OAuth access token on construction and offers text-to-speech
    (``getVoice``) and speech recognition (``getText``).
    """

    # Upload attempts before getText gives up. The original nested loops
    # (9 outer x 10 inner) amounted to the same budget.
    MAX_UPLOAD_ATTEMPTS = 90

    def __init__(self, cu_id, api_key, api_secert):
        """Store the endpoint templates and fetch an access token.

        Args:
            cu_id: client identifier sent as ``cuid`` with every request.
            api_key: value substituted for ``client_id`` in the token URL.
            api_secert: value substituted for ``client_secret`` in the token URL.
        """
        # URL template for token authentication
        self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
        # URL template for speech synthesis
        self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
        # URL for speech recognition
        self.upvoice_url = 'http://vop.baidu.com/server_api'
        self.cu_id = cu_id
        # Kept so a failed upload can refresh the token on this instance
        # instead of relying on module-level globals.
        self._api_key = api_key
        self._api_secert = api_secert
        self.getToken(api_key, api_secert)

    def getToken(self, api_key, api_secert):
        """Request an access token and store it in ``self.token_str``.

        Raises:
            OSError: the HTTP request failed (``urllib.error.URLError`` is a subclass).
            ValueError: the response body is not valid JSON.
            KeyError: the response JSON has no ``access_token`` field.
        """
        token_url = self.token_url % (api_key, api_secert)
        with urllib.request.urlopen(token_url) as response:
            r_str = response.read()
        token_data = json.loads(r_str)
        self.token_str = token_data['access_token']
        print ("token获取完成...")

    def getVoice(self, text, filename):
        """Synthesize ``text`` and write the response bytes to ``output/<filename>``.

        The response body is written as-is; it is not inspected for an error
        payload.
        """
        get_url = self.getvoice_url % (urllib.parse.quote(text), self.cu_id, self.token_str)
        with urllib.request.urlopen(get_url) as response:
            voice_data = response.read()
        with open(os.path.join("output", filename), 'wb') as voice_fp:
            voice_fp.write(voice_data)

    def getText(self, filename):
        """Upload ``temp/<filename>`` for recognition and return the raw response.

        The audio is declared to the service as 16 kHz mono AMR. A failed
        upload is retried after a one second pause, up to
        ``MAX_UPLOAD_ATTEMPTS`` times, refreshing the token between attempts.

        Returns:
            bytes: the undecoded response body.

        Raises:
            OSError: the audio file cannot be read, or every upload attempt
                failed (the last network error is re-raised).
        """
        # Read once, outside the retry loop: a missing file is not a transient
        # error and should surface immediately.
        with open(os.path.join("temp", filename), 'rb') as wav_fp:
            voice_data = wav_fp.read()
        speech = base64.b64encode(voice_data).decode('utf-8')

        last_error = None
        for attempt in range(self.MAX_UPLOAD_ATTEMPTS):
            # Rebuilt per attempt because the token may have been refreshed.
            data = {
                'format': 'amr',
                'rate': 16000,
                'channel': 1,
                'cuid': self.cu_id,
                'token': self.token_str,
                'len': len(voice_data),
                'speech': speech,
            }
            post_data = json.dumps(data)
            print("正在上传:"+filename)
            try:
                with urllib.request.urlopen(self.upvoice_url, data=bytes(post_data, encoding="utf-8")) as response:
                    return response.read()
            except OSError as err:
                last_error = err
                print ("正在重试...")
                if attempt + 1 == self.MAX_UPLOAD_ATTEMPTS:
                    break
                time.sleep(1)
                try:
                    self.getToken(self._api_key, self._api_secert)
                except (OSError, ValueError, KeyError):
                    # Best effort: keep the current token and let the next
                    # upload attempt decide whether it still works.
                    pass
        raise last_error
class VoiceProcessing:
    """Convert a file from ``input/`` into AMR segments under ``temp/`` via ffmpeg."""

    # Executable expected in the current working directory.
    FFMPEG = "ffmpeg.exe"
    # Length in seconds of each uploaded slice; the calling script derives the
    # segment count with the same number.
    SEGMENT_SECONDS = 59

    def __init__(self, filename, durtime):
        """Remember the input file name (relative to ``input/``) and a duration value."""
        self.filename = filename
        self.durtime = durtime

    def _stem(self):
        """Return the file name with its extension removed.

        Kept as ``name[:-len(ext)]`` on purpose: a name without an extension
        yields an empty stem, and the calling script derives segment names
        with the same rule, so both sides must agree.
        """
        suflen = len(os.path.splitext(self.filename)[1])
        return self.filename[:-suflen]

    def _run_ffmpeg(self, options, target):
        """Run ffmpeg on the input file with ``options``, writing mono 16 kHz audio to ``target``.

        Arguments are passed as a list (no shell), so file names containing
        spaces or shell metacharacters are handled verbatim.
        """
        command = [os.path.abspath(self.FFMPEG), "-n", "-i", os.path.join("input", self.filename)]
        command += list(options)
        command += ["-ac", "1", "-ar", "16000", "-vn", target]
        run(command)

    def preprocess(self, filename, durtime):
        """Measure the audio duration of the input file.

        The file is transcoded to a temporary MP3 under ``temp/``, its
        duration is read with TinyTag, and the temporary file is removed.
        The ``filename`` and ``durtime`` arguments are accepted for
        compatibility but not used; ``self.filename`` is processed.

        Returns:
            The duration in seconds rounded to two decimals, or ``None`` when
            ffmpeg is not present in the working directory.
        """
        if not os.path.exists(self.FFMPEG):
            print ("请把ffmpeg放到本目录后重试...")
            return None

        print ("音频文件预处理中...")
        probe_path = os.path.join("temp", self._stem() + ".mp3")
        self._run_ffmpeg(["-acodec", "mp3"], probe_path)
        try:
            wavetag = TinyTag.get(probe_path)
            durtime = round(wavetag.duration, 2)
        finally:
            # Never leave the probe file behind, even if tag reading fails.
            if os.path.exists(probe_path):
                os.remove(probe_path)
        sepparts = math.ceil(durtime / self.SEGMENT_SECONDS)
        timeleft = durtime % self.SEGMENT_SECONDS
        print("总时长:"+str(durtime)+"上传分段为:"+str(sepparts)+"末段时长:"+str(timeleft))
        return durtime

    def transvoid(self, filename, durtime):
        """Split the input file into AMR-WB segments ``temp/<stem><index>.amr``.

        Exactly ``ceil(duration / SEGMENT_SECONDS)`` segments are written, each
        at most ``SEGMENT_SECONDS`` long. The ``filename`` and ``durtime``
        arguments are accepted for compatibility but not used.

        Returns:
            The measured duration in seconds, or ``0`` when preprocessing
            could not run (ffmpeg missing), in which case no segment is
            produced.
        """
        rdurtime = self.preprocess(self.filename, 0)
        if rdurtime is None:
            return 0

        segment = self.SEGMENT_SECONDS
        stem = self._stem()
        total_parts = math.ceil(rdurtime / segment)
        single_short_clip = rdurtime < segment

        for index in range(total_parts):
            start_time = index * segment
            # ffmpeg's -t is a duration, not an end position: every slice is
            # one segment long except a shorter final one.
            length = min(segment, round(rdurtime - start_time, 2))
            endname = stem + str(index) + ".amr"
            if single_short_clip:
                print ("正在转换:"+endname+"...")
            else:
                remaining = max(round(rdurtime - start_time - length, 2), 0)
                print("正在处理:"+endname+"剩余时长:"+str(remaining)+"分段:"+str(index + 1))
            self._run_ffmpeg(
                ["-ss", str(start_time), "-t", str(length), "-acodec", "amr_wb"],
                os.path.join("temp", endname),
            )

        if not single_short_clip:
            print ("音频文件转换完成...")
        return rdurtime
class filesmanage:
    """File helper: merges the files under ``temp`` into one text document."""

    def __init__(self, filename):
        """Accept a file name for interface compatibility; nothing is stored."""
        pass

    @staticmethod
    def comptxt(filename):
        """Concatenate every file in ``temp`` into ``output/<stem>.txt``.

        ``<stem>`` is ``filename`` without its extension. Files are merged in
        sorted name order and a newline is written after each one. Both
        directories are resolved against the current working directory.

        Declared as a staticmethod so it can be called either as
        ``filesmanage.comptxt(name)`` or on an instance.
        """
        merge_dir = os.path.join(os.getcwd(), "temp")
        final_dir = os.path.join(os.getcwd(), "output")
        txtname = os.path.splitext(filename)[0] + ".txt"

        with open(os.path.join(final_dir, txtname), "w") as merged:
            # Sorted so the merge order does not depend on the order in which
            # the file system happens to list the entries.
            for part_name in sorted(os.listdir(merge_dir)):
                part_path = os.path.join(merge_dir, part_name)
                if not os.path.isfile(part_path):
                    continue
                with open(part_path) as part:
                    merged.writelines(part)
                merged.write('\n')
# Name of the file that stores the API credentials.
_CONFIG_FILE = "ApiConfig.cfg"
# Length in seconds of each uploaded slice; VoiceProcessing cuts segments
# with the same number.
_SEGMENT_SECONDS = 59


def _load_credentials():
    """Return ``(api_key, api_secert)`` from the config file, prompting if absent.

    When the file or either option is missing, the user is asked for both
    values and they are saved to the config file for the next run.
    """
    config = configparser.ConfigParser()
    # ConfigParser.read silently skips a file that does not exist.
    config.read(_CONFIG_FILE)
    if config.has_option("api", "api_key") and config.has_option("api", "api_secert"):
        key = config.get("api", "api_key")
        secret = config.get("api", "api_secert")
        print("API加载成功!")
        return key, secret

    key = input("请输入Api_Key:\n")
    secret = input("请输入Secrect_Key:\n")
    if not config.has_section("api"):
        config.add_section("api")
    config.set("api", "api_key", key)
    config.set("api", "api_secert", secret)
    with open(_CONFIG_FILE, "w") as config_fp:
        config.write(config_fp)
    return key, secret


def _ensure_work_dirs():
    """Create the ``input``, ``output`` and ``temp`` directories if missing."""
    for folder in ("input", "output", "temp"):
        if not os.path.isdir(folder):
            os.mkdir(folder)


def _show_banner():
    """Print the usage banner and wait until the user presses Enter."""
    print (" =======================================")
    print (" | 视频&语音转成文字 v1.0版 by:Black |")
    print (" | —————————————— |")
    print (" | 将文本文件放入“input”, |")
    print (" | 程序会自动将文字转变成语音。 |")
    print (" | |")
    print (" | 将视频或音频文件放入“input”, |")
    print (" | 程序会将其中的语音转换成文字。 |")
    print (" | |")
    print (" | 转换完成的文件会出现在“output”。 |")
    print (" | |")
    print (" | 懒得修bug,出错就改文件名不要空格 |")
    print (" =======================================")
    input ("放入文件后,按回车键继续..")
    print (" ----------------------------")


def _synthesize_texts(client):
    """Turn every ``.txt`` file in ``input`` into an MP3 of the same stem."""
    for txtname in os.listdir("input"):
        if not txtname.endswith(".txt"):
            continue
        # The listing holds bare names, so the directory must be prepended.
        with open(os.path.join("input", txtname), 'r') as readtxt:
            texts = readtxt.read()
        mp3name = os.path.splitext(txtname)[0] + ".mp3"
        client.getVoice(texts, mp3name)


def _store_segment_result(raw, txttemp, wavename):
    """Append one recognised segment to ``output/<txttemp>``.

    Returns ``True`` when text was stored and ``False`` when the response is
    missing, malformed, reports an error, or contains no text.
    """
    if raw is None:
        print ("未知错误,请重试...")
        return False
    try:
        # The response is data from the network: parse it as JSON, never
        # evaluate it as Python code.
        f_data = json.loads(raw.decode('utf-8'))
    except ValueError:
        print ("未知错误,请重试...")
        return False
    if not isinstance(f_data, dict):
        print ("未知错误,请重试...")
        return False

    err_msg = str(f_data.get('err_msg', ''))
    print ("返回结果:"+err_msg)
    if err_msg != 'success.':
        print ("未知错误,请重试...")
        return False

    results = f_data.get('result') or ['']
    word = str(results[0])
    if not word:
        print ("音频文件不存在或格式错误...")
        return False

    with open(os.path.join("output", txttemp), "a") as out_fp:
        out_fp.write(word)
    # NOTE(review): the original compared the last three UTF-8 bytes with a
    # comma, which presumably meant a full-width comma; both forms are
    # accepted here. Confirm against real service output.
    if word.endswith((",", ",")):
        print (wavename+"转换成功!但内容可能为空.")
    return True


def _recognize_media(client):
    """Transcribe every non-``.txt`` file in ``input``.

    Returns ``[total, succeeded, failed]`` counted per uploaded segment.
    """
    count = [0, 0, 0]
    wavlist = [name for name in os.listdir("input") if not name.endswith(".txt")]
    for wavename in wavlist:
        waveinfo = VoiceProcessing(wavename, 0).transvoid(wavename, 0)
        if not waveinfo:
            # Nothing was converted, so there is nothing to upload.
            continue
        sepparts = math.ceil(waveinfo / _SEGMENT_SECONDS)
        # Same stem rule as VoiceProcessing, so segment names line up.
        suflen = len(os.path.splitext(wavename)[1])
        stem = wavename[:-suflen]
        txttemp = stem + ".txt"
        for i in range(sepparts):
            wavetemp = stem + str(i) + ".amr"
            raw = client.getText(wavetemp)
            count[0] += 1
            if _store_segment_result(raw, txttemp, wavename):
                count[1] += 1
            else:
                count[2] += 1
            os.remove(os.path.join("temp", wavetemp))
    return count


if __name__ == "__main__":
    # These three names stay at module level because code elsewhere in the
    # file refers to ``api_key`` / ``api_secert`` as globals.
    api_key, api_secert = _load_credentials()
    _ensure_work_dirs()
    bdr = BaiduRest("Black", api_key, api_secert)
    _show_banner()
    # Speech synthesis first, then speech recognition.
    _synthesize_texts(bdr)
    count = _recognize_media(bdr)
    # Remove the scratch directory only after every file has been processed.
    shutil.rmtree(r"temp")
    print ("--------------转换完成!---------------")
    print ("总计转换文件数:"+str(count[0])+"成功数"+str(count[1])+"失败数"+str(count[2]))
网友评论