using UnityEngine;
using UnityEngine.UI;
//using Baidu.Aip.Face; // face recognition (unused here)
//using Baidu.Aip.Ocr; // OCR (unused here)
using Baidu.Aip.Speech; // Baidu speech SDK (ASR + TTS)
using System.IO;
using Newtonsoft.Json; // parse the returned JSON
using Newtonsoft.Json.Linq; // parse the returned JSON
using NAudio.Wave; // mp3-to-wav conversion
using System.Collections;
using System.Collections.Generic;
using System;
using LitJson; // LitJson, also for parsing the returned JSON
// Baidu speech API wrapper, by 非子萧
public class BaiDuYYYY : MonoBehaviour
{
public static BaiDuYYYY _instance;
private static string[] micArray = null; // microphone devices found on this machine
public Text debugInfo; // UI text for debug/log output
private string infoLog = ""; // accumulated log messages
public Text asrText; // UI text showing the recognized speech
public Text inputField_text; // input-field text to be synthesized
private FileInfo file;
private Asr _asrClient; // Baidu AI client for speech recognition
private Tts _ttsClient; // Baidu AI client for speech synthesis
private Dictionary<string, object> options; // optional request parameters
//private JObject result; // holds the returned result
private string ApiKey = "TvhA40Louirg9egVF2jRDVTG"; // fill in your own API key
private string SecretKey = "Z8umPDqzuuzoa7WafuHPEDjhl94uBflI"; // fill in your own secret key
public static string ttString; // text to synthesize; UTF-8 encoded, must be under 1024 bytes
private string path; // = Application.dataPath + "/Resources/"; // directory where synthesized audio is saved
private string ttsName_mp3; // = "xxmp3.mp3"; // file name of the synthesized mp3
private string ttsName_wav; // = "xxmp3.wav"; // file name of the synthesized wav
private string arsName_wav; // = "arswav.wav"; // file name of the recorded wav to recognize
private string mp3pathName; // synthesized mp3 = path + file name
private string wavpathName; // synthesized mp3 converted to wav = path + file name
private string wavpathName2; // recorded wav to recognize = path + file name
public AudioSource _AudioSource; // playback component
public AudioSource _AudioSource2; // playback component
private byte[] b; // mp3 bytes returned by Baidu TTS
private static AudioClip _AudioClip; // the recorded clip to send for recognition
private int audioLength = 30; // planned recording length in seconds
private int frequency = 8000; // sample rate; typically 44100, or 16000/8000 (a lower rate gives a smaller file)
private void Awake()
{
_instance = this;
//HTTPS certificate validation fails at runtime, so we register a validation callback manually in Awake
System.Net.ServicePointManager.ServerCertificateValidationCallback +=
delegate (object sender, System.Security.Cryptography.X509Certificates.X509Certificate certificate,
System.Security.Cryptography.X509Certificates.X509Chain chain,
System.Net.Security.SslPolicyErrors sslPolicyErrors)
{
return true; // always accept
};
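// Note: returning true unconditionally disables TLS certificate validation for the
// whole process. A stricter sketch (an assumption, not part of the original): only
// accept when there is no actual policy error, e.g.
// System.Net.ServicePointManager.ServerCertificateValidationCallback +=
// (sender, certificate, chain, sslPolicyErrors) =>
// sslPolicyErrors == System.Net.Security.SslPolicyErrors.None;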
_asrClient = new Asr(ApiKey, SecretKey);
_ttsClient = new Tts(ApiKey, SecretKey);
//path = Application.dataPath + "/Resources/"; // save location for synthesized audio (inside the project)
path = Application.persistentDataPath + "/"; // save location for synthesized audio (PC sandbox)
//path = Application.persistentDataPath + "/"; // save location for synthesized audio (Android sandbox)
ttsName_mp3 = "ttsmp3.mp3"; // file name of the synthesized mp3
ttsName_wav = "ttswav.wav"; // file name of the synthesized wav
arsName_wav = "arswav.wav"; // file name of the recorded wav to recognize
mp3pathName = path + ttsName_mp3;
wavpathName = path + ttsName_wav;
wavpathName2 = path + arsName_wav; // recorded wav for recognition (path + file name)
//_AudioSource = GetComponent<AudioSource>();
}
private void Start()
{
}
// Recognize a local recording file
public void AsrData()
{
//var data = File.ReadAllBytes("path to a pcm file");
//var result = _asrClient.Recognize(data, "pcm", 16000);
//var path = Application.dataPath + "/Test/mywave.wav"; // supported formats: pcm, wav or amr
var data = File.ReadAllBytes(wavpathName2);
var result = _asrClient.Recognize(data, "wav", 8000); // Baidu speech recognition
//Newtonsoft.Json parsing:
//JObject jobject = (JObject)Newtonsoft.Json.JsonConvert.DeserializeObject(result.ToString()); // parse the JSON response
//string s = jobject["result"][0].ToString(); // first recognition candidate as a string
//LitJson parsing:
JsonData jd0 = JsonMapper.ToObject(result.ToString()); // parse the JSON response
string s = jd0["result"][0].ToString(); // first recognition candidate as a string
Debug.Log(s); //ok
asrText.text = s;
debugInfo.text = result.ToString();
Debug.Log("recognized local file"); //ok
}
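// The direct indexing above throws when recognition fails, because an error response
// carries no "result" array. A minimal defensive sketch (assuming the documented Baidu
// ASR response shape {"err_no":0, "err_msg":"...", "result":[...]}; helper name is
// hypothetical):
private static string TryParseAsrResult(string json)
{
JsonData jd = JsonMapper.ToObject(json);
// err_no == 0 means success; otherwise err_msg describes the failure
if (jd.Keys.Contains("err_no") && (int)jd["err_no"] == 0 && jd["result"].Count > 0)
return jd["result"][0].ToString();
return null; // caller decides how to report the failure
}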
// Recognize an audio file from a URL
public void AsrUrl()
{
var result = _asrClient.Recoginze( // note: this spelling matches the Baidu C# SDK sample
"http://xxx.com/pcm-file-to-recognize",
"http://xxx.com/recognition-result-callback",
"pcm",
16000);
// Console.WriteLine(result);
Debug.Log("recognizing a remote audio file"); //ok
}
// Synthesize speech from text
public void Tts()
{
ttString = inputField_text.text; // the text to synthesize comes from the input field
//ttString = "好好工作,天天加班"; // sample text ("work hard, overtime every day")
// optional parameters
var option = new Dictionary<string, object>()
{
{"spd", 5}, // speed, int 0-9, default 5 (medium)
{"pit", 5}, // pitch, int 0-9, default 5 (medium)
{"vol", 7}, // volume, int 0-15, default 5 (medium)
{"per", 0} // voice: 0 = female, 1 = male, 3 = emotional (度逍遥), 4 = emotional (度丫丫); default is the standard female voice
};
var result = _ttsClient.Synthesis(ttString, option); // Baidu speech synthesis
if (result.ErrorCode == 0) // or check result.Success
{
File.WriteAllBytes(mp3pathName, result.Data); // write the returned mp3 bytes to disk
b = result.Data; // keep the mp3 bytes for direct playback
// UnityEditor.AssetDatabase.Refresh(); // editor only: re-import the new mp3 so it can be used right away
}
else
{
Debug.LogError("TTS failed, ErrorCode: " + result.ErrorCode); // surface the failure instead of ignoring it
}
Debug.Log(mp3pathName); //ok
Debug.Log("synthesized text to an mp3 file"); //ok
}
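// Per the note on ttString above, Baidu TTS requires the text to stay under 1024 bytes
// of UTF-8. A tiny hedged guard you could call before Synthesis (helper name is
// hypothetical):
private static bool FitsTtsLimit(string text)
{
// the limit is measured in UTF-8 bytes, not characters
return System.Text.Encoding.UTF8.GetByteCount(text) < 1024;
}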
//Play the synthesized audio (mp3)
public void PlayAudioMp3()
{
//string name = "ttsmp3"; // synthesized file name (no extension needed)
//_AudioSource2.clip = Resources.Load(name, typeof(AudioClip)) as AudioClip; // load via Resources
//Debug.Log(mp3pathName);
//_AudioSource2.Play();
//Mp3ToWav(); // convert mp3 to wav
//file = new FileInfo(wavpathName);
//Debug.Log(wavpathName);
//DirectoryInfo mydir = new DirectoryInfo(wavpathName);
//if (File.Exists(wavpathName)) // skip the conversion if the wav already exists locally
//{
// string s = @"file://" + wavpathName;
// StartCoroutine(LoadAudio());
//}
//Debug.Log("playing the synthesized mp3");
_AudioSource.clip = FromMp3Data(b);
_AudioSource.Play();
}
//load an audio file with WWW
public IEnumerator LoadAudio()
{
//string s = @"file://" + wavpathName; // Android must use the file:// scheme
string s = wavpathName; // a bare path works on PC; prepend file:// if loading fails
WWW www = new WWW(s);
yield return www;
// AudioClip ac = www.audioClip;
AudioClip ac = WWWAudioExtensions.GetAudioClip(www);
// AudioClip ac = www.GetAudioClip(false, false);
if (www.isDone)
{
_AudioSource2.clip = ac;
_AudioSource2.Play();
}
}
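//Hedged alternative: WWW is deprecated in newer Unity versions. A minimal sketch using
//UnityWebRequestMultimedia instead (assumes Unity 2017.1+; method name is hypothetical):
public IEnumerator LoadAudioModern(string pathOnDisk)
{
string url = "file://" + pathOnDisk; // local files need the file:// scheme here
using (var req = UnityEngine.Networking.UnityWebRequestMultimedia.GetAudioClip(url, AudioType.WAV))
{
yield return req.SendWebRequest();
if (!req.isNetworkError)
{
_AudioSource2.clip = UnityEngine.Networking.DownloadHandlerAudioClip.GetContent(req);
_AudioSource2.Play();
}
}
}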
//Play the synthesized audio (wav)
public void PlayAudioWav()
{
Mp3ToWav(); // convert mp3 to wav
//load via Resources; note this only finds the file when 'path' points at Assets/Resources and the editor has re-imported it, not with the persistentDataPath set in Awake
string name = "ttswav"; // synthesized file name (no extension needed)
_AudioSource2.clip = Resources.Load(name, typeof(AudioClip)) as AudioClip;
_AudioSource2.Play(); //ok
// StartCoroutine(PlayAudioWaw2(wavpathName));
Debug.Log("playing the synthesized wav");
}
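//Hedged alternative that works with the persistentDataPath location set in Awake, since
//Resources.Load above only works for files under Assets/Resources (method name is
//hypothetical; reuses the WAV parser already in this file):
public void PlayAudioWavFromDisk()
{
Mp3ToWav(); // convert mp3 to wav first
_AudioSource2.clip = FromWavData(File.ReadAllBytes(wavpathName)); // parse the wav bytes directly
_AudioSource2.Play();
}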
//mp3-to-wav conversion, via the NAudio.Wave library
public void Mp3ToWav()
{
using (var stream = File.Open(mp3pathName, FileMode.Open))
using (var reader = new Mp3FileReader(stream))
{
WaveFileWriter.CreateWaveFile(wavpathName, reader); // decode the mp3 and write it out as wav
}
// UnityEditor.AssetDatabase.Refresh(); // editor only: re-import the new wav so it can be used right away
Debug.Log("converted the mp3 file to wav"); //ok
}
// Find a microphone device
public void GetMicrophoneDevice()
{
micArray = Microphone.devices;
if (micArray.Length == 0)
{
Debug.LogError("no microphone device found!");
ShowInfoLog("no microphone device found!");
return;
}
else
{
Debug.Log("microphone is ready to record!");
ShowInfoLog("microphone is ready to record!");
//return;
}
}
//Start recording
public void StartRecord()
{
GetMicrophoneDevice(); // find a microphone device
_AudioSource.Stop(); // stop any previous playback before recording
_AudioSource.loop = false; // no looping
_AudioSource.mute = true; // mute while recording
_AudioSource.clip = Microphone.Start(null, false, audioLength, frequency); // start recording; args: device name (null = default), loop, planned length in seconds, sample rate (44100/16000/8000)
DateTime beginTime = DateTime.Now;
_AudioClip = _AudioSource.clip; // the recorded clip (everything below operates on this)
// busy-wait until the microphone actually starts delivering samples (briefly blocks the main thread)
while (!(Microphone.GetPosition(null) > 0))
{
}
//play back while recording
// _AudioSource.Play();
Debug.Log("recording started!");
ShowInfoLog("recording started!");
//countdown: start the coroutine
//StartCoroutine(TimeDown());
}
//coroutine: countdown
IEnumerator TimeDown()
{
Debug.Log("coroutine: countdown");
int time = 0;
while (time < audioLength)
{
if (!Microphone.IsRecording(null))
{ // nothing is being recorded
Debug.Log("recording failed");
yield break;
}
Debug.Log("yield return new WaitForSeconds " + time);
yield return new WaitForSeconds(1);
time++;
}
if (time >= audioLength)
{
Debug.Log("time is up, stopping the recording!");
ShowInfoLog("time is up, stopping the recording!");
StopRecord(); // stop recording
//SaveAudioClip(); // save the recording
}
yield return 0;
//int ShowTimeUI;
//yield return new WaitForSeconds(0f);
//for (float timer = 60; timer >= 0; timer -= Time.deltaTime)
//{
// if (timer <= 60)
// {
// ShowTimeUI = (int)timer + 1;
// ShowUI.text = ShowTimeUI.ToString() + " s";
// }
// yield return 0;
//}
}
//Stop recording
public void StopRecord()
{
//if (micArray.Length == 0)
//{
// return;
//}
//if (!Microphone.IsRecording(null))
//{
// return;
//}
int lastPos = Microphone.GetPosition(null);
if (Microphone.IsRecording(null))
{
audioLength = lastPos / frequency; // actual recorded length in seconds = samples / sample rate
}
else
{
audioLength = 30; // fall back to the planned recording length
}
Microphone.End(null); // stop the microphone
Debug.Log("recording stopped");
//Byte[] clipByte = GetClipData(); // convert the recording to Byte[]
//// string speech: the raw audio bytes, base64-encoded; used together with len
//// int len: the original audio length in bytes
//int len = clipByte.Length; // audio length in bytes
//string speech = Convert.ToBase64String(clipByte); // base64-encoded audio, used together with len
//Debug.Log("len" + len); // actual length is below the planned length
//Debug.Log("audioLength" + audioLength);
}
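//The clip returned by Microphone.Start always spans the full planned length, so the
//saved wav keeps trailing silence. A hedged trim sketch (helper name hypothetical),
//using the microphone position captured in StopRecord:
private static AudioClip TrimClip(AudioClip clip, int lastPos)
{
float[] samples = new float[lastPos * clip.channels];
clip.GetData(samples, 0); // copy only the recorded portion
AudioClip trimmed = AudioClip.Create(clip.name, lastPos, clip.channels, clip.frequency, false);
trimmed.SetData(samples, 0);
return trimmed;
}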
//Save the recording
public void SaveAudio_wav()
{
Save(_AudioClip, wavpathName2); // save the recording
//UnityEditor.AssetDatabase.Refresh(); // editor only: re-import the new file so it can be used right away
Debug.Log("recording saved");
ShowInfoLog("recording saved");
}
//wav save: write the recorded clip to a wav file
public static void Save(AudioClip clip, string path)
{
string filePath = Path.GetDirectoryName(path);
//create the directory if it does not exist
if (!Directory.Exists(filePath))
{
Directory.CreateDirectory(filePath);
}
//create an empty file
using (FileStream fileStream = CreateEmpty(path))
{
//write the sample data
ConvertAndWrite(fileStream, clip);
//then write the header
WriteHeader(fileStream, clip);
}
}
//wav save: write the sample data
private static void ConvertAndWrite(FileStream fileStream, AudioClip clip)
{
float[] samples = new float[clip.samples];
clip.GetData(samples, 0);
Int16[] intData = new Int16[samples.Length];
Byte[] bytesData = new Byte[samples.Length * 2];
int rescaleFactor = 32767; //to convert float to Int16
for (int i = 0; i < samples.Length; i++)
{
intData[i] = (short)(samples[i] * rescaleFactor);
Byte[] byteArr = new Byte[2];
byteArr = BitConverter.GetBytes(intData[i]);
byteArr.CopyTo(bytesData, i * 2);
}
fileStream.Write(bytesData, 0, bytesData.Length);
}
//wav save: reserve an empty 44-byte header
private static FileStream CreateEmpty(string filepath)
{
FileStream fileStream = new FileStream(filepath, FileMode.Create);
byte emptyByte = new byte();
for (int i = 0; i < 44; i++) //preparing the header
{
fileStream.WriteByte(emptyByte);
}
return fileStream;
}
//wav save: write the header
private static void WriteHeader(FileStream stream, AudioClip clip)
{
int hz = clip.frequency;
int channels = clip.channels;
int samples = clip.samples;
stream.Seek(0, SeekOrigin.Begin);
Byte[] riff = System.Text.Encoding.UTF8.GetBytes("RIFF");
stream.Write(riff, 0, 4);
Byte[] chunkSize = BitConverter.GetBytes(stream.Length - 8);
stream.Write(chunkSize, 0, 4);
Byte[] wave = System.Text.Encoding.UTF8.GetBytes("WAVE");
stream.Write(wave, 0, 4);
Byte[] fmt = System.Text.Encoding.UTF8.GetBytes("fmt ");
stream.Write(fmt, 0, 4);
Byte[] subChunk1 = BitConverter.GetBytes(16);
stream.Write(subChunk1, 0, 4);
UInt16 one = 1;
Byte[] audioFormat = BitConverter.GetBytes(one); // audio format 1 = PCM
stream.Write(audioFormat, 0, 2);
Byte[] numChannels = BitConverter.GetBytes(channels);
stream.Write(numChannels, 0, 2);
Byte[] sampleRate = BitConverter.GetBytes(hz);
stream.Write(sampleRate, 0, 4);
Byte[] byteRate = BitConverter.GetBytes(hz * channels * 2); // sampleRate * bytesPerSample*number of channels, here 44100*2*2
stream.Write(byteRate, 0, 4);
UInt16 blockAlign = (ushort)(channels * 2);
stream.Write(BitConverter.GetBytes(blockAlign), 0, 2);
UInt16 bps = 16;
Byte[] bitsPerSample = BitConverter.GetBytes(bps);
stream.Write(bitsPerSample, 0, 2);
Byte[] datastring = System.Text.Encoding.UTF8.GetBytes("data");
stream.Write(datastring, 0, 4);
Byte[] subChunk2 = BitConverter.GetBytes(samples * channels * 2);
stream.Write(subChunk2, 0, 4);
}
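//For reference, the 44-byte canonical PCM wav header written above:
// offset 0 "RIFF" | 4 chunk size (file length - 8) | 8 "WAVE"
// offset 12 "fmt " | 16 subchunk1 size (16) | 20 audio format (1 = PCM)
// offset 22 channels | 24 sample rate | 28 byte rate | 32 block align
// offset 34 bits per sample (16) | 36 "data" | 40 data size (samples * channels * 2)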
// Play back the recording
public void PlayRecordAudio()
{
if (Microphone.IsRecording(null))
return;
if (_AudioSource.clip == null)
return;
_AudioSource.mute = false;
_AudioSource.loop = false;
_AudioSource.Play();
Debug.Log("回放录音");
ShowInfoLog("回放录音");
}
//append a message to the log and show it in the UI
void ShowInfoLog(string info)
{
infoLog += info;
infoLog += "\r\n";
debugInfo.text = infoLog; // push the accumulated log to the UI text
}
/// <summary>
/// Convert the recording to Byte[]
/// </summary>
/// <returns></returns>
public Byte[] GetClipData()
{
if (_AudioSource.clip == null)
{
Debug.LogError("录音数据为空");
return null;
}
float[] samples = new float[_AudioSource.clip.samples];
_AudioSource.clip.GetData(samples, 0);
Byte[] outData = new byte[samples.Length * 2];
int rescaleFactor = 32767; //to convert float to Int16
for (int i = 0; i < samples.Length; i++)
{
short temshort = (short)(samples[i] * rescaleFactor);
Byte[] temdata = System.BitConverter.GetBytes(temshort);
outData[i * 2] = temdata[0];
outData[i * 2 + 1] = temdata[1];
}
if (outData == null || outData.Length <= 0)
{
Debug.LogError("录音数据为空");
return null;
}
//return SubByte(outData, 0, audioLength * 8000 * 2);
return outData;
}
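//Usage sketch, following the commented-out REST notes in StopRecord: the raw Baidu REST
//API takes the PCM bytes base64-encoded together with their byte length:
// byte[] pcm = GetClipData();
// string speech = Convert.ToBase64String(pcm); // "speech" field
// int len = pcm.Length; // "len" field, in bytes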
//Synthesize text and play it
public void TtsAndPlay()
{
Tts(); // Baidu text-to-speech
PlayAudioMp3(); // play the synthesized audio
}
//Record, recognize, and display the text:
//attach an OnButtonPressed script to a button; press to start recording, release to stop and run recognition, then show the result
//byte[] wav -> AudioClip
public static AudioClip FromWavData(byte[] data)
{
WAV wav = new WAV(data);
AudioClip audioClip = AudioClip.Create("wavclip", wav.SampleCount, 1, wav.Frequency, false);
audioClip.SetData(wav.LeftChannel, 0);
return audioClip;
}
//byte[] mp3 -> AudioClip
public static AudioClip FromMp3Data(byte[] data)
{
// Load the data into a stream
MemoryStream mp3stream = new MemoryStream(data);
// Convert the data in the stream to WAV format
Mp3FileReader mp3audio = new Mp3FileReader(mp3stream);
WaveStream waveStream = WaveFormatConversionStream.CreatePcmStream(mp3audio);
// Convert to WAV data
WAV wav = new WAV(AudioMemStream(waveStream).ToArray());
//Debug.Log(wav);
AudioClip audioClip = AudioClip.Create("testSound", wav.SampleCount, 1, wav.Frequency, false);
audioClip.SetData(wav.LeftChannel, 0);
// Return the clip
return audioClip;
}
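//Caveat (a deployment assumption, not from the original): NAudio's Mp3FileReader and
//WaveFormatConversionStream rely on Windows ACM codecs, so this mp3 path works in the
//editor and Windows players but generally not in Android/iOS builds.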
//helper for byte[] mp3 -> AudioClip: renders a WaveStream into an in-memory wav
private static MemoryStream AudioMemStream(WaveStream waveStream)
{
MemoryStream outputStream = new MemoryStream();
using (WaveFileWriter waveFileWriter = new WaveFileWriter(outputStream, waveStream.WaveFormat))
{
byte[] bytes = new byte[waveStream.Length];
waveStream.Position = 0;
waveStream.Read(bytes, 0, Convert.ToInt32(waveStream.Length));
waveFileWriter.Write(bytes, 0, bytes.Length);
waveFileWriter.Flush();
}
return outputStream;
}
}
//WAV parser: reads a 16-bit PCM wav byte[] into float sample arrays
public class WAV
{
// convert two bytes to one float in the range -1 to 1
static float bytesToFloat(byte firstByte, byte secondByte)
{
// convert two bytes to one short (little endian)
short s = (short)((secondByte << 8) | firstByte);
// convert to range from -1 to (just below) 1
return s / 32768.0F;
}
static int bytesToInt(byte[] bytes, int offset = 0)
{
int value = 0;
for (int i = 0; i < 4; i++)
{
value |= ((int)bytes[offset + i]) << (i * 8);
}
return value;
}
// properties
public float[] LeftChannel { get; internal set; }
public float[] RightChannel { get; internal set; }
public int ChannelCount { get; internal set; }
public int SampleCount { get; internal set; }
public int Frequency { get; internal set; }
public WAV(byte[] wav)
{
// Determine if mono or stereo
ChannelCount = wav[22]; // Forget byte 23 as 99.999% of WAVs are 1 or 2 channels
// Get the frequency
Frequency = bytesToInt(wav, 24);
// Get past all the other sub chunks to get to the data subchunk:
int pos = 12; // First Subchunk ID from 12 to 16
// Keep iterating until we find the data chunk (i.e. 64 61 74 61 ...... (i.e. 100 97 116 97 in decimal))
while (!(wav[pos] == 100 && wav[pos + 1] == 97 && wav[pos + 2] == 116 && wav[pos + 3] == 97))
{
pos += 4;
int chunkSize = wav[pos] + wav[pos + 1] * 256 + wav[pos + 2] * 65536 + wav[pos + 3] * 16777216;
pos += 4 + chunkSize;
}
pos += 8;
// Pos is now positioned to start of actual sound data.
SampleCount = (wav.Length - pos) / 2; // 2 bytes per sample (16 bit sound mono)
if (ChannelCount == 2) SampleCount /= 2; // 4 bytes per sample (16 bit stereo)
// Allocate memory (right will be null if only mono sound)
LeftChannel = new float[SampleCount];
if (ChannelCount == 2) RightChannel = new float[SampleCount];
else RightChannel = null;
// Write to double array/s:
int i = 0;
int maxInput = wav.Length - (RightChannel == null ? 1 : 3);
// while (pos < wav.Length)
while ((i < SampleCount) && (pos < maxInput))
{
LeftChannel[i] = bytesToFloat(wav[pos], wav[pos + 1]);
pos += 2;
if (ChannelCount == 2)
{
RightChannel[i] = bytesToFloat(wav[pos], wav[pos + 1]);
pos += 2;
}
i++;
}
}
public override string ToString()
{
return string.Format("[WAV: LeftChannel={0}, RightChannel={1}, ChannelCount={2}, SampleCount={3}, Frequency={4}]", LeftChannel, RightChannel, ChannelCount, SampleCount, Frequency);
}
}