using UnityEngine;
using UnityEngine.UI;
//using Baidu.Aip.Face;//人脸识别
//using Baidu.Aip.Ocr;//图片文字识别
using Baidu.Aip.Speech;//语音识别
using System.IO;
using Newtonsoft.Json;//解析返回json数据
using Newtonsoft.Json.Linq;//解析返回json数据
using NAudio.Wave;//mp3转wav;
using System.Collections;
using System.Collections.Generic;
using System;
using LitJson;//LitJson解析返回json数据

/// <summary>
/// Unity component that wires UI elements and audio sources to the Baidu AIP speech
/// clients (<c>Asr</c> for recognition, <c>Tts</c> for synthesis) and keeps the file
/// names/paths used for the synthesized and recorded audio.
/// NOTE(review): the opening brace of the class is not visible in this excerpt.
/// </summary>
public class BaiDuYYYY : MonoBehaviour
public static BaiDuYYYY _instance; // most recently awakened instance (assigned in Awake)
private static string[] micArray = null;// hardware devices: microphone names

public Text debugInfo;                             // displays debug information
private string infoLog = "";// accumulated log text
public Text asrText;                               // displays the text recognized from a recording
public Text inputField_text;                       // text of the input box holding the text to synthesize

private FileInfo file; // only referenced from commented-out code in the visible part of the file
private  Asr _asrClient;                          // Baidu AI client: speech recognition
private  Tts _ttsClient;                          // Baidu AI client: speech synthesis

private Dictionary<string, object> options;       // original note: "returned data"; not referenced in the visible code
//private JObject result;                           // receives the returned result

// NOTE(review): SECURITY - API credentials are hard-coded in source. They should be rotated
// and loaded from configuration that is not checked in.
private string ApiKey = "TvhA40Louirg9egVF2jRDVTG";// fill in your own API key here
private string SecretKey = "Z8umPDqzuuzoa7WafuHPEDjhl94uBflI";// fill in your own secret key here

public static string ttString;// text to synthesize; original note: UTF-8 encoded and must be shorter than 1024 bytes

private string path;// directory where audio files are saved (assigned in Awake)

private string ttsName_mp3;// file name of the synthesized MP3 (assigned in Awake)
private string ttsName_wav;// file name of the synthesized audio converted to WAV (assigned in Awake)
private string arsName_wav;// file name of the recorded WAV submitted for recognition (assigned in Awake)

private string mp3pathName;// synthesized MP3: path + file name
private string wavpathName;// synthesized audio converted to WAV: path + file name

private string wavpathName2;// recorded WAV used for recognition: path + file name

public  AudioSource _AudioSource;// playback component (also receives the microphone clip)
public  AudioSource _AudioSource2;// second playback component

private byte[] b; // MP3 bytes returned by the last successful Baidu synthesis call
private static AudioClip _AudioClip;// recorded clip that is saved and submitted for recognition

private int audioLength=30;// recording length in seconds
private int frequency = 8000;// recording sample rate; original note: 44100 is typical, 16000 or 8000 give smaller files

// Remembers whether the certificate callback below has already been registered. The
// ServicePointManager event is process-wide, so without this flag every Awake call
// (scene reload, second instance) would stack another identical handler.
private static bool certificateCallbackInstalled;

/// <summary>
/// Unity lifecycle hook. Publishes this instance through <c>_instance</c>, registers the
/// TLS certificate callback, creates the Baidu recognition/synthesis clients and builds
/// the audio file paths under <c>Application.persistentDataPath</c>.
/// </summary>
private void Awake()
{
    _instance = this;

    if (!certificateCallbackInstalled)
    {
        // NOTE(review): SECURITY - this handler accepts every server certificate, which
        // disables TLS validation for the whole process. It is kept because removing it
        // may break the HTTPS calls on runtimes without a usable trust store; replace it
        // with real validation (or certificate pinning) before shipping.
        System.Net.ServicePointManager.ServerCertificateValidationCallback +=
            delegate (object sender, System.Security.Cryptography.X509Certificates.X509Certificate certificate,
                      System.Security.Cryptography.X509Certificates.X509Chain chain,
                      System.Net.Security.SslPolicyErrors sslPolicyErrors)
            {
                return true; // always accept
            };
        certificateCallbackInstalled = true;
    }

    _asrClient = new Asr(ApiKey, SecretKey);
    _ttsClient = new Tts(ApiKey, SecretKey);

    // All audio files live in the per-user sandbox directory.
    path = Application.persistentDataPath + "/";

    ttsName_mp3 = "ttsmp3.mp3"; // synthesized speech as returned by the service
    ttsName_wav = "ttswav.wav"; // synthesized speech converted to WAV
    arsName_wav = "arswav.wav"; // microphone recording submitted for recognition

    mp3pathName = path + ttsName_mp3;
    wavpathName = path + ttsName_wav;
    wavpathName2 = path + arsName_wav;
}

// Unity Start hook.
// NOTE(review): no body (not even braces) is visible for this method in this excerpt -
// confirm against the original file whether it was empty.
private void Start()

/// <summary>
/// Sends the locally saved recording (<c>wavpathName2</c>) to the Baidu recognizer,
/// shows the raw response in <c>debugInfo</c> and the first recognized candidate in
/// <c>asrText</c>.
/// </summary>
/// <remarks>
/// The raw response is displayed before it is parsed, so an error reply stays visible.
/// A reply without a non-empty "result" array (for example an error reply) clears
/// <c>asrText</c> instead of throwing.
/// </remarks>
public void AsrData()
{
    if (!File.Exists(wavpathName2))
    {
        debugInfo.text = "Recording file not found: " + wavpathName2;
        return;
    }

    var data = File.ReadAllBytes(wavpathName2);

    // Declare the same sample rate the recorder is configured with instead of repeating
    // the literal 8000, so the two cannot drift apart.
    var result = _asrClient.Recognize(data, "wav", frequency);

    string rawResponse = result.ToString();
    debugInfo.text = rawResponse;

    JsonData parsed = JsonMapper.ToObject(rawResponse);
    string recognized = "";
    if (parsed != null && parsed.IsObject && ((System.Collections.IDictionary)parsed).Contains("result"))
    {
        JsonData candidates = parsed["result"];
        if (candidates != null && candidates.IsArray && candidates.Count > 0)
        {
            recognized = candidates[0].ToString();
        }
    }

    asrText.text = recognized;
}
// Recognize a speech file referenced by URL.
// NOTE(review): the call below is cut off in this excerpt - no arguments, closing
// parenthesis or terminating semicolon are visible, so its parameters cannot be
// documented from here.
public void AsrUrl()
    var result = _asrClient.Recoginze(
    // Console.WriteLine(result);
/// <summary>
/// Synthesizes the text currently in <c>inputField_text</c> with the Baidu TTS client.
/// On success the returned MP3 bytes are kept in <c>b</c> and written to
/// <c>mp3pathName</c>; on failure the error code is logged and the previous data is
/// left untouched.
/// </summary>
public void Tts()
{
    ttString = inputField_text.text; // the text to synthesize is whatever the input box holds

    if (string.IsNullOrEmpty(ttString))
    {
        Debug.LogWarning("Tts: the input text is empty, nothing to synthesize.");
        return;
    }

    // Optional synthesis parameters.
    var option = new Dictionary<string, object>()
    {
        {"spd", 5}, // speed, int 0-9, 5 = medium
        {"pit", 5}, // pitch, int 0-9, 5 = medium
        {"vol", 7}, // volume, int 0-15
        {"per", 0}  // voice: 0 female, 1 male, 3 and 4 are the "emotional" voices
    };
    var result = _ttsClient.Synthesis(ttString, option); // speech synthesis request

    if (result.ErrorCode == 0)
    {
        // Keep the bytes in memory first so playback still works if the file write fails.
        b = result.Data;
        File.WriteAllBytes(mp3pathName, result.Data);
    }
    else
    {
        Debug.LogError("Tts: synthesis failed with error code " + result.ErrorCode);
    }
}



/// <summary>
/// Decodes the MP3 bytes kept from the last successful <c>Tts()</c> call and assigns the
/// resulting clip to <c>_AudioSource</c>.
/// </summary>
/// <remarks>
/// Does nothing (apart from a warning) when no synthesized data is available, instead of
/// failing inside the decoder.
/// NOTE(review): the visible code only assigns the clip and never starts playback -
/// confirm whether a Play() call was lost from this method.
/// </remarks>
public void PlayAudioMp3()
{
    if (b == null || b.Length == 0)
    {
        Debug.LogWarning("PlayAudioMp3: no synthesized MP3 data available; call Tts() first.");
        return;
    }

    _AudioSource.clip = FromMp3Data(b);
}
/// <summary>
/// Coroutine that loads the converted WAV file (<c>wavpathName</c>) through <c>WWW</c>
/// and assigns the resulting clip to <c>_AudioSource2</c>.
/// </summary>
/// <returns>An enumerator to be driven by <c>StartCoroutine</c>.</returns>
public IEnumerator LoadAudio()
{
    // WWW addresses local files through the file:// scheme; add it when the stored path
    // is a bare file-system path.
    string url = wavpathName;
    if (!url.Contains("://"))
    {
        url = "file://" + url;
    }

    WWW www = new WWW(url);
    yield return www; // resumes once the request has finished

    if (!string.IsNullOrEmpty(www.error))
    {
        Debug.LogError("LoadAudio: failed to load " + url + ": " + www.error);
        yield break;
    }

    _AudioSource2.clip = WWWAudioExtensions.GetAudioClip(www);
}
/// <summary>
/// Assigns the synthesized WAV to <c>_AudioSource2</c>. A clip named "ttswav" bundled in
/// a Resources folder is preferred; otherwise the file written by <c>Mp3ToWav()</c> is
/// loaded from disk.
/// </summary>
/// <remarks>
/// Awake points all audio paths at <c>Application.persistentDataPath</c>, which
/// <c>Resources.Load</c> cannot read, so a freshly converted file is only reachable
/// through the <c>LoadAudio()</c> fallback.
/// </remarks>
public void PlayAudioWav()
{
    string name = "ttswav"; // resource name, without extension

    AudioClip bundled = Resources.Load(name, typeof(AudioClip)) as AudioClip;
    if (bundled != null)
    {
        _AudioSource2.clip = bundled;
        return;
    }

    if (File.Exists(wavpathName))
    {
        StartCoroutine(LoadAudio());
    }
    else
    {
        Debug.LogWarning("PlayAudioWav: no WAV found in Resources or at " + wavpathName);
    }
}
/// <summary>
/// Converts the synthesized MP3 file (<c>mp3pathName</c>) to a WAV file
/// (<c>wavpathName</c>) using NAudio.
/// </summary>
/// <remarks>
/// The file stream and the MP3 reader are disposed when the conversion ends. Leaving them
/// open keeps the MP3 locked, which makes the next <c>File.WriteAllBytes</c> on the same
/// path fail.
/// </remarks>
public void Mp3ToWav()
{
    if (!File.Exists(mp3pathName))
    {
        Debug.LogWarning("Mp3ToWav: MP3 file not found: " + mp3pathName);
        return;
    }

    using (var stream = File.Open(mp3pathName, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (var reader = new Mp3FileReader(stream))
    {
        WaveFileWriter.CreateWaveFile(wavpathName, reader);
    }
}
// Query the available microphone devices and cache their names in micArray.
// NOTE(review): the statement guarded by the length check below is not visible in this
// excerpt, so what happens when no microphone is present cannot be determined from here.
public void GetMicrophoneDevice()
    micArray = Microphone.devices;
    if (micArray.Length == 0)
// Start recording from the default microphone into _AudioSource.clip and remember the
// clip in _AudioClip.
// NOTE(review): the body of the while loop below, and anything that followed it, is not
// visible in this excerpt; beginTime is assigned but never read in the visible code.
public void StartRecord()


    _AudioSource.loop = false;// looping off
    _AudioSource.mute = true;// muted
    _AudioSource.clip = Microphone.Start(null, false, audioLength, frequency);// start recording; arguments: device name (null = default), loop flag, planned length in seconds, sample rate

    DateTime beginTime = DateTime.Now;
    _AudioClip = _AudioSource.clip;// the recorded clip; later save operations work on this reference
    while (!(Microphone.GetPosition(null) > 0))
   // _AudioSource.Play();


// Coroutine that waits in one-second steps while the microphone is recording, for at most
// audioLength iterations.
// NOTE(review): this excerpt is missing statements - no increment of 'time' is visible, the
// block opened after the IsRecording check is never closed, and the statement guarded by
// the final 'time >= audioLength' check is absent. Confirm against the original file.
IEnumerator TimeDown()

    int time = 0;
    while (time < audioLength)
        if (!Microphone.IsRecording(null))
        { // not recording any more: end the coroutine
            yield break;
        Debug.Log("yield return new WaitForSeconds " + time);
        yield return new WaitForSeconds(1);
    if (time >= audioLength)

    yield return 0;

    // (A commented-out alternative followed here in the original: a 60-second countdown
    // loop that wrote the remaining seconds into a ShowUI text element each frame.)
// Reads the current microphone position and updates audioLength.
// NOTE(review): as visible here, audioLength is first set to lastPos / frequency and then
// unconditionally overwritten with 30; an 'else' (and possibly the call that actually
// stops the microphone) appears to be missing from this excerpt - confirm against the
// original file.
public void StopRecord()
    //if (micArray.Length == 0)
    //    return;
    //if (!Microphone.IsRecording(null))
    //    return;
     int lastPos = Microphone.GetPosition(null);
    if (Microphone.IsRecording(null))
        audioLength = lastPos / frequency;// actual recorded duration in seconds (samples / sample rate)
        audioLength = 30;// actual duration = planned duration

    //Byte[] clipByte = GetClipData();// convert the recording to a byte array

    //// private string speech;                          // binary speech data of the local file; must be base64 encoded and used together with len
    ////private int len;                                // length of the original speech data in bytes

    //int len = clipByte.Length;// speech length in bytes
    //string speech = Convert.ToBase64String(clipByte); // base64-encoded speech data, used together with len

    //Debug.Log("len" + len);// actual length vs. planned length
    //Debug.Log("audioLength" + audioLength);

/// <summary>
/// Saves the last microphone recording (<c>_AudioClip</c>) as a WAV file at
/// <c>wavpathName2</c>. Logs a warning and does nothing when no recording exists yet.
/// </summary>
public void SaveAudio_wav()
{
    if (_AudioClip == null)
    {
        Debug.LogWarning("SaveAudio_wav: nothing has been recorded yet.");
        return;
    }

    Save(_AudioClip, wavpathName2);
}
/// <summary>
/// Writes <paramref name="clip"/> to <paramref name="path"/> as a 16-bit PCM WAV file,
/// creating the target directory first when it does not exist.
/// </summary>
/// <param name="clip">Clip whose sample data is written.</param>
/// <param name="path">Destination file path.</param>
/// <exception cref="ArgumentNullException"><paramref name="clip"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="path"/> is null or empty.</exception>
public static void Save(AudioClip clip, string path)
{
    if (clip == null)
    {
        throw new ArgumentNullException("clip");
    }
    if (string.IsNullOrEmpty(path))
    {
        throw new ArgumentException("A destination file path is required.", "path");
    }

    // GetDirectoryName returns an empty string for a bare file name; there is nothing to
    // create in that case.
    string filePath = Path.GetDirectoryName(path);
    if (!string.IsNullOrEmpty(filePath) && !Directory.Exists(filePath))
    {
        Directory.CreateDirectory(filePath);
    }

    using (FileStream fileStream = CreateEmpty(path))
    {
        // Sample data goes in after the reserved header area; the header is filled in
        // last because it records the final stream length.
        ConvertAndWrite(fileStream, clip);
        WriteHeader(fileStream, clip);
    }
}
/// <summary>
/// Converts the clip's float samples to little-endian 16-bit PCM and writes them to
/// <paramref name="fileStream"/> at its current position.
/// </summary>
/// <param name="fileStream">Destination stream, positioned after the reserved header.</param>
/// <param name="clip">Clip to read sample data from.</param>
private static void ConvertAndWrite(FileStream fileStream, AudioClip clip)
{
    // AudioClip.samples counts samples per channel while GetData fills interleaved data,
    // so the buffer must hold samples * channels values. This also matches the data size
    // that WriteHeader records.
    float[] samples = new float[clip.samples * clip.channels];
    clip.GetData(samples, 0);

    byte[] bytesData = new byte[samples.Length * 2];

    const float rescaleFactor = 32767f; // scales [-1, 1] floats to the Int16 range

    for (int i = 0; i < samples.Length; i++)
    {
        // Clamp first: a value outside [-1, 1] would otherwise wrap around in the cast
        // and turn into a loud click.
        float clamped = Mathf.Clamp(samples[i], -1f, 1f);
        short pcm = (short)(clamped * rescaleFactor);

        // WAV stores samples little-endian; write the two bytes directly instead of
        // allocating a temporary array per sample.
        bytesData[i * 2] = (byte)(pcm & 0xFF);
        bytesData[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
    }

    fileStream.Write(bytesData, 0, bytesData.Length);
}
/// <summary>
/// Creates (or truncates) the file at <paramref name="filepath"/> and reserves the first
/// 44 bytes for the WAV header, which is filled in later.
/// </summary>
/// <param name="filepath">Path of the file to create.</param>
/// <returns>The open stream, positioned directly after the reserved header area.</returns>
private static FileStream CreateEmpty(string filepath)
{
    const int headerSize = 44; // size of the canonical PCM WAV header

    FileStream fileStream = new FileStream(filepath, FileMode.Create);
    try
    {
        byte[] placeholder = new byte[headerSize]; // zero-filled
        fileStream.Write(placeholder, 0, placeholder.Length);
    }
    catch
    {
        // Do not leak the handle when reserving the header fails.
        fileStream.Dispose();
        throw;
    }

    return fileStream;
}
/// <summary>
/// Writes the 44-byte PCM WAV header at the start of <paramref name="stream"/>, using the
/// clip's frequency, channel count and sample count and the stream's current length.
/// </summary>
/// <param name="stream">Stream that already contains the reserved header area and the sample data.</param>
/// <param name="clip">Clip the sample data was taken from.</param>
private static void WriteHeader(FileStream stream, AudioClip clip)
{
    int sampleRate = clip.frequency;
    int channelCount = clip.channels;
    int sampleCount = clip.samples;

    ushort pcmFormat = 1;       // audio format 1 = PCM
    ushort bitsPerSample = 16;
    ushort blockAlign = (ushort)(channelCount * 2);

    System.Text.Encoding tagEncoding = System.Text.Encoding.UTF8;

    stream.Seek(0, SeekOrigin.Begin);

    // Header fields in file order: the encoded value and how many of its leading bytes
    // are written.
    var fields = new KeyValuePair<byte[], int>[]
    {
        new KeyValuePair<byte[], int>(tagEncoding.GetBytes("RIFF"), 4),
        new KeyValuePair<byte[], int>(BitConverter.GetBytes(stream.Length - 8), 4),               // RIFF chunk size
        new KeyValuePair<byte[], int>(tagEncoding.GetBytes("WAVE"), 4),
        new KeyValuePair<byte[], int>(tagEncoding.GetBytes("fmt "), 4),
        new KeyValuePair<byte[], int>(BitConverter.GetBytes(16), 4),                              // fmt chunk size
        new KeyValuePair<byte[], int>(BitConverter.GetBytes(pcmFormat), 2),
        new KeyValuePair<byte[], int>(BitConverter.GetBytes(channelCount), 2),
        new KeyValuePair<byte[], int>(BitConverter.GetBytes(sampleRate), 4),
        new KeyValuePair<byte[], int>(BitConverter.GetBytes(sampleRate * channelCount * 2), 4),   // byte rate
        new KeyValuePair<byte[], int>(BitConverter.GetBytes(blockAlign), 2),
        new KeyValuePair<byte[], int>(BitConverter.GetBytes(bitsPerSample), 2),
        new KeyValuePair<byte[], int>(tagEncoding.GetBytes("data"), 4),
        new KeyValuePair<byte[], int>(BitConverter.GetBytes(sampleCount * channelCount * 2), 4)   // data chunk size
    };

    foreach (KeyValuePair<byte[], int> field in fields)
    {
        stream.Write(field.Key, 0, field.Value);
    }
}
// Play back the recording through _AudioSource.
// NOTE(review): the statements guarded by the two checks below (and any call that starts
// playback) are not visible in this excerpt; as shown, only mute and loop are switched
// off. Confirm against the original file.

public void PlayRecordAudio()
if (Microphone.IsRecording(null))
if (_AudioSource.clip == null)
_AudioSource.mute = false;
_AudioSource.loop = false;

/// <summary>
/// Appends <paramref name="info"/> plus a line break to the accumulated log and shows the
/// whole log in <c>debugInfo</c>.
/// </summary>
/// <param name="info">Message to append.</param>
void ShowInfoLog(string info)
{
    infoLog += info;
    infoLog += "\r\n";

    // Previously the text box was only cleared, so the accumulated log never reached the
    // screen.
    if (debugInfo != null)
    {
        debugInfo.text = infoLog;
    }
}
/// <summary>
/// Converts the clip currently assigned to <c>_AudioSource</c> into little-endian 16-bit
/// PCM bytes.
/// </summary>
/// <returns>
/// The PCM bytes (interleaved when the clip has several channels), or <c>null</c> when
/// there is no clip or the clip contains no samples.
/// </returns>
public Byte[] GetClipData()
{
    AudioClip clip = _AudioSource.clip;
    if (clip == null)
    {
        return null;
    }

    // AudioClip.samples is per channel; GetData fills interleaved data for all channels.
    float[] samples = new float[clip.samples * clip.channels];
    if (samples.Length == 0)
    {
        return null;
    }

    clip.GetData(samples, 0);

    Byte[] outData = new byte[samples.Length * 2];

    const float rescaleFactor = 32767f; // scales [-1, 1] floats to the Int16 range

    for (int i = 0; i < samples.Length; i++)
    {
        // Clamp so out-of-range values do not wrap around in the cast.
        float clamped = Mathf.Clamp(samples[i], -1f, 1f);
        short pcm = (short)(clamped * rescaleFactor);

        outData[i * 2] = (byte)(pcm & 0xFF);
        outData[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
    }

    return outData;
}

// NOTE(review): only the signature of this method is visible in this excerpt; its body is
// missing, so its behaviour cannot be documented from here.
public void TtsAndPlay()

/// <summary>
/// Builds a single-channel <c>AudioClip</c> named "wavclip" from in-memory WAV bytes,
/// using the left channel of the decoded data.
/// </summary>
/// <param name="data">Complete WAV file contents.</param>
/// <returns>The newly created clip.</returns>
public static AudioClip FromWavData(byte[] data)
{
    const int clipChannels = 1;

    WAV decoded = new WAV(data);
    int lengthSamples = decoded.SampleCount;
    int sampleRate = decoded.Frequency;

    AudioClip clip = AudioClip.Create("wavclip", lengthSamples, clipChannels, sampleRate, false);
    clip.SetData(decoded.LeftChannel, 0);
    return clip;
}

/// <summary>
/// Decodes in-memory MP3 bytes to PCM with NAudio and builds a single-channel
/// <c>AudioClip</c> named "testSound" from the left channel.
/// </summary>
/// <param name="data">Complete MP3 file contents.</param>
/// <returns>The newly created clip.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="data"/> is empty.</exception>
public static AudioClip FromMp3Data(byte[] data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }
    if (data.Length == 0)
    {
        throw new ArgumentException("MP3 data is empty.", "data");
    }

    WAV wav;

    // Every stream in the decode chain is disposed once the PCM bytes have been copied
    // out; none of them is needed after the WAV object has been built.
    using (MemoryStream mp3stream = new MemoryStream(data))
    using (Mp3FileReader mp3audio = new Mp3FileReader(mp3stream))
    using (WaveStream waveStream = WaveFormatConversionStream.CreatePcmStream(mp3audio))
    {
        wav = new WAV(AudioMemStream(waveStream).ToArray());
    }

    AudioClip audioClip = AudioClip.Create("testSound", wav.SampleCount, 1, wav.Frequency, false);
    audioClip.SetData(wav.LeftChannel, 0);
    return audioClip;
}
/// <summary>
/// Copies the whole of <paramref name="waveStream"/> into an in-memory WAV file
/// (header plus PCM data).
/// </summary>
/// <param name="waveStream">Source of PCM data; it is rewound to position 0 first.</param>
/// <returns>
/// The memory stream holding the WAV file. The writer closes it when it is disposed, so
/// read its contents with <c>ToArray()</c>.
/// </returns>
private static MemoryStream AudioMemStream(WaveStream waveStream)
{
    MemoryStream outputStream = new MemoryStream();
    using (WaveFileWriter waveFileWriter = new WaveFileWriter(outputStream, waveStream.WaveFormat))
    {
        // Copy in block-aligned chunks until the source is exhausted. A single Read call
        // may return fewer bytes than requested, and sizing one buffer from Length
        // overflows Int32 for very long streams.
        int blockAlign = Math.Max(1, waveStream.WaveFormat.BlockAlign);
        byte[] buffer = new byte[blockAlign * 4096];

        waveStream.Position = 0;

        int read;
        while ((read = waveStream.Read(buffer, 0, buffer.Length)) > 0)
        {
            waveFileWriter.Write(buffer, 0, read);
        }
    }
    return outputStream;
}

/// <summary>
/// Minimal decoder for 16-bit PCM WAV data held in memory. Exposes the samples as floats
/// in the range [-1, 1) together with the channel count and sample rate.
/// </summary>
public class WAV
{
    /// <summary>Converts two little-endian bytes to one float in the range -1 to (just below) 1.</summary>
    static float bytesToFloat(byte firstByte, byte secondByte)
    {
        // Combine the two bytes into one signed 16-bit value (little endian).
        short s = (short)((secondByte << 8) | firstByte);
        return s / 32768.0F;
    }

    /// <summary>Reads a little-endian 32-bit integer from <paramref name="bytes"/> at <paramref name="offset"/>.</summary>
    static int bytesToInt(byte[] bytes, int offset = 0)
    {
        int value = 0;
        for (int i = 0; i < 4; i++)
        {
            value |= ((int)bytes[offset + i]) << (i * 8);
        }
        return value;
    }

    /// <summary>Samples of the first (left or only) channel.</summary>
    public float[] LeftChannel { get; internal set; }

    /// <summary>Samples of the right channel, or null when the data is not stereo.</summary>
    public float[] RightChannel { get; internal set; }

    /// <summary>Channel count read from byte 22 of the header.</summary>
    public int ChannelCount { get; internal set; }

    /// <summary>Number of samples per channel.</summary>
    public int SampleCount { get; internal set; }

    /// <summary>Sample rate in Hz, read from byte 24 of the header.</summary>
    public int Frequency { get; internal set; }

    /// <summary>
    /// Parses a complete WAV file image. The sample data is assumed to be 16-bit PCM.
    /// </summary>
    /// <param name="wav">Bytes of the WAV file, starting with the RIFF header.</param>
    /// <exception cref="ArgumentNullException"><paramref name="wav"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The data is too short to contain a header, contains a corrupt chunk, or has no
    /// "data" chunk.
    /// </exception>
    public WAV(byte[] wav)
    {
        if (wav == null)
        {
            throw new ArgumentNullException("wav");
        }
        if (wav.Length < 44)
        {
            throw new ArgumentException("Data is too short to be a WAV file.", "wav");
        }

        // Determine if mono or stereo (the high byte at 23 is ignored).
        ChannelCount = wav[22];

        // Get the frequency.
        Frequency = bytesToInt(wav, 24);

        // Walk the sub-chunks, starting with the first chunk id at offset 12, until the
        // "data" chunk (bytes 100 97 116 97) is found.
        int pos = 12;
        int declaredDataBytes;
        while (true)
        {
            if (pos + 8 > wav.Length)
            {
                throw new ArgumentException("No data chunk found in WAV data.", "wav");
            }

            int chunkSize = bytesToInt(wav, pos + 4);
            bool isDataChunk = wav[pos] == 100 && wav[pos + 1] == 97 && wav[pos + 2] == 116 && wav[pos + 3] == 97;
            if (isDataChunk)
            {
                declaredDataBytes = chunkSize;
                pos += 8; // skip the chunk id and size; pos now points at the first sample
                break;
            }

            if (chunkSize < 0)
            {
                throw new ArgumentException("Corrupt chunk size in WAV data.", "wav");
            }

            // RIFF chunks are word aligned: an odd-sized chunk is followed by one pad byte.
            long next = (long)pos + 8 + chunkSize + (chunkSize & 1);
            if (next > wav.Length)
            {
                throw new ArgumentException("No data chunk found in WAV data.", "wav");
            }
            pos = (int)next;
        }

        // Trust the size stored in the data chunk when it is plausible, so that chunks
        // following the audio are not decoded as samples. Fall back to "everything that
        // is left" when the stored size is missing or out of range.
        int available = wav.Length - pos;
        int dataBytes = (declaredDataBytes > 0 && declaredDataBytes <= available) ? declaredDataBytes : available;

        SampleCount = dataBytes / 2;                  // 2 bytes per sample (16-bit mono)
        if (ChannelCount == 2)
        {
            SampleCount /= 2;                         // 4 bytes per sample frame (16-bit stereo)
        }

        // Allocate memory (right stays null unless the data is stereo).
        LeftChannel = new float[SampleCount];
        RightChannel = ChannelCount == 2 ? new float[SampleCount] : null;

        for (int i = 0; i < SampleCount; i++)
        {
            LeftChannel[i] = bytesToFloat(wav[pos], wav[pos + 1]);
            pos += 2;
            if (ChannelCount == 2)
            {
                RightChannel[i] = bytesToFloat(wav[pos], wav[pos + 1]);
                pos += 2;
            }
        }
    }

    /// <summary>Returns a short description of the decoded data.</summary>
    public override string ToString()
    {
        return string.Format("[WAV: LeftChannel={0}, RightChannel={1}, ChannelCount={2}, SampleCount={3}, Frequency={4}]", LeftChannel, RightChannel, ChannelCount, SampleCount, Frequency);
    }
}




