美文网首页
Microsoft Speech 语音识别

Microsoft Speech 语音识别

作者: Mark_ | 来源:发表于2019-10-31 17:58 被阅读0次

    废话不多说,直奔主题!

    1.环境

      Microsoft Speech SDK 11(语音识别 SDK),可从微软官方下载中心获取(原文此处附有下载链接)

    2.语音识别

    MKSpeech.h

    #pragma once
    #include <Windows.h>
    #include <sapi.h>
    #include <sphelper.h>
    #include "stdafx.h"
    // Wrapper around the Microsoft Speech (SAPI) in-process recognizer.
    // Loads a command grammar from an XML file, listens on the default audio
    // input device, and reports recognized phrases through user callbacks.
    // NOTE(review): ErrorCallBack / AudioCallBack / AudioCallStatus / string
    // are presumably declared in "stdafx.h" (not visible here) -- confirm;
    // <thread> must also be reachable from there for std::thread below.
    class MKSpeech
    {
    private:
        /** Whether recognition is currently running (open() succeeded). */
        bool isOpen;
        /** Whether init_speech_recognizer() has completed successfully. */
        bool isInit;
    private:
        // Kinect audio beam; only ever set to nullptr in this file --
        // presumably reserved for Kinect audio input (TODO confirm)
        IAudioBeam*                 m_pAudioBeam ;
        // Speech input stream object handed to SAPI
        ISpStream*                  m_pSpeechStream ;
        // SAPI in-process recognizer
        ISpRecognizer*              m_pSpeechRecognizer ;
        // Recognition context (the event source we wait on)
        ISpRecoContext*             m_pSpeechContext ;
        // Command-and-control grammar
        ISpRecoGrammar*             m_pSpeechGrammar ;
        // Event handle signalled when a recognition event is available
        HANDLE                      m_hSpeechEvent ;
        // Worker thread that waits on m_hSpeechEvent
        std::thread                 m_threadAudio;
        // Grammar file currently selected ("Grammar.xml" or "Grammar_en.xml")
        WCHAR*                      s_GrammarFileName;
    private:
        // One-time creation/configuration of the SAPI objects above.
        HRESULT init_speech_recognizer();
        // Thread entry point: waits for one recognition event, then closes.
        static void AudioThread(MKSpeech* pointer);
        // Dispatch on the semantic tag of a recognized phrase.
        void speech_behavior(const SPPHRASEPROPERTY* tag);
        // Drain pending recognition events and fire callbacks.
        void speech_process();
    public:
        MKSpeech(void);
        ~MKSpeech(void);
        // Start recognition; type == 0 loads Grammar.xml, otherwise Grammar_en.xml.
        void open(int type);
        // Stop recognition and deactivate the recognizer.
        void close(void);
    
    private:
        /** Registered callback pointers (null until the setters are called). */
        ErrorCallBack  _errorCallBack;
        AudioCallBack  _audioCallBack;
    public:
        /** Register the callbacks invoked by the methods below. */
        void m_setErrorActionCallBack(ErrorCallBack call);
        void m_setAudioCallBack(AudioCallBack call);
    
        // Forward an error description to the registered error callback.
        void m_errorCallBack(string codeStr);
        // Forward recognized text to the audio callback (only while open).
        void m_audioCallBack(string str,AudioCallStatus status);
    };
    

    MKSpeech.cpp

    #include "MKSpeech.h"
    // Construct in the "closed, not yet initialized" state: all COM interface
    // pointers start null and the default (Chinese) grammar file is selected.
    MKSpeech::MKSpeech(void)
        : isOpen(false)
        , isInit(false)
        , m_pAudioBeam(nullptr)
        , m_pSpeechStream(nullptr)
        , m_pSpeechRecognizer(nullptr)
        , m_pSpeechContext(nullptr)
        , m_pSpeechGrammar(nullptr)
        , m_hSpeechEvent(nullptr)
        , s_GrammarFileName(L"Grammar.xml")
    {
    }
    
    
    // Release the COM objects created by init_speech_recognizer() and make
    // sure the worker thread has finished.
    // The original destructor was empty: it leaked every interface pointer and
    // would have called std::terminate() if m_threadAudio was still joinable.
    MKSpeech::~MKSpeech(void)
    {
        // Join the audio thread first -- it dereferences `this`.
        if (m_threadAudio.joinable())
        {
            m_threadAudio.join();
        }
        // m_hSpeechEvent comes from GetNotifyEventHandle() and is owned by the
        // context, so it must NOT be closed here.
        SafeRelease(m_pSpeechGrammar);
        SafeRelease(m_pSpeechContext);
        SafeRelease(m_pSpeechRecognizer);
        SafeRelease(m_pSpeechStream);
        SafeRelease(m_pAudioBeam);
    }
    
    // One-time SAPI setup: input stream object, in-process recognizer bound to
    // the default audio-in device, Chinese (language id 0x804) engine,
    // recognition context, and a dynamic command grammar loaded from
    // s_GrammarFileName (falling back to the packaged resource path).
    // On failure reports via m_errorCallBack and returns the failing HRESULT;
    // already-created members are left for the destructor to release.
    HRESULT MKSpeech::init_speech_recognizer()
    {
        HRESULT hr = S_OK;
        // Speech input stream object
        hr = CoCreateInstance(CLSID_SpStream, nullptr, CLSCTX_INPROC_SERVER, __uuidof(ISpStream), (void**)&m_pSpeechStream);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("CoCreateInstance CLSID_SpStream failed");
            return hr;
        }
        // In-process speech recognizer
        hr = CoCreateInstance(CLSID_SpInprocRecognizer, nullptr, CLSCTX_INPROC_SERVER, __uuidof(ISpRecognizer), (void**)&m_pSpeechRecognizer);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("CoCreateInstance CLSID_SpInprocRecognizer failed");
            return hr;
        }
        // Default audio input device token
        CComPtr<ISpObjectToken> ISPToken = nullptr;
        hr = SpGetDefaultTokenFromCategoryId(SPCAT_AUDIOIN, &ISPToken);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("SpGetDefaultTokenFromCategoryId failed");
            return hr;
        }
        // Feed the recognizer from that device
        hr = m_pSpeechRecognizer->SetInput(ISPToken, TRUE);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("SetInput failed");
            return hr;
        }
        // Engine for Chinese (0x804). CComPtr releases the token on every
        // return path -- the raw pointer in the original leaked whenever a
        // later step failed before SafeRelease was reached.
        CComPtr<ISpObjectToken> pEngineToken = nullptr;
        hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=804", nullptr, &pEngineToken);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("SpFindBestToken failed");
            return hr;
        }
        // Select that engine (this HRESULT was ignored in the original)
        hr = m_pSpeechRecognizer->SetRecognizer(pEngineToken);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("m_pSpeechRecognizer SetRecognizer failed");
            return hr;
        }
        // Recognition context: the event source we listen on
        hr = m_pSpeechRecognizer->CreateRecoContext(&m_pSpeechContext);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("m_pSpeechRecognizer CreateRecoContext failed");
            return hr;
        }
        // Adaptation OFF (0): keeps long sessions from degrading recognition
        hr = m_pSpeechRecognizer->SetPropertyNum(L"AdaptationOn", 0);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("m_pSpeechRecognizer SetPropertyNum failed");
            return hr;
        }
        // Command grammar container
        hr = m_pSpeechContext->CreateGrammar(1, &m_pSpeechGrammar);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("m_pSpeechContext CreateGrammar failed");
            return hr;
        }
        // Load the grammar rules, trying the packaged resource path second
        hr = m_pSpeechGrammar->LoadCmdFromFile(s_GrammarFileName, SPLO_DYNAMIC);
        if (!SUCCEEDED(hr)){
            hr = m_pSpeechGrammar->LoadCmdFromFile(L".\\resources\\app\\kinectLib\\Grammar.xml", SPLO_DYNAMIC);
        }
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("m_pSpeechGrammar LoadCmdFromFile failed");
            return hr;
        }
        // Activate every top-level rule
        hr = m_pSpeechGrammar->SetRuleState(nullptr, nullptr, SPRS_ACTIVE);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("m_pSpeechGrammar SetRuleState failed");
            return hr;
        }
        // Keep the recognizer reading audio continuously
        hr = m_pSpeechRecognizer->SetRecoState(SPRST_ACTIVE);
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("m_pSpeechRecognizer SetRecoState failed");
            return hr;
        }
        // Only recognition events interest us
        hr = m_pSpeechContext->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
        if (!SUCCEEDED(hr))
        {
            m_errorCallBack("m_pSpeechContext SetInterest failed");
            return hr;
        }
        // Make sure the context is delivering events
        m_pSpeechContext->Resume(0);
        // Event handle signalled on recognition (owned by the context)
        m_hSpeechEvent = m_pSpeechContext->GetNotifyEventHandle();
    
        return hr;
    }
    
    // Worker thread: waits (up to 8 s) for one recognition event, processes it,
    // then shuts the recognizer down. Single-shot by design -- every branch
    // sets `exit`. The original switch had no default case, so WAIT_FAILED
    // (e.g. an invalid event handle) spun this loop forever at full speed.
    void MKSpeech::AudioThread(MKSpeech* pointer){
        HANDLE events[] = { pointer->m_hSpeechEvent };
        bool exit = false;
        while (!exit) {
            switch (::WaitForMultipleObjects(lengthof(events), events, FALSE, 8000)){
            case WAIT_OBJECT_0:
                // A recognition event is pending
                pointer->speech_process();
                exit = true;
                pointer->close();
                break;
            case WAIT_TIMEOUT:
                // Nothing recognized within 8 seconds
                pointer->m_audioCallBack("faild",AudioCallStatus_Faild);
                exit = true;
                pointer->close();
                break;
            default:
                // WAIT_FAILED / WAIT_ABANDONED: bail out instead of spinning
                pointer->m_errorCallBack("WaitForMultipleObjects failed");
                exit = true;
                pointer->close();
                break;
            }
        }
    }
    
    
    // 音频处理
    void MKSpeech::speech_process() {
        
        // 置信阈值
        const float ConfidenceThreshold = 0.3f;
    
        SPEVENT curEvent = { SPEI_UNDEFINED, SPET_LPARAM_IS_UNDEFINED, 0, 0, 0, 0 };
        ULONG fetched = 0;
        HRESULT hr = S_OK;
        // 获取事件
        m_pSpeechContext->GetEvents(1, &curEvent, &fetched);
        while (fetched > 0)
        {
            // 确定是识别事件
            switch (curEvent.eEventId)
            {
            case SPEI_RECOGNITION:
                // 保证位对象
                if (SPET_LPARAM_IS_OBJECT == curEvent.elParamType) {
                    ISpRecoResult* result = reinterpret_cast<ISpRecoResult*>(curEvent.lParam);
                    SPPHRASE* pPhrase = nullptr;
                    // 获取识别短语
                    hr = result->GetPhrase(&pPhrase);
                    if (SUCCEEDED(hr)) {
                        // DEBUG时显示识别字符串
                        WCHAR* pwszFirstWord;
                        result->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &pwszFirstWord, nullptr);
                        //_cwprintf(pwszFirstWord);
                        char * m_char;    
                        int len= WideCharToMultiByte( CP_ACP ,0,pwszFirstWord ,wcslen( pwszFirstWord ), NULL,0, NULL ,NULL);    
                        m_char = new char[len+1];     
                        WideCharToMultiByte( CP_ACP ,0,pwszFirstWord,wcslen( pwszFirstWord ),m_char,len, NULL ,NULL );     
                        m_char[len]= '\0';     
                        printf(m_char);
    
                        //string base64Str =  GbkToUtf8(m_char);
    
                        
                        ::CoTaskMemFree(pwszFirstWord);
    
                        pPhrase->pProperties;
                        const SPPHRASEELEMENT* pointer = pPhrase->pElements;
                        if ((pPhrase->pProperties != nullptr) && (pPhrase->pProperties->pFirstChild != nullptr)) {
                            const SPPHRASEPROPERTY* pSemanticTag = pPhrase->pProperties->pFirstChild;
                            
                            //CString str(lpcwStr);
                            _cwprintf(pSemanticTag->pszValue);
    
                            char * m_char2;    
                            int len2= WideCharToMultiByte( CP_ACP ,0,pSemanticTag->pszValue ,wcslen( pSemanticTag->pszValue ), NULL,0, NULL ,NULL);
                        m_char2 = new char[len2+1];     
                        WideCharToMultiByte( CP_ACP ,0,pSemanticTag->pszValue,wcslen( pSemanticTag->pszValue ),m_char2,len2, NULL ,NULL );     
                        m_char2[len2]= '\0';     
                        printf(m_char2);
    
    
                            m_audioCallBack(m_char2,AudioCallStatus_Success);
    //#ifdef _DEBUG         
                            _cwprintf(L"   置信度:%d%%\n", (int)(pSemanticTag->SREngineConfidence*100.f));
    //#endif
                            if (pSemanticTag->SREngineConfidence > ConfidenceThreshold) {
                                speech_behavior(pSemanticTag);
                            }
                        }
                        ::CoTaskMemFree(pPhrase);
                    }
                }
                break;
            }
    
            m_pSpeechContext->GetEvents(1, &curEvent, &fetched);
        }
    
        return;
    }
    
    // Act on the semantic tag of a recognized phrase.
    // A "战况" (battle report) tag carries up to four child values that are
    // copied into `situation`. The original wrote past data[4] whenever the
    // phrase carried more than four children, and read from an uninitialized
    // union when it carried fewer.
    void MKSpeech::speech_behavior(const SPPHRASEPROPERTY* tag){
        if (!tag) return;
        if (!wcscmp(tag->pszName, L"战况")){
            // Subject of the report
            enum class Subject{
                US = 0,
                Enemy
            } ;
            // Action reported
            enum class Predicate{
                Destroy = 0,
                Defeat,
                Breakdown
            };
            // Overlay: filled via data[], read via the struct members
            union  Situation{
                struct{
                    Subject subject;        // who
                    Predicate predicate;    // did what
                    int object2;            // secondary object
                    int object;             // primary object
                };
                UINT32 data[4];
            };
            Situation situation = {};   // zero-init: phrases may have < 4 children
            auto obj = tag->pFirstChild;
            auto pointer = situation.data;
            const auto end = situation.data + 4;
            // Copy at most four child values (bounds check was missing)
            while (obj && pointer != end) {
                *pointer = obj->vValue.lVal;
                ++pointer;
                obj = obj->pNextSibling;
            }
            // XXX: situation is collected but not yet acted upon
        }
        else if (!wcscmp(tag->pszName, L"发现东西")){
            // "found something" -- not implemented
        }
    }
    
    // Register the callback used to report error strings.
    void MKSpeech::m_setErrorActionCallBack(ErrorCallBack callback)
    {
        this->_errorCallBack = callback;
    }
    
    // Register the callback that receives recognized text.
    void MKSpeech::m_setAudioCallBack(AudioCallBack callback)
    {
        this->_audioCallBack = callback;
    }
    
    // Forward an error description to the registered error callback, if any.
    void MKSpeech::m_errorCallBack(string codeStr) 
    {
        if (!_errorCallBack) return;
        _errorCallBack(codeStr);
    }
    
    // Forward recognized text to the audio callback; silently dropped while
    // recognition is closed or no callback is registered.
    void MKSpeech::m_audioCallBack(string str,AudioCallStatus status)
    {
        if (isOpen && _audioCallBack)
        {
            _audioCallBack(str, status);
        }
    }
    
    void MKSpeech::open(int type)
    {
        if (!isInit)
        {
            HRESULT hr = init_speech_recognizer();
            if (SUCCEEDED(hr))
            {
                isInit = true;
            }
        }
        if (isInit)
        {
            if (type == 0)
            {
                printf("%d",wcscmp(s_GrammarFileName,L"Grammar.xml"));
                if (wcscmp(s_GrammarFileName,L"Grammar.xml") != 0)
                {
                    s_GrammarFileName = L"Grammar.xml";
                    //加载语法规则
                    HRESULT hr = m_pSpeechGrammar->LoadCmdFromFile(s_GrammarFileName, SPLO_DYNAMIC);
                    if (!SUCCEEDED(hr)){
                        hr = m_pSpeechGrammar->LoadCmdFromFile(L".\\resources\\app\\kinectLib\\Grammar.xml", SPLO_DYNAMIC); 
                    }
                    if (!SUCCEEDED(hr))
                    {
                        m_errorCallBack("m_pSpeechGrammar LoadCmdFromFile failed");
                        return ;
                    }
                    // 激活语法规则
                    hr = m_pSpeechGrammar->SetRuleState(nullptr, nullptr, SPRS_ACTIVE);
                    if (!SUCCEEDED(hr))
                    {
                        m_errorCallBack("m_pSpeechGrammar SetRuleState failed");
                        return ;
                    }
                }
            }else
            {
                printf("%d",wcscmp(s_GrammarFileName,L"Grammar_en.xml"));
                if (wcscmp(s_GrammarFileName,L"Grammar_en.xml") != 0)
                {
                    s_GrammarFileName = L"Grammar_en.xml";
                    //加载语法规则
                    HRESULT hr = m_pSpeechGrammar->LoadCmdFromFile(s_GrammarFileName, SPLO_DYNAMIC);
                    if (!SUCCEEDED(hr)){
                        hr = m_pSpeechGrammar->LoadCmdFromFile(L".\\resources\\app\\kinectLib\\Grammar_en.xml", SPLO_DYNAMIC);      
                    }
                    if (!SUCCEEDED(hr))
                    {
                        m_errorCallBack("m_pSpeechGrammar LoadCmdFromFile failed");
                        return ;
                    }
                    // 激活语法规则
                    hr = m_pSpeechGrammar->SetRuleState(nullptr, nullptr, SPRS_ACTIVE);
                    if (!SUCCEEDED(hr))
                    {
                        m_errorCallBack("m_pSpeechGrammar SetRuleState failed");
                        return ;
                    }
                }
                
            }
            isOpen = true;
            m_pSpeechContext->SetContextState(SPCS_ENABLED);
            m_pSpeechContext->Resume( 0 );
            m_pSpeechRecognizer->SetRecoState(SPRST_ACTIVE) ;
            m_threadAudio.std::thread::thread(AudioThread, this);
        }
    }
    
    // Stop recognition: clear the pending event, disable the context and
    // deactivate the recognizer (purging queued audio).
    // Called both from user code and from AudioThread after its single shot.
    void MKSpeech::close(void)
    {
        // Clear any pending speech event so the worker does not wake again
        ResetEvent(m_hSpeechEvent);
        m_pSpeechContext->SetContextState(SPCS_DISABLED);
        m_pSpeechRecognizer->SetRecoState(SPRST_INACTIVE_WITH_PURGE) ;
        m_pSpeechContext->Pause( 0 );
        isOpen = false;
    }
    

    KinectAudioStreamWrapper.h

    #pragma once
    #include "stdafx.h"
    
    // Kinect 音频流简单封装
    class KinectAudioStreamWrapper : public IStream{
    public:
        // 构造函数
        KinectAudioStreamWrapper(IStream *p32BitAudioStream);
        // 析构函数
        ~KinectAudioStreamWrapper();
        // 删除默认构造
        KinectAudioStreamWrapper();
        // 这是语音状态
        void SetSpeechState(BOOL state){ m_SpeechActive = state; }
        // IUnknown 方法 实现
        STDMETHODIMP_(ULONG) AddRef() { return InterlockedIncrement(&m_cRef); }
        STDMETHODIMP_(ULONG) Release() {
            UINT ref = InterlockedDecrement(&m_cRef);
            if (ref == 0){
                delete this;
            }
            return ref;
        }
        STDMETHODIMP QueryInterface(REFIID riid, void **ppv) {
            if (riid == IID_IUnknown) {
                AddRef();
                *ppv = (IUnknown*)this;
                return S_OK;
            }
            else if (riid == IID_IStream) {
                AddRef();
                *ppv = (IStream*)this;
                return S_OK;
            }
            else {
                return E_NOINTERFACE;
            }
        }
        // IStream 方法
        STDMETHODIMP Read(void *, ULONG, ULONG *);
        STDMETHODIMP Write(const void *, ULONG, ULONG *);
        STDMETHODIMP Seek(LARGE_INTEGER, DWORD, ULARGE_INTEGER *);
        STDMETHODIMP SetSize(ULARGE_INTEGER);
        STDMETHODIMP CopyTo(IStream *, ULARGE_INTEGER, ULARGE_INTEGER *, ULARGE_INTEGER *);
        STDMETHODIMP Commit(DWORD);
        STDMETHODIMP Revert();
        STDMETHODIMP LockRegion(ULARGE_INTEGER, ULARGE_INTEGER, DWORD);
        STDMETHODIMP UnlockRegion(ULARGE_INTEGER, ULARGE_INTEGER, DWORD);
        STDMETHODIMP Stat(STATSTG *, DWORD);
        STDMETHODIMP Clone(IStream **);
    private:
        // 引用计数
        UINT                    m_cRef;
        // 浮点缓冲区
        float*                  m_pFloatBuffer;
        // 缓冲区大小
        UINT                    m_uFloatBuferSize;
        // 封装对象
        IStream*                m_p32BitAudio;
        // 语音状态 使用BOOL保证数据对齐
        BOOL                    m_SpeechActive;
    };
    
    #include "KinectAudioStreamWrapper.h"
    
    // Wrap `p32BitAudio`, taking our own COM reference on it (may be null).
    KinectAudioStreamWrapper::KinectAudioStreamWrapper(IStream *p32BitAudio)
        : m_cRef(1)
        , m_pFloatBuffer(nullptr)
        , m_uFloatBuferSize(0)
        , m_p32BitAudio(p32BitAudio)
        , m_SpeechActive(false)
    {
        if (m_p32BitAudio){
            m_p32BitAudio->AddRef();
        }
    }
    
    
    // Drop our reference on the wrapped stream and free the scratch buffer.
    KinectAudioStreamWrapper::~KinectAudioStreamWrapper(){
        SafeRelease(m_p32BitAudio);
        delete[] m_pFloatBuffer;   // delete[] on nullptr is a no-op
        m_pFloatBuffer = nullptr;
    }
    
    
    
    
    // IStream::Read -- convert 32-bit float samples from the wrapped stream
    // into 16-bit PCM for the speech recognizer.
    // While speech is inactive, claims cbBuffer bytes were "read" without
    // touching the buffer so SAPI does not stall. Otherwise blocks (20 ms
    // polls) until enough float data has arrived.
    // Fix vs. original: a failing inner Read() was never checked, so a broken
    // stream made this poll/Sleep loop spin forever; it now bails out and
    // propagates the error HRESULT.
    STDMETHODIMP KinectAudioStreamWrapper::Read(void *pBuffer, ULONG cbBuffer, ULONG *pcbRead){
        if (!pBuffer || !pcbRead) return E_INVALIDARG;
        // Not capturing: report success without reading
        if (!m_SpeechActive){
            *pcbRead = cbBuffer;
            return S_OK;
        }
        HRESULT hr = S_OK;
        // Output is 16-bit PCM
        INT16* const p16Buffer = reinterpret_cast<INT16*>(pBuffer);
        // Input bytes per output byte: sizeof(float)/sizeof(INT16) == 2
        const int multiple = sizeof(float) / sizeof(INT16);
        // Grow the scratch float buffer if this request is larger than before
        auto float_buffer_size = cbBuffer / multiple;
        if (float_buffer_size > m_uFloatBuferSize){
            m_uFloatBuferSize = float_buffer_size;
            if (m_pFloatBuffer) delete[]m_pFloatBuffer;
            m_pFloatBuffer = new float[m_uFloatBuferSize];
        }
        // Write cursor into the scratch buffer, in bytes
        BYTE* pWriteProgress = reinterpret_cast<BYTE*>(m_pFloatBuffer);
        ULONG bytesRead = 0;
        // cbBuffer/2 samples * 4 bytes per float = cbBuffer * 2 input bytes
        ULONG bytesNeed = cbBuffer * multiple;
        while (true){
            // Capture switched off while we were blocked: pretend success
            if (!m_SpeechActive){
                *pcbRead = cbBuffer;
                hr = S_OK;
                break;
            }
            // Pull audio from the wrapped stream
            hr = m_p32BitAudio->Read(pWriteProgress, bytesNeed, &bytesRead);
            if (FAILED(hr)){
                // Inner stream failed: report nothing read and propagate hr
                // (the original looped forever here)
                *pcbRead = 0;
                break;
            }
            bytesNeed -= bytesRead;
            pWriteProgress += bytesRead;
            // Done once the full request has been satisfied
            if (!bytesNeed){
                *pcbRead = cbBuffer;
                break;
            }
            // Wait a time slice for more audio
            Sleep(20);
        }
        // Convert float [-1,1] -> INT16 with clamping and round-half-away
        if (!bytesNeed){
            for (UINT i = 0; i < cbBuffer / multiple; i++) {
                float sample = m_pFloatBuffer[i];
                if (sample > 1.f) sample = 1.f;
                if (sample < -1.f) sample = -1.f;
                float sampleScaled = sample * (float)SHRT_MAX;
                p16Buffer[i] = (sampleScaled > 0.f) ? (INT16)(sampleScaled + 0.5f) : (INT16)(sampleScaled - 0.5f);
            }
        }
        return hr;
    }
    
    // Remaining IStream methods: not needed for speech input, so they return
    // E_NOTIMPL -- except Seek, which must claim success (see below).
    
    STDMETHODIMP KinectAudioStreamWrapper::Write(const void *, ULONG, ULONG *)
    {
        return E_NOTIMPL;
    }
    
    STDMETHODIMP KinectAudioStreamWrapper::Seek(LARGE_INTEGER /* dlibMove */, DWORD /* dwOrigin */, ULARGE_INTEGER * /* plibNewPosition */)
    {
        // SAPI calls Seek during recognition; the Kinect stream cannot seek,
        // but returning failure would abort recognition, so claim success.
        return S_OK;
    }
    
    STDMETHODIMP KinectAudioStreamWrapper::SetSize(ULARGE_INTEGER)
    {
        return E_NOTIMPL;
    }
    
    STDMETHODIMP KinectAudioStreamWrapper::CopyTo(IStream *, ULARGE_INTEGER, ULARGE_INTEGER *, ULARGE_INTEGER *)
    {
        return E_NOTIMPL;
    }
    
    STDMETHODIMP KinectAudioStreamWrapper::Commit(DWORD)
    {
        return E_NOTIMPL;
    }
    
    STDMETHODIMP KinectAudioStreamWrapper::Revert()
    {
        return E_NOTIMPL;
    }
    
    STDMETHODIMP KinectAudioStreamWrapper::LockRegion(ULARGE_INTEGER, ULARGE_INTEGER, DWORD)
    {
        return E_NOTIMPL;
    }
    
    STDMETHODIMP KinectAudioStreamWrapper::UnlockRegion(ULARGE_INTEGER, ULARGE_INTEGER, DWORD)
    {
        return E_NOTIMPL;
    }
    
    STDMETHODIMP KinectAudioStreamWrapper::Stat(STATSTG *, DWORD)
    {
        return E_NOTIMPL;
    }
    
    STDMETHODIMP KinectAudioStreamWrapper::Clone(IStream **)
    {
        return E_NOTIMPL;
    }
    
    

    3.测试代码

    main.h

    #include "KinectApp.h"
    #include <string>
    #include "MKSpeech.h"
    
    // Demo callback: prints a tracked body's position and state.
    // NOTE(review): BodyRect is declared elsewhere (KinectApp.h presumably);
    // field meanings inferred from names -- confirm against that header.
    void __stdcall onActionCallBack(BodyRect bRect)
    {
        printf("Person %d : X:%d Y:%d Z:%d State:%d \n", 0, bRect.X,bRect.Y,bRect.Z,bRect.type); 
    }
    
    // Demo callback: prints error strings reported by MKSpeech / KinectApp.
    void __stdcall onErrorCallBack(string codeStr)
    {
        std::cout << codeStr << std::endl;
    }
    
    // Demo callback: prints a pan-gesture action code.
    // NOTE(review): prints the enum with %c, i.e. as a raw character --
    // presumably the enum values are printable ASCII; confirm.
    void __stdcall onPanActionCallBack(PanActionType type)
    {
        printf("%c",type); 
    }
    
    
    // Demo callback: prints the recognition status code and recognized text.
    void __stdcall onAudioCallBack(string str,AudioCallStatus status)
    {
    
        printf("%d ",status); 
        
        std::cout << str << std::endl;
    }
    
    // Demo globals: the Kinect gesture app is commented out below; only the
    // speech demo is exercised.
    KinectApp *app;
    MKSpeech *speech;
    
    // Demo driver: initializes COM, then endlessly alternates (at random)
    // between the Chinese (0) and English (1) grammars, pausing between runs.
    int main()
    {
        if (!SUCCEEDED(CoInitialize(NULL)))
        {
            return 0;
        }
        /*app = new KinectApp();
        app->m_setActionCallBack(onActionCallBack);
        app->m_setErrorActionCallBack(onErrorCallBack);
        app->m_setPanActionCallBack(onPanActionCallBack);
        app->open();*/
    
        speech = new MKSpeech();
        speech->m_setErrorActionCallBack(onErrorCallBack);
        speech->m_setAudioCallBack(onAudioCallBack);
        for (;;)
        {
            const int grammar = rand()%2;
            printf("%d",grammar);
            speech->open(grammar);
            system("pause");
        }
    }
    

    4.语音文件格式

    <grammar root="rootRule" tag-format="semantics/1.0-literals" version="1.0" xml:lang="zh-CN" xmlns="http://www.w3.org/2001/06/grammar">
      <rule id="rootRule">
        <one-of>
          <item>
            <tag>5qyn5rSy</tag>
            <one-of>
              <item>青海交通职业技术学院</item>
              <item>欧洲</item>
              <item>亚洲</item>
              <item>青海</item>
              <item>黑龙江</item>
            </one-of>
          </item>
        </one-of>
      </rule>
    </grammar>
    

    5.几项注意

        1.语音识别可使用 Kinect 2 SDK 提供的语音输入流。语音识别对环境要求较高(需安静),距离要近(2 米内);环境越安静,识别速度越快、识别准确率越高。
        2.语音识别为指令性语音识别,识别指令相似度越高,识别准确率越低。
        3.本代码开发工具为Visual Studio 与上一篇手势控制鼠标为一套代码。
        4.如有问题、建议可联系249086205@qq.com。
    

    本文参考文献:

        [https://blog.csdn.net/dustpg/column/info/k4w2dn](https://blog.csdn.net/dustpg/column/info/k4w2dn)
    

    代码地址:链接: https://pan.baidu.com/s/1aY8S2VWOBIsW-JbAqcdEow 提取码: kujw

    相关文章

      网友评论

          本文标题:Microsoft Speech 语音识别

          本文链接:https://www.haomeiwen.com/subject/jnvtbctx.html