Speech-to-Text with the Speech Framework

Author: 丨n水瓶座菜虫灬 | Published 2017-03-16 16:16

    This post records my notes and sample code from learning Speech Framework, Apple's native speech recognition framework. I read through the official documentation once, familiarized myself with the framework's classes and methods, and summarized them in an XMind diagram.

    Prerequisites: Xcode 8 or later, and an iOS device running iOS 10 or later.

    Classes and methods in the Speech Framework

    (XMind diagram of the Speech Framework's classes and methods. The main types, SFSpeechRecognizer, SFSpeechAudioBufferRecognitionRequest, and SFSpeechRecognitionTask, all appear in the code below.)
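
    Before the full live-microphone example below, here is a minimal sketch of the simplest flow these classes support: recognizing a prerecorded audio file with SFSpeechURLRecognitionRequest. The sketch is mine, not from the original post; the function name is a hypothetical placeholder, and it assumes the user has already authorized speech recognition (covered next):

    import Speech

    /// Hypothetical helper: transcribe an audio file on disk.
    /// Assumes speech recognition authorization has already been granted.
    func recognizeFile(at url: URL) {
        guard let recognizer = SFSpeechRecognizer(), recognizer.isAvailable else {
            print("Speech recognizer is not available")
            return
        }
        let request = SFSpeechURLRecognitionRequest(url: url)
        _ = recognizer.recognitionTask(with: request) { (result, error) in
            guard let result = result else {
                print("Recognition failed: \(error?.localizedDescription ?? "unknown error")")
                return
            }
            if result.isFinal { // print the transcript once it is final
                print(result.bestTranscription.formattedString)
            }
        }
    }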

    Note: Because these features require user permission, you need to add two keys to the Info.plist file: Privacy - Microphone Usage Description (microphone access) and Privacy - Speech Recognition Usage Description (speech recognition access).
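
    In the Info.plist source, those two entries would look something like the following; the usage strings here are placeholders of my own, and iOS displays them in the permission prompts:

    <key>NSMicrophoneUsageDescription</key>
    <string>The microphone is used to capture your speech.</string>
    <key>NSSpeechRecognitionUsageDescription</key>
    <string>Your speech is sent to Apple's servers to be transcribed into text.</string>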

    Swift code

    import UIKit
    import Speech
    
    class ViewController: UIViewController {
    
      @IBOutlet weak var textView: UITextView!
      @IBOutlet weak var microphoneButton: UIButton!
    
      /// The speech recognizer that performs the actual recognition
      private let speechRecognizer = SFSpeechRecognizer()

      /// The recognition request that handles the audio buffers and feeds them to the recognizer
      private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?

      /// The recognition task, which reports the results back to you. Keeping a
      /// reference to it is handy because it lets you cancel or stop the task
      private var recognitionTask: SFSpeechRecognitionTask?

      /// The audio engine, responsible for providing the audio input
      private let audioEngine = AVAudioEngine()
    
      override func viewDidLoad() {
          super.viewDidLoad()
          // Do any additional setup after loading the view, typically from a nib.
          microphoneButton.isEnabled = false
        
          speechRecognizer?.delegate = self
    
          /// Request the user's authorization for speech recognition
          SFSpeechRecognizer.requestAuthorization { (authStatus) in
    
              var isButtonEnabled = false
    
              switch authStatus {
              case .authorized: // the user authorized speech recognition
                  isButtonEnabled = true

              case .denied: // the user denied speech recognition
                  isButtonEnabled = false
                  print("User denied access to speech recognition")

              case .restricted: // speech recognition is restricted on this device
                  isButtonEnabled = false
                  print("Speech recognition restricted on this device")

              case .notDetermined: // the user has not made a choice yet
                  isButtonEnabled = false
                  print("Speech recognition not yet authorized")
              }
    
              OperationQueue.main.addOperation {
                  self.microphoneButton.isEnabled = isButtonEnabled
              }
          }
      }
    
      @IBAction func microphoneButtonClick(_ sender: UIButton) {
          if audioEngine.isRunning {
              audioEngine.stop()
              recognitionRequest?.endAudio()
              microphoneButton.isEnabled = false
              microphoneButton.setTitle("Start Recording", for: .normal)
          } else {
              startRecording()
              microphoneButton.setTitle("Stop Recording", for: .normal)
          }
      }
    
      func startRecording() {
          if recognitionTask != nil { // if a task is already running, cancel it
              recognitionTask?.cancel()
              recognitionTask = nil
          }

          let audioSession = AVAudioSession.sharedInstance() // prepare the session for recording
          do {
              try audioSession.setCategory(AVAudioSessionCategoryRecord)
              try audioSession.setMode(AVAudioSessionModeMeasurement)
              try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
          } catch {
              print("audioSession properties weren't set because of an error.")
          }
    
          /// Create the recognitionRequest, which streams the audio data to Apple's servers
          recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
    
          /// Check that the audioEngine (your device) has an input node for recording
          guard let inputNode = audioEngine.inputNode else {
              fatalError("Audio engine has no input node")
          }

          /// Check that the recognitionRequest was created and is not nil
          guard let recognitionRequest = recognitionRequest else {
              fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
          }
    
          /// Have the recognitionRequest report partial results while the user is speaking
          recognitionRequest.shouldReportPartialResults = true
    
          /// Start the recognition. The result handler is called each time the recognizer receives input and refines its current recognition, and when the task is canceled or stopped; eventually it returns the final transcription
          recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
              var isFinal = false // tracks whether the recognition has finished

              /// If result is not nil, set textView.text to the best transcription;
              /// if it is the final result, set isFinal to true
              if let result = result {
                  self.textView.text = result.bestTranscription.formattedString
                  isFinal = result.isFinal
              }
    
              /// On error, or once the result is final, stop the audioEngine (the audio input) and release the recognitionRequest and recognitionTask
              if error != nil || isFinal {
                  self.audioEngine.stop()
                  inputNode.removeTap(onBus: 0)
    
                  self.recognitionRequest = nil
                  self.recognitionTask = nil
    
                  self.microphoneButton.isEnabled = true
              }
          })
    
          /// Add an audio input tap that feeds the recognitionRequest. It is fine to add the audio input after starting the recognitionTask; the Speech framework starts recognizing as soon as audio is appended
          let recordingFormat = inputNode.outputFormat(forBus: 0)
          inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
              self.recognitionRequest?.append(buffer)
          }
    
          /// Prepare and start the audioEngine
          audioEngine.prepare()
    
          do {
              try audioEngine.start()
          } catch {
              print("audioEngine couldn't start because of an error")
          }
    
          textView.text = "Say something, I'm listening!"
      }
    }
    
    extension ViewController: SFSpeechRecognizerDelegate {
      /// Called when the recognizer's availability changes
      func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
          microphoneButton.isEnabled = available
      }
    }  
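
    The default SFSpeechRecognizer() above uses the device's current locale. To recognize a specific language regardless of the device settings, you can create the recognizer with an explicit locale instead. A small sketch (the "zh-CN" identifier is my assumption; check SFSpeechRecognizer.supportedLocales() for what is actually available):

    import Speech

    // List every locale the framework can recognize.
    print(SFSpeechRecognizer.supportedLocales())

    // Pin the recognizer to Mandarin Chinese. The initializer is failable
    // and returns nil if the locale is not supported.
    let mandarinRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "zh-CN"))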
    

    Objective-C code

    #import <Speech/Speech.h>
    
    @interface ViewController ()<SFSpeechRecognizerDelegate>
    
    @property (nonatomic, strong) SFSpeechRecognizer *speechRecognizer;
    
    @property (nonatomic, strong) SFSpeechRecognitionTask *recognitionTask;
    
    @property (nonatomic, strong) SFSpeechAudioBufferRecognitionRequest *recognitionRequest;
    
    /// The audio engine, responsible for providing the audio input
    @property (nonatomic, strong) AVAudioEngine *audioEngine;
    
    @property (weak, nonatomic) IBOutlet UITextView *textView;
    @property (weak, nonatomic) IBOutlet UIButton *microphoneBtn;
    
    @end
    
    @implementation ViewController
    
    - (void)dealloc {
        [self.recognitionTask cancel];
        self.recognitionTask = nil;
    }
    
    - (void)viewDidLoad {
        [super viewDidLoad];
        // Do any additional setup after loading the view, typically from a nib.
        self.view.backgroundColor = [UIColor whiteColor];
    
        NSLog(@"supportedLocales: %@", [SFSpeechRecognizer supportedLocales]);
    
        self.microphoneBtn.enabled = NO;
    
        /// Create the speech recognizer and set its delegate
        self.speechRecognizer = [[SFSpeechRecognizer alloc] init];
    
        self.speechRecognizer.delegate = self;
    
        /// Request the user's authorization
        [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status) {

            BOOL isButtonEnabled = NO;

            switch (status) {
                case SFSpeechRecognizerAuthorizationStatusNotDetermined:
                    isButtonEnabled = NO;
                    NSLog(@"SFSpeechRecognizerAuthorizationStatusNotDetermined");
                    break;
                case SFSpeechRecognizerAuthorizationStatusDenied:
                    isButtonEnabled = NO;
                    NSLog(@"SFSpeechRecognizerAuthorizationStatusDenied");
                    break;
                case SFSpeechRecognizerAuthorizationStatusRestricted:
                    isButtonEnabled = NO;
                    NSLog(@"SFSpeechRecognizerAuthorizationStatusRestricted");
                    break;
                case SFSpeechRecognizerAuthorizationStatusAuthorized:
                    NSLog(@"SFSpeechRecognizerAuthorizationStatusAuthorized");
                    isButtonEnabled = YES;
                    break;
                default:
                    break;
            }

            dispatch_async(dispatch_get_main_queue(), ^{
                self.microphoneBtn.enabled = isButtonEnabled;
            });
        }];
    
    
        /// Create the audio engine
        self.audioEngine = [[AVAudioEngine alloc] init];
    }
    
    - (IBAction)microphoneBtnClick:(UIButton *)sender {
        if (self.audioEngine.isRunning) {
            [self.audioEngine stop];
            [self.recognitionRequest endAudio];
            self.microphoneBtn.enabled = NO;
            [self.microphoneBtn setTitle:@"Start Recording" forState:UIControlStateNormal];
        } else {
            [self startRecording];
            [self.microphoneBtn setTitle:@"Stop Recording" forState:UIControlStateNormal];
        }
    }
    
    #pragma mark - private method
    - (void)startRecording {
        if (self.recognitionTask != nil) {
            [self.recognitionTask cancel]; // cancel the recognition task that is currently running
            self.recognitionTask = nil;
        }
    
        AVAudioSession *audioSession = [AVAudioSession sharedInstance];
        NSError *categoryError = nil;
        if (![audioSession setCategory:AVAudioSessionCategoryRecord error:&categoryError]) {
            NSLog(@"categoryError: %@", categoryError.localizedDescription);
        }
    
        NSError *modeError = nil;
        if (![audioSession setMode:AVAudioSessionModeMeasurement error:&modeError]) {
            NSLog(@"modeError: %@", modeError.localizedDescription);
        }
    
        NSError *activeError = nil;
        if (![audioSession setActive:YES withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:&activeError]) {
            NSLog(@"activeError: %@", activeError.localizedDescription);
        }
    
        /// Create the request that recognizes live audio captured from the device's microphone
        self.recognitionRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
    
        if (!self.audioEngine.inputNode) { // the engine's input node (the microphone)
            NSLog(@"Audio engine has no input node");
            return;
        }
    
        if (!self.recognitionRequest) {
            NSLog(@"Unable to create an SFSpeechAudioBufferRecongitionRequest object");
            return;
        }
    
        /// Report partial (non-final) results for each utterance as it is spoken
        self.recognitionRequest.shouldReportPartialResults = YES;
    
        /// Start the recognition task; the result handler reports results and errors
        self.recognitionTask = [self.speechRecognizer recognitionTaskWithRequest:self.recognitionRequest resultHandler:^(SFSpeechRecognitionResult * _Nullable result, NSError * _Nullable error) {

            BOOL isFinal = NO;

            if (result) {
                self.textView.text = result.bestTranscription.formattedString;
                isFinal = result.isFinal;
            }

            if (error || isFinal) {
                [self.audioEngine stop];
                [self.audioEngine.inputNode removeTapOnBus:0];

                self.recognitionRequest = nil;
                self.recognitionTask = nil;

                self.microphoneBtn.enabled = YES;
            }
        }];
    
        AVAudioFormat *recordingFormat = [self.audioEngine.inputNode outputFormatForBus:0];
    
        [self.audioEngine.inputNode installTapOnBus:0 bufferSize:1024 format:recordingFormat block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) {
            /// Append the PCM audio buffer to the end of the recognition request
            [self.recognitionRequest appendAudioPCMBuffer:buffer];
        }];
    
        [self.audioEngine prepare];
    
        NSError *startError = nil;
        if(![self.audioEngine startAndReturnError:&startError]) {
            NSLog(@"startError: %@", startError.localizedDescription);
        }
    
        self.textView.text = @"Say something, I'm listening";
    }
    
    #pragma mark - SFSpeechRecognizerDelegate
    - (void)speechRecognizer:(SFSpeechRecognizer *)speechRecognizer availabilityDidChange:(BOOL)available {
        self.microphoneBtn.enabled = available;
    }
    

    References:
    Building a Speech-to-Text App Using Speech Framework in iOS 10
    SpeakToMe: Using Speech Recognition with AVAudioEngine
