美文网首页语音识别
基于kaldi的iOS语音识别(本地)+04+自定义解码器插件

基于kaldi的iOS语音识别(本地)+04+自定义解码器插件

作者: 长风浮云 | 来源:发表于2019-01-27 23:01 被阅读0次

    iOS在线识别:https://www.jianshu.com/u/3c2a0bd52ebc

    因为GStreamer的操作几乎都是在一个“黑盒”里面进行操作,所以它的这一套操作也有它自己的一个标准,就是一个个插件,我们要用到GStreamer,所以我们需要把解码器那部分也做成一个GStreamer插件,加入到管道(pipeline)中,这样就让GStreamer自己去处理传输和识别解码。

    接下来我们就说说该项目最难的一部分了。

    因为有些内容涉及到GStreamer自己的东西,比如制作模板要求和规范,我这里就不展开阐述了,也不是本内容的重点,这里只会讲解解码器在插件中的使用。

    插件模板

    插件名称kaldidecoder

    初始化

    /* the capabilities of the inputs and outputs.
     *
     */
    static GstStaticPadTemplate sink_template =
    GST_STATIC_PAD_TEMPLATE("sink",
        GST_PAD_SINK,
        GST_PAD_ALWAYS,
        GST_STATIC_CAPS(
            "audio/x-raw, "
            "format = (string) S16LE, "
            "channels = (int) 1, "
            "rate = (int) [ 1, MAX ]"));
    
    static GstStaticPadTemplate src_template =
    GST_STATIC_PAD_TEMPLATE("src",
        GST_PAD_SRC,
        GST_PAD_ALWAYS,
        GST_STATIC_CAPS("text/x-raw, format= { utf8 }"));
    
    static guint gst_ kaldidecoder_signals[LAST_SIGNAL];
    
    #define gst_ kaldidecoder_parent_class parent_class
    G_DEFINE_TYPE(Gst kaldidecoder, gst_ kaldidecoder,
                  GST_TYPE_ELEMENT);
    
    static void gst_ kaldidecoder_load_phone_syms(Gst kaldidecoder * filter,
                                                            const GValue * value);
    
    static void gst_kaldidecoder_load_word_syms(Gst kaldidecoder * filter,
                                                           const GValue * value);
    
    static void gst_ kaldidecoder_load_model(Gst kaldidecoder * filter,
                                                       const GValue * value);
    
    static void gst_ kaldidecoder_load_fst(Gst kaldidecoder * filter,
                                                     const GValue * value);
    
    static void gst_ kaldidecoder_load_lm_fst(Gst kaldidecoder * filter,
                                                        const GValue * value);
    
    static void gst_ kaldidecoder_load_big_lm(Gst kaldidecoder * filter,
                                                        const GValue * value);
    
    static void gst_ kaldidecoder_load_word_boundary_info(Gst kaldidecoder * filter,
                                                                    const GValue * value);
    
    
    static void gst_ kaldidecoder_set_property(GObject * object,
                                                         guint prop_id,
                                                         const GValue * value,
                                                         GParamSpec * pspec);
    
    static void gst_ kaldidecoder_get_property(GObject * object,
                                                         guint prop_id,
                                                         GValue * value,
                                                         GParamSpec * pspec);
    
    static gboolean gst_ kaldidecoder_sink_event(GstPad * pad,
                                                           GstObject * parent,
                                                           GstEvent * event);
    
    static GstFlowReturn gst_ kaldidecoder_chain(GstPad * pad,
                                                           GstObject * parent,
                                                           GstBuffer * buf);
    
    static GstStateChangeReturn gst_ kaldidecoder_change_state(
        GstElement *element, GstStateChange transition);
    
    static gboolean gst_ kaldidecoder_query(GstPad *pad, GstObject * parent, GstQuery * query);
    
    static void gst_ kaldidecoder_finalize(GObject * object);
    
    

    这些pad模板都需要在_class_init方法里面通过gst_element_class_add_pad_template()进行注册。
    _class_init:

    /* GObject vmethod implementations */
    
    /* Initialize the kaldidecoder's class.
     *
     * Wires up the GObject property/finalize handlers and the GstElement
     * state-change handler, installs the element's properties (elided in
     * this excerpt), registers the three result signals, sets the element
     * details and adds the src and sink pad templates to the class.
     *
     * klass: the class structure being initialized.
     */
    static void gst_kaldidecoder_class_init(
        GstkaldidecoderClass * klass) {
      GObjectClass *gobject_class;
      GstElementClass *gstelement_class;
    
      /* The same class structure viewed as each of its parent class types. */
      gobject_class = (GObjectClass *) klass;
      gstelement_class = (GstElementClass *) klass;
    
      gobject_class->set_property = gst_kaldidecoder_set_property;
      gobject_class->get_property = gst_kaldidecoder_get_property;
      gobject_class->finalize = gst_kaldidecoder_finalize;
    
      gstelement_class->change_state = gst_kaldidecoder_change_state;
    
      /* Property installation; the arguments and any further calls are
       * elided in this excerpt. */
      g_object_class_install_property(
          ...
      );
      ...
    
      /* Each signal below carries a single string argument (G_TYPE_STRING)
       * and has no return value (G_TYPE_NONE). The returned ids are stored
       * in gst_kaldidecoder_signals[]. */
      gst_kaldidecoder_signals[PARTIAL_RESULT_SIGNAL] = g_signal_new(
          "partial-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
          G_STRUCT_OFFSET(GstkaldidecoderClass, partial_result),
          NULL,
          NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1,
          G_TYPE_STRING);
    
      gst_kaldidecoder_signals[FINAL_RESULT_SIGNAL] = g_signal_new(
          "final-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
          G_STRUCT_OFFSET(GstkaldidecoderClass, final_result),
          NULL,
          NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1,
          G_TYPE_STRING);
    
      gst_kaldidecoder_signals[FULL_FINAL_RESULT_SIGNAL] = g_signal_new(
          "full-final-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
          G_STRUCT_OFFSET(GstkaldidecoderClass, full_final_result),
          NULL,
          NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1,
          G_TYPE_STRING);
    
      /* Element details: long name, classification, description, author. */
      gst_element_class_set_details_simple(
          gstelement_class, "KaldiDecoder", "Speech/Audio",
          "Convert speech to text", "changfengfuyun");
    
      /* Register the static pad templates declared at file scope. */
      gst_element_class_add_pad_template(gstelement_class,
                                         gst_static_pad_template_get(&src_template));
    
      gst_element_class_add_pad_template(
          gstelement_class, gst_static_pad_template_get(&sink_template));
    }
    

    定义好插件各部分的代码之后,我们还需要一个插件入口方法_init(),用来把元件注册到GStreamer中。

    /* Entry point to initialize the plug-in.
     *
     * Sets up the debug category and registers the "kaldidecoder" element
     * factory with the given plug-in.
     *
     * kaldidecoder: the plug-in the element is registered with.
     * Returns the result of gst_element_register().
     */
    static gboolean kaldidecoder_init(
        GstPlugin * kaldidecoder) {
      /* debug category for filtering log messages
       *
       * exchange the string 'Template kaldidecoder' with your description
       */
      GST_DEBUG_CATEGORY_INIT(gst_kaldidecoder_debug,
                              "kaldidecoder", 0,
                              "Template kaldidecoder");
    
      /* The original was missing the comma after the plug-in argument,
       * which is a syntax error. */
      return gst_element_register(kaldidecoder,
                                  "kaldidecoder", GST_RANK_NONE,
                                  GST_TYPE_KALDIDECODER);
    }
    

    至此,就完成了解码器模板的初始化

    指定pads

    pads是数据进出元素的端口,这使得它们在元素创建过程中成为非常重要的项。在模板代码中,我们已经看到了静态pad模板如何负责将pad模板注册到元素类中。在这里,我们将看到如何创建实际的元素,如何使用_event()-函数来配置特定的格式,以及如何注册函数来让数据流经元素。

    创建pad:

    /* Initialize a new element instance.
     *
     * Creates the sink and src pads from the static templates, attaches the
     * event, chain and query callbacks to the sink pad, and adds both pads
     * to the element. Other instance-structure and decoder initialization
     * is elided ("...") in this excerpt.
     *
     * filter: the instance being initialized.
     */
    static void gst_kaldidecoder_init(
        Gstkaldidecoder * filter) {
      ...
      /* NOTE(review): this NULL store is overwritten by the assignment
       * immediately below. */
      filter->sinkpad = NULL;
    
      /* Sink pad: receives the audio buffers and the stream events/queries. */
      filter->sinkpad = gst_pad_new_from_static_template(&sink_template, "sink");
      gst_pad_set_event_function(
          filter->sinkpad,
          GST_DEBUG_FUNCPTR(gst_kaldidecoder_sink_event));
      gst_pad_set_chain_function(
          filter->sinkpad, GST_DEBUG_FUNCPTR(gst_kaldidecoder_chain));
      gst_pad_set_query_function(
          filter->sinkpad, GST_DEBUG_FUNCPTR(gst_kaldidecoder_query));
      gst_pad_use_fixed_caps(filter->sinkpad);
      gst_element_add_pad(GST_ELEMENT(filter), filter->sinkpad);
    
      /* Src pad: no callbacks are attached to it here. */
      filter->srcpad = gst_pad_new_from_static_template(&src_template, "src");
      gst_pad_use_fixed_caps(filter->srcpad);
      gst_element_add_pad(GST_ELEMENT(filter), filter->srcpad);
    
      // decoder-related initialization
      ...
    }
    

    这里也是每次生成元件的时候都会调用的方法

    _chain方法:

    • 用于接收和处理sinkpad上的输入数据。
    /* Chain function: receives each buffer arriving on the sink pad.
     *
     * When the audio source exists, the buffer is handed to it (unless the
     * element is in silent mode) and GST_FLOW_OK is returned. When it does
     * not exist, an element error is posted and GST_FLOW_NOT_NEGOTIATED is
     * returned. The buffer reference is released on every path.
     */
    static GstFlowReturn gst_kaldidecoder_chain(GstPad * pad,
                                                GstObject * parent,
                                                GstBuffer * buf) {
      Gstkaldidecoder *decoder = GST_KALDIDECODER(parent);
      GstFlowReturn flow;
    
      if (G_LIKELY(decoder->audio_source)) {
        if (!decoder->silent) {
          decoder->audio_source->PushBuffer(buf);
        }
        flow = GST_FLOW_OK;
      } else {
        /* No audio source was allocated before data started flowing. */
        GST_ELEMENT_ERROR(decoder, CORE, NEGOTIATION, (NULL),
                          ("decoder wasn't allocated before chain function"));
        flow = GST_FLOW_NOT_NEGOTIATED;
      }
    
      gst_buffer_unref(buf);
      return flow;
    }
    

    _event方法:

    • 该方法通知你在传输数据流中发生的特殊事件(如caps, end-of-stream, newsegment, tags等)。
    /* Handles events arriving on the sink pad.
     *
     * SEGMENT: marks the element as decoding and starts the decoding task
     *          on the src pad.
     * CAPS:    accepted without further action.
     * EOS:     while decoding, marks the audio source as ended; otherwise
     *          pushes a new EOS event out of the src pad.
     * other:   delegated to gst_pad_event_default().
     *
     * The event function owns the event it is given, so every branch that
     * does not forward the event releases it with gst_event_unref().
     *
     * Returns TRUE for the events handled here, otherwise the result of the
     * default handler.
     */
    static gboolean gst_kaldidecoder_sink_event(GstPad * pad,
                                                GstObject * parent,
                                                GstEvent * event) {
      gboolean ret;
      Gstkaldidecoder *filter;
    
      filter = GST_KALDIDECODER(parent);
    
      GST_DEBUG_OBJECT(filter, "Handling %s event", GST_EVENT_TYPE_NAME(event));
    
      switch (GST_EVENT_TYPE(event)) {
        case GST_EVENT_SEGMENT: {
          GST_DEBUG_OBJECT(filter, "Starting decoding task");
          filter->decoding = true;
          gst_pad_start_task(filter->srcpad,
                             (GstTaskFunction) gst_kaldidecoder_loop,
                             filter, NULL);
    
          GST_DEBUG_OBJECT(filter, "Started decoding task");
          /* Consumed here, not forwarded: drop our reference. */
          gst_event_unref(event);
          ret = TRUE;
          break;
        }
        case GST_EVENT_CAPS: {
          /* Consumed here, not forwarded: drop our reference. */
          gst_event_unref(event);
          ret = TRUE;
          break;
        }
        case GST_EVENT_EOS: {
          /* end-of-stream, we should close down all stream leftovers here */
          GST_DEBUG_OBJECT(filter, "EOS received");
          if (filter->decoding) {
            filter->audio_source->SetEnded(true);
          } else {
            GST_DEBUG_OBJECT(filter, "EOS received while not decoding, pushing EOS out");
            gst_pad_push_event(filter->srcpad, gst_event_new_eos());
          }
          /* The incoming EOS itself is never forwarded (a fresh one is
           * pushed above when needed), so release it. */
          gst_event_unref(event);
          ret = TRUE;
          break;
        }
        default:
          /* The default handler takes over ownership of the event. */
          ret = gst_pad_event_default(pad, parent, event);
          break;
      }
      return ret;
    }
    

    _query方法:

    • 元件收到query(查询)时必须作出答复,该方法负责处理这些查询。
    /* GstElement vmethod implementations */
    
    /* Answers queries arriving on the sink pad.
     *
     * For a CAPS query the feature pipeline info is created on first use,
     * the sample rate is taken from the PLP or MFCC frame options depending
     * on the configured feature type, and the query is answered with
     * S16LE mono audio/x-raw caps at that rate. Every other query type is
     * passed to gst_pad_query_default().
     *
     * Returns TRUE for CAPS queries, otherwise the default handler's result.
     */
    static gboolean
    gst_kaldidecoder_query (GstPad *pad, GstObject * parent, GstQuery * query) {
      Gstkaldidecoder *decoder = GST_KALDIDECODER(parent);
    
      if (GST_QUERY_TYPE (query) != GST_QUERY_CAPS) {
        return gst_pad_query_default (pad, parent, query);
      }
    
      if (decoder->feature_info == NULL) {
        decoder->feature_info = new OnlineNnet2FeaturePipelineInfo(*(decoder->feature_config));
        /* The sample rate comes from whichever front-end is configured. */
        const bool uses_plp =
            strcmp((decoder->feature_config->feature_type).c_str(), "plp") == 0;
        decoder->sample_rate = uses_plp
            ? (int) decoder->feature_info->plp_opts.frame_opts.samp_freq
            : (int) decoder->feature_info->mfcc_opts.frame_opts.samp_freq;
      }
    
      GstCaps *caps = gst_caps_new_simple ("audio/x-raw",
            "format", G_TYPE_STRING, "S16LE",
            "rate", G_TYPE_INT, decoder->sample_rate,
            "channels", G_TYPE_INT, 1, NULL);
      GST_DEBUG_OBJECT (decoder, "Setting caps query result: %" GST_PTR_FORMAT, caps);
      gst_query_set_caps_result (query, caps);
      /* Drop our own reference to the caps. */
      gst_caps_unref (caps);
      return TRUE;
    }
    

    接下来讲解kaldi的解码。

    相关文章

      网友评论

        本文标题:基于kaldi的iOS语音识别(本地)+04+自定义解码器插件

        本文链接:https://www.haomeiwen.com/subject/qiugjqtx.html