美文网首页
cvte在线解码器源码修改(干货)

cvte在线解码器源码修改(干货)

作者: 诸葛村姑 | 来源:发表于2017-12-23 09:14 被阅读0次
    //我改的部分都用zhangfeifan进行注释了,想只看差别的可搜索此关键字进行这部分的查看
    //思路
    //从OnlineNnet2FeaturePipelineConfig->OnlineNnet2FeaturePipelineInfo
    //->OnlineNnet2FeaturePipeline
    //主要修改两处,一是看读入的config文件中有没有cmvn处理;二是在构造函数中,判断若有cmvn配置,则进行特征提取
    // online2/online-nnet2-feature-pipeline.cc
    // Copyright 2013-2014   Johns Hopkins University (author: Daniel Povey)
    #include "online2/online-nnet2-feature-pipeline.h"
    #include "transform/cmvn.h"
    
    namespace kaldi {
    
    // Builds the pipeline "info" object from the command-line level `config`:
    // validates the base feature type, reads every sub-config file that was
    // supplied, and records whether pitch, CMVN and iVectors are in use.
    OnlineNnet2FeaturePipelineInfo::OnlineNnet2FeaturePipelineInfo(
        const OnlineNnet2FeaturePipelineConfig &config):
        silence_weighting_config(config.silence_weighting_config) {
      if (config.feature_type == "mfcc" || config.feature_type == "plp" ||
          config.feature_type == "fbank") {
        feature_type = config.feature_type;
      } else {
        // "fbank" is accepted by the check above, so it is listed here as well.
        KALDI_ERR << "Invalid feature type: " << config.feature_type << ". "
                  << "Supported feature types: mfcc, plp, fbank.";
      }

      if (config.mfcc_config != "") {
        ReadConfigFromFile(config.mfcc_config, &mfcc_opts);
        if (feature_type != "mfcc")
          KALDI_WARN << "--mfcc-config option has no effect "
                     << "since feature type is set to " << feature_type << ".";
      }  // else use the defaults.

      if (config.plp_config != "") {
        ReadConfigFromFile(config.plp_config, &plp_opts);
        if (feature_type != "plp")
          KALDI_WARN << "--plp-config option has no effect "
                     << "since feature type is set to " << feature_type << ".";
      }  // else use the defaults.

      if (config.fbank_config != "") {
        ReadConfigFromFile(config.fbank_config, &fbank_opts);
        if (feature_type != "fbank")
          KALDI_WARN << "--fbank-config option has no effect "
                     << "since feature type is set to " << feature_type << ".";
      }  // else use the defaults.

      add_pitch = config.add_pitch;

      if (config.online_pitch_config != "") {
        ReadConfigsFromFile(config.online_pitch_config,
                            &pitch_opts,
                            &pitch_process_opts);
        if (!add_pitch)
          KALDI_WARN << "--online-pitch-config option has no effect "
                     << "since you did not supply --add-pitch option.";
      }  // else use the defaults.

      // zhangfeifan start
      // CMVN is optional.  It is switched on only when a CMVN config file is
      // given, and in that case the global CMVN stats filename is mandatory.
      if (config.cmvn_config != "") {
        ReadConfigFromFile(config.cmvn_config, &cmvn_opts);
        global_cmvn_stats_rxfilename = config.global_cmvn_stats_rxfilename;
        if (global_cmvn_stats_rxfilename == "")
          KALDI_ERR << "--global-cmvn-stats option is required.";
      } else if (config.global_cmvn_stats_rxfilename != "") {
        // Same treatment as the other options above: tell the user that the
        // option is being ignored instead of dropping it silently.
        KALDI_WARN << "--global-cmvn-stats option has no effect "
                   << "since you did not supply --cmvn-config option.";
      }  // else CMVN stays disabled (global_cmvn_stats_rxfilename stays empty).
      // zhangfeifan end

      if (config.ivector_extraction_config != "") {
        use_ivectors = true;
        OnlineIvectorExtractionConfig ivector_extraction_opts;
        ReadConfigFromFile(config.ivector_extraction_config,
                           &ivector_extraction_opts);
        ivector_extractor_info.Init(ivector_extraction_opts);
      } else {
        use_ivectors = false;
      }
    }
    // Constructor.  Assembles the chain
    //   base feature -> [online CMVN] -> [+ pitch] -> [+ iVector]
    // where the bracketed stages are optional and controlled by `info`.
    // cmvn_ is initialized to NULL up front so that it holds a well-defined
    // value even when CMVN is not configured.
    OnlineNnet2FeaturePipeline::OnlineNnet2FeaturePipeline(
        const OnlineNnet2FeaturePipelineInfo &info):
        info_(info), cmvn_(NULL) {
      // zhangfeifan start
      // Read the global CMVN stats only if a filename was configured; an empty
      // matrix afterwards means that CMVN is disabled.
      if (info_.global_cmvn_stats_rxfilename != "")
        ReadKaldiObject(info_.global_cmvn_stats_rxfilename, &global_cmvn_stats_);
      // zhangfeifan end

      if (info_.feature_type == "mfcc") {
        base_feature_ = new OnlineMfcc(info_.mfcc_opts);
      } else if (info_.feature_type == "plp") {
        base_feature_ = new OnlinePlp(info_.plp_opts);
      } else if (info_.feature_type == "fbank") {
        base_feature_ = new OnlineFbank(info_.fbank_opts);
      } else {
        KALDI_ERR << "Code error: invalid feature type " << info_.feature_type;
      }

      // zhangfeifan start
      if (global_cmvn_stats_.NumRows() != 0) {
        if (info_.add_pitch) {
          // The stats may have more columns than the base feature has
          // dimensions (NOTE(review): presumably because they were accumulated
          // on features that already included pitch -- confirm).  CMVN is
          // applied to the base feature only, so keep the first `dim` columns
          // and move the final column to index `dim`.
          int32 global_dim = global_cmvn_stats_.NumCols() - 1;
          int32 dim = base_feature_->Dim();
          KALDI_ASSERT(global_dim >= dim);
          if (global_dim > dim) {
            Matrix<BaseFloat> last_col(
                global_cmvn_stats_.ColRange(global_dim, 1));
            global_cmvn_stats_.Resize(global_cmvn_stats_.NumRows(), dim + 1,
                                      kCopyData);
            global_cmvn_stats_.ColRange(dim, 1).CopyFromMat(last_col);
          }
        }
        Matrix<double> global_cmvn_stats_dbl(global_cmvn_stats_);
        OnlineCmvnState initial_state(global_cmvn_stats_dbl);
        // The CMVN stage takes base_feature_ as its input.
        cmvn_ = new OnlineCmvn(info_.cmvn_opts, initial_state, base_feature_);
      }
      // zhangfeifan end

      // Feature that pitch (if any) is appended to: the CMVN output when CMVN
      // is enabled, otherwise the raw base feature.
      OnlineFeatureInterface *nnet_base_feature = base_feature_;
      if (cmvn_ != NULL)
        nnet_base_feature = cmvn_;  // zhangfeifan

      if (info_.add_pitch) {
        pitch_ = new OnlinePitchFeature(info_.pitch_opts);
        pitch_feature_ = new OnlineProcessPitch(info_.pitch_process_opts,
                                                pitch_);
        feature_plus_optional_pitch_ = new OnlineAppendFeature(nnet_base_feature,
                                                               pitch_feature_);
      } else {
        pitch_ = NULL;
        pitch_feature_ = NULL;
        feature_plus_optional_pitch_ = nnet_base_feature;
      }

      if (info_.use_ivectors) {
        // The iVector extractor is fed the base feature directly, not the
        // CMVN-normalized one.
        ivector_feature_ = new OnlineIvectorFeature(info_.ivector_extractor_info,
                                                    base_feature_);
        final_feature_ = new OnlineAppendFeature(feature_plus_optional_pitch_,
                                                 ivector_feature_);
      } else {
        ivector_feature_ = NULL;
        final_feature_ = feature_plus_optional_pitch_;
      }
      dim_ = final_feature_->Dim();
    }
    
    // Returns the dimension of the final feature, as cached in dim_ by the
    // constructor.
    int32 OnlineNnet2FeaturePipeline::Dim() const {
      return dim_;
    }
    
    // Asks the final stage of the pipeline whether `frame` is the last frame.
    bool OnlineNnet2FeaturePipeline::IsLastFrame(int32 frame) const {
      const bool is_last = final_feature_->IsLastFrame(frame);
      return is_last;
    }
    
    // Returns the number of frames the final stage of the pipeline has ready.
    int32 OnlineNnet2FeaturePipeline::NumFramesReady() const {
      const int32 num_ready = final_feature_->NumFramesReady();
      return num_ready;
    }
    
    // Writes frame number `frame` of the final feature into *feat by
    // delegating to the final stage of the pipeline.
    void OnlineNnet2FeaturePipeline::GetFrame(int32 frame,
                                              VectorBase<BaseFloat> *feat) {
      final_feature_->GetFrame(frame, feat);
    }
    
    // Passes the iVector-extractor adaptation state on to the iVector feature.
    // This concerns iVector adaptation only; when iVectors are not in use there
    // is nothing to do and the call is silently ignored.
    void OnlineNnet2FeaturePipeline::SetAdaptationState(
        const OnlineIvectorExtractorAdaptationState &adaptation_state) {
      if (!info_.use_ivectors)
        return;  // no iVector stage, nothing to adapt.
      ivector_feature_->SetAdaptationState(adaptation_state);
    }
    
    // Fetches the current iVector-extractor adaptation state from the iVector
    // feature into *adaptation_state.  When iVectors are not in use the output
    // is left untouched.
    void OnlineNnet2FeaturePipeline::GetAdaptationState(
        OnlineIvectorExtractorAdaptationState *adaptation_state) const {
      if (!info_.use_ivectors)
        return;  // no iVector stage, nothing to report.
      ivector_feature_->GetAdaptationState(adaptation_state);
    }
    //zhangfeifan start
    void OnlineNnet2FeaturePipeline::SetCmvnState(const OnlineCmvnState &cmvn_state) {
      cmvn_->SetState(cmvn_state);
    }
    
    // Copies the online-CMVN state as of the most recent ready frame into
    // *cmvn_state.
    // The CMVN stage exists only when global CMVN stats were loaded by the
    // constructor; otherwise *cmvn_state is left untouched instead of
    // dereferencing a pointer that was never set.
    void OnlineNnet2FeaturePipeline::GetCmvnState(OnlineCmvnState *cmvn_state) {
      if (global_cmvn_stats_.NumRows() == 0)
        return;  // CMVN not configured: cmvn_ was never created.
      int32 frame = cmvn_->NumFramesReady() - 1;
      // the following call will crash if no frames are ready.
      cmvn_->GetState(frame, cmvn_state);
    }
    void OnlineNnet2FeaturePipeline::FreezeCmvn() {
      cmvn_->Freeze(cmvn_->NumFramesReady() - 1);
    }
    
    //zhangfeifan end
    // Destructor: releases every stage of the pipeline exactly once.
    OnlineNnet2FeaturePipeline::~OnlineNnet2FeaturePipeline() {
      // Note: the delete command only deletes pointers that are non-NULL.  Not all
      // of the pointers below will be non-NULL.
      // Some of the online-feature pointers are just copies of other pointers,
      // and we do have to avoid deleting them in those cases.

      // zhangfeifan: the constructor creates cmvn_ only when global CMVN stats
      // were loaded, so consult the stats matrix (always initialized) before
      // trusting the pointer.
      OnlineCmvn *cmvn = NULL;
      if (global_cmvn_stats_.NumRows() != 0)
        cmvn = cmvn_;

      if (final_feature_ != feature_plus_optional_pitch_)
        delete final_feature_;
      delete ivector_feature_;
      // Without pitch, feature_plus_optional_pitch_ is merely an alias of either
      // base_feature_ or the CMVN stage; both are deleted below, so deleting the
      // alias as well would free the same object twice.
      if (feature_plus_optional_pitch_ != base_feature_ &&
          feature_plus_optional_pitch_ != cmvn)
        delete feature_plus_optional_pitch_;
      delete pitch_feature_;
      delete pitch_;
      delete cmvn;
      delete base_feature_;
    }
    
    // Feeds a new chunk of audio to the base feature extractor and, when pitch
    // is in use, to the pitch extractor as well.
    void OnlineNnet2FeaturePipeline::AcceptWaveform(
        BaseFloat sampling_rate,
        const VectorBase<BaseFloat> &waveform) {
      base_feature_->AcceptWaveform(sampling_rate, waveform);
      if (pitch_ == NULL)
        return;  // no pitch stage to feed.
      pitch_->AcceptWaveform(sampling_rate, waveform);
    }
    
    void OnlineNnet2FeaturePipeline::InputFinished() {
      base_feature_->InputFinished();
      if (pitch_)
        pitch_->InputFinished();
    }
    
    // Returns the frame shift of the configured base feature type, converted
    // from the milliseconds stored in its frame options to seconds.
    BaseFloat OnlineNnet2FeaturePipelineInfo::FrameShiftInSeconds() const {
      if (feature_type == "mfcc")
        return mfcc_opts.frame_opts.frame_shift_ms / 1000.0f;
      if (feature_type == "fbank")
        return fbank_opts.frame_opts.frame_shift_ms / 1000.0f;
      if (feature_type == "plp")
        return plp_opts.frame_opts.frame_shift_ms / 1000.0f;

      // Any other feature type is reported as an error.
      KALDI_ERR << "Unknown feature type " << feature_type;
      return 0.0;
    }
    
    
    }  // namespace kaldi
    
    // online2/online-nnet2-feature-pipeline.h
    
    // Copyright 2013-2014   Johns Hopkins University (author: Daniel Povey)
    
    #ifndef KALDI_ONLINE2_ONLINE_NNET2_FEATURE_PIPELINE_H_
    #define KALDI_ONLINE2_ONLINE_NNET2_FEATURE_PIPELINE_H_
    
    #include <string>
    #include <vector>
    #include <deque>
    
    #include "matrix/matrix-lib.h"
    #include "util/common-utils.h"
    #include "base/kaldi-error.h"
    #include "feat/online-feature.h"
    #include "feat/pitch-functions.h"
    #include "online2/online-ivector-feature.h"
    
    namespace kaldi {
    /// @addtogroup  onlinefeat OnlineFeatureExtraction
    /// @{
    
    /// @file
    /// This file contains a different version of the feature-extraction pipeline in
    /// \ref online-feature-pipeline.h, specialized for use in neural network
    /// decoding with iVectors.  Our recipe is that we extract iVectors that will
    /// be used as an additional input to the neural network, in addition to
    /// a window of several frames of spliced raw features (MFCC, PLP or filterbanks).
    /// The iVectors are extracted on top of a (splice+LDA+MLLT) feature pipeline,
    /// with the added complication that the GMM posteriors used for the iVector
    /// extraction are obtained with a version of the features that has online
    /// cepstral mean (and optionally variance) normalization, whereas the stats for
    /// iVector are accumulated with a non-mean-normalized version of the features.
    /// The idea here is that we want the iVector to learn the mean offset, but
    /// we want the posteriors to be somewhat invariant to mean offsets.
    ///
    /// Most of the logic for the actual iVector estimation is in \ref
    /// online-ivector-feature.h, this header contains mostly glue.
    ///
    /// Although the name of this header mentions nnet2, actually the code is
    /// used in the online decoding with nnet3 also.
    
    
    /// Command-line level configuration for the nnet2 feature pipeline.  It is
    /// used to set up OnlineNnet2FeaturePipelineInfo, which in turn configures
    /// OnlineNnet2FeaturePipeline.  Rather than holding the options of each
    /// pipeline stage directly, it holds the names of the config files from
    /// which those options are read.
    struct OnlineNnet2FeaturePipelineConfig {
      /// Base feature type: "plp", "mfcc" or "fbank".
      std::string feature_type;
      /// Config files for the respective base feature types.
      std::string mfcc_config;
      std::string plp_config;
      std::string fbank_config;

      /// Whether pitch is appended.  Note: if we do add pitch, it will not be
      /// added to the features we give to the iVector extractor but only to the
      /// features we give to the neural network, after the base features but
      /// before the iVector.  We don't think the iVector will be particularly
      /// helpful in normalizing the pitch features, and we wanted to avoid
      /// complications with things like online CMVN.
      bool add_pitch;

      /// Config file holding the kind of options that you could give to
      /// compute-and-process-kaldi-pitch-feats.
      std::string online_pitch_config;

      // zhangfeifan start
      /// Config file with the online-CMVN options.
      std::string cmvn_config;
      /// (Extended) filename of the global CMVN stats.
      std::string global_cmvn_stats_rxfilename;
      // zhangfeifan end

      /// Config file for the iVector extractor and the options related to it;
      /// see type OnlineIvectorExtractionConfig.
      std::string ivector_extraction_config;

      /// Controls how silence is weighted for (iVector) adaptation.  Registered
      /// directly on the command line because you might want to play with it at
      /// test time.
      OnlineSilenceWeightingConfig silence_weighting_config;

      /// Defaults: MFCC base features, no pitch; all filenames empty.
      OnlineNnet2FeaturePipelineConfig()
          : feature_type("mfcc"),
            add_pitch(false) {
      }

      /// Registers all of the options above with `opts`.
      void Register(OptionsItf *opts) {
        // Base feature selection and its per-type config files.
        opts->Register("feature-type", &feature_type,
                       "Base feature type [mfcc, plp, fbank]");
        opts->Register("mfcc-config", &mfcc_config, "Configuration file for "
                       "MFCC features (e.g. conf/mfcc.conf)");
        opts->Register("plp-config", &plp_config, "Configuration file for "
                       "PLP features (e.g. conf/plp.conf)");
        opts->Register("fbank-config", &fbank_config, "Configuration file for "
                       "filterbank features (e.g. conf/fbank.conf)");

        // Pitch.
        opts->Register("add-pitch", &add_pitch, "Append pitch features to raw "
                       "MFCC/PLP/filterbank features [but not for iVector extraction]");
        opts->Register("online-pitch-config", &online_pitch_config, "Configuration "
                       "file for online pitch features, if --add-pitch=true (e.g. "
                       "conf/online_pitch.conf)");

        // Online CMVN (zhangfeifan start).
        opts->Register("cmvn-config", &cmvn_config, "Configuration class "
                       "file for online CMVN features (e.g. conf/online_cmvn.conf)");
        opts->Register("global-cmvn-stats", &global_cmvn_stats_rxfilename,
                       "(Extended) filename for global CMVN stats, e.g. obtained "
                       "from 'matrix-sum scp:data/train/cmvn.scp -'");
        // zhangfeifan end

        // iVector extraction and silence weighting.
        opts->Register("ivector-extraction-config", &ivector_extraction_config,
                       "Configuration file for online iVector extraction, "
                       "see class OnlineIvectorExtractionConfig in the code");
        silence_weighting_config.RegisterWithPrefix("ivector-silence-weighting", opts);
      }
    };
    
    
    /// This class is responsible for storing configuration variables, objects and
    /// options for OnlineNnet2FeaturePipeline (including the CMVN options and
    /// global-stats filename, and the iVector extractor, which is a member of
    /// ivector_extractor_info).  This class does not register options on the
    /// command line; instead, it is initialized from class
    /// OnlineNnet2FeaturePipelineConfig which reads the options from the command
    /// line.  The reason for structuring it this way is to make it easier to
    /// configure from code as well as from the command line, as well as for
    /// easier multithreaded operation.
    struct OnlineNnet2FeaturePipelineInfo {
      /// Default constructor: MFCC base features, no pitch, no iVectors and (as
      /// global_cmvn_stats_rxfilename is empty) no CMVN.  use_ivectors is set
      /// explicitly so that the pipeline constructor, which reads it, never sees
      /// an uninitialized value.
      OnlineNnet2FeaturePipelineInfo():
          feature_type("mfcc"), add_pitch(false), use_ivectors(false) { }

      /// Initializes this object from the command-line level configuration,
      /// reading the config files it names.
      OnlineNnet2FeaturePipelineInfo(
          const OnlineNnet2FeaturePipelineConfig &config);

      /// Frame shift of the selected base feature type, in seconds.
      BaseFloat FrameShiftInSeconds() const;

      std::string feature_type;  // "mfcc" or "plp" or "fbank"

      MfccOptions mfcc_opts;  // options for MFCC computation,
                              // if feature_type == "mfcc"
      PlpOptions plp_opts;  // Options for PLP computation, if feature_type == "plp"
      FbankOptions fbank_opts;  // Options for filterbank computation, if
                                // feature_type == "fbank"

      bool add_pitch;
      PitchExtractionOptions pitch_opts;  // Options for pitch extraction, if done.
      ProcessPitchOptions pitch_process_opts;  // Options for pitch post-processing

      // zhangfeifan start
      OnlineCmvnOptions cmvn_opts;  // Options for online CMN/CMVN computation.
      std::string global_cmvn_stats_rxfilename;  // Filename used for reading global
                                                 // CMVN stats; empty means that
                                                 // CMVN is not applied.
      // zhangfeifan end

      // If the user specified --ivector-extraction-config, we assume we're using
      // iVectors as an extra input to the neural net.  Actually, we don't
      // anticipate running this setup without iVectors.
      bool use_ivectors;
      OnlineIvectorExtractionInfo ivector_extractor_info;

      // Config for weighting silence in iVector adaptation.
      // We declare this outside of ivector_extractor_info... it was
      // just easier to set up the code that way; and also we think
      // it's the kind of thing you might want to play with directly
      // on the command line instead of inside sub-config-files.
      OnlineSilenceWeightingConfig silence_weighting_config;

      /// Dimension of the iVector, as reported by the extractor.
      int32 IvectorDim() { return ivector_extractor_info.extractor.IvectorDim(); }
     private:
      KALDI_DISALLOW_COPY_AND_ASSIGN(OnlineNnet2FeaturePipelineInfo);
    };
    
    
    
    /// OnlineNnet2FeaturePipeline puts together the various parts of the
    /// feature-processing pipeline for neural networks, in an online setting.
    /// The recipe here does not include fMLLR.  The network receives raw features
    /// such as MFCC, PLP or filterbank -- passed through online CMVN when global
    /// CMVN stats are configured -- optionally followed by pitch features and
    /// optionally augmented with an iVector that describes the speaker
    /// characteristics.  The iVector is extracted using class
    /// OnlineIvectorFeature (see that class for more info on how it's done).
    /// No splicing is currently done in this code, as we're currently only
    /// supporting the nnet2 neural network in which the splicing is done inside
    /// the network.  Probably our strategy for nnet1 network conversion would be
    /// to convert to nnet2 and just add layers to do the splicing.
    class OnlineNnet2FeaturePipeline: public OnlineFeatureInterface {
     public:
      /// Constructs the pipeline from the "info" object, which is held by
      /// reference.  After calling this for a non-initial utterance of a
      /// speaker, you may want to call SetAdaptationState().
      explicit OnlineNnet2FeaturePipeline(
          const OnlineNnet2FeaturePipelineInfo &info);

      // ---- Member functions from OnlineFeatureInterface ----

      /// Returns the dimension of the final feature: the base-feature dimension
      /// (e.g. 13 for normal MFCC), plus the pitch-feature dimension (e.g. 3) if
      /// used, plus the iVector dimension if used.  Any frame-splicing happens
      /// inside the neural-network code.
      virtual int32 Dim() const;

      virtual bool IsLastFrame(int32 frame) const;
      virtual int32 NumFramesReady() const;
      virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat);

      // ---- iVector adaptation state ----

      /// Sets the adaptation state to a particular value, e.g. reflecting
      /// previous utterances of the same speaker; this will generally be called
      /// after Copy().
      void SetAdaptationState(
          const OnlineIvectorExtractorAdaptationState &adaptation_state);

      /// Gets the adaptation state; you may want to call this before destroying
      /// this object, to get adaptation state that can be used to improve
      /// decoding of later utterances of this speaker.  You might not want to do
      /// this, though, if you have reason to believe that something went wrong in
      /// the recognition (e.g., low confidence).
      void GetAdaptationState(
          OnlineIvectorExtractorAdaptationState *adaptation_state) const;

      // ---- Online CMVN state (zhangfeifan start) ----
      // These are only meaningful when CMVN is enabled.

      /// Stops the CMVN from moving further (do this when you start using
      /// fMLLR).  This will crash if NumFramesReady() == 0.
      void FreezeCmvn();

      /// Sets the CMVN state to a particular value (will generally be called
      /// after Copy()).
      void SetCmvnState(const OnlineCmvnState &cmvn_state);

      /// Retrieves the CMVN state as of the most recent ready frame.
      void GetCmvnState(OnlineCmvnState *cmvn_state);
      // zhangfeifan end

      // ---- Audio input ----

      /// Accepts more data to process.  It won't actually process it until you
      /// call GetFrame() [probably indirectly via (decoder).AdvanceDecoding()];
      /// when you call this function it will just copy it.  sampling_rate is
      /// necessary just to assert it equals what's in the config.
      void AcceptWaveform(BaseFloat sampling_rate,
                          const VectorBase<BaseFloat> &waveform);

      /// Frame shift in seconds, as given by the info object.
      BaseFloat FrameShiftInSeconds() const { return info_.FrameShiftInSeconds(); }

      /// If you call InputFinished(), it tells the class you won't be providing
      /// any more waveform.  This will help flush out the last few frames of
      /// delta or LDA features, and finalize the pitch features (making them more
      /// accurate)... although since in neural-net decoding we don't anticipate
      /// rescoring the lattices, this may not be much of an issue.
      void InputFinished();

      /// Returns the iVector-extracting part of the feature pipeline (or NULL if
      /// iVectors are not being used); the pointer is owned here and not given to
      /// the caller.  This function is used in nnet3, and also in the
      /// silence-weighting code used to exclude silence from the iVector
      /// estimation.
      OnlineIvectorFeature *IvectorFeature() {
        return ivector_feature_;
      }

      /// Returns the part of the feature pipeline that would be given as the
      /// primary (non-iVector) input to the neural network in nnet3
      /// applications.
      OnlineFeatureInterface *InputFeature() {
        return feature_plus_optional_pitch_;
      }

      virtual ~OnlineNnet2FeaturePipeline();

     private:
      // Configuration, held by reference.
      const OnlineNnet2FeaturePipelineInfo &info_;

      // zhangfeifan start
      // Global CMVN stats; left empty when no stats filename is configured.
      Matrix<BaseFloat> global_cmvn_stats_;
      // Online CMVN applied to base_feature_; created by the constructor only
      // when global_cmvn_stats_ is non-empty.
      OnlineCmvn *cmvn_;
      // zhangfeifan end

      // MFCC/PLP/filterbank.
      OnlineBaseFeature *base_feature_;

      // Raw pitch, if used.
      OnlinePitchFeature *pitch_;
      // Processed pitch, if pitch used.
      OnlineProcessPitch *pitch_feature_;

      // feature_plus_optional_pitch_ is cmvn_ (when CMVN is enabled) or
      // base_feature_ (otherwise), appended (OnlineAppendFeature) with
      // pitch_feature_ if pitch is used; without pitch it points to the same
      // address as cmvn_ or base_feature_ respectively.
      OnlineFeatureInterface *feature_plus_optional_pitch_;

      // iVector feature, if used.
      OnlineIvectorFeature *ivector_feature_;

      // final_feature_ is feature_plus_optional_pitch_ appended
      // (OnlineAppendFeature) with ivector_feature_, if ivector_feature_ is used;
      // otherwise, points to the same address as feature_plus_optional_pitch_.
      OnlineFeatureInterface *final_feature_;

      // we cache the feature dimension, to save time when calling Dim().
      int32 dim_;
    };
    
    
    
    
    /// @} End of "addtogroup onlinefeat"
    }  // namespace kaldi
    
    
    
    #endif  // KALDI_ONLINE2_ONLINE_NNET2_FEATURE_PIPELINE_H_

    相关文章

      网友评论

          本文标题:cvte在线解码器源码修改(干货)

          本文链接:https://www.haomeiwen.com/subject/stpfgxtx.html