美文网首页Android音视频系列
《Android FFmpeg 播放器开发梳理》第四章 音频重采样

《Android FFmpeg 播放器开发梳理》第四章 音频重采样

作者: cain_huang | 来源:发表于2019-01-22 13:46 被阅读142次

    前面一章,我们讲解了音频输出的处理,这一章将会讲解音频重采样以及变速变调处理。
    AudioResampler是音频重采样处理的对象。重采样器主要是用来从音频解码器AudioDecoder中解码得到一帧音频帧,然后根据同步的类型,判断是否需要对其进行重采样处理以及变速变调处理的逻辑。其实现代码如下:

    /**
     * Audio parameters describing one side (source or target) of the
     * resampling conversion.
     */
    typedef struct AudioParams {
        int freq;                   // sample rate in Hz
        int channels;               // channel count
        int64_t channel_layout;     // FFmpeg channel layout mask
        enum AVSampleFormat fmt;    // sample format
        int frame_size;             // bytes of one sample frame (one sample for every channel)
        int bytes_per_sec;          // bytes of PCM per second (freq sample frames)
    } AudioParams;
    
    /**
     * Audio resampler state (mirrors ffplay's audio-related fields).
     */
    typedef struct AudioState {
        double audioClock;                      // audio clock: pts of the end of the last consumed frame, in seconds
        double audio_diff_cum;                  // exponentially weighted cumulative clock difference (for sync)
        double audio_diff_avg_coef;             // averaging coefficient applied to audio_diff_cum
        double audio_diff_threshold;            // avg diff above which samples are added/dropped
        int audio_diff_avg_count;               // number of diffs accumulated so far
        int audio_hw_buf_size;                  // hardware (device) buffer size in bytes
        uint8_t *outputBuffer;                  // PCM handed to the device; aliases resampleBuffer, soundTouchBuffer or frame->data[0] — not separately owned
        uint8_t *resampleBuffer;                // swr_convert() output buffer (owned, grown via av_fast_malloc)
        short *soundTouchBuffer;                // SoundTouch in/out buffer (owned, grown via av_fast_malloc)
        unsigned int bufferSize;                // valid bytes currently in outputBuffer
        unsigned int resampleSize;              // allocated capacity of resampleBuffer
        unsigned int soundTouchBufferSize;      // allocated capacity of soundTouchBuffer
        int bufferIndex;                        // read offset into outputBuffer
        int writeBufferSize;                    // bytes left unconsumed in outputBuffer after a device callback
        SwrContext *swr_ctx;                    // libswresample conversion context
        int64_t audio_callback_time;            // timestamp of the last device callback (av_gettime_relative)
        AudioParams audioParamsSrc;             // source (decoded frame) parameters
        AudioParams audioParamsTarget;          // target (device) parameters
    } AudioState;
    
    /**
     * Audio resampler: pulls decoded frames from the AudioDecoder, converts
     * them to the output device format, optionally applies speed/pitch
     * processing (SoundTouch), and serves the device PCM callback.
     */
    class AudioResampler {
    public:
        AudioResampler(PlayerState *playerState, AudioDecoder *audioDecoder, MediaSync *mediaSync);
    
        virtual ~AudioResampler();
    
        // Configure the target parameters from the opened device spec.
        // Returns 0 on success, -1 on invalid computed sizes.
        int setResampleParams(AudioDeviceSpec *spec, int64_t wanted_channel_layout);
    
        // Device callback: fill `stream` with `len` bytes of PCM.
        void pcmQueueCallback(uint8_t *stream, int len);
    
    private:
        // Compute the wanted sample count for A/V sync (may add/drop samples).
        int audioSynchronize(int nbSamples);
    
        // Decode + resample one frame; returns output byte count, <0 on error.
        int audioFrameResample();
    
    private:
        PlayerState *playerState;               // player state (borrowed)
        MediaSync *mediaSync;                   // clock synchronizer (borrowed)
    
        AVFrame *frame;                         // reusable decode frame (owned)
        AudioDecoder *audioDecoder;             // audio decoder (borrowed)
        AudioState *audioState;                 // resampler state (owned)
        SoundTouchWrapper *soundTouchWrapper;   // speed/pitch processor (owned)
    };
    
    
    /**
     * Constructor: stores non-owning references to the player state, decoder
     * and synchronizer, and allocates the resampler state, the SoundTouch
     * wrapper and the reusable decode frame.
     */
    AudioResampler::AudioResampler(PlayerState *playerState, AudioDecoder *audioDecoder, MediaSync *mediaSync) {
        this->playerState = playerState;
        this->audioDecoder = audioDecoder;
        this->mediaSync = mediaSync;
        // av_mallocz() already returns zero-filled memory, so the extra
        // memset() the original performed here was redundant and is removed.
        audioState = (AudioState *) av_mallocz(sizeof(AudioState));
        soundTouchWrapper = new SoundTouchWrapper();
        frame = av_frame_alloc();
    }
    
    /**
     * Destructor: releases everything this object owns (SoundTouch wrapper,
     * resampler state and its buffers, decode frame). The player state,
     * decoder and synchronizer are borrowed and only have their references
     * dropped.
     */
    AudioResampler::~AudioResampler() {
        playerState = NULL;
        audioDecoder = NULL;
        mediaSync = NULL;
        if (soundTouchWrapper) {
            delete soundTouchWrapper;
            soundTouchWrapper = NULL;
        }
        if (audioState) {
            swr_free(&audioState->swr_ctx);
            av_freep(&audioState->resampleBuffer);
            // Fix: soundTouchBuffer is allocated with av_fast_malloc() in
            // audioFrameResample() and was never released — freeing it here
            // closes a memory leak. outputBuffer merely aliases one of the
            // other buffers (or frame->data[0]) and must NOT be freed.
            av_freep(&audioState->soundTouchBuffer);
            memset(audioState, 0, sizeof(AudioState));
            av_free(audioState);
            audioState = NULL;
        }
        if (frame) {
            av_frame_unref(frame);
            av_frame_free(&frame);
            frame = NULL;
        }
    }
    
    /**
     * Configure the resampler from the opened audio device spec.
     *
     * @param spec                  negotiated device parameters (freq/channels/size)
     * @param wanted_channel_layout FFmpeg channel layout to output
     * @return 0 on success, -1 when the computed frame/byte sizes are invalid.
     */
    int AudioResampler::setResampleParams(AudioDeviceSpec *spec, int64_t wanted_channel_layout) {
    
        // Fill in the target (device) parameters FIRST. The original code
        // copied audioParamsTarget into audioParamsSrc and derived
        // audio_diff_threshold (which divides by bytes_per_sec) before the
        // target fields were assigned, so on the first call it divided by 0
        // and the source parameters were left stale. ffplay performs these
        // assignments in this corrected order.
        audioState->audioParamsTarget.fmt = AV_SAMPLE_FMT_S16;
        audioState->audioParamsTarget.freq = spec->freq;
        audioState->audioParamsTarget.channel_layout = wanted_channel_layout;
        audioState->audioParamsTarget.channels = spec->channels;
        // Bytes of one sample frame (one sample for every channel).
        audioState->audioParamsTarget.frame_size = av_samples_get_buffer_size(NULL, audioState->audioParamsTarget.channels, 1,
                                                                              audioState->audioParamsTarget.fmt, 1);
        // Bytes of PCM per second of output audio.
        audioState->audioParamsTarget.bytes_per_sec = av_samples_get_buffer_size(NULL, audioState->audioParamsTarget.channels,
                                                                                 audioState->audioParamsTarget.freq,
                                                                                 audioState->audioParamsTarget.fmt, 1);
    
        if (audioState->audioParamsTarget.bytes_per_sec <= 0 || audioState->audioParamsTarget.frame_size <= 0) {
            av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size failed\n");
            return -1;
        }
    
        // Source starts identical to the target; audioFrameResample() updates
        // it when the first decoded frame arrives.
        audioState->audioParamsSrc = audioState->audioParamsTarget;
    
        audioState->audio_hw_buf_size = spec->size;
        audioState->bufferSize = 0;
        audioState->bufferIndex = 0;
        // Exponential averaging coefficient and sync threshold (as in ffplay).
        audioState->audio_diff_avg_coef = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
        audioState->audio_diff_avg_count = 0;
        audioState->audio_diff_threshold = (double) (audioState->audio_hw_buf_size) / audioState->audioParamsTarget.bytes_per_sec;
    
        return 0;
    }
    
    /**
     * Audio device PCM callback: fills `stream` with exactly `len` bytes.
     * Drains outputBuffer, refilling it via audioFrameResample() whenever it
     * is exhausted, then reports the updated audio clock to MediaSync.
     */
    void AudioResampler::pcmQueueCallback(uint8_t *stream, int len) {
        int bufferSize, length;
    
        // Without a decoder there is nothing to play — output silence.
        if (!audioDecoder) {
            memset(stream, 0, len);
            return;
        }
    
        audioState->audio_callback_time = av_gettime_relative();
        while (len > 0) {
            // Current buffer fully consumed — decode/resample the next frame.
            if (audioState->bufferIndex >= audioState->bufferSize) {
                bufferSize = audioFrameResample();
                if (bufferSize < 0) {
                    // Resample failed: emit silence, sized to a whole number
                    // of sample frames so the stream stays frame-aligned.
                    audioState->outputBuffer = NULL;
                    audioState->bufferSize = (unsigned int) (AUDIO_MIN_BUFFER_SIZE / audioState->audioParamsTarget.frame_size
                                                             * audioState->audioParamsTarget.frame_size);
                } else {
                    audioState->bufferSize = bufferSize;
                }
                audioState->bufferIndex = 0;
            }
    
            // Copy at most the remaining bytes of the current buffer.
            length = audioState->bufferSize - audioState->bufferIndex;
            if (length > len) {
                length = len;
            }
            // Copy converted PCM into the device buffer (silence when muted
            // or when no valid data is available).
            if (audioState->outputBuffer != NULL && !playerState->mute) {
                memcpy(stream, audioState->outputBuffer + audioState->bufferIndex, length);
            } else {
                memset(stream, 0, length);
            }
            len -= length;
            stream += length;
            audioState->bufferIndex += length;
        }
        // Bytes decoded but not yet handed to the device.
        audioState->writeBufferSize = audioState->bufferSize - audioState->bufferIndex;
    
        // Back the clock off by the audio still queued (assumed two hardware
        // buffers plus the leftover bytes) so MediaSync sees the timestamp of
        // the sample currently being heard.
        if (!isnan(audioState->audioClock) && mediaSync) {
            mediaSync->updateAudioClock(audioState->audioClock -
                                        (double) (2 * audioState->audio_hw_buf_size + audioState->writeBufferSize)
                                        / audioState->audioParamsTarget.bytes_per_sec,
                                        audioState->audio_callback_time / 1000000.0);
        }
    }
    
    /**
     * Compute the number of samples to output for this frame so the audio
     * stream converges toward the master clock (ffplay's synchronize_audio).
     * When audio itself is the master clock, the frame is passed through
     * unchanged; otherwise a smoothed clock difference may add or drop a
     * bounded percentage of samples.
     */
    int AudioResampler::audioSynchronize(int nbSamples) {
        // Audio is the master clock — never adjust it against itself.
        if (playerState->syncType == AV_SYNC_AUDIO) {
            return nbSamples;
        }
    
        double clockDiff = mediaSync ? mediaSync->getAudioDiffClock() : 0;
    
        // Diff unusable or hopelessly large: restart the averaging window.
        if (isnan(clockDiff) || fabs(clockDiff) >= AV_NOSYNC_THRESHOLD) {
            audioState->audio_diff_avg_count = 0;
            audioState->audio_diff_cum = 0;
            return nbSamples;
        }
    
        // Fold this diff into the exponentially weighted accumulator.
        audioState->audio_diff_cum = clockDiff + audioState->audio_diff_avg_coef * audioState->audio_diff_cum;
    
        // Not enough measurements yet for a stable estimate.
        if (audioState->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) {
            audioState->audio_diff_avg_count++;
            return nbSamples;
        }
    
        double smoothedDiff = audioState->audio_diff_cum * (1.0 - audioState->audio_diff_avg_coef);
        if (fabs(smoothedDiff) < audioState->audio_diff_threshold) {
            return nbSamples;
        }
    
        // Correct by the raw diff, clamped to ±SAMPLE_CORRECTION_PERCENT_MAX%.
        int desired = nbSamples + (int) (clockDiff * audioState->audioParamsSrc.freq);
        int lowerBound = nbSamples * (100 - SAMPLE_CORRECTION_PERCENT_MAX) / 100;
        int upperBound = nbSamples * (100 + SAMPLE_CORRECTION_PERCENT_MAX) / 100;
        return av_clip(desired, lowerBound, upperBound);
    }
    
    int AudioResampler::audioFrameResample() {
        int data_size, resampled_data_size;
        int64_t dec_channel_layout;
        int wanted_nb_samples;
        int translate_time = 1;
        int ret = -1;
    
        // 处于暂停状态
        if (!audioDecoder || playerState->abortRequest || playerState->pauseRequest) {
            return -1;
        }
    
        for (;;) {
    
            // 如果数据包解码失败,直接返回
            if ((ret = audioDecoder->getAudioFrame(frame)) < 0) {
                return -1;
            }
            if (ret == 0) {
                continue;
            }
    
            data_size = av_samples_get_buffer_size(NULL, av_frame_get_channels(frame),
                                                   frame->nb_samples,
                                                   (AVSampleFormat)frame->format, 1);
    
            dec_channel_layout =
                    (frame->channel_layout && av_frame_get_channels(frame) == av_get_channel_layout_nb_channels(frame->channel_layout))
                    ? frame->channel_layout : av_get_default_channel_layout(av_frame_get_channels(frame));
            wanted_nb_samples = audioSynchronize(frame->nb_samples);
    
            // 帧格式跟源格式不对????
            if (frame->format != audioState->audioParamsSrc.fmt
                || dec_channel_layout != audioState->audioParamsSrc.channel_layout
                || frame->sample_rate != audioState->audioParamsSrc.freq
                || (wanted_nb_samples != frame->nb_samples && !audioState->swr_ctx)) {
    
                swr_free(&audioState->swr_ctx);
                audioState->swr_ctx = swr_alloc_set_opts(NULL, audioState->audioParamsTarget.channel_layout,
                                                         audioState->audioParamsTarget.fmt, audioState->audioParamsTarget.freq,
                                                         dec_channel_layout, (AVSampleFormat)frame->format,
                                                         frame->sample_rate, 0, NULL);
    
                if (!audioState->swr_ctx || swr_init(audioState->swr_ctx) < 0) {
                    av_log(NULL, AV_LOG_ERROR, "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                           frame->sample_rate,
                           av_get_sample_fmt_name((AVSampleFormat)frame->format),
                           av_frame_get_channels(frame),
                           audioState->audioParamsTarget.freq,
                           av_get_sample_fmt_name(audioState->audioParamsTarget.fmt),
                           audioState->audioParamsTarget.channels);
                    swr_free(&audioState->swr_ctx);
                    return -1;
                }
                audioState->audioParamsSrc.channel_layout = dec_channel_layout;
                audioState->audioParamsSrc.channels = av_frame_get_channels(frame);
                audioState->audioParamsSrc.freq = frame->sample_rate;
                audioState->audioParamsSrc.fmt = (AVSampleFormat)frame->format;
            }
    
            // 音频重采样处理
            if (audioState->swr_ctx) {
                const uint8_t **in = (const uint8_t **)frame->extended_data;
                uint8_t **out = &audioState->resampleBuffer;
                int out_count = (int64_t)wanted_nb_samples * audioState->audioParamsTarget.freq / frame->sample_rate + 256;
                int out_size  = av_samples_get_buffer_size(NULL, audioState->audioParamsTarget.channels, out_count, audioState->audioParamsTarget.fmt, 0);
                int len2;
                if (out_size < 0) {
                    av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
                    return -1;
                }
                if (wanted_nb_samples != frame->nb_samples) {
                    if (swr_set_compensation(audioState->swr_ctx, (wanted_nb_samples - frame->nb_samples) * audioState->audioParamsTarget.freq / frame->sample_rate,
                                             wanted_nb_samples * audioState->audioParamsTarget.freq / frame->sample_rate) < 0) {
                        av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                        return -1;
                    }
                }
                av_fast_malloc(&audioState->resampleBuffer, &audioState->resampleSize, out_size);
                if (!audioState->resampleBuffer) {
                    return AVERROR(ENOMEM);
                }
                len2 = swr_convert(audioState->swr_ctx, out, out_count, in, frame->nb_samples);
                if (len2 < 0) {
                    av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
                    return -1;
                }
                if (len2 == out_count) {
                    av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
                    if (swr_init(audioState->swr_ctx) < 0) {
                        swr_free(&audioState->swr_ctx);
                    }
                }
                audioState->outputBuffer = audioState->resampleBuffer;
                resampled_data_size = len2 * audioState->audioParamsTarget.channels * av_get_bytes_per_sample(audioState->audioParamsTarget.fmt);
    
                // 变速变调处理
                if ((playerState->playbackRate != 1.0f || playerState->playbackPitch != 1.0f) && !playerState->abortRequest) {
                    int bytes_per_sample = av_get_bytes_per_sample(audioState->audioParamsTarget.fmt);
                    av_fast_malloc(&audioState->soundTouchBuffer, &audioState->soundTouchBufferSize, out_size * translate_time);
                    for (int i = 0; i < (resampled_data_size / 2); i++) {
                        audioState->soundTouchBuffer[i] = (audioState->resampleBuffer[i * 2] | (audioState->resampleBuffer[i * 2 + 1] << 8));
                    }
                    if (!soundTouchWrapper) {
                        soundTouchWrapper = new SoundTouchWrapper();
                    }
                    int ret_len = soundTouchWrapper->translate(audioState->soundTouchBuffer, (float)(playerState->playbackRate),
                                                               (float)(playerState->playbackPitch != 1.0f ? playerState->playbackPitch : 1.0f / playerState->playbackRate),
                                                               resampled_data_size / 2, bytes_per_sample,
                                                               audioState->audioParamsTarget.channels, frame->sample_rate);
                    if (ret_len > 0) {
                        audioState->outputBuffer = (uint8_t*)audioState->soundTouchBuffer;
                        resampled_data_size = ret_len;
                    } else {
                        translate_time++;
                        av_frame_unref(frame);
                        continue;
                    }
                }
            } else {
                audioState->outputBuffer = frame->data[0];
                resampled_data_size = data_size;
            }
    
            // 处理完直接退出循环
            break;
        }
    
        // 利用pts更新音频时钟
        if (frame->pts != AV_NOPTS_VALUE) {
            audioState->audioClock = frame->pts * av_q2d((AVRational){1, frame->sample_rate})
                                     + (double) frame->nb_samples / frame->sample_rate;
        } else {
            audioState->audioClock = NAN;
        }
    
        // 使用完成释放引用,防止内存泄漏
        av_frame_unref(frame);
    
        return resampled_data_size;
    }
    

    以上就是处理音频重采样以及变速变调处理的代码。这个代码也没啥好说的,在不是同步到音频时钟的情况下,我们需要根据实际的采样率(sample_rate) 得到目标采样率对应的采样数量(wanted_nb_samples),然后经过音频重采样处理,得到重采样后的缓冲数据,然后做变速变调处理,接着计算出重采样以及变速变调处理后的时长,加上原来的时钟,得到处理后的音频时间戳(pts)。我们通过不断地把音频输出设备回调的PCM缓冲区填满,填满后,我们需要计算出当前的音频时间戳用了多少,通知MediaSync更新音频时钟以及同步更新外部时钟。音频重采样以及变速变调处理的流程大体就这样了。

    当音频输出设备回调填充PCM数据方法时,我们的播放器将会通过void pcmQueueCallback(uint8_t *stream, int len); 方法调用音频重采样器进行处理,代码如下:

    /**
     * Device PCM callback entry point: delegates to the resampler, or fills
     * the buffer with silence when no resampler exists.
     */
    void MediaPlayer::pcmQueueCallback(uint8_t *stream, int len) {
        if (!audioResampler) {
            // Fix: the original used memset(stream, 0, sizeof(len)), which
            // zeroes only sizeof(int) == 4 bytes and leaves the rest of the
            // device buffer with stale garbage. Silence all `len` bytes.
            memset(stream, 0, len);
            return;
        }
        audioResampler->pcmQueueCallback(stream, len);
    }
    

    至此,音频重采样以及变速变调处理就讲解完了。
    完整代码请参考本人的播放器项目:CainPlayer

    相关文章

      网友评论

        本文标题:《Android FFmpeg 播放器开发梳理》第四章 音频重采样

        本文链接:https://www.haomeiwen.com/subject/ficfcqtx.html