美文网首页ffmpeg
ffplay.c 源码分析- 音频部分

ffplay.c 源码分析- 音频部分

作者: deep_sadness | 来源:发表于2018-11-26 17:24 被阅读51次

    FFmpeg 代码 version 3.3:

    ffplay中的线程模型

    音频的线程模型.png

    概述

    主要的负责音频播放的线程,主要有如上几个

    1. 读取线程-read_thread
    在main方法中会启动的读取的线程。
    这个和视频的线程模型中是一致的。不同的是,循环读取的数据是音频数据。

    • 循环读取
      这个线程中,会进行读取的循环。不断的通过av_read_frame方法,读取解码前的数据packet。
    • 送入队列
      最后将得到的数据,送入对应的流的packet队列(视频/音频/字幕都对应各自流自己的队列)

    2. 对应流的解码线程-audio_thread
    在读取线程中,对AVFormatContext进行初始化,获取AVStream信息后,对应不同的码流会开启对应的解码线程Decode Thread。
    ffplay中这里包括了3种流。视频流。音频流和字幕流。

    • 循环读取
      会从对应流的packet队列中,得到数据。
      然后送入解码器,通过avcodec_decode_audio4(旧的API;视频流对应的是avcodec_decode_video2)进行解码。

    • 送入队列
      解码之后,得到解码后的数据AVFrame,并确定对应的pts
      最后将其送入解码后的帧队列当中。

    3.播放的设置
    SDL的音频播放,主要是设置一个audiocallback,在callback当中,将我们解码后的数据设置给传入buff地址中。
    后续SDL会再将这个buff地址,传给对应的音频播放设备,进行播放。
    具体可见 SDL2库(4)-Android 端源码简要分析(AudioSubSystem) 文章中所述。

    整体的流程就是这样简单。


    音频参数

    定义了一个结构体,来简单的保存音频的参数。

    // Describes one audio configuration; used both for the decoded source
    // (audio_src) and for what the output device accepted (audio_tgt).
    typedef struct AudioParams {
        // sample rate in Hz
        int freq;
        // number of channels
        int channels;
        // channel layout mask: describes which channels are present; the channel
        // count can be derived from it with av_get_channel_layout_nb_channels()
        int64_t channel_layout;
        // sample format
        enum AVSampleFormat fmt;
        // size in bytes of one sample across all channels
        // (bytes per sample * number of channels)
    int frame_size;
        // number of bytes needed for one second of audio
        int bytes_per_sec;
    } AudioParams;
    

    ffplay初始化(main_thread)

    进行初始化的整体流程,大部分和上一篇文章《ffplay.c 源码分析- 视频部分》相似
    (其中包括:对FFmpeg的初始化、对传递的参数进行初始化、SDL的初始化、通过stream_open函数开启read_thread读取线程)。
    这里就不做过多描述了。
    具体来看一下音频相关的部分。

    开启对应的解码线程

    打开stream_component_open对应的AVStream。打开解码线程。
    ffplay中对应三种码流。(视频、音频和字幕,对应打开自己的解码线程)

    stream_component_open中的音频部分

       // Audio branch of stream_component_open (excerpt: the enclosing function
       // and the remaining cases are not shown here).
       switch (avctx->codec_type) {
        case AVMEDIA_TYPE_AUDIO:
            // the audio-filter path is left out of this excerpt
            sample_rate    = avctx->sample_rate;
            nb_channels    = avctx->channels;
            channel_layout = avctx->channel_layout;
    
            /* prepare audio output */
            // open the audio device; on success the return value is the size in
            // bytes of the device's audio buffer, and is->audio_tgt is filled in
            if ((ret = audio_open(is, channel_layout, nb_channels, sample_rate, &is->audio_tgt)) < 0)
                goto fail;
            // remember the hardware buffer size; the source parameters start out
            // equal to the target ones, and the playback buffer starts empty
            is->audio_hw_buf_size = ret;
            is->audio_src = is->audio_tgt;
            is->audio_buf_size  = 0;
            is->audio_buf_index = 0;
    
            /* init averaging filter */
            // state used by the audio/video synchronisation code
            is->audio_diff_avg_coef  = exp(log(0.01) / AUDIO_DIFF_AVG_NB);
            is->audio_diff_avg_count = 0;
            /* since we do not have a precise enough audio FIFO fullness,
               we correct audio sync only if larger than this threshold */
            // threshold = duration in seconds of one hardware buffer
            is->audio_diff_threshold = (double)(is->audio_hw_buf_size) / is->audio_tgt.bytes_per_sec;
    
            is->audio_stream = stream_index;
            is->audio_st = ic->streams[stream_index];
            // initialise the audio decoder on top of the audio packet queue
            decoder_init(&is->auddec, avctx, &is->audioq, is->continue_read_thread);
            // for formats flagged as not seekable by the generic methods and that
            // have no read_seek, take the decoder's start pts from the stream
            if ((is->ic->iformat->flags & (AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH | AVFMT_NO_BYTE_SEEK)) && !is->ic->iformat->read_seek) {
                is->auddec.start_pts = is->audio_st->start_time;
                is->auddec.start_pts_tb = is->audio_st->time_base;
            }
            // start the audio decoding thread (audio_thread)
            if ((ret = decoder_start(&is->auddec, audio_thread, is)) < 0)
                goto out;
            // un-pause the device so that playback starts
            SDL_PauseAudioDevice(audio_dev, 0);
            break;
    

    这里重点来看一下打开音频设备的部分
    audio_open

    /*
     * Open the SDL audio device for the requested channel layout, channel count
     * and sample rate. If SDL refuses the request, other channel counts and then
     * lower sample rates are tried until one works or the fallbacks run out.
     *
     * opaque is passed to SDL as the callback's userdata. On success the
     * parameters the device actually accepted are stored in *audio_hw_params.
     *
     * Returns spec.size (the size of the device audio buffer) on success,
     * -1 on failure.
     */
    static int audio_open(void *opaque, int64_t wanted_channel_layout, int wanted_nb_channels, int wanted_sample_rate, struct AudioParams *audio_hw_params)
    {
        // wanted_spec is what we ask SDL for; spec receives what SDL actually opened
        SDL_AudioSpec wanted_spec, spec;
        const char *env;
        // fallback table: indexed by the channel count that just failed (capped
        // at 7), gives the next channel count to try; 0 means none left
        static const int next_nb_channels[] = {0, 0, 1, 6, 2, 6, 4, 6};
        // fallback sample rates, walked from high to low; 0 terminates the list
        static const int next_sample_rates[] = {0, 44100, 48000, 96000, 192000};
        int next_sample_rate_idx = FF_ARRAY_ELEMS(next_sample_rates) - 1;
    
        // the SDL_AUDIO_CHANNELS environment variable overrides the channel count
        env = SDL_getenv("SDL_AUDIO_CHANNELS");
        if (env) {
            wanted_nb_channels = atoi(env);
            wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
        }
        // if the layout is missing or disagrees with the channel count, fall back
        // to the default layout for that count, without the stereo-downmix bits
        if (!wanted_channel_layout || wanted_nb_channels != av_get_channel_layout_nb_channels(wanted_channel_layout)) {
            wanted_channel_layout = av_get_default_channel_layout(wanted_nb_channels);
            wanted_channel_layout &= ~AV_CH_LAYOUT_STEREO_DOWNMIX;
        }
        // the channel count is re-derived from the layout
        wanted_nb_channels = av_get_channel_layout_nb_channels(wanted_channel_layout);
        wanted_spec.channels = wanted_nb_channels;
        wanted_spec.freq = wanted_sample_rate;
        if (wanted_spec.freq <= 0 || wanted_spec.channels <= 0) {
            av_log(NULL, AV_LOG_ERROR, "Invalid sample rate or channel count!\n");
            return -1;
        }
        // position the fallback index on the highest rate below the wanted one
        while (next_sample_rate_idx && next_sample_rates[next_sample_rate_idx] >= wanted_spec.freq)
            next_sample_rate_idx--;
        // the output sample format is hard-coded to signed 16-bit, native endian
        wanted_spec.format = AUDIO_S16SYS;
        wanted_spec.silence = 0;
        // requested device buffer length (in sample frames per SDL's documentation,
        // not a per-second rate): a power of two derived from
        // freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC, but at least SDL_AUDIO_MIN_BUFFER_SIZE
        wanted_spec.samples = FFMAX(SDL_AUDIO_MIN_BUFFER_SIZE, 2 << av_log2(wanted_spec.freq / SDL_AUDIO_MAX_CALLBACKS_PER_SEC));
       // SDL asks for audio data through this callback, handing it the address
       // and length of the buffer to fill; filling that buffer is all that is
       // needed for playback
        wanted_spec.callback = sdl_audio_callback;
        wanted_spec.userdata = opaque;
        // try to open the device; on failure step through the fallback
        // channel counts, then the fallback sample rates
        while (!(audio_dev = SDL_OpenAudioDevice(NULL, 0, &wanted_spec, &spec, SDL_AUDIO_ALLOW_FREQUENCY_CHANGE | SDL_AUDIO_ALLOW_CHANNELS_CHANGE))) {
            av_log(NULL, AV_LOG_WARNING, "SDL_OpenAudio (%d channels, %d Hz): %s\n",
                   wanted_spec.channels, wanted_spec.freq, SDL_GetError());
            wanted_spec.channels = next_nb_channels[FFMIN(7, wanted_spec.channels)];
            if (!wanted_spec.channels) {
                // channel fallbacks exhausted: drop to the next lower sample rate
                // and start again from the originally wanted channel count
                wanted_spec.freq = next_sample_rates[next_sample_rate_idx--];
                wanted_spec.channels = wanted_nb_channels;
                if (!wanted_spec.freq) {
                    av_log(NULL, AV_LOG_ERROR,
                           "No more combinations to try, audio open failed\n");
                    return -1;
                }
            }
            wanted_channel_layout = av_get_default_channel_layout(wanted_spec.channels);
        }
        // only the requested S16 format is supported by the rest of the code
        if (spec.format != AUDIO_S16SYS) {
            av_log(NULL, AV_LOG_ERROR,
                   "SDL advised audio format %d is not supported!\n", spec.format);
            return -1;
        }
        // SDL was allowed to change the channel count; adopt the matching layout
        if (spec.channels != wanted_spec.channels) {
            wanted_channel_layout = av_get_default_channel_layout(spec.channels);
            if (!wanted_channel_layout) {
                av_log(NULL, AV_LOG_ERROR,
                       "SDL advised channel count %d is not supported!\n", spec.channels);
                return -1;
            }
        }
        // record the parameters the device actually accepted
        audio_hw_params->fmt = AV_SAMPLE_FMT_S16;
        audio_hw_params->freq = spec.freq;
        audio_hw_params->channel_layout = wanted_channel_layout;
        audio_hw_params->channels =  spec.channels;
        // frame_size: bytes for 1 sample on every channel;
        // bytes_per_sec: bytes for freq samples on every channel
        audio_hw_params->frame_size = av_samples_get_buffer_size(NULL, audio_hw_params->channels, 1, audio_hw_params->fmt, 1);
        audio_hw_params->bytes_per_sec = av_samples_get_buffer_size(NULL, audio_hw_params->channels, audio_hw_params->freq, audio_hw_params->fmt, 1);
        if (audio_hw_params->bytes_per_sec <= 0 || audio_hw_params->frame_size <= 0) {
            av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size failed\n");
            return -1;
        }
        return spec.size;
    }
    

    这里最值得注意的就是
    wanted_spec.callback = sdl_audio_callback;
    SDL播放音频的话,会将每次需要播放的数据,根据callback将数据位置传递给callback,
    我们需要在callback中自定义对音频数据的填充,就可以完成播放。

    音频解码线程audio_thread

    read_thread中对应音频流时,初始化好了AVCodec和AVCodecContext。通过decoder_start方法,开启了audio_thread。
    audio_thread中需要创建AVFrame来接收解码后的数据(音频不需要像视频那样确定帧率)。
    然后开启解码循环。
    不断的从队列中获取解码前的数据,然后送入解码器解码。
    再得到解码后的数据,在送入对应的队列当中。

    初始化参数

    创建AVFrame和得到大致的视频帧率

        // NOTE(review): this excerpt reads is->video_st and guesses a video frame
        // rate, so it appears to come from the video decoding thread rather than
        // audio_thread -- confirm against the ffplay.c source.
        // allocate the AVFrame that receives the decoded data
        AVFrame *frame = av_frame_alloc();
        // time base of the stream
        AVRational tb = is->video_st->time_base;
        // estimated frame rate of the stream
        AVRational frame_rate = av_guess_frame_rate(is->ic, is->video_st, NULL);
    

    开始循环解码

    解码的方式和视频播放相同。这儿就不做过多解释了。解码完,同样送入队列当中。

    音频设置部分

    正如上面所述,我们需要在传入的callback中对我们的数据进行处理。callback也同样运行在SDL中创建的RunAudio线程。
    SDL具体的运行方式,可见SDL2库(4)-Android 端源码简要分析(AudioSubSystem);

    /* prepare a new audio buffer */
    /* prepare a new audio buffer */
    /*
     * Audio callback registered with SDL. Fills stream with len bytes of audio:
     * decoded data is pulled through audio_decode_frame() whenever the current
     * buffer is used up, silence is written on error or when muted, and the
     * audio clock is updated at the end.
     * opaque is the VideoState that was registered as userdata.
     */
    static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
    {
        VideoState *is = opaque;
        int audio_size, len1;
    
        audio_callback_time = av_gettime_relative();
        
        // len is the number of bytes still to be written, stream is the current
        // write position in the buffer supplied by SDL
        while (len > 0) {
            if (is->audio_buf_index >= is->audio_buf_size) {
               // the current buffer is fully consumed: fetch the next decoded
               // (and, if needed, resampled) frame; audio_size is its size in bytes
               audio_size = audio_decode_frame(is);
              // a negative value means no data could be produced
               if (audio_size < 0) {
                    /* if error, just output silence */
                   is->audio_buf = NULL;
                   is->audio_buf_size = SDL_AUDIO_MIN_BUFFER_SIZE / is->audio_tgt.frame_size * is->audio_tgt.frame_size;
               } else {
                   if (is->show_mode != SHOW_MODE_VIDEO)
                       update_sample_display(is, (int16_t *)is->audio_buf, audio_size);
                   // remember how many bytes the new buffer holds
                   is->audio_buf_size = audio_size;
               }
              // restart reading from the beginning of the new buffer
               is->audio_buf_index = 0;
            }
             // len1 = bytes of audio_buf that have not been consumed yet
            len1 = is->audio_buf_size - is->audio_buf_index;
            // never write more than SDL still wants
            if (len1 > len)
                len1 = len;
            // at full volume and not muted the data can be copied straight through
            if (!is->muted && is->audio_buf && is->audio_volume == SDL_MIX_MAXVOLUME)
                memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
            else {
                // start from silence; when there is data and we are not muted,
                // mix it in at the requested volume
                memset(stream, 0, len1);
                if (!is->muted && is->audio_buf)
                    // SDL_MixAudioFormat applies the volume while mixing
                    SDL_MixAudioFormat(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, AUDIO_S16SYS, len1, is->audio_volume);
            }
           // advance; if SDL's buffer is not full yet the loop fetches more data
            len -= len1;
            stream += len1;
            is->audio_buf_index += len1;
        }
        
        // bytes of the current audio_buf that have NOT been handed to SDL yet
        is->audio_write_buf_size = is->audio_buf_size - is->audio_buf_index;
        /* Let's assume the audio driver that is used by SDL has two periods. */
        if (!isnan(is->audio_clock)) {
            // audio_clock is the end time of the last decoded frame; subtract the
            // duration of the data that has not been played yet (two hardware
            // buffers plus the unwritten remainder) to get the audio clock value
            set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
            sync_clock_to_slave(&is->extclk, &is->audclk);
        }
    }
    

    audio_decode_frame

    /*
     * Take the next decoded audio frame from the sample queue, convert it to the
     * output format when necessary, and make is->audio_buf point at the result.
     * Also updates is->audio_clock and is->audio_clock_serial.
     *
     * Returns the size in bytes of the data in is->audio_buf, or a negative
     * value when paused, when no frame is available, or on error.
     */
    static int audio_decode_frame(VideoState *is)
    {
        int data_size, resampled_data_size;
        int64_t dec_channel_layout;
        av_unused double audio_clock0;
        int wanted_nb_samples;
        Frame *af;
    
        if (is->paused)
            return -1;
    
        do {
            // take the next frame from the sample queue; frames whose serial does
            // not match the packet queue's current serial are skipped
            if (!(af = frame_queue_peek_readable(&is->sampq)))
                return -1;
            frame_queue_next(&is->sampq);
        } while (af->serial != is->audioq.serial);
        
        // size in bytes of the decoded frame
        data_size = av_samples_get_buffer_size(NULL, af->frame->channels,
                                               af->frame->nb_samples,
                                               af->frame->format, 1);
        
        // use the frame's channel layout when it is consistent with its channel
        // count, otherwise the default layout for that count
        dec_channel_layout =
            (af->frame->channel_layout && af->frame->channels == av_get_channel_layout_nb_channels(af->frame->channel_layout)) ?
            af->frame->channel_layout : av_get_default_channel_layout(af->frame->channels);
        // number of samples the synchronisation code wants for this frame
        wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);
        
        // (re)create the resampler when the frame no longer matches the recorded
        // source parameters, or when the sample count must change and no
        // resampler exists yet
        if (af->frame->format        != is->audio_src.fmt            ||
            dec_channel_layout       != is->audio_src.channel_layout ||
            af->frame->sample_rate   != is->audio_src.freq           ||
            (wanted_nb_samples       != af->frame->nb_samples && !is->swr_ctx)) {
            swr_free(&is->swr_ctx);
            // allocate and configure the resampler: output parameters first,
            // then input parameters
            is->swr_ctx = swr_alloc_set_opts(NULL,
                                             is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
                                             dec_channel_layout,           af->frame->format, af->frame->sample_rate,
                                             0, NULL);
            if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
                av_log(NULL, AV_LOG_ERROR,
                       "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                        af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->channels,
                        is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
                swr_free(&is->swr_ctx);
                return -1;
            }
            // remember the source parameters the resampler was built for
            is->audio_src.channel_layout = dec_channel_layout;
            is->audio_src.channels       = af->frame->channels;
            is->audio_src.freq = af->frame->sample_rate;
            is->audio_src.fmt = af->frame->format;
        }
        
        // convert through the resampler when one is active
        if (is->swr_ctx) {
            const uint8_t **in = (const uint8_t **)af->frame->extended_data;
            uint8_t **out = &is->audio_buf1;
            // expected output sample count plus a fixed margin of 256 samples;
            // presumably headroom for samples buffered inside the resampler --
            // the "len2 == out_count" check below warns when it was not enough
            int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
            int out_size  = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
            int len2;
            if (out_size < 0) {
                av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
                return -1;
            }
            // ask the resampler to add or drop samples when the synchronisation
            // code requested a different sample count
            if (wanted_nb_samples != af->frame->nb_samples) {
                if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
                                            wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
                    av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                    return -1;
                }
            }
            // make sure audio_buf1 can hold out_size bytes
            av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
            if (!is->audio_buf1)
                return AVERROR(ENOMEM);
            // do the conversion; len2 is the number of samples per channel produced
            len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
            if (len2 < 0) {
                av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
                return -1;
            }
            // the output buffer was filled completely: warn and reinitialise
            // the resampler
            if (len2 == out_count) {
                av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
                if (swr_init(is->swr_ctx) < 0)
                    swr_free(&is->swr_ctx);
            }
            is->audio_buf = is->audio_buf1;
            // size in bytes of the converted data
            resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);
        } else {
            // no conversion needed: play straight from the frame's data
            is->audio_buf = af->frame->data[0];
            resampled_data_size = data_size;
        }
    
        audio_clock0 = is->audio_clock;
        /* update the audio clock with the pts */
        if (!isnan(af->pts))
            // audio clock = pts of this frame + duration of this frame,
            // i.e. the time at which this frame finishes playing
            is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
        else
            is->audio_clock = NAN;
        is->audio_clock_serial = af->serial;
    #ifdef DEBUG
        {
            static double last_clock;
            printf("audio: delay=%0.3f clock=%0.3f clock0=%0.3f\n",
                   is->audio_clock - last_clock,
                   is->audio_clock, audio_clock0);
            last_clock = is->audio_clock;
        }
    #endif
        return resampled_data_size;
    }
    

    这里值得注意的有两点:

    1. 如果解码出来的音频数据不是Output(输出设备)所要求的格式,就需要先进行转码(重采样)
    2. 设置时间戳。为当前一帧播放完,所有音频数据的时间。
      在之前的sdl_audio_callback中,我们可以看到最后的同步时间戳。
      set_clock_at(&is->audclk, is->audio_clock - (double)(2 * is->audio_hw_buf_size + is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec, is->audio_clock_serial, audio_callback_time / 1000000.0);
    

    最后传入同步的时间戳,是当前帧播放结束时的时间(audio_clock),减去2个硬件buffer以及audio_buf中尚未写入的数据(audio_write_buf_size)所对应的播放时长。
    因为这些数据虽然已经解码,但还没有真正被播放出来。预期情况下,如果audio_buf中的数据已全部写入(audio_write_buf_size为0),则这里就是原来的pts减去硬件延迟的时间。

    相关文章

      网友评论

        本文标题:ffplay.c 源码分析- 音频部分

        本文链接:https://www.haomeiwen.com/subject/tgoxqqtx.html