iOS Voice Intercom (1): Real-Time PCM Capture and AAC Encoding

By Goning | Published 2018-04-29 14:21

    This series covers real-time two-way voice intercom on iOS:
    (1) Real-time PCM capture and AAC encoding
    (2) Real-time transport over RTSP + RTP
    (3) Real-time AAC decoding with FFmpeg and PCM playback

    This first part uses AVCaptureSession from <AVFoundation/AVFoundation.h> to capture audio in real time and output PCM data, then uses AudioConverterRef from <AudioToolbox/AudioToolbox.h> to encode the captured PCM and output AAC.


    The overall flow is as follows:

    1. Capture

    Initialize an AVCaptureSession and apply the configuration

    - (instancetype)initCaptureWithPreset:(CapturePreset)preset {
        if (self = [super init]) {
            [self initAVcaptureSession];
            _definePreset = preset;
        }
        return self;
    }
    
    - (void)initAVcaptureSession {
        //Initialize the AVCaptureSession
        _session = [[AVCaptureSession alloc] init];
        //Begin configuration
        [_session beginConfiguration];
        NSError *error;
        //Get the audio capture device
        self.audioDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeAudio];
        //Create the capture input from the device
        self.audioInput = [[AVCaptureDeviceInput alloc] initWithDevice:self.audioDevice error:&error];
        if (error) {
            NSLog(@"Failed to open the recording device");
        }
        //Add the audio input to the session
        if ([self.session canAddInput:self.audioInput]) {
            [self.session addInput:self.audioInput];
        }
        //Create the audio data output
        self.audioOutput = [[AVCaptureAudioDataOutput alloc] init];
        //Add the audio output to the session
        if ([self.session canAddOutput:self.audioOutput]) {
            [self.session addOutput:self.audioOutput];
        }
        //The sample-buffer delegate requires a serial queue so buffers arrive in order
        dispatch_queue_t audioQueue = dispatch_queue_create("audio.capture.queue", DISPATCH_QUEUE_SERIAL);
        [self.audioOutput setSampleBufferDelegate:self queue:audioQueue];
        //Commit the configuration
        [self.session commitConfiguration];
    }
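
    These methods belong to a capture class that the article never declares. Judging from the code, its interface is roughly the following sketch (the PCMCapture class name is an assumption; the properties, delegate, and methods are taken from the code):

    #import <AVFoundation/AVFoundation.h>

    @protocol PCMCaptureDelegate;

    @interface PCMCapture : NSObject <AVCaptureAudioDataOutputSampleBufferDelegate>
    @property (nonatomic, strong) AVCaptureSession *session;
    @property (nonatomic, strong) AVCaptureDevice *audioDevice;
    @property (nonatomic, strong) AVCaptureDeviceInput *audioInput;
    @property (nonatomic, strong) AVCaptureAudioDataOutput *audioOutput;
    @property (nonatomic, weak) id<PCMCaptureDelegate> delegate;
    @property (nonatomic, assign) CapturePreset definePreset; //backs the _definePreset ivar
    - (instancetype)initCaptureWithPreset:(CapturePreset)preset;
    - (void)start;
    - (void)stop;
    @end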
    

    Starting and stopping capture

    - (void)start {
        [self.session startRunning];
    }
    
    - (void)stop {
        [self.session stopRunning];
    }
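
    Note that capture only produces data once the app has microphone permission and an active audio session. The article does not show this step; a minimal sketch (the method name prepareAudioSessionThenStart is hypothetical, and error handling is omitted) might be:

    - (void)prepareAudioSessionThenStart {
        AVAudioSession *audioSession = [AVAudioSession sharedInstance];
        [audioSession requestRecordPermission:^(BOOL granted) {
            if (!granted) {
                NSLog(@"Microphone permission denied");
                return;
            }
            NSError *error = nil;
            //PlayAndRecord suits a two-way intercom: capture and playback at once
            [audioSession setCategory:AVAudioSessionCategoryPlayAndRecord error:&error];
            [audioSession setActive:YES error:&error];
            [self start];
        }];
    }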
    

    Define a delegate callback that hands the PCM data out

    @protocol PCMCaptureDelegate <NSObject>
    - (void)audioWithSampleBuffer:(CMSampleBufferRef)sampleBuffer;
    @end
    
    //AVCaptureAudioDataOutputSampleBufferDelegate
    - (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection {
        if (captureOutput == self.audioOutput) {
            if (self.delegate && [self.delegate respondsToSelector:@selector(audioWithSampleBuffer:)]) {
                [self.delegate audioWithSampleBuffer:sampleBuffer];
            }
        }
    }
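
    On the receiving side, a delegate implementation typically just forwards each buffer to the encoder described in the next section (hypothetical wiring; self.encoder is an assumed property holding the encoder object):

    //PCMCaptureDelegate
    - (void)audioWithSampleBuffer:(CMSampleBufferRef)sampleBuffer {
        [self.encoder encodeSampleBuffer:sampleBuffer];
    }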
    

    2. Encoding

    Create the converter and set its properties

    - (void)setUpConverter:(CMSampleBufferRef)sampleBuffer {
        //Get the audio format description from the sample buffer
        CMAudioFormatDescriptionRef audioFormatDes = (CMAudioFormatDescriptionRef)CMSampleBufferGetFormatDescription(sampleBuffer);
        //Read the input ASBD
        AudioStreamBasicDescription inAudioStreamBasicDescription = *(CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDes));
        //Build the output ASBD
        AudioStreamBasicDescription outAudioStreamBasicDescription = {0};
        //Must be 0 for compressed formats
        outAudioStreamBasicDescription.mBitsPerChannel = 0;
        outAudioStreamBasicDescription.mBytesPerFrame = 0;
        //Mono
        outAudioStreamBasicDescription.mChannelsPerFrame = 1;
        //16000 Hz sample rate
        outAudioStreamBasicDescription.mSampleRate = 16000;
        //Output format: AAC-LC
        outAudioStreamBasicDescription.mFormatID = kAudioFormatMPEG4AAC;
        outAudioStreamBasicDescription.mFormatFlags = kMPEG4Object_AAC_LC;
        //Let Core Audio fill in the remaining fields of the output format
        UInt32 size = sizeof(outAudioStreamBasicDescription);
        AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, NULL, &size, &outAudioStreamBasicDescription);
        //Pick an AAC encoder (AudioClassDescription describes an installed codec)
        AudioClassDescription audioClassDes;
        //Zero it out
        memset(&audioClassDes, 0, sizeof(audioClassDes));
        //Query the total size of the matching AAC encoders
        UInt32 countSize = 0;
        AudioFormatGetPropertyInfo(kAudioFormatProperty_Encoders, sizeof(outAudioStreamBasicDescription.mFormatID), &outAudioStreamBasicDescription.mFormatID, &countSize);
        //Number of matching encoders
        int count = countSize/sizeof(audioClassDes);
        //Array sized for that many class descriptions
        AudioClassDescription descriptions[count];
        //Fill the array with the encoder descriptions
        AudioFormatGetProperty(kAudioFormatProperty_Encoders, sizeof(outAudioStreamBasicDescription.mFormatID), &outAudioStreamBasicDescription.mFormatID, &countSize, descriptions);
        for (int i = 0; i < count; i++) {
            AudioClassDescription temp = descriptions[i];
            if (temp.mManufacturer==kAppleSoftwareAudioCodecManufacturer //software encoder
                &&temp.mSubType==outAudioStreamBasicDescription.mFormatID) {
                audioClassDes = temp;
                break;
            }
        }
        //Allocate a ConverterContext to hold the converter
        ConverterContext *context = malloc(sizeof(ConverterContext));
        self->convertContext = context;
        OSStatus result = AudioConverterNewSpecific(&inAudioStreamBasicDescription, &outAudioStreamBasicDescription, 1, &audioClassDes, &(context->converter));
        if (result == noErr) {
            //Converter created successfully
            AudioConverterRef converter = context->converter;
            //Set the codec quality
            UInt32 temp = kAudioConverterQuality_High;
            AudioConverterSetProperty(converter, kAudioConverterCodecQuality, sizeof(temp), &temp);
            //Set the bit rate
            UInt32 bitRate = 32000;
            result = AudioConverterSetProperty(converter, kAudioConverterEncodeBitRate, sizeof(bitRate), &bitRate);
            if (result != noErr) {
                NSLog(@"Failed to set the bit rate");
            }
        }else{
            //Failed to create the converter; clear the dangling context pointer
            free(context);
            self->convertContext = NULL;
            NSLog(@"Failed to create the encoder");
        }
    }
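
    The ConverterContext type is allocated and dereferenced above but never defined in the article; its usage implies a definition along these lines (assumed, not shown in the original):

    typedef struct {
        AudioConverterRef converter; //the only member the code above accesses
    } ConverterContext;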
    

    Encoding the sample buffer data

    //Encode the sample buffer data
    - (void)encodeSampleBuffer:(CMSampleBufferRef)sampleBuffer {
        if (!self->convertContext) {
            [self setUpConverter:sampleBuffer];
        }
        ConverterContext *cxt = self->convertContext;
        if (cxt && cxt->converter) {
            //Retain the sample buffer while it crosses the async boundary
            CFRetain(sampleBuffer);
            //encodeQueue is the encoder's own dispatch queue (its creation is not shown)
            dispatch_async(encodeQueue, ^{
                //Get the block buffer from the sample buffer
                CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
                size_t pcmLength = 0;
                char *pcmData = NULL;
                //Get a pointer to (and the length of) the PCM data in the block buffer
                OSStatus status = CMBlockBufferGetDataPointer(blockBuffer, 0, NULL, &pcmLength, &pcmData);
                if (status != noErr) {
                    NSLog(@"Failed to get PCM data from the block buffer");
                    CFRelease(sampleBuffer);
                    return;
                } else {
                    //Heap buffer for the encoded AAC output
                    char *outputBuffer = malloc(pcmLength);
                    memset(outputBuffer, 0, pcmLength);
                    UInt32 packetSize = 1;
                    AudioStreamPacketDescription *outputPacketDes = (AudioStreamPacketDescription *)malloc(sizeof(AudioStreamPacketDescription) *packetSize);
                    //FillComplexInputParm carries the PCM into the input callback
                    FillComplexInputParm userParam;
                    userParam.source = pcmData;
                    userParam.sourceSize = (UInt32)pcmLength;
                    userParam.channelCount = 1;
                    userParam.packetDescription = NULL;
                    //Output AudioBufferList
                    AudioBufferList outputBufferList;
                    outputBufferList.mNumberBuffers = 1;
                    outputBufferList.mBuffers[0].mData = outputBuffer;
                    outputBufferList.mBuffers[0].mDataByteSize = (unsigned int)pcmLength;
                    outputBufferList.mBuffers[0].mNumberChannels = 1;
                    //Encode
                    status = AudioConverterFillComplexBuffer(self->convertContext->converter, audioConverterComplexInputDataProc, &userParam, &packetSize, &outputBufferList, outputPacketDes);
                    free(outputPacketDes);
                    outputPacketDes = NULL;
                    if (status == noErr) {
    //                    NSLog(@"Encoded successfully");
                        //Grab the raw AAC data
                        NSData *rawAAC = [NSData dataWithBytes:outputBufferList.mBuffers[0].mData length:outputBufferList.mBuffers[0].mDataByteSize];
                        //Prepend the ADTS header
                        int headerLength = 0;
                        char *packetHeader = newAdtsDataForPacketLength((int)rawAAC.length, &headerLength);
                        NSData *adtsHeader = [NSData dataWithBytes:packetHeader length:headerLength];
                        free(packetHeader);
                        packetHeader = NULL;
                        NSMutableData *fullData = [NSMutableData dataWithData:adtsHeader];
                        [fullData appendData:rawAAC];
                        //Prepend the private header
                        char *privateHeader = newPrivate((int)fullData.length);
                        NSData *privateHeaderData = [NSData dataWithBytes:privateHeader length:24];
                        free(privateHeader);
                        privateHeader = NULL;
                        NSMutableData *pFullData = [NSMutableData dataWithData:privateHeaderData];
                        [pFullData appendData:fullData];
                        //Prepend the RTP header
                        char *rtpHeader = newRTPForAAC();
                        NSData *rtpHeaderData = [NSData dataWithBytes:rtpHeader length:12];
                        free(rtpHeader);
                        rtpHeader = NULL;
                        NSMutableData *fullData1 = [NSMutableData dataWithData:rtpHeaderData];
                        [fullData1 appendData:pFullData];
                        //Prepend the RTSP interleaved frame header
                        char *rtspFrameHeader = newRTSPInterleavedFrame((int)fullData1.length);
                        NSData *rtspFrameHeaderData = [NSData dataWithBytes:rtspFrameHeader length:4];
                        free(rtspFrameHeader);
                        rtspFrameHeader = NULL;
                        NSMutableData *fullData2 = [NSMutableData dataWithData:rtspFrameHeaderData];
                        [fullData2 appendData:fullData1];
                        //Send the packet
                        [self.delegate sendData:fullData2];
                    }
                    //Free the output buffer exactly once, on both success and failure paths
                    free(outputBuffer);
                    outputBuffer = NULL;
                    CFRelease(sampleBuffer);
                }
            });
        }
    }
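
    AudioConverterFillComplexBuffer pulls its input through a callback, and the article references audioConverterComplexInputDataProc and FillComplexInputParm without showing them. A minimal sketch consistent with how they are used above (assuming the capture session delivers 16-bit mono LPCM, matching the mono/16 kHz encoder configuration) might look like this:

    //Assumed definition; the original article does not show this struct
    typedef struct {
        char *source;                                    //PCM bytes from the block buffer
        UInt32 sourceSize;                               //PCM bytes still available
        UInt32 channelCount;
        AudioStreamPacketDescription *packetDescription;
    } FillComplexInputParm;

    //Called by AudioConverterFillComplexBuffer whenever the encoder needs more PCM
    static OSStatus audioConverterComplexInputDataProc(AudioConverterRef inConverter,
                                                       UInt32 *ioNumberDataPackets,
                                                       AudioBufferList *ioData,
                                                       AudioStreamPacketDescription **outDataPacketDescription,
                                                       void *inUserData) {
        FillComplexInputParm *param = (FillComplexInputParm *)inUserData;
        if (param->sourceSize == 0) {
            //Input exhausted for this call; report zero packets and a nonzero status
            *ioNumberDataPackets = 0;
            return -1;
        }
        //Hand the whole PCM chunk to the converter in one shot
        ioData->mNumberBuffers = 1;
        ioData->mBuffers[0].mData = param->source;
        ioData->mBuffers[0].mDataByteSize = param->sourceSize;
        ioData->mBuffers[0].mNumberChannels = param->channelCount;
        //For 16-bit mono LPCM, one packet is one 2-byte frame
        *ioNumberDataPackets = param->sourceSize / 2;
        param->sourceSize = 0;
        return noErr;
    }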
    

    AudioConverterFillComplexBuffer is the function that performs the conversion. The AAC it produces is raw data; a fixed 7-byte (56-bit) ADTS header must be prepended to describe the audio stream so a decoder can read it. For a description of ADTS, see https://blog.csdn.net/jay100500/article/details/52955232. The code below builds the ADTS header:

    //Prepend the ADTS header to the AAC; packetLength is the length of the raw AAC
    char *newAdtsDataForPacketLength(int packetLength, int *ioHeaderLen) {
        //The ADTS header is a fixed 7 bytes
        int adtsLen = 7;
        //Allocate 7 bytes on the heap
        char *packet = malloc(sizeof(char)*adtsLen);
        //Profile: AAC LC
        int profile = 2;
        //Sampling-frequency index (8 = 16000 Hz)
        int freqIdx = 8;
        //Channel configuration (1 = mono)
        int chanCfg = 1;
        //Total length of the ADTS header plus the raw AAC
        NSUInteger fullLength = adtsLen+packetLength;
        //Syncword and fixed header bits
        packet[0] = 0xFF;
        packet[1] = 0xF1;
        packet[2] = (char)(((profile-1)<<6)+(freqIdx<<2)+(chanCfg>>2));
        packet[3] = (char)(((chanCfg&3)<<6)+(fullLength>>11));
        packet[4] = (char)((fullLength&0x7FF)>>3);
        packet[5] = (char)(((fullLength&7)<<5)+0x1F);
        packet[6] = (char)0xFC;
        *ioHeaderLen = adtsLen;
        return packet;
    }
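
    As a quick sanity check of the bit packing: for a 249-byte raw AAC frame, fullLength = 7 + 249 = 256 = 0x100, so packet[3] = (1&3)<<6 = 0x40, packet[4] = (256&0x7FF)>>3 = 0x20, and packet[5] = ((256&7)<<5)+0x1F = 0x1F.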
    
    PS: Besides the ADTS header, the author also prepends a private header (the newPrivate function is not shown here), so the framing is not a standard format; the private header can be ignored.

    This series transports the encoded ADTS-AAC over RTSP + RTP, so each packet additionally gets a fixed-size RTP header (12 bytes / 96 bits) and an RTSP interleaved frame header (4 bytes / 32 bits). For a description of RTP, see https://blog.csdn.net/qingkongyeyue/article/details/60590613. The RTP packaging code follows:

    //Build the RTP header
    char *newRTPForAAC() {
        //The RTP header is a fixed 12 bytes
        int rtpLen = 12;
        //Allocate 12 bytes on the heap
        char *packet = malloc(sizeof(char)*rtpLen);
        packet[0] = 0x80;//V(2), P, X, CC
        packet[1] = 0x88;//M, PT
        //Sequence number (hard-coded here)
        packet[2] = 0x00;
        packet[3] = 0xDA;
        //Timestamp (hard-coded here)
        packet[4] = 0x00;
        packet[5] = 0x01;
        packet[6] = 0x98;
        packet[7] = 0xC0;
        //SSRC
        packet[8] = 0x00;
        packet[9] = 0x00;
        packet[10] = 0x00;
        packet[11] = 0x00;
        return packet;
    }
    
    PS: Because the author's framing is non-standard, some RTP header fields (sequence number, timestamp) are hard-coded rather than computed per packet, which is also not standard; for real use, fill in these fields according to the RTP rules.
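
    For example, a more standards-friendly variant (an assumption on my part, not the author's code) would keep a per-stream sequence number and advance the timestamp by 1024 per AAC frame, since one AAC frame covers 1024 samples:

    //Sketch: RTP header with a running sequence number and timestamp
    static uint16_t g_rtpSeq = 0;
    static uint32_t g_rtpTimestamp = 0;

    char *newRTPForAACWithState(void) {
        char *packet = malloc(12);
        packet[0] = 0x80;                           //V=2, P=0, X=0, CC=0
        packet[1] = 0x88;                           //M, PT as in the author's code
        packet[2] = (g_rtpSeq >> 8) & 0xFF;         //sequence number, big-endian
        packet[3] = g_rtpSeq & 0xFF;
        g_rtpSeq++;
        packet[4] = (g_rtpTimestamp >> 24) & 0xFF;  //timestamp, big-endian
        packet[5] = (g_rtpTimestamp >> 16) & 0xFF;
        packet[6] = (g_rtpTimestamp >> 8) & 0xFF;
        packet[7] = g_rtpTimestamp & 0xFF;
        g_rtpTimestamp += 1024;                     //one AAC frame = 1024 samples
        packet[8] = packet[9] = packet[10] = packet[11] = 0x00; //SSRC (fixed here)
        return packet;
    }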

    The RTSP interleaved frame header is only 4 bytes, and its first byte is the character "$". Here is the code:

    char *newRTSPInterleavedFrame(int packetLength) {
        //The RTSP interleaved frame header is a fixed 4 bytes
        int headerLen = 4;
        //Allocate 4 bytes on the heap
        char *packet = malloc(sizeof(char)*headerLen);
        //'$' magic byte, then the channel identifier
        packet[0] = 0x24;
        packet[1] = 0x00;
        //16-bit payload length, big-endian
        packet[2] = (packetLength >> 8) & 0xFF;
        packet[3] = packetLength & 0xFF;
        return packet;
    }
    

    Finally, fullData2 is the fully wrapped packet. In other words, each encoded frame is wrapped in layer upon layer of headers: for N bytes of raw AAC, the packet is 4 (RTSP interleaved frame) + 12 (RTP) + 24 (private header) + 7 (ADTS) + N bytes, in that order, and is ready to be sent over RTSP.


    Demo: https://github.com/XuningZhai/TalkDemo
    Demo with G711 support: https://github.com/XuningZhai/TalkDemo_G711_AAC
