iOS Voice Intercom (Part 1): Real-Time PCM Capture and AAC Encoding

Author: Goning | Published 2018-04-29 14:21

This series covers building real-time two-way voice intercom on iOS:
(1) Real-time PCM capture and AAC encoding
(2) Real-time transport over RTSP+RTP
(3) Real-time AAC decoding with FFmpeg and PCM playback

This first part uses AVCaptureSession from <AVFoundation/AVFoundation.h> to capture audio in real time and output PCM data, then uses AudioConverterRef from <AudioToolbox/AudioToolbox.h> to encode the captured PCM and output AAC.


The process is as follows:

1. Capture

Initialize the AVCaptureSession and apply its configuration:

- (instancetype)initCaptureWithPreset:(CapturePreset)preset {
    if (self = [super init]) {
        [self initAVcaptureSession];
        _definePreset = preset;
    }
    return self;
}

- (void)initAVcaptureSession {
    //Initialize the AVCaptureSession
    _session = [[AVCaptureSession alloc] init];
    //Begin configuration
    [_session beginConfiguration];
    NSError *error;
    //Get the audio capture device
    self.audioDevice = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeAudio];
    //Initialize the capture input object
    self.audioInput = [[AVCaptureDeviceInput alloc] initWithDevice:self.audioDevice error:&error];
    if (error) {
        NSLog(@"Failed to open the recording device");
    }
    //Add the audio input to the session
    if ([self.session canAddInput:self.audioInput]) {
        [self.session addInput:self.audioInput];
    }
    //Initialize the capture output object
    self.audioOutput = [[AVCaptureAudioDataOutput alloc] init];
    //Add the audio output to the session
    if ([self.session canAddOutput:self.audioOutput]) {
        [self.session addOutput:self.audioOutput];
    }
    //Create the queue for the audio output delegate; use a serial queue
    //so that sample buffers are delivered in order
    dispatch_queue_t audioQueue = dispatch_queue_create("audio.capture", DISPATCH_QUEUE_SERIAL);
    [self.audioOutput setSampleBufferDelegate:self queue:audioQueue];
    //Commit the configuration
    [self.session commitConfiguration];
}

Starting and stopping capture:

- (void)start {
    [self.session startRunning];
}

- (void)stop {
    [self.session stopRunning];
}
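
One practical detail the excerpt does not show: the app must declare NSMicrophoneUsageDescription in its Info.plist and obtain microphone permission before the session delivers samples. A minimal sketch of a hypothetical call site (capture is an instance of the class above):

//Request microphone access, then start capturing once granted
[AVCaptureDevice requestAccessForMediaType:AVMediaTypeAudio completionHandler:^(BOOL granted) {
    if (granted) {
        [capture start];
    } else {
        NSLog(@"Microphone access denied");
    }
}];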

Define a delegate callback that hands the PCM data out:

@protocol PCMCaptureDelegate <NSObject>
- (void)audioWithSampleBuffer:(CMSampleBufferRef)sampleBuffer;
@end
//AVCaptureAudioDataOutputSampleBufferDelegate
- (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection {
    if (captureOutput == self.audioOutput) {
        if (self.delegate && [self.delegate respondsToSelector:@selector(audioWithSampleBuffer:)]) {
            [self.delegate audioWithSampleBuffer:sampleBuffer];
        }
    }
}
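
In practice the capture delegate simply forwards each sample buffer to the encoder. A sketch, assuming a hypothetical encoder property that exposes the encodeSampleBuffer: method from section 2:

//PCMCaptureDelegate: forward captured PCM straight into the encoder
- (void)audioWithSampleBuffer:(CMSampleBufferRef)sampleBuffer {
    [self.encoder encodeSampleBuffer:sampleBuffer];
}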

2. Encoding

Create the converter and set its properties:

- (void)setUpConverter:(CMSampleBufferRef)sampleBuffer {
    //Get the audio format description
    CMAudioFormatDescriptionRef audioFormatDes = (CMAudioFormatDescriptionRef)CMSampleBufferGetFormatDescription(sampleBuffer);
    //Get the input ASBD
    AudioStreamBasicDescription inAudioStreamBasicDescription = *(CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDes));
    //Build the output ASBD
    AudioStreamBasicDescription outAudioStreamBasicDescription = {0};
    //For compressed formats these must be 0
    outAudioStreamBasicDescription.mBitsPerChannel = 0;
    outAudioStreamBasicDescription.mBytesPerFrame = 0;
    //Mono
    outAudioStreamBasicDescription.mChannelsPerFrame = 1;
    //16000 Hz sample rate
    outAudioStreamBasicDescription.mSampleRate = 16000;
    //Output format: AAC-LC
    outAudioStreamBasicDescription.mFormatID = kAudioFormatMPEG4AAC;
    outAudioStreamBasicDescription.mFormatFlags = kMPEG4Object_AAC_LC;
    //Let Core Audio fill in the remaining fields of the output format
    UInt32 size = sizeof(outAudioStreamBasicDescription);
    AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, NULL, &size, &outAudioStreamBasicDescription);
    //Pick an AAC encoder (AudioClassDescription describes an installed codec)
    AudioClassDescription audioClassDes;
    //Zero it out
    memset(&audioClassDes, 0, sizeof(audioClassDes));
    //Get the total size of the matching AAC encoder descriptions
    UInt32 countSize = 0;
    AudioFormatGetPropertyInfo(kAudioFormatProperty_Encoders, sizeof(outAudioStreamBasicDescription.mFormatID), &outAudioStreamBasicDescription.mFormatID, &countSize);
    //Number of matching encoders
    int count = countSize / sizeof(audioClassDes);
    //Array to hold the encoder descriptions
    AudioClassDescription descriptions[count];
    //Fill descriptions with the encoder information
    AudioFormatGetProperty(kAudioFormatProperty_Encoders, sizeof(outAudioStreamBasicDescription.mFormatID), &outAudioStreamBasicDescription.mFormatID, &countSize, descriptions);
    for (int i = 0; i < count; i++) {
        AudioClassDescription temp = descriptions[i];
        if (temp.mManufacturer == kAppleSoftwareAudioCodecManufacturer //software encoder
            && temp.mSubType == outAudioStreamBasicDescription.mFormatID) {
            audioClassDes = temp;
            break;
        }
    }
    //Create a ConverterContext to hold the converter
    ConverterContext *context = malloc(sizeof(ConverterContext));
    self->convertContext = context;
    OSStatus result = AudioConverterNewSpecific(&inAudioStreamBasicDescription, &outAudioStreamBasicDescription, 1, &audioClassDes, &(context->converter));
    if (result == noErr) {
        //Converter created successfully; set its properties
        AudioConverterRef converter = context->converter;
        //Encoding quality
        UInt32 temp = kAudioConverterQuality_High;
        AudioConverterSetProperty(converter, kAudioConverterCodecQuality, sizeof(temp), &temp);
        //Bit rate
        UInt32 bitRate = 32000;
        result = AudioConverterSetProperty(converter, kAudioConverterEncodeBitRate, sizeof(bitRate), &bitRate);
        if (result != noErr) {
            NSLog(@"Failed to set the bit rate");
        }
    } else {
        //Failed to create the converter; release the context
        free(context);
        self->convertContext = NULL;
        NSLog(@"Failed to create the converter");
    }
}
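
setUpConverter stores the converter in a ConverterContext struct that the excerpt never defines. A minimal definition consistent with the usage above:

//Holds the AudioConverterRef across calls
typedef struct {
    AudioConverterRef converter;
} ConverterContext;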

Encode the sample buffer data:

//Encode the sample buffer data
- (void)encodeSampleBuffer:(CMSampleBufferRef)sampleBuffer {
    if (!self->convertContext) {
        [self setUpConverter:sampleBuffer];
    }
    ConverterContext *cxt = self->convertContext;
    if (cxt && cxt->converter) {
        //Keep the sample buffer alive while it is encoded asynchronously
        CFRetain(sampleBuffer);
        dispatch_async(encodeQueue, ^{
            //Get the block buffer from the sample buffer
            CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
            size_t pcmLength = 0;
            char *pcmData = NULL;
            //Get a pointer to (and the length of) the PCM data in the block buffer
            OSStatus status = CMBlockBufferGetDataPointer(blockBuffer, 0, NULL, &pcmLength, &pcmData);
            if (status != noErr) {
                NSLog(@"Failed to get PCM data from the block buffer");
                CFRelease(sampleBuffer);
                return;
            } else {
                //Heap buffer to receive the encoded AAC data
                char *outputBuffer = malloc(pcmLength);
                memset(outputBuffer, 0, pcmLength);
                UInt32 packetSize = 1;
                AudioStreamPacketDescription *outputPacketDes = (AudioStreamPacketDescription *)malloc(sizeof(AudioStreamPacketDescription) * packetSize);
                //Use a FillComplexInputParm to carry the PCM data into the input callback
                FillComplexInputParm userParam;
                userParam.source = pcmData;
                userParam.sourceSize = (UInt32)pcmLength;
                userParam.channelCount = 1;
                userParam.packetDescription = NULL;
                //Set up the AudioBufferList that receives the output
                AudioBufferList outputBufferList;
                outputBufferList.mNumberBuffers = 1;
                outputBufferList.mBuffers[0].mData = outputBuffer;
                outputBufferList.mBuffers[0].mDataByteSize = (unsigned int)pcmLength;
                outputBufferList.mBuffers[0].mNumberChannels = 1;
                //Encode
                status = AudioConverterFillComplexBuffer(cxt->converter, audioConverterComplexInputDataProc, &userParam, &packetSize, &outputBufferList, outputPacketDes);
                free(outputPacketDes);
                outputPacketDes = NULL;
                if (status == noErr) {
                    //Copy out the raw AAC data
                    NSData *rawAAC = [NSData dataWithBytes:outputBufferList.mBuffers[0].mData length:outputBufferList.mBuffers[0].mDataByteSize];
                    //Prepend the ADTS header
                    int headerLength = 0;
                    char *packetHeader = newAdtsDataForPacketLength((int)rawAAC.length, &headerLength);
                    NSData *adtsHeader = [NSData dataWithBytes:packetHeader length:headerLength];
                    free(packetHeader);
                    packetHeader = NULL;
                    NSMutableData *fullData = [NSMutableData dataWithData:adtsHeader];
                    [fullData appendData:rawAAC];
                    //Prepend the private header
                    char *privateHeader = newPrivate((int)fullData.length);
                    NSData *privateHeaderData = [NSData dataWithBytes:privateHeader length:24];
                    free(privateHeader);
                    privateHeader = NULL;
                    NSMutableData *pFullData = [NSMutableData dataWithData:privateHeaderData];
                    [pFullData appendData:fullData];
                    //Prepend the RTP header
                    char *rtpHeader = newRTPForAAC();
                    NSData *rtpHeaderData = [NSData dataWithBytes:rtpHeader length:12];
                    free(rtpHeader);
                    rtpHeader = NULL;
                    NSMutableData *fullData1 = [NSMutableData dataWithData:rtpHeaderData];
                    [fullData1 appendData:pFullData];
                    //Prepend the RTSP interleaved frame header
                    char *rtspFrameHeader = newRTSPInterleavedFrame((int)fullData1.length);
                    NSData *rtspFrameHeaderData = [NSData dataWithBytes:rtspFrameHeader length:4];
                    free(rtspFrameHeader);
                    rtspFrameHeader = NULL;
                    NSMutableData *fullData2 = [NSMutableData dataWithData:rtspFrameHeaderData];
                    [fullData2 appendData:fullData1];
                    //Send the packet
                    [self.delegate sendData:fullData2];
                }
                //Release the output buffer exactly once, on both paths
                free(outputBuffer);
                outputBuffer = NULL;
                CFRelease(sampleBuffer);
            }
        });
    }
}
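
This method also relies on an encodeQueue, a FillComplexInputParm struct, and the input callback audioConverterComplexInputDataProc, none of which appear in the excerpt. A minimal sketch consistent with the usage above (the callback hands the converter the whole pending PCM chunk in one shot, assuming 16-bit samples):

//encodeQueue would be a serial queue created once, e.g. in init:
//encodeQueue = dispatch_queue_create("audio.encode", DISPATCH_QUEUE_SERIAL);

//Carries one chunk of PCM into the input callback
typedef struct {
    char *source;                                    //PCM bytes
    UInt32 sourceSize;                               //remaining PCM length in bytes
    UInt32 channelCount;
    AudioStreamPacketDescription *packetDescription;
} FillComplexInputParm;

//Input callback: feeds the pending PCM to the converter, then reports that no more data is available
static OSStatus audioConverterComplexInputDataProc(AudioConverterRef inAudioConverter,
                                                   UInt32 *ioNumberDataPackets,
                                                   AudioBufferList *ioData,
                                                   AudioStreamPacketDescription **outDataPacketDescription,
                                                   void *inUserData) {
    FillComplexInputParm *param = (FillComplexInputParm *)inUserData;
    if (param->sourceSize == 0) {
        //No more PCM for this call; tell the converter to stop pulling
        *ioNumberDataPackets = 0;
        return -1;
    }
    ioData->mNumberBuffers = 1;
    ioData->mBuffers[0].mData = param->source;
    ioData->mBuffers[0].mDataByteSize = param->sourceSize;
    ioData->mBuffers[0].mNumberChannels = param->channelCount;
    //16-bit PCM: one packet per frame, 2 bytes per sample per channel
    *ioNumberDataPackets = param->sourceSize / (2 * param->channelCount);
    param->sourceSize = 0;
    if (outDataPacketDescription) {
        *outDataPacketDescription = param->packetDescription;
    }
    return noErr;
}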

AudioConverterFillComplexBuffer is the function that performs the conversion. The AAC it produces is raw data, so a fixed 7-byte (56-bit) ADTS header must be prepended to describe the audio and let a decoder read it. For an introduction to ADTS, see https://blog.csdn.net/jay100500/article/details/52955232. The code that adds the ADTS header:

//Prepend an ADTS header to the AAC; packetLength is the length of the raw AAC
char *newAdtsDataForPacketLength(int packetLength, int *ioHeaderLen) {
    //The ADTS header is a fixed 7 bytes
    int adtsLen = 7;
    //Allocate 7 bytes on the heap
    char *packet = malloc(sizeof(char)*adtsLen);
    //Profile: AAC LC
    int profile = 2;
    //Sampling frequency index (8 = 16000 Hz)
    int freqIdx = 8;
    //Channel configuration (1 = mono)
    int chanCfg = 1;
    //Total length of the ADTS header plus the raw AAC
    NSUInteger fullLength = adtsLen+packetLength;
    //Syncword and fixed header fields
    packet[0] = 0xFF;
    packet[1] = 0xF1;
    packet[2] = (char)(((profile-1)<<6)+(freqIdx<<2)+(chanCfg>>2));
    packet[3] = (char)(((chanCfg&3)<<6)+(fullLength>>11));
    packet[4] = (char)((fullLength&0x7FF)>>3);
    packet[5] = (char)(((fullLength&7)<<5)+0x1F);
    packet[6] = (char)0xFC;
    *ioHeaderLen = adtsLen;
    return packet;
}
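
As a quick sanity check, for a hypothetical 100-byte raw AAC frame (fullLength = 107) the bit packing above yields the header bytes FF F1 60 40 0D 7F FC:

int headerLen = 0;
char *header = newAdtsDataForPacketLength(100, &headerLen);
//header[0..6] == 0xFF 0xF1 0x60 0x40 0x0D 0x7F 0xFC, headerLen == 7
free(header);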
PS: Besides the ADTS header, the author also adds a private header here, so this is not a standard packaging format; it can be ignored.

This series transports the encoded ADTS-AAC over RTSP+RTP, so an RTP header (12 bytes / 96 bits) and an RTSP interleaved frame header (4 bytes / 32 bits), both of fixed size, must also be prepended. For an introduction to RTP, see https://blog.csdn.net/qingkongyeyue/article/details/60590613. The RTP packaging code:

//Build the RTP header
char *newRTPForAAC() {
    //The RTP header is a fixed 12 bytes
    int rtpLen = 12;
    //Allocate 12 bytes on the heap
    char *packet = malloc(sizeof(char)*rtpLen);
    //Version/padding/extension/CSRC count, then marker/payload type
    packet[0] = 0x80;//V_P_X_CC
    packet[1] = 0x88;//M_PT
    //Sequence number
    packet[2] = 0x00;
    packet[3] = 0xDA;
    //Timestamp
    packet[4] = 0x00;
    packet[5] = 0x01;
    packet[6] = 0x98;
    packet[7] = 0xC0;
    //SSRC
    packet[8] = 0x00;
    packet[9] = 0x00;
    packet[10] = 0x00;
    packet[11] = 0x00;
    return packet;
}
PS: Because the author's packaging is not standard, some values in the RTP header (sequence number, timestamp) are hard-coded rather than maintained per packet, so this is not fully standard either; you should fill these fields in according to the RTP rules.
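
For reference, a sketch of a more standards-minded header builder (not part of the original demo): the sequence number increments per packet and the timestamp advances by 1024 samples per AAC frame; the payload type would normally be a dynamic one negotiated via SDP.

static uint16_t rtpSeq = 0;
static uint32_t rtpTimestamp = 0;

char *newSequencedRTPForAAC(void) {
    char *packet = malloc(12);
    packet[0] = 0x80;                         //V=2, P=0, X=0, CC=0
    packet[1] = 0x88;                         //marker bit plus the demo's payload type
    packet[2] = (rtpSeq >> 8) & 0xFF;         //sequence number, big-endian
    packet[3] = rtpSeq & 0xFF;
    packet[4] = (rtpTimestamp >> 24) & 0xFF;  //timestamp, big-endian
    packet[5] = (rtpTimestamp >> 16) & 0xFF;
    packet[6] = (rtpTimestamp >> 8) & 0xFF;
    packet[7] = rtpTimestamp & 0xFF;
    packet[8] = packet[9] = packet[10] = packet[11] = 0x00; //SSRC, pick one per session
    rtpSeq++;
    rtpTimestamp += 1024;                     //one AAC frame covers 1024 samples
    return packet;
}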

The RTSP interleaved frame header is only 4 bytes and starts with the character "$". The code:

char *newRTSPInterleavedFrame(int packetLength) {
    //The RTSP interleaved frame header is a fixed 4 bytes
    int headerLen = 4;
    //Allocate 4 bytes on the heap
    char *packet = malloc(sizeof(char)*headerLen);
    //Magic "$" plus the channel identifier
    packet[0] = 0x24;
    packet[1] = 0x00;
    //16-bit payload length, big-endian
    packet[2] = (packetLength >> 8) & 0xFF;
    packet[3] = packetLength & 0xFF;
    return packet;
}

Finally, fullData2 is the fully packaged data: the AAC payload has been wrapped in one layer of header after another, and at this point it is ready for RTSP transmission.
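
Laid out byte by byte, each packet on the wire therefore looks like this:

[RTSP interleaved frame: 4 bytes][RTP header: 12 bytes][private header: 24 bytes][ADTS header: 7 bytes][raw AAC]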


Demo: https://github.com/XuningZhai/TalkDemo
Demo with G711 support: https://github.com/XuningZhai/TalkDemo_G711_AAC
