Swift-VideoToolbox H264 硬编解码

作者: T92 | 来源:发表于2023-09-19 10:17 被阅读0次

编码

import UIKit
import VideoToolbox

class ZGH264Encoder: NSObject {
    
    var videoEncodeCallback : ((Data)-> Void)?
    var videoEncodeCallbackSPSAndPPS :((Data,Data)->Void)?
    
    private var width: Int32 = 480
    private var height:Int32 = 640
    private var bitRate : Int32 = 480 * 640 * 3 * 4
    private var fps : Int32 = 10
    private var hasSpsPps = false
    private var frameID:Int64 = 0
    
    
    private var encodeCallBack:VTCompressionOutputCallback?
    private var encodeQueue = DispatchQueue(label: "encode")
    private var callBackQueue = DispatchQueue(label: "callBack")
    private var encodeSession:VTCompressionSession!
    
    init(width:Int32 = 480,height:Int32 = 640,bitRate : Int32? = nil,fps: Int32? = nil ) {
        
        self.width = width
        self.height = height
        self.bitRate = bitRate != nil ? bitRate! : 480 * 640 * 3 * 4
        self.fps = (fps != nil) ? fps! : 10
        super.init()
        
        setCallBack()
        initVideoToolBox()
        
    }
    
    //开始编码
    func encodeVideo(sampleBuffer:CMSampleBuffer){
        if self.encodeSession == nil {
            initVideoToolBox()
        }
        encodeQueue.async {
            let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer)
            var time = CMTime(value: self.frameID, timescale: 1000)
            if #available(iOS 15, *){
                time = CMTime(value: self.frameID, timescale: 100)
                self.frameID += 1
            }
            
            let state = VTCompressionSessionEncodeFrame(self.encodeSession, imageBuffer: imageBuffer!, presentationTimeStamp: time, duration: .invalid, frameProperties: nil, sourceFrameRefcon: nil, infoFlagsOut: nil)
            if state != 0{
                print("encode filure")
            }
        }
    }
    
    func stop(){
        hasSpsPps = false
        frameID = 0
        if ((encodeSession) != nil) {
            VTCompressionSessionCompleteFrames(encodeSession, untilPresentationTimeStamp: .invalid)
            VTCompressionSessionInvalidate(encodeSession);
            encodeSession = nil;
        }
    }
    
    //初始化编码器
    private func initVideoToolBox() {
        //创建VTCompressionSession
        let state = VTCompressionSessionCreate(allocator: kCFAllocatorDefault, width: width, height: height, codecType: kCMVideoCodecType_H264, encoderSpecification: nil, imageBufferAttributes: nil, compressedDataAllocator: nil, outputCallback:encodeCallBack , refcon: unsafeBitCast(self, to: UnsafeMutableRawPointer.self), compressionSessionOut: &self.encodeSession)
        
        if state != 0{
            print("creat VTCompressionSession failed")
            return
        }
        
        //设置实时编码输出
        VTSessionSetProperty(encodeSession, key: kVTCompressionPropertyKey_RealTime, value: kCFBooleanTrue)
        //设置编码方式
        VTSessionSetProperty(encodeSession, key: kVTCompressionPropertyKey_ProfileLevel, value: kVTProfileLevel_H264_Baseline_AutoLevel)
        //设置是否产生B帧(因为B帧在解码时并不是必要的,是可以抛弃B帧的)
        VTSessionSetProperty(encodeSession, key: kVTCompressionPropertyKey_AllowFrameReordering, value: kCFBooleanFalse)
        //设置关键帧间隔
        var frameInterval = 10
        let number = CFNumberCreate(kCFAllocatorDefault, CFNumberType.intType, &frameInterval)
        VTSessionSetProperty(encodeSession, key: kVTCompressionPropertyKey_MaxKeyFrameInterval, value: number)
        
        //设置期望帧率，不是实际帧率
        let fpscf = CFNumberCreate(kCFAllocatorDefault, CFNumberType.intType, &fps)
        VTSessionSetProperty(encodeSession, key: kVTCompressionPropertyKey_ExpectedFrameRate, value: fpscf)
        
        //设置码率平均值，单位是bps。码率大了话就会非常清晰，但同时文件也会比较大。码率小的话，图像有时会模糊，但也勉强能看
        //码率计算公式参考笔记
        //        var bitrate = width * height * 3 * 4
        let bitrateAverage = CFNumberCreate(kCFAllocatorDefault, CFNumberType.intType, &bitRate)
        VTSessionSetProperty(encodeSession, key: kVTCompressionPropertyKey_AverageBitRate, value: bitrateAverage)
        
        //码率限制
        let bitRatesLimit :CFArray = [bitRate * 5 / 8, 1] as CFArray
        VTSessionSetProperty(encodeSession, key: kVTCompressionPropertyKey_DataRateLimits, value: bitRatesLimit)
    }

    private func setCallBack()  {
        //编码完成回调
        encodeCallBack = {(outputCallbackRefCon, sourceFrameRefCon, status, flag, sampleBuffer)  in
            let encoder :ZGH264Encoder = unsafeBitCast(outputCallbackRefCon, to: ZGH264Encoder.self)
            guard sampleBuffer != nil else {
                return
            }
            /// 0. 原始字节数据 8字节
            let buffer : [UInt8] = [0x00,0x00,0x00,0x01]
            /// 1. [UInt8] -> UnsafeBufferPointer<UInt8>
            let unsafeBufferPointer = buffer.withUnsafeBufferPointer {$0}
            /// 2.. UnsafeBufferPointer<UInt8> -> UnsafePointer<UInt8>
            let  unsafePointer = unsafeBufferPointer.baseAddress
            guard let startCode = unsafePointer else {return}
            let attachArray = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer!, createIfNecessary: false)
            let strkey = unsafeBitCast(kCMSampleAttachmentKey_NotSync, to: UnsafeRawPointer.self)
            let cfDic = unsafeBitCast(CFArrayGetValueAtIndex(attachArray, 0), to: CFDictionary.self)
            let keyFrame = !CFDictionaryContainsKey(cfDic, strkey);//没有这个键就意味着同步,就是关键帧
            //  获取sps pps
            if keyFrame && !encoder.hasSpsPps{
                if let description = CMSampleBufferGetFormatDescription(sampleBuffer!){
                    
                    var spsSize: Int = 0, spsCount :Int = 0,spsHeaderLength:Int32 = 0
                    var ppsSize: Int = 0, ppsCount: Int = 0,ppsHeaderLength:Int32 = 0
                    var spsDataPointer : UnsafePointer<UInt8>? = UnsafePointer(UnsafeMutablePointer<UInt8>.allocate(capacity: 0))
                    var ppsDataPointer : UnsafePointer<UInt8>? = UnsafePointer<UInt8>(bitPattern: 0)
                    let spsstatus = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(description, parameterSetIndex: 0, parameterSetPointerOut: &spsDataPointer, parameterSetSizeOut: &spsSize, parameterSetCountOut: &spsCount, nalUnitHeaderLengthOut: &spsHeaderLength)
                    if spsstatus != 0{
                        print("sps失败")
                    }
                    
                    let ppsStatus = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(description, parameterSetIndex: 1, parameterSetPointerOut: &ppsDataPointer, parameterSetSizeOut: &ppsSize, parameterSetCountOut: &ppsCount, nalUnitHeaderLengthOut: &ppsHeaderLength)
                    if ppsStatus != 0 {
                        print("pps失败")
                    }
                    
                    if let spsData = spsDataPointer,let ppsData = ppsDataPointer{
                        var spsDataValue = Data(capacity: 4 + spsSize)
                        spsDataValue.append(buffer, count: 4)
                        spsDataValue.append(spsData, count: spsSize)
                        
                        var ppsDataValue = Data(capacity: 4 + ppsSize)
                        ppsDataValue.append(startCode, count: 4)
                        ppsDataValue.append(ppsData, count: ppsSize)
                        encoder.callBackQueue.async {
                            encoder.videoEncodeCallbackSPSAndPPS!(spsDataValue, ppsDataValue)
                        }
                    }
                }
                encoder.hasSpsPps = true
            }
            
            let dataBuffer = CMSampleBufferGetDataBuffer(sampleBuffer!)
            var dataPointer: UnsafeMutablePointer<Int8>?  = nil
            var totalLength :Int = 0
            let blockState = CMBlockBufferGetDataPointer(dataBuffer!, atOffset: 0, lengthAtOffsetOut: nil, totalLengthOut: &totalLength, dataPointerOut: &dataPointer)
            if blockState != 0{
                print("获取data失败\(blockState)")
            }
            
            //NALU
            var offset :UInt32 = 0
            //返回的nalu数据前四个字节不是0001的startcode(不是系统端的0001)，而是大端模式的帧长度length
            let lengthInfoSize = 4
            //循环写入nalu数据
            while offset < totalLength - lengthInfoSize {
                //获取nalu 数据长度
                var naluDataLength:UInt32 = 0
                memcpy(&naluDataLength, dataPointer! + UnsafeMutablePointer<Int8>.Stride(offset), lengthInfoSize)
                //大端转系统端
                naluDataLength = CFSwapInt32BigToHost(naluDataLength)
                //获取到编码好的视频数据
                var data = Data(capacity: Int(naluDataLength) + lengthInfoSize)
                data.append(buffer, count: 4)
                //转化pointer；UnsafeMutablePointer<Int8> -> UnsafePointer<UInt8>
                let naluUnsafePoint = unsafeBitCast(dataPointer, to: UnsafePointer<UInt8>.self)

                data.append(naluUnsafePoint + UnsafePointer<UInt8>.Stride(offset + UInt32(lengthInfoSize)) , count: Int(naluDataLength))
                
                encoder.callBackQueue.async {
                    encoder.videoEncodeCallback!(data)
                }
                offset += (naluDataLength + UInt32(lengthInfoSize))
            }
        }
    }
    
    
    deinit {
        stop()
    }
}

解码类型

如果需要使用AVSampleBufferDisplayLayer播放CMSampleBuffer可以选择输出为sampleBuffer，其中sampleBuffer并未进行解码，AVSampleBufferDisplayLayer支持h264的CMSampleBuffer播放

enum ZGH264DecodeType {
    case imageBuffer //CVPixcelBuffer
    case sampleBuffer //CMSampleBuffer（H264）
}

解码

解码需要使用编码的SPS、PPS

import UIKit
import VideoToolbox

class ZGH264Decoder: NSObject {
    
    var videoDecodeCallback:((CVImageBuffer?) -> Void)?
    var videoDecodeSampleBufferCallback:((CMSampleBuffer?) -> Void)?
    
    private var width: Int32 = 480
    private var height:Int32 = 640
    
    private var spsData:Data?
    private var ppsData:Data?
    private var decompressionSession : VTDecompressionSession?
    private var decodeDesc : CMVideoFormatDescription?
    private var callback :VTDecompressionOutputCallback?
    private var decodeQueue = DispatchQueue(label: "decode")
    private var callBackQueue = DispatchQueue(label: "decodeCallBack")
    
    private var returnType: ZGH264DecodeType = .sampleBuffer
    
    init(width:Int32,height:Int32) {
        self.width = width
        self.height = height
    }
    
    func decode(data:Data) {
        decodeQueue.async {
            let length:UInt32 =  UInt32(data.count)
            self.decodeByte(data: data, size: length)
        }
    }
    
    /// 设置解码返回类型
    /// - Parameter type:ZGH264DecodeType
    func setReturnType(type: ZGH264DecodeType){
        returnType = type
    }
    
    private func decodeByte(data:Data,size:UInt32) {
        //数据类型:frame的前4个字节是NALU数据的开始码，也就是00 00 00 01，
        // 将NALU的开始码转为4字节大端NALU的长度信息
        let naluSize = size - 4
        let length : [UInt8] = [
            UInt8(truncatingIfNeeded: naluSize >> 24),
            UInt8(truncatingIfNeeded: naluSize >> 16),
            UInt8(truncatingIfNeeded: naluSize >> 8),
            UInt8(truncatingIfNeeded: naluSize)
            ]
        var frameByte :[UInt8] = length
        [UInt8](data).suffix(from: 4).forEach { (bb) in
            frameByte.append(bb)
        }
        let bytes = frameByte //[UInt8](frameData)
        // 第5个字节是表示数据类型，转为10进制后，7是sps, 8是pps, 5是IDR（I帧）信息
        let type :Int  = Int(bytes[4] & 0x1f)
        switch type{
        case 0x05:
            if initDecoder() {
                decode(frame: bytes, size: size)
            }
            
        case 0x06:
//            print("增强信息")
            break
        case 0x07:
            spsData = data
        case 0x08:
            ppsData = data
        default:
            if initDecoder() {
                decode(frame: bytes, size: size)
            }
        }
    }
    
    private func decode(frame:[UInt8],size:UInt32) {
        //
        var blockBUffer :CMBlockBuffer?
        var frame1 = frame
        //创建blockBuffer
        /*!
         参数1: structureAllocator kCFAllocatorDefault
         参数2: memoryBlock  frame
         参数3: frame size
         参数4: blockAllocator: Pass NULL
         参数5: customBlockSource Pass NULL
         参数6: offsetToData  数据偏移
         参数7: dataLength 数据长度
         参数8: flags 功能和控制标志
         参数9: newBBufOut blockBuffer地址,不能为空
         */
        let blockState = CMBlockBufferCreateWithMemoryBlock(allocator: kCFAllocatorDefault,
                                           memoryBlock: &frame1,
                                           blockLength: Int(size),
                                           blockAllocator: kCFAllocatorNull,
                                           customBlockSource: nil,
                                           offsetToData:0,
                                           dataLength: Int(size),
                                           flags: 0,
                                           blockBufferOut: &blockBUffer)
        if blockState != 0 {
            print("创建blockBuffer失败")
        }
//
        var sampleSizeArray :[Int] = [Int(size)]
        var sampleBuffer :CMSampleBuffer?
        //创建sampleBuffer
        /*
         参数1: allocator 分配器,使用默认内存分配, kCFAllocatorDefault
         参数2: blockBuffer.需要编码的数据blockBuffer.不能为NULL
         参数3: formatDescription,视频输出格式
         参数4: numSamples.CMSampleBuffer 个数.
         参数5: numSampleTimingEntries 必须为0,1,numSamples
         参数6: sampleTimingArray.  数组.为空
         参数7: numSampleSizeEntries 默认为1
         参数8: sampleSizeArray
         参数9: sampleBuffer对象
         */
        let readyState = CMSampleBufferCreateReady(allocator: kCFAllocatorDefault,
                                  dataBuffer: blockBUffer,
                                  formatDescription: decodeDesc,
                                  sampleCount: CMItemCount(1),
                                  sampleTimingEntryCount: CMItemCount(),
                                  sampleTimingArray: nil,
                                  sampleSizeEntryCount: CMItemCount(1),
                                  sampleSizeArray: &sampleSizeArray,
                                  sampleBufferOut: &sampleBuffer)
        
        guard let buffer = sampleBuffer, readyState == kCMBlockBufferNoErr else {
            print("解码失败")
            return
        }
        
        if returnType == .sampleBuffer {
            if let attachmentArray = CMSampleBufferGetSampleAttachmentsArray(buffer, createIfNecessary: true) {
                let dic = unsafeBitCast(CFArrayGetValueAtIndex(attachmentArray, 0), to: CFMutableDictionary.self)
                CFDictionarySetValue(dic,
                                     Unmanaged.passUnretained(kCMSampleAttachmentKey_DisplayImmediately).toOpaque(),
                                     Unmanaged.passUnretained(kCFBooleanTrue).toOpaque())
            }
            videoDecodeSampleBufferCallback?(sampleBuffer)
            return
        }
    
        //解码数据为CVPixcelBuffer
        /*
         参数1: 解码session
         参数2: 源数据 包含一个或多个视频帧的CMsampleBuffer
         参数3: 解码标志
         参数4: 解码后数据outputPixelBuffer
         参数5: 同步/异步解码标识
         */
        let sourceFrame:UnsafeMutableRawPointer? = nil
        var inforFalg = VTDecodeInfoFlags.asynchronous
        let decodeState = VTDecompressionSessionDecodeFrame(self.decompressionSession!, sampleBuffer: sampleBuffer!, flags:VTDecodeFrameFlags._EnableAsynchronousDecompression , frameRefcon: sourceFrame, infoFlagsOut: &inforFalg)
        if decodeState != 0 {
            print("解码失败")
        }
        
    }
    
    private func initDecoder() -> Bool {
        
        if decompressionSession != nil {
            return true
        }
        guard spsData != nil,ppsData != nil else {
            return false
        }

        //处理sps/pps
        var sps : [UInt8] = []
        [UInt8](spsData!).suffix(from: 4).forEach { (value) in
            sps.append(value)
        }
        var pps : [UInt8] = []
        [UInt8](ppsData!).suffix(from: 4).forEach{(value) in
            pps.append(value)
        }
        
        let spsAndpps = [sps.withUnsafeBufferPointer{$0}.baseAddress!,pps.withUnsafeBufferPointer{$0}.baseAddress!]
        let sizes = [sps.count,pps.count]

        /**
        根据sps pps设置解码参数
        param kCFAllocatorDefault 分配器
        param 2 参数个数
        param parameterSetPointers 参数集指针
        param parameterSetSizes 参数集大小
        param naluHeaderLen nalu nalu start code 的长度 4
        param _decodeDesc 解码器描述
        return 状态
        */
        let descriptionState = CMVideoFormatDescriptionCreateFromH264ParameterSets(allocator: kCFAllocatorDefault, parameterSetCount: 2, parameterSetPointers: spsAndpps, parameterSetSizes: sizes, nalUnitHeaderLength: 4, formatDescriptionOut: &decodeDesc)
        if descriptionState != 0 {
            print("description创建失败" )
            return false
        }
        //解码回调设置
        /*
         VTDecompressionOutputCallbackRecord 是一个简单的结构体，它带有一个指针 (decompressionOutputCallback)，指向帧解压完成后的回调方法。你需要提供可以找到这个回调方法的实例 (decompressionOutputRefCon)
         */
        setCallBack()
        var callbackRecord = VTDecompressionOutputCallbackRecord(decompressionOutputCallback: callback, decompressionOutputRefCon: unsafeBitCast(self, to: UnsafeMutableRawPointer.self))
        /*
         解码参数:
        * kCVPixelBufferPixelFormatTypeKey:摄像头的输出数据格式
         kCVPixelBufferPixelFormatTypeKey，已测可用值为
            kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange，即420v
            kCVPixelFormatType_420YpCbCr8BiPlanarFullRange，即420f
            kCVPixelFormatType_32BGRA，iOS在内部进行YUV至BGRA格式转换
         YUV420一般用于标清视频，YUV422用于高清视频，这里的限制让人感到意外。但是，在相同条件下，YUV420计算耗时和传输压力比YUV422都小。
         
        * kCVPixelBufferWidthKey/kCVPixelBufferHeightKey: 视频源的分辨率 width*height
         * kCVPixelBufferOpenGLCompatibilityKey : 它允许在 OpenGL 的上下文中直接绘制解码后的图像，而不是从总线和 CPU 之间复制数据。这有时候被称为零拷贝通道，因为在绘制过程中没有解码的图像被拷贝.
         
         */
        let imageBufferAttributes = [
            kCVPixelBufferPixelFormatTypeKey:kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
            kCVPixelBufferWidthKey:width,
            kCVPixelBufferHeightKey:height,
//            kCVPixelBufferOpenGLCompatibilityKey:true
            ] as [CFString : Any]
        
        //创建session
        
        /*!
         @function    VTDecompressionSessionCreate
         @abstract    创建用于解压缩视频帧的会话。
         @discussion  解压后的帧将通过调用OutputCallback发出
         @param    allocator  内存的会话。通过使用默认的kCFAllocatorDefault的分配器。
         @param    videoFormatDescription 描述源视频帧
         @param    videoDecoderSpecification 指定必须使用的特定视频解码器.NULL
         @param    destinationImageBufferAttributes 描述源像素缓冲区的要求 NULL
         @param    outputCallback 使用已解压缩的帧调用的回调
         @param    decompressionSessionOut 指向一个变量以接收新的解压会话
         */
        let state = VTDecompressionSessionCreate(allocator: kCFAllocatorDefault, formatDescription: decodeDesc!, decoderSpecification: nil, imageBufferAttributes: imageBufferAttributes as CFDictionary, outputCallback: &callbackRecord, decompressionSessionOut: &decompressionSession)
        if state != 0 {
            print("创建decodeSession失败")
        }
        VTSessionSetProperty(self.decompressionSession!, key: kVTDecompressionPropertyKey_RealTime, value: kCFBooleanTrue)
        
        return true
        
    }

    //解码成功的回掉
    private func setCallBack()  {
        /*
         VTDecompressionOutputCallback 回调方法包括七个参数：
                参数1: 回调的引用
                参数2: 帧的引用
                参数3: 一个状态标识 (包含未定义的代码)
                参数4: 指示同步/异步解码，或者解码器是否打算丢帧的标识
                参数5: 实际图像的缓冲
                参数6: 出现的时间戳
                参数7: 出现的持续时间
         */
        //(UnsafeMutableRawPointer?, UnsafeMutableRawPointer?, OSStatus, VTDecodeInfoFlags, CVImageBuffer?, CMTime, CMTime) -> Void
        callback = { decompressionOutputRefCon,sourceFrameRefCon,status,inforFlags,imageBuffer,presentationTimeStamp,presentationDuration in
            let decoder :ZGH264Decoder = unsafeBitCast(decompressionOutputRefCon, to: ZGH264Decoder.self)
            
            guard imageBuffer != nil else {
                return
            }
            if let block = decoder.videoDecodeCallback  {
                decoder.callBackQueue.async {
                    block(imageBuffer)
                }
            }
        }
    }
    
    deinit {
        if decompressionSession != nil {
            VTDecompressionSessionInvalidate(decompressionSession!)
            decompressionSession = nil
        }
        
    }
}

使用示例

let h264Encoder = ZGH264Encoder(width: 360, height: 640, bitRate: 2*1024*1024)
let h264Decoder = ZGH264Decoder(width: 360, height: 640)

h264Encoder.videoEncodeCallback = {[weak self] (data) in
            self?.h264Decoder.decode(data: data)
        }
h264Encoder.videoEncodeCallbackSPSAndPPS = {[weak self] (sps, pps) in
            self?.h264Decoder.decode(data: sps)
            self?.h264Decoder.decode(data: pps)
        }

h264Decoder.videoDecodeSampleBufferCallback = { [weak self] sampleBuffer in
            guard let sampleBuffer = sampleBuffer else { return }
            self?.otherPreView.enqueue(sampleBuffer: sampleBuffer)
        }

//        h264Decoder.setReturnType(type: .imageBuffer)
//        h264Decoder.videoDecodeCallback = { pixelBuffer in
//
//        }

Swift-VideoToolbox H264 硬编解码

编码

解码类型

解码

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读