美文网首页
MP4文件格式

MP4文件格式

作者: 客昂康 | 来源:发表于2020-11-11 11:02 被阅读0次

下图包含有MP4标准文档:


mp4文件由一个个BOX组成,BOX可以嵌套,如图:

BOX主要由header和body两部分组成,body包含实际数据或其他BOX,header主要由4字节的size和4字节的type组成,size是整个BOX的大小,type是BOX的类型,如图:
  • 如果size等于0,表示该BOX一直延伸到文件末尾,也就是说它是文件中的最后一个BOX。
  • 如果size等于1,该BOX存在可选的8字节的largesize字段,表示整个BOX的大小。
  • 如果type等于"uuid",该BOX存在可选的16字节的UUID字段。
  • 有一部分box是fullbox,它在基础box上添加了1字节的version字段和3字节的flags字段。

下表列出了标准定义的一些BOX以及它们之间的嵌套关系,带*号的BOX是必选的。


常见的比较重要的几个box

// mp4文件的第一个box
// brand通常有 isom、iso2、iso6、mp41、mp42、avc1 等。
aligned(8) class FileTypeBox extends Box(‘ftyp’) {
    unsigned int(32) major_brand;          // 主brand
    unsigned int(32) minor_version;        // 主brand的次版本号(minor version),仅作参考信息
    unsigned int(32) compatible_brands[];  // 兼容brand列表
}
// 整个视频的整体信息
aligned(8) class MovieHeaderBox extends FullBox(‘mvhd’, version, 0) {
    if (version == 1) {
        unsigned int(64) creation_time;     // 创建时间,从UTC时间1904年1月1日00:00:00起的秒数。
        unsigned int(64) modification_time; // 修改时间,计时起点同上。
        unsigned int(32) timescale;         // 一秒的时间刻度。
        unsigned int(64) duration;          // 总的持续时间。duration/timescale = 总秒数。
    } else { // version==0
        unsigned int(32) creation_time;     // 创建时间,从UTC时间1904年1月1日00:00:00起的秒数。
        unsigned int(32) modification_time; // 修改时间,计时起点同上。
        unsigned int(32) timescale;         // 一秒的时间刻度。
        unsigned int(32) duration;          // 总的持续时间。duration/timescale = 总秒数。
    }
    template int(32) rate;                  // 播放速度,16.16定点数,通常是0x00010000(1.0),也就是1.0正常速度播放。
    template int(16) volume;                // 音量大小,8.8定点数,通常是0x0100(1.0),也就是1.0全部音量。
    const bit(16) reserved = 0;             // 预留
    const unsigned int(32)[2] reserved = 0; // 预留
    template int(32)[9] matrix = {0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 }; //Unity matrix
    bit(32)[6] pre_defined = 0;             // 预定义字段,固定为0
    unsigned int(32) next_track_ID;         // 下一个新增Track可使用的track ID,非0,且应大于当前已使用的最大track_ID。
}
// Track 的整体信息
aligned(8) class TrackHeaderBox extends FullBox(‘tkhd’, version, flags){
    if (version==1) {
        unsigned int(64) creation_time;      // 创建时间,从UTC时间1904年1月1日00:00:00起的秒数。
        unsigned int(64) modification_time;  // 修改时间,计时起点同上。
        unsigned int(32) track_ID;           // track ID
        unsigned int(32) reserved = 0;       // 预留
        unsigned int(64) duration;           // 该 Track 总时间,duration/timescale = 总秒数。
    } else { // version==0
        unsigned int(32) creation_time;      // 创建时间,从UTC时间1904年1月1日00:00:00起的秒数。
        unsigned int(32) modification_time;  // 修改时间,计时起点同上。
        unsigned int(32) track_ID;           // track ID
        unsigned int(32) reserved = 0;       // 预留
        unsigned int(32) duration;           // 该 Track 总时间,duration/timescale = 总秒数。
    }
    unsigned int(32)[2] reserved = 0;        // 预留
    template int(16)    layer = 0;           // 视频Track的前后叠放顺序,数值越小越靠近观看者。
    template int(16)    alternate_group = 0; // Track备选分组,0表示不属于任何分组;组号相同且非0的Track互为备选,同一时刻只应播放其中一个。
    template int(16)    volume = {if track_is_audio 0x0100 else 0};
    unsigned int(16)    reserved = 0;        // 预留
    template int(32)[9] matrix={ 0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 };// unity matrix
    unsigned int(32)    width;               // 对于文字或字幕类Track,它可以是显示文字区域建议的尺寸。
    unsigned int(32)    height;              // 对于文字或字幕类Track,它可以是显示文字区域建议的尺寸。
}
// 媒体头(mdhd)声明与该Track中媒体特性相关的总体信息,这些信息与具体媒体类型无关。
aligned(8) class MediaHeaderBox extends FullBox(‘mdhd’, version, 0) {
    if (version==1) {
        unsigned int(64) creation_time;      // 创建时间,从UTC时间1904年1月1日00:00:00起的秒数。
        unsigned int(64) modification_time;  // 修改时间,计时起点同上。
        unsigned int(32) timescale;          // 一秒的时间刻度。
        unsigned int(64) duration;           // 总的持续时间。duration/timescale = 总秒数。
    } 
    else { 
        unsigned int(32) creation_time;      // 创建时间,从UTC时间1904年1月1日00:00:00起的秒数。
        unsigned int(32) modification_time;  // 修改时间,计时起点同上。
        unsigned int(32) timescale;          // 一秒的时间刻度。
        unsigned int(32) duration;           // 总的持续时间。duration/timescale = 总秒数。
    }
    bit(1) pad = 0;
    unsigned int(5)[3] language; // ISO-639-2/T language code
    unsigned int(16) pre_defined = 0;
}
// stsd box 有子box。
aligned(8) class SampleDescriptionBox (unsigned int(32) handler_type) extends FullBox('stsd', version, 0){
    unsigned int(32) entry_count;
    for (i=1; i<=entry_count; i++){
        SampleEntry(); // an instance of a class derived from SampleEntry
    }
}
// 记录每个sample的持续时间
aligned(8) class TimeToSampleBox extends FullBox(‘stts’, version = 0, 0) {
    unsigned int(32) entry_count;       // 项目个数
    for (i=1; i<=entry_count; i++) {    // 
        unsigned int(32) sample_count;  // 连续相同的持续时间的数量
        unsigned int(32) sample_delta;  // sample的持续时间
    }
}
// 记录关键帧的列表
aligned(8) class SyncSampleBox extends FullBox(‘stss’, version = 0, 0) {
    unsigned int(32) entry_count;        // 关键帧个数
    for (i=1; i<=entry_count; i++) {     // 
        unsigned int(32) sample_number;  // 关键帧的帧号,从1数起,不是从0数起。
    }
}
// 记录每个sample的显示时间和解码时间的时间差
aligned(8) class CompositionOffsetBox extends FullBox(‘ctts’, version, 0) {
    unsigned int(32) entry_count;           // 个数。
    if (version == 0) {
        for (i=1; i<=entry_count; i++) {
            unsigned int(32) sample_count;  // 连续相同的偏移量的数量
            unsigned int(32) sample_offset; // 显示时间相对解码时间的偏移量,解码时间点+sample_offset=显示时间点。
        }
    }
    else if (version == 1) {
        for (i=1; i<=entry_count; i++) {
            unsigned int(32) sample_count;  // 连续相同的偏移量的数量
            signed int(32) sample_offset;   // 显示时间相对解码时间的偏移量,解码时间点+sample_offset=显示时间点。
        }
    }
}
// 记录每个 Chunk 中 sample 的个数,计算方法有点绕:
// entry_count是Chunk组的数量,每个Chunk组中第一个Chunk的序号是first_chunk,每个Chunk组中Chunk的数量
// 是下一个Chunk组的first_chunk减去本Chunk组的first_chunk,如果没有下一个Chunk组,那就从first_chunk数
// 到最后一个Chunk。Chunk总数可从 "stco" 或 "co64" box 获取。
aligned(8) class SampleToChunkBox extends FullBox(‘stsc’, version = 0, 0) {
    unsigned int(32) entry_count;                  // Chunk组的数量
    for (i=1; i<=entry_count; i++) {               // 
        unsigned int(32) first_chunk;              // 该Chunk组中第一个Chunk的序号
        unsigned int(32) samples_per_chunk;        // 该Chunk组中每一个Chunk中sample的个数
        unsigned int(32) sample_description_index; // 该Chunk组中的sample所使用的sample描述在 "stsd" box 中的序号,从1数起。
   }
}
// 记录每一个 sample 的大小
aligned(8) class SampleSizeBox extends FullBox(‘stsz’, version = 0, 0) { 
    unsigned int(32) sample_size;         // 如果每一个sample的大小都相同,那么sample的大小就是该值。
    unsigned int(32) sample_count;        // sample的总个数;当sample_size是0时,它也是后面entry_size表的项数。
    if (sample_size == 0) {               //
        for (i=1; i<=sample_count; i++) { //
            unsigned int(32) entry_size;  // 各个sample的大小
        }
    }
}
// 记录每个 Chunk 在整个文件中的位置
aligned(8) class ChunkOffsetBox extends FullBox(‘stco’, version = 0, 0) { 
    unsigned int(32) entry_count;       // Chunk 个数
    for (i=1; i<=entry_count; i++) {    // 
        unsigned int(32) chunk_offset;  // 各个 Chunk 在整个mp4文件中的偏移量。
    }
}
// 作用同 stco,不同的是每个偏移量数值是64位,为了应对大于4GB的mp4文件。
aligned(8) class ChunkLargeOffsetBox extends FullBox(‘co64’, version = 0, 0) { 
    unsigned int(32) entry_count;
    for (i=1; i<=entry_count; i++) {
        unsigned int(64)  chunk_offset;
    }
}

示例代码

以下示例代码可用于初步分析mp4文件结构:

#include <stdio.h>
#include <stdint.h>

// Box type codes. Each value is the four ASCII characters of the box type
// packed into a 32-bit integer with the first character in the most
// significant byte (e.g. 'f','t','y','p' -> 0x66747970), which is the value
// readU32BE() yields for the 4-byte type field of a box header.
// The default branch of readBox_xxxx() prints a ready-made #define line in
// this same format for any box type that is not listed here.
#define  MP4BOXTYPE_ftyp  0x66747970
#define  MP4BOXTYPE_moov  0x6d6f6f76
#define  MP4BOXTYPE_mvhd  0x6d766864
#define  MP4BOXTYPE_iods  0x696f6473
#define  MP4BOXTYPE_trak  0x7472616b
#define  MP4BOXTYPE_tkhd  0x746b6864
#define  MP4BOXTYPE_edts  0x65647473
#define  MP4BOXTYPE_elst  0x656c7374
#define  MP4BOXTYPE_mdia  0x6d646961
#define  MP4BOXTYPE_mdhd  0x6d646864
#define  MP4BOXTYPE_hdlr  0x68646c72
#define  MP4BOXTYPE_minf  0x6d696e66
#define  MP4BOXTYPE_vmhd  0x766d6864
#define  MP4BOXTYPE_smhd  0x736d6864
#define  MP4BOXTYPE_dinf  0x64696e66
#define  MP4BOXTYPE_dref  0x64726566
#define  MP4BOXTYPE_stbl  0x7374626c
#define  MP4BOXTYPE_stsd  0x73747364
#define  MP4BOXTYPE_stts  0x73747473
#define  MP4BOXTYPE_stss  0x73747373
#define  MP4BOXTYPE_ctts  0x63747473
#define  MP4BOXTYPE_stsc  0x73747363
#define  MP4BOXTYPE_stsz  0x7374737a
#define  MP4BOXTYPE_stco  0x7374636f
#define  MP4BOXTYPE_co64  0x636f3634
#define  MP4BOXTYPE_sgpd  0x73677064
#define  MP4BOXTYPE_sbgp  0x73626770
#define  MP4BOXTYPE_sdtp  0x73647470
#define  MP4BOXTYPE_udta  0x75647461
#define  MP4BOXTYPE_mdat  0x6d646174
#define  MP4BOXTYPE_free  0x66726565
#define  MP4BOXTYPE_uuid  0x75756964

static int64_t getFileSize(FILE *fp){
    _fseeki64(fp, 0, SEEK_END);
    return _ftelli64(fp);
}

static uint32_t readU32BE(FILE *fp, int64_t offset){
    uint8_t buffer[8];
    if(offset >= 0){
        _fseeki64(fp, offset, SEEK_SET);
    }
    fread(buffer, 4, 1, fp);
    uint32_t value = buffer[0];
    value = (value << 8) | buffer[1];
    value = (value << 8) | buffer[2];
    value = (value << 8) | buffer[3];
    return value;
}

static int64_t readI64BE(FILE *fp, int64_t offset){
    uint8_t buffer[16];
    if(offset >= 0){
        _fseeki64(fp, offset, SEEK_SET);
    }
    fread(buffer, 8, 1, fp);
    int64_t value = buffer[0];
    value = (value << 8) | buffer[1];
    value = (value << 8) | buffer[2];
    value = (value << 8) | buffer[3];
    value = (value << 8) | buffer[4];
    value = (value << 8) | buffer[5];
    value = (value << 8) | buffer[6];
    value = (value << 8) | buffer[7];
    return value;
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// Per-box-type handlers, one for each leaf box type that readBox_xxxx()
// dispatches on. All of them are currently empty placeholders: the program
// only prints the box hierarchy and does not decode any box payload.
//
// Every handler is called by readBox_xxxx() with the same arguments:
//   fp        - the open MP4 file.
//   fileStart - absolute offset of the first byte after the box header.
//   fileEnd   - absolute offset one past the last byte of the box.
//   depth     - nesting level of the box contents (parent depth + 1).
static void readBox_ftyp(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_mvhd(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_iods(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_tkhd(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_udta(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_elst(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_mdhd(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_hdlr(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_vmhd(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_smhd(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_dref(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_stsd(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_stts(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_stss(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_ctts(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_stsc(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_stsz(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_stco(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_co64(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_sgpd(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_sbgp(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_sdtp(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

static void readBox_uuid(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// Convert a 32-bit box type code into its four-character string.
//
// buffer - receives the four characters plus a terminating NUL; must have
//          room for at least 5 bytes.
// type   - box type with the first character in the most significant byte.
//
// The characters are extracted with shifts rather than by reading the bytes
// of `type` through a char pointer, so the result does not depend on the
// byte order of the host CPU.
static void typeToStr(char *buffer, uint32_t type){
    buffer[0] = (char)((type >> 24) & 0xFF);
    buffer[1] = (char)((type >> 16) & 0xFF);
    buffer[2] = (char)((type >>  8) & 0xFF);
    buffer[3] = (char)( type        & 0xFF);
    buffer[4] = '\0';
}

// Print one line of the box tree to stdout:
//   <offset, right-aligned in 12 columns> <"-----" repeated depth times> <type> (<header size>+<body size>)
//
// fileStart - absolute offset of the box in the file.
// bodySize  - size of the box without its header, in bytes.
// headSize  - size of the box header, in bytes.
// typeStr   - NUL-terminated box type string.
// depth     - nesting level; controls how many "-----" groups are printed.
//
// int64_t is not `long long` on every platform (it is `long` on LP64
// systems), so the 64-bit arguments are cast to match the %lld conversions.
static void printMp4Struct(int64_t fileStart, int64_t bodySize, uint32_t headSize, char *typeStr, uint8_t depth){
    printf("%12lld ", (long long)fileStart);
    while(depth--) printf("-----");
    printf(" %s (%u+%lld)\n", typeStr, (unsigned int)headSize, (long long)bodySize);
}

// Walk the sequence of sibling boxes stored in the byte range
// [fileStart, fileEnd) of fp, print one line per box through
// printMp4Struct(), and descend into the box types handled below.
//
// fp        - MP4 file opened for binary reading.
// fileStart - absolute offset of the first box header in the range.
// fileEnd   - absolute offset one past the last byte of the range.
// depth     - nesting level, forwarded to printMp4Struct() for indentation.
//
// Header layout: 4-byte big-endian size, 4-byte type. size == 1 means the
// real size is a 64-bit value stored right after the type; size == 0 means
// the box runs to the end of the enclosing range; type "uuid" adds a 16-byte
// extended type to the header.
//
// The walk stops at the first header that cannot be valid (less than a
// header's worth of bytes left, or a size smaller than the header itself),
// so corrupt input can neither loop forever nor move the cursor backwards.
// A box whose declared size exceeds the range (e.g. a truncated file) is
// still printed with its declared size, but traversal is clamped to the
// bytes that actually exist.
static void readBox_xxxx(FILE *fp, int64_t fileStart, int64_t fileEnd, uint8_t depth){
    char typeStr[8];
    while(fileStart >= 0 && fileStart < fileEnd){
        int64_t  remaining = fileEnd - fileStart;
        int64_t  size;
        int64_t  span;
        int64_t  bodyStart;
        int64_t  boxEnd;
        uint32_t type;
        uint32_t head = 8;

        if(remaining < 8){
            break;  // not even a compact header left
        }
        size = readU32BE(fp, fileStart+0);
        type = readU32BE(fp, fileStart+4);
        if(size == 1){
            if(remaining < 16){
                break;  // the 64-bit size field itself is missing
            }
            size = readI64BE(fp, fileStart+8);
            head = 16;
        }else if(size == 0 && type != 0){
            // "Extends to the end": the type check keeps trailing zero
            // padding from being reported as a box.
            size = remaining;
        }
        if(type == MP4BOXTYPE_uuid){
            head += 16;
        }
        if(size < (int64_t)head){
            break;  // covers 0, 2..7, negative 64-bit sizes and short uuid boxes
        }

        // Never step or recurse past the enclosing range.
        span      = (size > remaining) ? remaining : size;
        boxEnd    = fileStart + span;
        bodyStart = ((int64_t)head < span) ? fileStart + head : boxEnd;

        typeToStr(typeStr, type);
        printMp4Struct(fileStart, size-head, head, typeStr, depth);

        switch(type){
            // Pure containers: the body is itself a sequence of boxes.
            case MP4BOXTYPE_moov:
            case MP4BOXTYPE_trak:
            case MP4BOXTYPE_edts:
            case MP4BOXTYPE_mdia:
            case MP4BOXTYPE_minf:
            case MP4BOXTYPE_dinf:
            case MP4BOXTYPE_stbl:
            readBox_xxxx(fp, bodyStart, boxEnd, depth+1);
            break;

            // Leaf boxes: hand the body to the type-specific handler.
            case MP4BOXTYPE_ftyp: readBox_ftyp(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_mvhd: readBox_mvhd(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_iods: readBox_iods(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_tkhd: readBox_tkhd(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_elst: readBox_elst(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_mdhd: readBox_mdhd(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_hdlr: readBox_hdlr(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_vmhd: readBox_vmhd(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_smhd: readBox_smhd(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_dref: readBox_dref(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_stsd: readBox_stsd(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_stts: readBox_stts(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_stss: readBox_stss(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_ctts: readBox_ctts(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_stsc: readBox_stsc(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_stsz: readBox_stsz(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_stco: readBox_stco(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_co64: readBox_co64(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_sgpd: readBox_sgpd(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_sbgp: readBox_sbgp(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_sdtp: readBox_sdtp(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_udta: readBox_udta(fp, bodyStart, boxEnd, depth+1); break;
            case MP4BOXTYPE_uuid: readBox_uuid(fp, bodyStart, boxEnd, depth+1); break;

            // Payload-only boxes: nothing to descend into.
            case MP4BOXTYPE_mdat:
            case MP4BOXTYPE_free:
            break;

            // Unknown type: emit a #define line that can be pasted into the
            // MP4BOXTYPE_* list.
            default:
            printf("#define  MP4BOXTYPE_%s  0x%08x\n", typeStr, type);
            break;
        }

        fileStart = boxEnd;
    }
}

// Entry point: dump the box structure of the MP4 file named by argv[1].
//
// Exit status:
//    0  the structure was printed, or no file name was given
//   -1  the file could not be opened
//   -2  the file is 8 bytes or smaller (or its size could not be determined
//       as a value above 8)
int main(int argc, char *argv[]){
    int status = 0;
    if(argc >= 2){
        FILE *input = fopen(argv[1], "rb");
        if(input == NULL){
            status = -1;
        }else{
            int64_t totalBytes = getFileSize(input);
            if(totalBytes > 8){
                // Top-level boxes are printed at depth 1.
                readBox_xxxx(input, 0, totalBytes, 1);
            }else{
                status = -2;
            }
            fclose(input);
        }
    }
    return status;
}

运行效果如下图:


相关文章

网友评论

      本文标题:MP4文件格式

      本文链接:https://www.haomeiwen.com/subject/ntcebktx.html