LevelDB varint 变长编码

作者: wayyyy | 来源:发表于2021-04-06 01:21 被阅读0次

LevelDB varint 变长编码
LevelDb(Google ProtoBuf) Varint
可变长度整数的编码
var int 编解码
整数的Varint编码
Protobuf编码原理
protobuf 编码之varint/zigzag
跟我一起学Python（二）
protobuf
UTF-8编码方式

定义在coding.h 文件中。

固定长度编码

void PutFixed32(std::string *dst, uint32_t value)
{
    char buf[sizeof(value)];
    EncodeFixed32(buf, value);
    dst->append(buf, sizeof(buf));
}

inline void EncodeFixed32(char *dst, uint32_t value)
{
    uint8_t *const buffer = reinterpret_cast<uint8_t *>(dst);

    // Recent clang and gcc optimize this to a single mov / str instruction.
    buffer[0] = static_cast<uint8_t>(value);
    buffer[1] = static_cast<uint8_t>(value >> 8);
    buffer[2] = static_cast<uint8_t>(value >> 16);
    buffer[3] = static_cast<uint8_t>(value >> 24);
}

inline uint32_t DecodeFixed32(const char *ptr)
{
    const uint8_t *const buffer = reinterpret_cast<const uint8_t *>(ptr);

    // Recent clang and gcc optimize this to a single mov / ldr instruction.
    return (static_cast<uint32_t>(buffer[0])) |
           (static_cast<uint32_t>(buffer[1]) << 8) |
           (static_cast<uint32_t>(buffer[2]) << 16) |
           (static_cast<uint32_t>(buffer[3]) << 24);
}

注意到有注释说指令优化，写了简单代码，测试了下，的确如此。

变长编码

变长编码的本质是为了在存储时节约空间，试想一下，一个uint32_t 占4字节，其实，当我数值为7的时候，用1字节存储就可以了，没必要用4字节存储。

void PutVarint32(std::string *dst, uint32_t v)
{
    char buf[5];
    char *ptr = EncodeVarint32(buf, v);
    dst->append(buf, ptr - buf);
}

char *EncodeVarint32(char *dst, uint32_t v)
{
    // Operate on characters as unsigneds
    uint8_t *ptr = reinterpret_cast<uint8_t *>(dst);
    static const int B = 128;
    if (v < (1 << 7)) // 2^7
    {
        *(ptr++) = v;
    }
    else if (v < (1 << 14)) // 2^14
    {
        *(ptr++) = v | B;
        *(ptr++) = v >> 7;
    }
    else if (v < (1 << 21)) // 2^21
    {
        *(ptr++) = v | B;
        *(ptr++) = (v >> 7) | B;
        *(ptr++) = v >> 14;
    }
    else if (v < (1 << 28)) // 2^28
    {
        *(ptr++) = v | B;
        *(ptr++) = (v >> 7) | B;
        *(ptr++) = (v >> 14) | B;
        *(ptr++) = v >> 21;
    }
    else
    {
        *(ptr++) = v | B;
        *(ptr++) = (v >> 7) | B;
        *(ptr++) = (v >> 14) | B;
        *(ptr++) = (v >> 21) | B;
        *(ptr++) = v >> 28;
    }
    return reinterpret_cast<char *>(ptr);
}

EncodeVariant32是以7位为单位去存储的，每个字节最高位为1代表后续的byte也是该数字的一部分，如果该位为0，则代表结束。
因1字节最多能表达的大小为127，大于 127的数字，比如128，会用两个字节来表示，从低位到高位：1000 0000 0000 0001

一般情况下，uint32需要4个字节，而Variant算法，每个字节只存储7位，uint32较大的数字需要5个字节，但是根据统计来说，数字较小的概率更大，所以可以用Variant算法来节省存储空间。再来看下解码函数

bool GetVarint32(Slice *input, uint32_t *value)
{
        const char *p = input->data();
        const char *limit = p + input->size();
        const char *q = GetVarint32Ptr(p, limit, value);
        if (q == nullptr)
        {
            return false;
        }
        else
        {
            *input = Slice(q, limit - q);
            return true;
  }
}

inline const char *GetVarint32Ptr(const char *p, const char *limit, uint32_t *value)
{
    if (p < limit)
    {
        uint32_t result = *(reinterpret_cast<const uint8_t *>(p));
        if ((result & 128) == 0)
        {
            *value = result;
            return p + 1;
        }
    }
    return GetVarint32PtrFallback(p, limit, value);
}

const char *GetVarint32PtrFallback(const char *p, const char *limit, uint32_t *value)
{
    uint32_t result = 0;
    for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7)
    {
        uint32_t byte = *(reinterpret_cast<const uint8_t *>(p));
        p++;
        if (byte & 128)
        {
            // More bytes are present
            result |= ((byte & 127) << shift);
        }
        else
        {
            result |= (byte << shift);
            *value = result;
            return reinterpret_cast<const char *>(p);
        }
    }
    return nullptr;
}

int VarintLength(uint64_t v)
{
    int len = 1;
    while (v >= 128)
    {
        v >>= 7;
        len++;
    }
    return len;
}

网友评论

本文标题：LevelDB varint 变长编码

本文链接：https://www.haomeiwen.com/subject/yfkmektx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

LevelDB varint 变长编码

固定长度编码

变长编码

相关文章