定义在coding.h 文件中。
固定长度编码
void PutFixed32(std::string *dst, uint32_t value)
{
char buf[sizeof(value)];
EncodeFixed32(buf, value);
dst->append(buf, sizeof(buf));
}
inline void EncodeFixed32(char *dst, uint32_t value)
{
uint8_t *const buffer = reinterpret_cast<uint8_t *>(dst);
// Recent clang and gcc optimize this to a single mov / str instruction.
buffer[0] = static_cast<uint8_t>(value);
buffer[1] = static_cast<uint8_t>(value >> 8);
buffer[2] = static_cast<uint8_t>(value >> 16);
buffer[3] = static_cast<uint8_t>(value >> 24);
}
inline uint32_t DecodeFixed32(const char *ptr)
{
const uint8_t *const buffer = reinterpret_cast<const uint8_t *>(ptr);
// Recent clang and gcc optimize this to a single mov / ldr instruction.
return (static_cast<uint32_t>(buffer[0])) |
(static_cast<uint32_t>(buffer[1]) << 8) |
(static_cast<uint32_t>(buffer[2]) << 16) |
(static_cast<uint32_t>(buffer[3]) << 24);
}
注意到有注释说指令优化,写了简单代码,测试了下,的确如此。
变长编码
变长编码的本质是为了在存储时节约空间,试想一下,一个uint32_t 占4字节,其实,当我数值为7的时候,用1字节存储就可以了,没必要用4字节存储。
void PutVarint32(std::string *dst, uint32_t v)
{
char buf[5];
char *ptr = EncodeVarint32(buf, v);
dst->append(buf, ptr - buf);
}
char *EncodeVarint32(char *dst, uint32_t v)
{
// Operate on characters as unsigneds
uint8_t *ptr = reinterpret_cast<uint8_t *>(dst);
static const int B = 128;
if (v < (1 << 7)) // 2^7
{
*(ptr++) = v;
}
else if (v < (1 << 14)) // 2^14
{
*(ptr++) = v | B;
*(ptr++) = v >> 7;
}
else if (v < (1 << 21)) // 2^21
{
*(ptr++) = v | B;
*(ptr++) = (v >> 7) | B;
*(ptr++) = v >> 14;
}
else if (v < (1 << 28)) // 2^28
{
*(ptr++) = v | B;
*(ptr++) = (v >> 7) | B;
*(ptr++) = (v >> 14) | B;
*(ptr++) = v >> 21;
}
else
{
*(ptr++) = v | B;
*(ptr++) = (v >> 7) | B;
*(ptr++) = (v >> 14) | B;
*(ptr++) = (v >> 21) | B;
*(ptr++) = v >> 28;
}
return reinterpret_cast<char *>(ptr);
}
EncodeVariant32是以7位为单位去存储的,每个字节最高位为1代表后续的byte也是该数字的一部分,如果该位为0,则代表结束。
因1字节最多能表达的大小为127,大于 127的数字,比如128,会用两个字节来表示,从低位到高位:1000 0000 0000 0001
一般情况下,uint32需要4个字节,而Variant算法,每个字节只存储7位,uint32较大的数字需要5个字节,但是根据统计来说,数字较小的概率更大,所以可以用Variant算法来节省存储空间。再来看下解码函数
bool GetVarint32(Slice *input, uint32_t *value)
{
const char *p = input->data();
const char *limit = p + input->size();
const char *q = GetVarint32Ptr(p, limit, value);
if (q == nullptr)
{
return false;
}
else
{
*input = Slice(q, limit - q);
return true;
}
}
inline const char *GetVarint32Ptr(const char *p, const char *limit, uint32_t *value)
{
if (p < limit)
{
uint32_t result = *(reinterpret_cast<const uint8_t *>(p));
if ((result & 128) == 0)
{
*value = result;
return p + 1;
}
}
return GetVarint32PtrFallback(p, limit, value);
}
const char *GetVarint32PtrFallback(const char *p, const char *limit, uint32_t *value)
{
uint32_t result = 0;
for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7)
{
uint32_t byte = *(reinterpret_cast<const uint8_t *>(p));
p++;
if (byte & 128)
{
// More bytes are present
result |= ((byte & 127) << shift);
}
else
{
result |= (byte << shift);
*value = result;
return reinterpret_cast<const char *>(p);
}
}
return nullptr;
}
int VarintLength(uint64_t v)
{
int len = 1;
while (v >= 128)
{
v >>= 7;
len++;
}
return len;
}
网友评论