对于实现数据结构来说,Redis 就给我们提供了两个优秀的设计思想:一个是使用连续的内存空间,避免内存碎片开销;二个是针对不同长度的数据,采用不同大小的元数据,以避免使用统一大小的元数据,造成内存空间的浪费。
在数据访问方面,你也要知道,使用共享对象其实可以避免重复创建冗余的数据,从而也可以有效地节省内存空间。不过,共享对象主要适用于只读场景,如果一个字符串被反复地修改,就无法被多个请求共享访问了。
SDS 内存优化设计
SDS 设计了不同类型的结构头,包括 sdshdr8、sdshdr16、sdshdr32 和 sdshdr64。这些不同类型的结构头可以适配不同大小的字符串,从而避免了内存浪费。不过,SDS 除了使用精巧设计的结构头外,在保存较小字符串时,其实还使用了嵌入式字符串的设计方法。这种方法避免了给字符串分配额外的空间,而是可以让字符串直接保存在 Redis 的基本数据对象结构体中。
要理解嵌入式字符串的设计与实现,先了解下 Redis 基本数据对象结构体 redisObject:
#define LRU_BITS 24
typedef struct redisObject {
// redisObject 的数据类型,是应用程序在 Redis 中保存的数据类型,包括 String、List、Hash 等
unsigned type:4;
// redisObject 的编码类型,是 Redis 内部实现各种数据类型所用的数据结构
unsigned encoding:4;
// redisObject 的 LRU 时间
unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
* LFU data (least significant 8 bits frequency
* and most significant 16 bits access time). */
// redisObject 的引用计数
int refcount;
// 指向值的指针
void *ptr;
} robj;
变量后使用冒号和数值的定义方法。这实际上是 C 语言中的位域定义方法,可以用来有效地节省内存开销。
嵌入式字符串
/* Create a string object with EMBSTR encoding if it is smaller than
* OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
* used.
*
* The current limit of 44 is chosen so that the biggest string object
* we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
robj *createStringObject(const char *ptr, size_t len) {
if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
return createEmbeddedStringObject(ptr,len);
else
return createRawStringObject(ptr,len);
}
/* The actual Redis Object */
#define OBJ_STRING 0 /* String object. */
#define OBJ_LIST 1 /* List object. */
#define OBJ_SET 2 /* Set object. */
#define OBJ_ZSET 3 /* Sorted set object. */
#define OBJ_HASH 4 /* Hash object. */
/* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
* string object where o->ptr points to a proper sds string. */
robj *createRawStringObject(const char *ptr, size_t len) {
return createObject(OBJ_STRING, sdsnewlen(ptr,len));
}
/* ===================== Creation and parsing of objects ==================== */
robj *createObject(int type, void *ptr) {
robj *o = zmalloc(sizeof(*o));
o->type = type;
o->encoding = OBJ_ENCODING_RAW;
o->ptr = ptr;
o->refcount = 1;
/* Set the LRU to the current lruclock (minutes resolution), or
* alternatively the LFU counter. */
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
} else {
o->lru = LRU_CLOCK();
}
return o;
}
Redis 会通过设计实现一块连续的内存空间,把 redisObject 结构体和 SDS 结构体紧凑地放置在一起。这样一来,对于不超过 44 字节的字符串来说,就可以避免内存碎片和两次内存分配的开销了
/* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
* an object where the sds string is actually an unmodifiable string
* allocated in the same chunk as the object itself. */
robj *createEmbeddedStringObject(const char *ptr, size_t len) {
robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
struct sdshdr8 *sh = (void*)(o+1);
o->type = OBJ_STRING;
o->encoding = OBJ_ENCODING_EMBSTR;
o->ptr = sh+1;
o->refcount = 1;
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
} else {
o->lru = LRU_CLOCK();
}
sh->len = len;
sh->alloc = len;
sh->flags = SDS_TYPE_8;
if (ptr == SDS_NOINIT)
sh->buf[len] = '\0';
else if (ptr) {
memcpy(sh->buf,ptr,len);
sh->buf[len] = '\0';
} else {
memset(sh->buf,0,len+1);
}
return o;
}
ziplist 内存优化设计
![](https://img.haomeiwen.com/i2132763/f8c3047a1633a42e.png)
/* The size of a ziplist header: two 32 bit integers for the total
* bytes count and last item offset. One 16 bit integer for the number
* of items field. */
#define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t))
/* Size of the "end of ziplist" entry. Just one byte. */
#define ZIPLIST_END_SIZE (sizeof(uint8_t))
/* Create a new empty ziplist. */
unsigned char *ziplistNew(void) {
unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE;
unsigned char *zl = zmalloc(bytes);
ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);
ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
ZIPLIST_LENGTH(zl) = 0;
zl[bytes-1] = ZIP_END;
return zl;
}
/* We use this function to receive information about a ziplist entry.
* Note that this is not how the data is actually encoded, is just what we
* get filled by a function in order to operate more easily. */
typedef struct zlentry {
unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
unsigned int prevrawlen; /* Previous entry len. */
unsigned int lensize; /* Bytes used to encode this entry type/len.
For example strings have a 1, 2 or 5 bytes
header. Integers always use a single byte.*/
unsigned int len; /* Bytes used to represent the actual entry.
For strings this is just the string length
while for integers it is 1, 2, 3, 4, 8 or
0 (for 4 bit immediate) depending on the
number range. */
unsigned int headersize; /* prevrawlensize + lensize. */
unsigned char encoding; /* Set to ZIP_STR_* or ZIP_INT_* depending on
the entry encoding. However for 4 bits
immediate integers this can assume a range
of values and must be range-checked. */
unsigned char *p; /* Pointer to the very start of the entry, that
is, this points to prev-entry-len field. */
} zlentry;
每个列表项中都会记录前一项的长度。因为每个列表项的长度不一样,所以如果使用相同的字节大小来记录 prevlen,就会造成内存空间浪费。
#define ZIP_BIG_PREVLEN 254
/* Encode the length of the previous entry and write it to "p". Return the
* number of bytes needed to encode this length if "p" is NULL. */
unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
if (p == NULL) {
return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(len)+1;
} else {
//判断prevlen的长度是否小于ZIP_BIG_PREVLEN,ZIP_BIG_PREVLEN等于254
if (len < ZIP_BIG_PREVLEN) {
//如果小于254字节,那么返回prevlen为1字节
p[0] = len;
return 1;
} else {
//否则,调用zipStorePrevEntryLengthLarge进行编码
return zipStorePrevEntryLengthLarge(p,len);
}
}
}
/* Encode the length of the previous entry and write it to "p". This only
* uses the larger encoding (required in __ziplistCascadeUpdate). */
int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) {
if (p != NULL) {
//将prevlen的第1字节设置为ZIP_BIG_PREVLEN,即254
p[0] = ZIP_BIG_PREVLEN;
//将前一个列表项的长度值拷贝至prevlen的第2至第5字节,其中sizeof(len)的值为4
memcpy(p+1,&len,sizeof(len));
memrev32ifbe(p+1);
}
//返回prevlen的大小,为5字节
return 1+sizeof(len);
}
ziplist 在 zipStoreEntryEncoding 函数中,针对整数和字符串,就分别使用了不同字节长度的编码结果。
/* Write the encoidng header of the entry in 'p'. If p is NULL it just returns
* the amount of bytes required to encode such a length. Arguments:
*
* 'encoding' is the encoding we are using for the entry. It could be
* ZIP_INT_* or ZIP_STR_* or between ZIP_INT_IMM_MIN and ZIP_INT_IMM_MAX
* for single-byte small immediate integers.
*
* 'rawlen' is only used for ZIP_STR_* encodings and is the length of the
* srting that this entry represents.
*
* The function returns the number of bytes used by the encoding/length
* header stored in 'p'. */
unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
//默认编码结果是1字节
unsigned char len = 1, buf[5];
if (ZIP_IS_STR(encoding)) {
/* Although encoding is given it may not be set for strings,
* so we determine it here using the raw length. */
//字符串长度小于等于63字节(16进制为0x3f)
if (rawlen <= 0x3f) {
if (!p) return len;
buf[0] = ZIP_STR_06B | rawlen;
//字符串长度小于等于16383字节(16进制为0x3fff)
} else if (rawlen <= 0x3fff) {
//编码结果是2字节
len += 1;
if (!p) return len;
buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f);
buf[1] = rawlen & 0xff;
} else {
//编码结果是5字节
len += 4;
if (!p) return len;
buf[0] = ZIP_STR_32B;
buf[1] = (rawlen >> 24) & 0xff;
buf[2] = (rawlen >> 16) & 0xff;
buf[3] = (rawlen >> 8) & 0xff;
buf[4] = rawlen & 0xff;
}
} else {
/* Implies integer encoding, so length is always 1\. */
if (!p) return len;
buf[0] = encoding;
}
/* Store this length at p. */
memcpy(p,buf,len);
return len;
}
简而言之,针对不同长度的数据,使用不同大小的元数据信息(prevlen 和 encoding),这种方法可以有效地节省内存开销。
intset 内存优化设计
整数集合结构体中记录数据的部分,就是一个 int8_t 类型的整数数组 contents。从内存使用的角度来看,整数数组就是一块连续内存空间,所以这样就避免了内存碎片,并提升了内存使用效率。
typedef struct intset {
uint32_t encoding;
uint32_t length;
int8_t contents[];
} intset;
通过共享对象节省内存
Redis 就采用了共享对象的设计思想,把这些常用数据创建为共享对象,当上层应用需要访问它们时,直接读取就行。
#define OBJ_SHARED_INTEGERS 10000
void createSharedObjects(void) {
int j;
shared.crlf = createObject(OBJ_STRING,sdsnew("\r\n"));
//常见回复信息
shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n"));
shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
...
//常见报错信息
shared.nokeyerr = createObject(OBJ_STRING,sdsnew(
"-ERR no such key\r\n"));
shared.syntaxerr = createObject(OBJ_STRING,sdsnew(
"-ERR syntax error\r\n"));
...
//0到9999的整数
for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
shared.integers[j] =
makeObjectShared(createObject(OBJ_STRING,(void*)(long)j));
shared.integers[j]->encoding = OBJ_ENCODING_INT;
}
for (j = 0; j < OBJ_SHARED_BULKHDR_LEN; j++) {
shared.mbulkhdr[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),"*%d\r\n",j));
shared.bulkhdr[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),"$%d\r\n",j));
}
/* The following two shared objects, minstring and maxstrings, are not
* actually used for their value but as a special object meaning
* respectively the minimum possible string and the maximum possible
* string in string comparisons for the ZRANGEBYLEX command. */
shared.minstring = sdsnew("minstring");
shared.maxstring = sdsnew("maxstring");
}
Q:SDS 判断是否使用嵌入式字符串的条件是 44 字节,你知道为什么是 44 字节吗?
A:Redis 规定嵌入式字符串最大以 64 字节存储,所以 N = 64 - 16(redisObject) - 3(sdshr8) - 1(\0), N = 44 字节。
![](https://img.haomeiwen.com/i2132763/8ec7af4c59018612.png)
![](https://img.haomeiwen.com/i2132763/4ee01c9e959ba6eb.png)
网友评论