A dictionary, also known as a symbol table, associative array, or map, is an abstract data structure for storing key-value pairs.
As with its other data structures, C provides nothing of the kind out of the box, so Redis builds its own implementation.
Definition of the hash table
Redis dictionaries use a hash table as the underlying implementation. The hash table is implemented as follows:
// hash table node
typedef struct dictEntry {
void *key;
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
// pointer to the next hash table node
struct dictEntry *next;
} dictEntry;
/* This is our hash table structure. Every dictionary has two of this as we
* implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
// array of buckets; each bucket is a singly linked list of entries
dictEntry **table;
// size of the table (number of buckets)
unsigned long size;
// size mask, used to compute bucket indexes
// always equal to size-1
unsigned long sizemask;
// number of entries currently stored in the table
unsigned long used;
} dictht;
The initial size of every hash table is 4:
/* This is the initial size of every hash table */
#define DICT_HT_INITIAL_SIZE 4
Definition of the dictionary
The dictionary itself is implemented as follows:
typedef struct dictType {
uint64_t (*hashFunction)(const void *key);
void *(*keyDup)(void *privdata, const void *key);
void *(*valDup)(void *privdata, const void *obj);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
void (*keyDestructor)(void *privdata, void *key);
void (*valDestructor)(void *privdata, void *obj);
int (*expandAllowed)(size_t moreMem, double usedRatio);
} dictType;
typedef struct dict {
// type-specific functions
dictType *type;
// private data passed to the type-specific functions
void *privdata;
// the two hash tables
dictht ht[2];
// rehash index, recording the progress of the rehash; -1 when no rehash is in progress
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
// flag used to pause rehashing
int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
} dict;
dict.type: the dictionary supports polymorphism; type points to a set of type-specific functions for the keys and values it stores;
dict.privdata: optional private data passed to those type-specific functions;
dict.ht[2]: the two hash tables; normally only ht[0] is used, and ht[1] is only used while a rehash is in progress;
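As a concrete illustration of this polymorphism, below is a minimal, hypothetical dictType for a dict that maps heap-allocated C strings to C strings. Neither the functions nor the type instance come from the Redis source; they only show how the callback slots are filled (the real Redis types live elsewhere in the code base and use the SipHash-based hash helpers).
// A hypothetical dictType mapping heap-allocated C strings to C strings.
// Sketch for illustration only, not code from the Redis source.
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static uint64_t strKeyHash(const void *key) {
    // The real Redis types would call the SipHash-based helpers here;
    // a trivial djb2-style hash keeps this sketch self-contained.
    uint64_t h = 5381;
    for (const unsigned char *p = key; *p; p++) h = h * 33 + *p;
    return h;
}

static int strKeyCompare(void *privdata, const void *key1, const void *key2) {
    (void)privdata;                  // privdata is unused in this sketch
    return strcmp(key1, key2) == 0;  // non-zero return means "keys are equal"
}

static void strDestructor(void *privdata, void *obj) {
    (void)privdata;
    free(obj);
}

static dictType strDictType = {
    strKeyHash,      // hashFunction
    NULL,            // keyDup: keys are stored as-is, not copied
    NULL,            // valDup
    strKeyCompare,   // keyCompare
    strDestructor,   // keyDestructor
    strDestructor,   // valDestructor
    NULL             // expandAllowed: no extra memory check
};

// Usage: dict *d = dictCreate(&strDictType, NULL);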
Diagram of the dictionary structure
Hash algorithm
When a new key-value pair is added, the hash value and the bucket index are computed first:
hash = dict->type->hashFunction(key);
index = hash & dict->ht[x].sizemask;
The default hash algorithm is currently SipHash, which randomizes the output and reduces hash collisions; its implementation lives in siphash.c.
When collisions do occur, the hash table resolves them with separate chaining. Since dictht.table keeps no pointer to the tail of each chain, new entries are inserted at the head of the chain for performance.
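The index computation and head insertion can be sketched as below. This is a simplified illustration written against the structs above; the real insertion path (dictAddRaw) additionally checks for duplicate keys and for an in-progress rehash, both omitted here.
// Simplified sketch: insert a new entry at the head of its bucket in ht[0].
// Illustration only; duplicate-key checks and rehash handling are omitted.
static void sketchInsert(dict *d, void *key, void *val) {
    uint64_t hash = d->type->hashFunction(key);
    unsigned long idx = hash & d->ht[0].sizemask;   // size is a power of 2

    dictEntry *entry = zmalloc(sizeof(*entry));     // Redis's allocator
    entry->key = key;
    entry->v.val = val;

    // Head insertion: no tail pointer is kept per chain, so linking the
    // new node in front of the existing chain is O(1).
    entry->next = d->ht[0].table[idx];
    d->ht[0].table[idx] = entry;
    d->ht[0].used++;
}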
rehash
As operations are performed, the number of key-value pairs stored in the hash table grows and shrinks. To keep the load factor within a reasonable range, the table is rehashed, i.e. expanded or shrunk, when its capacity deviates too much from the number of entries it holds.
The rehash procedure works as follows:
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
*
* Note that a rehashing step consists in moving a bucket (that may have more
* than one key as we use chaining) from the old to the new hash table, however
* since part of the hash table may be composed of empty spaces, it is not
* guaranteed that this function will rehash even a single bucket, since it
* will visit at max N*10 empty buckets in total, otherwise the amount of
* work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
int empty_visits = n*10; /* Max number of empty buckets to visit. */
// return immediately if no rehash is in progress
if (!dictIsRehashing(d)) return 0;
while(n-- && d->ht[0].used != 0) {
dictEntry *de, *nextde;
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(d->ht[0].size > (unsigned long)d->rehashidx);
// starting from rehashidx, find the first non-empty bucket
while(d->ht[0].table[d->rehashidx] == NULL) {
d->rehashidx++;
if (--empty_visits == 0) return 1;
}
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
uint64_t h;
nextde = de->next;
/* Get the index in the new hash table */
h = dictHashKey(d, de->key) & d->ht[1].sizemask;
de->next = d->ht[1].table[h];
d->ht[1].table[h] = de;
d->ht[0].used--;
d->ht[1].used++;
de = nextde;
}
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}
/* Check if we already rehashed the whole table... */
if (d->ht[0].used == 0) {
zfree(d->ht[0].table);
d->ht[0] = d->ht[1];
_dictReset(&d->ht[1]);
d->rehashidx = -1;
return 0;
}
/* More to rehash... */
return 1;
}
/* Rehash in ms+"delta" milliseconds. The value of "delta" is larger
* than 0, and is smaller than 1 in most cases. The exact upper bound
* depends on the running time of dictRehash(d,100).*/
int dictRehashMilliseconds(dict *d, int ms) {
if (d->pauserehash > 0) return 0;
long long start = timeInMilliseconds();
int rehashes = 0;
while(dictRehash(d,100)) {
rehashes += 100;
if (timeInMilliseconds()-start > ms) break;
}
return rehashes;
}
- When an expansion or shrink is performed, space for ht[1] is allocated up front; its size depends on the operation and on the current number of entries (see the sizing sketch after this list):
- if the operation is an expansion, ht[1].size is ht[0].used + 1 rounded up to the next power of 2;
- if the operation is a shrink, ht[1].size is ht[0].used rounded up to the next power of 2.
- dictRehash() first checks whether a rehash is actually in progress and returns 0 if it is not.
- Starting from the dictionary's rehashidx, it finds the first non-empty bucket; this shows that rehashing proceeds in order from bucket 0 up to bucket size-1.
- For every entry in that bucket of ht[0], the hash value and index are recomputed and the entry is moved into ht[1].
- Once ht[0] is empty, its table is freed, ht[1] is installed as ht[0], ht[1] is reset to an empty hash table, and rehashidx is set back to -1.
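The "round up to the next power of 2" sizing referenced in the first step above is applied when dictExpand() computes the real table size. A simplified sketch of that rounding (the real helper in dict.c, _dictNextPower, additionally guards against overflow):
// Simplified sketch of the sizing rule: start at the initial size (4) and
// keep doubling until the value reaches the requested size.
static unsigned long nextPower(unsigned long size) {
    unsigned long i = DICT_HT_INITIAL_SIZE;
    while (i < size) i *= 2;
    return i;
}
// Example: with ht[0].used == 20, an expansion requests 20 + 1 = 21 slots,
// and nextPower(21) == 32, so ht[1].size becomes 32.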
Conditions for expanding the dictionary:
/* Using dictEnableResize() / dictDisableResize() we make possible to
* enable/disable resizing of the hash table as needed. This is very important
* for Redis, as we use copy-on-write and don't want to move too much memory
* around when there is a child performing saving operations.
*
* Note that even when dict_can_resize is set to 0, not all resizes are
* prevented: a hash table is still allowed to grow if the ratio between
* the number of elements and the buckets > dict_force_resize_ratio. */
static int dict_can_resize = 1;
/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *d)
{
/* Incremental rehashing already in progress. Return. */
if (dictIsRehashing(d)) return DICT_OK;
/* If the hash table is empty expand it to the initial size. */
if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
/* If we reached the 1:1 ratio, and we are allowed to resize the hash
* table (global setting) or we should avoid it but the ratio between
* elements/buckets is over the "safe" threshold, we resize doubling
* the number of buckets. */
if (d->ht[0].used >= d->ht[0].size &&
(dict_can_resize ||
d->ht[0].used/d->ht[0].size > dict_force_resize_ratio) &&
dictTypeExpandAllowed(d))
{
return dictExpand(d, d->ht[0].used + 1);
}
return DICT_OK;
}
All of the following conditions must hold at the same time:
- the load factor is greater than or equal to 1;
- either the dictionary is currently allowed to resize, or the load factor is greater than dict_force_resize_ratio (5) (see the comment above dict_can_resize for the reason);
- there is enough memory to allocate the new table (the expandAllowed callback, if set, agrees).
Conditions for shrinking the dictionary:
int htNeedsResize(dict *dict) {
long long size, used;
size = dictSlots(dict);
used = dictSize(dict);
return (size > DICT_HT_INITIAL_SIZE &&
(used*100/size < HASHTABLE_MIN_FILL));
}
In other words, the table must be larger than the initial size (4) and its load factor must be below 0.1 (HASHTABLE_MIN_FILL is 10, i.e. 10%).
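When htNeedsResize() reports an under-filled table, the shrink itself is requested through dictResize(), which asks dictExpand() for a table just large enough for the current entries. A reference sketch based on the pre-7.0 dict.c (details may differ slightly between versions):
// Shrink the table to the smallest power of 2 that still holds all entries,
// never below DICT_HT_INITIAL_SIZE. Sketch based on the pre-7.0 dict.c.
int dictResize(dict *d) {
    unsigned long minimal;

    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
    minimal = d->ht[0].used;
    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;
    return dictExpand(d, minimal);
}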
Incremental rehash
Because a hash table can hold a huge number of entries, rehashing everything in a single step is impractical, so Redis performs the rehash incrementally to keep the service responsive.
Concretely, during a rehash both hash tables are in use (delete, update, and lookup operations consult both tables), and buckets are migrated to ht[1] one by one starting from bucket 0, with rehashidx recording the current progress. New key-value pairs are always added to ht[1] and never to ht[0], so ht[0] gradually becomes an empty table.
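Lookup during a rehash shows the two-table handling well: every lookup first contributes one rehash step, then probes ht[0] and, only while a rehash is in progress, ht[1]. The sketch below follows the pre-7.0 dictFind() (minor details may differ across Redis versions):
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    uint64_t h, idx, table;

    if (dictSize(d) == 0) return NULL;          // the dict is empty
    // Piggyback a single-bucket rehash step on this lookup.
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        while (he) {
            if (key == he->key || dictCompareKeys(d, key, he->key))
                return he;
            he = he->next;
        }
        // If no rehash is in progress, ht[1] is empty: stop after ht[0].
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}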