这一篇文章主要介绍dict的主要操作函数,代码详见redis的代码src/dict.c源文件,主要涉及dict的创建,查找,增加,替换,删除等基本操作。
dict的创建(dictCreate)
dictCreate为dict的数据结构分配空间并为各个变量赋初值。其中两个哈希表ht[0]和ht[1]起始都没有分配空间,table指针都赋为NULL
/* Reset a hash table already initialized with ht_init().
* NOTE: This function should only be called by ht_destroy(). */
static void _dictReset(dictht *ht)
{
ht->table = NULL;
ht->size = 0;
ht->sizemask = 0;
ht->used = 0;
}
/* Create a new hash table */
dict *dictCreate(dictType *type,
void *privDataPtr)
{
dict *d = zmalloc(sizeof(*d));
_dictInit(d,type,privDataPtr);
return d;
}
/* Initialize the hash table */
int _dictInit(dict *d, dictType *type,
void *privDataPtr)
{
_dictReset(&d->ht[0]);
_dictReset(&d->ht[1]);
d->type = type;
d->privdata = privDataPtr;
d->rehashidx = -1;
d->iterators = 0;
return DICT_OK;
}
dict的查找(dictFind)
- 1,dict是否为空
- 2,dict是否在进行重哈希,如果在进行,则向前推进一步
- 3,计算key的hash值h
- 4,先在dict的ht[0]:根据hash值h和ht[0]的sizemask计算索引值,根据索引值获取dictEntry链表的指针,根据指针在链表上查找是否存在为key的entry,如果有就返回,
- 5,第4步如果dict的ht[0]没有找到,判断当前是否在重哈希,如果没有返回空,否则在dict的ht[1]上进行同样的查找步骤
dictEntry *dictFind(dict *d, const void *key)
{
dictEntry *he;
unsigned int h, idx, table;
if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key))
return he;
he = he->next;
}
if (!dictIsRehashing(d)) return NULL;
}
return NULL;
}
dict的添加(dictAdd)
- 1,包含两个过程,首先根据key在dict中找到dictEntry(dictAddRaw),然后在这个entry插入value(dictSetVal)
- 2,寻找entry的过程中,如果发现dict在重哈希的过程中,则向前推进一步
- 3,获取entry在dict的索引调用_dictKeyIndex函数,如果不在重哈希过程中,它只查找ht[0];否则查找ht[0]和ht[1],如果dict已经存在这个key,则会返回-1,进而dictAddRaw函数会返回NULL
- 4,如果dict在进行重哈希,则插入到dict的ht[1]中,否则插入到ht[0]中
- 5,插入过程会插入到entry链表的第一个节点(新数据再次访问概率比较高),used数目会更新,调用宏定义函数dictSetKey在entry中设置key
- 6,_dictKeyIndex可能触发dict内存扩展(_dictExpandIfNeeded,它将哈希表长度扩展为原来两倍)
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
dictEntry *entry = dictAddRaw(d,key,NULL);
if (!entry) return DICT_ERR;
dictSetVal(d, entry, val);
return DICT_OK;
}
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
int index;
dictEntry *entry;
dictht *ht;
if (dictIsRehashing(d)) _dictRehashStep(d);
/* Get the index of the new element, or -1 if
* the element already exists. */
if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
return NULL;
/* Allocate the memory and store the new entry.
* Insert the element in top, with the assumption that in a database
* system it is more likely that recently added entries are accessed
* more frequently. */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++;
/* Set the hash entry fields. */
dictSetKey(d, entry, key);
return entry;
}
static int _dictKeyIndex(dict *d, const void *key, unsigned int hash, dictEntry **existing)
{
unsigned int idx, table;
dictEntry *he;
if (existing) *existing = NULL;
/* Expand the hash table if needed */
if (_dictExpandIfNeeded(d) == DICT_ERR)
return -1;
for (table = 0; table <= 1; table++) {
idx = hash & d->ht[table].sizemask;
/* Search if this slot does not already contain the given key */
he = d->ht[table].table[idx];
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key)) {
if (existing) *existing = he;
return -1;
}
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
return idx;
}
dict的删除(dictDelete)
dictDelete会进一步调用dictGenericDelete函数,该函数多了一个标志位nofree,以区分是否释放需要删除的entry,这里标记为0,代表需要释放。
dictUnlink也会调用dictGenericDelet,这里nofree的标志位设置为1,此时不释放entry,这个entry会被返回,为什么如此设计呢?如下作者解释的很清楚:如果需要使用查找到的entry再删除,分别调用
dictFind和dictDelete函数,后者也会进行一次查找,整个包含两次查找过程,而调用dictUnlink和dictFreeUnlinkedEntry则只有一次查找过程。
- This function is useful when we want to remove something from the hash
- table but want to use its value before actually deleting the entry.
- Without this function the pattern would require two lookups:
- entry = dictFind(...);
- // Do something with entry
- dictDelete(dictionary,entry);
- Thanks to this function it is possible to avoid this, and use
- instead:
- entry = dictUnlink(dictionary,entry);
- // Do something with entry
- dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again.
删除过程会触发推进一步重哈希(_dictRehashStep)
如果当前不在重哈希过程中,它只在ht[0]中查找要删除的key;否则ht[0]和ht[1]它都要查找。
删除过程中,由于需要释放entry,会通过Free函数调用key和value的析构函数(keyDestructor和valDestructor)。
int dictDelete(dict *ht, const void *key) {
return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
unsigned int h, idx;
dictEntry *he, *prevHe;
int table;
if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
prevHe = NULL;
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key)) {
/* Unlink the element from the list */
if (prevHe)
prevHe->next = he->next;
else
d->ht[table].table[idx] = he->next;
if (!nofree) {
dictFreeKey(d, he);
dictFreeVal(d, he);
zfree(he);
}
d->ht[table].used--;
return he;
}
prevHe = he;
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
return NULL; /* not found */
}
dictEntry *dictUnlink(dict *ht, const void *key) {
return dictGenericDelete(ht,key,1);
}
参考:
http://zhangtielei.com/posts/blog-redis-dict.html
《redis设计与实现》第二版
网友评论