redis字典dict——Part2：基本操作

作者: fred290 | 来源:发表于2019-03-06 21:10 被阅读0次

redis字典dict——Part2：基本操作
Redis 源码分析(三) ：dict
工作中遇到的hashtable
python_字典集合
数据团Python_5. Python映射：字典dict
4.字典知识汇总
Redis数据结构--字典
Redis 字典(dict)
Redis 数据结构与内存管理策略（下）
redis字典源码简单分析

这一篇文章主要介绍dict的主要操作函数，代码详见redis的代码src/dict.c源文件，主要涉及dict的创建，查找，增加，替换，删除等基本操作。

dict的创建（dictCreate）

dictCreate为dict的数据结构分配空间并为各个变量赋初值。其中两个哈希表ht[0]和ht[1]起始都没有分配空间，table指针都赋为NULL

/* Reset a hash table already initialized with ht_init().
 * NOTE: This function should only be called by ht_destroy(). */
static void _dictReset(dictht *ht)
{
    ht->table = NULL;
    ht->size = 0;
    ht->sizemask = 0;
    ht->used = 0;
}

/* Create a new hash table */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *d = zmalloc(sizeof(*d));

    _dictInit(d,type,privDataPtr);
    return d;
}

/* Initialize the hash table */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    _dictReset(&d->ht[0]);
    _dictReset(&d->ht[1]);
    d->type = type;
    d->privdata = privDataPtr;
    d->rehashidx = -1;
    d->iterators = 0;
    return DICT_OK;
}

dict的查找（dictFind）

1，dict是否为空
2，dict是否在进行重哈希，如果在进行，则向前推进一步
3，计算key的hash值h
4，先在dict的ht[0]：根据hash值h和ht[0]的sizemask计算索引值，根据索引值获取dictEntry链表的指针，根据指针在链表上查找是否存在为key的entry，如果有就返回，
5，第4步如果dict的ht[0]没有找到，判断当前是否在重哈希，如果没有返回空，否则在dict的ht[1]上进行同样的查找步骤

dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    unsigned int h, idx, table;

    if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key))
                return he;
            he = he->next;
        }
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}

dict的添加（dictAdd）

1，包含两个过程，首先根据key在dict中找到dictEntry（dictAddRaw），然后在这个entry插入value（dictSetVal）
2，寻找entry的过程中，如果发现dict在重哈希的过程中，则向前推进一步
3，获取entry在dict的索引调用_dictKeyIndex函数，如果不在重哈希过程中，它只查找ht[0]；否则查找ht[0]和ht[1]，如果dict已经存在这个key，则会返回-1，进而dictAddRaw函数会返回NULL
4，如果dict在进行重哈希，则插入到dict的ht[1]中，否则插入到ht[0]中
5，插入过程会插入到entry链表的第一个节点(新数据再次访问概率比较高)，used数目会更新，调用宏定义函数dictSetKey在entry中设置key
6，_dictKeyIndex可能触发dict内存扩展（_dictExpandIfNeeded，它将哈希表长度扩展为原来两倍）

/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *entry = dictAddRaw(d,key,NULL);

    if (!entry) return DICT_ERR;
    dictSetVal(d, entry, val);
    return DICT_OK;
}


dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    int index;
    dictEntry *entry;
    dictht *ht;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, entry, key);
    return entry;
}

static int _dictKeyIndex(dict *d, const void *key, unsigned int hash, dictEntry **existing)
{
    unsigned int idx, table;
    dictEntry *he;
    if (existing) *existing = NULL;

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;
    for (table = 0; table <= 1; table++) {
        idx = hash & d->ht[table].sizemask;
        /* Search if this slot does not already contain the given key */
        he = d->ht[table].table[idx];
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                if (existing) *existing = he;
                return -1;
            }
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return idx;
}

dict的删除（dictDelete）

dictDelete会进一步调用dictGenericDelete函数，该函数多了一个标志位nofree，以区分是否释放需要删除的entry，这里标记为0，代表需要释放。
dictUnlink也会调用dictGenericDelet，这里nofree的标志位设置为1，此时不释放entry，这个entry会被返回，为什么如此设计呢？如下作者解释的很清楚：如果需要使用查找到的entry再删除，分别调用
dictFind和dictDelete函数，后者也会进行一次查找，整个包含两次查找过程，而调用dictUnlink和dictFreeUnlinkedEntry则只有一次查找过程。

This function is useful when we want to remove something from the hash
table but want to use its value before actually deleting the entry.
Without this function the pattern would require two lookups:
entry = dictFind(...);
// Do something with entry
dictDelete(dictionary,entry);
Thanks to this function it is possible to avoid this, and use
instead:
entry = dictUnlink(dictionary,entry);
// Do something with entry
dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again.

删除过程会触发推进一步重哈希（_dictRehashStep）
如果当前不在重哈希过程中，它只在ht[0]中查找要删除的key；否则ht[0]和ht[1]它都要查找。
删除过程中，由于需要释放entry，会通过Free函数调用key和value的析构函数（keyDestructor和valDestructor）。

int dictDelete(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}

static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    unsigned int h, idx;
    dictEntry *he, *prevHe;
    int table;

    if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;

    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);

    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        prevHe = NULL;
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                /* Unlink the element from the list */
                if (prevHe)
                    prevHe->next = he->next;
                else
                    d->ht[table].table[idx] = he->next;
                if (!nofree) {
                    dictFreeKey(d, he);
                    dictFreeVal(d, he);
                    zfree(he);
                }
                d->ht[table].used--;
                return he;
            }
            prevHe = he;
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return NULL; /* not found */
}

dictEntry *dictUnlink(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,1);
}