美文网首页
Redis深度历险 - dict的rehash

Redis深度历险 - dict的rehash

作者: 突击手平头哥 | 来源:发表于2021-04-17 15:49 被阅读0次

    Redis深度历险 - dict的rehash

    字典是Redis中非常重要的一个数据结构,本质就是一个hash表;那么这个hash表是怎么解决冲突的呢?是怎么进行rehash的呢?

    dict结构体

    相关代码在: dict.c、dict.h中

    /* This is our hash table structure. Every dictionary has two of this as we
     * implement incremental rehashing, for the old to the new table. */
    typedef struct dictht {
        dictEntry **table;              //使用的是数组来存储数据
        unsigned long size;             //数组空间大小
        unsigned long sizemask;         //数组空间大小-1
        unsigned long used;             //已使用空间
    } dictht;
    
    typedef struct dict {
        dictType *type;                     //这里并不是什么类型,二十hash表使用的hash、比较、销毁等函数
        void *privdata;
        dictht ht[2];                       //hash表结构体
        long rehashidx;                     //rehash标记
        unsigned long iterators; 
    } dict;
    

    hash冲突处理

    /* Add an element to the target hash table */
    static int dictAdd(dict *ht, void *key, void *val) {
        int index;
        dictEntry *entry;
    
        /* Get the index of the new element, or -1 if
         * the element already exists. */
        if ((index = _dictKeyIndex(ht, key)) == -1)
            return DICT_ERR;
    
        /* Allocates the memory and stores key */
        entry = malloc(sizeof(*entry));             //创建一个entry(一项即将插入的数据)
        entry->next = ht->table[index];             //ht->table中之前在这个位置上的旧entry放在新entry的next上
        ht->table[index] = entry;                   //将新entry放在ht->table这个位置上
    
        /* Set the hash entry fields. */
        dictSetHashKey(ht, entry, key);
        dictSetHashVal(ht, entry, val);
        ht->used++;
        return DICT_OK;
    }
    
    typedef struct dictEntry {
        void *key;
        union {
            void *val;
            uint64_t u64;
            int64_t s64;
            double d;
        } v;
        struct dictEntry *next;             //链表
    } dictEntry;
    

      dictEntry是字典中用来存放数据的结构体,ht->table就是用来存储entry的数组;

      dictEntry实现成了链表的形式,以链表法来解决hash冲突的问题,ht->table[index]指向的就是一个链表开头

      在插入一个新的元素时,就相当于在ht->table[index]这个链表的头部进行插入

    rehash处理

    rehash的条件

    /* Expand the hash table if needed */
    static int _dictExpandIfNeeded(dict *d)
    {
        /* Incremental rehashing already in progress. Return. */
        if (dictIsRehashing(d)) return DICT_OK;
    
        /* If the hash table is empty expand it to the initial size. */
        if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
    
        /* If we reached the 1:1 ratio, and we are allowed to resize the hash
         * table (global setting) or we should avoid it but the ratio between
         * elements/buckets is over the "safe" threshold, we resize doubling
         * the number of buckets. */
        if (d->ht[0].used >= d->ht[0].size &&
            (dict_can_resize ||
             d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
        {
            return dictExpand(d, d->ht[0].used*2);
        }
        return DICT_OK;
    }
    
    static int dict_can_resize = 1;
    static unsigned int dict_force_resize_ratio = 5;
    

      此函数是在插入元素时判断是否需要扩充空间,扩充空间后就需要reahsh操作;判断条件为如下之一即可:

    • 元素个数大于hash表数组长度,并且允许重新设置大小(该变量与备份等操作有关)
    • 元素个数达到hash表数组长度的5倍,则强制进行hash操作(hash表使用链表法解决冲突,元素个数是可以大于数组长度的)

    reahsh的处理

    /* Expand or create the hash table */
    int dictExpand(dict *d, unsigned long size)
    {
        /* the size is invalid if it is smaller than the number of
         * elements already inside the hash table */
        if (dictIsRehashing(d) || d->ht[0].used > size)
            return DICT_ERR;
    
        dictht n; /* the new hash table */
        unsigned long realsize = _dictNextPower(size);
    
        /* Rehashing to the same table size is not useful. */
        if (realsize == d->ht[0].size) return DICT_ERR;
    
        /* Allocate the new hash table and initialize all pointers to NULL */
        n.size = realsize;
        n.sizemask = realsize-1;
        n.table = zcalloc(realsize*sizeof(dictEntry*));
        n.used = 0;
    
        /* Is this the first initialization? If so it's not really a rehashing
         * we just set the first hash table so that it can accept keys. */
        if (d->ht[0].table == NULL) {
            d->ht[0] = n;
            return DICT_OK;
        }
    
        /* Prepare a second hash table for incremental rehashing */
        d->ht[1] = n;
        d->rehashidx = 0;                   //打标志,表示开始reahsh操作
        return DICT_OK;
    }
    
    

      如上可知,在扩容hash表空间时仅仅设置了一个表示,而没有进行任何rehash操作;实际的rehash操作是在后续的操作中进行的

    dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
    {
        long index;
        dictEntry *entry;
        dictht *ht;
    
        if (dictIsRehashing(d)) _dictRehashStep(d);
    ...................
        
    #define dictIsRehashing(d) ((d)->rehashidx != -1)
    ...................
        
    static void _dictRehashStep(dict *d) {
        if (d->iterators == 0) dictRehash(d,1);
    }
    ....................
    
    int dictRehash(dict *d, int n) {
        int empty_visits = n*10; 
        if (!dictIsRehashing(d)) return 0;
    
        while(n-- && d->ht[0].used != 0) {
            dictEntry *de, *nextde;
    
            /* Note that rehashidx can't overflow as we are sure there are more
             * elements because ht[0].used != 0 */
            assert(d->ht[0].size > (unsigned long)d->rehashidx);
            while(d->ht[0].table[d->rehashidx] == NULL) {               //rehashidx指代即将进行rehash的数据下标
                d->rehashidx++;
                if (--empty_visits == 0) return 1;                      //从rehashidx开始查找一个有数据的位置进行rehash,最多10次
            }
            de = d->ht[0].table[d->rehashidx];
            /* Move all the keys in this bucket from the old to the new hash HT */
            while(de) {                                                 //每一个数据都是一个链表,全部需要进行rehash操作
                uint64_t h;
    
                nextde = de->next;
                /* Get the index in the new hash table */
                h = dictHashKey(d, de->key) & d->ht[1].sizemask;
                de->next = d->ht[1].table[h];
                d->ht[1].table[h] = de;                                 //rehash的数据放到d->ht[1]中
                d->ht[0].used--;
                d->ht[1].used++;
                de = nextde;
            }
            d->ht[0].table[d->rehashidx] = NULL;
            d->rehashidx++;
        }
    
        /* Check if we already rehashed the whole table... */
        if (d->ht[0].used == 0) {                                       //如果rehash完毕,释放空间清楚标记、切换d->ht[0]和d->ht[1]
            zfree(d->ht[0].table);
            d->ht[0] = d->ht[1];
            _dictReset(&d->ht[1]);
            d->rehashidx = -1;
            return 0;
        }
    
        /* More to rehash... */
        return 1;
    }
    
    

      rehashidxht->table的下标,表示从改下标开始进行rehash操作,每次reahsh数据都需要处理该位置的整个链表;还记得dictht ht[2];吗?两个hash表一个用来存储实际数据,另外一个是在hash过程中配合存储数据的。

    rehash的效率

      在扩容时没有进行rehash操作,具体的rehash操作分散在每一次的添加、修改等操作中;时间复杂度始终控制在O(1)级别。

    相关文章

      网友评论

          本文标题:Redis深度历险 - dict的rehash

          本文链接:https://www.haomeiwen.com/subject/sbaulltx.html