美文网首页
redis hash

redis hash

作者: GeekAmI | 来源:发表于2024-06-09 21:08 被阅读0次

    一、hash 数据结构

    源码文件:src/dict.h、src/dict.c

    // 哈希项
    typedef struct dictEntry {
        void *key;
        union {
            void *val;
            uint64_t u64;
            int64_t s64;
            double d;
        } v;
        struct dictEntry *next;
    } dictEntry;
    
    // 哈希表
    /* This is our hash table structure. Every dictionary has two of this as we
     * implement incremental rehashing, for the old to the new table. */
    typedef struct dictht {
        dictEntry **table;
        unsigned long size;
        unsigned long sizemask;
        unsigned long used;
    } dictht;
    
    typedef struct dict {
        dictType *type;
        void *privdata;
        dictht ht[2];
        long rehashidx; /* rehashing not in progress if rehashidx == -1 */
        int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
    } dict;
    

    二、如何实现 rehash?

    Add an element to the target hash table

    int dictAdd(dict *d, void *key, void *val)
    {
        dictEntry *entry = dictAddRaw(d,key,NULL);
    
        if (!entry) return DICT_ERR;
        dictSetVal(d, entry, val);
        return DICT_OK;
    }
    

    This function adds the entry but instead of setting a value returns the dictEntry structure to the user

    dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
    {
        long index;
        dictEntry *entry;
        dictht *ht;
    
        if (dictIsRehashing(d)) _dictRehashStep(d);
    
        /* Get the index of the new element, or -1 if
         * the element already exists. */
        if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
            return NULL;
    
        /* Allocate the memory and store the new entry.
         * Insert the element in top, with the assumption that in a database
         * system it is more likely that recently added entries are accessed
         * more frequently. */
        ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
        entry = zmalloc(sizeof(*entry));
        entry->next = ht->table[index];
        ht->table[index] = entry;
        ht->used++;
    
        /* Set the hash entry fields. */
        dictSetKey(d, entry, key);
        return entry;
    }
    

    Get the index of the new element, or -1 if the element already exists.

    static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
    {
        unsigned long idx, table;
        dictEntry *he;
        if (existing) *existing = NULL;
    
        /* Expand the hash table if needed */
        if (_dictExpandIfNeeded(d) == DICT_ERR)
            return -1;
        for (table = 0; table <= 1; table++) {
            idx = hash & d->ht[table].sizemask;
            /* Search if this slot does not already contain the given key */
            he = d->ht[table].table[idx];
            while(he) {
                if (key==he->key || dictCompareKeys(d, key, he->key)) {
                    if (existing) *existing = he;
                    return -1;
                }
                he = he->next;
            }
            if (!dictIsRehashing(d)) break;
        }
        return idx;
    }
    

    什么时候触发rehash?

    /* Expand the hash table if needed */
    static int _dictExpandIfNeeded(dict *d)
    {
        /* Incremental rehashing already in progress. Return. */
        if (dictIsRehashing(d)) return DICT_OK;
    
        /* If the hash table is empty expand it to the initial size. */
        if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
    
        /* If we reached the 1:1 ratio, and we are allowed to resize the hash
         * table (global setting) or we should avoid it but the ratio between
         * elements/buckets is over the "safe" threshold, we resize doubling
         * the number of buckets. */
        if (!dictTypeExpandAllowed(d))
            return DICT_OK;
        if ((dict_can_resize == DICT_RESIZE_ENABLE &&
             d->ht[0].used >= d->ht[0].size) ||
            (dict_can_resize != DICT_RESIZE_FORBID &&
             d->ht[0].used / d->ht[0].size > dict_force_resize_ratio))
        {
            return dictExpand(d, d->ht[0].used + 1);
        }
        return DICT_OK;
    }
    

    rehash扩容扩多大?

    /* return DICT_ERR if expand was not performed */
    int dictExpand(dict *d, unsigned long size) {
        return _dictExpand(d, size, NULL);
    }
    
    /* Expand or create the hash table,
     * when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1).
     * Returns DICT_OK if expand was performed, and DICT_ERR if skipped. */
    int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
    {
        if (malloc_failed) *malloc_failed = 0;
    
        /* the size is invalid if it is smaller than the number of
         * elements already inside the hash table */
        if (dictIsRehashing(d) || d->ht[0].used > size)
            return DICT_ERR;
    
        dictht n; /* the new hash table */
        unsigned long realsize = _dictNextPower(size);
    
        /* Detect overflows */
        if (realsize < size || realsize * sizeof(dictEntry*) < realsize)
            return DICT_ERR;
    
        /* Rehashing to the same table size is not useful. */
        if (realsize == d->ht[0].size) return DICT_ERR;
    
        /* Allocate the new hash table and initialize all pointers to NULL */
        n.size = realsize;
        n.sizemask = realsize-1;
        if (malloc_failed) {
            n.table = ztrycalloc(realsize*sizeof(dictEntry*));
            *malloc_failed = n.table == NULL;
            if (*malloc_failed)
                return DICT_ERR;
        } else
            n.table = zcalloc(realsize*sizeof(dictEntry*));
    
        n.used = 0;
    
        /* Is this the first initialization? If so it's not really a rehashing
         * we just set the first hash table so that it can accept keys. */
        if (d->ht[0].table == NULL) {
            d->ht[0] = n;
            return DICT_OK;
        }
    
        /* Prepare a second hash table for incremental rehashing */
        d->ht[1] = n;
        d->rehashidx = 0;
        return DICT_OK;
    }
    
    /* Our hash table capability is a power of two */
    static unsigned long _dictNextPower(unsigned long size)
    {
        unsigned long i = DICT_HT_INITIAL_SIZE;
    
        if (size >= LONG_MAX) return LONG_MAX + 1LU;
        while(1) {
            if (i >= size)
                return i;
            i *= 2;
        }
    }
    

    渐进式rehash如何实现?

    static void _dictRehashStep(dict *d) {
        if (d->pauserehash == 0) dictRehash(d,1);
    }
    
    int dictRehash(dict *d, int n) {
        int empty_visits = n*10; /* Max number of empty buckets to visit. */
        unsigned long s0 = d->ht[0].size;
        unsigned long s1 = d->ht[1].size;
        if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0;
        if (dict_can_resize == DICT_RESIZE_AVOID && 
            ((s1 > s0 && s1 / s0 < dict_force_resize_ratio) ||
             (s1 < s0 && s0 / s1 < dict_force_resize_ratio)))
        {
            return 0;
        }
    
        //主循环,根据要拷贝的bucket数量n,循环n次后停止或ht[0]中的数据迁移完停止
        while(n-- && d->ht[0].used != 0) {
            dictEntry *de, *nextde;
    
            /* Note that rehashidx can't overflow as we are sure there are more
             * elements because ht[0].used != 0 */
            assert(d->ht[0].size > (unsigned long)d->rehashidx);
          
            //如果当前要迁移的bucket中没有元素
            while(d->ht[0].table[d->rehashidx] == NULL) {
                d->rehashidx++;
                if (--empty_visits == 0) return 1;
            }
          
            //获得哈希表中哈希项
            de = d->ht[0].table[d->rehashidx];
            /* Move all the keys in this bucket from the old to the new hash HT */
            //如果rehashidx指向的bucket不为空
            while(de) {
                uint64_t h;
                //获得同一个bucket中下一个哈希项
                nextde = de->next;
                /* Get the index in the new hash table */
                //根据扩容后的哈希表ht[1]大小,计算当前哈希项在扩容后哈希表中的bucket位置
                h = dictHashKey(d, de->key) & d->ht[1].sizemask;
                //将当前哈希项添加到扩容后的哈希表ht[1]中
                de->next = d->ht[1].table[h];
                d->ht[1].table[h] = de;
                //减少当前哈希表的哈希项个数
                d->ht[0].used--;
                //增加扩容后哈希表的哈希项个数
                d->ht[1].used++;
                //指向下一个哈希项
                de = nextde;
            }
            //如果当前bucket中已经没有哈希项了,将该bucket置为NULL
            d->ht[0].table[d->rehashidx] = NULL;
            //将rehash加1,下一次将迁移下一个bucket中的元素
            d->rehashidx++;
        }
    
        /* Check if we already rehashed the whole table... */
        //判断ht[0]的数据是否迁移完成
        if (d->ht[0].used == 0) {
            //ht[0]迁移完后,释放ht[0]内存空间
            zfree(d->ht[0].table);
            //让ht[0]指向ht[1],以便接受正常的请求
            d->ht[0] = d->ht[1];
            //重置ht[1]的大小为0
            _dictReset(&d->ht[1]);
            //设置全局哈希表的rehashidx标识为-1,表示rehash结束
            d->rehashidx = -1;
            return 0;
        }
    
        /* More to rehash... */
        //返回1,表示ht[0]中仍然有元素没有迁移完
        return 1;
    }
    
    /* Reset a hash table already initialized with ht_init().
     * NOTE: This function should only be called by ht_destroy(). */
    static void _dictReset(dictht *ht)
    {
        ht->table = NULL;
        ht->size = 0;
        ht->sizemask = 0;
        ht->used = 0;
    }
    

    触发 rehash的 5 个场景


    image.png

    相关文章

      网友评论

          本文标题:redis hash

          本文链接:https://www.haomeiwen.com/subject/jntiqjtx.html