美文网首页
【三】哈希表

【三】哈希表

作者: Michael_abc | 来源:发表于2019-10-10 11:02 被阅读0次

    前言

    哈希表基本上是我见过使用最广泛的数据结构了,不管是PHP,MySQL,Redis,JAVA等,都使用到了哈希表,使用特别广泛。

    定义

    哈希表是一种通过关键码查找值得数据结构,是一种无顺序的散列数据结构。

    例图

    image.png

    说明

    哈希表也有很多种,今天我们用来举例的是PHP的哈希表实现。

    PHP的哈希表

    在PHP内核中 大量使用的HashTable ,函数列表,类列表,数组,多维数组就是哈希表套着哈希表

    image.png

    PHP的哈希实现时数组+顺序链表+碰撞链表

    哈希扩容

    • 当哈希表里的元素数量大于某个阈值时,就需要动态扩容,在PHP中每次扩容是在原来的size2,然后再将元素写入到新的槽中,从新形成大的哈希表。
    • 这样做有个坏处,就是当哈希表在很大的时候,扩容需要花费比较长的时间,不利于性能,在PHP中一般情况下都是非常短的web处理,不会有很大的哈希表扩容性能要求,这种场景下是比较合适。
    • 在其他情况下就要分析了,例如在Redis下,这种方式就不合适,哈希表可能会非常大,如上白万,假如一次扩容,在重新插入,这就出现性能问题,内存和时间消耗会很大,在Redis中则是采用渐进式扩容,使用两个哈希表,在数据量大时,不一次性扩容,而是启用新的哈希表,假如转存旧的元素,隔段时间启动一次 知道老的哈希表被完全转存。
    • 需求场景不同,设计也不一样,很棒。
    • 解决hash碰撞的方法有很多,不一一列举了。

    个人哈希表C语言实现

    my_hash.h

    #ifndef MY_MY_HASH_H
    #define MY_MY_HASH_H
    
    #define MY_HASH_MAX_SIZE 0x1000000000
    
    #define MY_HASH_EXPAND_RATIO 0.8
    #define MY_HASH_REDUCE_RATIO 0.2
    #define MY_HASH_INIT_SIZE 4
    
    typedef struct my_hash_bucket_s my_hash_bucket_t;
    typedef struct my_hash_s my_hash_t;
    
    struct my_hash_s {
        unsigned int size;
        unsigned int mask;
        unsigned int ele_num;
        float fill_ratio;
        my_hash_bucket_t *list_head;
        my_hash_bucket_t *list_tail;
        my_hash_bucket_t **table;
    };
    
    struct my_hash_bucket_s {
        unsigned long index;
        unsigned int key_len;
        unsigned int index_num;
        char *key;
        void *data;
        my_hash_bucket_t *list_next;
        my_hash_bucket_t *list_prev;
        my_hash_bucket_t *next;
        my_hash_bucket_t *prev;
    };
    
    my_hash_t *my_hash_init(void);
    
    my_hash_bucket_t *my_hash_bucket_init(char *, unsigned int, int, void *);
    
    void *my_hash_find(my_hash_t *, char *, int);
    
    int my_hash_insert_or_update(my_hash_t *, char *, unsigned int, void *);
    
    int my_hash_delete(my_hash_t *, char *, int);
    
    int my_hash_expand(my_hash_t *);
    
    int my_hash_reduce(my_hash_t *);
    
    int my_hash_destory(my_hash_t *);
    
    int my_hash_key_index(char *key, int);
    
    void my_hash_foreach(my_hash_t *);
    
    void my_hash_dump(my_hash_t *);
    
    #endif
    

    my_hash.c

    #include <my_hash.h>
    
    /**
     * hash表初始化
     * @return
     */
    my_hash_t *my_hash_init(void) {
        unsigned int mem_size;
        my_hash_t *hash;
        hash = (my_hash_t *) malloc(sizeof(my_hash_t));
        hash->size = MY_HASH_INIT_SIZE;
        hash->mask = hash->size - 1;
        hash->ele_num = 0;
        hash->fill_ratio = 0.0;
        hash->list_head = NULL;
        hash->list_tail = NULL;
        mem_size = hash->size * sizeof(my_hash_bucket_t);
        hash->table = (my_hash_bucket_t **) malloc(mem_size);
        memset(hash->table, 0, mem_size);
        return hash;
    }
    
    /**
     *
     * @param key
     * @param key_len
     * @param index
     * @param data
     * @return
     */
    my_hash_bucket_t *my_hash_bucket_init(char *key, unsigned int key_len, int index, void *data) {
        my_hash_bucket_t *new_bucket;
        new_bucket = (my_hash_bucket_t *) malloc(sizeof(my_hash_bucket_t));
        new_bucket->next = new_bucket->prev = NULL;
        new_bucket->list_next = new_bucket->list_prev = NULL;
        new_bucket->index_num = 0;
        new_bucket->data = data;
        new_bucket->index = index;
        new_bucket->key = key;
        new_bucket->key_len = key_len;
        return new_bucket;
    }
    
    /**
     * hash查找
     * @param hash
     * @param key
     * @return
     */
    void *my_hash_find(my_hash_t *hash, char *key, int key_len) {
        int index = my_hash_key_index(key, key_len) % hash->mask;
        my_hash_bucket_t *bucket;
        bucket = hash->table[index];
    
        while (bucket) {
            if (bucket->key == key) {
                bucket->index_num++;
                return bucket->data;
            }
            bucket = bucket->next;
        }
        return NULL;
    }
    
    /**
     *
     * @param hash
     * @param key
     * @param key_len
     * @param data
     * @return
     */
    int my_hash_insert_or_update(my_hash_t *hash, char *key, unsigned int key_len, void *data) {
        int ret;
        //查找更新
        void *old_data;
        int hash_index = my_hash_key_index(key, key_len);
        int index = hash_index % hash->mask;
        my_hash_bucket_t *bucket, *new_bucket;
        bucket = hash->table[index];
        while (bucket) {
            if (strcmp(bucket->key, key) == 0) {
                old_data = bucket->data;
                bucket->data = data;
                free(old_data);
                return RET_SUCCESS;
            }
            bucket = bucket->next;
        }
        // 插入
        new_bucket = my_hash_bucket_init(key, key_len, index, data);
        if (hash->table[index]) {
            hash->table[index]->prev = new_bucket;
            new_bucket->next = hash->table[index];
        }
        hash->table[index] = new_bucket;
        hash->ele_num++;
        hash->fill_ratio = (float) hash->ele_num / hash->size;
    
        if (hash->list_tail) {
            new_bucket->list_prev = hash->list_tail;
            hash->list_tail->list_next = new_bucket;
        }
        hash->list_tail = new_bucket;
        if (hash->ele_num == 1) {
            hash->list_head = new_bucket;
        }
        //判断容积率
        if (hash->fill_ratio > MY_HASH_EXPAND_RATIO) {
            ret = my_hash_expand(hash);
            if (ret != RET_SUCCESS) {
                return ret;
            }
        }
        return RET_SUCCESS;
    }
    
    /**
     * 哈希删除
     * @param hash
     * @param key
     * @param key_len
     * @return
     */
    int my_hash_delete(my_hash_t *hash, char *key, int key_len) {
        int opt_ret = RET_FAIL;
        int index = my_hash_key_index(key, key_len) % hash->mask;
        my_hash_bucket_t *bucket, *prev_bucket;
        prev_bucket = NULL;
        bucket = hash->table[index];
        while (bucket) {
            //find it
            if (strcmp(bucket->key, key) == 0) {
                if (bucket->list_prev && bucket->list_next) {
                    //中间
                    bucket->list_prev->list_next = bucket->list_next;
                    bucket->list_next->list_prev = bucket->list_prev;
                } else if (bucket->list_prev && !bucket->list_next) {
                    //尾
                    bucket->list_prev->list_next = NULL;
                    hash->list_tail = bucket->list_prev;
                } else if (!bucket->list_prev && bucket->list_next) {
                    //头
                    bucket->list_next->list_prev = NULL;
                    hash->list_head = bucket->list_next;
                }
                if (bucket->prev && bucket->next) {
                    //中间
                    bucket->prev->next = bucket->next;
                    bucket->next->prev = bucket->prev;
                } else if (bucket->prev && !bucket->next) {
                    //尾
                    bucket->prev->next = NULL;
                } else if (!bucket->prev && bucket->next) {
                    //头
                    bucket->next->prev = NULL;
                    hash->table[index] = bucket->next;
                } else {
                    hash->table[index] = NULL;
                }
                free(bucket);
                bucket = NULL;
                opt_ret = RET_SUCCESS;
                hash->ele_num--;
                hash->fill_ratio = (float) hash->ele_num / hash->size;
                break;
            }
            prev_bucket = bucket;
            bucket = bucket->next;
        }
        if (opt_ret == RET_SUCCESS && hash->fill_ratio <= MY_HASH_REDUCE_RATIO) {
            opt_ret = my_hash_reduce(hash);
            if (opt_ret != RET_SUCCESS) {
                return opt_ret;
            }
        }
        return opt_ret;
    }
    
    /**
     * hash扩张
     * @param hash
     * @return
     */
    int my_hash_expand(my_hash_t *hash) {
        hash->size = hash->size << 1;
        if (hash->size > MY_HASH_MAX_SIZE) {
            return RET_FAIL;
        }
        // define
        int index;
        unsigned int mem_size;
        my_hash_bucket_t *bucket, *tmp_bucket;
        my_hash_bucket_t **new_table;
        hash->mask = hash->size - 1;
        //
        mem_size = sizeof(my_hash_bucket_t) * hash->size;
        new_table = (my_hash_bucket_t **) malloc(mem_size);
        memset(new_table, 0, mem_size);
        free(hash->table);
        hash->table = new_table;
        hash->fill_ratio = hash->ele_num / hash->size;
        bucket = hash->list_head;
        //
        while (bucket) {
            bucket->next = bucket->prev = NULL;
            index = my_hash_key_index(bucket->key, bucket->key_len) % hash->mask;
            bucket->index = index;
            if (hash->table[index]) {
                hash->table[index]->prev = bucket;
                bucket->next = hash->table[index];
            }
            hash->table[index] = bucket;
            bucket = bucket->list_next;
        }
        return RET_SUCCESS;
    }
    
    /**
     *
     * @param hash
     * @return
     */
    int my_hash_reduce(my_hash_t *hash) {
        hash->size = hash->size >> 1;
        hash->size = hash->size < MY_HASH_INIT_SIZE ? MY_HASH_INIT_SIZE : hash->size;
        // define
        int index;
        unsigned int mem_size;
        my_hash_bucket_t *bucket, *tmp_bucket;
        my_hash_bucket_t **new_table;
        hash->mask = hash->size - 1;
        //
        mem_size = sizeof(my_hash_bucket_t) * hash->size;
        new_table = (my_hash_bucket_t **) malloc(mem_size);
        memset(new_table, 0, mem_size);
        free(hash->table);
        hash->table = new_table;
        hash->fill_ratio = hash->ele_num / hash->size;
        bucket = hash->list_head;
        //
        while (bucket) {
            bucket->next = bucket->prev = NULL;
            index = my_hash_key_index(bucket->key, bucket->key_len) % hash->mask;
            bucket->index = index;
            if (hash->table[index]) {
                hash->table[index]->prev = bucket;
                bucket->next = hash->table[index];
            }
            hash->table[index] = bucket;
            bucket = bucket->list_next;
        }
        return RET_SUCCESS;
    }
    
    /**
     * hash字符串index
     * @param key
     * @return
     */
    int my_hash_key_index(char *key, int key_len) {
        unsigned int hash = 1315423911;
        unsigned int i = 0;
        for (i = 0; i < key_len; key++, i++) {
            hash ^= ((hash << 5) + (*key) + (hash >> 2));
        }
        return hash;
    }
    /**
     *
     * @param hash
     * @return
     */
    int my_hash_destory(my_hash_t *hash) {
        my_hash_bucket_t *bucket, *while_bucket;
        while_bucket = hash->list_head;
        while (while_bucket) {
            bucket = while_bucket;
            free(bucket);
            while_bucket = while_bucket->list_next;
        }
        free(hash);
        return RET_SUCCESS;
    }
    
    /**
     *
     * @param hash
     */
    void my_hash_foreach(my_hash_t *hash) {
        my_hash_bucket_t *bucket;
        bucket = hash->list_head;
        while (bucket) {
            printf("[key:%s(%d)]->%d\n", bucket->key, bucket->index, *(int *)bucket->data);
            bucket = bucket->list_next;
        }
    }
    
    /**
     *
     * @param hash
     */
    void my_hash_dump(my_hash_t *hash) {
        my_hash_bucket_t *bucket;
        int i;
        for (i = 1; i <= hash->size; i++) {
            bucket = hash->table[i];
            printf("node:%03d-->", i);
            while (bucket) {
                printf("[%s(%d)]=>%d->", bucket->key, bucket->index, *(int *)bucket->data);
                bucket = bucket->next;
            }
            printf("\n");
        }
    }
    

    相关文章

      网友评论

          本文标题:【三】哈希表

          本文链接:https://www.haomeiwen.com/subject/uteopctx.html