前言
哈希表基本上是我见过使用最广泛的数据结构了,不管是PHP,MySQL,Redis,JAVA等,都使用到了哈希表,使用特别广泛。
定义
哈希表是一种通过关键码查找值得数据结构,是一种无顺序的散列数据结构。
例图

说明
哈希表也有很多种,今天我们用来举例的是PHP的哈希表实现。
PHP的哈希表
在PHP内核中 大量使用的HashTable ,函数列表,类列表,数组,多维数组就是哈希表套着哈希表

PHP的哈希实现时数组+顺序链表+碰撞链表
哈希扩容
- 当哈希表里的元素数量大于某个阈值时,就需要动态扩容,在PHP中每次扩容是在原来的size2,然后再将元素写入到新的槽中,从新形成大的哈希表。
- 这样做有个坏处,就是当哈希表在很大的时候,扩容需要花费比较长的时间,不利于性能,在PHP中一般情况下都是非常短的web处理,不会有很大的哈希表扩容性能要求,这种场景下是比较合适。
- 在其他情况下就要分析了,例如在Redis下,这种方式就不合适,哈希表可能会非常大,如上白万,假如一次扩容,在重新插入,这就出现性能问题,内存和时间消耗会很大,在Redis中则是采用渐进式扩容,使用两个哈希表,在数据量大时,不一次性扩容,而是启用新的哈希表,假如转存旧的元素,隔段时间启动一次 知道老的哈希表被完全转存。
- 需求场景不同,设计也不一样,很棒。
- 解决hash碰撞的方法有很多,不一一列举了。
个人哈希表C语言实现
my_hash.h
#ifndef MY_MY_HASH_H
#define MY_MY_HASH_H
#define MY_HASH_MAX_SIZE 0x1000000000
#define MY_HASH_EXPAND_RATIO 0.8
#define MY_HASH_REDUCE_RATIO 0.2
#define MY_HASH_INIT_SIZE 4
typedef struct my_hash_bucket_s my_hash_bucket_t;
typedef struct my_hash_s my_hash_t;
struct my_hash_s {
unsigned int size;
unsigned int mask;
unsigned int ele_num;
float fill_ratio;
my_hash_bucket_t *list_head;
my_hash_bucket_t *list_tail;
my_hash_bucket_t **table;
};
struct my_hash_bucket_s {
unsigned long index;
unsigned int key_len;
unsigned int index_num;
char *key;
void *data;
my_hash_bucket_t *list_next;
my_hash_bucket_t *list_prev;
my_hash_bucket_t *next;
my_hash_bucket_t *prev;
};
my_hash_t *my_hash_init(void);
my_hash_bucket_t *my_hash_bucket_init(char *, unsigned int, int, void *);
void *my_hash_find(my_hash_t *, char *, int);
int my_hash_insert_or_update(my_hash_t *, char *, unsigned int, void *);
int my_hash_delete(my_hash_t *, char *, int);
int my_hash_expand(my_hash_t *);
int my_hash_reduce(my_hash_t *);
int my_hash_destory(my_hash_t *);
int my_hash_key_index(char *key, int);
void my_hash_foreach(my_hash_t *);
void my_hash_dump(my_hash_t *);
#endif
my_hash.c
#include <my_hash.h>
/**
* hash表初始化
* @return
*/
my_hash_t *my_hash_init(void) {
unsigned int mem_size;
my_hash_t *hash;
hash = (my_hash_t *) malloc(sizeof(my_hash_t));
hash->size = MY_HASH_INIT_SIZE;
hash->mask = hash->size - 1;
hash->ele_num = 0;
hash->fill_ratio = 0.0;
hash->list_head = NULL;
hash->list_tail = NULL;
mem_size = hash->size * sizeof(my_hash_bucket_t);
hash->table = (my_hash_bucket_t **) malloc(mem_size);
memset(hash->table, 0, mem_size);
return hash;
}
/**
*
* @param key
* @param key_len
* @param index
* @param data
* @return
*/
my_hash_bucket_t *my_hash_bucket_init(char *key, unsigned int key_len, int index, void *data) {
my_hash_bucket_t *new_bucket;
new_bucket = (my_hash_bucket_t *) malloc(sizeof(my_hash_bucket_t));
new_bucket->next = new_bucket->prev = NULL;
new_bucket->list_next = new_bucket->list_prev = NULL;
new_bucket->index_num = 0;
new_bucket->data = data;
new_bucket->index = index;
new_bucket->key = key;
new_bucket->key_len = key_len;
return new_bucket;
}
/**
* hash查找
* @param hash
* @param key
* @return
*/
void *my_hash_find(my_hash_t *hash, char *key, int key_len) {
int index = my_hash_key_index(key, key_len) % hash->mask;
my_hash_bucket_t *bucket;
bucket = hash->table[index];
while (bucket) {
if (bucket->key == key) {
bucket->index_num++;
return bucket->data;
}
bucket = bucket->next;
}
return NULL;
}
/**
*
* @param hash
* @param key
* @param key_len
* @param data
* @return
*/
int my_hash_insert_or_update(my_hash_t *hash, char *key, unsigned int key_len, void *data) {
int ret;
//查找更新
void *old_data;
int hash_index = my_hash_key_index(key, key_len);
int index = hash_index % hash->mask;
my_hash_bucket_t *bucket, *new_bucket;
bucket = hash->table[index];
while (bucket) {
if (strcmp(bucket->key, key) == 0) {
old_data = bucket->data;
bucket->data = data;
free(old_data);
return RET_SUCCESS;
}
bucket = bucket->next;
}
// 插入
new_bucket = my_hash_bucket_init(key, key_len, index, data);
if (hash->table[index]) {
hash->table[index]->prev = new_bucket;
new_bucket->next = hash->table[index];
}
hash->table[index] = new_bucket;
hash->ele_num++;
hash->fill_ratio = (float) hash->ele_num / hash->size;
if (hash->list_tail) {
new_bucket->list_prev = hash->list_tail;
hash->list_tail->list_next = new_bucket;
}
hash->list_tail = new_bucket;
if (hash->ele_num == 1) {
hash->list_head = new_bucket;
}
//判断容积率
if (hash->fill_ratio > MY_HASH_EXPAND_RATIO) {
ret = my_hash_expand(hash);
if (ret != RET_SUCCESS) {
return ret;
}
}
return RET_SUCCESS;
}
/**
* 哈希删除
* @param hash
* @param key
* @param key_len
* @return
*/
int my_hash_delete(my_hash_t *hash, char *key, int key_len) {
int opt_ret = RET_FAIL;
int index = my_hash_key_index(key, key_len) % hash->mask;
my_hash_bucket_t *bucket, *prev_bucket;
prev_bucket = NULL;
bucket = hash->table[index];
while (bucket) {
//find it
if (strcmp(bucket->key, key) == 0) {
if (bucket->list_prev && bucket->list_next) {
//中间
bucket->list_prev->list_next = bucket->list_next;
bucket->list_next->list_prev = bucket->list_prev;
} else if (bucket->list_prev && !bucket->list_next) {
//尾
bucket->list_prev->list_next = NULL;
hash->list_tail = bucket->list_prev;
} else if (!bucket->list_prev && bucket->list_next) {
//头
bucket->list_next->list_prev = NULL;
hash->list_head = bucket->list_next;
}
if (bucket->prev && bucket->next) {
//中间
bucket->prev->next = bucket->next;
bucket->next->prev = bucket->prev;
} else if (bucket->prev && !bucket->next) {
//尾
bucket->prev->next = NULL;
} else if (!bucket->prev && bucket->next) {
//头
bucket->next->prev = NULL;
hash->table[index] = bucket->next;
} else {
hash->table[index] = NULL;
}
free(bucket);
bucket = NULL;
opt_ret = RET_SUCCESS;
hash->ele_num--;
hash->fill_ratio = (float) hash->ele_num / hash->size;
break;
}
prev_bucket = bucket;
bucket = bucket->next;
}
if (opt_ret == RET_SUCCESS && hash->fill_ratio <= MY_HASH_REDUCE_RATIO) {
opt_ret = my_hash_reduce(hash);
if (opt_ret != RET_SUCCESS) {
return opt_ret;
}
}
return opt_ret;
}
/**
* hash扩张
* @param hash
* @return
*/
int my_hash_expand(my_hash_t *hash) {
hash->size = hash->size << 1;
if (hash->size > MY_HASH_MAX_SIZE) {
return RET_FAIL;
}
// define
int index;
unsigned int mem_size;
my_hash_bucket_t *bucket, *tmp_bucket;
my_hash_bucket_t **new_table;
hash->mask = hash->size - 1;
//
mem_size = sizeof(my_hash_bucket_t) * hash->size;
new_table = (my_hash_bucket_t **) malloc(mem_size);
memset(new_table, 0, mem_size);
free(hash->table);
hash->table = new_table;
hash->fill_ratio = hash->ele_num / hash->size;
bucket = hash->list_head;
//
while (bucket) {
bucket->next = bucket->prev = NULL;
index = my_hash_key_index(bucket->key, bucket->key_len) % hash->mask;
bucket->index = index;
if (hash->table[index]) {
hash->table[index]->prev = bucket;
bucket->next = hash->table[index];
}
hash->table[index] = bucket;
bucket = bucket->list_next;
}
return RET_SUCCESS;
}
/**
*
* @param hash
* @return
*/
int my_hash_reduce(my_hash_t *hash) {
hash->size = hash->size >> 1;
hash->size = hash->size < MY_HASH_INIT_SIZE ? MY_HASH_INIT_SIZE : hash->size;
// define
int index;
unsigned int mem_size;
my_hash_bucket_t *bucket, *tmp_bucket;
my_hash_bucket_t **new_table;
hash->mask = hash->size - 1;
//
mem_size = sizeof(my_hash_bucket_t) * hash->size;
new_table = (my_hash_bucket_t **) malloc(mem_size);
memset(new_table, 0, mem_size);
free(hash->table);
hash->table = new_table;
hash->fill_ratio = hash->ele_num / hash->size;
bucket = hash->list_head;
//
while (bucket) {
bucket->next = bucket->prev = NULL;
index = my_hash_key_index(bucket->key, bucket->key_len) % hash->mask;
bucket->index = index;
if (hash->table[index]) {
hash->table[index]->prev = bucket;
bucket->next = hash->table[index];
}
hash->table[index] = bucket;
bucket = bucket->list_next;
}
return RET_SUCCESS;
}
/**
* hash字符串index
* @param key
* @return
*/
int my_hash_key_index(char *key, int key_len) {
unsigned int hash = 1315423911;
unsigned int i = 0;
for (i = 0; i < key_len; key++, i++) {
hash ^= ((hash << 5) + (*key) + (hash >> 2));
}
return hash;
}
/**
*
* @param hash
* @return
*/
int my_hash_destory(my_hash_t *hash) {
my_hash_bucket_t *bucket, *while_bucket;
while_bucket = hash->list_head;
while (while_bucket) {
bucket = while_bucket;
free(bucket);
while_bucket = while_bucket->list_next;
}
free(hash);
return RET_SUCCESS;
}
/**
*
* @param hash
*/
void my_hash_foreach(my_hash_t *hash) {
my_hash_bucket_t *bucket;
bucket = hash->list_head;
while (bucket) {
printf("[key:%s(%d)]->%d\n", bucket->key, bucket->index, *(int *)bucket->data);
bucket = bucket->list_next;
}
}
/**
*
* @param hash
*/
void my_hash_dump(my_hash_t *hash) {
my_hash_bucket_t *bucket;
int i;
for (i = 1; i <= hash->size; i++) {
bucket = hash->table[i];
printf("node:%03d-->", i);
while (bucket) {
printf("[%s(%d)]=>%d->", bucket->key, bucket->index, *(int *)bucket->data);
bucket = bucket->next;
}
printf("\n");
}
}
网友评论