redis源码之dict

dict.h中的结构体

dictEntry结构体

typedef struct dictEntry {
    // 关键字key定义
    void *key;  
    // 值value定义，这里采用了联合体，根据union的特点，联合体只能存放一个被选中的成员
    union {
        void *val;      // 自定义类型
        uint64_t u64;   // 无符号整形
        int64_t s64;    // 有符号整形
        double d;       // 浮点型
    } v;
    // 指向下一个键值对节点
    struct dictEntry *next;
} dictEntry;

dictType结构体

/* 定义了字典操作的公共方法，类似于adlist.h文件中list的定义，将对节点的公共操作方法统一定义。搞不明白为什么要命名为dictType */
typedef struct dictType {
    /* hash方法，根据关键字计算哈希值 */
    unsigned int (*hashFunction)(const void *key);
    /* 复制key */
    void *(*keyDup)(void *privdata, const void *key);
    /* 复制value */
    void *(*valDup)(void *privdata, const void *obj);
    /* 关键字比较方法 */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    /* 销毁key */
    void (*keyDestructor)(void *privdata, void *key);
    /* 销毁value */
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

dictht结构体

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
/* 哈希表结构 */
typedef struct dictht {
    // 散列数组。哈希表内部是基于数组，数组的元素是dictEntry *类型，所以这里是指针数组。
    dictEntry **table;
    // 散列数组的长度
    unsigned long size;
    // sizemask等于size减1
    unsigned long sizemask;
    // 散列数组中已经被使用的节点数量
    unsigned long used;
} dictht;

dict结构体

/* 字典的主操作类，对dictht结构再次包装  */
typedef struct dict {
    // 字典类型
    dictType *type;
    // 私有数据指针
    void *privdata;
    // 一个字典中有两个哈希表，后面的分析中，我们将ht[0]称作就表，ht[1]称作新表
    /*  dict的rehash。通常情况下，所有的数据都是存在放dict的ht[0]中，ht[1]只在rehash的时候使用。
        dict进行rehash操作的时候，将ht[0]中的所有数据rehash到ht[1]中。然后将ht[1]赋值给ht[0]，
        并清空ht[1]。rehash操作我们会在后面详细看到。
    */
    dictht ht[2];
    // 数据动态迁移的位置，如果rehashidx == -1说明当前没有执行rehash操作
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    // 当前正在使用的迭代器的数量
    int iterators; /* number of iterators currently running */
} dict;

四个结构体之间的关系：

散列函数

Thomas Wang’s 32 bit Mix dictIntHashFunction,对整形取hash
MurmurHash2 by Austin Appleby dictGenHashFunction，对key值与指定长度取hash
case insensitive hash function (based on djb hash) dictGenCaseHashFunction 对字符串进行hash

Rehash操作

n步渐进式rehash操作，这是redis的一个亮点，能够将一次rehash分摊到多次数据请求中
- _dictRehashStep,每调用一次Rehash一个bucket
- dictRehashMilliseconds 在一定时间内rehash多个bucket
- 上面两个函数都调用了dictRehash方法:

/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table. */
/* 执行n步渐进式的rehash操作，如果还有key需要从旧表迁移到新表则返回1，否则返回0 */
int dictRehash(dict *d, int n) {
    if (!dictIsRehashing(d)) return 0;

    // n步渐进式的rehash操作就是每次只迁移哈希数组中的n个bucket
    while(n--) {
        dictEntry *de, *nextde;

        /* Check if we already rehashed the whole table... */
        // 检查是否对整个哈希表进行了rehash操作
        if (d->ht[0].used == 0) {
            zfree(d->ht[0].table);
            d->ht[0] = d->ht[1];
            _dictReset(&d->ht[1]);
            d->rehashidx = -1;
            return 0;
        }

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        // rehashidx标记的是当前rehash操作进行到了ht[0]旧表的那个位置（下标），因此需要判断它是否操作ht[0]的长度
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        // 跳过ht[0]中前面为空的位置
        while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        // 下面的操作将每个节点（键值对）从ht[0]迁移到ht[1],此过程需要重新计算每个节点key的哈希值
        while(de) {
            unsigned int h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }
    return 1;
}