t_set.c

作者: 生命就是个Bug | 来源:发表于2019-04-17 16:56 被阅读0次

    Redis的t_set.c是个set数据结构的实现。

    当元素都是Integer类型的时候,采用intset来实现。
    当元素有非Integer类型的时候,采用dict来实现。

    intset 插入非整型元素的时候,需要将类型转换为 dict

    操作集合的时候,一旦集合为空,需要清除。

    类型转换代码如下:

    /* Convert the set to specified encoding. The resulting dict (when converting
     * to a hash table) is presized to hold the number of elements in the original
     * set. */
    //类型转换,将intset中的元素都添加到dict中
    void setTypeConvert(robj *setobj, int enc) {
        setTypeIterator *si;
        serverAssertWithInfo(NULL,setobj,setobj->type == OBJ_SET &&
                                 setobj->encoding == OBJ_ENCODING_INTSET);
    
        if (enc == OBJ_ENCODING_HT) {
            int64_t intele;
            dict *d = dictCreate(&setDictType,NULL);
            sds element;
    
            /* Presize the dict to avoid rehashing */
            //提前分配intsetLen大小的空间,避免rehash
            dictExpand(d,intsetLen(setobj->ptr));
    
            /* To add the elements we extract integers and create redis objects */
            si = setTypeInitIterator(setobj);
            //遍历
            while (setTypeNext(si,&element,&intele) != -1) {
                element = sdsfromlonglong(intele);
                serverAssert(dictAdd(d,element,NULL) == DICT_OK);
            }
            setTypeReleaseIterator(si);
    
            setobj->encoding = OBJ_ENCODING_HT;
            zfree(setobj->ptr);
            setobj->ptr = d;
        } else {
            serverPanic("Unsupported set conversion");
        }
    }
    

    1. 新增元素

    //sadd命令 sadd key member [member...]
    void saddCommand(client *c) {
        robj *set;
        int j, added = 0;
    
        //在dict查找key
        set = lookupKeyWrite(c->db,c->argv[1]);
        if (set == NULL) {
            //查找不到,则创建set
            set = setTypeCreate(c->argv[2]->ptr);
            //将set加入到dict
            dbAdd(c->db,c->argv[1],set);
        } else {
            //类型不正确,报错
            if (set->type != OBJ_SET) {
                addReply(c,shared.wrongtypeerr);
                return;
            }
        }
    
        //添加元素
        for (j = 2; j < c->argc; j++) {
            if (setTypeAdd(set,c->argv[j]->ptr)) added++;
        }
        if (added) {
            signalModifiedKey(c->db,c->argv[1]);
            notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[1],c->db->id);
        }
        server.dirty += added;
        addReplyLongLong(c,added);
    }
    
    /* Add the specified value into a set.
     *
     * If the value was already member of the set, nothing is done and 0 is
     * returned, otherwise the new element is added and 1 is returned. */
    //添加元素
    int setTypeAdd(robj *subject, sds value) {
        long long llval;
        //如果编码是dict
        if (subject->encoding == OBJ_ENCODING_HT) {
            dict *ht = subject->ptr;
            dictEntry *de = dictAddRaw(ht,value,NULL);
            if (de) {
                dictSetKey(ht,de,sdsdup(value));
                dictSetVal(ht,de,NULL);
                return 1;
            }
        }
    
        //如果编码是intset
        else if (subject->encoding == OBJ_ENCODING_INTSET) {
            //如果插入的值是整型,则直接插入
            if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
                uint8_t success = 0;
                //会进行二分查找,实现去重
                subject->ptr = intsetAdd(subject->ptr,llval,&success);
                if (success) {
                    /* Convert to regular set when the intset contains
                     * too many entries. */
                    if (intsetLen(subject->ptr) > server.set_max_intset_entries)
                        setTypeConvert(subject,OBJ_ENCODING_HT);
                    return 1;
                }
            }
    
            //如果插入的值不是整型,则需要进行类型转换,将intset转为dict
            else {
                /* Failed to get integer from object, convert to regular set. */
                setTypeConvert(subject,OBJ_ENCODING_HT);
    
                /* The set *was* an intset and this value is not integer
                 * encodable, so dictAdd should always work. */
                serverAssert(dictAdd(subject->ptr,sdsdup(value),NULL) == DICT_OK);
                return 1;
            }
        } else {
            serverPanic("Unknown set encoding");
        }
        return 0;
    }
    

    2. 删除元素

    //srem命令 srem key member [member...]
    void sremCommand(client *c) {
        robj *set;
        int j, deleted = 0, keyremoved = 0;
    
        //根据key查找set
        if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
            checkType(c,set,OBJ_SET)) return;
    
        //删除member
        for (j = 2; j < c->argc; j++) {
            if (setTypeRemove(set,c->argv[j]->ptr)) {
                deleted++;
                if (setTypeSize(set) == 0) {
                    dbDelete(c->db,c->argv[1]);
                    keyremoved = 1;
                    break;
                }
            }
        }
    
        if (deleted) {
            signalModifiedKey(c->db,c->argv[1]);
            notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
            //集合为空,需要移除key
            if (keyremoved)
                notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],
                                    c->db->id);
            server.dirty += deleted;
        }
        addReplyLongLong(c,deleted);
    }
    
    //移除元素
    int setTypeRemove(robj *setobj, sds value) {
        long long llval;
        //dict类型
        if (setobj->encoding == OBJ_ENCODING_HT) {
            if (dictDelete(setobj->ptr,value) == DICT_OK) {
                if (htNeedsResize(setobj->ptr)) dictResize(setobj->ptr);
                return 1;
            }
        }
        //intset类型
        else if (setobj->encoding == OBJ_ENCODING_INTSET) {
            if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
                int success;
                setobj->ptr = intsetRemove(setobj->ptr,llval,&success);
                if (success) return 1;
            }
        } else {
            serverPanic("Unknown set encoding");
        }
        return 0;
    }
    
    //spop命令
    void spopCommand(client *c) {
        robj *set, *ele, *aux;
        sds sdsele;
        int64_t llele;
        int encoding;
    
        //带有count的
        if (c->argc == 3) {
            spopWithCountCommand(c);
            return;
        } else if (c->argc > 3) {
            addReply(c,shared.syntaxerr);
            return;
        }
    
        /* Make sure a key with the name inputted exists, and that it's type is
         * indeed a set */
        if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
            checkType(c,set,OBJ_SET)) return;
    
        /* Get a random element from the set */
        encoding = setTypeRandomElement(set,&sdsele,&llele);
    
        /* Remove the element from the set */
        //intset
        if (encoding == OBJ_ENCODING_INTSET) {
            ele = createStringObjectFromLongLong(llele);
            set->ptr = intsetRemove(set->ptr,llele,NULL);
        }
        //dict
        else {
            ele = createStringObject(sdsele,sdslen(sdsele));
            setTypeRemove(set,ele->ptr);
        }
    
        notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
    
        /* Replicate/AOF this command as an SREM operation */
        aux = createStringObject("SREM",4);
        rewriteClientCommandVector(c,3,aux,c->argv[1],ele);
        decrRefCount(aux);
    
        /* Add the element to the reply */
        addReplyBulk(c,ele);
        decrRefCount(ele);
    
        /* Delete the set if it's empty */
        if (setTypeSize(set) == 0) {
            dbDelete(c->db,c->argv[1]);
            notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
        }
    
        /* Set has been modified */
        signalModifiedKey(c->db,c->argv[1]);
        server.dirty++;
    }
    
    //spop命令  spop key [count] 移除count个元素
    void spopWithCountCommand(client *c) {
        long l;
        unsigned long count, size;
        robj *set;
    
        /* Get the count argument */
        if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return;
        if (l >= 0) {
            count = (unsigned long) l;
        } else {
            addReply(c,shared.outofrangeerr);
            return;
        }
    
        /* Make sure a key with the name inputted exists, and that it's type is
         * indeed a set. Otherwise, return nil */
        if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk))
            == NULL || checkType(c,set,OBJ_SET)) return;
    
        /* If count is zero, serve an empty multibulk ASAP to avoid special
         * cases later. */
        if (count == 0) {
            addReply(c,shared.emptymultibulk);
            return;
        }
    
        size = setTypeSize(set);
    
        /* Generate an SPOP keyspace notification */
        notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
        server.dirty += count;
    
        /* CASE 1:
         * The number of requested elements is greater than or equal to
         * the number of elements inside the set: simply return the whole set. */
        //将要移除的个数和集合大小对比
        //如果移除个数大于等于集合大小,则直接移除key
        if (count >= size) {
            /* We just return the entire set */
            sunionDiffGenericCommand(c,c->argv+1,1,NULL,SET_OP_UNION);
    
            /* Delete the set as it is now empty */
            dbDelete(c->db,c->argv[1]);
            notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
    
            /* Propagate this command as an DEL operation */
            rewriteClientCommandVector(c,2,shared.del,c->argv[1]);
            signalModifiedKey(c->db,c->argv[1]);
            server.dirty++;
            return;
        }
    
        /* Case 2 and 3 require to replicate SPOP as a set of SREM commands.
         * Prepare our replication argument vector. Also send the array length
         * which is common to both the code paths. */
        robj *propargv[3];
        propargv[0] = createStringObject("SREM",4);
        propargv[1] = c->argv[1];
        addReplyMultiBulkLen(c,count);
    
        /* Common iteration vars. */
        sds sdsele;
        robj *objele;
        int encoding;
        int64_t llele;
        unsigned long remaining = size-count; /* Elements left after SPOP. */
    
        /* If we are here, the number of requested elements is less than the
         * number of elements inside the set. Also we are sure that count < size.
         * Use two different strategies.
         *
         * CASE 2: The number of elements to return is small compared to the
         * set size. We can just extract random elements and return them to
         * the set. */
        //如果要移除的元素较少,则直接操作原集合
        if (remaining*SPOP_MOVE_STRATEGY_MUL > count) {
            while(count--) {
                /* Emit and remove. */
                encoding = setTypeRandomElement(set,&sdsele,&llele);
                if (encoding == OBJ_ENCODING_INTSET) {
                    addReplyBulkLongLong(c,llele);
                    objele = createStringObjectFromLongLong(llele);
                    set->ptr = intsetRemove(set->ptr,llele,NULL);
                } else {
                    addReplyBulkCBuffer(c,sdsele,sdslen(sdsele));
                    objele = createStringObject(sdsele,sdslen(sdsele));
                    setTypeRemove(set,sdsele);
                }
    
                /* Replicate/AOF this command as an SREM operation */
                propargv[2] = objele;
                alsoPropagate(server.sremCommand,c->db->id,propargv,3,
                    PROPAGATE_AOF|PROPAGATE_REPL);
                decrRefCount(objele);
            }
        }
    
        //若要移除的元素较多,接近于原集合,则取出不被移除的部分,建立一个新集合,再释放原来的集合
        else {
        /* CASE 3: The number of elements to return is very big, approaching
         * the size of the set itself. After some time extracting random elements
         * from such a set becomes computationally expensive, so we use
         * a different strategy, we extract random elements that we don't
         * want to return (the elements that will remain part of the set),
         * creating a new set as we do this (that will be stored as the original
         * set). Then we return the elements left in the original set and
         * release it. */
            robj *newset = NULL;
    
            /* Create a new set with just the remaining elements. */
            while(remaining--) {
                encoding = setTypeRandomElement(set,&sdsele,&llele);
                if (encoding == OBJ_ENCODING_INTSET) {
                    sdsele = sdsfromlonglong(llele);
                } else {
                    sdsele = sdsdup(sdsele);
                }
                if (!newset) newset = setTypeCreate(sdsele);
                setTypeAdd(newset,sdsele);
                setTypeRemove(set,sdsele);
                sdsfree(sdsele);
            }
    
            /* Transfer the old set to the client. */
            setTypeIterator *si;
            si = setTypeInitIterator(set);
            while((encoding = setTypeNext(si,&sdsele,&llele)) != -1) {
                if (encoding == OBJ_ENCODING_INTSET) {
                    addReplyBulkLongLong(c,llele);
                    objele = createStringObjectFromLongLong(llele);
                } else {
                    addReplyBulkCBuffer(c,sdsele,sdslen(sdsele));
                    objele = createStringObject(sdsele,sdslen(sdsele));
                }
    
                /* Replicate/AOF this command as an SREM operation */
                propargv[2] = objele;
                alsoPropagate(server.sremCommand,c->db->id,propargv,3,
                    PROPAGATE_AOF|PROPAGATE_REPL);
                decrRefCount(objele);
            }
            setTypeReleaseIterator(si);
    
            /* Assign the new set as the key value. */
            dbOverwrite(c->db,c->argv[1],newset);
        }
    
        /* Don't propagate the command itself even if we incremented the
         * dirty counter. We don't want to propagate an SPOP command since
         * we propagated the command as a set of SREMs operations using
         * the alsoPropagate() API. */
        decrRefCount(propargv[0]);
        preventCommandPropagation(c);
        signalModifiedKey(c->db,c->argv[1]);
        server.dirty++;
    }
    

    根据要移除的元素个数和集合的大小考量。
    要移除的元素个数较多,则取出不移除的部分,再直接移除原集合。
    要移除的元素个数较少,则直接操作原集合。

    3. 集合操作(交、并、差)

    //sinter命令 sinter key [key...]
    void sinterCommand(client *c) {
        sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
    }
    
    //sinterstore命令 sinterstore destination key [key...]
    //取完交集并存入一个新集合c->argv[1]中
    void sinterstoreCommand(client *c) {
        sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
    }
    
    //取交集通用实现
    void sinterGenericCommand(client *c, robj **setkeys,
                              unsigned long setnum, robj *dstkey) {
        robj **sets = zmalloc(sizeof(robj*)*setnum);
        setTypeIterator *si;
        robj *dstset = NULL;
        sds elesds;
        int64_t intobj;
        void *replylen = NULL;
        unsigned long j, cardinality = 0;
        int encoding;
    
        //setnum 集合的个数
        //dstkey 交集存入的新集合
        for (j = 0; j < setnum; j++) {
            robj *setobj = dstkey ?
                lookupKeyWrite(c->db,setkeys[j]) :
                lookupKeyRead(c->db,setkeys[j]);
            //如果参与做交集操作的集合不存在
            if (!setobj) {
                zfree(sets);
                //如果交集存入的新集合存在
                if (dstkey) {
                    //删除掉
                    if (dbDelete(c->db,dstkey)) {
                        signalModifiedKey(c->db,dstkey);
                        server.dirty++;
                    }
                    addReply(c,shared.czero);
                } else {
                    addReply(c,shared.emptymultibulk);
                }
                return;
            }
            if (checkType(c,setobj,OBJ_SET)) {
                zfree(sets);
                return;
            }
            sets[j] = setobj;
        }
        /* Sort sets from the smallest to largest, this will improve our
         * algorithm's performance */
        qsort(sets,setnum,sizeof(robj*),qsortCompareSetsByCardinality);
    
        /* The first thing we should output is the total number of elements...
         * since this is a multi-bulk write, but at this stage we don't know
         * the intersection set size, so we use a trick, append an empty object
         * to the output list and save the pointer to later modify it with the
         * right length */
        if (!dstkey) {
            replylen = addDeferredMultiBulkLength(c);
        } else {
            /* If we have a target key where to store the resulting set
             * create this key with an empty set inside */
            dstset = createIntsetObject();
        }
    
        /* Iterate all the elements of the first (smallest) set, and test
         * the element against all the other sets, if at least one set does
         * not include the element it is discarded */
        si = setTypeInitIterator(sets[0]);
        //遍历第一个集合set
        while((encoding = setTypeNext(si,&elesds,&intobj)) != -1) {
            //和其它集合取交集,通过查找其它集合中是否都存在当前元素
            for (j = 1; j < setnum; j++) {
                if (sets[j] == sets[0]) continue;
                //intset
                if (encoding == OBJ_ENCODING_INTSET) {
                    /* intset with intset is simple... and fast */
                    //都是intset
                    if (sets[j]->encoding == OBJ_ENCODING_INTSET &&
                        !intsetFind((intset*)sets[j]->ptr,intobj))
                    {
                        break;
                    /* in order to compare an integer with an object we
                     * have to use the generic function, creating an object
                     * for this */
                    }
                    //有的是dict
                    else if (sets[j]->encoding == OBJ_ENCODING_HT) {
                        elesds = sdsfromlonglong(intobj);
                        if (!setTypeIsMember(sets[j],elesds)) {
                            sdsfree(elesds);
                            break;
                        }
                        sdsfree(elesds);
                    }
                }
                //dict
                else if (encoding == OBJ_ENCODING_HT) {
                    if (!setTypeIsMember(sets[j],elesds)) {
                        break;
                    }
                }
            }
    
            /* Only take action when all sets contain the member */
            if (j == setnum) {
                if (!dstkey) {
                    if (encoding == OBJ_ENCODING_HT)
                        addReplyBulkCBuffer(c,elesds,sdslen(elesds));
                    else
                        addReplyBulkLongLong(c,intobj);
                    cardinality++;
                } else {
                    if (encoding == OBJ_ENCODING_INTSET) {
                        elesds = sdsfromlonglong(intobj);
                        setTypeAdd(dstset,elesds);
                        sdsfree(elesds);
                    } else {
                        setTypeAdd(dstset,elesds);
                    }
                }
            }
        }
        setTypeReleaseIterator(si);
    
        //如果需要将交集结果存入一个新集合,且非空
        if (dstkey) {
            /* Store the resulting set into the target, if the intersection
             * is not an empty set. */
            int deleted = dbDelete(c->db,dstkey);
            if (setTypeSize(dstset) > 0) {
                dbAdd(c->db,dstkey,dstset);
                addReplyLongLong(c,setTypeSize(dstset));
                notifyKeyspaceEvent(NOTIFY_SET,"sinterstore",
                    dstkey,c->db->id);
            } else {
                decrRefCount(dstset);
                addReply(c,shared.czero);
                if (deleted)
                    notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
                        dstkey,c->db->id);
            }
            signalModifiedKey(c->db,dstkey);
            server.dirty++;
        } else {
            setDeferredMultiBulkLength(c,replylen,cardinality);
        }
        zfree(sets);
    }
    

    交集,主要通过辅助集合及取出一个集合遍历来和其它集合比较。

    //sunion命令 sunion key [key...]
    void sunionCommand(client *c) {
        sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,SET_OP_UNION);
    }
    
    //sunionstore命令 sunionstore destination key [key...]
    void sunionstoreCommand(client *c) {
        sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_UNION);
    }
    
    //sdiff命令 sdiff key [key...]
    void sdiffCommand(client *c) {
        sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,SET_OP_DIFF);
    }
    
    //sdiffstore命令 sdiffstore destination key [key...]
    void sdiffstoreCommand(client *c) {
        sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_DIFF);
    }
    
    //并集和差集的通用实现
    void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
                                  robj *dstkey, int op) {
        robj **sets = zmalloc(sizeof(robj*)*setnum);
        setTypeIterator *si;
        robj *dstset = NULL;
        sds ele;
        int j, cardinality = 0;
        int diff_algo = 1;
    
        //dstkey 并集或者差集要存入的目标集合
        for (j = 0; j < setnum; j++) {
            robj *setobj = dstkey ?
                lookupKeyWrite(c->db,setkeys[j]) :
                lookupKeyRead(c->db,setkeys[j]);
            if (!setobj) {
                sets[j] = NULL;
                continue;
            }
            if (checkType(c,setobj,OBJ_SET)) {
                zfree(sets);
                return;
            }
            sets[j] = setobj;
        }
    
        /* Select what DIFF algorithm to use.
         *
         * Algorithm 1 is O(N*M) where N is the size of the element first set
         * and M the total number of sets.
         *
         * Algorithm 2 is O(N) where N is the total number of elements in all
         * the sets.
         *
         * We compute what is the best bet with the current input here. */
        if (op == SET_OP_DIFF && sets[0]) {
            long long algo_one_work = 0, algo_two_work = 0;
    
            for (j = 0; j < setnum; j++) {
                if (sets[j] == NULL) continue;
    
                algo_one_work += setTypeSize(sets[0]);
                algo_two_work += setTypeSize(sets[j]);
            }
    
            /* Algorithm 1 has better constant times and performs less operations
             * if there are elements in common. Give it some advantage. */
            algo_one_work /= 2;
            diff_algo = (algo_one_work <= algo_two_work) ? 1 : 2;
    
            if (diff_algo == 1 && setnum > 1) {
                /* With algorithm 1 it is better to order the sets to subtract
                 * by decreasing size, so that we are more likely to find
                 * duplicated elements ASAP. */
                qsort(sets+1,setnum-1,sizeof(robj*),
                    qsortCompareSetsByRevCardinality);
            }
        }
    
        /* We need a temp set object to store our union. If the dstkey
         * is not NULL (that is, we are inside an SUNIONSTORE operation) then
         * this set object will be the resulting object to set into the target key*/
        //辅助集合
        dstset = createIntsetObject();
    
        //取并集
        if (op == SET_OP_UNION) {
            /* Union is trivial, just add every element of every set to the
             * temporary set. */
            //遍历每一个set的所有元素,加入到dstset即可。
            for (j = 0; j < setnum; j++) {
                //空集合略过
                if (!sets[j]) continue; /* non existing keys are like empty sets */
    
                si = setTypeInitIterator(sets[j]);
                while((ele = setTypeNextObject(si)) != NULL) {
                    if (setTypeAdd(dstset,ele)) cardinality++;
                    sdsfree(ele);
                }
                setTypeReleaseIterator(si);
            }
        }
        //取差集,算法1
        else if (op == SET_OP_DIFF && sets[0] && diff_algo == 1) {
            /* DIFF Algorithm 1:
             *
             * We perform the diff by iterating all the elements of the first set,
             * and only adding it to the target set if the element does not exist
             * into all the other sets.
             *
             * This way we perform at max N*M operations, where N is the size of
             * the first set, and M the number of sets. */
            //取出第一个集合
            si = setTypeInitIterator(sets[0]);
            //遍历第一个集合
            while((ele = setTypeNextObject(si)) != NULL) {
                //将第一个集合的元素和其它所有集合的比较
                for (j = 1; j < setnum; j++) {
                    if (!sets[j]) continue; /* no key is an empty set. */
                    if (sets[j] == sets[0]) break; /* same set! */
                    if (setTypeIsMember(sets[j],ele)) break;
                }
                //只有这个元素是没有交集的才可以加入dstset中
                if (j == setnum) {
                    /* There is no other set with this element. Add it. */
                    setTypeAdd(dstset,ele);
                    cardinality++;
                }
                sdsfree(ele);
            }
            setTypeReleaseIterator(si);
        }
        //取差集,算法2
        else if (op == SET_OP_DIFF && sets[0] && diff_algo == 2) {
            /* DIFF Algorithm 2:
             *
             * Add all the elements of the first set to the auxiliary set.
             * Then remove all the elements of all the next sets from it.
             *
             * This is O(N) where N is the sum of all the elements in every
             * set. */
            //将第一个集合的所有元素加入到辅助集合中,然后跟第二个集合对比,移除相同的元素,以此类推。
            //此法应该是参考的单个集合异或去重大法
            for (j = 0; j < setnum; j++) {
                if (!sets[j]) continue; /* non existing keys are like empty sets */
    
                si = setTypeInitIterator(sets[j]);
                while((ele = setTypeNextObject(si)) != NULL) {
                    if (j == 0) {
                        if (setTypeAdd(dstset,ele)) cardinality++;
                    } else {
                        if (setTypeRemove(dstset,ele)) cardinality--;
                    }
                    sdsfree(ele);
                }
                setTypeReleaseIterator(si);
    
                /* Exit if result set is empty as any additional removal
                 * of elements will have no effect. */
                if (cardinality == 0) break;
            }
        }
    
        /* Output the content of the resulting set, if not in STORE mode */
        if (!dstkey) {
            addReplyMultiBulkLen(c,cardinality);
            si = setTypeInitIterator(dstset);
            while((ele = setTypeNextObject(si)) != NULL) {
                addReplyBulkCBuffer(c,ele,sdslen(ele));
                sdsfree(ele);
            }
            setTypeReleaseIterator(si);
            decrRefCount(dstset);
        } else {
            /* If we have a target key where to store the resulting set
             * create this key with the result set inside */
            int deleted = dbDelete(c->db,dstkey);
            if (setTypeSize(dstset) > 0) {
                dbAdd(c->db,dstkey,dstset);
                addReplyLongLong(c,setTypeSize(dstset));
                notifyKeyspaceEvent(NOTIFY_SET,
                    op == SET_OP_UNION ? "sunionstore" : "sdiffstore",
                    dstkey,c->db->id);
            } else {
                decrRefCount(dstset);
                addReply(c,shared.czero);
                if (deleted)
                    notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
                        dstkey,c->db->id);
            }
            signalModifiedKey(c->db,dstkey);
            server.dirty++;
        }
        zfree(sets);
    }
    

    并集,直接将所有集合的所有元素加入到辅助集合即可。
    差集,用了2种算法,1种通过拿出一个集合并和其它集合比较取差集,另外1种是通过集合之间两两合并去除重复元素。

    4. 随机返回元素

    //srandmember命令
    void srandmemberCommand(client *c) {
        robj *set;
        sds ele;
        int64_t llele;
        int encoding;
    
        //带有count的
        if (c->argc == 3) {
            srandmemberWithCountCommand(c);
            return;
        } else if (c->argc > 3) {
            addReply(c,shared.syntaxerr);
            return;
        }
    
        if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
            checkType(c,set,OBJ_SET)) return;
    
        encoding = setTypeRandomElement(set,&ele,&llele);
        if (encoding == OBJ_ENCODING_INTSET) {
            addReplyBulkLongLong(c,llele);
        } else {
            addReplyBulkCBuffer(c,ele,sdslen(ele));
        }
    }
    
    #define SRANDMEMBER_SUB_STRATEGY_MUL 3
    
    //srandmember命令 srandmember key [count]
    void srandmemberWithCountCommand(client *c) {
        long l;
        unsigned long count, size;
        int uniq = 1;
        robj *set;
        sds ele;
        int64_t llele;
        int encoding;
    
        dict *d;
    
        if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return;
        if (l >= 0) {
            count = (unsigned long) l;
        } else {
            /* A negative count means: return the same elements multiple times
             * (i.e. don't remove the extracted element after every extraction). */
            count = -l;
            uniq = 0;
        }
    
        if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk))
            == NULL || checkType(c,set,OBJ_SET)) return;
        size = setTypeSize(set);
    
        /* If count is zero, serve it ASAP to avoid special cases later. */
        if (count == 0) {
            addReply(c,shared.emptymultibulk);
            return;
        }
    
        /* CASE 1: The count was negative, so the extraction method is just:
         * "return N random elements" sampling the whole set every time.
         * This case is trivial and can be served without auxiliary data
         * structures. */
        //获取个数为负数的情况
        if (!uniq) {
            addReplyMultiBulkLen(c,count);
            //随机count次
            while(count--) {
                encoding = setTypeRandomElement(set,&ele,&llele);
                if (encoding == OBJ_ENCODING_INTSET) {
                    addReplyBulkLongLong(c,llele);
                } else {
                    addReplyBulkCBuffer(c,ele,sdslen(ele));
                }
            }
            return;
        }
    
        /* CASE 2:
         * The number of requested elements is greater than the number of
         * elements inside the set: simply return the whole set. */
        //如果count大于集合大小,那么直接返回整个集合
        if (count >= size) {
            sunionDiffGenericCommand(c,c->argv+1,1,NULL,SET_OP_UNION);
            return;
        }
    
        /* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
        //创建一个辅助的dict
        d = dictCreate(&objectKeyPointerValueDictType,NULL);
    
        /* CASE 3:
         * The number of elements inside the set is not greater than
         * SRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements.
         * In this case we create a set from scratch with all the elements, and
         * subtract random elements to reach the requested number of elements.
         *
         * This is done because if the number of requsted elements is just
         * a bit less than the number of elements in the set, the natural approach
         * used into CASE 3 is highly inefficient. */
        //要获取的元素较多的情况下
        if (count*SRANDMEMBER_SUB_STRATEGY_MUL > size) {
            setTypeIterator *si;
    
            /* Add all the elements into the temporary dictionary. */
            si = setTypeInitIterator(set);
            //把所有set中的元素放到dict中
            while((encoding = setTypeNext(si,&ele,&llele)) != -1) {
                int retval = DICT_ERR;
    
                if (encoding == OBJ_ENCODING_INTSET) {
                    retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
                } else {
                    retval = dictAdd(d,createStringObject(ele,sdslen(ele)),NULL);
                }
                serverAssert(retval == DICT_OK);
            }
            setTypeReleaseIterator(si);
            serverAssert(dictSize(d) == size);
    
            /* Remove random elements to reach the right count. */
            //随机移除size-count个元素
            while(size > count) {
                dictEntry *de;
    
                de = dictGetRandomKey(d);
                dictDelete(d,dictGetKey(de));
                size--;
            }
        }
    
        /* CASE 4: We have a big set compared to the requested number of elements.
         * In this case we can simply get random elements from the set and add
         * to the temporary set, trying to eventually get enough unique elements
         * to reach the specified count. */
        //要获取的元素个数较少的情况
        else {
            unsigned long added = 0;
            robj *objele;
            //随机获取count个元素存放到dict中
            while(added < count) {
                encoding = setTypeRandomElement(set,&ele,&llele);
                if (encoding == OBJ_ENCODING_INTSET) {
                    objele = createStringObjectFromLongLong(llele);
                } else {
                    objele = createStringObject(ele,sdslen(ele));
                }
                /* Try to add the object to the dictionary. If it already exists
                 * free it, otherwise increment the number of objects we have
                 * in the result dictionary. */
                if (dictAdd(d,objele,NULL) == DICT_OK)
                    added++;
                else
                    decrRefCount(objele);
            }
        }
    
        //返回dict
        /* CASE 3 & 4: send the result to the user. */
        {
            dictIterator *di;
            dictEntry *de;
    
            addReplyMultiBulkLen(c,count);
            di = dictGetIterator(d);
            while((de = dictNext(di)) != NULL)
                addReplyBulk(c,dictGetKey(de));
            dictReleaseIterator(di);
            dictRelease(d);
        }
    }
    

    根据要随机取的个数和集合的个数考量。
    要取的元素较多,则移除较少部分元素。
    要取的元素较少,则直接取。

    相关文章

      网友评论

          本文标题:t_set.c

          本文链接:https://www.haomeiwen.com/subject/orbnwqtx.html