美文网首页
hyperloglog求交集

hyperloglog求交集

作者: lyuto | 来源:发表于2017-11-19 18:07 被阅读0次

    有两种方法实现容斥原理,一种是遍历,一种是递归(DFS),对于hyperloglog求交集而言选择遍历。

    第一步初始化 redis

    import redis
    r = redis.Redis(host='localhost', port=6379, db=0)  
    r.pfadd('day1',*['user_1','user_2','user_3','user_4','user_5','user_6','user_7'])
    r.pfadd('day2',*['user_8','user_2','user_3','user_4','user_5','user_6','user_7'])
    r.pfadd('day3',*['user_8','user_9','user_3','user_4','user_5','user_6','user_7'])
    r.pfadd('day4',*['user_8','user_9','user_10','user_4','user_5','user_6','user_7'])
    r.pfadd('day5',*['user_8','user_9','user_10','user_11','user_5','user_6','user_7'])
    r.pfadd('day6',*['user_8','user_9','user_10','user_11','user_12','user_6','user_7'])
    r.pfadd('day7',*['user_8','user_9','user_10','user_11','user_12','user_13','user_7'])
    r.pfadd('day8',*['user_8','user_9','user_10','user_11','user_12','user_13','user_14'])
    

    第二部实现排列函数和归属函数

    #求排列
    from itertools import combinations
    def arrangement(length):
        combins = {}
        for i in range(length):
            combins[i+1] = [c for c in combinations(range(length), (i+1))]
        return combins
    
    #lst1是否包含lst2
    def is_exist_in_tuple(lst1,lst2):
        d1 = {}
        for i in lst1:
            if i in d1:
                d1[i] += 1
            else:
                d1[i] = 1
        d2 = {}
        for i in lst2:
            if i in d2:
                d2[i] += 1
            else:
                d2[i] = 1
        for key,value in d2.items():
            if key not in d1 or value>d1[key]:
                return False
        return True
    

    第三步 实现交集函数

    #combins--排列字典
    #combins_intersection--交集
    #i--第几层
    #j--第几个
    #k--第几个的第几个
    #下面开始计算交集
    #l--同i
    #m--同j
    
    def intersection(lst):
        count_intersection = 0
        combins = arrangement(len(lst))
        combins_intersection = combins
    
        for i in range(len(combins)):
            for j in range(len(combins[i+1])):
                for k in range(len(combins[i+1][j])):
                    r.pfmerge("combins_intersection%s_%s" % (i+1,j), lst[combins[i+1][j][k]])
                
                combins = arrangement(len(lst))
                combins_intersection[i+1][j] = r.pfcount("combins_intersection%s_%s" % (i+1,j))
                
                if(i>0):
                    for l in range((i)):
                        for m in range(len(combins[l+1])):
                            if(is_exist_in_tuple(combins[i+1][j], combins[l+1][m])):
                                combins_intersection[i+1][j] += ((-1)**(l+1))*abs(combins_intersection[l+1][m]) 
                                
            for n in range(len(combins[i+1])):
                combins_intersection[i+1][n] = abs(combins_intersection[i+1][n])
        
        for i in range(len(combins)):
            for j in range(len(combins[i+1])):
                r.delete("combins_intersection%s_%s" % (i+1,j))
        return combins_intersection[i+1][0]
    

    测试

    combins_intersection1 = intersection(["day1","day2","day3","day4","day5","day6","day7","day8"])
    combins_intersection = intersection(["day1","day2","day3","day4","day5","day6","day7"])
    

    看结果输出为

    print combins_intersection
    print "----------------"
    print combins_intersection1
    
    1
    ----------------
    0
    

    大功告成

    相关文章

      网友评论

          本文标题:hyperloglog求交集

          本文链接:https://www.haomeiwen.com/subject/eqdpvxtx.html