美文网首页
K-medoids聚类

K-medoids聚类

作者: 微雨旧时歌丶 | 来源:发表于2018-08-17 08:39 被阅读0次

    转自https://blog.csdn.net/ywjun0919/article/details/8692117
    用到的库有 Pycluster、numpy,以及自己编写的距离函数模块 similaritymeasures.py(其中定义了 Similarity 类)

    # main.py
    import Pycluster as PC
    from similaritymeasures import Similarity
    import numpy as np
    # Helper object providing the Jaccard similarity used to compare tag lists.
    measures = Similarity()

    # Each entry is one item to cluster, described by its list of tags.
    words = [["highway","traffic_signals"],["highway","railway"],["railway","highway"],["railway","highway"],["name"]]

    # Build the symmetric pairwise distance matrix, where
    # distance = 1 - Jaccard similarity of the two tag lists.
    K = len(words)
    m = np.zeros((K, K))
    for row, left in enumerate(words):
        # Only the upper triangle (diagonal included) is computed;
        # every value is mirrored into the lower triangle.
        for col in range(row, K):
            dist = 1 - measures.jaccard_similarity(left, words[col])
            m[row, col] = m[col, row] = dist

    # Cluster into 3 groups; element [0] of the result is the vector of
    # per-item cluster assignments.
    # NOTE(review): presumably each label is the index of the item acting as
    # that cluster's medoid -- confirm against the Pycluster documentation.
    clustermap = PC.kmedoids(m, 3, npass=20)[0]

    print(clustermap)

    # Tally how many items carry each cluster label.
    medoids = {}
    for label in clustermap:
        medoids.setdefault(label, 0)
        medoids[label] += 1

    # Only the distinct labels are reported; the counts are not printed.
    print(medoids.keys())
    
    # similaritymeasures.py
    #!/usr/bin/env python
      
    from math import*
    from decimal import Decimal
      
    class Similarity():
        """Distance and similarity measures between two sequences.

        The numeric measures pair elements with ``zip``, so when the inputs
        differ in length the extra trailing elements of the longer one are
        ignored by the pairwise terms.

        ``sqrt`` and ``pow`` are taken from the enclosing module's scope; with
        the file's ``from math import *`` they resolve to ``math.sqrt`` and
        ``math.pow``.
        """

        def euclidean_distance(self, x, y):
            """Return the Euclidean (L2) distance between two numeric sequences."""
            return sqrt(sum(pow(a - b, 2) for a, b in zip(x, y)))

        def manhattan_distance(self, x, y):
            """Return the Manhattan (L1) distance between two numeric sequences."""
            return sum(abs(a - b) for a, b in zip(x, y))

        def minkowski_distance(self, x, y, p_value):
            """Return the Minkowski distance of order ``p_value``.

            The result is the ``p_value``-th root of the summed
            ``|a - b| ** p_value`` terms, rounded to 3 decimal places by
            ``nth_root``.
            """
            total = sum(pow(abs(a - b), p_value) for a, b in zip(x, y))
            return self.nth_root(total, p_value)

        def nth_root(self, value, n_root):
            """Return the ``n_root``-th root of ``value`` rounded to 3 decimals.

            The power is evaluated with ``Decimal`` operands, so the rounded
            result has whatever type ``round`` yields for a ``Decimal``
            (a ``Decimal`` on Python 3).
            """
            root_value = 1 / float(n_root)
            return round(Decimal(value) ** Decimal(root_value), 3)

        def cosine_similarity(self, x, y):
            """Return the cosine similarity of two numeric sequences, rounded to 3 decimals.

            Returns 0.0 when either sequence has zero norm, because the cosine
            is undefined there and the division would otherwise fail.
            """
            numerator = sum(a * b for a, b in zip(x, y))
            # Use full-precision norms here. Dividing by norms that were already
            # rounded to 3 decimals (as square_rooted returns) distorts the ratio
            # for small-magnitude vectors and can even give values above 1.
            norm_x = sqrt(sum(a * a for a in x))
            norm_y = sqrt(sum(b * b for b in y))
            if norm_x == 0 or norm_y == 0:
                return 0.0
            return round(numerator / float(norm_x * norm_y), 3)

        def square_rooted(self, x):
            """Return the Euclidean norm of ``x`` rounded to 3 decimal places."""
            return round(sqrt(sum([a * a for a in x])), 3)

        def jaccard_similarity(self, x, y):
            """Return the Jaccard similarity between two collections.

            Both inputs are converted to sets, so duplicates and ordering are
            ignored. Two empty inputs are treated as identical (similarity 1.0).
            """
            set_x, set_y = set(x), set(y)
            union_cardinality = len(set_x | set_y)
            if union_cardinality == 0:
                return 1.0
            intersection_cardinality = len(set_x & set_y)
            return intersection_cardinality / float(union_cardinality)
    

    相关文章

      网友评论

          本文标题:K-medoids聚类

          本文链接:https://www.haomeiwen.com/subject/kwtzbftx.html