密度聚类算法的实现(Python)

作者: 牛顿学计算机 | 来源:发表于2018-11-02 22:41 被阅读5次

    算法流程

    ~~~~~算法的原理可参考周志华老师的《机器学习》,概念很简单,也总结了算法的流程。
    ~~~~~下面就看一下算法的流程

    密度聚类算法流程.PNG

    完整代码

    ~~~~~代码比较复杂,但是完全对照算法的思路,就可看懂我的代码,一步一步按照思路理解算法。

    import numpy as np
    from numpy import *
    import math
    import matplotlib.pyplot as plt
    
    def load_data_set(file_name):
        D = []
        fr = open(file_name)
    
        for line in fr.readlines():
            line_data = line.strip().split()
            D.append([float(line_data[0]), float(line_data[1])])
        return D
    
    def get_dist(x1, x2):
        n = len(x1)
        dist = 0
    
        for i in range(n):
            dist += (abs(float(x1[i] - x2[i]))) ** 2
        return sqrt(dist)
    
    def get_partision(x1, X, sigma):
       m = len(X)
       N = []
    
       for i in range(m):
           dist = get_dist(x1, X[i])
           print(X[i])
           if(dist <= sigma):
               N.append(X[i])
       return N
    
    def list_compare(x1_list, x2_list):
        n1 = len(x1_list)
        n2 = len(x2_list)
        equal = True
    
        if(n1 != n2):
            return False
        for i in range(n1):
            if(x1_list[i] != x2_list[i]):
                equal = False
        return equal
    
    def get_intersection(x1, x2):
        n = len(x2)
        m = len(x1)
        W = []
    
        for i in range(m):
            for j in range(n):
                if(list_compare(x1[i], x2[j]) == True):
                    W.append(x1[i])
        return W
    
    def delete_aggregate(X, x1):
        n = len(x1)
        m = len(X)
        del_list = []
    
        for i in range(n):
            for j in range(m):
                if(list_compare(x1[i], X[j]) == True):
                    del_list.append(X[j])
        for j in range(len(del_list)):
            print("del_list[", j, "] = ", del_list[j])
            X.remove([del_list[j][0], del_list[j][1]])
        return X
    
    def dbscan(X, sigma, min_pts):
        m = len(X)
        W = []
        C = []
    
        for i in range(m):
           N = get_partision(X[i], X, sigma)
           if(len(N) >= min_pts):
               W.append(X[i])
        k = 0
        gama = copy(X).tolist()
        while(len(W) != 0):
            old_gama = copy(gama).tolist()
            index = random.randint(0, len(W))
            Q = []
            Q.append(W[index])
            del W[index]
            while(len(Q) != 0):
                q = copy(Q[0]).tolist()
                del Q[0]
                N = get_partision(q, X, sigma)
                if(len(N) >= min_pts):
                    deta = get_intersection(N, gama)
                    for i in range(len(deta)):
                        Q.append(deta[i])
                    gama = delete_aggregate(gama, deta)
            k += 1
            Ck = delete_aggregate(old_gama, gama)
            C.append(Ck)
            W = delete_aggregate(W, Ck)
        return C
    
    def show_experiment_plot(D):
        X1 = mat(array(D[0]))
        X2 = mat(array(D[1]))
        X3 = mat(array(D[2]))
        X4 = mat(array(D[3]))
    
        plt.plot(X1[:, 0], X1[:, 1], "or")
        plt.plot(X2[:, 0], X2[:, 1], "ob")
        plt.plot(X3[:, 0], X3[:, 1], "og")
        plt.plot(X4[:, 0], X4[:, 1], "oy")
    
        plt.xlabel("X1")
        plt.ylabel("X2")
        plt.show()
    
    if __name__ == "__main__":
        D = load_data_set("data_set.txt")
        C = dbscan(D, 0.11, 5)
        print(D)
        show_experiment_plot(C)
    

    输入数据

    ~~~本文的输入数据和书上的输入数据是一样的

    0.697 0.460
    0.774 0.376
    0.634 0.264
    0.608 0.318
    0.556 0.215
    0.403 0.237
    0.481 0.149
    0.437 0.211
    0.666 0.091
    0.243 0.267
    0.245 0.057
    0.343 0.099
    0.639 0.161
    0.657 0.198
    0.360 0.370
    0.593 0.042
    0.719 0.103
    0.359 0.188
    0.339 0.241
    0.282 0.257
    0.748 0.232
    0.714 0.346
    0.483 0.312
    0.478 0.437
    0.525 0.369
    0.751 0.489
    0.532 0.472
    0.473 0.376
    0.725 0.445
    0.446 0.459
    

    ~~~~~书上的输入数据

    输入数据.PNG
    初始化参数.PNG

    实验结果

    ~~~~~本文的实验结果

    实验结果.PNG
    书上实验结果.PNG

    结论

    ~~~~~两者的实验结果对比,完全一模一样。本文结果中没有画出的点则是噪声,画出的则是最终的聚类结果。

    相关文章

      网友评论

        本文标题:密度聚类算法的实现(Python)

        本文链接:https://www.haomeiwen.com/subject/onpdtqtx.html