【Python】一个类似区域生长思想的聚类过程

作者: 吵吵人 | 来源:发表于2021-04-15 17:09 被阅读0次

【Python】一个类似区域生长思想的聚类过程
机器学习 | kmeans聚类算法学习总结
计算机科学与Python编程导论 15次作业
基于密度的聚类方法
scala类与伴生类、单例
DBSCAN算法应用实例及代码（1）
2018-12-19
机器学习（8）——其他聚类
2019-04-29
2018年6月26日【python学习笔记】

    def get_scope_simple(self, th1, th2, th3):
        """
        Label every vertex with a neighbourhood ("scope") id by greedy region growing.

        This is a simplified variant of ``get_scope``, written because that
        method was too slow.  Starting from each still-unlabelled vertex, its
        unlabelled neighbours are scored against the current cluster and every
        neighbour that passes all three thresholds is merged at once; the
        neighbours of the merged vertices become candidates for the next round.

        Results are written in place to ``self.vertex['scope']`` (``-1`` means
        "not assigned").  A vertex that has no unlabelled neighbour when it is
        visited as a seed keeps ``-1``.

        :param th1: compactness threshold; a candidate passes when
                    ``self.compact(cluster + [candidate]) > th1``
        :param th2: area threshold; a candidate passes when the sample standard
                    deviation (``ddof=1``) of ``myArea`` over
                    ``cluster + [candidate]`` is ``< th2``
        :param th3: similarity threshold; a candidate passes when
                    ``self.similarity(cluster + [candidate]) < th3``
        :return: None (the result is stored in ``self.vertex['scope']``)
        """
        # Index the land-use table by ORIG_FID (the column itself is kept) so it
        # can be addressed by label.
        self.area.set_index(['ORIG_FID'], drop=False, inplace=True)

        # (Re)initialise the 'scope' column that stores the cluster id.
        if "scope" not in self.vertex.columns:
            self.vertex.insert(self.vertex.shape[1], "scope", -1)
        else:
            self.vertex["scope"] = -1

        k = -1  # id of the most recently created cluster
        for i in range(self.vertex.shape[0]):
            # Materialise the row once instead of once per field access.
            row = self.vertex.iloc[i]
            if row.loc["scope"] != -1:  # already assigned to a cluster
                continue

            # Candidate pool: neighbours of the seed that are still unlabelled.
            # NOTE(review): neighbour ids are used as index labels of
            # self.vertex, which presumes the frame is indexed by ORIG_FID.
            neis = list(row.loc['Neighbors'])
            subdataset = [g for g in neis if self.vertex.loc[g, 'scope'] == -1]

            if not subdataset:
                continue

            # Open a new cluster seeded with the current vertex.
            sets = [row.loc['ORIG_FID']]
            k += 1
            self.vertex.loc[sets[0], 'scope'] = k

            print('邻域范围聚类：{}'.format(k))

            # Number of vertices merged into this cluster so far (not the number
            # of rounds); growth stops once it exceeds N_MAXCOUNT.
            count = 0
            while subdataset:
                if count > N_MAXCOUNT:
                    break

                # Snapshot of this round's candidates: vertices discovered while
                # merging are appended to `subdataset` but only scored next round.
                candidates = list(subdataset)

                # The cluster does not change while candidates are being scored,
                # so fetch the member areas once per round rather than once per
                # candidate.
                member_areas = [self.vertex.loc[it, "myArea"] for it in sets]

                cmpt = []     # compactness of cluster + candidate
                stda = []     # std-dev of areas of cluster + candidate
                similar = []  # similarity value of cluster + candidate
                for cand in candidates:
                    # Tentatively add the candidate, score it, then undo.
                    sets.append(cand)
                    cmpt.append(self.compact(sets))
                    cand_areas = member_areas + [self.vertex.loc[cand, "myArea"]]
                    stda.append(np.std(cand_areas, ddof=1))
                    similar.append(self.similarity(sets))
                    sets.pop()

                grew = False
                selected = []  # candidates merged during this round
                for cand, c_val, a_val, s_val in zip(candidates, cmpt, stda, similar):
                    # Scores were computed against the cluster as it stood at
                    # the start of the round; every passing candidate is merged.
                    if c_val > th1 and a_val < th2 and s_val < th3:
                        self.vertex.loc[cand, 'scope'] = k
                        selected.append(cand)
                        sets.append(cand)
                        count += 1

                        # Queue the merged vertex's unlabelled neighbours that
                        # are not already waiting in the pool.
                        neis = list(self.vertex.loc[cand, 'Neighbors'])
                        newp = [
                            nb for nb in neis
                            if self.vertex.loc[nb, 'scope'] == -1
                            and self.vertex.loc[nb, 'ORIG_FID'] not in subdataset
                        ]
                        subdataset.extend(newp)
                        grew = True

                # Merged vertices leave the pool; rejected ones stay and are
                # re-scored against the enlarged cluster in the next round.
                for item in selected:
                    subdataset.remove(item)

                if not grew:
                    # No candidate qualified: the cluster cannot grow further.
                    break
            print('邻域范围聚类完毕：类{}-数量{}'.format(k, len(sets)))