美文网首页
图算法-群体发现(Louvain)

图算法-群体发现(Louvain)

作者: linux_hex | 来源:发表于2021-07-13 20:38 被阅读0次

Louvain 算法是基于模块度的社区发现算法

修改:支持输入点id不连续问题

【数据类】

public class FastLouvainEdgeimplements Cloneable{

private static final Logger logger = LoggerFactory.getLogger(FastLouvainEdge.class);

public int v;//v表示连接点的编号,w表示此边的权值

    public double weight;

public int next;//next负责连接和此点相关的边

    public Object clone(){

FastLouvainEdge temp=null;

try{

temp = (FastLouvainEdge)super.clone();//浅复制

        }catch(Exception e) {

logger.error("Exception: ", e);

}

return temp;

}

}

【计算类】

public class FastLouvainCalculatorimplements Cloneable{

private static final Loggerlogger = LoggerFactory.getLogger(FastLouvainCalculator.class);

private MapnodeMapInput =new HashMap<>();

private MapnodeMapOutput =new HashMap<>();

int n;// 结点个数

    int m;// 边数

    int cluster[];// 结点i属于哪个簇

    FastLouvainEdgeedge[];// 邻接表

    int head[];// 头结点下标

    int top;// 已用E的个数

    double resolution;// 1/2m 全局不变

    double node_weight[];// 结点的权值

    double totalEdgeWeight;// 总边权值

    double[]cluster_weight;// 簇的权值

    double eps =1e-14;// 误差

    int global_n;// 最初始的n

    int global_cluster[];// 最后的结果,i属于哪个簇

    FastLouvainEdge[]new_edge;//新的邻接表

    int[]new_head;

int new_top =0;

final int iteration_time =3;// 最大迭代次数

    FastLouvainEdgeglobal_edge[];//全局初始的临接表  只保存一次,永久不变,不参与后期运算

    int global_head[];

int global_top=0;

void addEdge(int u,int v,double weight) {

if(edge[top]==null)

edge[top]=new FastLouvainEdge();

edge[top].v = v;

edge[top].weight = weight;

edge[top].next =head[u];

head[u] =top++;

}

void addNewEdge(int u,int v,double weight) {

if(new_edge[new_top]==null)

new_edge[new_top]=new FastLouvainEdge();

new_edge[new_top].v = v;

new_edge[new_top].weight = weight;

new_edge[new_top].next =new_head[u];

new_head[u] =new_top++;

}

void addGlobalEdge(int u,int v,double weight) {

if(global_edge[global_top]==null)

global_edge[global_top]=new FastLouvainEdge();

global_edge[global_top].v = v;

global_edge[global_top].weight = weight;

global_edge[global_top].next =global_head[u];

global_head[u] =global_top++;

}

public void initData(List nodes, List> links){

for (int i =0; i < nodes.size(); i++) {

nodeMapInput.put(Long.valueOf(String.valueOf(nodes.get(i))), i);

nodeMapOutput.put(i, Long.valueOf(String.valueOf(nodes.get(i))));

}

n =global_n =nodeMapInput.size() +1;

m = links.size() *2;

edge =new FastLouvainEdge[m];

head =new int[n];

for (int i =0; i

head[i] = -1;

top =0;

global_edge=new FastLouvainEdge[m];

global_head =new int[n];

for(int i=0;i

global_head[i]=-1;

global_top=0;

global_cluster =new int[n];

for (int i =0; i

global_cluster[i] = i;

node_weight =new double[n];

totalEdgeWeight =0.0;

double curw =1.0;

links.forEach(item -> {

int u =nodeMapInput.get(Long.valueOf(String.valueOf(item.get("startId"))));

int v =nodeMapInput.get(Long.valueOf(String.valueOf(item.get("endId"))));

addEdge(u, v,curw);

addEdge(v, u,curw);

addGlobalEdge(u,v,curw);

addGlobalEdge(v,u,curw);

totalEdgeWeight +=2 *curw;

node_weight[u] +=curw;

if (u != v) {

node_weight[v] +=curw;

}

});

resolution =1 /totalEdgeWeight;

}

public Map> getResult(){

Map> result =new HashMap<>();

for(int i=0; i

Long vl =nodeMapOutput.get(i);

if(vl ==null){

continue;

}

String v = String.valueOf(vl);

String group = String.valueOf(global_cluster[i]);

List nodes = result.get(group);

if(nodes !=null){

nodes.add(v);

}else {

nodes =new ArrayList<>();

nodes.add(v);

result.put(group, nodes);

}

}

return result;

}

void init_cluster() {

cluster =new int[n];

for (int i =0; i

            cluster[i] = i;

}

}

boolean try_move_i(int i) {// 尝试将i加入某个簇

        double[] edgeWeightPerCluster =new double[n];

for (int j =head[i]; j != -1; j =edge[j].next) {

int l =cluster[edge[j].v];// l是nodeid所在簇的编号

            edgeWeightPerCluster[l] +=edge[j].weight;

}

int bestCluster = -1;// 最优的簇号下标(先默认是自己)

        double maxx_deltaQ =0.0;// 增量的最大值

        boolean[] vis =new boolean[n];

cluster_weight[cluster[i]] -=node_weight[i];

for (int j =head[i]; j != -1; j =edge[j].next) {

int l =cluster[edge[j].v];// l代表領接点的簇号

            if (vis[l])// 一个領接簇只判断一次

                continue;

vis[l] =true;

double cur_deltaQ = edgeWeightPerCluster[l];

cur_deltaQ -=node_weight[i] *cluster_weight[l] *resolution;

if (cur_deltaQ > maxx_deltaQ) {

bestCluster = l;

maxx_deltaQ = cur_deltaQ;

}

edgeWeightPerCluster[l] =0;

}

if (maxx_deltaQ

bestCluster =cluster[i];

}

//System.out.println(maxx_deltaQ);

        cluster_weight[bestCluster] +=node_weight[i];

if (bestCluster !=cluster[i]) {// i成功移动了

            cluster[i] = bestCluster;

return true;

}

return false;

}

void rebuildGraph() {// 重构图

        /// 先对簇进行离散化

        int[] change =new int[n];

int change_size=0;

boolean vis[] =new boolean[n];

for (int i =0; i

if (vis[cluster[i]])

continue;

vis[cluster[i]] =true;

change[change_size++]=cluster[i];

}

int[] index =new int[n];// index[i]代表 i号簇在新图中的结点编号

        for (int i =0; i < change_size; i++)

index[change[i]] = i;

int new_n = change_size;// 新图的大小

        new_edge =new FastLouvainEdge[m];

new_head =new int[new_n];

new_top =0;

double new_node_weight[] =new double[new_n];// 新点权和

        for(int i=0;i

new_head[i]=-1;

ArrayList[] nodeInCluster =new ArrayList[new_n];// 代表每个簇中的节点列表

        for (int i =0; i < new_n; i++)

nodeInCluster[i] =new ArrayList();

for (int i =0; i

nodeInCluster[index[cluster[i]]].add(i);

}

for (int u =0; u < new_n; u++) {// 将同一个簇的挨在一起分析。可以将visindex数组降到一维

            boolean visindex[] =new boolean[new_n];// visindex[v]代表新图中u节点到v的边在临街表是第几个(多了1,为了初始化方便)

            double delta_w[] =new double[new_n];// 边权的增量

            for (int i =0; i < nodeInCluster[u].size(); i++) {

int t = nodeInCluster[u].get(i);

for (int k =head[t]; k != -1; k =edge[k].next) {

int j =edge[k].v;

int v = index[cluster[j]];

if (u != v) {

if (!visindex[v]) {

addNewEdge(u, v,0);

visindex[v] =true;

}

delta_w[v] +=edge[k].weight;

}

}

new_node_weight[u] +=node_weight[t];

}

for (int k =new_head[u]; k != -1; k =new_edge[k].next) {

int v =new_edge[k].v;

new_edge[k].weight = delta_w[v];

}

}

// 更新答案

        int[] new_global_cluster =new int[global_n];

for (int i =0; i

new_global_cluster[i] = index[cluster[global_cluster[i]]];

}

for (int i =0; i

global_cluster[i] = new_global_cluster[i];

}

top =new_top;

for (int i =0; i

edge[i] =new_edge[i];

}

for (int i =0; i < new_n; i++) {

node_weight[i] = new_node_weight[i];

head[i] =new_head[i];

}

n = new_n;

init_cluster();

}

void print() {

for (int i =0; i

System.out.println(i +": " +global_cluster[i]);

}

System.out.println("-------");

}

public void louvain() {

init_cluster();

int count =0;// 迭代次数

    boolean update_flag;// 标记是否发生过更新

    do {// 第一重循环,每次循环rebuild一次图

      //    print(); // 打印簇列表

      count++;

cluster_weight =new double[n];

for (int j =0; j

        cluster_weight[cluster[j]] +=node_weight[j];

}

int[] order =new int[n];// 生成随机序列

      for (int i =0; i

Random random =new Random();

for (int i =0; i

int j = random.nextInt(n);

int temp = order[i];

order[i] = order[j];

order[j] = temp;

}

int enum_time =0;// 枚举次数,到n时代表所有点已经遍历过且无移动的点

      int point =0;// 循环指针

      update_flag =false;// 是否发生过更新的标记

      do {

int i = order[point];

point = (point +1) %n;

if (try_move_i(i)) {// 对i点做尝试

          enum_time =0;

update_flag =true;

}else {

enum_time++;

}

}while (enum_time

if (count >iteration_time || !update_flag)// 如果没更新过或者迭代次数超过指定值

      break;

rebuildGraph();// 重构图

    }while (true);

}

}

【测试代码】

List nodes = (List) MapUtils.getObject(map,"nodes");

List> links = (List) MapUtils.getObject(map,"links");

try {

return Response.success(graphAlgorithmService.getCommunities(nodes, links));

}catch (Exception e) {

logger.error("community error:", e);

return Response.fail();

}

【测试数据map】

{"nodes":["4120","942248","151624"],"links":[{"startId":"942248","endId":"4120"},{"startId":"4120","endId":"151624"}]}

相关文章

  • 图算法-群体发现(Louvain)

    Louvain 算法是基于模块度的社区发现算法 修改:支持输入点id不连续问题 【数据类】 public clas...

  • Louvain算法的介绍与利用Graphx的实现过程

    Louvain是用来进行社会网络挖掘的社区发现算法,属于图的聚类算法。 1. 算法介绍 Louvain是基于模块度...

  • 图算法之Community Detection

    Louvain(Fast-Unfolding)   Louvain算法,又称之为Fast-Unfolding算法,...

  • 社区发现算法-Louvain

    简介 Louvain算法[1]是一种基于多层次优化Modularity[2]的算法,它的优点是快速、准确,被[3]...

  • 2018-11-13

    基于Louvain算法的聚类分析及推荐 一、算法介绍: Louvain算法是基于模块度的一个快速算法,用模块性对社...

  • community detection

    社区发现算法与可视化 from community import community_louvain 因为comm...

  • Louvain算法

    参考:Louvain algorithm for community detection[https://www....

  • Community Detection

    本文结构安排 图聚类简介 正则化割 Louvain 非负矩阵分解(NMF) 其他常见方法 图(graph):是一种...

  • 细胞异质性||Louvain 算法概述

    什么是细胞异质性? 在谈及细胞异质性之前,还是让我们先来看看肿瘤的异质性吧:肿瘤的异质性是恶性肿瘤的特征之一,是指...

  • 好好恋爱

    盖尔沙普利算法给我们的启示 合作博弈处理的是群体群体之间的博弈。艾尔沙普利算法的匹配法则有:艾尔沙普利算法则是博弈...

网友评论

      本文标题:图算法-群体发现(Louvain)

      本文链接:https://www.haomeiwen.com/subject/rrgspltx.html