Louvain 算法是基于模块度的社区发现算法
修改:支持输入点id不连续问题
【数据类】
public class FastLouvainEdgeimplements Cloneable{
private static final Logger logger = LoggerFactory.getLogger(FastLouvainEdge.class);
public int v;//v表示连接点的编号,w表示此边的权值
public double weight;
public int next;//next负责连接和此点相关的边
public Object clone(){
FastLouvainEdge temp=null;
try{
temp = (FastLouvainEdge)super.clone();//浅复制
}catch(Exception e) {
logger.error("Exception: ", e);
}
return temp;
}
}
【计算类】
public class FastLouvainCalculatorimplements Cloneable{
private static final Loggerlogger = LoggerFactory.getLogger(FastLouvainCalculator.class);
private MapnodeMapInput =new HashMap<>();
private MapnodeMapOutput =new HashMap<>();
int n;// 结点个数
int m;// 边数
int cluster[];// 结点i属于哪个簇
FastLouvainEdgeedge[];// 邻接表
int head[];// 头结点下标
int top;// 已用E的个数
double resolution;// 1/2m 全局不变
double node_weight[];// 结点的权值
double totalEdgeWeight;// 总边权值
double[]cluster_weight;// 簇的权值
double eps =1e-14;// 误差
int global_n;// 最初始的n
int global_cluster[];// 最后的结果,i属于哪个簇
FastLouvainEdge[]new_edge;//新的邻接表
int[]new_head;
int new_top =0;
final int iteration_time =3;// 最大迭代次数
FastLouvainEdgeglobal_edge[];//全局初始的临接表 只保存一次,永久不变,不参与后期运算
int global_head[];
int global_top=0;
void addEdge(int u,int v,double weight) {
if(edge[top]==null)
edge[top]=new FastLouvainEdge();
edge[top].v = v;
edge[top].weight = weight;
edge[top].next =head[u];
head[u] =top++;
}
void addNewEdge(int u,int v,double weight) {
if(new_edge[new_top]==null)
new_edge[new_top]=new FastLouvainEdge();
new_edge[new_top].v = v;
new_edge[new_top].weight = weight;
new_edge[new_top].next =new_head[u];
new_head[u] =new_top++;
}
void addGlobalEdge(int u,int v,double weight) {
if(global_edge[global_top]==null)
global_edge[global_top]=new FastLouvainEdge();
global_edge[global_top].v = v;
global_edge[global_top].weight = weight;
global_edge[global_top].next =global_head[u];
global_head[u] =global_top++;
}
public void initData(List nodes, List> links){
for (int i =0; i < nodes.size(); i++) {
nodeMapInput.put(Long.valueOf(String.valueOf(nodes.get(i))), i);
nodeMapOutput.put(i, Long.valueOf(String.valueOf(nodes.get(i))));
}
n =global_n =nodeMapInput.size() +1;
m = links.size() *2;
edge =new FastLouvainEdge[m];
head =new int[n];
for (int i =0; i
head[i] = -1;
top =0;
global_edge=new FastLouvainEdge[m];
global_head =new int[n];
for(int i=0;i
global_head[i]=-1;
global_top=0;
global_cluster =new int[n];
for (int i =0; i
global_cluster[i] = i;
node_weight =new double[n];
totalEdgeWeight =0.0;
double curw =1.0;
links.forEach(item -> {
int u =nodeMapInput.get(Long.valueOf(String.valueOf(item.get("startId"))));
int v =nodeMapInput.get(Long.valueOf(String.valueOf(item.get("endId"))));
addEdge(u, v,curw);
addEdge(v, u,curw);
addGlobalEdge(u,v,curw);
addGlobalEdge(v,u,curw);
totalEdgeWeight +=2 *curw;
node_weight[u] +=curw;
if (u != v) {
node_weight[v] +=curw;
}
});
resolution =1 /totalEdgeWeight;
}
public Map> getResult(){
Map> result =new HashMap<>();
for(int i=0; i
Long vl =nodeMapOutput.get(i);
if(vl ==null){
continue;
}
String v = String.valueOf(vl);
String group = String.valueOf(global_cluster[i]);
List nodes = result.get(group);
if(nodes !=null){
nodes.add(v);
}else {
nodes =new ArrayList<>();
nodes.add(v);
result.put(group, nodes);
}
}
return result;
}
void init_cluster() {
cluster =new int[n];
for (int i =0; i
cluster[i] = i;
}
}
boolean try_move_i(int i) {// 尝试将i加入某个簇
double[] edgeWeightPerCluster =new double[n];
for (int j =head[i]; j != -1; j =edge[j].next) {
int l =cluster[edge[j].v];// l是nodeid所在簇的编号
edgeWeightPerCluster[l] +=edge[j].weight;
}
int bestCluster = -1;// 最优的簇号下标(先默认是自己)
double maxx_deltaQ =0.0;// 增量的最大值
boolean[] vis =new boolean[n];
cluster_weight[cluster[i]] -=node_weight[i];
for (int j =head[i]; j != -1; j =edge[j].next) {
int l =cluster[edge[j].v];// l代表領接点的簇号
if (vis[l])// 一个領接簇只判断一次
continue;
vis[l] =true;
double cur_deltaQ = edgeWeightPerCluster[l];
cur_deltaQ -=node_weight[i] *cluster_weight[l] *resolution;
if (cur_deltaQ > maxx_deltaQ) {
bestCluster = l;
maxx_deltaQ = cur_deltaQ;
}
edgeWeightPerCluster[l] =0;
}
if (maxx_deltaQ
bestCluster =cluster[i];
}
//System.out.println(maxx_deltaQ);
cluster_weight[bestCluster] +=node_weight[i];
if (bestCluster !=cluster[i]) {// i成功移动了
cluster[i] = bestCluster;
return true;
}
return false;
}
void rebuildGraph() {// 重构图
/// 先对簇进行离散化
int[] change =new int[n];
int change_size=0;
boolean vis[] =new boolean[n];
for (int i =0; i
if (vis[cluster[i]])
continue;
vis[cluster[i]] =true;
change[change_size++]=cluster[i];
}
int[] index =new int[n];// index[i]代表 i号簇在新图中的结点编号
for (int i =0; i < change_size; i++)
index[change[i]] = i;
int new_n = change_size;// 新图的大小
new_edge =new FastLouvainEdge[m];
new_head =new int[new_n];
new_top =0;
double new_node_weight[] =new double[new_n];// 新点权和
for(int i=0;i
new_head[i]=-1;
ArrayList[] nodeInCluster =new ArrayList[new_n];// 代表每个簇中的节点列表
for (int i =0; i < new_n; i++)
nodeInCluster[i] =new ArrayList();
for (int i =0; i
nodeInCluster[index[cluster[i]]].add(i);
}
for (int u =0; u < new_n; u++) {// 将同一个簇的挨在一起分析。可以将visindex数组降到一维
boolean visindex[] =new boolean[new_n];// visindex[v]代表新图中u节点到v的边在临街表是第几个(多了1,为了初始化方便)
double delta_w[] =new double[new_n];// 边权的增量
for (int i =0; i < nodeInCluster[u].size(); i++) {
int t = nodeInCluster[u].get(i);
for (int k =head[t]; k != -1; k =edge[k].next) {
int j =edge[k].v;
int v = index[cluster[j]];
if (u != v) {
if (!visindex[v]) {
addNewEdge(u, v,0);
visindex[v] =true;
}
delta_w[v] +=edge[k].weight;
}
}
new_node_weight[u] +=node_weight[t];
}
for (int k =new_head[u]; k != -1; k =new_edge[k].next) {
int v =new_edge[k].v;
new_edge[k].weight = delta_w[v];
}
}
// 更新答案
int[] new_global_cluster =new int[global_n];
for (int i =0; i
new_global_cluster[i] = index[cluster[global_cluster[i]]];
}
for (int i =0; i
global_cluster[i] = new_global_cluster[i];
}
top =new_top;
for (int i =0; i
edge[i] =new_edge[i];
}
for (int i =0; i < new_n; i++) {
node_weight[i] = new_node_weight[i];
head[i] =new_head[i];
}
n = new_n;
init_cluster();
}
void print() {
for (int i =0; i
System.out.println(i +": " +global_cluster[i]);
}
System.out.println("-------");
}
public void louvain() {
init_cluster();
int count =0;// 迭代次数
boolean update_flag;// 标记是否发生过更新
do {// 第一重循环,每次循环rebuild一次图
// print(); // 打印簇列表
count++;
cluster_weight =new double[n];
for (int j =0; j
cluster_weight[cluster[j]] +=node_weight[j];
}
int[] order =new int[n];// 生成随机序列
for (int i =0; i
Random random =new Random();
for (int i =0; i
int j = random.nextInt(n);
int temp = order[i];
order[i] = order[j];
order[j] = temp;
}
int enum_time =0;// 枚举次数,到n时代表所有点已经遍历过且无移动的点
int point =0;// 循环指针
update_flag =false;// 是否发生过更新的标记
do {
int i = order[point];
point = (point +1) %n;
if (try_move_i(i)) {// 对i点做尝试
enum_time =0;
update_flag =true;
}else {
enum_time++;
}
}while (enum_time
if (count >iteration_time || !update_flag)// 如果没更新过或者迭代次数超过指定值
break;
rebuildGraph();// 重构图
}while (true);
}
}
【测试代码】
List nodes = (List) MapUtils.getObject(map,"nodes");
List> links = (List) MapUtils.getObject(map,"links");
try {
return Response.success(graphAlgorithmService.getCommunities(nodes, links));
}catch (Exception e) {
logger.error("community error:", e);
return Response.fail();
}
【测试数据map】
{"nodes":["4120","942248","151624"],"links":[{"startId":"942248","endId":"4120"},{"startId":"4120","endId":"151624"}]}
网友评论