先挖个坑:https://blog.csdn.net/qazplm12_3/article/details/79429117
https://blog.csdn.net/enyayang/article/details/102551939
########################################################
#-------------------------------------------------------
# Topic:定义一个桑基图函数
# Author:Wang Haiquan
# Date:Sat Jun 06 23:08:32 2020
# Mail:mg1835020@smail.nju.edu.cn
#-------------------------------------------------------
########################################################
# Example input for the function below.
# Scenario: inspect how three energy types flow through provinces to end uses.
# Each row is one observation; every column is one stage of the flow.
# NOTE: sample() is called without a seed, so the data differ on every run.
mydata <- data.frame(
  energy = paste("能源", sample(1:3, 200, replace = TRUE)),
  provin = paste("省份", sample(1:34, 200, replace = TRUE)),
  direct = paste("用途", sample(1:20, 200, replace = TRUE))
)
# 数据格式如下(随机抽样生成,每次运行结果不同):
#   energy   provin    direct
#   能源 1   省份 7    用途 17
#   能源 1   省份 13   用途 9
#   能源 3   省份 31   用途 14
#   能源 2   省份 22   用途 11
#   能源 1   省份 4    用途 6
#   能源 2   省份 3    用途 13
#' Draw a Sankey diagram of the flows between adjacent columns of a data frame
#'
#' Every column of `df` is treated as one stage of the flow. For each pair of
#' neighbouring columns (1 -> 2, 2 -> 3, ...) the rows are counted per
#' (source, target) combination, and that count becomes the link weight.
#'
#' @param df A data frame with at least two columns; values are converted to
#'   text with `paste()`, so any atomic column type is accepted.
#' @return The value returned by `networkD3::sankeyNetwork()`. Node labels have
#'   the form "<value>-<column name>".
get_summary_sankey_df <- function(df) {
  if (!is.data.frame(df) || ncol(df) < 2L) {
    stop("`df` must be a data frame with at least two columns.", call. = FALSE)
  }

  # Suffix every value with its column name so that identical labels appearing
  # in different columns become distinct nodes.
  for (i in seq_along(df)) {
    df[[i]] <- paste(df[[i]], colnames(df)[i], sep = "-")
  }

  # Count rows per (source, target) pair for each pair of adjacent columns.
  # A two-way table() keeps the two labels separate, so values containing "_"
  # (or any other character) are never split apart by accident.
  link_parts <- lapply(seq_len(ncol(df) - 1L), function(i) {
    counts <- as.data.frame(
      table(Src = df[[i]], Target = df[[i + 1L]]),
      stringsAsFactors = FALSE
    )
    # table() also enumerates combinations that never occur; drop them.
    counts <- counts[counts$Freq > 0, , drop = FALSE]
    data.frame(
      Src = counts$Src,
      Target = counts$Target,
      Value = counts$Freq,
      stringsAsFactors = FALSE
    )
  })
  links <- do.call(rbind, link_parts)

  # Node table: sorted unique labels. Built from the character values rather
  # than from factor levels, because data.frame() no longer creates factors by
  # default (R >= 4.0) and levels() would then return NULL.
  node_names <- sort(unique(c(links$Src, links$Target)))
  attribute <- data.frame(
    name = node_names,
    num_list = seq_along(node_names) - 1L,
    stringsAsFactors = FALSE
  )

  # sankeyNetwork() expects zero-based node indices in the link table.
  network <- data.frame(
    Src = match(links$Src, node_names) - 1L,
    Target = match(links$Target, node_names) - 1L,
    Value = links$Value
  )

  # Draw the diagram.
  networkD3::sankeyNetwork(
    Links = network, Nodes = attribute,
    Source = "Src", Target = "Target",
    Value = "Value", NodeID = "name",
    fontSize = 20, nodeWidth = 30
  )
}
# Example: build the Sankey diagram from the sample data frame `mydata`.
get_summary_sankey_df(mydata)
![](https://img.haomeiwen.com/i7983008/2a524689f20ce2ac.png)
补充:也可以使用 ggplot2 的扩展包 ggalluvial 来做,代码更简单一些,但细节调整起来可能比较麻烦
# Alternative: draw the same three-stage flow as an alluvial plot.
# Each axis is one column of `mydata`; flows are coloured by energy type.
library(ggalluvial)
ggplot(mydata, aes(axis1 = energy, axis2 = provin, axis3 = direct)) +
  geom_alluvium(aes(fill = energy)) +
  geom_stratum() +
  # Label each stratum with its value. `infer.label = TRUE` is deprecated in
  # ggalluvial; mapping the label to after_stat(stratum) is the replacement.
  geom_text(stat = "stratum", aes(label = after_stat(stratum)))
![](https://img.haomeiwen.com/i7983008/aac0642856e29e04.png)
网友评论