########################################################
#-------------------------------------------------------
# Topic:写一个应用于clusterprofiler对象的kappa statsisc的计算函数
# Author:Wang Haiquan
# Date:Thu Nov 11 13:58:52 2021
# Mail:mg1835020@smail.nju.edu.cn
#-------------------------------------------------------
########################################################
enrich_kappa_res<-function(enrich_res,subset_term=1:10,kappa_cut=0.25){
#加载包
require(dplyr)
require(stringr)
require(clusterProfiler)
#自定义函数
fromListONLY<-function(list_with_elements,element_name="gene"){
m=list_with_elements%>%do.call(c,.)%>%unique()
b=matrix(NA,nrow = length(m),ncol = length(list_with_elements))%>%as.data.frame()
n=data.frame(x=m)%>%cbind(.,b)
colnames(n)<-c(element_name,names(list_with_elements))
for (i in 2:c(length(list_with_elements)+1)) {
n[match(list_with_elements[[i-1]],n[[element_name]]),i]=1
n[is.na(n[[i]]),i]<-0
}
return(n)
}
#示例数据演示
#enrich_res<-enrich_res%>%setReadable(.,org.Hs.eg.db,keyType = "ENTREZID")
enrich_res_list<-enrich_res@result$geneID%>%as.list()%>%lapply(.,function(x){str_split(x,"\\/",simplify = T)%>%as.character()})
names(enrich_res_list)<-enrich_res@result$ID
#构建混淆矩阵
toy_mat<-fromListONLY(enrich_res_list[subset_term],element_name = "gene")
#最后输出的模板
summary_var<-c("A1B1","A1B0","A0B1","A0B0")
total_var<-c("rowtotal1","rowtotal2","coltotal1","coltotal2","total")
statistic_var<-c("Ovalue","Avalue","Kappa")
combn_term<-combn(toy_mat[,-1]%>%colnames(),2)%>%t()%>%as.data.frame() #排列组合
combn_term<-cbind(combn_term,matrix(NA,nrow = nrow(combn_term),ncol = 12)%>%as.data.frame())
colnames(combn_term)<-c("term1","term2",
summary_var,
total_var,
statistic_var)
#browser()
#计算数值
for (i in 1:nrow(combn_term)) {
#统计summary_var
direct_file<-toy_mat[,combn_term[i,1:2]%>%as.character()]
direct_file$label<-paste("A",direct_file[[1]],"B",direct_file[[2]],sep = "")
direct_file_summary<-data.frame(Var1=summary_var,Freq=0)
direct_file_summary<-rbind(table(direct_file$label)%>%as.data.frame(),direct_file_summary)%>%
filter(!duplicated(Var1))%>%
.[match(summary_var,.$Var1),]
direct_file_summary<-direct_file_summary%>%as.matrix()%>%t()
colnames(direct_file_summary)<-direct_file_summary[1,]
direct_file_summary<-direct_file_summary[-1,,drop=F]
#统计statistic_var和total_var
for (j in 1:ncol(direct_file_summary)) {
assign(colnames(direct_file_summary)[j],direct_file_summary[[j]][1]%>%as.numeric())
}
direct_file_total_summary<-data.frame(rowtotal1=A1B1+A1B0,
rowtotal2=A0B1+A0B0,
coltotal1=A1B1+A0B1,
coltotal2=A1B0+A0B0)%>%
cbind(direct_file_summary,.)%>%
mutate(total=rowtotal1+rowtotal2)
direct_file_total_summary$Ovalue<-(A1B1+A0B0)/direct_file_total_summary$total
direct_file_total_summary<-direct_file_total_summary%>%
mutate(Avalue=(coltotal1*rowtotal1+coltotal2*rowtotal2)/total^2,
Kappa=(Ovalue-Avalue)/(1-Avalue))
combn_term[i,3:14]<-direct_file_total_summary%>%t()%>%.[,1]%>%as.numeric()
}
#browser()
#输出结果
combn_term<-combn_term%>%arrange(-Kappa)
combn_term$geneid_1<-sapply(enrich_res_list[as.character(combn_term$term1)],function(x){paste(x,collapse = "/")})
combn_term$geneid_2<-sapply(enrich_res_list[as.character(combn_term$term2)],function(x){paste(x,collapse = "/")})
res1=combn_term
#输出kappa矩阵
combn_term$Kappa<-ifelse(combn_term$Kappa<kappa_cut,0,combn_term$Kappa)#小于kappa_cutoff的值都是0
b=combn_term[,c("term1","term2","Kappa")]
x<-combn_term[,c("term2","term1","Kappa")]
d<-data.frame(term1=unique(c(combn_term$term1,combn_term$term2)),
term2=unique(c(combn_term$term1,combn_term$term2)),
Kappa=1)
colnames(x)<-colnames(b)
rbind(b,x,d)%>%dcast(term1~term2)->d
merge(enrich_res@result[,c("ID","Description")],d,by.x="ID",by.y="term1",all.y=T)->d
rownames(d)<-d$Description
d<-d[,-c(1:2)]
#输出最终结果
return(list(res=res1,kappa_mat=d))
}
其实在R中有现成的函数可以用!!!算了,就当锻炼编程能力了吧
require(irr)
data(diagnoses)
dat=diagnoses[,c(1,2)]
kappa2(dat[,c(1,2)],'unweighted')
网友评论