美文网首页
计算kappa值

计算kappa值

作者: 一只烟酒僧 | 来源:发表于2021-11-11 17:51 被阅读0次
######################################################## 
#-------------------------------------------------------
# Topic:写一个应用于clusterprofiler对象的kappa statsisc的计算函数
# Author:Wang Haiquan
# Date:Thu Nov 11 13:58:52 2021
# Mail:mg1835020@smail.nju.edu.cn
#-------------------------------------------------------
########################################################



enrich_kappa_res<-function(enrich_res,subset_term=1:10,kappa_cut=0.25){
  #加载包
  require(dplyr)
  require(stringr)
  require(clusterProfiler)
  
  #自定义函数
  fromListONLY<-function(list_with_elements,element_name="gene"){
    m=list_with_elements%>%do.call(c,.)%>%unique()
    b=matrix(NA,nrow = length(m),ncol = length(list_with_elements))%>%as.data.frame()
    n=data.frame(x=m)%>%cbind(.,b)
    colnames(n)<-c(element_name,names(list_with_elements))
    for (i in 2:c(length(list_with_elements)+1)) {
      n[match(list_with_elements[[i-1]],n[[element_name]]),i]=1
      n[is.na(n[[i]]),i]<-0
    }
    return(n)
    
  }
  
  #示例数据演示
  #enrich_res<-enrich_res%>%setReadable(.,org.Hs.eg.db,keyType = "ENTREZID")
  
  enrich_res_list<-enrich_res@result$geneID%>%as.list()%>%lapply(.,function(x){str_split(x,"\\/",simplify = T)%>%as.character()})
  
  names(enrich_res_list)<-enrich_res@result$ID
  
  #构建混淆矩阵
  toy_mat<-fromListONLY(enrich_res_list[subset_term],element_name = "gene")
  
  #最后输出的模板
  summary_var<-c("A1B1","A1B0","A0B1","A0B0")
  total_var<-c("rowtotal1","rowtotal2","coltotal1","coltotal2","total")
  statistic_var<-c("Ovalue","Avalue","Kappa")
  
  combn_term<-combn(toy_mat[,-1]%>%colnames(),2)%>%t()%>%as.data.frame() #排列组合
  combn_term<-cbind(combn_term,matrix(NA,nrow = nrow(combn_term),ncol = 12)%>%as.data.frame())
  colnames(combn_term)<-c("term1","term2",
                          summary_var,
                          total_var,
                          statistic_var)
  
  #browser()
  #计算数值
  
  for (i in 1:nrow(combn_term)) {
    
    #统计summary_var
    direct_file<-toy_mat[,combn_term[i,1:2]%>%as.character()]
    direct_file$label<-paste("A",direct_file[[1]],"B",direct_file[[2]],sep = "")
    direct_file_summary<-data.frame(Var1=summary_var,Freq=0)
    direct_file_summary<-rbind(table(direct_file$label)%>%as.data.frame(),direct_file_summary)%>%
      filter(!duplicated(Var1))%>%
      .[match(summary_var,.$Var1),]
    
    direct_file_summary<-direct_file_summary%>%as.matrix()%>%t()
    colnames(direct_file_summary)<-direct_file_summary[1,]
    direct_file_summary<-direct_file_summary[-1,,drop=F]
    
    #统计statistic_var和total_var
    for (j in 1:ncol(direct_file_summary)) {
      assign(colnames(direct_file_summary)[j],direct_file_summary[[j]][1]%>%as.numeric())
    }
    
    direct_file_total_summary<-data.frame(rowtotal1=A1B1+A1B0,
                                          rowtotal2=A0B1+A0B0,
                                          coltotal1=A1B1+A0B1,
                                          coltotal2=A1B0+A0B0)%>%
      cbind(direct_file_summary,.)%>%
      mutate(total=rowtotal1+rowtotal2)
    direct_file_total_summary$Ovalue<-(A1B1+A0B0)/direct_file_total_summary$total
    direct_file_total_summary<-direct_file_total_summary%>%
      mutate(Avalue=(coltotal1*rowtotal1+coltotal2*rowtotal2)/total^2,
             Kappa=(Ovalue-Avalue)/(1-Avalue))
    
    
    combn_term[i,3:14]<-direct_file_total_summary%>%t()%>%.[,1]%>%as.numeric()
  }
  #browser()
  #输出结果
  combn_term<-combn_term%>%arrange(-Kappa)
  combn_term$geneid_1<-sapply(enrich_res_list[as.character(combn_term$term1)],function(x){paste(x,collapse = "/")})
  combn_term$geneid_2<-sapply(enrich_res_list[as.character(combn_term$term2)],function(x){paste(x,collapse = "/")})
  res1=combn_term
  #输出kappa矩阵
  combn_term$Kappa<-ifelse(combn_term$Kappa<kappa_cut,0,combn_term$Kappa)#小于kappa_cutoff的值都是0
  b=combn_term[,c("term1","term2","Kappa")]
  x<-combn_term[,c("term2","term1","Kappa")]
  d<-data.frame(term1=unique(c(combn_term$term1,combn_term$term2)),
                term2=unique(c(combn_term$term1,combn_term$term2)),
                Kappa=1)
  
  colnames(x)<-colnames(b)
  rbind(b,x,d)%>%dcast(term1~term2)->d
  merge(enrich_res@result[,c("ID","Description")],d,by.x="ID",by.y="term1",all.y=T)->d
  rownames(d)<-d$Description
  d<-d[,-c(1:2)]
  
#输出最终结果
  return(list(res=res1,kappa_mat=d))
  
}


其实在R中有现成的函数可以用!!!算了,就当锻炼编程能力了吧

require(irr)

data(diagnoses)

dat=diagnoses[,c(1,2)]

kappa2(dat[,c(1,2)],'unweighted')

相关文章

网友评论

      本文标题:计算kappa值

      本文链接:https://www.haomeiwen.com/subject/xkjgzltx.html