美文网首页
TCGA|一致性聚类

TCGA|一致性聚类

作者: 高大石头 | 来源:发表于2022-02-12 00:08 被阅读0次

    一致性聚类基于重采样方法验证聚类的合理性,非常适合分子分型文章。下面就学起来。
    数据下载参考:R|TCGA|m6AlncRNA

    # Consensus clustering of TCGA-PRAD samples restricted to a gene set of
    # interest. Run this in a fresh R session instead of calling
    # rm(list = ls()), which clears objects but not loaded packages or options.
    library(tidyverse)
    library(limma)
    library(ConsensusClusterPlus)

    # Inputs: expression table (joined below on its Ensembl_ID column), the
    # probe-to-gene annotation (columns id and gene are used), and the genes of
    # interest (first column of a plain-text file).
    df <- data.table::fread(
      "F:/TCGA-Xena/expression/TCGA-PRAD.htseq_fpkm.tsv.gz",
      data.table = FALSE
    )
    df_ann <- data.table::fread(
      "F:/TCGA-anno/gencode.v22.annotation.gene.probeMap",
      data.table = FALSE
    )
    gene <- read.table("F:/Ongoing/Inflammation-PRAD/感兴趣的基因.txt")$V1
    
    # Attach gene symbols to the expression rows and drop the Ensembl id, so
    # the first column is the gene symbol followed by the sample columns.
    df1 <- df_ann %>%
      select(id, gene) %>%
      inner_join(df, by = c("id" = "Ensembl_ID")) %>%
      select(-id)

    # Average rows that share the same gene symbol; the symbols become rownames.
    df1 <- avereps(df1[, -1], ID = df1$gene) %>%
      as.data.frame()

    # Keep only the genes of interest.
    df1 <- df1[rownames(df1) %in% gene, , drop = FALSE]

    # Characters 14-15 of a TCGA barcode hold the sample-type code; codes below
    # 10 denote tumour samples. Convert to integer explicitly so the comparison
    # is numeric rather than an implicit string comparison.
    sample_type <- as.integer(str_sub(colnames(df1), 14, 15))
    df1 <- df1[, !is.na(sample_type) & sample_type < 10, drop = FALSE]

    # Fail early with a clear message instead of a cryptic clustering error.
    stopifnot(nrow(df1) > 0, ncol(df1) > 0)
    
    tmp <- "F:/Ongoing/Inflammation-PRAD/results/"
    maxk <- 9  # largest number of clusters to evaluate
    results <- ConsensusClusterPlus(
      as.matrix(df1),
      maxK = maxk,
      reps = 500,
      pItem = 0.8,
      pFeature = 1,
      title = tmp,
      clusterAlg = "km",
      distance = "euclidean",
      seed = 123,
      plot = "png"
    )

    # Cluster assignment of every sample for k = 2 (results is indexed by k).
    results[[2]][["consensusClass"]]
    

    输出结果如下:



    彩蛋:
    其实数据预处理很重要,可以像这样预处理:

    # Load the ALL example dataset and pull out its expression matrix
    library(ALL)
    data(ALL)
    d <- exprs(ALL)

    # Preview the top-left 5 x 5 corner of the matrix
    d[seq_len(5), seq_len(5)]
    
                 01005    01010    03002    04006    04007
    1000_at   7.597323 7.479445 7.567593 7.384684 7.905312
    1001_at   5.046194 4.932537 4.799294 4.922627 4.844565
    1002_f_at 3.900466 4.208155 3.886169 4.206798 3.416923
    1003_s_at 5.903856 6.169024 5.860459 6.116890 5.687997
    1004_at   5.925260 5.912780 5.893209 6.170245 5.615210
    
    # Keep the 5000 most variable rows, ranked by median absolute deviation
    # (MAD, not standard deviation). head() caps the selection at the number of
    # available rows, so a smaller matrix does not produce NA indices.
    mads <- apply(d, 1, mad)
    d <- d[head(rev(order(mads)), 5000), , drop = FALSE]
    
    # Median-centre each row: sweep() subtracts the per-row median by default.
    d <- sweep(d, 1, apply(d, 1, median, na.rm = TRUE))
    
    # Run consensus clustering in a single call; PNG plots are written under
    # the session's temporary directory.
    library(ConsensusClusterPlus)
    title <- tempdir()
    results <- ConsensusClusterPlus(
      d,
      maxK = 6,
      reps = 50,
      pItem = 0.8,
      pFeature = 1,
      title = title,
      clusterAlg = "hc",
      distance = "pearson",
      seed = 1262118388.71279,
      plot = "png"
    )
    

    参考链接:
    一致性聚类ConsensusClusterPlus
    R-一致性聚类(公众号:医学僧的科研日记)

    相关文章

      网友评论

          本文标题:TCGA|一致性聚类

          本文链接:https://www.haomeiwen.com/subject/nkftlrtx.html