美文网首页
TCGA|一致性聚类

TCGA|一致性聚类

作者: 高大石头 | 来源:发表于2022-02-12 00:08 被阅读0次

一致性聚类(Consensus Clustering)通过对样本重复抽样并反复聚类,来评估聚类结果的稳定性与合理性,非常适合用于分子分型类文章。下面就学起来。
数据下载参考:R|TCGA|m6AlncRNA

# Load packages and input data for the consensus-clustering analysis.
# NOTE: the original `rm(list = ls())` was removed. It silently deletes every
# object in the caller's workspace yet does not detach packages or reset
# options, so it gives no real "clean session" guarantee. Restart R instead.
library(tidyverse)
library(limma)
library(ConsensusClusterPlus)

# Expression table; `data.table = FALSE` makes fread() return a data.frame.
df <- data.table::fread(
  "F:/TCGA-Xena/expression/TCGA-PRAD.htseq_fpkm.tsv.gz",
  data.table = FALSE
)
# Probe/gene annotation table, also read as a plain data.frame.
df_ann <- data.table::fread(
  "F:/TCGA-anno/gencode.v22.annotation.gene.probeMap",
  data.table = FALSE
)
# Genes of interest: first column (V1) of the text file.
gene <- read.table("F:/Ongoing/Inflammation-PRAD/感兴趣的基因.txt")$V1

# Build the gene-by-sample expression table used for clustering.

# Keep the `id` and `gene` annotation columns, attach the expression values by
# matching `id` to `Ensembl_ID`, then drop the ID so `gene` is the first column.
df1 <- df_ann %>%
  select(id, gene) %>%
  inner_join(df, by = c("id" = "Ensembl_ID")) %>%
  select(-id)

# Average rows that share the same gene name; the names become the row names.
df1 <- avereps(df1[, -1], ID = df1$gene) %>%
  as.data.frame()

# Restrict to the genes of interest (`drop = FALSE` keeps a data.frame even if
# only one row matches).
df1 <- df1[rownames(df1) %in% gene, , drop = FALSE]

# Keep columns whose characters 14-15 encode a number below 10.
# NOTE(review): assumes column names are TCGA barcodes where positions 14-15
# hold the sample-type code (01-09 = tumor) — confirm against the input file.
# The original compared the character code with the number 10, which R resolves
# by coercing 10 to "10" and comparing strings; compare integers explicitly.
# Columns without a numeric code there are now dropped instead of being kept
# by accident.
sample_code <- as.integer(str_sub(colnames(df1), 14, 15))
keep_col <- !is.na(sample_code) & sample_code < 10
df1 <- df1[, keep_col, drop = FALSE]

# Run consensus clustering on the samples (columns) of `df1`.

# Passed as `title`; with plot = "png" the plots are written under this path.
tmp <- "F:/Ongoing/Inflammation-PRAD/results/"
# Largest number of clusters to evaluate. The original defined `maxk` but then
# hard-coded `maxK = 9` in the call; the variable is now actually used.
maxk <- 9
results <- ConsensusClusterPlus(
  as.matrix(df1),
  maxK = maxk,
  reps = 500,             # number of resampling iterations
  pItem = 0.8,            # proportion of items sampled per iteration
  pFeature = 1,           # proportion of features sampled per iteration
  title = tmp,
  clusterAlg = "km",
  distance = "euclidean",
  seed = 123,             # fixed seed for reproducible resampling
  plot = "png"
)
# Cluster assignment of each sample for k = 2 (`results` is indexed by k).
results[[2]][["consensusClass"]]

输出结果如下:



彩蛋:
其实数据预处理很重要,可以像这样预处理:

## Use the ALL example dataset
library(ALL)
data(ALL)
# Extract the expression values from the ALL object.
d <- exprs(ALL)
# Preview the top-left 5 x 5 corner of the matrix.
d[seq_len(5), seq_len(5)]

             01005    01010    03002    04006    04007
1000_at   7.597323 7.479445 7.567593 7.384684 7.905312
1001_at   5.046194 4.932537 4.799294 4.922627 4.844565
1002_f_at 3.900466 4.208155 3.886169 4.206798 3.416923
1003_s_at 5.903856 6.169024 5.860459 6.116890 5.687997
1004_at   5.925260 5.912780 5.893209 6.170245 5.615210

# Keep the 5,000 most variable rows, ranked by median absolute deviation (MAD).
# (The original comment said "standard deviation", but the code uses mad().)
mads <- apply(d, 1, mad)
# head() caps the selection at nrow(d); the original `[1:5000]` produced NA
# indices, and therefore all-NA rows, when `d` had fewer than 5,000 rows.
d <- d[head(rev(order(mads)), 5000), , drop = FALSE]

# Center each row by subtracting its median (sweep() subtracts by default).
d <- sweep(d, 1, apply(d, 1, median, na.rm = TRUE))

# Run consensus clustering in a single call; plots go under a temp directory.
library(ConsensusClusterPlus)
title <- tempdir()
results <- ConsensusClusterPlus(
  d,
  maxK = 6,
  reps = 50,
  pItem = 0.8,
  pFeature = 1,
  title = title,
  clusterAlg = "hc",
  distance = "pearson",
  seed = 1262118388.71279,
  plot = "png"
)

参考链接:
一致性聚类ConsensusClusterPlus
R-一致性聚类(公众号:医学僧的科研日记)

相关文章

网友评论

      本文标题:TCGA|一致性聚类

      本文链接:https://www.haomeiwen.com/subject/nkftlrtx.html