以前一直用经典的MCP法分免疫浸润 总感觉不好 上篇文章上也写过 这次用肠癌单细胞6万多的细胞的特异性基因,特异性基因我是一个一个看的,我选的标准是logfc大于1,而且如果是肿瘤细胞的特异性基因,那么他在间质细胞和免疫细胞的表达量和表达值很低,总之用单细胞的marker分免疫浸润比用传统的cibersort和mcp群分的好看的多,个人也觉得mcp法适合用于通用的肿瘤,在特异性肿瘤中,如果手中有单细胞数据,可以用单细胞数据集合更加试用于特定的肿瘤。
下载数据
xena数据库下载结肠癌fpkm数据和临床数据
#表达矩阵
dd <- data.table::fread('TCGA-COAD.htseq_fpkm.tsv',data.table = F)
rownames(dd) <- dd$Ensembl_ID
dd <- dd[,-1]
library(stringr)
dd =dd[,str_sub(colnames(dd),14,16)=="01A"]
colnames(dd) <- str_sub(colnames(dd),1,15)
test <- dd[1:10,1:10]
tcga_panmRNA_expr1 <- dd
tcga_panmRNA_expr1$gene_id <- rownames(tcga_panmRNA_expr1 )
tcga_panmRNA_expr1 <- tcga_panmRNA_expr1 %>%
tidyr::separate(gene_id,into = c("gene_id","drop"),sep="\\.") %>%
dplyr::select(-drop)
rownames(tcga_panmRNA_expr1) <- tcga_panmRNA_expr1$gene_id
tcga_panmRNA_expr1 <- tcga_panmRNA_expr1[,-454]#去除最后最后一行基因名字
IGHG1 <- tcga_panmRNA_expr1['ENSG00000211896',]
IGHG2 <- tcga_panmRNA_expr1['ENSG00000211893',]
IGHG3 <- tcga_panmRNA_expr1['ENSG00000211897',]
IGHG4 <- tcga_panmRNA_expr1['ENSG00000211892',]
IGHA1 <- tcga_panmRNA_expr1['ENSG00000211895',]
IGHA2 <- tcga_panmRNA_expr1['ENSG00000211890',]
JCHAIN <- tcga_panmRNA_expr1['ENSG00000132465',]
IGHD <-tcga_panmRNA_expr1['ENSG00000211898',]
tcga_panmRNA_expr2 <- rbind(IGHG1,IGHG2,IGHG3,IGHG4,IGHA1,IGHA2,JCHAIN)
rownames(tcga_panmRNA_expr2) <- c('IGHG1','IGHG2','IGHG3','IGHG4','IGHA1','IGHA2','JCHAIN')
#MIR4435<-tcga_panmRNA_expr1['ENSG00000172965',]
#tcga_panmRNA_expr2 <- rbind(IGHG1,IGHG2,IGHG3,IGHG4,IGHA1,IGHA2,JCHAIN,MIR4435)
#rownames(tcga_panmRNA_expr2) <- c('IGHG1','IGHG2','IGHG3','IGHG4','IGHA1','IGHA2','JCHAIN','MIR4435')
临床数据下载
clinic_data = read.table("COAD_clinicalMatrix", header = T, sep = "\t")
surv_data = read.table("COAD_survival.txt", header = T, sep = "\t")
clinic_data = merge(clinic_data, surv_data, by.x = "sampleID", by.y = "sample")
rownames(clinic_data) <- clinic_data$sampleID
clinic_data=clinic_data[,c('gender','age_at_initial_pathologic_diagnosis','sampleID','CDE_ID_3226963','MSI_updated_Oct62011','pathologic_stage','OS','OS.time','DSS','DSS.time','PFI','PFI.time')]
clin =clinic_data[str_sub(rownames(clinic_data),14,15)=="01",]
index <- intersect(rownames(clin),colnames(dd))
table(index)
### 获取有临床信息的样本
clin <- clin[index,]
write.csv(clin,'clin.csv' )
clin <- read.csv('clin1.csv',header = T)
rownames(clin) <- clin$sampleID
table(clin$pathologic_stage)
index <- intersect(rownames(clin),colnames(dd))
dd <- dd[,index]
tcga_panmRNA_expr2 <- tcga_panmRNA_expr2[,index]
clin <- clin[index,]
IGHD <- IGHD[,index]
探针转换
dd$gene_id <- rownames(dd)
load("anno.Rdata")
### 提取编码mRNA
library(dplyr)
library(tidyr)
library(tibble)
tcga_panmRNA_expr <- mRNA_anno %>%
dplyr::select(c(gene_name,gene_id)) %>%
inner_join(dd,by ="gene_id") %>%
dplyr::select(-gene_id) %>%
filter(gene_name!="NA") %>%
distinct(gene_name,.keep_all = T) %>%
column_to_rownames("gene_name")
identical(colnames(tcga_panmRNA_expr),colnames(IGHD))
tcga_panmRNA_expr <- rbind(tcga_panmRNA_expr,IGHD)
gene <- c('PDCD1','CD274','PDCD1LG2','CTLA4','TGFB1','IL10','ENTPD1','HAVCR2','LAG3')
gene1 <- c('FCER2','ENSG00000211898','CXCL13')
gene2 <- c('CD40','CD80','CD86')
tcga_panmRNA_expr3 <- tcga_panmRNA_expr[gene,]
tcga_panmRNA_expr4 <- tcga_panmRNA_expr[gene1,]
tcga_panmRNA_expr5 <- tcga_panmRNA_expr[gene2,]
cellMarker <- data.table::fread("cellMarker 3.csv",data.table = F)
colnames(cellMarker)[2] <- "celltype"
cellMarker <- lapply(split(cellMarker,cellMarker$celltype), function(x){
dd = x$Metagene
unique(dd)
})
expr <- as.matrix(tcga_panmRNA_expr)
### 3.使用ssGSEA量化免疫浸润
library(GSVA)
gsva_data <- gsva(expr,cellMarker, method = "ssgsea")
GSVA分析
cellMarker <- data.table::fread("cellMarker 1.csv",data.table = F)
colnames(cellMarker)[2] <- "celltype"
cellMarker <- lapply(split(cellMarker,cellMarker$celltype), function(x){
dd = x$Metagene
unique(dd)
})
expr <- as.matrix(tcga_panmRNA_expr)
### 3.使用ssGSEA量化免疫浸润
library(GSVA)
gsva_data <- gsva(expr,cellMarker, method = "ssgsea")
免疫亚群分析
#去除肿瘤上皮细胞
gsva_data2<- gsva_data[c(1,2,6,8,9,10,11,13),]
title="D:\\GZ04"
library(ConsensusClusterPlus)
test2 = ConsensusClusterPlus(
gsva_data2,maxK=10, #聚类的最大类数,所以会评估聚2类、3类...6类
reps=50, #50个重采样
pItem=0.8, #重采样样本为80%
pFeature=1, #重采样基因为80%
title=title,#输出文件的保存位置
clusterAlg="km", #聚合层次聚类算法
distance="pearson", #Pearson 相关距离
seed=1262118388.71279,
#设置特定的随机种子,使例子是可重复的
plot="png")
dd1 <- as.data.frame(test2[[4]][["consensusClass"]])
colnames(dd1) <- 'CIC'
dd1$sample <- rownames(dd1)
dd1$CIC <- ifelse(dd1$CIC=='1','C',ifelse(dd1$CIC=='2','A',ifelse(dd1$CIC=='3','D','B')))
library(dplyr)
dd1 <- dd1 %>%
arrange(dd1$CIC)
names <- rownames(dd1)
gsva_data3 <- gsva_data[c(13,3,11,1,10,2,9,6,8),names]
gsva_data4 <- gsva_data[c(4,5),names]
tcga_panmRNA_expr5 <-tcga_panmRNA_expr5[,names]
tcga_panmRNA_expr6 <- rbind(tcga_panmRNA_expr5,gsva_data4)
tcga_panmRNA_expr2 <-tcga_panmRNA_expr2[,names]
tcga_panmRNA_expr3 <-tcga_panmRNA_expr3[,names]
tcga_panmRNA_expr4 <-tcga_panmRNA_expr4[,names]
clin <- clin[names,]
identical(rownames(dd1),rownames(clin))
dd1$MSI <- clin$MSI.Status
dd1$Stage <- clin$pathologic_stage
dd1 <- dd1 %>%
select(-sample)
result3<- t(scale(t(gsva_data3)))
result3[result3>2]=2
result3[result3<-2]=-2
DD1 <- pheatmap(result3, #热图的数据
cluster_rows = F,#行聚类
cluster_cols = F,#列聚类,可以看出样本之间的区分度
annotation_col=dd1, #标注样本分类
annotation_legend=TRUE, # 显示注释
show_rownames = T,
show_colnames = F,# 显示行名
#scale = "row", #以行来标准化
color =colorRampPalette(c('blue','white', "red"))(100),#调色
#filename = "heatmap_F.pdf",#是否保存
cellwidth = 1.5, cellheight = 15,# 格子比例
fontsize = 10)
pdf("DD1.pdf",width = 15,height = 7)
print(DD1)
dev.off()
#计算热图钟的p值
#三组以上的p值计算用 Kruskal–Wallis
Last1$group <- as.factor(Last1$CIC)
kruskal.test(`T cells`~group, data=Last1)
p=c('2.2e-8','7e-16','2.2e-17','2.9e-16')
p.adjust(p,method = 'fdr',n=length(p))
#求各个组中的P值
library(rgdal)
library(pgirmess)
library(coin)
library(multcomp)
kruskalmc(`T cells`~group, data=Last1, probs=0.05)
library(PMCMR)
posthoc.kruskal.nemenyi.test(`T cells`~group, data=Last1)
posthoc.kruskal.nemenyi.test(`T cells`~group, data=Last1,dist="Chisq")

生存分析
gsva_data4<- as.data.frame(t(gsva_data3))
Last <- cbind(gsva_data4,clin)
Last1 <- cbind(Last,dd1)
write.csv(Last1,'Last1.csv')
library(survival)
library(survminer)
Last1$group1 <- ifelse(Last1$`CD20+ B cells`> median(Last1$`CD20+ B cells`),'high','low')
sfit1 <- survfit(Surv(OS.time/30, OS)~CIC, data=Last1)
DD10<-ggsurvplot(sfit1,pval =T,data = Last1,risk.table = TRUE,size = 1,palette = 'nejm')
pdf("DD10.pdf",width = 7,height = 6)
print(DD10)
dev.off()

相关性
gsva_data1 <- rbind(gsva_data,hotair)
results2<- as.data.frame(t(gsva_data1))
library(ggplot2)
corr_eqn <- function(x,y,digits=2) {
test <- cor.test(x,y,method="spearman")
paste(paste0("n = ",length(x)),
paste0("r = ",round(test$estimate,digits),"(pearson)"),
paste0("p.value= ",round(test$p.value,digits)),
sep = ", ")
}
PP2 <- results2 %>%
ggplot(aes(Hotair,Activated))+
geom_point(col="#984ea3")+
geom_smooth(method=lm, se=T,na.rm=T, fullrange=T,size=2,col="#fdc086")+
geom_rug(col="#7fc97f")+
theme_minimal()+
xlab(paste0('Hotair'," (log2(TPM))"))+
ylab(paste0('Activated'," (NES)"))+
labs(title = paste0(corr_eqn(results2$Hotair,results2$Activated)))+
theme(plot.title = element_text(hjust = 0.5),
plot.margin = margin(1, 1, 1, 1, "cm"))

网友评论