批量TCGA生存分析

作者: 落寞的橙子 | 来源:发表于2020-05-17 03:21 被阅读0次

TCGA生存分析③
TCGA生存分析②
TCGA甲基化芯片数据的差异基因的生存分析与富集分析
批量TCGA生存分析
TCGA生存分析
【科研猫·出品】TCGA超大批量生存分析教程
TCGA学习04：建模预测-cox回归
TCGA学习01：数据下载与整理
TCGA学习04：建模预测-随机森林&向量机
TCGA学习02：差异分析

以乳腺癌为例，提取basal type的data
下载及预处理
 另一个方法，但是这个方法的生存信息并没有更新

rm(list = ls())
options(stringsAsFactors=F)
suppressMessages(library(TCGAbiolinks))
suppressMessages(library(tidyverse))
data_dir<-"～/TCGA/BRCA/"
####prepare the clc data
BRCA_path_subtypes <- as.data.frame(TCGAquery_subtype(tumor = "brca"))
get_os_time<-function(BRCA_path_subtypes){
  new_survival<-data.frame()
  for (i in 1:nrow(BRCA_path_subtypes)){
    patient<-BRCA_path_subtypes[i,"patient"]
    vital_status<-BRCA_path_subtypes[i,"vital_status"]
    if (vital_status=="Alive"){
      times=BRCA_path_subtypes[i,"days_to_last_followup"]
      status=0
    }else{
      times=BRCA_path_subtypes[i,"days_to_death"]
      status=1
    }
    new_survival<-rbind(new_survival,cbind(patient,status,times))
  }
  return(new_survival)
}
os_time<-get_os_time(BRCA_path_subtypes)
new_clc<-os_time %>% inner_join(BRCA_path_subtypes, by="patient")
#write.csv(new_clc,paste0(data_dir,"TCGA_BRCA_BRCA_path_subtypes_05162020.csv"))

####prepare the expression data
##download the data before this process
#get tumor data
miR_data<-read.csv(paste0(data_dir,"TCGA-BRCA-miRNAs_RPKM_01202020.csv"),check.names = F,row.names = 1)
colnames(miR_data)<-unlist(lapply(colnames(miR_data), FUN = function(x) {return(strsplit(x, split = "_mapped_",fixed = T)[[1]][2])}))
miR_data_tumor<-miR_data[,grep("-01A-",colnames(miR_data))]
colnames(miR_data_tumor)<-substr(colnames(miR_data_tumor), 1, 12)
#get basal subtype
new_clc_basal<-new_clc %>% filter(BRCA_Subtype_PAM50=="Basal" & times>0)
com_id<-intersect(as.character(new_clc_basal$patient),colnames(miR_data_tumor))
pick_targets<-grep("hsa-mir-137|143",row.names(miR_data_tumor),value = T)#搜索想要的gene
miR_data_tumor_basal_pick<-miR_data_tumor[pick_targets,com_id]

##get the expression levels
#define the function
Median<-function(x){
  ifelse(x>median(x),"High","Low")
}

sur_plot<-function(gene,out_dir,df){
  suppressMessages(library(survminer))
  suppressMessages(library(survival))
  suppressMessages(library(ggplot2))
  dir.create(out_dir,recursive = T)
  rt<-df[,c(gene,"status","times")]
  rt[,"levels"]<-rt[,1]
  rt[,"times"]=as.numeric(rt[,"times"])/365
  myfit2<-coxph(Surv(times,status==1) ~ levels,data=rt)
  pvalue <- anova(myfit2)$Pr[2]
  coxsummary<-summary(myfit2)
  HR<-coxsummary$coefficients[,2][1]
  filename=paste0(out_dir,"/",gene,"_p=",as.character(pvalue ),"_HR=",as.character(HR),"_Median_sur.png")
  myfit1 <-survfit(Surv(times,status==1) ~ levels,data=rt)
  p<-ggsurvplot(myfit1, pval = F, palette = c("#DF3D8C","#0A5EB9"),
                risk.table = TRUE, risk.table.col = "strata",pval.size=5,
                risk.table.height = 0.25#Useful when you have multiple groups
  )
  ggsave(filename,print(p),device="png",width = 6,height = 6,units = "in")
}

###prepare the sur data

miR_data_tumor_basal_pick_levels<-apply(t(miR_data_tumor_basal_pick), 2, Median)
new_clc_basal_sur<-new_clc_basal[,1:3]
row.names(new_clc_basal_sur)<-new_clc_basal_sur[,1]

sur_data<-cbind(miR_data_tumor_basal_pick_levels[com_id,],new_clc_basal_sur[com_id,])

##plot the data
out_dir=paste0(data_dir,"Median_sur")
sapply(pick_targets,sur_plot,out_dir=out_dir,df=sur_data)