生存分析——tcga实战

作者: monkey_study | 来源:发表于2022-05-11 12:04 被阅读0次

TCGA生存分析③
TCGA生存分析②
生存分析——tcga实战
TCGA学习04：建模预测-cox回归
TCGA学习01：数据下载与整理
TCGA学习04：建模预测-随机森林&向量机
TCGA学习02：差异分析
TCGA学习03：生存分析
TCGA生存分析
TCGA生存分析

pacman::p_load("data.table","tibble","dplyr","tidyr","survival","survminer","stringr") 
##读入TCGA—mRNA count数据
load("D:/Rstudio_data/TCGA/project/pyroptosis_lncRNA/Brca_count_anno.Rdata")
#查看数据
dim(ex)
ex[1:4,1:4] #60660
#查看NA的数据
rownames(ex) <- ex[,1]
exp <- ex[,-(1:3)]
exp[1:4,1:4]
#去除表达量过低的基因
exp = exp[apply(exp, 1, function(x) sum(x > 1) > 9), ] 
dim(exp)  #50544
#读入临床信息
load("D:/Rstudio_data/TCGA/project/pyroptosis_lncRNA/BRCA_clinical_df.Rdata")
dim(cl_df) #1097  104
cl_df[1:4,1:4]
#整理临床信息
colna_clinical <- as.data.frame(colnames(cl_df))
clinical_data <-cl_df %>%
 transmute(Patient_ID=bcr_patient_barcode,OS.status=vital_status,Age=age_at_initial_pathologic_diagnosis,
           Gender=gender,Race=race_list,History_of_neoadjuvant_treatment=history_of_neoadjuvant_treatment,
           Stage=stage_event,PR_status= breast_carcinoma_progesterone_receptor_status,
          Her2_status=lab_proc_her2_neu_immunohistochemistry_receptor_status, ER_status= breast_carcinoma_estrogen_receptor_status)%>%
  mutate(OS.time=OS.time) %>% select(Patient_ID,OS.status,OS.time,everything())
table(cl_df$new_tumor_events)
save(exp,clinical_data,colna_clinical,file = "BRCA_survival.Rdata")
rm(list=ls())
load("BRCA_survival.Rdata")
##生存相关信息
sur_data<- clinical_data[,1:3]

#构建分组信息
##01–09是癌症，10–19是正常，20–29是癌旁
# 根据样本ID的第14-15位，给样本分组（tumor和normal）
table(str_sub(colnames(exp),14,15))  
group_list = ifelse(as.numeric(str_sub(colnames(exp),14,15)) < 10,'tumor','normal')
group_list = factor(group_list,levels = c("normal","tumor"))
table(group_list) 
#normal  tumor 
# 113   1113
Patient_ID <- rownames(exprSet)
exprSet <- as.data.frame()
exprSet <- exprSet %>%
  
  # then make it a tibble (nice printing while debugging)
  
  as_tibble() %>% mutate(Patient_ID=Patient_ID) %>%
  select(Patient_ID, ENSG00000000003.15 ,ENSG00000000005.6 ) %>% 
  
  # then trim the barcode (see head(clin), and substr)
  
  mutate(Patient_ID = substr(Patient_ID, 1, 12)) %>% 
  
  # then join back to clinical data
  
  inner_join(sur_data, by="Patient_ID")
exprSet$OS.status <- ifelse(exprSet$OS.status=="Alive",T,F)
##以表达中位值作为分界点，将基因ENSG00000000003.15连续变量变为分类变量
group <- ifelse(exprSet$ENSG00000000003.15>median(exprSet$ENSG00000000003.15),'high','low')
fit<-survfit(Surv(OS.time,OS.status)~group,data=exprSet)
summary(fit)  #查看生存分析结果
ggsurvplot(fit = fit,
           # fun = "pct",
           linetype = 1, 
           ylab="OS",
           title="ENSG00000000003.15",
           pval = T,
          
           surv.median.line = "none",risk.table =T,tables.height = 0.25,
           ggtheme = theme_survminer(),  #主题名称
           conf.int = T,  #是否画出置信区间
           palette = "ngp" )

image.png