pacman::p_load("data.table","tibble","dplyr","tidyr","survival","survminer","stringr")
##读入TCGA—mRNA count数据
load("D:/Rstudio_data/TCGA/project/pyroptosis_lncRNA/Brca_count_anno.Rdata")
#查看数据
dim(ex)
ex[1:4,1:4] #60660
#查看NA的数据
rownames(ex) <- ex[,1]
exp <- ex[,-(1:3)]
exp[1:4,1:4]
#去除表达量过低的基因
exp = exp[apply(exp, 1, function(x) sum(x > 1) > 9), ]
dim(exp) #50544
#读入临床信息
load("D:/Rstudio_data/TCGA/project/pyroptosis_lncRNA/BRCA_clinical_df.Rdata")
dim(cl_df) #1097 104
cl_df[1:4,1:4]
#整理临床信息
colna_clinical <- as.data.frame(colnames(cl_df))
clinical_data <-cl_df %>%
transmute(Patient_ID=bcr_patient_barcode,OS.status=vital_status,Age=age_at_initial_pathologic_diagnosis,
Gender=gender,Race=race_list,History_of_neoadjuvant_treatment=history_of_neoadjuvant_treatment,
Stage=stage_event,PR_status= breast_carcinoma_progesterone_receptor_status,
Her2_status=lab_proc_her2_neu_immunohistochemistry_receptor_status, ER_status= breast_carcinoma_estrogen_receptor_status)%>%
mutate(OS.time=OS.time) %>% select(Patient_ID,OS.status,OS.time,everything())
table(cl_df$new_tumor_events)
save(exp,clinical_data,colna_clinical,file = "BRCA_survival.Rdata")
rm(list=ls())
load("BRCA_survival.Rdata")
##生存相关信息
sur_data<- clinical_data[,1:3]
#构建分组信息
##01–09是癌症,10–19是正常,20–29是癌旁
# 根据样本ID的第14-15位,给样本分组(tumor和normal)
table(str_sub(colnames(exp),14,15))
group_list = ifelse(as.numeric(str_sub(colnames(exp),14,15)) < 10,'tumor','normal')
group_list = factor(group_list,levels = c("normal","tumor"))
table(group_list)
#normal tumor
# 113 1113
Patient_ID <- rownames(exprSet)
exprSet <- as.data.frame()
exprSet <- exprSet %>%
# then make it a tibble (nice printing while debugging)
as_tibble() %>% mutate(Patient_ID=Patient_ID) %>%
select(Patient_ID, ENSG00000000003.15 ,ENSG00000000005.6 ) %>%
# then trim the barcode (see head(clin), and substr)
mutate(Patient_ID = substr(Patient_ID, 1, 12)) %>%
# then join back to clinical data
inner_join(sur_data, by="Patient_ID")
exprSet$OS.status <- ifelse(exprSet$OS.status=="Alive",T,F)
##以表达中位值作为分界点,将基因ENSG00000000003.15连续变量变为分类变量
group <- ifelse(exprSet$ENSG00000000003.15>median(exprSet$ENSG00000000003.15),'high','low')
fit<-survfit(Surv(OS.time,OS.status)~group,data=exprSet)
summary(fit) #查看生存分析结果
ggsurvplot(fit = fit,
# fun = "pct",
linetype = 1,
ylab="OS",
title="ENSG00000000003.15",
pval = T,
surv.median.line = "none",risk.table =T,tables.height = 0.25,
ggtheme = theme_survminer(), #主题名称
conf.int = T, #是否画出置信区间
palette = "ngp" )

image.png
网友评论