以乳腺癌为例,提取basal type的data
下载及预处理
另一个方法,但是这个方法的生存信息并没有更新
rm(list = ls())
options(stringsAsFactors=F)
suppressMessages(library(TCGAbiolinks))
suppressMessages(library(tidyverse))
data_dir<-"~/TCGA/BRCA/"
####prepare the clc data
BRCA_path_subtypes <- as.data.frame(TCGAquery_subtype(tumor = "brca"))
get_os_time<-function(BRCA_path_subtypes){
new_survival<-data.frame()
for (i in 1:nrow(BRCA_path_subtypes)){
patient<-BRCA_path_subtypes[i,"patient"]
vital_status<-BRCA_path_subtypes[i,"vital_status"]
if (vital_status=="Alive"){
times=BRCA_path_subtypes[i,"days_to_last_followup"]
status=0
}else{
times=BRCA_path_subtypes[i,"days_to_death"]
status=1
}
new_survival<-rbind(new_survival,cbind(patient,status,times))
}
return(new_survival)
}
os_time<-get_os_time(BRCA_path_subtypes)
new_clc<-os_time %>% inner_join(BRCA_path_subtypes, by="patient")
#write.csv(new_clc,paste0(data_dir,"TCGA_BRCA_BRCA_path_subtypes_05162020.csv"))
####prepare the expression data
##download the data before this process
#get tumor data
miR_data<-read.csv(paste0(data_dir,"TCGA-BRCA-miRNAs_RPKM_01202020.csv"),check.names = F,row.names = 1)
colnames(miR_data)<-unlist(lapply(colnames(miR_data), FUN = function(x) {return(strsplit(x, split = "_mapped_",fixed = T)[[1]][2])}))
miR_data_tumor<-miR_data[,grep("-01A-",colnames(miR_data))]
colnames(miR_data_tumor)<-substr(colnames(miR_data_tumor), 1, 12)
#get basal subtype
new_clc_basal<-new_clc %>% filter(BRCA_Subtype_PAM50=="Basal" & times>0)
com_id<-intersect(as.character(new_clc_basal$patient),colnames(miR_data_tumor))
pick_targets<-grep("hsa-mir-137|143",row.names(miR_data_tumor),value = T)#搜索想要的gene
miR_data_tumor_basal_pick<-miR_data_tumor[pick_targets,com_id]
##get the expression levels
#define the function
Median<-function(x){
ifelse(x>median(x),"High","Low")
}
sur_plot<-function(gene,out_dir,df){
suppressMessages(library(survminer))
suppressMessages(library(survival))
suppressMessages(library(ggplot2))
dir.create(out_dir,recursive = T)
rt<-df[,c(gene,"status","times")]
rt[,"levels"]<-rt[,1]
rt[,"times"]=as.numeric(rt[,"times"])/365
myfit2<-coxph(Surv(times,status==1) ~ levels,data=rt)
pvalue <- anova(myfit2)$Pr[2]
coxsummary<-summary(myfit2)
HR<-coxsummary$coefficients[,2][1]
filename=paste0(out_dir,"/",gene,"_p=",as.character(pvalue ),"_HR=",as.character(HR),"_Median_sur.png")
myfit1 <-survfit(Surv(times,status==1) ~ levels,data=rt)
p<-ggsurvplot(myfit1, pval = F, palette = c("#DF3D8C","#0A5EB9"),
risk.table = TRUE, risk.table.col = "strata",pval.size=5,
risk.table.height = 0.25#Useful when you have multiple groups
)
ggsave(filename,print(p),device="png",width = 6,height = 6,units = "in")
}
###prepare the sur data
miR_data_tumor_basal_pick_levels<-apply(t(miR_data_tumor_basal_pick), 2, Median)
new_clc_basal_sur<-new_clc_basal[,1:3]
row.names(new_clc_basal_sur)<-new_clc_basal_sur[,1]
sur_data<-cbind(miR_data_tumor_basal_pick_levels[com_id,],new_clc_basal_sur[com_id,])
##plot the data
out_dir=paste0(data_dir,"Median_sur")
sapply(pick_targets,sur_plot,out_dir=out_dir,df=sur_data)
网友评论