cox_t<-cox_train[,-c(1:9)]
cox_t<-log2(cox_t+0.1)###记得查看结果,是否有NAN,很奇快,没项独立运输没有NAN,但这一句一次行运算,很多NAN。后面风险模型评估就没法建立。
cox_train_univar<-cbind(cox_train[,c(8,6)],cox_t)
cox_train_univar[,2]<-cox_train_univar[,2]/365
colnames(cox_train_univar)[1]<-"status"
colnames(cox_train_univar)[2]<-"life_span"
###单因素cox分析
univar_out<-data.frame(matrix(NA,(ncol(cox_train_univar)-2),5))
rownames(univar_out)<-colnames(cox_train_univar)[-c(1:2)]
colnames(univar_out)<-c("Coeffcient","HR","lower.95","upper.95","P-Value")
for (i in colnames(cox_train_univar)[-c(1:2)]) {
cox<-coxph(Surv(life_span,status)~cox_train_univar[,i],data = cox_train_univar)
cox_summary<-summary(cox)
univar_out[i,1]<-cox_summary$coefficients[,1]
univar_out[i,2]<-cox_summary$coefficients[,2]
univar_out[i,3]<-cox_summary$conf.int[,3]
univar_out[i,4]<-cox_summary$conf.int[,4]
univar_out[i,5]<-cox_summary$coefficients[,5]
}
数据格式(行名是所有的肿瘤样本名)
status life_span gene gene gene ......
0 365 ... ... .....
1 235 ..... .... .....
##通过随机森林筛选关键数个基因
uni001<-univar_out[univar_out$`P-Value`<0.01,]
uni005<-univar_out[univar_out$`P-Value`<0.05,]
bbb=colnames(cox_train_univar)[-c(1,2)]%in%rownames(uni005)###记得删除【-c(1,2)】,否则后面的取值全部错两列
cox_train_multi<-cbind(cox_train_univar[,c(1,2)],cox_train_univar[,bbb])
diseaserf<-rfsrc(Surv(life_span,status)~.,data = cox_train_multi,ntree=500,tree.err = T,importance = T)
outrf<-var.select(object = diseaserf,vdv,method = "vh",nrep = 200)##这一步多跑几次,每次结果不一样,而且30可以换更大值
outrf$topvars
###多因素COX分析 找出是否有统计学意义
sig4genename<-c("ENSG00000275149","ENSG00000245526","ENSG00000230838","ENSG00000237423")#我只找出四个
cox_train_multi_sig<-cbind(cox_train_univar[,c(1,2)],cox_train_univar[,sig5genename])
cox_4<-coxph(Surv(life_span,status)~.,data = cox_train_multi_sig)##建立模型,其实如果有意义的基因不多的话,可以省去随机森林这一步
cox_4<-step(cox_4,direction = "both")
save(cox_4,file = "cox_4.Rdata")
riskscore_train<-predict(cox_4,type = "risk",newdata = cox_train_multi)#通过模型,给每个病例重新计算分值
risktrain<-as.vector(ifelse(riskscore_train>median(riskscore_train),"high","low"))##记着下次测试组时也用median(riskscore_train)
rownames(cox_train_multi_sig)<-cox_train$clinical.submitter_id
risk_traindata<-cbind(id=rownames(cox_train_multi_sig),cox_train_multi_sig[,c(1,2)],riskscore_train,risktrain)
网友评论