美文网首页
2020-01-08单因素COX分析和建立模型

2020-01-08单因素COX分析和建立模型

作者: 海阔天空周 | 来源:发表于2020-01-08 21:54 被阅读0次

    cox_t<-cox_train[,-c(1:9)]

    cox_t<-log2(cox_t+0.1)###记得查看结果,是否有NAN,很奇快,没项独立运输没有NAN,但这一句一次行运算,很多NAN。后面风险模型评估就没法建立。

    cox_train_univar<-cbind(cox_train[,c(8,6)],cox_t)

    cox_train_univar[,2]<-cox_train_univar[,2]/365

    colnames(cox_train_univar)[1]<-"status"

    colnames(cox_train_univar)[2]<-"life_span"

    ###单因素cox分析

    univar_out<-data.frame(matrix(NA,(ncol(cox_train_univar)-2),5))

    rownames(univar_out)<-colnames(cox_train_univar)[-c(1:2)]

    colnames(univar_out)<-c("Coeffcient","HR","lower.95","upper.95","P-Value")

    for (i in colnames(cox_train_univar)[-c(1:2)]) {

      cox<-coxph(Surv(life_span,status)~cox_train_univar[,i],data = cox_train_univar)

      cox_summary<-summary(cox)

      univar_out[i,1]<-cox_summary$coefficients[,1]

      univar_out[i,2]<-cox_summary$coefficients[,2]

      univar_out[i,3]<-cox_summary$conf.int[,3]

      univar_out[i,4]<-cox_summary$conf.int[,4]

      univar_out[i,5]<-cox_summary$coefficients[,5]

      }

    数据格式(行名是所有的肿瘤样本名)

       status    life_span  gene   gene   gene ......

         0             365       ...         ...       .....

       1              235     .....          ....        .....

    ##通过随机森林筛选关键数个基因

    uni001<-univar_out[univar_out$`P-Value`<0.01,]

    uni005<-univar_out[univar_out$`P-Value`<0.05,]

    bbb=colnames(cox_train_univar)[-c(1,2)]%in%rownames(uni005)###记得删除【-c(1,2)】,否则后面的取值全部错两列

    cox_train_multi<-cbind(cox_train_univar[,c(1,2)],cox_train_univar[,bbb])

    diseaserf<-rfsrc(Surv(life_span,status)~.,data = cox_train_multi,ntree=500,tree.err = T,importance = T)

    outrf<-var.select(object = diseaserf,vdv,method = "vh",nrep = 200)##这一步多跑几次,每次结果不一样,而且30可以换更大值

    outrf$topvars

    ###多因素COX分析 找出是否有统计学意义

    sig4genename<-c("ENSG00000275149","ENSG00000245526","ENSG00000230838","ENSG00000237423")#我只找出四个

    cox_train_multi_sig<-cbind(cox_train_univar[,c(1,2)],cox_train_univar[,sig5genename])

    cox_4<-coxph(Surv(life_span,status)~.,data = cox_train_multi_sig)##建立模型,其实如果有意义的基因不多的话,可以省去随机森林这一步

    cox_4<-step(cox_4,direction = "both")

    save(cox_4,file = "cox_4.Rdata")

    riskscore_train<-predict(cox_4,type = "risk",newdata = cox_train_multi)#通过模型,给每个病例重新计算分值

    risktrain<-as.vector(ifelse(riskscore_train>median(riskscore_train),"high","low"))##记着下次测试组时也用median(riskscore_train)

    rownames(cox_train_multi_sig)<-cox_train$clinical.submitter_id

    risk_traindata<-cbind(id=rownames(cox_train_multi_sig),cox_train_multi_sig[,c(1,2)],riskscore_train,risktrain)

    相关文章

      网友评论

          本文标题:2020-01-08单因素COX分析和建立模型

          本文链接:https://www.haomeiwen.com/subject/yrheactx.html