美文网首页R优质资源
【搬砖】构建预后signature 实践

【搬砖】构建预后signature 实践

作者: yadandb | 来源:发表于2021-02-26 20:32 被阅读0次

    Cox 回归模型

    写在最开头:诚心推荐解螺旋麦子的Cox回归模型R教学视频!!!如果你对cox还稀里糊涂的,下面的链接,点!15分钟就能GET超级舒爽的代码~
    【Cox回归的R操作:从单因素到多因素一气呵成https://www.bilibili.com/video/av18918951/

    最后的效果图

    根据麦子老师的课,我整理的代码如下↓

    library(survival)
    library(plyr)
    library(xlsx)
    
    data(lung)
    ### 1 COX 风险模型
    ## 1.1 转换数据类型(基线表要改成数值型)
    Baseline = lung
    ## 1.2 单因素Cox回归,用sex
    BaSurv <- Surv(time=Baseline$time,event=Baseline$status)
    Baseline$BaSurv <- with(Baseline,BaSurv)  # with函数的用法
    SexCox <- coxph(BaSurv~sex,data=Baseline) # ties='breslow' 那么结果与SPSS一样
    SexSum <- summary(SexCox)
    HR <- round(SexSum$coefficients[,2],2)
    PValue <- round(SexSum$coefficients[,5],3)
    CI <- paste0(round(SexSum$conf.int[,3:4],2),collapse="-")
    Unicox <- data.frame("Characteristics" = "Sex",
                        "Hazard ratio" = HR,
                        "CI95" = CI,
                        "p-Value" = PValue)
    
    # 多个单因素回归,自定义函数,使用lapply(向量化处理)
    UniCox <- function(x){
     FML <- as.formula(paste0("BaSurv~",x))
     SexCox <- coxph(FML,data=Baseline)
     SexSum <- summary(SexCox)
     HR <- round(SexSum$coefficients[,2],2)
     PValue <- round(SexSum$coefficients[,5],3)
     CI <- paste0(round(SexSum$conf.int[,3:4],2),collapse="-")
     Unicox <- data.frame("Characteristics" = x,
                          "Hazard ratio" = HR,
                          "CI95" = CI,
                          "p-Value" = PValue)
     return(Unicox)
    }
    UniCox("ph.ecog")
    ValNames <- colnames(Baseline)[4:10]
    UniVar <- lapply(ValNames,UniCox) #前面填名字,后面填函数
    UniVar <- ldply(UniVar)  #把list转换为data.frame
    UniVar$Characteristics[UniVar$p.Value < 0.05]
    
    ## 1.3 多因素Cox回归分析
    fml=as.formula(paste0("BaSurv~",paste0(UniVar$Characteristics[UniVar$p.Value < 0.05],collapse = "+")))
    MultiCox <- coxph(fml,data=Baseline)
    MultiSum <- summary(MultiCox)
    MultiSum$coefficients
    MultiSum$conf.int
    MHR <- round(MultiSum$coefficients[,2],2)
    MPValue <- round(MultiSum$coefficients[,5],3)
    MCIL <- round(MultiSum$conf.int[,3],2)
    MCIH <- round(MultiSum$conf.int[,4],2)
    MCI <- paste0(MCIL,'-',MCIH)
    MultiName=as.character(UniVar$Characteristics[UniVar$p.Value < 0.05])
    Multicox <- data.frame("Characteristics" = MultiName,
                        "Hazard ratio" = MHR,
                        "CI95" = MCI,
                        "p-Value" = MPValue)
    ## 1.4 整合表格
    Final <- merge.data.frame(UniVar,Multicox,by="Characteristics",all=T,sort = T)
    write.xlsx(Final,'CoxOnline.xlsx',col.names = T,row.names = F,showNA = F)
    
    
    Final。输出到excel就可以制作paper上那种表格啦

    回到正题:如何构建预后signature?

    参考文章:doi: 10.3389/fonc.2019.00078


    doi: 10.3389/fonc.2019.00078

    统计学中,多重检验,两两检验的p值需要进行Bonferroni校正。结合这篇文章(https://www.jianshu.com/p/1aeeac34ce51)理解下容错率FDR。

    使用library(My.stepwise)

    ### 2 MUV Cox,逐步回归 stepwise forward
    newBaseline <- Baseline
    newBaseline$BoneRe.status <- ifelse(newBaseline$BoneRe.status==2,1,0)
    Varlist <- colnames(newBaseline[9:26])
    My.stepwise.coxph(Time = "time.m", Status = "BoneRe.status", variable.list = Varlist) # 结果只能直接输出
    
    结果

    得到含9个gene的signature。
    接下来进行可视化。类似下图。


    doi: 10.7150/ijbs.45050

    (a) risk score 分布

    # 散点图:Risk score 分布
    ggplot(data=plot.data)+
      geom_point(mapping = aes(x=id,y=data),
                 color=ifelse(plot.data$group==0,"blue","red"),
                 show.legend = F)+
      labs(x="Patiens",y="Risk Score")
    
    a

    (b) PCA

    library("factoextra") 
    dat_pca <- t(GSE2034_log) 
    DatPCA <- prcomp(dat_pca)
    fviz_pca_ind(DatPCA, label="none",habillage=group_risk$group,
                 addEllipses=TRUE, ellipse.level=0.95,
                 palette = c("red","blue"))
    
    b

    (c)t-SNE

    library(Rtsne)
    tsne_out <- Rtsne(
      dat_pca,
      dims = 2, #降维之后的维度,默认值为2
      pca = FALSE, #是否对原始数据进行PCA分析,再用PCA得到的topN主成分进行后续分析
      perplexity = 60, #参数的取值必须小于(nrow(data) - 1 )/ 3
      theta = 0.0,
      max_iter = 1000 # 最大迭代次数
    )
    tsne_res <- as.data.frame(tsne_out$Y)
    colnames(tsne_res) <- c("tSNE1","tSNE2")
    head(tsne_res)
    Group=group_risk$group
    ggplot(tsne_res,aes(tSNE1,tSNE2,color=Group)) + 
      geom_point() + theme_bw() + 
      geom_hline(yintercept = 0,lty=2,col="black") + 
      geom_vline(xintercept = 0,lty=2,col="black") + #lwd
      theme(plot.title = element_text(hjust = 0.5)) + 
      labs(title = "tSNE plot")
    
    c

    (d)略。
    (e)生存曲线

    Baseline$RiskScore = RiskScore
    Baseline$Group = ifelse(Baseline$RiskScore>= median(Baseline$RiskScore),
                            "high-risk","low-risk")
    ReSurv = Surv(time = Baseline$time.m, event = Baseline$relapse.status)
    fit <- survfit(ReSurv ~ Group, data = Baseline) 
    ggsurvplot(fit, # 创建的拟合对象
               #surv.median.line = "hv",  # 增加中位生存时间
               #conf.int = TRUE, # 显示置信区间
               pval = TRUE, # 添加P值
               # add.all = TRUE,# 添加总患者生存曲线
               # risk.table = TRUE)
    
    e

    (f)ROC

    library(pROC)
    BoneRe.risk = roc(response = Baseline$BoneRe.status, 
                      predictor = Baseline$RiskScore,ci=T,auc=T)
    ggroc(BoneRe.risk,col="red")+
      theme_bw()+
      geom_abline(intercept=1,slope=1)+
      labs(x="Specificity",y="Sensitivity",title = "ROC")+
      annotate("text", x= 0.75, y= 0.75,label="AUC = 0.5726")
    
    f

    综上,我的signature 表现不是很好。。。 :)

    相关文章

      网友评论

        本文标题:【搬砖】构建预后signature 实践

        本文链接:https://www.haomeiwen.com/subject/qvcmfltx.html