美文网首页TCGA
TCGA预后基因筛选(单因素和多因素cox分析)

TCGA预后基因筛选(单因素和多因素cox分析)

作者: 萍智医信 | 来源:发表于2021-08-10 13:59 被阅读0次

    ①单因素cox分析

    输入文件.png
    library(survival)      #引用包
    pFilter=0.05           #显著性过滤条件
    setwd("E:\\research")     #设置工作目录
    rt=read.table("pairTime.txt", header=T, sep="\t", check.names=F, row.names=1)     #读取输入文件
    rt$futime=rt$futime/365      #生存单位改成年
    
    #单因素过滤条件
    outTab=data.frame()
    sigGenes=c("futime","fustat")
    for(gene in colnames(rt[,3:ncol(rt)])){
        cox=coxph(Surv(futime, fustat) ~ rt[,gene], data = rt)
        coxSummary = summary(cox)
        coxP=coxSummary$coefficients[,"Pr(>|z|)"]
            
        if(coxP<pFilter){
            sigGenes=c(sigGenes,gene)
            outTab=rbind(outTab,
                       cbind(gene=gene,
                       HR=coxSummary$conf.int[,"exp(coef)"],
                       HR.95L=coxSummary$conf.int[,"lower .95"],
                       HR.95H=coxSummary$conf.int[,"upper .95"],
                       pvalue=coxP) )
        }
    }
    
    #输出单因素结果
    write.table(outTab,file="uniCox.txt",sep="\t",row.names=F,quote=F)
    surSigExp=rt[,sigGenes]
    surSigExp=cbind(id=row.names(surSigExp),surSigExp)
    write.table(surSigExp,file="uniSigExp.txt",sep="\t",row.names=F,quote=F)
    

    单因素cox分析的输出文件,也为多因素cox分析的输入文件


    uniCox.txt.png
    uniSigExp.png

    ②多因素cox分析

    #引用包
    library(survival)
    library(survminer)
    library(glmnet)
    setwd("E:\\Master research")         #设置工作目录
    rt=read.table("uniSigExp.txt", header=T, sep="\t", check.names=F, row.names=1)     #读取输入文件
    
    ###lasso筛选免疫lncRNA对
    #rt$futime[rt$futime<=0]=0.003
    x=as.matrix(rt[,c(3:ncol(rt))])
    y=data.matrix(Surv(rt$futime,rt$fustat))
    fit <- glmnet(x, y, family = "cox", maxit = 1000)
    pdf("lasso.lambda.pdf")
    plot(fit, xvar = "lambda", label = TRUE)
    dev.off()
    cvfit <- cv.glmnet(x, y, family="cox", maxit = 1000)
    pdf("lasso.cvfit.pdf")
    plot(cvfit)
    abline(v=log(c(cvfit$lambda.min,cvfit$lambda.1se)),lty="dashed")
    dev.off()
    coef <- coef(fit, s=cvfit$lambda.min)
    index <- which(coef != 0)
    actCoef <- coef[index]
    lassoGene=row.names(coef)[index]
    lassoGene=c("futime","fustat",lassoGene)
    lassoSigExp=rt[,lassoGene]
    lassoSigExp=cbind(id=row.names(lassoSigExp),lassoSigExp)
    write.table(lassoSigExp,file="lasso.SigExp.txt",sep="\t",row.names=F,quote=F)
    
    #COX模型构建
    rt=read.table("lasso.SigExp.txt",header=T,sep="\t",check.names=F,row.names=1)
    multiCox=coxph(Surv(futime, fustat) ~ ., data = rt)
    multiCox=step(multiCox, direction="both")
    multiCoxSum=summary(multiCox)
    
    #输出模型参数
    outTab=data.frame()
    outTab=cbind(
                 coef=multiCoxSum$coefficients[,"coef"],
                 HR=multiCoxSum$conf.int[,"exp(coef)"],
                 HR.95L=multiCoxSum$conf.int[,"lower .95"],
                 HR.95H=multiCoxSum$conf.int[,"upper .95"],
                 pvalue=multiCoxSum$coefficients[,"Pr(>|z|)"])
    outTab=cbind(id=row.names(outTab),outTab)
    outTab=gsub("`","",outTab)
    write.table(outTab,file="multi.Cox.txt",sep="\t",row.names=F,quote=F)
    
    #输出病人风险值
    riskScore=predict(multiCox, type="risk", newdata=rt)
    coxGene=rownames(multiCoxSum$coefficients)
    coxGene=gsub("`", "", coxGene)
    outCol=c("futime", "fustat", coxGene)
    riskOut=cbind(rt[,outCol], riskScore)
    riskOut=cbind(id=rownames(riskOut), riskOut)
    write.table(riskOut, file="riskScore.txt", sep="\t", quote=F, row.names=F)
    
    
    ############绘制森林图函数############
    bioForest=function(coxFile=null, forestFile=null, forestCol=null){
        #读取输入文件
        rt <- read.table(coxFile,header=T,sep="\t",row.names=1,check.names=F)
        gene <- rownames(rt)
        hr <- sprintf("%.3f",rt$"HR")
        hrLow  <- sprintf("%.3f",rt$"HR.95L")
        hrHigh <- sprintf("%.3f",rt$"HR.95H")
        Hazard.ratio <- paste0(hr,"(",hrLow,"-",hrHigh,")")
        pVal <- ifelse(rt$pvalue<0.001, "<0.001", sprintf("%.3f", rt$pvalue))
            
        #输出图形
        pdf(file=forestFile, width=9, height=7)
        n <- nrow(rt)
        nRow <- n+1
        ylim <- c(1,nRow)
        layout(matrix(c(1,2),nc=2),width=c(3,2.5))
            
        #绘制森林图左边的临床信息
        xlim = c(0,3)
        par(mar=c(4,2.5,2,1))
        plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,xlab="",ylab="")
        text.cex=0.8
        text(0,n:1,gene,adj=0,cex=text.cex)
        text(2.08-0.5*0.2,n:1,pVal,adj=1,cex=text.cex);text(2.08-0.5*0.2,n+1,'pvalue',cex=text.cex,font=2,adj=1)
        text(3.12,n:1,Hazard.ratio,adj=1,cex=text.cex);text(3.12,n+1,'Hazard ratio',cex=text.cex,font=2,adj=1,)
            
        #绘制森林图
        par(mar=c(4,1,2,1),mgp=c(2,0.5,0))
        xlim = c(0,max(as.numeric(hrLow),as.numeric(hrHigh)))
        plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,ylab="",xaxs="i",xlab="Hazard ratio")
        arrows(as.numeric(hrLow),n:1,as.numeric(hrHigh),n:1,angle=90,code=3,length=0.05,col="darkblue",lwd=2.5)
        abline(v=1,col="black",lty=2,lwd=2)
        boxcolor = ifelse(as.numeric(hr) > 1, forestCol[1], forestCol[2])
        points(as.numeric(hr), n:1, pch = 15, col = boxcolor, cex=1.6)
        axis(1)
        dev.off()
    }
    #绘制模型森林图
    bioForest(coxFile="multi.Cox.txt", forestFile="model.multiForest.pdf", forestCol=c("red","green"))
    
    #绘制模型中基因单因素森林图
    uniRT=read.table("uniCox.txt",header=T,sep="\t",row.names=1,check.names=F)
    uniRT=uniRT[coxGene,]
    uniRT=cbind(id=row.names(uniRT), uniRT)
    write.table(uniRT, file="unicox.forest.txt", sep="\t", row.names=F, quote=F)
    bioForest(coxFile="unicox.forest.txt", forestFile="model.uniForest.pdf", forestCol=c("red","green"))
    

    相关文章

      网友评论

        本文标题:TCGA预后基因筛选(单因素和多因素cox分析)

        本文链接:https://www.haomeiwen.com/subject/xnhtbltx.html