美文网首页good code
生存分析,简单粗暴!

生存分析,简单粗暴!

作者: 小洁忘了怎么分身 | 来源:发表于2020-07-02 00:07 被阅读0次

    生存分析代码的原始版本见:两种方法批量生存分析,这个是R包GDCRNATools给出的简化版。大段代码变成几个函数搞定。

    0.R包和数据准备

    if(!require(GDCRNATools))BiocManager::install("GDCRNATools")
    library(GDCRNATools)
    
    # mRNA 和miRNA的表达矩阵
    data(rnaCounts);dim(rnaCounts)
    
    ## [1] 1000   45
    
    rnaCounts[1:3,1:3]
    
    ##                 TCGA-3X-AAV9-01 TCGA-3X-AAVA-01
    ## ENSG00000003989            1520             960
    ## ENSG00000004799            7659             957
    ## ENSG00000005812            2246            1698
    ##                 TCGA-3X-AAVB-01
    ## ENSG00000003989            2177
    ## ENSG00000004799            2295
    ## ENSG00000005812            2454
    
    data(mirCounts);dim(mirCounts)
    
    ## [1] 2588   45
    
    mirCounts[1:3,1:3]
    
    ##                 TCGA-3X-AAV9-01 TCGA-3X-AAVA-01
    ## hsa-let-7a-5p            165141          132094
    ## hsa-let-7a-3p               204             169
    ## hsa-let-7a-2-3p              30              26
    ##                 TCGA-3X-AAVB-01
    ## hsa-let-7a-5p            210259
    ## hsa-let-7a-3p               298
    ## hsa-let-7a-2-3p              50
    
    #临床信息
    metaMatrix.RNA <- gdcParseMetadata(project.id = 'TCGA-CHOL',
                                       data.type  = 'RNAseq',
                                       write.meta = FALSE)
    metaMatrix.RNA <- gdcFilterDuplicate(metaMatrix.RNA)
    metaMatrix.RNA <- gdcFilterSampleType(metaMatrix.RNA) 
    metaMatrix.RNA[1:4,1:4]
    
    ##                                                             file_name
    ## TCGA-3X-AAV9-01A 725eaa94-5221-4c22-bced-0c36c10c2c3b.htseq.counts.gz
    ## TCGA-3X-AAVA-01A b6a2c03a-c8ad-41e9-8a19-8f5ac53cae9f.htseq.counts.gz
    ## TCGA-3X-AAVB-01A c2765336-c804-4fd2-b45a-e75af2a91954.htseq.counts.gz
    ## TCGA-3X-AAVC-01A 8b20cba8-9fd5-4d56-bd02-c6f4a62767e8.htseq.counts.gz
    ##                                               file_id
    ## TCGA-3X-AAV9-01A 85bc7f81-51fb-4446-b12d-8741eef4acee
    ## TCGA-3X-AAVA-01A 42b8d463-6209-4ea0-bb01-8023a1302fa0
    ## TCGA-3X-AAVB-01A 6e2031e9-df75-48df-b094-8dc6be89bf8b
    ## TCGA-3X-AAVC-01A 19e8fd21-f6c8-49b0-aa76-109eef46c2e9
    ##                       patient          sample
    ## TCGA-3X-AAV9-01A TCGA-3X-AAV9 TCGA-3X-AAV9-01
    ## TCGA-3X-AAVA-01A TCGA-3X-AAVA TCGA-3X-AAVA-01
    ## TCGA-3X-AAVB-01A TCGA-3X-AAVB TCGA-3X-AAVB-01
    ## TCGA-3X-AAVC-01A TCGA-3X-AAVC TCGA-3X-AAVC-01
    
    rnaExpr <- gdcVoomNormalization(counts = rnaCounts, filter = FALSE)
    mirExpr <- gdcVoomNormalization(counts = mirCounts, filter = FALSE)
    

    1.差异分析

    table(metaMatrix.RNA$sample_type)
    
    ## 
    ##      PrimaryTumor SolidTissueNormal 
    ##                36                 9
    
    DEGAll <- gdcDEAnalysis(counts     = rnaCounts, 
                            group      = metaMatrix.RNA$sample_type, 
                            comparison = 'PrimaryTumor-SolidTissueNormal', 
                            method     = 'limma');dim(DEGAll)
    
    ## [1] 565   8
    
    head(DEGAll)
    
    ##                  symbol          group     logFC   AveExpr
    ## ENSG00000143257   NR1I3 protein_coding -6.916825  7.023129
    ## ENSG00000205707  ETFRF1 protein_coding -2.492182  9.515997
    ## ENSG00000134532    SOX5 protein_coding -4.871118  6.228227
    ## ENSG00000141338   ABCA8 protein_coding -5.653794  7.520581
    ## ENSG00000066583   ISOC1 protein_coding -2.370131 10.466194
    ## ENSG00000164188 RANBP3L protein_coding -5.624376  4.356284
    ##                         t       PValue          FDR        B
    ## ENSG00000143257 -17.29086 4.244355e-22 2.419282e-19 40.04288
    ## ENSG00000205707 -16.06753 8.353256e-21 2.380678e-18 37.19751
    ## ENSG00000134532 -15.03589 1.168746e-19 2.220617e-17 34.49828
    ## ENSG00000141338 -14.86069 1.851519e-19 2.638414e-17 34.11581
    ## ENSG00000066583 -14.56532 4.053959e-19 4.621513e-17 33.35640
    ## ENSG00000164188 -14.22477 1.013592e-18 9.629125e-17 32.25659
    

    可以获取全部差异基因,也可以单独获取mRNA和lncRNA的差异分析结果

    deALL <- gdcDEReport(deg = DEGAll, gene.type = 'all');dim(deALL)
    
    ## [1] 283   8
    
    deLNC <- gdcDEReport(deg = DEGAll, gene.type = 'long_non_coding');dim(deLNC)
    
    ## [1] 47  8
    
    dePC <- gdcDEReport(deg = DEGAll, gene.type = 'protein_coding');dim(dePC)
    
    ## [1] 222   8
    

    2.任意两个基因的相关性图

    gdcCorPlot(gene1    = 'ENSG00000003989', 
               gene2    = 'ENSG00000004799', 
               rna.expr = rnaExpr, 
               metadata = metaMatrix.RNA)
    

    3.生存分析

    支持两种方法:CoxPH和KM,基于survival包,函数是gdcSurvivalAnalysis()

    CoxPH analysis

    ####### CoxPH analysis #######
    survOutput <- gdcSurvivalAnalysis(gene     = rownames(deALL), 
                                      method   = 'coxph', 
                                      rna.expr = rnaExpr, 
                                      metadata = metaMatrix.RNA)
    table(survOutput$pValue<0.05)
    

    KM analysis

    ####### KM analysis #######
    survOutput <- gdcSurvivalAnalysis(gene     = rownames(deALL), 
                                      method   = 'KM', 
                                      rna.expr = rnaExpr, 
                                      metadata = metaMatrix.RNA, 
                                      sep      = 'median')
    table(as.numeric(as.character(survOutput$pValue))<0.05)
    

    KM plot

    gdcKMPlot(gene     = 'ENSG00000003989',
              rna.expr = rnaExpr,
              metadata = metaMatrix.RNA,
              sep      = 'median')
    

    相关文章

      网友评论

        本文标题:生存分析,简单粗暴!

        本文链接:https://www.haomeiwen.com/subject/htgkqktx.html