美文网首页
03~用TCGAbiolinks下载数据

03~用TCGAbiolinks下载数据

作者: 归鹤鸣 | 来源:发表于2020-12-25 23:31 被阅读0次

    一、安装TCGAbiolinks packages

    # Install BiocManager from CRAN only when it is not already available,
    # then use it to install the TCGAbiolinks package.
    if (!requireNamespace("BiocManager", quietly = TRUE)) {
      install.packages("BiocManager")
    }
    BiocManager::install("TCGAbiolinks")
    

    二、选择癌症类型;

    # Console transcript: attach the packages for this session, then print the
    # project_id column of getGDCprojects(); the listing that follows is that
    # call's output.
    > library(TCGAbiolinks)
    > library(dplyr)
    > library(DT)
    > library(SummarizedExperiment)
    > TCGAbiolinks::getGDCprojects()$project_id
    [1] "HCMI-CMDC"             "GENIE-MSK"             "TCGA-UCEC"             "TCGA-LGG"              "TCGA-SARC"             "TCGA-PAAD"            
     [7] "TCGA-ESCA"             "TCGA-PRAD"             "GENIE-VICC"            "TCGA-LAML"             "TCGA-KIRC"             "TCGA-PCPG"            
    [13] "TCGA-HNSC"             "GENIE-JHU"             "TCGA-OV"               "TCGA-GBM"              "TCGA-UCS"              "TCGA-MESO"            
    [19] "TCGA-TGCT"             "TCGA-KICH"             "TCGA-READ"             "TCGA-UVM"              "TCGA-THCA"             "OHSU-CNL"             
    [25] "GENIE-DFCI"            "GENIE-NKI"             "GENIE-GRCC"            "FM-AD"                 "GENIE-UHN"             "GENIE-MDA"            
    [31] "TCGA-LIHC"             "TCGA-THYM"             "TCGA-CHOL"             "TARGET-ALL-P1"         "ORGANOID-PANCREATIC"   "TCGA-DLBC"            
    [37] "TCGA-KIRP"             "TCGA-BLCA"             "CPTAC-2"               "TARGET-ALL-P3"         "TARGET-CCSK"           "TARGET-NBL"           
    [43] "TARGET-AML"            "TARGET-ALL-P2"         "NCICCR-DLBCL"          "CTSP-DLBCL1"           "TARGET-RT"             "TARGET-OS"            
    [49] "TCGA-BRCA"             "TCGA-COAD"             "TCGA-CESC"             "TCGA-LUSC"             "TCGA-STAD"             "TCGA-SKCM"            
    [55] "CMI-MBC"               "CMI-ASC"               "TCGA-LUAD"             "TARGET-WT"             "TCGA-ACC"              "BEATAML1.0-CRENOLANIB"
    [61] "BEATAML1.0-COHORT"     "VAREPOP-APOLLO"        "MMRF-COMMPASS"         "WCDT-MCRPC"            "CPTAC-3"               "CGCI-BLGSP"           
    [67] "CGCI-HTMCP-CC"       
    
    # Project ID stored in `cancer_type`; the download snippets below pass it
    # as their `project` argument.
    > cancer_type="TCGA-PRAD" 
    

    三、数据下载
    1)临床数据下载

    # Fetch the clinical table for the selected project and save it as CSV
    # in the current working directory.
    clinical <- GDCquery_clinic(
      project = cancer_type,
      type = "clinical"
    )
    write.csv(clinical, file = "TCGAbiolinks_TCGA_PRAD_clinical.csv")
    
    2)下载RNA-seq的counts
    # Query the gene-expression count files of the selected project.
    # NOTE(review): newer GDC data releases may no longer serve the
    # "HTSeq - Counts" workflow -- confirm the available workflow.type values
    # before running.
    query <- GDCquery(
      project       = cancer_type,
      data.category = "Transcriptome Profiling",
      data.type     = "Gene Expression Quantification",
      workflow.type = "HTSeq - Counts"
    )

    # Download through the GDC API, 100 files per request chunk.
    GDCdownload(query, method = "api", files.per.chunk = 100)

    # Assemble the downloaded files, pull out the assay matrix and save it.
    expdat <- GDCprepare(query = query)
    count_matrix <- assay(expdat)
    write.csv(count_matrix, file = "TCGA_PRAD_Counts.csv")
    
    3)下载miRNA数据
    # Query the miRNA expression quantification files of the selected project.
    query <- GDCquery(
      project       = cancer_type,
      data.category = "Transcriptome Profiling",
      data.type     = "miRNA Expression Quantification",
      workflow.type = "BCGSC miRNA Profiling"
    )

    # Download through the GDC API, 50 files per request chunk.
    GDCdownload(query, method = "api", files.per.chunk = 50)
    expdat <- GDCprepare(query = query)

    # For miRNA quantification GDCprepare() hands back a plain data frame, on
    # which assay() fails; only extract the assay when a SummarizedExperiment
    # was actually returned, otherwise write the prepared table as-is.
    if (methods::is(expdat, "SummarizedExperiment")) {
      count_matrix <- assay(expdat)
    } else {
      count_matrix <- expdat
    }
    write.csv(count_matrix, file = "TCGA_PRAD_miRNA.csv")
    
    4)下载Copy Number Variation数据
    # Query the copy-number segment files of the selected project.
    query <- GDCquery(
      project       = cancer_type,
      data.category = "Copy Number Variation",
      data.type     = "Copy Number Segment"
    )

    # Download through the GDC API, 50 files per request chunk.
    GDCdownload(query, method = "api", files.per.chunk = 50)
    expdat <- GDCprepare(query = query)

    # Copy-number segments are prepared as a plain data frame, on which
    # assay() fails; only extract the assay when a SummarizedExperiment was
    # actually returned, otherwise write the prepared table as-is.
    if (methods::is(expdat, "SummarizedExperiment")) {
      count_matrix <- assay(expdat)
    } else {
      count_matrix <- expdat
    }
    write.csv(count_matrix, file = "TCGA_PRAD_Copy-Number-Variation.csv")
    
    5)下载甲基化数据
    # Query the 450k DNA methylation files of the selected project.
    # NOTE(review): `legacy = TRUE` targets the GDC legacy archive; recent
    # TCGAbiolinks releases may no longer accept this argument -- confirm
    # against the installed version.
    query.met <- GDCquery(
      project       = cancer_type,
      legacy        = TRUE,
      data.category = "DNA methylation",
      platform      = c("Illumina Human Methylation 450")
    )

    # Download through the GDC API, 300 files per request chunk.
    GDCdownload(query.met, method = "api", files.per.chunk = 300)

    # Prepare the methylation query itself. The original passed the stale
    # `query` object left over from an earlier step, so the wrong data set
    # ended up in the methylation CSV.
    expdat <- GDCprepare(query = query.met)
    count_matrix <- assay(expdat)
    write.csv(count_matrix, file = "TCGA_PRAD_methylation.csv")
    

    相关文章

      网友评论

          本文标题:03~用TCGAbiolinks下载数据

          本文链接:https://www.haomeiwen.com/subject/gcoinktx.html