美文网首页 > TCGA数据流程 > ggplot集锦
2022新版TCGA批量下载表达矩阵及临床信息

2022新版TCGA批量下载表达矩阵及临床信息

作者: 科研小徐 | 来源:发表于2022-04-23 14:04 被阅读0次
    # BiocManager::install("BioinformaticsFMRP/TCGAbiolinksGUI.data")
    # BiocManager::install("BioinformaticsFMRP/TCGAbiolinks")
    # Download one TCGA project's STAR-Counts expression data and clinical table
    # from GDC and write them as tab-separated files under "result/".
    #
    # Args:
    #   i: 1-based position of the project among the GDC projects whose
    #      project_id contains "TCGA", in the order getGDCprojects() returns them.
    #
    # Side effects:
    #   Attaches TCGAbiolinks and SummarizedExperiment, downloads the project's
    #   files via GDCdownload(), and writes
    #     result/<project_id>-counts.txt
    #     result/<project_id>-fpkm.txt
    #     result/<project_id>-tpm.txt
    #     result/<project_id>-clinical.txt
    #   The three expression files start with the columns id, gene_name and
    #   gene_type, followed by the columns of the corresponding assay matrix.
    #
    # Returns NULL invisibly; the function is called for its side effects.
    gdcdata <- function(i) {
      library(TCGAbiolinks)
      library(SummarizedExperiment)

      # The project list is re-queried on every call so the function stays
      # self-contained; only TCGA projects are kept, in their original order.
      projects <- as.data.frame(getGDCprojects())
      project_ids <- projects$project_id[grepl("TCGA", projects$project_id)]

      # Fail early with a clear message instead of passing an NA project id
      # on to GDCquery().
      if (!is.numeric(i) || length(i) != 1L || is.na(i) ||
          i < 1 || i > length(project_ids)) {
        stop(
          "`i` must be a single number between 1 and ", length(project_ids),
          call. = FALSE
        )
      }
      project_id <- project_ids[i]

      ## 0. Progress information
      print(paste0("Downloading number ",i,",project name: ",project_id))

      ## 1. Query the expression files
      query_exp <- GDCquery(
        project = project_id,
        data.category = "Transcriptome Profiling",
        data.type = "Gene Expression Quantification",
        workflow.type = "STAR - Counts"
      )

      ## 2. Download
      GDCdownload(query_exp)

      ## 3. Merge the per-sample files into one SummarizedExperiment
      pre_exp <- GDCprepare(query = query_exp)

      ## 4. Extract the expression matrices
      # Assays are selected by name rather than by position so that a change
      # in assay order cannot silently swap counts, FPKM and TPM.
      counts_data <- assay(pre_exp, "unstranded")
      fpkm_data <- assay(pre_exp, "fpkm_unstrand")
      tpm_data <- assay(pre_exp, "tpm_unstrand")

      gene_info <- rowData(pre_exp)
      gene_id <- data.frame(
        id = gene_info$gene_id,
        gene_name = gene_info$gene_name,
        gene_type = gene_info$gene_type
      )
      counts <- cbind(gene_id, counts_data)
      fpkm <- cbind(gene_id, fpkm_data)
      tpm <- cbind(gene_id, tpm_data)

      # Clinical information
      clinical <- GDCquery_clinic(project = project_id, type = "clinical")

      ## 5. Save the data
      # Make sure the output directory exists even when the caller has not
      # created it; this is a no-op when it is already there.
      dir.create("result", showWarnings = FALSE)
      save_table <- function(x, suffix) {
        write.table(
          x,
          paste0("result/", project_id, suffix),
          sep = "\t",
          col.names = TRUE,
          row.names = FALSE,
          quote = FALSE
        )
      }
      save_table(counts, "-counts.txt")
      save_table(fpkm, "-fpkm.txt")
      save_table(tpm, "-tpm.txt")
      save_table(clinical, "-clinical.txt")

      invisible(NULL)
    }
    # Batch driver: create the output directory and download the projects at
    # positions 1..33 of the TCGA project list used by gdcdata().
    # NOTE(review): 33 is hard-coded; presumably the number of TCGA projects
    # returned by GDC - confirm if the project list changes.
    dir.create("result", showWarnings = FALSE)  # no warning when re-running
    failed_projects <- integer(0)
    for (i in seq_len(33)) {
      # One failed download (e.g. a network error) must not abort the whole
      # batch: report it immediately and carry on with the next project.
      succeeded <- tryCatch(
        {
          gdcdata(i)
          TRUE
        },
        error = function(e) {
          message("Project number ", i, " failed: ", conditionMessage(e))
          FALSE
        }
      )
      if (!succeeded) {
        failed_projects <- c(failed_projects, i)
      }
    }
    if (length(failed_projects) > 0) {
      warning(
        "Download failed for project number(s): ",
        paste(failed_projects, collapse = ", "),
        call. = FALSE
      )
    }
    
    
    

    相关文章

      网友评论

        本文标题:2022新版TCGA批量下载表达矩阵及临床信息

        本文链接:https://www.haomeiwen.com/subject/lmjaertx.html