一、安装 TCGAbiolinks 包
# Install BiocManager from CRAN only when it cannot be loaded yet, then
# use it to install TCGAbiolinks.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("TCGAbiolinks")
二、选择癌症类型
# Attach the packages loaded by this tutorial. The console prompts ("> ")
# of the original transcript are removed so the snippet can be sourced.
library(TCGAbiolinks)
library(dplyr)
library(DT)
library(SummarizedExperiment)

# Print the project IDs reported by GDC.
TCGAbiolinks::getGDCprojects()$project_id
# Output recorded in the original transcript (the live list may differ):
# [1] "HCMI-CMDC" "GENIE-MSK" "TCGA-UCEC" "TCGA-LGG" "TCGA-SARC" "TCGA-PAAD"
# [7] "TCGA-ESCA" "TCGA-PRAD" "GENIE-VICC" "TCGA-LAML" "TCGA-KIRC" "TCGA-PCPG"
# [13] "TCGA-HNSC" "GENIE-JHU" "TCGA-OV" "TCGA-GBM" "TCGA-UCS" "TCGA-MESO"
# [19] "TCGA-TGCT" "TCGA-KICH" "TCGA-READ" "TCGA-UVM" "TCGA-THCA" "OHSU-CNL"
# [25] "GENIE-DFCI" "GENIE-NKI" "GENIE-GRCC" "FM-AD" "GENIE-UHN" "GENIE-MDA"
# [31] "TCGA-LIHC" "TCGA-THYM" "TCGA-CHOL" "TARGET-ALL-P1" "ORGANOID-PANCREATIC" "TCGA-DLBC"
# [37] "TCGA-KIRP" "TCGA-BLCA" "CPTAC-2" "TARGET-ALL-P3" "TARGET-CCSK" "TARGET-NBL"
# [43] "TARGET-AML" "TARGET-ALL-P2" "NCICCR-DLBCL" "CTSP-DLBCL1" "TARGET-RT" "TARGET-OS"
# [49] "TCGA-BRCA" "TCGA-COAD" "TCGA-CESC" "TCGA-LUSC" "TCGA-STAD" "TCGA-SKCM"
# [55] "CMI-MBC" "CMI-ASC" "TCGA-LUAD" "TARGET-WT" "TCGA-ACC" "BEATAML1.0-CRENOLANIB"
# [61] "BEATAML1.0-COHORT" "VAREPOP-APOLLO" "MMRF-COMMPASS" "WCDT-MCRPC" "CPTAC-3" "CGCI-BLGSP"
# [67] "CGCI-HTMCP-CC"

# Project ID passed as `project` to the query calls in the download steps.
cancer_type <- "TCGA-PRAD"
三、数据下载
- 下载临床数据
# Fetch the clinical table for the selected project and save it as CSV.
clinical <- GDCquery_clinic(
  project = cancer_type,
  type = "clinical"
)
write.csv(clinical, file = "TCGAbiolinks_TCGA_PRAD_clinical.csv")
- 下载RNA-seq的counts
# Query the gene expression quantification files for the project.
# NOTE(review): recent GDC data releases appear to have replaced the
# "HTSeq - Counts" workflow with "STAR - Counts" -- confirm that this
# query still returns files before relying on it.
query <- GDCquery(
  project = cancer_type,
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts"
)

# Download the matched files through the GDC API, 100 files per chunk.
GDCdownload(query, method = "api", files.per.chunk = 100)

# Read the downloaded files, pull out the assay matrix and save it as CSV.
expdat <- GDCprepare(query = query)
count_matrix <- assay(expdat)
write.csv(count_matrix, file = "TCGA_PRAD_Counts.csv")
- 下载miRNA数据
# Query the miRNA expression quantification files for the project.
query <- GDCquery(
  project = cancer_type,
  data.category = "Transcriptome Profiling",
  data.type = "miRNA Expression Quantification",
  workflow.type = "BCGSC miRNA Profiling"
)

# Download the matched files through the GDC API, 50 files per chunk.
GDCdownload(query, method = "api", files.per.chunk = 50)
expdat <- GDCprepare(query = query)

# Per the TCGAbiolinks documentation, GDCprepare() yields a plain data
# frame for miRNA quantification; assay() has no method for that and would
# stop with an error. Only unwrap when a SummarizedExperiment comes back.
count_matrix <- if (methods::is(expdat, "SummarizedExperiment")) {
  assay(expdat)
} else {
  expdat
}
write.csv(count_matrix, file = "TCGA_PRAD_miRNA.csv")
- 下载Copy Number Variation数据
# Query the copy number segment files for the project.
query <- GDCquery(
  project = cancer_type,
  data.category = "Copy Number Variation",
  data.type = "Copy Number Segment"
)

# Download the matched files through the GDC API, 50 files per chunk.
GDCdownload(query, method = "api", files.per.chunk = 50)
expdat <- GDCprepare(query = query)

# Per the TCGAbiolinks documentation, copy number segments are prepared as
# a plain data frame; assay() has no method for that and would stop with
# an error. Only unwrap when a SummarizedExperiment comes back.
count_matrix <- if (methods::is(expdat, "SummarizedExperiment")) {
  assay(expdat)
} else {
  expdat
}
write.csv(count_matrix, file = "TCGA_PRAD_Copy-Number-Variation.csv")
- 下载甲基化数据
# Query the Illumina 450K DNA methylation files for the project.
# NOTE(review): newer TCGAbiolinks releases may no longer accept the
# `legacy` argument -- confirm against the installed version.
query.met <- GDCquery(
  project = cancer_type,
  legacy = TRUE,
  data.category = "DNA methylation",
  platform = c("Illumina Human Methylation 450")
)

# Download the matched files through the GDC API, 300 files per chunk.
GDCdownload(query.met, method = "api", files.per.chunk = 300)

# Prepare the methylation query itself. The original passed `query`, the
# object left over from the preceding copy-number section, so that data
# ended up in the methylation CSV.
expdat <- GDCprepare(query = query.met)
count_matrix <- assay(expdat)
write.csv(count_matrix, file = "TCGA_PRAD_methylation.csv")
网友评论