之前试过直接从GDC下载数据,批量下载确实挺快,但缺点也很明显:
下载下来的数据是一个个单独的文件,而不是整合好的矩阵。
![](https://img.haomeiwen.com/i21086530/65eaa0f3039cf2bc.png)
然后看了别人的教程,决定试试R包TCGAbiolinks。
参考教程
https://www.jianshu.com/p/3b4c07f7e5f3
https://www.jianshu.com/p/00f6ed2d5cff
https://cloud.tencent.com/developer/article/1424598
1.安装TCGAbiolinks
# Install BiocManager from CRAN only when it is missing; calling
# library(BiocManager) on a fresh R installation would otherwise fail.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
library(BiocManager)
# TCGAbiolinks is distributed through Bioconductor, not CRAN.
BiocManager::install("TCGAbiolinks")
2.查看肿瘤类型
# Attach the package so that its query/download helpers are available.
library(TCGAbiolinks)
# Print the identifier of every project returned by getGDCprojects()
# (the value used for `cancer_type` below is one of these, e.g. "TCGA-LUAD").
TCGAbiolinks::getGDCprojects()[["project_id"]]
另外,各肿瘤类型的缩写可参照 https://www.jianshu.com/p/3c0f74e85825
3.下载临床数据
# Project to download; the RNA-seq and miRNA steps below reuse `cancer_type`.
cancer_type <- "TCGA-LUAD"
# Fetch the clinical table for the project. The result must be stored under
# the same name that View() and write.csv() read below.
clinical <- GDCquery_clinic(project = cancer_type, type = "clinical")
View(clinical) # note: View() is spelled with a capital V
write.csv(clinical, file = "TCGAbiolinks_LUAD_clinical.csv") # save to disk
4.下载RNA-seq的counts数据
library(dplyr)
library(DT)
library(SummarizedExperiment) # provides assay()

# Query parameters for the gene-level RNA-seq count files.
data_type <- "Gene Expression Quantification"
data_category <- "Transcriptome Profiling"
# The harmonized GDC data no longer offers "HTSeq - Counts"; GDCquery() rejects
# it and lists "STAR - Counts" as the valid workflow for this data type.
workflow_type <- "STAR - Counts"

query_TranscriptomeCounts <- GDCquery(
  project = cancer_type,
  data.category = data_category,
  data.type = data_type,
  workflow.type = workflow_type
)
# Official reference for the query arguments:
# http://www.bioconductor.org/packages/release/bioc/vignettes/TCGAbiolinks/inst/doc/query.html#useful_information

# Download the matched files through the GDC API, then assemble them into a
# single object.
GDCdownload(query_TranscriptomeCounts, method = "api")
expdat <- GDCprepare(query = query_TranscriptomeCounts)

# assay() without a name returns the first assay of the object.
# NOTE(review): for "STAR - Counts" this is presumably the unstranded raw
# counts -- confirm with assayNames(expdat).
count_matrix <- assay(expdat)
View(count_matrix)
write.csv(count_matrix, file = "TCGAbiolinks_LUAD_counts.csv")
5.下载miRNA的counts数据
# Print the GDC summary for the project before building the query.
TCGAbiolinks:::getProjectSummary(cancer_type)

query_mi <- GDCquery(
  project = cancer_type,
  data.category = "Transcriptome Profiling",
  data.type = "miRNA Expression Quantification",
  workflow.type = "BCGSC miRNA Profiling"
)

# Download once, in chunks of 50 files per request, and prepare once. The
# original repeated the download/prepare pair, which only re-parsed the same
# files a second time.
GDCdownload(query_mi, method = "api", files.per.chunk = 50)
expdat <- GDCprepare(query = query_mi)

# Full table exactly as returned by GDCprepare().
write.csv(expdat, file = paste(cancer_type, "miRNAs.csv", sep = "-"))

# Promote the first column to row names and drop it from the table.
# NOTE(review): the first column is presumably the miRNA identifier -- confirm.
row.names(expdat) <- as.character(expdat[[1]])
expdat <- expdat[, -1, drop = FALSE]

# Every remaining column name is "<measure>_TCGA<rest of barcode>"; keep the
# part after "TCGA" so each sample is listed once across all measures.
col_name <- vapply(
  colnames(expdat),
  function(x) strsplit(x, split = "TCGA", fixed = TRUE)[[1]][2],
  character(1),
  USE.NAMES = FALSE
)
col_name <- unique(col_name)

# Rebuild the per-sample column names for the two measures that are exported.
rpkm_names <- paste0("reads_per_million_miRNA_mapped_TCGA", col_name)
count_names <- paste0("read_count_TCGA", col_name)

# The first file holds the reads-per-million columns; its "RPKM" file name is
# kept unchanged so existing downstream scripts still find it.
write.csv(
  expdat[, rpkm_names],
  file = paste(cancer_type, "miRNAs_RPKM.csv", sep = "-")
)
write.csv(
  expdat[, count_names],
  file = paste(cancer_type, "miRNAs_counts.csv", sep = "-")
)
网友评论