(1)分析单个基因在TCGA多个类型肿瘤中的分布(正常/肿瘤)
- 通过TCGAbiolinks下载表达量数据集
- 通过基因注释,将表达量矩阵转换为基于gene symbol的表达矩阵
- 绘图展示单个基因在tcga数据库泛癌中的分布
第一步,下载表达矩阵
#=======================================================
#=======================================================
library(GenomicDataCommons)
setwd('D:\\SCIwork\\F16DMDmeta\\review\\TCGA')
rm(list=ls())
library(dplyr)
library(TCGAbiolinks)
library(dplyr)
library(DT)
library(SummarizedExperiment)
library(stringr)
#=======================================================
#=======================================================
# List every GDC project and keep only the TCGA ones, sorted alphabetically.
# The public `::` accessor is used here instead of the internal `:::`.
cancer <- TCGAbiolinks::getGDCprojects()$project_id
cancer <- sort(str_subset(cancer, "TCGA"))

# For each TCGA project: query, download and prepare the gene expression
# quantification files, then write the assay matrix to
# "<project>-Counts.csv" in the current working directory.
# seq_along() is used so the loop follows the actual number of projects
# returned by GDC rather than a hard-coded count.
for (i in seq_along(cancer)) {
  cancer_select <- cancer[i]
  print(cancer_select)

  # Query the RNA-seq expression files for this project.
  # NOTE(review): the workflow requested here is FPKM, although the output
  # file below is named "Counts" -- confirm which quantification is intended.
  # NOTE(review): newer GDC data releases appear to have replaced the HTSeq
  # workflows with "STAR - Counts"; verify this workflow.type is still served.
  # A failed query is turned into NULL so one project cannot abort the loop.
  query <- tryCatch(
    suppressMessages(
      GDCquery(
        project = cancer_select,
        data.category = "Transcriptome Profiling",
        data.type = "Gene Expression Quantification",
        workflow.type = "HTSeq - FPKM"
      )
    ),
    error = function(e) {
      message(
        "GDCquery failed for ", cancer_select, ": ", conditionMessage(e)
      )
      NULL
    }
  )

  if (is.null(query)) {
    # The query does not filter by sample type, so report it as a plain
    # "no data" condition for the project.
    print(paste0("No FPKM data found for ", cancer_select))
    next
  }

  # Download the files in chunks, then assemble them into one object that is
  # also saved to "<project>.rda".
  GDCdownload(query, method = "api", files.per.chunk = 150)
  expdat <- GDCprepare(
    query = query,
    save = TRUE,
    save.filename = paste0(cancer_select, ".rda")
  )

  # Extract the expression matrix and export it as CSV.
  count_matrix <- assay(expdat)
  write.csv(count_matrix, file = paste0(cancer_select, "-Counts.csv"))
}
网友评论