在分析TCGA数据时,不可避免地需要对亚组的突变情况进行展示,这个时候oncoprint瀑布图便成为了首选,下面就来共同学习下如何提取亚组的突变信息吧。
1.突变及临床数据下载
# rm(list = ls())
# BiocManager::install("TCGAbiolinks", ask = FALSE, update = FALSE)
library(TCGAbiolinks)
library(maftools)

# Clinical data: query the TCGA-PRAD clinical XML files, download them and
# parse the patient-level table.
clinical <- GDCquery(
  project = "TCGA-PRAD",
  data.category = "Clinical",
  file.type = "xml"
)
GDCdownload(clinical)
cliquery <- GDCprepare_clinic(clinical, clinical.info = "patient")
# read.maf() looks for a "Tumor_Sample_Barcode" column in clinicalData, so the
# first column (presumably the patient barcode -- verify) is renamed to it.
colnames(cliquery)[1] <- "Tumor_Sample_Barcode"

# Mutation data. GDCquery_Maf() is kept when the installed TCGAbiolinks still
# exports it; newer releases appear to have dropped it, in which case the
# open-access masked somatic mutation MAF is fetched through the generic
# GDCquery() / GDCdownload() / GDCprepare() route instead.
if ("GDCquery_Maf" %in% getNamespaceExports("TCGAbiolinks")) {
  mut <- GDCquery_Maf(tumor = "PRAD", pipelines = "mutect2")
} else {
  maf_query <- GDCquery(
    project = "TCGA-PRAD",
    data.category = "Simple Nucleotide Variation",
    data.type = "Masked Somatic Mutation",
    access = "open"
  )
  GDCdownload(maf_query)
  mut <- GDCprepare(maf_query)
}
查看样本名称(完整条形码较长,后续将截取前12位以便与分组信息匹配)
# Print the first sample barcode; the recorded output below shows the
# full-length format before any trimming.
mut[["Tumor_Sample_Barcode"]][1]
## [1] "TCGA-G9-6353-01A-11D-1961-08"
2.提取亚组突变信息
# Keep only the first 12 characters of every barcode so it can be matched
# against the ids in the grouping table. Base substr() is used because
# stringr (which provides str_sub()) is never attached in this script.
mut$Tumor_Sample_Barcode <- substr(mut$Tumor_Sample_Barcode, 1, 12)

# Grouping table read from disk; the code below uses its `id` and `risk`
# columns.
rt <- data.table::fread("tcga_group.txt", data.table = FALSE)

# Split the mutation records by risk group. `%in%` never yields NA, so rows
# with a missing risk label are simply left out of both groups.
mut.high <- mut[mut$Tumor_Sample_Barcode %in% rt$id[rt$risk %in% "high"], ]
mut.low <- mut[mut$Tumor_Sample_Barcode %in% rt$id[rt$risk %in% "low"], ]
3.瀑布图
library(maftools)
# Build one MAF object per risk group, attaching the clinical table to each.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned. The `##` lines are console output recorded from a previous run.
maf.high <- read.maf(maf = mut.high, clinicalData = cliquery, isTCGA = TRUE)
## -Validating
## --Removed 1 duplicated variants
## -Silent variants: 7135
## -Summarizing
## --Possible FLAGS among top ten genes:
## TTN
## MUC16
## SYNE1
## -Processing clinical data
## -Finished in 2.280s elapsed (2.220s cpu)
maf.low <- read.maf(maf = mut.low, clinicalData = cliquery, isTCGA = TRUE)
## -Validating
## -Silent variants: 2481
## -Summarizing
## --Possible FLAGS among top ten genes:
## TTN
## MUC16
## SYNE1
## -Processing clinical data
## -Finished in 0.640s elapsed (0.560s cpu)
# Colours for the variant classifications (one named colour per class)
col <- setNames(
  RColorBrewer::brewer.pal(n = 10, name = "Paired"),
  c(
    "Frame_Shift_Del", "Missense_Mutation", "Nonsense_Mutation",
    "Frame_Shift_Ins", "In_Frame_Ins", "Splice_Site", "In_Frame_Del",
    "Nonstop_Mutation", "Translation_Start_Site", "Multi_Hit"
  )
)

# Colours for the race categories
racecolors <- setNames(
  RColorBrewer::brewer.pal(n = 4, name = "Spectral"),
  c(
    "ASIAN", "WHITE", "BLACK_OR_AFRICAN_AMERICAN",
    "AMERICAN_INDIAN_OR_ALASKA_NATIVE"
  )
)

# Oncoplot of the top 10 genes for each risk group, using the variant-class
# palette defined above
oncoplot(maf = maf.high, colors = col, top = 10)
oncoplot(maf = maf.low, colors = col, top = 10)
参考链接:
Summarize, Analyze and Visualize MAF Files
网友评论