28. 瀑布图绘制
清除当前环境中的变量
rm(list=ls())
设置工作目录
setwd("C:/Users/Dell/Desktop/R_Plots/28waterfall/")
使用waterfalls包绘制瀑布图
# 安装并加载所需的R包
#install.packages("waterfalls")
library(waterfalls)
# 构建示例数据
data <- data.frame(category = letters[1:5],
value = c(100, -20, 10, 20, 110))
head(data)
## category value
## 1 a 100
## 2 b -20
## 3 c 10
## 4 d 20
## 5 e 110
# 使用waterfall函数绘制瀑布图
waterfall(.data = data,
fill_colours = colorRampPalette(c("#1b7cd6", "#d5e6f2"))(5),
fill_by_sign = FALSE)
image.png
使用maftools包绘制瀑布图
# 安装并加载所需的R包
#BiocManager::install("maftools")
library(maftools)
# 查看示例数据
#path to TCGA LAML MAF file
# maf格式的基因突变信息
laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools')
#clinical information containing survival information and histology. This is optional
# 临床表型注释信息
laml.clin = system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools')
# 使用read.maf函数读取数据
laml = read.maf(maf = laml.maf, clinicalData = laml.clin)
## -Reading
## -Validating
## -Silent variants: 475
## -Summarizing
## -Processing clinical data
## -Finished in 0.380s elapsed (0.320s cpu)
#Typing laml shows basic summary of MAF file.
# 查看maf对象
laml
## An object of class MAF
## ID summary Mean Median
## 1: NCBI_Build 37 NA NA
## 2: Center genome.wustl.edu NA NA
## 3: Samples 193 NA NA
## 4: nGenes 1241 NA NA
## 5: Frame_Shift_Del 52 0.271 0
## 6: Frame_Shift_Ins 91 0.474 0
## 7: In_Frame_Del 10 0.052 0
## 8: In_Frame_Ins 42 0.219 0
## 9: Missense_Mutation 1342 6.990 7
## 10: Nonsense_Mutation 103 0.536 0
## 11: Splice_Site 92 0.479 0
## 12: total 1732 9.021 9
#Shows sample summry
# 获取maf对象汇总信息
getSampleSummary(laml)
## Tumor_Sample_Barcode Frame_Shift_Del Frame_Shift_Ins In_Frame_Del
## 1: TCGA-AB-3009 0 5 0
## 2: TCGA-AB-2807 1 0 1
## 3: TCGA-AB-2959 0 0 0
## 4: TCGA-AB-3002 0 0 0
## 5: TCGA-AB-2849 0 1 0
## ---
## 188: TCGA-AB-2933 0 0 0
## 189: TCGA-AB-2942 0 0 0
## 190: TCGA-AB-2946 0 0 0
## 191: TCGA-AB-2954 0 0 0
## 192: TCGA-AB-2982 0 0 0
## In_Frame_Ins Missense_Mutation Nonsense_Mutation Splice_Site total
## 1: 1 25 2 1 34
## 2: 0 16 3 4 25
## 3: 0 22 0 1 23
## 4: 0 15 1 5 21
## 5: 0 16 1 2 20
## ---
## 188: 0 1 0 0 1
## 189: 1 0 0 0 1
## 190: 0 1 0 0 1
## 191: 0 1 0 0 1
## 192: 0 1 0 0 1
#Shows all fields in MAF
getFields(laml)
## [1] "Hugo_Symbol" "Entrez_Gene_Id"
## [3] "Center" "NCBI_Build"
## [5] "Chromosome" "Start_Position"
## [7] "End_Position" "Strand"
## [9] "Variant_Classification" "Variant_Type"
## [11] "Reference_Allele" "Tumor_Seq_Allele1"
## [13] "Tumor_Seq_Allele2" "Tumor_Sample_Barcode"
## [15] "Protein_Change" "i_TumorVAF_WU"
## [17] "i_transcript_name"
# 使用plotmafSummary函数可视化maf对象汇总信息
plotmafSummary(maf = laml,
rmOutlier = TRUE,
addStat = 'median',
dashboard = TRUE,
titvRaw = FALSE)
image.png
# 使用oncoplot函数绘制基因突变瀑布图
#oncoplot for top ten mutated genes.
# 展示top10变异基因的信息
oncoplot(maf = laml, top = 10)
image.png
# 自定义变异类型的颜色
library(RColorBrewer)
vc_cols <- brewer.pal(8,"Set1")
names(vc_cols) <- levels(laml@data$Variant_Classification)
head(vc_cols)
## Frame_Shift_Del Frame_Shift_Ins In_Frame_Del In_Frame_Ins
## "#E41A1C" "#377EB8" "#4DAF4A" "#984EA3"
## Missense_Mutation Nonsense_Mutation
## "#FF7F00" "#FFFF33"
oncoplot(maf = laml, top = 20,colors = vc_cols)
image.png
# 添加临床注释信息,按注释类型进行排序
names(laml@clinical.data)
## [1] "Tumor_Sample_Barcode" "FAB_classification"
## [3] "days_to_last_followup" "Overall_Survival_Status"
oncoplot(maf = laml, top = 20,
clinicalFeatures = "FAB_classification",
sortByAnnotation = T)
image.png
# 展示多个临床注释信息
oncoplot(maf = laml, top = 20,
clinicalFeatures = c("FAB_classification","Overall_Survival_Status"),
sortByAnnotation = T)
image.png
使用GenVisR包绘制瀑布图
# 安装并加载所需的R包
#BiocManager::install("GenVisR")
library(GenVisR)
# 查看内置示例数据
head(brcaMAF)
## Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome
## 1 A2ML1 144568 genome.wustl.edu 37 12
## 2 AADAC 13 genome.wustl.edu 37 3
## 3 AADAT 51166 genome.wustl.edu 37 4
## 4 AASS 10157 genome.wustl.edu 37 7
## 5 ABAT 0 genome.wustl.edu 37 16
## 6 ABCA3 21 genome.wustl.edu 37 16
## Start_Position End_Position Strand Variant_Classification Variant_Type
## 1 8994108 8994108 + Missense_Mutation SNP
## 2 151545656 151545656 + Missense_Mutation SNP
## 3 170991750 170991750 + Silent SNP
## 4 121756793 121756793 + Missense_Mutation SNP
## 5 8857982 8857982 + Silent SNP
## 6 2335631 2335631 + Missense_Mutation SNP
## Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS
## 1 G G C novel
## 2 A A G novel
## 3 G G A novel
## 4 G G A novel
## 5 G G A novel
## 6 C T T novel
## dbSNP_Val_Status Tumor_Sample_Barcode
## 1 TCGA-A1-A0SO-01A-22D-A099-09
## 2 TCGA-A2-A0EU-01A-22W-A071-09
## 3 TCGA-A2-A0ER-01A-21W-A050-09
## 4 TCGA-A2-A0EN-01A-13D-A099-09
## 5 TCGA-A1-A0SI-01A-11D-A142-09
## 6 TCGA-A2-A0D0-01A-11W-A019-09
## Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1
## 1 TCGA-A1-A0SO-10A-03D-A099-09 G
## 2 TCGA-A2-A0EU-10A-01W-A071-09 A
## 3 TCGA-A2-A0ER-10A-01W-A055-09 G
## 4 TCGA-A2-A0EN-10A-01D-A099-09 G
## 5 TCGA-A1-A0SI-10B-01D-A142-09 G
## 6 TCGA-A2-A0D0-10A-01W-A021-09 C
## Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2
## 1 G G C
## 2 A
## 3 G
## 4 G G A
## 5 G
## 6 C
## Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2
## 1 G G
## 2
## 3
## 4 G G
## 5
## 6
## Verification_Status Validation_Status Mutation_Status Sequencing_Phase
## 1 Unknown Valid Somatic Phase_IV
## 2 Unknown Untested Somatic Phase_IV
## 3 Unknown Untested Somatic Phase_IV
## 4 Unknown Valid Somatic Phase_IV
## 5 Unknown Untested Somatic Phase_IV
## 6 Unknown Untested Somatic Phase_IV
## Sequence_Source Validation_Method Score BAM_File Sequencer
## 1 WXS Illumina_WXS_gDNA 1 dbGAP Illumina GAIIx
## 2 WXS none 1 dbGAP Illumina GAIIx
## 3 WXS none 1 dbGAP Illumina GAIIx
## 4 WXS Illumina_WXS_gDNA 1 dbGAP Illumina GAIIx
## 5 WXS none 1 dbGAP Illumina GAIIx
## 6 WXS none 1 dbGAP Illumina GAIIx
## Tumor_Sample_UUID
## 1 b3568259-c63c-4eb1-bbc7-af711ddd33db
## 2 de30da8f-903f-428e-a63d-59625fc858a9
## 3 31ed187e-9bfe-4ca3-8cbb-10c1e0184331
## 4 12362ad7-6866-4e7a-9ec6-8a0a68df8896
## 5 e218c272-a7e1-4bc9-b8c5-d2d1c903550f
## 6 3f20d0fe-aaa1-40f1-b2c1-7f070f93aef5
## Matched_Norm_Sample_UUID chromosome_name_WU start_WU
## 1 17ba8cdb-e35b-4496-a787-d1a7ee7d4a1e 12 8994108
## 2 1583a7c5-c835-44fa-918a-1448abf6533d 3 151545656
## 3 2bc2fdaf-fb2f-4bfd-9e20-e20edff6633a 4 170991750
## 4 ad478c68-a18b-4529-ad7a-86039e6da6b1 7 121756793
## 5 fbcab9dc-4a6b-4928-9459-699c9932e3e1 16 8857982
## 6 bbf1c43d-d7b3-4574-a074-d22ad537829c 16 2335631
## stop_WU reference_WU variant_WU type_WU gene_name_WU
## 1 8994108 G C SNP A2ML1
## 2 151545656 A G SNP AADAC
## 3 170991750 G A SNP AADAT
## 4 121756793 G A SNP AASS
## 5 8857982 G A SNP ABAT
## 6 2335631 C T SNP ABCA3
## transcript_name_WU transcript_species_WU transcript_source_WU
## 1 NM_144670.3 human genbank
## 2 NM_001086.2 human genbank
## 3 NM_016228.3 human genbank
## 4 NM_005763.3 human genbank
## 5 NM_000663.4 human genbank
## 6 NM_001089.2 human genbank
## transcript_version_WU strand_WU transcript_status_WU trv_type_WU
## 1 58_37c 1 validated missense
## 2 58_37c 1 reviewed missense
## 3 58_37c -1 reviewed silent
## 4 58_37c -1 reviewed missense
## 5 58_37c 1 reviewed silent
## 6 58_37c -1 reviewed missense
## c_position_WU amino_acid_change_WU ucsc_cons_WU
## 1 c.1224 p.W408C 0.995
## 2 c.896 p.N299S 0.000
## 3 c.708 p.L236 1.000
## 4 c.788 p.T263M 1.000
## 5 c.423 p.E141 0.987
## 6 c.3295 p.D1099N 0.980
## domain_WU
## 1 NULL
## 2 HMMPfam_Abhydrolase_3,superfamily_alpha/beta-Hydrolases
## 3 HMMPfam_Aminotran_1_2,superfamily_PLP-dependent transferases
## 4 HMMPfam_AlaDh_PNT_C
## 5 HMMPfam_Aminotran_3,superfamily_PyrdxlP-dep_Trfase_major
## 6 NULL
## all_domains_WU
## 1 HMMPfam_A2M,HMMPfam_A2M_N,superfamily_Terpenoid cyclases/Protein prenyltransferases,HMMPfam_A2M_recep,superfamily_Alpha-macroglobulin receptor domain,HMMPfam_A2M_N_2,HMMPfam_A2M_comp,HMMPfam_Thiol-ester_cl,PatternScan_ALPHA_2_MACROGLOBULIN
## 2 PatternScan_LIPASE_GDXG_SER,HMMPfam_Abhydrolase_3,superfamily_alpha/beta-Hydrolases
## 3 HMMPfam_Aminotran_1_2,superfamily_PLP-dependent transferases
## 4 HMMPfam_Saccharop_dh,HMMPfam_AlaDh_PNT_C,HMMPfam_AlaDh_PNT_N,superfamily_NAD(P)-binding Rossmann-fold domains,superfamily_Formate/glycerate dehydrogenase catalytic domain-like,superfamily_Glyceraldehyde-3-phosphate dehydrogenase-like C-terminal domain
## 5 HMMPfam_Aminotran_3,PatternScan_AA_TRANSFER_CLASS_3,superfamily_PyrdxlP-dep_Trfase_major
## 6 HMMPfam_ABC_tran,HMMSmart_SM00382,PatternScan_ABC_TRANSPORTER_1,superfamily_P-loop containing nucleoside triphosphate hydrolases
## deletion_substructures_WU transcript_error
## 1 - no_errors
## 2 - no_errors
## 3 - no_errors
## 4 - no_errors
## 5 - no_errors
## 6 - no_errors
names(brcaMAF)
## [1] "Hugo_Symbol" "Entrez_Gene_Id"
## [3] "Center" "NCBI_Build"
## [5] "Chromosome" "Start_Position"
## [7] "End_Position" "Strand"
## [9] "Variant_Classification" "Variant_Type"
## [11] "Reference_Allele" "Tumor_Seq_Allele1"
## [13] "Tumor_Seq_Allele2" "dbSNP_RS"
## [15] "dbSNP_Val_Status" "Tumor_Sample_Barcode"
## [17] "Matched_Norm_Sample_Barcode" "Match_Norm_Seq_Allele1"
## [19] "Match_Norm_Seq_Allele2" "Tumor_Validation_Allele1"
## [21] "Tumor_Validation_Allele2" "Match_Norm_Validation_Allele1"
## [23] "Match_Norm_Validation_Allele2" "Verification_Status"
## [25] "Validation_Status" "Mutation_Status"
## [27] "Sequencing_Phase" "Sequence_Source"
## [29] "Validation_Method" "Score"
## [31] "BAM_File" "Sequencer"
## [33] "Tumor_Sample_UUID" "Matched_Norm_Sample_UUID"
## [35] "chromosome_name_WU" "start_WU"
## [37] "stop_WU" "reference_WU"
## [39] "variant_WU" "type_WU"
## [41] "gene_name_WU" "transcript_name_WU"
## [43] "transcript_species_WU" "transcript_source_WU"
## [45] "transcript_version_WU" "strand_WU"
## [47] "transcript_status_WU" "trv_type_WU"
## [49] "c_position_WU" "amino_acid_change_WU"
## [51] "ucsc_cons_WU" "domain_WU"
## [53] "all_domains_WU" "deletion_substructures_WU"
## [55] "transcript_error"
# 使用waterfall函数绘制瀑布图
# Plot only genes with mutations in 6% or more of samples
# 只展示至少在6%的样本中变异的基因
waterfall(brcaMAF, fileType="MAF", mainRecurCutoff = 0.06)
## Checking if input is properly formatted...
## Calculating frequency of mutations...
## setting mutation hierarchy...
## Performing recurrence cutoff...
## NULL
image.png
# Plot only the specified genes
# 展示特定基因的变异信息
# Define specific genes to plot
genes_to_plot <- c("PIK3CA", "TP53", "USH2A", "MLL3", "BRCA1", "CDKN1B")
waterfall(brcaMAF, plotGenes = genes_to_plot)
# Checking if input is properly formatted...
## Calculating frequency of mutations...
## Removing genes not in: PIK3CA, TP53, USH2A, MLL3, BRCA1, CDKN1B
## setting mutation hierarchy...
## NULL
image.png
# Create clinical data
# 添加临床表型信息
subtype <- c("lumA", "lumB", "her2", "basal", "normal")
subtype <- sample(subtype, 50, replace = TRUE)
age <- c("20-30", "31-50", "51-60", "61+")
age <- sample(age, 50, replace = TRUE)
sample <- as.character(unique(brcaMAF$Tumor_Sample_Barcode))
clinical <- as.data.frame(cbind(sample, subtype, age))
# Melt the clinical data into 'long' format.
library(reshape2)
clinical <- melt(clinical, id.vars = c("sample"))
head(clinical)
## sample variable value
## 1 TCGA-A1-A0SO-01A-22D-A099-09 subtype normal
## 2 TCGA-A2-A0EU-01A-22W-A071-09 subtype normal
## 3 TCGA-A2-A0ER-01A-21W-A050-09 subtype lumB
## 4 TCGA-A2-A0EN-01A-13D-A099-09 subtype lumA
## 5 TCGA-A1-A0SI-01A-11D-A142-09 subtype lumB
## 6 TCGA-A2-A0D0-01A-11W-A019-09 subtype lumA
# Run waterfall
waterfall(brcaMAF, clinDat = clinical,
clinVarCol = c(lumA = "blue4", lumB = "deepskyblue",
her2 = "hotpink2", basal = "firebrick2",
normal = "green4",
`20-30` = "#ddd1e7", `31-50` = "#bba3d0",
`51-60` = "#9975b9", `61+` = "#7647a2"),
plotGenes = c("PIK3CA", "TP53", "USH2A", "MLL3", "BRCA1"),
clinLegCol = 2,
clinVarOrder = c("lumA", "lumB", "her2", "basal", "normal", "20-30", "31-50", "51-60", "61+"))
## Checking if input is properly formatted...
## Calculating frequency of mutations...
## Removing genes not in: PIK3CA, TP53, USH2A, MLL3, BRCA1
## setting mutation hierarchy...
image.png
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Chinese (Simplified)_China.936
## [2] LC_CTYPE=Chinese (Simplified)_China.936
## [3] LC_MONETARY=Chinese (Simplified)_China.936
## [4] LC_NUMERIC=C
## [5] LC_TIME=Chinese (Simplified)_China.936
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] reshape2_1.4.3 GenVisR_1.16.1 RColorBrewer_1.1-2
## [4] maftools_2.0.16 Biobase_2.44.0 BiocGenerics_0.30.0
## [7] waterfalls_0.1.2
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-6 matrixStats_0.54.0
## [3] bit64_0.9-7 doParallel_1.0.14
## [5] progress_1.2.2 httr_1.4.0
## [7] GenomeInfoDb_1.20.0 tools_3.6.0
## [9] R6_2.4.0 DBI_1.0.0
## [11] lazyeval_0.2.2 colorspace_1.4-1
## [13] withr_2.1.2 tidyselect_0.2.5
## [15] gridExtra_2.3 prettyunits_1.0.2
## [17] bit_1.1-14 compiler_3.6.0
## [19] DelayedArray_0.10.0 pkgmaker_0.27
## [21] rtracklayer_1.44.0 labeling_0.3
## [23] scales_1.0.0 NMF_0.21.0
## [25] stringr_1.4.0 digest_0.6.20
## [27] Rsamtools_2.0.0 rmarkdown_1.13
## [29] XVector_0.24.0 pkgconfig_2.0.2
## [31] htmltools_0.3.6 bibtex_0.4.2
## [33] BSgenome_1.52.0 rlang_0.4.7
## [35] RSQLite_2.1.1 gtools_3.8.1
## [37] BiocParallel_1.17.18 dplyr_0.8.3
## [39] VariantAnnotation_1.30.1 RCurl_1.95-4.12
## [41] magrittr_1.5 GenomeInfoDbData_1.2.1
## [43] wordcloud_2.6 Matrix_1.2-17
## [45] Rcpp_1.0.5 munsell_0.5.0
## [47] S4Vectors_0.22.0 viridis_0.5.1
## [49] stringi_1.4.3 yaml_2.2.0
## [51] SummarizedExperiment_1.14.0 zlibbioc_1.30.0
## [53] plyr_1.8.4 FField_0.1.0
## [55] grid_3.6.0 blob_1.1.1
## [57] crayon_1.3.4 lattice_0.20-38
## [59] Biostrings_2.52.0 splines_3.6.0
## [61] GenomicFeatures_1.36.3 hms_0.4.2
## [63] knitr_1.23 pillar_1.4.2
## [65] GenomicRanges_1.36.0 rngtools_1.4
## [67] codetools_0.2-16 biomaRt_2.40.1
## [69] stats4_3.6.0 XML_3.98-1.20
## [71] glue_1.3.1 evaluate_0.14
## [73] data.table_1.12.2 foreach_1.4.4
## [75] gtable_0.3.0 purrr_0.3.2
## [77] assertthat_0.2.1 ggplot2_3.2.0
## [79] xfun_0.8 gridBase_0.4-7
## [81] xtable_1.8-4 viridisLite_0.3.0
## [83] survival_2.44-1.1 tibble_2.1.3
## [85] iterators_1.0.10 GenomicAlignments_1.20.1
## [87] AnnotationDbi_1.46.0 registry_0.5-1
## [89] memoise_1.1.0 IRanges_2.18.1
## [91] cluster_2.0.8
网友评论