R语言可视化（二十八）：瀑布图绘制

作者: Davey1220 | 来源:发表于2020-10-18 09:49 被阅读0次

28. 瀑布图绘制

清除当前环境中的变量

rm(list=ls())

设置工作目录

setwd("C:/Users/Dell/Desktop/R_Plots/28waterfall/")

使用waterfalls包绘制瀑布图

# 安装并加载所需的R包
#install.packages("waterfalls")
library(waterfalls)

# 构建示例数据
data <- data.frame(category = letters[1:5],
                   value = c(100, -20, 10, 20, 110))
head(data)
##   category value
## 1        a   100
## 2        b   -20
## 3        c    10
## 4        d    20
## 5        e   110

# 使用waterfall函数绘制瀑布图
waterfall(.data = data, 
          fill_colours = colorRampPalette(c("#1b7cd6", "#d5e6f2"))(5),
          fill_by_sign = FALSE)

image.png

使用maftools包绘制瀑布图

# 安装并加载所需的R包
#BiocManager::install("maftools")
library(maftools)

# 查看示例数据
#path to TCGA LAML MAF file
# maf格式的基因突变信息
laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools') 
#clinical information containing survival information and histology. This is optional
# 临床表型注释信息
laml.clin = system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools') 

# 使用read.maf函数读取数据
laml = read.maf(maf = laml.maf, clinicalData = laml.clin)
## -Reading
## -Validating
## -Silent variants: 475 
## -Summarizing
## -Processing clinical data
## -Finished in 0.380s elapsed (0.320s cpu)

#Typing laml shows basic summary of MAF file.
# 查看maf对象
laml
## An object of class  MAF 
##                    ID          summary  Mean Median
##  1:        NCBI_Build               37    NA     NA
##  2:            Center genome.wustl.edu    NA     NA
##  3:           Samples              193    NA     NA
##  4:            nGenes             1241    NA     NA
##  5:   Frame_Shift_Del               52 0.271      0
##  6:   Frame_Shift_Ins               91 0.474      0
##  7:      In_Frame_Del               10 0.052      0
##  8:      In_Frame_Ins               42 0.219      0
##  9: Missense_Mutation             1342 6.990      7
## 10: Nonsense_Mutation              103 0.536      0
## 11:       Splice_Site               92 0.479      0
## 12:             total             1732 9.021      9

#Shows sample summry
# 获取maf对象汇总信息
getSampleSummary(laml)
##      Tumor_Sample_Barcode Frame_Shift_Del Frame_Shift_Ins In_Frame_Del
##   1:         TCGA-AB-3009               0               5            0
##   2:         TCGA-AB-2807               1               0            1
##   3:         TCGA-AB-2959               0               0            0
##   4:         TCGA-AB-3002               0               0            0
##   5:         TCGA-AB-2849               0               1            0
##  ---                                                                  
## 188:         TCGA-AB-2933               0               0            0
## 189:         TCGA-AB-2942               0               0            0
## 190:         TCGA-AB-2946               0               0            0
## 191:         TCGA-AB-2954               0               0            0
## 192:         TCGA-AB-2982               0               0            0
##      In_Frame_Ins Missense_Mutation Nonsense_Mutation Splice_Site total
##   1:            1                25                 2           1    34
##   2:            0                16                 3           4    25
##   3:            0                22                 0           1    23
##   4:            0                15                 1           5    21
##   5:            0                16                 1           2    20
##  ---                                                                   
## 188:            0                 1                 0           0     1
## 189:            1                 0                 0           0     1
## 190:            0                 1                 0           0     1
## 191:            0                 1                 0           0     1
## 192:            0                 1                 0           0     1

#Shows all fields in MAF
getFields(laml)
##  [1] "Hugo_Symbol"            "Entrez_Gene_Id"        
##  [3] "Center"                 "NCBI_Build"            
##  [5] "Chromosome"             "Start_Position"        
##  [7] "End_Position"           "Strand"                
##  [9] "Variant_Classification" "Variant_Type"          
## [11] "Reference_Allele"       "Tumor_Seq_Allele1"     
## [13] "Tumor_Seq_Allele2"      "Tumor_Sample_Barcode"  
## [15] "Protein_Change"         "i_TumorVAF_WU"         
## [17] "i_transcript_name"

# 使用plotmafSummary函数可视化maf对象汇总信息
plotmafSummary(maf = laml, 
               rmOutlier = TRUE, 
               addStat = 'median', 
               dashboard = TRUE, 
               titvRaw = FALSE)

image.png

# 使用oncoplot函数绘制基因突变瀑布图
#oncoplot for top ten mutated genes.
# 展示top10变异基因的信息
oncoplot(maf = laml, top = 10)

image.png

# 自定义变异类型的颜色
library(RColorBrewer)
vc_cols <- brewer.pal(8,"Set1")
names(vc_cols) <- levels(laml@data$Variant_Classification)
head(vc_cols)
##   Frame_Shift_Del   Frame_Shift_Ins      In_Frame_Del      In_Frame_Ins 
##         "#E41A1C"         "#377EB8"         "#4DAF4A"         "#984EA3" 
## Missense_Mutation Nonsense_Mutation 
##         "#FF7F00"         "#FFFF33"

oncoplot(maf = laml, top = 20,colors = vc_cols)

image.png

# 添加临床注释信息，按注释类型进行排序
names(laml@clinical.data)
## [1] "Tumor_Sample_Barcode"    "FAB_classification"     
## [3] "days_to_last_followup"   "Overall_Survival_Status"

oncoplot(maf = laml, top = 20,
         clinicalFeatures = "FAB_classification",
         sortByAnnotation = T)

image.png

# 展示多个临床注释信息
oncoplot(maf = laml, top = 20,
         clinicalFeatures = c("FAB_classification","Overall_Survival_Status"),
         sortByAnnotation = T)

image.png

使用GenVisR包绘制瀑布图

# 安装并加载所需的R包
#BiocManager::install("GenVisR")
library(GenVisR)

# 查看内置示例数据
head(brcaMAF)
##   Hugo_Symbol Entrez_Gene_Id           Center NCBI_Build Chromosome
## 1       A2ML1         144568 genome.wustl.edu         37         12
## 2       AADAC             13 genome.wustl.edu         37          3
## 3       AADAT          51166 genome.wustl.edu         37          4
## 4        AASS          10157 genome.wustl.edu         37          7
## 5        ABAT              0 genome.wustl.edu         37         16
## 6       ABCA3             21 genome.wustl.edu         37         16
##   Start_Position End_Position Strand Variant_Classification Variant_Type
## 1        8994108      8994108      +      Missense_Mutation          SNP
## 2      151545656    151545656      +      Missense_Mutation          SNP
## 3      170991750    170991750      +                 Silent          SNP
## 4      121756793    121756793      +      Missense_Mutation          SNP
## 5        8857982      8857982      +                 Silent          SNP
## 6        2335631      2335631      +      Missense_Mutation          SNP
##   Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS
## 1                G                 G                 C    novel
## 2                A                 A                 G    novel
## 3                G                 G                 A    novel
## 4                G                 G                 A    novel
## 5                G                 G                 A    novel
## 6                C                 T                 T    novel
##   dbSNP_Val_Status         Tumor_Sample_Barcode
## 1                  TCGA-A1-A0SO-01A-22D-A099-09
## 2                  TCGA-A2-A0EU-01A-22W-A071-09
## 3                  TCGA-A2-A0ER-01A-21W-A050-09
## 4                  TCGA-A2-A0EN-01A-13D-A099-09
## 5                  TCGA-A1-A0SI-01A-11D-A142-09
## 6                  TCGA-A2-A0D0-01A-11W-A019-09
##    Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1
## 1 TCGA-A1-A0SO-10A-03D-A099-09                      G
## 2 TCGA-A2-A0EU-10A-01W-A071-09                      A
## 3 TCGA-A2-A0ER-10A-01W-A055-09                      G
## 4 TCGA-A2-A0EN-10A-01D-A099-09                      G
## 5 TCGA-A1-A0SI-10B-01D-A142-09                      G
## 6 TCGA-A2-A0D0-10A-01W-A021-09                      C
##   Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2
## 1                      G                        G                        C
## 2                      A                                                  
## 3                      G                                                  
## 4                      G                        G                        A
## 5                      G                                                  
## 6                      C                                                  
##   Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2
## 1                             G                             G
## 2                                                            
## 3                                                            
## 4                             G                             G
## 5                                                            
## 6                                                            
##   Verification_Status Validation_Status Mutation_Status Sequencing_Phase
## 1             Unknown             Valid         Somatic         Phase_IV
## 2             Unknown          Untested         Somatic         Phase_IV
## 3             Unknown          Untested         Somatic         Phase_IV
## 4             Unknown             Valid         Somatic         Phase_IV
## 5             Unknown          Untested         Somatic         Phase_IV
## 6             Unknown          Untested         Somatic         Phase_IV
##   Sequence_Source Validation_Method Score BAM_File      Sequencer
## 1             WXS Illumina_WXS_gDNA     1    dbGAP Illumina GAIIx
## 2             WXS              none     1    dbGAP Illumina GAIIx
## 3             WXS              none     1    dbGAP Illumina GAIIx
## 4             WXS Illumina_WXS_gDNA     1    dbGAP Illumina GAIIx
## 5             WXS              none     1    dbGAP Illumina GAIIx
## 6             WXS              none     1    dbGAP Illumina GAIIx
##                      Tumor_Sample_UUID
## 1 b3568259-c63c-4eb1-bbc7-af711ddd33db
## 2 de30da8f-903f-428e-a63d-59625fc858a9
## 3 31ed187e-9bfe-4ca3-8cbb-10c1e0184331
## 4 12362ad7-6866-4e7a-9ec6-8a0a68df8896
## 5 e218c272-a7e1-4bc9-b8c5-d2d1c903550f
## 6 3f20d0fe-aaa1-40f1-b2c1-7f070f93aef5
##               Matched_Norm_Sample_UUID chromosome_name_WU  start_WU
## 1 17ba8cdb-e35b-4496-a787-d1a7ee7d4a1e                 12   8994108
## 2 1583a7c5-c835-44fa-918a-1448abf6533d                  3 151545656
## 3 2bc2fdaf-fb2f-4bfd-9e20-e20edff6633a                  4 170991750
## 4 ad478c68-a18b-4529-ad7a-86039e6da6b1                  7 121756793
## 5 fbcab9dc-4a6b-4928-9459-699c9932e3e1                 16   8857982
## 6 bbf1c43d-d7b3-4574-a074-d22ad537829c                 16   2335631
##     stop_WU reference_WU variant_WU type_WU gene_name_WU
## 1   8994108            G          C     SNP        A2ML1
## 2 151545656            A          G     SNP        AADAC
## 3 170991750            G          A     SNP        AADAT
## 4 121756793            G          A     SNP         AASS
## 5   8857982            G          A     SNP         ABAT
## 6   2335631            C          T     SNP        ABCA3
##   transcript_name_WU transcript_species_WU transcript_source_WU
## 1        NM_144670.3                 human              genbank
## 2        NM_001086.2                 human              genbank
## 3        NM_016228.3                 human              genbank
## 4        NM_005763.3                 human              genbank
## 5        NM_000663.4                 human              genbank
## 6        NM_001089.2                 human              genbank
##   transcript_version_WU strand_WU transcript_status_WU trv_type_WU
## 1                58_37c         1            validated    missense
## 2                58_37c         1             reviewed    missense
## 3                58_37c        -1             reviewed      silent
## 4                58_37c        -1             reviewed    missense
## 5                58_37c         1             reviewed      silent
## 6                58_37c        -1             reviewed    missense
##   c_position_WU amino_acid_change_WU ucsc_cons_WU
## 1        c.1224              p.W408C        0.995
## 2         c.896              p.N299S        0.000
## 3         c.708               p.L236        1.000
## 4         c.788              p.T263M        1.000
## 5         c.423               p.E141        0.987
## 6        c.3295             p.D1099N        0.980
##                                                      domain_WU
## 1                                                         NULL
## 2      HMMPfam_Abhydrolase_3,superfamily_alpha/beta-Hydrolases
## 3 HMMPfam_Aminotran_1_2,superfamily_PLP-dependent transferases
## 4                                          HMMPfam_AlaDh_PNT_C
## 5     HMMPfam_Aminotran_3,superfamily_PyrdxlP-dep_Trfase_major
## 6                                                         NULL
##                                                                                                                                                                                                                                                all_domains_WU
## 1             HMMPfam_A2M,HMMPfam_A2M_N,superfamily_Terpenoid cyclases/Protein prenyltransferases,HMMPfam_A2M_recep,superfamily_Alpha-macroglobulin receptor domain,HMMPfam_A2M_N_2,HMMPfam_A2M_comp,HMMPfam_Thiol-ester_cl,PatternScan_ALPHA_2_MACROGLOBULIN
## 2                                                                                                                                                                         PatternScan_LIPASE_GDXG_SER,HMMPfam_Abhydrolase_3,superfamily_alpha/beta-Hydrolases
## 3                                                                                                                                                                                                HMMPfam_Aminotran_1_2,superfamily_PLP-dependent transferases
## 4 HMMPfam_Saccharop_dh,HMMPfam_AlaDh_PNT_C,HMMPfam_AlaDh_PNT_N,superfamily_NAD(P)-binding Rossmann-fold domains,superfamily_Formate/glycerate dehydrogenase catalytic domain-like,superfamily_Glyceraldehyde-3-phosphate dehydrogenase-like C-terminal domain
## 5                                                                                                                                                                    HMMPfam_Aminotran_3,PatternScan_AA_TRANSFER_CLASS_3,superfamily_PyrdxlP-dep_Trfase_major
## 6                                                                                                                            HMMPfam_ABC_tran,HMMSmart_SM00382,PatternScan_ABC_TRANSPORTER_1,superfamily_P-loop containing nucleoside triphosphate hydrolases
##   deletion_substructures_WU transcript_error
## 1                         -        no_errors
## 2                         -        no_errors
## 3                         -        no_errors
## 4                         -        no_errors
## 5                         -        no_errors
## 6                         -        no_errors

names(brcaMAF)
##  [1] "Hugo_Symbol"                   "Entrez_Gene_Id"               
##  [3] "Center"                        "NCBI_Build"                   
##  [5] "Chromosome"                    "Start_Position"               
##  [7] "End_Position"                  "Strand"                       
##  [9] "Variant_Classification"        "Variant_Type"                 
## [11] "Reference_Allele"              "Tumor_Seq_Allele1"            
## [13] "Tumor_Seq_Allele2"             "dbSNP_RS"                     
## [15] "dbSNP_Val_Status"              "Tumor_Sample_Barcode"         
## [17] "Matched_Norm_Sample_Barcode"   "Match_Norm_Seq_Allele1"       
## [19] "Match_Norm_Seq_Allele2"        "Tumor_Validation_Allele1"     
## [21] "Tumor_Validation_Allele2"      "Match_Norm_Validation_Allele1"
## [23] "Match_Norm_Validation_Allele2" "Verification_Status"          
## [25] "Validation_Status"             "Mutation_Status"              
## [27] "Sequencing_Phase"              "Sequence_Source"              
## [29] "Validation_Method"             "Score"                        
## [31] "BAM_File"                      "Sequencer"                    
## [33] "Tumor_Sample_UUID"             "Matched_Norm_Sample_UUID"     
## [35] "chromosome_name_WU"            "start_WU"                     
## [37] "stop_WU"                       "reference_WU"                 
## [39] "variant_WU"                    "type_WU"                      
## [41] "gene_name_WU"                  "transcript_name_WU"           
## [43] "transcript_species_WU"         "transcript_source_WU"         
## [45] "transcript_version_WU"         "strand_WU"                    
## [47] "transcript_status_WU"          "trv_type_WU"                  
## [49] "c_position_WU"                 "amino_acid_change_WU"         
## [51] "ucsc_cons_WU"                  "domain_WU"                    
## [53] "all_domains_WU"                "deletion_substructures_WU"    
## [55] "transcript_error"

# 使用waterfall函数绘制瀑布图
# Plot only genes with mutations in 6% or more of samples
# 只展示至少在6%的样本中变异的基因
waterfall(brcaMAF, fileType="MAF", mainRecurCutoff = 0.06)
## Checking if input is properly formatted...
## Calculating frequency of mutations...
## setting mutation hierarchy...
## Performing recurrence cutoff...
## NULL

image.png

# Plot only the specified genes
# 展示特定基因的变异信息
# Define specific genes to plot
genes_to_plot <- c("PIK3CA", "TP53", "USH2A", "MLL3", "BRCA1", "CDKN1B")
waterfall(brcaMAF, plotGenes = genes_to_plot)
# Checking if input is properly formatted...
## Calculating frequency of mutations...
## Removing genes not in: PIK3CA, TP53, USH2A, MLL3, BRCA1, CDKN1B
## setting mutation hierarchy...
## NULL

image.png

# Create clinical data
# 添加临床表型信息
subtype <- c("lumA", "lumB", "her2", "basal", "normal")
subtype <- sample(subtype, 50, replace = TRUE)
age <- c("20-30", "31-50", "51-60", "61+")
age <- sample(age, 50, replace = TRUE)
sample <- as.character(unique(brcaMAF$Tumor_Sample_Barcode))
clinical <- as.data.frame(cbind(sample, subtype, age))

# Melt the clinical data into 'long' format.
library(reshape2)
clinical <- melt(clinical, id.vars = c("sample"))
head(clinical)
##                         sample variable  value
## 1 TCGA-A1-A0SO-01A-22D-A099-09  subtype normal
## 2 TCGA-A2-A0EU-01A-22W-A071-09  subtype normal
## 3 TCGA-A2-A0ER-01A-21W-A050-09  subtype   lumB
## 4 TCGA-A2-A0EN-01A-13D-A099-09  subtype   lumA
## 5 TCGA-A1-A0SI-01A-11D-A142-09  subtype   lumB
## 6 TCGA-A2-A0D0-01A-11W-A019-09  subtype   lumA

# Run waterfall
waterfall(brcaMAF, clinDat = clinical, 
          clinVarCol = c(lumA = "blue4", lumB = "deepskyblue", 
                         her2 = "hotpink2", basal = "firebrick2", 
                         normal = "green4", 
                         `20-30` = "#ddd1e7", `31-50` = "#bba3d0", 
                         `51-60` = "#9975b9", `61+` = "#7647a2"), 
          plotGenes = c("PIK3CA", "TP53", "USH2A", "MLL3", "BRCA1"), 
          clinLegCol = 2, 
          clinVarOrder = c("lumA", "lumB", "her2", "basal", "normal", "20-30", "31-50", "51-60", "61+"))
## Checking if input is properly formatted...
## Calculating frequency of mutations...
## Removing genes not in: PIK3CA, TP53, USH2A, MLL3, BRCA1
## setting mutation hierarchy...

image.png

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Chinese (Simplified)_China.936 
## [2] LC_CTYPE=Chinese (Simplified)_China.936   
## [3] LC_MONETARY=Chinese (Simplified)_China.936
## [4] LC_NUMERIC=C                              
## [5] LC_TIME=Chinese (Simplified)_China.936    
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] reshape2_1.4.3      GenVisR_1.16.1      RColorBrewer_1.1-2 
## [4] maftools_2.0.16     Biobase_2.44.0      BiocGenerics_0.30.0
## [7] waterfalls_0.1.2   
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-6                matrixStats_0.54.0         
##  [3] bit64_0.9-7                 doParallel_1.0.14          
##  [5] progress_1.2.2              httr_1.4.0                 
##  [7] GenomeInfoDb_1.20.0         tools_3.6.0                
##  [9] R6_2.4.0                    DBI_1.0.0                  
## [11] lazyeval_0.2.2              colorspace_1.4-1           
## [13] withr_2.1.2                 tidyselect_0.2.5           
## [15] gridExtra_2.3               prettyunits_1.0.2          
## [17] bit_1.1-14                  compiler_3.6.0             
## [19] DelayedArray_0.10.0         pkgmaker_0.27              
## [21] rtracklayer_1.44.0          labeling_0.3               
## [23] scales_1.0.0                NMF_0.21.0                 
## [25] stringr_1.4.0               digest_0.6.20              
## [27] Rsamtools_2.0.0             rmarkdown_1.13             
## [29] XVector_0.24.0              pkgconfig_2.0.2            
## [31] htmltools_0.3.6             bibtex_0.4.2               
## [33] BSgenome_1.52.0             rlang_0.4.7                
## [35] RSQLite_2.1.1               gtools_3.8.1               
## [37] BiocParallel_1.17.18        dplyr_0.8.3                
## [39] VariantAnnotation_1.30.1    RCurl_1.95-4.12            
## [41] magrittr_1.5                GenomeInfoDbData_1.2.1     
## [43] wordcloud_2.6               Matrix_1.2-17              
## [45] Rcpp_1.0.5                  munsell_0.5.0              
## [47] S4Vectors_0.22.0            viridis_0.5.1              
## [49] stringi_1.4.3               yaml_2.2.0                 
## [51] SummarizedExperiment_1.14.0 zlibbioc_1.30.0            
## [53] plyr_1.8.4                  FField_0.1.0               
## [55] grid_3.6.0                  blob_1.1.1                 
## [57] crayon_1.3.4                lattice_0.20-38            
## [59] Biostrings_2.52.0           splines_3.6.0              
## [61] GenomicFeatures_1.36.3      hms_0.4.2                  
## [63] knitr_1.23                  pillar_1.4.2               
## [65] GenomicRanges_1.36.0        rngtools_1.4               
## [67] codetools_0.2-16            biomaRt_2.40.1             
## [69] stats4_3.6.0                XML_3.98-1.20              
## [71] glue_1.3.1                  evaluate_0.14              
## [73] data.table_1.12.2           foreach_1.4.4              
## [75] gtable_0.3.0                purrr_0.3.2                
## [77] assertthat_0.2.1            ggplot2_3.2.0              
## [79] xfun_0.8                    gridBase_0.4-7             
## [81] xtable_1.8-4                viridisLite_0.3.0          
## [83] survival_2.44-1.1           tibble_2.1.3               
## [85] iterators_1.0.10            GenomicAlignments_1.20.1   
## [87] AnnotationDbi_1.46.0        registry_0.5-1             
## [89] memoise_1.1.0               IRanges_2.18.1             
## [91] cluster_2.0.8

R语言可视化（二十八）：瀑布图绘制

28. 瀑布图绘制

清除当前环境中的变量

设置工作目录

使用waterfalls包绘制瀑布图

使用maftools包绘制瀑布图

使用GenVisR包绘制瀑布图

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读

R语言学习

bioinformatics

绘图技巧

R绘图

R

数据分析