美文网首页编程语言
R语言可视化10: 韦恩图/upset图 - VennDiagr

R语言可视化10: 韦恩图/upset图 - VennDiagr

作者: 小程的学习笔记 | 来源:发表于2023-04-26 21:06 被阅读0次

    1. 使用 VennDiagram 包绘制韦恩图

    1.1 两个数据集

    # Install (once) and load the required package
    # install.packages("VennDiagram")
    library(VennDiagram)

    # Test data: five sets sampled without replacement from 1:1000
    # (replace = FALSE is the default; it is spelled out for clarity)
    set1 <- sample(1:1000, 300, replace = FALSE)
    set2 <- sample(1:1000, 130, replace = FALSE)
    set3 <- sample(1:1000, 300, replace = FALSE)
    set4 <- sample(1:1000, 200, replace = FALSE)
    set5 <- sample(1:1000, 300, replace = FALSE)

    s1 <- list(set1 = set1,
               set2 = set2)

    # Two-set Venn diagram
    v1 <- venn.diagram(x = s1,
                       filename = NULL, # giving a file name saves the figure to disk
                       # Output options
                       # imagetype = "png", # output image type (tiff, png, svg)
                       # height = 1000, # image height
                       # width = 1000, # image width
                       # resolution = 300, # image resolution

                       scaled = TRUE, # draw circle sizes in proportion to set sizes
                       alpha = c(0.8, 0.8), # transparency of each circle

                       ## Settings for every element except the title
                       # Circles
                       lwd = 1, # outline width: 1 2 3 4 5 6
                       lty = 1, # outline type: 1 solid, 2 dashed, "blank" none
                       col = c("black", "red"), # outline colours
                       fill = c("#0073C2FF", "#EFC000FF"), # fill colours

                       # Numbers
                       cex = 1, # number size
                       fontface = "bold", # bold numbers
                       fontfamily = "sans", # number font (was misspelled `fonrfamily`)

                       # Category labels
                       cat.cex = 1, # label size
                       cat.col = "black", # label colour
                       cat.fontface = "bold", # bold labels
                       # "outer" puts the labels outside the circles, "text" inside
                       cat.default.pos = "outer",
                       cat.pos = c(0, 0), # label position around the circle, in degrees
                       # Distance between label and circle; increase it when a
                       # label overlaps its circle
                       cat.dist = c(0.05, 0.03),
                       cat.fontfamily = "sans" # label font
                       )

    cowplot::plot_grid(v1)
    
    VennDiagram-1

    1.2 多个数据集(此处以5个为示例)

    # Five-set input for the multi-set Venn diagram
    s2 <- list(
      set1 = set1,
      set2 = set2,
      set3 = set3,
      set4 = set4,
      set5 = set5
    )

    v2 <- venn.diagram(x = s2, filename = NULL,
                       col = "transparent",
                       fill = c("dodgerblue", "goldenrod1", "darkorange1", "seagreen3", "orchid3"),
                       # One colour per region label: a five-set diagram has
                       # 2^5 - 1 = 31 regions (5 coloured, 25 white, 1 black)
                       label.col = c("dodgerblue", "goldenrod1", "darkorange1", "seagreen3", "orchid3",
                                     rep("white", 25),
                                     "black"),
                       fontface = "bold",
                       # Plain colour vector (the original wrapped it in a
                       # redundant `c(cat.col = c(...))`)
                       cat.col = c("darkblue", "darkgreen", "orange", "grey50", "purple"),
                       cat.dist = c(0.2, 0.2, 0.18, 0.18, 0.2),
                       alpha = 0.50,
                       cex = 1,
                       cat.cex = 1,
                       margin = 0.05
    )

    cowplot::plot_grid(v2)
    
    VennDiagram-2

    1.3 交集元素的提取

    # get.venn.partitions() from VennDiagram returns the elements of every
    # intersection; here it is applied to the five sets defined above
    inter <- get.venn.partitions(x = s2)

    head(inter, n = 6L)
    ##    set1  set2  set3 set4 set5                      ..set..                                                ..values.. ..count..
    ##  1  TRUE  TRUE  TRUE TRUE TRUE     set1∩set2∩set3∩set4∩set5                                                  822, 588         2
    ##  2 FALSE  TRUE  TRUE TRUE TRUE (set2∩set3∩set4∩set5)∖(set1)                                                       406         1
    ##  3  TRUE FALSE  TRUE TRUE TRUE (set1∩set3∩set4∩set5)∖(set2)                                                  442, 104         2
    ##  4 FALSE FALSE  TRUE TRUE TRUE (set3∩set4∩set5)∖(set1∪set2) 366, 715, 379, 414, 30, 308, 398, 322, 359, 825, 708, 458        12
    ##  5  TRUE  TRUE FALSE TRUE TRUE (set1∩set2∩set4∩set5)∖(set3)                                                  615, 541         2
    ##  6 FALSE  TRUE FALSE TRUE TRUE (set2∩set4∩set5)∖(set1∪set3)                                          934, 84, 75, 655         4
    

    5个数据集是VennDiagram包的上限

    2. 使用 ggVennDiagram 包绘制韦恩图

    # Install (once) and load the required packages
    # install.packages("ggVennDiagram")
    library(ggplot2)
    library(ggVennDiagram)
    
    # ggVennDiagram ships several shapes; by default the best-fitting one is
    # used, but a shape can also be chosen explicitly. List the available ones:
    plot_shapes()
    
    ggVennDiagram-1

    2.1 三个数据集

    # Three-set input
    x1 <- list(
      set1 = set1,
      set2 = set2,
      set3 = set3
    )

    # Method 1: the one-call interface
    ggVennDiagram(x1, category.names = c("A", "B", "C"), # set names shown in the plot
                  label = "both", # one of "both", "count", "percent", "none"
                  label_color = "black",
                  label_alpha = 0, # remove the background of the text labels
                  edge_lty = "dashed", # dashed circle outlines
                  edge_size = 1) +
      scale_fill_gradient(low = "white", high = "#b9292b", name = "gene count")

    # Method 2: build the plot layer by layer
    # Create the Venn object
    venn <- Venn(x1)
    # shape_id is a named argument and must be passed with `=`; the original
    # `shape_id == "301"` was a comparison on an object that does not exist
    data <- process_data(venn, shape_id = "301")

    ggplot() +
      # Regions, filled by element count
      geom_sf(aes(fill = count),
              data = venn_region(data)) +
      # Set outlines
      geom_sf(color = "grey",
              size = 1,
              data = venn_setedge(data),
              show.legend = FALSE) +
      scale_fill_gradient(low = "white", high = "#b9292b", name = "gene count") +
      # Set names
      geom_sf_text(aes(label = name),
                   data = venn_setlabel(data),
                   size = 8) +
      # Region counts
      geom_sf_label(aes(label = count),
                    data = venn_region(data),
                    size = 4) +
      theme_void()
    
    ggVennDiagram-2

    2.2 多个数据集(此处以5个为示例)

    # Keep the fill almost blank so colours can be adjusted afterwards in Ai
    # (presumably Adobe Illustrator)
    library(ggsci)

    # Five-set input; the original used `x2` without ever defining it
    x2 <- list(
      set1 = set1,
      set2 = set2,
      set3 = set3,
      set4 = set4,
      set5 = set5
    )

    ggVennDiagram(x2, label_alpha = 0, label = "none",
                  edge_size = 0.5
                  # , show_intersect = TRUE # browse the genes of each subset interactively (plotly)
                  ) +
      scale_color_lancet() + # Lancet journal palette from the ggsci package
      scale_fill_gradient(low = "gray100", high = "gray95", guide = "none")

    # Custom colour
    color1 <- alpha("#f8766d", 0.9)

    ggVennDiagram(x2, label_alpha = 0, label_size = 3
                  # , edge_size = 0.5, label = "count" # hide percentages (default is "both")
                  # , show_intersect = TRUE # browse the genes of each subset interactively (plotly)
    ) +
      scale_color_brewer(palette = "Paired") +
      scale_fill_gradient(low = "white", high = color1,
                          guide = "none" # drop the legend
      )
    
    ggVennDiagram-3

    ★ 支持1-7维的韦恩图绘制
    ★ 是ggplot2的拓展包,因此支持ggplot2的其他语法设置
    ★ show_intersect = T时,可输出为交互式html,此时可点击数值显示源数据

    3. 使用 UpSetR 包绘制upset图

    UpSetR包,经常用于大于5个样本的“韦恩图”

    # Install (once) and load the required packages
    # install.packages("UpSetR")
    # install.packages("RColorBrewer")
    # The example data set lives in ggplot2movies; install it only when it is
    # missing instead of reinstalling on every run
    if (!requireNamespace("ggplot2movies", quietly = TRUE)) {
      install.packages("ggplot2movies")
    }
    library(UpSetR)
    library(RColorBrewer)
    library(ggplot2)

    # Movie data from IMDB
    movies <- as.data.frame(ggplot2movies::movies)
    head(movies)
    ##                      title year length budget rating votes   r1   r2  r3   r4   r5   r6   r7   r8   r9  r10 mpaa Action Animation Comedy Drama Documentary Romance Short
    ## 1                        $ 1971    121     NA    6.4   348  4.5  4.5 4.5  4.5 14.5 24.5 24.5 14.5  4.5  4.5           0         0      1     1           0       0     0
    ## 2        $1000 a Touchdown 1939     71     NA    6.0    20  0.0 14.5 4.5 24.5 14.5 14.5 14.5  4.5  4.5 14.5           0         0      1     0           0       0     0
    ## 3   $21 a Day Once a Month 1941      7     NA    8.2     5  0.0  0.0 0.0  0.0  0.0 24.5  0.0 44.5 24.5 24.5           0         1      0     0           0       0     1
    ## 4                  $40,000 1996     70     NA    8.2     6 14.5  0.0 0.0  0.0  0.0  0.0  0.0  0.0 34.5 45.5           0         0      1     0           0       0     0
    ## 5 $50,000 Climax Show, The 1975     71     NA    3.4    17 24.5  4.5 0.0 14.5 14.5  4.5  0.0  0.0  0.0 24.5           0         0      0     0           0       0     0
    ## 6                    $pent 2000     91     NA    4.3    45  4.5  4.5 4.5 14.5 14.5 14.5  4.5  4.5 14.5 14.5           0         0      0     1           0       0     0
    
    
    # Tuned and polished UpSet plot.
    # The calls are namespace-qualified because ComplexUpset also exports an
    # `upset()` function, which masks this one once that package is attached.
    UpSetR::upset(UpSetR::fromList(movies),
          nsets = length(movies), # show every set; lower the number to show fewer sets
          nintersects = 15, # number of intersections to show
          sets = c("title", "length", "budget", "votes", "year"), # keep.order = TRUE, # name the sets, or use keep.order = TRUE to keep the input order
          number.angles = 0, # angle of the numbers above the intersection bars
          point.size = 4, # size of the matrix points
          line.size = 1, # width of the connecting lines
          mainbar.y.label = "Intersection size", # y-axis label
          main.bar.color = "black", # colour of the intersection bars
          matrix.color = "black", # colour of the matrix points
          sets.x.label = "Set size", # x-axis label
          sets.bar.color = RColorBrewer::brewer.pal(5, "Set1"), # colour of the set-size bars; Set1 has only 9 colours, Set3 and Paired have 12
          mb.ratio = c(0.7, 0.3), # height ratio of bar plot to matrix plot
          order.by = "freq", # ordering of the matrix, e.g. "freq" or "degree"
          # Six text sizes: intersection size title, intersection size tick
          # labels, set size title, set size tick labels, set names, numbers
          # above bars
          text.scale = c(1.5, 1.5, 1.5, 1.5, 1.5, 1),
          shade.color = "#12507B", # colour of the shaded rows
          # Highlight chosen intersections: one list() per intersection of interest
          queries = list(list(query = UpSetR::intersects, params = list("votes"), color = "purple", active = TRUE),
                         list(query = UpSetR::intersects, params = list("votes", "length"), color = "orange", active = TRUE))
    )
    
    upsetR-1

    ★ 不支持ggplot语法

    4. 使用 ComplexUpset 包绘制upset图

    4.1 基本用法

    # Install (once) and load the required packages
    # install.packages('ComplexUpset')

    # if(!require(devtools)) install.packages("devtools")
    # devtools::install_github("krassowski/complex-upset")
    library(ggplot2)
    library(ComplexUpset)

    movies <- as.data.frame(ggplot2movies::movies)
    # Columns 18-24 hold the movie genres (as a 0/1 indicator matrix)
    genres <- names(movies)[18:24]
    genres
    ## [1] "Action"      "Animation"   "Comedy"      "Drama"       "Documentary" "Romance"     "Short"

    # Turn empty strings in the mpaa column into NA, then drop all rows with
    # missing values to keep the demonstration simple
    movies$mpaa[movies$mpaa == ""] <- NA
    movies <- na.omit(movies)

    upset(
      movies,
      genres,
      name = "genre",       # label at the bottom
      width_ratio = 0.2,    # width of the bar chart on the left
      height_ratio = 0.3,   # share of the lower part of the figure
      min_size = 5,         # smallest intersection size to show
      min_degree = 2,       # minimum degree, i.e. at least this many sets per intersection
      n_intersections = 15,
      wrap = TRUE,
      set_sizes = FALSE
    )
    
    ComplexUpset-1

    4.2 添加组件(annotations)

    # Annotation panels can be supplied in three different ways
    upset(
      movies,
      genres,
      annotations = list(
        # Way 1 - a plain list: panel for the `length` column
        "Length" = list(
          aes = aes(x = intersection, y = length),
          geom = geom_boxplot(na.rm = TRUE)
        ),
        # Way 2 - a ggplot2 object: panel for the `rating` column;
        # aes(x = intersection) is supplied by default and can be left out
        "Rating" = (
          ggplot(mapping = aes(y = rating)) +
            geom_jitter(aes(color = log10(votes)), na.rm = TRUE) +
            geom_violin(alpha = 0.5, na.rm = TRUE)
        ),
        # Way 3 - the built-in upset_annotate() helper
        "Budget" = upset_annotate("budget", geom_boxplot(na.rm = TRUE))
      ),
      width_ratio = 0.1,
      min_size = 10
    )

    # Filled bar chart showing how the proportions of a categorical variable differ
    upset(
      movies,
      genres,
      annotations = list(
        "MPAA Rating" = (
          ggplot(mapping = aes(fill = mpaa)) +
            geom_bar(stat = "count", position = "fill") +
            scale_y_continuous(labels = scales::percent_format()) +
            scale_fill_manual(
              values = c(
                "R" = "#E41A1C",
                "PG" = "#377EB8",
                "PG-13" = "#4DAF4A",
                "NC-17" = "#FF7F00"
              )
            ) +
            ylab("MPAA Rating")
        )
      ),
      width_ratio = 0.1
    )
    
    ComplexUpset-2

    4.3 区域选择模式

    ComplexUpset提供四种模式来定义相应维恩图上的感兴趣区域(以A、B、C三个数据集为例),自定义时,可用intersection_size()进行相应的调整

    1) exclusive_intersection((𝐴∩𝐵)∖𝐶):属于所定义交集、但不属于任何其他集合的元素(别名:distinct),默认模式
    2) inclusive_intersection(𝐴∩𝐵):属于所定义交集的元素,包括同时属于其他集合的元素(别名:intersect)
    3) exclusive_union((𝐴∪𝐵)∖𝐶):属于所定义并集的元素,不包括与任何其他集合重叠的元素
    4) inclusive_union(𝐴∪𝐵):属于所定义并集的元素,包括与任何其他集合重叠的元素(别名:union)

    # Compare region-selection modes side by side. The original snippet opened
    # `upset(` twice, which left the parentheses unbalanced.
    upset(
      movies, genres,
      mode = "inclusive_intersection",
      annotations = list(
        # No upset_mode() here, so this panel uses the mode set above
        "Length (inclusive intersection)" = (
          ggplot(mapping = aes(y = length))
          + geom_jitter(alpha = 0.2, na.rm = TRUE)
        ),
        "Length (exclusive intersection)" = (
          ggplot(mapping = aes(y = length))
          + geom_jitter(alpha = 0.2, na.rm = TRUE)
          + upset_mode("exclusive_intersection")
        ),
        "Length (inclusive union)" = (
          ggplot(mapping = aes(y = length))
          + geom_jitter(alpha = 0.2, na.rm = TRUE)
          + upset_mode("inclusive_union")
        )
      ),
      min_size = 10,
      width_ratio = 0.1
    )
    
    # Add colour mappings to the intersection-size bars
    library(ggsci)

    # Single fixed colour for all bars
    upset(movies, genres,
          min_size = 10, width_ratio = 0.1,
          # Customise the intersection size panel
          base_annotations = list(
            "intersection size" = intersection_size(
              counts = FALSE, # do not print the counts (FALSE instead of the reassignable `F`)
              mapping = aes(fill = "bars_color")
            )
            + scale_fill_manual(values = c("bars_color" = "skyblue"), guide = "none") # one single colour
          )
    )

    # Bars filled by the mpaa column
    upset(movies, genres,
          min_size = 10, width_ratio = 0.1,
          # Customise the intersection size panel
          base_annotations = list(
            "intersection size" = intersection_size(
              counts = FALSE, # do not print the counts
              mapping = aes(fill = mpaa)
            )
            + scale_fill_lancet() # lancet palette from the ggsci package
          )
    )
    
    ComplexUpset-3

    5. 使用 VennDetail 包,韦恩图+韦恩条形图+韦恩饼图+upset图

    5.1 不同布局的图形

    # Install (once, from Bioconductor) and load the required package
    # if (!requireNamespace("BiocManager"))
    #    install.packages("BiocManager")
    # BiocManager::install("VennDetail")
    library(VennDetail)

    # Test data: five samples drawn without replacement from 1..1000
    A <- sample(1000, size = 400, replace = FALSE)
    B <- sample(1000, size = 600, replace = FALSE)
    C <- sample(1000, size = 350, replace = FALSE)
    D <- sample(1000, size = 550, replace = FALSE)
    E <- sample(1000, size = 450, replace = FALSE)

    venn <- venndetail(
      list(A = A, B = B, C = C, D = D, E = E)
    )
    detail(venn)

    # Venn diagram (the default layout)
    plot(venn)

    # Venn pie chart
    plot(venn, type = "vennpie")

    # Only show subsets whose elements come from at least four data sets
    # (any = 1, revcolor = "lightgrey" would highlight unique or shared subsets)
    vennpie(venn, min = 4)

    # Venn bar chart
    dplot(venn, textsize = 4, order = TRUE)

    # UpSet plot
    plot(venn, type = "upset")
    
    VennDetail-1

    5.2 提取子集及可用注释

    ## List the subset names together with their sizes
    detail(venn)
    ##  Shared B_C_D_E A_C_D_E   C_D_E A_B_D_E   B_D_E   A_D_E     D_E A_B_C_E   B_C_E   A_C_E     C_E   A_B_E     B_E 
    ##       15      27      14      23      51      59      29      38      17      22      11      14      29      50 
    ##      A_E       E A_B_C_D   B_C_D   A_C_D     C_D   A_B_D     B_D     A_D       D   A_B_C     B_C     A_C       C 
    ##       19      32      28      43       7      27      34      61      32      62      30      37      14      21 
    ##      A_B       B       A 
    ##       49      48      21 


    ## First ten elements of the selected subsets
    head(
      getSet(venn, subset = c("Shared", "A_C_D_E")),
      n = 10
    )
    ##    Subset Detail
    ##  1  Shared    522
    ##  2  Shared    413
    ##  3  Shared    362
    ##  4  Shared    415
    ##  5  Shared    789
    ##  6  Shared    984
    ##  7  Shared    712
    ##  8  Shared    719
    ##  9  Shared    114
    ##  10 Shared    666

    ## Wide-format result table, first rows
    head(
      result(venn, wide = TRUE)
    )
    ##     Detail A B C D E SharedSets
    ##  10     522 1 1 1 1 1          5
    ##  52     413 1 1 1 1 1          5
    ##  116    362 1 1 1 1 1          5
    ##  136    415 1 1 1 1 1          5
    ##  177    789 1 1 1 1 1          5
    ##  185    984 1 1 1 1 1          5
    

    参考:

    1. http://news.sohu.com/a/541738972_120055884
    2. https://github.com/krassowski/complex-upset

    相关文章

      网友评论

        本文标题:R语言可视化10: 韦恩图/upset图 - VennDiagr

        本文链接:https://www.haomeiwen.com/subject/ozhujdtx.html