美文网首页R语言学习
R语言可视化(十):集合图绘制

R语言可视化(十):集合图绘制

作者: Davey1220 | 来源:发表于2020-08-13 09:52 被阅读0次

    10.UpSet集合图绘制


    清除当前环境中的变量

    # Remove every object that ls() lists from the current environment so the
    # tutorial starts from a clean workspace.
    # NOTE(review): this also deletes the reader's own objects; running the
    # tutorial in a fresh R session is a safer alternative.
    rm(list=ls())
    

    设置工作目录

    # Switch the working directory to the author's local plot folder.
    # NOTE(review): the path is machine-specific and setwd() raises an error
    # when the directory does not exist; adjust it before running.
    setwd("C:/Users/Dell/Desktop/R_Plots/10upset/")
    

    使用UpSetR包绘制集合图

    library(UpSetR)
    # Load the example movies dataset that ships with the UpSetR package.
    # The file is a semicolon-separated CSV with a header row.
    movies <- read.csv(
      system.file("extdata", "movies.csv", package = "UpSetR"),
      header = TRUE, # spelled out: `T` is an ordinary, reassignable variable
      sep = ";"
    )
    # Number of rows and columns of the loaded table
    dim(movies)
    ## [1] 3883   21
    
    # Preview the first six rows of the movies table
    head(movies, n = 6L)
    ##                                 Name ReleaseDate Action Adventure Children
    ## 1                   Toy Story (1995)        1995      0         0        1
    ## 2                     Jumanji (1995)        1995      0         1        1
    ## 3            Grumpier Old Men (1995)        1995      0         0        0
    ## 4           Waiting to Exhale (1995)        1995      0         0        0
    ## 5 Father of the Bride Part II (1995)        1995      0         0        0
    ## 6                        Heat (1995)        1995      1         0        0
    ##   Comedy Crime Documentary Drama Fantasy Noir Horror Musical Mystery
    ## 1      1     0           0     0       0    0      0       0       0
    ## 2      0     0           0     0       1    0      0       0       0
    ## 3      1     0           0     0       0    0      0       0       0
    ## 4      1     0           0     1       0    0      0       0       0
    ## 5      1     0           0     0       0    0      0       0       0
    ## 6      0     1           0     0       0    0      0       0       0
    ##   Romance SciFi Thriller War Western AvgRating Watches
    ## 1       0     0        0   0       0      4.15    2077
    ## 2       0     0        0   0       0      3.20     701
    ## 3       1     0        0   0       0      3.02     478
    ## 4       0     0        0   0       0      2.73     170
    ## 5       0     0        0   0       0      3.01     296
    ## 6       0     0        1   0       0      3.88     940
    

    基础绘图

    # Basic UpSet plot of nine genre sets, ordered by intersection frequency.
    upset(
      data = movies,
      # sets (genre columns) to include
      sets = c(
        "Action", "Adventure", "Comedy", "Drama", "Mystery",
        "Thriller", "Romance", "War", "Western"
      ),
      order.by = "freq",
      # angle of the numbers shown above the intersection bars
      number.angles = 30,
      # size of the dots in the matrix
      point.size = 3.5,
      # width of the lines connecting the dots in the matrix
      line.size = 2,
      # y-axis label
      mainbar.y.label = "Genre Intersections",
      # x-axis label
      sets.x.label = "Movies Per Genre",
      # height ratio between the bar plot and the matrix plot
      mb.ratio = c(0.6, 0.4)
    )
    
    image.png
    # Same nine genre sets, with a different ordering and the sets kept in the
    # order in which they are listed.
    upset(
      data = movies,
      # sets (genre columns) to include
      sets = c(
        "Action", "Adventure", "Comedy", "Drama", "Mystery",
        "Thriller", "Romance", "War", "Western"
      ),
      # change how the intersections are ordered
      order.by = "degree",
      # keep the sets in the order given in `sets`
      keep.order = TRUE,
      # angle of the numbers shown above the intersection bars
      number.angles = 45,
      # size of the dots in the matrix
      point.size = 3,
      # width of the lines connecting the dots in the matrix
      line.size = 1.5,
      # y-axis label
      mainbar.y.label = "Genre Intersections",
      # x-axis label
      sets.x.label = "Movies Per Genre",
      # height ratio between the bar plot and the matrix plot
      mb.ratio = c(0.7, 0.3)
    )
    
    image.png
    # Set data given as a list of named vectors; fromList() accepts this format.
    listInput <- list(
      one   = c(1, 2, 3, 5, 7, 8, 11, 12, 13),
      two   = c(1, 2, 4, 5, 10),
      three = c(1, 5, 6, 7, 8, 9, 10, 12, 13)
    )
    # Print the list to inspect it
    listInput
    ## $one
    ## [1]  1  2  3  5  7  8 11 12 13
    ## 
    ## $two
    ## [1]  1  2  4  5 10
    ## 
    ## $three
    ## [1]  1  5  6  7  8  9 10 12 13
    
    # Convert the list with fromList() and plot it, ordered by frequency.
    upset(
      data = fromList(listInput),
      order.by = "freq"
    )
    
    image.png
    # Set data given as a named vector: each name is a set or an `&`-joined
    # combination of sets, each value a count; fromExpression() accepts this.
    expressionInput <- c(
      "one" = 2,
      "two" = 1,
      "three" = 2,
      "one&two" = 1,
      "one&three" = 4,
      "two&three" = 1,
      "one&two&three" = 2
    )
    # Print the vector to inspect it
    expressionInput
    ##           one           two         three       one&two     one&three 
    ##             2             1             2             1             4 
    ##     two&three one&two&three 
    ##             1             2
    
    # Convert the expression vector with fromExpression() and plot it with
    # smaller matrix dots and thinner connecting lines.
    upset(
      data = fromExpression(expressionInput),
      order.by = "freq",
      point.size = 2,
      line.size = 1
    )
    
    image.png

    使用set.metadata参数添加元数据信息

    # Build the set-level metadata: one row per set column of `movies`
    # (columns 3 to 19), each with a randomly generated score.
    sets <- names(movies[3:19])
    # One rounded score per set, drawn uniformly from [0, 90]. No seed is set,
    # so the values differ between runs; call set.seed() first if you need
    # reproducible numbers.
    avgRottenTomatoesScore <- round(runif(length(sets), min = 0, max = 90))
    # data.frame() keeps the score column numeric. as.data.frame(cbind(...))
    # would first build a character matrix and turn the scores into text.
    metadata <- data.frame(
      sets = sets,
      avgRottenTomatoesScore = avgRottenTomatoesScore
    )
    # Preview the first rows
    head(metadata)
    ##          sets avgRottenTomatoesScore
    ## 1      Action                     73
    ## 2   Adventure                     76
    ## 3    Children                     30
    ## 4      Comedy                      5
    ## 5       Crime                     44
    ## 6 Documentary                      8
    
    # Make sure the score column is numeric. Going through as.character() gives
    # the printed values whether the column is stored as factor or as text.
    metadata[["avgRottenTomatoesScore"]] <- as.numeric(
      as.character(metadata[["avgRottenTomatoesScore"]])
    )
    

    添加元数据条形图

    # Attach the set metadata and show the score column as a bar plot
    # (plot type "hist").
    upset(
      movies,
      sets = c(
        "Action", "Adventure", "Comedy", "Drama", "Mystery",
        "Thriller", "Romance", "War", "Western"
      ),
      set.metadata = list(
        data = metadata,
        plots = list(
          list(type = "hist", column = "avgRottenTomatoesScore", assign = 20)
        )
      )
    )
    
    image.png

    添加元数据热图

    # Randomly assign one of three cities to every metadata row (sampling with
    # replacement). No seed is set, so the assignment differs between runs.
    Cities <- sample(c("Boston", "NYC", "LA"), nrow(metadata), replace = TRUE)
    # Assigning the character vector directly keeps the column as character;
    # no cbind() followed by as.character() round trip is needed.
    metadata$Cities <- Cities
    # Show the metadata rows for five selected sets. %in% never yields NA, so
    # the logical mask can index the rows directly.
    metadata[
      metadata$sets %in% c("Drama", "Comedy", "Action", "Thriller", "Romance"),
    ]
    ##        sets avgRottenTomatoesScore Cities
    ## 1    Action                     73     LA
    ## 4    Comedy                      5     LA
    ## 7     Drama                     55    NYC
    ## 13  Romance                     43 Boston
    ## 15 Thriller                     51     LA
    
    # Preview the first six rows of the metadata table
    head(metadata, n = 6L)
    ##          sets avgRottenTomatoesScore Cities
    ## 1      Action                     73     LA
    ## 2   Adventure                     76     LA
    ## 3    Children                     30    NYC
    ## 4      Comedy                      5     LA
    ## 5       Crime                     44    NYC
    ## 6 Documentary                      8     LA
    
    # Show the Cities metadata column as a heat map with one colour per city.
    upset(
      movies,
      sets = c("Drama", "Comedy", "Action", "Thriller", "Romance"),
      set.metadata = list(
        data = metadata,
        plots = list(
          list(
            type = "heat",
            column = "Cities",
            assign = 10,
            colors = c(Boston = "green", NYC = "navy", LA = "purple")
          )
        )
      )
    )
    
    image.png
    # Two metadata heat maps side by side: the Cities column with explicit
    # colours and the numeric score column without a colour specification.
    upset(
      movies,
      sets = c("Drama", "Comedy", "Action", "Thriller", "Romance"),
      set.metadata = list(
        data = metadata,
        plots = list(
          list(
            type = "heat",
            column = "Cities",
            assign = 10,
            colors = c(Boston = "green", NYC = "navy", LA = "purple")
          ),
          list(
            type = "heat",
            column = "avgRottenTomatoesScore",
            assign = 10
          )
        )
      )
    )
    
    image.png

    添加元数据文本

    # Show the Cities metadata column as text, coloured per city.
    upset(
      movies,
      sets = c("Drama", "Comedy", "Action", "Thriller", "Romance"),
      set.metadata = list(
        data = metadata,
        plots = list(
          list(
            type = "text",
            column = "Cities",
            assign = 10,
            colors = c(Boston = "green", NYC = "navy", LA = "purple")
          )
        )
      )
    )
    
    image.png

    将元数据应用到矩阵行(matrix_rows 行着色,并同时显示元数据条形图)

    # Combine a metadata bar plot of the scores with "matrix_rows" colouring of
    # the matrix by city, drawn semi-transparent.
    upset(
      movies,
      sets = c("Drama", "Comedy", "Action", "Thriller", "Romance"),
      set.metadata = list(
        data = metadata,
        plots = list(
          list(
            type = "hist",
            column = "avgRottenTomatoesScore",
            assign = 20
          ),
          list(
            type = "matrix_rows",
            column = "Cities",
            colors = c(Boston = "green", NYC = "navy", LA = "purple"),
            alpha = 0.5
          )
        )
      )
    )
    
    image.png

    使用queries参数查询数据

    # Preview the first six rows of the movies table
    head(movies, n = 6L)
    ##                                 Name ReleaseDate Action Adventure Children
    ## 1                   Toy Story (1995)        1995      0         0        1
    ## 2                     Jumanji (1995)        1995      0         1        1
    ## 3            Grumpier Old Men (1995)        1995      0         0        0
    ## 4           Waiting to Exhale (1995)        1995      0         0        0
    ## 5 Father of the Bride Part II (1995)        1995      0         0        0
    ## 6                        Heat (1995)        1995      1         0        0
    ##   Comedy Crime Documentary Drama Fantasy Noir Horror Musical Mystery
    ## 1      1     0           0     0       0    0      0       0       0
    ## 2      0     0           0     0       1    0      0       0       0
    ## 3      1     0           0     0       0    0      0       0       0
    ## 4      1     0           0     1       0    0      0       0       0
    ## 5      1     0           0     0       0    0      0       0       0
    ## 6      0     1           0     0       0    0      0       0       0
    ##   Romance SciFi Thriller War Western AvgRating Watches
    ## 1       0     0        0   0       0      4.15    2077
    ## 2       0     0        0   0       0      3.20     701
    ## 3       1     0        0   0       0      3.02     478
    ## 4       0     0        0   0       0      2.73     170
    ## 5       0     0        0   0       0      3.01     296
    ## 6       0     0        1   0       0      3.88     940
    
    # Use the built-in `intersects` query to find or display the elements of
    # specific intersections. TRUE/FALSE are spelled out because `T` and `F`
    # are ordinary variables that can be reassigned.
    upset(
      movies,
      queries = list(
        list(
          query = intersects,
          params = list("Drama", "Comedy", "Action"),
          color = "orange",
          active = TRUE
        ),
        list(
          query = intersects,
          params = list("Drama"),
          color = "red",
          active = FALSE
        ),
        list(
          query = intersects,
          params = list("Action", "Drama"),
          active = TRUE
        )
      )
    )
    
    image.png
    # Use the built-in `elements` query to visualise how certain elements are
    # distributed across the intersections. TRUE/FALSE are spelled out because
    # `T` and `F` are ordinary variables that can be reassigned.
    upset(
      movies,
      queries = list(
        list(
          query = elements,
          params = list("AvgRating", 3.5, 4.1),
          color = "blue",
          active = TRUE
        ),
        list(
          query = elements,
          params = list("ReleaseDate", 1980, 1990, 2000),
          color = "red",
          active = FALSE
        )
      )
    )
    
    image.png
    # Add a query legend at the top; queries with a `query.name` are given
    # that name. TRUE/FALSE are spelled out because `T` and `F` are ordinary
    # variables that can be reassigned.
    upset(
      movies,
      query.legend = "top",
      queries = list(
        list(
          query = intersects,
          params = list("Drama", "Comedy", "Action"),
          color = "orange",
          active = TRUE,
          query.name = "Funny action"
        ),
        list(
          query = intersects,
          params = list("Drama"),
          color = "red",
          active = FALSE
        ),
        list(
          query = intersects,
          params = list("Action", "Drama"),
          active = TRUE,
          query.name = "Emotional action"
        )
      )
    )
    
    image.png

    使用attribute.plots参数添加属性图

    # Preview the first six rows of the movies table
    head(movies, n = 6L)
    ##                                 Name ReleaseDate Action Adventure Children
    ## 1                   Toy Story (1995)        1995      0         0        1
    ## 2                     Jumanji (1995)        1995      0         1        1
    ## 3            Grumpier Old Men (1995)        1995      0         0        0
    ## 4           Waiting to Exhale (1995)        1995      0         0        0
    ## 5 Father of the Bride Part II (1995)        1995      0         0        0
    ## 6                        Heat (1995)        1995      1         0        0
    ##   Comedy Crime Documentary Drama Fantasy Noir Horror Musical Mystery
    ## 1      1     0           0     0       0    0      0       0       0
    ## 2      0     0           0     0       1    0      0       0       0
    ## 3      1     0           0     0       0    0      0       0       0
    ## 4      1     0           0     1       0    0      0       0       0
    ## 5      1     0           0     0       0    0      0       0       0
    ## 6      0     1           0     0       0    0      0       0       0
    ##   Romance SciFi Thriller War Western AvgRating Watches
    ## 1       0     0        0   0       0      4.15    2077
    ## 2       0     0        0   0       0      3.20     701
    ## 3       1     0        0   0       0      3.02     478
    ## 4       0     0        0   0       0      2.73     170
    ## 5       0     0        0   0       0      3.01     296
    ## 6       0     0        1   0       0      3.88     940
    

    添加内置属性直方图

    # Add two built-in histogram attribute plots (ReleaseDate and AvgRating)
    # in two columns below the main plot. The `queries` flag is off for the
    # first histogram and on for the second. TRUE/FALSE are spelled out
    # because `T` and `F` are ordinary variables that can be reassigned.
    upset(
      movies,
      main.bar.color = "black",
      queries = list(
        list(query = intersects, params = list("Drama"), active = TRUE)
      ),
      attribute.plots = list(
        gridrows = 50,
        plots = list(
          list(plot = histogram, x = "ReleaseDate", queries = FALSE),
          list(plot = histogram, x = "AvgRating", queries = TRUE)
        ),
        ncols = 2
      )
    )
    
    
    image.png

    添加内置属性散点图

    # Add two built-in scatter-plot attribute plots in two columns, with the
    # query legend at the bottom. The `queries` flag is on for the first
    # scatter plot and off for the second. TRUE/FALSE are spelled out because
    # `T` and `F` are ordinary variables that can be reassigned.
    upset(
      movies,
      main.bar.color = "black",
      query.legend = "bottom",
      queries = list(
        list(
          query = intersects,
          params = list("Drama"),
          color = "red",
          active = FALSE
        ),
        list(
          query = intersects,
          params = list("Action", "Drama"),
          active = TRUE
        ),
        list(
          query = intersects,
          params = list("Drama", "Comedy", "Action"),
          color = "orange",
          active = TRUE
        )
      ),
      attribute.plots = list(
        gridrows = 45,
        plots = list(
          list(
            plot = scatter_plot,
            x = "ReleaseDate",
            y = "AvgRating",
            queries = TRUE
          ),
          list(
            plot = scatter_plot,
            x = "AvgRating",
            y = "Watches",
            queries = FALSE
          )
        ),
        ncols = 2
      )
    )
    
    image.png

    添加属性箱线图

    # Add box plots summarising two attributes of the movies table.
    upset(
      data = movies,
      boxplot.summary = c("AvgRating", "ReleaseDate")
    )
    
    image.png

    一次性添加元数据,查询和属性图

    # Combine set metadata, queries and attribute plots in a single call.
    # TRUE/FALSE are spelled out because `T` and `F` are ordinary variables
    # that can be reassigned.
    upset(
      movies,
      query.legend = "bottom",
      # set metadata: score bar plot, city text and city colouring of the matrix
      set.metadata = list(
        data = metadata,
        plots = list(
          list(
            type = "hist",
            column = "avgRottenTomatoesScore",
            assign = 20
          ),
          list(
            type = "text",
            column = "Cities",
            assign = 5,
            colors = c(Boston = "green", NYC = "navy", LA = "purple")
          ),
          list(
            type = "matrix_rows",
            column = "Cities",
            colors = c(Boston = "green", NYC = "navy", LA = "purple"),
            alpha = 0.5
          )
        )
      ),
      # intersection queries
      queries = list(
        list(
          query = intersects,
          params = list("Drama"),
          color = "red",
          active = FALSE
        ),
        list(
          query = intersects,
          params = list("Action", "Drama"),
          active = TRUE
        ),
        list(
          query = intersects,
          params = list("Drama", "Comedy", "Action"),
          color = "orange",
          active = TRUE
        )
      ),
      # attribute scatter plots, laid out in two columns
      attribute.plots = list(
        gridrows = 45,
        plots = list(
          list(
            plot = scatter_plot,
            x = "ReleaseDate",
            y = "AvgRating",
            queries = TRUE
          ),
          list(
            plot = scatter_plot,
            x = "AvgRating",
            y = "Watches",
            queries = FALSE
          )
        ),
        ncols = 2
      )
    )
    
    image.png
    # Print the R version, platform, locale and attached/loaded package
    # versions of the current session.
    sessionInfo()
    ## R version 3.6.0 (2019-04-26)
    ## Platform: x86_64-w64-mingw32/x64 (64-bit)
    ## Running under: Windows 10 x64 (build 18363)
    ## 
    ## Matrix products: default
    ## 
    ## locale:
    ## [1] LC_COLLATE=Chinese (Simplified)_China.936 
    ## [2] LC_CTYPE=Chinese (Simplified)_China.936   
    ## [3] LC_MONETARY=Chinese (Simplified)_China.936
    ## [4] LC_NUMERIC=C                              
    ## [5] LC_TIME=Chinese (Simplified)_China.936    
    ## 
    ## attached base packages:
    ## [1] stats     graphics  grDevices utils     datasets  methods   base     
    ## 
    ## other attached packages:
    ## [1] UpSetR_1.4.0
    ## 
    ## loaded via a namespace (and not attached):
    ##  [1] Rcpp_1.0.5       knitr_1.23       magrittr_1.5     tidyselect_0.2.5
    ##  [5] munsell_0.5.0    colorspace_1.4-1 R6_2.4.0         rlang_0.4.7     
    ##  [9] plyr_1.8.4       stringr_1.4.0    dplyr_0.8.3      tools_3.6.0     
    ## [13] grid_3.6.0       gtable_0.3.0     xfun_0.8         withr_2.1.2     
    ## [17] htmltools_0.3.6  assertthat_0.2.1 yaml_2.2.0       lazyeval_0.2.2  
    ## [21] digest_0.6.20    tibble_2.1.3     crayon_1.3.4     gridExtra_2.3   
    ## [25] purrr_0.3.2      ggplot2_3.2.0    glue_1.3.1       evaluate_0.14   
    ## [29] rmarkdown_1.13   labeling_0.3     stringi_1.4.3    compiler_3.6.0  
    ## [33] pillar_1.4.2     scales_1.0.0     pkgconfig_2.0.2
    

    ▼更多精彩推荐,请关注我们▼

    image

    相关文章

      网友评论

        本文标题:R语言可视化(十):集合图绘制

        本文链接:https://www.haomeiwen.com/subject/lbfbdktx.html