美文网首页可视化
[R语言] TidyTuesday ggplot2可视化学习 2

[R语言] TidyTuesday ggplot2可视化学习 2

作者: 半为花间酒 | 来源:发表于2020-04-17 19:59 被阅读0次

    数据主题:Rap Artists

    数据源:
    https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-14/polls.csv

    这次挑的图比较简单,比较有意思的就是纯黑背景
    实际就是封装成一个函数可以单独调用

    前置知识

    - 黑色主题

    黑色主题代码:Black theme for ggplot2

    主题代码和简单示例如下:

    theme_black = function(base_size = 12, base_family = "") {
      theme_grey(base_size = base_size, base_family = base_family) %+replace% 
        theme(
          # Specify axis options
          axis.line = element_blank(),  
          axis.text.x = element_text(size = base_size*0.8, color = "white", lineheight = 0.9),  
          axis.text.y = element_text(size = base_size*0.8, color = "white", lineheight = 0.9),  
          axis.ticks = element_line(color = "white", size  =  0.2),  
          axis.title.x = element_text(size = base_size, color = "white", margin = margin(0, 10, 0, 0)),  
          axis.title.y = element_text(size = base_size, color = "white", angle = 90, margin = margin(0, 10, 0, 0)),  
          axis.ticks.length = unit(0.3, "lines"),   
          # Specify legend options
          legend.background = element_rect(color = NA, fill = "black"),  
          legend.key = element_rect(color = "white",  fill = "black"),  
          legend.key.size = unit(1.2, "lines"),  
          legend.key.height = NULL,  
          legend.key.width = NULL,      
          legend.text = element_text(size = base_size*0.8, color = "white"),  
          legend.title = element_text(size = base_size*0.8, face = "bold", hjust = 0, color = "white"),  
          legend.position = "right",  
          legend.text.align = NULL,  
          legend.title.align = NULL,  
          legend.direction = "vertical",  
          legend.box = NULL, 
          # Specify panel options
          panel.background = element_rect(fill = "black", color  =  NA),  
          panel.border = element_rect(fill = NA, color = "white"),  
          panel.grid.major = element_line(color = "grey35"),  
          panel.grid.minor = element_line(color = "grey20"),  
          panel.spacing = unit(0.5, "lines"),   
          # Specify facetting options
          strip.background = element_rect(fill = "grey30", color = "grey10"),  
          strip.text.x = element_text(size = base_size*0.8, color = "white"),  
          strip.text.y = element_text(size = base_size*0.8, color = "white",angle = -90),  
          # Specify plot options
          plot.background = element_rect(color = "black", fill = "black"),  
          plot.title = element_text(size = base_size*1.2, color = "white"),  
          plot.margin = unit(rep(1, 4), "lines") 
        )  
    }
    
    library(patchwork)
    
    p1 <- ggplot(mtcars, aes(wt, mpg)) + geom_point()
    p2 <- ggplot(mtcars, aes(wt, mpg)) + geom_point(color = "white") + theme_black()
    
    p3 <- iris %>% 
      ggplot(aes(Sepal.Length,Petal.Length,color=Species)) +
      geom_point() +
      theme(
        legend.position = c(0.85,0.18)
      )
    p4 <- iris %>% 
      ggplot(aes(Sepal.Length,Petal.Length,color=Species)) +
      geom_point() + 
      theme_black() +
      theme(
        legend.position = c(0.85,0.18)
      )
    
    (p1 + p3)/(p2 + p4)
    

    - gghighlight

    gghighlight() can highlight almost any geoms.

    本质就是把感兴趣的点highlight
    我觉得理解成非感兴趣的点unhighlight可能更好

    对示例简单修改后如下:

    # 构建数据
    dat <- data.frame(
      idx = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
      value = c(1, 2, 3, 10, 11, 12, 9, 10, 11),
      category = rep(c("apple", "banana", "strawberry"), 3),
      stringsAsFactors = FALSE
    )
    
    • (1) 各分组最大值符合条件则highlight该分组
    p1 <- ggplot(dat, aes(idx, value, colour = category)) +
      geom_line() + 
      gghighlight(max(value) > 10)
    

    可以看到gghighlight会设置打标签
    本质就是ggrepel::geom_label_repel(),所以可以进行个性化调整

    • (2) 分组后按数据筛选感兴趣的点
    ggplot(dat, aes(idx, value)) +
      geom_point() +
      gghighlight(value > 2, label_key = category)
    

    依然会对highlight的点打上标签
    但如果点过多会有如下warning并且不打标签,上限应该是10个

    label_key: category
    Too many data points, skip labeling
    
    • (2) 可以调节非highlight数据的参数:unhighlighted_params
      就是标识但弱化非感兴趣数据的存在,举两个例子
    p1 <- ggplot(dat, aes(idx, value, colour = category)) +
      geom_line(size = 4) +
      gghighlight(max(value) > 10,
                  unhighlighted_params = list(size = 2)
      )
    
    p2 <- ggplot(mpg, aes(displ, hwy)) +
      geom_point(aes(fill=class),shape=21,color='white',size=2.5,stroke=1) +
      gghighlight::gghighlight(hwy > 19,
                               unhighlighted_params = list(size = 1))
    
    p1 + p2
    

    画图

    • 第一张图:歌曲年份和排名的散点图
    # 数据源
    polls <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-14/polls.csv')
    # 注册字体
    windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
                 RMN=windowsFont("Times New Roman"),
                 ARL=windowsFont("Arial"),
                 JBM=windowsFont("JetBrains Mono"))
    
    polls %>%
      # 自定义扩大了数据范围利于结果呈现
      mutate(points = case_when(rank == 1 ~ 10,
                                rank == 2 ~ 8,
                                rank == 3 ~ 6,
                                rank == 4 ~ 4,
                                rank == 5 ~ 2)) %>%
      group_by(title, year, gender) %>%
      summarise(sum_points = sum(points)) %>%
      # arrange(-var) 等价于 arrange(desc(var))
      arrange(-sum_points) %>%
      ggplot(aes(year, sum_points)) +
      geom_point(position = "jitter")  +
      # 黑色背景下highlight无论是感兴趣还是非兴趣的数据点颜色均会反转成可见
      gghighlight::gghighlight(sum_points > 90,  label_key = title,
                               # 这里原作者没有修改
                               label_params = list(fill=NA, colour="white", 
                                                   size = 5, family='ARL',
                                                   segment.colour = NA)) +
      labs(title = "Top Rated Songs",
           x = "Year",
           y= "Total Points") +
      # 加上黑色主题函数 
      theme_black() +
      # 如果需要调整各参数需要在主题背景之后设置否则会被覆盖
      theme(
        text = element_text(family="ARL"),
        plot.title = element_text(size = 20,vjust = 2),
        axis.title.x = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text.x = element_text(size = 13),  
        axis.text.y = element_text(size = 13)
      )
    
    • 第二张图:探究歌曲排名与歌手合作的关系
    res <- polls %>%
      # 利用正则和文本检测寻找特定符号 ft 和 & 明确有无合作
      mutate(first_artist = gsub( " ft.*$", "", artist ), 
             first_artist = gsub( " &.*$", "", first_artist ), 
             collaboration = case_when(str_detect(artist, "ft") ~ "Yes",
                                       str_detect(artist, "&") ~ "Yes",
                                       TRUE ~ "No"),
             points = case_when(rank == 1 ~ 10,
                                rank == 2 ~ 8,
                                rank == 3 ~ 6,
                                rank == 4 ~ 4,
                                rank == 5 ~ 2)) %>% 
      # 根据歌手和合作与否分别计算平均歌曲得分
      group_by(first_artist, collaboration) %>%
      summarise(sum_points = sum(points),
                n = n(),
                avg_points = sum_points/n) %>%
      group_by(first_artist) %>% 
      # 等价于 filter(n() == 2),筛选出有无合作均发过歌曲的歌手
      filter(n() > 1) %>% 
      arrange(-avg_points)
    
    res %>% 
      ggplot(aes(avg_points, first_artist, color = collaboration)) +
      geom_line(aes(group = first_artist)) +
      geom_point() +
      scale_color_manual(values = c("red", "green"), name = "Collaboration?") +
      labs(title = "Do artists get higher rankings when they collaborate?",
           y = "",
           x = "Average Points Rating") +
      theme_black() +
      theme(
        text = element_text(family="ARL"),
        plot.title = element_text(size = 17,vjust = 4),
        axis.title.x = element_text(size = 14),
        axis.title.y = element_text(size = 14),
        axis.text.x = element_text(size = 11),  
        axis.text.y = element_text(size = 11),
        legend.title = element_text(size = 13),
        legend.text = element_text(size = 10)
      )
    

    相关文章

      网友评论

        本文标题:[R语言] TidyTuesday ggplot2可视化学习 2

        本文链接:https://www.haomeiwen.com/subject/mxmyvhtx.html