美文网首页R语言
[R语言] TidyTuesday ggplot2可视化学习 1

[R语言] TidyTuesday ggplot2可视化学习 1

作者: 半为花间酒 | 来源:发表于2020-04-11 10:39 被阅读0次

    原始数据主题:Tour de France Winners

    重新绘制主题:口袋妖怪 Pokemons

    跟着刘博学画图,语雀指路
    TidyTuesday 可视化学习之 ggplot2 一笔一画绘制表格
    原始数据
    https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv
    重绘数据(口袋妖怪)
    https://pan.baidu.com/s/1sMHPLKL_OsEpsrwJ6uQGPQ
    提取码:oxm0
    转载请注明:陈熹 chenx6542@foxmail.com (简书号:半为花间酒)

    前置知识

    - rle函数

    计算向量中连续相同字符(广义游程)的个数

    x <- c(1,1,1,2,3,3,3,1,1)
    rle(x)
    # Run Length Encoding
    # lengths: int [1:4] 3 1 3 2
    # values : num [1:4] 1 2 3 1
    
    x <- c(1,2,3,4,4,1,2)
    rle(x)
    # Run Length Encoding
    # lengths: int [1:6] 1 1 1 2 1 1
    # values : num [1:6] 1 2 3 4 1 2
    
    x <- c(1,1,2,3,4,4,1,3)
    rle(x)
    # Run Length Encoding
    # lengths: int [1:6] 2 1 1 2 1 1
    # values : num [1:6] 1 2 3 4 1 3
    

    图源《R编程艺术》

    - glue函数

    类似python3的字符串格式化

    # > python
    name = 'Fred'
    age = '50'
    print(f'My name is {name}, my age next year is {age + 1}')
    # My name is Fred, my age next year is 51.
    
    #> R
    name <- "Fred"
    age <- 50
    anniversary <- as.Date("1991-10-12")
    
    library(glue)
    glue('My name is {name},
          my age next year is {age + 1},
          my anniversary is {anniversary}')
    # 会识别\n换行
    # My name is Fred,
    # my age next year is 51,
    # my anniversary is 1991-10-12
    
    glue('My name is {name},',
         'my age next year is {age + 1},',
         'my anniversary is {format(anniversary, "%A, %B %d, %Y")}.')
    # 以,相连直接拼接不换行
    # My name is Fred,my age next year is 51,my anniversary is 星期六, 十月 12, 1991.
    

    - coord_cartesian函数

    放大镜效果不改变图形形状

    library(patchwork)
    library(ggplot2)
    
    p1 <- ggplot(mtcars, aes(disp, wt)) +
      geom_point() +
      geom_smooth()
    p2 <- p1 + scale_x_continuous(limits = c(325, 500))
    p3 <- p1 + coord_cartesian(xlim = c(325, 500))
    
    p1 + p2 + p3
    

    - with函数

    部分参考:R语言with/within函数添加数据框到环境变量

    # 使用with函数将dat添加到环境
    dat <- matrix(rnorm(20),nrow = 4,ncol=5)
    colnames(dat)<-paste("a" ,1:5,sep ="")
    rownames(dat)<-paste("b",1:4,sep = "")
    dat <- as.data.frame(dat)
    
    dat$a1 + dat$a2
    # 等价于
    with(dat, a1+a2)
    
    # 另一个例子
    dat <- read.csv("femaleMiceWeights.csv") 
    X <- filter(dat,Diet == "chow") %>%  
      select(Bodyweight) %>% unlist
    Y <- filter(dat,Diet == "hf") %>%  
      select(Bodyweight) %>% unlist
    
    t.test(X,Y)$p.value
    # 等价于
    with(t.test(X,Y), p.value)
    

    - here包

    部分参考:Project-oriented workflow 面向项目的工作流程

    How can you avoid setwd() at the top of every script?
    Use the here() function from the here package to build the path when you read or write a file. Create paths relative to the top-level directory.

    getwd()
    
    # 当前目录,也可以直接不指定
    ggsave(here::here('.',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")), 
           dpi = 320, width = 11, height = 17)
    
    # 上一级目录
    ggsave(here::here('..',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")), 
           dpi = 320, width = 11, height = 17)
    
    # 下级目录
    ggsave(here::here('dat','test',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")), 
           dpi = 320, width = 11, height = 17)
    

    原图复现

    - 数据预处理部分

    library(tidyverse)
    library(lubridate)
    library(countrycode)
    # remotes::install_github("wilkelab/ggtext")
    library(ggtext)
    library(glue)
    library(here)
    #library(skimr)
    library(dplyr)
    
    tdf_winners <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv')
    
    tdf_table  <- tdf_winners %>% 
      mutate(
        wins_consecutive = with(rle(winner_name), rep(lengths, times = lengths)),
        year = year(start_date), # 提取年数据
        year_labels = ifelse(year %% 10 == 0, glue("**{year}**"), year),
        year_group = case_when(
          year < 1915 ~ 1,
          year > 1915 & year < 1940 ~ 2,
          TRUE ~ 3),
        avg_speed = distance / time_overall,
        country_code = countrycode(nationality, origin = "country.name", destination = "iso3c"),
        winner_annot = ifelse(wins_consecutive > 2, glue("**{winner_name} ({country_code})**"), glue("{winner_name} ({country_code})"))
      ) %>%
      # 分组很妙,添加行号
      group_by(year_group) %>% 
      mutate(
        n_annot = row_number(),
        annot = ifelse((n_annot - 2) %% 3 == 0, TRUE, FALSE)
      ) %>% 
      ungroup() %>% 
      add_row(year = c(1915, 1916, 1917, 1918, 1940, 1941, 1942, 1943)) %>%
      arrange(year) %>% 
      mutate(n = row_number())
    

    - 画图部分

    # 把字体安排上
    windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
                 RMN=windowsFont("Times New Roman"),
                 ARL=windowsFont("Arial"),
                 ARLB=windowsFont("Arial Bold"),
                 JBM=windowsFont("JetBrains Mono"))
    
    # step1: geom_segment() 标虚线
    ggplot(tdf_table) +
      # dotted gridlines ---------------------------------------------------
      # 使用 geom_segment() 函数添加虚线
      geom_segment(data = subset(tdf_table, !is.na(year_labels)),
                 aes(x = 0, xend = 24000, y = n, yend = n), 
                 linetype = "dotted", size = 0.2) +
      # step2:加上左右两侧的年份
      geom_richtext(aes(x = -1000, y = n, label = year_labels), 
                  fill = "red", label.color = NA, 
                  label.padding = unit(0.1, "lines"), 
                  family = "JBM", size = 2.5) +
      geom_richtext(aes(x = 25000, y = n, label = year_labels),
                    fill = "blue", label.color = NA, 
                    label.padding = unit(0.1, "lines"),
                    family = "JBM", size = 2.5) +
      # step3:geom_area() 加上填充面积
      geom_area(aes(x = distance * 0.625, y = n, group = year_group), 
                fill = "#7DDDB6", alpha = 0.6, 
                orientation = "y", position = "identity") +
      # step4:选择性加上每一个上面对应的点
      geom_point(data = subset(tdf_table, annot), 
                 aes(x = distance * 0.625, y = n), size = 0.5) +
      # step5:给 step4 中的点加上数值
      geom_label(data = subset(tdf_table, annot), 
                 aes(x = distance * 0.625 + 100, y = n, label = distance), 
                 fill = "#F3F2EE", label.size = 0, 
                 label.padding = unit(0.1, "lines"), 
                 hjust = 0, family = "JBM", size = 2.5) +
      # step6:给每一行加上注释,对应 WINNER
      geom_richtext(aes(x = 5300, y = n, label = winner_annot, .na = NULL), 
                  fill = "#F3F2EE", label.size = 0, 
                  label.padding = unit(0.1, "lines"), 
                  hjust = 0, family = "JBM", size = 2.5) +
      geom_label(aes(x = 10300, y = n, label = glue("{winner_team}", .na = NULL)), 
                 fill = "#F3F2EE", label.size = 0, label.padding = unit(0.1, "lines"),
                 hjust = 0, family = "JBM", size = 2.5) +
      # step7:geom_segment 函数添加 AVERAGE  SPEED 数据
      geom_segment(aes(x = 16000, xend = 16000 + avg_speed * 66.67, y = n, yend = n), 
                   size = 2, colour = "#7DDDB6", alpha = 0.6) +
      # step8:选择性添加 AVERAGE SPEED 对应的数值
      geom_label(data = subset(tdf_table, annot), 
                 aes(x = 16000 + avg_speed * 66.67 + 100, y = n, 
                     label = round(avg_speed, 1)), fill = "#F3F2EE", 
                 label.size = 0, label.padding = unit(0.1, "lines"), 
                 hjust = 0, family = "JBM", size = 2.5) +
      # step9:添加 TOTAL TIME 时间填充(geom_ribbon)、点、标签
      geom_ribbon(aes(xmin = 20000, xmax = 20000 + time_overall * 10, y = n, group = year_group),
                  fill = "#FCDF33", alpha = 0.6, orientation = "y", position = "identity") +
      geom_point(data = subset(tdf_table, annot), 
                 aes(x = 20000 + time_overall * 10, y = n), size = 0.5) +
      geom_label(data = subset(tdf_table, annot), 
                 aes(x = 20000 + time_overall * 10 + 100, y = n, 
                     label = round(time_overall, 1)), 
                 fill = "#F3F2EE", label.size = 0, 
                 label.padding = unit(0.1, "lines"),
                 hjust = 0, family = "JBM", size = 2.5) +
      # step10:annotate 函数添加竖直线
      annotate("segment", 
               x = c(-2000, 0, 5000, 10000, 16000, 20000, 24000, 26000),
               xend = c(-2000, 0, 5000, 10000, 16000, 20000, 24000, 26000),
               y = -4, yend = 115, size = 0.3) +
      # step11:annotate 函数添加三条横线
      annotate("segment",
               x = -2000, xend = 26000, 
               y = c(-4, -1, 115), yend = c(-4, -1, 115), size = 0.3) +
      # step12:annotate 添加表头
      annotate("text", 
               x = c(-1000, 2500, 7500, 13000, 18000, 22000, 25000), 
               y = -2.5, 
               label = toupper(c("year", "distance", "winner", "team", "average speed", "total time", "year")), 
               hjust = 0.5, family = "ARLB", size = 3.5) +
      # step13:annotate 函数加上空白
      annotate("rect",
               xmin = -2000, ymin = c(13, 38), 
               xmax = 26000, ymax = c(16, 41), 
               fill = "#F3F2EE", colour = "black", size = 0.3) +
      # step14:annotate 函数参数 richtext 添加中间小表头
      annotate("richtext", x = 13000, y = c(14.5, 39.5), 
               label = c("**1915-1918** Tour suspended because of Word War I",
                         "**1940-1946** Tour suspended because of Word War II"), 
               label.color = NA, fill = "#F3F2EE", hjust = 0.5, 
               family = "ARL", size = 3.5) +
      # step15:annotate 函数参数 text 给 DISTANCE 栏加上单位刻度
      annotate("text", x = c(100, 4900), y = 0, 
               label = c("0", "8000 km"), hjust = c(0, 1), 
               family = "ARL", size = 3) +
      # step16:annotate 函数参数 text 给其他的添加刻度尺和注释
      annotate("text", x = c(16100, 19900), y = 0, 
               label = c("0", "60 km/h"), hjust = c(0, 1), 
               family = "ARL", size = 3) +
      annotate("text", x = c(20100, 23900), y = 0, 
               label = c("0", "300 h"), hjust = c(0, 1), 
               family = "ARL", size = 3) +
      annotate("text", x = 26000, y = -6, 
               label = "Source: alastairrushworth/tdf & kaggle.com/jaminliu | Graphic: Georgios Karamanis", 
               hjust = 1, family = "ARL", size = 3) +
      # step17:coord_cartesian 函数取消画板限制范围
      coord_cartesian(clip = 'off') +
      # step18:scale_x_continuous 函数通过 limits 和 expand 函数控制贴 y 轴距离
      scale_x_continuous(limits = c(-2300, 26300), expand = expansion(add = 1)) +
      # step19:scale_y_reverse 函数翻转 y 轴左边起始顺序,上下颠倒,并通过 expand = expansion(add = 0) 控制 y 轴顶端和低端间隙为 0
      scale_y_reverse(expand = expansion(add = 0)) +
      # step20:labs 加标题以及 theme_void 去除主题线条背景以及坐标轴
      labs(
        title = "Tour de France Winners") +
      theme_void(base_family = "JBM") +
      # step21:设置灰色背景,画板大小,以及标题大小
      theme(
        plot.background = element_rect(fill = "#F3F2EE", colour = NA),
        plot.margin = margin(20, 20, 20, 20),
        plot.title = element_text(hjust = 0.01, size = 28, 
                                  family = "JBM", margin = margin(0, 0, -8, 0))
      )
    
    
    # ggsave(here::here('dat','test',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")), 
    #      dpi = 320, width = 11, height = 17)
    
    # # step22:用here方法保存图片
    ggsave(here::here('.',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")), 
           dpi = 320, width = 11, height = 17)
    

    - 成品

    拿数据重新绘图

    library(tidyverse)
    library(lubridate)
    library(countrycode)
    # remotes::install_github("wilkelab/ggtext")
    library(ggtext)
    library(glue)
    library(here)
    #library(skimr)
    library(dplyr)
    library(RColorBrewer)
    library(scales)
    
    # 设置渐变颜色
    Blues <- brewer.pal(9, "Blues")[4:7]
    pal <- colorRampPalette(Blues)
    new_Blues <- pal(10)
    show_col(new_Blues)
    
    # 注册字体
    windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
                 RMN=windowsFont("Times New Roman"),
                 ARL=windowsFont("Arial"),
                 ARLB=windowsFont("Arial Bold"),
                 JBM=windowsFont("JetBrains Mono"),
                 MY=windowsFont("Microsoft YaHei"),
                 IPML=windowsFont("IBM Plex Mono Light"),
                 IPS=windowsFont("IBM Plex Sans"),
                 IPSB=windowsFont("IBM Plex Sans Bold"),
                 JBMB=windowsFont("JetBrains Mono Bold"))
    
    pokemons <- readr::read_csv('C:/Users/chenx/Desktop/pokemons.csv')
    
    pokemons_dat <- pokemons %>% 
      filter((100 <= id & id <= 126 )| (131 <= id & id <= 156 ) |  (161 <= id & id <= 200 )) %>% 
      select(1:5,'book_color','HP':'total_value') %>% 
      mutate(
        id_group = case_when(
          id < 127 ~ 1,
          id > 130 & id < 127 ~ 2,
          TRUE ~ 3),
        # 实际上不转换也行
        color = case_when(
        book_color == '绿色' ~ 'green',
        book_color == '灰色' ~ 'grey',
        book_color == '褐色' ~ 'brown',
        book_color == '蓝色' ~ 'blue',
        book_color == '粉红色' ~ 'pink',
        book_color == '红色' ~ 'red',
        book_color == '黄色' ~ 'yellow',
        book_color == '紫色' ~ 'purple',
        book_color == '白色' ~ 'white',
        book_color == '黑色' ~ 'black'),
        name_type = glue('{Chinese_name}/{Japanese_name}({poketype})')
      ) %>% 
      group_by(id_group) %>% 
      mutate(
        n_annot = row_number(),
        # 设置可显示出的数据点
        annot = ifelse((n_annot - 2) %% 3 == 0, TRUE, FALSE)
      ) %>% 
      ungroup() %>% 
      add_row(id = c(127, 128, 129, 130, 157, 158, 159, 160)) %>% 
      arrange(id) %>% 
      mutate(n = row_number())
      
    ggplot(pokemons_dat) +
      geom_segment(data = subset(pokemons_dat, !is.na(color)),
                 aes(x = 0, xend = 24000, y = n, yend = n), 
                 linetype = "dotted", size = 0.2) +
      geom_richtext(aes(x = -1000, y = n, label = id), 
                    fill = "red", label.color = NA, 
                    label.padding = unit(0.1, "lines"), 
                    family = "JBM", size = 2.5) +
      geom_richtext(aes(x = 25000, y = n, label = id, fill = color),
                    label.color = NA, 
                    label.padding = unit(0.1, "lines"),
                    family = "JBM", size = 2.5) + 
      scale_fill_manual(values =new_Blues) +
      guides(fill=FALSE) +
      geom_area(aes(x = total_value * 7, y = n, group = id_group), 
                fill = "#7DDDB6", alpha = 0.6, 
                orientation = "y", position = "identity") +
      geom_point(data = subset(pokemons_dat, annot), 
                 aes(x = total_value * 7, y = n), size = 0.5) +
      geom_label(data = subset(pokemons_dat, annot), 
                 aes(x = total_value * 7 + 100, y = n, label = total_value), 
                 fill = "#F3F2EE", label.size = 0, 
                 label.padding = unit(0.1, "lines"), 
                 hjust = 0, family = "JBM", size = 2.5) +
      # 很无奈,中文和日文打带边框标签都会留出右端空格,所以只能把英文名单拎出来加边框
      geom_label(aes(x = 6000, y = n, label = name_type,.na = NULL), 
                    fill = "#F3F2EE", label.size = 0, 
                    label.padding = unit(0.1, "lines"), 
                    hjust = 0, family = "ARL", size = 2.5) +
      geom_richtext(aes(x = 13000, y = n, label = English_name, .na = NULL), 
                 fill = "#F3F2EE", label.size = 0, label.padding = unit(0.1, "lines"),
                 hjust = 0, family = "JBM", size = 2.5) +
      geom_segment(aes(x = 16000, xend = 16000 + Attack * 21, y = n, yend = n), 
                   size = 2, colour = "#7DDDB6", alpha = 0.6) +
      geom_label(data = subset(pokemons_dat, annot), 
                 aes(x = 16000 + Attack * 21 + 100, y = n, 
                     label = round(Attack, 1)), fill = "#F3F2EE", 
                 label.size = 0, label.padding = unit(0.1, "lines"), 
                 hjust = 0, family = "JBM", size = 2.5) +
      geom_ribbon(aes(xmin = 21000, xmax = 21000 + Speed * 15, y = n, group = id_group),
                  fill = "#FCDF33", alpha = 0.6, orientation = "y", position = "identity") +
      geom_point(data = subset(pokemons_dat, annot), 
                 aes(x = 21000 + Speed * 15, y = n), size = 0.5) +
      geom_label(data = subset(pokemons_dat, annot), 
                 aes(x = 21000 + Speed * 15 + 100, y = n, 
                     label = round(Speed, 1)), 
                 fill = "#F3F2EE", label.size = 0, 
                 label.padding = unit(0.1, "lines"),
                 hjust = 0, family = "JBM", size = 2.5) +
      annotate("segment", 
               x = c(-2000, 0, 5700, 12700, 16000, 21000, 24000, 26000),
               xend = c(-2000, 0, 5700, 12700, 16000, 21000, 24000, 26000),
               y = -4, yend = 103, size = 0.3) +
      annotate("segment",
               x = -2000, xend = 26000, 
               y = c(-4, -1, 103), yend = c(-4, -1, 103), size = 0.3) +
      annotate("text", 
               x = c(-1000, 2750, 9200, 14350, 18500, 22500, 25000), 
               y = -2.5, 
               label = toupper(c("ID", "ABILITY SCORES", "NAME/TYPE", "ENAME", "ATTACK", "SPEED", "ID")), 
               hjust = 0.5, family = "ARLB", size = 3.5) +
      annotate("rect",
               xmin = -2000, ymin = c(27.5, 57.5), 
               xmax = 26000, ymax = c(31.5, 61.5), 
               fill = "#F3F2EE", colour = "black", size = 0.3) +
      annotate("richtext", x = 13000, y = c(29.5, 59.5), 
               label = c("**127-130**  Which Have Been Deleted :) I",
                         "**157-160**  Which Have Been Deleted :) II"), 
               label.color = NA, fill = "#F3F2EE", hjust = 0.5, 
               family = "JBM", size = 5) +
      annotate("text", x = c(100, 5600), y = 0, 
               label = c("0", "800"), hjust = c(0, 1), 
               family = "JBM", size = 3) +
      annotate("text", x = c(16100, 20900), y = 0, 
               label = c("0", "250"), hjust = c(0, 1), 
               family = "JBM", size = 3) +
      annotate("text", x = c(21100, 23900), y = 0, 
               label = c("0", "200"), hjust = c(0, 1), 
               family = "JBM", size = 3) +
      annotate("text", x = 26000, y = -6, 
               label = "Source: wiki.52poke.com | Graphic: Xi Chen", 
               hjust = 1, family = "JBM", size = 3) +
      coord_cartesian(clip = 'off') +
      scale_x_continuous(limits = c(-2300, 26300), expand = expansion(add = 1)) +
      scale_y_reverse(expand = expansion(add = 0)) +
      labs(
        title = "Pokemons"
      ) +
      theme_void(base_family = "JBM") +
      theme(
        plot.background = element_rect(fill = "#F3F2EE", colour = NA),
        plot.margin = margin(20, 20, 20, 20),
        plot.title = element_text(hjust = 0.01, size = 28, 
                                  family = "JBMB", margin = margin(0, 0, -8, 0))
      )
    
    ggsave(here::here('.',paste0("Pokemons ", format(Sys.time(), "%Y%m%d"), ".png")), 
           dpi = 640, width = 10, height = 14)
    

    - 成品

    相关文章

      网友评论

        本文标题:[R语言] TidyTuesday ggplot2可视化学习 1

        本文链接:https://www.haomeiwen.com/subject/dvtrmhtx.html