美文网首页基本图形绘制R ggplotR
R 数据可视化 —— ggplot 线图

R 数据可视化 —— ggplot 线图

作者: 名本无名 | 来源:发表于2021-04-14 20:20 被阅读0次

    前言

    ggplot2 包含很多绘制线条的函数,大致可分为如下几类:

    • 连接线:折线(geom_line)、路径线(geom_path)、阶梯线(geom_step)

    • 参考线:水平线(geom_hline)、竖直线(geom_vline)、斜线(geom_abline)

    • 线段和曲线:geom_segment、geom_spoke、geom_curve

    • 函数曲线:geom_function、stat_function

    示例

    1. 连接线

    主要有三种连接线:

    1. geom_path:按照观测值在数据中出现的顺序将各点连接起来
    2. geom_line:按照 x 轴变量取值的大小顺序将各点连接起来
    3. geom_step:创建一个阶梯图,突出显示数据的变化

    常用参数:

    • linetype:线条类型
    • size:线条大小
    • lineend:线端点样式:round, butt, square
    • linejoin:线连接点样式:round, mitre, bevel
    • arrow:使用 grid::arrow() 函数设置箭头样式

    绘制一条简单的时间序列折线

    ggplot(economics, aes(date, unemploy)) + geom_line()
    

    绘制多条折线

    economics_long %>% 
      subset(variable %in% c("uempmed", "unemploy")) %>%
      ggplot(aes(date, value01, colour = variable)) +
      geom_line()
    

    翻转线条

    ggplot(economics, aes(unemploy, date)) + geom_line(orientation = "y")
    

    如果我们更加关注 y 值的变化情况,可以使用 geom_step 绘制阶梯图

    recent <- economics[economics$date > as.Date("2013-01-01"), ]
    
    p1 <- ggplot(recent, aes(date, unemploy)) + geom_line()
    
    p2 <- ggplot(recent, aes(date, unemploy)) + geom_step()
    
    plot_grid(p1, p2)
    

    geom_path 可以让你探索两个变量是如何随着时间的推移而发生变化的

    例如,失业率和个人储蓄率随时间的关系

    esamp <- sample_n(economics, 10)
      
    m <- ggplot(esamp, aes(unemploy/pop, psavert))
    
    p1 <- m + geom_path()
    
    p2 <- m + geom_path(aes(colour = as.numeric(date)))
    
    plot_grid(p1, p2)
    

    设置箭头

    c <- ggplot(economics, aes(x = date, y = pop))
    c1 <- c + geom_line(arrow = arrow())
    
    c2 <- c + geom_line(
      arrow = arrow(angle = 15, ends = "both", type = "closed")
    )
    plot_grid(c1, c2)
    

    更改连接线及端点样式

    base <- tibble(x = 1:3, y = c(4, 1, 9)) %>%
      ggplot(aes(x, y))
    
    b1 <- base + geom_path(size = 8)
    
    b2 <- base + geom_path(size = 8, lineend = "round")
    
    b3 <- base + geom_path(size = 8, lineend = "round", colour = "red")
    
    b4 <- base + geom_path(size = 8, linejoin = "mitre", lineend = "butt")
    
    plot_grid(b1, b2, b3, b4)
    

    当线条的中间有 NA 值时,则会有一个断点

    df <- data.frame(x = 1:5, y = c(1, 2, NA, 4, 5))
    ggplot(df, aes(x, y)) + geom_point() + geom_line()
    

    设置线条类型

    economics_long %>% 
      subset(variable %in% c("uempmed", "unemploy")) %>%
      ggplot(aes(date, value01, colour = variable)) +
      geom_line(aes(linetype = factor(variable))) +
      scale_linetype_manual("variable", values = c(5, 3))
    

    注意:同一条线上无法同时使用渐变色与非实线的线条类型(如虚线、点线),下面的代码将会报错

    economics_long %>% 
      subset(variable %in% c("uempmed", "unemploy")) %>%
      ggplot(aes(date, value01, group = variable)) +
      geom_line(aes(colour = value01), linetype = 2)
    

    2. 参考线

    为图形添加参考线对图形的注释非常有用,主要有水平、竖直和对角线三种参考线,对应于三个函数:

    • geom_hline: yintercept(y 轴截距)
    • geom_vline: xintercept(x 轴截距)
    • geom_abline: slope(斜率)和 intercept(截距)
    p <- ggplot(mtcars, aes(wt, mpg)) + geom_point()
    
    # 使用固定值
    p1 <- p + geom_vline(xintercept = 5)
    # 使用向量
    p2 <- p + geom_vline(xintercept = 1:5)
    # 水平线
    p3 <- p + geom_hline(yintercept = 20)
    # 斜线
    p4 <- p + geom_abline(intercept = 31, slope = -5)
    
    plot_grid(p1, p2, p3, p4)
    

    计算线性拟合直线的截距和斜率,然后用(取整后的)截距和斜率绘制直线

    > coef(lm(mpg ~ wt, data = mtcars))
    (Intercept)          wt 
      37.285126   -5.344472 
    > p + geom_abline(intercept = 37, slope = -5)
    

    更简单的方式是使用 geom_smooth 绘制拟合直线

    p + geom_smooth(method = "lm", se = FALSE)
    

    在绘制分面图形的时候,可以为不同的分面绘制不同的直线

    p <- ggplot(mtcars, aes(mpg, wt)) +
      geom_point() +
      facet_wrap(~ cyl)
    
    mean_wt <- data.frame(cyl = c(4, 6, 8), wt = c(2.28, 3.11, 4.00))
    p + geom_hline(aes(yintercept = wt), mean_wt)
    

    也可以添加其他属性

    ggplot(mtcars, aes(mpg, wt, colour = wt)) +
      geom_point() +
      geom_hline(aes(yintercept = wt, colour = wt), mean_wt) +
      facet_wrap(~ cyl)
    

    3. 线段和曲线

    geom_segment 用于绘制两个点之间的直线段,geom_curve 用于绘制两个点之间的曲线。

    两个点通过四个参数 (x, y) 和 (xend, yend) 指定坐标。

    例如,在散点图中标注两点之间的连接线

    b <- ggplot(mtcars, aes(wt, mpg)) +
      geom_point()
    
    df <- data.frame(x1 = 2.320, x2 = 3.520, y1 = 22.8, y2 = 15.5)
    b +
      geom_curve(aes(x = x1, y = y1, xend = x2, yend = y2, colour = "curve"), data = df) +
      geom_segment(aes(x = x1, y = y1, xend = x2, yend = y2, colour = "segment"), data = df)
    

    设置不同的曲率

    b1 <- b + geom_curve(aes(x = x1, y = y1, xend = x2, yend = y2), data = df, curvature = -0.2)
    
    b2 <- b + geom_curve(aes(x = x1, y = y1, xend = x2, yend = y2), data = df, curvature = 0.9)
    
    plot_grid(b1, b2)
    

    添加箭头

    b + geom_curve(
      aes(x = x1, y = y1, xend = x2, yend = y2),
      data = df,
      arrow = arrow(length = unit(0.05, "npc"))
    )
    

    使用 geom_segment 通过设置线段大小来绘制直方图

    counts <- as.data.frame(table(x = rpois(100,5)))
    counts$x <- as.numeric(as.character(counts$x))
    
    ggplot(counts, aes(x, Freq)) +
      geom_segment(aes(xend = x, yend = 0), size = 10, lineend = "butt")
    

    geom_spoke 绘制的是由起点坐标 (x, y)、角度 (angle) 和半径 (radius) 指定的线段

    df <- expand.grid(x = 1:10, y=1:10)
    df$angle <- runif(100, 0, 2*pi)
    df$speed <- runif(100, 0, sqrt(0.1 * df$x))
    
    ggplot(df, aes(x, y)) +
      geom_point() +
      geom_spoke(aes(angle = angle), radius = 0.5)
    

    看起来像是散落的大头针一样

    设置可变的半径

    ggplot(df, aes(x, y)) +
      geom_point() +
      geom_spoke(aes(angle = angle, radius = speed))
    

    4. 函数曲线

    使用 geom_function 或 stat_function 可以绘制指定函数的曲线,例如

    set.seed(2021)
    ggplot(data.frame(x = rnorm(100)), aes(x)) +
      geom_density() +
      geom_function(fun = dnorm, colour = "red")
    

    绘制了函数在数据范围内的曲线

    也可以只指定范围,来绘制无数据的函数曲线

    base <- ggplot() + xlim(-5, 5)
    base + geom_function(fun = dnorm)
    

    设置函数的参数值

    base + geom_function(fun = dnorm, args = list(mean = 2, sd = .5))
    

    其底层原理是在一些离散点上执行函数,然后用线将各函数值连接起来

    b1 <- base + stat_function(fun = dnorm, geom = "point")
    
    b2 <- base + stat_function(fun = dnorm, geom = "point", n = 20)
    
    plot_grid(b1, b2)
    

    下面两行代码效果是一样的

    b1 <- base + geom_function(fun = dnorm, n = 20)
    
    b2 <- base + stat_function(fun = dnorm, geom = "line", n = 20)
    
    plot_grid(b1, b2)
    

    自定义函数

    # 一张图绘制不同的函数
    p1 <- base +
      geom_function(aes(colour = "normal"), fun = dnorm) +
      geom_function(aes(colour = "t, df = 1"), fun = dt, args = list(df = 1))
    # 使用匿名函数
    p2 <- base + geom_function(fun = function(x) 0.5*exp(-abs(x)))
    # 同上
    p3 <- base + geom_function(fun = ~ 0.5*exp(-abs(.x)))
    # 使用自定义函数,效果同上
    f <- function(x) 0.5*exp(-abs(x))
    p4 <- base + geom_function(fun = f)
    
    plot_grid(p1, p2, p3, p4)
    

    样式图

    1. 路线图

    sample_n(mtcars, 10) %>%
      ggplot(aes(mpg, disp)) +
      geom_point(colour = "#69b3a2", na.rm = TRUE) +
      geom_segment(aes(xend = c(tail(mpg, n=-1), NA),
                       yend = c(tail(disp, n=-1), NA)),
                   arrow = arrow(length=unit(0.3,"cm")),
                   colour = "#69b3a2") +
      geom_text(aes(label = disp), hjust = 1.2) +
      theme_bw()
    

    2. 坡度图

    library(ggrepel)
    
    mpg %>% 
      group_by(year, manufacturer) %>%
      summarise(value = sum(displ)) %>%
      pivot_wider(names_from = year, values_from = value) %>%
      mutate(class = if_else((`1999` - `2008`) > 0, "#8dd3c7", "#bebada")) %>%
      ggplot() +
      geom_segment(aes(x = 1, xend = 2, y = `1999`, yend = `2008`, colour = class),
                   size = .75, show.legend = FALSE) +
      geom_vline(xintercept = 1, linetype = "solid", size = 1, colour = "#ff7f00") +
      geom_vline(xintercept = 2, linetype = "solid", size = 1, colour = "#1f78b4") +
      geom_point(aes(x = 1, y = `1999`), size = 3, shape = 21, fill = "green") +
      geom_point(aes(x = 2, y = `2008`), size = 3, shape = 21, fill = "red") +
      scale_colour_manual(labels = c("Up", "Down"), values = c("#8dd3c7", "#bebada")) +
      xlim(.5, 2.5) +
      
      geom_text_repel(aes(x = 1, y = `1999`, label = `1999`), 
                      hjust = "left", size = 3.5) +
      geom_text_repel(aes(x = 2, y = `2008`, label = `2008`), 
                      hjust = "right", size = 3.5) +
      geom_text(aes(y = 1.03*max(max(`1999`), max(`2008`))), label = "1999", x = 1,
                size = 5, hjust = 1.2) +
      geom_text(aes(y = 1.03*max(max(`1999`), max(`2008`))), label = "2008", x = 2,
                size = 5, hjust = -.2) +
      theme_void()
    

    在这个例子中,由于点有重叠的现象,导致标签也会重叠在一起。

    所以我们使用了 ggplot2 的扩展包 ggrepel 中的 geom_text_repel 来绘制不重叠的标签。

    相关文章

      网友评论

        本文标题:R 数据可视化 —— ggplot 线图

        本文链接:https://www.haomeiwen.com/subject/prcklltx.html