美文网首页R语言学习
ggpubr002 绘制一个连续变量

ggpubr002 绘制一个连续变量

作者: caoqiansheng | 来源:发表于2020-08-18 22:46 被阅读0次

    1.密度图 ggdensity

    1.1 用法
    # 密度图 ggdensity
    ggdensity(
    # 数据,数据框  
      data, 
    # 横坐标,绘制的变量
      x,   
    # 纵坐标,引用数据需""
      y = "..density..", 
    # 逻辑值。默认值为FALSE。仅当y是包含要绘制的多个变量的向量时使用。
    # 如果为TRUE,则通过组合y变量的图来创建多面板图
      combine = FALSE, 
    # 逻辑或字符值。默认值为FALSE。仅当y是包含要绘制的多个变量的向量时使用。
    # 如果为TRUE,则在同一绘图区域中合并多个y变量。允许的值还包括"asis”(TRUE)和“ flip”。如果merge =“ flip”,则y变量用作x刻度标签,而x变量用作分组变量。
      merge = FALSE,  
    # 密度线颜色和填充颜色
      color = "black",                                      
      fill = NA,
    # 用于按组着色或填充的调色板
      palette = NULL,
    # 更改点和轮廓的大小
      size = NULL,
      linetype = "solid",
    # 透明度
      alpha = 0.5,
    # 标题及标签
      title = NULL,
      xlab = NULL,
      ylab = NULL,
    # 指定用于将图面划分为多个面板的分组变量
      facet.by = NULL,
      panel.labs = NULL,
      short.panel.labs = TRUE,
    # 允许的值是“平均值”或“中位数”之一(分别用于添加平均值或中位数线)
      add = c("none", "mean", "median"),
      add.params = list(linetype = "dashed"),
    # 辑值值,如果为TRUE,则添加边缘地毯。
      rug = FALSE,
      label = NULL,
    # 字体标签,一个可以包含以下元素的组合的列表:
    # 大小(例如:14),样式(例如:“纯”,“粗体”,“斜体”,“ bold.italic”)
    # 颜色(例如:“红色”)的标签。例如font.label = list(size = 14,face =“ bold”,颜色=“ red”)。
    # 若要仅指定大小和样式,请使用font.label = list(size = 14,face =“ plain”)
      font.label = list(size = 11, color = "black"),
      label.select = NULL,
    # 是否使用ggrepel避免过度绘制文本标签的逻辑值
      repel = FALSE,
      label.rectangle = FALSE,
      ggtheme = theme_pubr(),
      ...
    )
    
    1.2 自定义参数ggpar()

    使用函数ggpar()可以轻松自定义绘图, 阅读?ggpar进行更改:

    • 主标题和轴标签:main,xlab,ylab
    • 轴区间:xlim,ylim(例如:ylim = c(0,30))
    • 轴比例尺:xscale,yscale(例如:yscale ="log2")
    • 调色板:palette="Dark2"或调色板= c("gray","blue","red")
    • 图例标题,标签和位置:legend="right"
    • 绘图方向:orientation = c("vertical", "horizontal", "reverse")
    1.3 实例
    # Load ggpubr, which provides ggdensity(); without it the calls below fail.
    library(ggpubr)

    # Create example data: 200 weights per sex, drawn from normal
    # distributions with mean 55 (F) and mean 58 (M) and the default sd of 1.
    # The seed makes the random draws reproducible.
    set.seed(1234)
    wdata <- data.frame(
      sex = factor(rep(c("F", "M"), each = 200)),
      weight = c(rnorm(200, 55), rnorm(200, 58))
    )

    # Inspect the first four rows
    head(wdata, 4)
    #>   sex   weight
    #> 1   F 53.79293
    #> 2   F 55.27743
    #> 3   F 56.08444
    #> 4   F 52.65430

    # Basic density plot
    # Add mean line and marginal rug
    p1 <- ggdensity(
      wdata,
      x = "weight", fill = "lightgray",
      add = "mean", rug = TRUE
    )
    p1
    #> Warning: geom_vline(): Ignoring `mapping` because `xintercept` was provided.
    #> Warning: geom_vline(): Ignoring `data` because `xintercept` was provided.

    # Change outline colors by groups ("sex")
    # Use custom palette
    p2 <- ggdensity(
      wdata,
      x = "weight",
      add = "mean", rug = TRUE,
      color = "sex", palette = c("#00AFBB", "#E7B800")
    )
    p2

    # Change outline and fill colors by groups ("sex")
    # Use custom palette
    p3 <- ggdensity(
      wdata,
      x = "weight",
      add = "mean", rug = TRUE,
      color = "sex", fill = "sex",
      palette = c("#00AFBB", "#E7B800")
    )
    p3

    # Same as p3, but split into one panel per level of "sex"
    p4 <- ggdensity(
      wdata,
      x = "weight",
      add = "mean", rug = TRUE,
      color = "sex", fill = "sex",
      palette = c("#00AFBB", "#E7B800"),
      facet.by = "sex"
    )
    p4

    # Print the four plots again (kept from the original listing;
    # presumably these were figure captions in the source article).
    p1
    p2
    p3
    p4

    2. 叠加正常密度图 stat_overlay_normal_density

    在视觉上检查偏离正常程度的程度

    2.1 用法
    stat_overlay_normal_density(
    # 由aes()或aes_()创建的美学映射集。如果指定且继承.aes = TRUE(默认值),它将与绘图顶层的默认映射结合。如果没有绘图映射,则必须提供映射。  
    mapping = NULL,
      data = NULL,
      geom = "line",
      position = "identity",
      na.rm = FALSE,
      show.legend = NA,
      inherit.aes = TRUE,
      ...
    )
    
    2.2 举例
    # Load ggpubr for ggdensity() and stat_overlay_normal_density(), and
    # ggplot2 for aes() and scale_x_continuous(); without them the calls
    # below fail.
    library(ggplot2)
    library(ggpubr)

    # Simple density plot with a dashed normal density curve overlaid
    data("mtcars")
    p1 <- ggdensity(mtcars, x = "mpg", fill = "red") +
      scale_x_continuous(limits = c(-1, 50)) +
      stat_overlay_normal_density(color = "red", linetype = "dashed")
    p1

    # Color by groups: one density and one overlaid normal curve per species
    data(iris)
    p2 <- ggdensity(iris, "Sepal.Length", color = "Species") +
      stat_overlay_normal_density(aes(color = Species), linetype = "dashed")
    p2

    # Facet: one panel per species
    p3 <- ggdensity(iris, "Sepal.Length", facet.by = "Species") +
      stat_overlay_normal_density(color = "red", linetype = "dashed")
    p3

    # Print the three plots again (kept from the original listing;
    # presumably these were figure captions in the source article).
    p1
    p2
    p3

    3.经验累积密度函数 Empirical cumulative density function

    样本分布函数(sample distribution function)亦称经验分布函数,统计学中的基本概念之一。样本分布函数Fn(x)具有分布函数的性质,我们可以将其看成是以等概率1/n 取值X1,X2,…,Xn的离散型随机变量的分布函数,且该函数的图形呈跳跃式一条台阶形折线,如观测值不重复,则每一跳跃为1/n ,如有重复,则按1/n的倍数跳跃上升。

    3.1 用法
    ggecdf(
      data,
      x,
      combine = FALSE,
      merge = FALSE,
      color = "black",
      palette = NULL,
      size = NULL,
      linetype = "solid",
      title = NULL,
      xlab = NULL,
      ylab = NULL,
      facet.by = NULL,
      panel.labs = NULL,
      short.panel.labs = TRUE,
      ggtheme = theme_pubr(),
      ...
    )
    
    
    3.2 举例
    # Load ggpubr, which provides ggecdf(); without it the calls below fail.
    library(ggpubr)

    # Create example data: 200 weights per sex, drawn from normal
    # distributions with mean 55 (F) and mean 58 (M) and the default sd of 1.
    # The seed makes the random draws reproducible.
    set.seed(1234)
    wdata <- data.frame(
      sex = factor(rep(c("F", "M"), each = 200)),
      weight = c(rnorm(200, 55), rnorm(200, 58))
    )

    # Inspect the first four rows
    head(wdata, 4)
    #>   sex   weight
    #> 1   F 53.79293
    #> 2   F 55.27743
    #> 3   F 56.08444
    #> 4   F 52.65430

    # Basic ECDF plot
    p1 <- ggecdf(wdata, x = "weight")
    p1

    # Change colors and linetype by groups ("sex")
    # Use custom palette
    p2 <- ggecdf(
      wdata,
      x = "weight",
      color = "sex", linetype = "sex",
      palette = c("#00AFBB", "#E7B800")
    )
    p2

    # Print the two plots again (kept from the original listing;
    # presumably these were figure captions in the source article).
    p1
    p2

    4.直方图 Histogram plot

    4.1 用法
    gghistogram(
    # 数据,数据框  
      data, 
    # 横坐标,绘制的变量
      x,   
    # 纵坐标,引用数据需" "
      y = "..count..",
    # 逻辑值。默认值为FALSE。仅当y是包含要绘制的多个变量的向量时使用。
    # 如果为TRUE,则通过组合y变量的图来创建多面板图
      combine = FALSE,
    # 逻辑或字符值。默认值为FALSE。仅当y是包含要绘制的多个变量的向量时使用。
    # 如果为TRUE,则在同一绘图区域中合并多个y变量。允许的值还包括"asis”(TRUE)和“ flip”。如果merge =“ flip”,则y变量用作x刻度标签,而x变量用作分组变量。
      merge = FALSE,
      weight = NULL,
      color = "black",
      fill = NA,
    # 着色板,用于按组着色或填充的调色板
      palette = NULL,
      size = NULL,
      linetype = "solid",
      alpha = 0.5,
    # bin数默认为30。
      bins = NULL,
    # 指定箱宽的数值。当您有很强的密集点图时,请使用介于0和1之间的值。例如binwidth = 0.2。
      binwidth = NULL,
      title = NULL,
      xlab = NULL,
      ylab = NULL,
    # 长度为1或2的字符向量,指定用于将图面划分为多个面板的分组变量。应该在数据中
      facet.by = NULL,
    # 用于修改构面面板标签的一个或两个字符向量的列表。例如,panel.labs = list(sex = c(“ Male”,“ Female”))指定“ sex”变量的标签。对于两个分组变量,您可以使用例如panel.labs = list(sex = c(“ Male”,“ Female”),rx = c(“ Obs”,“ Lev”,“ Lev2”)))。
      panel.labs = NULL,
      short.panel.labs = TRUE,
      add = c("none", "mean", "median"),
    # 参数'add'的参数(颜色,大小,线型);例如:add.params = list(颜色=“红色”)。
      add.params = list(linetype = "dashed"),
      rug = FALSE,
      add_density = FALSE,
      label = NULL,
      font.label = list(size = 11, color = "black"),
      label.select = NULL,
      repel = FALSE,
      label.rectangle = FALSE,
      position = position_identity(),
      ggtheme = theme_pubr(),
      ...
    )
    
    4.2 自定义参数ggpar()

    使用函数ggpar()可以轻松自定义绘图, 阅读?ggpar进行更改:

    • 主标题和轴标签:main,xlab,ylab
    • 轴区间:xlim,ylim(例如:ylim = c(0,30))
    • 轴比例尺:xscale,yscale(例如:yscale ="log2")
    • 调色板:palette="Dark2"或调色板= c("gray","blue","red")
    • 图例标题,标签和位置:legend="right"
    • 绘图方向:orientation = c("vertical", "horizontal", "reverse")
    4.3 实例
    # Load ggpubr, which provides gghistogram(); without it the calls below
    # fail.
    library(ggpubr)

    # Create example data: 200 weights per sex, drawn from normal
    # distributions with mean 55 (F) and mean 58 (M) and the default sd of 1.
    # The seed makes the random draws reproducible.
    set.seed(1234)
    wdata <- data.frame(
      sex = factor(rep(c("F", "M"), each = 200)),
      weight = c(rnorm(200, 55), rnorm(200, 58))
    )

    # Inspect the first four rows
    head(wdata, 4)
    #>   sex   weight
    #> 1   F 53.79293
    #> 2   F 55.27743
    #> 3   F 56.08444
    #> 4   F 52.65430

    # Basic histogram plot
    # Add mean line and marginal rug
    p1 <- gghistogram(
      wdata,
      x = "weight", fill = "lightgray",
      add = "mean", rug = TRUE
    )
    p1
    #> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
    #> Warning: geom_vline(): Ignoring `mapping` because `xintercept` was provided.
    #> Warning: geom_vline(): Ignoring `data` because `xintercept` was provided.

    # Change outline colors by groups ("sex")
    # Use custom color palette
    p2 <- gghistogram(
      wdata,
      x = "weight",
      add = "mean", rug = TRUE,
      color = "sex", palette = c("#00AFBB", "#E7B800")
    )
    p2
    #> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

    # Change outline and fill colors by groups ("sex")
    # Use custom color palette
    p3 <- gghistogram(
      wdata,
      x = "weight",
      add = "mean", rug = TRUE,
      color = "sex", fill = "sex",
      palette = c("#00AFBB", "#E7B800")
    )
    p3
    #> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

    # Combine histogram and density plots
    p4 <- gghistogram(
      wdata,
      x = "weight",
      add = "mean", rug = TRUE,
      fill = "sex", palette = c("#00AFBB", "#E7B800"),
      add_density = TRUE
    )
    p4
    #> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

    # Weighted histogram: each observation is weighted by its Petal.Length
    p5 <- gghistogram(iris, x = "Sepal.Length", weight = "Petal.Length")
    p5
    #> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

    # Print the five plots again (kept from the original listing;
    # presumably these were figure captions in the source article).
    p1
    p2
    p3
    p4
    p5

    5.分位数图示法 Quantile Quantile Plot

    统计学里Q-Q图(Q代表分位数)是一个概率图,用图形的方式比较两个概率分布,把他们的两个分位数放在一起比较。首先选好分位数间隔。图上的点(x,y)反映出其中一个第二个分布(y坐标)的分位数和与之对应的第一分布(x坐标)的相同分位数。因此,这条线是一条以分位数间隔为参数的曲线。如果两个分布相似,则该Q-Q图趋近于落在y=x线上。如果两分布线性相关,则点在Q-Q图上趋近于落在一条直线上,但不一定在y=x线上。Q-Q图可以用来可在分布的位置-尺度范畴上可视化的评估参数。
    从定义中可以看出Q-Q图主要用于检验数据分布的相似性,如果要利用Q-Q图来对数据进行正态分布的检验,则可以令x轴为正态分布的分位数,y轴为样本分位数,如果这两者构成的点分布在一条直线上,就证明样本数据与正态分布存在线性相关性,即服从正态分布。

    5.1 用法
    ggqqplot(
      data,
      x,
      combine = FALSE,
      merge = FALSE,
      color = "black",
      palette = NULL,
      size = NULL,
      shape = NULL,
      add = c("qqline", "none"),
      add.params = list(linetype = "solid"),
      conf.int = TRUE,
      conf.int.level = 0.95,
      title = NULL,
      xlab = NULL,
      ylab = NULL,
      facet.by = NULL,
      panel.labs = NULL,
      short.panel.labs = TRUE,
      ggtheme = theme_pubr(),
      ...
    )
    
    
    5.2 自定义参数ggpar()

    使用函数ggpar()可以轻松自定义绘图, 阅读?ggpar进行更改:

    • 主标题和轴标签:main,xlab,ylab
    • 轴区间:xlim,ylim(例如:ylim = c(0,30))
    • 轴比例尺:xscale,yscale(例如:yscale ="log2")
    • 调色板:palette="Dark2"或调色板= c("gray","blue","red")
    • 图例标题,标签和位置:legend="right"
    • 绘图方向:orientation = c("vertical", "horizontal", "reverse")
    5.3 实例
    # Load ggpubr, which provides ggqqplot(); without it the calls below fail.
    library(ggpubr)

    # Create example data: 200 weights per sex, drawn from normal
    # distributions with mean 55 (F) and mean 58 (M) and the default sd of 1.
    # The seed makes the random draws reproducible.
    set.seed(1234)
    wdata <- data.frame(
      sex = factor(rep(c("F", "M"), each = 200)),
      weight = c(rnorm(200, 55), rnorm(200, 58))
    )

    # Inspect the first four rows
    head(wdata, 4)
    #>   sex   weight
    #> 1   F 53.79293
    #> 2   F 55.27743
    #> 3   F 56.08444
    #> 4   F 52.65430

    # Basic QQ plot
    p1 <- ggqqplot(wdata, x = "weight")
    p1

    # Change colors by groups ("sex"); only color is mapped here, not shape
    # Use custom palette
    p2 <- ggqqplot(
      wdata,
      x = "weight",
      color = "sex", palette = c("#00AFBB", "#E7B800")
    )
    p2

    # Print the two plots again (kept from the original listing;
    # presumably these were figure captions in the source article).
    p1
    p2

    Reference

    https://rpkgs.datanovia.com/ggpubr/reference/index.html
    http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/

    相关文章

      网友评论

        本文标题:ggpubr002 绘制一个连续变量

        本文链接:https://www.haomeiwen.com/subject/gblljktx.html