美文网首页
R语言可视化3: 森林图-ggforestplot/forestploter

R语言可视化3: 森林图-ggforestplot/forestploter

作者: 小程的学习笔记 | 来源:发表于2023-02-19 11:02 被阅读0次

1. 使用 ggforestplot 包绘制森林图

1.1 基本用法

# One-time setup: install the packages this section relies on.
# install.packages("devtools")
# devtools::install_github("NightingaleHealth/ggforestplot")
# install.packages("tidyverse")

library(ggforestplot)
library(tidyverse)

# Start from the demo association table shipped with ggforestplot and keep
# the rows that sit within the first 30 rows of that table AND whose trait
# is BMI. `trait` identifies the outcome each association was computed for;
# per the original note the table covers BMI, log(HOMA-IR) and fasting
# glucose.
df <- dplyr::filter(
  ggforestplot::df_linear_associations,
  trait == "BMI",
  dplyr::row_number() <= 30
)

# Basic forest plot.
#   name     - column holding the biomarker names (one plot row each)
#   estimate - column holding the regression coefficients
#   se       - column holding the standard errors of those coefficients
#   pvalue   - column holding the p-values
#   psignif  - significance threshold. The author assumes a common alpha of
#              0.05 and 30 independent tests, so the Bonferroni-corrected
#              threshold is 0.05 / 30, roughly 0.002. Points that do not
#              reach it are drawn hollow.
#   xlab / title - x-axis label and plot title
ggforestplot::forestplot(
  df = df,
  name = name,
  estimate = beta,
  se = se,
  pvalue = pvalue,
  psignif = 0.002,
  xlab = "1-SD increment in BMI\nper 1-SD increment in biomarker concentration",
  title = "Associations of blood biomarkers to BMI"
)
forestplot-1

1.2 比较多个特征

# Names of the biomarkers that were kept in `df`.
selected_bmrs <- dplyr::pull(df, name)

# From the full demo table keep those biomarkers for every trait
# (BMI, HOMA-IR and fasting glucose), then turn `trait` into a factor so
# that its level order fixes the display order.
df_compare_traits <-
  ggforestplot::df_linear_associations %>%
  dplyr::filter(name %in% selected_bmrs) %>%
  dplyr::mutate(
    trait = factor(trait, levels = c("BMI", "HOMA-IR", "Fasting glucose"))
  )

# One forest plot with the three traits distinguished by colour.
ggforestplot::forestplot(
  df = df_compare_traits,
  estimate = beta,
  pvalue = pvalue,
  psignif = 0.002,
  xlab = "1-SD increment in cardiometabolic trait\nper 1-SD increment in biomarker concentration",
  title = "Biomarker associations to metabolic traits",
  colour = trait
)
forestplot-2

1.3 对生物标志物进行分组

# ggforce provides facet_col(), used at the end of this section.
# install.packages("ggforce")
library(ggforce)

# Restrict the biomarker metadata table (the one that carries the group
# information) to the biomarkers present in df_compare_traits.
df_grouping <- dplyr::filter(
  df_NG_biomarker_metadata,
  name %in% df_compare_traits$name
)

# Attach the metadata to the association data by biomarker name, then make
# `group` a factor whose levels follow the order of first appearance.
df_compare_traits_groups <-
  df_compare_traits %>%
  dplyr::right_join(df_grouping, by = "name") %>%
  dplyr::mutate(
    group = factor(.data$group, levels = unique(.data$group))
  )

# Same comparison plot as before, split into one stacked panel per group.
# Each panel gets its own y scale, with free panel spacing.
ggforestplot::forestplot(
  df = df_compare_traits_groups,
  estimate = beta,
  pvalue = pvalue,
  psignif = 0.002,
  xlab = "1-SD increment in cardiometabolic trait\nper 1-SD increment in biomarker concentration",
  colour = trait
) +
  ggforce::facet_col(facets = ~group, scales = "free_y", space = "free")
forestplot-3

2. 使用 forestploter 包绘制森林图

2.1 基本用法

# Install forestploter once if it is missing (CRAN release, or the
# development version from GitHub):
# install.packages("forestploter")
# install.packages("devtools")
# devtools::install_github("adayim/forestploter")

# Attach the package: forest() and forest_theme() below are called without a
# namespace prefix and would otherwise fail with "could not find function".
library(forestploter)

# Example data shipped with the package.
dt <- read.csv(
  system.file("extdata", "example_data.csv", package = "forestploter")
)

# Rows that have a value in the Placebo column get their label indented by
# three spaces (they become sub-rows); rows where Placebo is NA keep their
# label unchanged.
dt$Subgroup <- ifelse(is.na(dt$Placebo),
                      dt$Subgroup,
                      paste0("   ", dt$Subgroup))

# Show missing counts as empty strings instead of NA.
dt$Treatment <- ifelse(is.na(dt$Treatment), "", dt$Treatment)
dt$Placebo <- ifelse(is.na(dt$Placebo), "", dt$Placebo)

# Computed from the upper bound and the estimate on the log scale; it is
# passed as `sizes` to forest() below.
dt$se <- (log(dt$hi) - log(dt$est)) / 1.96

# Blank column that will host the CI drawing; the number of spaces controls
# the column width.
dt$` ` <- paste(rep(" ", 20), collapse = " ")

# Text column with the estimate and its interval, two decimals each; rows
# without an se are left empty.
dt$`HR (95% CI)` <- ifelse(is.na(dt$se), "",
                           sprintf("%.2f (%.2f to %.2f)",
                                   dt$est, dt$low, dt$hi))

# A simple theme.
tm <- forest_theme(base_size = 10,
                   refline_col = "red",
                   arrow_type = "closed",
                   footnote_col = "blue")

# The displayed columns are selected by name rather than by position, so the
# call keeps working if the number of columns in the example data changes.
# The blank " " column is the 4th displayed column, hence ci_column = 4.
p <- forest(dt[, c("Subgroup", "Treatment", "Placebo", " ", "HR (95% CI)")],
            est = dt$est,
            lower = dt$low,
            upper = dt$hi,
            sizes = dt$se,
            ci_column = 4,
            ref_line = 1,
            arrow_lab = c("Placebo Better", "Treatment Better"),
            xlim = c(0, 4),
            ticks_at = c(0.5, 1, 2, 3),
            footnote = "This is the demo data. Please feel free to change\nanything you want.",
            theme = tm)

# Draw the plot.
plot(p)
forestploter-1.png

2.2 其他参数设置

# Move the first row of dt to the bottom and relabel it "Overall"; it is
# flagged as the summary row in the forest() call below.
dt_tmp <- rbind(dt[-1, ], dt[1, ])
dt_tmp[nrow(dt_tmp), 1] <- "Overall"

# Theme with most appearance options spelled out.
tm <- forest_theme(
  base_size = 10, # text size
  # Confidence interval: point shape, line colour, point fill, transparency,
  # line type and line width
  ci_pch = 15,
  ci_col = "#762a83",
  ci_fill = "blue",
  ci_alpha = 0.8,
  ci_lty = 1,
  ci_lwd = 1.5,
  # Puts a T-shaped end on each CI line; NULL by default per the original note
  ci_Theight = 0.2,
  # Reference line (the vertical line in the middle): width, type, colour
  refline_lwd = 1,
  refline_lty = "dashed",
  refline_col = "grey20",
  # Extra vertical line: width, type, colour. Per the original note nothing
  # is drawn unless such a line is requested.
  vertline_lwd = 1,
  vertline_lty = "dashed",
  vertline_col = "grey20",
  # Summary diamond: fill and border colour
  summary_fill = "yellow",
  summary_col = "#4575b4",
  # Footnote: font size, face and colour
  footnote_cex = 0.6,
  footnote_fontface = "italic",
  footnote_col = "red"
)

pt <- forest(
  dt_tmp[, c(1:3, 20:21)],
  est = dt_tmp$est,
  lower = dt_tmp$low,
  upper = dt_tmp$hi,
  sizes = dt_tmp$se,
  # TRUE for the last row only, i.e. the "Overall" row
  is_summary = seq_len(nrow(dt_tmp)) == nrow(dt_tmp),
  ci_column = 4,
  ref_line = 1,
  arrow_lab = c("Placebo Better", "Treatment Better"),
  xlim = c(0, 4),
  ticks_at = c(0.5, 1, 2, 3),
  footnote = "This is the demo data. Please feel free to change\nanything you want.",
  theme = tm
)

plot(pt)
forestploter-2

2.3.1 多组CI - 简单绘制

# Reload the example data so this section starts from the untouched columns.
dt <- read.csv(
  system.file("extdata", "example_data.csv", package = "forestploter")
)

# Indent the label of every row that has a value in the Placebo column.
dt$Subgroup <- ifelse(!is.na(dt$Placebo),
                      paste0("   ", dt$Subgroup),
                      dt$Subgroup)

# Display copies of the counts, with NA shown as an empty string.
dt$n1 <- ifelse(is.na(dt$Treatment), "", dt$Treatment)
dt$n2 <- ifelse(is.na(dt$Placebo), "", dt$Placebo)

# Two blank columns that will host the CI drawings. Each holds 39 spaces,
# the same string as 20 single spaces joined by single spaces.
dt$`CVD outcome` <- strrep(" ", 39)
dt$`COPD outcome` <- strrep(" ", 39)

# Theme: two point shapes / colours and a legend for the two groups, plus
# two differently styled vertical lines.
tm <- forest_theme(
  base_size = 10,
  refline_lty = "solid",
  ci_pch = c(15, 18),
  ci_col = c("#377eb8", "#4daf4a"),
  footnote_col = "blue",
  legend_name = "Group",
  legend_value = c("Trt 1", "Trt 2"),
  vertline_lty = c("dashed", "dotted"),
  vertline_col = c("#d6604d", "#bababa")
)

# Four estimate/lower/upper sets are supplied for only two CI columns
# (displayed columns 3 and 5). Per the original note, when more sets are
# given than there are CI columns the sets are reused across the columns:
# est_gp1 and est_gp2 are treated as group 1, est_gp3 and est_gp4 as group 2.
p <- forest(
  dt[, c(1, 19, 21, 20, 22)],
  est = list(dt$est_gp1, dt$est_gp2, dt$est_gp3, dt$est_gp4),
  lower = list(dt$low_gp1, dt$low_gp2, dt$low_gp3, dt$low_gp4),
  upper = list(dt$hi_gp1, dt$hi_gp2, dt$hi_gp3, dt$hi_gp4),
  ci_column = c(3, 5),
  ref_line = 1,
  vert_line = c(0.5, 2),
  nudge_y = 0.2,
  theme = tm
)
plot(p)
forestploter-3

2.3.2 多组CI - 不同的列进行不同的设置

# Text columns for the two displayed estimates, formatted as
# "est (low to high)" with two decimals; rows without an estimate stay empty.
dt$`HR (95% CI)` <- with(dt, ifelse(
  is.na(est_gp1), "",
  sprintf("%.2f (%.2f to %.2f)", est_gp1, low_gp1, hi_gp1)
))
dt$`Beta (95% CI)` <- with(dt, ifelse(
  is.na(est_gp2), "",
  sprintf("%.2f (%.2f to %.2f)", est_gp2, low_gp2, hi_gp2)
))

tm <- forest_theme(arrow_type = "closed", arrow_label_just = "end")

# Two CI columns (displayed columns 2 and 4), each with its own settings.
# Vector or list arguments carry one entry per CI column: reference line,
# extra vertical lines, x-axis transformation ("log" for the first column,
# "none" for the second), arrow labels, axis limits, tick positions and
# axis label.
p <- forest(
  dt[, c(1, 21, 23, 22, 24)],
  est = list(dt$est_gp1, dt$est_gp2),
  lower = list(dt$low_gp1, dt$low_gp2),
  upper = list(dt$hi_gp1, dt$hi_gp2),
  ci_column = c(2, 4),
  ref_line = c(1, 0),
  vert_line = list(c(0.3, 1.4), c(0.6, 2)),
  x_trans = c("log", "none"),
  arrow_lab = list(c("L1", "R1"), c("L2", "R2")),
  xlim = list(c(0, 3), c(-1, 3)),
  ticks_at = list(c(0.1, 0.5, 1, 2.5), c(-1, 0, 2)),
  xlab = c("OR", "Beta"),
  nudge_y = 0.2,
  theme = tm
)
plot(p)
forestploter-4

参考:

  1. https://nightingalehealth.github.io/ggforestplot/articles/ggforestplot.html
  2. https://github.com/adayim/forestploter

相关文章

网友评论

      本文标题:R语言可视化3: 森林图-ggforestplot/forestploter

      本文链接:https://www.haomeiwen.com/subject/rtpwhdtx.html