1. 简介
误差线(Error bars)是数据变异性的图形显示,用于表明被测值的误差或不确定性。它指示了被测值的准确性、或被测值和真实值的差异性。可用来表示standard deviation、standard error 或 confidence interval,由于它们的值不尽相同,所以需要在图中特别说明。
标准差(sd)是描述性统计量,用来表示数据本身围绕均值的离散程度,sd越小,表明数据越集中在均值附近。标准差可与均数结合估计参考值范围,也可用于计算变异系数和标准误。标准差(sd)不牵扯均值对比推测,仅仅是描述性的。
标准误(se)表示样本平均数对总体平均数的变异程度,反映抽样误差的大小;标准误(se)用于衡量用样本均值估计总体均值的准确性,标准误越小,样本均值和总体均值差距越小,样本数据越能代表总体数据。
2. 作图
基于ggplot2的扩展包 ggpubr 和 ggsignif 可以很简便地做出相应的图,但代价是作图过程中很多内部数据变换过程被封装起来了,不利于学习。
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(tidyverse)

# Group iris by Species and compute the mean, sd, se and the confidence-interval
# half-width (ci) of Petal.Width.
conf.interval <- 0.95
data <- iris %>%
  group_by(Species) %>%
  summarise(
    mean = mean(Petal.Width, na.rm = TRUE),
    sd = sd(Petal.Width, na.rm = TRUE),
    # Count only non-missing values so that N matches the observations that
    # mean() and sd() actually used (both are called with na.rm = TRUE).
    N = sum(!is.na(Petal.Width)),
    se = sd / sqrt(N),
    # Two-sided t quantile with N - 1 degrees of freedom
    # (the 0.975 quantile when conf.interval is 0.95).
    ciMult = qt(conf.interval / 2 + 0.5, N - 1),
    ci = se * ciMult
  )

# Bar chart of the pre-computed means; geom_col() plots the values as given,
# without any statistical transformation.
col_plot <- ggplot() +
  geom_col(
    data = data,
    aes(x = Species, y = mean, fill = Species),
    width = 0.5
  )

# Error bars spanning mean - se to mean + se.
# There is exactly one bar per Species, so no grouping or dodging is needed.
p <- col_plot +
  geom_errorbar(
    data = data,
    aes(x = Species, ymin = mean - se, ymax = mean + se),
    # width is a fixed setting, not a data mapping, so it belongs outside aes().
    width = 0.2
  )
col_plot.png
例2
# Compute the mean and standard deviation of price for each cut.
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE.
library(tidyverse)
# Seed the RNG once so the 1000-row sample below is reproducible.
set.seed(13)
data <- diamonds %>%
  sample_n(1000)
sum_data <- data %>%
  group_by(cut) %>%
  summarise(
    mean_price = mean(price),
    sd_price = sd(price)
  )

# Plot: one bar per cut at the mean price, with error bars spanning
# mean_price - sd_price to mean_price + sd_price.
p <- ggplot(data = sum_data, aes(x = cut, y = mean_price)) +
  geom_col(aes(fill = cut), color = "black", width = 0.85) +
  geom_errorbar(
    aes(
      ymin = mean_price - sd_price,
      ymax = mean_price + sd_price
    ),
    color = "#22292F",
    width = .1
  ) +
  labs(
    y = "Mean Price",
    title = "Mean price in Different cuts",
    caption = "Error bars indicate standard deviations"
  )
Rplot.png
方差分析
用agricolae进行多重比较。
# One-way ANOVA of price by cut, followed by multiple comparisons with
# agricolae::LSD.test() using Bonferroni-adjusted p-values.
# library() stops with an error if agricolae is missing, whereas require()
# only returns FALSE.
library(agricolae)
cut.aov <- aov(price ~ cut, data = data)
LSD <- LSD.test(cut.aov, "cut", p.adj = "bonferroni")$groups
# The row names of the groups table are used as the cut labels; add them as a
# regular column so the table can be merged with the summary data. Naming the
# column inside cbind() avoids creating a global `cut` that masks base::cut().
LSD <- cbind(cut = rownames(LSD), LSD)
data_c <- merge(sum_data, LSD, by = "cut")

# Place each group letter a fixed gap above the top of its error bar
# (mean_price + sd_price). The gap is 5% of the tallest error bar, so every
# letter sits at the same distance from its bar.
label_gap <- 0.05 * max(data_c$mean_price + data_c$sd_price)
p +
  geom_text(
    data = data_c,
    aes(
      x = cut,
      y = mean_price + sd_price + label_gap,
      label = groups
    ),
    size = 5, fontface = "bold"
  ) +
  labs(
    caption = "Barchart with Significance Tests"
  )
Rplot01.png
两两比较
# Coordinates of the connecting line, chosen according to the highest y value.
sign <- tribble(
  ~x,          ~y,
  "Fair",      8200,
  "Fair",      8400,
  "Very Good", 8400,
  "Very Good", 8200
)
p +
  geom_line(
    mapping = aes(x = x, y = y, group = 1),
    data = sign
  ) +
  annotate(
    geom = "text",
    label = "***",
    # Star position is set from x and y;
    # x = 2 comes from cut having been converted to levels.
    x = 2,
    y = 8600,
    color = "#22292F",
    size = 8
  )
Rplot02.png
网友评论