赖江山老师在科学网分享了Francois Gillet编写的两个方差分析多重比较的函数 boxplert()和boxplerk()【来源Numerical Ecology with R (second Edition)】
我看了一下出图的部分是用boxplot函数绘制的,作为一个ggplot2的爱好者自己尝试着用ggplot2把函数boxplert()重新写了一下。在重写的过程中收获几个问题:
- X 轴如何按照给定的数据顺序而不是系统默认的编码顺序
- 显著性标签怎么加,关键是位置放在哪
- 绘图的颜色如何调整
- 在ggplot中加annotate的方法,位置与字体的设置
- 把ggplot2封装到函数里面合适吗?我觉得不写成函数会让出来的图更加易于调整,ggplot2 本来很灵活,写到函数里面反而限制了她的妖娆。
现在我们分别来测试一下,为了演示X轴的摆列顺序我把InsectSprays数据集写出来打乱里面本来按顺序的分组信息。
rm(list=ls())
setwd("C:\\Users\\Administrator\\Desktop\\boxplot")
library(agricolae)
library(stats)
data(InsectSprays)
# InsectSprays<-InsectSprays
# getwd()
# InsectSprays<-write.csv(InsectSprays,"InsectSprays.csv")
InsectSprays<-read.csv("InsectSprays.csv")
###
library(agricolae)
library(stats)
先看看之前的函数boxplert()出图的效果:
# 检验方差分析假设
shapiro.test(resid(aov(InsectSprays$count ~ InsectSprays$spray)))
boxplert(
InsectSprays$count,
InsectSprays$spray,
ylab = "yield",
xlab = "virus",
bcol = "orange",
p.adj = "holm"
)
输出:
$`comparison`
mean se sd min max n group
A 16.666667 1.7936476 6.213378 9 26 12 a
B 2.083333 0.5701984 1.975225 0 7 12 b
C 15.333333 1.2329647 4.271115 7 21 12 a
D 3.500000 0.5000000 1.732051 1 6 12 b
E 14.500000 1.3623732 4.719399 7 23 12 a
F 4.916667 0.7225621 2.503028 2 12 12 b
$p.value
[1] 3.182584e-17
再看新函数gglert()的效果:
# 检验方差分析假设
shapiro.test(resid(aov(InsectSprays$count ~ InsectSprays$spray)))
gglert(InsectSprays,
InsectSprays$count,
InsectSprays$spray,
yLab = "count",
xLab = "spray",
bcol = "bisque",
p.adj = "holm",
las = 1)
输出:
$`comparison`
mean se sd min max median n group
E 14.500000 1.3623732 4.719399 7 23 14.0 12 a
C 15.333333 1.2329647 4.271115 7 21 16.5 12 b
B 2.083333 0.5701984 1.975225 0 7 1.5 12 a
F 4.916667 0.7225621 2.503028 2 12 5.0 12 b
D 3.500000 0.5000000 1.732051 1 6 3.0 12 a
A 16.666667 1.7936476 6.213378 9 26 15.0 12 b
$p.value
[1] 3.182584e-17
$plot
至于非参数组间差异的Kruskal-Wallis检验出图的函数boxplerk()我就不再修改了,真的,我觉得写成函数得不偿失,不写成函数还好一些。
赖老师科学网分享的boxplert()和boxplerk()函数
#普通方差分析多重比较
boxplert <- function(X,
Y,
main = NULL,
xlab = NULL,
ylab = NULL,
bcol = "bisque",
p.adj = "none",
cexy = 1,
varwidth = TRUE,
las = 1,
paired = FALSE)
{
aa <- levels(as.factor(Y))
an <- as.character(c(1:length(aa)))
tt1 <- matrix(nrow = length(aa), ncol = 6)
for (i in 1:length(aa))
{
temp <- X[Y == aa[i]]
tt1[i, 1] <- mean(temp, na.rm = TRUE)
tt1[i, 2] <- sd(temp, na.rm = TRUE) / sqrt(length(temp))
tt1[i, 3] <- sd(temp, na.rm = TRUE)
tt1[i, 4] <- min(temp, na.rm = TRUE)
tt1[i, 5] <- max(temp, na.rm = TRUE)
tt1[i, 6] <- length(temp)
}
tt1 <- as.data.frame(tt1)
row.names(tt1) <- aa
colnames(tt1) <- c("mean", "se", "sd", "min", "max", "n")
boxplot(
X ~ Y,
main = main,
xlab = xlab,
ylab = ylab,
las = las,
col = bcol,
cex.axis = cexy,
cex.lab = cexy,
varwidth = varwidth
)
require(agricolae)
Yn <- factor(Y, labels = an)
sig <- "ns"
model <- aov(X ~ Yn)
if (paired == TRUE & length(aa) == 2)
{
coms <- t.test(X ~ Yn, paired = TRUE)
pp <- coms$p.value
} else
{
pp <- anova(model)$Pr[1]
}
if (pp <= 0.1)
sig <- "."
if (pp <= 0.05)
sig <- "*"
if (pp <= 0.01)
sig <- "**"
if (pp <= 0.001)
sig <- "***"
mtext(
sig,
side = 3,
line = 0.5,
adj = 0,
cex = 2,
font = 1
)
if(pp <= 0.05) {
comp <- LSD.test(model,
"Yn",
alpha = 0.05,
p.adj = p.adj,
group = TRUE) # gror <- comp$groups[order(comp$groups$groups), ]
# tt1$cld <- gror$M
gror <- comp$groups[order(rownames(comp$groups)), ]
tt1$group <- gror$groups
mtext(
tt1$group,
side = 3,
at = c(1:length(aa)),
line = 0.5,
cex = 1,
font = 4
)
}
list(comparison = tt1, p.value = pp)
}
##非参数组间差异的Kruskal-Wallis检验
boxplerk <- function(X,
Y,
main = NULL,
xlab = NULL,
ylab = NULL,
bcol = "bisque",
p.adj = "none",
cexy = 1,
varwidth = TRUE,
las = 1,
paired = FALSE)
{
aa <- levels(as.factor(Y))
an <- as.character(c(1:length(aa)))
tt1 <- matrix(nrow = length(aa), ncol = 7)
for (i in 1:length(aa))
{
temp <- X[Y == aa[i]]
tt1[i, 1] <- mean(temp, na.rm = TRUE)
tt1[i, 2] <- sd(temp, na.rm = TRUE) / sqrt(length(temp))
tt1[i, 3] <- sd(temp, na.rm = TRUE)
tt1[i, 4] <- min(temp, na.rm = TRUE)
tt1[i, 5] <- max(temp, na.rm = TRUE)
tt1[i, 6] <- median(temp, na.rm = TRUE)
tt1[i, 7] <- length(temp)
}
tt1 <- as.data.frame(tt1)
row.names(tt1) <- aa
colnames(tt1) <- c("mean", "se", "sd", "min", "max", "median", "n")
boxplot(
X ~ Y,
main = main,
xlab = xlab,
ylab = ylab,
las = las,
col = bcol,
cex.axis = cexy,
cex.lab = cexy,
varwidth = varwidth
)
require(agricolae)
Yn <- factor(Y, labels = an)
comp <- kruskal(X, Yn, p.adj = p.adj)
sig <- "ns"
if (paired == TRUE & length(aa) == 2)
{
coms <- wilcox.test(X ~ Yn, paired = TRUE)
pp <- coms$p.value
} else
{
pp <- comp$statistics$p.chisq
}
if(pp <= 0.1)
sig <- "."
if(pp <= 0.05)
sig <- "*"
if(pp <= 0.01)
sig <- "**"
if(pp <= 0.001)
sig <- "***"
gror <- comp$groups[order(rownames(comp$groups)), ]
tt1$rank <- gror$X
tt1$group <- gror$groups
mtext(
sig,
side = 3,
line = 0.5,
adj = 0,
cex = 2,
font = 1
)
if(pp <= 0.1)
mtext(
tt1$group,
side = 3,
at = c(1:length(aa)),
line = 0.5,
cex = 1,
font = 4
)
list(comparison = tt1, p.value = pp)
}
我根据boxplert()修改的函数gglert()
gglert <-function(data,
X,
Y,
main = NULL,
xLab = NULL,
yLab = NULL,
bcol = "bisque",
p.adj = "none",
cexy = 1,
varwidth = TRUE,
las = 1,
paired = FALSE){
library(ggplot2)
#names(mydata)=c("Group","count","color")
Y = factor(Y,levels = unique(Y))
#color = unique(as.vector(mydata$color))
#color=(palette(rainbow(100)))[1:length(unique(Y))]
color=heat.colors(length(unique(Y)))
p<-ggplot(data=data,aes(Y,X,aes(Y,X,fill=unique(Y))))+
geom_boxplot(fill=color)+theme(axis.text.x=element_text(angle=45,hjust=1))+labs(x=xLab,y=yLab)+#
theme_light()#+
#stat_summary(geom = 'text', label = labdata$groups, fun.y = max , vjust = -0.5
require(agricolae)
aa <- levels(factor(Y, levels=unique(Y)))#levels(as.factor(Y))
#an <- as.character(c(1:length(aa)))
tt1 <- matrix(nrow = length(aa), ncol = 7)
for (i in 1:length(aa)) {
temp <- X[Y == aa[i]]
tt1[i, 1] <- mean(temp, na.rm = TRUE)
tt1[i, 2] <- sd(temp, na.rm = TRUE)/sqrt(length(temp))
tt1[i, 3] <- sd(temp, na.rm = TRUE)
tt1[i, 4] <- min(temp, na.rm = TRUE)
tt1[i, 5] <- max(temp, na.rm = TRUE)
tt1[i, 6] <- median(temp, na.rm = TRUE)
tt1[i, 7] <- length(temp)
}
tt1 <- as.data.frame(tt1)
row.names(tt1) <- aa
colnames(tt1) <- c("mean", "se", "sd", "min", "max", "median", "n")
Yn <- factor(Y)
model <- aov(X ~ Yn)
if (paired == TRUE & length(aa) == 2)
{
coms <- t.test(X ~ Yn, paired = TRUE)
pp <- coms$p.value
} else
{
pp <- anova(model)$Pr[1]
}
if (pp <= 0.05) {
comp <- LSD.test(model,
"Yn",
alpha = 0.05,
p.adj = p.adj,
group = TRUE)
labdata<- comp$groups
labdata$name<-row.names(labdata)
labdata<-labdata[match(unique(Y),labdata$name),]
gror <- comp$groups[order(rownames(comp$groups)), ]
tt1$group <- gror$groups
p=p+stat_summary(geom = 'text', label = labdata$groups, fun.y = max , vjust = -0.5)
}
p=p+annotate("text",x=-Inf,y=Inf,hjust=-0.5,vjust=max(X)*0.05,label= paste("p=",sprintf("%.3f",pp),sep = ""),fontface = "italic",family = "Arial",size = 4)
list(comparison = tt1, p.value = pp,plot=p)
}
参考
方差分析多重比较出图
网友评论