美文网首页RR语言生物信息学与算法
ggplot2:方差分析多重比较标注显著字母

ggplot2:方差分析多重比较标注显著字母

作者: 周运来就是我 | 来源:发表于2018-12-16 11:27 被阅读142次

    赖江山老师在科学网分享了Francois Gillet编写的两个方差分析多重比较的函数 boxplert()和boxplerk()【来源Numerical Ecology with R (second Edition)】

    我看了一下出图的部分是用boxplot函数绘制的,作为一个ggplot2的爱好者自己尝试着用ggplot2把函数boxplert()重新写了一下。在重写的过程中收获几个问题:

    • X 轴如何按照给定的数据顺序而不是系统默认的编码顺序
    • 显著性标签怎么加,关键是位置放在哪
    • 绘图的颜色如何调整
    • 在ggplot中加annotate的方法,位置与字体的设置
    • 把ggplot2封装到函数里面合适吗?我觉得不写成函数会让出来的图更加易于调整,ggplot2 本来很灵活,写到函数里面反而限制了她的妖娆。

    现在我们分别来测试一下,为了演示X轴的摆列顺序我把InsectSprays数据集写出来打乱里面本来按顺序的分组信息。

    rm(list=ls())
    setwd("C:\\Users\\Administrator\\Desktop\\boxplot")
    library(agricolae)
    library(stats)
    data(InsectSprays)
    # InsectSprays<-InsectSprays
    # getwd()
    # InsectSprays<-write.csv(InsectSprays,"InsectSprays.csv")
    InsectSprays<-read.csv("InsectSprays.csv")
    ###
    library(agricolae)
    library(stats)
    

    先看看之前的函数boxplert()出图的效果:

    # 检验方差分析假设
    shapiro.test(resid(aov(InsectSprays$count ~ InsectSprays$spray)))
    boxplert(
      InsectSprays$count,
      InsectSprays$spray,
      ylab = "yield",
      xlab = "virus",
      bcol = "orange",
      p.adj = "holm"
    )
    

    输出:

    $`comparison`
           mean        se       sd min max  n group
    A 16.666667 1.7936476 6.213378   9  26 12     a
    B  2.083333 0.5701984 1.975225   0   7 12     b
    C 15.333333 1.2329647 4.271115   7  21 12     a
    D  3.500000 0.5000000 1.732051   1   6 12     b
    E 14.500000 1.3623732 4.719399   7  23 12     a
    F  4.916667 0.7225621 2.503028   2  12 12     b
    
    $p.value
    [1] 3.182584e-17
    
    

    再看新函数gglert()的效果:

    # 检验方差分析假设
    shapiro.test(resid(aov(InsectSprays$count ~ InsectSprays$spray)))
    gglert(InsectSprays,
           InsectSprays$count,
           InsectSprays$spray,
           yLab = "count",
           xLab = "spray",
           bcol = "bisque",
           p.adj = "holm",
           las = 1)
    

    输出:

    $`comparison`
           mean        se       sd min max median  n group
    E 14.500000 1.3623732 4.719399   7  23   14.0 12     a
    C 15.333333 1.2329647 4.271115   7  21   16.5 12     b
    B  2.083333 0.5701984 1.975225   0   7    1.5 12     a
    F  4.916667 0.7225621 2.503028   2  12    5.0 12     b
    D  3.500000 0.5000000 1.732051   1   6    3.0 12     a
    A 16.666667 1.7936476 6.213378   9  26   15.0 12     b
    
    $p.value
    [1] 3.182584e-17
    
    $plot
    

    至于非参数组间差异的Kruskal-Wallis检验出图的函数boxplerk()我就不再修改了,真的,我觉得写成函数得不偿失,不写成函数还好一些。

    赖老师科学网分享的boxplert()和boxplerk()函数
    #普通方差分析多重比较
    boxplert <-  function(X,
                          Y,
                          main = NULL,
                          xlab = NULL,
                          ylab = NULL,
                          bcol = "bisque",
                          p.adj = "none",
                          cexy = 1,
                          varwidth = TRUE,
                          las = 1,
                          paired = FALSE)
    {
      aa <- levels(as.factor(Y))
      an <- as.character(c(1:length(aa)))
      tt1 <- matrix(nrow = length(aa), ncol = 6)    
      for (i in 1:length(aa))
      {
        temp <- X[Y == aa[i]]
        tt1[i, 1] <- mean(temp, na.rm = TRUE)
        tt1[i, 2] <- sd(temp, na.rm = TRUE) / sqrt(length(temp))
        tt1[i, 3] <- sd(temp, na.rm = TRUE)
        tt1[i, 4] <- min(temp, na.rm = TRUE)
        tt1[i, 5] <- max(temp, na.rm = TRUE)
        tt1[i, 6] <- length(temp)
      }
      
      tt1 <- as.data.frame(tt1)
      row.names(tt1) <- aa
      colnames(tt1) <- c("mean", "se", "sd", "min", "max", "n")
      
      boxplot(
        X ~ Y,
        main = main,
        xlab = xlab,
        ylab = ylab,
        las = las,
        col = bcol,
        cex.axis = cexy,
        cex.lab = cexy,
        varwidth = varwidth
      )    
      require(agricolae)
      Yn <- factor(Y, labels = an)
      sig <- "ns"
      model <- aov(X ~ Yn)    
      if (paired == TRUE & length(aa) == 2)
      {
        coms <- t.test(X ~ Yn, paired = TRUE)
        pp <- coms$p.value
      }    else
      {
        pp <- anova(model)$Pr[1]
      }    
      if (pp <= 0.1)
        sig <- "."
      if (pp <= 0.05)
        sig <- "*"
      if (pp <= 0.01)
        sig <- "**"
      if (pp <= 0.001)
        sig <- "***"
      
      mtext(
        sig,
        side = 3,
        line = 0.5,
        adj = 0,
        cex = 2,
        font = 1
      )    
      if(pp <= 0.05) {
        comp <- LSD.test(model,
                         "Yn",
                         alpha = 0.05,
                         p.adj = p.adj,
                         group = TRUE)      # gror <- comp$groups[order(comp$groups$groups), ]
        # tt1$cld <- gror$M
        gror <- comp$groups[order(rownames(comp$groups)), ]
        tt1$group <- gror$groups
        mtext(
          tt1$group,
          side = 3,
          at = c(1:length(aa)),
          line = 0.5,
          cex = 1,
          font = 4
        )
      }
      list(comparison = tt1, p.value = pp)
      
    }
    
    ##非参数组间差异的Kruskal-Wallis检验
    boxplerk <-  function(X,
                          Y,
                          main = NULL,
                          xlab = NULL,
                          ylab = NULL,
                          bcol = "bisque",
                          p.adj = "none",
                          cexy = 1,
                          varwidth = TRUE,
                          las = 1,
                          paired = FALSE)
    {
      aa <- levels(as.factor(Y))
      an <- as.character(c(1:length(aa)))
      tt1 <- matrix(nrow = length(aa), ncol = 7)    
      for (i in 1:length(aa))
      {
        temp <- X[Y == aa[i]]
        tt1[i, 1] <- mean(temp, na.rm = TRUE)
        tt1[i, 2] <- sd(temp, na.rm = TRUE) / sqrt(length(temp))
        tt1[i, 3] <- sd(temp, na.rm = TRUE)
        tt1[i, 4] <- min(temp, na.rm = TRUE)
        tt1[i, 5] <- max(temp, na.rm = TRUE)
        tt1[i, 6] <- median(temp, na.rm = TRUE)
        tt1[i, 7] <- length(temp)
      }
      
      tt1 <- as.data.frame(tt1)
      row.names(tt1) <- aa
      colnames(tt1) <- c("mean", "se", "sd", "min", "max", "median", "n")
      
      boxplot(
        X ~ Y,
        main = main,
        xlab = xlab,
        ylab = ylab,
        las = las,
        col = bcol,
        cex.axis = cexy,
        cex.lab = cexy,
        varwidth = varwidth
      )    
      require(agricolae)
      Yn <- factor(Y, labels = an)
      comp <- kruskal(X, Yn, p.adj = p.adj)
      sig <- "ns"
      
      if (paired == TRUE & length(aa) == 2)
      {
        coms <- wilcox.test(X ~ Yn, paired = TRUE)
        pp <- coms$p.value
      }    else
      {
        pp <- comp$statistics$p.chisq
      }    
      if(pp <= 0.1)
        sig <- "."
      if(pp <= 0.05)
        sig <- "*"
      if(pp <= 0.01)
        sig <- "**"
      if(pp <= 0.001)
        sig <- "***"
      
      gror <- comp$groups[order(rownames(comp$groups)), ]
      tt1$rank <- gror$X
      tt1$group <- gror$groups
      mtext(
        sig,
        side = 3,
        line = 0.5,
        adj = 0,
        cex = 2,
        font = 1
      )   
      if(pp <= 0.1)
        mtext(
          tt1$group,
          side = 3,
          at = c(1:length(aa)),
          line = 0.5,
          cex = 1,
          font = 4
        )
      
      list(comparison = tt1, p.value = pp)
      
    }
    
    我根据boxplert()修改的函数gglert()
    gglert <-function(data,
                      X,
                      Y,
                      main = NULL,
                      xLab = NULL,
                      yLab = NULL,
                      bcol = "bisque",
                      p.adj = "none",
                      cexy = 1,
                      varwidth = TRUE,
                      las = 1,
                      paired = FALSE){
    library(ggplot2)
    #names(mydata)=c("Group","count","color")
    Y = factor(Y,levels =  unique(Y))
    #color = unique(as.vector(mydata$color))
    #color=(palette(rainbow(100)))[1:length(unique(Y))]
    color=heat.colors(length(unique(Y)))
    
    p<-ggplot(data=data,aes(Y,X,aes(Y,X,fill=unique(Y))))+
      geom_boxplot(fill=color)+theme(axis.text.x=element_text(angle=45,hjust=1))+labs(x=xLab,y=yLab)+#
      theme_light()#+
      #stat_summary(geom = 'text', label = labdata$groups, fun.y = max , vjust = -0.5
    require(agricolae)
    aa <- levels(factor(Y, levels=unique(Y)))#levels(as.factor(Y))
    #an <- as.character(c(1:length(aa)))
    
    tt1 <- matrix(nrow = length(aa), ncol = 7)
    for (i in 1:length(aa)) {
      temp <- X[Y == aa[i]]
      tt1[i, 1] <- mean(temp, na.rm = TRUE)
      tt1[i, 2] <- sd(temp, na.rm = TRUE)/sqrt(length(temp))
      tt1[i, 3] <- sd(temp, na.rm = TRUE)
      tt1[i, 4] <- min(temp, na.rm = TRUE)
      tt1[i, 5] <- max(temp, na.rm = TRUE)
      tt1[i, 6] <- median(temp, na.rm = TRUE)
      tt1[i, 7] <- length(temp)
    }
    tt1 <- as.data.frame(tt1)
    row.names(tt1) <- aa
    colnames(tt1) <- c("mean", "se", "sd", "min", "max", "median", "n")
    Yn <- factor(Y)
    model <- aov(X ~ Yn)
    if (paired == TRUE & length(aa) == 2)
    {
      coms <- t.test(X ~ Yn, paired = TRUE)
      pp <- coms$p.value
    }    else
    {
      pp <- anova(model)$Pr[1]
    }    
    
    if (pp <= 0.05) {
      comp <- LSD.test(model,
                       "Yn",
                       alpha = 0.05,
                       p.adj = p.adj,
                       group = TRUE)
      labdata<- comp$groups
      labdata$name<-row.names(labdata)
      labdata<-labdata[match(unique(Y),labdata$name),]
      gror <- comp$groups[order(rownames(comp$groups)), ]
      tt1$group <- gror$groups
      
      p=p+stat_summary(geom = 'text', label = labdata$groups, fun.y = max , vjust = -0.5)
    }
    p=p+annotate("text",x=-Inf,y=Inf,hjust=-0.5,vjust=max(X)*0.05,label= paste("p=",sprintf("%.3f",pp),sep = ""),fontface = "italic",family = "Arial",size = 4)
    
    list(comparison = tt1, p.value = pp,plot=p)
    
    }
    
    
    

    参考
    方差分析多重比较出图

    相关文章

      网友评论

        本文标题:ggplot2:方差分析多重比较标注显著字母

        本文链接:https://www.haomeiwen.com/subject/zevtkqtx.html