美文网首页
R语言小记1:ggplot批量画图(geom_text, geo

R语言小记1:ggplot批量画图(geom_text, geo

作者: 小程的学习笔记 | 来源:发表于2024-01-03 12:03 被阅读0次

    1. 数据准备

    # 加载所需的R包
    library(reshape2)
    library(dplyr)
    library(ggpubr)
    library(FSA)
    
    # 读取文件
    data_sample <- read.delim("/data/shumin/test/predictions_ko.L3.xls") # 样本信息
    data_sample[1:3, 1:8]
    ##   Pathway_level1          Pathway_level2 Pathway_level3           Level3_description        N11         N12         N14         N15
    ## 1     Metabolism Carbohydrate metabolism        ko00010 Glycolysis / Gluconeogenesis 0.01438271 0.015587858 0.014518278 0.015202541
    ## 2     Metabolism Carbohydrate metabolism        ko00020    Citrate cycle (TCA cycle) 0.00394630 0.003274079 0.003909439 0.003349509
    ## 3     Metabolism Carbohydrate metabolism        ko00030    Pentose phosphate pathway 0.01052230 0.010446724 0.010254249 0.010372544
    
    data_group <- read.delim("/data/shumin/test/group.txt") # 分组信息
    head(data_group)
    ##   Sample  Group
    ## 1    N11 Normal
    ## 2    N12 Normal
    ## 3    N14 Normal
    ## 4    N15 Normal
    ## 5    N17 Normal
    ## 6    N18 Normal
    
    Data <- as.data.frame(t(data_sample[,c(4:23)])) # 提取需要的数据,4-23列(样本)
    colnames(Data) <- Data[1,] # 将第一行变为列名
    Data_2 <- Data[-1, ] %>% mutate(Sample = row.names(.))  %>% merge(., data_group, by = "Sample") # 去除第一行并和分组信息合并
    str(Data_2)
    ## 'data.frame':    19 obs. of  264 variables:
    ##  $ Sample                                                                 : chr  "N11" "N12" "N14" "N15" ...
    ##  $ Glycolysis / Gluconeogenesis                                           : chr  "1.438271e-02" "1.558786e-02" "1.451828e-02" "1.520254e-02" ...
    ##  $ Citrate cycle (TCA cycle)                                              : chr  "3.946300e-03" "3.274079e-03" "3.909439e-03" "3.349509e-03" ...
    ##  $ Pentose phosphate pathway                                              : chr  "1.052230e-02" "1.044672e-02" "1.025425e-02" "1.037254e-02" ...
    ##  $ Pentose and glucuronate interconversions                               : chr  "5.962178e-03" "5.686224e-03" "5.268724e-03" "5.778111e-03" ...
    ##  $ Fructose and mannose metabolism                                        : chr  "1.653927e-02" "1.731108e-02" "1.595058e-02" "1.714072e-02" ...
    ## ......
    
    Data_2[2:263] <- lapply(Data_2[2:263], as.numeric) # 将character数据格式改为numeric
    str(Data_2)
    ##  'data.frame':   19 obs. of  264 variables:
    ##   $ Sample                                                                 : chr  "N11" "N12" "N14" "N15" ...
    ##   $ Glycolysis / Gluconeogenesis                                           : num  0.0144 0.0156 0.0145 0.0152 0.0146 ...
    ##   $ Citrate cycle (TCA cycle)                                              : num  0.00395 0.00327 0.00391 0.00335 0.00381 ...
    ##   $ Pentose phosphate pathway                                              : num  0.0105 0.0104 0.0103 0.0104 0.0105 ...
    ##   $ Pentose and glucuronate interconversions                               : num  0.00596 0.00569 0.00527 0.00578 0.00606 ...
    ##   $ Fructose and mannose metabolism                                        : num  0.0165 0.0173 0.016 0.0171 0.0169 ...
    ##  ......
    

    2. 图形绘制

    # 设置错误处理机制
    options(error = recover)
    
    for (i in 2:(length(colnames(Data_2))-1)) { # 最后一列为分组信息
      tryCatch({
      test <- Data_2[,c(1, i)]
      test$Group<- factor(Data_2$Group, levels = c("Normal","Mild","Severe")) # 修改因子水平,方便后续绘图时按照指定的因子水平顺序进行绘制
      dunn_test <- dunnTest(test[,2] ~ Group, method = "bonferroni", data = test) # Dunn's test(邓恩检验),多重比较分析,用于比较多个群组之间的差异
      # 添加文本标注
      label_data <- data.frame(
        x = c(1.5, 2.5, 2), # 添加文本的x轴位置
        y = rep(c(max(test[,2]) + 0.001,  max(test[,2]) + 0.0015, max(test[,2]) + 0.002)), # 添加文本的y轴位置
        label = sprintf("p = %.4f", dunn_test[["res"]]$P.adj) # 文本保留4位小数
      )
      # 添加线段
      line_data <- data.frame(
        x = c(1, 2, 1), # 线段x轴的起始位置
        y = rep(c(max(test[,2]) + 0.0007, max(test[,2]) + 0.0012, max(test[,2]) + 0.0017)), # 线段y轴的起始位置
        xend = c(2, 3, 3), # 线段x轴的终止位置
        yend = rep(c(max(test[,2]) + 0.0007, max(test[,2]) + 0.0012, max(test[,2]) + 0.0017)) # 线段y轴的终止位置
      )
      
      png(filename = paste0(i, "_2.png"), width = 4000, height = 3000, res = 300)
      print(ggboxplot(test, x = "Group", y = test[2], color = colnames(test)[3], add = "jitter") +
              scale_color_manual(values = c("#3D6590", "#F7D46E", "#DC615F")) + # 自定义颜色(和我的保持一致了)
              stat_compare_means()  + # 添加全局p值,默认kruskal.test(非参数检验),也可改anova(参数检验)
              theme_bw() + 
              ylab("Percentage") +
              ggtitle(colnames(test)[2]) +
              theme(axis.text.x = element_text(angle = 30, vjust = 0.85, hjust = 0.75),
                    plot.title = element_text(hjust = 0.5)) +
              geom_text(data = label_data, aes(x = x, y = y, label = label), size = 4, inherit.aes = FALSE) +
              geom_segment(data = line_data, aes(x = x, y = y, xend = xend, yend = yend), size = 0.7, color = "black")
              )
      dev.off()
      }, error = function(e){
        # 打印错误消息并跳过继续后续
        cat("Error in boxplot for Group", i, ": ", conditionMessage(e), "\n")
      })
    }
    
    # 服务器终端查看
    $ ls
    100_2.png  127_2.png  154_2.png  182_2.png  209_2.png  235_2.png  26_2.png   5_2.png   80_2.png
    101_2.png  128_2.png  155_2.png  18_2.png   210_2.png  236_2.png  263_2.png  53_2.png  81_2.png
    102_2.png  129_2.png  156_2.png  183_2.png  211_2.png  237_2.png  27_2.png   54_2.png  82_2.png
    10_2.png   130_2.png  157_2.png  184_2.png  212_2.png  238_2.png  28_2.png   55_2.png  8_2.png
    103_2.png  131_2.png  158_2.png  185_2.png  21_2.png   239_2.png  29_2.png   56_2.png  83_2.png
    ......
    
    ggplot2-1

    相关文章

      网友评论

          本文标题:R语言小记1:ggplot批量画图(geom_text, geo

          本文链接:https://www.haomeiwen.com/subject/cxbmndtx.html