美文网首页微生物扩增子微生物
16S rRNA扩增子之alpha多样性结果可视化(分面和拼图)

16S rRNA扩增子之alpha多样性结果可视化(分面和拼图)

作者: 你猜我菜不菜 | 来源:发表于2020-04-26 19:18 被阅读0次

    数据的分析和可视化工作没有止境!

    1. alpha多样性数据导入和整理

    数据来自公司给的alpha多样性指数数据表格,主要包括Observe,Chao1,ACE, Shannon和Simpson指数。


    # Load packages
    library(tidyverse)
    library(ggsignif) # statistical tests and significance annotations

    # Import the wide-format alpha diversity table
    alpha_data <- read.csv(
      "alpha_wide.csv",
      sep = ",",
      stringsAsFactors = FALSE,
      check.names = FALSE
    )

    # Reshape from wide to long: every column except `sample` becomes an
    # (alpha_index, value) pair, then save the long table to disk
    alpha_tidy_data <- pivot_longer(
      alpha_data,
      cols = -sample,
      names_to = "alpha_index",
      values_to = "value"
    )
    write.csv(alpha_tidy_data, file = "alpha_tidy_data.csv")

    # Re-import the long table after grouping information was added to the
    # CSV by hand; the first column holds the row names written by write.csv()
    alpha_data <- read.csv(
      "alpha_tidy_data.csv",
      header = TRUE,
      sep = ",",
      row.names = 1,
      stringsAsFactors = FALSE,
      check.names = FALSE
    )
    head(alpha_data)
    

    使用tidyverse包将宽数据转换成了长数据;但由于我目前用R整理数据表格的能力有限,分组信息是在导出的长数据表格中手动添加的。


    2. 在不同时间点上对照组和处理组间的多样性指标的差异
    # Order the diversity indices so the facet rows appear in a fixed sequence
    alpha_data$alpha_index <- factor(
      alpha_data$alpha_index,
      levels = c("Observe", "Chao1", "ACE", "Shannon", "Simpson"),
      ordered = TRUE
    )

    # Plotting helpers
    library(scales)
    library(facetscales) # facet_grid_sc() allows a separate y scale per facet row

    # Y-axis limits and breaks for each facet row, keyed by alpha_index level
    scales_y <- list(
      ACE = scale_y_continuous(limits = c(50, 162), breaks = seq(50, 162, 30)),
      Chao1 = scale_y_continuous(limits = c(50, 160), breaks = seq(50, 160, 30)),
      Observe = scale_y_continuous(limits = c(50, 155), breaks = seq(50, 155, 30)),
      Shannon = scale_y_continuous(limits = c(1.7, 2.5), breaks = seq(1.7, 2.6, 0.25)),
      Simpson = scale_y_continuous(limits = c(0.10, 0.40), breaks = seq(0.10, 0.40, 0.1))
    )

    # Strip titles for the time facets only. A lookup-table labeller returns NA
    # for any value it does not list, so it must not be applied to the
    # alpha_index strips (it is restricted to `time` in facet_grid_sc() below).
    to_string <- as_labeller(c(`1` = "1DPE", `7` = "7DPE", `14` = "14DPE"))

    # Violin + box plots of each index per treatment, faceted by index (rows)
    # and time point (columns)
    p_alpha <- ggplot(data = alpha_data, mapping = aes(x = treatment, y = value)) +
      geom_violin(mapping = aes(fill = treatment), width = 0.8, size = 0.2) + # violins
      geom_boxplot(width = 0.1, linetype = 1, size = 0.2, outlier.size = 0.7) + # boxes
      geom_signif(comparisons = list(c("UN", "IR")), # UN vs IR significance bracket
                  map_signif_level = TRUE,
                  textsize = 3, size = 0.3, vjust = 0) +
      # Facet by bare column names (not alpha_data$...) so that the labeller can
      # address the `time` variable by name; alpha_index strips keep their values.
      facet_grid_sc(rows = vars(alpha_index), cols = vars(time),
                    scales = list(y = scales_y),
                    labeller = labeller(time = to_string)) +
      scale_fill_manual(values = c("#2874C5", "#EABF00")) + # fill colours
      # theme_bw() is a complete theme: it must come BEFORE theme(), otherwise it
      # replaces every customisation below (hidden legend, text sizes, ...).
      theme_bw() +
      theme(legend.position = "none",
            plot.margin = unit(c(2, 2, 2, 0), "mm"),
            axis.ticks.y = element_blank(),
            panel.grid = element_blank(),
            strip.background = element_rect(colour = NA, fill = "grey"),
            axis.title.x = element_text(size = 16, vjust = 0.5,
                                        hjust = 0.5),
            axis.title.y = element_text(size = 18, vjust = 0.5,
                                        hjust = 0.5),
            axis.text.x = element_text(angle = 0, size = 10,
                                       vjust = 0.5, hjust = 0.5),
            axis.text.y = element_text(size = 15, vjust = 0.5,
                                       hjust = 0.5)) +
      labs(x = '', y = '', fill = "Treatment")

    p_alpha
    
    3. 随着时间变化多样性指标的变化趋势

    通过分组拟合数据点展示变化趋势

    # Trend of each diversity index over time: one smoothed curve per
    # treatment, one facet row per index
    p_alpha_fit <- ggplot() +
      geom_smooth(
        data = alpha_data,
        mapping = aes(
          x = time,
          y = value,
          group = treatment, # one fit per treatment
          color = treatment, # colour of the fitted line
          fill = treatment   # fill of the confidence band
        ),
        size = 1.2,
        level = 0.95,
        alpha = 0.3
      ) +
      facet_grid_sc(
        rows = vars(alpha_index), # one facet row per index
        scales = list(y = scales_y)
      ) +
      scale_x_continuous(
        breaks = c(1, 7, 14),
        labels = c("1DPE", "7DPE", "14DPE") # tick labels on the x axis
      ) +
      scale_color_manual(values = c("#2874C5", "#EABF00")) +
      scale_fill_manual(values = c("#2874C5", "#EABF00")) +
      labs(x = '', y = '', color = "Treatment", title = "") +
      guides(fill = "none", color = "none") +
      theme_bw() +
      theme(
        plot.margin = unit(c(2, 2, 2, 0), "mm"),
        axis.ticks.y = element_blank(),
        plot.title = element_text(size = 12, vjust = 0.5, hjust = 0.5),
        axis.title.x = element_text(size = 12, hjust = 0.5, vjust = 0.5),
        axis.title.y = element_text(size = 12, vjust = 0.5, hjust = 0.5),
        axis.text.x = element_text(size = 8, vjust = 0.5, hjust = 0.5),
        axis.text.y = element_text(size = 8, vjust = 0.5, hjust = 0.5)
      )

    p_alpha_fit
    
    4. 使用patchwork包拼图

    之前使用过cowplot包来拼图,最近发现patchwork包拼图更优秀,它使用“+”,“/”,“()”等简单的符号进行拼图,并且不限于ggplot系列的图片。

    # Combine p_alpha and p_alpha_fit side by side
    library(patchwork)
    p_alpha + p_alpha_fit +
      plot_layout(
        widths = c(3, 1),   # 3:1 width ratio between the two panels
        guides = "collect"  # gather the legends of both plots in one place
      )
    

    需要进一步学习R中的数据清洗,《R for Data Science》这本书还得再看好几遍,熟练使用tidyverse中的dplyr和tidyr包,强迫自己在R中完成所有的数据处理和转换工作。

    相关文章

      网友评论

        本文标题:16S rRNA扩增子之alpha多样性结果可视化(分面和拼图)

        本文链接:https://www.haomeiwen.com/subject/zmvjwhtx.html