美文网首页R
普氏分析

普氏分析

作者: Dayueban | 来源:发表于2019-06-15 18:38 被阅读44次

    目的

    为了展示相同个体不同组学数据,或者不同表型数据之间的相似性关系以及之间的差异性,所以采用了普氏分析。这里展示的是如何将两组数据进行普氏分析并进行可视化

    一、导入数据

    1 | 微生物种水平丰度表

    图一 微生物种水平丰度表

    2 | 食物数据的非加权unifrac矩阵

    图二 食物数据
    # 食物数据
    
    # load the food distance matrix, unweighted unifrac
    food_un <- read.delim("data/diet/processed_food/dhydrt_smry_no_soy_beta/unweighted_unifrac_dhydrt.smry.no.soy.txt", row = 1) # weighted in not significant
    food_dist <- as.dist(food_un)
    
    ##############taxonomy############
    # load taxonomy collapsed for each person
    tax <- read.delim("data/microbiome/processed_average/UN_tax_CLR_mean_norm_s.txt", row = 1) # updates from reviewer comments
    # drop soylents
    tax <- tax[,colnames(tax) %in% colnames(food_un)]
    
    # make taxonomy distance matrix
    tax_dist <- dist(t(tax))
    
    ###############nutrients############
    # load nutrition data
    nutr <- read.delim("data/diet/processed_nutr/nutr_65_smry_no_soy.txt", row = 1)
    
    # normalize nutrition data across features (rows)
    nutr_n <- sweep(nutr, 1, rowSums(nutr), "/")
    
    # make nutrition distance matrix (euclidean)
    nutr_dist <- dist(t(nutr_n))
    

    二、进行procrustes分析并作图

    1 | 首先以食物和微生物为例

    # make pcoas 
    pcoa_f <- as.data.frame(pcoa(food_dist)$vectors)
    pcoa_t <- as.data.frame(pcoa(tax_dist)$vectors)
    
    # procrustes
    pro <- procrustes(pcoa_f, pcoa_t)
    pro_test <- protest(pcoa_f, pcoa_t, perm = 999)  #普氏分析组间数据的检验
    
    eigen <- sqrt(pro$svd$d)
    percent_var <- signif(eigen/sum(eigen), 4)*100
    
    beta_pro <- data.frame(pro$X)
    trans_pro <- data.frame(pro$Yrot)
    beta_pro$UserName <- rownames(beta_pro)
    beta_pro$type <- "Food (Unweighted Unifrac)"
    trans_pro$UserName <- rownames(trans_pro)
    trans_pro$type <- "Microbiome (Aitchison's)"
    
    colnames(trans_pro) <- colnames(beta_pro)
    
    pval <- signif(pro_test$signif, 1)
    
    plot <- rbind(beta_pro, trans_pro)
    
    food_micro <- ggplot(plot) +
      geom_point(size = 2, alpha=0.75, aes(x = Axis.1, y = Axis.2, color = type)) +
      scale_color_manual(values = c("#5a2071", "#5f86b7")) +
        theme_classic() +
        geom_line(aes(x= Axis.1, y=Axis.2, group=UserName), col = "darkgrey", alpha = 0.6) +
      theme(panel.background = element_blank(),
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.title = element_blank(),
            legend.text = element_text(size=9),
            legend.position = 'bottom',
            axis.text = element_text(size=4),
            axis.title = element_text(size=9),
             aspect.ratio = 1) +
      guides(color = guide_legend(ncol = 1)) +
      annotate("text", x = 0.3, y = -0.27, label = paste0("p-value=",pval), size = 2) +
      xlab(paste0("PC 1 [",percent_var[1],"%]")) +
      ylab(paste0("PC 2 [",percent_var[2],"%]")) 
      
      food_micro_leg <- get_legend(food_micro) #得到ggplot图的图例信息
    
    
    food_micro + theme(legend.position = "none")
    

    2 | 营养与微生物之间的普氏分析

    # make pcoas 
    pcoa_n <- as.data.frame(pcoa(nutr_dist)$vectors)
    pcoa_t <- as.data.frame(pcoa(tax_dist)$vectors)
    
    # procrustes
    pro <- procrustes(pcoa_n, pcoa_t)
    pro_test <- protest(pcoa_n, pcoa_t, perm = 999)
    
    eigen <- sqrt(pro$svd$d)
    percent_var <- signif(eigen/sum(eigen), 4)*100
    
    beta_pro <- data.frame(pro$X)
    trans_pro <- data.frame(pro$Yrot)
    beta_pro$UserName <- rownames(beta_pro)
    beta_pro$type <- "Nutrient (Euclidean)"
    trans_pro$UserName <- rownames(trans_pro)
    trans_pro$type <- "Microbiome (Aitchison's)"
    
    colnames(trans_pro) <- colnames(beta_pro)
    
    pval <- signif(pro_test$signif, 1)
    
    plot <- rbind(beta_pro, trans_pro)
    
    nutr_micro <- ggplot(plot) +
      geom_point(size = 2, alpha=0.75, aes(x = Axis.1, y = Axis.2, color = type)) +
      scale_color_manual(values = c("#5f86b7", "#2bbaa7")) +
        theme_classic() +
        geom_line(aes(x= Axis.1, y=Axis.2, group=UserName), col = "darkgrey", alpha = 0.6) +
      theme(panel.background = element_blank(),
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.title = element_blank(),
            legend.text = element_text(size=9),
            legend.position = 'bottom',
            axis.text = element_text(size=4),
            axis.title = element_text(size=9),
             aspect.ratio = 1) +
      guides(color = guide_legend(ncol = 1)) +
      annotate("text", x = 0.01, y = -0.19, label = paste0("p-value=",pval), size = 2) +
      xlab(paste0("PC 1 [",percent_var[1],"%]")) +
      ylab(paste0("PC 2 [",percent_var[2],"%]")) 
    
    nutr_micro + theme(legend.position = "none")
    
    nutr_micro_leg <- get_legend(nutr_micro)
    

    3 | 谷物类数据和微生物数据(欧几里得距离)

    # Import the beta diversity table from grains_beta (weighted or unweighted)
    food_beta <- read.table(file="data/diet/fiber/grains_data/grains_beta/unweighted_unifrac_grains_fiber.txt")
    
    food_dist <- as.dist(food_beta)
    # refer this code to make a pcoa you will have to fix naming and play with the data structure of the dataframe called plot to get this to work.
    
    # Import the beta diversity table for taxonomy
    taxa_beta <- read.table(file="data/microbiome/processed_average/UN_tax_beta/euclidean_UN_taxonomy_clr_s.txt")
    
    tax_dist <- as.dist(taxa_beta)
    
    # make pcoas 
    pcoa_f <- as.data.frame(pcoa(food_dist)$vectors)
    pcoa_t <- as.data.frame(pcoa(tax_dist)$vectors)
    
    # procrustes
    pro <- procrustes(pcoa_f, pcoa_t)
    pro_test <- protest(pcoa_f, pcoa_t, perm = 999)
    
    eigen <- sqrt(pro$svd$d)
    percent_var <- signif(eigen/sum(eigen), 4)*100
    
    beta_pro <- data.frame(pro$X)
    trans_pro <- data.frame(pro$Yrot)
    beta_pro$UserName <- rownames(beta_pro)
    beta_pro$type <- "Grain Fiber (Unweighted Unifrac)"
    trans_pro$UserName <- rownames(trans_pro)
    trans_pro$type <- "Microbiome (Aitchison's)"
    
    colnames(trans_pro) <- colnames(beta_pro)
    
    pval <- signif(pro_test$signif)
    
    plot <- rbind(beta_pro, trans_pro)
    
    grain_micro <- ggplot(plot) +
      geom_point(size = 2, alpha=0.75, aes(x = Axis.1, y = Axis.2, color = type)) +
      scale_color_manual(values = c("#fe9700", "#5f86b7")) +
        theme_classic() +
        geom_line(aes(x= Axis.1, y=Axis.2, group=UserName), col = "darkgrey", alpha = 0.6) +
      theme(panel.background = element_blank(),
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.title = element_blank(),
            legend.text = element_text(size=9),
            legend.position = 'bottom',
            axis.text = element_text(size=4),
            axis.title = element_text(size=9),
             aspect.ratio = 1) +
      guides(color = guide_legend(ncol = 1)) +
      annotate("text", x = 0.05, y = -0.27, label = paste0("p-value=",pval), size = 2) +
      xlab(paste0("PC 1 [",percent_var[1],"%]")) +
      ylab(paste0("PC 2 [",percent_var[2],"%]")) 
    
    grain_micro + theme(legend.position = "none")
    
    grain_leg <- get_legend(grain_micro)
    

    4 | 水果数据和微生物数据

    # Import the beta diversity table from fruit_beta (weighted or unweighted)
    food_beta <- read.table(file="data/diet/fiber/fruit_data/fruit_beta/unweighted_unifrac_fruit_fiber.txt")
    
    food_dist <- as.dist(food_beta)
    
    # make pcoas 
    pcoa_f <- as.data.frame(pcoa(food_dist)$vectors)
    pcoa_t <- as.data.frame(pcoa(tax_dist)$vectors)
    
    # procrustes
    pro <- procrustes(pcoa_f, pcoa_t)
    pro_test <- protest(pcoa_f, pcoa_t, perm = 999)
    
    eigen <- sqrt(pro$svd$d)
    percent_var <- signif(eigen/sum(eigen), 4)*100
    
    beta_pro <- data.frame(pro$X)
    trans_pro <- data.frame(pro$Yrot)
    beta_pro$UserName <- rownames(beta_pro)
    beta_pro$type <- "Fruit Fiber (Unweighted Unifrac)"
    trans_pro$UserName <- rownames(trans_pro)
    trans_pro$type <- "Microbiome (CLR-Euclidean)"
    
    colnames(trans_pro) <- colnames(beta_pro)
    
    pval <- signif(pro_test$signif, 3)
    
    plot <- rbind(beta_pro, trans_pro)
    
    fruit_micro <- ggplot(plot) +
        geom_point(size = 2, alpha=0.75, aes(x = Axis.1, y = Axis.2, color = type)) +
      scale_color_manual(values = c("#CBD13E", "#5f86b7")) +
        theme_classic() +
        geom_line(aes(x= Axis.1, y=Axis.2, group=UserName), col = "darkgrey", alpha = 0.6) +
      theme(panel.background = element_blank(),
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.title = element_blank(),
            legend.text = element_text(size=9),
            legend.position = 'bottom',
            axis.text = element_text(size=4),
            axis.title = element_text(size=9),
             aspect.ratio = 1) +
      guides(color = guide_legend(ncol = 1)) +
      annotate("text", x = 0.33, y = -0.33, label = paste0("p-value=",pval), size = 2) +
      xlab(paste0("PC 1 [",percent_var[1],"%]")) +
      ylab(paste0("PC 2 [",percent_var[2],"%]")) 
    
    fruit_micro + theme(legend.position = "none")
    
    fruit_leg <- get_legend(fruit_micro)
    
    # 合并四个图
    
    plotC_H <- plot_grid(  food_micro + theme(legend.position = "none"), 
              nutr_micro + theme(legend.position = "none"), 
              grain_micro + theme(legend.position = "none"), 
              fruit_micro + theme(legend.position = "none"), 
              ncol = 2, 
              align = "h", 
              labels = c("E", "F", "G", "H"))
    
    save_plot("Figure2E_H.pdf", plotC_H, base_width = 3.5, base_height = 3.5)
    

    5 | 出图,大功告成


    图三 普氏分析图

    参考文章

    [1] https://www.cell.com/cell-host-microbe/fulltext/S1931-3128(19)30250-1

    相关文章

      网友评论

        本文标题:普氏分析

        本文链接:https://www.haomeiwen.com/subject/pnfifctx.html