宏基因组下游分析

作者: 皮尔洛_dys | 来源:发表于2023-01-27 08:44 被阅读0次
    ###################part1-species_analysis###################################
    setwd("D:/metagenomics/upstream")
    library("vegan")
    library(ggsignif)
    library("xlsx")
    #样本显著性检
    diversity <- read.table( "metadata.txt", header=T, row.names=1, sep="\t", comment.char="", stringsAsFactors = F) #View(diversity)
    diversity 
    mytable <- xtabs(~gender+Group,data=diversity);mytable
    
    
    ages.p <- wilcox.test(ages ~ Group, data = diversity, var.equal = TRUE)$p.value
    gender.p <- fisher.test(xtabs(~gender+Group,data=diversity))$p.value 
    bmi.p <- wilcox.test(bmi ~ Group, data = diversity, var.equal = TRUE)$p.value
    education.p <- fisher.test(xtabs(~education+Group,data=diversity),simulate.p.value=TRUE)$p.value 
    
    rowname=c("ages","gender","bmi","education")
    div=data.frame(c(ages.p,gender.p,bmi.p,education.p),row.names = rowname)
    colnames(div)="P value"
    
    
    if (!file.exists("./species_analysis/phenotype_diff"))
    {
      dir.create("./species_analysis/phenotype_diff",recursive = TRUE)
    }
    write.xlsx(div,file="./species_analysis/phenotype_diff/meta_diff.xlsx")
    
    #置换多元方差分析
    otu <- read.table("merged_abundance_table_species.txt",header = T,sep = "\t",row.names = 1)
    meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
    otu = otu[,rownames(meta)]
    otu = t(otu)
    
    otu.adonis = adonis2(otu ~ Group+ages+bmi+education+gender,data = meta,permutations = 9999,by="margin")
    
    write.xlsx(otu.adonis,file="./species_analysis/phenotype_diff/permanova_diff.xlsx")
    #alpha多样性
    library("vegan")
    library("ggsignif")
    library("xlsx")
    library("ggplot2")
    library("ggsci")
    #清空变量
    rm(list = ls())
    #将当前工作目录下文件名为.txt的文件找
    file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
    #使用循环嵌套分别对class,family,order,phylum,genus和species进行alpha多样性分析(shanno,simpson,invsimpson)
    for (i in file)
    {
      #读取otu列表
      name=sub(".txt","",i)###将文件名.txt前面的名字提出来,也为了后面批量生成文件名准
      otu <- read.table(i,header = T,sep = "\t")
      row.names(otu) <- otu$clade_name
      otu <- subset(otu, select = -clade_name )
      otu<-t(otu)
      #读取样本分组信息
      metadata <- read.table("metadata.txt",header = T,sep = "\t")
      metadata <- metadata[,c("ID","Group")]
      #将otu列表中的样本的排列顺序与metadata中样本的顺序保持一�?
      otu=otu[metadata$ID,]
      #分别计算shannon,simpson,invsimpson多样�?
      shannon <- diversity(otu ,"shannon")
      simpson <- diversity(otu,"simpson")
      invsimpson <- diversity(otu,"invsimpson")
      #将三种alpha多样性写入excel(append=T,设为追加模式,防止之后的循环覆盖之前的写入文件,同时将不同分类水平的alpha多样性写入不同的sheet)
      alpha_div = cbind(shannon,simpson,invsimpson)
      if (!file.exists("./species_analysis/alpha"))
      {
        dir.create("./species_analysis/alpha",recursive = TRUE)
      }
      write.xlsx(alpha_div, file="./species_analysis/alpha/alpha_div.xlsx", sheetName=sub("merged_abundance_table_","",name), append=TRUE)
      
      #将计算得到的alpha多样性结果与分组信息合并(!!!若未对metadata与otu列表的样本顺序统一处理会造成分析结果错误)
      Group <- metadata$Group
      shannon=data.frame(shannon,Group)
      simpson=data.frame(simpson,Group)
      invsimpson=data.frame(invsimpson,Group)
      #分别对三种多样性绘制Boxplot
      for (j in list(shannon,simpson,invsimpson))
      {
        p = ggplot(j, aes(x=Group, y=j[,1], color=Group))+
          # 添加数据、xy值�? 颜色参数给画图函数ggplot
          geom_boxplot(alpha=1,size=1.2,width=0.5)+
          # 盒图
          labs(title="Alpha diversity", x="Group",y = paste(str(j)," index"))+
          # 标题
          theme(plot.title=element_text(hjust=0.5),legend.position="top",panel.background = element_rect(fill = "transparent",colour = NA),axis.line = element_line(colour = "black"))+
          geom_jitter(width = 0.2)+
          geom_signif(comparisons = list(c("control","experience")),
                      test= wilcox.test,
                      #step_increase = 10,
                      #y_position = 5,
                      position = "identity",
                      tip_length = 0,
                      size=0.9,color = "black",
                      map_signif_level = T)+
          ylab(paste0(colnames(j)[1]," Index In ",sub("merged_abundance_table_","",name)," Level"))+
          #scale_fill_manual(values=c(experience = "red", control = "blue"))
          scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
        pdf_name=""
        pdf_name=paste(pdf_name,name,sep = "")
        pdf_name=paste(pdf_name,colnames(j)[1],sep = "_")
        pdf_name=paste(pdf_name,".png",sep = "")
        pdf_name = sub("merged_abundance_table_","",pdf_name)
        pdf_name = paste("Alpha plot of ",pdf_name,sep="")
        ggsave(p,file =paste("./species_analysis/alpha/ ",pdf_name,sep = "") ,width=5,height=6)##ggsave批量生成name变量为名字的pdf文件
      }
      
      
    }
    
    #置换多元方差分析(不同分类水平)
    
    file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
    for (i in file)
    {
      name = sub("merged_abundance_table_","",i)
      name = sub(".txt","",name)
      name = paste(name,".xlsx",sep = "_permanova")
      print(name)
      otu <- read.table(i,header = T,sep = "\t",row.names = 1)
      meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
      otu = otu[,rownames(meta)]
      otu = t(otu)
      
      otu.adonis = adonis2(otu ~ Group,data = meta,permutations = 9999,by="margin")
      if (!file.exists("./species_analysis/permanova"))
      {
        dir.create("./species_analysis/permanova",recursive = TRUE)
      }
      write.xlsx(otu.adonis,file=paste("./species_analysis/permanova/",name,sep = ""))
      
    }
    #beta多样性分析
    #PCA
    library(ggsci)
    library("ggpubr")
    library("factoextra")
    library("vegan")
    library("ggtext")
    library(ggsignif)
    rm(list = ls())
    file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
    
    for (i in file)
    {
      name=sub(".txt","",i)
      tax = sub("merged_abundance_table_","",name)
      otu =read.table(i, row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
      meta=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
      otu = otu[,row.names(meta)]
      
      tmpotu= otu
      tmpmeta = meta
      tmpotu = data.frame(t(tmpotu))
      group = meta[,"Group"]
      group = data.frame(group)
      
      result = adonis2(tmpotu~group,group,distance="bray",permutations = 10000)
      
      otu.pca <- prcomp(t(otu), scale. =F)
      p <- fviz_eig(otu.pca, addlabels = TRUE)
      p = p+theme(panel.background = element_rect(fill = "white"),plot.background=element_rect(fill = "white"))
      p$labels$title=paste("Scree plot of ",tax,sep = "")
      if (!file.exists("./species_analysis/beta/PCA"))
      {
        dir.create("./species_analysis/beta/PCA",recursive = TRUE)
      }
      
      ggsave(paste("./species_analysis/beta/PCA/scree plot of ",".pdf",sep = tax), p,  width=180, height=150, units="mm")
      ggsave(paste("./species_analysis/beta/PCA/scree plot of ",".png",sep = tax), p,  width=180, height=150, units="mm")
      p1 <- fviz_pca_ind(otu.pca,
                         geom.ind = "point", # show points only ( not "text")
                         col.ind = meta$Group, # color by groups
                         addEllipses = TRUE, # Concentration ellipses
                         legend.title = "Groups")
      
      p1=p1+theme(axis.line = element_line(colour = "black"))+theme_bw()
      p1$labels$x=sub("Dim","PCA",p1$labels$x)
      p1$labels$y=sub("Dim","PCA",p1$labels$y)
      p1$labels$title=paste("PCA of ",tax,sep = "")
      p1 = p1 + scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
      
      text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((result$`Pr(>F)`[1]),digits = 4),sep = ""))
      #p1 = p1 + annotate("label",x=Inf,y=Inf,label=text_annotation,vjust=1.1,hjust=1.05)
      p1 =p1 + ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, family = "sans", label.colour = 'black'))
      
      PC1 <- otu.pca$x[,1]
      PC2 <- otu.pca$x[,2]
      
      otu.pca.data <- data.frame(rownames(otu.pca$x),PC1,PC2,meta$Group)
      colnames(otu.pca.data) <- c("sample", "PC1", "PC2", "group")
      my_comparisons = list(c('control','experience'))
      
      
      p2 <- ggplot(otu.pca.data, aes(x = group, y= PC2, colour = group)) +
        geom_boxplot() +
        theme(panel.background = element_rect(fill = "white",colour = "white"),
              panel.grid = element_blank(),
              axis.text.y = element_blank(),
              axis.ticks=element_blank(),
              legend.position="none") +
        geom_signif(comparisons = list(c("control","experience")),
                    test= wilcox.test,
                    position = "identity",
                    tip_length = 0,
                    size=0.9,color = "black",
                    map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
        xlab("") + ylab("")+
        scale_color_aaas(a=0.55)
      
      
      p3 <- ggplot(otu.pca.data, aes(x = group, y= PC1, colour = group)) +
        geom_boxplot() +
        theme(panel.background = element_rect(fill = "white",colour = "white"),
              panel.grid = element_blank(),
              axis.text.x = element_blank(),
              axis.ticks=element_blank(),
              legend.position="none") +
        xlab("") + ylab("") +
        coord_flip() + # coord_flip()函数翻转坐标�?
        geom_signif(comparisons = list(c("control","experience")),
                    test= wilcox.test,
                    #step_increase = 10,
                    #y_position = 5,
                    position = "identity",
                    tip_length = 0,
                    size=0.9,color = "black",
                    map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
        scale_color_aaas(a=0.55)
      
      p4 <- ggarrange(p3, NULL, p1, p2, widths = c(5,2), heights = c(2,4), align = "hv")+theme(plot.background=element_rect(fill = "white"))
      ggsave(paste("./species_analysis/beta/PCA/PCA of ",".pdf",sep = tax), p4,  width=180, height=150, units="mm")
      ggsave(paste("./species_analysis/beta/PCA/PCA of ",".png",sep = tax), p4,  width=180, height=150, units="mm")
      
    }
    
    #PCoA
    #########################################################
    library("amplicon")
    library(xlsx)
    library(ggsci)
    library(ggsignif)
    library("ggpubr")
    rm(list = ls())
    rm(list = ls())
    #setwd("D:/metagenomic")
    file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
    
    for (i in file){
      name=sub(".txt","",i)
      tax = sub("merged_abundance_table_","",name)
      otu =read.table(i, row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
      meta=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
      tmpotu= otu
      tmpmeta = meta
      tmpotu = data.frame(t(tmpotu))
      group = meta[,"Group"]
      group = data.frame(group)
      result = adonis2(tmpotu~group,group,distance="bray",permutations = 10000)
      otu=otu[,row.names(meta)]
      bray_dis = vegdist(data.frame(t(otu)),method = "bray")
      bray_dis =  as.matrix(bray_dis)
      bray_dis = as.data.frame(bray_dis)
      if (!file.exists("./species_analysis/beta/PCoA"))
      {
        dir.create("./species_analysis/beta/PCoA",recursive = TRUE)
      }
      write.xlsx(bray_dis,file=paste("./species_analysis/beta/PCoA/bray ",".xlsx",sep = tax))
      p = beta_pcoa(bray_dis,metadata = meta)
      p = p+scale_color_aaas(a=0.55)+theme_bw()
      text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((result$`Pr(>F)`[1]),digits = 4),sep = ""))
      p = p + ggtext::geom_richtext (aes (x= Inf, y=Inf,label=text_annotation ),color="black",label.colour = 'black',vjust = 1.1,hjust = 1.05)
      p = p + labs(title = paste("PCoA of",tax,sep = " "))
      
      
      p2 <- ggplot(p$data, aes(x = group, y= y, colour = group)) +
        geom_boxplot() +
        theme(panel.background = element_rect(fill = "white",colour = "white"),
              panel.grid = element_blank(),
              axis.text.y = element_blank(),
              axis.ticks=element_blank(),
              legend.position="none") +
        geom_signif(comparisons = list(c("control","experience")),
                    test= wilcox.test,
                    position = "identity",
                    tip_length = 0,
                    size=0.9,color = "black",
                    map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
        xlab("") + ylab("")+
        scale_color_aaas(a=0.55)
      
      
      p3 <- ggplot(p$data, aes(x = group, y=x, colour = group)) +
        geom_boxplot() +
        theme(panel.background = element_rect(fill = "white",colour = "white"),
              panel.grid = element_blank(),
              axis.text.x = element_blank(),
              axis.ticks=element_blank(),
              legend.position="none") +
        xlab("") + ylab("") +
        coord_flip() + 
        geom_signif(comparisons = list(c("control","experience")),
                    test= wilcox.test,
                    position = "identity",
                    tip_length = 0,
                    size=0.9,color = "black",
                    map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
        scale_color_aaas(a=0.55)
      if (!file.exists("./species_analysis/beta/PCoA/picture1"))
      {
        dir.create("./species_analysis/beta/PCoA/picture1",recursive = TRUE)
      }
      p4 <- ggarrange(p3, NULL, p, p2, widths = c(4,1), heights = c(1,3), align = "hv")+theme(plot.background=element_rect(fill = "white"))
      ggsave(paste("./species_analysis/beta/PCoA/picture1/PCoA of ",".pdf",sep = tax), p4,  width=180, height=150, units="mm")
      ggsave(paste("./species_analysis/beta/PCoA/picture1/PCoA of ",".png",sep = tax), p4,  width=180, height=150, units="mm")
      
      
    }
    
    #NMDS
    library(phyloseq)
    library(ggsci)
    library("ggpubr")
    setwd("D:")
    #setwd("D:/metagenomic")
    nmds_my=function(otu,group){
      library("vegan")
      library("ggplot2")
      library("ggsci")
      otu <- t(otu)
      otu = otu[row.names(group),]
      bray_dis <- vegdist(otu, method = 'bray')
      otu_nmds <- metaMDS(bray_dis,distance = 'bray')
      otu_nmds
      otu_nmds.stress <- otu_nmds$stress
      otu_nmds.point <- data.frame(otu_nmds$point)
      otu_nmds.species <- data.frame(otu_nmds$species)
      sample_site <- otu_nmds.point[1:2]
      sample_site$names <- rownames(sample_site)
      colnames(sample_site)[1:2] <- c('NMDS1', 'NMDS2')
      
      #合并分组数据
      sample_site <- cbind(sample_site,Group=group$Group)
      #sample_site = sample_site[,1:4]
      #sample_site$NMDS2 = -sample_site$NMDS2
      p <- ggplot(data=sample_site,aes(NMDS1,NMDS2)) +
        #geom_hline(aes(yintercept=0),colour="#d8d6d6",linetype=5)+
        #geom_vline(aes(xintercept=0),colour="#d8d6d6",linetype=5)+
        geom_point(aes(color =Group),shape = 19,size = 3.5)+
        #scale_color_aaas(alpha=0.7)+
        stat_ellipse(aes(color = Group),geom="polygon",level=0.90,alpha=0)+labs(x= "NMDS1", y = "NMDS2")+
        scale_color_aaas(alpha=0.7)+
        theme(plot.title = element_text(hjust = 0.5))+
        theme_bw()+
        #theme(panel.background = element_blank(),axis.line = element_line(color = "black"))+
        geom_text(aes(x = -0.4 , y =0.5,label = paste("stress = ",round(otu_nmds.stress,2))),size=5)+
        xlim(-0.5, 0.5)+ylim(-0.5,0.5)
      return(p)
    }
    
    file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
    
    for (i in file){
      name=sub(".txt","",i)
      tax = sub("merged_abundance_table_","",name)
      otu = read.table(i,header = T,row.names = 1,sep = "\t")
      group = read.table("metadata.txt",header = T,row.names = 1,sep = "\t")
      p = nmds_my(otu = otu,group = group)
      if (!file.exists("./species_analysis/beta/NMDS/picture1"))
      {
        dir.create("./species_analysis/beta/NMDS/picture1",recursive = TRUE)
      }
      ggsave(paste("./species_analysis/beta/NMDS/picture1/NMDS of ",".pdf",sep = tax), p,  width=180, height=150, units="mm")
      ggsave(paste("./species_analysis/beta/NMDS/picture1/NMDS of ",".png",sep = tax), p,  width=180, height=150, units="mm")
    }
    #CCA
    my_cca <- function(meta = meta ,otu =otu,env = env,tax=tax)
    {
      library("ggplot2")
      library("vegan")
      otu = otu[,rownames(groups)]
      env = env[rownames(groups),]
      decorana(otu)
      
      
      cca <- cca(X = t(otu), Y = env)
      x.sig = anova(cca)
      x.p = x.sig$Pr[1]
      x.p
      x.F = x.sig$F[1]
      x.F
      F1 <- paste("anova F: ", round(x.F, 2), sep = "")
      pp1 = paste("p: ", round(x.p, 2), sep = "")
      # 对CCA的结果进行summary, 提取对应信息;
      cca.sum <- summary(cca)
      str(cca.sum)
      
      cca.sum$sites
      
      cca.sum$biplot
      
      sites <- data.frame(cca.sum$sites[, c(1, 2)], groups, check.names = F)
      biplot <- as.data.frame(cca.sum$biplot, check.names = F)
      species <- as.data.frame(cca.sum$species, check.names = F)
      eig <- as.data.frame(cca.sum$concont$importance, check.names = F)
      labs <- sprintf("%s(%.2f%%)", colnames(sites)[1:2], eig[2,1:2]*100)
      
      
      library("ggplot2")
      library(ggrepel)
      p = ggplot() + geom_point(data = sites, aes(x = CCA1, y = CCA2,  fill = Group), pch = 21, colour = "black", size = 3,alpha=0.7) +
        geom_segment(data =as.data.frame(cca.sum$biplot) , aes(x = 0, y = 0, xend = CCA1, yend = CCA2), arrow = arrow(angle = 22.5,length = unit(0.2, "cm")), linetype = 1, size = 1,colour = "black") + 
        geom_label_repel(data = as.data.frame(cca.sum$biplot) ,aes(CCA1*1.1, CCA2*1.1,label=row.names(as.data.frame(cca.sum$biplot) )),fontface="bold",color="black",alpha=0.6, 
                         box.padding=unit(1, "lines"), point.padding=unit(0.3, "lines"), 
                         segment.colour = "grey50",segment.size=0.75,segment.alpha=0.9,
                         label.r=0.15,label.size=0.5, max.overlaps = 50,size=3)+
        labs(x = labs[1], 
             y = labs[2])
      
      p = p + theme_bw() + geom_hline(aes(yintercept = 0), colour = "black", linetype = 2) + 
        geom_vline(aes(xintercept = 0), colour = "black", linetype = "dashed") + 
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
              plot.title = element_text(vjust = -0.5, hjust = 0.1,size = 20), 
              axis.title.y = element_text(size = 8,  face = "bold", colour = "black"), 
              axis.title.x = element_text(size = 8, face = "bold", colour = "black"),
              axis.text = element_text(size = 8, face = "bold"), 
              axis.text.x = element_text(colour = "black",size = 8), 
              axis.text.y = element_text(colour = "black", size = 8), 
              legend.text = element_text(size = 8,face = "bold"))+
        ggtext::geom_richtext(
          aes(
            x = Inf , y = Inf, 
            vjust = 1.1,hjust = 1.03, 
            label = paste(F1,' <p>\n p-value = ',pp1, sep = "")),
          size =5, family = "sans",colour = 'black',label.colour="black")+scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
      # 添加箭头,标记为环境因子;
      
      
      #p <- p+scale_x_continuous(limits = c(min(sites$CCA1)*0.6,max(sites$CCA1)*0.6))
      #p <- p+scale_y_continuous(limits = c(min(sites$CCA2)*0.6,max(sites$CCA2)*0.6))
      p = p+labs(title = paste("CCA of ",tax,sep = ""))
      return(p)
    }
    
    #RDA
    my_rda <- function(meta = meta ,otu =otu,env = env,tax=tax)
    {
      library("ggplot2")
      library("vegan")
      otu = otu[,rownames(groups)]
      env = env[rownames(groups),]
      decorana(otu)
      
      
      rda <- rda(X = t(otu), Y = env)
      x.sig = anova(rda)
      x.p = x.sig$Pr[1]
      x.p
      x.F = x.sig$F[1]
      x.F
      F1 <- paste("anova F: ", round(x.F, 2), sep = "")
      pp1 = paste("p: ", round(x.p, 2), sep = "")
      # 对CCA的结果进行summary, 提取对应信息;
      rda.sum <- summary(rda)
      str(rda.sum)
      
      rda.sum$sites
      
      rda.sum$biplot
      
      sites <- data.frame(rda.sum$sites[, c(1, 2)], groups, check.names = F)
      biplot <- as.data.frame(rda.sum$biplot, check.names = F)
      species <- as.data.frame(rda.sum$species, check.names = F)
      eig <- as.data.frame(rda.sum$concont$importance, check.names = F)
      labs <- sprintf("%s(%.2f%%)", colnames(sites)[1:2], eig[2,1:2]*100)
      
      
      library("ggplot2")
      library(ggrepel)
      p = ggplot() + geom_point(data = sites, aes(x = RDA1, y = RDA2,  fill = Group), pch = 21, colour = "black", size = 3,alpha=0.7) +
        geom_segment(data =as.data.frame(rda.sum$biplot) , aes(x = 0, y = 0, xend = RDA1, yend = RDA2), arrow = arrow(angle = 22.5,length = unit(0.2, "cm")), linetype = 1, size = 1,colour = "black") + 
        geom_label_repel(data = as.data.frame(rda.sum$biplot) ,aes(RDA1*1.1, RDA2*1.1,label=row.names(as.data.frame(rda.sum$biplot) )),fontface="bold",color="black",alpha=0.6, 
                         box.padding=unit(1, "lines"), point.padding=unit(0.3, "lines"), 
                         segment.colour = "grey50",segment.size=0.75,segment.alpha=0.9,
                         label.r=0.15,label.size=0.5, max.overlaps = 50,size=3)+
        labs(x = labs[1], 
             y = labs[2])
      
      p = p + theme_bw() + geom_hline(aes(yintercept = 0), colour = "black", linetype = 2) + 
        geom_vline(aes(xintercept = 0), colour = "black", linetype = "dashed") + 
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
              plot.title = element_text(vjust = -0.5, hjust = 0.1,size = 20), 
              axis.title.y = element_text(size = 8,  face = "bold", colour = "black"), 
              axis.title.x = element_text(size = 8, face = "bold", colour = "black"),
              axis.text = element_text(size = 8, face = "bold"), 
              axis.text.x = element_text(colour = "black",size = 8), 
              axis.text.y = element_text(colour = "black", size = 8), 
              legend.text = element_text(size = 8,face = "bold"))+
        ggtext::geom_richtext(
          aes(
            x = Inf , y = Inf, 
            vjust = 1.1,hjust = 1.03, 
            label = paste(F1,' <p>\n p-value = ',pp1, sep = "")),
          size =5, family = "sans",colour = 'black',label.colour="black")+scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
      # 添加箭头,标记为环境因子;
      
      
      #p <- p+scale_x_continuous(limits = c(min(sites$RDA1)*0.6,max(sites$RDA1)*0.6))
      # p <- p+scale_y_continuous(limits = c(min(sites$RDA2)*0.6,max(sites$RDA2)*0.6))
      p = p+labs(title = paste("RDA of ",tax,sep = ""))
      return(p)
    }
    #p = my_rda(meta = groups,env=env,otu=otu,tax="species")
    
    library(devtools)
    library(amplicon)
    library("ggsci")
    file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
    for (i in file){
      name=sub(".txt","",i)
      tax = sub("merged_abundance_table_","",name)
      otu =read.table(i, row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
      map=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
      env=read.delim("env.txt",row.names=1)
      p = my_rda (otu=otu,meta = map,env=env,tax = tax)
      if (!file.exists("./species_analysis/beta/RDA/picture1"))
      {
        dir.create("./species_analysis/beta/RDA/picture1",recursive = TRUE)
      }
      ggsave(paste("./species_analysis/beta/RDA/picture1/RDA plot of ",".pdf",sep = tax), p,  width=180, height=150, units="mm")
      ggsave(paste("./species_analysis/beta/RDA/picture1/RDA plot of ",".png",sep = tax), p,  width=180, height=150, units="mm")
    }
    
    
    
    #优势物种分析
    #堆叠图
    dominant_species <- function(n=n,otutab = otutab,metadata=metadata,tax= tax)
    {
      library(tidyverse)
      library("ggsci")
      library("ggplot2")
      library("RColorBrewer")
      getpalette =colorRampPalette(brewer.pal(12,"Set3"))
      otu = otutab
      map = metadata
      #将实验组和对照组的样本分开
      experience = list()
      control = list()
      for (i in row.names(map))
      {
        if (map[i,"Group"] == "experience")
        {
          experience = append(experience,i)  
        }
        if (map[i,"Group"] == "control")
        {
          control = append(control,i)  
        }
        
      }
      #将otu列表区分为实验组otu和对照组otu
      exp.otu = otu[which(colnames(otu) %in% experience)]
      ctr.otu = otu[which(colnames(otu) %in% control)]
      #对实验组和对照组中不同物种的数量进行求和
      exp.otu.sum = rowSums(exp.otu)
      ctr.otu.sum = rowSums(ctr.otu)
      #将实验组与对照组求和数据进行合并
      df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
      colnames(df) = c("experience","control")
      #直接计算相对丰度
      df$experience = df$experience/sum(df$experience)
      df$control = df$control/sum(df$control)
      #计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
      df$average = (df$experience+df$control)/2
      df = df[order(df$average,decreasing = T),]
      #找出top n 的物种,其余物种并入other并计算other的总和
      df.top10 = df[1:n-1,]
      df.res = df[n:length(row.names(df)),]
      other = colSums(df.res)
      #将other加入top n数据中并更改名字
      a = rbind(df.top10,other)
      rownames(a)[length(rownames(a))]="Other"
      a$Phylum = row.names(a)
      df = a
      df = df[,which(colnames(df)!="average")]
      #设置线段内容
      
      link_dat= data.frame()
      sum.experience =0
      sum.control=0
      for (i in length(rownames(df)):1)
      {
        sum.control = sum.control + df$control[i]
        link_dat[rownames(df)[i],"control"] = sum.control
        sum.experience = sum.experience + df$experience[i]
        link_dat[rownames(df)[i],"experience"] = sum.experience
      }
      link_dat$Phylum = row.names(link_dat)
      df.long <- reshape2::melt(df, value.name='abundance', variable.name='group')
      #绘图
      df.long$Phylum = factor(df.long$Phylum,levels = rev(link_dat$Phylum))
      #a = df.long$Phylum
      df.long$group = factor(df.long$group,levels = c("control","experience"))
      p = ggplot(df.long, aes(x=group, y=abundance, fill=Phylum)) + 
        geom_bar(stat = "identity", width=0.5, col='black',size=1)  + 
        geom_segment(data=link_dat, aes(x=1.25, xend=1.75, y=control, yend=experience),size=1)+
        theme_bw()  + 
        #scale_color_aaas(a=0.10)+scale_fill_aaas(a=0.10)+
        ylab("Relative abundance(%)")+
        
        theme(axis.title.x=element_text(vjust=2, size=20,face = "bold"),
              axis.title.y =element_text(vjust=2, size=20,face = "bold"),
              legend.title  = element_text(size = 20,face = "bold")
        )
      name = paste("Top ",n,sep = "")
      name = paste(name,tax,sep = " abundant ")
      p = p+scale_fill_manual(values = getpalette(n))+labs(fill=name)+labs(x="Groups")
      
      return(p)
    }
    otu =read.table("out_ARGcategory.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
    map=read.table("metadata.2.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
    
    p= dominant_species(n=10,metadata = map,otutab = otu,tax = "ARGcategory")
    
    p
    #多组箱线图
    setwd("D://metagenomics//upstream")
    otu =read.table("merged_abundance_table_species.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
    map=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
    multi_boxplot <- function(otu = otu ,meta = meta,n =n,tax=tax)
    {
      get_top_abundant <- function(n=n,otutab = otutab,metadata=metadata)
      {
        library(tidyverse)
        library("ggsci")
        library("ggplot2")
        library("RColorBrewer")
        library(ggsignif)
        getpalette = brewer.pal(12,"Set3")
        otu = otutab
        map = metadata
        #将实验组和对照组的样本分开
        experience = list()
        control = list()
        for (i in row.names(map))
        {
          # print(i)
          if (map[i,"Group"] == "experience")
          {
            experience = append(experience,i)  
          }
          if (map[i,"Group"] == "control")
          {
            control = append(control,i)  
          }
          
        }
        #将otu列表区分为实验组otu和对照组otu
        exp.otu = otu[which(colnames(otu) %in% experience)]
        ctr.otu = otu[which(colnames(otu) %in% control)]
        #对实验组和对照组中不同物种的数量进行求和
        exp.otu.sum = rowSums(exp.otu)
        ctr.otu.sum = rowSums(ctr.otu)
        #将实验组与对照组求和数据进行合并
        df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
        colnames(df) = c("experience","control")
        #计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
        df$average = (df$experience+df$control)/2
        df = df[order(df$average,decreasing = T),]
        #找出top n 的物种,其余物种并入other并计算other的总和
        p = rownames(df)[1:n]
        q = rownames(df)[n+1:length(row.names(df))]
        #将other加入top n数据中并更改名字
        
        return(p)
      }
      #获取丰度前十的物种名称
      result= get_top_abundant(n=n,otutab = otu,metadata = map)
      name_list = result
      name_list = as.list(name_list)
      #调整otu顺序与meta信息保持一致
      otu =as.data.frame( t(otu))
      otu =  otu[row.names(map),]
      #根据返回的top abundant信息提取对应的otu信息
      otu =as.data.frame( otu[which(colnames(otu) %in% name_list)])
      #根据name_list的顺序调整otu的物种(列)顺序
      name_list =  as.character(name_list)
      otu = otu[,name_list]
      #给矩阵的每一个元素取对数
      otu = log10(otu)
      #给每一个样本添加分组信息
      otu$Group = NULL
      for (i in rownames(otu))
      {
        otu[i,"Group"]=map[i,"Group"]
      }
      library(simplevis)
      library(ggplot2)
      library(tidyverse)
      library("ggsci")
      library(ggsignif)
      data.long <- pivot_longer(otu, cols = 1:length(colnames(otu))-1, names_to ="index", values_to = "num")
      data.long$Group <- as.factor(data.long$Group)
      data.long$index <- factor(data.long$index,levels =rev(colnames(otu)) )
      data.long <- data.long[!is.infinite(data.long$num),]
      str(data.long)
      p = ggplot(data.long, aes(x=index, y=num,fill=Group))+theme_bw()+
        geom_boxplot(alpha=1,size=0.8,width=0.5,outlier.color =     "gray",position=position_dodge(width=0.8))+#调整control和experience之间的距离
        theme(axis.text.x = element_text(angle = 90),
              plot.title = element_text(hjust = 0.5),#强行标题居中
              #axis.text.y = element_text(angle = 90),
              axis.title.x=element_text(vjust=2, size=10,face = "bold"),
              axis.title.y =element_text(vjust=2, size=10,face = "bold"),
              legend.title  = element_text(hjust = 0.5, size = 10,face = "bold"))+coord_flip()+
        stat_compare_means(aes(group=Group),
                           method = "wilcox.test",
                           label="p.signif",
                           hide.ns = T,
                           vjust = 0.7,)+
        scale_fill_aaas(a=0.55)+scale_color_aaas(a=0.55)+labs(y="Relative Abundance(log10)",x=NULL,title =paste("Top abundant ",tax,sep = "") ,hjust=0.5)
      return(p)
    }
    p = multi_boxplot(otu =otu,meta = map,n=10,tax="species")
    p
    
    #分面堆叠图
    otu =read.table("merged_abundance_table_species.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
    map=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
    facet_stackplot <- function(otu =otu ,meta=meta)
    {
      get_other_abundant <- function(n=n,otutab = otutab,metadata=metadata)
      {
        library(tidyverse)
        library("ggsci")
        library("ggplot2")
        library("RColorBrewer")
        library(ggsignif)
        #getpalette = brewer.pal(12,"Set3")
        otu = otutab
        map = metadata
        #将实验组和对照组的样本分开
        experience = list()
        control = list()
        for (i in row.names(map))
        {
          # print(i)
          if (map[i,"Group"] == "experience")
          {
            experience = append(experience,i)  
          }
          if (map[i,"Group"] == "control")
          {
            control = append(control,i)  
          }
          
        }
        #将otu列表区分为实验组otu和对照组otu
        exp.otu = otu[which(colnames(otu) %in% experience)]
        ctr.otu = otu[which(colnames(otu) %in% control)]
        #对实验组和对照组中不同物种的数量进行求和
        exp.otu.sum = rowSums(exp.otu)
        ctr.otu.sum = rowSums(ctr.otu)
        #将实验组与对照组求和数据进行合并
        df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
        colnames(df) = c("experience","control")
        #计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
        df$average = (df$experience+df$control)/2
        df = df[order(df$average,decreasing = T),]
        #找出top n 的物种,其余物种并入other并计算other的总和
        p = rownames(df)[1:n]
        q = rownames(df)[n+1:length(row.names(df))]
        #将other加入top n数据中并更改名字
        
        return(q)
      }
      
      
      get_top_abundant <- function(n=n,otutab = otutab,metadata=metadata)
      {
        library(tidyverse)
        library("ggsci")
        library("ggplot2")
        library("RColorBrewer")
        library(ggsignif)
        #getpalette = brewer.pal(12,"Set3")
        otu = otutab
        map = metadata
        #将实验组和对照组的样本分开
        experience = list()
        control = list()
        for (i in row.names(map))
          
        {
          
          # print(i)
          if (map[i,"Group"] == "experience")
            
          {
            experience = append(experience,i)  
          }
          if (map[i,"Group"] == "control")
          {
            control = append(control,i)  
          }
          
        }
        #将otu列表区分为实验组otu和对照组otu
        exp.otu = otu[which(colnames(otu) %in% experience)]
        ctr.otu = otu[which(colnames(otu) %in% control)]
        #对实验组和对照组中不同物种的数量进行求和
        exp.otu.sum = rowSums(exp.otu)
        ctr.otu.sum = rowSums(ctr.otu)
        #将实验组与对照组求和数据进行合并
        df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
        colnames(df) = c("experience","control")
        #计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
        df$average = (df$experience+df$control)/2
        df = df[order(df$average,decreasing = T),]
        #找出top n 的物种,其余物种并入other并计算other的总和
        p = rownames(df)[1:n]
        q = rownames(df)[n+1:length(row.names(df))]
        #将other加入top n数据中并更改名字
        
        return(p)
      }
      name_list = get_top_abundant(n=10,otutab = otu,metadata = map)
      name_list = as.list(name_list)
      other_list = get_other_abundant(n=10,otutab = otu,metadata = map)
      other_list = as.list(other_list)
      otu.top = otu[which(row.names(otu) %in% name_list),]
      otu.other = otu[which(row.names(otu) %in% other_list),]
      
      t= colSums(otu.other)
      df=as.data.frame( rbind(otu.top,t))
      rownames(df)[length(rownames(df))]="Other"
      otu = df
      otu = otu/100
      
      otu = as.data.frame(otu)
      otu$Taxonomy = row.names(otu)
      otu
      data_frame=melt(otu, id='Taxonomy')
      names(data_frame)[2]='sample_id'#更改列名
      map$sample_id = row.names(map)
      
      data_group = map[,c("sample_id","Group")]
      data_frame=merge(data_frame, data_group, by='sample_id')
      
      ylim =c(0,100)
      xlim =c(0,100)
      library(ggplot2)
      library("RColorBrewer")
      library(ggsignif)
      library("ggsci")
      getpalette =colorRampPalette(brewer.pal(12,"Set3"))
      
      name_list = append(name_list,"Other")
      name_list = as.character(name_list)
      data_frame$Taxonomy = factor(data_frame$Taxonomy,levels = name_list )
      stack_plot=ggplot(data_frame, aes(x=sample_id, fill=Taxonomy, y=value*100))+
        # 数据输入:样本、物种、丰度
        geom_col(position='stack') +
        # stack:堆叠图
        labs(x='Samples', y='Relative Abundance (%)')+
        # 给xy轴取名
        scale_y_continuous(expand=c(0, 0))+
        # 调整y轴属性
        theme(axis.text.x=element_text(angle=90, hjust=1,size = 5),
              axis.title.x=element_text(vjust=2, size=10,face = "bold"),
              axis.title.y =element_text(vjust=2, size=10,face = "bold"),
              #legend.title  = element_text(hjust = 0.5, size = 10,face = "bold"),
              legend.title  = element_blank()
        )+
        facet_wrap(~Group, scales = 'free_x', ncol = 2)+ # 按group组,X轴,分2面
        scale_fill_manual(values = getpalette(20))+labs(x=NULL)
      return(stack_plot)
    }
    p = facet_stackplot(otu = otu,meta =map)
    p
    
    # Maaslin2
    library(Maaslin2)
    otu = read.table(file = "merged_abundance_table_species.txt", header = T,row.names = 1,sep = "\t")
    metadata = read.table(file = "metadata.txt", header = T,row.names = 1,sep = "\t")
    fit_data = Maaslin2(
      t(otu), 
      metadata, 
      output = "demo_output", 
      fixed_effects = c("Group","ages","gender","education","bmi"),
      #random_effects = c("ages","gender","education","bmi"),
      plot_heatmap=TRUE,
      heatmap_first_n=10)
      
      
      
      ###############################PART2-function_analysis######################################
      #####my funcation################
    setwd("D:\\metagenomics\\upstream")
    #####my funcation################
    setwd("D:\\metagenomic")
    #####PCA##########################
    my_pca <- function(otu = otu,meta = metadata ,title = title)
    {
      library(ggsci)
      library("ggpubr")
      library("factoextra")
      library("vegan")
      library("ggtext")
      library(ggsignif)
      #otu =t(read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t")) 
      #meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
      otu = t(otu)
      otu = otu[row.names(meta),]
      permanova = adonis2(otu ~ Group, data = meta, permutations = 9999)
      #PCA
      otu.pca <- prcomp(otu, scale. =F)
      p <- fviz_eig(otu.pca, addlabels = TRUE)
      p = p+theme(panel.background = element_rect(fill = "white"),plot.background=element_rect(fill = "white"))
      p1 <- fviz_pca_ind(otu.pca,
                         geom.ind = "point", # show points only ( not "text")
                         col.ind = meta$Group, # color by groups
                         addEllipses = TRUE, # Concentration ellipses
                         legend.title = "Groups")
      
      p1=p1+theme(axis.line = element_line(colour = "black"))+theme_bw()
      p1$labels$x=sub("Dim","PCA",p1$labels$x)
      p1$labels$y=sub("Dim","PCA",p1$labels$y)
      p1=p1+labs(title = "PCA of path.function")+scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
      text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((permanova$`Pr(>F)`[1]),digits = 4),sep = ""))
      #p1 = p1 + annotate("label",x=Inf,y=Inf,label=text_annotation,vjust=1.1,hjust=1.05)
      p1 =p1 + ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, family = "sans", label.colour = 'black'))
      #p1 =p1 +geom_point( alpha = 0.3)
      PC1 <- otu.pca$x[,1]
      PC2 <- otu.pca$x[,2]
      df = as.data.frame(cbind(PC1,PC2)) 
      df$groups = meta$Group
      pd = ggplot(data = df, aes(x=PC1,y=PC2))+geom_point(aes(color=groups,shape=groups,color = groups,fill=groups),size=2,alpha=0.7)+
        ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, label.colour = 'black',text.colour = "black"))
      #pd
      #pd = pd +theme_bw()+stat_ellipse(aes(fill=groups),type="norm",geom="polygon",alpha=0.2,color=NA)
      pd = pd+stat_ellipse(aes(
        color=groups,
        fill=groups),
        level = 0.99,
        geom = "polygon",
        alpha=0.1)+scale_fill_d3(a=0.5)+theme_bw()+scale_color_d3(a=0.5)
      #+geom_point(shape=c(1,2))
      #pd=pd+theme(panel.grid.major = element_blank())
      pd=pd+labs(x=paste("PCA1(","%)",sep = as.character(round(p$data$eig[1],2))),y=paste("PCA2(","%)",sep = as.character(round(p$data$eig[2],2))))
      #pd = pd+  ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, family = "sans", label.colour = 'black'))
      pd = pd+labs(title = paste("PCA of ",title,sep = ""))
      
      
      
      otu.pca.data <- data.frame(rownames(otu.pca$x),PC1,PC2,meta$Group)
      colnames(otu.pca.data) <- c("sample", "PC1", "PC2", "group")
      my_comparisons = list(c('control','experience'))
      
      p2 <- ggplot(otu.pca.data, aes(x = group, y= PC2, colour = group)) +
        geom_boxplot() +
        theme(panel.background = element_rect(fill = "white",colour = "white"),
              panel.grid = element_blank(),
              axis.text.y = element_blank(),
              axis.ticks=element_blank(),
              legend.position="none") +
        geom_signif(comparisons = list(c("control","experience")),
                    test= wilcox.test,
                    position = "identity",
                    tip_length = 0,
                    size=0.9,color = "black",
                    map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
        xlab("") + ylab("")+
        scale_color_d3(a=0.5)
      
      p3 <- ggplot(otu.pca.data, aes(x = group, y= PC1, colour = group)) +
        geom_boxplot() +
        theme(panel.background = element_rect(fill = "white",colour = "white"),
              panel.grid = element_blank(),
              axis.text.x = element_blank(),
              axis.ticks=element_blank(),
              legend.position="none") +
        xlab("") + ylab("") +
        coord_flip() + # coord_flip()函数翻转坐标�?
        geom_signif(comparisons = list(c("control","experience")),
                    test= wilcox.test,
                    #step_increase = 10,
                    #y_position = 5,
                    position = "identity",
                    tip_length = 0,
                    size=0.9,color = "black",
                    map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
        scale_color_d3(a=0.5)
      
      
      
      p4 <- ggarrange(p3, NULL, pd, p2, widths = c(5,2), heights = c(2,4), align = "hv")+theme(plot.background=element_rect(fill = "white"))
      return(p4)
    }
    #测试
    #otu =read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t") 
    #meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
    #pca=my_pca(otu = otu ,meta = meta ,title = "GBM")
    
    
    
    
    ###############PcoA####################
    my_pcoa <- function(otu=otu,meta=meta,title=title)
    {
      library("amplicon")
      library(xlsx)
      library(ggsci)
      library(ggsignif)
      library("ggpubr")
      Beta_PCOA <- function (dis_mat, metadata, groupID = "Group", ellipse = T, 
                             label = F, PCo = 12) 
      {
        p_list = c("ggplot2", "vegan", "ggrepel")
        for (p in p_list) {
          if (!requireNamespace(p)) {
            install.packages(p)
          }
          suppressWarnings(suppressMessages(library(p, character.only = T)))
        }
        idx = rownames(metadata) %in% rownames(dis_mat)
        metadata = metadata[idx, , drop = F]
        dis_mat = dis_mat[rownames(metadata), rownames(metadata)]
        sampFile = as.data.frame(metadata[, groupID], row.names = row.names(metadata))
        pcoa = cmdscale(dis_mat, k = 3, eig = T)
        points = as.data.frame(pcoa$points)
        eig = pcoa$eig
        points = cbind(points, sampFile[rownames(points), ])
        colnames(points) = c("x", "y", "z", "group")
        if (PCo == 12) {
          p = ggplot(points, aes(x = x, y = y, color = group)) + 
            labs(x = paste("PCo 1 (", format(100 * eig[1]/sum(eig), 
                                             digits = 4), "%)", sep = ""), y = paste("PCo 2 (", 
                                                                                     format(100 * eig[2]/sum(eig), digits = 4), "%)", 
                                                                                     sep = ""), color = groupID)
        }
        if (PCo == 13) {
          p = ggplot(points, aes(x = x, y = z, color = group)) + 
            labs(x = paste("PCo 1 (", format(100 * eig[1]/sum(eig), 
                                             digits = 4), "%)", sep = ""), y = paste("PCo 3 (", 
                                                                                     format(100 * eig[2]/sum(eig), digits = 4), "%)", 
                                                                                     sep = ""), color = groupID)
        }
        if (PCo == 23) {
          p = ggplot(points, aes(x = y, y = z, color = group)) + 
            labs(x = paste("PCo 2 (", format(100 * eig[1]/sum(eig), 
                                             digits = 4), "%)", sep = ""), y = paste("PCo 3 (", 
                                                                                     format(100 * eig[2]/sum(eig), digits = 4), "%)", 
                                                                                     sep = ""), color = groupID)
        }
        p = p + geom_point(alpha = 0.6, size = 3) + theme_classic() + 
          theme(text = element_text(family = "sans", size = 7))
        if (ellipse == T) {
          p = p + stat_ellipse(level = 0.68)
        }
        if (label == T) {
          p = p + geom_text_repel(label = paste(rownames(points)), 
                                  colour = "black", size = 3)
        }
        p
      }
      
      #setwd("D:/metagenomic")
      #file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
      #otu = t(read.table("out_path.function.txt",sep = "\t",header = T,row.names = 1))
      #abun = t(read.table("out_pathabundance.txt",sep = "\t",header = T,row.names = 1))
      # meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
      otu = t(otu)
      otu = otu[row.names(meta),]
      #otu = otu[,-c(1,2)]
      permanova = adonis2(otu~Group,data = meta,permutations = 9999)
      bray_dis = vegdist(data.frame(otu),method = "bray")
      bray_dis =  as.matrix(bray_dis)
      bray_dis = as.data.frame(bray_dis)
      
      #write.xlsx(bray_dis,file=paste("./function/humann3/bray_dis ",".xlsx",sep =""))
      p = Beta_PCOA(bray_dis,metadata = meta)
      p = p+scale_color_d3(a=0.5)+theme_bw()
      text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((permanova$`Pr(>F)`[1]),digits = 4),sep = ""))
      p = p + ggtext::geom_richtext (aes (x= Inf, y=Inf,label=text_annotation ),color="black",label.colour = 'black',vjust = 1.1,hjust = 1.05)
      p = p + labs(title = paste("PCoA of",title,sep = " "))
      
      p2 <- ggplot(p$data, aes(x = group, y= y, colour = group)) +
        geom_boxplot() +
        theme(panel.background = element_rect(fill = "white",colour = "white"),
              panel.grid = element_blank(),
              axis.text.y = element_blank(),
              axis.ticks=element_blank(),
              legend.position="none") +
        geom_signif(comparisons = list(c("control","experience")),
                    test= wilcox.test,
                    position = "identity",
                    tip_length = 0,
                    size=0.9,color = "black",
                    map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
        xlab("") + ylab("")+
        scale_color_d3(a=0.5)
      p3 <- ggplot(p$data, aes(x = group, y=x, colour = group)) +
        geom_boxplot() +
        theme(panel.background = element_rect(fill = "white",colour = "white"),
              panel.grid = element_blank(),
              axis.text.x = element_blank(),
              axis.ticks=element_blank(),
              legend.position="none") +
        xlab("") + ylab("") +
        coord_flip() + 
        geom_signif(comparisons = list(c("control","experience")),
                    test= wilcox.test,
                    position = "identity",
                    tip_length = 0,
                    size=0.9,color = "black",
                    map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
        scale_color_d3(a=0.5)
      
      
      p4 <- ggarrange(p3, NULL, p, p2, widths = c(4,1), heights = c(1,3), align = "hv")+theme(plot.background=element_rect(fill = "white"))
      return(p4)
      
    }
    
    #pcoa= my_pcoa(otu = otu,meta = meta ,title = "BGM")
    
    
    
    ###############棒棒糖图##################
    library(xlsx)
    data = read.xlsx("D:\\metagenomics\\upstream\\function\\GBM\\table_result.xlsx",sheetName = "sheet0")
    
    
    
    mydotchart <- function(data=data)
    {
      library(ggplot2)
      library(ggpubr)
      
      data = data[,c("anno","P-value","effect_size")]#"anno"        "P.value"     "effect_size"
      data = data[which(data$`P-value` < 0.05),]#挑选显著性
      data = data[order(data$`P-value`),]
      if(length(data$`P-value`) > 20)
      {
        data = data[1:20,]
      }
      p = ggdotchart(data, x = "anno", y = "effect_size",
                     add = "segments",
                     color = "P-value",
                     sorting = "descending",
                     add.params = list(color = "lightgray", size = 2),#改变线的粗细和颜色
                     dot.size = 9,
                     label = round(data$effect_size,1), 
                     font.label = list(color = "white", size = 9,
                                       vjust = 0.5),
                     
                     
                     
      ) + geom_hline(yintercept = 0, linetype = 2, color = "lightgray")+
        theme(legend.position = "right",
              axis.text.x = element_text(angle = 40,vjust = 1,hjust = 1,size = 5)
        )+
        labs(x="",y="effect size")+ scale_color_continuous(low="#FF7F0EFF",high="#1F77B4FF")
      
      return(p)
    }
    #data = read.xlsx("table_result.xlsx",sheetName = "sheet0")
    p = mydotchart(data = data)
    p
    
    ##################差异分析####################
    
    my_diff <- function(otu = otu,meta = meta,anno = F,anno_file=anno_file)
    {
      options(scipen = 200)
      otu = t(otu)
      otu = otu[row.names(meta),]
      groups = meta$Group
      df = data.frame(cbind(otu,groups))
      
      name = anno_file
      table_result = data.frame()#创建储存结果的输出数据框
      q.value=list()#储存p-value计算q-value
      #计算实验组和对照组的个数
      control.num = as.numeric(table(df$groups)["control"])
      experience.num = as.numeric(table(df$groups)["experience"])
      #
      for (i in 1:(ncol(df)-1))
      {
        if (anno == F)
        {
          
        }else
        {
          table_result[colnames(df)[i],"anno"] = name[colnames(df)[i],"anno"]
        }
        
        table_result[colnames(df)[i],"P-value"] = wilcox.test(as.numeric(df[which(df$groups == "control"),i]) ,as.numeric(df[which(df$groups== "experience"),i]) )$p.value
        pvalue = table_result[colnames(df)[i],"P-value"]
        q.value = append(q.value, pvalue)
        diffusion = data.frame(
          number = c(as.numeric(df[which(df$groups== "control"),i]),as.numeric(df[which(df$groups == "experience"),i])),
          Group = factor(c(groups[which(groups=="control")],groups[which(groups=="experience")]))
          #Group  =  factor(rep(c()))
        )
        effect.z = coin::wilcox_test(number~Group,diffusion)@statistic@standardizedlinearstatistic
        effect_size = effect.z/sqrt(nrow(df))
        table_result[colnames(df)[i],"effect_size"] = effect_size
        mean.abun = tapply(as.numeric(df[,i]) , df$groups , mean)
        #print(mean.abun[1])
        table_result[colnames(df)[i],"mean_abun_control"] = mean.abun[1]
        table_result[colnames(df)[i],"mean_abun_experience"] = mean.abun[2]
        #####
        rk = rank(as.numeric(df[,i]))
        mean.rank = tapply(rk,df$groups,mean)
        #print(mean.rank[1])
        table_result[colnames(df)[i],"mean_rank_control"] = mean.rank[1]
        table_result[colnames(df)[i],"mean_rank_experience"] = mean.rank[2]
        mean.occ = tapply(df[,i]>0, df$groups, mean)
        table_result[colnames(df)[i],"mean_occ_control"] = mean.occ[1]
        table_result[colnames(df)[i],"mean_occ_experience"] = mean.occ[2]
        if (mean.abun[1] > mean.abun[2])
        {
          table_result[colnames(df)[i],"enrichment"] = "control > experience"
        }
        if(mean.abun[1] < mean.abun[2])
        {
          table_result[colnames(df)[i],"enrichment"] = "control < experience"
        }
        table_result[colnames(df)[i],"mean_abunt"] =  (mean.abun[1]*control.num+mean.abun[2]*experience.num)/(control.num+experience.num)
        table_result[colnames(df)[i],"mean_occ"] = (mean.occ[1]*control.num+mean.occ[2]*experience.num)/(control.num+experience.num)
      }
      table_result$Q_value = p.adjust(table_result$`P-value`,method = "BH")
      
      table_result = table_result[order(table_result$`P-value`),]
      return(table_result)
    }
    
    #测试GBM
    otu = read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t")
    meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
    name = read.table("./GBM.anno.txt",header = T,row.names = 1,sep = "\t")
    
    result = my_diff(meta = meta,otu = otu,anno = T,anno_file = name)
    #测试GMM
    otu = read.table("./out_GMM.txt",header = T,row.names = 1,sep = "\t")
    meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
    name = read.table("./GMM.anno.txt",header = T,row.names = 1,sep = "\t")
    if (!file.exists("./function/GMM"))
    {
      dir.create("./function/GMM",recursive = TRUE)
    }
    result = my_diff(meta = meta,otu = otu,anno = T,anno_file = name)
    write.xlsx(result, "./table_result.xlsx",sheetName = "sheet0")
    data = read.xlsx("table_result.xlsx",sheetName = "sheet0")
    dotchar = mydotchart(data = data)
    pca = my_pca(meta = meta,otu = otu ,title = "GMM")
    pcoa = my_pcoa(meta = meta,otu = otu ,title = "GMM")
    
    
    
    permanova = adonis2(t(otu)~Group,data = meta,permutations = 9999)
    
    
    ###Pathway 富集条形图
    
    otu = read.table("./out_Pathway.txt",header = T,row.names = 1,sep = "\t")
    anno = read.table("./79_Pathway_ano.txt",header = T,row.names = 1,sep = "\t",quote = "")
    meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
    
    data = data.frame()
    
    
    for (i in 1:length(rownames(anno)))
    {
      #print(anno[i,"anno"])
      unlist(strsplit(anno[1,"anno"],split = ";"))[1]
      data[i,"1"]=unlist(strsplit(anno[i,"anno"],split = ";"))[1]
      data[i,"2"]=unlist(strsplit(anno[i,"anno"],split = ";"))[2]
      data[i,"3"]=unlist(strsplit(anno[i,"anno"],split = ";"))[3]
      rownames(data)[i] = rownames(anno)[i]
      #print(rownames(anno[i,])) 
    }
    
    anno = data
    #顺序
    otu = t(otu)
    otu = otu[rownames(meta),]
    otu_meta = cbind(otu,Group = meta$Group)
    otu_meta = as.data.frame(otu_meta)
    #秩和检验
    #wilcox.test(bmi ~ Group, data = otu_meta, var.equal = TRUE)$p.value
    
    #改变value的类型
    for (i in 1:length(colnames(otu_meta))-1)
    {
      #print(colnames(otu_meta)[i])
      #print(otu_meta[,colnames(otu_meta)[i]])
      otu_meta[,colnames(otu_meta)[i]] = as.numeric(otu_meta[,colnames(otu_meta)[i]])
      #print(wilcox.test(map05223 ~ Group, data = otu_meta, var.equal = TRUE)$p.value)
      # wilcox.test(colnames(otu_meta)[i] ~ Group, data = otu_meta, var.equal = TRUE)$p.value
    }
    #将有显著差异的通路挑选出来
    select_list = data.frame()
    count = 1
    for (i in 1:(length(colnames(otu_meta))-1))
    {
      pv=wilcox.test(otu_meta[which(otu_meta$Group == "control"),colnames(otu_meta)[i]],otu_meta[which(otu_meta$Group == "experience"),colnames(otu_meta)[i]])$p.value
      #print(pv)
      if(!is.nan(pv))
      {
        print(pv)
        if(pv < 0.05)
        {
          select_list[count,"pathway"] = colnames(otu_meta)[i]
          select_list[count,"P.value"] = pv
          count = count +1
          #select_list = append(select_list,colnames(otu_meta)[i])
        }
      }
      
    }
    #将显著差异的通路的otu表格提取
    for (i in 1:length(row.names(select_list)))
    {
      #print(select_list[i,"pathway"])
      if (select_list[i,"pathway"] %in% rownames(anno))
      {
        select_list[i,"label1"] = anno[select_list[i,"pathway"],"1"]
        select_list[i,"label2"] = anno[select_list[i,"pathway"],"3"]
      }
    }
    select_list=select_list[which(select_list$label1 != "Microbial metabolism in diverse environments"),]
    #绘制条形图
    
    
    
    select_list$P.value = -log10(select_list$P.value)
    
    select_list = select_list[order(select_list$label1,-select_list$P.value,decreasing = T),]
    select_list$label2 = factor(select_list$label2,levels = select_list$label2)
    
    
    p = ggplot(select_list, aes(x=label2, y=P.value,fill =  label1)) + 
      geom_bar(stat = "identity", width=0.7,size=2,position=position_dodge(2))+
      labs(y="-log10(P value)",title = "Pathway",x="")
    p = p+theme(axis.text.x = element_text(angle = 90),legend.title = element_blank(),plot.title = element_text(hjust = 0))+coord_flip()+scale_fill_d3(a= 0.5)+theme_bw()
    
    p = p+guides(fill=guide_legend(title=NULL))
    p
    if (!file.exists("./function/pathway"))
    {
      dir.create("./function/pathway",recursive = TRUE)
    }
    ggsave("./function/pathway/pathway.pdf", p,  width=280, height=700, units="mm")
    
    
    
    
    
    ###########################permanova
    otu <- read.table("merged_abundance_table_species.txt",header = T,sep = "\t",row.names = 1)
    meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
    otu = otu[,rownames(meta)]
    otu = t(otu)
    
    otu.adonis = adonis2(otu ~ Group+ages+bmi+education,data = meta,permutations = 9999,by="margin")
    
    
    
    #####################boxplot-module
    function_boxplot <- function(otu=otu,meta=meta,save_path=save_path)
    {
      otu = t(otu)
      otu = otu[rownames(meta),]
      otu_meta = cbind(otu,Group = meta$Group)
      otu_meta = as.data.frame(otu_meta)
      #numeric数据
      for (i in 1:length(colnames(otu_meta))-1)
      {
        
        otu_meta[,colnames(otu_meta)[i]] = as.numeric(otu_meta[,colnames(otu_meta)[i]])
        
      }
      #挑选p-value小于0.05的module
      select_list=data.frame()
      count=1
      for (i in 1:(length(colnames(otu_meta))-1))
      {
        pv = wilcox.test(otu_meta[which(otu_meta$Group == "control"),colnames(otu_meta)[i]],otu_meta[which(otu_meta$Group == "experience"),colnames(otu_meta)[i]])$p.value
        if(!is.nan(pv))
        {
          print(pv)
          if(pv < 0.05)
          {
            select_list[count,"pathway"] = colnames(otu_meta)[i]
            select_list[count,"P.value"] = pv
            count = count +1
            #select_list = append(select_list,colnames(otu_meta)[i])
          }
        }
      }
      for (i in 1:length(select_list$pathway))
      {
        #print(select_list$pathway[i])
        tmp = data.frame()
        tmp =as.data.frame(cbind(pathway = otu_meta[,select_list$pathway[i]],Group = otu_meta[,"Group"]))  
        tmp$pathway =as.numeric(tmp$pathway)
        tmp$Name= select_list$pathway[i]
        p = ggplot(data = tmp,aes(x=Group,y=pathway,color=Group))+
          geom_boxplot(alpha=1,size=1.2,width=0.5)+theme_bw()
        p=p + geom_jitter(width = 0.2)+
          geom_signif(comparisons = list(c("control","experience")),
                      test= wilcox.test,
                      #step_increase = 10,
                      #y_position = 5,
                      position = "identity",
                      tip_length = 0,
                      size=0.9,color = "black",
                      map_signif_level = T)+scale_color_d3(a=0.5)+
          geom_signif(comparisons = list(c("control","experience")),
                      test= wilcox.test,
                      #step_increase = 10,
                      #y_position = 5,
                      position = "identity",
                      tip_length = 0,
                      vjust=2,
                      size=0.9,color = "black",
                      map_signif_level = F)+scale_color_d3(a=0.5)+
          
          facet_grid(. ~Name ) + theme(legend.position = "bo",#设置分类标签位置
                                       #y轴刻度线去掉
                                       axis.ticks.y= element_blank(),
                                       #y轴字去掉
                                       axis.text.y = element_blank(),
                                       #设置图例字体样式和大小
                                       text = element_text(family = "sans",size = 10),
                                       #设置顶部分面背景
                                       strip.background = element_rect(fill="grey"),
                                       strip.switch.pad.grid =unit(0.2, "cm"),
                                       strip.text = element_text(size = 25, colour="black",#face="italic",
                                                                 margin = margin(0.5,0,0.5,0, "cm")))+
          labs(x="",y="")
        #pictures = append(pictures,p)
        ####
        if (!file.exists(save_path))
        {
          dir.create(save_path,recursive = TRUE)
        }
        #修改保存的路径部分
        ggsave(paste( save_path,".png",sep = select_list$pathway[i]), p,  width=180, height=150, units="mm")
      }
    }
    
    otu = read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t")
    meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
    #name = read.table("./GBM.anno.txt",header = T,row.names = 1,sep = "\t")
    
    
    
    
    #绘制boxplot
    #pictures = list()
    
    p
    otu_meta2 = otu_meta[,which()]
    pictures[1]
    
    ############################################
    
    
    
    ####################
    
    
    
    otu = read.table("./out_Pathway.txt",header = T,row.names = 1,sep = "\t")
    anno = read.table("./79_Pathway_ano.txt",header = T,row.names = 1,sep = "\t",quote = "")
    meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
    
    data = data.frame()
    
    
    for (i in 1:length(rownames(anno)))
    {
      #print(anno[i,"anno"])
      unlist(strsplit(anno[1,"anno"],split = ";"))[1]
      data[i,"1"]=unlist(strsplit(anno[i,"anno"],split = ";"))[1]
      data[i,"2"]=unlist(strsplit(anno[i,"anno"],split = ";"))[2]
      data[i,"3"]=unlist(strsplit(anno[i,"anno"],split = ";"))[3]
      rownames(data)[i] = rownames(anno)[i]
      #print(rownames(anno[i,])) 
    }
    #找出属于代谢途径的pathway
    select_name =rownames(data)[which(data$`1`=="Metabolism")]   
    #找出相应的otu
    otu_metabolism = which(rownames(otu) %in% select_name)
    otu2= otu[otu_metabolism,]
    #转置后拼接分组信息
    otu2 = t(otu2)
    otu2 = otu2[row.names(meta),]
    otu2 =as.data.frame(cbind(otu2 ,Group = meta$Group)) 
    otu =otu2
    
    
    for (i in 1:(length(colnames(otu))-1))
    {
      #print(colnames(otu)[i])
      colnames(otu)[i] = data[colnames(otu)[i],3]
    }
    #
    
    
    library(simplevis)
    library(ggplot2)
    library(tidyverse)
    library("ggsci")
    library(ggsignif)
    
    
    data.long <- pivot_longer(otu, cols = 1:length(colnames(otu))-1, names_to ="index", values_to = "num")
    data.long$Group <- as.factor(data.long$Group)
    data.long$index <- factor(data.long$index,levels =rev(colnames(otu)) )
    data.long <- data.long[!is.infinite(data.long$num),]
    data.long$num = as.numeric(data.long$num)
    data.long = as.data.frame(data.long)
    data.long[i,"index"]=as.character(data.long[i,"index"])
    data.long$num = log10(data.long$num)
    ##############改index名字
    
    
    
    #str(data.long)
    p = ggplot(data.long, aes(x=index, y=num,fill=Group))+theme_bw()+
      geom_boxplot(alpha=1,size=0.5,width=0.5,outlier.color = "gray",position=position_dodge(width=0.8),outlier.size = 0.5)+#调整control和experience之间的距离
      theme(axis.text.x = element_text(angle = 90),
            plot.title = element_text(hjust = 0.5),#强行标题居中
            #axis.text.y = element_text(angle = 90),
            axis.title.x=element_text(vjust=2, size=10,face = "bold"),
            axis.title.y =element_text(vjust=2, size=10,face = "bold"),
            legend.title  = element_text(hjust = 0.5, size = 10,face = "bold"))+coord_flip()+
      scale_fill_aaas(a=0.55)+scale_color_aaas(a=0.55)+labs(y="Relative Abundance",x=NULL,title = "Metabolism",hjust=0.5)+
      stat_compare_means(aes(group=Group),
                         method = "wilcox.test",
                         label="p.signif",
                         hide.ns = T,
                         vjust = 0.8,
                         position = "identity",
                         map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))
    # geom_signif(comparisons = list(c("control","experience")),
    #             test= wilcox.test,
    
    #      position = "identity",
    #     tip_length = 0,
    #    size=0.9,color = "black",
    #    map_signif_level = T)
    #p
    
    ggsave("test.pdf", p,  width=280, height=700, units="mm")
    
    
    
    
    #------------------------------------------
    #humann3
    library(xlsx)
    #1.diff
    otu = read.table("out_path.function.txt",sep = "\t",header = T,row.names = 1)
    otu = otu[3:length(rownames(otu)),]
    meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
    
    table_result=my_diff(otu=otu,meta = meta,anno = F,anno_file = F)
    if (!file.exists("./function/humann"))
    {
      dir.create("./function/humann",recursive = TRUE)
    }
    write.xlsx(table_result,sheetName = "sheet0",file = "./function/humann/diff.xlsx")
    #func = func[rownames(meta),]
    #2.PCA
    p = my_pca(otu = otu,meta = meta,title = "path.function")
    if (!file.exists("./function/humann"))
    {
      dir.create("./function/humann",recursive = TRUE)
    }
    ggsave("./function/humann/PCA of humann.png",p,  width=180, height=150, units="mm")
    #3.PCoa
    p = my_pcoa(otu = otu,meta = meta,title = "path.function")
    if (!file.exists("./function/humann"))
    {
      dir.create("./function/humann",recursive = TRUE)
    }
    ggsave("./function/humann/PCoA of humann.png",p,  width=180, height=150, units="mm")
    #GBM
    otu = read.table("out_GBM.txt",sep = "\t",header = T,row.names = 1)
    meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
    anno = read.table("GBM.anno.txt",sep = "\t",header = T,row.names = 1)
    #func = func[rownames(meta),]
    table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
    if (!file.exists("./function/GBM"))
    {
      dir.create("./function/GBM",recursive = TRUE)
    }
    write.xlsx(table_result,sheetName = "sheet0",file = "./function/GBM/diff.xlsx")
    #2.PCA
    p = my_pca(otu = otu,meta = meta,title = "path.function")
    ggsave("./function/GBM/PCA of GBM.png",p,  width=180, height=150, units="mm")
    #3.PCoa
    p = my_pcoa(otu = otu,meta = meta,title = "path.function")
    ggsave("./function/GBM/PCoA of GBM.png",p,  width=180, height=150, units="mm")
    #4.bangbangtang
    #data = table_result[,2:length(colnames(table_result))]
    p = mydotchart(data = table_result)
    ggsave("./function/GBM/dotchart of GBM.png",p,  width=180, height=150, units="mm")
    #5箱线图
    function_boxplot(meta = meta,otu = otu,save_path = "./function/GBM/")
    
    
    #GMM
    otu = read.table("out_GMM.txt",sep = "\t",header = T,row.names = 1)
    meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
    anno = read.table("GMM.anno.txt",sep = "\t",header = T,row.names = 1)
    #diff
    table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
    if (!file.exists("./function/GMM"))
    {
      dir.create("./function/GMM",recursive = TRUE)
    }
    write.xlsx(table_result,sheetName = "sheet0",file = "./function/GMM/diff.xlsx")
    #2.PCA
    p = my_pca(otu = otu,meta = meta,title = "path.function")
    ggsave("./function/GMM/PCA of GMM.png",p,  width=180, height=150, units="mm")
    #3.PCoa
    p = my_pcoa(otu = otu,meta = meta,title = "path.function")
    ggsave("./function/GMM/PCoA of GMM.png",p,  width=180, height=150, units="mm")
    #4.bangbangtang
    #data = table_result[,2:length(colnames(table_result))]
    p = mydotchart(data = table_result)
    ggsave("./function/GMM/dotchart of GMM.png",p,  width=180, height=150, units="mm")
    #5箱线图
    function_boxplot(meta = meta,otu = otu,save_path = "./function/GMM/")
    
    #KO
    
    otu = read.table("out_KO.txt",sep = "\t",header = T,row.names = 1)
    meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
    anno = read.table("KO.ann.txt",sep = "\t",header = T,row.names = 1)
    #diff
    table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
    if (!file.exists("./function/KO"))
    {
      dir.create("./function/KO",recursive = TRUE)
    }
    write.xlsx(table_result,sheetName = "sheet0",file = "./function/KO/diff.xlsx")
    #2.PCA
    p = my_pca(otu = otu,meta = meta,title = "KO")
    ggsave("./function/KO/PCA of KO.png",p,  width=180, height=150, units="mm")
    #3.PCoa
    p = my_pcoa(otu = otu,meta = meta,title = "KO")
    ggsave("./function/KO/PCoA of KO.png",p,  width=180, height=150, units="mm")
    #4.bangbangtang
    #data = table_result[,2:length(colnames(table_result))]
    p = mydotchart(data = table_result)
    ggsave("./function/KO/dotchart of KO.png",p,  width=180, height=150, units="mm")
    #5箱线图
    function_boxplot(meta = meta,otu = otu,save_path = "./function/KO/")
    
    #module
    
    otu = read.table("out_Module.txt",sep = "\t",header = T,row.names = 1)
    meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
    anno = read.table("79_Module_ano.txt",sep = "\t",header = T,row.names = 1)
    #diff
    table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
    if (!file.exists("./function/Module"))
    {
      dir.create("./function/Module",recursive = TRUE)
    }
    write.xlsx(table_result,sheetName = "sheet0",file = "./function/Module/diff.xlsx")
    #2.PCA
    p = my_pca(otu = otu,meta = meta,title = "Module")
    ggsave("./function/Module/PCA of Module.png",p,  width=180, height=150, units="mm")
    #3.PCoa
    p = my_pcoa(otu = otu,meta = meta,title = "Module")
    ggsave("./function/Module/PCoA of Module.png",p,  width=180, height=150, units="mm")
    #4.bangbangtang
    #data = table_result[,2:length(colnames(table_result))]
    p = mydotchart(data = table_result)
    ggsave("./function/Module/dotchart of Module.png",p,  width=180, height=150, units="mm")
    #5箱线图
    function_boxplot(meta = meta,otu = otu,save_path = "./function/Module/")
    
    
    
    ###########################################6pathwaysPCA
    otu = read.table("./out_Pathway.txt",header = T,row.names = 1,sep = "\t")
    anno = read.table("./79_Pathway_ano.txt",header = T,row.names = 1,sep = "\t",quote = "")
    meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
    
    data = data.frame()
    
    
    for (i in 1:length(rownames(anno)))
    {
      #print(anno[i,"anno"])
      unlist(strsplit(anno[1,"anno"],split = ";"))[1]
      data[i,"1"]=unlist(strsplit(anno[i,"anno"],split = ";"))[1]
      data[i,"2"]=unlist(strsplit(anno[i,"anno"],split = ";"))[2]
      data[i,"3"]=unlist(strsplit(anno[i,"anno"],split = ";"))[3]
      rownames(data)[i] = rownames(anno)[i]
      #print(rownames(anno[i,])) 
    }
    
    data = data[which(data$`1`=="Cellular Processes"),]
    path_list = rownames(data)
    otu = otu[path_list,]
    p= my_pca(otu=otu,meta = meta ,title = "Cellular Processes")
    ###########################
    
    
    
    otu = read.table("./out_ARGcategory.txt",header = T,row.names = 1,sep = "\t")
    
    dominant_species <- function(n=n,otutab = otutab,metadata=metadata,tax= tax)
    {
      library(tidyverse)
      library("ggsci")
      library("ggplot2")
      library("RColorBrewer")
      getpalette = brewer.pal(12,"Set3")
      otu = otutab
      map = metadata
      #将实验组和对照组的样本分开
      experience = list()
      control = list()
      for (i in row.names(map))
      {
        # print(i)
        if (map[i,"Group"] == "experience")
        {
          experience = append(experience,i)  
        }
        if (map[i,"Group"] == "control")
        {
          control = append(control,i)  
        }
        
      }
      #将otu列表区分为实验组otu和对照组otu
      exp.otu = otu[which(colnames(otu) %in% experience)]
      ctr.otu = otu[which(colnames(otu) %in% control)]
      #对实验组和对照组中不同物种的数量进行求和
      exp.otu.sum = rowSums(exp.otu)
      ctr.otu.sum = rowSums(ctr.otu)
      #将实验组与对照组求和数据进行合并
      df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
      colnames(df) = c("experience","control")
      #计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
      df$average = (df$experience+df$control)/2
      df = df[order(df$average,decreasing = T),]
      #找出top n 的物种,其余物种并入other并计算other的总和
      df.top10 = df[1:n-1,]
      df.res = df[n:length(row.names(df)),]
      other = colSums(df.res)
      #将other加入top n数据中并更改名字
      a = rbind(df.top10,other)
      rownames(a)[length(rownames(a))]="Other"
      a$Phylum = row.names(a)
      a = a[order(a$average,decreasing = T),]
      #将物种数量转化为相对丰富
      df = select(a,-average)
      df$experience = df$experience/sum(df$experience)
      df$control = df$control/sum(df$control)
      #设置线段内容
      link_dat <- df %>% 
        arrange(by=experience) %>% 
        mutate(experience=cumsum(experience), control=cumsum(control)) 
      #
      df.long <- reshape2::melt(df, value.name='abundance', variable.name='group')
      #绘图
      df.long$Phylum = factor(df.long$Phylum,levels = rev(link_dat$Phylum))
      #a = df.long$Phylum
      p = ggplot(df.long, aes(x=group, y=abundance, fill=Phylum)) + 
        geom_bar(stat = "identity", width=0.5, col='black',size=2)  + 
        geom_segment(data=link_dat, aes(x=1.25, xend=1.75, y=experience, yend=control),size=2)+
        theme_bw()  + 
        #scale_color_aaas(a=0.10)+scale_fill_aaas(a=0.10)+
        ylab("Relative abundance(%)")+
        
        theme(axis.title.x=element_text(vjust=2, size=20,face = "bold"),
              axis.title.y =element_text(vjust=2, size=20,face = "bold"),
              legend.title  = element_text(size = 20,face = "bold")
        )
      name = paste("Top ",n,sep = "")
      name = paste(name,tax,sep = " abundant ")
      p = p+scale_fill_manual(values = getpalette)+labs(fill=name)
      
      return(p)
    }
    otu =read.table("merged_abundance_table_class.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
    map=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
    
    p= dominant_species(n=10,metadata = map,otutab = otu,tax = "class")
    
    p
    
    #######抗性基因
    
    otu =read.table("out_KO.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
    meta=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
    
    anno = read.table("KO.ann.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F,quote = "")
    meta[which(meta$Group==1),"Group"] = "control"
    meta[which(meta$Group==2),"Group"] = "experience"
    p = my_pca(otu = otu ,meta = meta,title = "ARGcategory")
    p = my_pcoa(otu = otu ,meta = meta,title = "ARGcategory")
    
    
    result_table = my_diff(otu = otu,meta = meta,anno = T,anno_file = anno)
    
    
    

    相关文章

      网友评论

        本文标题:宏基因组下游分析

        本文链接:https://www.haomeiwen.com/subject/zlfehdtx.html