宏基因组下游分析

作者: 皮尔洛_dys | 来源:发表于2023-01-27 08:44 被阅读0次
WGS全基因组分析 || SNP过滤
[13] 9 下游分析 & 9.1 基因组 contex
肠道菌群：宏转录组测序分析流程解读
58.《Bioinformatics Data Skills》之
宏蛋白质组研究概述
插件 | 蛋白序列集合功能注释快速完成 - Quick Prot
重测序分析（14）全基因组关联分析GWAS介绍
生信log13|原核基因组分析流程一：常用在线工具使用总结及评价
组装简介
24nt siRNA与甲基化关联分析
###################part1-species_analysis###################################
setwd("D:/metagenomics/upstream")
library("vegan")
library(ggsignif)
library("xlsx")
#样本显著性检
diversity <- read.table( "metadata.txt", header=T, row.names=1, sep="\t", comment.char="", stringsAsFactors = F) #View(diversity)
diversity 
mytable <- xtabs(~gender+Group,data=diversity);mytable


ages.p <- wilcox.test(ages ~ Group, data = diversity, var.equal = TRUE)$p.value
gender.p <- fisher.test(xtabs(~gender+Group,data=diversity))$p.value 
bmi.p <- wilcox.test(bmi ~ Group, data = diversity, var.equal = TRUE)$p.value
education.p <- fisher.test(xtabs(~education+Group,data=diversity),simulate.p.value=TRUE)$p.value 

rowname=c("ages","gender","bmi","education")
div=data.frame(c(ages.p,gender.p,bmi.p,education.p),row.names = rowname)
colnames(div)="P value"


if (!file.exists("./species_analysis/phenotype_diff"))
{
  dir.create("./species_analysis/phenotype_diff",recursive = TRUE)
}
write.xlsx(div,file="./species_analysis/phenotype_diff/meta_diff.xlsx")

#置换多元方差分析
otu <- read.table("merged_abundance_table_species.txt",header = T,sep = "\t",row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
otu = otu[,rownames(meta)]
otu = t(otu)

otu.adonis = adonis2(otu ~ Group+ages+bmi+education+gender,data = meta,permutations = 9999,by="margin")

write.xlsx(otu.adonis,file="./species_analysis/phenotype_diff/permanova_diff.xlsx")
#alpha多样性
library("vegan")
library("ggsignif")
library("xlsx")
library("ggplot2")
library("ggsci")
#清空变量
rm(list = ls())
#将当前工作目录下文件名为.txt的文件找
file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
#使用循环嵌套分别对class,family,order,phylum,genus和species进行alpha多样性分析(shanno,simpson,invsimpson)
for (i in file)
{
  #读取otu列表
  name=sub(".txt","",i)###将文件名.txt前面的名字提出来，也为了后面批量生成文件名准
  otu <- read.table(i,header = T,sep = "\t")
  row.names(otu) <- otu$clade_name
  otu <- subset(otu, select = -clade_name )
  otu<-t(otu)
  #读取样本分组信息
  metadata <- read.table("metadata.txt",header = T,sep = "\t")
  metadata <- metadata[,c("ID","Group")]
  #将otu列表中的样本的排列顺序与metadata中样本的顺序保持一�?
  otu=otu[metadata$ID,]
  #分别计算shannon,simpson,invsimpson多样�?
  shannon <- diversity(otu ,"shannon")
  simpson <- diversity(otu,"simpson")
  invsimpson <- diversity(otu,"invsimpson")
  #将三种alpha多样性写入excel(append=T,设为追加模式,防止之后的循环覆盖之前的写入文件,同时将不同分类水平的alpha多样性写入不同的sheet)
  alpha_div = cbind(shannon,simpson,invsimpson)
  if (!file.exists("./species_analysis/alpha"))
  {
    dir.create("./species_analysis/alpha",recursive = TRUE)
  }
  write.xlsx(alpha_div, file="./species_analysis/alpha/alpha_div.xlsx", sheetName=sub("merged_abundance_table_","",name), append=TRUE)
  
  #将计算得到的alpha多样性结果与分组信息合并(!!!若未对metadata与otu列表的样本顺序统一处理会造成分析结果错误)
  Group <- metadata$Group
  shannon=data.frame(shannon,Group)
  simpson=data.frame(simpson,Group)
  invsimpson=data.frame(invsimpson,Group)
  #分别对三种多样性绘制Boxplot
  for (j in list(shannon,simpson,invsimpson))
  {
    p = ggplot(j, aes(x=Group, y=j[,1], color=Group))+
      # 添加数据、xy值�? 颜色参数给画图函数ggplot
      geom_boxplot(alpha=1,size=1.2,width=0.5)+
      # 盒图
      labs(title="Alpha diversity", x="Group",y = paste(str(j)," index"))+
      # 标题
      theme(plot.title=element_text(hjust=0.5),legend.position="top",panel.background = element_rect(fill = "transparent",colour = NA),axis.line = element_line(colour = "black"))+
      geom_jitter(width = 0.2)+
      geom_signif(comparisons = list(c("control","experience")),
                  test= wilcox.test,
                  #step_increase = 10,
                  #y_position = 5,
                  position = "identity",
                  tip_length = 0,
                  size=0.9,color = "black",
                  map_signif_level = T)+
      ylab(paste0(colnames(j)[1]," Index In ",sub("merged_abundance_table_","",name)," Level"))+
      #scale_fill_manual(values=c(experience = "red", control = "blue"))
      scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
    pdf_name=""
    pdf_name=paste(pdf_name,name,sep = "")
    pdf_name=paste(pdf_name,colnames(j)[1],sep = "_")
    pdf_name=paste(pdf_name,".png",sep = "")
    pdf_name = sub("merged_abundance_table_","",pdf_name)
    pdf_name = paste("Alpha plot of ",pdf_name,sep="")
    ggsave(p,file =paste("./species_analysis/alpha/ ",pdf_name,sep = "") ,width=5,height=6)##ggsave批量生成name变量为名字的pdf文件
  }
  
  
}

#置换多元方差分析（不同分类水平）

file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
for (i in file)
{
  name = sub("merged_abundance_table_","",i)
  name = sub(".txt","",name)
  name = paste(name,".xlsx",sep = "_permanova")
  print(name)
  otu <- read.table(i,header = T,sep = "\t",row.names = 1)
  meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
  otu = otu[,rownames(meta)]
  otu = t(otu)
  
  otu.adonis = adonis2(otu ~ Group,data = meta,permutations = 9999,by="margin")
  if (!file.exists("./species_analysis/permanova"))
  {
    dir.create("./species_analysis/permanova",recursive = TRUE)
  }
  write.xlsx(otu.adonis,file=paste("./species_analysis/permanova/",name,sep = ""))
  
}
#beta多样性分析
#PCA
library(ggsci)
library("ggpubr")
library("factoextra")
library("vegan")
library("ggtext")
library(ggsignif)
rm(list = ls())
file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")

for (i in file)
{
  name=sub(".txt","",i)
  tax = sub("merged_abundance_table_","",name)
  otu =read.table(i, row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
  meta=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
  otu = otu[,row.names(meta)]
  
  tmpotu= otu
  tmpmeta = meta
  tmpotu = data.frame(t(tmpotu))
  group = meta[,"Group"]
  group = data.frame(group)
  
  result = adonis2(tmpotu~group,group,distance="bray",permutations = 10000)
  
  otu.pca <- prcomp(t(otu), scale. =F)
  p <- fviz_eig(otu.pca, addlabels = TRUE)
  p = p+theme(panel.background = element_rect(fill = "white"),plot.background=element_rect(fill = "white"))
  p$labels$title=paste("Scree plot of ",tax,sep = "")
  if (!file.exists("./species_analysis/beta/PCA"))
  {
    dir.create("./species_analysis/beta/PCA",recursive = TRUE)
  }
  
  ggsave(paste("./species_analysis/beta/PCA/scree plot of ",".pdf",sep = tax), p,  width=180, height=150, units="mm")
  ggsave(paste("./species_analysis/beta/PCA/scree plot of ",".png",sep = tax), p,  width=180, height=150, units="mm")
  p1 <- fviz_pca_ind(otu.pca,
                     geom.ind = "point", # show points only ( not "text")
                     col.ind = meta$Group, # color by groups
                     addEllipses = TRUE, # Concentration ellipses
                     legend.title = "Groups")
  
  p1=p1+theme(axis.line = element_line(colour = "black"))+theme_bw()
  p1$labels$x=sub("Dim","PCA",p1$labels$x)
  p1$labels$y=sub("Dim","PCA",p1$labels$y)
  p1$labels$title=paste("PCA of ",tax,sep = "")
  p1 = p1 + scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
  
  text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((result$`Pr(>F)`[1]),digits = 4),sep = ""))
  #p1 = p1 + annotate("label",x=Inf,y=Inf,label=text_annotation,vjust=1.1,hjust=1.05)
  p1 =p1 + ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, family = "sans", label.colour = 'black'))
  
  PC1 <- otu.pca$x[,1]
  PC2 <- otu.pca$x[,2]
  
  otu.pca.data <- data.frame(rownames(otu.pca$x),PC1,PC2,meta$Group)
  colnames(otu.pca.data) <- c("sample", "PC1", "PC2", "group")
  my_comparisons = list(c('control','experience'))
  
  
  p2 <- ggplot(otu.pca.data, aes(x = group, y= PC2, colour = group)) +
    geom_boxplot() +
    theme(panel.background = element_rect(fill = "white",colour = "white"),
          panel.grid = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks=element_blank(),
          legend.position="none") +
    geom_signif(comparisons = list(c("control","experience")),
                test= wilcox.test,
                position = "identity",
                tip_length = 0,
                size=0.9,color = "black",
                map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
    xlab("") + ylab("")+
    scale_color_aaas(a=0.55)
  
  
  p3 <- ggplot(otu.pca.data, aes(x = group, y= PC1, colour = group)) +
    geom_boxplot() +
    theme(panel.background = element_rect(fill = "white",colour = "white"),
          panel.grid = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks=element_blank(),
          legend.position="none") +
    xlab("") + ylab("") +
    coord_flip() + # coord_flip()函数翻转坐标�?
    geom_signif(comparisons = list(c("control","experience")),
                test= wilcox.test,
                #step_increase = 10,
                #y_position = 5,
                position = "identity",
                tip_length = 0,
                size=0.9,color = "black",
                map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
    scale_color_aaas(a=0.55)
  
  p4 <- ggarrange(p3, NULL, p1, p2, widths = c(5,2), heights = c(2,4), align = "hv")+theme(plot.background=element_rect(fill = "white"))
  ggsave(paste("./species_analysis/beta/PCA/PCA of ",".pdf",sep = tax), p4,  width=180, height=150, units="mm")
  ggsave(paste("./species_analysis/beta/PCA/PCA of ",".png",sep = tax), p4,  width=180, height=150, units="mm")
  
}

#PCoA
#########################################################
library("amplicon")
library(xlsx)
library(ggsci)
library(ggsignif)
library("ggpubr")
rm(list = ls())
rm(list = ls())
#setwd("D:/metagenomic")
file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")

for (i in file){
  name=sub(".txt","",i)
  tax = sub("merged_abundance_table_","",name)
  otu =read.table(i, row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
  meta=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
  tmpotu= otu
  tmpmeta = meta
  tmpotu = data.frame(t(tmpotu))
  group = meta[,"Group"]
  group = data.frame(group)
  result = adonis2(tmpotu~group,group,distance="bray",permutations = 10000)
  otu=otu[,row.names(meta)]
  bray_dis = vegdist(data.frame(t(otu)),method = "bray")
  bray_dis =  as.matrix(bray_dis)
  bray_dis = as.data.frame(bray_dis)
  if (!file.exists("./species_analysis/beta/PCoA"))
  {
    dir.create("./species_analysis/beta/PCoA",recursive = TRUE)
  }
  write.xlsx(bray_dis,file=paste("./species_analysis/beta/PCoA/bray ",".xlsx",sep = tax))
  p = beta_pcoa(bray_dis,metadata = meta)
  p = p+scale_color_aaas(a=0.55)+theme_bw()
  text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((result$`Pr(>F)`[1]),digits = 4),sep = ""))
  p = p + ggtext::geom_richtext (aes (x= Inf, y=Inf,label=text_annotation ),color="black",label.colour = 'black',vjust = 1.1,hjust = 1.05)
  p = p + labs(title = paste("PCoA of",tax,sep = " "))
  
  
  p2 <- ggplot(p$data, aes(x = group, y= y, colour = group)) +
    geom_boxplot() +
    theme(panel.background = element_rect(fill = "white",colour = "white"),
          panel.grid = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks=element_blank(),
          legend.position="none") +
    geom_signif(comparisons = list(c("control","experience")),
                test= wilcox.test,
                position = "identity",
                tip_length = 0,
                size=0.9,color = "black",
                map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
    xlab("") + ylab("")+
    scale_color_aaas(a=0.55)
  
  
  p3 <- ggplot(p$data, aes(x = group, y=x, colour = group)) +
    geom_boxplot() +
    theme(panel.background = element_rect(fill = "white",colour = "white"),
          panel.grid = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks=element_blank(),
          legend.position="none") +
    xlab("") + ylab("") +
    coord_flip() + 
    geom_signif(comparisons = list(c("control","experience")),
                test= wilcox.test,
                position = "identity",
                tip_length = 0,
                size=0.9,color = "black",
                map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
    scale_color_aaas(a=0.55)
  if (!file.exists("./species_analysis/beta/PCoA/picture1"))
  {
    dir.create("./species_analysis/beta/PCoA/picture1",recursive = TRUE)
  }
  p4 <- ggarrange(p3, NULL, p, p2, widths = c(4,1), heights = c(1,3), align = "hv")+theme(plot.background=element_rect(fill = "white"))
  ggsave(paste("./species_analysis/beta/PCoA/picture1/PCoA of ",".pdf",sep = tax), p4,  width=180, height=150, units="mm")
  ggsave(paste("./species_analysis/beta/PCoA/picture1/PCoA of ",".png",sep = tax), p4,  width=180, height=150, units="mm")
  
  
}

#NMDS
library(phyloseq)
library(ggsci)
library("ggpubr")
setwd("D:")
#setwd("D:/metagenomic")
nmds_my=function(otu,group){
  library("vegan")
  library("ggplot2")
  library("ggsci")
  otu <- t(otu)
  otu = otu[row.names(group),]
  bray_dis <- vegdist(otu, method = 'bray')
  otu_nmds <- metaMDS(bray_dis,distance = 'bray')
  otu_nmds
  otu_nmds.stress <- otu_nmds$stress
  otu_nmds.point <- data.frame(otu_nmds$point)
  otu_nmds.species <- data.frame(otu_nmds$species)
  sample_site <- otu_nmds.point[1:2]
  sample_site$names <- rownames(sample_site)
  colnames(sample_site)[1:2] <- c('NMDS1', 'NMDS2')
  
  #合并分组数据
  sample_site <- cbind(sample_site,Group=group$Group)
  #sample_site = sample_site[,1:4]
  #sample_site$NMDS2 = -sample_site$NMDS2
  p <- ggplot(data=sample_site,aes(NMDS1,NMDS2)) +
    #geom_hline(aes(yintercept=0),colour="#d8d6d6",linetype=5)+
    #geom_vline(aes(xintercept=0),colour="#d8d6d6",linetype=5)+
    geom_point(aes(color =Group),shape = 19,size = 3.5)+
    #scale_color_aaas(alpha=0.7)+
    stat_ellipse(aes(color = Group),geom="polygon",level=0.90,alpha=0)+labs(x= "NMDS1", y = "NMDS2")+
    scale_color_aaas(alpha=0.7)+
    theme(plot.title = element_text(hjust = 0.5))+
    theme_bw()+
    #theme(panel.background = element_blank(),axis.line = element_line(color = "black"))+
    geom_text(aes(x = -0.4 , y =0.5,label = paste("stress = ",round(otu_nmds.stress,2))),size=5)+
    xlim(-0.5, 0.5)+ylim(-0.5,0.5)
  return(p)
}

file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")

for (i in file){
  name=sub(".txt","",i)
  tax = sub("merged_abundance_table_","",name)
  otu = read.table(i,header = T,row.names = 1,sep = "\t")
  group = read.table("metadata.txt",header = T,row.names = 1,sep = "\t")
  p = nmds_my(otu = otu,group = group)
  if (!file.exists("./species_analysis/beta/NMDS/picture1"))
  {
    dir.create("./species_analysis/beta/NMDS/picture1",recursive = TRUE)
  }
  ggsave(paste("./species_analysis/beta/NMDS/picture1/NMDS of ",".pdf",sep = tax), p,  width=180, height=150, units="mm")
  ggsave(paste("./species_analysis/beta/NMDS/picture1/NMDS of ",".png",sep = tax), p,  width=180, height=150, units="mm")
}
#CCA
my_cca <- function(meta = meta ,otu =otu,env = env,tax=tax)
{
  library("ggplot2")
  library("vegan")
  otu = otu[,rownames(groups)]
  env = env[rownames(groups),]
  decorana(otu)
  
  
  cca <- cca(X = t(otu), Y = env)
  x.sig = anova(cca)
  x.p = x.sig$Pr[1]
  x.p
  x.F = x.sig$F[1]
  x.F
  F1 <- paste("anova F: ", round(x.F, 2), sep = "")
  pp1 = paste("p: ", round(x.p, 2), sep = "")
  # 对CCA的结果进行summary, 提取对应信息；
  cca.sum <- summary(cca)
  str(cca.sum)
  
  cca.sum$sites
  
  cca.sum$biplot
  
  sites <- data.frame(cca.sum$sites[, c(1, 2)], groups, check.names = F)
  biplot <- as.data.frame(cca.sum$biplot, check.names = F)
  species <- as.data.frame(cca.sum$species, check.names = F)
  eig <- as.data.frame(cca.sum$concont$importance, check.names = F)
  labs <- sprintf("%s(%.2f%%)", colnames(sites)[1:2], eig[2,1:2]*100)
  
  
  library("ggplot2")
  library(ggrepel)
  p = ggplot() + geom_point(data = sites, aes(x = CCA1, y = CCA2,  fill = Group), pch = 21, colour = "black", size = 3,alpha=0.7) +
    geom_segment(data =as.data.frame(cca.sum$biplot) , aes(x = 0, y = 0, xend = CCA1, yend = CCA2), arrow = arrow(angle = 22.5,length = unit(0.2, "cm")), linetype = 1, size = 1,colour = "black") + 
    geom_label_repel(data = as.data.frame(cca.sum$biplot) ,aes(CCA1*1.1, CCA2*1.1,label=row.names(as.data.frame(cca.sum$biplot) )),fontface="bold",color="black",alpha=0.6, 
                     box.padding=unit(1, "lines"), point.padding=unit(0.3, "lines"), 
                     segment.colour = "grey50",segment.size=0.75,segment.alpha=0.9,
                     label.r=0.15,label.size=0.5, max.overlaps = 50,size=3)+
    labs(x = labs[1], 
         y = labs[2])
  
  p = p + theme_bw() + geom_hline(aes(yintercept = 0), colour = "black", linetype = 2) + 
    geom_vline(aes(xintercept = 0), colour = "black", linetype = "dashed") + 
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
          plot.title = element_text(vjust = -0.5, hjust = 0.1,size = 20), 
          axis.title.y = element_text(size = 8,  face = "bold", colour = "black"), 
          axis.title.x = element_text(size = 8, face = "bold", colour = "black"),
          axis.text = element_text(size = 8, face = "bold"), 
          axis.text.x = element_text(colour = "black",size = 8), 
          axis.text.y = element_text(colour = "black", size = 8), 
          legend.text = element_text(size = 8,face = "bold"))+
    ggtext::geom_richtext(
      aes(
        x = Inf , y = Inf, 
        vjust = 1.1,hjust = 1.03, 
        label = paste(F1,' <p>\n p-value = ',pp1, sep = "")),
      size =5, family = "sans",colour = 'black',label.colour="black")+scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
  # 添加箭头,标记为环境因子；
  
  
  #p <- p+scale_x_continuous(limits = c(min(sites$CCA1)*0.6,max(sites$CCA1)*0.6))
  #p <- p+scale_y_continuous(limits = c(min(sites$CCA2)*0.6,max(sites$CCA2)*0.6))
  p = p+labs(title = paste("CCA of ",tax,sep = ""))
  return(p)
}

#RDA
my_rda <- function(meta = meta ,otu =otu,env = env,tax=tax)
{
  library("ggplot2")
  library("vegan")
  otu = otu[,rownames(groups)]
  env = env[rownames(groups),]
  decorana(otu)
  
  
  rda <- rda(X = t(otu), Y = env)
  x.sig = anova(rda)
  x.p = x.sig$Pr[1]
  x.p
  x.F = x.sig$F[1]
  x.F
  F1 <- paste("anova F: ", round(x.F, 2), sep = "")
  pp1 = paste("p: ", round(x.p, 2), sep = "")
  # 对CCA的结果进行summary, 提取对应信息；
  rda.sum <- summary(rda)
  str(rda.sum)
  
  rda.sum$sites
  
  rda.sum$biplot
  
  sites <- data.frame(rda.sum$sites[, c(1, 2)], groups, check.names = F)
  biplot <- as.data.frame(rda.sum$biplot, check.names = F)
  species <- as.data.frame(rda.sum$species, check.names = F)
  eig <- as.data.frame(rda.sum$concont$importance, check.names = F)
  labs <- sprintf("%s(%.2f%%)", colnames(sites)[1:2], eig[2,1:2]*100)
  
  
  library("ggplot2")
  library(ggrepel)
  p = ggplot() + geom_point(data = sites, aes(x = RDA1, y = RDA2,  fill = Group), pch = 21, colour = "black", size = 3,alpha=0.7) +
    geom_segment(data =as.data.frame(rda.sum$biplot) , aes(x = 0, y = 0, xend = RDA1, yend = RDA2), arrow = arrow(angle = 22.5,length = unit(0.2, "cm")), linetype = 1, size = 1,colour = "black") + 
    geom_label_repel(data = as.data.frame(rda.sum$biplot) ,aes(RDA1*1.1, RDA2*1.1,label=row.names(as.data.frame(rda.sum$biplot) )),fontface="bold",color="black",alpha=0.6, 
                     box.padding=unit(1, "lines"), point.padding=unit(0.3, "lines"), 
                     segment.colour = "grey50",segment.size=0.75,segment.alpha=0.9,
                     label.r=0.15,label.size=0.5, max.overlaps = 50,size=3)+
    labs(x = labs[1], 
         y = labs[2])
  
  p = p + theme_bw() + geom_hline(aes(yintercept = 0), colour = "black", linetype = 2) + 
    geom_vline(aes(xintercept = 0), colour = "black", linetype = "dashed") + 
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
          plot.title = element_text(vjust = -0.5, hjust = 0.1,size = 20), 
          axis.title.y = element_text(size = 8,  face = "bold", colour = "black"), 
          axis.title.x = element_text(size = 8, face = "bold", colour = "black"),
          axis.text = element_text(size = 8, face = "bold"), 
          axis.text.x = element_text(colour = "black",size = 8), 
          axis.text.y = element_text(colour = "black", size = 8), 
          legend.text = element_text(size = 8,face = "bold"))+
    ggtext::geom_richtext(
      aes(
        x = Inf , y = Inf, 
        vjust = 1.1,hjust = 1.03, 
        label = paste(F1,' <p>\n p-value = ',pp1, sep = "")),
      size =5, family = "sans",colour = 'black',label.colour="black")+scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
  # 添加箭头,标记为环境因子；
  
  
  #p <- p+scale_x_continuous(limits = c(min(sites$RDA1)*0.6,max(sites$RDA1)*0.6))
  # p <- p+scale_y_continuous(limits = c(min(sites$RDA2)*0.6,max(sites$RDA2)*0.6))
  p = p+labs(title = paste("RDA of ",tax,sep = ""))
  return(p)
}
#p = my_rda(meta = groups,env=env,otu=otu,tax="species")

library(devtools)
library(amplicon)
library("ggsci")
file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
for (i in file){
  name=sub(".txt","",i)
  tax = sub("merged_abundance_table_","",name)
  otu =read.table(i, row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
  map=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
  env=read.delim("env.txt",row.names=1)
  p = my_rda (otu=otu,meta = map,env=env,tax = tax)
  if (!file.exists("./species_analysis/beta/RDA/picture1"))
  {
    dir.create("./species_analysis/beta/RDA/picture1",recursive = TRUE)
  }
  ggsave(paste("./species_analysis/beta/RDA/picture1/RDA plot of ",".pdf",sep = tax), p,  width=180, height=150, units="mm")
  ggsave(paste("./species_analysis/beta/RDA/picture1/RDA plot of ",".png",sep = tax), p,  width=180, height=150, units="mm")
}



#优势物种分析
#堆叠图
dominant_species <- function(n=n,otutab = otutab,metadata=metadata,tax= tax)
{
  library(tidyverse)
  library("ggsci")
  library("ggplot2")
  library("RColorBrewer")
  getpalette =colorRampPalette(brewer.pal(12,"Set3"))
  otu = otutab
  map = metadata
  #将实验组和对照组的样本分开
  experience = list()
  control = list()
  for (i in row.names(map))
  {
    if (map[i,"Group"] == "experience")
    {
      experience = append(experience,i)  
    }
    if (map[i,"Group"] == "control")
    {
      control = append(control,i)  
    }
    
  }
  #将otu列表区分为实验组otu和对照组otu
  exp.otu = otu[which(colnames(otu) %in% experience)]
  ctr.otu = otu[which(colnames(otu) %in% control)]
  #对实验组和对照组中不同物种的数量进行求和
  exp.otu.sum = rowSums(exp.otu)
  ctr.otu.sum = rowSums(ctr.otu)
  #将实验组与对照组求和数据进行合并
  df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
  colnames(df) = c("experience","control")
  #直接计算相对丰度
  df$experience = df$experience/sum(df$experience)
  df$control = df$control/sum(df$control)
  #计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
  df$average = (df$experience+df$control)/2
  df = df[order(df$average,decreasing = T),]
  #找出top n 的物种，其余物种并入other并计算other的总和
  df.top10 = df[1:n-1,]
  df.res = df[n:length(row.names(df)),]
  other = colSums(df.res)
  #将other加入top n数据中并更改名字
  a = rbind(df.top10,other)
  rownames(a)[length(rownames(a))]="Other"
  a$Phylum = row.names(a)
  df = a
  df = df[,which(colnames(df)!="average")]
  #设置线段内容
  
  link_dat= data.frame()
  sum.experience =0
  sum.control=0
  for (i in length(rownames(df)):1)
  {
    sum.control = sum.control + df$control[i]
    link_dat[rownames(df)[i],"control"] = sum.control
    sum.experience = sum.experience + df$experience[i]
    link_dat[rownames(df)[i],"experience"] = sum.experience
  }
  link_dat$Phylum = row.names(link_dat)
  df.long <- reshape2::melt(df, value.name='abundance', variable.name='group')
  #绘图
  df.long$Phylum = factor(df.long$Phylum,levels = rev(link_dat$Phylum))
  #a = df.long$Phylum
  df.long$group = factor(df.long$group,levels = c("control","experience"))
  p = ggplot(df.long, aes(x=group, y=abundance, fill=Phylum)) + 
    geom_bar(stat = "identity", width=0.5, col='black',size=1)  + 
    geom_segment(data=link_dat, aes(x=1.25, xend=1.75, y=control, yend=experience),size=1)+
    theme_bw()  + 
    #scale_color_aaas(a=0.10)+scale_fill_aaas(a=0.10)+
    ylab("Relative abundance(%)")+
    
    theme(axis.title.x=element_text(vjust=2, size=20,face = "bold"),
          axis.title.y =element_text(vjust=2, size=20,face = "bold"),
          legend.title  = element_text(size = 20,face = "bold")
    )
  name = paste("Top ",n,sep = "")
  name = paste(name,tax,sep = " abundant ")
  p = p+scale_fill_manual(values = getpalette(n))+labs(fill=name)+labs(x="Groups")
  
  return(p)
}
otu =read.table("out_ARGcategory.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
map=read.table("metadata.2.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)

p= dominant_species(n=10,metadata = map,otutab = otu,tax = "ARGcategory")

p
#多组箱线图
setwd("D://metagenomics//upstream")
otu =read.table("merged_abundance_table_species.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
map=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
multi_boxplot <- function(otu = otu ,meta = meta,n =n,tax=tax)
{
  get_top_abundant <- function(n=n,otutab = otutab,metadata=metadata)
  {
    library(tidyverse)
    library("ggsci")
    library("ggplot2")
    library("RColorBrewer")
    library(ggsignif)
    getpalette = brewer.pal(12,"Set3")
    otu = otutab
    map = metadata
    #将实验组和对照组的样本分开
    experience = list()
    control = list()
    for (i in row.names(map))
    {
      # print(i)
      if (map[i,"Group"] == "experience")
      {
        experience = append(experience,i)  
      }
      if (map[i,"Group"] == "control")
      {
        control = append(control,i)  
      }
      
    }
    #将otu列表区分为实验组otu和对照组otu
    exp.otu = otu[which(colnames(otu) %in% experience)]
    ctr.otu = otu[which(colnames(otu) %in% control)]
    #对实验组和对照组中不同物种的数量进行求和
    exp.otu.sum = rowSums(exp.otu)
    ctr.otu.sum = rowSums(ctr.otu)
    #将实验组与对照组求和数据进行合并
    df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
    colnames(df) = c("experience","control")
    #计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
    df$average = (df$experience+df$control)/2
    df = df[order(df$average,decreasing = T),]
    #找出top n 的物种，其余物种并入other并计算other的总和
    p = rownames(df)[1:n]
    q = rownames(df)[n+1:length(row.names(df))]
    #将other加入top n数据中并更改名字
    
    return(p)
  }
  #获取丰度前十的物种名称
  result= get_top_abundant(n=n,otutab = otu,metadata = map)
  name_list = result
  name_list = as.list(name_list)
  #调整otu顺序与meta信息保持一致
  otu =as.data.frame( t(otu))
  otu =  otu[row.names(map),]
  #根据返回的top abundant信息提取对应的otu信息
  otu =as.data.frame( otu[which(colnames(otu) %in% name_list)])
  #根据name_list的顺序调整otu的物种（列）顺序
  name_list =  as.character(name_list)
  otu = otu[,name_list]
  #给矩阵的每一个元素取对数
  otu = log10(otu)
  #给每一个样本添加分组信息
  otu$Group = NULL
  for (i in rownames(otu))
  {
    otu[i,"Group"]=map[i,"Group"]
  }
  library(simplevis)
  library(ggplot2)
  library(tidyverse)
  library("ggsci")
  library(ggsignif)
  data.long <- pivot_longer(otu, cols = 1:length(colnames(otu))-1, names_to ="index", values_to = "num")
  data.long$Group <- as.factor(data.long$Group)
  data.long$index <- factor(data.long$index,levels =rev(colnames(otu)) )
  data.long <- data.long[!is.infinite(data.long$num),]
  str(data.long)
  p = ggplot(data.long, aes(x=index, y=num,fill=Group))+theme_bw()+
    geom_boxplot(alpha=1,size=0.8,width=0.5,outlier.color =     "gray",position=position_dodge(width=0.8))+#调整control和experience之间的距离
    theme(axis.text.x = element_text(angle = 90),
          plot.title = element_text(hjust = 0.5),#强行标题居中
          #axis.text.y = element_text(angle = 90),
          axis.title.x=element_text(vjust=2, size=10,face = "bold"),
          axis.title.y =element_text(vjust=2, size=10,face = "bold"),
          legend.title  = element_text(hjust = 0.5, size = 10,face = "bold"))+coord_flip()+
    stat_compare_means(aes(group=Group),
                       method = "wilcox.test",
                       label="p.signif",
                       hide.ns = T,
                       vjust = 0.7,)+
    scale_fill_aaas(a=0.55)+scale_color_aaas(a=0.55)+labs(y="Relative Abundance(log10)",x=NULL,title =paste("Top abundant ",tax,sep = "") ,hjust=0.5)
  return(p)
}
p = multi_boxplot(otu =otu,meta = map,n=10,tax="species")
p

#分面堆叠图
otu =read.table("merged_abundance_table_species.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
map=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
facet_stackplot <- function(otu =otu ,meta=meta)
{
  get_other_abundant <- function(n=n,otutab = otutab,metadata=metadata)
  {
    library(tidyverse)
    library("ggsci")
    library("ggplot2")
    library("RColorBrewer")
    library(ggsignif)
    #getpalette = brewer.pal(12,"Set3")
    otu = otutab
    map = metadata
    #将实验组和对照组的样本分开
    experience = list()
    control = list()
    for (i in row.names(map))
    {
      # print(i)
      if (map[i,"Group"] == "experience")
      {
        experience = append(experience,i)  
      }
      if (map[i,"Group"] == "control")
      {
        control = append(control,i)  
      }
      
    }
    #将otu列表区分为实验组otu和对照组otu
    exp.otu = otu[which(colnames(otu) %in% experience)]
    ctr.otu = otu[which(colnames(otu) %in% control)]
    #对实验组和对照组中不同物种的数量进行求和
    exp.otu.sum = rowSums(exp.otu)
    ctr.otu.sum = rowSums(ctr.otu)
    #将实验组与对照组求和数据进行合并
    df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
    colnames(df) = c("experience","control")
    #计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
    df$average = (df$experience+df$control)/2
    df = df[order(df$average,decreasing = T),]
    #找出top n 的物种，其余物种并入other并计算other的总和
    p = rownames(df)[1:n]
    q = rownames(df)[n+1:length(row.names(df))]
    #将other加入top n数据中并更改名字
    
    return(q)
  }
  
  
  get_top_abundant <- function(n=n,otutab = otutab,metadata=metadata)
  {
    library(tidyverse)
    library("ggsci")
    library("ggplot2")
    library("RColorBrewer")
    library(ggsignif)
    #getpalette = brewer.pal(12,"Set3")
    otu = otutab
    map = metadata
    #将实验组和对照组的样本分开
    experience = list()
    control = list()
    for (i in row.names(map))
      
    {
      
      # print(i)
      if (map[i,"Group"] == "experience")
        
      {
        experience = append(experience,i)  
      }
      if (map[i,"Group"] == "control")
      {
        control = append(control,i)  
      }
      
    }
    #将otu列表区分为实验组otu和对照组otu
    exp.otu = otu[which(colnames(otu) %in% experience)]
    ctr.otu = otu[which(colnames(otu) %in% control)]
    #对实验组和对照组中不同物种的数量进行求和
    exp.otu.sum = rowSums(exp.otu)
    ctr.otu.sum = rowSums(ctr.otu)
    #将实验组与对照组求和数据进行合并
    df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
    colnames(df) = c("experience","control")
    #计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
    df$average = (df$experience+df$control)/2
    df = df[order(df$average,decreasing = T),]
    #找出top n 的物种，其余物种并入other并计算other的总和
    p = rownames(df)[1:n]
    q = rownames(df)[n+1:length(row.names(df))]
    #将other加入top n数据中并更改名字
    
    return(p)
  }
  name_list = get_top_abundant(n=10,otutab = otu,metadata = map)
  name_list = as.list(name_list)
  other_list = get_other_abundant(n=10,otutab = otu,metadata = map)
  other_list = as.list(other_list)
  otu.top = otu[which(row.names(otu) %in% name_list),]
  otu.other = otu[which(row.names(otu) %in% other_list),]
  
  t= colSums(otu.other)
  df=as.data.frame( rbind(otu.top,t))
  rownames(df)[length(rownames(df))]="Other"
  otu = df
  otu = otu/100
  
  otu = as.data.frame(otu)
  otu$Taxonomy = row.names(otu)
  otu
  data_frame=melt(otu, id='Taxonomy')
  names(data_frame)[2]='sample_id'#更改列名
  map$sample_id = row.names(map)
  
  data_group = map[,c("sample_id","Group")]
  data_frame=merge(data_frame, data_group, by='sample_id')
  
  ylim =c(0,100)
  xlim =c(0,100)
  library(ggplot2)
  library("RColorBrewer")
  library(ggsignif)
  library("ggsci")
  getpalette =colorRampPalette(brewer.pal(12,"Set3"))
  
  name_list = append(name_list,"Other")
  name_list = as.character(name_list)
  data_frame$Taxonomy = factor(data_frame$Taxonomy,levels = name_list )
  stack_plot=ggplot(data_frame, aes(x=sample_id, fill=Taxonomy, y=value*100))+
    # 数据输入：样本、物种、丰度
    geom_col(position='stack') +
    # stack：堆叠图
    labs(x='Samples', y='Relative Abundance (%)')+
    # 给xy轴取名
    scale_y_continuous(expand=c(0, 0))+
    # 调整y轴属性
    theme(axis.text.x=element_text(angle=90, hjust=1,size = 5),
          axis.title.x=element_text(vjust=2, size=10,face = "bold"),
          axis.title.y =element_text(vjust=2, size=10,face = "bold"),
          #legend.title  = element_text(hjust = 0.5, size = 10,face = "bold"),
          legend.title  = element_blank()
    )+
    facet_wrap(~Group, scales = 'free_x', ncol = 2)+ # 按group组，X轴，分2面
    scale_fill_manual(values = getpalette(20))+labs(x=NULL)
  return(stack_plot)
}
p = facet_stackplot(otu = otu,meta =map)
p

# Maaslin2
library(Maaslin2)
otu = read.table(file = "merged_abundance_table_species.txt", header = T,row.names = 1,sep = "\t")
metadata = read.table(file = "metadata.txt", header = T,row.names = 1,sep = "\t")
fit_data = Maaslin2(
  t(otu), 
  metadata, 
  output = "demo_output", 
  fixed_effects = c("Group","ages","gender","education","bmi"),
  #random_effects = c("ages","gender","education","bmi"),
  plot_heatmap=TRUE,
  heatmap_first_n=10)
  
  
  
  ###############################PART2-function_analysis######################################
  #####my funcation################
setwd("D:\\metagenomics\\upstream")
#####my funcation################
setwd("D:\\metagenomic")
#####PCA##########################
my_pca <- function(otu = otu,meta = metadata ,title = title)
{
  library(ggsci)
  library("ggpubr")
  library("factoextra")
  library("vegan")
  library("ggtext")
  library(ggsignif)
  #otu =t(read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t")) 
  #meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
  otu = t(otu)
  otu = otu[row.names(meta),]
  permanova = adonis2(otu ~ Group, data = meta, permutations = 9999)
  #PCA
  otu.pca <- prcomp(otu, scale. =F)
  p <- fviz_eig(otu.pca, addlabels = TRUE)
  p = p+theme(panel.background = element_rect(fill = "white"),plot.background=element_rect(fill = "white"))
  p1 <- fviz_pca_ind(otu.pca,
                     geom.ind = "point", # show points only ( not "text")
                     col.ind = meta$Group, # color by groups
                     addEllipses = TRUE, # Concentration ellipses
                     legend.title = "Groups")
  
  p1=p1+theme(axis.line = element_line(colour = "black"))+theme_bw()
  p1$labels$x=sub("Dim","PCA",p1$labels$x)
  p1$labels$y=sub("Dim","PCA",p1$labels$y)
  p1=p1+labs(title = "PCA of path.function")+scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
  text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((permanova$`Pr(>F)`[1]),digits = 4),sep = ""))
  #p1 = p1 + annotate("label",x=Inf,y=Inf,label=text_annotation,vjust=1.1,hjust=1.05)
  p1 =p1 + ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, family = "sans", label.colour = 'black'))
  #p1 =p1 +geom_point( alpha = 0.3)
  PC1 <- otu.pca$x[,1]
  PC2 <- otu.pca$x[,2]
  df = as.data.frame(cbind(PC1,PC2)) 
  df$groups = meta$Group
  pd = ggplot(data = df, aes(x=PC1,y=PC2))+geom_point(aes(color=groups,shape=groups,color = groups,fill=groups),size=2,alpha=0.7)+
    ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, label.colour = 'black',text.colour = "black"))
  #pd
  #pd = pd +theme_bw()+stat_ellipse(aes(fill=groups),type="norm",geom="polygon",alpha=0.2,color=NA)
  pd = pd+stat_ellipse(aes(
    color=groups,
    fill=groups),
    level = 0.99,
    geom = "polygon",
    alpha=0.1)+scale_fill_d3(a=0.5)+theme_bw()+scale_color_d3(a=0.5)
  #+geom_point(shape=c(1,2))
  #pd=pd+theme(panel.grid.major = element_blank())
  pd=pd+labs(x=paste("PCA1(","%)",sep = as.character(round(p$data$eig[1],2))),y=paste("PCA2(","%)",sep = as.character(round(p$data$eig[2],2))))
  #pd = pd+  ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, family = "sans", label.colour = 'black'))
  pd = pd+labs(title = paste("PCA of ",title,sep = ""))
  
  
  
  otu.pca.data <- data.frame(rownames(otu.pca$x),PC1,PC2,meta$Group)
  colnames(otu.pca.data) <- c("sample", "PC1", "PC2", "group")
  my_comparisons = list(c('control','experience'))
  
  p2 <- ggplot(otu.pca.data, aes(x = group, y= PC2, colour = group)) +
    geom_boxplot() +
    theme(panel.background = element_rect(fill = "white",colour = "white"),
          panel.grid = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks=element_blank(),
          legend.position="none") +
    geom_signif(comparisons = list(c("control","experience")),
                test= wilcox.test,
                position = "identity",
                tip_length = 0,
                size=0.9,color = "black",
                map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
    xlab("") + ylab("")+
    scale_color_d3(a=0.5)
  
  p3 <- ggplot(otu.pca.data, aes(x = group, y= PC1, colour = group)) +
    geom_boxplot() +
    theme(panel.background = element_rect(fill = "white",colour = "white"),
          panel.grid = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks=element_blank(),
          legend.position="none") +
    xlab("") + ylab("") +
    coord_flip() + # coord_flip()函数翻转坐标�?
    geom_signif(comparisons = list(c("control","experience")),
                test= wilcox.test,
                #step_increase = 10,
                #y_position = 5,
                position = "identity",
                tip_length = 0,
                size=0.9,color = "black",
                map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
    scale_color_d3(a=0.5)
  
  
  
  p4 <- ggarrange(p3, NULL, pd, p2, widths = c(5,2), heights = c(2,4), align = "hv")+theme(plot.background=element_rect(fill = "white"))
  return(p4)
}
#测试
#otu =read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t") 
#meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
#pca=my_pca(otu = otu ,meta = meta ,title = "GBM")




###############PcoA####################
my_pcoa <- function(otu=otu,meta=meta,title=title)
{
  library("amplicon")
  library(xlsx)
  library(ggsci)
  library(ggsignif)
  library("ggpubr")
  Beta_PCOA <- function (dis_mat, metadata, groupID = "Group", ellipse = T, 
                         label = F, PCo = 12) 
  {
    p_list = c("ggplot2", "vegan", "ggrepel")
    for (p in p_list) {
      if (!requireNamespace(p)) {
        install.packages(p)
      }
      suppressWarnings(suppressMessages(library(p, character.only = T)))
    }
    idx = rownames(metadata) %in% rownames(dis_mat)
    metadata = metadata[idx, , drop = F]
    dis_mat = dis_mat[rownames(metadata), rownames(metadata)]
    sampFile = as.data.frame(metadata[, groupID], row.names = row.names(metadata))
    pcoa = cmdscale(dis_mat, k = 3, eig = T)
    points = as.data.frame(pcoa$points)
    eig = pcoa$eig
    points = cbind(points, sampFile[rownames(points), ])
    colnames(points) = c("x", "y", "z", "group")
    if (PCo == 12) {
      p = ggplot(points, aes(x = x, y = y, color = group)) + 
        labs(x = paste("PCo 1 (", format(100 * eig[1]/sum(eig), 
                                         digits = 4), "%)", sep = ""), y = paste("PCo 2 (", 
                                                                                 format(100 * eig[2]/sum(eig), digits = 4), "%)", 
                                                                                 sep = ""), color = groupID)
    }
    if (PCo == 13) {
      p = ggplot(points, aes(x = x, y = z, color = group)) + 
        labs(x = paste("PCo 1 (", format(100 * eig[1]/sum(eig), 
                                         digits = 4), "%)", sep = ""), y = paste("PCo 3 (", 
                                                                                 format(100 * eig[2]/sum(eig), digits = 4), "%)", 
                                                                                 sep = ""), color = groupID)
    }
    if (PCo == 23) {
      p = ggplot(points, aes(x = y, y = z, color = group)) + 
        labs(x = paste("PCo 2 (", format(100 * eig[1]/sum(eig), 
                                         digits = 4), "%)", sep = ""), y = paste("PCo 3 (", 
                                                                                 format(100 * eig[2]/sum(eig), digits = 4), "%)", 
                                                                                 sep = ""), color = groupID)
    }
    p = p + geom_point(alpha = 0.6, size = 3) + theme_classic() + 
      theme(text = element_text(family = "sans", size = 7))
    if (ellipse == T) {
      p = p + stat_ellipse(level = 0.68)
    }
    if (label == T) {
      p = p + geom_text_repel(label = paste(rownames(points)), 
                              colour = "black", size = 3)
    }
    p
  }
  
  #setwd("D:/metagenomic")
  #file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
  #otu = t(read.table("out_path.function.txt",sep = "\t",header = T,row.names = 1))
  #abun = t(read.table("out_pathabundance.txt",sep = "\t",header = T,row.names = 1))
  # meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
  otu = t(otu)
  otu = otu[row.names(meta),]
  #otu = otu[,-c(1,2)]
  permanova = adonis2(otu~Group,data = meta,permutations = 9999)
  bray_dis = vegdist(data.frame(otu),method = "bray")
  bray_dis =  as.matrix(bray_dis)
  bray_dis = as.data.frame(bray_dis)
  
  #write.xlsx(bray_dis,file=paste("./function/humann3/bray_dis ",".xlsx",sep =""))
  p = Beta_PCOA(bray_dis,metadata = meta)
  p = p+scale_color_d3(a=0.5)+theme_bw()
  text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((permanova$`Pr(>F)`[1]),digits = 4),sep = ""))
  p = p + ggtext::geom_richtext (aes (x= Inf, y=Inf,label=text_annotation ),color="black",label.colour = 'black',vjust = 1.1,hjust = 1.05)
  p = p + labs(title = paste("PCoA of",title,sep = " "))
  
  p2 <- ggplot(p$data, aes(x = group, y= y, colour = group)) +
    geom_boxplot() +
    theme(panel.background = element_rect(fill = "white",colour = "white"),
          panel.grid = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks=element_blank(),
          legend.position="none") +
    geom_signif(comparisons = list(c("control","experience")),
                test= wilcox.test,
                position = "identity",
                tip_length = 0,
                size=0.9,color = "black",
                map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
    xlab("") + ylab("")+
    scale_color_d3(a=0.5)
  p3 <- ggplot(p$data, aes(x = group, y=x, colour = group)) +
    geom_boxplot() +
    theme(panel.background = element_rect(fill = "white",colour = "white"),
          panel.grid = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks=element_blank(),
          legend.position="none") +
    xlab("") + ylab("") +
    coord_flip() + 
    geom_signif(comparisons = list(c("control","experience")),
                test= wilcox.test,
                position = "identity",
                tip_length = 0,
                size=0.9,color = "black",
                map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
    scale_color_d3(a=0.5)
  
  
  p4 <- ggarrange(p3, NULL, p, p2, widths = c(4,1), heights = c(1,3), align = "hv")+theme(plot.background=element_rect(fill = "white"))
  return(p4)
  
}

#pcoa= my_pcoa(otu = otu,meta = meta ,title = "BGM")



###############棒棒糖图##################
library(xlsx)
data = read.xlsx("D:\\metagenomics\\upstream\\function\\GBM\\table_result.xlsx",sheetName = "sheet0")



mydotchart <- function(data=data)
{
  library(ggplot2)
  library(ggpubr)
  
  data = data[,c("anno","P-value","effect_size")]#"anno"        "P.value"     "effect_size"
  data = data[which(data$`P-value` < 0.05),]#挑选显著性
  data = data[order(data$`P-value`),]
  if(length(data$`P-value`) > 20)
  {
    data = data[1:20,]
  }
  p = ggdotchart(data, x = "anno", y = "effect_size",
                 add = "segments",
                 color = "P-value",
                 sorting = "descending",
                 add.params = list(color = "lightgray", size = 2),#改变线的粗细和颜色
                 dot.size = 9,
                 label = round(data$effect_size,1), 
                 font.label = list(color = "white", size = 9,
                                   vjust = 0.5),
                 
                 
                 
  ) + geom_hline(yintercept = 0, linetype = 2, color = "lightgray")+
    theme(legend.position = "right",
          axis.text.x = element_text(angle = 40,vjust = 1,hjust = 1,size = 5)
    )+
    labs(x="",y="effect size")+ scale_color_continuous(low="#FF7F0EFF",high="#1F77B4FF")
  
  return(p)
}
#data = read.xlsx("table_result.xlsx",sheetName = "sheet0")
p = mydotchart(data = data)
p

##################差异分析####################

my_diff <- function(otu = otu,meta = meta,anno = F,anno_file=anno_file)
{
  options(scipen = 200)
  otu = t(otu)
  otu = otu[row.names(meta),]
  groups = meta$Group
  df = data.frame(cbind(otu,groups))
  
  name = anno_file
  table_result = data.frame()#创建储存结果的输出数据框
  q.value=list()#储存p-value计算q-value
  #计算实验组和对照组的个数
  control.num = as.numeric(table(df$groups)["control"])
  experience.num = as.numeric(table(df$groups)["experience"])
  #
  for (i in 1:(ncol(df)-1))
  {
    if (anno == F)
    {
      
    }else
    {
      table_result[colnames(df)[i],"anno"] = name[colnames(df)[i],"anno"]
    }
    
    table_result[colnames(df)[i],"P-value"] = wilcox.test(as.numeric(df[which(df$groups == "control"),i]) ,as.numeric(df[which(df$groups== "experience"),i]) )$p.value
    pvalue = table_result[colnames(df)[i],"P-value"]
    q.value = append(q.value, pvalue)
    diffusion = data.frame(
      number = c(as.numeric(df[which(df$groups== "control"),i]),as.numeric(df[which(df$groups == "experience"),i])),
      Group = factor(c(groups[which(groups=="control")],groups[which(groups=="experience")]))
      #Group  =  factor(rep(c()))
    )
    effect.z = coin::wilcox_test(number~Group,diffusion)@statistic@standardizedlinearstatistic
    effect_size = effect.z/sqrt(nrow(df))
    table_result[colnames(df)[i],"effect_size"] = effect_size
    mean.abun = tapply(as.numeric(df[,i]) , df$groups , mean)
    #print(mean.abun[1])
    table_result[colnames(df)[i],"mean_abun_control"] = mean.abun[1]
    table_result[colnames(df)[i],"mean_abun_experience"] = mean.abun[2]
    #####
    rk = rank(as.numeric(df[,i]))
    mean.rank = tapply(rk,df$groups,mean)
    #print(mean.rank[1])
    table_result[colnames(df)[i],"mean_rank_control"] = mean.rank[1]
    table_result[colnames(df)[i],"mean_rank_experience"] = mean.rank[2]
    mean.occ = tapply(df[,i]>0, df$groups, mean)
    table_result[colnames(df)[i],"mean_occ_control"] = mean.occ[1]
    table_result[colnames(df)[i],"mean_occ_experience"] = mean.occ[2]
    if (mean.abun[1] > mean.abun[2])
    {
      table_result[colnames(df)[i],"enrichment"] = "control > experience"
    }
    if(mean.abun[1] < mean.abun[2])
    {
      table_result[colnames(df)[i],"enrichment"] = "control < experience"
    }
    table_result[colnames(df)[i],"mean_abunt"] =  (mean.abun[1]*control.num+mean.abun[2]*experience.num)/(control.num+experience.num)
    table_result[colnames(df)[i],"mean_occ"] = (mean.occ[1]*control.num+mean.occ[2]*experience.num)/(control.num+experience.num)
  }
  table_result$Q_value = p.adjust(table_result$`P-value`,method = "BH")
  
  table_result = table_result[order(table_result$`P-value`),]
  return(table_result)
}

#测试GBM
otu = read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
name = read.table("./GBM.anno.txt",header = T,row.names = 1,sep = "\t")

result = my_diff(meta = meta,otu = otu,anno = T,anno_file = name)
#测试GMM
otu = read.table("./out_GMM.txt",header = T,row.names = 1,sep = "\t")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
name = read.table("./GMM.anno.txt",header = T,row.names = 1,sep = "\t")
if (!file.exists("./function/GMM"))
{
  dir.create("./function/GMM",recursive = TRUE)
}
result = my_diff(meta = meta,otu = otu,anno = T,anno_file = name)
write.xlsx(result, "./table_result.xlsx",sheetName = "sheet0")
data = read.xlsx("table_result.xlsx",sheetName = "sheet0")
dotchar = mydotchart(data = data)
pca = my_pca(meta = meta,otu = otu ,title = "GMM")
pcoa = my_pcoa(meta = meta,otu = otu ,title = "GMM")



permanova = adonis2(t(otu)~Group,data = meta,permutations = 9999)


###Pathway 富集条形图

otu = read.table("./out_Pathway.txt",header = T,row.names = 1,sep = "\t")
anno = read.table("./79_Pathway_ano.txt",header = T,row.names = 1,sep = "\t",quote = "")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")

data = data.frame()


for (i in 1:length(rownames(anno)))
{
  #print(anno[i,"anno"])
  unlist(strsplit(anno[1,"anno"],split = ";"))[1]
  data[i,"1"]=unlist(strsplit(anno[i,"anno"],split = ";"))[1]
  data[i,"2"]=unlist(strsplit(anno[i,"anno"],split = ";"))[2]
  data[i,"3"]=unlist(strsplit(anno[i,"anno"],split = ";"))[3]
  rownames(data)[i] = rownames(anno)[i]
  #print(rownames(anno[i,])) 
}

anno = data
#顺序
otu = t(otu)
otu = otu[rownames(meta),]
otu_meta = cbind(otu,Group = meta$Group)
otu_meta = as.data.frame(otu_meta)
#秩和检验
#wilcox.test(bmi ~ Group, data = otu_meta, var.equal = TRUE)$p.value

#改变value的类型
for (i in 1:length(colnames(otu_meta))-1)
{
  #print(colnames(otu_meta)[i])
  #print(otu_meta[,colnames(otu_meta)[i]])
  otu_meta[,colnames(otu_meta)[i]] = as.numeric(otu_meta[,colnames(otu_meta)[i]])
  #print(wilcox.test(map05223 ~ Group, data = otu_meta, var.equal = TRUE)$p.value)
  # wilcox.test(colnames(otu_meta)[i] ~ Group, data = otu_meta, var.equal = TRUE)$p.value
}
#将有显著差异的通路挑选出来
select_list = data.frame()
count = 1
for (i in 1:(length(colnames(otu_meta))-1))
{
  pv=wilcox.test(otu_meta[which(otu_meta$Group == "control"),colnames(otu_meta)[i]],otu_meta[which(otu_meta$Group == "experience"),colnames(otu_meta)[i]])$p.value
  #print(pv)
  if(!is.nan(pv))
  {
    print(pv)
    if(pv < 0.05)
    {
      select_list[count,"pathway"] = colnames(otu_meta)[i]
      select_list[count,"P.value"] = pv
      count = count +1
      #select_list = append(select_list,colnames(otu_meta)[i])
    }
  }
  
}
#将显著差异的通路的otu表格提取
for (i in 1:length(row.names(select_list)))
{
  #print(select_list[i,"pathway"])
  if (select_list[i,"pathway"] %in% rownames(anno))
  {
    select_list[i,"label1"] = anno[select_list[i,"pathway"],"1"]
    select_list[i,"label2"] = anno[select_list[i,"pathway"],"3"]
  }
}
select_list=select_list[which(select_list$label1 != "Microbial metabolism in diverse environments"),]
#绘制条形图



select_list$P.value = -log10(select_list$P.value)

select_list = select_list[order(select_list$label1,-select_list$P.value,decreasing = T),]
select_list$label2 = factor(select_list$label2,levels = select_list$label2)


p = ggplot(select_list, aes(x=label2, y=P.value,fill =  label1)) + 
  geom_bar(stat = "identity", width=0.7,size=2,position=position_dodge(2))+
  labs(y="-log10(P value)",title = "Pathway",x="")
p = p+theme(axis.text.x = element_text(angle = 90),legend.title = element_blank(),plot.title = element_text(hjust = 0))+coord_flip()+scale_fill_d3(a= 0.5)+theme_bw()

p = p+guides(fill=guide_legend(title=NULL))
p
if (!file.exists("./function/pathway"))
{
  dir.create("./function/pathway",recursive = TRUE)
}
ggsave("./function/pathway/pathway.pdf", p,  width=280, height=700, units="mm")





###########################permanova
otu <- read.table("merged_abundance_table_species.txt",header = T,sep = "\t",row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
otu = otu[,rownames(meta)]
otu = t(otu)

otu.adonis = adonis2(otu ~ Group+ages+bmi+education,data = meta,permutations = 9999,by="margin")



#####################boxplot-module
function_boxplot <- function(otu=otu,meta=meta,save_path=save_path)
{
  otu = t(otu)
  otu = otu[rownames(meta),]
  otu_meta = cbind(otu,Group = meta$Group)
  otu_meta = as.data.frame(otu_meta)
  #numeric数据
  for (i in 1:length(colnames(otu_meta))-1)
  {
    
    otu_meta[,colnames(otu_meta)[i]] = as.numeric(otu_meta[,colnames(otu_meta)[i]])
    
  }
  #挑选p-value小于0.05的module
  select_list=data.frame()
  count=1
  for (i in 1:(length(colnames(otu_meta))-1))
  {
    pv = wilcox.test(otu_meta[which(otu_meta$Group == "control"),colnames(otu_meta)[i]],otu_meta[which(otu_meta$Group == "experience"),colnames(otu_meta)[i]])$p.value
    if(!is.nan(pv))
    {
      print(pv)
      if(pv < 0.05)
      {
        select_list[count,"pathway"] = colnames(otu_meta)[i]
        select_list[count,"P.value"] = pv
        count = count +1
        #select_list = append(select_list,colnames(otu_meta)[i])
      }
    }
  }
  for (i in 1:length(select_list$pathway))
  {
    #print(select_list$pathway[i])
    tmp = data.frame()
    tmp =as.data.frame(cbind(pathway = otu_meta[,select_list$pathway[i]],Group = otu_meta[,"Group"]))  
    tmp$pathway =as.numeric(tmp$pathway)
    tmp$Name= select_list$pathway[i]
    p = ggplot(data = tmp,aes(x=Group,y=pathway,color=Group))+
      geom_boxplot(alpha=1,size=1.2,width=0.5)+theme_bw()
    p=p + geom_jitter(width = 0.2)+
      geom_signif(comparisons = list(c("control","experience")),
                  test= wilcox.test,
                  #step_increase = 10,
                  #y_position = 5,
                  position = "identity",
                  tip_length = 0,
                  size=0.9,color = "black",
                  map_signif_level = T)+scale_color_d3(a=0.5)+
      geom_signif(comparisons = list(c("control","experience")),
                  test= wilcox.test,
                  #step_increase = 10,
                  #y_position = 5,
                  position = "identity",
                  tip_length = 0,
                  vjust=2,
                  size=0.9,color = "black",
                  map_signif_level = F)+scale_color_d3(a=0.5)+
      
      facet_grid(. ~Name ) + theme(legend.position = "bo",#设置分类标签位置
                                   #y轴刻度线去掉
                                   axis.ticks.y= element_blank(),
                                   #y轴字去掉
                                   axis.text.y = element_blank(),
                                   #设置图例字体样式和大小
                                   text = element_text(family = "sans",size = 10),
                                   #设置顶部分面背景
                                   strip.background = element_rect(fill="grey"),
                                   strip.switch.pad.grid =unit(0.2, "cm"),
                                   strip.text = element_text(size = 25, colour="black",#face="italic",
                                                             margin = margin(0.5,0,0.5,0, "cm")))+
      labs(x="",y="")
    #pictures = append(pictures,p)
    ####
    if (!file.exists(save_path))
    {
      dir.create(save_path,recursive = TRUE)
    }
    #修改保存的路径部分
    ggsave(paste( save_path,".png",sep = select_list$pathway[i]), p,  width=180, height=150, units="mm")
  }
}

otu = read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
#name = read.table("./GBM.anno.txt",header = T,row.names = 1,sep = "\t")




#绘制boxplot
#pictures = list()

p
otu_meta2 = otu_meta[,which()]
pictures[1]

############################################



####################



otu = read.table("./out_Pathway.txt",header = T,row.names = 1,sep = "\t")
anno = read.table("./79_Pathway_ano.txt",header = T,row.names = 1,sep = "\t",quote = "")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")

data = data.frame()


for (i in 1:length(rownames(anno)))
{
  #print(anno[i,"anno"])
  unlist(strsplit(anno[1,"anno"],split = ";"))[1]
  data[i,"1"]=unlist(strsplit(anno[i,"anno"],split = ";"))[1]
  data[i,"2"]=unlist(strsplit(anno[i,"anno"],split = ";"))[2]
  data[i,"3"]=unlist(strsplit(anno[i,"anno"],split = ";"))[3]
  rownames(data)[i] = rownames(anno)[i]
  #print(rownames(anno[i,])) 
}
#找出属于代谢途径的pathway
select_name =rownames(data)[which(data$`1`=="Metabolism")]   
#找出相应的otu
otu_metabolism = which(rownames(otu) %in% select_name)
otu2= otu[otu_metabolism,]
#转置后拼接分组信息
otu2 = t(otu2)
otu2 = otu2[row.names(meta),]
otu2 =as.data.frame(cbind(otu2 ,Group = meta$Group)) 
otu =otu2


for (i in 1:(length(colnames(otu))-1))
{
  #print(colnames(otu)[i])
  colnames(otu)[i] = data[colnames(otu)[i],3]
}
#


library(simplevis)
library(ggplot2)
library(tidyverse)
library("ggsci")
library(ggsignif)


data.long <- pivot_longer(otu, cols = 1:length(colnames(otu))-1, names_to ="index", values_to = "num")
data.long$Group <- as.factor(data.long$Group)
data.long$index <- factor(data.long$index,levels =rev(colnames(otu)) )
data.long <- data.long[!is.infinite(data.long$num),]
data.long$num = as.numeric(data.long$num)
data.long = as.data.frame(data.long)
data.long[i,"index"]=as.character(data.long[i,"index"])
data.long$num = log10(data.long$num)
##############改index名字



#str(data.long)
p = ggplot(data.long, aes(x=index, y=num,fill=Group))+theme_bw()+
  geom_boxplot(alpha=1,size=0.5,width=0.5,outlier.color = "gray",position=position_dodge(width=0.8),outlier.size = 0.5)+#调整control和experience之间的距离
  theme(axis.text.x = element_text(angle = 90),
        plot.title = element_text(hjust = 0.5),#强行标题居中
        #axis.text.y = element_text(angle = 90),
        axis.title.x=element_text(vjust=2, size=10,face = "bold"),
        axis.title.y =element_text(vjust=2, size=10,face = "bold"),
        legend.title  = element_text(hjust = 0.5, size = 10,face = "bold"))+coord_flip()+
  scale_fill_aaas(a=0.55)+scale_color_aaas(a=0.55)+labs(y="Relative Abundance",x=NULL,title = "Metabolism",hjust=0.5)+
  stat_compare_means(aes(group=Group),
                     method = "wilcox.test",
                     label="p.signif",
                     hide.ns = T,
                     vjust = 0.8,
                     position = "identity",
                     map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))
# geom_signif(comparisons = list(c("control","experience")),
#             test= wilcox.test,

#      position = "identity",
#     tip_length = 0,
#    size=0.9,color = "black",
#    map_signif_level = T)
#p

ggsave("test.pdf", p,  width=280, height=700, units="mm")




#------------------------------------------
#humann3
library(xlsx)
#1.diff
otu = read.table("out_path.function.txt",sep = "\t",header = T,row.names = 1)
otu = otu[3:length(rownames(otu)),]
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)

table_result=my_diff(otu=otu,meta = meta,anno = F,anno_file = F)
if (!file.exists("./function/humann"))
{
  dir.create("./function/humann",recursive = TRUE)
}
write.xlsx(table_result,sheetName = "sheet0",file = "./function/humann/diff.xlsx")
#func = func[rownames(meta),]
#2.PCA
p = my_pca(otu = otu,meta = meta,title = "path.function")
if (!file.exists("./function/humann"))
{
  dir.create("./function/humann",recursive = TRUE)
}
ggsave("./function/humann/PCA of humann.png",p,  width=180, height=150, units="mm")
#3.PCoa
p = my_pcoa(otu = otu,meta = meta,title = "path.function")
if (!file.exists("./function/humann"))
{
  dir.create("./function/humann",recursive = TRUE)
}
ggsave("./function/humann/PCoA of humann.png",p,  width=180, height=150, units="mm")
#GBM
otu = read.table("out_GBM.txt",sep = "\t",header = T,row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
anno = read.table("GBM.anno.txt",sep = "\t",header = T,row.names = 1)
#func = func[rownames(meta),]
table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
if (!file.exists("./function/GBM"))
{
  dir.create("./function/GBM",recursive = TRUE)
}
write.xlsx(table_result,sheetName = "sheet0",file = "./function/GBM/diff.xlsx")
#2.PCA
p = my_pca(otu = otu,meta = meta,title = "path.function")
ggsave("./function/GBM/PCA of GBM.png",p,  width=180, height=150, units="mm")
#3.PCoa
p = my_pcoa(otu = otu,meta = meta,title = "path.function")
ggsave("./function/GBM/PCoA of GBM.png",p,  width=180, height=150, units="mm")
#4.bangbangtang
#data = table_result[,2:length(colnames(table_result))]
p = mydotchart(data = table_result)
ggsave("./function/GBM/dotchart of GBM.png",p,  width=180, height=150, units="mm")
#5箱线图
function_boxplot(meta = meta,otu = otu,save_path = "./function/GBM/")


#GMM
otu = read.table("out_GMM.txt",sep = "\t",header = T,row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
anno = read.table("GMM.anno.txt",sep = "\t",header = T,row.names = 1)
#diff
table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
if (!file.exists("./function/GMM"))
{
  dir.create("./function/GMM",recursive = TRUE)
}
write.xlsx(table_result,sheetName = "sheet0",file = "./function/GMM/diff.xlsx")
#2.PCA
p = my_pca(otu = otu,meta = meta,title = "path.function")
ggsave("./function/GMM/PCA of GMM.png",p,  width=180, height=150, units="mm")
#3.PCoa
p = my_pcoa(otu = otu,meta = meta,title = "path.function")
ggsave("./function/GMM/PCoA of GMM.png",p,  width=180, height=150, units="mm")
#4.bangbangtang
#data = table_result[,2:length(colnames(table_result))]
p = mydotchart(data = table_result)
ggsave("./function/GMM/dotchart of GMM.png",p,  width=180, height=150, units="mm")
#5箱线图
function_boxplot(meta = meta,otu = otu,save_path = "./function/GMM/")

#KO

otu = read.table("out_KO.txt",sep = "\t",header = T,row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
anno = read.table("KO.ann.txt",sep = "\t",header = T,row.names = 1)
#diff
table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
if (!file.exists("./function/KO"))
{
  dir.create("./function/KO",recursive = TRUE)
}
write.xlsx(table_result,sheetName = "sheet0",file = "./function/KO/diff.xlsx")
#2.PCA
p = my_pca(otu = otu,meta = meta,title = "KO")
ggsave("./function/KO/PCA of KO.png",p,  width=180, height=150, units="mm")
#3.PCoa
p = my_pcoa(otu = otu,meta = meta,title = "KO")
ggsave("./function/KO/PCoA of KO.png",p,  width=180, height=150, units="mm")
#4.bangbangtang
#data = table_result[,2:length(colnames(table_result))]
p = mydotchart(data = table_result)
ggsave("./function/KO/dotchart of KO.png",p,  width=180, height=150, units="mm")
#5箱线图
function_boxplot(meta = meta,otu = otu,save_path = "./function/KO/")

#module

otu = read.table("out_Module.txt",sep = "\t",header = T,row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
anno = read.table("79_Module_ano.txt",sep = "\t",header = T,row.names = 1)
#diff
table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
if (!file.exists("./function/Module"))
{
  dir.create("./function/Module",recursive = TRUE)
}
write.xlsx(table_result,sheetName = "sheet0",file = "./function/Module/diff.xlsx")
#2.PCA
p = my_pca(otu = otu,meta = meta,title = "Module")
ggsave("./function/Module/PCA of Module.png",p,  width=180, height=150, units="mm")
#3.PCoa
p = my_pcoa(otu = otu,meta = meta,title = "Module")
ggsave("./function/Module/PCoA of Module.png",p,  width=180, height=150, units="mm")
#4.bangbangtang
#data = table_result[,2:length(colnames(table_result))]
p = mydotchart(data = table_result)
ggsave("./function/Module/dotchart of Module.png",p,  width=180, height=150, units="mm")
#5箱线图
function_boxplot(meta = meta,otu = otu,save_path = "./function/Module/")



###########################################6pathwaysPCA
otu = read.table("./out_Pathway.txt",header = T,row.names = 1,sep = "\t")
anno = read.table("./79_Pathway_ano.txt",header = T,row.names = 1,sep = "\t",quote = "")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")

data = data.frame()


for (i in 1:length(rownames(anno)))
{
  #print(anno[i,"anno"])
  unlist(strsplit(anno[1,"anno"],split = ";"))[1]
  data[i,"1"]=unlist(strsplit(anno[i,"anno"],split = ";"))[1]
  data[i,"2"]=unlist(strsplit(anno[i,"anno"],split = ";"))[2]
  data[i,"3"]=unlist(strsplit(anno[i,"anno"],split = ";"))[3]
  rownames(data)[i] = rownames(anno)[i]
  #print(rownames(anno[i,])) 
}

data = data[which(data$`1`=="Cellular Processes"),]
path_list = rownames(data)
otu = otu[path_list,]
p= my_pca(otu=otu,meta = meta ,title = "Cellular Processes")
###########################



otu = read.table("./out_ARGcategory.txt",header = T,row.names = 1,sep = "\t")

dominant_species <- function(n=n,otutab = otutab,metadata=metadata,tax= tax)
{
  library(tidyverse)
  library("ggsci")
  library("ggplot2")
  library("RColorBrewer")
  getpalette = brewer.pal(12,"Set3")
  otu = otutab
  map = metadata
  #将实验组和对照组的样本分开
  experience = list()
  control = list()
  for (i in row.names(map))
  {
    # print(i)
    if (map[i,"Group"] == "experience")
    {
      experience = append(experience,i)  
    }
    if (map[i,"Group"] == "control")
    {
      control = append(control,i)  
    }
    
  }
  #将otu列表区分为实验组otu和对照组otu
  exp.otu = otu[which(colnames(otu) %in% experience)]
  ctr.otu = otu[which(colnames(otu) %in% control)]
  #对实验组和对照组中不同物种的数量进行求和
  exp.otu.sum = rowSums(exp.otu)
  ctr.otu.sum = rowSums(ctr.otu)
  #将实验组与对照组求和数据进行合并
  df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
  colnames(df) = c("experience","control")
  #计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
  df$average = (df$experience+df$control)/2
  df = df[order(df$average,decreasing = T),]
  #找出top n 的物种，其余物种并入other并计算other的总和
  df.top10 = df[1:n-1,]
  df.res = df[n:length(row.names(df)),]
  other = colSums(df.res)
  #将other加入top n数据中并更改名字
  a = rbind(df.top10,other)
  rownames(a)[length(rownames(a))]="Other"
  a$Phylum = row.names(a)
  a = a[order(a$average,decreasing = T),]
  #将物种数量转化为相对丰富
  df = select(a,-average)
  df$experience = df$experience/sum(df$experience)
  df$control = df$control/sum(df$control)
  #设置线段内容
  link_dat <- df %>% 
    arrange(by=experience) %>% 
    mutate(experience=cumsum(experience), control=cumsum(control)) 
  #
  df.long <- reshape2::melt(df, value.name='abundance', variable.name='group')
  #绘图
  df.long$Phylum = factor(df.long$Phylum,levels = rev(link_dat$Phylum))
  #a = df.long$Phylum
  p = ggplot(df.long, aes(x=group, y=abundance, fill=Phylum)) + 
    geom_bar(stat = "identity", width=0.5, col='black',size=2)  + 
    geom_segment(data=link_dat, aes(x=1.25, xend=1.75, y=experience, yend=control),size=2)+
    theme_bw()  + 
    #scale_color_aaas(a=0.10)+scale_fill_aaas(a=0.10)+
    ylab("Relative abundance(%)")+
    
    theme(axis.title.x=element_text(vjust=2, size=20,face = "bold"),
          axis.title.y =element_text(vjust=2, size=20,face = "bold"),
          legend.title  = element_text(size = 20,face = "bold")
    )
  name = paste("Top ",n,sep = "")
  name = paste(name,tax,sep = " abundant ")
  p = p+scale_fill_manual(values = getpalette)+labs(fill=name)
  
  return(p)
}
otu =read.table("merged_abundance_table_class.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
map=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)

p= dominant_species(n=10,metadata = map,otutab = otu,tax = "class")

p

#######抗性基因

otu =read.table("out_KO.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)
meta=read.table("metadata.txt", row.names = 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F)

anno = read.table("KO.ann.txt", row.names= 1, header=T, sep="\t",  comment.char="", stringsAsFactors = F,quote = "")
meta[which(meta$Group==1),"Group"] = "control"
meta[which(meta$Group==2),"Group"] = "experience"
p = my_pca(otu = otu ,meta = meta,title = "ARGcategory")
p = my_pcoa(otu = otu ,meta = meta,title = "ARGcategory")


result_table = my_diff(otu = otu,meta = meta,anno = T,anno_file = anno)
WGS全基因组分析 || SNP过滤
当全基因组分析得到成百上千样本的变异位点vcf文件时，即可开始进行下游的一系列分析。而下游分析的首先工作，就是对得...
[13] 9 下游分析 & 9.1 基因组 contex
9 下游分析本章介绍如何注释已鉴定的 Peak 的基因组序列，并注释到基因，然后对其进行功能特征分析。它还提出了...
肠道菌群：宏转录组测序分析流程解读
上回给大家讲述了16S测序分析和宏基因组测序分析，本期的宏转录组来啦~ 你知道吗？通过16S测序分析和宏基...
58.《Bioinformatics Data Skills》之
在分析基因组数据之前，我们首先需要关注测序的质量如何，回答下面两个问题：测序技术错误如何分布？由何引起？对下游...
宏蛋白质组研究概述
前言宏组学（Meta-Omics）是涵盖宏基因组学、宏转录组学和宏蛋白质组学的一门学科。其中宏基因组研究可以获知...
插件 | 蛋白序列集合功能注释快速完成 - Quick Prot
写在前面下游数据分析的时候，大体会遇到以下两个场景：手工查看基因组任意位置的基因，具体的注释信息，以及其前后基...
重测序分析（14）全基因组关联分析GWAS介绍
全基因组关联分析（GWAS）全基因组关联分析（Genome wide association study，GWA...
生信log13|原核基因组分析流程一：常用在线工具使用总结及评价
笔者有话：写这篇日志的目的在于记录原核基因组下游分析比较基础的步骤，使用这些工具的时候很想吐槽的点，以及一些些数据...
组装简介
基因组应该是生信分析的基因组。一个好的基因组会让生信分析事半功倍。基因组组装简单的理解：借助工具进行 read...
24nt siRNA与甲基化关联分析
简介：small RNA测序分析结果与 WGBS测序分析结果联合分析 1.全基因组层面全基因组上固定bin长...