美文网首页
R packages:phyloseq(一) 16s 扩增子α多

R packages:phyloseq(一) 16s 扩增子α多

作者: 佳名 | 来源:发表于2020-08-26 22:37 被阅读0次

    α多样性分析需要没有经过归一化的数据

    1.安装

    BiocManager::install("phyloseq")
    BiocManager::install("MicrobiotaProcess")
    

    2.导入文件

    rm(list = ls())
    library("phyloseq")
    library("MicrobiotaProcess")
    library("ggplot2")
    library("ggpubr")#用于事后检验标记
    otu <- "./table-dada2_pe.qza"
    rep <- "rep-seqs-dada2_pe.qza"
    tree <- "rooted-tree_pe.qza"
    tax <- "taxonomy_pe.qza"
    sample <- "metadata_pe.txt"
    qiimedata <- import_qiime2(otuqza=otu, taxaqza=tax,refseqqza=rep,
                              mapfilename=sample,treeqza=tree)
    qiimedata
    #phyloseq-class experiment-level object
    #otu_table()   OTU Table:         [ 1320 taxa and 19 samples ]
    #sample_data() Sample Data:       [ 19 samples by 2 sample variables ]
    #tax_table()   Taxonomy Table:    [ 1320 taxa by 8 taxonomic ranks ]
    #phy_tree()    Phylogenetic Tree: [ 1320 tips and 1319 internal nodes ]
    #refseq()      DNAStringSet:      [ 1320 reference sequences ]
    

    查看样品个数

    nsamples(qiimedata)
    #[1] 25
    

    查看otu个数

    ntaxa(qiimedata)
    #[1] 1919
    

    3.筛选otu

    方法1:保留在所有样本中总序列数大于100的otu

    提取总count数量大于100的样品

    sub_qiimedata  <-  prune_taxa(taxa_sums(qiimedata) > =100, qiimedata)
    

    taxa_sums函数可以将qiimedata每一条otu在所有样品中序列数求和

    taxa_sums(qiimedata)
    #4d335c926abe085bbfc77449f788a5a7 2ec6e10f3d193804e55967e2d7b3d9b0 
    #                             126                              221 
    #ce92babc3ea4623a1678033cba9b3e84 fd513425dcdc4d8c7e35faf166162e4a 
    #                             775                              172 
    #5ae65fbc4c217f8558906c695bb15d99 5936e83d1a8073359fcb4a660ef63083
    

    查看剩余otu个数

    ntaxa(sub_qiimedata)
    #[1] 399
    

    方法2:去除至少20%样本中未见过3次以上的OTU

    sub_qiimedata = filter_taxa(qiimedata, 
                                function(x) sum(x > 3) > (0.2*length(x)), TRUE)
    
    ntaxa(sub_qiimedata)
    #[1] 213
    

    方法3:去除序列数少于10000的样本

    sub_qiimedata = prune_samples(sample_sums(qiimedata)>=10000, qiimedata)
    nsamples(sub_qiimedata)
    #[1] 20
    

    4. α多样性分析

    4.1 phyloseq包的plot_richness函数

    p <- plot_richness(qiimedata, "group", 
                       measures=NULL,
                       color = "group")+
      geom_boxplot(aes(fill=group))+
      theme_bw()+xlab(NULL)+
      scale_color_aaas()+
      scale_fill_aaas(alpha=0.7)
    library("ggpubr")#用于事后检验标记
    mycompare=list(c("Pre","Post"))
    p<-p+stat_compare_means(comparisons=mycompare,
                            label = "p.signif",
                            method = 'wilcox')
    p
    
    7种α多样性指数.png

    4.2 MicrobiotaProcess包的get_alphaindex函数

    alphaobj <- get_alphaindex(qiimedata)
    
    p_alpha <- ggbox(alphaobj, geom="boxplot",
                     factorNames="group",
                     p_textsize =3,
                     signifmap=FALSE)+ 
      theme_bw()+
      geom_point(aes(color =group))+
      scale_color_aaas()+
      scale_fill_aaas()
    p_alpha
    
    6种α多样性.png

    稀释曲线

    按样品
    p_rare <- ggrarecurve(obj=qiimedata, 
                          indexNames=c("Observe","Chao1","ACE"), 
                          chunks=30) +
      theme(legend.spacing.y=unit(0.02,"cm"),
            legend.text=element_text(size=4))+
      theme_bw()+
    p_rare
    
    alpha metric.png
    分组
    p2 <- ggplot(p_rare$data,aes(readsNums,value,color = group))+
      geom_point(stat = "summary", fun = mean)+
      geom_smooth()+
      facet_wrap(~Alpha,scale = "free")+
      theme_bw()+
      xlab("Number of sequence")+ylab("alpha metric")+
      scale_color_aaas()+
      scale_fill_aaas()
    
    alpha metric.png

    相关文章

      网友评论

          本文标题:R packages:phyloseq(一) 16s 扩增子α多

          本文链接:https://www.haomeiwen.com/subject/sthqsktx.html