美文网首页
R packages:phyloseq(一) 16s 扩增子α多

R packages:phyloseq(一) 16s 扩增子α多

作者: 佳名 | 来源:发表于2020-08-26 22:37 被阅读0次

α多样性分析需要没有经过归一化的数据

1.安装

BiocManager::install("phyloseq")
BiocManager::install("MicrobiotaProcess")

2.导入文件

rm(list = ls())
library("phyloseq")
library("MicrobiotaProcess")
library("ggplot2")
library("ggpubr")#用于事后检验标记
otu <- "./table-dada2_pe.qza"
rep <- "rep-seqs-dada2_pe.qza"
tree <- "rooted-tree_pe.qza"
tax <- "taxonomy_pe.qza"
sample <- "metadata_pe.txt"
qiimedata <- import_qiime2(otuqza=otu, taxaqza=tax,refseqqza=rep,
                          mapfilename=sample,treeqza=tree)
qiimedata
#phyloseq-class experiment-level object
#otu_table()   OTU Table:         [ 1320 taxa and 19 samples ]
#sample_data() Sample Data:       [ 19 samples by 2 sample variables ]
#tax_table()   Taxonomy Table:    [ 1320 taxa by 8 taxonomic ranks ]
#phy_tree()    Phylogenetic Tree: [ 1320 tips and 1319 internal nodes ]
#refseq()      DNAStringSet:      [ 1320 reference sequences ]

查看样品个数

nsamples(qiimedata)
#[1] 25

查看otu个数

ntaxa(qiimedata)
#[1] 1919

3.筛选otu

方法1:保留在所有样本中总序列数大于100的otu

提取总count数量大于100的样品

sub_qiimedata  <-  prune_taxa(taxa_sums(qiimedata) > =100, qiimedata)

taxa_sums函数可以将qiimedata每一条otu在所有样品中序列数求和

taxa_sums(qiimedata)
#4d335c926abe085bbfc77449f788a5a7 2ec6e10f3d193804e55967e2d7b3d9b0 
#                             126                              221 
#ce92babc3ea4623a1678033cba9b3e84 fd513425dcdc4d8c7e35faf166162e4a 
#                             775                              172 
#5ae65fbc4c217f8558906c695bb15d99 5936e83d1a8073359fcb4a660ef63083

查看剩余otu个数

ntaxa(sub_qiimedata)
#[1] 399

方法2:去除至少20%样本中未见过3次以上的OTU

sub_qiimedata = filter_taxa(qiimedata, 
                            function(x) sum(x > 3) > (0.2*length(x)), TRUE)
ntaxa(sub_qiimedata)
#[1] 213

方法3:去除序列数少于10000的样本

sub_qiimedata = prune_samples(sample_sums(qiimedata)>=10000, qiimedata)
nsamples(sub_qiimedata)
#[1] 20

4. α多样性分析

4.1 phyloseq包的plot_richness函数

p <- plot_richness(qiimedata, "group", 
                   measures=NULL,
                   color = "group")+
  geom_boxplot(aes(fill=group))+
  theme_bw()+xlab(NULL)+
  scale_color_aaas()+
  scale_fill_aaas(alpha=0.7)
library("ggpubr")#用于事后检验标记
mycompare=list(c("Pre","Post"))
p<-p+stat_compare_means(comparisons=mycompare,
                        label = "p.signif",
                        method = 'wilcox')
p
7种α多样性指数.png

4.2 MicrobiotaProcess包的get_alphaindex函数

alphaobj <- get_alphaindex(qiimedata)
p_alpha <- ggbox(alphaobj, geom="boxplot",
                 factorNames="group",
                 p_textsize =3,
                 signifmap=FALSE)+ 
  theme_bw()+
  geom_point(aes(color =group))+
  scale_color_aaas()+
  scale_fill_aaas()
p_alpha
6种α多样性.png

稀释曲线

按样品
p_rare <- ggrarecurve(obj=qiimedata, 
                      indexNames=c("Observe","Chao1","ACE"), 
                      chunks=30) +
  theme(legend.spacing.y=unit(0.02,"cm"),
        legend.text=element_text(size=4))+
  theme_bw()+
p_rare
alpha metric.png
分组
p2 <- ggplot(p_rare$data,aes(readsNums,value,color = group))+
  geom_point(stat = "summary", fun = mean)+
  geom_smooth()+
  facet_wrap(~Alpha,scale = "free")+
  theme_bw()+
  xlab("Number of sequence")+ylab("alpha metric")+
  scale_color_aaas()+
  scale_fill_aaas()
alpha metric.png

相关文章

网友评论

      本文标题:R packages:phyloseq(一) 16s 扩增子α多

      本文链接:https://www.haomeiwen.com/subject/sthqsktx.html