美文网首页
R 针对KEGG ORTHOLOGY的Lefse分析

R 针对KEGG ORTHOLOGY的Lefse分析

作者: leoxiaobei | 来源:发表于2021-12-22 00:14 被阅读0次

    在拿到PICRUST2预测的KO丰度表格后,我们其实接下来也可以像完成优势物种进化分支图一样来针对KO的Lefse分析
    首先接上篇,获取KEGG ORTHOLOGY分层注释文件。

    Part 1 数据整理
    data(KO)
    head(KO)
    # A tibble: 6 x 9
    # KO     Description                                                L1_ID L1         L2_ID L2                      L3_ID L3                     PathwayID
    # <chr>  <chr>                                                      <chr> <chr>      <chr> <chr>                   <chr> <chr>                  <chr>    
    # 1 K00844 HK; hexokinase [EC:2.7.1.1]                                09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
    # 2 K12407 GCK; glucokinase [EC:2.7.1.2]                              09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
    # 3 K00845 glk; glucokinase [EC:2.7.1.2]                              09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
    # 4 K25026 glk; glucokinase [EC:2.7.1.2]                              09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
    # 5 K01810 GPI, pgi; glucose-6-phosphate isomerase [EC:5.3.1.9]       09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
    # 6 K06859 pgi1; glucose-6-phosphate isomerase, archaeal [EC:5.3.1.9] 09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
    
    #加载预测的KO丰度表格
    raw <- read_tsv("./picrust2/pred_metagenome_unstrat.tsv")
    processed <- inner_join(raw,KO,by = c("function" = "KO"))
    x <- processed %>% #总结KO L3数据
      group_by(PathwayID) %>%
      summarise(M1=sum(M1),
                M2=sum(M2),
                M3=sum(M3),
                M4=sum(M4),
                M5=sum(M5),
                M6=sum(M6),
                M7=sum(M7),
                M8=sum(M8),
                N1=sum(N1),
                N2=sum(N2),
                N3=sum(N3),
                N4=sum(N4),
                N5=sum(N5),
                N6=sum(N6),
                L1=unique(L1),
                L2=unique(L2),
                L3=unique(L3))
    head(x)
    # A tibble: 6 x 18
    # PathwayID      M1       M2      M3      M4      M5      M6      M7      M8       N1      N2       N3      N4      N5      N6 L1         L2      L3     
    # <chr>       <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>    <dbl>   <dbl>    <dbl>   <dbl>   <dbl>   <dbl> <chr>      <chr>   <chr>  
    #   1 ko00010   979437. 1157257. 966058. 799234. 977727. 790647. 923608. 905087. 1087292. 886595. 1027478. 980108. 936901. 941357. Metabolism Carboh~ Glycol~
    #   2 ko00020   555098.  662764. 574935. 456071  556023. 430408. 498903. 464911.  526917. 451617.  556598. 550912. 500860. 480391. Metabolism Carboh~ Citrat~
    #   3 ko00030   593586.  675527. 574631. 471552. 552199. 473592. 575287. 571875.  767123  590887.  696223. 639922. 596782. 630457. Metabolism Carboh~ Pentos~
    #   4 ko00040   270708.  312007. 241999. 220993. 236960. 218570. 265125. 260460.  320277. 263980.  297270. 294852. 285620. 264528. Metabolism Carboh~ Pentos~
    #   5 ko00051   581103.  685743. 538040. 440673. 548036. 449644. 520336. 522393.  644275. 523412.  606600. 545518. 543689. 557024. Metabolism Carboh~ Fructo~
    #   6 ko00052   599459.  686096. 512145. 461007. 560880. 484895. 530024. 588223.  663407. 488498.  568301. 539427. 576209. 545741. Metabolism Carboh~ Galact~
    
    Part 2 Lefse分析
    #整理成phyloseq对象
    sampleda <- readxl::read_excel("group.xlsx") %>% tibble::column_to_rownames("#SampleID")
    featuretab <- x[,c(2:15)] %>% apply(2,as.integer) %>% as.matrix()
    rownames(featuretab) <- x$PathwayID
    tax <- as.matrix(x[,c(1,16:18)] %>% tibble::column_to_rownames("PathwayID"))
    library(phyloseq)
    ps <- phyloseq(otu_table(featuretab,taxa_are_rows = T),tax_table(tax),sample_data(sampleda))
    # ps1 <- MicrobiotaProcess::as.mpse(ps) %>%
    #   mp_rrarefy() %>%
    #   mp_cal_abundance(.abundance = RareAbundance,force=T)
    # temp <- ps1 %>% mp_extract_assays(.abundance = RelRareAbundanceBySample) %>% as.matrix()
    # ps1 <- phyloseq(otu_table(temp,taxa_are_rows = T),tax_table(tax),sample_data(sampleda))
    #ps和ps1的分析结果相同
    #lefse分析
    library(MicrobiotaProcess)
    set.seed(1024)
    deres <- diff_analysis(obj = ps, #实际利用的是抽平后的相对丰度表
                           classgroup = "Category",
                           firstcomfun = "kruskal.test",
                           # standard_method = "NULL",
                           padjust = "fdr",#p值校正方法
                           filtermod = "pvalue",#以pvalue列过滤
                           firstalpha = 0.05,
                           strictmod = TRUE,#当subgroup被提供时,strictmod决定是否执行all-against-all。
                           secondcomfun = "wilcox.test",
                           secondalpha = 0.01,
                           mlfun = "lda",#线性判别分析,可选随机森林
                           ldascore=3,#线性判别分数
                           type="others" #非物种
    )
    deres
    # The original data: 384 features and 14 samples
    # The sample data: 1 variables and 14 samples
    # The taxda contained 334 by 3 rank
    # after first test (kruskal.test) number of feature (pvalue<=0.05):162
    # after second test (wilcox.test and generalizedFC) number of significantly discriminative feature:110
    # after lda, Number of discriminative features: 17 (certain taxonomy classification:17; uncertain taxonomy classication: 0)
    
    Part 3 画图
    ggdiffclade(obj=deres,
                layout="radial",#布局类型
                alpha=0.3, #树分支的背景透明度
                linewd=0.2, #树中连线粗细
                skpointsize=0.8, #树骨架中国点的大小
                taxlevel=2, #要展示的树分支水平1(L1)及1以下(L2和L3)
                cladetext=3,#文本大小
                setColors=F,#自定义颜色
                removeUnknown=T,#不删分支,但移除分类中有un_的物种注释
                reduce=T)+ # 移除分类不明的物种分支,分支和注释一同删去,为T时removeUnknown参数无效
      scale_fill_manual(values=c("#00AED7", "#FD9347"),guide=guide_legend(order=1)) +
      scale_size_continuous(range = c(0.5, 2), guide = guide_legend(keywidth = 0.2, keyheight = 1, order = 2))+
      guides(color = guide_legend(keywidth = 0.2, keyheight = 1,order = 2,ncol=1)) +
      theme(panel.background=element_rect(fill=NA),
            legend.position="right",
            plot.margin=margin(0,0,0,0),
            legend.spacing.y=unit(0.02, "cm"),
            legend.title=element_text(size=12),
            legend.text=element_text(size=10),
            legend.box.spacing=unit(0.02,"cm"))
    
    ggdiffbox(
      deres, 
      l_xlabtext="relative abundance (%)",
      box_notch=FALSE, 
      colorlist=c("#00AED7", "#FD9347")
    )
    

    相关文章

      网友评论

          本文标题:R 针对KEGG ORTHOLOGY的Lefse分析

          本文链接:https://www.haomeiwen.com/subject/efdtqrtx.html