更新go和kegg注释

作者: 多啦A梦的时光机_648d | 来源:发表于2022-09-14 19:16 被阅读0次

    非模式物种

    step1:注释
    把本物种的蛋白序列用diamond到eggnog数据库比对

    ##/home/lx_sky6/software/eggnog-mapper-2.1.8/emapper.py -i GCF_905147795.1_ilPieRapa1.1_protein.faa -m diamond -o eggnog --cpu 60
    

    拿到eggnog的注释结果记得吧以#开头的行全部删掉
    step2:构建自己的物种数据库

    eggnogmapper<- read_delim(
      file = 'eggnog.emapper.annotations2',
      "\t",
      escape_double =FALSE,
      col_names=FALSE,
      comment = "#",trim_ws = TRUE)%>%
      dplyr::select(GID=X1,
                    Gene_Symbol=X9,
                    GO=X10,
                    KO=X12,
                    Pathway=X13,
                    OG=X7,
                    Gene_Name=X21)
    gene_info <- dplyr::select(eggnogmapper,  GID, Gene_Name) %>%
      dplyr::filter(!is.na(Gene_Name))
    # 提取GO信息
    gene2go <- dplyr::select(eggnogmapper, GID, GO) %>%
      separate_rows(GO, sep = ',', convert = F) %>%
      filter(!is.na(GO)) %>%
      mutate(EVIDENCE = 'IEA')
    gene2go <- subset(gene2go,grepl("^.*GO.*$",GO))
    
    
    # 构建 OrgDB,参数十分玄学,请按照以下格式
    library(AnnotationForge)
    AnnotationForge::makeOrgPackage(gene_info=gene_info,
                                                   go=gene2go,
                                                   maintainer='yuantao <909474045@qq.com>',
                                                   author='yuantao',
                                                   outputDir="/home/lx_sky6/yt/rworkplace",
                                                   tax_id=64459,
                                                   genus='Pieris',                     #只改动这里
                                                   species='rapae',               #和这里,进行命名
                                                   goTable="go",
                                                   version="1.0")
    pkgbuild::build('/home/lx_sky6/yt/rworkplace/org.Prap.eg.db', dest_path = "/home/lx_sky6/yt/rworkplace/")       #这两个路径自己创建
    

    step3:安装注释包进行注释

    install.packages("./org.Prapae.eg.db", repos = NULL,type = "source")
    
    library(org.Prapae.eg.db)
    library(readr)
    fuji1_gene <- read_table2("../anita/fuji1.txt")
    ##fuji1:254个基因
    de_gene <- pull(fuji1_gene, GID)
    de_ego <- enrichGO(gene = de_gene,
                       OrgDb = org.Prapae.eg.db,
                       keyType = 'GID',
                       ont = 'ALL',
                       qvalueCutoff = 0.05,
                       pvalueCutoff = 0.05)
    de_ego_df <- as.data.frame(de_ego)
    enrichplot::dotplot(de_ego,showCategory = 20)
    
    
    ##run__KEGG富集 
    pathway2gene <- dplyr::select(eggnogmapper, Pathway, GID) %>%
      separate_rows(Pathway, sep = ',', convert = F) %>%
      filter(str_detect(Pathway, 'ko')) %>%
      mutate(Pathway = str_remove(Pathway, 'ko'))
    library(magrittr)
    get_path2name <- function(){
      keggpathid2name.df <- clusterProfiler:::kegg_list("pathway")
      keggpathid2name.df[,1] %<>% gsub("path:map", "", .)
      colnames(keggpathid2name.df) <- c("path_id","path_name")
      return(keggpathid2name.df)
    }
    pathway2name <- get_path2name()
    library(clusterProfiler)
    de_ekp <- enricher(de_gene,
                       TERM2GENE = pathway2gene,
                       TERM2NAME = pathway2name,
                       pvalueCutoff = 0.05,
                       qvalueCutoff = 0.05)
    de_ekp_df <- as.data.frame(de_ekp)
    head(de_ekp_df)
    enrichplot::dotplot(de_ekp, showCategory = 20)
    ##导出数据
    write.table(de_ego_df,file = '/home/lx_sky6/yt/rworkplace/914_GO.csv',sep = ',',quote = FALSE)
    
    write.table(de_ekp_df,file = '/home/lx_sky6/yt/rworkplace/914_Kegg.csv',sep = ',',quote = FALSE)
    

    相关文章

      网友评论

        本文标题:更新go和kegg注释

        本文链接:https://www.haomeiwen.com/subject/gxdkortx.html