美文网首页注释和富集无聊RNA-seq分析
非模式物种的一种简单版的go,kegg富集

非模式物种的一种简单版的go,kegg富集

作者: 夹竹桃的下午 | 来源:发表于2020-05-30 16:52 被阅读0次

    首先推荐Kobashttp://kobas.cbi.pku.edu.cn/kobas3/genelist/
    上传你的蛋白序列 genelist,背景基因选相似物种就可以做kegg,kegg的结果画图参考
    https://mp.weixin.qq.com/s/fwZ4rZOg8PdVb2ZE9wskoQ

    data01<-read.table("你物种的ko结果.txt",header = T,sep="\t")
    pathway=data.frame(data01[,c(1,4,6,7)])
    pathway=head(pathway,n=20)
    names(pathway)<-c("PATHWAY","Gene","Pvalue","Qvalue")
    library(ggplot2)
    ggplot(pathway,aes(Pvalue,PATHWAY))+
      geom_point(aes(size=Gene,color=-1*log10(Qvalue)))+
      scale_color_gradient(low="green",high = "red")+ 
      labs(color=expression(-log[10](Qvalue)),size="Gene",  
           x="Pvalue",     
           y="Pathway name",
           title="Top20 of Pathway enrichment")+ 
      theme_bw() 
    write.table(pathway,"simpathway.txt",row.names=F,col.names=T,sep="\t")
    
    
    image.png image.png

    我同时也去尝试kobas的go富集 但一直显示这个画面 不过kegg倒是挺快的


    image.png

    接下来可以用eggnog-mapper先注释然后构建自己的org.db 再用clusterProfiler包画,可以参考别人的做法 我做完感觉图还是不好看 ,构建库也是很慢
    接下来做Go我使用eggnog-mapper先对我要的物种注释获得Go号
    sim.emapper.annotations这是我的注释结果命名为sim

    
    vi chang_go.py
    #!/usr/bin/python
    # -*- coding: utf-8 -*-
    import sys, os
    x = sys.argv[1]
    file = open(x, "r")
    lines = file.readlines()
    for line in lines:
        line=line.strip()  
        if line.startswith("#"):
            continue
        else:
            tmp=line.split("\t")
            if tmp[6] == "":
                continue
            else :
                 mystr=tmp[0]+"\t"+tmp[6]
                 print (mystr)
    
    
    python chang_go.py sim >simout
    sed 's/,/\t/g' simout >simout.txt
    

    然后借用下某平台简单做一下
    https://www.omicshare.com/tools/home/report/goenrich.html
    把你的genelist和注释的结果一放就好了 结果会有类似这种图 和用R语言clusterProfiler包画的差不多吧 感觉应该可以这样做吧 请大家试试

    image.png

    6.7号更新
    用eggnog-mapper画go富集分析代码 不用构建物种库 快速运行 节约时间

    rm(list=ls())
    setwd("D:")
    BiocManager::install("stringr")
    BiocManager::install("dplyr")
    BiocManager::install("clusterProfiler")
    
    library(stringr)
    library(dplyr)
    egg<-read.csv(".emapper.annotations",sep="\t",header=T)
    egg[egg==""]<-NA  
    gene_ids <- egg$query_name
    eggnog_lines_with_go <- egg$GOs!= ""
    eggnog_lines_with_go
    eggnog_annoations_go <- str_split(egg[eggnog_lines_with_go,]$GOs, ",")
    gene_to_go <- data.frame(gene = rep(gene_ids[eggnog_lines_with_go],
                                        times = sapply(eggnog_annoations_go, length)),
                             term = unlist(eggnog_annoations_go))
    head(gene_to_go)
    library(clusterProfiler)
    gene_list <- read.table("ID")
    gene_list <-as.vector(gene_list$V1)
    term2gene<-gene_to_go[,c(2,1)]
    term2gene=na.omit(term2gene)
    df<-enricher(gene=gene_list,
                 pvalueCutoff = 0.05,
                 pAdjustMethod = "BH",
                 TERM2GENE = term2gene)
    head(df)
    barplot(df)
    dotplot(df)
    df<-as.data.frame(df)
    dim(df)
    df1<-go2term(df$ID)
    dim(df1)
    head(df1)
    df$term<-df1$Term
    df2<-go2ont(df$ID)
    dim(df2)
    head(df2)
    df$Ont<-df2$Ontology
    head(df)
    df3<-df%>%
      select(c("term","Ont","pvalue"))
    #df3=subset(df,select=c("term","Ont","pvalue"))
    head(df3)
    library(ggplot2)
    ggplot(df3,aes(x=term,y=-log10(pvalue)))+
      geom_col(aes(fill=Ont))+
      coord_flip()+labs(x="")+
      theme_bw()
    write.table(df3,"go_out",row.names=T,col.names=T,sep="\t")
    
    

    Go号由一对多变为多对多 然后换agriGo做

    library(tidyverse)
    setwd("D:/kobas注释/go")
    data01<-read.table("simout",header = F,sep="\t")
    names(data01)<-c("gene","GO_ID")
    data02<-data01 %>%
      tidyr::separate_rows(GO_ID, sep = ",")
    write.table(data02,"simgo.out",quote = FALSE,row.names=F,col.names=F,sep="\t")
    
    
    ###go富集 注释的分类图
    rm(list=ls())
    setwd("D:/")
    library(openxlsx)
    data<- read.xlsx("Functions enriched by Blast2GO.xlsx", sheet = 1)
    ###处理为ID   Description   GeneNumber     type
    data1=subset(data,select=c("GO.ID","GO.Name","Nr.test","GO.Category"))
    library(ggplot2)
    data1=data1[,c(2,3,4)]
    colnames(data1)=c("name","number","type")
    a=data1[data1$number>=10,]
    a=a[order(a$type),]
    ggplot(a,aes(x=factor(name,levels=unique(name)),y=number,fill=type))+geom_bar(stat="identity")+coord_flip()+
      theme_bw() + ylab("Number") + xlab("Name") 
    

    相关文章

      网友评论

        本文标题:非模式物种的一种简单版的go,kegg富集

        本文链接:https://www.haomeiwen.com/subject/jqkhzhtx.html