美文网首页GEO数据挖掘富集分析
第二部:简单的GEO数据的GO分析

第二部:简单的GEO数据的GO分析

作者: 碌碌无为的杰少 | 来源:发表于2020-05-24 10:52 被阅读0次

    GO分析

    GO分析有三个过程,GO_CC细胞组分,GO_BP生物过程, GO_MP分析功能,首先转换成ENTREZID,然后利用clusterProfiler函数。

    rm(list = ls())  
    load(file = 'step4output.Rdata')
    library(ggplot2)
    library(clusterProfiler)
    library(org.Hs.eg.db)
    library(dplyr)
    s2e <- bitr(deg$symbol, 
                fromType = "SYMBOL",
                toType = "ENTREZID",
                OrgDb = org.Hs.eg.db)#人类
    #其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
    deg <- inner_join(deg,s2e,by=c("symbol"="SYMBOL"))
    #富集分析考验网速,因此给大家保存了Rdata
    #上课运行示例数据无需修改,在做自己的数据时请注意把本行之后的load()去掉
    library(clusterProfiler)
    library(dplyr)
    library(ggplot2)
    source("kegg_plot_function.R")
    #source表示运行整个kegg_plot_function.R脚本,里面是一个function
    #以up_kegg和down_kegg为输入数据做图
    
    #1.GO database analysis ----
    
    #(1)输入数据
    gene_up = deg[deg$change == 'up','ENTREZID'] 
    gene_down = deg[deg$change == 'down','ENTREZID'] 
    gene_diff = c(gene_up,gene_down)
    gene_all = deg[,'ENTREZID']
    #(2)GO分析,分三部分
    #以下步骤耗时很长,实际运行时注意把if后面的括号里F改成T
    if(T){
      #细胞组分
      ego_CC <- enrichGO(gene = gene_up,
                         OrgDb= org.Hs.eg.db,
                         ont = "CC",
                         pAdjustMethod = "BH",
                         minGSSize = 1,
                         pvalueCutoff = 0.01,
                         qvalueCutoff = 0.01,
                         readable = TRUE)
      #生物过程
      ego_BP <- enrichGO(gene = gene_up,
                         OrgDb= org.Hs.eg.db,
                         ont = "BP",
                         pAdjustMethod = "BH",
                         minGSSize = 1,
                         pvalueCutoff = 0.01,
                         qvalueCutoff = 0.01,
                         readable = TRUE)
      #分子功能:
      ego_MF <- enrichGO(gene = gene_up,
                         OrgDb= org.Hs.eg.db,
                         ont = "MF",
                         pAdjustMethod = "BH",
                         minGSSize = 1,
                         pvalueCutoff = 0.01,
                         qvalueCutoff = 0.01,
                         readable = TRUE)
      save(ego_CC,ego_BP,ego_MF,file = "ego_GSE4107.Rdata")
    }
    ggg <- ego_BP@result
    load(file = "ego_GSE4107.Rdata")
    
    #(3)可视化
    #条带图
    dotplot(ego_BP,showCategory=20)
    barplot(ego_BP)
    
    image.png

    三条图联合

    三条图联合
    if(F){
      go <- enrichGO(gene = gene_up, OrgDb = "org.Hs.eg.db", ont="all")
    }
    ##save(go,file="go.Rdata")
    ### 直接加载我保存的数据
    
    library(ggplot2)
    p <- dotplot(go, split="ONTOLOGY") +facet_grid(ONTOLOGY~., scale="free")
    p
    
    image.png

    前五条通路的共同基因的circle图

    geneList = deg$logFC
    names(geneList)=deg$ENTREZID
    geneList = sort(geneList,decreasing = T)
    #(3)展示top5通路的共同基因,要放大看。
    #Gene-Concept Network
    cnetplot(ego_CC, categorySize="pvalue", foldChange=geneList,colorEdge = TRUE)
    cnetplot(ego_CC, foldChange=geneList, circular = TRUE, colorEdge = TRUE)
    
    image.png

    基因富集结果可视化

    emapplot(ego_CC)
    
    image.png

    通路之间的关系

    goplot(ego_CC)
    
    image.png

    基因展示的heatmap

    heatplot(ego_CC,foldChange = geneList)
    #太多基因就会糊。可通过调整比例或者减少基因来控制。
    pdf("heatplot.pdf",width = 14,height = 5)
    heatplot(ego_CC,foldChange = geneList)
    dev.off()
    
    image.png

    相关文章

      网友评论

        本文标题:第二部:简单的GEO数据的GO分析

        本文链接:https://www.haomeiwen.com/subject/rkynohtx.html