美文网首页
三大富集

三大富集

作者: 郭师傅 | 来源:发表于2022-04-19 09:59 被阅读0次

    三大富集分析,画图

    1.调包,有没有用都调

    # Environment setup for the enrichment workflow.
    # The original `rm(list = ls())` is intentionally not executed here: it wipes
    # the whole workspace, including `module_brown`, which the ID-conversion step
    # reads and which this script never recreates.
    path <- "your path" # placeholder: replace with the project directory
    setwd(path) # later steps save figures relative to this directory
    library(GEOquery)
    library(stringr)
    library(dplyr)
    library(limma)
    library(reshape2)
    library(ggplot2)
    library(ggrepel)
    library(WGCNA)
    library(ggpubr)
    library(patchwork)
    library(clusterProfiler)
    library(org.Hs.eg.db)
    # Pin `select` to dplyr's version; packages attached after dplyr may mask it
    # (presumably AnnotationDbi via org.Hs.eg.db -- confirm with conflicts()).
    select <- dplyr::select
    gse_id <- "your gse" # placeholder; not used by the steps visible in this script
    # Sets the VROOM_CONNECTION_SIZE environment variable to 131072 * 6;
    # presumably needed so wide GEO files can be read -- TODO confirm.
    Sys.setenv("VROOM_CONNECTION_SIZE" = 131072 * 6)
    

    2.id转换

    # Map the gene symbols of the module to Entrez IDs.
    # `module_brown` must already exist in the workspace as a vector of gene
    # symbols; it is not created in this script.
    x <- module_brown
    gene_id <- bitr(
      x,                      # input: vector of gene symbols
      fromType = "SYMBOL",    # the input IDs are gene symbols
      toType = "ENTREZID",    # convert them to Entrez gene IDs
      OrgDb = "org.Hs.eg.db"  # human annotation database
    )
    

    3.GO富集分析,需要联网

    # GO over-representation analysis.
    # - gene: vector of gene IDs to test (here the converted Entrez IDs).
    # - keyType: ID type of `gene`; defaults to ENTREZID. Valid values are listed
    #   by keytypes(org.Hs.eg.db). ENTREZID is the recommended choice.
    # - OrgDb: name of the organism annotation package.
    # - ont: GO category, one of BP, CC, MF, or ALL for all three.
    # - pAdjustMethod: multiple-testing correction; left at its default ("BH").
    # - qvalueCutoff: threshold applied to the q-value.
    # - readable = TRUE: report gene symbols instead of gene IDs in the result.
    # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
    GO <- enrichGO(
      gene = gene_id$ENTREZID,
      OrgDb = "org.Hs.eg.db",
      keyType = "ENTREZID",
      ont = "ALL",
      qvalueCutoff = 0.05,
      readable = TRUE
    )
    

    4.画图前整理数据

    # Flatten the enrichment result and keep the first terms of each ontology.
    go <- as.data.frame(GO)
    # View(go)
    table(go$ONTOLOGY) # number of enriched terms per ontology (BP, CC, MF)

    # head() returns at most `n_top_terms` rows. The previous `[1:10, ]` indexing
    # padded the table with all-NA rows whenever an ontology had fewer than 10
    # enriched terms, and the hard-coded rep(..., 10) then mislabelled them.
    n_top_terms <- 10
    go_MF <- head(go[go$ONTOLOGY == "MF", ], n_top_terms)
    go_CC <- head(go[go$ONTOLOGY == "CC", ], n_top_terms)
    go_BP <- head(go[go$ONTOLOGY == "BP", ], n_top_terms)

    # Rows are stacked BP, then CC, then MF; the `type` labels follow the actual
    # number of rows in each group.
    go_enrich_df <- data.frame(
      ID = c(go_BP$ID, go_CC$ID, go_MF$ID),
      Description = c(go_BP$Description, go_CC$Description, go_MF$Description),
      GeneNumber = c(go_BP$Count, go_CC$Count, go_MF$Count),
      type = factor(
        rep(
          c("biological process", "cellular component", "molecular function"),
          times = c(nrow(go_BP), nrow(go_CC), nrow(go_MF))
        ),
        levels = c("molecular function", "cellular component", "biological process")
      )
    )

    ## numbers as data on x axis (reversed so the first row gets the largest
    ## number); seq_len() is safe when the table is empty
    go_enrich_df$number <- factor(rev(seq_len(nrow(go_enrich_df))))
    

    5.定义一个函数,对过长的术语名称缩短显示

    ## shorten the names of GO terms
    #' Shorten a long term name for use as an axis label.
    #'
    #' A name is shortened when it has more than `n_word` space-separated words
    #' or more than `n_char` characters. It is first cut to `n_char` characters,
    #' then reduced to its first `n_word` words, and "..." is appended.
    #'
    #' @param x A single character string.
    #' @param n_word Maximum number of words to keep.
    #' @param n_char Maximum number of characters considered. This argument was
    #'   previously ignored in favour of a hard-coded 40; the default keeps the
    #'   old behaviour.
    #' @return `x` unchanged if it is short enough or NA, otherwise the
    #'   shortened string ending in "...".
    shorten_names <- function(x, n_word = 4, n_char = 40) {
      # NA would make the `if` conditions below NA and raise an error.
      if (is.na(x)) {
        return(x)
      }
      too_many_chars <- nchar(x) > n_char
      words <- strsplit(x, " ")[[1]]
      if (length(words) <= n_word && !too_many_chars) {
        return(x)
      }
      if (too_many_chars) {
        # Re-split after truncation so a cut-off trailing word is handled.
        words <- strsplit(substr(x, 1, n_char), " ")[[1]]
      }
      paste0(paste(head(words, n_word), collapse = " "), "...")
    }
    
    # Shortened axis labels, keyed by the reversed row number used on the x axis.
    # vapply() always returns a character vector (sapply() returns a list for
    # empty input), and seq_len() avoids the c(1, 0) trap of 1:nrow() on an
    # empty table.
    labels <- vapply(go_enrich_df$Description, shorten_names, character(1))
    names(labels) <- rev(seq_len(nrow(go_enrich_df)))
    

    6.绘制富集分析图

    # Horizontal bar chart of the selected GO terms, filled by ontology.
    ## colors for bar, in factor-level order of `type`: blue, orange, green
    CPCOLS <- c("#8DA1CB", "#FD8D62", "#66C3A5")

    # NOTE(review): the title and object name say "Greenyellow", but the input is
    # `module_brown` and the file is saved as p_go_brown.pdf -- confirm which
    # module this figure describes and align the title.
    p_go_greenyellow <- ggplot(
      data = go_enrich_df,
      aes(x = number, y = GeneNumber, fill = type)
    ) +
      geom_bar(stat = "identity", width = 0.8) +
      coord_flip() + # swap the x and y axes so the bars run horizontally
      scale_fill_manual(values = CPCOLS) +
      theme_test() +
      scale_x_discrete(labels = labels) +
      xlab("GO term") +
      labs(title = "The Most Enriched GO Terms in Greenyellow Module") +
      theme(text = element_text(size = 8)) +
      theme(
        legend.box = "",
        legend.position = "top",
        legend.key.size = unit(0.3, "cm"),
        legend.text = element_text(size = 8),
        legend.box.spacing = unit(2, "mm")
      )
    p_go_greenyellow

    # file.path() builds a separator that works on every platform; the previous
    # ".\\plots\\..." string is only a directory path on Windows. The output
    # directory is created first so ggsave() does not fail when it is missing.
    dir.create("plots", showWarnings = FALSE)
    ggsave(
      p_go_greenyellow,
      filename = file.path("plots", "p_go_brown.pdf"),
      width = 9, height = 10, units = "cm"
    )
    

    7.KEGG富集分析,需要联网

    # KEGG pathway enrichment for the converted Entrez IDs ("hsa" organism code),
    # with both the p-value and the q-value cutoff set to 0.05.
    KEGG <- enrichKEGG(
      gene = gene_id$ENTREZID,
      organism = "hsa",
      pvalueCutoff = 0.05,
      qvalueCutoff = 0.05
    )
    

    8.绘制kegg气泡图

    # Bubble plot of the first 15 KEGG pathways: x = p-value, point size = gene
    # count, colour = -log10(q-value).

    # `df_kegg` was used without ever being defined; derive it from the KEGG
    # enrichment result.
    df_kegg <- as.data.frame(KEGG)

    # Shortened pathway labels. Note that the plot below wraps the full names
    # with str_wrap() and does not consume `labels`.
    labels <- vapply(df_kegg$Description, shorten_names, character(1))
    names(labels) <- rev(seq_len(nrow(df_kegg)))

    # head() avoids the all-NA padding rows that `[1:15, ]` produces when fewer
    # than 15 pathways are enriched.
    kegg_greenyellow <- ggplot(head(df_kegg, 15), aes(x = pvalue, y = Description)) +
      geom_point(aes(size = Count, color = -1 * log10(qvalue))) +
      scale_color_gradient(low = "green", high = "red") +
      labs(
        color = expression(-log[10](qvalue)),
        size = "gene",
        x = "pvalue",
        y = "pathway name",
        title = "pathway enrichment"
      ) +
      theme_bw() +
      scale_y_discrete(labels = function(x) str_wrap(x, width = 40)) +
      theme(
        text = element_text(size = 8),
        legend.box.spacing = unit(1, "mm"),
        legend.position = "right",
        legend.key.width = unit(0.2, "cm"),
        legend.key.height = unit(0.3, "cm"),
        legend.text = element_text(size = 8)
      )
    kegg_greenyellow

    # Portable output path (the previous ".\\plots\\..." form is Windows-only);
    # make sure the directory exists before saving.
    dir.create("plots", showWarnings = FALSE)
    ggsave(
      kegg_greenyellow,
      filename = file.path("plots", "kegg_greenyellow.pdf"),
      width = 10, height = 10, units = "cm"
    )
    

    9.其他画法:不用手动整理数据再写ggplot2代码,直接对富集结果调用自带的barplot、dotplot也行(返回的仍是ggplot对象,可继续叠加ggplot2图层)

    # Built-in plotting methods applied directly to the enrichment result, one
    # facet row per ontology. `scales` is spelled out in full: the previous
    # `scale =` only worked through partial argument matching.
    # Bar chart
    barplot(GO, split = "ONTOLOGY") +
      facet_grid(ONTOLOGY ~ ., scales = "free")
    # Dot plot
    dotplot(GO, split = "ONTOLOGY") +
      facet_grid(ONTOLOGY ~ ., scales = "free") +
      theme(text = element_text(size = 8), strip.background = element_blank())
    

    相关文章

      网友评论

          本文标题:三大富集

          本文链接:https://www.haomeiwen.com/subject/kghsertx.html