美文网首页富集分析
2021-04-29 kegg分析

2021-04-29 kegg分析

作者: 学习生信的小兔子 | 来源:发表于2021-04-29 00:24 被阅读0次

参考:生信技能树

#富集分析
rm(list=ls())
library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
load('deg.Rdata')
#由于enrichKEGG()需要输入的基因名格式为ENTREZID,所以需要转换一下,这里使用clusterProfiler包的bitr()函数
#提取包中的标准基因名(SYMBOL)与ENTREZID的对应关系
df <- bitr(unique(deg$symbol), fromType = "SYMBOL",
           toType = c( "ENTREZID"),
           OrgDb = org.Hs.eg.db)
library(clusterProfiler)
deg=merge(deg,df,by.y='SYMBOL',by.x='symbol') 
save(deg,file='deg.Rdata')
deg
library(clusterProfiler)
load('deg.Rdata')
gene_up <- deg[deg$change=="UP","ENTREZID"]
gene_down <- deg[deg$change=="DOWN","ENTREZID"]

##kegg.up
kk.up <- enrichKEGG(gene = gene_up,
                    organism = "hsa",
                    pvalueCutoff = 0.9,
                    qvalueCutoff = 0.9)
head(kk.up)
kegg_up_dt <- as.data.frame(kk.up)
##kegg.down
kk.down <- enrichKEGG(gene = gene_down,
                      organism = "hsa",
                      pvalueCutoff = 0.9,
                      qvalueCutoff = 0.9)
head(kk.down)
kegg_down_dt <- as.data.frame(kk.down)
##kegg.diff



kegg_plot <- function(up_kegg,down_kegg){
  dat=rbind(up_kegg,down_kegg)
  dat$pvalue <- -log10(dat$pvalue)
  dat$pvalue <- dat$pvalue*dat$group
  dat=dat[order(dat$pvalue,decreasing = F),]
  ggplot(dat, aes(x=reorder(Description,order(pvalue,decreasing= F)),y=pvalue, fill=group)) + 
    #x轴按对应的pvalue值从大到小排列pathway的Description
    geom_bar(stat='identity') +
    #设置柱状图高低直接为数值大小,而不是counts
    scale_fill_gradient(low="blue",high = "red", guide = F) +
    scale_x_discrete(name="pathway names") +
    #针对字符型轴标签注释
    scale_y_continuous(name="log10P-value") +
    #针对连续型轴标题注释
    coord_flip() + theme_bw() + theme(plot.title = element_text(hjust = 0.5)) +
    ggtitle("pathway enrichment")
}
up_kegg <- kegg_up_dt[kegg_up_dt$pvalue<0.05,];up_kegg$group <- -1
down_kegg <- kegg_down_dt[kegg_down_dt$pvalue<0.05,];down_kegg$group <- 1
kegg_plot(up_kegg,down_kegg)
kegg.plot
### GO database analysis 
### 做GO数据集超几何分布检验分析,重点在结果的可视化及生物学意义的理解。
{
  
  g_list=list(gene_up=gene_up,
              gene_down=gene_down,
              gene_diff=gene_diff)
  # 因为go数据库非常多基因集,所以运行速度会很慢。
  if(F){
    go_enrich_results <- lapply( g_list , function(gene) {
      lapply( c('BP','MF','CC') , function(ont) {
        cat(paste('Now process ',ont ))
        ego <- enrichGO(gene          = gene,
                        universe      = gene_all,
                        OrgDb         = org.Hs.eg.db,
                        ont           = ont ,
                        pAdjustMethod = "BH",
                        pvalueCutoff  = 0.99,
                        qvalueCutoff  = 0.99,
                        readable      = TRUE)
        
        print( head(ego) )
        return(ego)
      })
    })
    save(go_enrich_results,file = 'go_enrich_results.Rdata')
    
  }
  # 只有第一次需要运行,就保存结果哈,下次需要探索结果,就载入即可。
  load(file = 'go_enrich_results.Rdata')
  
  n1= c('gene_up','gene_down','gene_diff')
  n2= c('BP','MF','CC') 
  for (i in 1:3){
    for (j in 1:3){
      fn=paste0('dotplot_',n1[i],'_',n2[j],'.png')
      cat(paste0(fn,'\n'))
      png(fn,res=150,width = 1080)
      print( dotplot(go_enrich_results[[i]][[j]] ))
      dev.off()
    }
  }
  
  
}

电脑卡住了 先做笔记吧。。。
参考 生信技能树 和 小贝学生信

相关文章

网友评论

    本文标题:2021-04-29 kegg分析

    本文链接:https://www.haomeiwen.com/subject/nbhirltx.html