美文网首页ggplot集锦
【表观调控 实战】九、获得差异基因.bed与批量富集分析

【表观调控 实战】九、获得差异基因.bed与批量富集分析

作者: 佳奥 | 来源:发表于2022-08-27 09:46 被阅读0次

这里是佳奥!表观调控分析的最后一篇,分析剩余RNA-Seq的数据,让我们开始吧!

1 分离差异表达基因

第十二步,RNA-gene-bed

load(file = 'deg_output.Rdata')
library(ggpubr)
colnames(DEG_PhoKO)
DEG_PhoKO$log=log(DEG_PhoKO$baseMean+1)
DEG_PhoKO$change=ifelse(DEG_PhoKO$padj>0.05,'stable',
                        ifelse(DEG_PhoKO$log2FoldChange > 0,'up','down'))
table(DEG_PhoKO$change)
ggscatter(DEG_PhoKO,x="log" ,y="log2FoldChange",color = 'change')

DEG_SppsKO$log=log(DEG_SppsKO$baseMean+1)
DEG_SppsKO$change=ifelse(DEG_SppsKO$padj>0.05,'stable',
                         ifelse(DEG_SppsKO$log2FoldChange > 0,'up','down'))
table(DEG_SppsKO$change)
ggscatter(DEG_SppsKO,x="log" ,y="log2FoldChange",color = 'change')

library(UpSetR)

SppsKO_up=rownames(DEG_SppsKO[DEG_SppsKO$change=='up',])
SppsKO_down=rownames(DEG_SppsKO[DEG_SppsKO$change=='down',])
PhoKO_up=rownames(DEG_PhoKO[DEG_PhoKO$change=='up',])
PhoKO_down=rownames(DEG_PhoKO[DEG_PhoKO$change=='down',])

allG=unique(c(SppsKO_up,SppsKO_down,PhoKO_up,PhoKO_down))

df=data.frame(allG=allG,
              SppsKO_up=as.numeric(allG %in% SppsKO_up),
              SppsKO_down=as.numeric(allG %in% SppsKO_down),
              PhoKO_up=as.numeric(allG %in% PhoKO_up),
              PhoKO_down=as.numeric(allG %in% PhoKO_down))

upset(df)

重要的是df:一个01矩阵


QQ截图20220825215944.png

我们要把之前的数据取出来:572、474、447、444等。


QQ截图20220825220232.png
colnames(df)
# "SppsKO_up" "SppsKO_down" "PhoKO_up" "PhoKO_down" 

##筛选出这些数据,对比一下上面的colnames就可以理解如何取值了
SppsKO_up_uniq=df[df[,2]==1 & df[,3]==0 & df[,4]==0 & df[,5]==0 ,1] 
SppsKO_down_uniq=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==0 ,1]
PhoKO_up_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
PhoKO_down_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==0 & df[,5]==1 ,1]
SppsKO_up_PhoKO_up=df[df[,2]==1 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
SppsKO_down_PhoKO_down=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==1 ,1]
QQ截图20220825220402.png

但我们需要的是基因的坐标文件。

##加载包,内含坐标
library(org.Dm.eg.db)
t1=toTable(org.Dm.egCHRLOC)
t2=toTable(org.Dm.egCHRLOCEND)
t3=toTable(org.Dm.egSYMBOL)

##下一步是把t1、t2、t3、merge合并,再与基因相对应,留作作业

##另一个包也可以获取坐标信息
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene  )
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
gs=as.data.frame(genes(txdb))##获取信息表

library(org.Dm.eg.db)
t100=toTable(org.Dm.egFLYBASE)
t101=toTable(org.Dm.egSYMBOL)
t200=merge(t100,t101,by='gene_id')
colnames(gs)[6]="flybase_id"
t300=merge(t200,gs,by="flybase_id")

SppsKO_up_uniq
SppsKO_bed=t300[match(SppsKO_up_uniq[SppsKO_up_uniq %in% t300$symbol ],
                      t300$symbol),4:6]
QQ截图20220825221911.png QQ截图20220825225808.png

最终获取.bed


QQ截图20220825230153.png

2 多个基因富集分析

第十三步,annotation

对一个基因集注释

load(file = 'deg_output.Rdata')##导入DEG_Phoko和DEG_SppsKO的Data
library(ggpubr)
colnames(DEG_PhoKO)
DEG_PhoKO$log=log(DEG_PhoKO$baseMean+1)
DEG_PhoKO$change=ifelse(DEG_PhoKO$padj>0.05,'stable',
                        ifelse(DEG_PhoKO$log2FoldChange > 0,'up','down'))
table(DEG_PhoKO$change)
ggscatter(DEG_PhoKO,x="log" ,y="log2FoldChange",color = 'change')

DEG_SppsKO$log=log(DEG_SppsKO$baseMean+1)
DEG_SppsKO$change=ifelse(DEG_SppsKO$padj>0.05,'stable',
                         ifelse(DEG_SppsKO$log2FoldChange > 0,'up','down'))
table(DEG_SppsKO$change)
ggscatter(DEG_SppsKO,x="log" ,y="log2FoldChange",color = 'change')

library(UpSetR)

SppsKO_up=rownames(DEG_SppsKO[DEG_SppsKO$change=='up',])
SppsKO_down=rownames(DEG_SppsKO[DEG_SppsKO$change=='down',])
PhoKO_up=rownames(DEG_PhoKO[DEG_PhoKO$change=='up',])
PhoKO_down=rownames(DEG_PhoKO[DEG_PhoKO$change=='down',])

allG=unique(c(SppsKO_up,SppsKO_down,PhoKO_up,PhoKO_down))

df=data.frame(allG=allG,
              SppsKO_up=as.numeric(allG %in% SppsKO_up),
              SppsKO_down=as.numeric(allG %in% SppsKO_down),
              PhoKO_up=as.numeric(allG %in% PhoKO_up),
              PhoKO_down=as.numeric(allG %in% PhoKO_down))

upset(df)

colnames(df)
# "SppsKO_up"   "SppsKO_down" "PhoKO_up"    "PhoKO_down" 
SppsKO_up_uniq=df[df[,2]==1 & df[,3]==0 & df[,4]==0 & df[,5]==0 ,1] 
SppsKO_down_uniq=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==0 ,1]
PhoKO_up_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
PhoKO_down_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==0 & df[,5]==1 ,1]
SppsKO_up_PhoKO_up=df[df[,2]==1 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
SppsKO_down_PhoKO_down=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==1 ,1]
##anno分析,对第一个基因集进行注释
rm(list = ls())
load(file = '6-genesets.Rdata')

library(ggplot2)
library(clusterProfiler)
library(org.Dm.eg.db)

##单个基因集的KEGG注释,批量注释即生产function函数kegg_anno
kegg_anno <- function(gs,pro){
  #pro='SppsKO_up_uniq'
  #gs=SppsKO_up_uniq
  gene_up= bitr(gs, fromType = "SYMBOL",
                toType = c( "ENTREZID"),
                OrgDb = org.Dm.eg.db)[,2] 
  kk.up <- enrichKEGG(gene         = gene_up,
                      organism     = 'dme', 
                      keyType =  'ncbi-geneid',
                      pvalueCutoff = 0.9,
                      qvalueCutoff =0.9)
  head(kk.up)[,1:6]
  kk=kk.up
  dotplot(kk)
  ggsave(paste0(pro,'_kk.png'))
  kk=DOSE::setReadable(kk, OrgDb='org.Dm.eg.db',keyType='ENTREZID')
  write.csv(kk@result,paste0(pro,'_kk.up.csv'))
}

l=list(SppsKO_up_uniq,SppsKO_down_uniq,
       PhoKO_up_uniq, PhoKO_down_uniq,
       SppsKO_up_PhoKO_up,SppsKO_down_PhoKO_down)

names(l)=c('SppsKO_up_uniq','SppsKO_down_uniq',
           'PhoKO_up_uniq', 'PhoKO_down_uniq',
           'SppsKO_up_PhoKO_up','SppsKO_down_PhoKO_down')

lapply(1:length(l), function(i){
  kegg_anno(l[[i]],names(l)[i])
})

单个基因集_kk.png:


QQ截图20220826200936.png

运行上述全部代码,对目录下6个文件进行绘图分析。

Warning messages:
1: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
  2.25% of input gene IDs are fail to map...
2: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
  2.97% of input gene IDs are fail to map...
3: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
  2.91% of input gene IDs are fail to map...
4: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
  4.22% of input gene IDs are fail to map...
5: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
  1.5% of input gene IDs are fail to map...
6: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
  2.37% of input gene IDs are fail to map...

生成结果如下:

QQ截图20220826203503.png
如果遇到图片内容挤在一起,点击这个扫把Clear all Plots把plot界面还原一下,再运行一次就正常了。
QQ截图20220826203620.png
##用现成的函数compareCluster可以做到
library(ggplot2)
library(clusterProfiler)
library(org.Dm.eg.db)
gcSample=lapply(l, function(x){
  return( bitr(x, fromType = "SYMBOL",
               toType = c( "ENTREZID"),
               OrgDb = org.Dm.eg.db)[,2] )
})
xx <- compareCluster(gcSample, fun="enrichKEGG",
                     keyType =  'ncbi-geneid',
                     organism="dme", pvalueCutoff=0.05)
dotplot(xx)

最后生成的结果对比:


QQ截图20220826204007.png

别的分析依照GitHub代码来,如GO分析。

至此表观分析学习结束,主要就是下游的各种分析。

我们新的篇章再见!

相关文章

网友评论

    本文标题:【表观调控 实战】九、获得差异基因.bed与批量富集分析

    本文链接:https://www.haomeiwen.com/subject/wtrxnrtx.html