美文网首页ggplot集锦
文章复现进程3

文章复现进程3

作者: jiarf | 来源:发表于2021-12-13 18:32 被阅读0次

    这篇一部分复现的文章是
    Comprehensive Analysis of the Expression and Prognosis for E2Fs in Human Breast Cancer的图7图8图9

    原图如图所示

    一、Fig7 :A、 B 、C

    由于上图网络图没有分析结果,所以此处用的gene list是E2Fs
    使用GO分析结果,代码如下

    #bioinformatics  bp
    setwd('/data/jiarf/literiture')
    rm(list=ls())
    library(clusterProfiler)
    library(org.Hs.eg.db)
    bp <- read.delim('./chart_bp.txt')
    cc <- read.delim('./chart_cc.txt')
    genelist <- c('E2F1',
    'E2F2',
    'E2F3',
    'E2F4',
    'E2F5',
    'E2F6',
    'E2F7',
    'E2F8')
    geneinfo = select(org.Hs.eg.db, keys=keys(org.Hs.eg.db), columns = c('ENTREZID',"SYMBOL"))
    geneinfo2 <- geneinfo[which(geneinfo$SYMBOL%in% genelist),]
    
    ego_cc <- enrichGO(gene = geneinfo2$ENTREZID,
                       OrgDb=org.Hs.eg.db,
                       ont = "CC",##"MF", "BP", and "CC"
                       pAdjustMethod = "BH",
                       minGSSize = 1,
                       pvalueCutoff = 0.05)
    ego_cc_result<-as.data.frame(ego_cc@result)
    ego_cc_result <- ego_cc_result[ego_cc_result$pvalue<0.05,]
    library(ggplot2)
    
    dt <- ego_cc_result
    dt$`-log10(pvalue)` <- -log10(dt$pvalue)
    dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
    dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
    
    
    # p<- ggplot(dt,aes(`-log10(pvalue)`,y=reorder(Description,-pvalue)))+ #以pvalue和pathway为X轴和Y轴
    #   geom_point(aes(size=Count,color=Change))+ #点图大小和颜色数据
    #   labs(y='',x='-log10(Pvalue)')+
    #   scale_size_area(name="Gene count")+
    #   theme_bw()+
    #   geom_vline(xintercept = -log10(0.05),linetype =2,colour = 'black')+
    #   theme(axis.text=element_text(size=15,face = "bold", color="gray50"),
    #         axis.title=element_text(size=16),
    #         legend.text=element_text(size=16),legend.title = element_text(size=15))
    # p
    
    p <- ggplot(data=dt, aes(x=ID, y=Count,fill=`-log10(pvalue)`)) +
      geom_bar(stat="identity", width=0.8) + 
      coord_flip() +
      theme_bw() + 
      # scale_x_discrete(labels=rev(dt$Description)) +
      xlab("") +
      theme(axis.text=element_text(size=10,face = "plain", color="black"),
            axis.title=element_text(size=10),
            legend.text=element_text(size=10),legend.title = element_text(size=20)) +
      scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (16.671628-1.347296)/2)
    p
    ggsave('./CC.pdf',p,width=8,height = 5)
    write.table(dt,'./CC_data.tab',sep='\t',quote=F)
    #### bp
    ego_bp <- enrichGO(gene = geneinfo2$ENTREZID,
                       OrgDb=org.Hs.eg.db,
                       ont = "BP",##"MF", "BP", and "CC"
                       pAdjustMethod = "BH",
                       minGSSize = 1,
                       pvalueCutoff = 0.05)
    ego_bp_result<-as.data.frame(ego_bp@result)
    ego_bp_result <- ego_bp_result[ego_bp_result$pvalue<0.05,]
    library(ggplot2)
    dt <- ego_bp_result
    dt$`-log10(pvalue)` <- -log10(dt$pvalue)
    dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
    dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
     # dt2 <- dt[dt$ID %in% c('GO:0006352','GO:0051726','GO:0045739','GO:0034644','GO:0007265',
     #                        'GO:0006281','GO:0006270','GO:0006260',
     #                          'GO:0000122','GO:0000082'),]
    dt2 <- dt[order(dt$`-log10(pvalue)`,decreasing = T),][1:20,]
    range(dt2$`-log10(pvalue)`)
    p <- ggplot(data=dt2, aes(x=ID, y=Count,fill=`-log10(pvalue)`)) +
      geom_bar(stat="identity", width=0.8) + 
      coord_flip() +
      theme_bw() + 
      # scale_x_discrete(labels=rev(dt$Description)) +
      xlab("") +
      theme(axis.text=element_text(size=10,face = "plain", color="black"),
            axis.title=element_text(size=10),
            legend.text=element_text(size=10),legend.title = element_text(size=20)) +
      scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (9.485309-7.154111)/2+7.154111)
    p
    ggsave('./BP.pdf',p,width=8,height = 5)
    write.table(dt2,'./BP_data.tab',sep='\t',quote=F)
    ##### MF
    ego_mf <- enrichGO(gene = geneinfo2$ENTREZID,
                       OrgDb=org.Hs.eg.db,
                       ont = "MF",##"MF", "BP", and "CC"
                       pAdjustMethod = "BH",
                       minGSSize = 1,
                       pvalueCutoff = 0.05)
    ego_mf_result<-as.data.frame(ego_mf@result)
    ego_mf_result <- ego_mf_result[ego_mf_result$pvalue<0.05,]
    library(ggplot2)
    dt <- ego_mf_result
    dt$`-log10(pvalue)` <- -log10(dt$pvalue)
    dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
    dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
    range(dt$`-log10(pvalue)`)
    p <- ggplot(data=dt, aes(x=ID, y=Count,fill=`-log10(pvalue)`)) +
      geom_bar(stat="identity", width=0.8) + 
      coord_flip() +
      theme_bw() + 
      # scale_x_discrete(labels=rev(dt$Description)) +
      xlab("") +
      theme(axis.text=element_text(size=10,face = "plain", color="black"),
            axis.title=element_text(size=10),
            legend.text=element_text(size=10),legend.title = element_text(size=20)) +
      scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (5.640599-1.667579)/2+1.667579)
    p
    ggsave('./MF.pdf',p,width=8,height = 5)
    write.table(dt,'./MF_data.tab',sep='\t',quote=F)
    

    图分别为


    BP
    CC
    MF

    由图中我们可以看出,E2Fs的BP富集到的terms为

    GO:0000083 regulation of transcription involved in G1/S transition of mitotic cell cycle
    GO:0006977 DNA damage response, signal transduction by p53 class mediator resulting in cell cycle arrest
    GO:0072431 signal transduction involved in mitotic G1 DNA damage checkpoint
    GO:1902400 intracellular signal transduction involved in G1 DNA damage checkpoint
    GO:0072413 signal transduction involved in mitotic cell cycle checkpoint
    GO:1902402 signal transduction involved in mitotic DNA damage checkpoint
    GO:1902403 signal transduction involved in mitotic DNA integrity checkpoint
    GO:0031571 mitotic G1 DNA damage checkpoint
    GO:0044819 mitotic G1/S transition checkpoint
    GO:0044783 G1 DNA damage checkpoint
    GO:0072401 signal transduction involved in DNA integrity checkpoint
    GO:0072422 signal transduction involved in DNA damage checkpoint
    GO:0072395 signal transduction involved in cell cycle checkpoint
    GO:0071158 positive regulation of cell cycle arrest
    GO:0072331 signal transduction by p53 class mediator
    GO:0000082 G1/S transition of mitotic cell cycle
    GO:0044773 mitotic DNA damage checkpoint
    GO:0044843 cell cycle G1/S phase transition
    GO:0044774 mitotic DNA integrity checkpoint
    GO:0030330 DNA damage response, signal transduction by p53 class mediator

    与原文找到的通路有些出入,但是我们找到的结果中,已经包含了文章中提到的transcription, DNA-templatedDNA replication initiationDNA replication, G1/S transition of mitotic cell cycle, regulation of cell cycle, negative regulation of transcription from RNA polymerase II promoter,但是此处由于富集的 terms 条数太多,所以我们按照 -log10(pvalue) 选择 Top20 的通路进行了展示,
    同理CC的结果如下:

    GO:0090575 RNA polymerase II transcription regulator complex
    GO:0005667 transcription regulator complex
    GO:0035189 Rb-E2F complex
    GO:0000790 nuclear chromatin
    GO:0044665 MLL1/2 complex
    GO:0071339 MLL1 complex
    GO:0035097 histone methyltransferase complex
    GO:0034708 methyltransferase complex

    MF结果如下:

    GO:0090575 RNA polymerase II transcription regulator complex
    GO:0005667 transcription regulator complex
    GO:0035189 Rb-E2F complex
    GO:0000790 nuclear chromatin
    GO:0044665 MLL1/2 complex
    GO:0071339 MLL1 complex
    GO:0035097 histone methyltransferase complex
    GO:0034708 methyltransferase complex

    也包括了文章提到的transcription factor complex和一些细胞周期依赖蛋白复合物

    Fig8 kegg

    代码如下:

    #### kegg
    ego_kegg <- enrichKEGG(gene = geneinfo2$ENTREZID,
                       
                       keyType = 'kegg',##"MF", "BP", and "CC"
                       pAdjustMethod = "BH",
                       minGSSize = 1,
                       pvalueCutoff = 0.05)
    ego_kegg_result<-as.data.frame(ego_kegg@result)
    ego_kegg_result <- ego_kegg_result[ego_kegg_result$pvalue<0.05,]
    library(ggplot2)
    dt <- ego_kegg_result
    dt$`-log10(pvalue)` <- -log10(dt$pvalue)
    dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
    dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
    range(dt$`-log10(pvalue)`)
    p <- ggplot(data=dt, aes(x=Description, y=Count,fill=`-log10(pvalue)`)) +
      geom_bar(stat="identity", width=0.8) + 
      coord_flip() +
      theme_bw() + 
      # scale_x_discrete(labels=rev(dt$Description)) +
      xlab("") +
      theme(axis.text=element_text(size=10,face = "plain", color="black"),
            axis.title=element_text(size=10),
            legend.text=element_text(size=10),legend.title = element_text(size=20)) +
      scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (9.078405-1.360482 )/2+1.360482 )
    p
    ggsave('./kegg.pdf',p,width=8,height = 5)
    write.table(dt,'./kegg_data.tab',sep='\t',quote=F)
    
    
    结果如图
    原文只找到了15个通路,我们此处找到了24个通路,包括文章中提到的Cell cycle, TGF-beta signaling pathway,
    还有好多cancer的通路 但是没有找到signaling pathway, and cfa04310:Wnt signaling pathway

    Fig 9

    kegg pathview
    打开网站KEGG PATHWAY Database (genome.jp)

    输入E2F,点击GO
    点击第一个 cell cycle
    选择参考基因组为人,点击go
    得到如图所示结果,下载标圈即可
    同理可以找到 在kegg结果第二页
    接着把文件下载即可

    相关文章

      网友评论

        本文标题:文章复现进程3

        本文链接:https://www.haomeiwen.com/subject/oxzjfrtx.html