美文网首页
R可视化:Venn图进阶版本

R可视化:Venn图进阶版本

作者: 生信学习者2 | 来源:发表于2022-01-10 11:25 被阅读0次

    前言

    最近看到一张不一样的韦恩图,添加了上下调基因数目的韦恩图,将其实现一下,用于自己的研究展示。更多知识分享请到 https://zouhua.top/

    导入R包和数据

    library(dplyr)
    library(tibble)
    library(ggplot2)
    library(ggVennDiagram)
    library(ggpubr)
    library(data.table)
    
    subgrp <- c("HC", "CP", "PDAC")
    grp.col <- c("#568875", "#73FAFC", "#EE853D")
    
    PDAC_HC_Batch1 <- fread("PDAC_HC_TWilcox_Batch1Run.csv")
    PDAC_HC_Batch2 <- fread("PDAC_HC_TWilcox_Batch2Run.csv")
    

    构建函数

    VennFun <- function(daTest1=PDAC_HC_Batch1,
                        daTest2=PDAC_HC_Batch2 ,
                        datType1="PDAC vs HC(Batch1)",
                        datType2="PDAC vs HC(Batch2)",
                        group_name=subgrp[c(3,1)],
                        group_col=grp.col[c(3,1)],
                        Pval=0.05,
                        logFC=0.5){
      
      # extract significant features
      get_signif_num <- function(DataTest, Gname=group_name){
        
        # DataTest=daTest1
        # Gname=group_name
        
        datsignif <- DataTest %>% filter(Enrichment != "Nonsignif")
        if(nrow(datsignif) == 0){
          stop("There are no significant feature, please check your input")
        }
        total_num <- nrow(datsignif)
        dat_status <- table(datsignif$Enrichment) %>% data.frame() %>%
          setNames(c("Group", "Number"))
        grp1_num <- with(dat_status, dat_status[Group%in%Gname[1], "Number"])
        grp2_num <- with(dat_status, dat_status[Group%in%Gname[2], "Number"])
        
        if(length(grp1_num) == 0){
          grp1_num <- 0
        }
        if(length(grp2_num) == 0){
          grp2_num <- 0
        }    
        
        res <- list(sig=datsignif,
                    tnum=total_num,
                    num1=grp1_num,
                    num2=grp2_num)
        
        return(res)
      }
      # dat1 & data2
      datSig1 <- get_signif_num(daTest1)
      datSig2 <- get_signif_num(daTest2) 
      
      # extract the intersect features 
      get_intersect_feature <- function(DataTest1, DataTest2, Gname=group_name){
        
        # DataTest1=datSig1$sig
        # DataTest2=datSig2$sig
        # Gname=group_name
        
        # intersect
        interset_feature <- intersect(DataTest1$FeatureID, DataTest2$FeatureID)
        datinterset <- DataTest1 %>% 
          filter(FeatureID%in%interset_feature) %>%
          dplyr::select(FeatureID, Enrichment, Log2FoldChange) %>%
          dplyr::rename(Enrichment1=Enrichment, Log2FoldChange1=Log2FoldChange) %>%
          inner_join(DataTest2 %>% 
            dplyr::select(FeatureID, Enrichment, Log2FoldChange) %>%
            dplyr::rename(Enrichment2=Enrichment, Log2FoldChange2=Log2FoldChange),
          by = "FeatureID")
        total_num_inter <- nrow(datinterset)  
        datinterset$Enrichment <- with(datinterset,
                                       ifelse(Enrichment1 == Gname[1] & 
                                                Enrichment2 == Gname[1], Gname[1],
                                          ifelse(Enrichment1 == Gname[2] & 
                                                   Enrichment2 == Gname[2], Gname[2],
                                                 "Differently Regulated")))
        dat_status_inter <- table(datinterset$Enrichment) %>% data.frame() %>%
          setNames(c("Group", "Number"))
        grp1_num_inter <- with(dat_status_inter, dat_status_inter[Group%in%Gname[1], "Number"])
        grp2_num_inter <- with(dat_status_inter, dat_status_inter[Group%in%Gname[2], "Number"])
        DR_num_inter <- with(dat_status_inter, 
                             dat_status_inter[Group%in%"Differently Regulated",
                                                                "Number"])
        if(length(DR_num_inter) == 0){
          DR_num_inter <- 0
        }
        if(length(grp1_num_inter) == 0){
          grp1_num_inter <- 0
        }
        if(length(grp2_num_inter) == 0){
          grp2_num_inter <- 0
        }    
        res <- list(sig=datinterset,
                    tnum=total_num_inter,
                    num1=grp1_num_inter,
                    num2=grp2_num_inter,
                    num_DR=DR_num_inter)
        
        return(res)
      }
      datInter <- get_intersect_feature(datSig1$sig, datSig2$sig)
    
      
      # datalist
      datlist <- list(A=datSig1$sig$FeatureID, 
                      B=datSig2$sig$FeatureID)
      datggVenn <- ggVennDiagram::process_data(Venn(datlist))
      
      # vector for colors
      colorGroups <- c(group_col[1], "grey50", group_col[2])
      # # use colorRampPalette to create function that interpolates colors 
      # colfunc <- colorRampPalette(colorGroups)
      # # call function and create vector of 15 colors
      # col <- colfunc(100)
      
      # annotation
      datregion <- venn_region(datggVenn)
      datregion$count <- c(datSig1$tnum, datSig2$tnum, datInter$tnum)
      datsetlable <- venn_setlabel(datggVenn)
      datsetlable$name <- c(datType1, datType2)
      
      annotation <- data.frame(
       x = c(-2.5, -1.5, 1, 2, 3, 5.5, 6.5),
       y = rep(-0.5, 7),
       label = c(datSig1$num1, datSig1$num2, 
                 datInter$num1, datInter$num_DR, datInter$num2, 
                 datSig2$num1, datSig2$num2)#,
       #color = c("red", "blue", "red", "grey", "blue", "red", "blue")
       )
      
      Main_Venn_pl <- ggplot()+
        geom_sf(aes(fill=name), data=datregion, color="black")+
        geom_sf_text(aes(label=name), data=datsetlable, size=4)+
        geom_sf_text(aes(label=count), data=datregion, size=8, fontface="bold")+
        scale_fill_manual(values=colorGroups)+
        # dat1
        annotate("rect", 
                 xmin=annotation$x[1]-0.5, xmax=annotation$x[1]+0.5, 
                 ymin=annotation$y[1]-0.25, ymax=annotation$y[1]+0.25, 
                 fill="red", color="black", alpha=1)+
        annotate("rect", 
                 xmin=annotation$x[2]-0.5, xmax=annotation$x[2]+0.5, 
                 ymin=annotation$y[1]-0.25, ymax=annotation$y[1]+0.25,  
                 fill="blue", color="black", alpha=1)+ 
        # dat1 vs dat2
        annotate("rect", 
                 xmin=annotation$x[3]-0.5, xmax=annotation$x[3]+0.5, 
                 ymin=annotation$y[1]-0.25, ymax=annotation$y[1]+0.25,  
                 fill="red", color="black", alpha=1)+
        annotate("rect", 
                 xmin=annotation$x[4]-0.5, xmax=annotation$x[4]+0.5, 
                 ymin=annotation$y[1]-0.25, ymax=annotation$y[1]+0.25,  
                 fill="grey", color="black", alpha=1)+ 
        annotate("rect", 
                 xmin=annotation$x[5]-0.5, xmax=annotation$x[5]+0.5, 
                 ymin=annotation$y[1]-0.25, ymax=annotation$y[1]+0.25,  
                 fill="blue", color="black", alpha=1)+     
        # dat2 
        annotate("rect", 
                 xmin=annotation$x[6]-0.5, xmax=annotation$x[6]+0.5, 
                 ymin=annotation$y[1]-0.25, ymax=annotation$y[1]+0.25, 
                 fill="red", color="black", alpha=1)+
        annotate("rect", 
                 xmin=annotation$x[7]-0.5, xmax=annotation$x[7]+0.5, 
                 ymin=annotation$y[1]-0.25, ymax=annotation$y[1]+0.25,
                 fill="blue", color="black", alpha=1)+     
        geom_text(data=annotation, aes(x=x, y=y, label=label), 
                  size=5, fontface="bold", color="white")+
        guides(fill="none", color="none")+
        theme_void() 
        
      Text_annotation <- data.frame(
       x = c(-2, -2, -2),
       y = c(-5.1, -5.5, -5.9),
       label = c("Upregulated", "Differently Regulated", "Downregulated"))
      
      Regulation_venn_pl <- Main_Venn_pl +
        # Regulation_annotate 
        annotate("rect", 
                 xmin=Text_annotation$x[1]-0.5, xmax=Text_annotation$x[1]+3,
                 ymin=Text_annotation$y[3]-0.3, ymax=Text_annotation$y[1]+0.3,
                 fill="white", color="black", alpha=1)+      
        annotate("rect", 
                 xmin=Text_annotation$x[1]-0.1, xmax=Text_annotation$x[1]+0.1, 
                 ymin=Text_annotation$y[1]-0.1, ymax=Text_annotation$y[1]+0.1,  
                 fill="red", color="black", alpha=1)+
        annotate("rect", 
                 xmin=Text_annotation$x[1]-0.1, xmax=Text_annotation$x[1]+0.1,
                 ymin=Text_annotation$y[2]-0.1, ymax=Text_annotation$y[2]+0.1,  
                 fill="grey", color="black", alpha=1)+ 
        annotate("rect", 
                 xmin=Text_annotation$x[1]-0.1, xmax=Text_annotation$x[1]+0.1,
                 ymin=Text_annotation$y[3]-0.1, ymax=Text_annotation$y[3]+0.1,   
                 fill="blue", color="black", alpha=1)+
        geom_text(data=Text_annotation, aes(x=x, y=y, label=label), 
                  size=4, color="black", nudge_x = 1.5)
      
      Test_annotation <- data.frame(
       x = c(2.5, 2.5),
       y = c(-5.2, -5.6),
       label = c(paste("FDR threshold:", Pval), paste("Log2FoldChange threshold:", logFC))) 
      
      Test_venn_pl <- Regulation_venn_pl +
        # test
        annotate("rect", 
                 xmin=Test_annotation$x[1]-0.5, xmax=Test_annotation$x[1]+3,
                 ymin=Test_annotation$y[2]-0.3, ymax=Test_annotation$y[1]+0.4,  
                 fill="white", color="black", alpha=1)+
        geom_text(data=Test_annotation, aes(x=x, y=y, label=label), 
                  size=4, color="black", nudge_x = 1.5) 
      
      res <- list(pl=Test_venn_pl,
                  inter=datInter$sig)
      
      return(res)
    }
    

    运行

    PDAC_HC_Batch12_Venn <- VennFun(daTest1=PDAC_HC_Batch1,
                                    daTest2=PDAC_HC_Batch2,
                                    datType1="PDAC vs HC(Batch1)",
                                    datType2="PDAC vs HC(Batch2)",
                                    group_name=subgrp[c(3,1)],
                                    group_col=grp.col[c(3,1)],
                                    Pval=0.05,
                                    logFC=0.5)
    
    PDAC_HC_Batch12_Venn$pl
    DT::datatable(PDAC_HC_Batch12_Venn$inter)
    

    结果

    1. 两组比较结果的重叠部分基因中,有5和6个基因分别是共同高或低表达,还有6个基因是表达方向不一样的;

    2. 另外在不同比较中,韦恩图也展示了不同高低表达基因的数目。

    参考

    参考文章如引起任何侵权问题,可以与我联系,谢谢。

    相关文章

      网友评论

          本文标题:R可视化:Venn图进阶版本

          本文链接:https://www.haomeiwen.com/subject/sdkjcrtx.html