美文网首页TCrna_seq
TCGA 02 不同数据DEG结果取交集火山图及韦恩图展示

TCGA 02 不同数据DEG结果取交集火山图及韦恩图展示

作者: rochiman | 来源:发表于2020-05-18 23:23 被阅读0次

    1. 三个数据集差异基因火山图(每10个肿瘤和正常组织样本)

    导入表达矩阵,选择NT组的前10和TP组的前10个样本进行差异分析

    library(TCGAbiolinks)
    # 导入dataFilt表达矩阵
    load("dataFilt.RData")
    
    # selection of normal samples "NT"
    samplesNT <- TCGAquery_SampleTypes(barcode = colnames(dataFilt),
                                       typesample = c("NT"))
    # selection of tumor samples "TP"
    samplesTP <- TCGAquery_SampleTypes(barcode = colnames(dataFilt), 
                                       typesample = c("TP"))
    # Diff.expr.analysis (DEA)
    DEG.LUAD.edgeR <- TCGAanalyze_DEA(mat1 = dataFilt[,samplesNT[1:10]],
                                      mat2 = dataFilt[,samplesTP[1:10]],
                                      pipeline="edgeR",
                                      batch.factors = c("TSS"),
                                      Cond1type = "Normal",
                                      Cond2type = "Tumor",
                                      voom =FALSE, 
                                      method = "glmLRT",
                                      # fdr.cut = 0.01,  #保留FDR<0.01的基因
                                      # logFC.cut = 1 #保留logFC>1的基因
    )
    # ----------------------- DEA -------------------------------
    #   there are Cond1 type Normal in  10 samples
    # there are Cond2 type Tumor in  10 samples
    # there are  12980 features as miRNA or genes 
    # I Need about  8.7 seconds for this DEA. [Processing 30k elements /s]  
    # ----------------------- END DEA -------------------------------
    

    绘制第一个火山图

    valcano_data <- data.frame(genes=rownames(DEG.LUAD.edgeR), 
                               logFC=DEG.LUAD.edgeR$logFC, 
                               FDR=DEG.LUAD.edgeR$FDR,
                               group=rep("NotSignificant", 
                                         nrow(DEG.LUAD.edgeR)),
                               stringsAsFactors = F)
    
    valcano_data[which(valcano_data['FDR'] < 0.05 & 
                         valcano_data['logFC'] > 1.5),"group"] <- "Increased"
    valcano_data[which(valcano_data['FDR'] < 0.05 &
                         valcano_data['logFC'] < -1.5),"group"] <- "Decreased"
    
    cols = c("darkgrey","#00B2FF","orange")
    names(cols) = c("NotSignificant","Increased","Decreased")
    
    library(ggplot2)
    vol1 <- ggplot(valcano_data, aes(x = logFC, y = -log10(FDR), color = group))+
      scale_colour_manual(values = cols) +
      ggtitle(label = "Volcano Plot 1", subtitle = "LUAD 1-10 samples volcano plot") +
      geom_point(size = 2.5, alpha = 1, na.rm = T) +
      theme_bw(base_size = 14) + 
      theme(legend.position = "right") + 
      xlab(expression(log[2]("logFC"))) + 
      ylab(expression(-log[10]("FDR"))) +
      geom_hline(yintercept = 1.30102, colour="#990000", linetype="dashed") + 
      geom_vline(xintercept = 1.5849, colour="#990000", linetype="dashed") + 
      geom_vline(xintercept = -1.5849, colour="#990000", linetype="dashed")+ 
      scale_y_continuous(trans = "log1p")
    

    选择NT组的11-20和TP组的11-20样本进行差异分析

    # Diff.expr.analysis (DEA)
    DEG.LUAD.edgeR2 <- TCGAanalyze_DEA(mat1 = dataFilt[,samplesNT[11:20]],
                                      mat2 = dataFilt[,samplesTP[11:20]],
                                      pipeline="edgeR",
                                      batch.factors = c("TSS"),
                                      Cond1type = "Normal",
                                      Cond2type = "Tumor",
                                      voom =FALSE, 
                                      method = "glmLRT",
                                      # fdr.cut = 0.01,  #保留FDR<0.01的基因
                                      # logFC.cut = 1 #保留logFC>1的基因
    )
    # ----------------------- DEA -------------------------------
    #   there are Cond1 type Normal in  10 samples
    # there are Cond2 type Tumor in  10 samples
    # there are  12980 features as miRNA or genes 
    # I Need about  8.7 seconds for this DEA. [Processing 30k elements /s]  
    # ----------------------- END DEA -------------------------------
    

    绘制第2个火山图

    valcano_data2 <- data.frame(genes=rownames(DEG.LUAD.edgeR2), 
                               logFC=DEG.LUAD.edgeR2$logFC, 
                               FDR=DEG.LUAD.edgeR2$FDR,
                               group=rep("NotSignificant", 
                                         nrow(DEG.LUAD.edgeR2)),
                               stringsAsFactors = F)
    
    valcano_data2[which(valcano_data2['FDR'] < 0.05 & 
                         valcano_data2['logFC'] > 1.5),"group"] <- "Increased"
    valcano_data2[which(valcano_data2['FDR'] < 0.05 &
                         valcano_data2['logFC'] < -1.5),"group"] <- "Decreased"
    
    cols = c("darkgrey","#00B2FF","orange")
    names(cols) = c("NotSignificant","Increased","Decreased")
    
    library(ggplot2)
    vol2 <- ggplot(valcano_data2, aes(x = logFC, y = -log10(FDR), color = group))+
      scale_colour_manual(values = cols) +
      ggtitle(label = "Volcano Plot 2", subtitle = "LUAD 11-20 volcano plot") +
      geom_point(size = 2.5, alpha = 1, na.rm = T) +
      theme_bw(base_size = 14) + 
      theme(legend.position = "right") + 
      xlab(expression(log[2]("logFC"))) + 
      ylab(expression(-log[10]("FDR"))) +
      geom_hline(yintercept = 1.30102, colour="#990000", linetype="dashed") + 
      geom_vline(xintercept = 1.5849, colour="#990000", linetype="dashed") + 
      geom_vline(xintercept = -1.5849, colour="#990000", linetype="dashed")+ 
      scale_y_continuous(trans = "log1p")
    

    选择NT组的21-30和TP组的21-30样本进行差异分析

    # Diff.expr.analysis (DEA)
    DEG.LUAD.edgeR3 <- TCGAanalyze_DEA(mat1 = dataFilt[,samplesNT[21:30]],
                                      mat2 = dataFilt[,samplesTP[21:30]],
                                      pipeline="edgeR",
                                      batch.factors = c("TSS"),
                                      Cond1type = "Normal",
                                      Cond2type = "Tumor",
                                      voom =FALSE, 
                                      method = "glmLRT",
                                      # fdr.cut = 0.01,  #保留FDR<0.01的基因
                                      # logFC.cut = 1 #保留logFC>1的基因
    )
    # ----------------------- DEA -------------------------------
    #   there are Cond1 type Normal in  10 samples
    # there are Cond2 type Tumor in  10 samples
    # there are  12980 features as miRNA or genes 
    # I Need about  8.7 seconds for this DEA. [Processing 30k elements /s]  
    # ----------------------- END DEA -------------------------------
    

    绘制第3个火山图

    valcano_data3 <- data.frame(genes=rownames(DEG.LUAD.edgeR3), 
                                logFC=DEG.LUAD.edgeR3$logFC, 
                                FDR=DEG.LUAD.edgeR3$FDR,
                                group=rep("NotSignificant", 
                                          nrow(DEG.LUAD.edgeR3)),
                                stringsAsFactors = F)
    
    valcano_data3[which(valcano_data3['FDR'] < 0.05 & 
                         valcano_data3['logFC'] > 1.5),"group"] <- "Increased"
    valcano_data3[which(valcano_data3['FDR'] < 0.05 &
                         valcano_data3['logFC'] < -1.5),"group"] <- "Decreased"
    
    cols = c("darkgrey","#00B2FF","orange")
    names(cols) = c("NotSignificant","Increased","Decreased")
    
    library(ggplot2)
    vol3 <- ggplot(valcano_data3, aes(x = logFC, y = -log10(FDR), color = group))+
      scale_colour_manual(values = cols) +
      ggtitle(label = "Volcano Plot 3", subtitle = "LUAD 21-30 volcano plot") +
      geom_point(size = 2.5, alpha = 1, na.rm = T) +
      theme_bw(base_size = 14) + 
      theme(legend.position = "right") + 
      xlab(expression(log[2]("logFC"))) + 
      ylab(expression(-log[10]("FDR"))) +
      geom_hline(yintercept = 1.30102, colour="#990000", linetype="dashed") + 
      geom_vline(xintercept = 1.5849, colour="#990000", linetype="dashed") + 
      geom_vline(xintercept = -1.5849, colour="#990000", linetype="dashed")+ 
      scale_y_continuous(trans = "log1p")
    library(cowplot)
    library(patchwork)
    vol1+vol2+vol3
    
    3个火山图合并

    2. upset图和韦恩图分析

    提取3个数据的差异基因列表

    DEG1_up <- valcano_data[valcano_data$group=="Increased", "genes"]
    DEG1_down <- valcano_data[valcano_data$group=="Decreased", "genes"]
    
    DEG2_up <- valcano_data[valcano_data2$group=="Increased", "genes"]
    DEG2_down <- valcano_data[valcano_data2$group=="Decreased", "genes"]
    
    DEG3_up <- valcano_data[valcano_data3$group=="Increased", "genes"]
    DEG3_down <- valcano_data[valcano_data3$group=="Decreased", "genes"]
    

    用Y叔开发的ggupset做Upset图

    # devtools::install_github("GuangchuangYu/yyplot")
    # devtools::install_github("GuangchuangYu/UpSetR")
    # install.packages("venneuler")
    # remove.packages("ggplot2")
    # install.packages("ggimage")
    # if (!requireNamespace("BiocManager", quietly = TRUE))
    #     install.packages("BiocManager")
    # BiocManager::install("ComplexHeatmap")
    
    # 上调基因
    lt_up = list(TCGA_1 = DEG1_up,
                 TCGA_2 = DEG2_up,
                 TCGA_3 = DEG3_up)
    dat_up<- ComplexHeatmap::list_to_matrix(lt_up)
    dat_plot_up <-  data.frame(dat_up)
    
    require(UpSetR)
    p1 <- upset(dat_plot_up, sets=c("TCGA_3", "TCGA_2", "TCGA_1"),
      keep.order = TRUE)
    require(ggplotify)
    g1 <- as.ggplot(p1) + ggtitle("Up regulated")
    
    
    # 下调基因
    lt_down = list(TCGA_1 = DEG1_down,
                   TCGA_2 = DEG2_down,
                   TCGA_3 = DEG3_down)
    dat_down<- ComplexHeatmap::list_to_matrix(lt_down)
    dat_plot_down <-  data.frame(dat_down)
    
    require(UpSetR)
    p2 <- upset(dat_plot_down, , sets=c("TCGA_3", "TCGA_2", "TCGA_1"),
                keep.order = TRUE)
    require(ggplotify)
    g2 <- as.ggplot(p2) + ggtitle("Down regulated")
    
    # 拼图
    library(cowplot)
    library(patchwork)
    g1+g2
    
    upset图合并

    做韦恩图

    require(yyplot)
    library("ggsci")
    g3 <- ggvenn(dat_plot_up) + theme_void() +  
      scale_fill_jco() + ggtitle("Up regulated genes")
    g4 <- ggvenn(dat_plot_down) + theme_void() +  
      scale_fill_jco() + ggtitle("Down regulated genes")
    g3 + g4
    
    韦恩图合并

    upset和韦恩图拼接

    require(ggimage)
    g1_3 <- g1 + geom_subview(subview=g3+theme_void(), x=.78, y=.8, w=.5, h=.5)
    g2_4 <- g2 + geom_subview(subview=g4+theme_void(), x=.78, y=.8, w=.5, h=.5)
    
    g1_3+g2_4
    
    
    upset和韦恩图合并

    3. 火山图、upset图+韦恩图合并

    (vol1|vol2|vol3)/
      (g1_3|g2_4)
    
    火山图-韦恩图-upset图合并

    挑选出三个数据集中共同上调或下调的基因

    up_common <- Reduce(intersect, list(DEG1_up, DEG2_up, DEG3_up))
    down_common <- Reduce(intersect, list(DEG1_down, DEG2_down, DEG3_down))
    background_genes <- Reduce(union, list(DEG1_up, DEG2_up, DEG3_up,
                                           DEG1_down, DEG2_down, DEG3_down))
    up_common_df <- data.frame(gene=up_common, 
                               logFC=valcano_data[valcano_data$genes %in% 
                                                    up_common, 
                                                  "logFC"])
    down_common_df <- data.frame(gene=down_common, 
                                 logFC=valcano_data[valcano_data$genes %in% 
                                                      down_common, 
                                                    "logFC"])
    background_genes_df <- data.frame(gene=background_genes, 
                                  logFC=valcano_data[valcano_data$genes %in% 
                                                       background_genes, 
                                                     "logFC"], stringsAsFactors = F)
    all_gene_df <- valcano_data[, c("genes", "logFC")]
    save(list=c("up_common_df", "down_common_df", "background_genes_df", "all_gene_df"), 
         file="filtered_genes.RData")
    

    相关文章

      网友评论

        本文标题:TCGA 02 不同数据DEG结果取交集火山图及韦恩图展示

        本文链接:https://www.haomeiwen.com/subject/adfiohtx.html