美文网首页ggplot集锦TC
帮老婆写代谢组的热图和火山图

帮老婆写代谢组的热图和火山图

作者: SolomonTeng | 来源:发表于2021-09-05 22:35 被阅读0次
    rm(list=ls())
    setwd('/Users/zhangjuxiang/Desktop/R time seq/')
    #Bioconductor 安装 edgeR
    #install.packages('BiocManager')  #需要首先安装 BiocManager,如果尚未安装请先执行该步
    BiocManager::install('edgeR',force=T)
    #读取基因表达矩阵
    targets <- read.csv('rawdata.csv')
    as.matrix(targets)
    rownames(targets) <- targets[,1]
    targets <- targets[,-1]
    #指定分组,注意要保证表达矩阵中的样本顺序和这里的分组顺序是一一对应的
    #对照组在前,处理组在后
    group <- rep(c('BC', 'EC'),c(121,192))
    
    
    
    
    
    library(edgeR)
    #数据预处理
    #(1)构建 DGEList 对象
    dgelist <- DGEList(counts = targets, group = group)
    
    #(2)过滤 low count 数据,例如 CPM 标准化(推荐)
    keep <- rowSums(cpm(dgelist) > 1 ) >= 2
    dgelist <- dgelist[keep,, keep.lib.sizes = FALSE]
    
    #(3)标准化,以 TMM 标准化为例
    dgelist_norm <- calcNormFactors(dgelist, method = 'TMM')
    
    
    
    
    
    
    #差异表达基因分析
    #首先根据分组信息构建试验设计矩阵,分组信息中一定要是对照组在前,处理组在后
    design <- model.matrix(~group)
    
    #(1)估算基因表达值的离散度
    dge <- estimateDisp(dgelist_norm, design, robust = TRUE)
    
    #(2)模型拟合,edgeR 提供了多种拟合算法
    #负二项广义对数线性模型
    fit <- glmFit(dge, design, robust = TRUE)
    lrt <- topTags(glmLRT(fit), n = nrow(dgelist$counts))
    
    write.table(lrt, 'control_treat.glmLRT.txt', sep = '\t', col.names = NA, quote = FALSE)
    
    #拟似然负二项广义对数线性模型
    fit <- glmQLFit(dge, design, robust = TRUE)
    lrt <- topTags(glmQLFTest(fit), n = nrow(dgelist$counts))
    
    write.table(lrt, 'control_treat.glmQLFit.txt', sep = '\t', col.names = NA, quote = FALSE)
    
    
    
    
    
    
    
    
    ##筛选差异表达基因
    #读取上述输出的差异倍数计算结果
    gene_diff <- read.delim('control_treat.glmLRT.txt', row.names = 1, sep = '\t', check.names = FALSE)
    
    #首先对表格排个序,按 FDR 值升序排序,相同 FDR 值下继续按 log2FC 降序排序
    gene_diff <- gene_diff[order(gene_diff$FDR, gene_diff$logFC, decreasing = c(FALSE, TRUE)), ]
    
    #log2FC≥1 & FDR<0.01 标识 up,代表显著上调的基因
    #log2FC≤-1 & FDR<0.01 标识 down,代表显著下调的基因
    #其余标识 none,代表非差异的基因
    gene_diff[which(gene_diff$logFC >= 1 & gene_diff$FDR < 0.05),'sig'] <- 'up'
    gene_diff[which(gene_diff$logFC <= -1 & gene_diff$FDR < 0.05),'sig'] <- 'down'
    gene_diff[which(abs(gene_diff$logFC) <= 1 | gene_diff$FDR >= 0.05),'sig'] <- 'none'
    
    #输出选择的差异基因总表
    gene_diff_select <- subset(gene_diff, sig %in% c('up', 'down'))
    write.table(gene_diff_select, file = 'control_treat.glmQLFit.select.txt', sep = '\t', col.names = NA, quote = FALSE)
    
    #根据 up 和 down 分开输出
    gene_diff_up <- subset(gene_diff, sig == 'up')
    gene_diff_down <- subset(gene_diff, sig == 'down')
    
    write.table(gene_diff_up, file = 'control_treat.glmQLFit.up.txt', sep = '\t', col.names = NA, quote = FALSE)
    write.table(gene_diff_down, file = 'control_treat.glmQLFit.down.txt', sep = '\t', col.names = NA, quote = FALSE)
    
    
    
    
    
    
    
    install.packages('pheatmap')
    library(pheatmap)
    {
      tmp = gene_diff_select[gene_diff_select$PValue < 0.05,]
      #差异结果需要先根据p值挑选
      nrDEG_Z = tmp[ order( tmp$logFC ), ]
      nrDEG_F = tmp[ order( -tmp$logFC ), ]
      choose_gene = c( rownames( nrDEG_Z )[1:100], rownames( nrDEG_F )[1:100] )
      choose_matrix = targets[ choose_gene, ]
      choose_matrix = t( scale( t( choose_matrix ) ) )
      
      choose_matrix[choose_matrix > 2] = 2
      choose_matrix[choose_matrix < -2] = -2
      
      annotation_col = data.frame( CellType = factor( group ) )
      rownames( annotation_col ) = colnames( targets )
      choose_matrix <- na.omit(choose_matrix)
      pheatmap( fontsize = 2, choose_matrix, annotation_col = annotation_col, show_rownames = F, annotation_legend = F, filename = "heatmap_BRCA_medianexp2.png")
    }
    
    
    
    
    
    
    
    
    install.packages('ggplot2')
    library( "ggplot2" )
    nrDEG <- gene_diff
    logFC_cutoff <- with( nrDEG, mean( abs( logFC ) ) + 2 * sd( abs( logFC ) ) )
    logFC_cutoff
    logFC_cutoff = 1
    {
      nrDEG$change = as.factor( ifelse( nrDEG$PValue < 0.01 & abs(nrDEG$logFC) > logFC_cutoff,
                                        ifelse( nrDEG$logFC > logFC_cutoff , 'UP', 'DOWN' ), 'NOT' ) )
      
      save( nrDEG, file = "nrDEG_array_medianexp.Rdata" )
      
      this_tile <- paste0( 'Cutoff for logFC is ', round( logFC_cutoff, 3 ),
                           ' The number of up M/Z is ', nrow(nrDEG[ nrDEG$change =='UP', ] ),
                           ' The number of down M/Z is ', nrow(nrDEG[ nrDEG$change =='DOWN', ] ) )
      
      volcano = ggplot(data = nrDEG, aes( x = logFC, y = -log10(PValue), color = change)) +
        geom_point( alpha = 0.4, size = 1.75) +
        theme_set( theme_set( theme_minimal( base_size = 15 ) ) ) +
        xlab( "log2 fold change" ) + ylab( "-log10 p-value" ) +
        theme(legend.title = element_text(colour="black", size=6, face="bold")) +
        theme(legend.text = element_text(colour="black", size = 7, face = "bold")) +
        theme(axis.title.x = element_text(size = 9, color = "black", face = "bold")) +
        theme(axis.title.y = element_text(size = 9, color = "black", face = "bold")) +
        ggtitle( this_tile ) + theme( plot.title = element_text( size = 8, hjust = 0.5, face = "bold" )) +
        theme(legend.position=c(1.2, 0.8)) +
        theme(aspect.ratio=1) +
        scale_colour_manual( values = c('green','black','red') ) + theme(panel.grid.major = element_line(colour = "white", 
                                                                                                         linetype = "blank"), panel.grid.minor = element_line(colour = "white"), 
                                                                         panel.background = element_rect(fill = "aliceblue", 
                                                                                                         colour = "white"), plot.background = element_rect(colour = "azure1"))
      print( volcano ) 
      ggsave( volcano, filename = 'volcano_BRCA_medianexp.tiff' )
      dev.off()
    }
    

    相关文章

      网友评论

        本文标题:帮老婆写代谢组的热图和火山图

        本文链接:https://www.haomeiwen.com/subject/lgpswltx.html