美文网首页TCGATCGA数据挖掘转录组数据分析
第二步:TCGA数据差异分析整理

第二步:TCGA数据差异分析整理

作者: 碌碌无为的杰少 | 来源:发表于2020-06-01 11:09 被阅读0次

    安装软件

    差异分析常用的R包有三个:DESeq2、edgeR 和 limma,三个包都可以使用。作者更倾向于 DESeq2,但这个包运行较慢,建议睡前开始跑,醒来时就跑完了。

    # Install any dependency that is not yet available.
    # CRAN packages are installed with install.packages(); Bioconductor
    # packages are installed through BiocManager, which is itself installed
    # first when absent (a bare BiocManager::install() call fails without it).
    # Packages are only installed here; each section below attaches what it
    # needs with library().
    cran_pkgs <- c("ggplotify", "patchwork", "cowplot")
    bioc_pkgs <- c("DESeq2", "edgeR", "limma")
    for (pkg in cran_pkgs) {
      if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
      }
    }
    if (!requireNamespace("BiocManager", quietly = TRUE)) {
      install.packages("BiocManager")
    }
    for (pkg in bioc_pkgs) {
      if (!requireNamespace(pkg, quietly = TRUE)) {
        BiocManager::install(pkg)
      }
    }
    

    DESeq2

    # DESeq2 differential expression: fit (or reuse) the model, extract the
    # results for the contrast rev(levels(group_list)), and label every gene
    # as UP / DOWN / NOT.
    rm(list = ls())
    # Expected to provide exp, group_list and cancer_type, all of which are
    # used below -- NOTE(review): confirm against the script that wrote it.
    load("TCGA-stamgdc.Rdata")
    table(group_list)
    # deseq2 ----
    library(DESeq2)
    colData <- data.frame(row.names = colnames(exp),
                          condition = group_list)
    dds_file <- paste0(cancer_type, "dd.Rdata")
    if (!file.exists(dds_file)) {
      dds <- DESeqDataSetFromMatrix(
        countData = exp,
        colData = colData,
        design = ~ condition)
      dds <- DESeq(dds)
      save(dds, file = dds_file)
    } else {
      # Reuse the cached fit; without this, dds is undefined on later runs.
      load(dds_file)
    }
    res <- results(dds, contrast = c("condition", rev(levels(group_list))))
    resOrdered <- res[order(res$padj), ] # order by adjusted p-value
    DEG <- as.data.frame(resOrdered)
    # Drop rows containing NA (same step as the limma section) so the cutoff
    # and the change labels below are never NA.
    DEG <- na.omit(DEG)
    head(DEG)

    # Add a change column marking genes as up- or down-regulated.
    logFC_cutoff <- with(DEG, mean(abs(log2FoldChange)) + 2 * sd(abs(log2FoldChange)))
    #logFC_cutoff <- 2
    DEG$change <- as.factor(
      ifelse(DEG$padj < 0.05 & abs(DEG$log2FoldChange) > logFC_cutoff,
             ifelse(DEG$log2FoldChange > logFC_cutoff, 'UP', 'DOWN'), 'NOT')
    )
    head(DEG)

    table(DEG$change)
    DESeq2_DEG <- DEG
    save(DESeq2_DEG, file = "DESeq2_DEG.Rdata")
    load(file = "DESeq2_DEG.Rdata")
    

    edgeR

    # edgeR differential expression: GLM likelihood-ratio test on the two
    # groups in group_list, then label every gene as UP / DOWN / NOT.
    rm(list = ls())
    load("TCGA-stamgdc.Rdata")
    library(edgeR)

    dge <- DGEList(counts = exp, group = group_list)
    dge$samples$lib.size <- colSums(dge$counts)
    dge <- calcNormFactors(dge)

    # One column per group, no intercept; rows named by sample and columns
    # named by group level.
    design <- model.matrix(~0 + group_list)
    dimnames(design) <- list(colnames(dge), levels(group_list))

    # Common, trended and tagwise dispersion estimates, in that order.
    dge <- estimateGLMCommonDisp(dge, design)
    dge <- estimateGLMTrendedDisp(dge, design)
    dge <- estimateGLMTagwiseDisp(dge, design)

    fit <- glmFit(dge, design)
    # Contrast: second design column minus the first.
    fit2 <- glmLRT(fit, contrast = c(-1, 1))

    DEG <- as.data.frame(topTags(fit2, n = nrow(exp)))

    # Cutoff: mean + 2 * sd of the absolute log fold changes.
    abs_logFC <- abs(DEG$logFC)
    logFC_cutoff <- mean(abs_logFC) + 2 * sd(abs_logFC)
    #logFC_cutoff <- 2
    is_significant <- DEG$FDR < 0.05 & abs_logFC > logFC_cutoff
    direction <- ifelse(DEG$logFC > logFC_cutoff, 'UP', 'DOWN')
    DEG$change <- as.factor(ifelse(is_significant, direction, 'NOT'))
    head(DEG)
    table(DEG$change)
    edgeR_DEG <- DEG
    save(edgeR_DEG, file = "edgeR_DEG .Rdata")
    load(file = "edgeR_DEG .Rdata")
    

    limma

    # limma-voom differential expression: voom-transform the counts, fit a
    # linear model, test the contrast rev(levels(group_list)), and label every
    # gene as UP / DOWN / NOT.
    rm(list = ls())
    load("TCGA-stamgdc.Rdata")
    table(group_list)
    library(limma)
    # DGEList() and calcNormFactors() are edgeR functions, so attach edgeR
    # here instead of relying on an earlier section having loaded it.
    library(edgeR)

    design <- model.matrix(~0 + group_list)
    colnames(design) <- levels(group_list)
    rownames(design) <- colnames(exp)

    dge <- DGEList(counts = exp)
    dge <- calcNormFactors(dge)

    # Argument spelled out in full rather than relying on partial matching.
    v <- voom(dge, design, normalize.method = "quantile")
    fit <- lmFit(v, design)

    # Contrast string built from the reversed group levels joined by "-".
    contrast_name <- paste(rev(levels(group_list)), collapse = "-")
    cont.matrix <- makeContrasts(contrasts = contrast_name, levels = design)
    fit2 <- contrasts.fit(fit, cont.matrix)
    fit2 <- eBayes(fit2)

    DEG <- topTable(fit2, coef = contrast_name, number = Inf)
    DEG <- na.omit(DEG)
    # Cutoff: mean + 2 * sd of the absolute log fold changes.
    logFC_cutoff <- with(DEG, mean(abs(logFC)) + 2 * sd(abs(logFC)))
    #logFC_cutoff <- 2
    DEG$change <- as.factor(
      ifelse(DEG$adj.P.Val < 0.05 & abs(DEG$logFC) > logFC_cutoff,
             ifelse(DEG$logFC > logFC_cutoff, 'UP', 'DOWN'), 'NOT')
    )
    head(DEG)

    #y=as.numeric(exp[rownames(DEG)[10],])
    #x=group_list
    #boxplot(y~x)
    limma_voom_DEG <- DEG
    save(limma_voom_DEG, file = "limma_voom_DEG .Rdata")
    

    查看三个R包上调及下调个数

    # Tabulate how many genes each package calls DOWN / NOT / UP and bundle
    # the three result tables into one .Rdata file.
    rm(list = ls())
    # group_list and cancer_type are used by save() below but are not part of
    # the three DEG files, so reload the input data after clearing the
    # workspace. NOTE(review): assumes this file provides both objects, as the
    # earlier sections also rely on.
    load("TCGA-stamgdc.Rdata")
    load(file = "edgeR_DEG .Rdata")
    load(file = "limma_voom_DEG .Rdata")
    load(file = "DESeq2_DEG.Rdata")

    # Count with fixed levels so the three rows always line up with the
    # row names, even if a category is empty for one package.
    count_changes <- function(deg) {
      as.integer(table(factor(deg$change, levels = c("DOWN", "NOT", "UP"))))
    }
    tj <- data.frame(deseq2 = count_changes(DESeq2_DEG),
                     edgeR = count_changes(edgeR_DEG),
                     limma_voom = count_changes(limma_voom_DEG),
                     row.names = c("down", "not", "up"))
    tj
    save(DESeq2_DEG, edgeR_DEG, limma_voom_DEG, group_list, tj,
         file = paste0(cancer_type, "DEG.Rdata"))
    
    image.png

    PCA主成分分析

    # PCA of the log-transformed expression matrix, coloured by group.
    rm(list = ls())
    load("TCGA-stamDEG.Rdata")
    load("TCGA-stamgdc.Rdata")

    # Install tinyplanet from the local zip archive when it is not available.
    if (!require(tinyplanet)) {
      devtools::install_local("tinyplanet-master.zip", upgrade = FALSE)
    }
    library(ggplot2)
    library(tinyplanet)
    exp[1:4, 1:4]
    # Natural log of counts + 1.
    dat <- log(exp + 1)
    pca.plot <- draw_pca(dat, group_list)
    pca.plot
    save(pca.plot, file = paste0(cancer_type, "pcaplot.Rdata"))
    
    image.png

    热图和火山图

    
    # Heatmaps and volcano plots of the differential genes from each package.
    # Uses dat, group_list, cancer_type and the three *_DEG tables that the
    # PCA section left in the workspace.

    # Names of the genes labelled UP or DOWN. %in% never returns NA, so genes
    # whose change label is NA are skipped instead of producing NA row names.
    changed_genes <- function(deg) {
      rownames(deg)[deg$change %in% c("UP", "DOWN")]
    }
    cg1 <- changed_genes(DESeq2_DEG)
    cg2 <- changed_genes(edgeR_DEG)
    cg3 <- changed_genes(limma_voom_DEG)

    h1 <- draw_heatmap(dat[cg1, ], group_list, scale_before = TRUE)
    h2 <- draw_heatmap(dat[cg2, ], group_list, scale_before = TRUE)
    h3 <- draw_heatmap(dat[cg3, ], group_list, scale_before = TRUE)

    # Cutoff used throughout this analysis: mean + 2 * sd of the absolute
    # values. Missing values are ignored so the cutoff is never NA.
    m2d <- function(x) {
      mean(abs(x), na.rm = TRUE) + 2 * sd(abs(x), na.rm = TRUE)
    }

    v1 <- draw_volcano(DESeq2_DEG, pkg = 1, logFC_cutoff = m2d(DESeq2_DEG$log2FoldChange))
    v2 <- draw_volcano(edgeR_DEG, pkg = 2, logFC_cutoff = m2d(edgeR_DEG$logFC))
    v3 <- draw_volcano(limma_voom_DEG, pkg = 3, logFC_cutoff = m2d(limma_voom_DEG$logFC))

    library(patchwork)
    # Heatmaps on the top row, volcano plots on the bottom row.
    heat_vo <- (h1 + h2 + h3) / (v1 + v2 + v3) + plot_layout(guides = 'collect')
    heat_vo

    # Pass the plot explicitly rather than relying on the last plot displayed.
    ggsave(paste0(cancer_type, "heat_vo.png"), plot = heat_vo, width = 15, height = 10)
    
    image.png

    三大R包差异基因对比

    # Compare the three packages: genes called UP (or DOWN) by all of them,
    # a heatmap of those shared genes, Venn diagrams, and a combined figure.
    rm(list = ls())
    load("TCGA-stamDEG.Rdata")
    load("TCGA-stamgdc.Rdata")
    load("TCGA-stampcaplot.Rdata")
    # rm() does not attach packages, so load what this section calls:
    # draw_heatmap()/draw_venn() (tinyplanet) and ggsave() (ggplot2).
    library(ggplot2)
    library(tinyplanet)

    # Row names of the genes labelled UP; which() skips NA labels.
    UP <- function(df) {
      rownames(df)[which(df$change == "UP")]
    }
    # Row names of the genes labelled DOWN; which() skips NA labels.
    DOWN <- function(df) {
      rownames(df)[which(df$change == "DOWN")]
    }

    # Genes that all three packages agree on.
    up <- intersect(intersect(UP(DESeq2_DEG), UP(edgeR_DEG)), UP(limma_voom_DEG))
    down <- intersect(intersect(DOWN(DESeq2_DEG), DOWN(edgeR_DEG)), DOWN(limma_voom_DEG))

    hp <- draw_heatmap(exp[c(up, down), ], group_list, scale_before = TRUE, n_cutoff = 1.6)

    # Separate Venn diagrams for the up- and down-regulated genes.
    up.plot <- draw_venn(UP(DESeq2_DEG), UP(edgeR_DEG), UP(limma_voom_DEG),
                         "UPgene"
    )
    down.plot <- draw_venn(DOWN(DESeq2_DEG), DOWN(edgeR_DEG), DOWN(limma_voom_DEG),
                           "DOWNgene"
    )

    # Combine the PCA plot, heatmap and both Venn diagrams into one figure.
    library(patchwork)
    #up.plot + down.plot
    heat_ve_pca <- pca.plot + hp + up.plot + down.plot
    heat_ve_pca
    ggsave(paste0(cancer_type, "heat_ve_pca.png"), plot = heat_ve_pca, width = 15, height = 10)
    
    image.png

    相关文章

      网友评论

        本文标题:第二步:TCGA数据差异分析整理

        本文链接:https://www.haomeiwen.com/subject/lbqrzhtx.html