美文网首页
转录组分析--step2 差异分析

转录组分析--step2 差异分析

作者: 莎莉噢 | 来源:发表于2020-03-11 16:22 被阅读0次

    以下内容根据生信技能树Jimmy老师代码按个人习惯修改而成,欢迎讨论共同进步
    三种最常用的方法都试一下看看,用function定义函数的写法比较整齐,不然大段大段的心烦

    用DEseq2
    degdeseq <- function(exprset,pd,group,pvcut){
      library(DESeq2)
      colData <- data.frame(row.names=rownames(pd), grouplist=pd[,group])
      dds <- DESeqDataSetFromMatrix(countData = exprset,colData = colData,design = ~ grouplist)
      dds <- dds[rowSums(counts(dds))>100,]   ###过滤低counts####
      dds <- DESeq(dds)
      res <- results(dds, contrast=c('grouplist',"trt","untrt"))
      #res <- results(dds, contrast=c('grouplist',"trt","untrt"),pAdjustMethod = 'fdr', alpha = 0.01)
      plotMA(res)
      resordered <- as.data.frame(res[order(res$padj),])
      resordered <- na.omit(resordered)
      fdcutoff <- with(resordered,mean(abs(log2FoldChange))+2*sd(abs(log2FoldChange))) 
      #fdcutoff <- 3      ###或者简单粗暴自己定义cutoff###
      print(fdcutoff)
      resordered$sig <- ifelse(resordered$padj<pvcut & abs(resordered$log2FoldChange)>fdcutoff,ifelse(resordered$log2FoldChange > 0,'up','down'),'no sig')        
      return(resordered)
    }
    resdegseq <- degdeseq(exprset,pd,'group',0.05)
    table(resdegseq$sig)            ###看一下上下调基因的分布####
    
    EdgeR
    degdge <- function(exprset,group_list,pvcut){
      library(edgeR)
      dge <- DGEList(counts=exprset,group=factor(group_list))
      keepe <- rowSums(cpm(dge)>1) >=2               ###按cpm过滤低表达####
      dge <- dge[keepe,keep.lib.sizes=F]
      dge <- calcNormFactors(dge)            
      #dge$samples
      design <- model.matrix(~0+factor(group_list))
      rownames(design)<-colnames(dge)
      colnames(design)<-levels(factor(group_list))
      dge <- estimateDisp(dge,design,robust = T)
      #dge <- estimateGLMCommonDisp(dge,design)
      #dge <- estimateGLMTrendedDisp(dge, design)
      #dge <- estimateGLMTagwiseDisp(dge, design)
      fit <- glmQLFit(dge, design)
      cont.matrix=makeContrasts(contrasts=c('trt-untrt'),levels = design)
      lrt <- glmQLFTest(fit,  contrast=cont.matrix)
      nrDEG <- topTags(lrt, n=nrow(exprset))
      resdge <- na.omit(as.data.frame(nrDEG))
      #fdcutoff <- 3
      fdcutoff <- with(resdge,mean(abs(logFC))+2*sd(abs(logFC)))
      print(paste0('fdcutoff is ',fdcutoff))
      resdge$sig <- ifelse(resdge$PValue<pvcut & abs(resdge$logFC)>fdcutoff,ifelse(resdge$logFC > 0,'up','down'),'no sig')
      return(resdge)
    }
    resdge <- degdge(exprset,group_list,0.05)
    table(resdge$sig)
    
    Limma
    deglim <- function(exprset,pvcut){
      library(limma)
      design <- model.matrix(~0+factor(group_list))
      colnames(design)=levels(factor(group_list))
      rownames(design)=colnames(exprset)
      dgelim <- DGEList(counts=exprset,group=factor(group_list))
      keepl <- rowSums(cpm(dgelim)>1) >=2               ###按cpm过滤低表达####
      dgelim <- dgelim[keepl,keep.lib.sizes=F]
      dgelim <- calcNormFactors(dgelim)
      #logCPM <- cpm(dgelim, log=TRUE, prior.count=3)    ####可以选另一种方法trend#####
      #fitc <- lmFit(logCPM, design)
      v <- voom(dgelim,design,plot=TRUE, normalize="quantile")
      fit <- lmFit(v, design)
      cont.matrix=makeContrasts(contrasts=c('trt-untrt'),levels = design)
      fit2 <- contrasts.fit(fit,cont.matrix)
      fit2 <- eBayes(fit2)
      tempOutput <- topTable(fit2, coef='trt-untrt', n=Inf)
      deglim <- na.omit(tempOutput)
      #fdcutoff <- 3
      fdcutoff <- with(deglim,mean(abs(logFC))+2*sd(abs(logFC)))
      deglim$sig <- ifelse(deglim$adj.P.Val<pvcut & abs(deglim$logFC)>fdcutoff,ifelse(deglim$logFC > 0,'up','down'),'no sig')
      print(paste0('fdcutoff is ',fdcutoff))
      return(resdge)
    }
    reslim <- deglim(exprset,0.05)
    table(reslim$sig)
    
    #######check expression ####
    cherld <- function(rld){
      library(gplots)
      rld <- assay(rlogTransformation(dds))                 ####in deseq2 count-log2 
      rld <- log2(cpm(exprset)+1)                           ####cpm
      boxplot(rld)
      hist(rld)
      sampleDists <- as.matrix(dist(t(rld)))
      library(RColorBrewer)
      mycols <- brewer.pal(ncol(sampleDists), "Dark2")[1:length(unique(group_list))]
      heatmap.2(as.matrix(sampleDists), key=F, trace="none",
                col=colorpanel(100, "black", "white"),
                ColSideColors=mycols[group_list], RowSideColors=mycols[group_list],
                margin=c(10, 10), main="Sample Distance Matrix")
    }
    

    韦恩图,看一下三种方法的差距

    ######gene overlap ######
    venn <- function(degf,allv,deseq){
      library(VennDiagram)
      geneedge <- rownames(degf)[-grep("no sig",degf$sig)]
      genelima <- rownames(allv)[-grep("no sig",allv$sig)]
      genedeseq <- rownames(deseq)[-grep("no sig",deseq$sig)]
      venn.plot <- venn.diagram(x=list(edger=geneedge,limmac=genelima,deseq=genedeseq),col="transparent",filename = '3 triple overlap.png',
                                fill=c("red","blue","green"),cex = 1.5,
                                fontfamily = "serif",
                                fontface = "bold",
                                cat.default.pos = "text",
                                cat.col = c("darkred", "darkblue", "darkgreen"),
                                cat.cex = 1.5,
                                cat.fontfamily = "serif",
                                cat.dist = c(0.08, 0.08, 0.08),
                                cat.pos = 0,
                                alpha = 0.7,
                                margin = 0.08)
    }
    venn(resdegseq,resdge,reslim)
    

    类似效果如下

    热图展示​

    ######heatmap######
    pt <- function(degsig,exprn,pd){
      library(pheatmap)
      degsig <- degsig[-grep('no sig',degsig$sig),] 
      #degsig <- degsig[order(degsig$log2FoldChange,decreasing = T),]  ###for deseq2####
      g <- c(head(rownames(degsig),20),tail(rownames(degsig),20))  #前20个差异基因###
      degmat <- exprn[g,]
      #degmat <- log2(degmat+1)
      degmat <- t(scale(t(degmat)))                        ##normalize extreme value##
      anc <- matrix(pd$type)
      rownames(anc) <- rownames(pd)
      anc <- as.data.frame(pd$type,row.names = rownames(pd))
      pheatmap(degmat,annotation_col = anc,cluster_cols = F)
    }
    pt(reslim,mergerem,pd2)
    

    火山图

    ######volcano########
    volcano_print <- function(DEG,fccutoff,pcutoff,plotfccut,plotpcut){
      library(dplyr)
      library(ggrepel)
      degsig <- DEG[-grep('no sig',DEG$sig),]
      this_tile <- paste0('Cutoff for log2FoldChange is ',round(fccutoff,3),
                          '\nThe number of up gene is ',nrow(DEG[DEG$sig =='up',]) ,
                          '\nThe number of down gene is ',nrow(DEG[DEG$sig =='down',]))
      l <- degsig[which(abs(degsig$log2FoldChange)>=plotfccut & (-log10(degsig$padj) > -log10(plotpcut))),]                     ##filter label###
      g <- ggplot(data=degsig, aes(x=log2FoldChange, y=-log10(padj), color=sig)) +
        geom_point(aes(color = sig), alpha = 0.6, size = 1)  +
        theme_set(theme_set(theme_bw(base_size=20)))+
        geom_vline(xintercept = c(-fccutoff, fccutoff), color = 'gray', size = 0.25) +
        geom_hline(yintercept = -log(pcutoff, 10), color = 'gray', size = 0.25) +
        xlab("log2 fold change") + ylab("-log10 p-value") +
        ggtitle( this_tile ) + theme(plot.title = element_text(size=15,hjust = 0.5))+
        geom_text_repel(data=l,aes(label=rownames(l)),size=2)      ###加标签###
      print(g)
    }
    volcano_print(resdegseq,3,0.05,3,0.05)
    

    代码出来是有标签的,图上忘了加

    相关文章

      网友评论

          本文标题:转录组分析--step2 差异分析

          本文链接:https://www.haomeiwen.com/subject/lwzcjhtx.html