RNAseq下游分析--edgeR +cluserprofile

作者: 生信编程日常 | 来源:发表于2019-12-18 21:01 被阅读0次

    RNAseq分析可以用hisat2+samtools+stringtie得到表达矩阵,后面可以通过edgeR + clusterprofiler分析。

    source("https://bioconductor.org/biocLite.R")#install
    biocLite("edgeR")
    library(edgeR)
    library(org.Mm.eg.db)
    library(clusterProfiler)
    library(xlsx)
    

    Mut_Wt为stringtie输入文件转化成的counts文件或者其他方式得到的要分析的data.frame,行为基因名,列为sample,基因表达为counts

    
    Mut_Wt.df<-Mut_Wt#用于存储原始表达矩阵
    #三个重复,1代表control,2代表treat
    #1 设计分组
    group_list <- factor(c(rep("1",3), rep("2",3)))
    #2去掉低表达的基因
    Mut_Wt <- Mut_Wt[rowSums(cpm(Mut_Wt) > 1) >= 2,]
    Mut_Wt <- DGEList(counts = Mut_Wt, group = group_list)
    Mut_Wt <- calcNormFactors(Mut_Wt)
    #3 计算离散度
    Mut_Wt <- estimateCommonDisp(Mut_Wt)
    Mut_Wt <- estimateTagwiseDisp(Mut_Wt)
    #4 得到差异基因,并分为显著性的上调和下调
    Mut_Wt_et <- exactTest(Mut_Wt)
    Mut_Wt_tTag <- topTags(Mut_Wt_et, n=nrow(Mut_Wt))
    Mut_Wt_tTag <- as.data.frame(Mut_Wt_tTag)
    Mut_Wt_tTag_count<-merge(Mut_Wt_tTag,Mut_Wt.df,by.x = 0,by.y = 0)
    Mut_Wt_up<-subset(Mut_Wt_tTag_count,logFC>log2(1.5)&PValue<0.05)
    Mut_Wt_up<-Mut_Wt_up[order(-Mut_Wt_up$logFC,-Mut_Wt_up$PValue),]
    Mut_Wt_down<-subset(Mut_Wt_tTag_count,logFC< -log2(1.5)&PValue<0.05)
    Mut_Wt_down<-Mut_Wt_down[order(-abs(Mut_Wt_down$logFC),-Mut_Wt_down$PValue),]
    
    

    edgeR的结果可以直接做用clusterprofiler做goterm,用循环方便一些

    library(xlsx)
    library(ggplot2)
    #go term of Mut vs WT
    gotermlist<-list(Mut_Wt_up,Mut_Wt_down)
    names(gotermlist)<-c('Mut_Wt_up','Mut_Wt_down')
    for (i in 1:2){
      write.xlsx(gotermlist[[i]],"Mut_Wt.xlsx",sheetName = names(gotermlist)[i],append =T )
    }
    
    #############################################################################
    for (i in 1:2){
      GeneSymbol<-as.character(gotermlist[[i]]$Row.names)
    EN = bitr(GeneSymbol,fromType = "SYMBOL",toType = "ENTREZID",OrgDb = "org.Mm.eg.db")
    #BP
    goBP = enrichGO(OrgDb="org.Mm.eg.db",gene = as.vector(EN$ENTREZID),ont = "BP", pvalueCutoff = 0.05, readable= TRUE)
    goCC = enrichGO(OrgDb="org.Mm.eg.db",gene = as.vector(EN$ENTREZID),ont = "CC", pvalueCutoff = 0.05, readable= TRUE)
    goMF = enrichGO(OrgDb="org.Mm.eg.db",gene = as.vector(EN$ENTREZID),ont = "MF", pvalueCutoff = 0.05, readable= TRUE)
    #BP
    png(paste0("picture/",names(gotermlist)[i],"_BP.png"),width = 800,height = 480)
    print(clusterProfiler::dotplot(goBP,showCategory=15,title=paste0(names(gotermlist)[i],"_BP"))+scale_color_gradient(low = "#132B43", high = "#56B1F7"))
    dev.off()
    #CC
    png(paste0("picture/",names(gotermlist)[i],"_CC.png"),width = 800,height = 480)
    print(clusterProfiler::dotplot(goCC,showCategory=15,title=paste0(names(gotermlist)[i],"_CC"))+scale_color_gradient(low = "#132B43", high = "#56B1F7"))
    dev.off()
    #MF
    png(paste0("picture/",names(gotermlist)[i],"_MF.png"),width = 800,height = 480)
    print(clusterProfiler::dotplot(goMF,showCategory=15,title=paste0(names(gotermlist)[i],"_MF"))+scale_color_gradient(low = "#132B43", high = "#56B1F7"))
    dev.off()
    
    #KEGG
    KEGG = enrichKEGG(organism = "mmu",gene = as.vector(EN$ENTREZID))
    KEGG = setReadable(KEGG, OrgDb = org.Mm.eg.db, keytype="SYMBOL")
    
    png(paste0("picture/",names(gotermlist)[i],"_KEGG.png"),width = 800,height = 480)
    print(clusterProfiler::dotplot(KEGG,showCategory=15,title=paste0(names(gotermlist)[i],"_KEGG"))+scale_color_gradient(low = "#132B43", high = "#56B1F7"))
    dev.off()
    
    
    try({write.xlsx(goBP@result,"Mut_Wt.xlsx",sheetName = paste0(names(gotermlist)[i],"_BP"),append = T)
      write.xlsx(goCC@result,"Mut_Wt.xlsx",sheetName = paste0(names(gotermlist)[i],"_CC"),append = T)
      write.xlsx(goMF@result,"Mut_Wt.xlsx",sheetName = paste0(names(gotermlist)[i],"_MF"),append = T)
      write.xlsx(KEGG@result,"Mut_Wt.xlsx",sheetName = paste0(names(gotermlist)[i],"_KEGG"),append = T)
    })
    }
    
    

    欢迎关注!

    相关文章

      网友评论

        本文标题:RNAseq下游分析--edgeR +cluserprofile

        本文链接:https://www.haomeiwen.com/subject/qhxpnctx.html