美文网首页生信绘图single cell注释和富集
单细胞之富集分析-3:GO和KEGG富集分析及绘图

单细胞之富集分析-3:GO和KEGG富集分析及绘图

作者: Hayley笔记 | 来源:发表于2021-07-16 22:08 被阅读0次

    单细胞富集分析系列:


    单细胞富集分析我最常用的是分组GSVA,但最近用到了GO分析,就复习一下GO和KEGG富集分析及绘图。

    1. 数据集准备

    library(Seurat)
    library(patchwork)
    library(clusterProfiler)
    library(org.Mm.eg.db) ##加载小鼠
    library(org.Hs.eg.db) ##加载人类
    library(tidyverse)
    

    载入无比熟悉的pbmc.3k数据集 (已注释好,数据准备见monocle)

    # Load the pre-annotated PBMC 3k object that was saved earlier as an RDS file.
    pbmc <- readRDS(file = "pbmc.rds")
    # Count how many cells carry each `cell_type` annotation.
    table(pbmc$cell_type)
    
    #  Naive CD4 T Memory CD4 T   CD14+ Mono            B        CD8 T FCGR3A+ Mono 
    #          711          480          472          344          279          162 
    #           NK           DC     Platelet 
    #         144           32           14 
    

    pbmc3k数据集只有1个样本,没办法区分HC和病例组。
    若有分组,可以使用subset函数将某种细胞取出,来做这种细胞病例组和对照组相比的差异基因和富集分析

    2. 计算差异基因

    • 使用seurat包的FindMarkers来计算差异基因。
      ident.1是病例组,ident.2是对照组。(这里只做演示,计算的是和Naive CD4 T相比,Memory CD4 T的差异基因)
    # Differential expression with Seurat's FindMarkers(): ident.1 ('Memory CD4 T')
    # is compared against ident.2 ('Naive CD4 T'); both labels are looked up in the
    # 'cell_type' metadata column via group.by. logfc.threshold and min.pct are set
    # to 0, so genes are not pre-filtered on those two thresholds before testing.
    dge.celltype <- FindMarkers(pbmc, ident.1 = 'Memory CD4 T', ident.2 = 'Naive CD4 T',
                                group.by = 'cell_type', logfc.threshold = 0, min.pct = 0)
    # Keep the full, unfiltered result on disk so it can be re-thresholded later.
    saveRDS(dge.celltype, file = "deg.rds")
    # All significant DE genes: adjusted p < 0.05 and |log2FC| > 0.15.
    sig_dge.all <- subset(dge.celltype, p_val_adj < 0.05 & abs(avg_log2FC) > 0.15)
    # View() needs an interactive front end; skip it under Rscript / batch runs
    # so the rest of the script still executes.
    if (interactive()) {
      View(sig_dge.all)
    }
    
    结果默认按p_val_adj从小到大排列
    • 分组可视化
    # Up-regulated genes (adjusted p < 0.05, log2FC > 0.15), strongest increase first.
    sig_dge.up <- subset(dge.celltype, p_val_adj < 0.05 & avg_log2FC > 0.15)
    sig_dge.up <- sig_dge.up[order(sig_dge.up$avg_log2FC, decreasing = TRUE), ]
    # head() returns at most 30 names; indexing with `[1:30, ]` would pad the
    # result with NA rows (and "NA" gene names) when fewer than 30 genes pass.
    sig_dge.up_TOP30 <- head(rownames(sig_dge.up), 30)

    # Down-regulated genes (adjusted p < 0.05, log2FC < -0.15). Sort ascending so
    # the most negative log2FC comes first; a descending sort would make the
    # "top 30" the 30 weakest decreases instead of the strongest ones.
    sig_dge.down <- subset(dge.celltype, p_val_adj < 0.05 & avg_log2FC < -0.15)
    sig_dge.down <- sig_dge.down[order(sig_dge.down$avg_log2FC, decreasing = FALSE), ]
    sig_dge.down_TOP30 <- head(rownames(sig_dge.down), 30)

    # Gene set shown in the heatmap: top up- followed by top down-regulated genes.
    diffall <- c(sig_dge.up_TOP30, sig_dge.down_TOP30)
    
    # Restrict the object to the two populations that were compared.
    Idents(pbmc) <- 'cell_type'
    # `idents` is spelled out in full instead of relying on partial matching of `ident`.
    pbmc_sub <- subset(pbmc, idents = c('Memory CD4 T', 'Naive CD4 T'))
    Idents(pbmc_sub) <- 'cell_type'
    # View() needs an interactive front end; skip it in batch runs.
    if (interactive()) {
      View(pbmc_sub)
    }

    # Average the RNA assay's scale.data values per identity class; [[1]] takes
    # the first element of the returned list.
    matrix <- AverageExpression(object = pbmc_sub, assays = 'RNA', slot = "scale.data")[[1]]
    # Keep only the selected DE genes. drop = FALSE keeps a 2-D object even if a
    # single gene matches, so the row names survive.
    matrix <- matrix[rownames(matrix) %in% diffall, , drop = FALSE]
    # Clamp values to [-2, 2] so a few extreme genes do not wash out the colour scale.
    matrix[matrix > 2] <- 2
    matrix[matrix < -2] <- -2

    # pheatmap is never attached with library() in this script, so call it through
    # its namespace. `cluster_rows` is the full argument name (the original
    # `cluster_row` only worked through partial matching).
    p <- pheatmap::pheatmap(matrix,
                            show_colnames = TRUE,
                            show_rownames = TRUE,
                            cluster_cols = TRUE, cluster_rows = TRUE,
                            border_color = NA,
                            color = colorRampPalette(c("navy", "white", "firebrick3"))(50))
    #' Write a heatmap object to a PDF file
    #'
    #' Opens a PDF device, draws `x$gtable` on a fresh grid page and closes the
    #' device again. The device is also closed when drawing fails, so an error
    #' does not leave a dangling graphics device or a locked, half-written file.
    #'
    #' @param x Object with a `gtable` element that `grid::grid.draw()` can draw
    #'   (in this script: the value returned by `pheatmap()`).
    #' @param filename Path of the PDF file to create.
    #' @param width,height Page size passed on to `pdf()`.
    #' @return The value of `dev.off()`, as before.
    save_pheatmap_pdf <- function(x, filename, width=8, height=15) {
        stopifnot(!missing(x))
        stopifnot(!missing(filename))
        pdf(filename, width = width, height = height)
        # Safety net: close the device if anything below throws.
        device_open <- TRUE
        on.exit(if (device_open) dev.off(), add = TRUE)
        grid::grid.newpage()
        grid::grid.draw(x$gtable)
        # Normal path: close explicitly so the dev.off() value is still returned.
        device_open <- FALSE
        dev.off()
      }
     # Export the heatmap drawn above with the helper's default page size.
     save_pheatmap_pdf(x = p, filename = "diff_heatmap.pdf")
    
    替换数据画所有样本的差异基因热图原理一样

    3. GO富集分析(分为BP, CC和MF)

    # Enrich BP, CC and MF together in a single run.
    ego_ALL <- enrichGO(gene          = row.names(sig_dge.all),
                        # universe    = row.names(dge.celltype),
                        OrgDb         = 'org.Hs.eg.db',
                        keyType       = 'SYMBOL',
                        ont           = "ALL",  # "ALL" computes BP, CC and MF together
                        pAdjustMethod = "BH",
                        pvalueCutoff  = 0.01,
                        qvalueCutoff  = 0.05)
    # Flatten the enrichment result into a plain data frame.
    ego_all <- data.frame(ego_ALL)
    # Export the data frame rather than the result object, the same way the
    # per-ontology tables (ego_cc / ego_mf / ego_bp) are exported.
    write.csv(ego_all, 'enrichGO_all.csv')
    # View() needs an interactive front end; skip it in batch runs.
    if (interactive()) {
      View(ego_all)
    }
    
    # Enrich each GO ontology separately: CC, then MF, then BP.
    # The three runs differ only in `ont`, so the shared call lives in one helper.
    run_enrich_go <- function(ontology) {
      enrichGO(
        gene          = row.names(sig_dge.all),
        # universe    = row.names(dge.celltype),
        OrgDb         = 'org.Hs.eg.db',
        keyType       = 'SYMBOL',
        ont           = ontology,
        pAdjustMethod = "BH",
        pvalueCutoff  = 0.01,
        qvalueCutoff  = 0.05
      )
    }

    # Cellular component
    ego_CC <- run_enrich_go("CC")
    ego_cc <- data.frame(ego_CC)
    write.csv(ego_cc, 'enrichGO_cc.csv')

    # Molecular function
    ego_MF <- run_enrich_go("MF")
    ego_mf <- data.frame(ego_MF)
    write.csv(ego_mf, 'enrichGO_mf.csv')

    # Biological process
    ego_BP <- run_enrich_go("BP")
    ego_bp <- data.frame(ego_BP)
    write.csv(ego_bp, 'enrichGO_bp.csv')
    
    绘图
    • 最普通的图,也是一般生信公司出报告的图,略丑。
    # One bar chart per ontology, showing 10 categories each.
    p_BP <- barplot(ego_BP, showCategory = 10) +
      ggtitle("barplot for Biological process")
    p_CC <- barplot(ego_CC, showCategory = 10) +
      ggtitle("barplot for Cellular component")
    p_MF <- barplot(ego_MF, showCategory = 10) +
      ggtitle("barplot for Molecular function")

    # Combine the three panels with `/` and save them to a single PDF.
    plotc <- p_BP / p_CC / p_MF
    ggsave(filename = 'enrichGO.pdf', plot = plotc, width = 12, height = 10)
    
    • 使用ggplot绘图(更灵活)
    # Only the BP terms are plotted here (the author finds them the most informative).
    # Sort by adjusted p-value, most significant first.
    ego_bp <- ego_bp[order(ego_bp$p.adjust), ]
    # head() keeps at most 30 rows; `ego_bp[1:30, ]` would add all-NA rows (and an
    # "NA" bar in the plot) whenever fewer than 30 terms are enriched.
    ego_bp_top30 <- head(ego_bp, 30)
    # Bar length = number of genes per term; coord_flip() turns the bars
    # horizontal so the long GO term names stay readable.
    ggplot(data = ego_bp_top30, aes(x = Description, y = Count)) +
      geom_col(width = 0.8, fill = 'salmon1') +
      coord_flip() +
      xlab("GO term") +
      ylab("Num of Genes") +
      theme_bw()
    
    top30 BP通路。注意使用coord_flip()之后,y映射(Count)显示在横轴上;这一轴也可以改为-log10(p值)等。

    之所以长短不齐不按顺序是因为没有排序

    # Sort by p-value. The ordering vector must come from ego_bp itself; indexing
    # ego_bp with an order computed on ego_all (a different table with a different
    # number of rows) scrambles the rows and can introduce NA rows.
    ego_bp <- ego_bp[order(ego_bp$pvalue, decreasing = TRUE), ]
    # Freeze the current row order as factor levels so ggplot keeps it instead of
    # sorting the terms alphabetically. unique() guards against duplicated levels.
    ego_bp$Description <- factor(ego_bp$Description,
                                 levels = unique(as.character(ego_bp$Description)))
    # Rows now run from least to most significant, so the 30 most significant
    # terms are the last 30 rows. Refresh the plotting table so that re-running the
    # ggplot call shows the bars in p-value order (expected: most significant on
    # top after coord_flip()).
    ego_bp_top30 <- tail(ego_bp, 30)
    

    排完序之后再画图,通路就是按p值顺序排列的了。

    4. KEGG富集分析

    # Translate the significant gene symbols to Entrez IDs and keep only the
    # ENTREZID column as a plain vector.
    genelist <- bitr(row.names(sig_dge.all),
                     fromType = "SYMBOL",
                     toType   = "ENTREZID",
                     OrgDb    = 'org.Hs.eg.db')[["ENTREZID"]]

    # KEGG enrichment for human ('hsa').
    ekegg <- enrichKEGG(gene = genelist, organism = 'hsa')

    # Bar chart and dot plot of 20 categories each, stacked into one PNG.
    p1 <- barplot(ekegg, showCategory = 20)
    p2 <- dotplot(ekegg, showCategory = 20)
    plotc <- p1 / p2
    ggsave(filename = "enrichKEGG.png", plot = plotc, width = 12, height = 10)
    

    附:单细胞测序数据的差异表达分析方法总结

    相关文章

      网友评论

        本文标题:单细胞之富集分析-3:GO和KEGG富集分析及绘图

        本文链接:https://www.haomeiwen.com/subject/bltldltx.html