美文网首页单细胞学习
2021-05-16 scRNA基础分析:差异基因富集分析

2021-05-16 scRNA基础分析:差异基因富集分析

作者: 学习生信的小兔子 | 来源:发表于2021-05-16 09:36 被阅读0次

    此前的分析我们按转录特征把细胞分成了很多类别,例如seurat聚类分析得到的按cluster分类,singleR分析得到的按细胞类型分类,monocle分析得到的按拟时状态(state)分类。不同的细胞类型之间,有哪些表达差异基因呢,这些差异基因有特别的意义吗?

    基因差异表达分析

    #基因差异表达分析
    library(Seurat)
    library(tidyverse)
    library(patchwork)
    library(monocle)
    library(clusterProfiler)
    library(org.Hs.eg.db)
    rm(list=ls())
    
    scRNA1 <- readRDS("scRNA1.rds")
    mycds <- readRDS("mycds.rds")
    #比较cluster0和cluster1的差异表达基因
    dge.cluster <- FindMarkers(scRNA1,ident.1 = 0,ident.2 = 1)
    colnames(dge.cluster )[2] <- 'avg_logFC'
    sig_dge.cluster <- subset(dge.cluster, p_val_adj<0.01&abs(avg_logFC)>1)
    #比较B_cell和T_cells的差异表达基因
    ##不知道为什么每次这里都出错,我记得保存了这一步的数据了呀。。。
    library(SingleR)
    load("D:/genetic_r/singer-cell-learning/filtered_feature_bc_matrix/ref_Human_all.RData")
    refdata <- ref_Human_all
    testdata <- GetAssayData(scRNA1, slot="data")
    ###把scRNA数据中的seurat_clusters提取出来,注意这里是因子类型的
    clusters <- scRNA1@meta.data$seurat_clusters
    ###开始用singler分析
    cellpred <- SingleR(test = testdata, ref = refdata, labels = refdata$label.main, 
                        method = "cluster", clusters = clusters, 
                        assay.type.test = "logcounts", assay.type.ref = "logcounts")
    ###制作细胞类型的注释文件
    celltype = data.frame(ClusterID=rownames(cellpred), celltype=cellpred$labels, stringsAsFactors = FALSE)
    
    ##把singler的注释写到metadata中 有两种方法
    scRNA1@meta.data$celltype = "NA"
    for(i in 1:nrow(celltype)){
      scRNA1@meta.data[which(scRNA1@meta.data$seurat_clusters == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
    ###因为我把singler的注释加载到metadata中时候,命名的名字叫celltype,所以画图时候,group.by="celltype"
    table(scRNA1@meta.data$celltype)
    #B_cell Monocyte  NK_cell  T_cells 
    #175      319       53      466 
    
    dge.celltype <- FindMarkers(scRNA1, ident.1 = 'B_cell', ident.2 = 'T_cells', group.by = 'celltype')
    colnames(dge.celltype)[2] <- 'avg_logFC'
    sig_dge.celltype <- subset(dge.celltype, p_val_adj<0.01&abs(avg_logFC)>1)
    #比较拟时State1和State3的差异表达基因
    p_data <- subset(pData(mycds),select='State')
    scRNAsub <- subset(scRNA1, cells=row.names(p_data))
    scRNAsub <- AddMetaData(scRNAsub,p_data,col.name = 'State')
    dge.State <- FindMarkers(scRNAsub, ident.1 = 1, ident.2 = 3, group.by = 'State')
    colnames(dge.State)[2] <- 'avg_logFC'
    sig_dge.State <- subset(dge.State, p_val_adj<0.01&abs(avg_logFC)>1)
    
    

    差异基因GO富集分析

    差异基因GO富集分析
    ego_ALL <- enrichGO(gene          = row.names(sig_dge.celltype),
                        #universe     = row.names(dge.celltype),
                        OrgDb         = 'org.Hs.eg.db',
                        keyType       = 'SYMBOL',
                        ont           = "ALL",
                        pAdjustMethod = "BH",
                        pvalueCutoff  = 0.01,
                        qvalueCutoff  = 0.05)
    ego_all <- data.frame(ego_ALL)
    ##电脑卡住了。。。。不运行这个了。。
    ego_CC <- enrichGO(gene          = row.names(sig_dge.celltype),
                       #universe     = row.names(dge.celltype),
                       OrgDb         = 'org.Hs.eg.db',
                       keyType       = 'SYMBOL',
                       ont           = "CC",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.01,
                       qvalueCutoff  = 0.05)
    ego_MF <- enrichGO(gene          = row.names(sig_dge.celltype),
                       #universe     = row.names(dge.celltype),
                       OrgDb         = 'org.Hs.eg.db',
                       keyType       = 'SYMBOL',
                       ont           = "MF",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.01,
                       qvalueCutoff  = 0.05)
    ego_BP <- enrichGO(gene          = row.names(sig_dge.celltype),
                       #universe     = row.names(dge.celltype),
                       OrgDb         = 'org.Hs.eg.db',
                       keyType       = 'SYMBOL',
                       ont           = "BP",
                       pAdjustMethod = "BH",
                       pvalueCutoff  = 0.01,
                       qvalueCutoff  = 0.05) #电脑卡住了。。暂时不画这个图
    #截取每个description的前70个字符,方便后面作图排版
    ego_CC@result$Description <- substring(ego_CC@result$Description,1,70)
    ego_MF@result$Description <- substring(ego_MF@result$Description,1,70)
    ego_BP@result$Description <- substring(ego_BP@result$Description,1,70)
    p_BP <- barplot(ego_BP,showCategory = 10) + ggtitle("barplot for Biological process") ##电脑又卡住了。。。hhh,就不画这个图了
    p_CC <- barplot(ego_CC,showCategory = 10) + ggtitle("barplot for Cellular component")
    p_MF <- barplot(ego_MF,showCategory = 10) + ggtitle("barplot for Molecular function")
    plotc <- p_BP/p_CC/p_MF
    

    差异基因kegg富集分析

    genelist <- bitr(row.names(sig_dge.celltype), fromType="SYMBOL",
                     toType="ENTREZID", OrgDb='org.Hs.eg.db')
    genelist <- pull(genelist,ENTREZID)               
    ekegg <- enrichKEGG(gene = genelist, organism = 'hsa')
    p1 <- barplot(ekegg, showCategory=20)
    p2 <- dotplot(ekegg, showCategory=20)
    plotc = p1/p2
    plotc
    
    kegg.5.15.png

    相关文章

      网友评论

        本文标题:2021-05-16 scRNA基础分析:差异基因富集分析

        本文链接:https://www.haomeiwen.com/subject/zxvljltx.html