美文网首页
单细胞数据挖掘实战:文献复现(十)完结篇

单细胞数据挖掘实战:文献复现(十)完结篇

作者: 生信开荒牛 | 来源:发表于2022-08-20 09:17 被阅读0次

    单细胞数据挖掘实战:文献复现(一)批量读取数据

    单细胞数据挖掘实战:文献复现(二)批量创建Seurat对象及质控

    单细胞数据挖掘实战:文献复现(三)降维、聚类和细胞注释

    单细胞数据挖掘实战:文献复现(四)细胞比例饼图

    单细胞数据挖掘实战:文献复现(五)细胞亚群并可视化

    单细胞数据挖掘实战:文献复现(六)标记基因及可视化

    单细胞数据挖掘实战:文献复现(七)MG 和 Mo/MΦ 评分

    单细胞数据挖掘实战:文献复现(八)marker基因在Hom-MG、 Act-MG 和 Mo/MΦ 细胞中的表达情况

    单细胞数据挖掘实战:文献复现(九)基因GO分析及cnetplot可视化结果

    这篇文献的最后一个结果是介绍MHCII基因小胶质细胞表达的性别相关差异,结果说明MHCII 和Cd74基因的表达在男性神经胶质瘤的小胶质细胞和单核细胞/巨噬细胞中更为丰富,结果通过Fig. 5来展示。

    一、加载R包

    if (T) {
      if(!require(BiocManager))install.packages("BiocManager")
      if(!require(Seurat))install.packages("Seurat")
      if(!require(Matrix))install.packages("Matrix")
      if(!require(ggplot2))install.packages("ggplot2")
      if(!require(cowplot))install.packages("cowplot")
      if(!require(magrittr))install.packages("magrittr")
      if(!require(dplyr))install.packages("dplyr")
      if(!require(purrr))install.packages("purrr")
      if(!require(ggrepel))install.packages("ggrepel")
      if(!require(ggridges))install.packages("ggridges")
      if(!require(ggpubr))install.packages("ggpubr")
    }
    

    二、添加sex_condition

    table(seu_object$orig.ident)
    #GSM4039241-F-ctrl-1  GSM4039242-F-ctrl-2 GSM4039243-F-tumor-1 
    #                5151                 4781                 4878 
    #GSM4039244-F-tumor-2  GSM4039245-M-ctrl-1  GSM4039246-M-ctrl-2 
    #                5467                 4786                 5218 
    #GSM4039247-M-tumor-1 GSM4039248-M-tumor-2 
    #                4091                 4891
    group <- c(rep("F_ctrl",times = 5151),
               rep("F_ctrl",times = 4781),
               rep("F_tumor",times = 4878),
               rep("F_tumor",times = 5467),
               rep("M_ctrl",times = 4786),
               rep("M_ctrl",times = 5218),
               rep("M_tumor",times = 4091),
               rep("M_tumor",times = 4891))
    seu_object$sex_condition <- group
    # Figure 5a
    DimPlot(seu_object, group.by = "sex_condition")
    
    1.png

    三、Act-MG 和 Mo/MΦ 中不同性别的 DEG

    table(Idents(seu_object))
    #   MG Mo/MΦ   BAM 
    #31959  5624  1680
    #选出"MG","Mo/MΦ"
    seu_object <- subset(seu_object, idents = c("MG","Mo/MΦ"))
    table(Idents(seu_object))
    #   MG Mo/MΦ 
    #31959  5624
    #将Microglia细分为"h.Microglia"和"a.Microglia"
    Idents(seu_object) <- seu_object$seurat_clusters
    seu_object$cell_type_4_groups <- plyr::mapvalues(Idents(seu_object), 
                                                     from=c(0:4,6:11,13,14,16,18,19), 
                                                     to = c("h.Microglia",  "h.Microglia", "a.Microglia",
                                                            "h.Microglia", "Macrophages", "h.Microglia",  
                                                            "h.Microglia", "Macrophages","a.Microglia", 
                                                            "a.Microglia", "Macrophages","a.Microglia", 
                                                            "h.Microglia", "Macrophages" ,"Macrophages", 
                                                            "Macrophages"))
    Idents(seu_object) <- seu_object$cell_type_4_groups
    table(Idents(seu_object))
    #h.Microglia a.Microglia Macrophages 
    #      24728        7231        5624
    object_tmp <- seu_object
    object_tmp$sex <- substr(seu_object$sex_condition,1,1)
    object_tmp$sex_cell_type <- paste0(object_tmp$sex, "_", as.character(Idents(seu_object)))
    Idents(object_tmp) <- object_tmp$sex_cell_type
    table(Idents(object_tmp))
    #F_h.Microglia F_a.Microglia F_Macrophages M_h.Microglia M_Macrophages 
    #        13273          3496          2691         11455          2933 
    #M_a.Microglia 
    #         3735 
    #找marker基因
    markers_male_ActMG <- FindMarkers(object = object_tmp, ident.1 = "M_a.Microglia", 
                                      ident.2 = "F_a.Microglia", only.pos = TRUE, min.pct = 0.25, 
                                      logfc.threshold = 0.0001)
    
    markers_female_ActMG <- FindMarkers(object = object_tmp, ident.1 = "F_a.Microglia", 
                                        ident.2 = "M_a.Microglia", only.pos = TRUE, min.pct = 0.25, 
                                        logfc.threshold = 0.0001)
    markers_female_ActMG$avg_log2FC <- markers_female_ActMG$avg_log2FC*-1
    
    markers_male_MoM <- FindMarkers(object = object_tmp, ident.1 = "M_Macrophages", 
                                    ident.2 = "F_Macrophages", only.pos = TRUE, min.pct = 0.25, 
                                    logfc.threshold = 0.0001)
    
    markers_female_MoM <- FindMarkers(object = object_tmp, ident.1 = "F_Macrophages", 
                                      ident.2 = "M_Macrophages", only.pos = TRUE, min.pct = 0.25, 
                                      logfc.threshold = 0.0001)
    markers_female_MoM$avg_log2FC <- markers_female_MoM$avg_log2FC*-1
    #放入一个list中循环进行条件筛选
    markers_list <- list(markers_male_ActMG, markers_female_ActMG, markers_male_MoM, markers_female_MoM)
    names(markers_list) <- c("markers_male_ActMG", "markers_female_ActMG", "markers_male_MoM", "markers_female_MoM")
    markers_list <- lapply(markers_list, function(x) {
      x$gene <- rownames(x)
      x$padj_log10<- -log10(x$p_val_adj)
      x[x$padj_log10 == Inf, "padj_log10"]<-284
      x$log2FC <- log2(exp(x$avg_log2FC))
      x <- x %>% mutate(threshold = c(p_val_adj < 0.05 & abs(log2FC) > 0.5 & (pct.1>=0.25 | pct.2 >=0.25)))
      x$color <- x$threshold
      x[x$color == F, "color"] <- "below threshold"
      x[x$color == T, "color"]<-"male"
      x[x$color=="male" & x$log2FC<0, "color"]<-"female"
      x$color<-factor(x$color, levels=c("female", "male", "below threshold"))
      x<-na.omit(x)
      x <-x[rev(order(x$color)),]
      rownames(x)<-seq_along(x$p_val)
      x
    })
    #按性别进行整合
    markers_ActMG_sex <- rbind(cbind(markers_list$markers_male_ActMG, sex="M"),
                               cbind(markers_list$markers_female_ActMG, sex = "F"))
    
    markers_MoM_sex <- rbind(cbind(markers_list$markers_male_MoM, sex="M"),
                             cbind(markers_list$markers_female_MoM, sex = "F"))
    markers_MoM_sex[markers_MoM_sex$padj_log10 > 149, "padj_log10"] <- 149
    
    markers_ActMG_sex$color <- factor(markers_ActMG_sex$color, levels=c("female", "male", "below threshold"))
    markers_MoM_sex$color <- factor(markers_MoM_sex$color, levels=c("female", "male", "below threshold"))
    #画火山图
    col_male<- "#4FCAFF"
    col_female<- "#FFFF00"
    
    MG<-ggplot(markers_ActMG_sex, aes(x=log2FC, y=padj_log10, fill=color))+
      geom_jitter(size=3, shape=21, color="black", stroke=0.01)+
      geom_text_repel(data=markers_ActMG_sex[markers_ActMG_sex$threshold==T & markers_ActMG_sex$log2FC>0,],
                      aes(label=gene), nudge_x = 2, nudge_y = 2,
                      color="black", size=6, force=5)+
      geom_text_repel(data=markers_ActMG_sex[markers_ActMG_sex$threshold==T & markers_ActMG_sex$log2FC<0,],
                      aes(label=gene), nudge_x = -2, nudge_y = 2,
                      color="black", size=6, force=5)+
      scale_fill_manual(values=c(col_female,col_male, "gray90"))+
      scale_x_continuous(limits=c(-4,4))+
      labs(title= "Act-MG",x="avg log2FC", y="log10 padj")+
      theme_light(base_size=18)+
      theme(panel.grid = element_blank(), legend.title = element_blank(),
            legend.text = element_text(size=18))+
      guides(fill = guide_legend(override.aes = list(size=6)))
    
    MoM<-ggplot(markers_MoM_sex, aes(x=log2FC, y=padj_log10, fill=color))+
      geom_jitter(size=3, shape=21, color="black")+
      geom_text_repel(data=markers_MoM_sex[markers_MoM_sex$threshold==T & markers_MoM_sex$log2FC>0 & markers_MoM_sex$gene != "AB124611",],
                      aes(label=gene), nudge_x = 2, nudge_y = 5,
                      color="black", size=6)+
      geom_text_repel(data=markers_MoM_sex[markers_MoM_sex$threshold==T & markers_MoM_sex$log2FC<0,],
                      aes(label=gene), nudge_x = -2.5, nudge_y = 2,
                      color="black", size=6)+
      scale_fill_manual(values=c(col_female,col_male, "gray90"))+
      scale_x_continuous(limits=c(-4,4))+
      labs(title= "Mo/M ", x="avg log2FC", y="log10 padj")+
      theme_light(base_size=18)+
      theme(panel.grid = element_blank(), legend.title = element_blank(),
            legend.text = element_text(size=18))+
      guides(fill = guide_legend(override.aes = list(size=6)))
    #fig5b
    pdf("fig5b.pdf",width = 20,height = 20)
    ggarrange(MG, MoM, ncol=2, common.legend = T)
    dev.off()
    
    2.png 3.png

    与文献结果基本一致。

    Figure 5c

    FeaturePlots for selected MHC II genes(Cd74, H2-Ab1, H2-Eb1, H2-Aa)

    gene_to_plot <- "Cd74"
    f5c_1 <- FeaturePlot(seu_object, features = gene_to_plot) + 
      scale_color_gradientn(colors=viridis::viridis(n=50)) +
      ggtitle(gene_to_plot)
    gene_to_plot <- "H2-Ab1"
    f5c_2 <- FeaturePlot(seu_object, features = gene_to_plot) + 
      scale_color_gradientn(colors=viridis::viridis(n=50)) +
      ggtitle(gene_to_plot)
    gene_to_plot <- "H2-Eb1"
    f5c_3 <- FeaturePlot(seu_object, features = gene_to_plot) + 
      scale_color_gradientn(colors=viridis::viridis(n=50)) +
      ggtitle(gene_to_plot)
    gene_to_plot <- "H2-Aa"
    f5c_4 <- FeaturePlot(seu_object, features = gene_to_plot) + 
      scale_color_gradientn(colors=viridis::viridis(n=50)) +
      ggtitle(gene_to_plot)
    pdf("fig5c.pdf",width = 20,height = 5)
    ggarrange(f5c_1, f5c_2,f5c_3,f5c_4, ncol=4, common.legend = T)
    dev.off()
    
    4.png

    四、MHCII 基因和Cd74高表达的 Act-MG 和 intMoMΦ 群体中的富集情况

    Figure 5d

    rm(list = ls())
    options(stringsAsFactors = F)
    seu_object = readRDS("seu_object_merge_prediff.RDS")
    seu_object <- ScaleData(seu_object)
    
    #添加性别
    table(seu_object$orig.ident)
    group <- c(rep("F_ctrl",times = 5151),
               rep("F_ctrl",times = 4781),
               rep("F_tumor",times = 4878),
               rep("F_tumor",times = 5467),
               rep("M_ctrl",times = 4786),
               rep("M_ctrl",times = 5218),
               rep("M_tumor",times = 4091),
               rep("M_tumor",times = 4891))
    seu_object$sex_condition <- group
    
    #将Microglia细分为"h.Microglia"和"a.Microglia"
    Idents(seu_object) <- seu_object$seurat_clusters
    seu_object$cell_type_4_groups <- plyr::mapvalues(Idents(seu_object), 
                                                     from=c(0:11,13,14,16,18,19), 
                                                     to = c("h.Microglia",  "h.Microglia", "a.Microglia","h.Microglia","BAM" ,"Macrophages", "h.Microglia",  
                                                            "h.Microglia", "Macrophages","a.Microglia","a.Microglia", "Macrophages","a.Microglia", 
                                                            "h.Microglia", "Macrophages" ,"Macrophages","Macrophages"))
    
    
    
    genes<-c("Cd74", "H2-Ab1", "H2-Eb1", "H2-Aa" )
    
    gene_expression_data <- GetAssayData(object = seu_object, slot = "data")
    gene_expression_data <- as.data.frame(t(gene_expression_data[genes,]))
    gene_expression_data$cell_types_4_groups <- as.character(Idents(seu_object))
    gene_expression_data$clusters_0.6 <- seu_object$RNA_snn_res.0.6
    
    gene_expression_data$cell_types_6_groups <- NULL
    gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 == 11] <- "Mo"
    gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 == 4] <- "intMoM"
    gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 %in% c(8,16,18,19)] <- "M"
    gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 == 5] <- "BAM"
    gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 %in% c(0,1,3,6,7,14)] <- "h.Microglia"
    gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 %in% c(2,9,10,13)] <- "a.Microglia"
    
    gene_expression_data$cell_types_6_groups <- factor(gene_expression_data$cell_types_6_groups)
    gene_expression_data$cell_types_6_groups <- factor(gene_expression_data$cell_types_6_groups, 
                                                       levels = levels(gene_expression_data$cell_types_6_groups)[c(3,1,6,4,5,2)])
    
    seu_object$sex <- substr(seu_object$sex_condition,1,1)
    gene_expression_data$sex <- seu_object$sex
    
    graphs<-list() 
    for (i in seq_along(genes)){ 
      plot<-ggplot(gene_expression_data, aes(x="", y=cell_types_6_groups, fill=sex))+
        geom_density_ridges(aes_string(x=as.name(genes[i])), alpha=0.65, scale=1 )+
        scale_fill_manual(values=c( "#ffff00",  "#4FCAFF"))+
        xlab("expression level")+
        ylab("")+
        labs(title=genes[i])+
        theme_classic(base_size=18)+
        theme(axis.text = element_text(size=16), axis.text.y = element_text(face="bold", size=18),
              axis.title=element_text(size=16),
              title = element_text(size = 20), legend.text = element_text(size=18),
              legend.title = element_blank())
      graphs[[i]]<-plot
    }
    if(!require(ggpubr))install.packages("ggpubr")
    pdf("fig_5d2.pdf", width = 20,height = 5)
    ggarrange(plotlist=graphs, ncol=4, nrow=1, common.legend = T, legend="top")
    dev.off()
    
    5.png

    Figure 5f

    gene_expression_data$mhc2_score <- rowMeans(gene_expression_data[, c("H2-Ab1", "H2-Eb1", "H2-Aa")])
    gene_expression_data <- gene_expression_data  %>% mutate(threshold = mhc2_score > 2)
    gene_expression_data[gene_expression_data$threshold==T, "threshold"]<-"MHCII High"
    gene_expression_data[gene_expression_data$threshold==F, "threshold"]<-"MHCII Low"
    gene_expression_data$threshold <- factor(gene_expression_data$threshold, 
                                             levels = c("MHCII Low", "MHCII High"))
    
    gene_expression_data_all_genes <- GetAssayData(object = seu_object, slot = "data")
    gene_expression_data_ciita_mif <- as.data.frame(t(gene_expression_data_all_genes[c("Ciita", "Mif"), ]))
    colnames(gene_expression_data_ciita_mif) <- c("Ciita", "Mif")
    gene_expression_data <- cbind(gene_expression_data, gene_expression_data_ciita_mif)
    
    gene_expression_data <- gene_expression_data[gene_expression_data$cell_types_6_groups %in% 
                                                   c("a.Microglia", "intMoM"), ]
    gene_expression_data$cell_types_selected <- factor(gene_expression_data$cell_types_6_groups, 
                                                       levels = c("a.Microglia", "intMoM"))
    
    p5f_1 <- ggplot(gene_expression_data, aes(x=mhc2_score, y=cell_types_selected, fill=sex))+
      geom_density_ridges(alpha=0.65, scale=1 )+
      scale_fill_manual(values=c( "#ffff00",  "#4FCAFF"))+
      geom_vline(xintercept=2, color="red", linetype="dashed", size=1)+
      xlab("MHCII score (H2-Ab1, H2-Eb1, H2-Aa)")+
      ylab("")+
      theme_classic(base_size=18)+
      theme(axis.text.y = element_text(face="bold"),
            title = element_text(size = 20), axis.title.y = element_text(size=16),
            legend.text = element_text(size=18), legend.title = element_blank(),
            axis.text = element_text(size=18), legend.position = "top")
    
    p5f_2 <- ggplot(gene_expression_data, aes(x=cell_types_selected, y=Mif, fill=sex))+
      geom_violin(scale="count")+
      scale_fill_manual(values=c( "#ffff00",  "#4FCAFF"))+
      facet_grid(~threshold)+
      ylab("Mif expression level")+
      xlab("")+
      theme_classic(base_size=18)+
      theme(axis.text = element_text(size=18), legend.position = "none")
    
    pdf("fig_5f.pdf", width = 10,height = 10)
    ggarrange(p5f_1,p5f_2, ncol=1, nrow=2)
    dev.off()
    
    6.png

    本系列单细胞文献复现已经完结,从最开始的读取数据到创建Seurat对象及质控以及降维、聚类和细胞注释并到最后的marker基因等等步骤,按照原文中的5个Fig结果进行复现,总体感觉复现结果还行,自己也学到了不少东西,不过还有很多细节需要仔细琢磨和体会。

    往期单细胞数据挖掘实战

    单细胞数据挖掘实战:文献复现(一)批量读取数据

    单细胞数据挖掘实战:文献复现(二)批量创建Seurat对象及质控

    单细胞数据挖掘实战:文献复现(三)降维、聚类和细胞注释

    单细胞数据挖掘实战:文献复现(四)细胞比例饼图

    单细胞数据挖掘实战:文献复现(五)细胞亚群并可视化

    单细胞数据挖掘实战:文献复现(六)标记基因及可视化

    单细胞数据挖掘实战:文献复现(七)MG 和 Mo/MΦ 评分

    单细胞数据挖掘实战:文献复现(八)marker基因在Hom-MG、 Act-MG 和 Mo/MΦ 细胞中的表达情况

    单细胞数据挖掘实战:文献复现(九)基因GO分析及cnetplot可视化结果

    相关文章

      网友评论

          本文标题:单细胞数据挖掘实战:文献复现(十)完结篇

          本文链接:https://www.haomeiwen.com/subject/bwmpgrtx.html