单细胞数据挖掘实战:文献复现(二)批量创建Seurat对象及质控
单细胞数据挖掘实战:文献复现(七)MG 和 Mo/MΦ 评分
单细胞数据挖掘实战:文献复现(八)marker基因在Hom-MG、 Act-MG 和 Mo/MΦ 细胞中的表达情况
单细胞数据挖掘实战:文献复现(九)基因GO分析及cnetplot可视化结果
这篇文献的最后一个结果是介绍MHCII基因小胶质细胞表达的性别相关差异,结果说明MHCII 和Cd74基因的表达在男性神经胶质瘤的小胶质细胞和单核细胞/巨噬细胞中更为丰富,结果通过Fig. 5来展示。
一、加载R包
if (T) {
if(!require(BiocManager))install.packages("BiocManager")
if(!require(Seurat))install.packages("Seurat")
if(!require(Matrix))install.packages("Matrix")
if(!require(ggplot2))install.packages("ggplot2")
if(!require(cowplot))install.packages("cowplot")
if(!require(magrittr))install.packages("magrittr")
if(!require(dplyr))install.packages("dplyr")
if(!require(purrr))install.packages("purrr")
if(!require(ggrepel))install.packages("ggrepel")
if(!require(ggridges))install.packages("ggridges")
if(!require(ggpubr))install.packages("ggpubr")
}
二、添加sex_condition
table(seu_object$orig.ident)
#GSM4039241-F-ctrl-1 GSM4039242-F-ctrl-2 GSM4039243-F-tumor-1
# 5151 4781 4878
#GSM4039244-F-tumor-2 GSM4039245-M-ctrl-1 GSM4039246-M-ctrl-2
# 5467 4786 5218
#GSM4039247-M-tumor-1 GSM4039248-M-tumor-2
# 4091 4891
group <- c(rep("F_ctrl",times = 5151),
rep("F_ctrl",times = 4781),
rep("F_tumor",times = 4878),
rep("F_tumor",times = 5467),
rep("M_ctrl",times = 4786),
rep("M_ctrl",times = 5218),
rep("M_tumor",times = 4091),
rep("M_tumor",times = 4891))
seu_object$sex_condition <- group
# Figure 5a
DimPlot(seu_object, group.by = "sex_condition")
1.png
三、Act-MG 和 Mo/MΦ 中不同性别的 DEG
table(Idents(seu_object))
# MG Mo/MΦ BAM
#31959 5624 1680
#选出"MG","Mo/MΦ"
seu_object <- subset(seu_object, idents = c("MG","Mo/MΦ"))
table(Idents(seu_object))
# MG Mo/MΦ
#31959 5624
#将Microglia细分为"h.Microglia"和"a.Microglia"
Idents(seu_object) <- seu_object$seurat_clusters
seu_object$cell_type_4_groups <- plyr::mapvalues(Idents(seu_object),
from=c(0:4,6:11,13,14,16,18,19),
to = c("h.Microglia", "h.Microglia", "a.Microglia",
"h.Microglia", "Macrophages", "h.Microglia",
"h.Microglia", "Macrophages","a.Microglia",
"a.Microglia", "Macrophages","a.Microglia",
"h.Microglia", "Macrophages" ,"Macrophages",
"Macrophages"))
Idents(seu_object) <- seu_object$cell_type_4_groups
table(Idents(seu_object))
#h.Microglia a.Microglia Macrophages
# 24728 7231 5624
object_tmp <- seu_object
object_tmp$sex <- substr(seu_object$sex_condition,1,1)
object_tmp$sex_cell_type <- paste0(object_tmp$sex, "_", as.character(Idents(seu_object)))
Idents(object_tmp) <- object_tmp$sex_cell_type
table(Idents(object_tmp))
#F_h.Microglia F_a.Microglia F_Macrophages M_h.Microglia M_Macrophages
# 13273 3496 2691 11455 2933
#M_a.Microglia
# 3735
#找marker基因
markers_male_ActMG <- FindMarkers(object = object_tmp, ident.1 = "M_a.Microglia",
ident.2 = "F_a.Microglia", only.pos = TRUE, min.pct = 0.25,
logfc.threshold = 0.0001)
markers_female_ActMG <- FindMarkers(object = object_tmp, ident.1 = "F_a.Microglia",
ident.2 = "M_a.Microglia", only.pos = TRUE, min.pct = 0.25,
logfc.threshold = 0.0001)
markers_female_ActMG$avg_log2FC <- markers_female_ActMG$avg_log2FC*-1
markers_male_MoM <- FindMarkers(object = object_tmp, ident.1 = "M_Macrophages",
ident.2 = "F_Macrophages", only.pos = TRUE, min.pct = 0.25,
logfc.threshold = 0.0001)
markers_female_MoM <- FindMarkers(object = object_tmp, ident.1 = "F_Macrophages",
ident.2 = "M_Macrophages", only.pos = TRUE, min.pct = 0.25,
logfc.threshold = 0.0001)
markers_female_MoM$avg_log2FC <- markers_female_MoM$avg_log2FC*-1
#放入一个list中循环进行条件筛选
markers_list <- list(markers_male_ActMG, markers_female_ActMG, markers_male_MoM, markers_female_MoM)
names(markers_list) <- c("markers_male_ActMG", "markers_female_ActMG", "markers_male_MoM", "markers_female_MoM")
markers_list <- lapply(markers_list, function(x) {
x$gene <- rownames(x)
x$padj_log10<- -log10(x$p_val_adj)
x[x$padj_log10 == Inf, "padj_log10"]<-284
x$log2FC <- log2(exp(x$avg_log2FC))
x <- x %>% mutate(threshold = c(p_val_adj < 0.05 & abs(log2FC) > 0.5 & (pct.1>=0.25 | pct.2 >=0.25)))
x$color <- x$threshold
x[x$color == F, "color"] <- "below threshold"
x[x$color == T, "color"]<-"male"
x[x$color=="male" & x$log2FC<0, "color"]<-"female"
x$color<-factor(x$color, levels=c("female", "male", "below threshold"))
x<-na.omit(x)
x <-x[rev(order(x$color)),]
rownames(x)<-seq_along(x$p_val)
x
})
#按性别进行整合
markers_ActMG_sex <- rbind(cbind(markers_list$markers_male_ActMG, sex="M"),
cbind(markers_list$markers_female_ActMG, sex = "F"))
markers_MoM_sex <- rbind(cbind(markers_list$markers_male_MoM, sex="M"),
cbind(markers_list$markers_female_MoM, sex = "F"))
markers_MoM_sex[markers_MoM_sex$padj_log10 > 149, "padj_log10"] <- 149
markers_ActMG_sex$color <- factor(markers_ActMG_sex$color, levels=c("female", "male", "below threshold"))
markers_MoM_sex$color <- factor(markers_MoM_sex$color, levels=c("female", "male", "below threshold"))
#画火山图
col_male<- "#4FCAFF"
col_female<- "#FFFF00"
MG<-ggplot(markers_ActMG_sex, aes(x=log2FC, y=padj_log10, fill=color))+
geom_jitter(size=3, shape=21, color="black", stroke=0.01)+
geom_text_repel(data=markers_ActMG_sex[markers_ActMG_sex$threshold==T & markers_ActMG_sex$log2FC>0,],
aes(label=gene), nudge_x = 2, nudge_y = 2,
color="black", size=6, force=5)+
geom_text_repel(data=markers_ActMG_sex[markers_ActMG_sex$threshold==T & markers_ActMG_sex$log2FC<0,],
aes(label=gene), nudge_x = -2, nudge_y = 2,
color="black", size=6, force=5)+
scale_fill_manual(values=c(col_female,col_male, "gray90"))+
scale_x_continuous(limits=c(-4,4))+
labs(title= "Act-MG",x="avg log2FC", y="log10 padj")+
theme_light(base_size=18)+
theme(panel.grid = element_blank(), legend.title = element_blank(),
legend.text = element_text(size=18))+
guides(fill = guide_legend(override.aes = list(size=6)))
MoM<-ggplot(markers_MoM_sex, aes(x=log2FC, y=padj_log10, fill=color))+
geom_jitter(size=3, shape=21, color="black")+
geom_text_repel(data=markers_MoM_sex[markers_MoM_sex$threshold==T & markers_MoM_sex$log2FC>0 & markers_MoM_sex$gene != "AB124611",],
aes(label=gene), nudge_x = 2, nudge_y = 5,
color="black", size=6)+
geom_text_repel(data=markers_MoM_sex[markers_MoM_sex$threshold==T & markers_MoM_sex$log2FC<0,],
aes(label=gene), nudge_x = -2.5, nudge_y = 2,
color="black", size=6)+
scale_fill_manual(values=c(col_female,col_male, "gray90"))+
scale_x_continuous(limits=c(-4,4))+
labs(title= "Mo/M ", x="avg log2FC", y="log10 padj")+
theme_light(base_size=18)+
theme(panel.grid = element_blank(), legend.title = element_blank(),
legend.text = element_text(size=18))+
guides(fill = guide_legend(override.aes = list(size=6)))
#fig5b
pdf("fig5b.pdf",width = 20,height = 20)
ggarrange(MG, MoM, ncol=2, common.legend = T)
dev.off()
2.png
3.png
与文献结果基本一致。
Figure 5c
FeaturePlots for selected MHC II genes(Cd74, H2-Ab1, H2-Eb1, H2-Aa)
gene_to_plot <- "Cd74"
f5c_1 <- FeaturePlot(seu_object, features = gene_to_plot) +
scale_color_gradientn(colors=viridis::viridis(n=50)) +
ggtitle(gene_to_plot)
gene_to_plot <- "H2-Ab1"
f5c_2 <- FeaturePlot(seu_object, features = gene_to_plot) +
scale_color_gradientn(colors=viridis::viridis(n=50)) +
ggtitle(gene_to_plot)
gene_to_plot <- "H2-Eb1"
f5c_3 <- FeaturePlot(seu_object, features = gene_to_plot) +
scale_color_gradientn(colors=viridis::viridis(n=50)) +
ggtitle(gene_to_plot)
gene_to_plot <- "H2-Aa"
f5c_4 <- FeaturePlot(seu_object, features = gene_to_plot) +
scale_color_gradientn(colors=viridis::viridis(n=50)) +
ggtitle(gene_to_plot)
pdf("fig5c.pdf",width = 20,height = 5)
ggarrange(f5c_1, f5c_2,f5c_3,f5c_4, ncol=4, common.legend = T)
dev.off()
4.png
四、MHCII 基因和Cd74高表达的 Act-MG 和 intMoMΦ 群体中的富集情况
Figure 5d
rm(list = ls())
options(stringsAsFactors = F)
seu_object = readRDS("seu_object_merge_prediff.RDS")
seu_object <- ScaleData(seu_object)
#添加性别
table(seu_object$orig.ident)
group <- c(rep("F_ctrl",times = 5151),
rep("F_ctrl",times = 4781),
rep("F_tumor",times = 4878),
rep("F_tumor",times = 5467),
rep("M_ctrl",times = 4786),
rep("M_ctrl",times = 5218),
rep("M_tumor",times = 4091),
rep("M_tumor",times = 4891))
seu_object$sex_condition <- group
#将Microglia细分为"h.Microglia"和"a.Microglia"
Idents(seu_object) <- seu_object$seurat_clusters
seu_object$cell_type_4_groups <- plyr::mapvalues(Idents(seu_object),
from=c(0:11,13,14,16,18,19),
to = c("h.Microglia", "h.Microglia", "a.Microglia","h.Microglia","BAM" ,"Macrophages", "h.Microglia",
"h.Microglia", "Macrophages","a.Microglia","a.Microglia", "Macrophages","a.Microglia",
"h.Microglia", "Macrophages" ,"Macrophages","Macrophages"))
genes<-c("Cd74", "H2-Ab1", "H2-Eb1", "H2-Aa" )
gene_expression_data <- GetAssayData(object = seu_object, slot = "data")
gene_expression_data <- as.data.frame(t(gene_expression_data[genes,]))
gene_expression_data$cell_types_4_groups <- as.character(Idents(seu_object))
gene_expression_data$clusters_0.6 <- seu_object$RNA_snn_res.0.6
gene_expression_data$cell_types_6_groups <- NULL
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 == 11] <- "Mo"
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 == 4] <- "intMoM"
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 %in% c(8,16,18,19)] <- "M"
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 == 5] <- "BAM"
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 %in% c(0,1,3,6,7,14)] <- "h.Microglia"
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 %in% c(2,9,10,13)] <- "a.Microglia"
gene_expression_data$cell_types_6_groups <- factor(gene_expression_data$cell_types_6_groups)
gene_expression_data$cell_types_6_groups <- factor(gene_expression_data$cell_types_6_groups,
levels = levels(gene_expression_data$cell_types_6_groups)[c(3,1,6,4,5,2)])
seu_object$sex <- substr(seu_object$sex_condition,1,1)
gene_expression_data$sex <- seu_object$sex
graphs<-list()
for (i in seq_along(genes)){
plot<-ggplot(gene_expression_data, aes(x="", y=cell_types_6_groups, fill=sex))+
geom_density_ridges(aes_string(x=as.name(genes[i])), alpha=0.65, scale=1 )+
scale_fill_manual(values=c( "#ffff00", "#4FCAFF"))+
xlab("expression level")+
ylab("")+
labs(title=genes[i])+
theme_classic(base_size=18)+
theme(axis.text = element_text(size=16), axis.text.y = element_text(face="bold", size=18),
axis.title=element_text(size=16),
title = element_text(size = 20), legend.text = element_text(size=18),
legend.title = element_blank())
graphs[[i]]<-plot
}
if(!require(ggpubr))install.packages("ggpubr")
pdf("fig_5d2.pdf", width = 20,height = 5)
ggarrange(plotlist=graphs, ncol=4, nrow=1, common.legend = T, legend="top")
dev.off()
5.png
Figure 5f
gene_expression_data$mhc2_score <- rowMeans(gene_expression_data[, c("H2-Ab1", "H2-Eb1", "H2-Aa")])
gene_expression_data <- gene_expression_data %>% mutate(threshold = mhc2_score > 2)
gene_expression_data[gene_expression_data$threshold==T, "threshold"]<-"MHCII High"
gene_expression_data[gene_expression_data$threshold==F, "threshold"]<-"MHCII Low"
gene_expression_data$threshold <- factor(gene_expression_data$threshold,
levels = c("MHCII Low", "MHCII High"))
gene_expression_data_all_genes <- GetAssayData(object = seu_object, slot = "data")
gene_expression_data_ciita_mif <- as.data.frame(t(gene_expression_data_all_genes[c("Ciita", "Mif"), ]))
colnames(gene_expression_data_ciita_mif) <- c("Ciita", "Mif")
gene_expression_data <- cbind(gene_expression_data, gene_expression_data_ciita_mif)
gene_expression_data <- gene_expression_data[gene_expression_data$cell_types_6_groups %in%
c("a.Microglia", "intMoM"), ]
gene_expression_data$cell_types_selected <- factor(gene_expression_data$cell_types_6_groups,
levels = c("a.Microglia", "intMoM"))
p5f_1 <- ggplot(gene_expression_data, aes(x=mhc2_score, y=cell_types_selected, fill=sex))+
geom_density_ridges(alpha=0.65, scale=1 )+
scale_fill_manual(values=c( "#ffff00", "#4FCAFF"))+
geom_vline(xintercept=2, color="red", linetype="dashed", size=1)+
xlab("MHCII score (H2-Ab1, H2-Eb1, H2-Aa)")+
ylab("")+
theme_classic(base_size=18)+
theme(axis.text.y = element_text(face="bold"),
title = element_text(size = 20), axis.title.y = element_text(size=16),
legend.text = element_text(size=18), legend.title = element_blank(),
axis.text = element_text(size=18), legend.position = "top")
p5f_2 <- ggplot(gene_expression_data, aes(x=cell_types_selected, y=Mif, fill=sex))+
geom_violin(scale="count")+
scale_fill_manual(values=c( "#ffff00", "#4FCAFF"))+
facet_grid(~threshold)+
ylab("Mif expression level")+
xlab("")+
theme_classic(base_size=18)+
theme(axis.text = element_text(size=18), legend.position = "none")
pdf("fig_5f.pdf", width = 10,height = 10)
ggarrange(p5f_1,p5f_2, ncol=1, nrow=2)
dev.off()
6.png
本系列单细胞文献复现已经完结,从最开始的读取数据到创建Seurat对象及质控以及降维、聚类和细胞注释并到最后的marker基因等等步骤,按照原文中的5个Fig结果进行复现,总体感觉复现结果还行,自己也学到了不少东西,不过还有很多细节需要仔细琢磨和体会。
往期单细胞数据挖掘实战:
单细胞数据挖掘实战:文献复现(二)批量创建Seurat对象及质控
单细胞数据挖掘实战:文献复现(七)MG 和 Mo/MΦ 评分
网友评论