hello,周四了,又一周即将过去,时间越来越近了,疫情还是没有过去,北京的房租还是那么贵,这让我想起燕十三大侠说的一句话,人生总得做几件愚蠢的事,若是人人都只做聪明事,人生岂非就会变得更无趣了。
今天我们来分享一个小知识,至于合不合理,尚无定论,但是绝对可以借鉴,那就是依据细胞类型的marker gene来解卷积空间spot,之前的做法都是用单细胞空间联合的方法来注释空间,这样的话必须要有单细胞空间两套数据,其实很多情况无法满足,那么今天,我们就来分享一个依据marker gene来注释空间的方法---Celloscope,参考文章在Celloscope: a probabilistic model for marker-gene-driven cell type deconvolution in spatial transcriptomics data,至于方法好不好,我们暂且不讨论,但这是一个很好的分析方向。
Celloscope,利用已建立的关于标记基因的先验知识,从空间转录组学数据中进行细胞类型去卷积。
Celloscope model overview.
Celloscope,一种新的 ST 数据中基因表达的贝叶斯概率图形模型,它对 ST spot中的细胞类型组成进行去卷积,以及一种基于 MCMC 算法推断模型参数的方法。Celloscope 流程的第一步是分析这些 H&E 图像。 在此步骤中,估计每个spot的细胞数。
关于所考虑细胞类型的标记基因的先验知识被编码为矩阵。 这些类型对应于预期在检查的组织中或在选定的感兴趣区域中发现的此类细胞。 此外,假设存在一种虚拟类型来解释新的或未知的细胞类型。 虚拟类型的特点是零标记基因。
估计的细胞计数和编码标记基因先验知识的矩阵,以及每个(选定)spot中标记基因的测量表达构成概率模型的输入。 该模型假设测量的表达式取决于每个点中隐藏的细胞类型混合物。 作为输出,Celloscope 返回每个感兴趣spot的细胞类型比例。
图片.png
看一眼分析的输出效果
图片.png简单看看示例代码
library(formatR)
library(Ringo) # for visualizing the binary matrix B; BiocManager::install("Ringo")
library(stringr)
library(knitr)
library(kableExtra)
library(IRdisplay)
数据数据和marker gene
input_data <- "~/Celloscope/example/data/"
# reading marker genes from .csv file
markers <- read.csv("~/Celloscope/markers/example.csv", header=TRUE, stringsAsFactors = FALSE)
markers %>% kable("html") %>% as.character() %>% display_html()
图片.png
Encoding prior knowledge on marker genes as a binary matrix B
# indicating names of cell types
types <- markers[,1]
# indicating all marker genes
all_markers <- strsplit(markers[,2], ", ")
names(all_markers) <- types
# constructing B matrix
list2env(all_markers, envir=globalenv())
B <- as.data.frame.matrix(table(stack(all_markers)))
# ordering marker genes in B matrix in the same order as in .csv file
B<- B[match(unlist(all_markers),rownames(B) ),]
Matrix B encoding prior knowledge on marker genes
options(repr.plot.width = 6, repr.plot.height = 13)
par(mgp=c(3, 0,0),mar=c(0,6,0,2)+0.1)
plotBM(data.matrix(B), boxCol = "darkgrey", reorder = FALSE, frame = TRUE)
图片.png
Saving matrix B as .csv
#adding dummy type
B <- cbind(B, 0)
types <- c(types, "DT")
colnames(B) <- types
write.csv(B, paste0(input_data, "matB.csv"))
Loading and extracting data on mouse brain from Seurat package
library(Seurat)
library(SeuratData)
brain <- LoadData("stxBrain", type = "anterior1")
ST <- brain@assays$Spatial@data
# subsetting ST data to marker genes
ST <- ST[rownames(ST) %in% rownames(B), ]
ST <- as.matrix(ST)
Extracting coordinates
cor <- brain@images$anterior1@coordinates
xx <- cor$col
yy <- cor$row
spots <- paste0("X", xx, "x", yy)
colnames(ST) <- spots
#Visualizing spots taken on the tissue
options(repr.plot.width = 5, repr.plot.height = 5)
par(mgp=c(0, 0,0),mar=c(0,0,0,0)+0.1)
plot(xx,yy, xlab="", ylab="", pch=21, col="black", bg="gray90", xaxt="n", yaxt="n", ann = FALSE, cex=0.6)
图片.png
示例的话提取其中的一部分作为参考,大家分析的时候全部的spot都要纳入分析
#### Choosing an arbitrary subset of spots
df <- data.frame(x=xx, y=yy)
chosen <- df[ (df$x<102) & (df$x > 70), ]
chosen <- chosen[ (chosen$y>20) & (chosen$y < 50) ,]
options(repr.plot.width = 5, repr.plot.height = 5)
par(mgp=c(0, 0,0),mar=c(0,0,0,0)+0.1)
plot(xx,yy, xlab="", ylab="", pch=21, col="black", bg="gray90", xaxt="n", yaxt="n", ann = FALSE, cex=0.6)
points(chosen$x, chosen$y, pch=21, col="black", bg="lightblue")
legend("bottomleft", legend=c("whole", "chosen"), pch=16, col=c("gray90", "lightblue" ), cex=1.5, bty="n")
图片.png
# subsetting ST data to the chosen subset of spots
coord <- paste0("X", chosen$x, "x", chosen$y)
ST <- ST[,colnames(ST) %in% coord]
# genes in ST matrix must be in the same order as in B matrix
ST <- ST[match(rownames(B), rownames(ST) ),]
#Saving ST matrix to `.csv` file
write.csv(as.matrix(ST), paste0(input_data, "C_gs.csv"))
ST[1:5, 1:5] %>% kable("html") %>% as.character() %>% display_html()
Estimate for the number of all cells
df_number_of_cells <- read.table("~/Celloscope/data/mouse-brain/CellCountSummary_Seurat.txt", sep=",", header = TRUE, stringsAsFactors = FALSE)
as.matrix(head(df_number_of_cells))%>% kable("html") %>% as.character() %>% display_html()
#as.matrix(head(df_number_of_cells))
图片.png
lista <- str_split(df_number_of_cells$spot.coordinates, "x")
xx <- as.numeric(sapply(lista, function(x) x[[1]]))
yy <- as.numeric(sapply(lista, function(x) x[[2]]))
par(mgp=c(0, 0,0),mar=c(0,0,0,0)+0.1)
plot(yy,xx, xlab="", ylab="", pch=21, col="black", bg="gray90", xaxt="n", yaxt="n", ann = FALSE, cex=0.6)
图片.png
# alignment of coordinates with ST data
df_number_of_cells$"spot.coordinates" <- paste0("X", yy, "x", xx)
# extrating numbers of cells from column 'no.of.nuclei'
df_number_of_cells$"no.of.nuclei" <- as.numeric(gsub("([0-9]+).*$", "\\1", df_number_of_cells$"no.of.nuclei"))
# selecting our spots of interest
n_cells <- df_number_of_cells[df_number_of_cells$"spot.coordinates" %in% coord,]
n_cells <- df_number_of_cells[match(coord, df_number_of_cells$"spot.coordinates"),]
n_cells$"no.of.nuclei"[n_cells$"no.of.nuclei"==0] <- 1
rownames(n_cells) <- c()
head(n_cells) %>% kable("html") %>% as.character() %>% display_html()
图片.png
保存结果
write.csv(n_cells, paste0(input_data, "n_cells.csv"))
结果的可视化
options(warn=0)
library(formatR)
library(stringr)
library(ggplot2)
library(gridExtra)
library(ggpubr)
library(data.table)
library(knitr)
library(kableExtra)
results <- "~/Celloscope/example/results/"
input_data <- "~/Celloscope/example/data/"
图片.png
C_gs <- read.csv( paste0(input_data, "C_gs.csv") )
coor<- colnames(C_gs)[-1]
res <- str_split(coor, "x")
x <- sapply(res, function(z) z[1])
y <- as.numeric(sapply(res, function(z) z[2]))
x <- as.numeric(sapply(x, function(z) substring(z, 2)))
options(repr.plot.width = 3, repr.plot.height = 3)
par(mgp=c(0, 0,0),mar=c(0,0,0,0)+0.1)
plot(x,y, xlab="", ylab="", pch=21, col="black", bg="gray90", xaxt="n", yaxt="n", ann = FALSE, cex=0.6)
图片.png
Importing results from `.csv files
# Importing h estimation results
h_est_chain01 <- t(read.csv(paste0(results, "chain01/h_est.csv"), head=FALSE))
h_est_chain02 <- t(read.csv(paste0(results, "chain02/h_est.csv"), head=FALSE))
# Averaging over chains
h <- h_est_chain01 + h_est_chain02
h <- h/rowSums(h)
可视化
types <- c("T1", "T2", "T3", "T4", "DT")
# function drawing a single heatmap for a single type
one_type <- function(type_nr){
df <- data.frame(x=x,y=y, frequency= h[,type_nr])
df <- df[order(df$frequency), ]
kolory <- c("#000080", "#ffff99", "gold", "#cc00cc", "#cc00cc","#cc00cc", "#cc00cc")
marginesy <- c(0, 0.01, -0.5, -0.5)
ggplot(df, aes(x =x, y = y)) + geom_point(aes(color =frequency), size =2) +
scale_color_gradientn(colors =kolory)+
theme_bw() + labs(x = "", y="") + ggtitle(types[type_nr])+
theme(plot.title = element_text(size=12, hjust = 0, vjust =-2 ), legend.position = "bottom", legend.key.width=unit(1,"cm"),
legend.background = element_rect(fill='transparent'),
plot.margin = unit(marginesy, "cm"), axis.text.x=element_blank(), axis.ticks.x=element_blank(),
axis.text.y=element_blank(), axis.ticks.y=element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
legend.text=element_text(size=12), legend.title=element_text(size=12),
legend.box="horizontal", legend.key.size = unit(0.4, 'cm'))
}
Agg1 <- one_type(1)+ theme(legend.position="none"); Agg2 <- one_type(2)+ theme(legend.position="none");
Agg3 <- one_type(3)+ theme(legend.position="none"); Agg4 <- one_type(4)+ theme(legend.position="none");
Agg5 <- one_type(5)+ theme(legend.position="none");
options(repr.plot.width = 4, repr.plot.height = 3)
ggarrange(Agg1 , Agg2 , Agg3 , Agg4, Agg5, ncol=5, nrow=1, common.legend = TRUE, legend="bottom")
图片.png
方法可以一试,github在Celloscope,生活很好,有你更好
网友评论