美文网首页单细胞测序专题集合single cell
10X空间转录组数据分析之依据marker gene 解卷积空间

10X空间转录组数据分析之依据marker gene 解卷积空间

作者: 单细胞空间交响乐 | 来源:发表于2022-05-26 10:40 被阅读0次

    hello,周四了,又一周即将过去,时间越来越近了,疫情还是没有过去,北京的房租还是那么贵,这让我想起燕十三大侠说的一句话,人生总得做几件愚蠢的事,若是人人都只做聪明事,人生岂非就会变得更无趣了。

    今天我们来分享一个小知识,至于合不合理,尚无定论,但是绝对可以借鉴,那就是依据细胞类型的marker gene来解卷积空间spot,之前的做法都是用单细胞空间联合的方法来注释空间,这样的话必须要有单细胞空间两套数据,其实很多情况无法满足,那么今天,我们就来分享一个依据marker gene来注释空间的方法---Celloscope,参考文章在Celloscope: a probabilistic model for marker-gene-driven cell type deconvolution in spatial transcriptomics data,至于方法好不好,我们暂且不讨论,但这是一个很好的分析方向。

    Celloscope,利用已建立的关于标记基因的先验知识,从空间转录组学数据中进行细胞类型去卷积。

    Celloscope model overview.

    Celloscope,一种新的 ST 数据中基因表达的贝叶斯概率图形模型,它对 ST spot中的细胞类型组成进行去卷积,以及一种基于 MCMC 算法推断模型参数的方法。Celloscope 流程的第一步是分析这些 H&E 图像。 在此步骤中,估计每个spot的细胞数。

    关于所考虑细胞类型的标记基因的先验知识被编码为矩阵。 这些类型对应于预期在检查的组织中或在选定的感兴趣区域中发现的此类细胞。 此外,假设存在一种虚拟类型来解释新的或未知的细胞类型。 虚拟类型的特点是零标记基因

    估计的细胞计数和编码标记基因先验知识的矩阵,以及每个(选定)spot中标记基因的测量表达构成概率模型的输入。 该模型假设测量的表达式取决于每个点中隐藏的细胞类型混合物。 作为输出,Celloscope 返回每个感兴趣spot的细胞类型比例。


    图片.png

    看一眼分析的输出效果

    图片.png

    简单看看示例代码

    library(formatR)
    library(Ringo) # for visualizing the binary matrix B;  BiocManager::install("Ringo")
    library(stringr)
    library(knitr)
    library(kableExtra)
    library(IRdisplay)
    

    数据数据和marker gene

    input_data <- "~/Celloscope/example/data/"
    
    # reading marker genes from .csv file
    markers <- read.csv("~/Celloscope/markers/example.csv", header=TRUE, stringsAsFactors = FALSE)
    markers %>% kable("html") %>% as.character() %>% display_html()
    
    图片.png

    Encoding prior knowledge on marker genes as a binary matrix B

    # indicating names of cell types
    types <- markers[,1]
    # indicating all marker genes
    all_markers <- strsplit(markers[,2], ", ")
    names(all_markers) <- types
    # constructing B matrix
    list2env(all_markers, envir=globalenv())
    B <- as.data.frame.matrix(table(stack(all_markers)))
    # ordering marker genes in B matrix in the same order as in .csv file
    B<- B[match(unlist(all_markers),rownames(B) ),]
    

    Matrix B encoding prior knowledge on marker genes

    options(repr.plot.width = 6, repr.plot.height = 13)
    
    par(mgp=c(3, 0,0),mar=c(0,6,0,2)+0.1)
    plotBM(data.matrix(B), boxCol = "darkgrey", reorder = FALSE, frame = TRUE)
    
    图片.png

    Saving matrix B as .csv

    #adding dummy type
    B <- cbind(B, 0)
    types <- c(types, "DT")
    colnames(B) <- types
    write.csv(B, paste0(input_data, "matB.csv"))
    

    Loading and extracting data on mouse brain from Seurat package

    library(Seurat)
    library(SeuratData)
    brain <- LoadData("stxBrain", type = "anterior1")
    ST <- brain@assays$Spatial@data
    
    # subsetting ST data to marker genes
    ST <- ST[rownames(ST) %in% rownames(B), ]
    ST <- as.matrix(ST)
    

    Extracting coordinates

    cor <- brain@images$anterior1@coordinates
    xx <- cor$col
    yy <- cor$row
    
    spots <- paste0("X", xx, "x", yy)
    colnames(ST) <- spots
    #Visualizing spots taken on the tissue
    options(repr.plot.width = 5, repr.plot.height = 5)
    par(mgp=c(0, 0,0),mar=c(0,0,0,0)+0.1)
    plot(xx,yy, xlab="", ylab="",  pch=21, col="black", bg="gray90", xaxt="n", yaxt="n", ann = FALSE,  cex=0.6)
    
    图片.png

    示例的话提取其中的一部分作为参考,大家分析的时候全部的spot都要纳入分析

    #### Choosing an arbitrary subset of spots
    df <- data.frame(x=xx, y=yy)
    chosen <- df[ (df$x<102) & (df$x > 70), ]
    chosen <- chosen[ (chosen$y>20) & (chosen$y < 50)    ,]
    
    
    options(repr.plot.width = 5, repr.plot.height = 5)
    par(mgp=c(0, 0,0),mar=c(0,0,0,0)+0.1)
    plot(xx,yy, xlab="", ylab="",  pch=21, col="black", bg="gray90", xaxt="n", yaxt="n", ann = FALSE,  cex=0.6)
    points(chosen$x, chosen$y, pch=21, col="black", bg="lightblue")
    legend("bottomleft", legend=c("whole", "chosen"), pch=16, col=c("gray90", "lightblue" ), cex=1.5, bty="n")
    
    图片.png
    # subsetting ST data to the chosen subset of spots
    coord <- paste0("X", chosen$x, "x", chosen$y)
    ST <- ST[,colnames(ST) %in% coord]
    # genes in ST matrix must be in the same order as in B matrix
    ST <- ST[match(rownames(B), rownames(ST) ),]
    #Saving ST matrix to `.csv` file
    write.csv(as.matrix(ST), paste0(input_data, "C_gs.csv"))
    ST[1:5, 1:5]  %>% kable("html") %>% as.character() %>% display_html()
    

    Estimate for the number of all cells

    df_number_of_cells <- read.table("~/Celloscope/data/mouse-brain/CellCountSummary_Seurat.txt",  sep=",", header = TRUE, stringsAsFactors = FALSE)
    as.matrix(head(df_number_of_cells))%>% kable("html") %>% as.character() %>% display_html()
    #as.matrix(head(df_number_of_cells))
    
    图片.png
    lista <- str_split(df_number_of_cells$spot.coordinates, "x")
    xx <- as.numeric(sapply(lista, function(x) x[[1]]))
    yy <- as.numeric(sapply(lista, function(x) x[[2]]))
    par(mgp=c(0, 0,0),mar=c(0,0,0,0)+0.1)
    plot(yy,xx, xlab="", ylab="",  pch=21, col="black", bg="gray90", xaxt="n", yaxt="n", ann = FALSE,  cex=0.6)
    
    图片.png
    # alignment of coordinates with ST data
    df_number_of_cells$"spot.coordinates" <- paste0("X", yy, "x", xx)
    # extrating numbers of cells from column 'no.of.nuclei'
    df_number_of_cells$"no.of.nuclei" <- as.numeric(gsub("([0-9]+).*$", "\\1", df_number_of_cells$"no.of.nuclei"))
    
    # selecting our spots of interest 
    n_cells <- df_number_of_cells[df_number_of_cells$"spot.coordinates" %in% coord,]
    
    
    n_cells <- df_number_of_cells[match(coord, df_number_of_cells$"spot.coordinates"),]   
    n_cells$"no.of.nuclei"[n_cells$"no.of.nuclei"==0] <- 1
    rownames(n_cells) <- c()
    head(n_cells) %>% kable("html") %>% as.character() %>% display_html()
    
    图片.png

    保存结果

    write.csv(n_cells, paste0(input_data, "n_cells.csv"))
    

    结果的可视化

    options(warn=0)
    library(formatR)
    library(stringr)
    library(ggplot2)
    library(gridExtra)
    library(ggpubr)
    library(data.table)
    library(knitr)
    library(kableExtra)
    results <- "~/Celloscope/example/results/"
    input_data <-  "~/Celloscope/example/data/"
    
    图片.png
    C_gs <- read.csv( paste0(input_data, "C_gs.csv")  )
    coor<- colnames(C_gs)[-1]
    
    res <- str_split(coor, "x")
    
    x <- sapply(res, function(z) z[1])
    y <- as.numeric(sapply(res, function(z) z[2]))
    x <- as.numeric(sapply(x,  function(z) substring(z, 2)))
    options(repr.plot.width = 3, repr.plot.height = 3)
    par(mgp=c(0, 0,0),mar=c(0,0,0,0)+0.1)
    plot(x,y, xlab="", ylab="",  pch=21, col="black", bg="gray90", xaxt="n", yaxt="n", ann = FALSE,  cex=0.6)
    
    图片.png

    Importing results from `.csv files

    # Importing h estimation results
    h_est_chain01 <- t(read.csv(paste0(results, "chain01/h_est.csv"), head=FALSE))
    h_est_chain02 <- t(read.csv(paste0(results, "chain02/h_est.csv"), head=FALSE))
    
    # Averaging over chains
    h <- h_est_chain01 + h_est_chain02 
    h <- h/rowSums(h)
    

    可视化

    types <- c("T1", "T2", "T3", "T4", "DT")
    
    # function drawing a single heatmap for a single type
    one_type <- function(type_nr){
      df <- data.frame(x=x,y=y, frequency= h[,type_nr])
      df <- df[order(df$frequency), ]
      kolory <-  c("#000080",  "#ffff99",  "gold", "#cc00cc", "#cc00cc","#cc00cc", "#cc00cc")
      
      marginesy <- c(0, 0.01, -0.5, -0.5)
      ggplot(df, aes(x =x, y = y)) + geom_point(aes(color =frequency), size =2) +
        scale_color_gradientn(colors =kolory)+
        theme_bw() + labs(x = "", y="") +   ggtitle(types[type_nr])+
        theme(plot.title = element_text(size=12, hjust = 0, vjust =-2 ), legend.position = "bottom", legend.key.width=unit(1,"cm"),
              legend.background = element_rect(fill='transparent'),
              plot.margin = unit(marginesy, "cm"), axis.text.x=element_blank(), axis.ticks.x=element_blank(),
              axis.text.y=element_blank(), axis.ticks.y=element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
              legend.text=element_text(size=12), legend.title=element_text(size=12), 
              legend.box="horizontal",  legend.key.size = unit(0.4, 'cm'))
    }
    
    Agg1 <- one_type(1)+ theme(legend.position="none"); Agg2 <- one_type(2)+ theme(legend.position="none");
    Agg3 <- one_type(3)+ theme(legend.position="none"); Agg4 <- one_type(4)+ theme(legend.position="none"); 
    Agg5 <- one_type(5)+ theme(legend.position="none");
    options(repr.plot.width = 4, repr.plot.height = 3)
    ggarrange(Agg1 , Agg2 , Agg3 , Agg4, Agg5, ncol=5, nrow=1, common.legend = TRUE, legend="bottom")
    
    图片.png

    方法可以一试,github在Celloscope,生活很好,有你更好

    相关文章

      网友评论

        本文标题:10X空间转录组数据分析之依据marker gene 解卷积空间

        本文链接:https://www.haomeiwen.com/subject/fmjuprtx.html