10X单细胞空间分析回顾之SPOTlight

作者: 单细胞空间交响乐 | 来源:发表于2022-01-04 16:48 被阅读0次

    一年了,我们都需要总结,这一篇我们回顾一下SPOTlight,非常好的方法,建议大家也总结一下,看看2021年,自己都得到了什么。

    SPOTlight 的目标是提供一种工具,能够对包含细胞混合物的每个捕获位置中存在的细胞类型和细胞类型比例进行解卷积,最初是为 10X 的 Visium - 空间转录组学技术开发的, it can be used for all technologies returning mixtures of cells。 SPOTlight 基于通过 NMFreg 模型为每种细胞类型查找topics profiles singatures,并找到最适合我们想要解卷积的spot的组合。

    图片.png

    Libraries

    library(Matrix)
    library(data.table)
    library(Seurat)
    library(SeuratData)
    library(dplyr)
    library(gt)
    library(SPOTlight)
    library(igraph)
    library(RColorBrewer)
    

    Load data

    Load single-cell reference dataset.

    path_to_data <- system.file(package = "SPOTlight")
    cortex_sc <- readRDS(glue::glue("{path_to_data}/allen_cortex_dwn.rds"))
    

    Load Spatial data

    if (! "stxBrain" %in% SeuratData::AvailableData()[, "Dataset"]) {
      # If dataset not downloaded proceed to download it
      SeuratData::InstallData("stxBrain")
    }
    
    # Load data
    anterior <- SeuratData::LoadData("stxBrain", type = "anterior1")
    

    Pre-processing,设置种子的功能大家还知道么??

    set.seed(123)
    cortex_sc <- Seurat::SCTransform(cortex_sc, verbose = FALSE) %>%
      Seurat::RunPCA(., verbose = FALSE) %>%
      Seurat::RunUMAP(., dims = 1:30, verbose = FALSE)
    

    Visualize the clustering

    Seurat::DimPlot(cortex_sc,
                    group.by = "subclass",
                    label = TRUE) + Seurat::NoLegend()
    
    图片.png

    Descriptive

    cortex_sc@meta.data %>%
      dplyr::count(subclass) %>%
      gt::gt(.[-1, ]) %>%
      gt::tab_header(
        title = "Cell types present in the reference dataset",
      ) %>%
      gt::cols_label(
        subclass = gt::html("Cell Type")
      )
    
    图片.png

    Compute marker genes

    为了确定最重要的标记基因,我们可以使用函数 Seurat::FindAllMarkers,它将返回每个cluster的标记。

    Seurat::Idents(object = cortex_sc) <- cortex_sc@meta.data$subclass
    cluster_markers_all <- Seurat::FindAllMarkers(object = cortex_sc, 
                                                  assay = "SCT",
                                                  slot = "data",
                                                  verbose = TRUE, 
                                                  only.pos = TRUE)
    
    saveRDS(object = cluster_markers_all,
            file = here::here("inst/markers_sc.RDS"))
    

    SPOTlight Decomposition

    set.seed(123)
    
    spotlight_ls <- spotlight_deconvolution(
      se_sc = cortex_sc,
      counts_spatial = anterior@assays$Spatial@counts,
      clust_vr = "subclass", # Variable in sc_seu containing the cell-type annotation
      cluster_markers = cluster_markers_all, # Dataframe with the marker genes
      cl_n = 100, # number of cells per cell type to use
      hvg = 3000, # Number of HVG to use
      ntop = NULL, # How many of the marker genes to use (by default all)
      transf = "uv", # Perform unit-variance scaling per cell and spot prior to factorzation and NLS
      method = "nsNMF", # Factorization method
      min_cont = 0 # Remove those cells contributing to a spot below a certain threshold 
      )
    
    saveRDS(object = spotlight_ls, file = here::here("inst/spotlight_ls.rds"))
    

    Read RDS object

    spotlight_ls <- readRDS(file = here::here("inst/spotlight_ls.rds"))
    
    nmf_mod <- spotlight_ls[[1]]
    decon_mtrx <- spotlight_ls[[2]]
    

    Assess deconvolution

    Before even looking at the decomposed spots we can gain insight on how well the model performed by looking at the topic profiles for the cell types.
    The first thing we can do is look at how specific the topic profiles are for each cell type.

    h <- NMF::coef(nmf_mod[[1]])
    rownames(h) <- paste("Topic", 1:nrow(h), sep = "_")
    topic_profile_plts <- SPOTlight::dot_plot_profiles_fun(
      h = h,
      train_cell_clust = nmf_mod[[2]])
    
    topic_profile_plts[[2]] + ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 90), 
      axis.text = ggplot2::element_text(size = 12))
    
    图片.png

    接下来我们可以看看每个细胞类型中每个细胞的各个topic profiles的行为。
    在这里,我们期望来自同一细胞类型的所有细胞显示出相似的topic profiles分布,否则该cluster中可能会有更多的子结构,我们可能只捕获其中一个

    topic_profile_plts[[1]] + theme(axis.text.x = element_text(angle = 90), 
                                    axis.text = element_text(size = 12))
    
    图片.png

    Lastly we can take a look at which genes are the most important for each topic and therefore get an insight into which genes are driving them.

    basis_spotlight <- data.frame(NMF::basis(nmf_mod[[1]]))
    
    colnames(basis_spotlight) <- unique(stringr::str_wrap(nmf_mod[[2]], width = 30))
    
    basis_spotlight %>%
      dplyr::arrange(desc(Astro)) %>%
      round(., 5) %>% 
      DT::datatable(., filter = "top")
    
    图片.png

    Visualization

    Join decomposition with metadata

    # This is the equivalent to setting min_cont to 0.04
    decon_mtrx_sub <- decon_mtrx[, colnames(decon_mtrx) != "res_ss"]
    decon_mtrx_sub[decon_mtrx_sub < 0.08] <- 0
    decon_mtrx <- cbind(decon_mtrx_sub, "res_ss" = decon_mtrx[, "res_ss"])
    rownames(decon_mtrx) <- colnames(anterior)
    
    decon_df <- decon_mtrx %>%
      data.frame() %>%
      tibble::rownames_to_column("barcodes")
    
    anterior@meta.data <- anterior@meta.data %>%
      tibble::rownames_to_column("barcodes") %>%
      dplyr::left_join(decon_df, by = "barcodes") %>%
      tibble::column_to_rownames("barcodes")
    

    Specific cell-types

    we can use the standard Seurat::SpatialFeaturePlot to view predicted celltype proportions one at a time.

    Seurat::SpatialFeaturePlot(
      object = anterior,
      features = c("L2.3.IT", "L6b", "Meis2", "Oligo"),
      alpha = c(0.1, 1))
    
    图片.png

    Spatial scatterpies

    cell_types_all <- colnames(decon_mtrx)[which(colnames(decon_mtrx) != "res_ss")]
    
    SPOTlight::spatial_scatterpie(se_obj = anterior,
                                  cell_types_all = cell_types_all,
                                  img_path = "sample_data/spatial/tissue_lowres_image.png",
                                  pie_scale = 0.4)
    
    图片.png

    Plot spot composition of spots containing cell-types of interest

    SPOTlight::spatial_scatterpie(se_obj = anterior,
                                  cell_types_all = cell_types_all,
                                  img_path = "sample_data/spatial/tissue_lowres_image.png",
                                  cell_types_interest = "L6b",
                                  pie_scale = 0.8)
    
    图片.png

    Spatial interaction graph

    现在我们知道在每个点内发现了哪些细胞类型,我们可以制作一个表示空间相互作用的图,其中细胞类型之间的边缘越强,我们在同一点内发现它们的频率越高。 为此,我们只需要运行 get_spatial_interaction_graph 函数,该函数将打印绘图并返回绘图所需的元素。

    graph_ntw <- SPOTlight::get_spatial_interaction_graph(decon_mtrx = decon_mtrx[, cell_types_all])
    

    If you want to tune how the graph looks you can do the following or you can check out more options here:

    deg <- degree(graph_ntw, mode="all")
    
    # Get color palette for difusion
    edge_importance <- E(graph_ntw)$importance
    
    # Select a continuous palette
    qual_col_pals <- brewer.pal.info[brewer.pal.info$category == 'seq',]
    
    # Create a color palette
    getPalette <- colorRampPalette(brewer.pal(9, "YlOrRd"))
    
    # Get how many values we need
    grad_edge <- seq(0, max(edge_importance), 0.1)
    
    # Generate extended gradient palette dataframe
    graph_col_df <- data.frame(value = as.character(grad_edge),
                               color = getPalette(length(grad_edge)),
                               stringsAsFactors = FALSE)
    # Assign color to each edge
    color_edge <- data.frame(value = as.character(round(edge_importance, 1)), stringsAsFactors = FALSE) %>%
      dplyr::left_join(graph_col_df, by = "value") %>%
      dplyr::pull(color)
    
    # Open a pdf file
    plot(graph_ntw,
         # Size of the edge
         edge.width = edge_importance,
         edge.color = color_edge,
         # Size of the buble
         vertex.size = deg,
         vertex.color = "#cde394",
         vertex.frame.color = "white",
         vertex.label.color = "black",
         vertex.label.family = "Ubuntu", # Font family of the label (e.g.“Times”, “Helvetica”)
         layout = layout.circle)
    
    图片.png

    Lastly one can compute cell-cell correlations to see groups of cells that correlate positively or negatively.

    # Remove cell types not predicted to be on the tissue
    decon_mtrx_sub <- decon_mtrx[, cell_types_all]
    decon_mtrx_sub <- decon_mtrx_sub[, colSums(decon_mtrx_sub) > 0]
    
    # Compute correlation
    decon_cor <- cor(decon_mtrx_sub)
    
    # Compute correlation P-value
    p.mat <- corrplot::cor.mtest(mat = decon_mtrx_sub, conf.level = 0.95)
    
    # Visualize
    ggcorrplot::ggcorrplot(
      corr = decon_cor,
      p.mat = p.mat[[1]],
      hc.order = TRUE,
      type = "full",
      insig = "blank",
      lab = TRUE,
      outline.col = "lightgrey",
      method = "square",
      # colors = c("#4477AA", "white", "#BB4444"))
      colors = c("#6D9EC1", "white", "#E46726"),
      title = "Predicted cell-cell proportion correlation",
      legend.title = "Correlation\n(Pearson)") +
      ggplot2::theme(
        plot.title = ggplot2::element_text(size = 22, hjust = 0.5, face = "bold"),
        legend.text = ggplot2::element_text(size = 12),
        legend.title = ggplot2::element_text(size = 15),
        axis.text.x = ggplot2::element_text(angle = 90),
        axis.text = ggplot2::element_text(size = 18, vjust = 0.5))
    
    图片.png

    Step-by-Step insight

    Here we are going to show step by step what is going on and all the different steps involved in the process

    图片.png

    Downsample data

    如果数据集非常大,我们希望在细胞数量和基因数量方面对其进行下采样,以训练模型。 为了进行下采样,我们希望保留每个簇的代表性细胞数量和最重要的基因。 我们表明这种下采样不会影响模型的性能并大大加快了模型训练的速度。

    # Downsample scRNAseq to select gene set and number of cells to train the model
    se_sc_down <- downsample_se_obj(se_obj = cortex_sc,
                                    clust_vr = "subclass",
                                    cluster_markers = cluster_markers_all,
                                    cl_n = 100,
                                    hvg = 3000)
    

    Train NMF model

    Once we have the data ready to pass to the model we can train it as shown below.

    start_time <- Sys.time()
    nmf_mod_ls <- train_nmf(cluster_markers = cluster_markers_all, 
                            se_sc = se_sc_down, 
                            mtrx_spatial = anterior@assays$Spatial@counts,
                            clust_vr = "subclass",
                            ntop = NULL,
                            hvg = 3000,
                            transf = "uv",
                            method = "nsNMF")
    
    nmf_mod <- nmf_mod_ls[[1]]
    
    Extract matrices form the model:
    # get basis matrix W
    w <- basis(nmf_mod)
    dim(w)
    
    # get coefficient matrix H
    h <- coef(nmf_mod)
    dim(h)
    
    Look at cell-type specific topic profile
    rownames(h) <- paste("Topic", 1:nrow(h), sep = "_")
    topic_profile_plts <- dot_plot_profiles_fun(
      h = h,
      train_cell_clust = nmf_mod_ls[[2]]
      )
    
    topic_profile_plts[[2]] + theme(axis.text.x = element_text(angle = 90))
    
    Spot Deconvolution
    # Extract count matrix
    spot_counts <- anterior@assays$Spatial@counts
    
    # Subset to genes used to train the model
    spot_counts <- spot_counts[rownames(spot_counts) %in% rownames(w), ]
    
    Run spots through the basis to get the pertinent coefficients. To do this for every spot we are going to set up a system of linear equations where we need to find the coefficient, we will use non-negative least squares to determine the best coefficient fit.
    ct_topic_profiles <- topic_profile_per_cluster_nmf(h = h,
                                  train_cell_clust = nmf_mod_ls[[2]])
    
    decon_mtrx <- mixture_deconvolution_nmf(nmf_mod = nmf_mod,
                              mixture_transcriptome = spot_counts,
                              transf = "uv",
                              reference_profiles = ct_topic_profiles, 
                              min_cont = 0.09)
    

    生活很好,有你更好

    相关文章

      网友评论

        本文标题:10X单细胞空间分析回顾之SPOTlight

        本文链接:https://www.haomeiwen.com/subject/rcwqcrtx.html