美文网首页单细胞测序专题集合
10X空间转录组分析回顾之STutility

10X空间转录组分析回顾之STutility

作者: 单细胞空间交响乐 | 来源:发表于2022-01-25 17:06 被阅读0次

    临近过年了,很多的时候都在总结,10X空间转录组的方法回顾了很多,总结也是一种进步,这一篇临近放假之际,回顾一下10X空间转录组的分析方法STutility。

    Load data

    library(STutility)
    

    输入文件(这里就是10X空间转录组分析的结果文件夹)

    图片.png
    se <- InputFromTable(infotable = infoTable, 
                          min.gene.count = 100, 
                          min.gene.spots = 5,
                          min.spot.count = 500,
                          platform =  "Visium")
    ST.FeaturePlot(se, features = c("nFeature_RNA"), cols = c("lightgray", "mistyrose", "red", "darkred", "black"), ncol = 2, pt.size = 1.3)
    
    图片.png
    这个配色我很喜欢
    图片.png

    Quality control(这一步我们通常看看就好,不要强行将SPOT过滤掉)

    ST.FeaturePlot(se, features = "nFeature_RNA", cols = c("lightgray", "mistyrose", "red", "dark red", "black"), ncol = 2, pt.size = 1.3)
    
    图片.png

    Now let’s load the data with the subsetting enabled. Here we can use wither the raw matrices or the filtered matrices as long as we have spotfiles available in our infoTable data.frame which will be used to select the spots under tissue.

    质控的标准参考

    • min.gene.count : sets a threshold for the minimum allowed UMI counts of a gene across the whole dataset
    • min.gene.spots : sets a threshold for the minimum allowed number of spots where a gene is detected cross the whole dataset
    • min.spot.feature.count : sets a threshold for the minimum allowed number of unique genes in a spot
    • min.spot.count : sets a threshold for the minimum allowed UMI counts in a spot
    • topN : subset the expression matrix to include only the topN most expressed genes
    p1 <- ggplot() +
      geom_histogram(data = se[[]], aes(nFeature_RNA), fill = "red", alpha = 0.7, bins = 50) +
      ggtitle("Unique genes per spot")
    
    p2 <- ggplot() +
      geom_histogram(data = se[[]], aes(nCount_RNA), fill = "red", alpha = 0.7, bins = 50) +
      ggtitle("Total counts per spots")
    
    gene_attr <- data.frame(nUMI = Matrix::rowSums(se@assays$RNA@counts), 
                            nSpots = Matrix::rowSums(se@assays$RNA@counts > 0))
    p3 <- ggplot() +
      geom_histogram(data = gene_attr, aes(nUMI), fill = "red", alpha = 0.7, bins = 50) +
      scale_x_log10() +
      ggtitle("Total counts per gene (log10 scale)")
    
    p4 <- ggplot() +
      geom_histogram(data = gene_attr, aes(nSpots), fill = "red", alpha = 0.7,  bins = 50) +
      ggtitle("Total spots per gene")
    
    (p1 - p2)/(p3 - p4)
    
    图片.png
    Mitochondrial content
    # Collect all genes coded on the mitochondrial genome
    mt.genes <- grep(pattern = "^mt-", x = rownames(se), value = TRUE)
    se$percent.mito <- (Matrix::colSums(se@assays$RNA@counts[mt.genes, ])/Matrix::colSums(se@assays$RNA@counts))*100
    
    # Collect all genes coding for ribosomal proteins
    rp.genes <- grep(pattern = "^Rpl|^Rps", x = rownames(se), value = TRUE)
    se$percent.ribo <- (Matrix::colSums(se@assays$RNA@counts[rp.genes, ])/Matrix::colSums(se@assays$RNA@counts))*100
    
    ST.FeaturePlot(se, features = "percent.mito", cols = c("lightgray", "mistyrose", "red", "dark red", "black"), pt.size = 1.3, ncol = 2)
    
    图片.png
    ST.FeaturePlot(se, features = "percent.ribo", cols = c("lightgray", "mistyrose", "red", "dark red", "black"), pt.size = 1.3, ncol = 2)
    
    图片.png
    Removing genes
    ensids <- read.table(file = list.files(system.file("extdata", package = "STutility"), full.names = T, pattern = "mouse_genes"), header = T, sep = "\t", stringsAsFactors = F)
    
    # Print available biotypes
    unique(ensids$gene_type)
    keep.genes <- subset(ensids, gene_type %in% "protein_coding")$gene_name
    
    # Subset Seurat object
    se.subset <- se[intersect(rownames(se), keep.genes), ]
    
    cat("Number of genes removed : ", nrow(se) - nrow(se.subset), "\n")
    
    画图主题
    custom_theme <- theme(legend.position = c(0.45, 0.8), # Move color legend to top
                          legend.direction = "horizontal", # Flip legend
                          legend.text = element_text(angle = 30, hjust = 1), # rotate legend axis text
                          strip.text = element_blank(), # remove strip text
                          plot.title = element_blank(), # remove plot title
                          plot.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "cm")) # remove plot margins
    
    p <- ST.FeaturePlot(se, features = "nFeature_RNA", ncol = 2, show.sb = FALSE, palette = "Spectral")
    p & custom_theme
    
    图片.png
    p & theme_bw()
    
    图片.png

    下游的数据分析,类似Seurat

    se <- SCTransform(se)
    
    Regressing out batch effects

    When deciding on a normalization strategy using SCTransform it is important to consider potential batch effects that could confound downstream analysis. Such batch effects could for example arise between different sample specimens, storage times, array slides etc. and even between consecutive sections prepared on the same slide. The experimental setup is crucial to make it possible to combat such batch effects and it is important to carefully think through if and how they should be removed to make the biological variation in your data more meaningful. Some of these effects can be effectively removed during normalization with SCTransform by specifying your batches with the vars.to.regress option.

    # Add a section column to your meta.data
    se$section <- paste0("section_", GetStaffli(se)[[, "sample", drop = T]])
    
    # Run normalization with "vars.to.regress" option
    se.batch.cor <- SCTransform(se, vars.to.regress = "section")
    

    Rationale of approach

    library(Matrix)
    library(magrittr)
    library(dplyr)
    library(ggplot2)
    
    # Get raw count data 
    umi_data <- GetAssayData(object = se, slot = "counts", assay = "RNA")
    dim(umi_data)
    
    # Calculate gene attributes
    gene_attr <- data.frame(mean = rowMeans(umi_data),
                            detection_rate = rowMeans(umi_data > 0),
                            var = apply(umi_data, 1, var), 
                            row.names = rownames(umi_data)) %>%
      mutate(log_mean = log10(mean), log_var = log10(var))
    
    # Obtain spot attributes from Seurat meta.data slot
    spot_attr <- se[[c("nFeature_RNA", "nCount_RNA")]]
    
    p1 <- ggplot(gene_attr, aes(log_mean, log_var)) + 
      geom_point(alpha = 0.3, shape = 16) + 
      geom_density_2d(size = 0.3) +
      geom_abline(intercept = 0, slope = 1, color = 'red') +
      ggtitle("Mean-variance relationship")
    
    # add the expected detection rate under Poisson model
    x = seq(from = -2, to = 2, length.out = 1000)
    poisson_model <- data.frame(log_mean = x, detection_rate = 1 - dpois(0, lambda = 10^x))
    p2 <- ggplot(gene_attr, aes(log_mean, detection_rate)) + 
      geom_point(alpha = 0.3, shape = 16) + 
      geom_line(data = poisson_model, color='red') +
      ggtitle("Mean-detection-rate relationship")
    
    p1 - p2
    
    图片.png

    Matrix factorization

    se <- RunNMF(se, nfactors = 40) # Specificy nfactors to choose the number of factors, default=20.
    cscale <- c("lightgray", "mistyrose", "red", "darkred", "black")
    
    ST.DimPlot(se, 
               dims = 1:10,
               ncol = 2, # Sets the number of columns at dimensions level
               grid.ncol = 2, # Sets the number of columns at sample level
               reduction = "NMF", 
               pt.size = 1, 
               center.zero = F, 
               cols = cscale, 
               show.sb = FALSE)
    
    图片.png
    ST.DimPlot(se, 
               dims = 11:20,
               ncol = 2, 
               grid.ncol = 2, 
               reduction = "NMF", 
               pt.size = 1, 
               center.zero = F, 
               cols = cscale, 
               show.sb = FALSE)
    
    图片.png
    print(se[["NMF"]])
    factor_ 1 
    Positive:  Pvalb, Gad1, Spp1, Slc32a1, Six3, Sparc, Lsm6, Ndufb8, Nsg1, Agt 
           Enho, Ldhb, Gabra1, Cst3, Cox6c, Ndrg2, Cox5a, Ckb, Lgi2, Nefh 
    Negative:  Rgs20, C1qtnf2, Lyn, Nid2, St6galnac2, Rbm20, Pdzd2, Mndal, Emx2, Sp9 
           Gcnt2, Gm19410, Irak2, Tbx2, Gm11627, Nfatc1, Bambi, Sorcs1, Pam, Arhgap45 
    factor_ 2 
    Positive:  Vamp1, Pvalb, Bend6, Nat8l, Cox5a, Pcp4l1, Stmn3, Nefm, Syt2, Snrpn 
           Pcsk1n, Ndufa4, Ctxn3, Slc17a6, Cend1, Kcnab3, Cox4i1, Mdh1, Scn1b, Camk2n2 
    Negative:  Plxnc1, Cdhr1, C1qtnf2, Evc, Klf11, Nid2, Ly6g6f, Hist1h4h, Ppp1r18, St6galnac2 
           Fxyd2, Blnk, Fam89a, Dock8, Emx2, Cyp20a1, Sp9, Ginm1, Acacb, Gm10561 
    factor_ 3 
    Positive:  mt-Nd3, Rps29, Rps21, Rpl39, Rplp1, mt-Co3, Cnot3, Rpl37, Rps19, Rps27 
           mt-Nd5, Rpl35a, Rps28, mt-Nd4l, Rpl37a, Tatdn1, Rpl26, Nnat, Uba52, mt-Co1 
    Negative:  Otx1, Rgs20, Evc, Pkd2, Nid2, Ppp1r18, St6galnac2, Tnxb, Atf4, Rbm20 
           Slc25a45, Pdzd2, Mndal, Amt, Blnk, Agtrap, Grik1, Fam89a, Dock8, Pcdh18 
    factor_ 4 
    Positive:  Th, Slc6a3, Slc18a2, Ddc, Sncg, Slc10a4, Ret, Dlk1, En1, Chrna6 
           Drd2, Cplx1, Sv2c, Aldh1a1, Gch1, Uchl1, Gap43, Chrnb3, Tagln3, Foxa1 
    Negative:  Cdhr1, Evc, Nid2, St6galnac2, Slc25a45, Blnk, C530008M17Rik, Dock8, Emx2, Sp9 
           Bin2, Wnt5b, Nfatc1, Bambi, Sorcs1, Phactr4, Masp1, Rapgef3, Kdr, Epha8 
    factor_ 5 
    Positive:  Spink8, Tmsb4x, Fibcd1, Hpca, Fkbp1a, Itpka, Cnih2, Calm2, Tspan13, Lefty1 
           Crym, Prnp, Dynll1, Rprml, Cpne6, Arpc5, Mpped1, Cpne7, Neurod6, Ociad2 
    Negative:  Cdh9, Otx1, Spsb1, Cdhr1, Rgs20, Nid2, Ly6g6f, Ppp1r18, Rbm20, Slc25a45 
           Pdzd2, Mndal, Atp2a3, Grik1, Fam89a, Cux1, Tex9, Dock8, Msi1, Sp9 
    
    FactorGeneLoadingPlot(se, factor = 1)
    
    图片.png

    Clustering

    se <- FindNeighbors(object = se, verbose = FALSE, reduction = "NMF", dims = 1:40)
    se <- FindClusters(object = se, verbose = FALSE)
    library(RColorBrewer)
    n <- 19
    qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
    col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
    
    ST.FeaturePlot(object = se, features = "seurat_clusters", cols = col_vector, pt.size = 1, ncol = 2)
    
    图片.png
    # Run UMAP
    se <- RunUMAP(se, reduction = "NMF", dims = 1:40, n.neighbors = 10)
    # Define colors for heatmap
    heatmap.colors <- c("lightgray", "mistyrose", "red", "darkred", "black")
    fts <- c("Prkcd", "Opalin", "Lamp5")
    
    # plot transformed features expression on UMAP embedding
    p.fts <- lapply(fts, function(ftr) {
      FeaturePlot(se, features = ftr, reduction = "umap", order = TRUE, cols = heatmap.colors)
    })
    
    # plot transformed features expression on Visium coordinates
    p3 <- ST.FeaturePlot(se, features = fts, ncol = 2, grid.ncol = 1, cols = heatmap.colors, pt.size = 1, show.sb = FALSE)
    
    # Construct final plot
    cowplot::plot_grid(cowplot::plot_grid(plotlist = p.fts, ncol = 1), p3, ncol = 2, rel_widths = c(1, 1.3))
    
    图片.png

    RGB dimensionality reduction plots

    se <- RunUMAP(object = se, dims = 1:40, verbose = FALSE, n.components = 3, reduction = "NMF", reduction.name = "umap.3d")
    ST.DimPlot(object = se, dims = 1:3, reduction = "umap.3d", blend = T, pt.size = 1.8)
    
    图片.png

    DEA

    markers <- FindMarkers(se, ident.1 = "19")
    head(markers) %>%
      kbl() %>%
      kable_styling()
    
    FeatureOverlay(se, features = "Dsp", 
                  sampleids = 1:2,
                  cols = c("lightgray", "mistyrose", "red", "darkred", "black"),
                  pt.size = 1.5, 
                  pt.alpha = 0.5,
                  ncol = 2)
    
    图片.png

    Neighborhood analysis(最重点的地方)

    FeatureOverlay(se, features = "seurat_clusters", sampleids = 1:2, ncols = 2)
    
    图片.png

    Connected Spatial Network

    Once you have defined a region of interest and you want to find all spots neighboring to this region you can use the RegionNeighbours function to automatically detect such spots.

    For example, let’s say that we want to select all neighbors to cluster 2. The first step is to make sure that the identity of your seurat object is correct, here we need to set it to “seurat_clusters”.

    se <- SetIdent(se, value = "seurat_clusters")
    se <- RegionNeighbours(se, id = "2", verbose = TRUE)
    FeatureOverlay(se, features = "nbs_2", ncols = 2, sampleids = 1:2, cols = c("red", "lightgray"), pt.size = 2)
    
    图片.png
    se <- SetIdent(se, value = "seurat_clusters")
    se <- RegionNeighbours(se, id = 2, keep.within.id = T, verbose = TRUE)
    FeatureOverlay(se, features = "nbs_2", ncols = 2, sampleids = 1:2, cols = c("red", "lightgray"), pt.size = 2)
    
    图片.png
    library(magrittr)
    library(dplyr)
    
    se <- SetIdent(se, value = "nbs_2")
    nbs_2.markers <- FindMarkers(se, ident.1 = "2", ident.2 = "nbs_2")
    nbs_2.markers$gene <- rownames(nbs_2.markers)
    se.subset <- SubsetSTData(se, expression = nbs_2 %in% c("2", "nbs_2"))
    sorted.marks <- nbs_2.markers %>% top_n(n = 40, wt = abs(avg_logFC))
    sorted.marks <- sorted.marks[order(sorted.marks$avg_logFC, decreasing = T), ]
    DoHeatmap(se.subset, features = sorted.marks$gene, group.colors = c("red", "lightgray"), disp.min = -2, disp.max = 2)
    
    图片.png
    FeatureOverlay(se.subset, features = c("COX6C", "FCGR3B", "LGALS1", "CYBA"), pt.size = 2,  
                   ncols = 2, cols = c("darkblue", "cyan", "yellow", "red", "darkred"))
    
    图片.png
    se <- SetIdent(se, value = "seurat_clusters")
    se <- RegionNeighbours(se, id = 2, keep.idents = TRUE, verbose = TRUE)
    FeatureOverlay(se, features = "nbs_2", ncols = 2, sampleids = 1:2, pt.size = 2)
    
    图片.png

    最后的3D展示,3D visualization

    se <- Create3DStack(se)
    stack_3d <- setNames(GetStaffli(se)@scatter.data, c("x", "y", "z", "grid.cell"))
    
    ggplot(stack_3d, aes(x, 2e3 - y)) +
      geom_point(size = 0.1, color = "lightgray") +
      facet_wrap(~z, ncol = 1) +
      theme_void()
    interpolated.data <- FeaturePlot3D(se, features = "Mbp", return.data = TRUE)
    
    ggplot(interpolated.data, aes(x, 2e3 - y, color = val)) +
      geom_point(size = 0.1) +
      facet_wrap(~z, ncol = 1) +
      theme_void() +
      ggtitle("Mbp") +
      scale_color_gradientn(colours = c("black", "dark blue", "cyan", "yellow", "red", "dark red"))
    
    图片.png
    3D plot
    FeaturePlot3D(se, features = "Mbp", pt.size = 0.6, pts.downsample = 5e4)
    
    图片.png
    Multiple 3D plots
    p1 <- FeaturePlot3D(se, features = "Mbp", scene = "scene", cols = c("dark blue", "navyblue", "cyan", "white"), add.margins = 1, pts.downsample = 5e4)
    p2 <- FeaturePlot3D(se, features = "Calb2", scene = "scene2", cols = c("dark blue", "navyblue", "cyan", "white"), add.margins = 1)
    
    plotly::subplot(p1, p2, margin = 0)
    
    图片.png

    生活很好,有你更好

    相关文章

      网友评论

        本文标题:10X空间转录组分析回顾之STutility

        本文链接:https://www.haomeiwen.com/subject/hgbwhrtx.html