临近过年了,很多的时候都在总结,10X空间转录组的方法回顾了很多,总结也是一种进步,这一篇临近放假之际,回顾一下10X空间转录组的分析方法STutility。
Load data
library(STutility)
输入文件(这里就是10X空间转录组分析的结果文件夹)
图片.pngse <- InputFromTable(infotable = infoTable,
min.gene.count = 100,
min.gene.spots = 5,
min.spot.count = 500,
platform = "Visium")
ST.FeaturePlot(se, features = c("nFeature_RNA"), cols = c("lightgray", "mistyrose", "red", "darkred", "black"), ncol = 2, pt.size = 1.3)
图片.png
这个配色我很喜欢
图片.pngQuality control(这一步我们通常看看就好,不要强行将SPOT过滤掉)
ST.FeaturePlot(se, features = "nFeature_RNA", cols = c("lightgray", "mistyrose", "red", "dark red", "black"), ncol = 2, pt.size = 1.3)
图片.png
Now let’s load the data with the subsetting enabled. Here we can use wither the raw matrices or the filtered matrices as long as we have spotfiles available in our infoTable data.frame which will be used to select the spots under tissue.
质控的标准参考
- min.gene.count : sets a threshold for the minimum allowed UMI counts of a gene across the whole dataset
- min.gene.spots : sets a threshold for the minimum allowed number of spots where a gene is detected cross the whole dataset
- min.spot.feature.count : sets a threshold for the minimum allowed number of unique genes in a spot
- min.spot.count : sets a threshold for the minimum allowed UMI counts in a spot
- topN : subset the expression matrix to include only the topN most expressed genes
p1 <- ggplot() +
geom_histogram(data = se[[]], aes(nFeature_RNA), fill = "red", alpha = 0.7, bins = 50) +
ggtitle("Unique genes per spot")
p2 <- ggplot() +
geom_histogram(data = se[[]], aes(nCount_RNA), fill = "red", alpha = 0.7, bins = 50) +
ggtitle("Total counts per spots")
gene_attr <- data.frame(nUMI = Matrix::rowSums(se@assays$RNA@counts),
nSpots = Matrix::rowSums(se@assays$RNA@counts > 0))
p3 <- ggplot() +
geom_histogram(data = gene_attr, aes(nUMI), fill = "red", alpha = 0.7, bins = 50) +
scale_x_log10() +
ggtitle("Total counts per gene (log10 scale)")
p4 <- ggplot() +
geom_histogram(data = gene_attr, aes(nSpots), fill = "red", alpha = 0.7, bins = 50) +
ggtitle("Total spots per gene")
(p1 - p2)/(p3 - p4)
图片.png
Mitochondrial content
# Collect all genes coded on the mitochondrial genome
mt.genes <- grep(pattern = "^mt-", x = rownames(se), value = TRUE)
se$percent.mito <- (Matrix::colSums(se@assays$RNA@counts[mt.genes, ])/Matrix::colSums(se@assays$RNA@counts))*100
# Collect all genes coding for ribosomal proteins
rp.genes <- grep(pattern = "^Rpl|^Rps", x = rownames(se), value = TRUE)
se$percent.ribo <- (Matrix::colSums(se@assays$RNA@counts[rp.genes, ])/Matrix::colSums(se@assays$RNA@counts))*100
ST.FeaturePlot(se, features = "percent.mito", cols = c("lightgray", "mistyrose", "red", "dark red", "black"), pt.size = 1.3, ncol = 2)
图片.png
ST.FeaturePlot(se, features = "percent.ribo", cols = c("lightgray", "mistyrose", "red", "dark red", "black"), pt.size = 1.3, ncol = 2)
图片.png
Removing genes
ensids <- read.table(file = list.files(system.file("extdata", package = "STutility"), full.names = T, pattern = "mouse_genes"), header = T, sep = "\t", stringsAsFactors = F)
# Print available biotypes
unique(ensids$gene_type)
keep.genes <- subset(ensids, gene_type %in% "protein_coding")$gene_name
# Subset Seurat object
se.subset <- se[intersect(rownames(se), keep.genes), ]
cat("Number of genes removed : ", nrow(se) - nrow(se.subset), "\n")
画图主题
custom_theme <- theme(legend.position = c(0.45, 0.8), # Move color legend to top
legend.direction = "horizontal", # Flip legend
legend.text = element_text(angle = 30, hjust = 1), # rotate legend axis text
strip.text = element_blank(), # remove strip text
plot.title = element_blank(), # remove plot title
plot.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "cm")) # remove plot margins
p <- ST.FeaturePlot(se, features = "nFeature_RNA", ncol = 2, show.sb = FALSE, palette = "Spectral")
p & custom_theme
图片.png
p & theme_bw()
图片.png
下游的数据分析,类似Seurat
se <- SCTransform(se)
Regressing out batch effects
When deciding on a normalization strategy using SCTransform it is important to consider potential batch effects that could confound downstream analysis. Such batch effects could for example arise between different sample specimens, storage times, array slides etc. and even between consecutive sections prepared on the same slide. The experimental setup is crucial to make it possible to combat such batch effects and it is important to carefully think through if and how they should be removed to make the biological variation in your data more meaningful. Some of these effects can be effectively removed during normalization with SCTransform by specifying your batches with the vars.to.regress option.
# Add a section column to your meta.data
se$section <- paste0("section_", GetStaffli(se)[[, "sample", drop = T]])
# Run normalization with "vars.to.regress" option
se.batch.cor <- SCTransform(se, vars.to.regress = "section")
Rationale of approach
library(Matrix)
library(magrittr)
library(dplyr)
library(ggplot2)
# Get raw count data
umi_data <- GetAssayData(object = se, slot = "counts", assay = "RNA")
dim(umi_data)
# Calculate gene attributes
gene_attr <- data.frame(mean = rowMeans(umi_data),
detection_rate = rowMeans(umi_data > 0),
var = apply(umi_data, 1, var),
row.names = rownames(umi_data)) %>%
mutate(log_mean = log10(mean), log_var = log10(var))
# Obtain spot attributes from Seurat meta.data slot
spot_attr <- se[[c("nFeature_RNA", "nCount_RNA")]]
p1 <- ggplot(gene_attr, aes(log_mean, log_var)) +
geom_point(alpha = 0.3, shape = 16) +
geom_density_2d(size = 0.3) +
geom_abline(intercept = 0, slope = 1, color = 'red') +
ggtitle("Mean-variance relationship")
# add the expected detection rate under Poisson model
x = seq(from = -2, to = 2, length.out = 1000)
poisson_model <- data.frame(log_mean = x, detection_rate = 1 - dpois(0, lambda = 10^x))
p2 <- ggplot(gene_attr, aes(log_mean, detection_rate)) +
geom_point(alpha = 0.3, shape = 16) +
geom_line(data = poisson_model, color='red') +
ggtitle("Mean-detection-rate relationship")
p1 - p2
图片.png
Matrix factorization
se <- RunNMF(se, nfactors = 40) # Specificy nfactors to choose the number of factors, default=20.
cscale <- c("lightgray", "mistyrose", "red", "darkred", "black")
ST.DimPlot(se,
dims = 1:10,
ncol = 2, # Sets the number of columns at dimensions level
grid.ncol = 2, # Sets the number of columns at sample level
reduction = "NMF",
pt.size = 1,
center.zero = F,
cols = cscale,
show.sb = FALSE)
图片.png
ST.DimPlot(se,
dims = 11:20,
ncol = 2,
grid.ncol = 2,
reduction = "NMF",
pt.size = 1,
center.zero = F,
cols = cscale,
show.sb = FALSE)
图片.png
print(se[["NMF"]])
factor_ 1
Positive: Pvalb, Gad1, Spp1, Slc32a1, Six3, Sparc, Lsm6, Ndufb8, Nsg1, Agt
Enho, Ldhb, Gabra1, Cst3, Cox6c, Ndrg2, Cox5a, Ckb, Lgi2, Nefh
Negative: Rgs20, C1qtnf2, Lyn, Nid2, St6galnac2, Rbm20, Pdzd2, Mndal, Emx2, Sp9
Gcnt2, Gm19410, Irak2, Tbx2, Gm11627, Nfatc1, Bambi, Sorcs1, Pam, Arhgap45
factor_ 2
Positive: Vamp1, Pvalb, Bend6, Nat8l, Cox5a, Pcp4l1, Stmn3, Nefm, Syt2, Snrpn
Pcsk1n, Ndufa4, Ctxn3, Slc17a6, Cend1, Kcnab3, Cox4i1, Mdh1, Scn1b, Camk2n2
Negative: Plxnc1, Cdhr1, C1qtnf2, Evc, Klf11, Nid2, Ly6g6f, Hist1h4h, Ppp1r18, St6galnac2
Fxyd2, Blnk, Fam89a, Dock8, Emx2, Cyp20a1, Sp9, Ginm1, Acacb, Gm10561
factor_ 3
Positive: mt-Nd3, Rps29, Rps21, Rpl39, Rplp1, mt-Co3, Cnot3, Rpl37, Rps19, Rps27
mt-Nd5, Rpl35a, Rps28, mt-Nd4l, Rpl37a, Tatdn1, Rpl26, Nnat, Uba52, mt-Co1
Negative: Otx1, Rgs20, Evc, Pkd2, Nid2, Ppp1r18, St6galnac2, Tnxb, Atf4, Rbm20
Slc25a45, Pdzd2, Mndal, Amt, Blnk, Agtrap, Grik1, Fam89a, Dock8, Pcdh18
factor_ 4
Positive: Th, Slc6a3, Slc18a2, Ddc, Sncg, Slc10a4, Ret, Dlk1, En1, Chrna6
Drd2, Cplx1, Sv2c, Aldh1a1, Gch1, Uchl1, Gap43, Chrnb3, Tagln3, Foxa1
Negative: Cdhr1, Evc, Nid2, St6galnac2, Slc25a45, Blnk, C530008M17Rik, Dock8, Emx2, Sp9
Bin2, Wnt5b, Nfatc1, Bambi, Sorcs1, Phactr4, Masp1, Rapgef3, Kdr, Epha8
factor_ 5
Positive: Spink8, Tmsb4x, Fibcd1, Hpca, Fkbp1a, Itpka, Cnih2, Calm2, Tspan13, Lefty1
Crym, Prnp, Dynll1, Rprml, Cpne6, Arpc5, Mpped1, Cpne7, Neurod6, Ociad2
Negative: Cdh9, Otx1, Spsb1, Cdhr1, Rgs20, Nid2, Ly6g6f, Ppp1r18, Rbm20, Slc25a45
Pdzd2, Mndal, Atp2a3, Grik1, Fam89a, Cux1, Tex9, Dock8, Msi1, Sp9
FactorGeneLoadingPlot(se, factor = 1)
图片.png
Clustering
se <- FindNeighbors(object = se, verbose = FALSE, reduction = "NMF", dims = 1:40)
se <- FindClusters(object = se, verbose = FALSE)
library(RColorBrewer)
n <- 19
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
ST.FeaturePlot(object = se, features = "seurat_clusters", cols = col_vector, pt.size = 1, ncol = 2)
图片.png
# Run UMAP
se <- RunUMAP(se, reduction = "NMF", dims = 1:40, n.neighbors = 10)
# Define colors for heatmap
heatmap.colors <- c("lightgray", "mistyrose", "red", "darkred", "black")
fts <- c("Prkcd", "Opalin", "Lamp5")
# plot transformed features expression on UMAP embedding
p.fts <- lapply(fts, function(ftr) {
FeaturePlot(se, features = ftr, reduction = "umap", order = TRUE, cols = heatmap.colors)
})
# plot transformed features expression on Visium coordinates
p3 <- ST.FeaturePlot(se, features = fts, ncol = 2, grid.ncol = 1, cols = heatmap.colors, pt.size = 1, show.sb = FALSE)
# Construct final plot
cowplot::plot_grid(cowplot::plot_grid(plotlist = p.fts, ncol = 1), p3, ncol = 2, rel_widths = c(1, 1.3))
图片.png
RGB dimensionality reduction plots
se <- RunUMAP(object = se, dims = 1:40, verbose = FALSE, n.components = 3, reduction = "NMF", reduction.name = "umap.3d")
ST.DimPlot(object = se, dims = 1:3, reduction = "umap.3d", blend = T, pt.size = 1.8)
图片.png
DEA
markers <- FindMarkers(se, ident.1 = "19")
head(markers) %>%
kbl() %>%
kable_styling()
FeatureOverlay(se, features = "Dsp",
sampleids = 1:2,
cols = c("lightgray", "mistyrose", "red", "darkred", "black"),
pt.size = 1.5,
pt.alpha = 0.5,
ncol = 2)
图片.png
Neighborhood analysis(最重点的地方)
FeatureOverlay(se, features = "seurat_clusters", sampleids = 1:2, ncols = 2)
图片.png
Connected Spatial Network
Once you have defined a region of interest and you want to find all spots neighboring to this region you can use the RegionNeighbours function to automatically detect such spots.
For example, let’s say that we want to select all neighbors to cluster 2. The first step is to make sure that the identity of your seurat object is correct, here we need to set it to “seurat_clusters”.
se <- SetIdent(se, value = "seurat_clusters")
se <- RegionNeighbours(se, id = "2", verbose = TRUE)
FeatureOverlay(se, features = "nbs_2", ncols = 2, sampleids = 1:2, cols = c("red", "lightgray"), pt.size = 2)
图片.png
se <- SetIdent(se, value = "seurat_clusters")
se <- RegionNeighbours(se, id = 2, keep.within.id = T, verbose = TRUE)
FeatureOverlay(se, features = "nbs_2", ncols = 2, sampleids = 1:2, cols = c("red", "lightgray"), pt.size = 2)
图片.png
library(magrittr)
library(dplyr)
se <- SetIdent(se, value = "nbs_2")
nbs_2.markers <- FindMarkers(se, ident.1 = "2", ident.2 = "nbs_2")
nbs_2.markers$gene <- rownames(nbs_2.markers)
se.subset <- SubsetSTData(se, expression = nbs_2 %in% c("2", "nbs_2"))
sorted.marks <- nbs_2.markers %>% top_n(n = 40, wt = abs(avg_logFC))
sorted.marks <- sorted.marks[order(sorted.marks$avg_logFC, decreasing = T), ]
DoHeatmap(se.subset, features = sorted.marks$gene, group.colors = c("red", "lightgray"), disp.min = -2, disp.max = 2)
图片.png
FeatureOverlay(se.subset, features = c("COX6C", "FCGR3B", "LGALS1", "CYBA"), pt.size = 2,
ncols = 2, cols = c("darkblue", "cyan", "yellow", "red", "darkred"))
图片.png
se <- SetIdent(se, value = "seurat_clusters")
se <- RegionNeighbours(se, id = 2, keep.idents = TRUE, verbose = TRUE)
FeatureOverlay(se, features = "nbs_2", ncols = 2, sampleids = 1:2, pt.size = 2)
图片.png
最后的3D展示,3D visualization
se <- Create3DStack(se)
stack_3d <- setNames(GetStaffli(se)@scatter.data, c("x", "y", "z", "grid.cell"))
ggplot(stack_3d, aes(x, 2e3 - y)) +
geom_point(size = 0.1, color = "lightgray") +
facet_wrap(~z, ncol = 1) +
theme_void()
interpolated.data <- FeaturePlot3D(se, features = "Mbp", return.data = TRUE)
ggplot(interpolated.data, aes(x, 2e3 - y, color = val)) +
geom_point(size = 0.1) +
facet_wrap(~z, ncol = 1) +
theme_void() +
ggtitle("Mbp") +
scale_color_gradientn(colours = c("black", "dark blue", "cyan", "yellow", "red", "dark red"))
图片.png
3D plot
FeaturePlot3D(se, features = "Mbp", pt.size = 0.6, pts.downsample = 5e4)
图片.png
Multiple 3D plots
p1 <- FeaturePlot3D(se, features = "Mbp", scene = "scene", cols = c("dark blue", "navyblue", "cyan", "white"), add.margins = 1, pts.downsample = 5e4)
p2 <- FeaturePlot3D(se, features = "Calb2", scene = "scene2", cols = c("dark blue", "navyblue", "cyan", "white"), add.margins = 1)
plotly::subplot(p1, p2, margin = 0)
图片.png
生活很好,有你更好
网友评论