Analyze single-cell pooled CRISPR screens.
New Seurat functions
1.Calculating the perturbation-specific signature of every cell.
2.Identifying and removing cells that have ‘escaped’ CRISPR perturbation.
3.Visualizing similarities/differences across different perturbations
1 Loading required packages
# Load packages.
library(Seurat)
library(SeuratData)
library(ggplot2)
library(patchwork)
library(scales)
library(dplyr)
library(reshape2)
# Fetch the "thp1.eccite" dataset through SeuratData.
InstallData(ds = "thp1.eccite")

# Shared ggplot2 theme reused by the plots in this file:
# centred 16pt title, 0.7 cm legend keys and 14pt legend text.
custom_theme <- theme(
  legend.text = element_text(size = 14),
  legend.key.size = unit(0.7, "cm"),
  plot.title = element_text(hjust = 0.5, size = 16)
)
2 Loading Seurat object containing ECCITE-seq dataset
数据集:Download dataset using SeuratData package.{InstallData(ds = "thp1.eccite")}
111 gRNA ECCITE-seq dataset generated from stimulated THP-1 cells that was recently published from our lab in bioRxiv Papalexi et al. 2020.
# Read the "thp1.eccite" object provided by SeuratData.
eccite <- LoadData(ds = "thp1.eccite")

# Apply CLR normalization to the "ADT" (protein) assay.
eccite <- NormalizeData(
  eccite,
  assay = "ADT",
  margin = 2,
  normalization.method = "CLR"
)
3 RNA-based clustering is driven by confounding sources of variation
依照标准Seurat流程进行聚类。得到的cluster主要由细胞周期(cell cycle phase)和replicate ID驱动,并没有出现预期的perturbation-specific clusters;只观察到一个perturbation-specific cluster,其中的细胞表达靶向IFN-γ通路基因的gRNAs。
# Work on the RNA assay for the standard dimensionality-reduction workflow.
DefaultAssay(object = eccite) <- "RNA"

# Normalize, select variable features, then scale -- one step per statement.
eccite <- NormalizeData(object = eccite)
eccite <- FindVariableFeatures(object = eccite)
eccite <- ScaleData(object = eccite)

# Principal Component Analysis (PCA) with the function's default settings.
eccite <- RunPCA(object = eccite)

# Uniform Manifold Approximation and Projection (UMAP) on the first 40
# dimensions, for 2-D visualization.
eccite <- RunUMAP(object = eccite, dims = 1:40)
# Generate plots to check if clustering is driven by biological replicate ID,
# cell cycle phase or target gene class.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.

# UMAP coloured by the 'replicate' metadata column.
p1 <- DimPlot(
  object = eccite,
  group.by = "replicate",
  label = FALSE,
  pt.size = 0.2,
  reduction = "umap",
  cols = "Dark2",
  repel = TRUE
) +
  scale_color_brewer(palette = "Dark2") +
  ggtitle("Biological Replicate") +
  xlab("UMAP 1") +
  ylab("UMAP 2") +
  custom_theme

# UMAP coloured by the 'Phase' metadata column.
p2 <- DimPlot(
  object = eccite,
  group.by = "Phase",
  label = FALSE,
  pt.size = 0.2,
  reduction = "umap",
  repel = TRUE
) +
  ggtitle("Cell Cycle Phase") +
  ylab("UMAP 2") +
  xlab("UMAP 1") +
  custom_theme

# UMAP coloured and split (one panel per value, single column) by the
# 'crispr' metadata column.
p3 <- DimPlot(
  object = eccite,
  group.by = "crispr",
  pt.size = 0.2,
  reduction = "umap",
  split.by = "crispr",
  ncol = 1,
  cols = c("grey39", "goldenrod3")
) +
  ggtitle("Perturbation Status") +
  ylab("UMAP 2") +
  xlab("UMAP 1") +
  custom_theme

# Visualize plots: p1 stacked over p2 on the left, p3 on the right (patchwork).
((p1 / p2 + plot_layout(guides = "auto")) | p3)

4 Calculating local perturbation signatures mitigates confounding effects
为了计算局部扰动特征,我们将non-targeting Nearest Neighbors (NNs)的数量设置为k=20,建议20<k<30。直观地说,用户不希望将k设置得非常小或很大,因为这很可能不能从数据集除去technical variation。使用PRTB signature对细胞进行聚类消除了所有technical variation,并显示了一个 perturbation-specific cluster。
# Compute the perturbation signature and store it as a new assay named "PRTB".
# Inputs: the "data" slot of the RNA assay and the "pca" reduction (40 dims),
# with 20 neighbors, handled separately per "replicate". Cells labelled "NT"
# in the "gene" column are the non-targeting class.
eccite <- CalcPerturbSig(
  object = eccite,
  assay = "RNA",
  slot = "data",
  reduction = "pca",
  ndims = 40,
  num.neighbors = 20,
  gd.class = "gene",
  nt.cell.class = "NT",
  split.by = "replicate",
  new.assay.name = "PRTB"
)
# Prepare PRTB assay for dimensionality reduction:
# reuse the RNA variable features and center (but do not scale) the data.
DefaultAssay(object = eccite) <- "PRTB"

# Use variable features from RNA assay.
VariableFeatures(object = eccite) <- VariableFeatures(object = eccite[["RNA"]])
eccite <- ScaleData(object = eccite, do.scale = FALSE, do.center = TRUE)

# Run PCA to reduce the dimensionality of the data.
# Seurat expects reduction keys to end with an underscore; without it the key
# is rewritten to "prtbpca_" with a warning, so the final form is given here.
eccite <- RunPCA(
  object = eccite,
  reduction.key = "prtbpca_",
  reduction.name = "prtbpca"
)

# Run UMAP on the first 40 PRTB principal components to visualize clustering
# in 2-D.
eccite <- RunUMAP(
  object = eccite,
  dims = 1:40,
  reduction = "prtbpca",
  reduction.key = "prtbumap_",
  reduction.name = "prtbumap"
)
# Generate plots to check if clustering is driven by biological replicate ID,
# cell cycle phase or target gene class. All three use the 'prtbumap'
# reduction.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.

# UMAP coloured by the 'replicate' metadata column.
q1 <- DimPlot(
  object = eccite,
  group.by = "replicate",
  reduction = "prtbumap",
  pt.size = 0.2,
  cols = "Dark2",
  label = FALSE,
  repel = TRUE
) +
  scale_color_brewer(palette = "Dark2") +
  ggtitle("Biological Replicate") +
  ylab("UMAP 2") +
  xlab("UMAP 1") +
  custom_theme

# UMAP coloured by the 'Phase' metadata column.
q2 <- DimPlot(
  object = eccite,
  group.by = "Phase",
  reduction = "prtbumap",
  pt.size = 0.2,
  label = FALSE,
  repel = TRUE
) +
  ggtitle("Cell Cycle Phase") +
  ylab("UMAP 2") +
  xlab("UMAP 1") +
  custom_theme

# UMAP coloured and split (one panel per value, single column) by the
# 'crispr' metadata column.
q3 <- DimPlot(
  object = eccite,
  group.by = "crispr",
  reduction = "prtbumap",
  split.by = "crispr",
  ncol = 1,
  pt.size = 0.2,
  cols = c("grey39", "goldenrod3")
) +
  ggtitle("Perturbation Status") +
  ylab("UMAP 2") +
  xlab("UMAP 1") +
  custom_theme

# Visualize plots: q1 stacked over q2 on the left, q3 on the right (patchwork).
(q1 / q2 + plot_layout(guides = "auto") | q3)
CalcPerturbSig()
# Recap of the perturbation-signature workflow: compute the PRTB assay, then
# build the PCA and UMAP embeddings on it.
eccite <- CalcPerturbSig(
  object = eccite,
  assay = "RNA",
  slot = "data",
  gd.class = "gene",
  nt.cell.class = "NT",
  reduction = "pca",
  ndims = 40,
  num.neighbors = 20,
  split.by = "replicate",
  new.assay.name = "PRTB"
)

# The call above writes the "PRTB" assay again, so repeat the preparation
# steps used for that assay earlier in this file before running PCA
# (presumably a freshly written assay carries no scaled data -- verify).
DefaultAssay(object = eccite) <- "PRTB"
VariableFeatures(object = eccite) <- VariableFeatures(object = eccite[["RNA"]])
eccite <- ScaleData(object = eccite, do.scale = FALSE, do.center = TRUE)

# The original text fused this statement onto the end of the previous call,
# which does not parse; it is a separate statement.
# Reduction keys end with an underscore, as Seurat expects.
eccite <- RunPCA(
  object = eccite,
  reduction.key = "prtbpca_",
  reduction.name = "prtbpca"
)

eccite <- RunUMAP(
  object = eccite,
  dims = 1:40,
  reduction = "prtbpca",
  reduction.key = "prtbumap_",
  reduction.name = "prtbumap"
)

5 Mixscape identifies cells with no detectable perturbation
在这里,我们假设每个目标基因类是两个高斯分布的混合,一个代表 knockout (KO),另一个代表 non-perturbed (NP) 细胞。我们进一步假设NP细胞的分布与表达non-targeting gRNAs (NT) 的细胞的分布相同,并尝试使用mixtools软件包中的函数normalmixEM()估计KO细胞的分布。接下来,我们计算细胞属于KO分布的后验概率,并将概率高于0.5的细胞分类为KO。应用该方法,我们在11个靶基因类别中识别KO,并检测每个类别中gRNA靶向效率的变化。
RunMixscape()
# Run mixscape on the "scale.data" slot of the PRTB assay.
# Target classes come from the "gene" metadata column, with "NT" as the
# non-targeting class; the RNA assay is used for differential expression and
# the perturbation type is "KO".
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
eccite <- RunMixscape(
  object = eccite,
  assay = "PRTB",
  slot = "scale.data",
  labels = "gene",
  nt.class.name = "NT",
  min.de.genes = 5,
  iter.num = 10,
  de.assay = "RNA",
  verbose = FALSE,
  prtb.type = "KO"
)
# Calculate percentage of KO cells for all target gene classes.
# Column-wise proportions (margin = 2): within each value of eccite$NT, the
# fraction of cells in each mixscape_class.global category.
# NOTE(review): eccite$NT presumably holds gRNA IDs of the form
# <gene>g<number> (they are split on "g" below) -- confirm.
df <- prop.table(table(eccite$mixscape_class.global, eccite$NT), margin = 2)
df2 <- reshape2::melt(df)
df2$Var2 <- as.character(df2$Var2)

# Order the IDs by decreasing KO fraction and use that order as factor levels.
test <- df2[which(df2$Var1 == "KO"), ]
test <- test[order(test$value, decreasing = TRUE), ]
new.levels <- test$Var2
df2$Var2 <- factor(df2$Var2, levels = new.levels)
df2$Var1 <- factor(df2$Var1, levels = c("NT", "NP", "KO"))

# Split each ID on "g": the first piece is the gene, the second the guide
# number. vapply() pins the result type, unlike sapply().
id_parts <- strsplit(as.character(df2$Var2), split = "g", fixed = TRUE)
df2$gene <- vapply(id_parts, function(x) x[1], character(1))
df2$guide_number <- vapply(id_parts, function(x) x[2], character(1))

# Drop the non-targeting rows. A logical mask is used because
# df2[-which(cond), ] removes every row when nothing matches.
df3 <- df2[!(df2$gene %in% "NT"), ]

# Stacked bars: percentage of cells per mixscape class for each guide.
p1 <- ggplot(df3, aes(x = guide_number, y = value * 100, fill = Var1)) +
  geom_bar(stat = "identity") +
  theme_classic() +
  scale_fill_manual(values = c("grey49", "grey79", "coral1")) +
  ylab("% of cells") +
  xlab("sgRNA")

# One facet per gene (5 columns, free scales) with enlarged text.
p1 +
  theme(
    axis.text.x = element_text(size = 18, hjust = 1),
    axis.text.y = element_text(size = 18),
    axis.title = element_text(size = 16),
    strip.text = element_text(size = 16, face = "bold")
  ) +
  facet_wrap(vars(gene), ncol = 5, scales = "free") +
  labs(fill = "mixscape class") +
  theme(
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  )

6 Inspecting mixscape results
为了确保mixscape将正确的扰动状态分配给细胞,我们可以使用下面的函数查看a target gene class (例如IFNGR2)的perturbation score distributions 和 the posterior probabilities of cells,并将其与NT细胞进行比较。
此外,我们可以进行 differential expression(DE)分析,并表明只有IFNGR2 KO细胞的IFNG-pathway基因表达降低。
最后,作为一项独立检查,我们可以查看NP和KO细胞中的PD-L1蛋白表达值,找出为PD-L1调节因子的靶基因。
# Perturbation-score plot for the IFNGR2 target class, using the
# "mixscape_class" column; the fill legend is titled "mixscape class".
PlotPerturbScore(
  eccite,
  target.gene.ident = "IFNGR2",
  col = "coral2",
  mixscape.class = "mixscape_class"
) +
  labs(fill = "mixscape class")

# Violin plot of "mixscape_class_p_ko" for the NT, IFNGR2 KO and IFNGR2 NP
# identities.
# NOTE(review): `idents` selects from the object's active identities, which
# are not set in the code shown before this call -- confirm they hold the
# mixscape classes at this point.
VlnPlot(
  eccite,
  "mixscape_class_p_ko",
  idents = c("NT", "IFNGR2 KO", "IFNGR2 NP")
) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    axis.text = element_text(size = 16),
    plot.title = element_text(size = 20)
  ) +
  NoLegend() +
  ggtitle("mixscape posterior probabilities")

# Run DE analysis and visualize results on a heatmap ordering cells by their posterior
# probability values.
# Identities are switched to the "gene" column so that "NT" and "IFNGR2" can
# be passed as ident.1 / ident.2.
Idents(object = eccite) <- "gene"

# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
MixscapeHeatmap(
  object = eccite,
  ident.1 = "NT",
  ident.2 = "IFNGR2",
  balanced = FALSE,
  assay = "RNA",
  max.genes = 20,
  angle = 0,
  group.by = "mixscape_class",
  max.cells.group = 300,
  size = 6.5
) +
  NoLegend() +
  theme(axis.text.y = element_text(size = 16))

# Show that only IFNG pathway KO cells have a reduction in PD-L1 protein expression.
# Violins are grouped by "gene" and split by "mixscape_class.global".
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
VlnPlot(
  object = eccite,
  features = "adt_PDL1",
  idents = c("NT", "JAK2", "STAT1", "IFNGR1", "IFNGR2", "IRF1"),
  group.by = "gene",
  pt.size = 0.2,
  sort = TRUE,
  split.by = "mixscape_class.global",
  cols = c("coral3", "grey79", "grey39")
) +
  ggtitle("PD-L1 protein") +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    plot.title = element_text(size = 20),
    axis.text = element_text(size = 16)
  )

7 Visualizing perturbation responses with Linear Discriminant Analysis (LDA)
我们使用LDA作为降维方法来可视化perturbation-specific clusters。LDA试图利用基因表达和labels 作为input,最大限度地提高已知labels (mixscape classes)的可分性。
# Remove non-perturbed cells and run LDA to reduce the dimensionality of the data.
# Only cells whose mixscape_class.global is "KO" or "NT" are kept.
Idents(eccite) <- "mixscape_class.global"
sub <- subset(eccite, idents = c("KO", "NT"))

# Run LDA with the "gene" column as labels and "NT" as the non-targeting
# label, using the RNA assay and the PRTB assay for the PCs (10 PCs).
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
sub <- MixscapeLDA(
  object = sub,
  assay = "RNA",
  pc.assay = "PRTB",
  labels = "gene",
  nt.label = "NT",
  npcs = 10,
  logfc.threshold = 0.25,
  verbose = FALSE
)
# Use LDA results to run UMAP and visualize cells on 2-D.
# Here, we note that the number of the dimensions to be used is equal to the number of
# labels minus one (to account for NT cells).
# NOTE(review): 1:11 is hard-coded, i.e. it assumes 12 labels -- confirm
# against the labels actually present in `sub`.
# The reduction key ends with an underscore, as Seurat expects; without it
# the key is rewritten to "ldaumap_" with a warning.
sub <- RunUMAP(
  object = sub,
  dims = 1:11,
  reduction = "lda",
  reduction.key = "ldaumap_",
  reduction.name = "ldaumap"
)
# Visualize UMAP clustering results.
Idents(sub) <- "mixscape_class"
sub$mixscape_class <- as.factor(sub$mixscape_class)

# Set colors for each perturbation: one hue per class level, named by level.
# The palette size follows the number of levels instead of a fixed 12, so
# the names always line up with the colours.
class_levels <- levels(sub$mixscape_class)
col <- setNames(object = hue_pal()(length(class_levels)), nm = class_levels)

# Non-targeting cells are drawn in grey. The class is looked up by name, not
# by a fixed position in the level order.
if ("NT" %in% names(col)) {
  col["NT"] <- "grey39"
}

# Labelled UMAP on the 'ldaumap' reduction, without a legend.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
p <- DimPlot(
  object = sub,
  reduction = "ldaumap",
  repel = TRUE,
  label.size = 5,
  label = TRUE,
  cols = col
) +
  NoLegend()

p2 <- p +
  scale_color_manual(values = col, drop = FALSE) +
  ylab("UMAP 2") +
  xlab("UMAP 1") +
  custom_theme
p2

网友评论