Seurat for single-cell sequencin

作者: wkzhang81 | 来源:发表于2023-02-07 11:23 被阅读0次

R: Seurat 过滤、标准化、细胞周期
FindConservedMarkers vs FindMark
Seurat—Mixscape Vignette
cell-free tumor DNA序列数据的生物信息学分析
易基因 | Mol Biol Evol：利用RRBS技术多维度分
RIP-Seq（1）--实验原理及技术路线
Demutiplex of illumina sequencin
单细胞综述文章及关键配图（做ppt用）
单细胞实战2: Seurat+SingleR--从矩阵到细胞类型
Seurat提速——并行化（future包）2022-06-07

Data import and integration

p_load(Seurat, tidyverse)
# Load data for individual sample
sce1 <- Read10X("./ST1/", gene.column = 1) # add gene.column = 1 for old version
sce1 <- CreateSeuratObject(counts = sce1, 
                              project = "ST1", 
                              min.cells = 3, min.features = 200)
sce1[["percent.mt"]] <- PercentageFeatureSet(sce1, pattern = "^mt-")
sce1<- subset(sce1, subset = nFeature_RNA > 200 & nFeature_RNA < 7000 
              & percent.mt < 5 & nCount_RNA < quantile(nCount_RNA, 0.95) 
              & nCount_RNA > 500)
sce2 <- Read10X("./ST2/", gene.column = 1) # add gene.column = 1 for old version
sce2 <- CreateSeuratObject(counts = sce2, 
                              project = "ST2", 
                              min.cells = 3, min.features = 200)
sce2[["percent.mt"]] <- PercentageFeatureSet(sce2, pattern = "^mt-")
sce2<- subset(sce2, subset = nFeature_RNA > 200 & nFeature_RNA < 7000 
              & percent.mt < 5 & nCount_RNA < quantile(nCount_RNA, 0.95) 
              & nCount_RNA > 500)

# Data integration
sce <- merge(sce1, sce2)
sce <- NormalizeData(sce)
sce <- FindVariableFeatures(sce)
sce <- ScaleData(sce)
sce <- RunPCA(sce)
sce <- IntegrateLayers(object = sce, method = RPCAIntegration,
                       orig.reduction = "pca", new.reduction = "integrated.rpca",
                       verbose = FALSE)
sce <- FindNeighbors(sce, reduction = "integrated.rpca", dims = 1:30)
sce <- FindClusters(sce, resolution = 0.2, cluster.name = "rpca_clusters")

sce <- RunUMAP(sce, reduction = "integrated.rpca", 
               dims = 1:30, reduction.name = "umap.rpca")
ElbowPlot(sce, ndims = 30)

# Plots
DimPlot(sce, label = T, split.by = "orig.ident") + NoAxes() + NoLegend()

# Split for integrative analysis
sce <- split(sce[["RNA"]], f = sce$orig.ident)
# Join for marker analysis
sce <- JoinLayers(sce)

Data analysis

# Search for marker genes
markers <- FindAllMarkers(sce, only.pos = T, min.pct = 0.25, logfc.threshold = 0.25, assay="RNA")
write.csv(markers, "marker_genes.csv")
topnmarkers <- markers %>% group_by(cluster) %>% top_n(n=5, wt = avg_log2FC)
DotPlot(sce, 
  features = topnmarkers$gene,
  assay = 'RNA',
  cols = c('lightgrey', 'red')) + RotatedAxis()

# Marker genes for annotation
genes_to_check = c("Slc17a7","Slc30a3","Otof","Rorb","Rspo1","Fam84b",
                   "Syt6","Cplx3","Tshz2", #Glutamatergic
                   "Pvalb","Sst","Vip","Sncg","Lamp5", #GABAergic
                   "Sox10","Olig1","Mbp", #Oligodendracytes
                   "Pdgfra", #OPC
                   "Gfap","Aqp4", #Astrocytes
                   "Gli3", #NPCs
                   "Ctss","Ptprc", #Microglial
                   "Flt1","Cd14",
                   "Acta2", #SMC
                   "Kcnj8" #Pericytes
)
DotPlot(sce, 
  features = genes_to_check,
  assay = 'RNA',
  cols = c('lightgrey', 'red')) + RotatedAxis() + coord_flip()

# SingleR for annotation
p_load(Seurat, tidyverse, SingleR, reshape2)
load("~/Desktop/Saved index/....")
count_for_SingleR <- GetAssayData(sce, layer="data")
singler <- SingleR(test = count_for_SingleR, ref = ident_matrix, labels = ident_matrix$label.main, assay.type.test=1)  ###main could be replaced by fine
singler_result <- as.data.frame.matrix(table(singler$labels, sce$seurat_clusters))
write.csv(singler_result, "singler_annotation.csv")

# Annotation
new.cluster.ids <- c("...")
names(new.cluster.ids) <- levels(sce)
sce <- RenameIdents(sce, new.cluster.ids)
sce@meta.data$cellType <- sce@active.ident
DimPlot(sce,  split.by = "orig.ident", label = T, pt.size = 0.5) + NoLegend()

VlnPlot(sce, features = c("..."), assay = "RNA")
FeaturePlot(sce, features = c("..."))

# Significant test
sig_gene <- FindMarkers(sce, ident.1 = "0", logfc.threshold = log(1.5), assay = "RNA")
sig_gene <- sig_gene[order(sig_gene$avg_log2FC),]

sig_gene$change = ifelse(sig_gene$p_val_adj < 0.05 & abs(sig_gene$avg_log2FC) >= 1, 
  ifelse(sig_gene$avg_log2FC >= 1, 'Up', 'Down'), 'Stable')
sig_gene_sig <- subset(sig_gene, change != "Stable")
sig_gene_sig <- sig_gene_sig %>% top_n(20, abs(avg_log2FC))
p <- ggplot(sig_gene, aes(avg_log2FC, -log(p_val_adj, 10), colour = change)) + 
    geom_point(alpha=0.3, size = 3) + 
    scale_color_manual(values=c("blue", "grey","red")) + 
    xlab(expression("log"[2]*"FC")) + ylab(expression("-log"[10]*"FDR")) + 
    geom_vline(xintercept=c(-1,1),lty=4,col="lightgrey",lwd=0.8) + 
    geom_hline(yintercept = -log10(0.05),lty=4,col="lightgrey",lwd=0.8)  +
    geom_text_repel(aes(label = rownames(sig_gene_sig)), data = sig_gene_sig) +
    theme_classic()
p + theme(legend.position = "none")
rm(sig_gene_sig)
table(sig_gene$change)

av <- AverageExpression(immuno_1, assays = "RNA")
av <- av[[1]]
cg <- names(tail(sort(apply(av, 1, sd)), 1000))
pheatmap(cor(av[cg,], method = "spearman"), border = F, angle_col = 45, clustering_method = "median")

interest_module <- "Cxcl"
interest_module <- features[grepl(interest_module, features)]
DoHeatmap(sce, features = interest_module, label = F, slot = "data") + scale_fill_gradientn(colors = c("black","red3"))

1. Load R packages and count_matrix.

For csv matrix loading:

p_load(Seurat, tidyverse)
plan("multisession", workers = 4)
setwd("<...>")
count_matrix <- read.csv("<SequenceNo_Counts.csv>", row.names = 1)
count_sc <- CreateSeuratObject(counts = count_matrix, project = "ProjName", 
    min.cells = 3, min.features = 200)

For H5 matrix loading:

count_matrix_h5 <- Read10X_h5("<SequenceNo_Counts.h5>")
count_sc <- CreateSeuratObject(counts = count_matrix_h5, project = "ProjName", 
    min.cells = 3, min.features = 200)

For mtx loading:

counts <- Read10X(data.dir = "<...>")
count_sc <- CreateSeuratObject(counts = counts)

Combine samples

### Create Seurat objects
count_sc_1 <- CreateSeuratObject(counts = count_1, project = "1", min.cells = 3, min.features = 200)
count_sc_2 <- CreateSeuratObject(counts = count_2, project = "2", min.cells = 3, min.features = 200)

### Normalization
count_sc_1 <- NormalizeData(count_sc_1, normalization.method = "LogNormalize", scale.factor = 1e4)
count_sc_2 <- NormalizeData(count_sc_2, normalization.method = "LogNormalize", scale.factor = 1e4)

### Find variable features and integration anchors
count_sc_1 <- FindVariableFeatures(count_sc_1, selection.method = "vst", nfeatures = 2000)
count_sc_2 <- FindVariableFeatures(count_sc_2, selection.method = "vst", nfeatures = 2000)
merge_anchors <- FindIntegrationAnchors(object.list = list(count_sc_1, count_sc_2), dims = 1:20)
count_merge <- integrateData(anchorset = merge_anchors, dims = 1:20)
DefaultAssay(count_merge) <- "integrated"

2. Quarlity control and count filtering by mitochondrial-RNA percentage.

count_sc[["percent.mt"]] <- PercentageFeatureSet(count_sc, pattern = "^MT-")
VlnPlot(count_sc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

plot1 <- FeatureScatter(count_sc, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(count_sc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2

count_sc <- subset(count_sc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)

3. Normalization.

count_sc <- NormalizeData(count_sc, normalization.method = "LogNormalize", scale.factor = 10000)
count_sc <- FindVariableFeatures(count_sc, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(count_sc), 10)
plot1 <- VariableFeaturePlot(count_sc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = T)
plot1 + plot2

4. Linear dimensional reduction.

all.genes <- rownames(count_sc)
count_sc <- ScaleData(count_sc, features = all.genes)
count_sc <- RunPCA(count_sc, features = VariableFeatures(object = count_sc))
print(count_sc[["pca"]], dims = 1:5, nfeatures = 5)

VizDimLoadings(count_sc, dims = 1:2, reduction = "pca")
DimPlot(count_sc, reduction = "pca")
DimHeatmap(count_sc, dims = 1, cells = 500, balanced = TRUE)

5. Defining the dimensions.

count_sc <- JackStraw(count_sc, num.replicate = 100)
count_sc <- ScoreJackStraw(count_sc, dims = 1:20)
JackStrawPlot(count_sc, dims = 1:15)
ElbowPlot(count_sc)

6. Clustering of the cells with t-sne.

count_sc <- FindNeighbors(count_sc, dims = 1:15)
count_sc <- FindClusters(count_sc, resolution = 0.5)
head(Idents(count_sc), 5)
count_sc <- RunTSNE(count_sc, dims = 1:10)
head(count_sc@reductions$tsne@cell.embeddings)
DimPlot(count_sc, reduction = "tsne")

7. Finding marker genes for each cluster.

sc_markers <- FindAllMarkers(count_sc, only.pos = T, min.pct = 0.25)
top5 <- sc_markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
DoHeatmap(count_sc, features = top5$gene) + NoLegend()

8. Search for interested genes

VlnPlot(count_sc, features = c("FABP4", "APOC1", "DCN", "RGS5", "MYH11", "DARC", "CD52"))
FeaturePlot(count_sc, features = c("FABP4", "APOC1", "DCN", "RGS5", "MYH11", "DARC", "CD52"))
DotPlot(count_sc, features = c("FABP4", "APOC1", "DCN", "RGS5", "MYH11", "DARC", "CD52")) + RotatedAxis()

9. Defining cell populations

new.cluster.ids <- c("Naive CD4 T", "Memory CD4 T", "CD14+ Mono", "B", "CD8 T", "FCGR3A+ Mono", "NK", "DC")
names(new.cluster.ids) <- levels(count_sc)
count_sc <- RenameIdents(count_sc, new.cluster.ids)

DimPlot(count_sc, reduction = "tsne", label = TRUE, pt.size = 0.5) + NoLegend()

R: Seurat 过滤、标准化、细胞周期
Seurat文章 Spatial reconstruction of single-cell gene expre...
FindConservedMarkers vs FindMark
When use Seurat package to perform single-cell RNA seq, t...
Seurat—Mixscape Vignette
Analyze single-cell pooled CRSIPR screens. New Seurat fun...
cell-free tumor DNA序列数据的生物信息学分析
Bioinformatics Analysis for Cell-Free Tumor DNA Sequencin...
易基因 | Mol Biol Evol：利用RRBS技术多维度分
关于RRBS 简化甲基化测序(Reduced Representation Bisulfite Sequencin...
RIP-Seq（1）--实验原理及技术路线
1.实验原理 RIP-seq（RNA immunoprecipitation and deep sequencin...
Demutiplex of illumina sequencin
1. Usage of bcl2fastq Explanation of key parameters:--use...
单细胞综述文章及关键配图（做ppt用）
Single-cell analysis targeting the proteome Single-cell a...
单细胞实战2: Seurat+SingleR--从矩阵到细胞类型
单细胞数据标准分析Seurat流程： Seurat官网：https://satijalab.org/seurat/...
Seurat提速——并行化（future包）2022-06-07
关键词 Parallelization in Seurat Seurat 并行化 Seurat 加快运行速度 Se...