Data import and integration
p_load(Seurat, tidyverse)
# Load data for individual sample
sce1 <- Read10X("./ST1/", gene.column = 1) # add gene.column = 1 for old version
sce1 <- CreateSeuratObject(counts = sce1,
project = "ST1",
min.cells = 3, min.features = 200)
sce1[["percent.mt"]] <- PercentageFeatureSet(sce1, pattern = "^mt-")
sce1<- subset(sce1, subset = nFeature_RNA > 200 & nFeature_RNA < 7000
& percent.mt < 5 & nCount_RNA < quantile(nCount_RNA, 0.95)
& nCount_RNA > 500)
sce2 <- Read10X("./ST2/", gene.column = 1) # add gene.column = 1 for old version
sce2 <- CreateSeuratObject(counts = sce2,
project = "ST2",
min.cells = 3, min.features = 200)
sce2[["percent.mt"]] <- PercentageFeatureSet(sce2, pattern = "^mt-")
sce2<- subset(sce2, subset = nFeature_RNA > 200 & nFeature_RNA < 7000
& percent.mt < 5 & nCount_RNA < quantile(nCount_RNA, 0.95)
& nCount_RNA > 500)
# Data integration
sce <- merge(sce1, sce2)
sce <- NormalizeData(sce)
sce <- FindVariableFeatures(sce)
sce <- ScaleData(sce)
sce <- RunPCA(sce)
sce <- IntegrateLayers(object = sce, method = RPCAIntegration,
orig.reduction = "pca", new.reduction = "integrated.rpca",
verbose = FALSE)
sce <- FindNeighbors(sce, reduction = "integrated.rpca", dims = 1:30)
sce <- FindClusters(sce, resolution = 0.2, cluster.name = "rpca_clusters")
sce <- RunUMAP(sce, reduction = "integrated.rpca",
dims = 1:30, reduction.name = "umap.rpca")
ElbowPlot(sce, ndims = 30)
# Plots
DimPlot(sce, label = T, split.by = "orig.ident") + NoAxes() + NoLegend()
# Split for integrative analysis
sce <- split(sce[["RNA"]], f = sce$orig.ident)
# Join for marker analysis
sce <- JoinLayers(sce)
Data analysis
# Search for marker genes
markers <- FindAllMarkers(sce, only.pos = T, min.pct = 0.25, logfc.threshold = 0.25, assay="RNA")
write.csv(markers, "marker_genes.csv")
topnmarkers <- markers %>% group_by(cluster) %>% top_n(n=5, wt = avg_log2FC)
DotPlot(sce,
features = topnmarkers$gene,
assay = 'RNA',
cols = c('lightgrey', 'red')) + RotatedAxis()
# Marker genes for annotation
genes_to_check = c("Slc17a7","Slc30a3","Otof","Rorb","Rspo1","Fam84b",
"Syt6","Cplx3","Tshz2", #Glutamatergic
"Pvalb","Sst","Vip","Sncg","Lamp5", #GABAergic
"Sox10","Olig1","Mbp", #Oligodendracytes
"Pdgfra", #OPC
"Gfap","Aqp4", #Astrocytes
"Gli3", #NPCs
"Ctss","Ptprc", #Microglial
"Flt1","Cd14",
"Acta2", #SMC
"Kcnj8" #Pericytes
)
DotPlot(sce,
features = genes_to_check,
assay = 'RNA',
cols = c('lightgrey', 'red')) + RotatedAxis() + coord_flip()
# SingleR for annotation
p_load(Seurat, tidyverse, SingleR, reshape2)
load("~/Desktop/Saved index/....")
count_for_SingleR <- GetAssayData(sce, layer="data")
singler <- SingleR(test = count_for_SingleR, ref = ident_matrix, labels = ident_matrix$label.main, assay.type.test=1) ###main could be replaced by fine
singler_result <- as.data.frame.matrix(table(singler$labels, sce$seurat_clusters))
write.csv(singler_result, "singler_annotation.csv")
# Annotation
new.cluster.ids <- c("...")
names(new.cluster.ids) <- levels(sce)
sce <- RenameIdents(sce, new.cluster.ids)
sce@meta.data$cellType <- sce@active.ident
DimPlot(sce, split.by = "orig.ident", label = T, pt.size = 0.5) + NoLegend()
VlnPlot(sce, features = c("..."), assay = "RNA")
FeaturePlot(sce, features = c("..."))
# Significant test
sig_gene <- FindMarkers(sce, ident.1 = "0", logfc.threshold = log(1.5), assay = "RNA")
sig_gene <- sig_gene[order(sig_gene$avg_log2FC),]
sig_gene$change = ifelse(sig_gene$p_val_adj < 0.05 & abs(sig_gene$avg_log2FC) >= 1,
ifelse(sig_gene$avg_log2FC >= 1, 'Up', 'Down'), 'Stable')
sig_gene_sig <- subset(sig_gene, change != "Stable")
sig_gene_sig <- sig_gene_sig %>% top_n(20, abs(avg_log2FC))
p <- ggplot(sig_gene, aes(avg_log2FC, -log(p_val_adj, 10), colour = change)) +
geom_point(alpha=0.3, size = 3) +
scale_color_manual(values=c("blue", "grey","red")) +
xlab(expression("log"[2]*"FC")) + ylab(expression("-log"[10]*"FDR")) +
geom_vline(xintercept=c(-1,1),lty=4,col="lightgrey",lwd=0.8) +
geom_hline(yintercept = -log10(0.05),lty=4,col="lightgrey",lwd=0.8) +
geom_text_repel(aes(label = rownames(sig_gene_sig)), data = sig_gene_sig) +
theme_classic()
p + theme(legend.position = "none")
rm(sig_gene_sig)
table(sig_gene$change)
av <- AverageExpression(immuno_1, assays = "RNA")
av <- av[[1]]
cg <- names(tail(sort(apply(av, 1, sd)), 1000))
pheatmap(cor(av[cg,], method = "spearman"), border = F, angle_col = 45, clustering_method = "median")
interest_module <- "Cxcl"
interest_module <- features[grepl(interest_module, features)]
DoHeatmap(sce, features = interest_module, label = F, slot = "data") + scale_fill_gradientn(colors = c("black","red3"))
1. Load R packages and count_matrix.
p_load(Seurat, tidyverse)
plan("multisession", workers = 4)
setwd("<...>")
count_matrix <- read.csv("<SequenceNo_Counts.csv>", row.names = 1)
count_sc <- CreateSeuratObject(counts = count_matrix, project = "ProjName",
min.cells = 3, min.features = 200)
count_matrix_h5 <- Read10X_h5("<SequenceNo_Counts.h5>")
count_sc <- CreateSeuratObject(counts = count_matrix_h5, project = "ProjName",
min.cells = 3, min.features = 200)
counts <- Read10X(data.dir = "<...>")
count_sc <- CreateSeuratObject(counts = counts)
### Create Seurat objects
count_sc_1 <- CreateSeuratObject(counts = count_1, project = "1", min.cells = 3, min.features = 200)
count_sc_2 <- CreateSeuratObject(counts = count_2, project = "2", min.cells = 3, min.features = 200)
### Normalization
count_sc_1 <- NormalizeData(count_sc_1, normalization.method = "LogNormalize", scale.factor = 1e4)
count_sc_2 <- NormalizeData(count_sc_2, normalization.method = "LogNormalize", scale.factor = 1e4)
### Find variable features and integration anchors
count_sc_1 <- FindVariableFeatures(count_sc_1, selection.method = "vst", nfeatures = 2000)
count_sc_2 <- FindVariableFeatures(count_sc_2, selection.method = "vst", nfeatures = 2000)
merge_anchors <- FindIntegrationAnchors(object.list = list(count_sc_1, count_sc_2), dims = 1:20)
count_merge <- integrateData(anchorset = merge_anchors, dims = 1:20)
DefaultAssay(count_merge) <- "integrated"
2. Quarlity control and count filtering by mitochondrial-RNA percentage.
count_sc[["percent.mt"]] <- PercentageFeatureSet(count_sc, pattern = "^MT-")
VlnPlot(count_sc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(count_sc, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(count_sc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2
count_sc <- subset(count_sc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
3. Normalization.
count_sc <- NormalizeData(count_sc, normalization.method = "LogNormalize", scale.factor = 10000)
count_sc <- FindVariableFeatures(count_sc, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(count_sc), 10)
plot1 <- VariableFeaturePlot(count_sc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = T)
plot1 + plot2
4. Linear dimensional reduction.
all.genes <- rownames(count_sc)
count_sc <- ScaleData(count_sc, features = all.genes)
count_sc <- RunPCA(count_sc, features = VariableFeatures(object = count_sc))
print(count_sc[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(count_sc, dims = 1:2, reduction = "pca")
DimPlot(count_sc, reduction = "pca")
DimHeatmap(count_sc, dims = 1, cells = 500, balanced = TRUE)
5. Defining the dimensions.
count_sc <- JackStraw(count_sc, num.replicate = 100)
count_sc <- ScoreJackStraw(count_sc, dims = 1:20)
JackStrawPlot(count_sc, dims = 1:15)
ElbowPlot(count_sc)
6. Clustering of the cells with t-sne.
count_sc <- FindNeighbors(count_sc, dims = 1:15)
count_sc <- FindClusters(count_sc, resolution = 0.5)
head(Idents(count_sc), 5)
count_sc <- RunTSNE(count_sc, dims = 1:10)
head(count_sc@reductions$tsne@cell.embeddings)
DimPlot(count_sc, reduction = "tsne")
7. Finding marker genes for each cluster.
sc_markers <- FindAllMarkers(count_sc, only.pos = T, min.pct = 0.25)
top5 <- sc_markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
DoHeatmap(count_sc, features = top5$gene) + NoLegend()
8. Search for interested genes
VlnPlot(count_sc, features = c("FABP4", "APOC1", "DCN", "RGS5", "MYH11", "DARC", "CD52"))
FeaturePlot(count_sc, features = c("FABP4", "APOC1", "DCN", "RGS5", "MYH11", "DARC", "CD52"))
DotPlot(count_sc, features = c("FABP4", "APOC1", "DCN", "RGS5", "MYH11", "DARC", "CD52")) + RotatedAxis()
9. Defining cell populations
new.cluster.ids <- c("Naive CD4 T", "Memory CD4 T", "CD14+ Mono", "B", "CD8 T", "FCGR3A+ Mono", "NK", "DC")
names(new.cluster.ids) <- levels(count_sc)
count_sc <- RenameIdents(count_sc, new.cluster.ids)
DimPlot(count_sc, reduction = "tsne", label = TRUE, pt.size = 0.5) + NoLegend()
网友评论