- SingCellaR代码实操(四):多样本单细胞数据整合下游分析
- SingCellaR代码实操(三):多样本单细胞数据整合上游分析
- 单细胞空间转录分析之Seurat-多样本整合(浅谈空间批次)
- 单细胞转录组数据分析课件||5. scRNA-seq data
- 单细胞转录组之Scanpy - 样本整合分析
- 单细胞不同样本数据整合-解决AnnData合并时ValueErr
- SingCellaR代码实操(二):单个骨髓纤维化患者的分析流程
- 单细胞数据整合分析——批次效应(batch effect)去除
- simspec:基于细胞簇谱系相似性整合单细胞数据
- Seurat的三种单细胞数据整合方法汇总(批次校正)2022-0
前言
在上一期推文:SingCellaR代码实操(二):单个骨髓纤维化患者的标准分析流程中,Immugent展示了如何使用SingCellaR对单个患者的样本数据进行分析。而在实际应用中,我们为了对比不同分组或者处理的样本,需要整合多个样本的单细胞数据,因此就需要将不同来源的单细胞数据进行整合。SingCellaR内置了很多强大的去批次工具,可以直接对不同来源的数据进行整合分析,由于内容较多,Immugent将通过两期推文分别对整合分析的上游和下游进行讲解。
本期推文将先展示如何使用SingCellaR进行整合多个来源的单细胞数据,以及其上游分析流程的代码。
代码展示
library(SingCellaR)
human_HSPCs <- new("SingCellaR")
human_HSPCs@dir_path_SingCellR_object_files<-"../SingCellaR_example_datasets/Psaila_et_al/SingCellaR_objects/"
human_HSPCs@SingCellR_object_files=c("Sample1.SingCellaR.rdata",
"Sample6.SingCellaR.rdata",
"Sample11.SingCellaR.rdata",
"Sample20.SingCellaR.rdata",
"Sample21.SingCellaR.rdata")
preprocess_integration(human_HSPCs)
human_HSPCs
## An object of class SingCellaR with a matrix of : 33538 genes across 35892 samples.
To speed up the integration, we downsample the number of cells to 5,000.
subsample_cells(human_HSPCs,n.subsample = 5000,n.seed = 42)
filter_cells_and_genes(human_HSPCs,
min_UMIs=1000,
max_UMIs=80000,
min_detected_genes=500,
max_detected_genes=8000,
max_percent_mito=15,
genes_with_expressing_cells = 10,isRemovedDoublets = FALSE)
cell_anno.info<-get_cells_annotation(human_HSPCs)
head(cell_anno.info)
table(cell_anno.info$sampleID)
## 1_Sample1 1_Sample11 1_Sample20 1_Sample21 1_Sample6
## 469 818 1448 1479 786
human_HSPCs@meta.data$status[human_HSPCs@meta.data$sampleID=="1_Sample1"]<-"healthy"
human_HSPCs@meta.data$status[human_HSPCs@meta.data$sampleID=="1_Sample6"]<-"healthy"
human_HSPCs@meta.data$status[human_HSPCs@meta.data$sampleID=="1_Sample11"]<-"MF"
human_HSPCs@meta.data$status[human_HSPCs@meta.data$sampleID=="1_Sample20"]<-"MF"
human_HSPCs@meta.data$status[human_HSPCs@meta.data$sampleID=="1_Sample21"]<-"MF"
cell_anno.info<-get_cells_annotation(human_HSPCs)
head(cell_anno.info)
normalize_UMIs(human_HSPCs,use.scaled.factor = T)
get_variable_genes_by_fitting_GLM_model(human_HSPCs,mean_expr_cutoff = 0.1,disp_zscore_cutoff = 0.1)
remove_unwanted_genes_from_variable_gene_set(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.ribosomal-mitocondrial.genes.gmt",
removed_gene_sets=c("Ribosomal_gene","Mitocondrial_gene"))
plot_variable_genes(human_HSPCs)

runPCA(human_HSPCs,use.components=50,use.regressout.data = F)
plot_PCA_Elbowplot(human_HSPCs)
SingCellaR::runUMAP(human_HSPCs,dim_reduction_method = "pca",n.dims.use = 20,n.neighbors = 30,
uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)


Supervised harmony integration
library(harmony)
runSupervised_Harmony(human_HSPCs,covariates = c("sampleID"),n.dims.use = 20,
hcl.height.cutoff = 0.25,harmony.max.iter = 20,n.seed = 1)
SingCellaR::runUMAP(human_HSPCs,useIntegrativeEmbeddings = T, integrative_method = "supervised_harmony",n.dims.use = 20,
n.neighbors = 30,uwot.metric = "euclidean") plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)


plot_umap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
custom_color = c("red","orange","cyan","purple"),
isNormalizedByHouseKeeping = T,point.size = 1,background.color = "black")

Seurat integration
library(Seurat)
meta.data<-get_cells_annotation(human_HSPCs)
rownames(meta.data)<-meta.data$Cell
runSeuratIntegration(human_HSPCs,Seurat.metadata=meta.data,n.dims.use = 20,
Seurat.split.by = "data_set",use.SingCellaR.varGenes = FALSE)
SingCellaR::runUMAP(human_HSPCs,useIntegrativeEmbeddings = T, integrative_method = "seurat",n.dims.use = 20,
n.neighbors = 30,uwot.metric = "euclidean") plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)


Liger integration
library(rliger)
runLiger_online_iNMF(human_HSPCs,liger.k = 20)
SingCellaR::runUMAP(human_HSPCs,useIntegrativeEmbeddings = T, integrative_method = "liger",n.dims.use = 20,
n.neighbors = 30,uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)
plot_umap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
custom_color = c("red","orange","cyan","purple"),
isNormalizedByHouseKeeping = T,point.size = 1,background.color = "black")

Scanorama integration
runScanorama(human_HSPCs)
runPCA(human_HSPCs,use.scanorama.integrative.matrix = T,use.components = 50)
SingCellaR::runUMAP(human_HSPCs,dim_reduction_method = "pca",
n.dims.use = 20,n.neighbors = 30,uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)
plot_umap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
custom_color = c("red","orange","cyan","purple"),
isNormalizedByHouseKeeping = T,point.size = 1,background.color = "black")



Combat
library(sva)
runCombat(human_HSPCs,use.reduced_dim = F,batch_identifier = "sampleID")
runPCA(human_HSPCs,use.regressout.data = T)
SingCellaR::runUMAP(human_HSPCs,dim_reduction_method = "pca",n.dims.use = 20, n.neighbors = 20,uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)
plot_umap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
custom_color = c("red","orange","cyan","purple"),
isNormalizedByHouseKeeping = T,point.size = 1,background.color = "black")



Limma
remove_unwanted_confounders(human_HSPCs,residualModelFormulaStr="~UMI_count+percent_mito+sampleID")
runPCA(human_HSPCs,use.regressout.data = T)
SingCellaR::runUMAP(human_HSPCs,dim_reduction_method = "pca",n.dims.use = 20, n.neighbors = 20,uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)
plot_umap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
custom_color = c("red","orange","cyan","purple"),
isNormalizedByHouseKeeping = T,point.size = 1,background.color = "black")
save(human_HSPCs,file="../SingCellaR_example_datasets/Psaila_et_al/SingCellaR_objects/human_HSPCs_v0.1.SingCellaR.rdata")



说在最后
上面Immugent展示了6种进行单细胞数据整合的算法,分别是:harmony,Seurat,Liger,Scanorama,Combat,Limma;这些已经几乎涵盖了目前所有主流的单细胞数据整合算法。总体上看,这些算法整合出的结果大同小异;其次需要根据数据量的大小选择合适的整合工具,比如如果你的数据庞大(20w+),而你的电脑配置又低,那就不太适合选择耗时耗内存的一些算法如Seurat,而是应该选择轻便的算法如Harmony。此外,如果数据是跨平台的,或者批次效应很大,那就需要使用Seurat这类强力的整合工具了。
好啦,本期分享到这就结束了,我们下期将会对整合数据后的下游分析流程进行展示,敬请期待!
网友评论