美文网首页
SingCellaR代码实操(三):多样本单细胞数据整合上游分析

SingCellaR代码实操(三):多样本单细胞数据整合上游分析

作者: 生信宝库 | 来源:发表于2022-09-30 12:28 被阅读0次

前言

在上一期推文:SingCellaR代码实操(二):单个骨髓纤维化患者的标准分析流程中,Immugent展示了如何使用SingCellaR对单个患者的样本数据进行分析。而在实际应用中,我们为了对比不同分组或者处理的样本,需要整合多个样本的单细胞数据,因此就需要将不同来源的单细胞数据进行整合。SingCellaR内置了很多强大的去批次工具,可以直接对不同来源的数据进行整合分析,由于内容较多,Immugent将通过两期推文分别对整合分析的上游和下游进行讲解。

本期推文将先展示如何使用SingCellaR进行整合多个来源的单细胞数据,以及其上游分析流程的代码。


代码展示

library(SingCellaR)

human_HSPCs <- new("SingCellaR")
human_HSPCs@dir_path_SingCellR_object_files<-"../SingCellaR_example_datasets/Psaila_et_al/SingCellaR_objects/"
human_HSPCs@SingCellR_object_files=c("Sample1.SingCellaR.rdata",
                                     "Sample6.SingCellaR.rdata",
                                     "Sample11.SingCellaR.rdata",
                                     "Sample20.SingCellaR.rdata",
                                     "Sample21.SingCellaR.rdata")

preprocess_integration(human_HSPCs)
human_HSPCs
## An object of class SingCellaR with a matrix of : 33538 genes across 35892 samples.

To speed up the integration, we downsample the number of cells to 5,000.

subsample_cells(human_HSPCs,n.subsample = 5000,n.seed = 42)
filter_cells_and_genes(human_HSPCs,
                       min_UMIs=1000,
                       max_UMIs=80000,
                       min_detected_genes=500,
                       max_detected_genes=8000,
                       max_percent_mito=15,
                       genes_with_expressing_cells = 10,isRemovedDoublets = FALSE)

cell_anno.info<-get_cells_annotation(human_HSPCs)
head(cell_anno.info)
table(cell_anno.info$sampleID)
##  1_Sample1 1_Sample11 1_Sample20 1_Sample21  1_Sample6 
##        469        818       1448       1479        786

human_HSPCs@meta.data$status[human_HSPCs@meta.data$sampleID=="1_Sample1"]<-"healthy"
human_HSPCs@meta.data$status[human_HSPCs@meta.data$sampleID=="1_Sample6"]<-"healthy"
human_HSPCs@meta.data$status[human_HSPCs@meta.data$sampleID=="1_Sample11"]<-"MF"
human_HSPCs@meta.data$status[human_HSPCs@meta.data$sampleID=="1_Sample20"]<-"MF"
human_HSPCs@meta.data$status[human_HSPCs@meta.data$sampleID=="1_Sample21"]<-"MF"

cell_anno.info<-get_cells_annotation(human_HSPCs)
head(cell_anno.info)

normalize_UMIs(human_HSPCs,use.scaled.factor = T)
get_variable_genes_by_fitting_GLM_model(human_HSPCs,mean_expr_cutoff = 0.1,disp_zscore_cutoff = 0.1)
remove_unwanted_genes_from_variable_gene_set(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.ribosomal-mitocondrial.genes.gmt",
                                             removed_gene_sets=c("Ribosomal_gene","Mitocondrial_gene"))
plot_variable_genes(human_HSPCs)                                               
image.png
runPCA(human_HSPCs,use.components=50,use.regressout.data = F)
plot_PCA_Elbowplot(human_HSPCs)
SingCellaR::runUMAP(human_HSPCs,dim_reduction_method = "pca",n.dims.use = 20,n.neighbors = 30,
        uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)
image.png
image.png

Supervised harmony integration

library(harmony)
runSupervised_Harmony(human_HSPCs,covariates = c("sampleID"),n.dims.use = 20,
                      hcl.height.cutoff = 0.25,harmony.max.iter = 20,n.seed = 1)
SingCellaR::runUMAP(human_HSPCs,useIntegrativeEmbeddings = T, integrative_method = "supervised_harmony",n.dims.use = 20,
        n.neighbors = 30,uwot.metric = "euclidean")           plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)   
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)        
image.png
image.png
plot_umap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
                                      show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
                                      custom_color = c("red","orange","cyan","purple"),
                                      isNormalizedByHouseKeeping = T,point.size = 1,background.color = "black")
image.png

Seurat integration

library(Seurat)

meta.data<-get_cells_annotation(human_HSPCs)
rownames(meta.data)<-meta.data$Cell

runSeuratIntegration(human_HSPCs,Seurat.metadata=meta.data,n.dims.use = 20,
                     Seurat.split.by = "data_set",use.SingCellaR.varGenes = FALSE)
SingCellaR::runUMAP(human_HSPCs,useIntegrativeEmbeddings = T, integrative_method = "seurat",n.dims.use = 20,
        n.neighbors = 30,uwot.metric = "euclidean")           plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)   
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)           
image.png
image.png

Liger integration

library(rliger)
runLiger_online_iNMF(human_HSPCs,liger.k = 20)

SingCellaR::runUMAP(human_HSPCs,useIntegrativeEmbeddings = T, integrative_method = "liger",n.dims.use = 20,
        n.neighbors = 30,uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)

plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)
plot_umap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
                                      show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
                                      custom_color = c("red","orange","cyan","purple"),
                                      isNormalizedByHouseKeeping = T,point.size = 1,background.color = "black")
image.png

Scanorama integration

runScanorama(human_HSPCs)
runPCA(human_HSPCs,use.scanorama.integrative.matrix = T,use.components = 50)
SingCellaR::runUMAP(human_HSPCs,dim_reduction_method = "pca",
        n.dims.use = 20,n.neighbors = 30,uwot.metric = "euclidean")
        
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)
plot_umap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
                                      show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
                                      custom_color = c("red","orange","cyan","purple"),
                                      isNormalizedByHouseKeeping = T,point.size = 1,background.color = "black")
image.png
image.png
image.png

Combat

library(sva)
runCombat(human_HSPCs,use.reduced_dim = F,batch_identifier = "sampleID")
runPCA(human_HSPCs,use.regressout.data = T)
SingCellaR::runUMAP(human_HSPCs,dim_reduction_method = "pca",n.dims.use = 20, n.neighbors = 20,uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)
plot_umap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
                                      show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
                                      custom_color = c("red","orange","cyan","purple"),
                                      isNormalizedByHouseKeeping = T,point.size = 1,background.color = "black")
image.png
image.png
image.png

Limma

remove_unwanted_confounders(human_HSPCs,residualModelFormulaStr="~UMI_count+percent_mito+sampleID")
runPCA(human_HSPCs,use.regressout.data = T)
SingCellaR::runUMAP(human_HSPCs,dim_reduction_method = "pca",n.dims.use = 20, n.neighbors = 20,uwot.metric = "euclidean")
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "sampleID",point.size = 0.5)
plot_umap_label_by_a_feature_of_interest(human_HSPCs,feature = "status",point.size = 0.5)

plot_umap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
                                      show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
                                      custom_color = c("red","orange","cyan","purple"),
                                      isNormalizedByHouseKeeping = T,point.size = 1,background.color = "black")
save(human_HSPCs,file="../SingCellaR_example_datasets/Psaila_et_al/SingCellaR_objects/human_HSPCs_v0.1.SingCellaR.rdata")                                                                    
image.png
image.png
image.png

说在最后

上面Immugent展示了6种进行单细胞数据整合的算法,分别是:harmony,Seurat,Liger,Scanorama,Combat,Limma;这些已经几乎涵盖了目前所有主流的单细胞数据整合算法。总体上看,这些算法整合出的结果大同小异;其次需要根据数据量的大小选择合适的整合工具,比如如果你的数据庞大(20w+),而你的电脑配置又低,那就不太适合选择耗时耗内存的一些算法如Seurat,而是应该选择轻便的算法如Harmony。此外,如果数据是跨平台的,或者批次效应很大,那就需要使用Seurat这类强力的整合工具了。

好啦,本期分享到这就结束了,我们下期将会对整合数据后的下游分析流程进行展示,敬请期待!

相关文章

网友评论

      本文标题:SingCellaR代码实操(三):多样本单细胞数据整合上游分析

      本文链接:https://www.haomeiwen.com/subject/grwlartx.html