美文网首页
SingCellaR代码实操(四):多样本单细胞数据整合下游分析

SingCellaR代码实操(四):多样本单细胞数据整合下游分析

作者: 生信宝库 | 来源:发表于2022-10-10 17:32 被阅读0次

    前言

    Immugent在上一期推文:SingCellaR代码实操(三):多样本单细胞数据整合上游分析中,很好的展示了如何使用SingCellaR基于多种单细胞数据整合算法进行多样本单细胞数据的整合,并将每一种整合结果进行了对比。这期推文Immugent将接着进行下游分析,即对整合后的数据进行功能解析。事实上,单细胞数据虽然测序深度不足,但是当样本量足够大时可以在一定程度上弥补测序不足的问题。整合多个来源或多批次的单细胞数据就是为了扩大样本量。除此之外,我们还需要衡量在扩大样本量同时还会引入批次效应,如果为了整合一个很小的数据而引入批次反而不是一种可取的方法,而评估数据整合效果最好的方式就是看整合后的数据能否解释我们的科学问题。

    本期流程的代码是上一期的延续,如果想复现本期代码的小伙伴还需要先将上一期的代码跑一下。


    代码展示

    library(SingCellaR)
    
    human_HSPCs<-local(get(load(file="../SingCellaR_example_datasets/Psaila_et_al/SingCellaR_objects/human_HSPCs_v0.1.SingCellaR.rdata")))
    human_HSPCs
    ## An object of class SingCellaR with a matrix of : 33538 genes across 5000 samples.
    
    identifyClusters(human_HSPCs,useIntegrativeEmbeddings = T,integrative_method = "harmony", n.dims.use = 20,n.neighbors = 50, knn.metric = "euclidean")
    plot_umap_label_by_clusters(human_HSPCs,show_method = "louvain")
    findMarkerGenes(human_HSPCs,cluster.type = "louvain")
    plot_heatmap_for_marker_genes(human_HSPCs,cluster.type = "louvain",n.TopGenes = 5,rowFont.size = 5)
    
    image.png image.png
    plot_umap_label_by_genes(human_HSPCs,gene_list = c("GZMB","EPHA2"))
    remove_clusters(human_HSPCs,cluster.type = "louvain",cluster_ids = "cl10")
    normalize_UMIs(human_HSPCs,use.scaled.factor = T)
    get_variable_genes_by_fitting_GLM_model(human_HSPCs,mean_expr_cutoff = 0.05,disp_zscore_cutoff = 0.05)
    
    remove_unwanted_genes_from_variable_gene_set(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.ribosomal-mitocondrial.genes.gmt",
                                                 removed_gene_sets=c("Ribosomal_gene","Mitocondrial_gene"))
    runPCA(human_HSPCs,use.components=50,use.regressout.data = F) plot_PCA_Elbowplot(human_HSPCs)                               
    runHarmony(human_HSPCs,covariates = c("sampleID"),harmony.max.iter = 20)                 
    
    identifyClusters(human_HSPCs,useIntegrativeEmbeddings = T,integrative_method = "harmony", n.dims.use = 20,n.neighbors = 50, knn.metric = "euclidean")
    runUMAP(human_HSPCs,useIntegrativeEmbeddings = T, integrative_method = "harmony",n.dims.use = 20,
            n.neighbors = 30,uwot.metric = "euclidean")
    plot_umap_label_by_clusters(human_HSPCs,show_method = "louvain")
    runFA2_ForceDirectedGraph(human_HSPCs,n.dims.use = 20, useIntegrativeEmbeddings = T,integrative_method = "harmony",n.neighbors = 5,n.seed = 1,fa2_n_iter = 1000)
    plot_forceDirectedGraph_label_by_clusters(human_HSPCs,show_method = "louvain")
    plot_forceDirectedGraph_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
                                          show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
                                          custom_color = c("red","orange","cyan","purple"),
                                          isNormalizedByHouseKeeping = T,vertex.size = 1,edge.size = 0.05,
                                          background.color = "black")
    
    image.png
    image.png
    image.png

    KNN-graph trajectory analysis

    runKNN_Graph(human_HSPCs,n.dims.use = 20, useIntegrativeEmbeddings = T,integrative_method = "harmony")
    library(threejs)
    plot_3D_knn_graph_label_by_clusters(human_HSPCs,show_method = "louvain",vertext.size = 0.25)
    genesets<-get_gmtGeneSets("../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt")
    plot_3D_knn_graph_label_by_a_signature_gene_set(human_HSPCs,gene_list = genesets[["Megakaryocyte"]],vertext.size = 0.40)
    
    image.png image.png

    Diffusion map analysis

    library(destiny)
    runDiffusionMap(human_HSPCs,n.dims.use = 20, useIntegrativeEmbeddings = T,integrative_method = "harmony")
    plot_diffusionmap_label_by_multiple_gene_sets(human_HSPCs,gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt",
                                          show_gene_sets = c("Erythroid","Lymphoid","Myeloid","Megakaryocyte"),
                                          custom_color = c("red","orange","cyan","purple"),
                                          isNormalizedByHouseKeeping = T,point.size = 0.8,
                                          background.color = "black")
    findMarkerGenes(human_HSPCs,cluster.type = "louvain")         
    plot_heatmap_for_marker_genes(human_HSPCs,cluster.type = "louvain",n.TopGenes = 10,rowFont.size = 5)                                         
    
    image.png
    image.png
    plot_umap_label_by_genes(human_HSPCs,gene_list = c("AVP","CNRIP1","MPO","PF4"))
    plot_forceDirectedGraph_label_by_genes(human_HSPCs,gene_list = c("AVP","CNRIP1","MPO","PF4"))
    plot_violin_for_marker_genes(human_HSPCs,gene_list = c("AVP","CNRIP1","MPO","PF4"))
    plot_bubble_for_genes_per_cluster(human_HSPCs,cluster.type = "louvain",
                                gene_list=c("AVP","CLEC9A",
                                            "APOE","CNRIP1",
                                            "MPO","AZU1",
                                            "PF4"),show.percent = T,buble.scale = 12,point.color1 = "gray",
                                            point.color2 = "firebrick1")
    
    image.png
    image.png
    image.png
    image.png

    GSEA analysis

    pre_rankedGenes_for_GSEA<-identifyGSEAPrerankedGenes_for_all_clusters(human_HSPCs,
                                                                        cluster.type = "louvain")
                                                                 
    save(pre_rankedGenes_for_GSEA,file="../SingCellaR_example_datasets/Psaila_et_al/SingCellaR_objects/human_HSPCs_preRankedGenes_for_GSEA.rdata")            
    
    load("../SingCellaR_example_datasets/Psaila_et_al/SingCellaR_objects/human_HSPCs_preRankedGenes_for_GSEA.rdata")
    fgsea_Results<-Run_fGSEA_for_multiple_comparisons(GSEAPrerankedGenes_list = pre_rankedGenes_for_GSEA,nPermSimple=2000,
                        gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.hematopoiesis.signature.genes.gmt")
      
    plot_heatmap_for_fGSEA_all_clusters(fgsea_Results,isApplyCutoff = TRUE,
                                        use_pvalues_for_clustering=T,
                                        show_NES_score = T,fontsize_row = 5,
                                        adjusted_pval = 0.10,
                                        show_only_NES_positive_score = T,format.digits = 3,
                                        clustering_method = "ward.D",
                                        clustering_distance_rows = "euclidean",
                                        clustering_distance_cols = "euclidean",show_text_for_ns = F)  
    
    image.png

    AUCell analysis

    library(AUCell)
    
    Build_AUCell_Rankings(human_HSPCs,AUCell_buildRankings.file="../SingCellaR_example_datasets/Psaila_et_al/SingCellaR_objects/human_life_cells_rankings.AUCells.rdata")
    set.seed(1)
    human_HSPCs.AUCells.score<-Run_AUCell(human_HSPCs,"../SingCellaR_example_datasets/Psaila_et_al/SingCellaR_objects/human_life_cells_rankings.AUCells.rdata","../SingCellaR_example_datasets/Human_genesets/human.signature.genes.v1.gmt")
    head(human_HSPCs.AUCells.score)
    plot_umap_label_by_AUCell_score(human_HSPCs,AUCell_gene_set_name=c("Megakaryocyte"),
                                    human_HSPCs.AUCells.score,AUCell_cutoff=0.10,point.size = 0.5)
    plot_umap_label_by_AUCell_score(human_HSPCs,AUCell_gene_set_name=c("Erythroid"),
                                    human_HSPCs.AUCells.score,AUCell_cutoff=0.10,point.size = 0.5) 
    
    plot_forceDirectedGraph_label_by_AUCell_score(human_HSPCs,AUCell_gene_set_name=c("Megakaryocyte"),
                                    human_HSPCs.AUCells.score,AUCell_cutoff=0.10,vertex.size = 0.5) 
    
    plot_forceDirectedGraph_label_by_AUCell_score(human_HSPCs,AUCell_gene_set_name=c("Erythroid"),
                                    human_HSPCs.AUCells.score,AUCell_cutoff=0.10,vertex.size = 0.5) 
    
    save(human_HSPCs,file="Psaila_et_al/SingCellaR_objects/human_HSPCs_v0.2.SingCellaR.rdata")                          
    
    image.png
    image.png
    image.png
    image.png

    说在最后

    大家最后从AUCell分析的结果可以看出,基于gene sets计算出的评分还是能够很好的揭示不同功能状态的细胞群,同样也反映出我们使用Harmony整合数据的结果是比较理想的。在准确定义出细胞群后,我们后续就可以基于对不同细胞群的功能特征进行深入分析。但是有时因遇到的数据不同,需要使用不同的整合算法去对比,只有能符合预期或者可以用理论知识解释的通的结果才是可取的。

    好啦,本期分享到这就结束啦,我们下期再会~~

    相关文章

      网友评论

          本文标题:SingCellaR代码实操(四):多样本单细胞数据整合下游分析

          本文链接:https://www.haomeiwen.com/subject/hfdzartx.html