美文网首页scRNA-seq
SingCellaR代码实操(一):PBMCs的标准流程

SingCellaR代码实操(一):PBMCs的标准流程

作者: 生信宝库 | 来源:发表于2022-09-23 11:31 被阅读0次

    前言

    Immugent在前一篇推文:SingCellaR:一站式研究细胞分化轨迹的功能调节的利器中,基于一项造血干细胞的研究结果初步介绍了SingCellaR的功能框架。从本节推文开始后续会有5篇有关SingCellaR代码实操的教程,分别不同角度分析多种来源的数据,以解决和发现不同的科学问题。

    为了方便大家复现本次展示的流程,Immugent使用的是10X官网的5k_pbmc_v3数据,大家可以直接到10x genomics官网下载相应的数据,即可以进行本节代码的复现。


    分析流程

    首先是将数据导入到SingCellaR中。

    library(SingCellaR)
    
    data_matrices_dir<-"../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/filtered_feature_bc_matrix/"
    PBMCs<-new("SingCellaR")
    PBMCs@dir_path_10x_matrix<-data_matrices_dir
    PBMCs@sample_uniq_id<-"PBMCs"
    
    load_matrices_from_cellranger(PBMCs,cellranger.version = 3)
    
    PBMCs
    ## An object of class SingCellaR with a matrix of : 33538 genes across 5025 samples.
    

    一行代码实现QC结果的展示,顶!

    plot_cells_annotation(PBMCs,type="histogram")
    
    image.png

    还有其它QC结果展示的图,都很美观。

    plot_cells_annotation(PBMCs,type="boxplot")
    plot_UMIs_vs_Detected_genes(PBMCs)
    
    image.png image.png

    检测双细胞和过滤双细胞。

    filter_cells_and_genes(PBMCs,
                           min_UMIs=1000,
                           max_UMIs=30000,
                           min_detected_genes=300,
                           max_detected_genes=5000,
                           max_percent_mito=15,
                           genes_with_expressing_cells = 10,isRemovedDoublets = FALSE)
    
    DoubletDetection_with_scrublet(PBMCs)
    table(PBMCs@meta.data$Scrublet_type)
    ## 
    ## Doublets  Singlet 
    ##       96     4929
    
    filter_cells_and_genes(PBMCs,
                           min_UMIs=1000,
                           max_UMIs=30000,
                           min_detected_genes=300,
                           max_detected_genes=5000,
                           max_percent_mito=15,
                           genes_with_expressing_cells = 10,isRemovedDoublets = TRUE)
    

    以下就是和Seurat类似的单细胞常规处理流程。

    normalize_UMIs(PBMCs,use.scaled.factor = T)
    remove_unwanted_confounders(PBMCs,residualModelFormulaStr="~UMI_count+percent_mito")
    get_variable_genes_by_fitting_GLM_model(PBMCs,mean_expr_cutoff = 0.05,disp_zscore_cutoff = 0.05)
    
    remove_unwanted_genes_from_variable_gene_set(PBMCs,gmt.file = "../SingCellaR_example_datasets//Human_genesets/human.ribosomal-mitocondrial.genes.gmt",
                                                 removed_gene_sets=c("Ribosomal_gene","Mitocondrial_gene"))
    plot_variable_genes(PBMCs)                                                   
    
    image.png
    runPCA(PBMCs,use.components=30,use.regressout.data = T)
    plot_PCA_Elbowplot(PBMCs)
    runTSNE(PBMCs,dim_reduction_method = "pca",n.dims.use = 10)
    plot_tsne_label_by_qc(PBMCs,point.size = 0.1)
    
    runUMAP(PBMCs,dim_reduction_method = "pca",n.dims.use = 10,n.neighbors = 20,
            uwot.metric = "euclidean")
    plot_umap_label_by_a_feature_of_interest(PBMCs,feature = "UMI_count",point.size = 0.1,mark.feature = F)
    
    image.png image.png image.png
    identifyClusters(PBMCs,n.dims.use = 10,n.neighbors = 100,knn.metric = "euclidean")
    plot_tsne_label_by_clusters(PBMCs,show_method = "louvain",point.size = 2)
    plot_umap_label_by_clusters(PBMCs,show_method = "louvain",point.size = 0.80)
    
    image.png image.png

    下面介绍SingCellaR的一项特有功能分析--Force-directed graph analysis,它结合了力定向图来可视化细胞轨迹。

    runFA2_ForceDirectedGraph(PBMCs,n.dims.use = 10,
                              n.neighbors = 5,n.seed = 1,fa2_n_iter = 1000)
    plot_forceDirectedGraph_label_by_clusters(PBMCs,show_method = "louvain",vertex.size = 1,
                                              background.color = "black")                            
    
    image.png
    findMarkerGenes(PBMCs,cluster.type = "louvain")
    plot_heatmap_for_marker_genes(PBMCs,cluster.type = "louvain",n.TopGenes = 10,rowFont.size = 5)
    
    cl3_marker_genes<-getMarkerGenesInfo(PBMCs,cluster.type = "louvain",cluster_id = "cl3")
    cl3_marker_genes<-cl3_marker_genes[order(cl3_marker_genes$log2FC,decreasing = T),]
    head(cl3_marker_genes)
    
    plot_tsne_label_by_genes(PBMCs,gene_list = c("CCR7","CD163","GNLY","CD72"))
    plot_forceDirectedGraph_label_by_genes(PBMCs,gene_list = c("CCR7","CD163","GNLY","CD72"))
    plot_violin_for_marker_genes(PBMCs,gene_list = c("CCR7","CD163","GNLY","CD72"))
    plot_bubble_for_genes_per_cluster(PBMCs,cluster.type = "louvain",IsApplyClustering = T,
                                IsClusterByRow = T,IsClusterByColumn = T,
                                gene_list=c("CCR7","CD163",
                                            "GNLY","CD72",
                                            "CD8A","CD8B","IL3RA"
                                            ),show.percent = T,buble.scale = 12,point.color1 = "gray",
                                            point.color2 = "firebrick1")
    
    image.png image.png image.png image.png image.png
    runKNN_Graph(PBMCs,n.dims.use = 10,n.neighbors = 10)
    library(threejs)
    plot_3D_knn_graph_label_by_clusters(PBMCs,show_method = "louvain",vertext.size = 0.25)
    plot_3D_knn_graph_label_by_gene(PBMCs,show.gene = "GNLY",vertext.size = 0.30)
    clustering_KMeansOnKNN_Graph(PBMCs, k=10, iter.max = 100)
    
    plot_tsne_label_by_clusters(PBMCs,show_method = "kmeans",point.size = 1)
    plot_umap_label_by_clusters(PBMCs,show_method = "kmeans",point.size = 1)
    
    image.png image.png

    最后,SingCellaR还内置了GSEA分析流程,并且将结果可视化。

    pre_rankedGenes_for_GSEA<-identifyGSEAPrerankedGenes_for_all_clusters(PBMCs,
                                                                        cluster.type = "louvain")
    save(pre_rankedGenes_for_GSEA,file="../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/PBMCs_preRankedGenes_for_GSEA.rdata")                 
    #Run fGSEA
    load("../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/PBMCs_preRankedGenes_for_GSEA.rdata")
    fgsea_Results<-Run_fGSEA_for_multiple_comparisons(GSEAPrerankedGenes_list = pre_rankedGenes_for_GSEA, eps = 0,
                        gmt.file = "../SingCellaR_example_datasets/Human_genesets/human.hematopoiesis.signature.genes.gmt")
    plot_heatmap_for_fGSEA_all_clusters(fgsea_Results,isApplyCutoff = TRUE,
                                        use_pvalues_for_clustering=T,
                                        show_NES_score = T,fontsize_row = 10,
                                        adjusted_pval = 0.1,
                                        show_only_NES_positive_score = T,format.digits = 3,
                                        clustering_method = "ward.D",
                                        clustering_distance_rows = "euclidean",
                                        clustering_distance_cols = "euclidean",show_text_for_ns = F)
    save(PBMCs,file="../SingCellaR_example_datasets/10x_genomics_5K_PBMCs/PBMCs_v0.1.SingCellaR.rdata")                                                      
    
    image.png

    说在最后

    从上面的分析流程大家也能感受到SingCellaR的功能还是十分齐全的,相对于Seurat包的分析结果,它在处理单细胞数据标准流程上有以下优点:

    1. QC过程简单粗暴,而且结果很容易理解;
    2. 可以通过一行代码鉴定双细胞并去除;
    3. 能进行Force-directed graph analysis,这种分析在有些数据中表现很好;
    4. 可以进行GSEA分析,内置的免疫细胞gmt文件很方便大家进行细胞注释。

    好啦,本期推文到这就结束了,下期Immugent将会展示使用SingCellaR分析一个公共的疾病数据,并结合具体的科学问题进行解读。

    相关文章

      网友评论

        本文标题:SingCellaR代码实操(一):PBMCs的标准流程

        本文链接:https://www.haomeiwen.com/subject/akhnortx.html