scRNA基础分析-5:伪时间分析

作者: 小贝学生信 | 来源:发表于2020-08-26 16:44 被阅读0次

    scRNA基础分析-1:安装包、导入数据、过滤质控 - 简书
    scRNA基础分析-2:降维与聚类 - 简书
    scRNA基础分析-3:鉴定细胞类型 - 简书
    scRNA基础分析-4:细胞亚类再聚类、注释 - 简书
    scRNA基础分析-5:伪时间分析 - 简书
    scRNA基础分析-6:富集分析 - 简书

    主要通过MonocleR包,使用反向图形嵌入(Reversed Graph Embedding)的机器学习算法,来学习描述细胞如何从一种状态过渡到另外一种状态的轨迹。其中分析的前提需要一张展现细胞转录特征相似性关系的图,Monocle2使用DDTree降维图,Monocle3使用UMAP降维图。

    rm(list = ls())
    library(monocle)
    library(Seurat)
    library(tidyverse)
    scRNAsub <- readRDS("scRNAsub.rds")  #scRNAsub是上一节保存的T细胞子集seurat对象
    

    1、生成mycds对象

    data <- as(as.matrix(scRNAsub@assays$RNA@counts), 'sparseMatrix')
    # count矩阵
    pd <- new('AnnotatedDataFrame', data = scRNAsub@meta.data)
    # meta表转成特定格式
    fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
    fd <- new('AnnotatedDataFrame', data = fData)
    # 基因名表转成特定格式
    mycds <- newCellDataSet(data,
                            phenoData = pd,
                            featureData = fd,
                            expressionFamily = negbinomial.size())
    #expressionFamily参数用于指定表达矩阵的数据类型,有几个选项可以选择:
    #稀疏矩阵用negbinomial.size(),FPKM值用tobit(),logFPKM值用gaussianff()
    
    mycds <- estimateSizeFactors(mycds)
    mycds <- estimateDispersions(mycds, cores=4, relative_expr = TRUE)
    

    2、选择代表性基因

    思路1:cluster间差异基因

    diff.wilcox = FindAllMarkers(scRNAsub)
    all.markers = diff.wilcox %>% select(gene, everything()) %>% subset(p_val<0.05)
    diff.genes <- subset(all.markers,p_val_adj<0.01)$gene
    
    mycds <- setOrderingFilter(mycds, diff.genes)
    p1 <- plot_ordering_genes(mycds)
    

    思路2:离散程度大的基因

    #Seurat法
    #FindVariableFeatures(scRNAsub, selection.method = "vst", nfeatures = 2000) 
    var.genes <- VariableFeatures(scRNAsub)
    mycds <- setOrderingFilter(mycds, var.genes)
    p2 <- plot_ordering_genes(mycds)
    
    #monocle法
    disp_table <- dispersionTable(mycds)
    disp.genes <- subset(disp_table, mean_expression >= 0.1 & dispersion_empirical >= 1 * dispersion_fit)$gene_id
    mycds <- setOrderingFilter(mycds, disp.genes)
    p3 <- plot_ordering_genes(mycds)
    
    p1|p2|p3
    ?plot_ordering_genes
    #后续分析以disp.genes进行演示
    
    • Each gray point in the plot is a gene.

    • The black dots are those that were included in the last call to setOrderingFilter.

    • The red curve shows the mean-variance model learning by estimateDispersions().


      2-1

    3、降维及排序

    #降维
    mycds <- reduceDimension(mycds, max_components = 2, method = 'DDRTree')
    #排序
    mycds <- orderCells(mycds)
    

    4、可视化

    4.1 轨迹图

    #State轨迹分布图
    p1 = plot_cell_trajectory(mycds, color_by = "State")
    #分面展示
    p2 = plot_cell_trajectory(mycds, color_by = "State") + facet_wrap(~State, nrow = 1)
    p1|p2
    
    4-1
    ##Cluster轨迹分布图
    p1 <- plot_cell_trajectory(mycds, color_by = "seurat_clusters")
    p2 <- plot_cell_trajectory(mycds, color_by = "seurat_clusters") + facet_wrap(~seurat_clusters, nrow = 1)
    p1/p2
    
    4-2
    #Pseudotime轨迹图
    plot3 <- plot_cell_trajectory(mycds, color_by = "Pseudotime")
    
    4-3

    4.2 特定基因轨迹图

    s.genes <- c("ITGB1","CCR7","KLRB1","GNLY")
    # 点图(抖动)
    p1 <- plot_genes_jitter(mycds[s.genes,], grouping = "State", color_by = "State")
    # 小提琴图
    p2 <- plot_genes_violin(mycds[s.genes,], grouping = "State", color_by = "State")
    # 伪时间图
    p3 <- plot_genes_in_pseudotime(mycds[s.genes,], color_by = "State")
    plotc <- p1|p2|p3
    
    4-4

    4.3 拟时相关基因聚类热图

    • monocle包的differentialGeneTest()函数可以按条件进行差异分析,将相关参数设为fullModelFormulaStr = "~sm.ns(Pseudotime)"时,可以找到与拟时先关的差异基因。
    • 为了提高效率,可直接使用cluster差异基因或者高变基因,进行筛选,再绘制热图。以高变基因为例:
    disp_table <- dispersionTable(mycds)
    disp.genes <- subset(disp_table, mean_expression >= 0.5&dispersion_empirical >= 1*dispersion_fit)
    disp.genes <- as.character(disp.genes$gene_id)
    diff_test <- differentialGeneTest(mycds[disp.genes,], cores = 4, 
                                  fullModelFormulaStr = "~sm.ns(Pseudotime)")
    #找到与拟时先关的差异基因
    sig_gene_names <- row.names(subset(diff_test, qval < 1e-04))
    #以qval为指标,挑选显著性
    plot_pseudotime_heatmap(mycds[sig_gene_names,], num_clusters=5,
                                 show_rownames=T, return_heatmap=T)
    
    4-5

    补充、BEAM分析

    • 单细胞轨迹通常包含分支(如本例),主要是因为细胞的表达模式受各种因素影响而发生变化;
    • BEAM(Branched expression analysis modeling)统计方法可用于寻找以依赖分支方式来实现调控的基因。
    disp_table <- dispersionTable(mycds)
    disp.genes <- subset(disp_table, mean_expression >= 0.5&dispersion_empirical >= 1*dispersion_fit)
    disp.genes <- as.character(disp.genes$gene_id)
    mycds_sub <- mycds[disp.genes,]
    plot_cell_trajectory(mycds_sub, color_by = "State")
    beam_res <- BEAM(mycds_sub, branch_point = 1, cores = 8)
    beam_res <- beam_res[order(beam_res$qval),]
    beam_res <- beam_res[,c("gene_short_name", "pval", "qval")]
    mycds_sub_beam <- mycds_sub[row.names(subset(beam_res, qval < 1e-4)),]
    plot_genes_branched_heatmap(mycds_sub_beam,  branch_point = 1, num_clusters = 3, show_rownames = T)
    
    5
    saveRDS(mycds, file="mycds.rds")
    

    相关文章

      网友评论

        本文标题:scRNA基础分析-5:伪时间分析

        本文链接:https://www.haomeiwen.com/subject/dszxsktx.html