scRNA基础分析-1:安装包、导入数据、过滤质控 - 简书
scRNA基础分析-2:降维与聚类 - 简书
scRNA基础分析-3:鉴定细胞类型 - 简书
scRNA基础分析-4:细胞亚类再聚类、注释 - 简书
scRNA基础分析-5:伪时间分析 - 简书
scRNA基础分析-6:富集分析 - 简书
主要通过MonocleR包,使用反向图形嵌入(Reversed Graph Embedding)的机器学习算法,来学习描述细胞如何从一种状态过渡到另外一种状态的轨迹。其中分析的前提需要一张展现细胞转录特征相似性关系的图,Monocle2使用DDTree降维图,Monocle3使用UMAP降维图。
rm(list = ls())
library(monocle)
library(Seurat)
library(tidyverse)
scRNAsub <- readRDS("scRNAsub.rds") #scRNAsub是上一节保存的T细胞子集seurat对象
1、生成mycds对象
data <- as(as.matrix(scRNAsub@assays$RNA@counts), 'sparseMatrix')
# count矩阵
pd <- new('AnnotatedDataFrame', data = scRNAsub@meta.data)
# meta表转成特定格式
fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
fd <- new('AnnotatedDataFrame', data = fData)
# 基因名表转成特定格式
mycds <- newCellDataSet(data,
phenoData = pd,
featureData = fd,
expressionFamily = negbinomial.size())
#expressionFamily参数用于指定表达矩阵的数据类型,有几个选项可以选择:
#稀疏矩阵用negbinomial.size(),FPKM值用tobit(),logFPKM值用gaussianff()
mycds <- estimateSizeFactors(mycds)
mycds <- estimateDispersions(mycds, cores=4, relative_expr = TRUE)
2、选择代表性基因
思路1:cluster间差异基因
diff.wilcox = FindAllMarkers(scRNAsub)
all.markers = diff.wilcox %>% select(gene, everything()) %>% subset(p_val<0.05)
diff.genes <- subset(all.markers,p_val_adj<0.01)$gene
mycds <- setOrderingFilter(mycds, diff.genes)
p1 <- plot_ordering_genes(mycds)
思路2:离散程度大的基因
#Seurat法
#FindVariableFeatures(scRNAsub, selection.method = "vst", nfeatures = 2000)
var.genes <- VariableFeatures(scRNAsub)
mycds <- setOrderingFilter(mycds, var.genes)
p2 <- plot_ordering_genes(mycds)
#monocle法
disp_table <- dispersionTable(mycds)
disp.genes <- subset(disp_table, mean_expression >= 0.1 & dispersion_empirical >= 1 * dispersion_fit)$gene_id
mycds <- setOrderingFilter(mycds, disp.genes)
p3 <- plot_ordering_genes(mycds)
p1|p2|p3
?plot_ordering_genes
#后续分析以disp.genes进行演示
-
Each gray point in the plot is a gene.
-
The black dots are those that were included in the last call to setOrderingFilter.
-
The red curve shows the mean-variance model learning by estimateDispersions().
2-1
3、降维及排序
#降维
mycds <- reduceDimension(mycds, max_components = 2, method = 'DDRTree')
#排序
mycds <- orderCells(mycds)
4、可视化
4.1 轨迹图
#State轨迹分布图
p1 = plot_cell_trajectory(mycds, color_by = "State")
#分面展示
p2 = plot_cell_trajectory(mycds, color_by = "State") + facet_wrap(~State, nrow = 1)
p1|p2
4-1
##Cluster轨迹分布图
p1 <- plot_cell_trajectory(mycds, color_by = "seurat_clusters")
p2 <- plot_cell_trajectory(mycds, color_by = "seurat_clusters") + facet_wrap(~seurat_clusters, nrow = 1)
p1/p2
4-2
#Pseudotime轨迹图
plot3 <- plot_cell_trajectory(mycds, color_by = "Pseudotime")
4-3
4.2 特定基因轨迹图
s.genes <- c("ITGB1","CCR7","KLRB1","GNLY")
# 点图(抖动)
p1 <- plot_genes_jitter(mycds[s.genes,], grouping = "State", color_by = "State")
# 小提琴图
p2 <- plot_genes_violin(mycds[s.genes,], grouping = "State", color_by = "State")
# 伪时间图
p3 <- plot_genes_in_pseudotime(mycds[s.genes,], color_by = "State")
plotc <- p1|p2|p3
4-4
4.3 拟时相关基因聚类热图
- monocle包的
differentialGeneTest()函数
可以按条件进行差异分析,将相关参数设为fullModelFormulaStr = "~sm.ns(Pseudotime)"
时,可以找到与拟时先关的差异基因。 - 为了提高效率,可直接使用cluster差异基因或者高变基因,进行筛选,再绘制热图。以高变基因为例:
disp_table <- dispersionTable(mycds)
disp.genes <- subset(disp_table, mean_expression >= 0.5&dispersion_empirical >= 1*dispersion_fit)
disp.genes <- as.character(disp.genes$gene_id)
diff_test <- differentialGeneTest(mycds[disp.genes,], cores = 4,
fullModelFormulaStr = "~sm.ns(Pseudotime)")
#找到与拟时先关的差异基因
sig_gene_names <- row.names(subset(diff_test, qval < 1e-04))
#以qval为指标,挑选显著性
plot_pseudotime_heatmap(mycds[sig_gene_names,], num_clusters=5,
show_rownames=T, return_heatmap=T)
4-5
补充、BEAM分析
- 单细胞轨迹通常包含分支(如本例),主要是因为细胞的表达模式受各种因素影响而发生变化;
- BEAM(Branched expression analysis modeling)统计方法可用于寻找以依赖分支方式来实现调控的基因。
disp_table <- dispersionTable(mycds)
disp.genes <- subset(disp_table, mean_expression >= 0.5&dispersion_empirical >= 1*dispersion_fit)
disp.genes <- as.character(disp.genes$gene_id)
mycds_sub <- mycds[disp.genes,]
plot_cell_trajectory(mycds_sub, color_by = "State")
beam_res <- BEAM(mycds_sub, branch_point = 1, cores = 8)
beam_res <- beam_res[order(beam_res$qval),]
beam_res <- beam_res[,c("gene_short_name", "pval", "qval")]
mycds_sub_beam <- mycds_sub[row.names(subset(beam_res, qval < 1e-4)),]
plot_genes_branched_heatmap(mycds_sub_beam, branch_point = 1, num_clusters = 3, show_rownames = T)
5
saveRDS(mycds, file="mycds.rds")
网友评论