更新函数pseudo_cell可以处理更大的数据集合(100w以上)
R-scripts-for-task/pseudo_cell func.r at main · Nh-code/R-scripts-for-task (github.com)
以下方法适用于小型数据集(<10w细胞):按指定分组对Seurat对象的表达矩阵进行细胞合并,返回拟细胞Seurat对象。
library(Seurat);library(dplyr);set.seed(123)
#' Condense a Seurat object into pseudo-cells
#'
#' Within each level of a metadata column, cells are assigned (in their
#' existing order) to consecutive bins of `group_n` cells; the last bin of a
#' group may hold fewer cells. Each bin gets a label of the form
#' `<group value>_N.<bin number>`, and `AverageExpression()` is then called
#' with that label as `group.by` on the `counts` slot.
#'
#' @param object A Seurat object.
#' @param group Name (single string) of the `object@meta.data` column that
#'   defines the groups to bin within, e.g. a cell-type column.
#' @param group_n Number of cells merged into one pseudo-cell (single
#'   number, >= 1).
#' @return The object returned by `AverageExpression(..., return.seurat = TRUE)`,
#'   with one column per pseudo-cell label.
pseudocell <- function(object, group = "orig.ident", group_n = 10) {
  stopifnot(
    is.character(group), length(group) == 1L, !is.na(group),
    is.numeric(group_n), length(group_n) == 1L, !is.na(group_n), group_n >= 1
  )
  # Fail loudly on a missing column: the previous `get(group)` lookup would
  # silently fall back to any object of that name outside the metadata.
  if (!group %in% colnames(object@meta.data)) {
    stop("Column '", group, "' not found in object@meta.data", call. = FALSE)
  }

  object@meta.data <- object@meta.data %>%
    # group_by() drops row names, so keep the cell barcodes in a column
    mutate(cellN = colnames(object)) %>%
    group_by(.data[[group]]) %>%
    mutate(
      bins.no = rep(
        seq_len(ceiling(n() / group_n)),
        each = group_n,
        length.out = n()
      )
    ) %>%
    ungroup() %>%
    mutate(pseudo_label = paste0(.data[[group]], "_N.", bins.no)) %>%
    tibble::column_to_rownames("cellN") %>%
    data.frame()

  AverageExpression(
    object = object,
    group.by = "pseudo_label",
    slot = "counts",
    return.seurat = TRUE
  )
}
## object: a Seurat object; group: the metadata column holding the cell type;
## group_n: how many cells within each cell type are merged into one pseudo-cell
scRNA <- readRDS("seurat.rds")
# Add a random cell-type column, for demonstration only
scRNA@meta.data$celltype <- sample(
  LETTERS[1:5],
  size = ncol(scRNA),
  replace = TRUE
)
sc_mini <- pseudocell(scRNA, group = "celltype", group_n = 20)
测试数据
# Build a small toy expression matrix: 4 genes (rows) x 3 cells (columns)
counts <- data.frame(
  cell.1 = c(2, 6, 1, 8),
  cell.2 = c(4, 4, 2, 9),
  cell.3 = rep(3, times = 4),
  row.names = paste0("gene.", 1:4)
)
> counts
cell.1 cell.2 cell.3
gene.1 2 4 3
gene.2 6 4 3
gene.3 1 2 3
gene.4 8 9 3
# Wrap the toy matrix in a Seurat object
mini.sc <- CreateSeuratObject(counts = counts)
# Condense cells two at a time, using pseudocell()'s default grouping column
mini.sc.condense <- pseudocell(object = mini.sc, group_n = 2)
> mini.sc.condense@assays$RNA@counts #压缩后的counts表达量,计算的是平均值
4 x 2 sparse Matrix of class "dgCMatrix"
SeuratProject_N.1 SeuratProject_N.2
gene.1 3.0 3
gene.2 5.0 3
gene.3 1.5 3
gene.4 8.5 3
# Pull the averaged counts back out as a dense matrix. The accessor is used
# instead of reaching into @assays$RNA@counts, which depends on the internal
# slot layout of the assay class.
counts <- as.matrix(
  GetAssayData(mini.sc.condense, assay = "RNA", slot = "counts")
)
网友评论