Seurat文章
- Spatial reconstruction of single-cell gene expression data.
Nature Biotechnology. 2015 - Integrating single-cell transcriptomic data across different conditions, technologies, and species.
Nature Biotechnology. 2018 - Comprehensive Integration of Single-Cell Data.
Cell. 2019 - Integrated analysis of multimodal single-cell data.
Cell. 2019
Seurat地址
主页:https://satijalab.org/seurat/
bioconda: https://anaconda.org/bioconda/r-seurat
安装Seurat
conda activate r411
conda instal r-seurat
# r-seurat-4.0.5
# r-seuratobject-4.0.3
依赖和数据
library("Seurat")
library("stringr")
men = read.table("=cells_rawcount.txt", sep="\t", header=T, row.names=1)
过滤cell & gene
## Filter #############################################
# r-seurat-4.0.5
# r-seuratobject-4.0.3
# 1 删除稀有基因,> 0.1%
# 2 删除线粒体基因占比太多的细胞,< 10% (0.1)
# 3 删除少基因细胞,> 800
# 4 然后,删除基因数离群的细胞,小于Q1-(1.5)IQR或大于Q3+(1.5)IQR
# 1
keep_gene = rowSums(men != 0) > ncol(men)/1000 # 0.618
# 2
mt_name = grepl("^MT-", rownames(men))
mt_men = men[mt_name,]
keep_cell <- colSums(mt_men != 0)/colSums(men != 0) < 0.1
men_filter = men[keep_gene, keep_cell]
# 3
keep_cell = colSums(men_filter != 0) > 800
men_filter = men[, keep_cell]
# 4
num_cell = colSums(men_filter != 0)
IQR = quantile(num_cell, 0.75) - quantile(num_cell, 0.25)
Min = quantile(num_cell, 0.25) - 1.5*IQR
Max = quantile(num_cell, 0.75) + 1.5*IQR
keep_cell = num_cell < Max & num_cell > Min
men_filter = men_filter[, keep_cell]
创建seurat对象、标准化
seu = CreateSeuratObject(counts = men_filter)
seu = NormalizeData(seu,
normalization.method = "LogNormalize",
scale.factor = 10000)
seu = FindVariableFeatures(seu, selection.method = "vst")
seu_norm = ScaleData(seu, features = rownames(seu))
细胞周期分析
# 查看细胞周期marker genes
cc.genes
s.genes = cc.genes$s.genes
g2m.genes = cc.genes$g2m.genes
# 样本gene
gene_name = c()
for(i in rownames(men_filter))
{
gene_name = c(gene_name, unlist(strsplit(i, "-"))[1])
}
df_name = data.frame(gene_name = gene_name,
old_name = rownames(men_filter))
# 交集
s.seu = c()
g2m.seu = c()
for(i in 1:nrow(df_name))
{
if(gene_name[i] %in% s.genes)
{s.seu = c(s.seu, df_name$old_name[i])}
else if(gene_name[i] %in% g2m.genes)
{g2m.seu = c(g2m.seu, df_name$old_name[i])}
}
# 细胞周期评分
seu_cycle <- CellCycleScoring(seu_norm,
s.features = s.seu,
g2m.features = g2m.seu,
set.ident = TRUE)
RidgePlot(seu_cycle,
features = c("MCM4-ENSG00000104738.12",
"NASP-ENSG00000132780.12",
"TMPO-ENSG00000120802.9",
"DLGAP5-ENSG00000126787.8"), ncol = 2)
网友评论