质控,过滤
##系统报错改为英文
Sys.setenv(LANGUAGE = "en")
##禁止转化为因子
options(stringsAsFactors = FALSE)
##清空环境
rm(list=ls())
###加载所需要的包
library(Seurat)
library(tidyverse)
library(dplyr)
library(patchwork)
###读取10x的数据
scRNA.counts <- Read10X("D:/genetic_r/singer-cell-learning/filtered_feature_bc_matrix")
####创建Seurat对象
scRNA = CreateSeuratObject(scRNA.counts)
#后续的分析大都以此为基础
table(scRNA@meta.data$orig.ident) #查看样本的细胞数量
#1222个
##查看一下scRNA@meta.data的结构
head(scRNA@meta.data) #细胞的基因(nFeature_RNA)、mRNA数量(nCount_RNA)
summary(scRNA@meta.data)

#计算细胞中线粒体基因比例
scRNA[["percent.mt"]] <- PercentageFeatureSet(scRNA, pattern = "^MT-")
head(scRNA@meta.data)
#计算红细胞比例
HB.genes <- c("HBA1","HBA2","HBB","HBD","HBE1","HBG1","HBG2","HBM","HBQ1","HBZ")
HB_m <- match(HB.genes, rownames(scRNA@assays$RNA))
HB.genes <- rownames(scRNA@assays$RNA)[HB_m]
HB.genes <- HB.genes[!is.na(HB.genes)]
scRNA[["percent.HB"]]<-PercentageFeatureSet(scRNA, features=HB.genes)
head(scRNA@meta.data)
#可视化
col.num <- length(levels(scRNA@active.ident))
violin <- VlnPlot(scRNA,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.HB"),
cols =rainbow(col.num),
pt.size = 0.01, #不需要显示点,可以设置pt.size = 0
ncol = 4) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank())
violin

#查看几个指标的相关性
###这几个指标之间的相关性。 把图画到画板上,然后手动保存
plot1=FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2=FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot3=FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.HB")
pearplot <- CombinePlots(plots = list(plot1, plot2, plot3), nrow=1, legend="none")

我们可以看到,nFeature_RNA的范围在0到6000之内,percent.mt代表线粒体含量我们默认线粒体含量至少要小于20%,这是根据生物学知识得出的默认阈值。红细胞的数目要至少小于5%至于nFeature_RNA和nCount_RNA的阈值怎么确定,这个要结合 pearplot的图来判断。我们质控的目标就是删除离异值。而且注意阈值尽可能取的宽松一下,防止后面分析想要的细胞得不到。接下来从pearplot的图片来做质控---剔除离异值作者建议minGene=500,maxGene=4000,pctMT=15(线粒体基因比例低于15%)。
minGene=500
maxGene=4000
pctMT=15
scRNA1 <- subset(scRNA, subset = nFeature_RNA > minGene & nFeature_RNA < maxGene & percent.mt < pctMT)
col.num <- length(levels(scRNA@active.ident))
violin <-VlnPlot(scRNA1,
features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
cols =rainbow(col.num),
pt.size = 0.1,
ncol = 3)
violin
查看过滤细胞的数量
scRNA;scRNA1 #1222 变为 1013
#均一化
scRNA1 <- NormalizeData(scRNA1, normalization.method = "LogNormalize", scale.factor = 10000)
save(scRNA1,file='scRNA1.Rdata')

参考公众号:生信会客厅
网友评论