2021-05-15 scRNA 基础分析：细胞再聚类

作者: 学习生信的小兔子 | 来源:发表于2021-05-15 23:59 被阅读0次

2021-05-15 scRNA 基础分析：细胞再聚类
scRNA基础分析-4：细胞亚类再聚类、注释
scRNA基础分析-5：伪时间分析
scRNA基础分析-6：富集分析
scRNA基础分析-3：鉴定细胞类型
【文献速递】-基于神经网络的迭代转移学习在单细胞RNA-seq分
2019-04-03 文献阅读 Challenges in un
完整的单细胞分析流程——特征（基因）选择
单细胞大样本量数据处理新方法||Simpler single c
单细胞分析：聚类流程（六）

单细胞数据分析中，一般需要对可以细分的细胞再聚类，比如本次分析中的T细胞群体可以细分为Naive T cells、CD8+ T cells、Treg cells、Tmemory cells等。细胞子集的提取使用subset函数，主要依据meta.data的分类项目选择，也可以自定义细胞barcode提取。

提取细胞子集

library(Seurat)
BiocManager::install("monocle")
library(monocle)
library(tidyverse)
library(patchwork)
rm(list=ls())
dir.create("subcluster")
scRNA1 <- readRDS("scRNA1.rds")
##提取细胞子集
Cells.sub <- subset(scRNA1@meta.data, celltype=="T_cells")
scRNAsub <- subset(scRNA1, cells=row.names(Cells.sub))

重新降维聚类

因为再聚类的细胞之间差异比较小，所以聚类函数FindClusters()控制分辨率的参数建议调高到resolution = 0.9。

##PCA降维
scRNAsub <- FindVariableFeatures(scRNAsub, selection.method = "vst", nfeatures = 2000)
scale.genes <-  rownames(scRNAsub)
scRNAsub <- ScaleData(scRNAsub, features = scale.genes)
scRNAsub <- RunPCA(scRNAsub, features = VariableFeatures(scRNAsub))
ElbowPlot(scRNAsub, ndims=20, reduction="pca")
# 如图结果，选取10个主成分为宜
pc.num=1:10

细胞聚类

scRNAsub <- FindNeighbors(scRNAsub, dims = pc.num) 
scRNAsub <- FindClusters(scRNAsub, resolution = 0.9)
table(scRNAsub@meta.data$seurat_clusters)#分成了5类
#0   1   2   3 
#181 163  64  58 
metadata <- scRNAsub@meta.data
cell_cluster <- data.frame(cell_ID=rownames(metadata), cluster_ID=metadata$seurat_clusters)
write.csv(cell_cluster,'subcluster/cell_cluster.csv',row.names = F)

非线性降维

#tSNE
scRNAsub = RunTSNE(scRNAsub, dims = pc.num)
embed_tsne <- Embeddings(scRNAsub, 'tsne')
plot1 = DimPlot(scRNAsub, reduction = "tsne") 
plot1

#UMAP
scRNAsub <- RunUMAP(scRNAsub, dims = pc.num)
embed_umap <- Embeddings(scRNAsub, 'umap')
plot2 = DimPlot(scRNAsub, reduction = "umap") 
plot2

#合并tSNE与UMAP
plotc <- plot1+plot2+ plot_layout(guides = 'collect')
plotc

Cluster差异分析

diff.wilcox = FindAllMarkers(scRNAsub)
all.markers = diff.wilcox %>% select(gene, everything()) %>% subset(p_val<0.05)
colnames(all.markers)[3] <- 'avg_logFC'
top10 = all.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
##top10基因绘制热图
top10_genes = CaseMatch(search = as.vector(top10$gene), match = rownames(scRNA1)) 
plot1 = DoHeatmap(scRNA1, features = top10_genes, group.by = "seurat_clusters", group.bar = T, size = 4)
plot1

SingleR细胞鉴定

SingleR细胞鉴定
Subcluster的细胞同样可以使用SingleR鉴定细胞类型。使用的时候注意调整参考数据库和分类标签，以便鉴定结果更有针对性。上节使用SingleR时使用的参考数据库是人类主要细胞图谱（HumanPrimaryCellAtlasData），采用分类标签是主分类标签（label.main）；这次建议使用人类免疫细胞数据（MonacoImmuneData），分类标签采用精细分类标签（label.fine）
library(SingleR)
refdata <- MonacoImmuneData() #慢。。
#加载下载好的数据库
load("D:/genetic_r/singer-cell-learning/filtered_feature_bc_matrix/ref_Monaco_114s.RData")
refdata <- ref_Monaco
testdata <- GetAssayData(scRNAsub, slot="data")
clusters <- scRNAsub@meta.data$seurat_clusters
cellpred <- SingleR(test = testdata, ref = refdata, labels = refdata$label.fine, 
                    # # 注意此时labels参数为 refdata$label.fine，与上一节注释时的设置不同
                    method = "cluster", clusters = clusters, 
                    assay.type.test = "logcounts", assay.type.ref = "logcounts")
celltype = data.frame(ClusterID=rownames(cellpred), celltype=cellpred$labels, stringsAsFactors = F)

scRNAsub@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
  scRNAsub@meta.data[which(scRNAsub@meta.data$seurat_clusters == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
p1 = DimPlot(scRNAsub, group.by="celltype", label=T, label.size=5, reduction='tsne')
p2 = DimPlot(scRNAsub, group.by="celltype", label=T, label.size=5, reduction='umap')
p3 = plotc <- p1+p2+ plot_layout(guides = 'collect')
p3
# 最后保存结果文件，下一步拟时分析会用到
saveRDS(scRNAsub, file="scRNAsub.rds")

参考小贝学生信公众号：生信会客厅
唉只是把代码跑出来了具体的原理还要再学习

##第三种方法用已有生物学知识或者文献来看，比如区分免疫或者非免疫细胞就用CD45这个基因。高表达CD45划分为免疫细胞，PTPRC是CD45官方名字
通过已有的marker基因画气泡图
定义免疫细胞的亚群
区分免疫细胞和非免疫细胞的marker基因
select_genes <- c('COL2A1',"PTPRC","EPCAM")
p <-DotPlot(scRNA1, features = select_genes ) + coord_flip()
p
###我们可以看到0,1,2,4,6,7,9这7个cluster是免疫细胞、
##提取气泡图的信息
dat=p$data 
###提取cd45的信息
cd45=dat[dat$features.plot=='PTPRC',]
###查看五分位数
fivenum(cd45$avg.exp.scaled)

###选择中位数的cluster，这个要结合气泡图来综合选择，但是我发现所以的分类都是以-0.5为准的。所以我把-0.5当做固定值了
###这里的-0.5我是问健明老师的  他告诉的哈哈哈
imm=cd45[cd45$avg.exp.scaled > -0.5,]$id
imm
###我们看到在-0.5的时候，与我们肉眼判断的免疫cluster是一样的
##接下来这一步是将免疫和非免疫细胞的信息添加到metadata中
scRNA1@meta.data$immune_annotation <-ifelse(scRNA1@meta.data$RNA_snn_res.0.5  %in% imm ,'immune','non-immune')
# MAke a table 查看一下数据
table(scRNA1@meta.data$immune_annotation)
# Make and save relevant plots 可视化一下免疫和非免疫的细胞
p <- TSNEPlot(object =scRNA1, group.by = 'immune_annotation')
p 
###好了这节课学完了  我们保存一下
save(scRNA1,file='scRNA1.Rdata')

####单细胞转录组基础分析五：细胞再聚类 ####

####提取细胞子集--提取scRNA1中的免疫细胞
##我公众号也讲过 提取细胞的方法有两种
####方法一：用which函数
phe=scRNA1@meta.data
###查看一下immune_annotation中的信息，主要是为了方便复制粘贴
table(phe$immune_annotation)
immune non-immune 
794        219 
###用which匹配immune，然后得到细胞的ID 就是metadata的行名
cells.use <- row.names(scRNA1@meta.data)[which(phe$immune_annotation=='immune')]
###将scRNA1中的免疫细胞提取出来
scRNA.imm <-subset(scRNA1, cells=cells.use)
##查看一下有多少是免疫细胞
scRNA.imm
##方法二：用两个subset函数
cells.use1 <- subset(scRNA1@meta.data, immune_annotation=="immune")
###将scRNA1中的免疫细胞提取出来
scRNAsub <- subset(scRNA1, cells=row.names(cells.use1))
##查看一下有多少是免疫细胞
scRNAsub
###我们发现scRNAsub和scRNA.imm是一样的细胞数目（794） 说明提取没有错

##重新降维聚类重新降维聚类：因为再聚类的细胞之间差异比较小，所以聚类函数FindClusters()控制分辨率的参数建议调高到resolution = 0.9
##PCA降维，与之前的一样 这里不重复赘述了
scRNAsub <- FindVariableFeatures(scRNAsub, selection.method = "vst", nfeatures = 2000)
scale.genes <-  rownames(scRNAsub)
scRNAsub <- ScaleData(scRNAsub, features = scale.genes)
scRNAsub <- RunPCA(scRNAsub, features = VariableFeatures(scRNAsub))
ElbowPlot(scRNAsub, ndims=20, reduction="pca")
pc.num=1:10
##细胞聚类，与之前的一样 这里不重复赘述了
scRNAsub <- FindNeighbors(scRNAsub, dims = 1:5) 
scRNAsub <- FindClusters(scRNAsub, resolution = 0.3)
table(scRNAsub@meta.data$seurat_clusters)
0   1   2   3   4 
300 241  94  81  78 
metadata <- scRNAsub@meta.data
cell_cluster <- data.frame(cell_ID=rownames(metadata), cluster_ID=metadata$seurat_clusters)
write.csv(cell_cluster,'cell_cluster.csv',row.names = F)
##非线性降维 ，与之前的一样 这里不重复赘述了
#tSNE
scRNAsub = RunTSNE(scRNAsub, dims = pc.num)
embed_tsne <- Embeddings(scRNAsub, 'tsne')
write.csv(embed_tsne,'embed_tsne.csv')
plot1 = DimPlot(scRNAsub, reduction = "tsne") 
plot1
ggsave("tSNE.pdf", plot = plot1, width = 8, height = 7)
?FeaturePlot
#UMAP
scRNAsub <- RunUMAP(scRNAsub, dims = pc.num)
embed_umap <- Embeddings(scRNAsub, 'umap')
write.csv(embed_umap,'embed_umap.csv') 
plot2 = DimPlot(scRNAsub, reduction = "umap") 
plot2
ggsave("UMAP.pdf", plot = plot2, width = 8, height = 7)

#合并tSNE与UMAP
plotc <- plot1+plot2+ plot_layout(guides = 'collect')
plotc
ggsave("tSNE_UMAP.pdf", plot = plotc, width = 10, height = 5)

####接下来就是注释  注释的方法还是三种 前面的内容有讲过 所以这里就不再赘述了

##Cluster差异分析---其实就是找marker基因，找完marker基因去网站  再手动对其进行注释
diff.wilcox = FindAllMarkers(scRNAsub)
all.markers = diff.wilcox %>% select(gene, everything()) %>% subset(p_val<0.05)
top10 = all.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
write.csv(all.markers, "subcluster/diff_genes_wilcox.csv", row.names = F)
write.csv(top10, "subcluster/top10_diff_genes_wilcox.csv", row.names = F)

##细胞类型鉴定
library(SingleR)
##SingleR细胞鉴定，但是这次我们调用的是免疫的数据库，与上次不一样哟  上次是用的人的数据库来注释的
load("D:/genetic_r/singer-cell-learning/filtered_feature_bc_matrix/ref_Monaco_114s.RData")
####我们将加载的数据赋值到refdata
refdata <- ref_Monaco
###接下来的内容与之前的singler注释形式是一模一样的 
testdata <- GetAssayData(scRNAsub, slot="data")
clusters <- scRNAsub@meta.data$seurat_clusters
cellpred <- SingleR(test = testdata, ref = refdata, labels = refdata$label.fine, 
                    method = "cluster", clusters = clusters, 
                    assay.type.test = "logcounts", assay.type.ref = "logcounts")
celltype = data.frame(ClusterID=rownames(cellpred), celltype=cellpred$labels, stringsAsFactors = F)
write.csv(celltype,"celltype_singleR.csv",row.names = F)
###这里是将singler注释的信息加载到metadata中
scRNAsub@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
  scRNAsub@meta.data[which(scRNAsub@meta.data$seurat_clusters == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
p1 = DimPlot(scRNAsub, group.by="celltype", label=T, label.size=5, reduction='tsne')
p1
p2 = DimPlot(scRNAsub, group.by="celltype", label=T, label.size=5, reduction='umap')
p2
p3 = plotc <- p1+p2+ plot_layout(guides = 'collect')
p3
ggsave("tSNE_celltype.pdf", p1, width=7 ,height=6)
ggsave("UMAP_celltype.pdf", p2, width=7 ,height=6)
ggsave("celltype.png", p3, width=10 ,height=5)
####处理找marker基因、singler注释，我们还有一种方法就是根据已有的生物学知识或者文献，用dotplot来手动注释
####这个方法也和之前的一样 这里也不再赘述了，就是有点累 但是是比较精准的一种注释方法
师兄讲的方法，先记录一下，结果不太一样

2021-05-15 scRNA 基础分析：细胞再聚类
单细胞数据分析中，一般需要对可以细分的细胞再聚类，比如本次分析中的T细胞群体可以细分为Naive T cells、...
scRNA基础分析-4：细胞亚类再聚类、注释
scRNA基础分析-1:安装包、导入数据、过滤质控 - 简书 scRNA基础分析-2：降维与聚类 - 简书 s...
scRNA基础分析-5：伪时间分析
scRNA基础分析-1:安装包、导入数据、过滤质控 - 简书 scRNA基础分析-2：降维与聚类 - 简书 s...
scRNA基础分析-6：富集分析
scRNA基础分析-1:安装包、导入数据、过滤质控 - 简书 scRNA基础分析-2：降维与聚类 - 简书 s...
scRNA基础分析-3：鉴定细胞类型
scRNA基础分析-1:安装包、导入数据、过滤质控 - 简书 scRNA基础分析-2：降维与聚类 - 简书 s...
【文献速递】-基于神经网络的迭代转移学习在单细胞RNA-seq分
在单细胞RNA-seq (scRNA-seq)分析中，聚类和细胞类型分类是重要的步骤。随着越来越多的scRNA-s...
2019-04-03 文献阅读 Challenges in un
single cell clustering Key Point scRNA数据分析聚类策略选择聚类的Techn...
完整的单细胞分析流程——特征（基因）选择
背景我们经常在探索性分析中使用scRNA-seq数据来表征细胞间的异质性。诸如聚类和降维之类的过程会根据细胞的基...
单细胞大样本量数据处理新方法||Simpler single c
聚类是分析单细胞RNA序列（scrna-seq）数据的一种常用分析方法，但数据量的快速增长会使这一过程在计算上具有...
单细胞分析：聚类流程（六）
1. 学习目标描述质量控制步骤后scRNA-seq分析的工作流程（聚类）。现在有了高质量的细胞，可以继续工作流...