识别celltype是单细胞数据分析最关键的一步,识别方法主要分为手动和自动注释,暂时记录下手动注释的过程(极其地痛苦,如果可以还是自动注释更方便,但准确度可能不好,最佳的方法是手动+自动的方式)。更多知识分享请到 https://zouhua.top/。
加载R包
library(dplyr)
library(Seurat)
library(tibble)
library(ggplot2)
library(cowplot)
# 设置全局参数
rm(list = ls())
options(stringsAsFactors = F)
options(future.globals.maxSize = 4000 * 1024^2)
colors <- c("#A6CEE3", "#1F78B4", "#08306B", "#B2DF8A", "#006D2C", "#8E0152",
"#DE77AE", "#CAB2D6", "#6A3D9A", "#FB9A99", "#E31A1C", "#B15928",
"#619CFF","#FF67A4","#00BCD8", "#EE2B2B", "#2D6BB4")
加载数据
data.cluster <- readRDS("../../Result/RDS/data.cluster04.rds")
查看cluster
DimPlot(data.cluster,
reduction = "umap",
label = TRUE,
label.size = 6,
cols = colors)
识别所有cluster的marker gene
annotations <- read.csv("../../Result/annotations_mus.csv")
markers <- FindAllMarkers(object = data.cluster,
only.pos = TRUE,
logfc.threshold = 0.25)
#write.csv(markers, "../../Result/data.all.markergenes.csv")
#markers <- read.csv("../../Result/data.all.markergenes.csv", row.names = 1)
评估marker genes
anno_all_markers <- markers %>%
left_join(y = unique(annotations[, c("gene_name", "description")]),
by = c("gene" = "gene_name")) %>%
mutate(Specific=pct.1 - pct.2) #%>%
#arrange(dplyr::desc(Specific), dplyr::desc(avg_logFC))
# Extract top 50 markers per cluster
top50 <- anno_all_markers %>%
group_by(cluster) %>%
top_n(n = 50, wt = avg_logFC) %>%
ungroup()
- 热图展示
mapal <- colorRampPalette(c("#EEEED1", "#AEDCF9", "#10106A"))(20)
DoHeatmap(object = data.cluster, features = top50$gene, label = TRUE) +
scale_fill_gradientn(colours = mapal)
- 计算cluster的相关性
library(pheatmap)
exp_ave <- AverageExpression(data.cluster)
CorOb.cor.exp <- as.data.frame(cor(exp_ave$RNA, method = "pearson"))
pheatmap(CorOb.cor.exp, color=mapal)
网上查找组织细胞的特异基因
Marker genes | Celltype |
---|---|
AQP1, BST2 | endothelial cell |
CXCL2 | Fibroblasts,endothelial cell |
ADRA2C,NTRK2 | Fibroblasts,smooth muscle cell |
RBP7,CTHRC1 | endothelial cell |
LUM, MMP2,SERPINF1,DCN | Fibroblasts |
HYAL2,CALCRL,CD320 | endothelial cell |
ABCC9,CPE | Fibroblasts, endothelial cell |
TTN | cardiomyocyte |
LBH,MYH11,MYLK | smooth muscle cell,Fibroblasts |
NKG7,KLRD1,PRF1 | Natural killer cells |
AIF1,MS4A6A,C3AR1 | Macrophages,Monocytes |
- 查看marker genes在cluster的分布情况
FeaturePlot(data.cluster,
reduction = "umap",
features = c("Cd3g", "Cd3d", "Ptprc"),
pt.size = 0.4,
order = TRUE,
min.cutoff = 'q10',
label = TRUE)
根据marker genes结果重命名cluster
data.cluster <- RenameIdents(object = data.cluster,
"0" = "cardiomyocyte",
"1" = "fibroblasts",
"2" = "cardiomyocyte",
"3" = "macrophage",
"4" = "cardiomyocyte",
"5" = "endothelial cells",
"6" = "cardiomyocyte",
"7" = "T cells",
"8" = "Unknown",
"9" = "granulocyte",
"10" = "Unknown",
"11" = "Unknown",
"12" = "Unknown")
DimPlot(object = data.cluster,
reduction = "umap",
label = TRUE,
label.size = 5,
repel = TRUE,
cols = colors)
提取或丢弃某些cluster
# 提取
data.choose <- subset(data.cluster,
idents = c("cardiomyocyte", "macrophage"),
invert = FALSE)
# 删除
data.cluster <- subset(data.cluster,
idents = "Unknown",
invert = TRUE)
Reference
参考文章如引起任何侵权问题,可以与我联系,谢谢。
网友评论