harmony真的好用,天才的发明者。复现一篇30多个样本的主图,这个数据我也想了好久,由于作者提供了tpm数据,这个数据相当于标准化之后的,就相当于SCT之后的,所以不需要对样本进行标准化了,主图是这样的。

exp1 <- data.table::fread('GSE132465_GEO_processed_CRC_10X_natural_log_TPM_matrix.txt',data.table = F)
rownames(exp1) <- exp1$Index
exp1 <-exp1[,-1]
experiment.aggregate <- CreateSeuratObject(
exp1,
project = "multi",
min.cells = 10,
min.features = 200,
names.field = 1,
names.delim = "_")
experiment.aggregate[["percent.mt"]] <- PercentageFeatureSet(experiment.aggregate,
pattern = "^MT-")
view(experiment.aggregate@meta.data)
experiment.aggregate <- FindVariableFeatures(experiment.aggregate,
selection.method = "vst",
nfeatures = 1000)
all.genes <- rownames(experiment.aggregate)
experiment.aggregate <- ScaleData(experiment.aggregate,
features = all.genes, verbose = FALSE
)
experiment.aggregate <- RunPCA(object = experiment.aggregate,
features = VariableFeatures(experiment.aggregate),
verbose = F,npcs = 50)
experiment.aggregate <- RunHarmony(experiment.aggregate, group.by.vars = "orig.ident")
A1 <- ElbowPlot(experiment.aggregate, ndims = 50)
pdf("A1.pdf",width = 6,height = 5)
print(A1)
dev.off()
dim.use <- 1:20
experiment.aggregate <- FindNeighbors(experiment.aggregate, dims = dim.use, reduction = "harmony")
#基于细胞之间的相似性计算具体的cluster
experiment.aggregate <- FindClusters(experiment.aggregate, resolution = 0.6, reduction = "harmony")
#TSNE算法聚类(降维)
experiment.aggregate <- RunTSNE(experiment.aggregate, dims = dim.use, do.fast = TRUE, reduction = "harmony")
experiment.aggregate@meta.data$celltype3 = "NA"
# 更改 celltype 信息,设置细胞群新名称
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(1,2,6,8)), "celltype3"] = "T cells"
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(7,11,15,19)), "celltype3"] = "Stromal cells"
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(3,4,13,18)), "celltype3"] = "B cells"
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(0,10,12,14,16,17)), "celltype3"] = "Epithelial cells"
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(5,9)), "celltype3"] = "Myloid cells"
experiment.aggregate@meta.data[which(experiment.aggregate@meta.data$seurat_clusters %in% c(20)), "celltype3"] = "Mast cells"
A2 <- DimPlot(experiment.aggregate, reduction = "tsne", label = T,group.by = 'celltype3') +NoLegend()
A2
pdf("A2.pdf",width = 8,height = 6)
print(A2)
dev.off()
singleB<- SubsetData(experiment.aggregate,
# 提取数据根据的组名
subset.name = "celltype3",
# 提取的组别
accept.value = c('B cells'))
singleT2<- SubsetData(experiment.aggregate,
# 提取数据根据的组名
subset.name = "celltype3",
# 提取的组别
accept.value = c('T cells'))
save(singleB, file = "singleB.Rdata")
save(singleT2, file = "singleT.Rdata")
最后的图是这样的,感觉一点不差于原文的CCA去除批次效应的结果

网友评论