单细胞测序文献复现

作者: 生信小白花 | 来源:发表于2023-02-18 17:38 被阅读0次

单细胞实战(6)：复现拟南芥单细胞文章中的数据（2）
单细胞CNS文章复现Single-cell profiling
单细胞实战(5)：复现拟南芥单细胞文章中的数据（1）
文献阅读-《Tutorial: guidelines for t
NCB|心咽发育多样化的单细胞转录轨迹分析
单细胞数据挖掘实战：文献复现（四）细胞比例饼图
单细胞数据挖掘实战：文献复现（六）标记基因及可视化
单细胞数据挖掘实战：文献复现（五）细胞亚群并可视化
单细胞数据挖掘实战：文献复现（三）降维、聚类和细胞注释
单细胞数据挖掘实战：文献复现（二）批量创建Seurat对象及质控

Integration of single-cell transcriptomes and chromatin landscapes reveals regulatory programs driving pharyngeal organ development
Nat Commun
IF:17.69

###fyj文献复现
# 10个样本合并，40个PC，1.5relution聚类分析。
##
if(!requireNamespace("BiocManager",quietly=TRUE))
  install.packages("BiocManager")
BiocManager::install("scde")
library("scde")
BiocManager::install("DEsingle")
library("DEsingle")
BiocManager::install("Seurat")
library("Seurat")
library("dplyr")
library("magrittr")
BiocManager::install("SingleCellExperiment")
library("SingleCellExperiment")
BiocManager::install("patchwork")
library("patchwork")

导入所有样本数据，并进行质控

setwd("E:/scRNA/20221010/原始数据/")
#############################1
##1.导入数据
pbmc.data <- Read10X( data.dir="GSM5519137_E9_5_rep2_2018AUG07/")
E9_5_rep2 <- CreateSeuratObject(counts = pbmc.data, project = "E9_5_rep2", min.cells = 3, min.features = 200)
##2.简单过滤，预处理与质量控制
#计算线粒体RNA比例
E9_5_rep2[["percent.mt"]] <- PercentageFeatureSet(E9_5_rep2, pattern = "^MT-")
###数值分布的小提琴图,features的参数可以是矩阵中的一列，也可以是某个基因
VlnPlot(E9_5_rep2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
#去除表达基因数量少于1000或多于6000的细胞，去除线粒体RNA比例高于10%的细胞.这个具体标准的筛选，根据之前violin plot来确定。
E9_5_rep2 <- subset(E9_5_rep2, subset = nFeature_RNA > 1000 & nFeature_RNA < 6000 & percent.mt < 1)
dim(E9_5_rep2)
###########################2
E9_5_rep1 <- Read10X( data.dir="GSM5519136_E9_5_rep1_2018JULY18/")
E9_5_rep1 <- CreateSeuratObject(counts = E9_5_rep1, project = "E9_5_rep1", min.cells = 3, min.features = 200)

E9_5_rep1[["percent.mt"]] <- PercentageFeatureSet(E9_5_rep1, pattern = "^MT-")

VlnPlot(E9_5_rep1, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

E9_5_rep1 <- subset(E9_5_rep1, subset = nFeature_RNA > 1000 & nFeature_RNA < 5000 & percent.mt < 1)
dim(E9_5_rep1)

############################3
E12_5_rep2 <- Read10X( data.dir="E12_5_rep2_2018JUNE28/")
E12_5_rep2 <- CreateSeuratObject(counts = E12_5_rep2, project = "E12_5_rep2", min.cells = 3, min.features = 200)

E12_5_rep2[["percent.mt"]] <- PercentageFeatureSet(E12_5_rep2, pattern = "^MT-")

VlnPlot(E12_5_rep2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

E12_5_rep2 <- subset(E12_5_rep2, subset = nFeature_RNA > 1000 & nFeature_RNA < 5000 & percent.mt < 1)
dim(E12_5_rep2)
####################4
E12_5_rep1 <- Read10X( data.dir="E12_5_rep1_2018JUNE28/")
E12_5_rep1 <- CreateSeuratObject(counts = E12_5_rep1, project = "E12_5_rep1", min.cells = 3, min.features = 200)

E12_5_rep1[["percent.mt"]] <- PercentageFeatureSet(E12_5_rep1, pattern = "^MT-")

VlnPlot(E12_5_rep1, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

E12_5_rep1 <- subset(E12_5_rep1, subset = nFeature_RNA > 1000 & nFeature_RNA < 5000 & percent.mt < 1)
dim(E12_5_rep1)
##################5
E11_5_rep3 <- Read10X( data.dir="E11_5_rep3_2018SEP22/")
E11_5_rep3 <- CreateSeuratObject(counts = E11_5_rep3, project = "E11_5_rep3", min.cells = 3, min.features = 200)

E11_5_rep3[["percent.mt"]] <- PercentageFeatureSet(E11_5_rep3, pattern = "^MT-")

VlnPlot(E11_5_rep3, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

E11_5_rep3 <- subset(E11_5_rep3, subset = nFeature_RNA > 1000 & nFeature_RNA < 5000 & percent.mt < 1)
dim(E11_5_rep3)

#############6
E11_5_rep2B <- Read10X( data.dir="E11_5_rep2B_2018JULY27/")
E11_5_rep2B <- CreateSeuratObject(counts = E11_5_rep2B, project = "E11_5_rep2B", min.cells = 3, min.features = 200)

E11_5_rep2B[["percent.mt"]] <- PercentageFeatureSet(E11_5_rep2B, pattern = "^MT-")

VlnPlot(E11_5_rep2B, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

E11_5_rep2B <- subset(E11_5_rep2B, subset = nFeature_RNA > 1000 & nFeature_RNA < 6000 & percent.mt < 1)
dim(E11_5_rep2B)

#############7
E11_5_rep2 <- Read10X( data.dir="E11_5_rep2_2018JUNE26/")
E11_5_rep2 <- CreateSeuratObject(counts = E11_5_rep2, project = "E11_5_rep2", min.cells = 3, min.features = 200)

E11_5_rep2[["percent.mt"]] <- PercentageFeatureSet(E11_5_rep2, pattern = "^MT-")

VlnPlot(E11_5_rep2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

E11_5_rep2 <- subset(E11_5_rep2, subset = nFeature_RNA > 1000 & nFeature_RNA < 6000 & percent.mt < 1)
dim(E11_5_rep2)

###############8
E10_5_rep7 <- Read10X( data.dir="E10_5_rep7_2018AUG16/")
E10_5_rep7 <- CreateSeuratObject(counts = E10_5_rep7, project = "E10_5_rep7", min.cells = 3, min.features = 200)

E10_5_rep7[["percent.mt"]] <- PercentageFeatureSet(E10_5_rep7, pattern = "^MT-")

VlnPlot(E10_5_rep7, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

E10_5_rep7 <- subset(E10_5_rep7, subset = nFeature_RNA > 1000 & nFeature_RNA < 6500 & percent.mt < 1)
dim(E10_5_rep7)

################9
E10_5_rep6 <- Read10X( data.dir="E10_5_rep6_2018SEP22/")
E10_5_rep6 <- CreateSeuratObject(counts = E10_5_rep6, project = "E10_5_rep6", min.cells = 3, min.features = 200)

E10_5_rep6[["percent.mt"]] <- PercentageFeatureSet(E10_5_rep6, pattern = "^MT-")

VlnPlot(E10_5_rep6, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

E10_5_rep6 <- subset(E10_5_rep6, subset = nFeature_RNA > 1000 & nFeature_RNA < 6500 & percent.mt < 1)
dim(E10_5_rep6)

##############10
E10_5_rep5 <- Read10X( data.dir="E10_5_rep5_2018AUG16/")
E10_5_rep5 <- CreateSeuratObject(counts = E10_5_rep5, project = "E10_5_rep5", min.cells = 3, min.features = 200)

E10_5_rep5[["percent.mt"]] <- PercentageFeatureSet(E10_5_rep5, pattern = "^MT-")

VlnPlot(E10_5_rep5, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

E10_5_rep5 <- subset(E10_5_rep5, subset = nFeature_RNA > 1000 & nFeature_RNA < 6500 & percent.mt < 1)
dim(E10_5_rep5)

###################
######合并两个以上Seurat对象######## 
#不具有去批次效应功能add.cell.ids = c("E9.5_rep2", "E9.5_rep1","E12.5_rep2","E12.5_rep1","E11.5_rep3","E11.5_rep2B","E11.5_rep2","E10.5_rep7",
 #                                  "E10.5_rep6","E10.5_rep5"),
#list(E9_5_rep2,E9_5_rep1,E12_5_rep2,E12_5_rep1,E11_5_rep3,E11_5_rep2B,E11_5_rep2,E10_5_rep7,E10_5_rep6,E10_5_rep5)
test.seu <- merge(E9_5_rep2, y = c(E9_5_rep1,E12_5_rep2,E12_5_rep1,E11_5_rep3,E11_5_rep2B,E11_5_rep2,E10_5_rep7,E10_5_rep6,E10_5_rep5), 
                   project = "ALL")
test.seu
head(colnames(test.seu))
unique(sapply(X = strsplit(colnames(test.seu), split = "_"), FUN = "[", 1))
table(test.seu$orig.ident)

##############
test.seu <- NormalizeData(test.seu, normalization.method = "LogNormalize", scale.factor = 10000)
test.seu <- FindVariableFeatures(test.seu, selection.method = "vst", nfeatures = 2000)

test.seu <- ScaleData(test.seu, features = rownames(test.seu))
test.seu <- RunPCA(test.seu, features = VariableFeatures(test.seu),npcs = 100)

ElbowPlot(test.seu)#碎石图
test.seu <- FindNeighbors(test.seu, dims = 1:40)
test.seu <- FindClusters(test.seu, resolution = 1.5)
test.seu <- RunUMAP(test.seu, dims = 1:40)
test.seu <- RunTSNE(test.seu, dims = 1:40)
library(cowplot)
library(stringr)
library(tidyverse)
p3 <- DimPlot(test.seu, reduction = "umap", group.by = "orig.ident", pt.size=0.5) 
p3
p4 <- DimPlot(test.seu, reduction = "umap", group.by = "ident", pt.size=0.5, label = TRUE,repel = TRUE) 
p4
fig_umap <- plot_grid(p3, p4, labels = c('orig.ident','ident'),align = "v",ncol = 2) 
fig_umap
ggsave(filename = "2未整合_umap.pdf", plot = fig_umap, device = 'pdf', width = 30, height = 15, units = 'cm')
graph2ppt(fig_umap,file="2未整合_umap.ppt", width=10, aspectr=2)


saveRDS(test.seu,file = "2-1012umap.rds")
test.seu <- readRDS(file = "2-1012umap.rds")

####细胞类型注释
###查看细胞分群依据的差异基因
test.markers <- FindAllMarkers(test.seu, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
test.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)
write.table(test.markers,file="1012_0.5_logfc0.25_markers500.txt",quote=F,sep="\t",row.names=F,col.names=T)
test.markers<-read.table(file="1012_0.5_logfc0.25_markers500.txt",header=TRUE)
head(test.markers)

###区分一下常见marker
celltype_marker=c("Pax9", "Neurod1","Hoxa3","Pdgfra",
  "Epcam","Rxrg","Sox10","Fstl1")
##做小提琴图
VlnPlot(test.seu,features = celltype_marker,pt.size = 0,ncol = 2)

ggsave(filename = "2marker.png",device = "png",width = 44,height = 33,units = "cm")

#提取指定单细胞亚群重新分析
#m = test.seu[,test.seu@meta.data$seurat_clusters %in% c(29,32,36,38,39,42,44)]
pax9_sce1 = test.seu[,test.seu@meta.data$seurat_clusters %in% c(0,1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,24,25,26,28,30,33,34,35,37,40,43,45,46)]
saveRDS(pax9_sce1,file = "1014删减umap.rds")
pax9_sce1 <- readRDS(file = "1014删减umap.rds")

cd4_sce1=pax9_sce1
cd4_sce1 <- NormalizeData(cd4_sce1, normalization.method = "LogNormalize", scale.factor = 10000)
cd4_sce1 <- FindVariableFeatures(cd4_sce1, selection.method = "vst", nfeatures = 2000)

cd4_sce1 <- ScaleData(cd4_sce1, features = rownames(cd4_sce1))
cd4_sce1 <- RunPCA(cd4_sce1, features = VariableFeatures(cd4_sce1),npcs = 100)
DimHeatmap(cd4_sce1, dims =1:30, cells = 500, balanced = TRUE)
ElbowPlot(cd4_sce1, ndims =100)##碎石图
cd4_sce1 <- FindNeighbors(cd4_sce1, dims = 1:64)
cd4_sce1 <- FindClusters(cd4_sce1, resolution = 0.8)
cd4_sce1 <- RunUMAP(cd4_sce1, dims = 1:64)
cd4_sce1 <- RunTSNE(cd4_sce1, dims = 1:64)

library(cowplot)
library(stringr)
library(tidyverse)
p3 <- DimPlot(cd4_sce1, reduction = "umap", group.by = "orig.ident", pt.size=0.5) 
p3
p4 <- DimPlot(cd4_sce1, reduction = "umap", group.by = "ident", pt.size=0.5, label = TRUE,repel = TRUE) 
p4
fig_umap <- plot_grid(p3, p4, labels = c('orig.ident','ident'),align = "v",ncol = 2) 
fig_umap
ggsave(filename = "2未整合_umap.pdf", plot = fig_umap, device = 'pdf', width = 30, height = 15, units = 'cm')
graph2ppt(fig_umap,file="2未整合_umap.ppt", width=10, aspectr=2)


saveRDS(test.seu,file = "2-1012umap.rds")
test.seu <- readRDS(file = "2-1012umap.rds")

单细胞实战(6)：复现拟南芥单细胞文章中的数据（2）
前言之前对拟南芥单细胞测序的文献数据进行复现，数据结果有点微妙，接下来进行拟时间分析分生组织伪时间分析作者首...
单细胞CNS文章复现Single-cell profiling
今天要复现的这篇文献来自新鲜出炉的NC。这篇是关于肺癌的。作者通过对42例晚期肺癌进行单细胞测序，描绘了晚期肺癌...
单细胞实战(5)：复现拟南芥单细胞文章中的数据（1）
前言目前我的课题是植物方面的单细胞测序，所以打算选择植物类的单细胞测序数据进行复现，目前选择了王佳伟老师的《A ...
文献阅读-《Tutorial: guidelines for t
文献名称：教程：单细胞 RNA 测序数据的计算分析指南文献期刊：nature protocols发表时间：2021...
NCB|心咽发育多样化的单细胞转录轨迹分析
单细胞文献解读 · CNS： Cell 深度| 一套普遍适用于各类单细胞测序数据集的锚定整合方案 Nature系列...
单细胞数据挖掘实战：文献复现（四）细胞比例饼图
单细胞数据挖掘实战：文献复现（一）批量读取数据[https://www.jianshu.com/p/c9da06d...
单细胞数据挖掘实战：文献复现（六）标记基因及可视化
单细胞数据挖掘实战：文献复现（一）批量读取数据[https://www.jianshu.com/p/c9da06d...
单细胞数据挖掘实战：文献复现（五）细胞亚群并可视化
单细胞数据挖掘实战：文献复现（一）批量读取数据[https://www.jianshu.com/p/c9da06d...
单细胞数据挖掘实战：文献复现（三）降维、聚类和细胞注释
单细胞数据挖掘实战：文献复现（一）批量读取数据[https://www.jianshu.com/p/c9da06d...
单细胞数据挖掘实战：文献复现（二）批量创建Seurat对象及质控
单细胞数据挖掘实战：文献复现（一）批量读取数据[https://www.jianshu.com/p/c9da06d...