前言
之前对拟南芥单细胞测序的文献数据进行复现,数据结果有点微妙,接下来进行拟时间分析
分生组织伪时间分析
作者首先查看了分生组织相关基因(WOX5,PIN1,RGF3,PLT1)在聚类中的表达情况,WOX5和PIN1主要在cluster12中表达,RGF3以及PLT1主要在cluster19中表达,对应到自己的数据中查看效果
pdf("cell_identify/meristematic.pdf",height = 14,width = 14)
FeaturePlot(wang,features=c('AT3G11260','AT1G73590','AT2G04025','AT3G20840'),cols=c("grey","yellow","red","brown")
,reduction = 'umap',pt.size = 1,label.size = 4)
dev.off()
可以看到对应效果还是不错的,接下来选择cluster12.14.19进行伪时间分析
id<-c("12","14","19")
cell.sub <- subset(wang@meta.data,seurat_clusters==id)
scRNAsub <- subset(wang, cells=row.names(cell.sub))
dim(scRNAsub)
library(monocle)
dir.create("pseudotime121419")
data <- as(as.matrix(scRNAsub@assays$RNA@counts), 'sparseMatrix')
pd <- new('AnnotatedDataFrame', data = scRNAsub@meta.data)
fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
fd <- new('AnnotatedDataFrame', data = fData)
mycds <- newCellDataSet(data,
phenoData = pd,
featureData = fd,
expressionFamily = negbinomial.size())
mycds <- estimateSizeFactors(mycds)
mycds <- estimateDispersions(mycds, cores=4, relative_expr = TRUE)
## 选择代表性基因
##使用monocle选择的高变基因
disp_table <- dispersionTable(mycds)
disp.genes <- subset(disp_table, mean_expression >= 0.1 & dispersion_empirical >= 1 * dispersion_fit)$gene_id
mycds <- setOrderingFilter(mycds, disp.genes)
## 降维以及细胞排序
#降维
mycds <- reduceDimension(mycds, max_components = 2, method = 'DDRTree')
#排序
mycds <- orderCells(mycds)
#State轨迹分布图
plot1 <- plot_cell_trajectory(mycds, color_by = "State")
ggsave("pseudotime121419/State.pdf", plot = plot1, width = 6, height = 5)
ggsave("pseudotime121419/State.png", plot = plot1, width = 6, height = 5)
##Cluster轨迹分布图
plot2 <- plot_cell_trajectory(mycds, color_by = "seurat_clusters")
ggsave("pseudotime121419/Cluster.pdf", plot = plot2, width = 6, height = 5)
ggsave("pseudotime121419/Cluster.png", plot = plot2, width = 6, height = 5)
##Pseudotime轨迹图
plot3 <- plot_cell_trajectory(mycds, color_by = "Pseudotime")
ggsave("pseudotime121419/Pseudotime.pdf", plot = plot3, width = 6, height = 5)
ggsave("pseudotime121419/Pseudotime.png", plot = plot3, width = 6, height = 5)
##合并作图
plotc <- plot1|plot2|plot3
ggsave("pseudotime121419/Combination.pdf", plot = plotc, width = 10, height = 3.5)
ggsave("pseudotime121419/Combination.png", plot = plotc, width = 10, height = 3.5)
##保存结果
write.csv(pData(mycds), "pseudotime121419/pseudotime.csv")
p1 <- plot_cell_trajectory(mycds, color_by = "State") + facet_wrap(~State, nrow = 1)
p2 <- plot_cell_trajectory(mycds, color_by = "seurat_clusters") + facet_wrap(~seurat_clusters, nrow = 1)
plotc <- p1/p2
ggsave("pseudotime121419/trajectory_facet.png", plot = plotc, width = 6, height = 5)
结果也与文献中的有点出入,接下来查看一下BEAM热图和一些基因在轨迹中的表达情况
##BEAM分析
disp_table <- dispersionTable(mycds)
disp.genes <- subset(disp_table, mean_expression >= 0.5&dispersion_empirical >= 1*dispersion_fit)
disp.genes <- as.character(disp.genes$gene_id)
mycds_sub <- mycds[disp.genes,]
#State轨迹分布图
plot1 <- plot_cell_trajectory(mycds_sub, color_by = "State")
ggsave("pseudotime121419/BEAM_State.pdf", plot = plot1, width = 6, height = 5)
ggsave("pseudotime121419/BEAM_State.png", plot = plot1, width = 6, height = 5)
##Cluster轨迹分布图
plot2 <- plot_cell_trajectory(mycds_sub, color_by = "seurat_clusters")
ggsave("pseudotime121419/BEAM_Cluster.pdf", plot = plot2, width = 6, height = 5)
ggsave("pseudotime121419/BEAM_Cluster.png", plot = plot2, width = 6, height = 5)
##Pseudotime轨迹图
plot3 <- plot_cell_trajectory(mycds_sub, color_by = "Pseudotime")
ggsave("pseudotime121419/BEAM_Pseudotime.pdf", plot = plot3, width = 6, height = 5)
ggsave("pseudotime121419/BEAM_Pseudotime.png", plot = plot3, width = 6, height = 5)
##合并作图
plotc <- plot1|plot2|plot3
ggsave("pseudotime121419/BEAM_Combination.pdf", plot = plotc, width = 10, height = 3.5)
ggsave("pseudotime121419/BEAM_Combination.png", plot = plotc, width = 10, height = 3.5)
##保存结果
beam_res <- BEAM(mycds_sub, branch_point = 1, cores = 8)
beam_res <- beam_res[order(beam_res$qval),]
beam_res <- beam_res[,c("gene_short_name", "pval", "qval")]
mycds_sub_beam <- mycds_sub[row.names(subset(beam_res, qval < 1e-4)),]
pdf("pseudotime121419/BEAM_pseudotime_heatmap2.pdf",width = 10, height = 10)
plot_genes_branched_heatmap(mycds_sub_beam, branch_point = 1,
num_clusters = 5, show_rownames = F)
dev.off()
##寻找相应的基因绘制轨迹图
matrix_dir="filtered_gene_bc_matrices/ref/"
barcode.path <- paste0(matrix_dir,"barcodes.tsv")
features.path <- paste0(matrix_dir,"genes.tsv")
matrix.path <- paste0(matrix_dir, "matrix.mtx")
mat1 <- readMM(file = matrix.path)
feature.names = read.delim(features.path,
header = FALSE,
stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path,
header = FALSE,
stringsAsFactors = FALSE)
colnames(mat1) = barcode.names$V1
rownames(mat1) = feature.names$V1
mat1<-as.matrix(mat1)
gene<-t(as.matrix(mat1[c('AT3G11260','AT1G73590',
'AT2G04025','AT3G20840','AT1G50490'),
colnames(scRNAsub@assays$RNA@counts)]))
colnames(gene)<-c("WOX5","PIN1","RGF3","PLT1","UBC20")
mycds_sub@phenoData@data<-cbind(mycds_sub@phenoData@data,gene)
mycds_sub@phenoData@data[mycds_sub@phenoData@data == 0] <- NA
p1<-plot_cell_trajectory(mycds_sub, color_by = "WOX5")+scale_color_gradient(na.value = "grey",low="yellow",high="red")
p2<-plot_cell_trajectory(mycds_sub, color_by = "PIN1")+scale_color_gradient(na.value = "grey",low="yellow",high="red")
p3<-plot_cell_trajectory(mycds_sub, color_by = "RGF3")+scale_color_gradient(na.value = "grey",low="yellow",high="red")
p4<-plot_cell_trajectory(mycds_sub, color_by = "PLT1")+scale_color_gradient(na.value = "grey",low="yellow",high="red")
p5<-plot_cell_trajectory(mycds_sub, color_by = "UBC20")+scale_color_gradient(na.value = "grey",low="yellow",high="red")
plotc <- p1|p2|p3|p4|p5
ggsave("pseudotime121419/meristematic.pdf", plot = plotc, width = 18, height = 5)
ggsave("pseudotime121419/meristematic.png", plot = plotc, width = 18, height = 5)
文献中没有给出相应的参数,结果还是与文献的结果有点差距
root cap伪时间分析
按照相同的方式对root cap组织进行伪时间分析
结语
对于此次的数据复现,前面的聚类步骤还是能与文献对应,但是后续的伪时间分析差距就有点大了,主要是因为拟时间分析我才刚刚入门学习,对一些分析过程中的参数不太了解,目前的数据与代码我已上传github,欢迎大家批评指正
参考链接:单细胞转录组基础分析六:伪时间分析
转载请注明:周小钊的博客>>>单细胞实战(6):复现拟南芥单细胞文章中的数据(2)
网友评论