美文网首页
单细胞实战(6):复现拟南芥单细胞文章中的数据(2)

单细胞实战(6):复现拟南芥单细胞文章中的数据(2)

作者: 周小钊 | 来源:发表于2020-10-23 16:13 被阅读0次

    前言

    之前对拟南芥单细胞测序的文献数据进行复现,数据结果有点微妙,接下来进行拟时间分析

    分生组织伪时间分析

    作者首先查看了分生组织相关基因(WOX5,PIN1,RGF3,PLT1)在聚类中的表达情况,WOX5和PIN1主要在cluster12中表达,RGF3以及PLT1主要在cluster19中表达,对应到自己的数据中查看效果


    pdf("cell_identify/meristematic.pdf",height = 14,width = 14)
    FeaturePlot(wang,features=c('AT3G11260','AT1G73590','AT2G04025','AT3G20840'),cols=c("grey","yellow","red","brown")
                ,reduction = 'umap',pt.size = 1,label.size = 4)
    dev.off()
    

    可以看到对应效果还是不错的,接下来选择cluster12.14.19进行伪时间分析

    id<-c("12","14","19")
    cell.sub <- subset(wang@meta.data,seurat_clusters==id)
    scRNAsub <- subset(wang, cells=row.names(cell.sub))
    dim(scRNAsub)
    library(monocle)
    dir.create("pseudotime121419")
    data <- as(as.matrix(scRNAsub@assays$RNA@counts), 'sparseMatrix')
    pd <- new('AnnotatedDataFrame', data = scRNAsub@meta.data)
    fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
    fd <- new('AnnotatedDataFrame', data = fData)
    mycds <- newCellDataSet(data,
                            phenoData = pd,
                            featureData = fd,
                            expressionFamily = negbinomial.size())
    mycds <- estimateSizeFactors(mycds)
    mycds <- estimateDispersions(mycds, cores=4, relative_expr = TRUE)
    ## 选择代表性基因
    ##使用monocle选择的高变基因
    disp_table <- dispersionTable(mycds)
    disp.genes <- subset(disp_table, mean_expression >= 0.1 & dispersion_empirical >= 1 * dispersion_fit)$gene_id
    mycds <- setOrderingFilter(mycds, disp.genes)
    ## 降维以及细胞排序
    #降维
    mycds <- reduceDimension(mycds, max_components = 2, method = 'DDRTree')
    #排序
    mycds <- orderCells(mycds)
    #State轨迹分布图
    plot1 <- plot_cell_trajectory(mycds, color_by = "State")
    ggsave("pseudotime121419/State.pdf", plot = plot1, width = 6, height = 5)
    ggsave("pseudotime121419/State.png", plot = plot1, width = 6, height = 5)
    ##Cluster轨迹分布图
    plot2 <- plot_cell_trajectory(mycds, color_by = "seurat_clusters")
    ggsave("pseudotime121419/Cluster.pdf", plot = plot2, width = 6, height = 5)
    ggsave("pseudotime121419/Cluster.png", plot = plot2, width = 6, height = 5)
    ##Pseudotime轨迹图
    plot3 <- plot_cell_trajectory(mycds, color_by = "Pseudotime")
    ggsave("pseudotime121419/Pseudotime.pdf", plot = plot3, width = 6, height = 5)
    ggsave("pseudotime121419/Pseudotime.png", plot = plot3, width = 6, height = 5)
    ##合并作图
    plotc <- plot1|plot2|plot3
    ggsave("pseudotime121419/Combination.pdf", plot = plotc, width = 10, height = 3.5)
    ggsave("pseudotime121419/Combination.png", plot = plotc, width = 10, height = 3.5)
    ##保存结果
    
    write.csv(pData(mycds), "pseudotime121419/pseudotime.csv")
    p1 <- plot_cell_trajectory(mycds, color_by = "State") + facet_wrap(~State, nrow = 1)
    p2 <- plot_cell_trajectory(mycds, color_by = "seurat_clusters") + facet_wrap(~seurat_clusters, nrow = 1)
    plotc <- p1/p2
    ggsave("pseudotime121419/trajectory_facet.png", plot = plotc, width = 6, height = 5)
    

    结果也与文献中的有点出入,接下来查看一下BEAM热图和一些基因在轨迹中的表达情况

    ##BEAM分析
    disp_table <- dispersionTable(mycds)
    disp.genes <- subset(disp_table, mean_expression >= 0.5&dispersion_empirical >= 1*dispersion_fit)
    disp.genes <- as.character(disp.genes$gene_id)
    mycds_sub <- mycds[disp.genes,]
    #State轨迹分布图
    plot1 <- plot_cell_trajectory(mycds_sub, color_by = "State")
    ggsave("pseudotime121419/BEAM_State.pdf", plot = plot1, width = 6, height = 5)
    ggsave("pseudotime121419/BEAM_State.png", plot = plot1, width = 6, height = 5)
    ##Cluster轨迹分布图
    plot2 <- plot_cell_trajectory(mycds_sub, color_by = "seurat_clusters")
    ggsave("pseudotime121419/BEAM_Cluster.pdf", plot = plot2, width = 6, height = 5)
    ggsave("pseudotime121419/BEAM_Cluster.png", plot = plot2, width = 6, height = 5)
    ##Pseudotime轨迹图
    plot3 <- plot_cell_trajectory(mycds_sub, color_by = "Pseudotime")
    ggsave("pseudotime121419/BEAM_Pseudotime.pdf", plot = plot3, width = 6, height = 5)
    ggsave("pseudotime121419/BEAM_Pseudotime.png", plot = plot3, width = 6, height = 5)
    
    ##合并作图
    plotc <- plot1|plot2|plot3
    ggsave("pseudotime121419/BEAM_Combination.pdf", plot = plotc, width = 10, height = 3.5)
    ggsave("pseudotime121419/BEAM_Combination.png", plot = plotc, width = 10, height = 3.5)
    ##保存结果
    beam_res <- BEAM(mycds_sub, branch_point = 1, cores = 8)
    beam_res <- beam_res[order(beam_res$qval),]
    beam_res <- beam_res[,c("gene_short_name", "pval", "qval")]
    mycds_sub_beam <- mycds_sub[row.names(subset(beam_res, qval < 1e-4)),]
    pdf("pseudotime121419/BEAM_pseudotime_heatmap2.pdf",width = 10, height = 10)
    plot_genes_branched_heatmap(mycds_sub_beam,  branch_point = 1, 
                                num_clusters = 5, show_rownames = F)
    dev.off()
    ##寻找相应的基因绘制轨迹图
    matrix_dir="filtered_gene_bc_matrices/ref/"
    barcode.path <- paste0(matrix_dir,"barcodes.tsv")
    features.path <- paste0(matrix_dir,"genes.tsv")
    matrix.path <- paste0(matrix_dir, "matrix.mtx")
    mat1 <- readMM(file = matrix.path)
    feature.names = read.delim(features.path,
                               header = FALSE,
                               stringsAsFactors = FALSE)
    barcode.names = read.delim(barcode.path,
                               header = FALSE,
                               stringsAsFactors = FALSE)
    colnames(mat1) = barcode.names$V1
    rownames(mat1) = feature.names$V1
    mat1<-as.matrix(mat1)
    gene<-t(as.matrix(mat1[c('AT3G11260','AT1G73590',
           'AT2G04025','AT3G20840','AT1G50490'),
         colnames(scRNAsub@assays$RNA@counts)]))
    colnames(gene)<-c("WOX5","PIN1","RGF3","PLT1","UBC20")
    mycds_sub@phenoData@data<-cbind(mycds_sub@phenoData@data,gene)
    mycds_sub@phenoData@data[mycds_sub@phenoData@data == 0] <- NA
    p1<-plot_cell_trajectory(mycds_sub, color_by = "WOX5")+scale_color_gradient(na.value = "grey",low="yellow",high="red")
    p2<-plot_cell_trajectory(mycds_sub, color_by = "PIN1")+scale_color_gradient(na.value = "grey",low="yellow",high="red")
    p3<-plot_cell_trajectory(mycds_sub, color_by = "RGF3")+scale_color_gradient(na.value = "grey",low="yellow",high="red")
    p4<-plot_cell_trajectory(mycds_sub, color_by = "PLT1")+scale_color_gradient(na.value = "grey",low="yellow",high="red")
    p5<-plot_cell_trajectory(mycds_sub, color_by = "UBC20")+scale_color_gradient(na.value = "grey",low="yellow",high="red")
    plotc <- p1|p2|p3|p4|p5
    ggsave("pseudotime121419/meristematic.pdf", plot = plotc, width = 18, height = 5)
    ggsave("pseudotime121419/meristematic.png", plot = plotc, width = 18, height = 5)
    
    

    文献中没有给出相应的参数,结果还是与文献的结果有点差距

    root cap伪时间分析

    按照相同的方式对root cap组织进行伪时间分析


    结语

    对于此次的数据复现,前面的聚类步骤还是能与文献对应,但是后续的伪时间分析差距就有点大了,主要是因为拟时间分析我才刚刚入门学习,对一些分析过程中的参数不太了解,目前的数据与代码我已上传github,欢迎大家批评指正

    参考链接:单细胞转录组基础分析六:伪时间分析


    转载请注明:周小钊的博客>>>单细胞实战(6):复现拟南芥单细胞文章中的数据(2)

    相关文章

      网友评论

          本文标题:单细胞实战(6):复现拟南芥单细胞文章中的数据(2)

          本文链接:https://www.haomeiwen.com/subject/piivmktx.html