(视频教程): Monocle2安装包测试、分析流程及可视化修饰

作者: KS科研分享与服务 | 来源:发表于2024-12-17 11:34 被阅读0次

为什么你的产品开发好了，还需要专业的测试？
基因家族分析（五）
27. 性能测试总体流程
Gephi可视化分析视频教程
2018-12-21
面试2
citespace可视化分析软件视频教程
citespace可视化分析软件视频教程
Python数据分析、数据采集、数据可视化、图像数据处理分析视频
经验

前面我们刚更新完CytoTRACE2，那么也是想着用将其与monocle2分析结合起来，一方面看看效果，一方面辅助起点确定。同时近期不止有一个小伙伴在跑monocle2分析，但是还是因为包出了问题，我们之前发布过一个终集修改版2.26.0（Monocle2终极修改版），使用起来是没有问题的，但是小伙伴确有问题，这让我下定决心要找出问题。此外，也想将整个流程做一个视频讲解，弥补之前没有视频教程的遗憾，有很多小伙伴有此需求。

首先说结论，为什么终极修改包有些人出错，有些人不出错。经过我们的测试发现，安装monocle 2.26.0修改版在R 4.2中，无论是服务器还是本地电脑的运行中，ordercell和BEAM都不会出此案错误，但是在R 4.3中，ordercell没有问题，BEAM依然报错，可见2.26.0修改版与R版本有些关系。那么如果是R4.3，总不能为了monocle降级吧，我们测试发现monocle 2.32.0 在R 4.3中Ordercell没有问题，BEAM有问题，所以对2.32.0也做了修改版，让其在R 4.3中运行无障碍。（注意了：包修改的是它本身的问题，如果是因为流程或者数据的错误，需要自行检查）

接下来走走monocle2的流程吧，这里为了方便，数据也小，我们直接包装了一个函数，实际运行我们还是建议自己按照流程，因为需要中间调整参数。如果你的参数流程已经探索没有问题了，那么可以包装函数，方便分析：


library(Seurat)
library(monocle)
library(viridis)
#========================================================================================
setwd("D:\\KS项目\\公众号文章\\视频教程---monocle2分析测试")
#load data-加载之前做了cytroTRACE2的那个obj
#可以结合辅助看看cell起点
load("D:/KS项目/公众号文章/CytroTRACE2：单细胞转录组数据推断细胞发育潜能/sce_trace.RData")
DimPlot(cytotrace2_sce, label = T, pt.size = 1)
#这个数据是大群里面直接提取了目标细胞进行分析
#========================================================================================

#monocle pieline function
#pay attention---Seuratobject version 5.0.0 
ks_run_Monocle2 <- function(object, #seurat obj or expression matrix (建议数据格式转为matrix,如果数据量大转化为稀疏矩阵as(as.matrix(data), "sparseMatrix"))
                            layer, #used when object is a seurat obj
                            assay, #used when object is a seurat obj
                            lowerDetectionLimit = 0.1, 
                            VARgenesM=c("dispersionTable","seurat","differentialGeneTest"),
                            cellAnno=NULL, 
                            define_root=F,
                            root_state,
                            reverse=NULL
                            ){
  
  
  if(class(object)[1] == 'Seurat') {
    
    data <- GetAssayData(object=object, layer=layer, assay=assay)#get expression matrix data
    
    pd <- new("AnnotatedDataFrame", data = object@meta.data)
    fData <- data.frame(gene_short_name = row.names(data), row.names = row.names(data))
    fd <- new("AnnotatedDataFrame", data = fData)
    
    if(all(data == floor(data))) {
      expressionFamily <- negbinomial.size()
    } else if(any(data < 0)){
      expressionFamily <- uninormal()
    } else {
      expressionFamily <- tobit()
    }
    
    #Creates a new CellDateSet object.
    monocle_cds <- newCellDataSet(data,
                                  phenoData = pd, 
                                  featureData = fd,
                                  lowerDetectionLimit=0.1,
                                  expressionFamily=expressionFamily)
    
  }else{
    
    print("This fucntions only apply for a seurat obj")
  }
  
  
  
  #Estimate size factors and dispersions
  #数据处理
  monocle_cds <- estimateSizeFactors(monocle_cds)#size facotr标准化细胞之间的mRNA差异
  monocle_cds <- estimateDispersions(monocle_cds)
  
  #质量控制-filter cells
  monocle_cds <- detectGenes(monocle_cds, min_expr = 0.1)
  # print(head(fData(monocle_cds)))
  # print(head(pData(monocle_cds)))
  # expressed_genes <- row.names(subset(fData(mouse_monocle), num_cells_expressed >= 10))
  monocle_cds <- monocle_cds[fData(monocle_cds)$num_cells_expressed >= 10, ]
  
  
  #select methods for VariableFeatures
  if(VARgenesM=="dispersionTable"){
    
    disp_table <- dispersionTable(monocle_cds)
    ordering_genes <- subset(disp_table,
                             mean_expression >= 0.1 &
                               dispersion_empirical >= 1.5* dispersion_fit)$gene_id
    
  }
  
  
  if(VARgenesM=="seurat"){
    
    ordering_genes <- VariableFeatures(FindVariableFeatures(object, assay = "RNA"), assay = "RNA")
    
  }
  

  if(VARgenesM=="differentialGeneTest"){
    
    diff_test_res <- differentialGeneTest(monocle_cds,fullModelFormulaStr = paste0("~",cellAnno))##~后面是表示对谁做差异分析的变量
    diff_test_res_sig <- diff_test_res[order(diff_test_res$qval,decreasing=F),]
    
    ordering_sce <- diff_test_res_sig[diff_test_res_sig$qval< 0.01,]
    
    if(nrow(ordering_sce)>3000){
      
      ordering_genes <- ordering_sce$gene_short_name[1:3000]
      
    }else{
      
      ordering_genes <- rdering_sce$gene_short_name
    }
    
  }
  
  
  #Marks genes for clustering
  monocle_cds <- setOrderingFilter(monocle_cds, ordering_genes)
  plot_ordering_genes(monocle_cds)
  
  
  #cluster
  monocle_cds <- reduceDimension(monocle_cds, max_components = 2,reduction_method = 'DDRTree')
  
  #order cells
  monocle_cds <- orderCells(monocle_cds, reverse=reverse)
  
  if(define_root){
    monocle_cds <- monocle_cds <- orderCells(monocle_cds,root_state = root_state)
  }
  
  
  return(monocle_cds)
  
}

分析数据以及一些基本可视化修饰：

#run monocle2
sce_CDS <- ks_run_Monocle2(object = cytotrace2_sce,
                           layer = 'counts',
                           assay = "RNA",
                           VARgenesM="dispersionTable",
                           cellAnno = "cluster")
#可视化拟时
plot_cell_trajectory(sce_CDS,show_branch_points = F,color_by = "Pseudotime",cell_size = 3)+
  scale_color_gradientn(colours = colorRampPalette(c('blue','green','yellow','red'))(100))

#按照celltype着色
plot_cell_trajectory(sce_CDS,show_branch_points = F,color_by = "cluster",cell_size = 3)+
  theme_dr(arrow = grid::arrow(length = unit(0, "inches")))+
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())+
  scale_color_manual(values = c("#E69253", "#EDB931", "#E4502E", "#4378A0"))+
  geom_curve(aes(x=9,y=2,yend=0,xend=5),
             arrow=arrow(length=unit(0.03,"npc")),
             size=1,curvature=0,
             color="black")+
  annotate("text", x=6.5, y=1.5, label = "bold(YSMP)", parse = TRUE, angle=45, size=4)+
  geom_curve(aes(x=-4,y=-1,yend=3,xend=-6),
             arrow=arrow(length=unit(0.03,"npc")),
             size=1,curvature=0,
             color="black")+
  annotate("text", x=-4, y=1, label = "bold(Monocyte~Fate)", parse = TRUE, angle=-75, size=4)+
  geom_curve(aes(x=-5,y=-3,yend=-5,xend=-6),
             arrow=arrow(length=unit(0.03,"npc")),
             size=1,curvature=0,
             color="black")+
  annotate("text", x=-6.5, y=-4, label = "bold(Neu~Fate)", parse = TRUE, angle=75, size=4)

#结合可视化cytroTRACE2结果
plot_cell_trajectory(sce_CDS,show_branch_points = F,color_by = "CytoTRACE2_Relative",cell_size = 3)+
  scale_colour_gradientn(colours = (c( "#000004FF", "#3B0F70FF", "#8C2981FF", "#DE4968FF", "#FE9F6DFF", "#FCFDBFFF")),
                         na.value = "transparent",
                         limits=c(0,1),
                         breaks = c(0,1),
                         labels=c("(More diff.)", "(Less diff.)"),
                         name = "Relative\norder \n" ,
                         guide = guide_colorbar(frame.colour = "black", ticks.colour = "black"))+
  theme_dr(arrow = grid::arrow(length = unit(0, "inches")))+#坐标轴主题修改
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())


#=======================================================================================
#拟时差异基因
expressed_genes=row.names(subset(fData(sce_CDS),use_for_ordering=="TRUE"))
peu_gene <- differentialGeneTest(sce_CDS[expressed_genes,],fullModelFormulaStr = "~sm.ns(Pseudotime)",cores = 1)
# write.csv(peu_gene,file='peu_gene.csv')#保存好文件，这个分析过程挺费时间
peu_gene <- peu_gene[which(peu_gene$qval<0.01 & peu_gene$num_cells_expressed>100),]#筛选显著是的
# peu_gene %>% arrange(qval)  -> peu_gene#按照qval排个序
# peu_gene <- peu_gene[1:100,] #这里我们取前100个基因演示
#改造热图函数，展示需要的基因
source('./new_heatmap.R')
library(pheatmap)
library(grid)
p1 <-plot_pseudotime_heatmap(sce_CDS[peu_gene$gene_short_name,],
                             num_clusters = 4,
                             cores = 2,
                             show_rownames = T,return_heatmap =T,
                             hmcols = viridis(256),
                             use_gene_short_name = T,
                             clustercolor  = c("#d2981a", "#a53e1f", "#457277", "#8f657d"))


p1


#只展示感兴趣的基因
genes <- c("C1QB","C1QC","C1QA","MRC1","LGMN","MS4A7","MAF","FOLR2",
           "HLA-DPA1","CLEC10A","IL10RA","CD163","KCTD12","CLEC7A","MS4A6A","CD14",
           "ITM2A","CYTL1","MDK","SELP","CD24",
           "S100A8","S100A9","S100A12")

source('./add.flag.R')
add.flag(p1, kept.labels = genes, repel.degree = 0.2)


library(RColorBrewer)
plot_cell_trajectory(sce_CDS,show_branch_points = T,color_by = "State",cell_size = 3)
#BEAM
#查看在分叉处的差异基因
BEAM_res <- BEAM(sce_CDS, branch_point = 3, cores=2)
BEAM_res <- BEAM_res[order(BEAM_res$qval),]
BEAM_res <- BEAM_res[,c("gene_short_name", "pval", "qval")]
# write.csv(BEAM_res,file = "BEAM_res.csv")
branch_p <- plot_genes_branched_heatmap(sce_CDS[row.names(subset(BEAM_res, qval < 1e-8)),],
                            branch_point = 3,
                            num_clusters = 4,
                            cores = 2,
                            hmcols = colorRampPalette(c("steelblue", "white","darkorange2", "orangered4"))(100),
                            use_gene_short_name = T,
                            show_rownames = T,
                            return_heatmap = T)

总之，整个流程是比较简单的，最重要的自己对于结果的把握和解释，以及一些新的发现。希望我们的流程对您有所帮助！

为什么你的产品开发好了，还需要专业的测试？
测试的流程：需求分析，系统分析，测试分析。测试方案设计，测试方案评审。测试用例编写及评审。测试执行。测试回归及...
基因家族分析（五）
共线性分析（Synteny analysis）及可视化基因家族流程：基因家族分析（一）基因家族流程：基因家族分...
27. 性能测试总体流程
总体流程收集需求开发测试脚本执行测试分析及报告性能优化执行测试分析及报告重复以上步骤... 收集需...
Gephi可视化分析视频教程
Gephi可视化分析视频教程孤行者不孤独学术点滴今天 Gephi可视化分析视频教程重点提醒：学习软件最好的方式...
2018-12-21
软件测试总结软件测试流程：测试需求分析，分析需求文档提取...
面试2
A、测试基础 1、测试理论及流程（测试类型及优缺点，流程及细节） 2、测试思想（测试设计方法等） 3、测试场景设计...
citespace可视化分析软件视频教程
citespace可视化分析软件视频教程重点提醒：学习软件最好的方式就是看视频学习，我将会录制可视化系列视频，并...
citespace可视化分析软件视频教程
citespace可视化分析软件视频教程重点提醒：学习软件最好的方式就是看视频学习，我将会录制可视化系列视频，并...
Python数据分析、数据采集、数据可视化、图像数据处理分析视频
Python数据分析、数据采集、数据可视化、图像数据处理分析视频教程大数据，云计算，架构，数据分析师，Hadoop...
经验
用户场景需求竞品分析产品文档结构图/功能流程图/模块原型/交互开发/程序开发知识测试/测试过程及...