美文网首页
单细胞聚类批次效应量化_标签匀度

单细胞聚类批次效应量化_标签匀度

作者: 倪桦 | 来源:发表于2023-05-10 12:05 被阅读0次


    推荐指标

    ARI 聚类
    LISI 邻域均度计算

    1、LISI脚本

    library(Seurat);library(SeuratData);library(dplyr);library(ggplot2)
    data("pbmc3k")
    cur_seu <- pbmc3k %>% NormalizeData() %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA() %>% RunUMAP(dims = 1:10)
    cur_seu$batch <- sample(letters[1:10],size = ncol(cur_seu),replace = T)
    X <- cur_seu@reductions$pca@cell.embeddings
    lisi_index <- lisi::compute_lisi(X,meta_data = cur_seu@meta.data, label_colnames="batch",perplexity = 50)
    t.test(lisi_index[[1]])$conf.int ### 均值置信区间
    
    
    p_dim <- DimPlot(cur_seu,group.by = "batch") + labs(title = "")
    p_lisi <- lisi_index %>% 
        ggplot() + 
        geom_boxplot(aes(x = 1,y = batch),notch = T,width = 0.4,outlier.color = "orange",outlier.shape = 8,outlier.alpha = .7) +
        geom_rect(data = lisi_index %>% rstatix::get_summary_stats(),aes(xmin = 1-0.2, xmax = 1+ 0.2, ymin = q1, ymax = median), fill = "#008AA7", color = "black") +
        geom_rect(data = lisi_index %>% rstatix::get_summary_stats(),aes(xmin = 1-0.2, xmax = 1+ 0.2, ymin = median, ymax = q3), fill = "#A2EFFD", color = "black") +
        geom_point(data = lisi_index %>% rstatix::get_summary_stats(),aes(x =1,y = mean),shape = 8,size = 3) +
        scale_y_continuous(position = "right") + theme_classic() + coord_flip() +
        theme(axis.line.y = element_blank(),
              axis.text.y = element_blank(),
              axis.ticks.y = element_blank(),
              plot.margin = margin(l = 52,r = 50,t = 10,b = 10),
              panel.border = element_rect(fill = NA,colour = NA)) +
        labs(y = "iLISI", x = "")
    
    cowplot::plot_grid(p_lisi,p_dim,ncol = 1,rel_heights = c(0.15,0.7),rel_widths = c(0.5,0.2))
    

    2、ARI脚本

    cur_seu <- pbmc3k[,!is.na(pbmc3k$seurat_annotations)]
    cur_seu <- cur_seu %>% NormalizeData() %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA() %>% RunUMAP(dims = 1:10)
    cur_seu <- cur_seu %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.5)
    cur_seu$batch <- sample(1:20,size = ncol(cur_seu),replace = T)
    ### 当ARI指数为负数时,说明聚类结果比随机分配还要差,这通常是由于聚类结果与真实标签之间的一致性非常低所导致的,
    ari_info <- GeometricMorphometricsMix::adjRand_test(cur_seu$seurat_clusters, cur_seu$batch) ### H0: 每个簇内样本标记是随机混乱的
    # aricode::ARI(cur_seu$seurat_clusters, cur_seu$batch)
    # entropy_lst <- aricode::entropy(cur_seu$seurat_clusters, cur_seu$batch) ### 其它量化指标,相互熵
    
    

    https://cran.r-project.org/web/packages/distributions3/vignettes/one-sample-t-confidence-interval.html
    https://rdrr.io/github/fruciano/GeometricMorphometricsMix/man/adjRand_test.html
    https://github.com/jchiquet/aricode
    https://carmonalab.github.io/STACAS.demo/STACAS.demo.html
    https://github.com/immunogenomics/LISI
    https://broadinstitute.github.io/2019_scWorkshop/batch-effects.html

    相关文章

      网友评论

          本文标题:单细胞聚类批次效应量化_标签匀度

          本文链接:https://www.haomeiwen.com/subject/ytkdsdtx.html