美文网首页js css html
FindAllMarkers, FindMarkers 以及 F

FindAllMarkers, FindMarkers 以及 F

作者: 小潤澤 | 来源:发表于2023-04-20 16:46 被阅读0次

    在seurat中,如果运行了RunUMAP或者RunTSNE后自动分群后,FindAllMarkers和FindMarkers基本就是一样的;如果没有进行RunUMAP或者RunTSNE分群,那么需要先运行BuildClusterTree(object)函数,利用树聚类先分群

    FindAllMarkers and FindMarkers

    示例:
    首先加载数据

    pbmc.data <- read.csv("GSE117988_raw.expMatrix_PBMC.csv",header=TRUE,row.names = 1)
    pbmc.data <- log2(1 + sweep(pbmc.data, 2, median(colSums(pbmc.data))/colSums(pbmc.data), '*'))
    pbmc<- CreateSeuratObject(counts = pbmc.data, project = "10X pbmc", min.cells = 1, min.features = 0)
    
    pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
    
    sce=pbmc
    sce <- NormalizeData(sce, normalization.method =  "LogNormalize", scale.factor = 10000)
    GetAssay(sce,assay = "RNA")
    sce <- FindVariableFeatures(sce, selection.method = "vst", nfeatures = 2000) 
    
    sce <- ScaleData(sce) 
    sce <- RunPCA(object = sce, pc.genes = VariableFeatures(sce)) 
    sce <- FindNeighbors(sce, dims = 1:15)
    sce <- FindClusters(sce, resolution = 0.2)
    table(sce@meta.data$RNA_snn_res.0.2) 
    
    set.seed(123)
    sce <- RunTSNE(object = sce, dims = 1:15, do.fast = TRUE)
    ## 运行 FindAllMarkers
    all.markers <- FindAllMarkers(object = sce)
    

    先看FindAllMarkers:

    object = sce
    assay = NULL
    features = NULL
    logfc.threshold = 0.25
    test.use = 'wilcox'
    slot = 'data'
    min.pct = 0.1
    min.diff.pct = -Inf
    node = NULL
    verbose = TRUE
    only.pos = FALSE
    max.cells.per.ident = Inf
    random.seed = 1
    latent.vars = NULL
    min.cells.feature = 3
    min.cells.group = 3
    mean.fxn = NULL
    fc.name = NULL
    base = 2
    return.thresh = 1e-2
    densify = FALSE
    
    if (is.null(x = node)) {
      ## 将分群的结果进行排序
      idents.all <- sort(x = unique(x = Idents(object = object)))
      ## 一共 7 个类群
      ##[1] 0 1 2 3 4 5 6
      ##Levels: 0 1 2 3 4 5 6
    } 
    
    genes.de <- list()
    messages <- list()
    
    ## 分别遍历每一个细胞类群( 0 1 2 3 4 5 6 )
    for (i in 1:length(x = idents.all)) {
      if (verbose) {
        message("Calculating cluster ", idents.all[i])
      }
      genes.de[[i]] <- tryCatch(
        expr = {
          FindMarkers(
            object = object,
            assay = assay,
            ident.1 = if (is.null(x = node)) {
              idents.all[i]
            } else {
              tree
            },
            ident.2 = if (is.null(x = node)) {
              NULL
            } else {
              idents.all[i]
            },
            features = features,
            logfc.threshold = logfc.threshold,
            test.use = test.use,
            slot = slot,
            min.pct = min.pct,
            min.diff.pct = min.diff.pct,
            verbose = verbose,
            only.pos = only.pos,
            max.cells.per.ident = max.cells.per.ident,
            random.seed = random.seed,
            latent.vars = latent.vars,
            min.cells.feature = min.cells.feature,
            min.cells.group = min.cells.group,
            mean.fxn = mean.fxn,
            fc.name = fc.name,
            base = base,
            densify = densify
          )
        },
        error = function(cond) {
          return(cond$message)
        }
      )
    }
    gde.all <- data.frame()
    
    # 对每一类群找到的 marker 基因进行阈值过滤
    for (i in 1:length(x = idents.all)) {
      gde <- genes.de[[i]]
      gde <- gde[order(gde$p_val, -gde[, 2]), ]
      gde <- subset(x = gde, subset = p_val < return.thresh)
      gde$cluster <- idents.all[i]
      gde$gene <- rownames(x = gde)
      gde.all <- rbind(gde.all, gde)
    
    # 重命名  
    rownames(x = gde.all) <- make.unique(names = as.character(x = gde.all$gene))
    

    我们可以知道,当运行完RunUMAP或者RunTSNE后自动分群后,FindAllMarkers 内部其实就是调用了 FindMarkers,并且:

    # FindMarkers 的 ident.1 为RunUMAP或者RunTSNE后自动分群后每一个类群的细胞
    ident.1 = if (is.null(x = node)) 
    {
        idents.all[i]
    } 
    
    # FindMarkers 的 ident.2 默认为空 
    ident.2 = if (is.null(x = node))
    {
         NULL
    } 
    

    而 FindMarkers 在计算差异基因的时候需要先运行 IdentsToCells() 函数

    IdentsToCells <- function(
      object,
      ident.1,
      ident.2,
      cellnames.use
    ) {
      #
      if (is.null(x = ident.1)) {
        stop("Please provide ident.1")
      } else if ((length(x = ident.1) == 1 && ident.1[1] == 'clustertree') || is(object = ident.1, class2 = 'phylo')) {
        if (is.null(x = ident.2)) {
          stop("Please pass a node to 'ident.2' to run FindMarkers on a tree")
        }
        tree <- if (is(object = ident.1, class2 = 'phylo')) {
          ident.1
        } else {
          Tool(object = object, slot = 'BuildClusterTree')
        }
        if (is.null(x = tree)) {
          stop("Please run 'BuildClusterTree' or pass an object of class 'phylo' as 'ident.1'")
        }
        ident.1 <- tree$tip.label[GetLeftDescendants(tree = tree, node = ident.2)]
        ident.2 <- tree$tip.label[GetRightDescendants(tree = tree, node = ident.2)]
      }
      if (length(x = as.vector(x = ident.1)) > 1 &&
          any(as.character(x = ident.1) %in% cellnames.use)) {
        bad.cells <- cellnames.use[which(x = !as.character(x = ident.1) %in% cellnames.use)]
        if (length(x = bad.cells) > 0) {
          stop(paste0("The following cell names provided to ident.1 are not present in the object: ", paste(bad.cells, collapse = ", ")))
        }
      } else {
        ident.1 <- WhichCells(object = object, idents = ident.1)
      }
      # if NULL for ident.2, use all other cells
      if (length(x = as.vector(x = ident.2)) > 1 &&
          any(as.character(x = ident.2) %in% cellnames.use)) {
        bad.cells <- cellnames.use[which(!as.character(x = ident.2) %in% cellnames.use)]
        if (length(x = bad.cells) > 0) {
          stop(paste0("The following cell names provided to ident.2 are not present in the object: ", paste(bad.cells, collapse = ", ")))
        }
      } else {
        if (is.null(x = ident.2)) {
          ident.2 <- setdiff(x = cellnames.use, y = ident.1)
        } else {
          ident.2 <- WhichCells(object = object, idents = ident.2)
        }
      }
      return(list(cells.1 = ident.1, cells.2 = ident.2))
    }
    
    

    对于 FindMarkers 若 ident.2 = NULL ,那么 IdentsToCells() 函数将会选择除 ident.1 类群以外的剩下所有细胞

    # select which data to use
    assay <- assay %||% DefaultAssay(object = sce)
    data.use <- object[[assay]]
    # cellnames.use 代表所有的细胞
    cellnames.use <-  colnames(x = data.use)
    
     # if NULL for ident.2, use all other cells
     if (is.null(x = ident.2)) {
          ## 选择除 ident.1 类群以外的其他所有细胞,setdiff 为取差集
          ident.2 <- setdiff(x = cellnames.use, y = ident.1)
     } else {
          ident.2 <- WhichCells(object = object, idents = ident.2)
       }
    }
    

    因此,对于 FindMarkers 若 ident.2 = NULL ,计算的是 ident.1 细胞类群与除 ident.1 类群以外的剩下所有细胞相比的差异基因;如果 ident.2 = 某一细胞类群 那么计算的是 ident.1 细胞类群与ident.2 类群相比的差异基因

    FindConservedMarkers

    # 安装必要的包
    BiocManager::install('multtest')
    install.packages('metap')
    install.packages('sn')
    library(Seurat)
    
    # 加载示例数据
    data("pbmc_small")
    # 假设所有细胞一共分为 'g1' 和 'g2' 两种细胞类群
    pbmc_small[['groups']] <- sample(x = c('g1', 'g2'), size = ncol(x = pbmc_small), replace = TRUE)
    FindConservedMarkers(pbmc_small, ident.1 = 0, ident.2 = 1, grouping.var = "groups")
    
    pbmc_small[['groups']]
    > pbmc_small[['groups']]
                   groups
    ATGCCAGAACGACT     g1
    CATGGCCTGTGCAT     g2
    GAACCTGATGAACC     g1
    TGACTGGATTCTCA     g1
    AGTCAGACTGCACA     g2
    TCTGATACACGTGT     g1
    TGGTATCTAAACAG     g1
    GCAGCTCTGTTTCT     g1
    GATATAACACGCAT     g2
    AATGTTGACAGTCA     g2
    
    
    ### FindConservedMarkers 源代码分析
    object = pbmc_small
    ident.1 = 0
    ident.2 = 1
    grouping.var = "groups"
    assay = 'RNA'
    slot = 'data'
    min.cells.group = 3
    meta.method = metap::minimump
    verbose = TRUE
    
    # 假设有两个分组, 分别为 'g1' 和 'g2' 
    # ident.1 和 ident.2 为每一组('g1' 和 'g2') 的细胞分群, ident.1 = 0, ident.2 = 1
    # 提取每个细胞对应分组的信息,即每个细胞分为 'g1' 还是 'g2'
    object.var <- FetchData(object = object, vars = grouping.var)
    # 提取 'g1' 和 'g2' 的细胞
    object <- SetIdent(
      object = object,
      cells = colnames(x = object),
      value = paste(Idents(object = object), object.var[, 1], sep = "_")
    )
    levels.split <- names(x = sort(x = table(object.var[, 1])))
    num.groups <- length(levels.split)
    cells <- list()
    
    # 分别将 'g1' 和 'g2' 的细胞单独取出来,  cells[[1]] 为 'g1' 的细胞, cells[[2]] 为 'g2' 的细胞
    for (i in 1:num.groups) {
      cells[[i]] <- rownames(
        x = object.var[object.var[, 1] == levels.split[i], , drop = FALSE]
      )
    }
    marker.test <- list()
    # do marker tests
    ident.2.save <- ident.2
    # 分别遍历 'g1' 和 'g2' 
    for (i in 1:num.groups) {
      level.use <- levels.split[i]
      ident.use.1 <- paste(ident.1, level.use, sep = "_")
      ident.use.1.exists <- ident.use.1 %in% Idents(object = object)
    
      ident.2 <- ident.2.save
      cells.1 <- WhichCells(object = object, idents = ident.use.1)
      ident.use.2 <- paste(ident.2, level.use, sep = "_")
      ident.use.2.exists <- ident.use.2 %in% Idents(object = object)
      
      ## ident.use.1 = "0_g1" or "0_g2"; ident.use.2 = "1_g1" or "1_g2"
      ## 分别对 'g1' 计算类群 0 与类群 1 的差异基因; 对 'g2' 计算类群 0 与类群 1 的差异基因
      marker.test[[i]] <- FindMarkers(
        object = object,
        assay = assay,
        slot = slot,
        ident.1 = ident.use.1,
        ident.2 = ident.use.2,
        verbose = verbose
      )
      names(x = marker.test)[i] <- levels.split[i]
    }
    ## 过滤 NULL 值
    marker.test <- Filter(f = Negate(f = is.null), x = marker.test)
    ## 对 'g1' 中类群 0 与类群 1 差异基因item和 'g2' 中类群 0 与类群 1 差异基因item取交集
    genes.conserved <- Reduce(
      f = intersect,
      x = lapply(
        X = marker.test,
        FUN = function(x) {
          return(rownames(x = x))
        }
      )
    )
    
    ## 将剩下的 logFC, p_val 整理进去
    markers.conserved <- list()
    for (i in 1:length(x = marker.test)) {
      markers.conserved[[i]] <- marker.test[[i]][genes.conserved, ]
      colnames(x = markers.conserved[[i]]) <- paste(
        names(x = marker.test)[i],
        colnames(x = markers.conserved[[i]]),
        sep = "_"
      )
    }
    
    ## 合并对 'g1' 中类群 0 与类群 1 差异基因item和 'g2' 中类群 0 与类群 1 差异基因item取交集后的表格
    ## 包括 logFC, p_val
    markers.combined <- Reduce(cbind, markers.conserved)
    pval.codes <- colnames(x = markers.combined)[grepl(pattern = "*_p_val$", x = colnames(x = markers.combined))]
    if (length(x = pval.codes) > 1) {
      markers.combined$max_pval <- apply(
        X = markers.combined[, pval.codes, drop = FALSE],
        MARGIN = 1,
        FUN = max
      )
      combined.pval <- data.frame(cp = apply(
        X = markers.combined[, pval.codes, drop = FALSE],
        MARGIN = 1,
        FUN = function(x) {
          return(meta.method(x)$p)
        }
      ))
      meta.method.name <- as.character(x = formals()$meta.method)
      colnames(x = combined.pval) <- paste0(meta.method.name, "_p_val")
      markers.combined <- cbind(markers.combined, combined.pval)
      markers.combined <- markers.combined[order(markers.combined[, paste0(meta.method.name, "_p_val")]), ]
    }
    

    所谓的 FindConservedMarkers 其实是在单细胞分析中,有两个处理组 'g1' 和 'g2' ,而 'g1' 和 'g2' 组中又恰好有相同的细胞亚群 0 和 1,求在 'g1' 中细胞亚群 0 和细胞亚群 1 的差异基因,并且与在 'g2' 中细胞亚群 0 和细胞亚群 1 的差异基因取交集;这部分基因定义为 在 'g1' 和 'g2' 组中,细胞亚群 0 和细胞亚群 1 差异保守的基因

    相关文章

      网友评论

        本文标题:FindAllMarkers, FindMarkers 以及 F

        本文链接:https://www.haomeiwen.com/subject/nwyljdtx.html