美文网首页
Monocle2: differentialGeneTest()

Monocle2: differentialGeneTest()

作者: LET149 | 来源:发表于2023-08-24 09:49 被阅读0次

Function and Manual

image.png

https://github.com/cole-trapnell-lab/monocle-release/blob/master/R/differential_expression.R

#' Test genes for differential expression
#' 
#' Tests each gene for differential expression as a function of pseudotime 
#' or according to other covariates as specified. \code{differentialGeneTest} is
#' Monocle's main differential analysis routine. 
#' It accepts a CellDataSet and two model formulae as input, which specify generalized
#' lineage models as implemented by the \code{VGAM} package. 
#' 
#' @param cds a CellDataSet object upon which to perform this operation
#' @param fullModelFormulaStr a formula string specifying the full model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param reducedModelFormulaStr a formula string specifying the reduced model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param relative_expr Whether to transform expression into relative values.
#' @param cores the number of cores to be used while testing each gene for differential expression.
#' @param verbose Whether to show VGAM errors and warnings. Only valid for cores = 1. 
#' @return a data frame containing the p values and q-values from the likelihood ratio tests on the parallel arrays of models.
#' @importFrom Biobase fData
#' @importFrom stats p.adjust
#' @seealso \code{\link[VGAM]{vglm}}
#' @export
differentialGeneTest <- function(cds, 
                                 fullModelFormulaStr="~sm.ns(Pseudotime, df=3)",
                                 reducedModelFormulaStr="~1", 
                                 relative_expr=TRUE,
                                 cores=1, 
                                 verbose=FALSE
){
    status <- NA
    
    if(class(cds)[1] != "CellDataSet") {
        stop("Error cds is not of type 'CellDataSet'")
    }
    
    all_vars <- c(all.vars(formula(fullModelFormulaStr)), all.vars(formula(reducedModelFormulaStr)))
    
    pd <- pData(cds)
    
    for(i in all_vars) {
        x <- pd[, i]
        if(any((c(Inf, NaN, NA) %in% x))){
            stop("Error: Inf, NaN, or NA values were located in pData of cds in columns mentioned in model terms")
        }
    }
    
    
    if (relative_expr && cds@expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")){
        if (is.null(sizeFactors(cds)) || sum(is.na(sizeFactors(cds)))){
            stop("Error: to call this function with relative_expr==TRUE, you must first call estimateSizeFactors() on the CellDataSet.")
        }
    }
    
    if (cores > 1){
        diff_test_res<-mcesApply(cds, 1, diff_test_helper, 
                                 c("BiocGenerics", "VGAM", "Matrix"), 
                                 cores=cores, 
                                 fullModelFormulaStr=fullModelFormulaStr,
                                 reducedModelFormulaStr=reducedModelFormulaStr,
                                 expressionFamily=cds@expressionFamily,
                                 relative_expr=relative_expr,
                                 disp_func=cds@dispFitInfo[["blind"]]$disp_func,
                                 verbose=verbose
                                 #       ,
                                 # backup_method = backup_method, 
                                 # use_epislon = use_epislon, 
                                 # stepsize = stepsize
        )
        diff_test_res
    }else{
        diff_test_res<-smartEsApply(cds,1,diff_test_helper, 
                                    convert_to_dense=TRUE,
                                    fullModelFormulaStr=fullModelFormulaStr,
                                    reducedModelFormulaStr=reducedModelFormulaStr, 
                                    expressionFamily=cds@expressionFamily, 
                                    relative_expr=relative_expr,
                                    disp_func=cds@dispFitInfo[["blind"]]$disp_func,
                                    verbose=verbose
                                    #          ,
                                    # backup_method = backup_method, 
                                    # use_epislon = use_epislon,
                                    # stepsize = stepsize
                                    
        )
        diff_test_res
    }
    
    diff_test_res <- do.call(rbind.data.frame, diff_test_res)
    
    diff_test_res$qval <- 1
    diff_test_res$qval[which(diff_test_res$status == 'OK')] <- p.adjust(subset(diff_test_res, status == 'OK')[, 'pval'], method="BH")
    
    diff_test_res <- merge(diff_test_res, fData(cds), by="row.names")
    row.names(diff_test_res) <- diff_test_res[, 1] #remove the first column and set the row names to the first column
    diff_test_res[, 1] <- NULL 
    
    diff_test_res[row.names(cds), ] # make sure gene name ordering in the DEG test result is the same as the CDS
}



所有需要的函数和参数

截屏2023-03-09 21.39.17.png
#' @importFrom stats var
calculate_NB_dispersion_hint <- function(disp_func, f_expression, expr_selection_func=mean)
{
    expr_hint <- expr_selection_func(f_expression)
    if (expr_hint > 0 && is.null(expr_hint) == FALSE) {
        disp_guess_fit <- disp_func(expr_hint)
        
        # For NB: Var(Y)=mu*(1+mu/k)
        f_expression_var <- var(f_expression)
        f_expression_mean <- mean(f_expression)
        
        disp_guess_meth_moments <- f_expression_var - f_expression_mean
        disp_guess_meth_moments <- disp_guess_meth_moments / (f_expression_mean^2) #fix the calculation of k
        
        #return (max(disp_guess_fit, disp_guess_meth_moments))
        return (disp_guess_fit)
    }
    return (NULL)
}


#--------------------------------------------------------#
#--------------------------------------------------------#
# differential_expression.R

#' @title Helper function for parallel differential expression testing
#' @param x test
#' @param fullModelFormulaStr a formula string specifying the full model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param reducedModelFormulaStr a formula string specifying the reduced model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param expressionFamily specifies the VGAM family function used for expression responses
#' @param relative_expr Whether to transform expression into relative values
#' @param weights test
#' @param disp_func test
#' @param verbose Whether to show VGAM errors and warnings. Only valid for cores = 1.
#' @name diff_test_helper
#' @description test
diff_test_helper <- function(x, 
                             fullModelFormulaStr, 
                             reducedModelFormulaStr, 
                             expressionFamily, 
                             relative_expr,
                             weights,
                             disp_func=NULL,
                             verbose=FALSE
){ 
    
    reducedModelFormulaStr <- paste("f_expression", reducedModelFormulaStr, sep="")
    fullModelFormulaStr <- paste("f_expression", fullModelFormulaStr, sep="")
    
    x_orig <- x
    disp_guess <- 0
    
    if (expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")){
        if (relative_expr == TRUE)
        {
            x <- x / Size_Factor
        }
        f_expression <- round(x)
        if (is.null(disp_func) == FALSE){
            disp_guess <- calculate_NB_dispersion_hint(disp_func, round(x_orig))
            if (is.null(disp_guess) == FALSE && disp_guess > 0 && is.na(disp_guess) == FALSE  ) {
                # FIXME: In theory, we could lose some user-provided parameters here
                # e.g. if users supply zero=NULL or something. 
                if (expressionFamily@vfamily == "negbinomial")
                    expressionFamily <- negbinomial(isize=1/disp_guess)
                else
                    expressionFamily <- negbinomial.size(size=1/disp_guess)
            }
        }
    }else if (expressionFamily@vfamily %in% c("uninormal")){
        f_expression <- x
    }else if (expressionFamily@vfamily %in% c("binomialff")){
        f_expression <- x
        #f_expression[f_expression > 1] <- 1
    }else{
        f_expression <- log10(x)
    }
    
    test_res <- tryCatch({
        if (expressionFamily@vfamily %in% c("binomialff")){
            if (verbose){
                full_model_fit <- VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily)
                reduced_model_fit <- VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily)                         
            }else{
                full_model_fit <- suppressWarnings(VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily))
                reduced_model_fit <- suppressWarnings(VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily))                    
            }
        }else{
            if (verbose){
                full_model_fit <- VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily)
                reduced_model_fit <- VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily)                         
            }else{
                full_model_fit <- suppressWarnings(VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily))
                reduced_model_fit <- suppressWarnings(VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily))                    
            }
        }
        
        #print(full_model_fit)
        #print(coef(reduced_model_fit))
        compareModels(list(full_model_fit), list(reduced_model_fit))
    }, 
    #warning = function(w) { FM_fit },
    error = function(e) { 
        if(verbose)
            print (e);
        data.frame(status = "FAIL", family=expressionFamily@vfamily, pval=1.0, qval=1.0)
        #data.frame(status = "FAIL", pval=1.0) 
    }
    )
    test_res
}

#--------------------------------------------------------#
#--------------------------------------------------------#
smartEsApply <- function(X, MARGIN, FUN, convert_to_dense, ...) {
    parent <- environment(FUN)
    if (is.null(parent))
        parent <- emptyenv()
    e1 <- new.env(parent=parent)
    multiassign(names(pData(X)), pData(X), envir=e1)
    environment(FUN) <- e1
    
    if (isSparseMatrix(exprs(X))){
        res <- sparseApply(exprs(X), MARGIN, FUN, convert_to_dense, ...)
    }else{
        res <- apply(exprs(X), MARGIN, FUN, ...)
    }
    
    if (MARGIN == 1)
    {
        names(res) <- row.names(X)
    }else{
        names(res) <- colnames(X)
    }
    
    res
}

#--------------------------------------------------------#
#--------------------------------------------------------#
#' Test genes for differential expression
#' 
#' Tests each gene for differential expression as a function of pseudotime 
#' or according to other covariates as specified. \code{differentialGeneTest} is
#' Monocle's main differential analysis routine. 
#' It accepts a CellDataSet and two model formulae as input, which specify generalized
#' lineage models as implemented by the \code{VGAM} package. 
#' 
#' @param cds a CellDataSet object upon which to perform this operation
#' @param fullModelFormulaStr a formula string specifying the full model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param reducedModelFormulaStr a formula string specifying the reduced model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param relative_expr Whether to transform expression into relative values.
#' @param cores the number of cores to be used while testing each gene for differential expression.
#' @param verbose Whether to show VGAM errors and warnings. Only valid for cores = 1. 
#' @return a data frame containing the p values and q-values from the likelihood ratio tests on the parallel arrays of models.
#' @importFrom Biobase fData
#' @importFrom stats p.adjust
#' @seealso \code{\link[VGAM]{vglm}}
#' @export
differentialGeneTest <- function(cds, 
                                 fullModelFormulaStr="~sm.ns(Pseudotime, df=3)",
                                 reducedModelFormulaStr="~1", 
                                 relative_expr=TRUE,
                                 cores=1, 
                                 verbose=FALSE
){
    status <- NA
    
    if(class(cds)[1] != "CellDataSet") {
        stop("Error cds is not of type 'CellDataSet'")
    }
    
    all_vars <- c(all.vars(formula(fullModelFormulaStr)), all.vars(formula(reducedModelFormulaStr)))
    
    pd <- pData(cds)
    
    for(i in all_vars) {
        x <- pd[, i]
        if(any((c(Inf, NaN, NA) %in% x))){
            stop("Error: Inf, NaN, or NA values were located in pData of cds in columns mentioned in model terms")
        }
    }
    
    
    if (relative_expr && cds@expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")){
        if (is.null(sizeFactors(cds)) || sum(is.na(sizeFactors(cds)))){
            stop("Error: to call this function with relative_expr==TRUE, you must first call estimateSizeFactors() on the CellDataSet.")
        }
    }
    
    if (cores > 1){
        diff_test_res<-mcesApply(cds, 1, diff_test_helper, 
                                 c("BiocGenerics", "VGAM", "Matrix"), 
                                 cores=cores, 
                                 fullModelFormulaStr=fullModelFormulaStr,
                                 reducedModelFormulaStr=reducedModelFormulaStr,
                                 expressionFamily=cds@expressionFamily,
                                 relative_expr=relative_expr,
                                 disp_func=cds@dispFitInfo[["blind"]]$disp_func,
                                 verbose=verbose
                                 #       ,
                                 # backup_method = backup_method, 
                                 # use_epislon = use_epislon, 
                                 # stepsize = stepsize
        )
        diff_test_res
    }else{
        diff_test_res<-smartEsApply(cds,1,diff_test_helper, 
                                    convert_to_dense=TRUE,
                                    fullModelFormulaStr=fullModelFormulaStr,
                                    reducedModelFormulaStr=reducedModelFormulaStr, 
                                    expressionFamily=cds@expressionFamily, 
                                    relative_expr=relative_expr,
                                    disp_func=cds@dispFitInfo[["blind"]]$disp_func,
                                    verbose=verbose
                                    #          ,
                                    # backup_method = backup_method, 
                                    # use_epislon = use_epislon,
                                    # stepsize = stepsize
                                    
        )
        diff_test_res
    }
    
    diff_test_res <- do.call(rbind.data.frame, diff_test_res)
    
    diff_test_res$qval <- 1
    diff_test_res$qval[which(diff_test_res$status == 'OK')] <- p.adjust(subset(diff_test_res, status == 'OK')[, 'pval'], method="BH")
    
    diff_test_res <- merge(diff_test_res, fData(cds), by="row.names")
    row.names(diff_test_res) <- diff_test_res[, 1] #remove the first column and set the row names to the first column
    diff_test_res[, 1] <- NULL 
    
    diff_test_res[row.names(cds), ] # make sure gene name ordering in the DEG test result is the same as the CDS
}



核心R包:

VGAM: Vector Generalized Additive Models (向量广义可加模型)

VGAM:sm.ns()

拟合曲线:

VGAM:vglm(): 用来拟合 向量广义线性模型 (vglm)

vglm: Vector Generalized Linear Models (向量广义线性模型)

https://rdrr.io/cran/VGAM/man/vglm.html

假设检验:

VGAM::lrtest()

相关文章

网友评论

      本文标题:Monocle2: differentialGeneTest()

      本文链接:https://www.haomeiwen.com/subject/rahlldtx.html