Function and Manual
image.png
https://github.com/cole-trapnell-lab/monocle-release/blob/master/R/differential_expression.R
#' Test genes for differential expression
#'
#' Tests each gene for differential expression as a function of pseudotime
#' or according to other covariates as specified. \code{differentialGeneTest} is
#' Monocle's main differential analysis routine.
#' It accepts a CellDataSet and two model formulae as input, which specify generalized
#' lineage models as implemented by the \code{VGAM} package.
#'
#' @param cds a CellDataSet object upon which to perform this operation
#' @param fullModelFormulaStr a formula string specifying the full model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param reducedModelFormulaStr a formula string specifying the reduced model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param relative_expr Whether to transform expression into relative values.
#' @param cores the number of cores to be used while testing each gene for differential expression.
#' @param verbose Whether to show VGAM errors and warnings. Only valid for cores = 1.
#' @return a data frame containing the p values and q-values from the likelihood ratio tests on the parallel arrays of models.
#' @importFrom Biobase fData
#' @importFrom stats p.adjust
#' @seealso \code{\link[VGAM]{vglm}}
#' @export
differentialGeneTest <- function(cds,
fullModelFormulaStr="~sm.ns(Pseudotime, df=3)",
reducedModelFormulaStr="~1",
relative_expr=TRUE,
cores=1,
verbose=FALSE
){
status <- NA
if(class(cds)[1] != "CellDataSet") {
stop("Error cds is not of type 'CellDataSet'")
}
all_vars <- c(all.vars(formula(fullModelFormulaStr)), all.vars(formula(reducedModelFormulaStr)))
pd <- pData(cds)
for(i in all_vars) {
x <- pd[, i]
if(any((c(Inf, NaN, NA) %in% x))){
stop("Error: Inf, NaN, or NA values were located in pData of cds in columns mentioned in model terms")
}
}
if (relative_expr && cds@expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")){
if (is.null(sizeFactors(cds)) || sum(is.na(sizeFactors(cds)))){
stop("Error: to call this function with relative_expr==TRUE, you must first call estimateSizeFactors() on the CellDataSet.")
}
}
if (cores > 1){
diff_test_res<-mcesApply(cds, 1, diff_test_helper,
c("BiocGenerics", "VGAM", "Matrix"),
cores=cores,
fullModelFormulaStr=fullModelFormulaStr,
reducedModelFormulaStr=reducedModelFormulaStr,
expressionFamily=cds@expressionFamily,
relative_expr=relative_expr,
disp_func=cds@dispFitInfo[["blind"]]$disp_func,
verbose=verbose
# ,
# backup_method = backup_method,
# use_epislon = use_epislon,
# stepsize = stepsize
)
diff_test_res
}else{
diff_test_res<-smartEsApply(cds,1,diff_test_helper,
convert_to_dense=TRUE,
fullModelFormulaStr=fullModelFormulaStr,
reducedModelFormulaStr=reducedModelFormulaStr,
expressionFamily=cds@expressionFamily,
relative_expr=relative_expr,
disp_func=cds@dispFitInfo[["blind"]]$disp_func,
verbose=verbose
# ,
# backup_method = backup_method,
# use_epislon = use_epislon,
# stepsize = stepsize
)
diff_test_res
}
diff_test_res <- do.call(rbind.data.frame, diff_test_res)
diff_test_res$qval <- 1
diff_test_res$qval[which(diff_test_res$status == 'OK')] <- p.adjust(subset(diff_test_res, status == 'OK')[, 'pval'], method="BH")
diff_test_res <- merge(diff_test_res, fData(cds), by="row.names")
row.names(diff_test_res) <- diff_test_res[, 1] #remove the first column and set the row names to the first column
diff_test_res[, 1] <- NULL
diff_test_res[row.names(cds), ] # make sure gene name ordering in the DEG test result is the same as the CDS
}
所有需要的函数和参数
截屏2023-03-09 21.39.17.png
#' @importFrom stats var
calculate_NB_dispersion_hint <- function(disp_func, f_expression, expr_selection_func=mean)
{
expr_hint <- expr_selection_func(f_expression)
if (expr_hint > 0 && is.null(expr_hint) == FALSE) {
disp_guess_fit <- disp_func(expr_hint)
# For NB: Var(Y)=mu*(1+mu/k)
f_expression_var <- var(f_expression)
f_expression_mean <- mean(f_expression)
disp_guess_meth_moments <- f_expression_var - f_expression_mean
disp_guess_meth_moments <- disp_guess_meth_moments / (f_expression_mean^2) #fix the calculation of k
#return (max(disp_guess_fit, disp_guess_meth_moments))
return (disp_guess_fit)
}
return (NULL)
}
#--------------------------------------------------------#
#--------------------------------------------------------#
# differential_expression.R
#' @title Helper function for parallel differential expression testing
#' @param x test
#' @param fullModelFormulaStr a formula string specifying the full model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param reducedModelFormulaStr a formula string specifying the reduced model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param expressionFamily specifies the VGAM family function used for expression responses
#' @param relative_expr Whether to transform expression into relative values
#' @param weights test
#' @param disp_func test
#' @param verbose Whether to show VGAM errors and warnings. Only valid for cores = 1.
#' @name diff_test_helper
#' @description test
diff_test_helper <- function(x,
fullModelFormulaStr,
reducedModelFormulaStr,
expressionFamily,
relative_expr,
weights,
disp_func=NULL,
verbose=FALSE
){
reducedModelFormulaStr <- paste("f_expression", reducedModelFormulaStr, sep="")
fullModelFormulaStr <- paste("f_expression", fullModelFormulaStr, sep="")
x_orig <- x
disp_guess <- 0
if (expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")){
if (relative_expr == TRUE)
{
x <- x / Size_Factor
}
f_expression <- round(x)
if (is.null(disp_func) == FALSE){
disp_guess <- calculate_NB_dispersion_hint(disp_func, round(x_orig))
if (is.null(disp_guess) == FALSE && disp_guess > 0 && is.na(disp_guess) == FALSE ) {
# FIXME: In theory, we could lose some user-provided parameters here
# e.g. if users supply zero=NULL or something.
if (expressionFamily@vfamily == "negbinomial")
expressionFamily <- negbinomial(isize=1/disp_guess)
else
expressionFamily <- negbinomial.size(size=1/disp_guess)
}
}
}else if (expressionFamily@vfamily %in% c("uninormal")){
f_expression <- x
}else if (expressionFamily@vfamily %in% c("binomialff")){
f_expression <- x
#f_expression[f_expression > 1] <- 1
}else{
f_expression <- log10(x)
}
test_res <- tryCatch({
if (expressionFamily@vfamily %in% c("binomialff")){
if (verbose){
full_model_fit <- VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily)
reduced_model_fit <- VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily)
}else{
full_model_fit <- suppressWarnings(VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily))
reduced_model_fit <- suppressWarnings(VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily))
}
}else{
if (verbose){
full_model_fit <- VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily)
reduced_model_fit <- VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily)
}else{
full_model_fit <- suppressWarnings(VGAM::vglm(as.formula(fullModelFormulaStr), epsilon=1e-1, family=expressionFamily))
reduced_model_fit <- suppressWarnings(VGAM::vglm(as.formula(reducedModelFormulaStr), epsilon=1e-1, family=expressionFamily))
}
}
#print(full_model_fit)
#print(coef(reduced_model_fit))
compareModels(list(full_model_fit), list(reduced_model_fit))
},
#warning = function(w) { FM_fit },
error = function(e) {
if(verbose)
print (e);
data.frame(status = "FAIL", family=expressionFamily@vfamily, pval=1.0, qval=1.0)
#data.frame(status = "FAIL", pval=1.0)
}
)
test_res
}
#--------------------------------------------------------#
#--------------------------------------------------------#
smartEsApply <- function(X, MARGIN, FUN, convert_to_dense, ...) {
parent <- environment(FUN)
if (is.null(parent))
parent <- emptyenv()
e1 <- new.env(parent=parent)
multiassign(names(pData(X)), pData(X), envir=e1)
environment(FUN) <- e1
if (isSparseMatrix(exprs(X))){
res <- sparseApply(exprs(X), MARGIN, FUN, convert_to_dense, ...)
}else{
res <- apply(exprs(X), MARGIN, FUN, ...)
}
if (MARGIN == 1)
{
names(res) <- row.names(X)
}else{
names(res) <- colnames(X)
}
res
}
#--------------------------------------------------------#
#--------------------------------------------------------#
#' Test genes for differential expression
#'
#' Tests each gene for differential expression as a function of pseudotime
#' or according to other covariates as specified. \code{differentialGeneTest} is
#' Monocle's main differential analysis routine.
#' It accepts a CellDataSet and two model formulae as input, which specify generalized
#' lineage models as implemented by the \code{VGAM} package.
#'
#' @param cds a CellDataSet object upon which to perform this operation
#' @param fullModelFormulaStr a formula string specifying the full model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param reducedModelFormulaStr a formula string specifying the reduced model in differential expression tests (i.e. likelihood ratio tests) for each gene/feature.
#' @param relative_expr Whether to transform expression into relative values.
#' @param cores the number of cores to be used while testing each gene for differential expression.
#' @param verbose Whether to show VGAM errors and warnings. Only valid for cores = 1.
#' @return a data frame containing the p values and q-values from the likelihood ratio tests on the parallel arrays of models.
#' @importFrom Biobase fData
#' @importFrom stats p.adjust
#' @seealso \code{\link[VGAM]{vglm}}
#' @export
differentialGeneTest <- function(cds,
fullModelFormulaStr="~sm.ns(Pseudotime, df=3)",
reducedModelFormulaStr="~1",
relative_expr=TRUE,
cores=1,
verbose=FALSE
){
status <- NA
if(class(cds)[1] != "CellDataSet") {
stop("Error cds is not of type 'CellDataSet'")
}
all_vars <- c(all.vars(formula(fullModelFormulaStr)), all.vars(formula(reducedModelFormulaStr)))
pd <- pData(cds)
for(i in all_vars) {
x <- pd[, i]
if(any((c(Inf, NaN, NA) %in% x))){
stop("Error: Inf, NaN, or NA values were located in pData of cds in columns mentioned in model terms")
}
}
if (relative_expr && cds@expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")){
if (is.null(sizeFactors(cds)) || sum(is.na(sizeFactors(cds)))){
stop("Error: to call this function with relative_expr==TRUE, you must first call estimateSizeFactors() on the CellDataSet.")
}
}
if (cores > 1){
diff_test_res<-mcesApply(cds, 1, diff_test_helper,
c("BiocGenerics", "VGAM", "Matrix"),
cores=cores,
fullModelFormulaStr=fullModelFormulaStr,
reducedModelFormulaStr=reducedModelFormulaStr,
expressionFamily=cds@expressionFamily,
relative_expr=relative_expr,
disp_func=cds@dispFitInfo[["blind"]]$disp_func,
verbose=verbose
# ,
# backup_method = backup_method,
# use_epislon = use_epislon,
# stepsize = stepsize
)
diff_test_res
}else{
diff_test_res<-smartEsApply(cds,1,diff_test_helper,
convert_to_dense=TRUE,
fullModelFormulaStr=fullModelFormulaStr,
reducedModelFormulaStr=reducedModelFormulaStr,
expressionFamily=cds@expressionFamily,
relative_expr=relative_expr,
disp_func=cds@dispFitInfo[["blind"]]$disp_func,
verbose=verbose
# ,
# backup_method = backup_method,
# use_epislon = use_epislon,
# stepsize = stepsize
)
diff_test_res
}
diff_test_res <- do.call(rbind.data.frame, diff_test_res)
diff_test_res$qval <- 1
diff_test_res$qval[which(diff_test_res$status == 'OK')] <- p.adjust(subset(diff_test_res, status == 'OK')[, 'pval'], method="BH")
diff_test_res <- merge(diff_test_res, fData(cds), by="row.names")
row.names(diff_test_res) <- diff_test_res[, 1] #remove the first column and set the row names to the first column
diff_test_res[, 1] <- NULL
diff_test_res[row.names(cds), ] # make sure gene name ordering in the DEG test result is the same as the CDS
}
核心R包:
VGAM:
Vector Generalized Additive Models (向量广义可加模型)
VGAM:sm.ns()
拟合曲线:
VGAM:vglm()
: 用来拟合 向量广义线性模型 (vglm)vglm:
Vector Generalized Linear Models (向量广义线性模型)
假设检验:
VGAM::lrtest()
网友评论