From 071e2bbf9a97f960a3ac51ca45c883db540b2bae Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Sun, 13 Oct 2024 13:30:30 -0400 Subject: [PATCH] sped up matrix (pseudo)inversion throughout -- related to #241 and #246 --- DESCRIPTION | 1 + NAMESPACE | 2 -- R/RcppExports.R | 4 ++++ R/biasCorrectGEE.R | 12 ++++++------ R/marge2.R | 2 +- R/plotModels.R | 2 ++ R/score_fun_gee.R | 3 +-- R/score_fun_glm.R | 6 ++---- R/stat_out_score_gee_null.R | 11 +++++------ R/stat_out_score_glm_null.R | 12 +++++------- R/waldTestGEE.R | 6 ++++-- src/RcppExports.cpp | 18 ++++++++++++++++-- src/eigenMapInvert.cpp | 4 ++-- src/eigenMapMatMult.cpp | 8 ++++---- src/eigenMapPseudoInverse.cpp | 20 ++++++++++++++++++++ 15 files changed, 73 insertions(+), 38 deletions(-) create mode 100644 src/eigenMapPseudoInverse.cpp diff --git a/DESCRIPTION b/DESCRIPTION index bd1a9bd..1b793c5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -58,6 +58,7 @@ Suggests: Seurat, bluster, cluster, + speedglm, rmarkdown, gridExtra, BiocStyle, diff --git a/NAMESPACE b/NAMESPACE index 3c85fd4..58ccf4a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,9 +39,7 @@ importFrom(MASS,theta.mm) importFrom(Matrix,Matrix) importFrom(Matrix,bdiag) importFrom(Matrix,colSums) -importFrom(Matrix,diag) importFrom(Matrix,rowMeans) -importFrom(Matrix,solve) importFrom(Matrix,t) importFrom(Rcpp,sourceCpp) importFrom(RcppEigen,fastLmPure) diff --git a/R/RcppExports.R b/R/RcppExports.R index 55561f2..8208f2e 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -9,3 +9,7 @@ eigenMapMatMult <- function(A, B, n_cores = 1L) { .Call(`_scLANE_eigenMapMatMult`, A, B, n_cores) } +eigenMapPseudoInverse <- function(A, tolerance = 1e-6, n_cores = 1L) { + .Call(`_scLANE_eigenMapPseudoInverse`, A, tolerance, n_cores) +} + diff --git a/R/biasCorrectGEE.R b/R/biasCorrectGEE.R index 98eed44..665a814 100644 --- a/R/biasCorrectGEE.R +++ b/R/biasCorrectGEE.R @@ -5,7 +5,6 @@ #' @description This functions implements several bias-correction methods for the GEE sandwich variance-covariance matrix; they are to be used when the number of subjects is small or the numer of timepoints per-subject is very large. #' @importFrom stats fitted.values cor #' @importFrom dplyr with_groups summarise -#' @importFrom MASS ginv #' @importFrom Matrix bdiag #' @param fitted.model The fitted model of class \code{geem} returned by \code{\link{marge2}}. Defaults to NULL. #' @param correction.method A string specifying the correction method to be used. Currently supported options are "df" and "kc". Defaults to "kc". @@ -121,9 +120,10 @@ biasCorrectGEE <- function(fitted.model = NULL, } sigma2 <- fitted.model$phi Var_Yi <- sigma2 * R_i - Var_Yi_inv <- try({ - MASS::ginv(Var_Yi) - }, silent = TRUE) + Var_Yi_inv <- try({ eigenMapMatrixInvert(Var_Yi, n_cores = 1L) }, silent = TRUE) + if (inherits(Var_Yi_inv, "try-error")) { + Var_Yi_inv <- try({ eigenMapPseudoInverse(Var_Yi, n_cores = 1L) }, silent = TRUE) + } if (inherits(Var_Yi_inv, "try-error")) { if (verbose) { warning(paste0("Covariance matrix inversion failed for subject ", @@ -137,9 +137,9 @@ biasCorrectGEE <- function(fitted.model = NULL, W <- as.matrix(Matrix::bdiag(cov_matrices)) X <- fitted.model$X XWX <- t(X) %*% W %*% X - XWX_inv <- try({ eigenMapMatrixInvert(XWX, n_cores = 1) }, silent = TRUE) + XWX_inv <- try({ eigenMapMatrixInvert(XWX, n_cores = 1L) }, silent = TRUE) if (inherits(XWX_inv, "try-error")) { - XWX_inv <- MASS::ginv(XWX) + XWX_inv <- eigenMapPseudoInverse(XWX, n_cores = 1L) } H <- X %*% XWX_inv %*% t(X) %*% W tr_H <- sum(diag(H)) diff --git a/R/marge2.R b/R/marge2.R index 9e308d7..1e010f6 100644 --- a/R/marge2.R +++ b/R/marge2.R @@ -853,7 +853,7 @@ marge2 <- function(X_pred = NULL, data = model_df, family = MASS::negative.binomial(theta_hat, link = "log"), trace = FALSE, - model = TRUE, + model = FALSE, y = FALSE, fitted = TRUE) } diff --git a/R/plotModels.R b/R/plotModels.R index e20ef00..5658c8c 100644 --- a/R/plotModels.R +++ b/R/plotModels.R @@ -64,6 +64,8 @@ plotModels <- function(test.dyn.res = NULL, # check inputs if (is.null(expr.mat) || is.null(pt) || is.null(gene) || is.null(test.dyn.res)) { stop("You forgot one or more of the arguments to plotModels().") } if ((is.gee || is.glmm) && is.null(id.vec)) { stop("id.vec must be provided to plotModels() when running in GEE or GLMM mode.") } + cor.structure <- tolower(cor.structure) + if (is.gee && !(cor.structure %in% c("ar1", "independence", "exchangeable"))) { stop("GEE models require a specified correlation structure.") } # get raw counts from SingleCellExperiment, Seurat, or CellDataSets object if (inherits(expr.mat, "SingleCellExperiment")) { expr.mat <- BiocGenerics::counts(expr.mat) diff --git a/R/score_fun_gee.R b/R/score_fun_gee.R index 84a69cf..0c29751 100644 --- a/R/score_fun_gee.R +++ b/R/score_fun_gee.R @@ -5,7 +5,6 @@ #' @author David I. Warton #' @author Jack R. Leary #' @importFrom RcppEigen fastLmPure -#' @importFrom Matrix solve #' @importFrom MASS ginv #' @description Calculate the score statistic for a GEE model. #' @param Y The response variable. Defaults to NULL. @@ -98,7 +97,7 @@ score_fun_gee <- function(Y = NULL, B = J21_transpose, n_cores = 1) Sigma <- Sigma22 - temp_prod_1 - temp_prod_2 + temp_prod_3 - Sigma_inv <- try({ Matrix::solve(Sigma) }, silent = TRUE) + Sigma_inv <- try({ eigenMapMatrixInvert(Sigma, n_cores = 1L) }, silent = TRUE) if (inherits(Sigma_inv, "try-error")) { Sigma_inv <- MASS::ginv(Sigma) } diff --git a/R/score_fun_glm.R b/R/score_fun_glm.R index 96e20fa..36b6d1f 100644 --- a/R/score_fun_glm.R +++ b/R/score_fun_glm.R @@ -5,8 +5,6 @@ #' @author David I. Warton #' @author Jack R. Leary #' @importFrom RcppEigen fastLmPure -#' @importFrom Matrix solve -#' @importFrom MASS ginv #' @description Calculate the score statistic for a GLM model. #' @param Y The response variable. Defaults to NULL. #' @param VS.est_list A product of matrices. Defaults to NULL. @@ -54,9 +52,9 @@ score_fun_glm <- function(Y = NULL, B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i), B = XA, n_cores = 1) - XVX_22 <- try({ Matrix::solve(inv.XVX_22) }, silent = TRUE) + XVX_22 <- try({ eigenMapMatrixInvert(inv.XVX_22, n_cores = 1L) }, silent = TRUE) if (inherits(XVX_22, "try-error")) { - XVX_22 <- MASS::ginv(inv.XVX_22) + XVX_22 <- eigenMapPseudoInverse(inv.XVX_22, n_cores = 1L) } temp_prod <- eigenMapMatMult(A = B.est, B = XVX_22, diff --git a/R/stat_out_score_gee_null.R b/R/stat_out_score_gee_null.R index 19f1802..f684d39 100644 --- a/R/stat_out_score_gee_null.R +++ b/R/stat_out_score_gee_null.R @@ -5,9 +5,8 @@ #' @author David I. Warton #' @author Jack R. Leary #' @importFrom geeM geem -#' @importFrom MASS negative.binomial ginv +#' @importFrom MASS negative.binomial #' @importFrom stats fitted.values -#' @importFrom Matrix solve #' @description A function that calculates parts of the score statistic for GEEs only (it is used for the full path for forward selection). #' @param Y The response variable Defaults to NULL. #' @param B_null The design matrix matrix under the null model Defaults to NULL. @@ -73,9 +72,9 @@ stat_out_score_gee_null <- function(Y = NULL, V_est_i <- eigenMapMatMult(A = temp_prod, B = diag_sqrt_V_est, n_cores = 1) - V_est_i_inv <- try({ Matrix::solve(V_est_i) }, silent = TRUE) + V_est_i_inv <- try({ eigenMapMatrixInvert(V_est_i, n_cores = 1L) }, silent = TRUE) if (inherits(V_est_i_inv, "try-error")) { - V_est_i_inv <- MASS::ginv(V_est_i) + V_est_i_inv <- eigenMapPseudoInverse(V_est_i, n_cores = 1L) } S_est_i <- t(Y)[temp_seq_n] - mu_est_sub temp_prod <- eigenMapMatMult(A = S_est_i, @@ -121,9 +120,9 @@ stat_out_score_gee_null <- function(Y = NULL, if (nrow(J11) == 1 && ncol(J11) == 1) { J11_inv <- 1 / J11 } else { - J11_inv <- try({ Matrix::solve(J11) }, silent = TRUE) + J11_inv <- try({ eigenMapMatrixInvert(J11, n_cores = 1L) }, silent = TRUE) if (inherits(J11_inv, "try-error")) { - J11_inv <- MASS::ginv(J11) + J11_inv <- eigenMapPseudoInverse(J11, n_cores = 1L) } } temp_prod <- eigenMapMatMult(A = J11_inv, diff --git a/R/stat_out_score_glm_null.R b/R/stat_out_score_glm_null.R index 25f19cd..0d30dc5 100644 --- a/R/stat_out_score_glm_null.R +++ b/R/stat_out_score_glm_null.R @@ -6,8 +6,6 @@ #' @author Jack R. Leary #' @importFrom gamlss gamlss #' @importFrom stats fitted.values -#' @importFrom Matrix diag t -#' @importFrom MASS ginv #' @description A function that calculates parts of the score statistic for GLMs only (it is used for the full path for forward selection). #' @param Y : The response variable Defaults to NULL. #' @param B_null : Design matrix under the null model. Defaults to NULL. @@ -29,8 +27,8 @@ stat_out_score_glm_null <- function(Y = NULL, B_null = NULL) { mu_est <- as.matrix(stats::fitted.values(ests)) V_est <- mu_est * (1 + mu_est * sigma_est) # Type I NB variance = mu (1 + mu * sigma); sigma = 1 / theta VS_est_list <- (Y - mu_est) / V_est - mu_V_diag <- Matrix::diag(c(mu_est^2 / V_est)) - temp_prod <- eigenMapMatMult(A = Matrix::t(B_null), + mu_V_diag <- diag(c(mu_est^2 / V_est)) + temp_prod <- eigenMapMatMult(A = t(B_null), B = mu_V_diag, n_cores = 1) A_list_inv <- eigenMapMatMult(A = temp_prod, @@ -39,12 +37,12 @@ stat_out_score_glm_null <- function(Y = NULL, B_null = NULL) { if (ncol(A_list_inv) == 1 && nrow(A_list_inv) == 1) { A_list <- 1 / A_list_inv } else { - A_list <- try({ solve(A_list_inv) }, silent = TRUE) + A_list <- try({ eigenMapMatrixInvert(A_list_inv, n_cores = 1L) }, silent = TRUE) if (inherits(A_list, "try-error")) { - A_list <- MASS::ginv(A_list_inv) + A_list <- eigenMapPseudoInverse(A_list_inv, n_cores = 1L) } } - B1_list <- eigenMapMatMult(A = Matrix::t(B_null), + B1_list <- eigenMapMatMult(A = t(B_null), B = mu_V_diag, n_cores = 1) res <- list(VS.est_list = VS_est_list, diff --git a/R/waldTestGEE.R b/R/waldTestGEE.R index 294c955..6acbc43 100644 --- a/R/waldTestGEE.R +++ b/R/waldTestGEE.R @@ -3,7 +3,6 @@ #' @name waldTestGEE #' @author Jack R. Leary #' @description Performs a basic Wald test to determine whether an alternate model is significantly better than a nested null model. This is the GEE equivalent (kind of) of \code{\link{modelLRT}}. Be careful with small sample sizes. -#' @importFrom MASS ginv #' @importFrom stats pchisq #' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL. #' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL. @@ -74,7 +73,10 @@ waldTestGEE <- function(mod.1 = NULL, } vcov_mat <- vcov_mat[coef_idx, coef_idx] wald_test_stat <- try({ - vcov_mat_inv <- eigenMapMatrixInvert(vcov_mat, n_cores = 1) + vcov_mat_inv <- eigenMapMatrixInvert(vcov_mat, n_cores = 1L) + if (inherits(vcov_mat_inv, "try-error")) { + vcov_mat_inv <- eigenMapPseudoInverse(vcov_mat, n_cores = 1L) + } as.numeric(crossprod(coef_vals, vcov_mat_inv) %*% coef_vals) }, silent = TRUE) if (inherits(wald_test_stat, "try-error")) { diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index ade9a80..afef5d0 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -12,7 +12,7 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // eigenMapMatrixInvert -SEXP eigenMapMatrixInvert(const Eigen::Map A, int n_cores); +Eigen::MatrixXd eigenMapMatrixInvert(const Eigen::Map A, int n_cores); RcppExport SEXP _scLANE_eigenMapMatrixInvert(SEXP ASEXP, SEXP n_coresSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; @@ -24,7 +24,7 @@ BEGIN_RCPP END_RCPP } // eigenMapMatMult -SEXP eigenMapMatMult(const Eigen::Map A, const Eigen::Map B, int n_cores); +Eigen::MatrixXd eigenMapMatMult(const Eigen::Map A, const Eigen::Map B, int n_cores); RcppExport SEXP _scLANE_eigenMapMatMult(SEXP ASEXP, SEXP BSEXP, SEXP n_coresSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; @@ -36,10 +36,24 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// eigenMapPseudoInverse +Eigen::MatrixXd eigenMapPseudoInverse(const Eigen::Map A, double tolerance, int n_cores); +RcppExport SEXP _scLANE_eigenMapPseudoInverse(SEXP ASEXP, SEXP toleranceSEXP, SEXP n_coresSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Eigen::Map >::type A(ASEXP); + Rcpp::traits::input_parameter< double >::type tolerance(toleranceSEXP); + Rcpp::traits::input_parameter< int >::type n_cores(n_coresSEXP); + rcpp_result_gen = Rcpp::wrap(eigenMapPseudoInverse(A, tolerance, n_cores)); + return rcpp_result_gen; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_scLANE_eigenMapMatrixInvert", (DL_FUNC) &_scLANE_eigenMapMatrixInvert, 2}, {"_scLANE_eigenMapMatMult", (DL_FUNC) &_scLANE_eigenMapMatMult, 3}, + {"_scLANE_eigenMapPseudoInverse", (DL_FUNC) &_scLANE_eigenMapPseudoInverse, 3}, {NULL, NULL, 0} }; diff --git a/src/eigenMapInvert.cpp b/src/eigenMapInvert.cpp index e09b374..6eef2f4 100644 --- a/src/eigenMapInvert.cpp +++ b/src/eigenMapInvert.cpp @@ -3,8 +3,8 @@ // [[Rcpp::depends(RcppEigen)]] // [[Rcpp::export]] -SEXP eigenMapMatrixInvert(const Eigen::Map A, int n_cores = 1){ +Eigen::MatrixXd eigenMapMatrixInvert(const Eigen::Map A, int n_cores = 1){ Eigen::setNbThreads(n_cores); Eigen::MatrixXd B = A.llt().solve(Eigen::MatrixXd::Identity(A.rows(), A.cols())); - return Rcpp::wrap(B); + return B; } diff --git a/src/eigenMapMatMult.cpp b/src/eigenMapMatMult.cpp index 7a5c368..cf697ef 100644 --- a/src/eigenMapMatMult.cpp +++ b/src/eigenMapMatMult.cpp @@ -3,11 +3,11 @@ // [[Rcpp::depends(RcppEigen)]] // [[Rcpp::export]] -SEXP eigenMapMatMult(const Eigen::Map A, - const Eigen::Map B, - int n_cores = 1) { +Eigen::MatrixXd eigenMapMatMult(const Eigen::Map A, + const Eigen::Map B, + int n_cores = 1) { Eigen::setNbThreads(n_cores); Eigen::MatrixXd C; C.noalias() = A * B; - return Rcpp::wrap(C); + return C; } diff --git a/src/eigenMapPseudoInverse.cpp b/src/eigenMapPseudoInverse.cpp new file mode 100644 index 0000000..88758ef --- /dev/null +++ b/src/eigenMapPseudoInverse.cpp @@ -0,0 +1,20 @@ +#include + +// [[Rcpp::depends(RcppEigen)]] +// [[Rcpp::export]] + +Eigen::MatrixXd eigenMapPseudoInverse(const Eigen::Map A, + double tolerance = 1e-6, + int n_cores = 1) { + Eigen::setNbThreads(n_cores); + Eigen::JacobiSVD svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV); + Eigen::MatrixXd singularValues = svd.singularValues(); + Eigen::MatrixXd singularValuesInv(A.cols(), A.rows()); + singularValuesInv.setZero(); + for (int i = 0; i < singularValues.size(); ++i) { + if (singularValues(i) > tolerance) { + singularValuesInv(i, i) = 1.0 / singularValues(i); + } + } + return svd.matrixV() * singularValuesInv * svd.matrixU().transpose(); +}