From 95a230c2a24b361b9a85dcb4397dab9ed8917d36 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Tue, 29 Oct 2024 17:21:22 -0400 Subject: [PATCH] fixed broken score testing implementation -- closes #265 --- R/scoreTestGEE.R | 44 ++++++++++++++++++++++++++++-------- R/testDynamic.R | 3 ++- man/scoreTestGEE.Rd | 7 ++++-- tests/testthat/test_scLANE.R | 4 +++- 4 files changed, 44 insertions(+), 14 deletions(-) diff --git a/R/scoreTestGEE.R b/R/scoreTestGEE.R index 801aa70..1a4f102 100644 --- a/R/scoreTestGEE.R +++ b/R/scoreTestGEE.R @@ -8,7 +8,8 @@ #' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL. #' @param alt.df The dataframe used to fit the alternative model. Defaults to NULL. #' @param null.df The dataframe used to fit the null model. Defaults to NULL. -#' @param sandwich.var Boolean specifying whether the sandwich variance-covariance matrix should be used. Defaults to FALSE. +#' @param id.vec A vector of subject IDs to use as input to \code{\link{marge2}}. Defaults to NULL. +#' @param cor.structure A string specifying the working correlation structure used to fit each model. Must be one of "ar1", "independence", or "exchangeable". Defaults to "ar1". #' @return A list containing the Score test statistic, a \emph{p}-value, and the degrees of freedom used in the test. #' @details #' \itemize{ @@ -23,9 +24,10 @@ scoreTestGEE <- function(mod.1 = NULL, mod.0 = NULL, alt.df = NULL, null.df = NULL, - sandwich.var = FALSE) { + id.vec = NULL, + cor.structure = "ar1") { # check inputs - if (is.null(mod.1) || is.null(mod.0) || is.null(alt.df) || is.null(null.df)) { stop("Please provide all inputs to scoreTestGEE().") } + if (is.null(mod.1) || is.null(mod.0) || is.null(alt.df) || is.null(null.df) || is.null(id.vec)) { stop("Please provide all inputs to scoreTestGEE().") } if (inherits(mod.1, "try-error") || inherits(mod.0, "try-error")) { res <- list(Score_Stat = NA_real_, DF = NA_real_, @@ -43,17 +45,39 @@ scoreTestGEE <- function(mod.1 = NULL, P_Val = 1, Notes = NA_character_) } else { + rho <- summary(mod.0)$alpha + sigma2 <- summary(mod.0)$phi X_null <- stats::model.matrix(mod.0, data = null.df) X_alt <- stats::model.matrix(mod.1, data = alt.df) residuals_null <- null.df$Y - exp(stats::predict(mod.0)) - if (sandwich.var) { - V_null <- as.matrix(mod.0$var) - } else { - V_null <- as.matrix(mod.0$naiv.var) + p_alt <- ncol(X_alt) + U <- rep(0, p_alt) + V_U <- matrix(0, nrow = p_alt, ncol = p_alt) + groups <- unique(id.vec) + for (i in seq(groups)) { + group_indices <- which(id.vec == groups[i]) + X_alt_i <- X_alt[group_indices, , drop = FALSE] + residuals_i <- residuals_null[group_indices] + n_i <- length(group_indices) + if (cor.structure == "independence") { + R_i <- diag(1, nrow = n_i, ncol = n_i) + } else if (cor.structure == "exchangeable") { + R_i <- matrix(rho, nrow = n_i, ncol = n_i) + diag(R_i) <- 1 + } else if (cor.structure == "ar1") { + R_i <- matrix(rho^abs(outer(seq(n_i), seq(n_i), "-")), nrow = n_i, ncol = n_i) + } + V_i <- sigma2 * R_i + R_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE) + if (inherits(R_i_inv, "try-error")) { + R_i_inv <- eigenMapPseudoInverse(V_i) + } + X_alt_i_t <- t(X_alt_i) + U_i <- X_alt_i_t %*% (R_i_inv %*% residuals_i) + V_U_i <- X_alt_i_t %*% (R_i_inv %*% X_alt_i) + U <- U + U_i + V_U <- V_U + V_U_i } - X_alt_t <- t(X_alt) - U <- X_alt_t %*% residuals_null - V_U <- (X_alt_t * as.numeric(V_null)) %*% X_alt V_U_inv <- try({ eigenMapMatrixInvert(V_U) }, silent = TRUE) if (inherits(V_U_inv, "try-error")) { V_U_inv <- eigenMapPseudoInverse(V_U) diff --git a/R/testDynamic.R b/R/testDynamic.R index 2a2d77e..2e2d532 100644 --- a/R/testDynamic.R +++ b/R/testDynamic.R @@ -353,7 +353,8 @@ testDynamic <- function(expr.mat = NULL, mod.0 = null_mod, alt.df = as.data.frame(marge_mod$basis_mtx), null.df = null_mod_df, - sandwich.var = ifelse(is.null(gee.bias.correction.method), FALSE, TRUE)) + id.vec = id.vec[lineage_cells], + cor.structure = cor.structure) } } else { test_res <- modelLRT(mod.1 = marge_mod, diff --git a/man/scoreTestGEE.Rd b/man/scoreTestGEE.Rd index 5ea410d..c90200a 100644 --- a/man/scoreTestGEE.Rd +++ b/man/scoreTestGEE.Rd @@ -9,7 +9,8 @@ scoreTestGEE( mod.0 = NULL, alt.df = NULL, null.df = NULL, - sandwich.var = FALSE + id.vec = NULL, + cor.structure = "ar1" ) } \arguments{ @@ -21,7 +22,9 @@ scoreTestGEE( \item{null.df}{The dataframe used to fit the null model. Defaults to NULL.} -\item{sandwich.var}{Boolean specifying whether the sandwich variance-covariance matrix should be used. Defaults to FALSE.} +\item{id.vec}{A vector of subject IDs to use as input to \code{\link{marge2}}. Defaults to NULL.} + +\item{cor.structure}{A string specifying the working correlation structure used to fit each model. Must be one of "ar1", "independence", or "exchangeable". Defaults to "ar1".} } \value{ A list containing the Score test statistic, a \emph{p}-value, and the degrees of freedom used in the test. diff --git a/tests/testthat/test_scLANE.R b/tests/testthat/test_scLANE.R index 5fc60cd..f6b5711 100644 --- a/tests/testthat/test_scLANE.R +++ b/tests/testthat/test_scLANE.R @@ -47,6 +47,7 @@ withr::with_output_sink(tempfile(), { size.factor.offset = cell_offset, is.gee = TRUE, cor.structure = "ar1", + gee.test = "score", id.vec = sim_data$subject, n.cores = 2L) glmm_gene_stats <- testDynamic(sim_data, @@ -139,7 +140,8 @@ withr::with_output_sink(tempfile(), { score_test <- scoreTestGEE(marge_mod_GEE_offset, mod.0 = null_mod_GEE, alt.df = as.data.frame(marge_mod_GEE_offset$basis_mtx), - null.df = data.frame(Y = counts_test[, 3])) + null.df = data.frame(Y = counts_test[, 3]), + id.vec = sim_data$subject) # run GLMM model -- no offset glmm_mod <- fitGLMM(pt_test, Y = counts_test[, 4],