Skip to content

Commit

Permalink
Merge pull request #269 from jr-leary7/dev
Browse files Browse the repository at this point in the history
fixed score test function to provide more accurate values
  • Loading branch information
jr-leary7 authored Nov 12, 2024
2 parents 8a63798 + e02f6c0 commit 3f206ba
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 24 deletions.
64 changes: 41 additions & 23 deletions R/scoreTestGEE.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#' @author Jack R. Leary
#' @description Performs a basic Lagrange multiplier test to determine whether an alternate model is significantly better than a nested null model. This is the GEE equivalent (kind of) of \code{\link{modelLRT}}. Be careful with small sample sizes.
#' @importFrom stats model.matrix predict pchisq
#' @importFrom Matrix bdiag
#' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL.
#' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL.
#' @param alt.df The dataframe used to fit the alternative model. Defaults to NULL.
Expand All @@ -13,10 +14,11 @@
#' @return A list containing the Score test statistic, a \emph{p}-value, and the degrees of freedom used in the test.
#' @details
#' \itemize{
#' \item Calculating the test statistic involves taking the inverse of the variance-covariance matrix of the coefficients. Ideally this would be done using the "true" inverse with something like \code{\link{solve}}, \code{\link{qr.solve}}, or \code{\link{chol2inv}}, but in practice this can cause issues when the variance-covariance matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse as implemented in \code{\link[MASS]{ginv}} instead, which leads to more stable results.
#' \item Calculating the test statistic involves taking the inverse of the variance of the score vector. Ideally this would be done using the true inverse, but in practice this can cause issues when the matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse if the original matrix inversion fails.
#' \item The \emph{p}-value is calculated using an asymptotic \eqn{\Chi^2} distribution, with the degrees of freedom equal to the number of non-intercept coefficients in the alternative model.
#' }
#' @seealso \code{\link[geeM]{geem}}
#' @seealso \code{\link[glmtoolbox]{anova2}}
#' @seealso \code{\link{waldTestGEE}}
#' @seealso \code{\link{modelLRT}}

Expand Down Expand Up @@ -46,19 +48,19 @@ scoreTestGEE <- function(mod.1 = NULL,
Notes = NA_character_)
} else {
rho <- summary(mod.0)$alpha
sigma2 <- summary(mod.0)$phi
phi <- summary(mod.0)$phi
theta <- as.numeric(gsub("\\)", "", gsub(".*\\(", "", mod.1$FunList$family)))
X_null <- stats::model.matrix(mod.0, data = null.df)
X_alt <- stats::model.matrix(mod.1, data = alt.df)
residuals_null <- null.df$Y - exp(stats::predict(mod.0))
r_null <- null.df$Y - stats::predict(mod.0)
p_alt <- ncol(X_alt)
U <- rep(0, p_alt)
V_U <- matrix(0, nrow = p_alt, ncol = p_alt)
groups <- unique(id.vec)
i <- 1
W <- K <- vector("list", length = length(groups))
for (i in seq(groups)) {
group_indices <- which(id.vec == groups[i])
X_alt_i <- X_alt[group_indices, , drop = FALSE]
residuals_i <- residuals_null[group_indices]
n_i <- length(group_indices)
group_idx <- which(id.vec == groups[i])
n_i <- length(group_idx)
# create working correlation matrix R_i
if (cor.structure == "independence") {
R_i <- diag(1, nrow = n_i, ncol = n_i)
} else if (cor.structure == "exchangeable") {
Expand All @@ -67,26 +69,42 @@ scoreTestGEE <- function(mod.1 = NULL,
} else if (cor.structure == "ar1") {
R_i <- matrix(rho^abs(outer(seq(n_i), seq(n_i), "-")), nrow = n_i, ncol = n_i)
}
V_i <- sigma2 * R_i
R_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE)
if (inherits(R_i_inv, "try-error")) {
R_i_inv <- eigenMapPseudoInverse(V_i)
# create working covariance matrix V_i
mu_i <- stats::predict(mod.0)[group_idx]
V_mu_i <- mu_i * (1 + mu_i / theta)
A_i <- diag(V_mu_i)
A_i_sqrt <- sqrt(A_i) # same as taking the 1/2 power of A_i since A_i is diagonal
V_i <- A_i_sqrt %*% R_i %*% A_i_sqrt
# create matrices W_i and K_i
K_i <- diag(mu_i)
V_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE)
if (inherits(V_i_inv, "try-error")) {
V_i_inv <- eigenMapPseudoInverse(V_i)
}
X_alt_i_t <- t(X_alt_i)
U_i <- X_alt_i_t %*% (R_i_inv %*% residuals_i)
V_U_i <- X_alt_i_t %*% (R_i_inv %*% X_alt_i)
U <- U + U_i
V_U <- V_U + V_U_i
W_i <- K_i %*% V_i_inv %*% K_i
W[[i]] <- W_i
K[[i]] <- K_i
}
W <- as.matrix(Matrix::bdiag(W))
K <- as.matrix(Matrix::bdiag(K))
K_inv <- try({ eigenMapMatrixInvert(K) }, silent = TRUE)
if (inherits(K_inv, "try-error")) {
K_inv <- eigenMapPseudoInverse(K)
}
# generate score vector
U <- phi^(-1) * t(X_alt) %*% W %*% K_inv %*% r_null
# generate variance of score vector and invert it
V_U <- phi^(-2) * t(X_alt) %*% W %*% X_alt
V_U_inv <- try({ eigenMapMatrixInvert(V_U) }, silent = TRUE)
if (inherits(V_U_inv, "try-error")) {
V_U_inv <- eigenMapPseudoInverse(V_U)
}
test_stat <- as.numeric(t(U) %*% V_U_inv %*% U)
df1 <- ncol(X_alt) - ncol(X_null)
p_value <- 1 - stats::pchisq(test_stat, df = df1)
res <- list(Score_Stat = test_stat,
DF = df1,
# estimate test statistic and accompanying p-value
S <- t(U) %*% V_U_inv %*% U
S_df <- ncol(X_alt) - ncol(X_null)
p_value <- 1 - stats::pchisq(S, df = S_df)
res <- list(Score_Stat = S,
DF = S_df,
P_Val = p_value,
Notes = NA_character_)
}
Expand Down
4 changes: 3 additions & 1 deletion man/scoreTestGEE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 3f206ba

Please sign in to comment.