Skip to content

Commit

Permalink
Enable multiple components simultaenously in pool_parameters() (#1018)
Browse files Browse the repository at this point in the history
* Enable multiple components simultaenously in `pool_parameters()`

* typo

* suppress Warnings
  • Loading branch information
strengejacke authored Sep 24, 2024
1 parent 22cc345 commit 0193c46
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 76 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: parameters
Title: Processing of Model Parameters
Version: 0.22.2.12
Version: 0.22.2.13
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
* `equivalence_test()` and `p_significance()` work with objects returned by
`model_parameters()`.

* `pool_parameters()` now better deals with models with multiple components
(e.g. zero-inflation or dispersion).

* Revision / enhancement of some documentation.

# parameters 0.22.2
Expand Down
156 changes: 88 additions & 68 deletions R/pool_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@
#'
#' @note
#' Models with multiple components, (for instance, models with zero-inflation,
#' where predictors appear in the count and zero-inflation part) may fail in
#' case of identical names for coefficients in the different model components,
#' since the coefficient table is grouped by coefficient names for pooling. In
#' such cases, coefficients of count and zero-inflation model parts would be
#' combined. Therefore, the `component` argument defaults to
#' `"conditional"` to avoid this.
#' where predictors appear in the count and zero-inflation part, or models with
#' dispersion component) may fail in rare situations. In this case, compute
#' the pooled parameters for components separately, using the `component`
#' argument.
#'
#' Some model objects do not return standard errors (e.g. objects of class
#' `htest`). For these models, no pooled confidence intervals nor p-values
Expand Down Expand Up @@ -68,7 +66,7 @@
pool_parameters <- function(x,
exponentiate = FALSE,
effects = "fixed",
component = "conditional",
component = "all",
verbose = TRUE,
...) {
# check input, save original model -----
Expand Down Expand Up @@ -163,75 +161,92 @@ pool_parameters <- function(x,
params <- params[params$Effects != "random", ]
parameter_values <- x[[1]]$Parameter[x[[1]]$Effects != "random"]
}
estimates <- split(params, factor(params$Parameter, levels = unique(parameter_values)))

# split by component
if (!is.null(params$Component) && insight::n_unique(params$Component) > 1) {
component_values <- x[[1]]$Component
estimates <- split(
params,
list(
factor(params$Parameter, levels = unique(parameter_values)),
factor(params$Component, levels = unique(component_values))
)
)
} else {
component_values <- NULL
estimates <- split(
params,
factor(params$Parameter, levels = unique(parameter_values))
)
}

# pool estimates etc. -----

pooled_params <- do.call(rbind, lapply(estimates, function(i) {
# pooled estimate
pooled_estimate <- mean(i$Coefficient)
# if we split by "component", some of the data frames might be empty
# in this case, just skip...
if (nrow(i) > 0) {
# pooled estimate
pooled_estimate <- mean(i$Coefficient)

# special models that have no standard errors (like "htest" objects)
if (is.null(i$SE) || all(is.na(i$SE))) {
out <- data.frame(
Coefficient = pooled_estimate,
SE = NA,
CI_low = NA,
CI_high = NA,
Statistic = NA,
df_error = NA,
p = NA,
stringsAsFactors = FALSE
)

if (verbose) {
insight::format_alert("Model objects had no standard errors. Cannot compute pooled confidence intervals and p-values.")
}

# special models that have no standard errors (like "htest" objects)
if (is.null(i$SE) || all(is.na(i$SE))) {
out <- data.frame(
Coefficient = pooled_estimate,
SE = NA,
CI_low = NA,
CI_high = NA,
Statistic = NA,
df_error = NA,
p = NA,
stringsAsFactors = FALSE
)
# regular models that have coefficients and standard errors
} else {
# pooled standard error
ubar <- mean(i$SE^2)
tmp <- ubar + (1 + 1 / len) * stats::var(i$Coefficient)
pooled_se <- sqrt(tmp)

# pooled degrees of freedom, Barnard-Rubin adjustment for small samples
df_column <- grep("(\\bdf\\b|\\bdf_error\\b)", colnames(i), value = TRUE)[1]
if (length(df_column)) {
pooled_df <- .barnad_rubin(m = nrow(i), b = stats::var(i$Coefficient), t = tmp, dfcom = unique(i[[df_column]]))
# validation check length
if (length(pooled_df) > 1 && length(pooled_se) == 1) {
pooled_df <- round(mean(pooled_df, na.rm = TRUE))
}
} else {
pooled_df <- Inf
}

if (verbose) {
insight::format_alert("Model objects had no standard errors. Cannot compute pooled confidence intervals and p-values.")
# pooled statistic
pooled_statistic <- pooled_estimate / pooled_se

# confidence intervals
alpha <- (1 + ci) / 2
fac <- suppressWarnings(stats::qt(alpha, df = pooled_df))

out <- data.frame(
Coefficient = pooled_estimate,
SE = pooled_se,
CI_low = pooled_estimate - pooled_se * fac,
CI_high = pooled_estimate + pooled_se * fac,
Statistic = pooled_statistic,
df_error = pooled_df,
p = 2 * stats::pt(abs(pooled_statistic), df = pooled_df, lower.tail = FALSE),
stringsAsFactors = FALSE
)
}

# regular models that have coefficients and standard errors
out
} else {
# pooled standard error
ubar <- mean(i$SE^2)
tmp <- ubar + (1 + 1 / len) * stats::var(i$Coefficient)
pooled_se <- sqrt(tmp)

# pooled degrees of freedom, Barnard-Rubin adjustment for small samples
df_column <- grep("(\\bdf\\b|\\bdf_error\\b)", colnames(i), value = TRUE)[1]
if (length(df_column)) {
pooled_df <- .barnad_rubin(m = nrow(i), b = stats::var(i$Coefficient), t = tmp, dfcom = unique(i[[df_column]]))
# validation check length
if (length(pooled_df) > 1 && length(pooled_se) == 1) {
pooled_df <- round(mean(pooled_df, na.rm = TRUE))
}
} else {
pooled_df <- Inf
}

# pooled statistic
pooled_statistic <- pooled_estimate / pooled_se

# confidence intervals
alpha <- (1 + ci) / 2
fac <- suppressWarnings(stats::qt(alpha, df = pooled_df))

out <- data.frame(
Coefficient = pooled_estimate,
SE = pooled_se,
CI_low = pooled_estimate - pooled_se * fac,
CI_high = pooled_estimate + pooled_se * fac,
Statistic = pooled_statistic,
df_error = pooled_df,
p = 2 * stats::pt(abs(pooled_statistic), df = pooled_df, lower.tail = FALSE),
stringsAsFactors = FALSE
)
}

# add component, when pooling for all components
if (identical(component, "all") && "Component" %in% colnames(i)) {
out$Component <- i$Component[1]
NULL
}
out
}))


Expand All @@ -249,13 +264,13 @@ pool_parameters <- function(x,
stringsAsFactors = FALSE
)
}))
pooled_params$Effects <- "fixed"
}


# reorder ------

pooled_params$Parameter <- parameter_values
columns <- c("Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic", "df_error", "p", "Component")
columns <- c("Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic", "df_error", "p", "Effects", "Component")
pooled_params <- pooled_params[intersect(columns, colnames(pooled_params))]


Expand All @@ -268,6 +283,11 @@ pool_parameters <- function(x,
pooled_params <- merge(pooled_params, pooled_random, all = TRUE, sort = FALSE)
}

# add back component column
if (!is.null(component_values)) {
pooled_params$Component <- component_values
}

# this needs to be done extra here, cannot call ".add_model_parameters_attributes()"
pooled_params <- .add_pooled_params_attributes(
pooled_params,
Expand Down
12 changes: 5 additions & 7 deletions man/pool_parameters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

136 changes: 136 additions & 0 deletions tests/testthat/test-pool_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,139 @@ test_that("pooled parameters", {
pp3 <- summary(mice::pool(m_mice))
expect_equal(pp2$df_error, pp3$df, tolerance = 1e-3)
})

skip_on_cran()

test_that("pooled parameters, glmmTMB, components", {
skip_if_not_installed("mice")
skip_if_not_installed("glmmTMB")
sim1 <- function(nfac = 4, nt = 10, facsd = 0.1, tsd = 0.15, mu = 0, residsd = 1) {
dat <- expand.grid(fac = factor(letters[1:nfac]), t = 1:nt)
n <- nrow(dat)
dat$REfac <- rnorm(nfac, sd = facsd)[dat$fac]
dat$REt <- rnorm(nt, sd = tsd)[dat$t]
dat$x <- rnorm(n, mean = mu, sd = residsd) + dat$REfac + dat$REt
dat
}

set.seed(101)
d1 <- sim1(mu = 100, residsd = 10)
d2 <- sim1(mu = 200, residsd = 5)
d1$sd <- "ten"
d2$sd <- "five"
dat <- rbind(d1, d2)

set.seed(101)
dat$REfac[sample.int(nrow(dat), 10)] <- NA
dat$x[sample.int(nrow(dat), 10)] <- NA
dat$sd[sample.int(nrow(dat), 10)] <- NA

impdat <- suppressWarnings(mice::mice(dat, printFlag = FALSE))
models <- lapply(1:5, function(i) {
glmmTMB::glmmTMB(
x ~ sd + (1 | t),
dispformula = ~sd,
data = mice::complete(impdat, action = i)
)
})

out <- pool_parameters(models, component = "conditional")
expect_named(
out,
c(
"Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic",
"df_error", "p"
)
)
expect_equal(out$Coefficient, c(187.280225, -87.838969), tolerance = 1e-3)

out <- pool_parameters(models, component = "all", effects = "all")
expect_named(
out,
c(
"Parameter", "Coefficient", "Effects", "SE", "CI_low", "CI_high",
"Statistic", "df_error", "p", "Component"
)
)
expect_equal(
out$Coefficient,
c(187.280225, -87.838969, 3.51576, -1.032665, 0.610992, NaN),
tolerance = 1e-3
)

out <- pool_parameters(models, component = "all", effects = "fixed")
expect_named(
out,
c(
"Parameter", "Coefficient", "SE", "CI_low", "CI_high",
"Statistic", "df_error", "p", "Component"
)
)
expect_equal(
out$Coefficient,
c(187.280225, -87.838969, 3.51576, -1.032665),
tolerance = 1e-3
)
})


test_that("pooled parameters, glmmTMB, zero-inflated", {
skip_if_not_installed("mice")
skip_if_not_installed("glmmTMB")
data(Salamanders, package = "glmmTMB")
set.seed(123)
Salamanders$cover[sample.int(nrow(Salamanders), 50)] <- NA
Salamanders$mined[sample.int(nrow(Salamanders), 10)] <- NA

impdat <- suppressWarnings(mice::mice(Salamanders, printFlag = FALSE))
models <- lapply(1:5, function(i) {
glmmTMB::glmmTMB(
count ~ mined + cover + (1 | site),
ziformula = ~mined,
family = poisson(),
data = mice::complete(impdat, action = i)
)
})

out <- pool_parameters(models)
expect_named(
out,
c(
"Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic",
"df_error", "p", "Component"
)
)
expect_equal(
out$Coefficient,
c(0.13409, 1.198551, -0.181912, 1.253029, -1.844026),
tolerance = 1e-3
)

out <- pool_parameters(models, component = "all", effects = "all")
expect_named(
out,
c(
"Parameter", "Coefficient", "Effects", "SE", "CI_low", "CI_high",
"Statistic", "df_error", "p", "Component"
)
)
expect_equal(
out$Coefficient,
c(0.13409, 1.198551, -0.181912, 1.253029, -1.844026, 0.158795),
tolerance = 1e-3
)

out <- pool_parameters(models, component = "conditional", effects = "fixed")
expect_named(
out,
c(
"Parameter", "Coefficient", "SE", "CI_low", "CI_high",
"Statistic", "df_error", "p"
)
)
expect_equal(
out$Coefficient,
c(0.13409, 1.198551, -0.181912),
tolerance = 1e-3
)
})

0 comments on commit 0193c46

Please sign in to comment.