Enable multiple components simultaenously in pool_parameters() (#1018)

* Enable multiple components simultaenously in `pool_parameters()` * typo * suppress Warnings
easystats · Sep 24, 2024 · 0193c46 · 0193c46
1 parent 22cc345
commit 0193c46
Show file tree

Hide file tree

Showing 5 changed files with 233 additions and 76 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: parameters
 Title: Processing of Model Parameters
-Version: 0.22.2.12
+Version: 0.22.2.13
 Authors@R:
     c(person(given = "Daniel",
              family = "Lüdecke",

diff --git a/NEWS.md b/NEWS.md
@@ -16,6 +16,9 @@
 * `equivalence_test()` and `p_significance()` work with objects returned by
   `model_parameters()`.
 
+* `pool_parameters()` now better deals with models with multiple components
+  (e.g. zero-inflation or dispersion).
+
 * Revision / enhancement of some documentation.
 
 # parameters 0.22.2

diff --git a/R/pool_parameters.R b/R/pool_parameters.R
@@ -17,12 +17,10 @@
 #'
 #' @note
 #' Models with multiple components, (for instance, models with zero-inflation,
-#' where predictors appear in the count and zero-inflation part) may fail in
-#' case of identical names for coefficients in the different model components,
-#' since the coefficient table is grouped by coefficient names for pooling. In
-#' such cases, coefficients of count and zero-inflation model parts would be
-#' combined. Therefore, the `component` argument defaults to
-#' `"conditional"` to avoid this.
+#' where predictors appear in the count and zero-inflation part, or models with
+#' dispersion component) may fail in rare situations. In this case, compute
+#' the pooled parameters for components separately, using the `component`
+#' argument.
 #'
 #' Some model objects do not return standard errors (e.g. objects of class
 #' `htest`). For these models, no pooled confidence intervals nor p-values
@@ -68,7 +66,7 @@
 pool_parameters <- function(x,
                             exponentiate = FALSE,
                             effects = "fixed",
-                            component = "conditional",
+                            component = "all",
                             verbose = TRUE,
                             ...) {
   # check input, save original model -----
@@ -163,75 +161,92 @@ pool_parameters <- function(x,
     params <- params[params$Effects != "random", ]
     parameter_values <- x[[1]]$Parameter[x[[1]]$Effects != "random"]
   }
-  estimates <- split(params, factor(params$Parameter, levels = unique(parameter_values)))
 
+  # split by component
+  if (!is.null(params$Component) && insight::n_unique(params$Component) > 1) {
+    component_values <- x[[1]]$Component
+    estimates <- split(
+      params,
+      list(
+        factor(params$Parameter, levels = unique(parameter_values)),
+        factor(params$Component, levels = unique(component_values))
+      )
+    )
+  } else {
+    component_values <- NULL
+    estimates <- split(
+      params,
+      factor(params$Parameter, levels = unique(parameter_values))
+    )
+  }
 
   # pool estimates etc. -----
 
   pooled_params <- do.call(rbind, lapply(estimates, function(i) {
-    # pooled estimate
-    pooled_estimate <- mean(i$Coefficient)
+    # if we split by "component", some of the data frames might be empty
+    # in this case, just skip...
+    if (nrow(i) > 0) {
+      # pooled estimate
+      pooled_estimate <- mean(i$Coefficient)
+
+      # special models that have no standard errors (like "htest" objects)
+      if (is.null(i$SE) || all(is.na(i$SE))) {
+        out <- data.frame(
+          Coefficient = pooled_estimate,
+          SE = NA,
+          CI_low = NA,
+          CI_high = NA,
+          Statistic = NA,
+          df_error = NA,
+          p = NA,
+          stringsAsFactors = FALSE
+        )
+
+        if (verbose) {
+          insight::format_alert("Model objects had no standard errors. Cannot compute pooled confidence intervals and p-values.")
+        }
 
-    # special models that have no standard errors (like "htest" objects)
-    if (is.null(i$SE) || all(is.na(i$SE))) {
-      out <- data.frame(
-        Coefficient = pooled_estimate,
-        SE = NA,
-        CI_low = NA,
-        CI_high = NA,
-        Statistic = NA,
-        df_error = NA,
-        p = NA,
-        stringsAsFactors = FALSE
-      )
+        # regular models that have coefficients and standard errors
+      } else {
+        # pooled standard error
+        ubar <- mean(i$SE^2)
+        tmp <- ubar + (1 + 1 / len) * stats::var(i$Coefficient)
+        pooled_se <- sqrt(tmp)
+
+        # pooled degrees of freedom, Barnard-Rubin adjustment for small samples
+        df_column <- grep("(\\bdf\\b|\\bdf_error\\b)", colnames(i), value = TRUE)[1]
+        if (length(df_column)) {
+          pooled_df <- .barnad_rubin(m = nrow(i), b = stats::var(i$Coefficient), t = tmp, dfcom = unique(i[[df_column]]))
+          # validation check length
+          if (length(pooled_df) > 1 && length(pooled_se) == 1) {
+            pooled_df <- round(mean(pooled_df, na.rm = TRUE))
+          }
+        } else {
+          pooled_df <- Inf
+        }
 
-      if (verbose) {
-        insight::format_alert("Model objects had no standard errors. Cannot compute pooled confidence intervals and p-values.")
+        # pooled statistic
+        pooled_statistic <- pooled_estimate / pooled_se
+
+        # confidence intervals
+        alpha <- (1 + ci) / 2
+        fac <- suppressWarnings(stats::qt(alpha, df = pooled_df))
+
+        out <- data.frame(
+          Coefficient = pooled_estimate,
+          SE = pooled_se,
+          CI_low = pooled_estimate - pooled_se * fac,
+          CI_high = pooled_estimate + pooled_se * fac,
+          Statistic = pooled_statistic,
+          df_error = pooled_df,
+          p = 2 * stats::pt(abs(pooled_statistic), df = pooled_df, lower.tail = FALSE),
+          stringsAsFactors = FALSE
+        )
       }
-
-      # regular models that have coefficients and standard errors
+      out
     } else {
-      # pooled standard error
-      ubar <- mean(i$SE^2)
-      tmp <- ubar + (1 + 1 / len) * stats::var(i$Coefficient)
-      pooled_se <- sqrt(tmp)
-
-      # pooled degrees of freedom, Barnard-Rubin adjustment for small samples
-      df_column <- grep("(\\bdf\\b|\\bdf_error\\b)", colnames(i), value = TRUE)[1]
-      if (length(df_column)) {
-        pooled_df <- .barnad_rubin(m = nrow(i), b = stats::var(i$Coefficient), t = tmp, dfcom = unique(i[[df_column]]))
-        # validation check length
-        if (length(pooled_df) > 1 && length(pooled_se) == 1) {
-          pooled_df <- round(mean(pooled_df, na.rm = TRUE))
-        }
-      } else {
-        pooled_df <- Inf
-      }
-
-      # pooled statistic
-      pooled_statistic <- pooled_estimate / pooled_se
-
-      # confidence intervals
-      alpha <- (1 + ci) / 2
-      fac <- suppressWarnings(stats::qt(alpha, df = pooled_df))
-
-      out <- data.frame(
-        Coefficient = pooled_estimate,
-        SE = pooled_se,
-        CI_low = pooled_estimate - pooled_se * fac,
-        CI_high = pooled_estimate + pooled_se * fac,
-        Statistic = pooled_statistic,
-        df_error = pooled_df,
-        p = 2 * stats::pt(abs(pooled_statistic), df = pooled_df, lower.tail = FALSE),
-        stringsAsFactors = FALSE
-      )
-    }
-
-    # add component, when pooling for all components
-    if (identical(component, "all") && "Component" %in% colnames(i)) {
-      out$Component <- i$Component[1]
+      NULL
     }
-    out
   }))
 
 
@@ -249,13 +264,13 @@ pool_parameters <- function(x,
         stringsAsFactors = FALSE
       )
     }))
+    pooled_params$Effects <- "fixed"
   }
 
-
   # reorder ------
 
   pooled_params$Parameter <- parameter_values
-  columns <- c("Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic", "df_error", "p", "Component")
+  columns <- c("Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic", "df_error", "p", "Effects", "Component")
   pooled_params <- pooled_params[intersect(columns, colnames(pooled_params))]
 
 
@@ -268,6 +283,11 @@ pool_parameters <- function(x,
     pooled_params <- merge(pooled_params, pooled_random, all = TRUE, sort = FALSE)
   }
 
+  # add back component column
+  if (!is.null(component_values)) {
+    pooled_params$Component <- component_values
+  }
+
   # this needs to be done extra here, cannot call ".add_model_parameters_attributes()"
   pooled_params <- .add_pooled_params_attributes(
     pooled_params,

diff --git a/man/pool_parameters.Rd b/man/pool_parameters.Rd
diff --git a/tests/testthat/test-pool_parameters.R b/tests/testthat/test-pool_parameters.R
@@ -28,3 +28,139 @@ test_that("pooled parameters", {
   pp3 <- summary(mice::pool(m_mice))
   expect_equal(pp2$df_error, pp3$df, tolerance = 1e-3)
 })
+
+skip_on_cran()
+
+test_that("pooled parameters, glmmTMB, components", {
+  skip_if_not_installed("mice")
+  skip_if_not_installed("glmmTMB")
+  sim1 <- function(nfac = 4, nt = 10, facsd = 0.1, tsd = 0.15, mu = 0, residsd = 1) {
+    dat <- expand.grid(fac = factor(letters[1:nfac]), t = 1:nt)
+    n <- nrow(dat)
+    dat$REfac <- rnorm(nfac, sd = facsd)[dat$fac]
+    dat$REt <- rnorm(nt, sd = tsd)[dat$t]
+    dat$x <- rnorm(n, mean = mu, sd = residsd) + dat$REfac + dat$REt
+    dat
+  }
+
+  set.seed(101)
+  d1 <- sim1(mu = 100, residsd = 10)
+  d2 <- sim1(mu = 200, residsd = 5)
+  d1$sd <- "ten"
+  d2$sd <- "five"
+  dat <- rbind(d1, d2)
+
+  set.seed(101)
+  dat$REfac[sample.int(nrow(dat), 10)] <- NA
+  dat$x[sample.int(nrow(dat), 10)] <- NA
+  dat$sd[sample.int(nrow(dat), 10)] <- NA
+
+  impdat <- suppressWarnings(mice::mice(dat, printFlag = FALSE))
+  models <- lapply(1:5, function(i) {
+    glmmTMB::glmmTMB(
+      x ~ sd + (1 | t),
+      dispformula = ~sd,
+      data = mice::complete(impdat, action = i)
+    )
+  })
+
+  out <- pool_parameters(models, component = "conditional")
+  expect_named(
+    out,
+    c(
+      "Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic",
+      "df_error", "p"
+    )
+  )
+  expect_equal(out$Coefficient, c(187.280225, -87.838969), tolerance = 1e-3)
+
+  out <- pool_parameters(models, component = "all", effects = "all")
+  expect_named(
+    out,
+    c(
+      "Parameter", "Coefficient", "Effects", "SE", "CI_low", "CI_high",
+      "Statistic", "df_error", "p", "Component"
+    )
+  )
+  expect_equal(
+    out$Coefficient,
+    c(187.280225, -87.838969, 3.51576, -1.032665, 0.610992, NaN),
+    tolerance = 1e-3
+  )
+
+  out <- pool_parameters(models, component = "all", effects = "fixed")
+  expect_named(
+    out,
+    c(
+      "Parameter", "Coefficient", "SE", "CI_low", "CI_high",
+      "Statistic", "df_error", "p", "Component"
+    )
+  )
+  expect_equal(
+    out$Coefficient,
+    c(187.280225, -87.838969, 3.51576, -1.032665),
+    tolerance = 1e-3
+  )
+})
+
+
+test_that("pooled parameters, glmmTMB, zero-inflated", {
+  skip_if_not_installed("mice")
+  skip_if_not_installed("glmmTMB")
+  data(Salamanders, package = "glmmTMB")
+  set.seed(123)
+  Salamanders$cover[sample.int(nrow(Salamanders), 50)] <- NA
+  Salamanders$mined[sample.int(nrow(Salamanders), 10)] <- NA
+
+  impdat <- suppressWarnings(mice::mice(Salamanders, printFlag = FALSE))
+  models <- lapply(1:5, function(i) {
+    glmmTMB::glmmTMB(
+      count ~ mined + cover + (1 | site),
+      ziformula = ~mined,
+      family = poisson(),
+      data = mice::complete(impdat, action = i)
+    )
+  })
+
+  out <- pool_parameters(models)
+  expect_named(
+    out,
+    c(
+      "Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic",
+      "df_error", "p", "Component"
+    )
+  )
+  expect_equal(
+    out$Coefficient,
+    c(0.13409, 1.198551, -0.181912, 1.253029, -1.844026),
+    tolerance = 1e-3
+  )
+
+  out <- pool_parameters(models, component = "all", effects = "all")
+  expect_named(
+    out,
+    c(
+      "Parameter", "Coefficient", "Effects", "SE", "CI_low", "CI_high",
+      "Statistic", "df_error", "p", "Component"
+    )
+  )
+  expect_equal(
+    out$Coefficient,
+    c(0.13409, 1.198551, -0.181912, 1.253029, -1.844026, 0.158795),
+    tolerance = 1e-3
+  )
+
+  out <- pool_parameters(models, component = "conditional", effects = "fixed")
+  expect_named(
+    out,
+    c(
+      "Parameter", "Coefficient", "SE", "CI_low", "CI_high",
+      "Statistic", "df_error", "p"
+    )
+  )
+  expect_equal(
+    out$Coefficient,
+    c(0.13409, 1.198551, -0.181912),
+    tolerance = 1e-3
+  )
+})