From 2f054c892c775a9349d9db2e7002e8b223f5adf8 Mon Sep 17 00:00:00 2001
From: Jean Cochrane <jeancochrane@users.noreply.github.com>
Date: Wed, 6 Nov 2024 16:09:30 -0600
Subject: [PATCH] Switch from `multisession` parallelization to `multicore` in
 `evaluate` stage (#261)

* Switch from `multisession` parallelization to `multicore` in `evaluate` stage

* Fix comment in pipeline/03-evaluate.R

Co-authored-by: Dan Snow <31494343+dfsnow@users.noreply.github.com>

---------

Co-authored-by: Dan Snow <31494343+dfsnow@users.noreply.github.com>
---
 pipeline/03-evaluate.R | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pipeline/03-evaluate.R b/pipeline/03-evaluate.R
index 52caf59b..88456297 100644
--- a/pipeline/03-evaluate.R
+++ b/pipeline/03-evaluate.R
@@ -12,8 +12,14 @@ tictoc::tic("Evaluate")
 # Load libraries, helpers, and recipes from files
 purrr::walk(list.files("R/", "\\.R$", full.names = TRUE), source)
 
-# Enable parallel backend for generating stats more quickly
-plan(multisession, workers = num_threads)
+# Enable parallel backend for generating stats faster
+if (supportsMulticore()) {
+  # Use half of `num_threads` (rounded up) to avoid hogging resources
+  plan(multicore, workers = ceiling(num_threads / 2))
+} else {
+  # Multisession performance begins to degrade beyond 5 workers
+  plan(multisession, workers = 5)
+}
 
 # Renaming dictionary for input columns. We want the actual value of the column
 # to become geography_id and the NAME of the column to become geography_name