Skip to content

Commit

Permalink
Merge pull request #113 from ropensci/111
Browse files Browse the repository at this point in the history
Resilient seeds for batched replication
  • Loading branch information
wlandau-lilly authored Oct 18, 2022
2 parents 441948c + 61945af commit 74969b2
Show file tree
Hide file tree
Showing 37 changed files with 853 additions and 58 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ export(tar_url)
export(walk_ast)
export(walk_call_knitr)
importFrom(digest,digest)
importFrom(digest,digest2int)
importFrom(dplyr,bind_rows)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
Expand Down Expand Up @@ -174,6 +175,7 @@ importFrom(targets,tar_assert_true)
importFrom(targets,tar_assert_unique)
importFrom(targets,tar_assert_unique_targets)
importFrom(targets,tar_cue)
importFrom(targets,tar_definition)
importFrom(targets,tar_deparse_language)
importFrom(targets,tar_deparse_safe)
importFrom(targets,tar_dir)
Expand Down Expand Up @@ -210,4 +212,5 @@ importFrom(utils,head)
importFrom(vctrs,vec_c)
importFrom(vctrs,vec_rbind)
importFrom(withr,local_options)
importFrom(withr,local_seed)
importFrom(withr,with_options)
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# tarchetypes 0.7.1.9000

* Migrate away from deprecated `targets::tar_path()`.
* Implement and return resilient seeds in batched replication (#111, #113).

# tarchetypes 0.7.1

Expand Down
1 change: 1 addition & 0 deletions R/tar_map2.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#' `command1` produces a data frame of arguments to
#' `command2`, and `command2` dynamically maps over
#' these arguments in batches.
#' @inheritSection tar_rep Replicate-specific seeds
#' @return A list of new target objects.
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
Expand Down
1 change: 1 addition & 0 deletions R/tar_map2_count.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#' @return A list of new target objects.
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
#' @inheritSection tar_rep Replicate-specific seeds
#' @inheritParams tar_map2
#' @inheritParams tar_map2_count_raw
#' @examples
Expand Down
1 change: 1 addition & 0 deletions R/tar_map2_count_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#' @return A list of new target objects.
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
#' @inheritSection tar_rep Replicate-specific seeds
#' @param batches Positive integer of length 1,
#' maximum number of batches (dynamic branches within static branches)
#' of the downstream (`command2`) targets. Batches
Expand Down
28 changes: 22 additions & 6 deletions R/tar_map2_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' @return A list of new target objects.
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
#' @inheritSection tar_rep Replicate-specific seeds
#' @param name Character of length 1, base name of the targets.
#' @param command1 Language object to create named arguments to `command2`.
#' Must return a data frame with one row per call to `command2`.
Expand Down Expand Up @@ -230,17 +231,21 @@ tar_map2_group <- function(data, group) {
tar_map2_run <- function(command, values, columns) {
command <- substitute(command)
columns <- substitute(columns)
splits <- split(values, f = seq_len(nrow(values)))
out <- lapply(
split(values, f = seq_len(nrow(values))),
tar_map2_run_rep,
X = seq_along(splits),
FUN = tar_map2_run_rep,
command = command,
columns = columns
splits = splits,
columns = columns,
reps = length(splits)
)
do.call(vctrs::vec_rbind, out)
}

tar_map2_run_rep <- function(command, values, columns) {
tar_map2_run_rep <- function(rep, command, splits, columns, reps) {
envir <- targets::tar_envir()
values <- splits[[rep]]
names <- names(values)
lapply(
X = seq_len(ncol(values)),
Expand All @@ -257,7 +262,18 @@ tar_map2_run_rep <- function(command, values, columns) {
)
}
)
out <- eval(command, envir = targets::tar_envir())
pedigree <- targets::tar_definition()$pedigree
name <- pedigree$parent
batch <- pedigree$index
seed <- produce_seed_rep(name = name, batch = batch, rep = rep, reps = reps)
out <- withr::with_seed(
seed = seed,
code = eval(command, envir = targets::tar_envir())
)
columns <- targets::tar_tidyselect_eval(columns, colnames(values))
tar_append_static_values(out, values[, columns])
out <- tar_append_static_values(out, values[, columns])
out[["tar_batch"]] <- as.integer(batch)
out[["tar_rep"]] <- as.integer(rep)
out[["tar_seed"]] <- as.integer(seed)
out
}
1 change: 1 addition & 0 deletions R/tar_map2_size.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#' @return A list of new target objects.
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
#' @inheritSection tar_rep Replicate-specific seeds
#' @inheritParams tar_map2
#' @inheritParams tar_map2_size_raw
#' @examples
Expand Down
1 change: 1 addition & 0 deletions R/tar_map2_size_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#' @return A list of new target objects.
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
#' @inheritSection tar_rep Replicate-specific seeds
#' @param size Positive integer of length 1,
#' maximum number of rows in each batch for
#' the downstream (`command2`) targets. Batches
Expand Down
1 change: 1 addition & 0 deletions R/tar_map_rep.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#' @return A list of new target objects.
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
#' @inheritSection tar_rep Replicate-specific seeds
#' @param command R code for a single replicate. Must return
#' a data frame.
#' @param columns A tidyselect expression to select which columns of `values`
Expand Down
12 changes: 10 additions & 2 deletions R/tar_map_rep_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' @return A list of new target objects.
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
#' @inheritSection tar_rep Replicate-specific seeds
#' @param command Language object, R code for a single replicate. Must return
#' a data frame.
#' @param names Language object with a tidyselect expression
Expand Down Expand Up @@ -92,8 +93,9 @@ tar_map_rep_raw <- function(
targets::tar_assert_scalar(name)
targets::tar_assert_chr(name)
targets::tar_assert_nzchar(name)
targets::tar_assert_df(values %|||% data.frame())
if (!is.null(values)) {
assert_values_list(values)
values <- tibble::as_tibble(values)
targets::tar_assert_ge(nrow(values), 1L)
}
if (!is.null(names)) {
Expand Down Expand Up @@ -131,9 +133,15 @@ tar_map_rep_raw <- function(
retrieval = "main",
cue = cue
)
command_dynamic <- tar_rep_command_target(
command = command,
name_batch = name_batch,
reps = reps,
iteration = "vector"
)
target_dynamic <- targets::tar_target_raw(
name = name,
command = tar_rep_command_target(command, name_batch, reps, "vector"),
command = command_dynamic,
pattern = tar_rep_pattern(name_batch),
packages = packages,
library = library,
Expand Down
6 changes: 3 additions & 3 deletions R/tar_package.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' convenient helper functions to create specialized targets, making
#' pipelines in targets easier and cleaner to write and understand.
#' @name tarchetypes-package
#' @importFrom digest digest
#' @importFrom digest digest digest2int
#' @importFrom dplyr bind_rows mutate select
#' @importFrom fs dir_create is_dir path_ext path_ext_remove
#' path_ext_set path_rel
Expand All @@ -24,7 +24,7 @@
#' tar_assert_match tar_assert_nonmissing tar_assert_positive
#' tar_assert_scalar tar_assert_target tar_assert_target_list
#' tar_assert_true tar_assert_unique tar_assert_unique_targets
#' tar_cue tar_deparse_language tar_deparse_safe
#' tar_cue tar_definition tar_deparse_language tar_deparse_safe
#' tar_dir tar_envir tar_exist_meta
#' tar_group tar_load tar_meta tar_option_get tar_read tar_runtime_object
#' tar_script tar_target tar_target_raw tar_test tar_tidy_eval
Expand All @@ -34,7 +34,7 @@
#' last_col matches num_range one_of starts_with
#' @importFrom utils download.file globalVariables head
#' @importFrom vctrs vec_c vec_rbind
#' @importFrom withr local_options with_options
#' @importFrom withr local_options with_options local_seed
NULL

utils::globalVariables(".x")
20 changes: 17 additions & 3 deletions R/tar_quarto_rep_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,15 @@ tar_quarto_rep_run <- function(
rm(deps)
gc()
execute_params <- split(execute_params, f = seq_len(nrow(execute_params)))
out <- map(execute_params, ~tar_quarto_rep_rep(args, .x, default_output_file))
out <- map(
seq_along(execute_params),
~tar_quarto_rep_rep(
rep = .x,
args = args,
execute_params = execute_params,
default_output_file = default_output_file
)
)
out <- unname(unlist(out))
support <- sprintf("%s_files", fs::path_ext_remove(basename(args$input)))
extra_files <- if_any(
Expand All @@ -430,13 +438,19 @@ tar_quarto_rep_run <- function(
unique(c(out, args$input, extra_files))
}

tar_quarto_rep_rep <- function(args, execute_params, default_output_file) {
tar_quarto_rep_rep <- function(rep, args, execute_params, default_output_file) {
withr::local_options(list(crayon.enabled = NULL))
pedigree <- targets::tar_definition()$pedigree
name <- pedigree$parent
batch <- pedigree$index
reps <- length(execute_params)
seed <- produce_seed_rep(name = name, batch = batch, rep = rep, reps = reps)
execute_params <- execute_params[[rep]]
args$output_file <- basename(execute_params[["output_file"]])
args$execute_params <- execute_params
args$execute_params[["output_file"]] <- NULL
args$execute_params[["tar_group"]] <- NULL
do.call(quarto::quarto_render, args)
withr::with_seed(seed = seed, code = do.call(quarto::quarto_render, args))
sort(as.character(fs::path_rel(unlist(args$output_file))))
}

Expand Down
21 changes: 17 additions & 4 deletions R/tar_render_rep_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -253,21 +253,34 @@ tar_render_rep_run <- function(path, params, args, deps) {
rm(deps)
gc()
envir <- parent.frame()
params <- split(params, f = seq_len(nrow(params)))
args$envir <- args$envir %|||% targets::tar_envir(default = envir)
force(args$envir)
unname(unlist(map(params, ~tar_render_rep_rep(path, .x, args))))
params <- split(params, f = seq_len(nrow(params)))
out <- map(
seq_along(params),
~tar_render_rep_rep(path = path, rep = .x, params = params, args = args)
)
unname(unlist(out))
}

tar_render_rep_rep <- function(path, params, args) {
tar_render_rep_rep <- function(rep, path, params, args) {
withr::local_options(list(crayon.enabled = NULL))
pedigree <- targets::tar_definition()$pedigree
name <- pedigree$parent
batch <- pedigree$index
reps <- length(params)
seed <- produce_seed_rep(name = name, batch = batch, rep = rep, reps = reps)
params <- params[[rep]]
default_path <- tar_render_rep_default_path(path, params)
args$output_file <- params[["output_file"]] %|||% default_path
args$params <- params
args$params[["output_file"]] <- NULL
args$params[["tar_group"]] <- NULL
args$intermediates_dir <- fs::dir_create(tempfile())
output <- do.call(rmarkdown::render, args)
output <- withr::with_seed(
seed = seed,
code = do.call(rmarkdown::render, args)
)
tar_render_paths(output, path)
}

Expand Down
33 changes: 30 additions & 3 deletions R/tar_rep.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' of heavily dynamically-branched workflows:
#' <https://books.ropensci.org/targets/dynamic.html#batching>.
#' [tar_rep()] replicates a command in strategically sized batches.
#' @export
#' @family branching
#' @details `tar_rep()` and `tar_rep_raw()` each create two targets:
#' an upstream local stem
Expand All @@ -11,15 +12,41 @@
#' Each batch/branch replicates the command a certain number of times.
#' If the command returns a list or data frame, then
#' the targets from `tar_rep()` will try to append new elements/columns
#' `tar_batch` and `tar_rep` to the output
#' to denote the batch and rep-within-batch IDs, respectively.
#' `tar_batch`, `tar_rep`, and `tar_seed` to the output
#' to denote the batch, rep-within-batch index, and rep-specific seed,
#' respectively.
#'
#' Both batches and reps within each batch
#' are aggregated according to the method you specify
#' in the `iteration` argument. If `"list"`, reps and batches
#' are aggregated with `list()`. If `"vector"`,
#' then `vctrs::vec_c()`. If `"group"`, then `vctrs::vec_rbind()`.
#' @export
#' @section Replicate-specific seeds:
#' In ordinary pipelines, each target has its own unique deterministic
#' pseudo-random number generator seed derived from its target name.
#' In batched replication, however, each batch is a target with multiple
#' replications within that batch. That is why [tar_rep()]
#' and friends give each *replicate* its own unique seed.
#' Each replicate-specific seed is created
#' based on the dynamic parent target name,
#' batch index, and rep-within-batch index,
#' and the seed is set just before the rep runs.
#' Rep-specific seeds are invariant to batching structure. In other words,
#' `tar_rep(name = x, command = rnorm(1), batches = 100, reps = 1, ...)`
#' produces the same numerical output as
#' `tar_rep(name = x, command = rnorm(1), batches = 10, reps = 10, ...)`
#' (but with different batch names).
#' Other target factories with this seed scheme are [tar_rep2()],
#' [tar_map_rep()], [tar_map2_count()], [tar_map2_size()],
#' [tar_render_rep()], and [tar_quarto_rep()].
#' For the `tar_map2_*()` functions,
#' it is possible to manually supply your own seeds
#' through the `command1` argument and then invoke them in your
#' custom code for `command2` (`set.seed()`, `withr::with_seed()`,
#' or `withr::local_seed()`). For [tar_render_rep()],
#' custom seeds can be supplied to the `params` argument
#' and then invoked in the individual R Markdown reports.
#' Likewise with [tar_quarto_rep()] and the `execute_params` argument.
#' @return A list of two targets, one upstream and one downstream.
#' The upstream target returns a numeric index of batch ids,
#' and the downstream one dynamically maps over the batch ids
Expand Down
1 change: 1 addition & 0 deletions R/tar_rep2.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @return A new target object to perform batched computation.
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
#' @inheritSection tar_rep Replicate-specific seeds
#' @inheritParams targets::tar_target
#' @param ... Symbols to name one or more upstream batched targets
#' created by [tar_rep()].
Expand Down
20 changes: 15 additions & 5 deletions R/tar_rep2_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#' downstream of [tar_rep()].
#' See the "Target objects" section for background.
#' @inheritSection tar_map Target objects
#' @inheritSection tar_rep Replicate-specific seeds
#' @inheritParams targets::tar_target
#' @param targets Character vector of names of upstream batched targets
#' created by [tar_rep()].
Expand Down Expand Up @@ -132,16 +133,25 @@ tar_rep2_run <- function(command, batches, iteration) {
seq_len(reps),
tar_rep2_run_rep,
command = command,
batches = batches
batches = batches,
reps = reps
)
tar_rep_bind(out, iteration)
}

tar_rep2_run_rep <- function(index, command, batches) {
tar_rep2_run_rep <- function(index, command, batches, reps) {
name <- targets::tar_definition()$pedigree$parent
slice <- slice_batches(batches, index)
out <- eval(command, envir = slice, enclos = targets::tar_envir())
out$tar_batch <- slice[[1]]$tar_batch[1]
out$tar_rep <- slice[[1]]$tar_rep[1]
batch <- slice[[1]]$tar_batch[1]
rep <- slice[[1]]$tar_rep[1]
seed <- produce_seed_rep(name = name, batch = batch, rep = rep, reps = reps)
out <- withr::with_seed(
seed = seed,
code = eval(command, envir = slice, enclos = targets::tar_envir())
)
out$tar_batch <- batch
out$tar_rep <- rep
out$tar_seed <- seed
out
}

Expand Down
Loading

0 comments on commit 74969b2

Please sign in to comment.