Passing arguments to callr::r_bg()
bakerrr uses callr::r_bg() under the hood. All r_bg() arguments can be
passed via the bg_args argument of bakerrr().
Set up a sample function and its args_list
# Example: Data processing pipeline
# Simulated data-processing job used to demonstrate bakerrr.
#
# Arguments:
#   dataset_id  Single character string. The text left after removing "DS_"
#               must parse as an integer; it is used as the RNG seed so each
#               job is reproducible.
#   filter_col  Single string: one of "score", "value" or "rating".
#   threshold   Single positive number; rows with filter_col >= threshold
#               are kept.
#
# Returns a list with dataset_id, total_records, filtered_records,
# filter_applied, timestamp and preview (first 3 filtered rows).
#
# Stops with an error if any argument fails validation.
# Side effects: reseeds the RNG and sleeps for 0.5-2 seconds.
process_data <- function(dataset_id, filter_col, threshold) {
  allowed_cols <- c("score", "value", "rating")

  # Input validation. Type, length and NA are checked before any comparison
  # so that malformed input yields the intended message rather than a
  # cryptic failure inside `if`.
  if (!is.character(dataset_id) || length(dataset_id) != 1L ||
        is.na(dataset_id)) {
    stop("Invalid or missing dataset_id: must be character and non-null.")
  }
  if (!is.character(filter_col) || length(filter_col) != 1L ||
        !filter_col %in% allowed_cols) {
    stop("filter_col must be one of 'score', 'value', or 'rating'.")
  }
  if (!is.numeric(threshold) || length(threshold) != 1L ||
        is.na(threshold) || threshold <= 0) {
    stop("threshold must be positive numeric.")
  }

  # Derive the per-job seed. The coercion warning is suppressed because an
  # unparsable suffix is reported explicitly just below.
  seed <- suppressWarnings(as.integer(sub("DS_", "", dataset_id)))
  if (is.na(seed)) {
    stop("dataset_id must contain an integer after 'DS_' (e.g. 'DS_7').")
  }

  # Simulate loading data
  set.seed(seed) # Reproducible per job
  n <- sample(100:500, 1)
  data <- tibble::tibble(
    id = seq_len(n),
    score = round(runif(n, 0, 100)),
    value = round(rnorm(n, 50, 15)),
    rating = sample(1:10, n, replace = TRUE)
  )

  # Apply filter; `.data[[...]]` looks the column up by its string name
  filtered <- dplyr::filter(data, .data[[filter_col]] >= threshold)

  Sys.sleep(runif(1, 0.5, 2)) # Simulate processing time

  list(
    dataset_id = dataset_id,
    total_records = n,
    filtered_records = nrow(filtered),
    filter_applied = paste(filter_col, ">=", threshold),
    timestamp = Sys.time(),
    preview = head(filtered, 3)
  )
}
# Build twelve argument sets, one per job. The seed is fixed so the randomly
# drawn filter column and threshold are the same on every run.
set.seed(42)
args_list <- purrr::map(
  1:12,
  \(i) list(
    dataset_id = paste0("DS_", i),
    filter_col = sample(c("score", "value", "rating"), 1),
    threshold = sample(50:95, 1)
  )
)

# Overwrite jobs 5, 7 and 9 with inputs that process_data() rejects, so the
# run also demonstrates how failures are reported.
args_list[c(5, 7, 9)] <- list(
  # Job 5 -- error: dataset_id missing
  list(dataset_id = NULL, filter_col = "score", threshold = 75),
  # Job 7 -- error: invalid filter_col
  list(dataset_id = "DS_7", filter_col = "unknown_col", threshold = 80),
  # Job 9 -- error: invalid threshold
  list(dataset_id = "DS_9", filter_col = "score", threshold = -15)
)
Write outputs and errors to file while running jobs
When creating a job with bakerrr::bakerrr(), pass any callr::r_bg()
arguments through the bg_args argument, as shown below:
# Create the job: one process_data() call per entry of args_list, using at
# most four daemons.
stirr_job <- bakerrr::bakerrr(
  fun = process_data,
  args_list = args_list,
  n_daemons = min(length(args_list), 4),
  # Everything in bg_args is handed on to callr::r_bg(); here the
  # background processes' stdout and stderr are sent to log files.
  bg_args = list(
    stdout = "bakerrr_out.log",
    stderr = "bakerrr_err.log"
  )
)

# Run the jobs. wait_for_results = TRUE is requested so that the results
# can be read immediately afterwards.
stirr_job <- bakerrr::run_jobs(stirr_job, wait_for_results = TRUE)

# Inspect the per-job results
stirr_job@results