Skip to content

Commit

Permalink
Options for parallelization (#813)
Browse files Browse the repository at this point in the history
  • Loading branch information
mllg authored Jul 20, 2022
1 parent 2dcea2f commit a791a21
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 46 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mlr3
Title: Machine Learning in R - Next Generation
Version: 0.13.3-9000
Version: 0.13.4
Authors@R:
c(person(given = "Michel",
family = "Lang",
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* Added `head()` and `tail()` methods for `Task`.
* Improved printing of multiple objects.


# mlr3 0.13.3

* Most objects now have a new (optional) field `label`, i.e. `Task`,
Expand Down
25 changes: 4 additions & 21 deletions R/benchmark.R
Original file line number Diff line number Diff line change
Expand Up @@ -158,27 +158,10 @@ benchmark = function(design, store_models = FALSE, store_backends = TRUE, encaps
set(grid, j = "mode", value = hotstart_grid$mode)
}

if (getOption("mlr3.debug", FALSE)) {
lg$info("Running benchmark() sequentially in debug mode with %i iterations", n)

res = mapply(workhorse,
task = grid$task, learner = grid$learner, resampling = grid$resampling, iteration = grid$iteration,
mode = grid$mode,
MoreArgs = list(store_models = store_models, lgr_threshold = lgr_threshold, pb = pb),
SIMPLIFY = FALSE, USE.NAMES = FALSE
)
} else {
lg$debug("Running benchmark() via future with %i iterations", n)

res = future.apply::future_mapply(workhorse,
task = grid$task, learner = grid$learner, resampling = grid$resampling, iteration = grid$iteration,
mode = grid$mode,
MoreArgs = list(store_models = store_models, lgr_threshold = lgr_threshold, pb = pb),
SIMPLIFY = FALSE, USE.NAMES = FALSE, future.globals = FALSE,
future.scheduling = structure(TRUE, ordering = "random"), future.packages = "mlr3", future.seed = TRUE,
future.stdout = future_stdout()
)
}
res = future_map(n, workhorse,
task = grid$task, learner = grid$learner, resampling = grid$resampling, iteration = grid$iteration, mode = grid$mode,
MoreArgs = list(store_models = store_models, lgr_threshold = lgr_threshold, pb = pb)
)

grid = insert_named(grid, list(
learner_state = map(res, "learner_state"),
Expand Down
18 changes: 14 additions & 4 deletions R/helper_exec.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,20 @@ set_encapsulation = function(learners, encapsulate) {
learners
}

future_stdout = function() {
if (inherits(plan(), "sequential")) {
NA
# Shared execution helper: calls `FUN` element-wise over the vectors passed in
# `...` (with `MoreArgs` passed unchanged to every call) and returns the results
# as an unnamed list.
#
# Arguments:
#   n        Number of iterations; only used for the log messages.
#   FUN      Function to call for each iteration.
#   ...      Vectors/lists to iterate over in parallel, forwarded to the mapper.
#   MoreArgs List of additional arguments passed as-is to every call of `FUN`.
#
# Behavior is controlled by the following options:
#   "mlr3.debug"           If TRUE, run sequentially via mapply() instead of future.
#   "mlr3.exec_random"     If TRUE (default), request random ordering of the
#                          iterations unless the future plan is sequential.
#   "mlr3.exec_chunk_size" Value passed as `future.chunk.size` (default 1).
future_map = function(n, FUN, ..., MoreArgs = list()) {
  if (getOption("mlr3.debug", FALSE)) {
    lg$info("Running experiments sequentially in debug mode with %i iterations", n)
    mapply(FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = FALSE, USE.NAMES = FALSE)
  } else {
    is_sequential = inherits(plan(), "sequential")
    # Random ordering is only requested when the plan is not sequential.
    # NOTE(review): future.apply documents that `future.chunk.size` takes
    # precedence over `future.scheduling` when it is non-NULL -- confirm that the
    # "ordering" attribute set here is still honoured when both are supplied.
    scheduling = if (!is_sequential && isTRUE(getOption("mlr3.exec_random", TRUE))) structure(TRUE, ordering = "random") else TRUE
    chunk_size = getOption("mlr3.exec_chunk_size", 1)
    # NA for a sequential plan, TRUE otherwise; forwarded as `future.stdout`.
    stdout = if (is_sequential) NA else TRUE

    # "%i" (not "%n"): "%n" is not a valid sprintf() conversion in R and would
    # raise an error as soon as this message is actually formatted.
    lg$debug("Running resample() via future with %i iterations", n)
    future.apply::future_mapply(
      FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = FALSE, USE.NAMES = FALSE,
      future.globals = FALSE, future.packages = "mlr3", future.seed = TRUE,
      future.scheduling = scheduling, future.chunk.size = chunk_size, future.stdout = stdout)
  }
}
21 changes: 3 additions & 18 deletions R/resample.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,24 +105,9 @@ resample = function(task, learner, resampling, store_models = FALSE, store_backe
data.table(learner = replicate(n, learner), mode = "train")
}

if (getOption("mlr3.debug", FALSE)) {
lg$info("Running resample() sequentially in debug mode with %i iterations", n)
res = mapply(workhorse,
iteration = seq_len(n), learner = grid$learner, mode = grid$mode,
MoreArgs = list(task = task, resampling = resampling, store_models = store_models, lgr_threshold = lgr_threshold,
pb = pb), SIMPLIFY = FALSE
)
} else {
lg$debug("Running resample() via future with %i iterations", n)

res = future.apply::future_mapply(workhorse,
iteration = seq_len(n), learner = grid$learner, mode = grid$mode,
MoreArgs = list(task = task, resampling = resampling, store_models = store_models, lgr_threshold = lgr_threshold,
pb = pb),
SIMPLIFY = FALSE, future.globals = FALSE, future.scheduling = structure(TRUE, ordering = "random"),
future.packages = "mlr3", future.seed = TRUE, future.stdout = future_stdout()
)
}
res = future_map(n, workhorse, iteration = seq_len(n), learner = grid$learner, mode = grid$mode,
MoreArgs = list(task = task, resampling = resampling, store_models = store_models, lgr_threshold = lgr_threshold, pb = pb)
)

data = data.table(
task = list(task),
Expand Down
7 changes: 6 additions & 1 deletion R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,14 @@
#' * Encapsulated evaluation: \CRANpkg{evaluate}, \CRANpkg{callr} (external process)
#'
#' @section Package Options:
#' * `"mlr3.exec_random"`: Randomize the order of execution in [resample()] and [benchmark()] during
#' parallelization with \CRANpkg{future}. Defaults to `TRUE`.
#' Note that this does not affect the order of results.
#' * `"mlr3.exec_chunk_size"`: Number of iterations to perform in a single [future::future()] during
#' parallelization with \CRANpkg{future}. Defaults to 1.
#' * `"mlr3.debug"`: If set to `TRUE`, parallelization via \CRANpkg{future} is disabled to simplify
#' debugging and provide more concise tracebacks.
#' Note that results computed with debug mode enabled use a different seeding mechanism and are not reproducible.
#' Note that results computed in debug mode use a different seeding mechanism and are **not reproducible**.
#' * `"mlr3.allow_utf8_names"`: If set to `TRUE`, checks on the feature names are relaxed, allowing
#' non-ASCII characters in column names. This is an experimental and temporary option to
#' pave the way for text analysis, and will likely be removed in a future version of the package.
Expand Down
7 changes: 6 additions & 1 deletion man/mlr3-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a791a21

Please sign in to comment.