Commit a818ea8

sebffischer committed Jan 31, 2024
1 parent 9c88bd3 commit a818ea8
Showing 8 changed files with 55 additions and 43 deletions.
R/LearnerTorchMLP.R: 29 additions & 17 deletions
@@ -9,7 +9,7 @@
#'
#' @description
#' Fully connected feed forward network with dropout after each activation function.
-#' The features can either be a single [`lazy_tensor`] or one or more numeric columns.
+#' The features can either be a single [`lazy_tensor`] or one or more numeric columns (but not both).
#'
#' @section Parameters:
#' Parameters from [`LearnerTorch`], as well as:
@@ -20,14 +20,14 @@
#' A named list with initialization arguments for the activation function.
#' This is initialized to an empty list.
#' * `neurons` :: `integer()`\cr
-#' The number of neurons per hidden layer.
-#' By default there is no hidden layer.
+#' The number of neurons per hidden layer. By default there is no hidden layer.
+#' Setting this to `c(10, 20)` would have the first hidden layer with 10 neurons and the second with 20.
#' * `p` :: `numeric(1)`\cr
-#' The dropout probability.
-#' Is initialized to `0.5`.
+#' The dropout probability. Is initialized to `0.5`.
#' * `shape` :: `integer()` or `NULL`\cr
-#' The input shape.
-#' Only needs to be present specified when there is a lazy tensor input with unknown shape.
+#' The input shape of length 2, e.g. `c(NA, 5)`.
+#' Only needs to be present when there is a lazy tensor input with unknown shape (`NULL`).
+#' Otherwise the input shape is inferred from the number of numeric features.
#'
#' @export
LearnerTorchMLP = R6Class("LearnerTorchMLP",
@@ -38,21 +38,22 @@ LearnerTorchMLP = R6Class("LearnerTorchMLP",
initialize = function(task_type, optimizer = NULL, loss = NULL, callbacks = list()) {
check_activation = crate(function(x) check_class(x, "nn_module"), .parent = topenv())
check_activation_args = crate(function(x) check_list(x, names = "unique"), .parent = topenv())
+check_neurons = crate(function(x) check_integerish(x, any.missing = FALSE, lower = 1), .parent = topenv())
+check_shape = crate(function(x) check_shape(x, null_ok = TRUE, len = 2L), .parent = topenv())

param_set = ps(
-neurons = p_uty(default = integer(0), tags = "train", custom_check = crate(function(x) {
-  check_integerish(x, any.missing = FALSE, lower = 1)
-})),
+neurons = p_uty(tags = c("train", "predict"), custom_check = check_neurons),
p = p_dbl(lower = 0, upper = 1, tags = c("required", "train")),
activation = p_uty(tags = c("required", "train"), custom_check = check_activation),
activation_args = p_uty(tags = c("required", "train"), custom_check = check_activation_args),
-shape = p_uty(default = NULL, tags = "train", custom_check = crate(function(x) {
-  check_shape(x, null_ok = TRUE)
-}, .parent = topenv()))
+shape = p_uty(tags = "train", custom_check = check_shape)
)

param_set$set_values(
activation = nn_relu,
p = 0.5,
-activation_args = list()
+activation_args = list(),
+neurons = integer(0)
)
properties = switch(task_type,
regr = character(0),
@@ -78,7 +79,7 @@ LearnerTorchMLP = R6Class("LearnerTorchMLP",
# verify_train_task was already called beforehand, so we can make some assumptions
d_out = get_nout(task)
d_in = if (single_lazy_tensor(task)) {
-get_unique_shape(task, param_vals$shape)[2L]
+private$.get_input_shape(task, param_vals$shape)[2L]
} else {
length(task$feature_names)
}
@@ -87,7 +88,7 @@ LearnerTorchMLP = R6Class("LearnerTorchMLP",
},
.dataset = function(task, param_vals) {
if (single_lazy_tensor(task)) {
-param_vals$shape = get_unique_shape(task, param_vals$shape)
+param_vals$shape = private$.get_input_shape(task, param_vals$shape)
dataset_ltnsr(task, param_vals)
} else {
dataset_num(task, param_vals)
@@ -99,9 +100,20 @@ LearnerTorchMLP = R6Class("LearnerTorchMLP",
assert(check_true(lazy_tensor_input), check_false(some(features, function(x) x == "lazy_tensor")))

if (lazy_tensor_input) {
-shape = get_unique_shape(task, param_vals$shape)
+shape = private$.get_input_shape(task, param_vals$shape)
assert_shape(shape, len = 2L)
}
},
+.get_input_shape = function(s1, s2) {
+  if (test_class(s1, "Task")) {
+    assert_true(identical(s1$feature_types[, "type"][[1L]], "lazy_tensor"))
+    s1 = dd(s1$data(s1$row_roles$use[1L], s1$feature_names)[[1L]])$pointer_shape
+  }
+  assert_shape(s1, null_ok = TRUE)
+  assert_shape(s2, null_ok = TRUE)
+  s = unique(discard(list(s1, s2), is.null))
+  assert_true(length(s) == 1L)
+  s[[1L]]
+}
)
)
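
For orientation, here is a minimal usage sketch (not part of the commit) of how the parameters documented above fit together. It assumes mlr3 and mlr3torch are attached and the learner is constructed via its dictionary key; the epochs and batch_size values are arbitrary choices, not defaults.

library(mlr3)
library(mlr3torch)

# neurons is now initialized to integer(0), i.e. no hidden layer; here we
# request two hidden layers with 10 and 20 neurons and keep the initialized
# dropout probability of 0.5.
learner = lrn("classif.mlp",
  neurons = c(10, 20),
  p = 0.5,
  epochs = 10,     # assumed training budget
  batch_size = 32  # assumed batch size
)

# With numeric features the input shape is inferred from the task, so `shape`
# only needs to be set for a single lazy_tensor feature with unknown shape.
learner$train(tsk("iris"))
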
R/preprocess.R: 4 additions & 0 deletions
@@ -37,6 +37,10 @@ unchanged_shapes_image = function(shapes_in, param_vals, task) {
shapes_in
}

+unchanged_shapes = function(shapes_in, param_vals, task) {
+  shapes_in
+}
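
A tiny sketch (not part of the commit) of what this helper does, assuming the usual `shapes_in` format of a named list of integer shapes with an NA batch dimension: it is the identity on shapes, for preprocessing steps that leave tensor dimensions untouched.

# Hypothetical input: one image-shaped tensor of shape (batch, 3, 64, 64).
unchanged_shapes(
  shapes_in = list(input = c(NA, 3, 64, 64)),
  param_vals = list(),
  task = NULL
)
# returns list(input = c(NA, 3, 64, 64)) unchanged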

#' @title PipeOpPreprocTorchTrafoNop
#' @usage NULL
#' @name mlr_pipeops_preproc_torch.trafo_nop
R/task_dataset.R: 1 addition & 1 deletion
@@ -165,7 +165,7 @@ dataset_num = function(task, param_vals) {
names(ingress) = "input"
task_dataset(
task = task,
-feature_ingress_tokens = ingress,
+feature_ingress_tokens = md$ingress,
target_batchgetter = get_target_batchgetter(task$task_type),
device = param_vals$device
)
R/utils.R: 0 additions & 17 deletions
@@ -194,20 +194,3 @@ auto_cache_lazy_tensors = function(lts) {
any(duplicated(map_chr(lts, function(x) dd(x)$dataset_hash)))
}

-unchanged_shapes = function(shapes_in, param_vals, task) {
-  shapes_in
-}
-
-get_unique_shape = function(s1, s2) {
-  if (test_class(s1, "Task")) {
-    assert_true(identical(s1$feature_types[, "type"][[1L]], "lazy_tensor"))
-    s1 = dd(s1$data(s1$row_roles$use[1L], s1$feature_names)[[1L]])$pointer_shape
-  }
-  assert_shape(s1, null_ok = TRUE)
-  assert_shape(s2, null_ok = TRUE)
-  s = unique(discard(list(s1, s2), is.null))
-  assert_true(length(s) == 1L)
-  s[[1L]]
-}
-
-assert_compatible_shapes = get_unique_shape
R/zzz.R: 1 addition & 1 deletion
@@ -95,9 +95,9 @@ register_mlr3 = function() {

register_mlr3pipelines = function() {
mlr_pipeops = utils::getFromNamespace("mlr_pipeops", ns = "mlr3pipelines")
+add = mlr_pipeops$add # nolint
iwalk(as.list(mlr3torch_pipeops), function(value, name) {
# metainf is quoted by pipelines
-add = mlr_pipeops$add
eval(call("add", quote(name), quote(value$constructor), value$metainf))
})
mlr_reflections$pipeops$valid_tags = unique(c(mlr_reflections$pipeops$valid_tags, mlr3torch_pipeop_tags))
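
The explicit call construction deals with lazy evaluation: since `mlr_pipeops$add()` quotes its `metainf` argument (per the comment above), passing `value$metainf` directly from inside the `iwalk()` closure would record the expression rather than its value. A generic sketch of the pattern, using a stand-in quoting function `f` (hypothetical, not the pipelines API):

f = function(x) substitute(x)  # quotes its argument, like metainf
g = function(v) f(v)           # calling f from a wrapper ...
g(list(1))                     # ... yields the symbol `v`, not the value
eval(call("f", list(1)))       # building the call first splices in the value
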
benchmarks/dataset.R: 11 additions & 0 deletions (new file)
@@ -0,0 +1,11 @@
devtools::load_all("~/mlr/mlr3")
devtools::load_all("~/mlr/mlr3torch")

lazy_iris = tsk("lazy_iris")
dt = lazy_iris$data(cols = "x")$x
dataset = dt[[1L]][[2L]]$dataset

dt = do.call(c, args = lapply(1:1000, function(i) dt))


profvis::profvis({materialize_internal(dt, rbind = TRUE)})
man/mlr_learners.mlp.Rd: 7 additions & 7 deletions

Some generated files are not rendered by default.

tests/testthat/test_shape.R: 2 additions & 0 deletions
@@ -19,4 +19,6 @@ test_that("assert_shape and friends", {
expect_error(assert_shapes(list(c(1, 2), c(2, 3))), regexp = NA)
expect_error(assert_shapes(list(c(4, 5), c(2, 3)), unknown_batch = TRUE))
expect_error(assert_shape(c(NA, 1, 2), len = 2))
+# NULL is ok even when len is specified
+expect_true(check_shape(NULL, null_ok = TRUE, len = 2))
})
