From a818ea83a12b3164c1a3f4622b0e4fa05364ebd2 Mon Sep 17 00:00:00 2001
From: Sebastian Fischer
Date: Wed, 31 Jan 2024 08:36:58 +0100
Subject: [PATCH] ...

---
 R/LearnerTorchMLP.R         | 46 +++++++++++++++++++++++--------------
 R/preprocess.R              |  4 ++++
 R/task_dataset.R            |  2 +-
 R/utils.R                   | 17 --------------
 R/zzz.R                     |  2 +-
 benchmarks/dataset.R        | 11 +++++++++
 man/mlr_learners.mlp.Rd     | 14 +++++------
 tests/testthat/test_shape.R |  2 ++
 8 files changed, 55 insertions(+), 43 deletions(-)
 create mode 100644 benchmarks/dataset.R

diff --git a/R/LearnerTorchMLP.R b/R/LearnerTorchMLP.R
index 767484c5..e3a0c108 100644
--- a/R/LearnerTorchMLP.R
+++ b/R/LearnerTorchMLP.R
@@ -9,7 +9,7 @@
 #'
 #' @description
 #' Fully connected feed forward network with dropout after each activation function.
-#' The features can either be a single [`lazy_tensor`] or one or more numeric columns.
+#' The features can either be a single [`lazy_tensor`] or one or more numeric columns (but not both).
 #'
 #' @section Parameters:
 #' Parameters from [`LearnerTorch`], as well as:
@@ -20,14 +20,14 @@
 #'   A named list with initialization arguments for the activation function.
 #'   This is intialized to an empty list.
 #' * `neurons` :: `integer()`\cr
-#'   The number of neurons per hidden layer.
-#'   By default there is no hidden layer.
+#'   The number of neurons per hidden layer. By default there is no hidden layer.
+#'   Setting this to `c(10, 20)` would create a first hidden layer with 10 neurons and a second with 20.
 #' * `p` :: `numeric(1)`\cr
-#'   The dropout probability.
-#'   Is initialized to `0.5`.
+#'   The dropout probability. Is initialized to `0.5`.
 #' * `shape` :: `integer()` or `NULL`\cr
-#'   The input shape.
-#'   Only needs to be present specified when there is a lazy tensor input with unknown shape.
+#'   The input shape of length 2, e.g. `c(NA, 5)`.
+#'   Only needs to be specified when there is a lazy tensor input with unknown shape (`NULL`).
+#'   Otherwise the input shape is inferred from the number of numeric features.
 #'
 #' @export
 LearnerTorchMLP = R6Class("LearnerTorchMLP",
@@ -38,21 +38,22 @@ LearnerTorchMLP = R6Class("LearnerTorchMLP",
     initialize = function(task_type, optimizer = NULL, loss = NULL, callbacks = list()) {
       check_activation = crate(function(x) check_class(x, "nn_module"), .parent = topenv())
       check_activation_args = crate(function(x) check_list(x, names = "unique"), .parent = topenv())
+      check_neurons = crate(function(x) check_integerish(x, any.missing = FALSE, lower = 1), .parent = topenv())
+      check_shape = crate(function(x) check_shape(x, null_ok = TRUE, len = 2L), .parent = topenv())
+
       param_set = ps(
-        neurons = p_uty(default = integer(0), tags = "train", custom_check = crate(function(x) {
-          check_integerish(x, any.missing = FALSE, lower = 1)
-        })),
+        neurons = p_uty(tags = "train", custom_check = check_neurons),
         p = p_dbl(lower = 0, upper = 1, tags = c("required", "train")),
         activation = p_uty(tags = c("required", "train"), custom_check = check_activation),
         activation_args = p_uty(tags = c("required", "train"), custom_check = check_activation_args),
-        shape = p_uty(default = NULL, tags = "train", custom_check = crate(function(x) {
-          check_shape(x, null_ok = TRUE)
-        }, .parent = topenv()))
+        shape = p_uty(tags = "train", custom_check = check_shape)
       )
+
       param_set$set_values(
         activation = nn_relu,
         p = 0.5,
-        activation_args = list()
+        activation_args = list(),
+        neurons = integer(0)
       )
       properties = switch(task_type,
         regr = character(0),
@@ -78,7 +79,7 @@ LearnerTorchMLP = R6Class("LearnerTorchMLP",
       # verify_train_task was already called beforehand, so we can make some assumptions
       d_out = get_nout(task)
       d_in = if (single_lazy_tensor(task)) {
-        get_unique_shape(task, param_vals$shape)[2L]
+        private$.get_input_shape(task, param_vals$shape)[2L]
       } else {
         length(task$feature_names)
       }
@@ -87,7 +88,7 @@ LearnerTorchMLP = R6Class("LearnerTorchMLP",
     },
     .dataset = function(task, param_vals) {
       if (single_lazy_tensor(task)) {
-        param_vals$shape = get_unique_shape(task, param_vals$shape)
+        param_vals$shape = private$.get_input_shape(task, param_vals$shape)
         dataset_ltnsr(task, param_vals)
       } else {
         dataset_num(task, param_vals)
@@ -99,9 +100,20 @@
       assert(check_true(lazy_tensor_input), check_false(some(features, function(x) x == "lazy_tensor")))
 
       if (lazy_tensor_input) {
-        shape = get_unique_shape(task, param_vals$shape)
+        shape = private$.get_input_shape(task, param_vals$shape)
         assert_shape(shape, len = 2L)
       }
-    }
+    },
+    .get_input_shape = function(s1, s2) {
+      if (test_class(s1, "Task")) {
+        assert_true(identical(s1$feature_types[, "type"][[1L]], "lazy_tensor"))
+        s1 = dd(s1$data(s1$row_roles$use[1L], s1$feature_names)[[1L]])$pointer_shape
+      }
+      assert_shape(s1, null_ok = TRUE)
+      assert_shape(s2, null_ok = TRUE)
+      s = unique(discard(list(s1, s2), is.null))
+      assert_true(length(s) == 1L)
+      s[[1L]]
+    }
   )
 )
diff --git a/R/preprocess.R b/R/preprocess.R
index e19d21cf..0d52fdea 100644
--- a/R/preprocess.R
+++ b/R/preprocess.R
@@ -37,6 +37,10 @@ unchanged_shapes_image = function(shapes_in, param_vals, task) {
   shapes_in
 }
 
+unchanged_shapes = function(shapes_in, param_vals, task) {
+  shapes_in
+}
+
 #' @title PipeOpPreprocTorchTrafoNop
 #' @usage NULL
 #' @name mlr_pipeops_preproc_torch.trafo_nop
diff --git a/R/task_dataset.R b/R/task_dataset.R
index c47bbeda..f0bf678e 100644
--- a/R/task_dataset.R
+++ b/R/task_dataset.R
@@ -165,7 +165,7 @@ dataset_num = function(task, param_vals) {
   names(ingress) = "input"
   task_dataset(
     task = task,
-    feature_ingress_tokens = md$ingress,
+    feature_ingress_tokens = ingress,
     target_batchgetter = get_target_batchgetter(task$task_type),
     device = param_vals$device
   )
diff --git a/R/utils.R b/R/utils.R
index d985b141..15ac4f03 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -194,20 +194,3 @@ auto_cache_lazy_tensors = function(lts) {
   any(duplicated(map_chr(lts, function(x) dd(x)$dataset_hash)))
 }
 
-unchanged_shapes = function(shapes_in, param_vals, task) {
-  shapes_in
-}
-
-get_unique_shape = function(s1, s2) {
-  if (test_class(s1, "Task")) {
-    assert_true(identical(s1$feature_types[, "type"][[1L]], "lazy_tensor"))
-    s1 = dd(s1$data(s1$row_roles$use[1L], s1$feature_names)[[1L]])$pointer_shape
-  }
-  assert_shape(s1, null_ok = TRUE)
-  assert_shape(s2, null_ok = TRUE)
-  s = unique(discard(list(s1, s2), is.null))
-  assert_true(length(s) == 1L)
-  s[[1L]]
-}
-
-assert_compatible_shapes = get_unique_shape
diff --git a/R/zzz.R b/R/zzz.R
index 8f7f09e4..2cdfaf59 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -95,9 +95,9 @@ register_mlr3 = function() {
 
 register_mlr3pipelines = function() {
   mlr_pipeops = utils::getFromNamespace("mlr_pipeops", ns = "mlr3pipelines")
+  add = mlr_pipeops$add # nolint
   iwalk(as.list(mlr3torch_pipeops), function(value, name) {
     # metainf is quoted by pipelines
-    add = mlr_pipeops$add
     eval(call("add", quote(name), quote(value$constructor), value$metainf))
   })
   mlr_reflections$pipeops$valid_tags = unique(c(mlr_reflections$pipeops$valid_tags, mlr3torch_pipeop_tags))
diff --git a/benchmarks/dataset.R b/benchmarks/dataset.R
new file mode 100644
index 00000000..bc227ea4
--- /dev/null
+++ b/benchmarks/dataset.R
@@ -0,0 +1,11 @@
+devtools::load_all("~/mlr/mlr3")
+devtools::load_all("~/mlr/mlr3torch")
+
+lazy_iris = tsk("lazy_iris")
+dt = lazy_iris$data(cols = "x")$x
+dataset = dt[[1L]][[2L]]$dataset
+
+dt = do.call(c, args = lapply(1:1000, function(i) dt))
+
+
+profvis::profvis({materialize_internal(dt, rbind = TRUE)})
diff --git a/man/mlr_learners.mlp.Rd b/man/mlr_learners.mlp.Rd
index a18ef202..fcdb2ef2 100644
--- a/man/mlr_learners.mlp.Rd
+++ b/man/mlr_learners.mlp.Rd
@@ -6,7 +6,7 @@
 \title{My Little Pony}
 \description{
 Fully connected feed forward network with dropout after each activation function.
-The features can either be a single \code{\link{lazy_tensor}} or one or more numeric columns.
+The features can either be a single \code{\link{lazy_tensor}} or one or more numeric columns (but not both).
 }
 \section{Dictionary}{
 
@@ -53,14 +53,14 @@ The activation function. Is initialized to \code{\link{nn_relu}}.
 A named list with initialization arguments for the activation function.
 This is intialized to an empty list.
 \item \code{neurons} :: \code{integer()}\cr
-The number of neurons per hidden layer.
-By default there is no hidden layer.
+The number of neurons per hidden layer. By default there is no hidden layer.
+Setting this to \code{c(10, 20)} would create a first hidden layer with 10 neurons and a second with 20.
 \item \code{p} :: \code{numeric(1)}\cr
-The dropout probability.
-Is initialized to \code{0.5}.
+The dropout probability. Is initialized to \code{0.5}.
 \item \code{shape} :: \code{integer()} or \code{NULL}\cr
-The input shape.
-Only needs to be present specified when there is a lazy tensor input with unknown shape.
+The input shape of length 2, e.g. \code{c(NA, 5)}.
+Only needs to be specified when there is a lazy tensor input with unknown shape (\code{NULL}).
+Otherwise the input shape is inferred from the number of numeric features.
 }
 }
 
diff --git a/tests/testthat/test_shape.R b/tests/testthat/test_shape.R
index e5a68d8f..dbafcb4d 100644
--- a/tests/testthat/test_shape.R
+++ b/tests/testthat/test_shape.R
@@ -19,4 +19,6 @@ test_that("assert_shape and friends", {
   expect_error(assert_shapes(list(c(1, 2), c(2, 3))), regexp = NA)
   expect_error(assert_shapes(list(c(4, 5), c(2, 3)), unknown_batch = TRUE))
   expect_error(assert_shape(c(NA, 1, 2), len = 2))
+  # NULL is ok even when len is specified
+  expect_true(check_shape(NULL, null_ok = TRUE, len = 2))
 })
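
A minimal usage sketch of the parameter behaviour documented above. This is
not part of the patch itself; it assumes the "classif.mlp" learner key, the
"iris" and "lazy_iris" example tasks, and the `epochs`/`batch_size` train
parameters that mlr3torch provides.

    library(mlr3)
    library(mlr3torch)

    # Numeric features: the input shape is inferred from the number of
    # feature columns, so `shape` can stay unset.
    learner = lrn("classif.mlp",
      neurons = c(10, 20), # first hidden layer: 10 neurons, second: 20
      p = 0.5,             # dropout probability after each activation
      epochs = 10L,
      batch_size = 32L
    )
    learner$train(tsk("iris"))

    # A single lazy_tensor feature: if its shape is unknown, `shape` must be
    # given as a length-2 shape whose batch dimension is NA, e.g. c(NA, 4).
    learner$param_set$set_values(shape = c(NA, 4))
    learner$train(tsk("lazy_iris"))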