Skip to content

Commit

Permalink
rcmdcheck passes
Browse files Browse the repository at this point in the history
  • Loading branch information
sebffischer committed Jan 22, 2024
1 parent 5849806 commit da9b40d
Show file tree
Hide file tree
Showing 42 changed files with 396 additions and 264 deletions.
6 changes: 6 additions & 0 deletions .github/dependabot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ Imports:
data.table,
paradox (>= 0.11.0),
R6,
torchvision,
withr
Suggests:
callr,
Expand All @@ -61,6 +60,7 @@ Suggests:
rmarkdown,
viridis,
testthat (>= 3.0.0),
torchvision,
zip
Remotes:
r-lib/zip,
Expand Down
3 changes: 1 addition & 2 deletions R/DataDescriptor.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#' Character vector that must have the same length as the input of the graph.
#' Specifies how the data from the `dataset` is fed into the preprocessing graph.
#' @param pointer (`character(2)` | `NULL`)\cr
#' Indicating an element on which a model is. Points to an output channel within `graph`:
#' Points to an output channel within `graph`:
#' Element 1 is the `PipeOp`'s id and element 2 is that `PipeOp`'s output channel.
#' @param pointer_shape (`integer` | `NULL`)\cr
#' Shape of the output indicated by `pointer`.
Expand Down Expand Up @@ -111,7 +111,6 @@ DataDescriptor = R6Class("DataDescriptor",
# compilation
dataset_hash = calculate_hash(address(dataset))


self$dataset = dataset
self$graph = graph
self$dataset_shapes = dataset_shapes
Expand Down
40 changes: 30 additions & 10 deletions R/PipeOpTaskPreprocTorch.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
#'
#' @template param_id
#' @template param_param_vals
#' @param fn (`function`)\cr
#' @param fn (`function` or `character(2)`)\cr
#' The preprocessing function. Should not modify its input in-place.
#' If it is a `character(2)`, the first element should be the namespace and the second element the name.
#' @param packages (`character()`)\cr
#' The packages the preprocessing function depends on.
#' @param param_set ([`ParamSet`])\cr
Expand Down Expand Up @@ -178,7 +179,8 @@ PipeOpTaskPreprocTorch = R6Class("PipeOpTaskPreprocTorch",
#' Creates a new instance of this [`R6`][R6::R6Class] class.
initialize = function(fn, id = "preproc_torch", param_vals = list(), param_set = ps(), packages = character(0),
stages_init = "both", rowwise = FALSE) { # nolint
private$.fn = assert_function(fn)
assert(check_function(fn), check_character(fn, len = 2L))
private$.fn = fn
private$.rowwise = assert_flag(rowwise)

param_set = assert_param_set(param_set$clone(deep = TRUE))
Expand Down Expand Up @@ -249,6 +251,9 @@ PipeOpTaskPreprocTorch = R6Class("PipeOpTaskPreprocTorch",
#' The preprocessing function.
fn = function(rhs) {
assert_ro_binding(rhs)
if (test_character(private$.fn)) {
private$.fn = getFromNamespace(private$.fn[2L], private$.fn[1L])
}
private$.fn
},
#' @field rowwise
Expand Down Expand Up @@ -296,7 +301,7 @@ PipeOpTaskPreprocTorch = R6Class("PipeOpTaskPreprocTorch",
param_vals$affect_columns = NULL
stages = param_vals$stages
param_vals$stages = NULL
trafo = private$.fn
trafo = self$fn

fn = if (identical(stages, "both") || stage %in% stages) {
if (length(param_vals)) {
Expand Down Expand Up @@ -355,7 +360,7 @@ PipeOpTaskPreprocTorch = R6Class("PipeOpTaskPreprocTorch",
.additional_phash_input = function() {
list(
self$param_set$ids(), self$packages,
formals(private$.fn), body(private$.fn), address(environment(private$.fn))
formals(self$fn), body(self$fn), address(environment(self$fn))
)
},
.shapes_out = function(shapes_in, param_vals, task) list(NULL),
Expand All @@ -370,7 +375,7 @@ PipeOpTaskPreprocTorch = R6Class("PipeOpTaskPreprocTorch",
#' @param param_vals (`list()`)\cr
#' The parameter values.
#' @export
pipeop_preproc_torch = function(id, fn, shapes_out, param_set = NULL, param_vals = list(), packages = character(0),
pipeop_preproc_torch = function(id, fn, shapes_out = NULL, param_set = NULL, param_vals = list(), packages = character(0),
rowwise = FALSE, parent_env = parent.frame()) {
pipeop_preproc_torch_class(
id = id,
Expand All @@ -385,8 +390,6 @@ pipeop_preproc_torch = function(id, fn, shapes_out, param_set = NULL, param_vals


create_ps = function(fn) {
# TODO: could simplify this as we don't need the expression anymore
missing = alist(x = )$x
fmls = formals(fn)
param_names = names(fmls)
# we assume the first argument is for the tensor
Expand Down Expand Up @@ -436,7 +439,7 @@ create_ps = function(fn) {
#' @export
#' @returns An [`R6Class`][R6::R6Class] instance inheriting from [`PipeOpTaskPreprocTorch`]
#' @examples
#' po_example = pipeop_preproc_torch("preproc_example", function(x, a) x + a)
#' po_example = pipeop_preproc_torch("preproc_example", function(x, a) x + a, )
#' po_example
#' po_example$param_set
pipeop_preproc_torch_class = function(id, fn, shapes_out, param_set = NULL, packages = character(0),
Expand All @@ -445,6 +448,8 @@ pipeop_preproc_torch_class = function(id, fn, shapes_out, param_set = NULL, pack
check_function(shapes_out, args = c("shapes_in", "param_vals", "task"), null.ok = TRUE),
check_choice(shapes_out, c("infer", "unchanged"))
)

# we e.g. want torchvision in suggests, so we cannot already access the function.
if (identical(shapes_out, "infer")) {
shapes_out = crate(function(shapes_in, param_vals, task) {
sin = shapes_in[[1L]]
Expand All @@ -457,7 +462,7 @@ pipeop_preproc_torch_class = function(id, fn, shapes_out, param_set = NULL, pack
sin = sin[-1L]
}
tensor_in = invoke(torch_empty, .args = sin, device = torch_device("meta"))
tensor_out = tryCatch(invoke(private$.fn, tensor_in, .args = param_vals),
tensor_out = tryCatch(invoke(self$fn, tensor_in, .args = param_vals),
error = function(e) {
stopf("Failed to infer output shape, presumably invalid input shape; error message is: %s", e)
}
Expand Down Expand Up @@ -485,9 +490,19 @@ pipeop_preproc_torch_class = function(id, fn, shapes_out, param_set = NULL, pack
param_set = param_set %??% create_ps(fn)

stages_init = if (startsWith(id, "augment_")) "train" else "both"

# the .__construction_info construct is used so that we do not have to rely on NSE
init_fun = crate(function(id = id, param_vals = list()) { # nolint
info = private$.__construction_info
param_set = info$param_set$clone(deep = TRUE)
fn = info$fn
if (is.character(fn)) {
fn = getFromNamespace(fn[2L], fn[1L])
}
if (is.null(info$param_set)) {
param_set = create_ps(fn)
} else {
param_set = info$param_set$clone(deep = TRUE)
}
param_set$values = info$init_params # nolint
super$initialize(
id = id,
Expand Down Expand Up @@ -535,6 +550,11 @@ pipeop_preproc_torch_class = function(id, fn, shapes_out, param_set = NULL, pack
}

register_preproc = function(id, fn, param_set = NULL, shapes_out = NULL, packages = character(0), rowwise = FALSE) {
fn_call = substitute(fn)
if (identical(as.list(fn_call)[[1]], quote(`::`))) {
fn = as.character(as.list(fn_call)[-1L])
}

Class = pipeop_preproc_torch_class(id, fn, param_set = param_set, shapes_out = shapes_out,
packages = packages, rowwise = rowwise, parent_env = parent.frame())
assign(Class$classname, Class, parent.frame())
Expand Down
2 changes: 1 addition & 1 deletion R/PipeOpTorchIngress.R
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ register_po("torch_ingress_categ", PipeOpTorchIngressCategorical)
#' # Now we try a lazy tensor with unknown shape, i.e. the shapes between the rows can differ
#'
#' ds = dataset(
#' initialize = function() self$x = list(torch_randn(3, 10, 10), torch_randn(10, 8, 8)),
#' initialize = function() self$x = list(torch_randn(3, 10, 10), torch_randn(3, 8, 8)),
#' .getitem = function(i) list(x = self$x[[i]]),
#' .length = function() 2)()
#'
Expand Down
1 change: 1 addition & 0 deletions R/TaskClassif_tiny_imagenet.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ load_task_tiny_imagenet = function(id = "tiny_imagenet") {
withr::with_locale(c(LC_COLLATE = "C"), {
dt = cached(constructor_tiny_imagenet, "datasets", "tiny_imagenet")$data
})

dt$image = as_lazy_tensor(dataset_image(dt$image), dataset_shapes = list(x = c(NA, 3, 64, 64)))
dt$..row_id = seq_len(nrow(dt))
DataBackendDataTable$new(data = dt, primary_key = "..row_id")
Expand Down
4 changes: 2 additions & 2 deletions R/lazy_tensor.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ new_lazy_tensor = function(data_descriptor, ids) {

#' @export
`[[.lazy_tensor` = function(x, i) {
structure(unclass(x)[[i]], class = c("lazy_tensor", "list"))
unclass(x)[[i]]
}

#' @export
Expand Down Expand Up @@ -74,7 +74,7 @@ c.lazy_tensor = function(...) {
if (!all(map_lgl(dots, is_lazy_tensor))) {
return(NextMethod())
}
if (length(unique(map_chr(dots, function(x) dd(x)$hash))) > 1) {
if (length(unique(map_chr(dots[lengths(dots) != 0], function(x) dd(x)$hash))) > 1) {
stopf("Can only concatenate lazy tensors with the same data descriptors.")
}

Expand Down
90 changes: 46 additions & 44 deletions R/materialize.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#' Either a [`lazy_tensor`] or a `list()` / `data.frame()` containing [`lazy_tensor`] columns.
#' @param rbind (`logical(1)`)\cr
#' Whether to rbind the lazy tensor columns (`TRUE`) or return them as a list of tensors (`FALSE`).
#' In the second case, the batch dimension is present for all individual tensors.
#' In the second case, there is no batch dimension.
#' @return (`list()` of [`lazy_tensor`]s or a [`lazy_tensor`])
#' @param device (`character(1)`)\cr
#' The torch device.
Expand Down Expand Up @@ -87,6 +87,44 @@ materialize.lazy_tensor = function(x, device = "cpu", rbind = FALSE, ...) { # no
materialize_internal(x = x, device = device, cache = NULL, rbind = rbind)
}

# Retrieve the raw dataset output for the requested ids.
#
# @param ds The dataset; must provide `.getbatch` or `.getitem`.
# @param ids The ids that are passed on to the dataset's getter.
# @param varying_shapes (`logical(1)`) Whether the per-id results are kept as a list
#   instead of being concatenated along the first dimension.
# @param rbind Accepted for a uniform call signature; not used in this function.
# @return Either the result of `ds$.getbatch(ids)`, a list with one entry per id
#   (each entry a list of tensors with a leading dimension of size 1), or a list
#   of tensors concatenated over all ids.
get_input = function(ds, ids, varying_shapes, rbind) {
  # a dataset that provides a batch getter is queried once for all ids
  if (!is.null(ds$.getbatch)) {
    return(ds$.getbatch(ids))
  }

  # otherwise fetch the items one by one and give each tensor a leading dimension of size 1
  items = map(ids, function(id) {
    map(ds$.getitem(id), function(tensor) tensor$unsqueeze(1))
  })

  if (varying_shapes) {
    return(items)
  }

  # equal shapes: regroup by element and concatenate along the first dimension
  map(transpose_list(items), function(tensors) torch_cat(tensors, dim = 1L))
}

# Run the preprocessing graph on the input and bring the result into the requested format.
#
# @param input The graph input: a list with one entry per id if `varying_shapes` is `TRUE`,
#   otherwise the input that is passed to the graph in a single call.
# @param graph The graph; its `$train()` method is called with `single_input = FALSE`.
# @param varying_shapes (`logical(1)`) Whether the graph is trained once per list entry
#   (`TRUE`) or once on the whole input (`FALSE`).
# @param rbind (`logical(1)`) Whether each output is returned as one object (`TRUE`)
#   or as a list of per-row objects with the first dimension squeezed (`FALSE`).
# @param device The device passed to `$to(device = )` for every returned element.
# @return A list with one entry per graph output, formatted according to `rbind`.
get_output = function(input, graph, varying_shapes, rbind, device) {
  run_graph = function(batch) graph$train(batch, single_input = FALSE)
  to_device = function(tensor) tensor$to(device = device)

  if (varying_shapes) {
    # list --graph--> (list or tensor)
    trained = transpose_list(map(input, run_graph))
  } else {
    # tensor --graph--> tensor
    trained = run_graph(input)
  }

  if (rbind) {
    if (varying_shapes) {
      # convert each list of tensors into a single tensor
      trained = map(trained, list_to_batch)
    }
    return(map(trained, to_device))
  }

  if (!varying_shapes) {
    # convert each tensor into a list of tensors, one per row
    trained = map(trained, function(batch) torch_split(batch, split_size = 1L, dim = 1L))
  }
  map(trained, function(rows) {
    map(rows, function(row) to_device(row$squeeze(1)))
  })
}

#' @title Materialize a Lazy Tensor
#' @description
#' Convert a [`lazy_tensor()`] to a [`torch_tensor()`].
Expand Down Expand Up @@ -142,25 +180,11 @@ materialize_internal = function(x, device = "cpu", cache = NULL, rbind) {

if (input_hit) {
input = cache[[input_hash]]
input_hit = TRUE
}
}

if (!do_caching || !input_hit) {
input = if (is.null(ds$.getbatch)) { # .getindex is never NULL but a function that errs if it was not defined
x = map(ids, function(id) map(ds$.getitem(id), function(x) x$unsqueeze(1)))
if (varying_shapes || !rbind) {
x
} else {
map(transpose_list(x), function(x) torch_cat(x, dim = 1L))
}
} else {
if (rbind) {
ds$.getbatch(ids)
} else {
map(ids, function(id) ds$.getbatch(id))
}
}
input = get_input(ds, ids, varying_shapes, rbind)
}

if (do_caching && !input_hit) {
Expand All @@ -170,42 +194,20 @@ materialize_internal = function(x, device = "cpu", cache = NULL, rbind) {
# input is the output of a dataset so it can contain more than what we need for the graph,
# also we need to set the correct names.
# This is done after retrieving the element from the cache / before saving the element to the cache because
# this can change

input = if (rbind && !varying_shapes) {
set_names(input[data_descriptor$input_map], data_descriptor$graph_input)
} else {
# this can change depending on the preprocessing graph
input = if (varying_shapes) {
map(input, function(x) {
set_names(x[data_descriptor$input_map], data_descriptor$graph_input)
})
}

output = if (rbind && !varying_shapes) {
# tensor --graph--> tensor
graph$train(input, single_input = FALSE)
} else {
# list --graph--> (list or tensor)
out = map(input, function(x) graph$train(x, single_input = FALSE))

if (rbind) {
# here, is a list with hierarchy: [id = [po_id = [ch_nm = ]]]
# We want to obtain a list [po_id = [ch_nm = [...]]] where the [...] is the rbind over all ids
rows = seq_along(out)
out = map(names(out[[1L]]), function(name) torch_cat(map(out[rows], name)))
}
out
set_names(input[data_descriptor$input_map], data_descriptor$graph_input)
}

output = get_output(input, graph, varying_shapes, rbind, device)

if (do_caching) {
cache[[output_hash]] = output
}

# put the tensor on the required device
if (rbind) {
res = output[[pointer_name]]$to(device = device)
} else {
res = map(output, function(o) o[[pointer_name]]$to(device = device))
}

return(res)
output[[pointer_name]]
}
16 changes: 11 additions & 5 deletions R/nn_graph.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,26 @@
#' @family Graph Network
#' @export
#' @examples
#' graph = po("module_1", module = nn_linear(10, 20)) %>>%
#' po("module_2", module = nn_relu()) %>>%
#' po("module_3", module = nn_linear(20, 1))
#' graph = mlr3pipelines::Graph$new()
#' graph$add_pipeop(po("module_1", module = nn_linear(10, 20)), clone = FALSE)
#' graph$add_pipeop(po("module_2", module = nn_relu()), clone = FALSE)
#' graph$add_pipeop(po("module_3", module = nn_linear(20, 1)), clone = FALSE)
#' graph$add_edge("module_1", "module_2")
#' graph$add_edge("module_2", "module_3")
#'
#' network = nn_graph(graph, shapes_in = list(module_1.input = c(NA, 10)))
#' network
#'
#' x = torch_randn(16, 10)
#'
#' network(module_1.input = x)
nn_graph = nn_module(
"nn_graph",
initialize = function(graph, shapes_in, output_map = graph$output$name, list_output = FALSE) {
self$graph = as_graph(graph)
self$graph = as_graph(graph, clone = FALSE)
self$graph_input_name = graph$input$name # cache this, it is expensive

# we do NOT verify the input and type of the graph to be `"torch_tensor"`.

# The reason for this is that the graph, when constructed with the PipeOpTorch Machinery, contains PipeOpNOPs,
# which have input and output type *.

Expand Down
Loading

0 comments on commit da9b40d

Please sign in to comment.