From a032ae0d8fb98b002859bb98f301e637a4a39d4d Mon Sep 17 00:00:00 2001 From: "viktor.dobrev" Date: Thu, 11 May 2023 17:57:24 +0200 Subject: [PATCH 01/16] support for other pytorch schedulers and warm up --- neuralpredictors/training/early_stopping.py | 70 ++++++++++++++++++--- 1 file changed, 63 insertions(+), 7 deletions(-) diff --git a/neuralpredictors/training/early_stopping.py b/neuralpredictors/training/early_stopping.py index 2ddb4dd7..f78a9cd3 100644 --- a/neuralpredictors/training/early_stopping.py +++ b/neuralpredictors/training/early_stopping.py @@ -42,8 +42,8 @@ def early_stopping( tracker=None, scheduler=None, lr_decay_steps=1, + number_warmup_epochs=0, ): - """ Early stopping iterator. Keeps track of the best model state during training. Resets the model to its best state, when either the number of maximum epochs or the patience [number of epochs without improvement) @@ -72,10 +72,29 @@ def early_stopping( tracker (Tracker): Tracker to be invoked for every epoch. `log_objective` is invoked with the current value of `objective`. Note that `finalize` method is NOT invoked. - scheduler: scheduler object, which automatically reduces decreases the LR by a specified amount. - The scheduler's `step` method is invoked, passing in the current value of `objective` - lr_decay_steps: Number of times the learning rate should be reduced before stopping the training. + scheduler: scheduler object or tuple of two scheduler objects, which automatically modifies the LR by a specified amount. + The scheduler's `step` method is invoked for a the approptiate scheduler if a tuple of two schedulers is provided. + The current value of `objective` is passed to the `step` method if the scheduler at hand is `ReduceLROnPlateau`. + For example a provided tuple of scheduler can be of the form: + + scheduler = (warmup_scheduler,CosineAnnealingLR(*args,**kwargs)) + + or in case that no scheduler is desired after the warm up: + + scheduler = (warmup_scheduler,None). + + An example warm up scheduler can be defined as: + def warmup_function(current_step: int): + return 1 / (2 ** (float(number_warmup_epochs - current_step - 1))) + + warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_function) + + Of course single schedulers can also be provided. + If the warm-up is shifted (goes to a to high learning rate or does not reach the desired learning rate), + consider adjusting the warm up function accordingly. + lr_decay_steps: Number of times the learning rate should be reduced before stopping the training. + number_warmup_epochs: Number of warm-up epochs """ training_status = model.training @@ -107,11 +126,36 @@ def finalize(model, best_state_dict): best_objective = current_objective = _objective() best_state_dict = copy_state(model) + # check if the learning rate scheduler is 'ReduceLROnPlateau' so that we pass the current_objective to step + reduce_lr_on_plateau = False + if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau): + reduce_lr_on_plateau = True + elif isinstance(scheduler, tuple): + if isinstance(scheduler[1], torch.optim.lr_scheduler.ReduceLROnPlateau): + reduce_lr_on_plateau = True + + # check if warm up is to be performed + if isinstance(scheduler, tuple): + warmup = True + if scheduler[0] is None: + logger.warning( + f"Provided warm up scheduler is of type None. Warm up epochs set to {number_warmup_epochs}. 
Setting number of warm up epochs to 0"
+            )
+            number_warmup_epochs = 0
+    else:
+        warmup = False
+
+    if warmup and number_warmup_epochs == 0:
+        logger.warning("Warm up scheduler is provided, but number of warm up steps is set to 0")
+    elif not warmup and number_warmup_epochs > 0:
+        logger.warning(
+            f"Number of warm up steps is set to {number_warmup_epochs}, but no warm up scheduler is provided"
+        )
+
     for repeat in range(lr_decay_steps):
         patience_counter = 0
         while patience_counter < patience and epoch < max_iter:
-
             for _ in range(interval):
                 epoch += 1
                 if tracker is not None:
@@ -124,9 +168,21 @@ def finalize(model, best_state_dict):

             current_objective = _objective()

-            # if a scheduler is defined, a .step with the current objective is all that is needed to reduce the LR
+            # if a scheduler is defined, a .step with or without the current objective is all that is needed to reduce the LR
             if scheduler is not None:
-                scheduler.step(current_objective)
+                if warmup and epoch < number_warmup_epochs:
+                    scheduler[0].step()
+                elif reduce_lr_on_plateau:
+                    if not warmup:
+                        scheduler.step(current_objective)
+                    else:
+                        scheduler[1].step(current_objective)
+                else:
+                    if not warmup:
+                        scheduler.step()
+                    else:
+                        if scheduler[1] is not None:
+                            scheduler[1].step()

             if current_objective * maximize < best_objective * maximize - tolerance:
                 logger.info(f"[{epoch:03d}|{patience_counter:02d}/{patience:02d}] ---> {current_objective}")

From c0a89a44aeefd438bb791528546d5c63571344e5 Mon Sep 17 00:00:00 2001
From: "viktor.dobrev"
Date: Mon, 22 May 2023 14:06:31 +0200
Subject: [PATCH 02/16] better in-line comments for schedulers

---
 neuralpredictors/training/early_stopping.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/neuralpredictors/training/early_stopping.py b/neuralpredictors/training/early_stopping.py
index f78a9cd3..c101c44f 100644
--- a/neuralpredictors/training/early_stopping.py
+++ b/neuralpredictors/training/early_stopping.py
@@ -73,9 +73,10 @@ def early_stopping(
             Tracker to be invoked for every epoch. `log_objective` is invoked with the current value of
             `objective`. Note that `finalize` method is NOT invoked.
         scheduler: scheduler object or tuple of two scheduler objects, which automatically modifies the LR by a specified amount.
-            The scheduler's `step` method is invoked for a the approptiate scheduler if a tuple of two schedulers is provided.
-            The current value of `objective` is passed to the `step` method if the scheduler at hand is `ReduceLROnPlateau`.
-            For example a provided tuple of scheduler can be of the form:
+            If a tuple of schedulers is provided, the 1st scheduler is assumed to be the warm up scheduler. The .step method
+            for the 1st scheduler will be called while epoch is smaller than number_warmup_epochs; afterwards, the .step method of
+            the second scheduler is called. The current value of `objective` is passed to the `step` method if the scheduler at hand is `ReduceLROnPlateau`.
+            For example, a provided tuple of schedulers can be of the form:
 
             scheduler = (warmup_scheduler,CosineAnnealingLR(*args,**kwargs))
@@ -137,6 +138,8 @@ def finalize(model, best_state_dict):
     # check if warm up is to be performed
     if isinstance(scheduler, tuple):
         warmup = True
+
+        # check if the warm-up scheduler is not of type None
         if scheduler[0] is None:
            logger.warning(
                f"Provided warm up scheduler is of type None. Warm up epochs set to {number_warmup_epochs}. 
Setting number of warm up epochs to 0" @@ -145,8 +148,11 @@ def finalize(model, best_state_dict): else: warmup = False + # check if warm up scheduler and number of warm-up epochs is provided if warmup and number_warmup_epochs == 0: logger.warning("Warm up scheduler is provided, but number of warm up steps is set to 0") + + # inform user that no warm-up scheduler is provided althouth warm-up epochs is non zero elif not warmup and number_warmup_epochs > 0: logger.warning( f"Number of warm up steps is set to {number_warmup_epochs}, but no warm up scheduler is provided" @@ -171,13 +177,16 @@ def finalize(model, best_state_dict): # if a scheduler is defined, a .step with or without the current objective is all that is needed to reduce the LR if scheduler is not None: if warmup and epoch < number_warmup_epochs: + # warm-up step scheduler[0].step() elif reduce_lr_on_plateau: + # reduce_lr_on_plateau requires current objective for the step if not warmup: scheduler.step(current_objective) else: scheduler[1].step(current_objective) else: + # .step() for the rest of the schedulers if not warmup: scheduler.step() else: From 3a3554619c54f4693d30370f2f0eef19e7706475 Mon Sep 17 00:00:00 2001 From: Max Burg Date: Thu, 7 Mar 2024 13:39:18 +0100 Subject: [PATCH 03/16] remove obsolete independent_bn_bias argument --- neuralpredictors/layers/cores/conv2d.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/neuralpredictors/layers/cores/conv2d.py b/neuralpredictors/layers/cores/conv2d.py index d7fd2e7d..956f8692 100644 --- a/neuralpredictors/layers/cores/conv2d.py +++ b/neuralpredictors/layers/cores/conv2d.py @@ -57,7 +57,6 @@ def __init__( momentum=0.1, pad_input=True, hidden_padding=None, - independent_bn_bias=True, batch_norm: Union[bool, list[bool]] = True, batch_norm_scale: Union[bool, list[bool]] = True, final_batchnorm_scale: bool = True, @@ -93,8 +92,6 @@ def __init__( batch_norm: Boolean, if True appends a BN layer after each convolutional layer batch_norm_scale: If True, learns BN including the scaling factor final_batchnorm_scale: Deprecated. If batch_norm_scale passed as an Iterable, this will be ignored. - independent_bn_bias: Deprecated. If False, will allow for scaling the batch norm, so that batchnorm - and bias can both be true. Defaults to True. hidden_dilation: If set to > 1, will apply dilated convs for all hidden layers laplace_padding: Padding size for the laplace convolution. If padding = None, it defaults to half of the kernel size (recommended). Setting Padding to 0 is not recommended and leads to artefacts, @@ -137,13 +134,6 @@ def __init__( self.bias = bias if isinstance(bias, Iterable) else [bias] * layers - self.independent_bn_bias = independent_bn_bias - if self.independent_bn_bias and not all(self.bias) and not all(self.batch_norm_scale): - warnings.warn( - """The default of `independent_bn_bias=True` will ignore the kwargs `bias`, `batch_norm_scale`. 
- If you want to use these arguments, please set `independent_bn_bias=False`.""" - ) - super().__init__() regularizer_config = ( dict(padding=laplace_padding, kernel=input_kern) @@ -219,11 +209,6 @@ def set_batchnorm_type(self): def add_bn_layer(self, layer: OrderedDict, layer_idx: int): if self.batch_norm[layer_idx]: hidden_channels = self.hidden_channels[layer_idx] - - if self.independent_bn_bias: - layer["norm"] = self.batchnorm_layer_cls(hidden_channels, momentum=self.momentum) - return - bias = self.bias[layer_idx] scale = self.batch_norm_scale[layer_idx] From 4bf5b54f54e5b827c516932c618dcfdc14a66333 Mon Sep 17 00:00:00 2001 From: Max Burg Date: Thu, 7 Mar 2024 14:32:59 +0100 Subject: [PATCH 04/16] Fix a Bug in SE2dCore when skip > 1 --- neuralpredictors/layers/cores/conv2d.py | 44 +++++++++++-------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/neuralpredictors/layers/cores/conv2d.py b/neuralpredictors/layers/cores/conv2d.py index 956f8692..2e764d97 100644 --- a/neuralpredictors/layers/cores/conv2d.py +++ b/neuralpredictors/layers/cores/conv2d.py @@ -245,25 +245,27 @@ def add_first_layer(self): self.add_activation(layer) self.features.add_module("layer0", nn.Sequential(layer)) + def add_subsequent_conv_layer(self, layer: OrderedDict, l: int) -> None: + layer[self.conv_layer_name] = self.ConvLayer( + in_channels=self.hidden_channels[l - 1] + if not self.skip > 1 + else min(self.skip, l) * self.hidden_channels[0], + out_channels=self.hidden_channels[l], + kernel_size=self.hidden_kern[l - 1], + stride=self.stride, + padding=self.hidden_padding or ((self.hidden_kern[l - 1] - 1) * self.hidden_dilation + 1) // 2, + dilation=self.hidden_dilation, + bias=self.bias, + ) + def add_subsequent_layers(self): if not isinstance(self.hidden_kern, Iterable): self.hidden_kern = [self.hidden_kern] * (self.num_layers - 1) for l in range(1, self.num_layers): layer = OrderedDict() - if self.hidden_padding is None: - self.hidden_padding = ((self.hidden_kern[l - 1] - 1) * self.hidden_dilation + 1) // 2 - layer[self.conv_layer_name] = self.ConvLayer( - in_channels=self.hidden_channels[l - 1] - if not self.skip > 1 - else min(self.skip, l) * self.hidden_channels[0], - out_channels=self.hidden_channels[l], - kernel_size=self.hidden_kern[l - 1], - stride=self.stride, - padding=self.hidden_padding, - dilation=self.hidden_dilation, - bias=self.bias, - ) + + self.add_subsequent_conv_layer(layer, l) self.add_bn_layer(layer, l) self.add_activation(layer) self.features.add_module("layer{}".format(l), nn.Sequential(layer)) @@ -345,6 +347,9 @@ def __init__( self.init_std = init_std super().__init__(*args, **kwargs, input_regularizer=input_regularizer) + if self.skip > 0: + raise NotImplementedError("Skip connections are not implemented for RotationEquivariant2dCore") + def set_batchnorm_type(self): if not self.rot_eq_batch_norm: self.batchnorm_layer_cls = nn.BatchNorm2d @@ -588,17 +593,8 @@ def add_subsequent_layers(self): for l in range(1, self.num_layers): layer = OrderedDict() - if self.hidden_padding is None: - self.hidden_padding = ((self.hidden_kern[l - 1] - 1) * self.hidden_dilation + 1) // 2 - layer[self.conv_layer_name] = self.ConvLayer( - in_channels=self.hidden_channels if not self.skip > 1 else min(self.skip, l) * self.hidden_channels, - out_channels=self.hidden_channels, - kernel_size=self.hidden_kern[l - 1], - stride=self.stride, - padding=self.hidden_padding, - dilation=self.hidden_dilation, - bias=self.bias, - ) + + self.add_subsequent_conv_layer(layer, l) 
self.add_bn_layer(layer, l) self.add_activation(layer) if (self.num_layers - l) <= self.n_se_blocks: From ceaba0db20236525ae75c9f91525d2b7e2b6297f Mon Sep 17 00:00:00 2001 From: Max Burg Date: Thu, 7 Mar 2024 16:03:17 +0100 Subject: [PATCH 05/16] move add_bn_layer to Core(ABC) --- neuralpredictors/layers/cores/base.py | 35 +++++++++++++++++++++++++ neuralpredictors/layers/cores/conv2d.py | 19 -------------- neuralpredictors/layers/cores/conv3d.py | 18 ++++--------- 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/neuralpredictors/layers/cores/base.py b/neuralpredictors/layers/cores/base.py index a6936204..1122030e 100644 --- a/neuralpredictors/layers/cores/base.py +++ b/neuralpredictors/layers/cores/base.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod +from collections import OrderedDict from torch import nn @@ -8,6 +9,10 @@ class Core(ABC): Base class for the core models, taking 2d inputs and computing nonlinear features. """ + def __init__(self) -> None: + super().__init__() + self.set_batchnorm_type() + def initialize(self): """ Initialization applied on the core. @@ -29,6 +34,36 @@ def init_conv(m): if m.bias is not None: m.bias.data.fill_(0) + @abstractmethod + def set_batchnorm_type(self): + """ + Set batchnorm_layer_cls, bias_layer_cls, scale_layer_cls class attributes + """ + self.batchnorm_layer_cls = None + self.bias_layer_cls = None + self.scale_layer_cls = None + + def add_bn_layer(self, layer: OrderedDict, layer_idx: int): + for attr in ["batch_norm", "hidden_channels", "independent_bn_bias", "momentum"]: + if not hasattr(self, attr): + raise NotImplementedError(f"Subclasses must have a `{attr}` attribute.") + + if self.batch_norm[layer_idx]: + hidden_channels = self.hidden_channels[layer_idx] + + if self.independent_bn_bias: + layer["norm"] = self.batchnorm_layer_cls(hidden_channels, momentum=self.momentum) + return + + bias = self.bias[layer_idx] + scale = self.batch_norm_scale[layer_idx] + + layer["norm"] = self.batchnorm_layer_cls(hidden_channels, momentum=self.momentum, affine=bias and scale) + if bias and not scale: + layer["bias"] = self.bias_layer_cls(hidden_channels) + elif not bias and scale: + layer["scale"] = self.scale_layer_cls(hidden_channels) + @abstractmethod def regularizer(self): """ diff --git a/neuralpredictors/layers/cores/conv2d.py b/neuralpredictors/layers/cores/conv2d.py index d7fd2e7d..456856bd 100644 --- a/neuralpredictors/layers/cores/conv2d.py +++ b/neuralpredictors/layers/cores/conv2d.py @@ -204,7 +204,6 @@ def __init__( warnings.warn( "group sparsity can not be calculated for the requested conv type. Hidden channels will not be regularized and gamma_hidden is ignored." 
) - self.set_batchnorm_type() self.features = nn.Sequential() self.add_first_layer() self.add_subsequent_layers() @@ -215,24 +214,6 @@ def set_batchnorm_type(self): self.bias_layer_cls = Bias2DLayer self.scale_layer_cls = Scale2DLayer - # def add_bn_layer(self, layer, hidden_channels): - def add_bn_layer(self, layer: OrderedDict, layer_idx: int): - if self.batch_norm[layer_idx]: - hidden_channels = self.hidden_channels[layer_idx] - - if self.independent_bn_bias: - layer["norm"] = self.batchnorm_layer_cls(hidden_channels, momentum=self.momentum) - return - - bias = self.bias[layer_idx] - scale = self.batch_norm_scale[layer_idx] - - layer["norm"] = self.batchnorm_layer_cls(hidden_channels, momentum=self.momentum, affine=bias and scale) - if bias and not scale: - layer["bias"] = self.bias_layer_cls(hidden_channels) - elif not bias and scale: - layer["scale"] = self.scale_layer_cls(hidden_channels) - def penultimate_layer_built(self): """Returns True if the penultimate layer has been built.""" return len(self.features) == self.num_layers - 1 diff --git a/neuralpredictors/layers/cores/conv3d.py b/neuralpredictors/layers/cores/conv3d.py index 6950f981..38cf99a4 100644 --- a/neuralpredictors/layers/cores/conv3d.py +++ b/neuralpredictors/layers/cores/conv3d.py @@ -428,6 +428,11 @@ def __init__( self.features.add_module("layer{}".format(l + 1), nn.Sequential(layer)) self.initialize(cuda=cuda) + def set_batchnorm_type(self): + self.batchnorm_layer_cls = nn.BatchNorm3d + self.bias_layer_cls = Bias3DLayer + self.scale_layer_cls = Scale3DLayer + def forward(self, x): for features in self.features: x = features(x) @@ -450,16 +455,3 @@ def get_kernels(self): (temporal_kernel,) + spatial_kernel for temporal_kernel, spatial_kernel in zip(self.temporal_hidden_kernel, self.spatial_hidden_kernel) ] - - def add_bn_layer(self, layer, hidden_channels): - if self.batch_norm: - if self.independent_bn_bias: - layer["norm"] = nn.BatchNorm3d(hidden_channels, momentum=self.momentum) - else: - layer["norm"] = nn.BatchNorm3d( - hidden_channels, momentum=self.momentum, affine=self.bias and self.batch_norm_scale - ) - if self.bias and not self.batch_norm_scale: - layer["bias"] = Bias3DLayer(hidden_channels) - elif self.batch_norm_scale: - layer["scale"] = Scale3DLayer(hidden_channels) From e77ccacb4997080842b5a941cef00868ad504485 Mon Sep 17 00:00:00 2001 From: Max Burg Date: Thu, 7 Mar 2024 16:19:56 +0100 Subject: [PATCH 06/16] add ConvCore for convolutional models only --- neuralpredictors/layers/cores/base.py | 56 +++++++++++++------------ neuralpredictors/layers/cores/conv2d.py | 4 +- neuralpredictors/layers/cores/conv3d.py | 4 +- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/neuralpredictors/layers/cores/base.py b/neuralpredictors/layers/cores/base.py index 1122030e..2bee95dc 100644 --- a/neuralpredictors/layers/cores/base.py +++ b/neuralpredictors/layers/cores/base.py @@ -9,10 +9,6 @@ class Core(ABC): Base class for the core models, taking 2d inputs and computing nonlinear features. """ - def __init__(self) -> None: - super().__init__() - self.set_batchnorm_type() - def initialize(self): """ Initialization applied on the core. @@ -34,6 +30,35 @@ def init_conv(m): if m.bias is not None: m.bias.data.fill_(0) + @abstractmethod + def regularizer(self): + """ + Regularization applied on the core. Returns a scalar value. + """ + + @abstractmethod + def forward(self, x): + """ + Forward function for pytorch nn module. 
+ + Args: + x (torch.tensor): input of shape (batch, channels, height, width) + """ + + def __repr__(self): + s = super().__repr__() + s += f" [{self.__class__.__name__} regularizers: " + ret = [] + for attr in filter(lambda x: "gamma" in x or "skip" in x, dir(self)): + ret.append(f"{attr} = {getattr(self, attr)}") + return s + "|".join(ret) + "]\n" + + +class ConvCore(Core): + def __init__(self) -> None: + super().__init__() + self.set_batchnorm_type() + @abstractmethod def set_batchnorm_type(self): """ @@ -63,26 +88,3 @@ def add_bn_layer(self, layer: OrderedDict, layer_idx: int): layer["bias"] = self.bias_layer_cls(hidden_channels) elif not bias and scale: layer["scale"] = self.scale_layer_cls(hidden_channels) - - @abstractmethod - def regularizer(self): - """ - Regularization applied on the core. Returns a scalar value. - """ - - @abstractmethod - def forward(self, x): - """ - Forward function for pytorch nn module. - - Args: - x (torch.tensor): input of shape (batch, channels, height, width) - """ - - def __repr__(self): - s = super().__repr__() - s += f" [{self.__class__.__name__} regularizers: " - ret = [] - for attr in filter(lambda x: "gamma" in x or "skip" in x, dir(self)): - ret.append(f"{attr} = {getattr(self, attr)}") - return s + "|".join(ret) + "]\n" diff --git a/neuralpredictors/layers/cores/conv2d.py b/neuralpredictors/layers/cores/conv2d.py index 456856bd..1f97df3c 100644 --- a/neuralpredictors/layers/cores/conv2d.py +++ b/neuralpredictors/layers/cores/conv2d.py @@ -27,12 +27,12 @@ RotationEquivariantScale2DLayer, ) from ..squeeze_excitation import SqueezeExcitationBlock -from .base import Core +from .base import ConvCore, Core logger = logging.getLogger(__name__) -class Stacked2dCore(Core, nn.Module): +class Stacked2dCore(ConvCore, nn.Module): """ An instantiation of the Core base class. Made up of layers layers of nn.sequential modules. Allows for the flexible implementations of many different architectures, such as convolutional layers, diff --git a/neuralpredictors/layers/cores/conv3d.py b/neuralpredictors/layers/cores/conv3d.py index 38cf99a4..c85f46c4 100644 --- a/neuralpredictors/layers/cores/conv3d.py +++ b/neuralpredictors/layers/cores/conv3d.py @@ -11,10 +11,10 @@ from ...regularizers import DepthLaplaceL21d from ..affine import Bias3DLayer, Scale3DLayer -from .base import Core +from .base import ConvCore -class Core3d(Core): +class Core3d(ConvCore): def initialize(self, cuda=False): self.apply(self.init_conv) self.put_to_cuda(cuda=cuda) From e83133e4890f9bdbb246f0afda4c70d8d51ed1ca Mon Sep 17 00:00:00 2001 From: Max Burg Date: Thu, 7 Mar 2024 16:26:55 +0100 Subject: [PATCH 07/16] [add] documentation string --- neuralpredictors/layers/cores/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/neuralpredictors/layers/cores/base.py b/neuralpredictors/layers/cores/base.py index 2bee95dc..6c84c209 100644 --- a/neuralpredictors/layers/cores/base.py +++ b/neuralpredictors/layers/cores/base.py @@ -56,6 +56,9 @@ def __repr__(self): class ConvCore(Core): def __init__(self) -> None: + """ + Derived classes need to define "batch_norm", "hidden_channels", "independent_bn_bias", "momentum" attributes. 
+ """ super().__init__() self.set_batchnorm_type() From f45769bf7973373336086fffee3fa09c188374bf Mon Sep 17 00:00:00 2001 From: Max Burg Date: Thu, 7 Mar 2024 17:31:37 +0100 Subject: [PATCH 08/16] add type checking for abstract class attributes batch_norm and hidden_channels --- neuralpredictors/layers/cores/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/neuralpredictors/layers/cores/base.py b/neuralpredictors/layers/cores/base.py index 6c84c209..4c2747b0 100644 --- a/neuralpredictors/layers/cores/base.py +++ b/neuralpredictors/layers/cores/base.py @@ -75,6 +75,9 @@ def add_bn_layer(self, layer: OrderedDict, layer_idx: int): for attr in ["batch_norm", "hidden_channels", "independent_bn_bias", "momentum"]: if not hasattr(self, attr): raise NotImplementedError(f"Subclasses must have a `{attr}` attribute.") + for attr in ["batch_norm", "hidden_channels"]: + if not isinstance(getattr(self, attr), list): + raise ValueError(f"`{attr}` must be a list.") if self.batch_norm[layer_idx]: hidden_channels = self.hidden_channels[layer_idx] From 282ab8c11606c974eed70fcc40ae67b5979281a0 Mon Sep 17 00:00:00 2001 From: Max Burg Date: Thu, 7 Mar 2024 17:33:00 +0100 Subject: [PATCH 09/16] check for bias and batch_norm_scale arguments --- neuralpredictors/layers/cores/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neuralpredictors/layers/cores/base.py b/neuralpredictors/layers/cores/base.py index 4c2747b0..b161c8a3 100644 --- a/neuralpredictors/layers/cores/base.py +++ b/neuralpredictors/layers/cores/base.py @@ -72,7 +72,7 @@ def set_batchnorm_type(self): self.scale_layer_cls = None def add_bn_layer(self, layer: OrderedDict, layer_idx: int): - for attr in ["batch_norm", "hidden_channels", "independent_bn_bias", "momentum"]: + for attr in ["batch_norm", "hidden_channels", "independent_bn_bias", "momentum", "bias", "batch_norm_scale"]: if not hasattr(self, attr): raise NotImplementedError(f"Subclasses must have a `{attr}` attribute.") for attr in ["batch_norm", "hidden_channels"]: From bb0b005d7b2d223235011d1f60ee7cd9ec6b689c Mon Sep 17 00:00:00 2001 From: Max Burg Date: Thu, 7 Mar 2024 17:33:50 +0100 Subject: [PATCH 10/16] check types for bias and batch_norm_scale --- neuralpredictors/layers/cores/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neuralpredictors/layers/cores/base.py b/neuralpredictors/layers/cores/base.py index b161c8a3..e551e625 100644 --- a/neuralpredictors/layers/cores/base.py +++ b/neuralpredictors/layers/cores/base.py @@ -75,7 +75,7 @@ def add_bn_layer(self, layer: OrderedDict, layer_idx: int): for attr in ["batch_norm", "hidden_channels", "independent_bn_bias", "momentum", "bias", "batch_norm_scale"]: if not hasattr(self, attr): raise NotImplementedError(f"Subclasses must have a `{attr}` attribute.") - for attr in ["batch_norm", "hidden_channels"]: + for attr in ["batch_norm", "hidden_channels", "bias", "batch_norm_scale"]: if not isinstance(getattr(self, attr), list): raise ValueError(f"`{attr}` must be a list.") From 5120e7d3130d900e969d7f357ef302567fc78fe6 Mon Sep 17 00:00:00 2001 From: Polina Turishcheva Date: Fri, 8 Mar 2024 11:00:12 +0100 Subject: [PATCH 11/16] fix typing import for python 3.8 --- neuralpredictors/layers/cores/conv2d.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/neuralpredictors/layers/cores/conv2d.py b/neuralpredictors/layers/cores/conv2d.py index 1f97df3c..b567122f 100644 --- a/neuralpredictors/layers/cores/conv2d.py +++ b/neuralpredictors/layers/cores/conv2d.py @@ 
-1,7 +1,7 @@ import logging import warnings from collections import OrderedDict -from typing import Union +from typing import Union, List try: from collections import Iterable @@ -53,13 +53,13 @@ def __init__( input_stride=1, final_nonlinearity=True, elu_shift=(0, 0), - bias: Union[bool, list[bool]] = True, + bias: Union[bool, List[bool]] = True, momentum=0.1, pad_input=True, hidden_padding=None, independent_bn_bias=True, - batch_norm: Union[bool, list[bool]] = True, - batch_norm_scale: Union[bool, list[bool]] = True, + batch_norm: Union[bool, List[bool]] = True, + batch_norm_scale: Union[bool, List[bool]] = True, final_batchnorm_scale: bool = True, hidden_dilation=1, laplace_padding=0, From 0b55b1fad0415a85da0bba16a8ffc08331e965ef Mon Sep 17 00:00:00 2001 From: Polina Turishcheva Date: Fri, 8 Mar 2024 11:23:08 +0100 Subject: [PATCH 12/16] isort fixed --- neuralpredictors/layers/cores/conv2d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neuralpredictors/layers/cores/conv2d.py b/neuralpredictors/layers/cores/conv2d.py index b567122f..3412e286 100644 --- a/neuralpredictors/layers/cores/conv2d.py +++ b/neuralpredictors/layers/cores/conv2d.py @@ -1,7 +1,7 @@ import logging import warnings from collections import OrderedDict -from typing import Union, List +from typing import List, Union try: from collections import Iterable From c4858e9088dd8e83ab1a942b79886ce7e0c57b39 Mon Sep 17 00:00:00 2001 From: Max Burg Date: Fri, 8 Mar 2024 11:38:53 +0100 Subject: [PATCH 13/16] merge --- neuralpredictors/layers/cores/conv2d.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/neuralpredictors/layers/cores/conv2d.py b/neuralpredictors/layers/cores/conv2d.py index 87c45a5c..b3ffcee7 100644 --- a/neuralpredictors/layers/cores/conv2d.py +++ b/neuralpredictors/layers/cores/conv2d.py @@ -57,14 +57,8 @@ def __init__( momentum=0.1, pad_input=True, hidden_padding=None, -<<<<<<< HEAD - batch_norm: Union[bool, list[bool]] = True, - batch_norm_scale: Union[bool, list[bool]] = True, -======= - independent_bn_bias=True, batch_norm: Union[bool, List[bool]] = True, batch_norm_scale: Union[bool, List[bool]] = True, ->>>>>>> upstream/main final_batchnorm_scale: bool = True, hidden_dilation=1, laplace_padding=0, @@ -115,11 +109,6 @@ def __init__( linear: Boolean, if True, removes all nonlinearities nonlinearity_type: String to set the used nonlinearity type loaded from neuralpredictors.layers.activation nonlinearity_config: Dict of the nonlinearities __init__ parameters. - To enable learning batch_norms bias and scale independently, the arguments bias, batch_norm and batch_norm_scale - work together: By default, all are true. In this case there won't be a bias learned in the convolutional layer, but - batch_norm will learn both its bias and scale. If batch_norm is false, but bias true, a bias will be learned in the - convolutional layer. If batch_norm and bias are true, but batch_norm_scale is false, batch_norm won't have learnable - parameters and a BiasLayer will be added after the batch_norm layer. 
""" if depth_separable and attention_conv: From 0015784f0f34a1d4ef4bcaa5f8b7a0685da225e9 Mon Sep 17 00:00:00 2001 From: Max Burg Date: Fri, 8 Mar 2024 11:44:47 +0100 Subject: [PATCH 14/16] remove independent bn bias --- neuralpredictors/layers/cores/base.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/neuralpredictors/layers/cores/base.py b/neuralpredictors/layers/cores/base.py index e551e625..001d2d3b 100644 --- a/neuralpredictors/layers/cores/base.py +++ b/neuralpredictors/layers/cores/base.py @@ -57,7 +57,7 @@ def __repr__(self): class ConvCore(Core): def __init__(self) -> None: """ - Derived classes need to define "batch_norm", "hidden_channels", "independent_bn_bias", "momentum" attributes. + Derived classes need to define "batch_norm", "hidden_channels", "momentum", "bias", "batch_norm_scale" attributes. """ super().__init__() self.set_batchnorm_type() @@ -72,7 +72,7 @@ def set_batchnorm_type(self): self.scale_layer_cls = None def add_bn_layer(self, layer: OrderedDict, layer_idx: int): - for attr in ["batch_norm", "hidden_channels", "independent_bn_bias", "momentum", "bias", "batch_norm_scale"]: + for attr in ["batch_norm", "hidden_channels", "momentum", "bias", "batch_norm_scale"]: if not hasattr(self, attr): raise NotImplementedError(f"Subclasses must have a `{attr}` attribute.") for attr in ["batch_norm", "hidden_channels", "bias", "batch_norm_scale"]: @@ -82,10 +82,6 @@ def add_bn_layer(self, layer: OrderedDict, layer_idx: int): if self.batch_norm[layer_idx]: hidden_channels = self.hidden_channels[layer_idx] - if self.independent_bn_bias: - layer["norm"] = self.batchnorm_layer_cls(hidden_channels, momentum=self.momentum) - return - bias = self.bias[layer_idx] scale = self.batch_norm_scale[layer_idx] From 6588ba6cfc986a06152842f4a89eb9dd2eb72d12 Mon Sep 17 00:00:00 2001 From: Max Burg Date: Fri, 8 Mar 2024 11:51:42 +0100 Subject: [PATCH 15/16] remove independent_bn_bias in conv3d, remove duplicate add_bn_layer function --- neuralpredictors/layers/cores/conv3d.py | 40 +++++-------------------- 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/neuralpredictors/layers/cores/conv3d.py b/neuralpredictors/layers/cores/conv3d.py index c85f46c4..3ed4ebad 100644 --- a/neuralpredictors/layers/cores/conv3d.py +++ b/neuralpredictors/layers/cores/conv3d.py @@ -30,8 +30,14 @@ def init_conv(m): if m.bias is not None: m.bias.data.fill_(0) + def set_batchnorm_type(self): + self.batchnorm_layer_cls = nn.BatchNorm3d + self.bias_layer_cls = Bias3DLayer + self.scale_layer_cls = Scale3DLayer + class Basic3dCore(Core3d, nn.Module): + def __init__( self, input_channels, @@ -54,13 +60,11 @@ def __init__( input_regularizer="LaplaceL2norm", cuda=False, final_nonlin=True, - independent_bn_bias=True, spatial_dilation: int = 1, temporal_dilation: int = 1, hidden_spatial_dilation=1, hidden_temporal_dilation=1, ): - """ :param input_channels: integer, number of input channels as in :param hidden_channels: number of hidden channels (i.e feature maps) in each hidden layer @@ -85,14 +89,6 @@ def __init__( zero is the default however to recreate backwards compatibility. :param input_regularizer: specifies what kind of spatial regularized is applied :param final_nonlin: bool specifiyng whether to include a nonlinearity after last convolutional layer in core - :param independent_bn_bias: If False, will allow for scaling the batch norm, so that batch norm - and bias can both be true. Defaults to True. 
- - To enable learning batch_norms bias and scale independently, the arguments bias, batch_norm and batch_norm_scale - work together: By default, all are true. In this case there won't be a bias learned in the convolutional layer, but - batch_norm will learn both its bias and scale. If batch_norm is false, but bias true, a bias will be learned in the - convolutional layer. If batch_norm and bias are true, but batch_norm_scale is false, batch_norm won't have learnable - parameters and a BiasLayer will be added after the batch_norm layer. """ super().__init__() @@ -112,7 +108,6 @@ def __init__( self.bias = bias self.batch_norm = batch_norm self.batch_norm_scale = batch_norm_scale - self.independent_bn_bias = independent_bn_bias self.momentum = momentum self.spatial_dilation = spatial_dilation self.temporal_dilation = temporal_dilation @@ -225,19 +220,6 @@ def laplace_temporal(self): def regularizer(self): return self.gamma_input_spatial * self.laplace_spatial(), self.gamma_input_temporal * self.laplace_temporal() - def add_bn_layer(self, layer, hidden_channels): - if self.batch_norm: - if self.independent_bn_bias: - layer["norm"] = nn.BatchNorm3d(hidden_channels, momentum=self.momentum) - else: - layer["norm"] = nn.BatchNorm3d( - hidden_channels, momentum=self.momentum, affine=self.bias and self.batch_norm_scale - ) - if self.bias and not self.batch_norm_scale: - layer["bias"] = Bias3DLayer(hidden_channels) - elif self.batch_norm_scale: - layer["scale"] = Scale3DLayer(hidden_channels) - @property def out_channels(self): return self.hidden_channels[-1] @@ -247,6 +229,7 @@ def get_kernels(self): class Factorized3dCore(Core3d, nn.Module): + def __init__( self, input_channels, @@ -267,7 +250,6 @@ def __init__( batch_norm=True, padding=False, batch_norm_scale=True, - independent_bn_bias=True, momentum=0.01, laplace_padding=None, input_regularizer="LaplaceL2norm", @@ -300,8 +282,6 @@ def __init__( :param batch_norm: bool specifying whether to include batch norm after convolution in core :param padding: whether to pad convolutions. Defaults to False. :param batch_norm_scale: bool, if True, a scaling factor after BN will be learned. - :param independent_bn_bias: If False, will allow for scaling the batch norm, so that batchnorm - and bias can both be true. Defaults to True. :param momentum: momentum for batch norm :param laplace_padding: padding size for the laplace convolution. If padding = None, it defaults to half of the kernel size (recommended). 
Setting Padding to 0 is not recommended and leads to artefacts, @@ -329,7 +309,6 @@ def __init__( self.bias = bias self.batch_norm = batch_norm self.batch_norm_scale = batch_norm_scale - self.independent_bn_bias = independent_bn_bias self.momentum = momentum self.stride = stride self.spatial_dilation = spatial_dilation @@ -428,11 +407,6 @@ def __init__( self.features.add_module("layer{}".format(l + 1), nn.Sequential(layer)) self.initialize(cuda=cuda) - def set_batchnorm_type(self): - self.batchnorm_layer_cls = nn.BatchNorm3d - self.bias_layer_cls = Bias3DLayer - self.scale_layer_cls = Scale3DLayer - def forward(self, x): for features in self.features: x = features(x) From e354212e227793ef3e2d465739854e0f154f01b5 Mon Sep 17 00:00:00 2001 From: Max Burg Date: Fri, 8 Mar 2024 12:04:49 +0100 Subject: [PATCH 16/16] older black version --- neuralpredictors/layers/cores/conv3d.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/neuralpredictors/layers/cores/conv3d.py b/neuralpredictors/layers/cores/conv3d.py index 3ed4ebad..cd39d263 100644 --- a/neuralpredictors/layers/cores/conv3d.py +++ b/neuralpredictors/layers/cores/conv3d.py @@ -37,7 +37,6 @@ def set_batchnorm_type(self): class Basic3dCore(Core3d, nn.Module): - def __init__( self, input_channels, @@ -229,7 +228,6 @@ def get_kernels(self): class Factorized3dCore(Core3d, nn.Module): - def __init__( self, input_channels,
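A minimal usage sketch for the warm-up and scheduler-tuple support added in PATCH 01/16 and 02/16. The model, optimizer, and the commented-out call are placeholders; only the `scheduler` tuple and `number_warmup_epochs` wiring follows the API introduced above, and the warm-up function is the one suggested in the new docstring.

    import torch

    model = torch.nn.Linear(10, 1)  # stand-in for any nn.Module
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    number_warmup_epochs = 5

    def warmup_function(current_step: int):
        # multiplier doubles each epoch (1/16, 1/8, 1/4, 1/2, 1 for five warm-up epochs),
        # so the base LR is reached in the last warm-up epoch
        return 1 / (2 ** (float(number_warmup_epochs - current_step - 1)))

    warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_function)
    plateau_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max")

    # warm-up scheduler first, main scheduler second; (warmup_scheduler, None) disables the second stage
    scheduler = (warmup_scheduler, plateau_scheduler)

    # handed to the early-stopping iterator together with the matching epoch count, e.g.
    # early_stopping(model, objective, ..., scheduler=scheduler, number_warmup_epochs=number_warmup_epochs)

With (warmup_scheduler, None), only the warm-up steps are taken; passing a plain ReduceLROnPlateau object keeps the behaviour from before the patch.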
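A minimal sketch of the add_bn_layer contract that the refactored ConvCore (PATCH 05 through 15) expects from a subclass. It assumes the Bias2DLayer/Scale2DLayer helpers from neuralpredictors.layers.affine that Stacked2dCore already uses; the single conv layer is illustrative and not taken from the library.

    from collections import OrderedDict

    from torch import nn

    from neuralpredictors.layers.affine import Bias2DLayer, Scale2DLayer
    from neuralpredictors.layers.cores.base import ConvCore

    class TinyCore(ConvCore, nn.Module):
        def __init__(self, input_channels=1, hidden_channels=32, layers=1):
            # add_bn_layer checks that these exist and that the per-layer ones are lists
            self.batch_norm = [True] * layers
            self.batch_norm_scale = [True] * layers
            self.bias = [False] * layers
            self.hidden_channels = [hidden_channels] * layers
            self.momentum = 0.1
            super().__init__()  # ConvCore.__init__ calls set_batchnorm_type()

            layer = OrderedDict()
            layer["conv"] = nn.Conv2d(input_channels, hidden_channels, kernel_size=3, padding=1, bias=False)
            # bias=False and scale=True -> BatchNorm2d(affine=False) followed by a learned scale layer
            self.add_bn_layer(layer, layer_idx=0)
            self.features = nn.Sequential(layer)

        def set_batchnorm_type(self):
            self.batchnorm_layer_cls = nn.BatchNorm2d
            self.bias_layer_cls = Bias2DLayer
            self.scale_layer_cls = Scale2DLayer

        def regularizer(self):
            return 0

        def forward(self, x):
            return self.features(x)

Splitting bias and scale this way is what replaces the old independent_bn_bias switch: the batch norm layer itself stays affine-free unless both flags are set for that layer.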