
Commit

Merge branch 'main' into fix/shape-dimension-order
MaxFBurg authored Mar 8, 2024
2 parents 27d4b70 + 9b9ab51 commit 28aedec
Showing 4 changed files with 124 additions and 75 deletions.
39 changes: 39 additions & 0 deletions neuralpredictors/layers/cores/base.py
@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod
from collections import OrderedDict

from torch import nn

@@ -51,3 +52,41 @@ def __repr__(self):
for attr in filter(lambda x: "gamma" in x or "skip" in x, dir(self)):
ret.append(f"{attr} = {getattr(self, attr)}")
return s + "|".join(ret) + "]\n"


class ConvCore(Core):
def __init__(self) -> None:
"""
Derived classes need to define "batch_norm", "hidden_channels", "momentum", "bias", "batch_norm_scale" attributes.
"""
super().__init__()
self.set_batchnorm_type()

@abstractmethod
def set_batchnorm_type(self):
"""
Set batchnorm_layer_cls, bias_layer_cls, scale_layer_cls class attributes
"""
self.batchnorm_layer_cls = None
self.bias_layer_cls = None
self.scale_layer_cls = None

def add_bn_layer(self, layer: OrderedDict, layer_idx: int):
for attr in ["batch_norm", "hidden_channels", "momentum", "bias", "batch_norm_scale"]:
if not hasattr(self, attr):
raise NotImplementedError(f"Subclasses must have a `{attr}` attribute.")
for attr in ["batch_norm", "hidden_channels", "bias", "batch_norm_scale"]:
if not isinstance(getattr(self, attr), list):
raise ValueError(f"`{attr}` must be a list.")

if self.batch_norm[layer_idx]:
hidden_channels = self.hidden_channels[layer_idx]

bias = self.bias[layer_idx]
scale = self.batch_norm_scale[layer_idx]

layer["norm"] = self.batchnorm_layer_cls(hidden_channels, momentum=self.momentum, affine=bias and scale)
if bias and not scale:
layer["bias"] = self.bias_layer_cls(hidden_channels)
elif not bias and scale:
layer["scale"] = self.scale_layer_cls(hidden_channels)
31 changes: 6 additions & 25 deletions neuralpredictors/layers/cores/conv2d.py
@@ -1,7 +1,7 @@
import logging
import warnings
from collections import OrderedDict
from typing import Union
from typing import List, Union

try:
from collections import Iterable
@@ -27,12 +27,12 @@
RotationEquivariantScale2DLayer,
)
from ..squeeze_excitation import SqueezeExcitationBlock
from .base import Core
from .base import ConvCore, Core

logger = logging.getLogger(__name__)


class Stacked2dCore(Core, nn.Module):
class Stacked2dCore(ConvCore, nn.Module):
"""
An instantiation of the Core base class. Made up of layers of nn.Sequential modules.
Allows for the flexible implementations of many different architectures, such as convolutional layers,
@@ -53,12 +53,12 @@ def __init__(
input_stride=1,
final_nonlinearity=True,
elu_shift=(0, 0),
bias: Union[bool, list[bool]] = True,
bias: Union[bool, List[bool]] = True,
momentum=0.1,
pad_input=True,
hidden_padding=None,
batch_norm: Union[bool, list[bool]] = True,
batch_norm_scale: Union[bool, list[bool]] = True,
batch_norm: Union[bool, List[bool]] = True,
batch_norm_scale: Union[bool, List[bool]] = True,
final_batchnorm_scale: bool = True,
hidden_dilation=1,
laplace_padding=0,
@@ -109,11 +109,6 @@ def __init__(
linear: Boolean, if True, removes all nonlinearities
nonlinearity_type: String to set the used nonlinearity type loaded from neuralpredictors.layers.activation
nonlinearity_config: Dict of the nonlinearities __init__ parameters.
To enable learning batch_norms bias and scale independently, the arguments bias, batch_norm and batch_norm_scale
work together: By default, all are true. In this case there won't be a bias learned in the convolutional layer, but
batch_norm will learn both its bias and scale. If batch_norm is false, but bias true, a bias will be learned in the
convolutional layer. If batch_norm and bias are true, but batch_norm_scale is false, batch_norm won't have learnable
parameters and a BiasLayer will be added after the batch_norm layer.
"""

if depth_separable and attention_conv:
@@ -194,7 +189,6 @@ def __init__(
warnings.warn(
"group sparsity can not be calculated for the requested conv type. Hidden channels will not be regularized and gamma_hidden is ignored."
)
self.set_batchnorm_type()
self.features = nn.Sequential()
self.add_first_layer()
self.add_subsequent_layers()
@@ -205,19 +199,6 @@ def set_batchnorm_type(self):
self.bias_layer_cls = Bias2DLayer
self.scale_layer_cls = Scale2DLayer

# def add_bn_layer(self, layer, hidden_channels):
def add_bn_layer(self, layer: OrderedDict, layer_idx: int):
if self.batch_norm[layer_idx]:
hidden_channels = self.hidden_channels[layer_idx]
bias = self.bias[layer_idx]
scale = self.batch_norm_scale[layer_idx]

layer["norm"] = self.batchnorm_layer_cls(hidden_channels, momentum=self.momentum, affine=bias and scale)
if bias and not scale:
layer["bias"] = self.bias_layer_cls(hidden_channels)
elif not bias and scale:
layer["scale"] = self.scale_layer_cls(hidden_channels)

def penultimate_layer_built(self):
"""Returns True if the penultimate layer has been built."""
return len(self.features) == self.num_layers - 1
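Since bias, batch_norm, and batch_norm_scale are now typed Union[bool, List[bool]], they can be configured per layer. A hypothetical construction is sketched below; only the three list-valued flags come from this diff, while the remaining constructor arguments (input_channels, hidden_channels, input_kern, hidden_kern, layers) and the list form of hidden_channels are assumed and should be checked against the full signature:

from neuralpredictors.layers.cores.conv2d import Stacked2dCore

core = Stacked2dCore(
    input_channels=1,
    hidden_channels=[32, 32, 32],
    input_kern=9,
    hidden_kern=7,
    layers=3,
    bias=[True, True, False],              # layer 0: affine batch norm only
    batch_norm=[True, True, True],         # layer 1: non-affine batch norm + Bias2DLayer
    batch_norm_scale=[True, False, True],  # layer 2: non-affine batch norm + Scale2DLayer
)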
50 changes: 7 additions & 43 deletions neuralpredictors/layers/cores/conv3d.py
@@ -11,10 +11,10 @@

from ...regularizers import DepthLaplaceL21d
from ..affine import Bias3DLayer, Scale3DLayer
from .base import Core
from .base import ConvCore


class Core3d(Core):
class Core3d(ConvCore):
def initialize(self, cuda=False):
self.apply(self.init_conv)
self.put_to_cuda(cuda=cuda)
@@ -30,6 +30,11 @@ def init_conv(m):
if m.bias is not None:
m.bias.data.fill_(0)

def set_batchnorm_type(self):
self.batchnorm_layer_cls = nn.BatchNorm3d
self.bias_layer_cls = Bias3DLayer
self.scale_layer_cls = Scale3DLayer


class Basic3dCore(Core3d, nn.Module):
def __init__(
@@ -54,13 +59,11 @@ def __init__(
input_regularizer="LaplaceL2norm",
cuda=False,
final_nonlin=True,
independent_bn_bias=True,
spatial_dilation: int = 1,
temporal_dilation: int = 1,
hidden_spatial_dilation=1,
hidden_temporal_dilation=1,
):

"""
:param input_channels: integer, number of input channels as in
:param hidden_channels: number of hidden channels (i.e feature maps) in each hidden layer
@@ -85,14 +88,6 @@
zero is the default however to recreate backwards compatibility.
:param input_regularizer: specifies what kind of spatial regularizer is applied
:param final_nonlin: bool specifying whether to include a nonlinearity after the last convolutional layer in the core
:param independent_bn_bias: If False, will allow for scaling the batch norm, so that batch norm
and bias can both be true. Defaults to True.
To enable learning batch_norms bias and scale independently, the arguments bias, batch_norm and batch_norm_scale
work together: By default, all are true. In this case there won't be a bias learned in the convolutional layer, but
batch_norm will learn both its bias and scale. If batch_norm is false, but bias true, a bias will be learned in the
convolutional layer. If batch_norm and bias are true, but batch_norm_scale is false, batch_norm won't have learnable
parameters and a BiasLayer will be added after the batch_norm layer.
"""
super().__init__()

@@ -112,7 +107,6 @@ def __init__(
self.bias = bias
self.batch_norm = batch_norm
self.batch_norm_scale = batch_norm_scale
self.independent_bn_bias = independent_bn_bias
self.momentum = momentum
self.spatial_dilation = spatial_dilation
self.temporal_dilation = temporal_dilation
@@ -225,19 +219,6 @@ def laplace_temporal(self):
def regularizer(self):
return self.gamma_input_spatial * self.laplace_spatial(), self.gamma_input_temporal * self.laplace_temporal()

def add_bn_layer(self, layer, hidden_channels):
if self.batch_norm:
if self.independent_bn_bias:
layer["norm"] = nn.BatchNorm3d(hidden_channels, momentum=self.momentum)
else:
layer["norm"] = nn.BatchNorm3d(
hidden_channels, momentum=self.momentum, affine=self.bias and self.batch_norm_scale
)
if self.bias and not self.batch_norm_scale:
layer["bias"] = Bias3DLayer(hidden_channels)
elif self.batch_norm_scale:
layer["scale"] = Scale3DLayer(hidden_channels)

@property
def out_channels(self):
return self.hidden_channels[-1]
@@ -267,7 +248,6 @@ def __init__(
batch_norm=True,
padding=False,
batch_norm_scale=True,
independent_bn_bias=True,
momentum=0.01,
laplace_padding=None,
input_regularizer="LaplaceL2norm",
@@ -300,8 +280,6 @@ def __init__(
:param batch_norm: bool specifying whether to include batch norm after convolution in core
:param padding: whether to pad convolutions. Defaults to False.
:param batch_norm_scale: bool, if True, a scaling factor after BN will be learned.
:param independent_bn_bias: If False, will allow for scaling the batch norm, so that batchnorm
and bias can both be true. Defaults to True.
:param momentum: momentum for batch norm
:param laplace_padding: padding size for the laplace convolution. If padding = None, it defaults to half of
the kernel size (recommended). Setting Padding to 0 is not recommended and leads to artefacts,
@@ -329,7 +307,6 @@ def __init__(
self.bias = bias
self.batch_norm = batch_norm
self.batch_norm_scale = batch_norm_scale
self.independent_bn_bias = independent_bn_bias
self.momentum = momentum
self.stride = stride
self.spatial_dilation = spatial_dilation
@@ -450,16 +427,3 @@ def get_kernels(self):
(temporal_kernel,) + spatial_kernel
for temporal_kernel, spatial_kernel in zip(self.temporal_hidden_kernel, self.spatial_hidden_kernel)
]

def add_bn_layer(self, layer, hidden_channels):
if self.batch_norm:
if self.independent_bn_bias:
layer["norm"] = nn.BatchNorm3d(hidden_channels, momentum=self.momentum)
else:
layer["norm"] = nn.BatchNorm3d(
hidden_channels, momentum=self.momentum, affine=self.bias and self.batch_norm_scale
)
if self.bias and not self.batch_norm_scale:
layer["bias"] = Bias3DLayer(hidden_channels)
elif self.batch_norm_scale:
layer["scale"] = Scale3DLayer(hidden_channels)
79 changes: 72 additions & 7 deletions neuralpredictors/training/early_stopping.py
@@ -42,8 +42,8 @@ def early_stopping(
tracker=None,
scheduler=None,
lr_decay_steps=1,
number_warmup_epochs=0,
):

"""
Early stopping iterator. Keeps track of the best model state during training. Resets the model to its
best state when either the maximum number of epochs or the patience (number of epochs without improvement)
@@ -72,10 +72,30 @@
tracker (Tracker):
Tracker to be invoked for every epoch. `log_objective` is invoked with the current value of `objective`. Note that `finalize`
method is NOT invoked.
scheduler: scheduler object, which automatically decreases the LR by a specified amount.
The scheduler's `step` method is invoked, passing in the current value of `objective`
lr_decay_steps: Number of times the learning rate should be reduced before stopping the training.
scheduler: scheduler object or tuple of two scheduler objects, which automatically modifies the LR by a specified amount.
If a tuple of schedulers is provided, the first scheduler is assumed to be the warm-up scheduler. Its `step` method
is called while `epoch` is smaller than `number_warmup_epochs`; afterwards, the `step` method of the second scheduler
is called. The current value of `objective` is passed to `step` if the scheduler at hand is `ReduceLROnPlateau`.
For example, a tuple of schedulers can be of the form:
scheduler = (warmup_scheduler, CosineAnnealingLR(*args, **kwargs))
or, in case no scheduler is desired after the warm-up:
scheduler = (warmup_scheduler, None).
An example warm-up scheduler can be defined as:
def warmup_function(current_step: int):
return 1 / (2 ** (float(number_warmup_epochs - current_step - 1)))
warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_function)
Of course, a single scheduler can also be provided.
If the warm-up ramp is off (ends at a too-high learning rate or does not reach the desired learning rate),
consider adjusting the warm-up function accordingly.
lr_decay_steps: Number of times the learning rate should be reduced before stopping the training.
number_warmup_epochs: Number of warm-up epochs
"""
training_status = model.training

@@ -107,11 +127,41 @@ def finalize(model, best_state_dict):
best_objective = current_objective = _objective()
best_state_dict = copy_state(model)

# check if the learning rate scheduler is 'ReduceLROnPlateau' so that we pass the current_objective to step
reduce_lr_on_plateau = False
if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
reduce_lr_on_plateau = True
elif isinstance(scheduler, tuple):
if isinstance(scheduler[1], torch.optim.lr_scheduler.ReduceLROnPlateau):
reduce_lr_on_plateau = True

# check if warm up is to be performed
if isinstance(scheduler, tuple):
warmup = True

# check if the warm-up scheduler is not of type None
if scheduler[0] is None:
logger.warning(
f"Provided warm up scheduler is of type None. Warm up epochs set to {number_warmup_epochs}. Setting number of warm up epochs to 0"
)
number_warmup_epochs = 0
else:
warmup = False

# check if warm up scheduler and number of warm-up epochs is provided
if warmup and number_warmup_epochs == 0:
logger.warning("Warm up scheduler is provided, but number of warm up steps is set to 0")

# inform user that no warm-up scheduler is provided although warm-up epochs is non-zero
elif not warmup and number_warmup_epochs > 0:
logger.warning(
f"Number of warm up steps is set to {number_warmup_epochs}, but no warm up scheduler is provided"
)

for repeat in range(lr_decay_steps):
patience_counter = 0

while patience_counter < patience and epoch < max_iter:

for _ in range(interval):
epoch += 1
if tracker is not None:
@@ -124,9 +174,24 @@

current_objective = _objective()

# if a scheduler is defined, a .step with the current objective is all that is needed to reduce the LR
# if a scheduler is defined, a .step with or without the current objective is all that is needed to reduce the LR
if scheduler is not None:
scheduler.step(current_objective)
if warmup and epoch < number_warmup_epochs:
# warm-up step
scheduler[0].step()
elif reduce_lr_on_plateau:
# reduce_lr_on_plateau requires current objective for the step
if not warmup:
scheduler.step(current_objective)
else:
scheduler[1].step(current_objective)
else:
# .step() for the rest of the schedulers
if not warmup:
scheduler.step()
else:
if scheduler[1] is not None:
scheduler[1].step()

if current_objective * maximize < best_objective * maximize - tolerance:
logger.info(f"[{epoch:03d}|{patience_counter:02d}/{patience:02d}] ---> {current_objective}")
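A hypothetical end-to-end call using the new warm-up support is sketched below. The dummy model, the constant stop_closure, and the assumption that the generator yields (epoch, objective) are placeholders to keep the sketch self-contained; the positional model and objective arguments follow the docstring above, and only the scheduler tuple, number_warmup_epochs, and lr_decay_steps come from this diff:

import torch

from neuralpredictors.training.early_stopping import early_stopping

# dummy stand-ins; replace with a real model, validation closure, and training step
model = torch.nn.Linear(10, 1)
stop_closure = lambda *args: 0.0  # callable returning the validation objective

number_warmup_epochs = 5
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


def warmup_function(current_step: int):
    # doubles the LR each warm-up epoch until the base LR is reached
    return 1 / (2 ** (float(number_warmup_epochs - current_step - 1)))


warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_function)
main_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max")

for epoch, val_objective in early_stopping(
    model,
    stop_closure,
    max_iter=10,
    scheduler=(warmup_scheduler, main_scheduler),   # (warm-up scheduler, main scheduler)
    number_warmup_epochs=number_warmup_epochs,
    lr_decay_steps=2,
):
    pass  # one training epoch would run here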
