From 029c6d8745f07189634b54acb2cc9db76a137ac3 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Mon, 5 Aug 2024 09:51:41 +0000 Subject: [PATCH 1/7] adding one-hot encoding to embedding_layer --- mambular/arch_utils/embedding_layer.py | 34 ++++++++++++++++++++------ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/mambular/arch_utils/embedding_layer.py b/mambular/arch_utils/embedding_layer.py index cd115f5..43fe453 100644 --- a/mambular/arch_utils/embedding_layer.py +++ b/mambular/arch_utils/embedding_layer.py @@ -12,6 +12,7 @@ def __init__( layer_norm_after_embedding=False, use_cls=False, cls_position=0, + cat_encoding="int", ): """ Embedding layer that handles numerical and categorical embeddings. @@ -56,15 +57,23 @@ def __init__( ] ) - self.cat_embeddings = nn.ModuleList( - [ - nn.Sequential( - nn.Embedding(num_categories + 1, d_model), - self.embedding_activation, + self.cat_embeddings = nn.ModuleList() + for feature_name, num_categories in cat_feature_info.items(): + if cat_encoding == "int": + self.cat_embeddings.append( + nn.Sequential( + nn.Embedding(num_categories + 1, d_model), + self.embedding_activation, + ) + ) + elif cat_encoding == "one-hot": + self.cat_embeddings.append( + nn.Sequential( + OneHotEncoding(num_categories), + nn.Linear(num_categories, d_model, bias=False), + self.embedding_activation, + ) ) - for feature_name, num_categories in cat_feature_info.items() - ] - ) if self.use_cls: self.cls_token = nn.Parameter(torch.zeros(1, 1, d_model)) @@ -143,3 +152,12 @@ def forward(self, num_features=None, cat_features=None): ) return x + + +class OneHotEncoding(nn.Module): + def __init__(self, num_categories): + super(OneHotEncoding, self).__init__() + self.num_categories = num_categories + + def forward(self, x): + return torch.nn.functional.one_hot(x, num_classes=self.num_categories).float() From d413fd84b0544d0a535d1302c6f96bd4ac902de1 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Mon, 5 Aug 2024 09:51:59 +0000 Subject: [PATCH 2/7] adding option to one-hot encode cat features in embedding layer --- mambular/base_models/ft_transformer.py | 5 ++++- mambular/base_models/mambular.py | 9 ++++++--- mambular/base_models/tabtransformer.py | 5 ++++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/mambular/base_models/ft_transformer.py b/mambular/base_models/ft_transformer.py index da5e4ad..ddbf03c 100644 --- a/mambular/base_models/ft_transformer.py +++ b/mambular/base_models/ft_transformer.py @@ -132,9 +132,12 @@ def __init__( embedding_activation=self.hparams.get( "embedding_activation", config.embedding_activation ), - layer_norm_after_embedding=self.hparams.get("layer_norm_after_embedding"), + layer_norm_after_embedding=self.hparams.get( + "layer_norm_after_embedding", config.layer_norm_after_embedding + ), use_cls=True, cls_position=0, + cat_encoding=self.hparams.get("cat_encoding", config.cat_encoding), ) head_activation = self.hparams.get("head_activation", config.head_activation) diff --git a/mambular/base_models/mambular.py b/mambular/base_models/mambular.py index 33b2b6f..d362b8a 100644 --- a/mambular/base_models/mambular.py +++ b/mambular/base_models/mambular.py @@ -150,9 +150,12 @@ def __init__( embedding_activation=self.hparams.get( "embedding_activation", config.embedding_activation ), - layer_norm_after_embedding=self.hparams.get("layer_norm_after_embedding"), - use_cls=True, - cls_position=0, + layer_norm_after_embedding=self.hparams.get( + "layer_norm_after_embedding", config.layer_norm_after_embedding + ), + use_cls=False, + cls_position=-1, + 
cat_encoding=self.hparams.get("cat_encoding", config.cat_encoding), ) head_activation = self.hparams.get("head_activation", config.head_activation) diff --git a/mambular/base_models/tabtransformer.py b/mambular/base_models/tabtransformer.py index 630b968..d9c5052 100644 --- a/mambular/base_models/tabtransformer.py +++ b/mambular/base_models/tabtransformer.py @@ -139,9 +139,12 @@ def __init__( embedding_activation=self.hparams.get( "embedding_activation", config.embedding_activation ), - layer_norm_after_embedding=self.hparams.get("layer_norm_after_embedding"), + layer_norm_after_embedding=self.hparams.get( + "layer_norm_after_embedding", config.layer_norm_after_embedding + ), use_cls=True, cls_position=0, + cat_encoding=self.hparams.get("cat_encoding", config.cat_encoding), ) head_activation = self.hparams.get("head_activation", config.head_activation) From 07164f5e5cbb78f8cfa2669013e2df9b7774b403 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Mon, 5 Aug 2024 09:52:10 +0000 Subject: [PATCH 3/7] adjusting configs --- mambular/configs/fttransformer_config.py | 1 + mambular/configs/mambular_config.py | 1 + mambular/configs/tabtransformer_config.py | 1 + 3 files changed, 3 insertions(+) diff --git a/mambular/configs/fttransformer_config.py b/mambular/configs/fttransformer_config.py index 11cc30c..35c3033 100644 --- a/mambular/configs/fttransformer_config.py +++ b/mambular/configs/fttransformer_config.py @@ -85,3 +85,4 @@ class DefaultFTTransformerConfig: layer_norm_eps: float = 1e-05 transformer_dim_feedforward: int = 256 numerical_embedding: str = "ple" + cat_encoding: str = "int" diff --git a/mambular/configs/mambular_config.py b/mambular/configs/mambular_config.py index c6fcd89..2ee5fe1 100644 --- a/mambular/configs/mambular_config.py +++ b/mambular/configs/mambular_config.py @@ -116,3 +116,4 @@ class DefaultMambularConfig: layer_norm_eps: float = 1e-05 AD_weight_decay: bool = False BC_layer_norm: bool = True + cat_encoding: str = "int" diff --git a/mambular/configs/tabtransformer_config.py b/mambular/configs/tabtransformer_config.py index 31e63f4..f0206d6 100644 --- a/mambular/configs/tabtransformer_config.py +++ b/mambular/configs/tabtransformer_config.py @@ -84,3 +84,4 @@ class DefaultTabTransformerConfig: transformer_activation: callable = ReGLU() layer_norm_eps: float = 1e-05 transformer_dim_feedforward: int = 512 + cat_encoding: str = "int" From 71cc68efedc642a5d260ba34162e0af37a9f47d7 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Mon, 5 Aug 2024 09:52:24 +0000 Subject: [PATCH 4/7] renaming sklearn class attributes --- mambular/models/sklearn_base_classifier.py | 28 +++++++++++----------- mambular/models/sklearn_base_lss.py | 26 ++++++++++---------- mambular/models/sklearn_base_regressor.py | 24 ++++++++++--------- 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/mambular/models/sklearn_base_classifier.py b/mambular/models/sklearn_base_classifier.py index ec39edc..f442688 100644 --- a/mambular/models/sklearn_base_classifier.py +++ b/mambular/models/sklearn_base_classifier.py @@ -37,7 +37,7 @@ def __init__(self, model, config, **kwargs): } self.preprocessor = Preprocessor(**preprocessor_kwargs) - self.model = None + self.task_model = None # Raise a warning if task is set to 'classification' if preprocessor_kwargs.get("task") == "regression": @@ -194,7 +194,7 @@ def build_model( num_classes = len(np.unique(y)) - self.model = TaskModel( + self.task_model = TaskModel( model_class=self.base_model, num_classes=num_classes, config=self.config, @@ -237,10 +237,10 @@ def 
get_number_of_params(self, requires_grad=True): else: if requires_grad: return sum( - p.numel() for p in self.model.parameters() if p.requires_grad + p.numel() for p in self.task_model.parameters() if p.requires_grad ) else: - return sum(p.numel() for p in self.model.parameters()) + return sum(p.numel() for p in self.task_model.parameters()) def fit( self, @@ -345,7 +345,7 @@ def fit( num_classes = len(np.unique(y)) - self.model = TaskModel( + self.task_model = TaskModel( model_class=self.base_model, num_classes=num_classes, config=self.config, @@ -379,12 +379,12 @@ def fit( ], **trainer_kwargs ) - self.trainer.fit(self.model, self.data_module) + self.trainer.fit(self.task_model, self.data_module) best_model_path = checkpoint_callback.best_model_path if best_model_path: checkpoint = torch.load(best_model_path) - self.model.load_state_dict(checkpoint["state_dict"]) + self.task_model.load_state_dict(checkpoint["state_dict"]) return self @@ -404,14 +404,14 @@ def predict(self, X): The predicted target values. """ # Ensure model and data module are initialized - if self.model is None or self.data_module is None: + if self.task_model is None or self.data_module is None: raise ValueError("The model or data module has not been fitted yet.") # Preprocess the data using the data module cat_tensors, num_tensors = self.data_module.preprocess_test_data(X) # Move tensors to appropriate device - device = next(self.model.parameters()).device + device = next(self.task_model.parameters()).device if isinstance(cat_tensors, list): cat_tensors = [tensor.to(device) for tensor in cat_tensors] else: @@ -423,11 +423,11 @@ def predict(self, X): num_tensors = num_tensors.to(device) # Set model to evaluation mode - self.model.eval() + self.task_model.eval() # Perform inference with torch.no_grad(): - logits = self.model(num_features=num_tensors, cat_features=cat_tensors) + logits = self.task_model(num_features=num_tensors, cat_features=cat_tensors) # Check the shape of the logits to determine binary or multi-class classification if logits.shape[1] == 1: @@ -484,7 +484,7 @@ def predict_proba(self, X): # Preprocess the data if not isinstance(X, pd.DataFrame): X = pd.DataFrame(X) - device = next(self.model.parameters()).device + device = next(self.task_model.parameters()).device cat_tensors, num_tensors = self.data_module.preprocess_test_data(X) if isinstance(cat_tensors, list): cat_tensors = [tensor.to(device) for tensor in cat_tensors] @@ -497,11 +497,11 @@ def predict_proba(self, X): num_tensors = num_tensors.to(device) # Set the model to evaluation mode - self.model.eval() + self.task_model.eval() # Perform inference with torch.no_grad(): - logits = self.model(num_features=num_tensors, cat_features=cat_tensors) + logits = self.task_model(num_features=num_tensors, cat_features=cat_tensors) if logits.shape[1] > 1: probabilities = torch.softmax(logits, dim=1) else: diff --git a/mambular/models/sklearn_base_lss.py b/mambular/models/sklearn_base_lss.py index 62f2d3a..4e0d6e4 100644 --- a/mambular/models/sklearn_base_lss.py +++ b/mambular/models/sklearn_base_lss.py @@ -58,7 +58,7 @@ def __init__(self, model, config, **kwargs): } self.preprocessor = Preprocessor(**preprocessor_kwargs) - self.model = None + self.task_model = None # Raise a warning if task is set to 'classification' if preprocessor_kwargs.get("task") == "classification": @@ -212,7 +212,7 @@ def build_model( num_classes = len(np.unique(y)) - self.model = TaskModel( + self.task_model = TaskModel( model_class=self.base_model, num_classes=num_classes, 
config=self.config, @@ -255,10 +255,10 @@ def get_number_of_params(self, requires_grad=True): else: if requires_grad: return sum( - p.numel() for p in self.model.parameters() if p.requires_grad + p.numel() for p in self.task_model.parameters() if p.requires_grad ) else: - return sum(p.numel() for p in self.model.parameters()) + return sum(p.numel() for p in self.task_model.parameters()) def fit( self, @@ -383,7 +383,7 @@ def fit( X, y, X_val, y_val, val_size=val_size, random_state=random_state ) - self.model = TaskModel( + self.task_model = TaskModel( model_class=self.base_model, num_classes=self.family.param_count, family=self.family, @@ -419,12 +419,12 @@ def fit( ], **trainer_kwargs ) - self.trainer.fit(self.model, self.data_module) + self.trainer.fit(self.task_model, self.data_module) best_model_path = checkpoint_callback.best_model_path if best_model_path: checkpoint = torch.load(best_model_path) - self.model.load_state_dict(checkpoint["state_dict"]) + self.task_model.load_state_dict(checkpoint["state_dict"]) return self @@ -444,14 +444,14 @@ def predict(self, X, raw=False): The predicted target values. """ # Ensure model and data module are initialized - if self.model is None or self.data_module is None: + if self.task_model is None or self.data_module is None: raise ValueError("The model or data module has not been fitted yet.") # Preprocess the data using the data module cat_tensors, num_tensors = self.data_module.preprocess_test_data(X) # Move tensors to appropriate device - device = next(self.model.parameters()).device + device = next(self.task_model.parameters()).device if isinstance(cat_tensors, list): cat_tensors = [tensor.to(device) for tensor in cat_tensors] else: @@ -463,14 +463,14 @@ def predict(self, X, raw=False): num_tensors = num_tensors.to(device) # Set model to evaluation mode - self.model.eval() + self.task_model.eval() # Perform inference with torch.no_grad(): - predictions = self.model(num_features=num_tensors, cat_features=cat_tensors) + predictions = self.task_model(num_features=num_tensors, cat_features=cat_tensors) if not raw: - return self.model.family(predictions).cpu().numpy() + return self.task_model.family(predictions).cpu().numpy() # Convert predictions to NumPy array and return else: @@ -506,7 +506,7 @@ def evaluate(self, X, y_true, metrics=None, distribution_family=None): """ # Infer distribution family from model settings if not provided if distribution_family is None: - distribution_family = getattr(self.model, "distribution_family", "normal") + distribution_family = getattr(self.task_model, "distribution_family", "normal") # Setup default metrics if none are provided if metrics is None: diff --git a/mambular/models/sklearn_base_regressor.py b/mambular/models/sklearn_base_regressor.py index 30bedb9..1a098ac 100644 --- a/mambular/models/sklearn_base_regressor.py +++ b/mambular/models/sklearn_base_regressor.py @@ -37,7 +37,7 @@ def __init__(self, model, config, **kwargs): self.preprocessor = Preprocessor(**preprocessor_kwargs) self.base_model = model - self.model = None + self.task_model = None self.built = False # Raise a warning if task is set to 'classification' @@ -190,7 +190,7 @@ def build_model( X, y, X_val, y_val, val_size=val_size, random_state=random_state ) - self.model = TaskModel( + self.task_model = TaskModel( model_class=self.base_model, config=self.config, cat_feature_info=self.data_module.cat_feature_info, @@ -232,10 +232,10 @@ def get_number_of_params(self, requires_grad=True): else: if requires_grad: return sum( - p.numel() for p in 
self.model.parameters() if p.requires_grad + p.numel() for p in self.task_model.parameters() if p.requires_grad ) else: - return sum(p.numel() for p in self.model.parameters()) + return sum(p.numel() for p in self.task_model.parameters()) def fit( self, @@ -336,7 +336,7 @@ def fit( X, y, X_val, y_val, val_size=val_size, random_state=random_state ) - self.model = TaskModel( + self.task_model = TaskModel( model_class=self.base_model, config=self.config, cat_feature_info=self.data_module.cat_feature_info, @@ -372,12 +372,12 @@ def fit( ], **trainer_kwargs ) - self.trainer.fit(self.model, self.data_module) + self.trainer.fit(self.task_model, self.data_module) best_model_path = checkpoint_callback.best_model_path if best_model_path: checkpoint = torch.load(best_model_path) - self.model.load_state_dict(checkpoint["state_dict"]) + self.task_model.load_state_dict(checkpoint["state_dict"]) return self @@ -397,14 +397,14 @@ def predict(self, X): The predicted target values. """ # Ensure model and data module are initialized - if self.model is None or self.data_module is None: + if self.task_model is None or self.data_module is None: raise ValueError("The model or data module has not been fitted yet.") # Preprocess the data using the data module cat_tensors, num_tensors = self.data_module.preprocess_test_data(X) # Move tensors to appropriate device - device = next(self.model.parameters()).device + device = next(self.task_model.parameters()).device if isinstance(cat_tensors, list): cat_tensors = [tensor.to(device) for tensor in cat_tensors] else: @@ -416,11 +416,13 @@ def predict(self, X): num_tensors = num_tensors.to(device) # Set model to evaluation mode - self.model.eval() + self.task_model.eval() # Perform inference with torch.no_grad(): - predictions = self.model(num_features=num_tensors, cat_features=cat_tensors) + predictions = self.task_model( + num_features=num_tensors, cat_features=cat_tensors + ) # Convert predictions to NumPy array and return return predictions.cpu().numpy() From 53b77c5a4fa8256ace4ec52e7d660a86d4561991 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Mon, 5 Aug 2024 09:52:37 +0000 Subject: [PATCH 5/7] adjusting class attribute in lightning wrapper --- mambular/base_models/lightning_wrapper.py | 42 ++++------------------- 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/mambular/base_models/lightning_wrapper.py b/mambular/base_models/lightning_wrapper.py index b26c643..6d3f5c3 100644 --- a/mambular/base_models/lightning_wrapper.py +++ b/mambular/base_models/lightning_wrapper.py @@ -82,7 +82,7 @@ def __init__( else: output_dim = num_classes - self.model = model_class( + self.base_model = model_class( config=config, num_feature_info=num_feature_info, cat_feature_info=cat_feature_info, @@ -107,7 +107,7 @@ def forward(self, num_features, cat_features): Model output. 
""" - return self.model.forward(num_features, cat_features) + return self.base_model.forward(num_features, cat_features) def compute_loss(self, predictions, y_true): """ @@ -168,16 +168,6 @@ def training_step(self, batch, batch_idx): prog_bar=True, logger=True, ) - elif isinstance(self.loss_fct, nn.MSELoss): - rmse = torch.sqrt(loss) - self.log( - "train_rmse", - rmse, - on_step=True, - on_epoch=True, - prog_bar=True, - logger=True, - ) return loss @@ -205,7 +195,7 @@ def validation_step(self, batch, batch_idx): self.log( "val_loss", val_loss, - on_step=True, + on_step=False, on_epoch=True, prog_bar=True, logger=True, @@ -218,17 +208,7 @@ def validation_step(self, batch, batch_idx): self.log( "val_acc", acc, - on_step=True, - on_epoch=True, - prog_bar=True, - logger=True, - ) - elif isinstance(self.loss_fct, nn.MSELoss): - rmse = torch.sqrt(val_loss) - self.log( - "val_rmse", - rmse, - on_step=True, + on_step=False, on_epoch=True, prog_bar=True, logger=True, @@ -272,17 +252,7 @@ def test_step(self, batch, batch_idx): self.log( "test_acc", acc, - on_step=True, - on_epoch=True, - prog_bar=True, - logger=True, - ) - elif isinstance(self.loss_fct, nn.MSELoss): - rmse = torch.sqrt(test_loss) - self.log( - "test_rmse", - rmse, - on_step=True, + on_step=False, on_epoch=True, prog_bar=True, logger=True, @@ -300,7 +270,7 @@ def configure_optimizers(self): A dictionary containing the optimizer and lr_scheduler configurations. """ optimizer = torch.optim.Adam( - self.model.parameters(), + self.base_model.parameters(), lr=self.lr, weight_decay=self.weight_decay, ) From 4f6b0882bdbfc6b564d316e5723c386f8acc88aa Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Mon, 5 Aug 2024 10:09:36 +0000 Subject: [PATCH 6/7] adjusting config docstrings --- mambular/configs/fttransformer_config.py | 3 ++- mambular/configs/mambular_config.py | 6 ++++-- mambular/configs/tabtransformer_config.py | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mambular/configs/fttransformer_config.py b/mambular/configs/fttransformer_config.py index 35c3033..a433753 100644 --- a/mambular/configs/fttransformer_config.py +++ b/mambular/configs/fttransformer_config.py @@ -58,6 +58,8 @@ class DefaultFTTransformerConfig: Epsilon value for layer normalization. transformer_dim_feedforward : int, default=512 Dimensionality of the feed-forward layers in the transformer. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. """ lr: float = 1e-04 @@ -84,5 +86,4 @@ class DefaultFTTransformerConfig: transformer_activation: callable = ReGLU() layer_norm_eps: float = 1e-05 transformer_dim_feedforward: int = 256 - numerical_embedding: str = "ple" cat_encoding: str = "int" diff --git a/mambular/configs/mambular_config.py b/mambular/configs/mambular_config.py index 2ee5fe1..4f3e495 100644 --- a/mambular/configs/mambular_config.py +++ b/mambular/configs/mambular_config.py @@ -76,9 +76,11 @@ class DefaultMambularConfig: layer_norm_eps : float, default=1e-05 Epsilon value for layer normalization. AD_weight_decay : bool, default=False - whether weight decay is also applied to A-D matrices + whether weight decay is also applied to A-D matrices. BC_layer_norm: bool, default=True - whether to apply layer normalization to B-C matrices + whether to apply layer normalization to B-C matrices. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. 
""" lr: float = 1e-04 diff --git a/mambular/configs/tabtransformer_config.py b/mambular/configs/tabtransformer_config.py index f0206d6..a1131c9 100644 --- a/mambular/configs/tabtransformer_config.py +++ b/mambular/configs/tabtransformer_config.py @@ -58,6 +58,8 @@ class DefaultTabTransformerConfig: Epsilon value for layer normalization. transformer_dim_feedforward : int, default=512 Dimensionality of the feed-forward layers in the transformer. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. """ lr: float = 1e-04 From 71f35e658119cc5ce1d141e1d2ae7c7d0f9e6024 Mon Sep 17 00:00:00 2001 From: AnFreTh Date: Mon, 5 Aug 2024 10:09:48 +0000 Subject: [PATCH 7/7] adjusting docstrings for documentation --- mambular/models/fttransformer.py | 6 ++++++ mambular/models/mambular.py | 24 ++++++++++++++++++++++++ mambular/models/tabtransformer.py | 6 ++++++ 3 files changed, 36 insertions(+) diff --git a/mambular/models/fttransformer.py b/mambular/models/fttransformer.py index 71d3653..efd346e 100644 --- a/mambular/models/fttransformer.py +++ b/mambular/models/fttransformer.py @@ -64,6 +64,8 @@ class FTTransformerRegressor(SklearnBaseRegressor): Epsilon value for layer normalization. transformer_dim_feedforward : int, default=512 Dimensionality of the feed-forward layers in the transformer. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. n_bins : int, default=50 The number of bins to use for numerical feature binning. This parameter is relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. @@ -171,6 +173,8 @@ class FTTransformerClassifier(SklearnBaseClassifier): Epsilon value for layer normalization. transformer_dim_feedforward : int, default=512 Dimensionality of the feed-forward layers in the transformer. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. n_bins : int, default=50 The number of bins to use for numerical feature binning. This parameter is relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. @@ -278,6 +282,8 @@ class FTTransformerLSS(SklearnBaseLSS): Epsilon value for layer normalization. transformer_dim_feedforward : int, default=512 Dimensionality of the feed-forward layers in the transformer. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. n_bins : int, default=50 The number of bins to use for numerical feature binning. This parameter is relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. diff --git a/mambular/models/mambular.py b/mambular/models/mambular.py index 6fc147c..09a518a 100644 --- a/mambular/models/mambular.py +++ b/mambular/models/mambular.py @@ -79,6 +79,14 @@ class MambularRegressor(SklearnBaseRegressor): Whether to append a cls to the end of each 'sequence'. shuffle_embeddings : bool, default=False. Whether to shuffle the embeddings before being passed to the Mamba layers. + layer_norm_eps : float, default=1e-05 + Epsilon value for layer normalization. + AD_weight_decay : bool, default=False + whether weight decay is also applied to A-D matrices. + BC_layer_norm: bool, default=True + whether to apply layer normalization to B-C matrices. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. n_bins : int, default=50 The number of bins to use for numerical feature binning. 
This parameter is relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. @@ -198,6 +206,14 @@ class MambularClassifier(SklearnBaseClassifier): Whether to use learnable feature interactions before passing through mamba blocks. shuffle_embeddings : bool, default=False. Whether to shuffle the embeddings before being passed to the Mamba layers. + layer_norm_eps : float, default=1e-05 + Epsilon value for layer normalization. + AD_weight_decay : bool, default=False + whether weight decay is also applied to A-D matrices. + BC_layer_norm: bool, default=True + whether to apply layer normalization to B-C matrices. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. n_bins : int, default=50 The number of bins to use for numerical feature binning. This parameter is relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. @@ -320,6 +336,14 @@ class MambularLSS(SklearnBaseLSS): only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. shuffle_embeddings : bool, default=False. Whether to shuffle the embeddings before being passed to the Mamba layers. + layer_norm_eps : float, default=1e-05 + Epsilon value for layer normalization. + AD_weight_decay : bool, default=False + whether weight decay is also applied to A-D matrices. + BC_layer_norm: bool, default=True + whether to apply layer normalization to B-C matrices. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. numerical_preprocessing : str, default="ple" The preprocessing strategy for numerical features. Valid options are 'binning', 'one_hot', 'standardization', and 'normalization'. diff --git a/mambular/models/tabtransformer.py b/mambular/models/tabtransformer.py index 5cd3787..901369e 100644 --- a/mambular/models/tabtransformer.py +++ b/mambular/models/tabtransformer.py @@ -63,6 +63,8 @@ class TabTransformerRegressor(SklearnBaseRegressor): Epsilon value for layer normalization. transformer_dim_feedforward : int, default=512 Dimensionality of the feed-forward layers in the transformer. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. n_bins : int, default=50 The number of bins to use for numerical feature binning. This parameter is relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. @@ -170,6 +172,8 @@ class TabTransformerClassifier(SklearnBaseClassifier): Epsilon value for layer normalization. transformer_dim_feedforward : int, default=512 Dimensionality of the feed-forward layers in the transformer. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. n_bins : int, default=50 The number of bins to use for numerical feature binning. This parameter is relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'. @@ -277,6 +281,8 @@ class TabTransformerLSS(SklearnBaseLSS): Epsilon value for layer normalization. transformer_dim_feedforward : int, default=512 Dimensionality of the feed-forward layers in the transformer. + cat_encoding : str, default="int" + whether to use integer encoding or one-hot encoding for cat features. n_bins : int, default=50 The number of bins to use for numerical feature binning. This parameter is relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
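
Note (illustrative, not part of the patch series): the sketch below shows, in isolation, the one-hot categorical embedding path that PATCH 1/7 adds to EmbeddingLayer — category codes are one-hot encoded by the new OneHotEncoding module and projected to d_model with a bias-free nn.Linear, mirroring the nn.Sequential built when cat_encoding="one-hot" (the default remains "int" per the config changes in PATCH 3/7). The concrete values here (num_categories = 7, d_model = 16, nn.ReLU standing in for embedding_activation, the random batch of codes) are assumptions for illustration only. The sketch also assumes integer codes in [0, num_categories); the integer nn.Embedding path in the same patch reserves num_categories + 1 indices, so adjust the encoding size if an extra index is used for unknown categories.

# Minimal sketch of the cat_encoding="one-hot" path from PATCH 1/7 (hypothetical values).
import torch
import torch.nn as nn


class OneHotEncoding(nn.Module):
    """One-hot encodes integer category codes, as added in PATCH 1/7."""

    def __init__(self, num_categories):
        super(OneHotEncoding, self).__init__()
        self.num_categories = num_categories

    def forward(self, x):
        # Assumes codes lie in [0, num_categories); returns a float one-hot matrix.
        return torch.nn.functional.one_hot(x, num_classes=self.num_categories).float()


num_categories, d_model = 7, 16  # assumed example sizes
one_hot_embedding = nn.Sequential(
    OneHotEncoding(num_categories),
    nn.Linear(num_categories, d_model, bias=False),  # dense projection of the one-hot vector
    nn.ReLU(),  # stand-in for the layer's embedding_activation
)

codes = torch.randint(0, num_categories, (32,))  # a batch of 32 category codes
embedded = one_hot_embedding(codes)
print(embedded.shape)  # torch.Size([32, 16])

With the sklearn wrappers, this path would be selected by passing cat_encoding="one-hot" through the model hparams or config rather than by building the module directly, as PATCH 2/7 wires the option into the FT-Transformer, Mambular, and TabTransformer base models.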