From 950f676c24e3f3b67917991f63200fa3482b9d84 Mon Sep 17 00:00:00 2001
From: Mikhail Bolev <92105261+kenshi777@users.noreply.github.com>
Date: Thu, 17 Oct 2024 14:44:16 +0300
Subject: [PATCH 1/3] Fix `TSDataset.drop_features` allowing to drop target
 (#491)

---
 CHANGELOG.md                        |  2 +-
 etna/datasets/tsdataset.py          |  5 ++-
 tests/test_datasets/test_dataset.py |  9 +++-
 .../test_filter_transform.py        | 43 ++++++++++++-------
 4 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2635d90ab..2bb451112 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,7 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 - Fix working with `embedding_sizes` in `202-NN_examples` notebook ([#489](https://github.com/etna-team/etna/pull/489))
--
+- Disallow dropping target in `TSDataset.drop_features` ([#491](https://github.com/etna-team/etna/pull/491))
 -
 -
 -
diff --git a/etna/datasets/tsdataset.py b/etna/datasets/tsdataset.py
index 5a767eeef..3488f9ea2 100644
--- a/etna/datasets/tsdataset.py
+++ b/etna/datasets/tsdataset.py
@@ -1296,7 +1296,7 @@ def drop_features(self, features: List[str], drop_from_exog: bool = False):
         Raises
         ------
         ValueError:
-            If ``features`` list contains target components
+            If ``features`` list contains target or target components
         """
 
         features_set = set(features)
@@ -1312,6 +1312,9 @@ def drop_features(self, features: List[str], drop_from_exog: bool = False):
                 "Prediction intervals can't be dropped from the dataset using this method! Use `drop_prediction_intervals` method!"
             )
 
+        if "target" in features_set:
+            raise ValueError("Target can't be dropped from the dataset!")
+
         dfs = [("df", self.df)]
         if drop_from_exog:
             dfs.append(("df_exog", self.df_exog))
diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
index ce3ada570..5a43a4e21 100644
--- a/tests/test_datasets/test_dataset.py
+++ b/tests/test_datasets/test_dataset.py
@@ -1592,7 +1592,7 @@ def test_drop_features_raise_warning_on_unknown_columns(
 @pytest.mark.parametrize(
     "features, expected_regressors",
     (
-        (["target", "regressor_2"], ["regressor_1"]),
+        (["regressor_2"], ["regressor_1"]),
         (["out_of_dataset_column"], ["regressor_1", "regressor_2"]),
     ),
 )
@@ -1603,6 +1603,13 @@ def test_drop_features_update_regressors(df_and_regressors, features, expected_r
     assert sorted(ts.regressors) == sorted(expected_regressors)
 
 
+def test_drop_features_throw_error_on_target(df_and_regressors):
+    df, df_exog, known_future = df_and_regressors
+    ts = TSDataset(df=df, df_exog=df_exog, freq="D", known_future=known_future)
+    with pytest.raises(ValueError, match="Target can't be dropped from the dataset!"):
+        ts.drop_features(features=["target"], drop_from_exog=False)
+
+
 def test_drop_features_throw_error_on_target_components(ts_with_target_components):
     with pytest.raises(
         ValueError,
diff --git a/tests/test_transforms/test_feature_selection/test_filter_transform.py b/tests/test_transforms/test_feature_selection/test_filter_transform.py
index 42ae4e211..6a2fc3e26 100644
--- a/tests/test_transforms/test_feature_selection/test_filter_transform.py
+++ b/tests/test_transforms/test_feature_selection/test_filter_transform.py
@@ -43,7 +43,7 @@ def test_set_none():
     _ = FilterFeaturesTransform()
 
 
-@pytest.mark.parametrize("include", [[], ["target"], ["exog_1"], ["exog_1", "exog_2", "target"]])
+@pytest.mark.parametrize("include", [["target"], ["target", "exog_1"], ["exog_1", "exog_2", "target"]])
 def test_include_filter(ts_with_features, include):
     """Test that transform keeps only features from include."""
     original_df = ts_with_features.to_pandas()
@@ -60,9 +60,15 @@ def test_include_filter(ts_with_features, include):
     "exclude, expected_columns",
     [
         ([], ["target", "exog_1", "exog_2"]),
-        (["target"], ["exog_1", "exog_2"]),
+        (["exog_1"], ["target", "exog_2"]),
         (["exog_1", "exog_2"], ["target"]),
-        (["target", "exog_1", "exog_2"], []),
+        (
+            ["exog_2"],
+            [
+                "target",
+                "exog_1",
+            ],
+        ),
     ],
 )
 def test_exclude_filter(ts_with_features, exclude, expected_columns):
@@ -95,9 +101,9 @@ def test_exclude_filter_wrong_column(ts_with_features):
     "columns, saved_columns",
     [
         ([], []),
-        (["target"], ["target"]),
+        (["exog_1"], ["exog_1"]),
         (["exog_1", "exog_2"], ["exog_1", "exog_2"]),
-        (["target", "exog_1", "exog_2"], ["target", "exog_1", "exog_2"]),
+        (["exog_2"], ["exog_2"]),
     ],
 )
 def test_transform_exclude_save_columns(ts_with_features, columns, saved_columns, return_features):
@@ -120,9 +126,9 @@ def test_transform_exclude_save_columns(ts_with_features, columns, saved_columns
 @pytest.mark.parametrize(
     "columns, saved_columns",
     [
-        ([], ["target", "exog_1", "exog_2"]),
+        (["target", "exog_1"], ["exog_2"]),
         (["target"], ["exog_1", "exog_2"]),
-        (["exog_1", "exog_2"], ["target"]),
+        (["target", "exog_2"], ["exog_1"]),
         (["target", "exog_1", "exog_2"], []),
     ],
 )
@@ -147,12 +153,19 @@ def test_transform_include_save_columns(ts_with_features, columns, saved_columns
     [
         ([], True, ["exog_1", "target", "exog_2"]),
         ([], False, ["target", "exog_1", "exog_2"]),
-        (["target"], True, ["exog_1", "target", "exog_2"]),
-        (["target"], False, ["exog_2", "exog_1"]),
+        (["exog_1"], True, ["target", "exog_2", "exog_1"]),
+        (["exog_1"], False, ["exog_2", "target"]),
         (["exog_1", "exog_2"], True, ["exog_1", "target", "exog_2"]),
         (["exog_1", "exog_2"], False, ["target"]),
-        (["target", "exog_1", "exog_2"], True, ["exog_1", "target", "exog_2"]),
-        (["target", "exog_1", "exog_2"], False, []),
+        (["exog_2"], True, ["exog_1", "target", "exog_2"]),
+        (
+            ["exog_2"],
+            False,
+            [
+                "target",
+                "exog_1",
+            ],
+        ),
     ],
 )
 def test_inverse_transform_back_excluded_columns(ts_with_features, columns, return_features, expected_columns):
@@ -169,12 +182,12 @@ def test_inverse_transform_back_excluded_columns(ts_with_features, columns, retu
 @pytest.mark.parametrize(
     "columns, return_features, expected_columns",
     [
-        ([], True, ["exog_1", "target", "exog_2"]),
-        ([], False, []),
+        (["target", "exog_1"], True, ["exog_1", "target", "exog_2"]),
+        (["target", "exog_1"], False, ["exog_1", "target"]),
         (["target"], True, ["exog_1", "target", "exog_2"]),
         (["target"], False, ["target"]),
-        (["exog_1", "exog_2"], True, ["exog_1", "target", "exog_2"]),
-        (["exog_1", "exog_2"], False, ["exog_1", "exog_2"]),
+        (["target", "exog_2"], True, ["exog_1", "target", "exog_2"]),
+        (["target", "exog_2"], False, ["exog_2", "target"]),
         (["target", "exog_1", "exog_2"], True, ["exog_1", "target", "exog_2"]),
         (["target", "exog_1", "exog_2"], False, ["exog_1", "target", "exog_2"]),
     ],
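
A minimal sketch of the guard introduced by this patch, as seen from user code. The dataset construction below is an assumption: it follows ETNA's quickstart helpers (`generate_ar_df`, `TSDataset.to_dataset`), which are not part of this diff.

    from etna.datasets import TSDataset, generate_ar_df

    # Build a small two-segment dataset that has only a "target" column.
    df = generate_ar_df(periods=30, start_time="2021-01-01", n_segments=2)
    ts = TSDataset(df=TSDataset.to_dataset(df), freq="D")

    # After the fix, "target" is rejected instead of being silently dropped.
    try:
        ts.drop_features(features=["target"])
    except ValueError as error:
        print(error)  # Target can't be dropped from the dataset!
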
From 327da676e265d80a694803b21dbe614f2eea1b71 Mon Sep 17 00:00:00 2001
From: Egor Baturin <82458209+egoriyaa@users.noreply.github.com>
Date: Tue, 22 Oct 2024 17:23:16 +0300
Subject: [PATCH 2/3] Optimize memory usage in `TFTNativeModel` by eliminating
 copying when making samples (#494)

* remove unnecessary columns

* update changelog

* add tests for models except deepstate and deepar

* fix PR name

---------

Co-authored-by: Egor Baturin
---
 CHANGELOG.md                                  |  2 +-
 etna/models/nn/tft_native/tft.py              | 59 +++++++++++--------
 .../test_nn/nbeats/test_nbeats_nets.py        |  1 +
 tests/test_models/test_nn/test_mlp.py         |  5 ++
 tests/test_models/test_nn/test_patchts.py     |  1 +
 tests/test_models/test_nn/test_rnn.py         |  5 ++
 .../test_nn/tft_native/test_tft_native.py     |  6 ++
 7 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2bb451112..0b77f3a6a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,7 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 - Fix working with `embedding_sizes` in `202-NN_examples` notebook ([#489](https://github.com/etna-team/etna/pull/489))
 - Disallow dropping target in `TSDataset.drop_features` ([#491](https://github.com/etna-team/etna/pull/491))
--
+- Optimize memory usage in `TFTNativeModel` by eliminating copying when making samples ([#494](https://github.com/etna-team/etna/pull/494))
 -
 -
 -
diff --git a/etna/models/nn/tft_native/tft.py b/etna/models/nn/tft_native/tft.py
index 7cf08d7dc..b8d902a17 100644
--- a/etna/models/nn/tft_native/tft.py
+++ b/etna/models/nn/tft_native/tft.py
@@ -381,6 +381,17 @@ def make_samples(self, df: pd.DataFrame, encoder_length: int, decoder_length: in
         segment = df["segment"].values[0]
         for feature in self.num_embeddings:
             df[feature] = df[feature].astype(float).fillna(self.num_embeddings[feature])
+
+        reals_columns = list(set(self.static_reals + self.time_varying_reals_encoder + self.time_varying_reals_decoder))
+        categ_columns = list(
+            set(
+                self.static_categoricals
+                + self.time_varying_categoricals_encoder
+                + self.time_varying_categoricals_decoder
+            )
+        )
+
+        df = df[reals_columns + categ_columns]
         column_to_index = {column: index for index, column in enumerate(df.columns)}
 
         values = df.values.T
@@ -410,48 +421,44 @@ def _make(
             return None
 
         sample["segment"] = segment
-        sample["decoder_target"] = (
-            values[column_to_index["target"]][start_idx + encoder_length : start_idx + total_sample_length]
-            .reshape(-1, 1)
-            .astype(float)
+        sample["decoder_target"] = values[column_to_index["target"]][
+            start_idx + encoder_length : start_idx + total_sample_length
+        ].reshape(
+            -1, 1
         )  # (decoder_length, 1)
 
         for feature in self.static_reals:
-            sample["static_reals"][feature] = (
-                values[column_to_index[feature]][:1].reshape(-1, 1).astype(float)
-            )  # (1, 1)
+            sample["static_reals"][feature] = values[column_to_index[feature]][:1].reshape(-1, 1)  # (1, 1)
 
         for feature in self.static_categoricals:
-            sample["static_categoricals"][feature] = (
-                values[column_to_index[feature]][:1].reshape(-1, 1).astype(float)
-            )  # (1, 1)
+            sample["static_categoricals"][feature] = values[column_to_index[feature]][:1].reshape(-1, 1)  # (1, 1)
 
         for feature in self.time_varying_categoricals_encoder:
-            sample["time_varying_categoricals_encoder"][feature] = (
-                values[column_to_index[feature]][start_idx : start_idx + encoder_length]
-                .reshape(-1, 1)
-                .astype(float)
+            sample["time_varying_categoricals_encoder"][feature] = values[column_to_index[feature]][
+                start_idx : start_idx + encoder_length
+            ].reshape(
+                -1, 1
             )  # (encoder_length, 1)
 
         for feature in self.time_varying_categoricals_decoder:
-            sample["time_varying_categoricals_decoder"][feature] = (
-                values[column_to_index[feature]][start_idx + encoder_length : start_idx + total_sample_length]
-                .reshape(-1, 1)
-                .astype(float)
+            sample["time_varying_categoricals_decoder"][feature] = values[column_to_index[feature]][
+                start_idx + encoder_length : start_idx + total_sample_length
+            ].reshape(
+                -1, 1
             )  # (decoder_length, 1)
 
         for feature in self.time_varying_reals_encoder:
-            sample["time_varying_reals_encoder"][feature] = (
-                values[column_to_index[feature]][start_idx : start_idx + encoder_length]
-                .reshape(-1, 1)
-                .astype(float)
+            sample["time_varying_reals_encoder"][feature] = values[column_to_index[feature]][
+                start_idx : start_idx + encoder_length
+            ].reshape(
+                -1, 1
             )  # (encoder_length, 1)
 
         for feature in self.time_varying_reals_decoder:
-            sample["time_varying_reals_decoder"][feature] = (
-                values[column_to_index[feature]][start_idx + encoder_length : start_idx + total_sample_length]
-                .reshape(-1, 1)
-                .astype(float)
+            sample["time_varying_reals_decoder"][feature] = values[column_to_index[feature]][
+                start_idx + encoder_length : start_idx + total_sample_length
+            ].reshape(
+                -1, 1
             )  # (decoder_length, 1)
 
         return sample
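
The core of the change above: `make_samples` first subsets the frame to only the columns the model actually uses, so `.values` materializes one compact array per segment and every later row slice is a view into it rather than a per-sample copy. A standalone illustration with hypothetical column names (plain NumPy/pandas, not the model code):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(
        {
            "target": np.arange(5.0),
            "exog": np.ones(5),
            "unused_wide_column": np.zeros(5),
        }
    )

    needed_columns = ["target", "exog"]
    values = df[needed_columns].values.T  # one (n_features, n_timestamps) array
    column_to_index = {column: index for index, column in enumerate(needed_columns)}

    # Row slicing plus reshape stays a view into `values` -- no copy per sample.
    encoder_target = values[column_to_index["target"]][0:3].reshape(-1, 1)
    assert encoder_target.base is not None
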
diff --git a/tests/test_models/test_nn/nbeats/test_nbeats_nets.py b/tests/test_models/test_nn/nbeats/test_nbeats_nets.py
index e95583403..eda8dfb7b 100644
--- a/tests/test_models/test_nn/nbeats/test_nbeats_nets.py
+++ b/tests/test_models/test_nn/nbeats/test_nbeats_nets.py
@@ -53,6 +53,7 @@ def test_make_samples(df_name, request):
     assert first_sample["target_mask"] is None
     assert first_sample["segment"] == "segment_1"
     np.testing.assert_equal(first_sample["history"], expected_first_sample["history"])
+    assert first_sample["history"].base is not None
 
 
 @pytest.mark.parametrize(
diff --git a/tests/test_models/test_nn/test_mlp.py b/tests/test_models/test_nn/test_mlp.py
index 002f12923..fe8bceb07 100644
--- a/tests/test_models/test_nn/test_mlp.py
+++ b/tests/test_models/test_nn/test_mlp.py
@@ -111,6 +111,11 @@ def test_mlp_make_samples(df_name, cat_columns, request):
         assert ts_samples[i]["segment"] == "segment_1"
         for key in expected_sample:
             np.testing.assert_equal(ts_samples[i][key], expected_sample[key])
+            if "categorical" in key:
+                for column in ts_samples[i][key]:
+                    assert ts_samples[i][key][column].base is not None
+            else:
+                assert ts_samples[i][key].base is not None
 
 
 def test_mlp_forward_fail_nans():
diff --git a/tests/test_models/test_nn/test_patchts.py b/tests/test_models/test_nn/test_patchts.py
index 22c7aed10..6cef3292e 100644
--- a/tests/test_models/test_nn/test_patchts.py
+++ b/tests/test_models/test_nn/test_patchts.py
@@ -79,6 +79,7 @@ def test_patchts_make_samples(df_name, request):
         assert ts_samples[i]["segment"] == "segment_1"
         for key in expected_sample:
             np.testing.assert_equal(ts_samples[i][key], expected_sample[key])
+            assert ts_samples[i][key].base is not None
 
 
 def test_save_load(example_tsds):
diff --git a/tests/test_models/test_nn/test_rnn.py b/tests/test_models/test_nn/test_rnn.py
index 4e1974a55..2acc47fef 100644
--- a/tests/test_models/test_nn/test_rnn.py
+++ b/tests/test_models/test_nn/test_rnn.py
@@ -121,6 +121,11 @@ def test_rnn_make_samples(df_name, cat_columns, request):
         assert ts_samples[i]["segment"] == "segment_1"
         for key in expected_sample:
             np.testing.assert_equal(ts_samples[i][key], expected_sample[key])
+            if "categorical" in key:
+                for column in ts_samples[i][key]:
+                    assert ts_samples[i][key][column].base is not None
+            else:
+                assert ts_samples[i][key].base is not None
 
 
 @pytest.mark.parametrize("encoder_length", [1, 2, 10])
diff --git a/tests/test_models/test_nn/tft_native/test_tft_native.py b/tests/test_models/test_nn/tft_native/test_tft_native.py
index 4a2a671d4..29db902cc 100644
--- a/tests/test_models/test_nn/tft_native/test_tft_native.py
+++ b/tests/test_models/test_nn/tft_native/test_tft_native.py
@@ -188,31 +188,37 @@ def test_tft_make_samples(
             df[[feature]].iloc[:1],
             first_sample["static_reals"][feature],
         )
+        assert first_sample["static_reals"][feature].base is not None
     for feature in static_categoricals:
         np.testing.assert_almost_equal(
             df[[feature]].iloc[:1],
             first_sample["static_categoricals"][feature],
         )
+        assert first_sample["static_categoricals"][feature].base is not None
     for feature in time_varying_categoricals_encoder:
         np.testing.assert_almost_equal(
             df[[feature]].iloc[:encoder_length],
             first_sample["time_varying_categoricals_encoder"][feature],
        )
+        assert first_sample["time_varying_categoricals_encoder"][feature].base is not None
     for feature in time_varying_categoricals_decoder:
         np.testing.assert_almost_equal(
             df[[feature]].iloc[encoder_length : encoder_length + decoder_length],
             first_sample["time_varying_categoricals_decoder"][feature],
         )
+        assert first_sample["time_varying_categoricals_decoder"][feature].base is not None
     for feature in time_varying_reals_encoder:
         np.testing.assert_almost_equal(
             df[[feature]].iloc[:encoder_length],
             first_sample["time_varying_reals_encoder"][feature],
         )
+        assert first_sample["time_varying_reals_encoder"][feature].base is not None
     for feature in time_varying_reals_decoder:
         np.testing.assert_almost_equal(
             df[[feature]].iloc[encoder_length : encoder_length + decoder_length],
             first_sample["time_varying_reals_decoder"][feature],
         )
+        assert first_sample["time_varying_reals_decoder"][feature].base is not None
 
 
 @pytest.mark.parametrize("encoder_length, decoder_length", [(2, 1), (1, 2), (10, 5)])
weights = x["weight"] + + # scale target values at index 0 + encoder_real[:, :, 0] = encoder_real[:, :, 0] / weights.unsqueeze(1) + decoder_real[:, :, 0] = decoder_real[:, :, 0] / weights.unsqueeze(1) encoder_embeddings = self.embedding(encoder_categorical) if self.embedding is not None else torch.Tensor() decoder_embeddings = self.embedding(decoder_categorical) if self.embedding is not None else torch.Tensor() @@ -191,7 +196,11 @@ def step(self, batch: DeepARNativeBatch, *args, **kwargs): # type: ignore decoder_categorical = batch["decoder_categorical"] # each (batch_size, decoder_length, 1) encoder_target = batch["encoder_target"].float() # (batch_size, encoder_length-1, 1) decoder_target = batch["decoder_target"].float() # (batch_size, decoder_length, 1) - weights = batch["weight"] + weights = batch["weight"] # (batch_size) + + # scale target values at index 0 + encoder_real[:, :, 0] = encoder_real[:, :, 0] / weights.unsqueeze(1) + decoder_real[:, :, 0] = decoder_real[:, :, 0] / weights.unsqueeze(1) encoder_embeddings = self.embedding(encoder_categorical) if self.embedding is not None else torch.Tensor() decoder_embeddings = self.embedding(decoder_categorical) if self.embedding is not None else torch.Tensor() @@ -255,11 +264,10 @@ def _make( return None # Get shifted target and concatenate it with real values features - sample["decoder_real"] = values_real[start_idx + encoder_length : start_idx + total_sample_length].copy() + sample["decoder_real"] = values_real[start_idx + encoder_length : start_idx + total_sample_length] # Get shifted target and concatenate it with real values features - sample["encoder_real"] = values_real[start_idx : start_idx + encoder_length].copy() - sample["encoder_real"] = sample["encoder_real"][1:] + sample["encoder_real"] = values_real[start_idx + 1 : start_idx + encoder_length] for index, feature in enumerate(self.embedding_sizes.keys()): sample["encoder_categorical"][feature] = values_categorical[index][ @@ -276,10 +284,6 @@ def _make( sample["segment"] = segment sample["weight"] = 1 + sample["encoder_target"].mean() if self.scale else 1 - sample["encoder_real"][:, 0] = values_real[start_idx + 1 : start_idx + encoder_length, 0] / sample["weight"] - sample["decoder_real"][:, 0] = ( - values_real[start_idx + encoder_length : start_idx + total_sample_length, 0] / sample["weight"] - ) return sample diff --git a/etna/models/nn/deepstate/deepstate.py b/etna/models/nn/deepstate/deepstate.py index d2f72d02a..7250bde03 100644 --- a/etna/models/nn/deepstate/deepstate.py +++ b/etna/models/nn/deepstate/deepstate.py @@ -4,6 +4,7 @@ from typing import Optional from typing import Tuple +import numpy as np import pandas as pd from typing_extensions import TypedDict @@ -117,9 +118,9 @@ def step(self, batch: DeepStateBatch, *args, **kwargs): # type: ignore : loss, true_target, prediction_target """ - encoder_real = batch["encoder_real"] # (batch_size, seq_length, input_size) + encoder_real = batch["encoder_real"].float() # (batch_size, seq_length, input_size) encoder_categorical = batch["encoder_categorical"] # each (batch_size, seq_length, 1) - targets = batch["encoder_target"] # (batch_size, seq_length, 1) + targets = batch["encoder_target"].float() # (batch_size, seq_length, 1) seq_length = targets.shape[1] datetime_index = batch["datetime_index"].permute(1, 0, 2)[ :, :, :seq_length @@ -159,11 +160,11 @@ def forward(self, x: DeepStateBatch, *args, **kwargs): # type: ignore : forecast with shape (batch_size, decoder_length, 1) """ - encoder_real = x["encoder_real"] # 
(batch_size, seq_length, input_size) + encoder_real = x["encoder_real"].float() # (batch_size, seq_length, input_size) encoder_categorical = x["encoder_categorical"] # each (batch_size, seq_length, 1) seq_length = encoder_real.shape[1] - targets = x["encoder_target"][:, :seq_length] # (batch_size, seq_length, 1) - decoder_real = x["decoder_real"] # (batch_size, horizon, input_size) + targets = x["encoder_target"][:, :seq_length].float() # (batch_size, seq_length, 1) + decoder_real = x["decoder_real"].float() # (batch_size, horizon, input_size) decoder_categorical = x["decoder_categorical"] # each (batch_size, horizon, 1) datetime_index_train = x["datetime_index"].permute(1, 0, 2)[ :, :, :seq_length @@ -213,26 +214,23 @@ def forward(self, x: DeepStateBatch, *args, **kwargs): # type: ignore def make_samples(self, df: pd.DataFrame, encoder_length: int, decoder_length: int) -> Iterator[dict]: """Make samples from segment DataFrame.""" values_real = df.drop(columns=["target", "segment", "timestamp"] + list(self.embedding_sizes.keys())).values - values_real = torch.from_numpy(values_real).float() # Categories that were not seen during `fit` will be filled with new category for feature in self.embedding_sizes: df[feature] = df[feature].astype(float).fillna(self.embedding_sizes[feature][0]) # Columns in `values_categorical` are in the same order as in `embedding_sizes` - values_categorical = torch.from_numpy(df[self.embedding_sizes.keys()].values.T) + values_categorical = df[self.embedding_sizes.keys()].values.T - values_datetime = torch.from_numpy(self.ssm.generate_datetime_index(df["timestamp"])) - values_datetime = values_datetime.to(torch.int64) + values_datetime = self.ssm.generate_datetime_index(df["timestamp"]).astype(int) values_target = df["target"].values - values_target = torch.from_numpy(values_target).float() segment = df["segment"].values[0] def _make( - values_target: torch.Tensor, - values_real: torch.Tensor, - values_categorical: torch.Tensor, - values_datetime: torch.Tensor, + values_target: np.ndarray, + values_real: np.ndarray, + values_categorical: np.ndarray, + values_datetime: np.ndarray, segment: str, start_idx: int, encoder_length: int, diff --git a/tests/test_models/test_nn/deepar_native/test_deepar_native.py b/tests/test_models/test_nn/deepar_native/test_deepar_native.py index 16e009dee..3cb298b8a 100644 --- a/tests/test_models/test_nn/deepar_native/test_deepar_native.py +++ b/tests/test_models/test_nn/deepar_native/test_deepar_native.py @@ -105,12 +105,11 @@ def test_deepar_make_samples(df_name, scale, weights, cat_columns, request): num_samples_check = 2 df["target_shifted"] = df["target"].shift(1) for i in range(num_samples_check): - df[f"target_shifted_scaled_{i}"] = df["target_shifted"] / weights[i] expected_sample = { - "encoder_real": df[[f"target_shifted_scaled_{i}", "regressor_float", "regressor_int"]] + "encoder_real": df[["target_shifted", "regressor_float", "regressor_int"]] .iloc[1 + i : encoder_length + i] .values, - "decoder_real": df[[f"target_shifted_scaled_{i}", "regressor_float", "regressor_int"]] + "decoder_real": df[["target_shifted", "regressor_float", "regressor_int"]] .iloc[encoder_length + i : encoder_length + decoder_length + i] .values, "encoder_categorical": { @@ -138,6 +137,12 @@ def test_deepar_make_samples(df_name, scale, weights, cat_columns, request): assert ts_samples[i]["segment"] == "segment_1" for key in expected_sample: np.testing.assert_equal(ts_samples[i][key], expected_sample[key]) + if "categorical" in key: + for column in 
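
The `DeepStateModel` change follows the same pattern as the earlier patches: `make_samples` now keeps cheap NumPy views, while tensor conversion and the `.float()` casts happen once per batch inside `step`/`forward`. A condensed sketch of that division of labor (illustrative shapes and names, not the real model code):

    import numpy as np
    import torch

    # Per sample: store a NumPy view, no tensor conversion yet.
    values_target = np.arange(10.0)
    sample = {"encoder_target": values_target[0:8].reshape(-1, 1)}  # view

    # Per batch: the default collate stacks samples into tensors...
    batch = {key: torch.as_tensor(value).unsqueeze(0) for key, value in sample.items()}

    # ...and the model casts once, as in the patched `step`.
    targets = batch["encoder_target"].float()
    print(targets.shape)  # torch.Size([1, 8, 1])
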
diff --git a/tests/test_models/test_nn/deepar_native/test_deepar_native.py b/tests/test_models/test_nn/deepar_native/test_deepar_native.py
index 16e009dee..3cb298b8a 100644
--- a/tests/test_models/test_nn/deepar_native/test_deepar_native.py
+++ b/tests/test_models/test_nn/deepar_native/test_deepar_native.py
@@ -105,12 +105,11 @@ def test_deepar_make_samples(df_name, scale, weights, cat_columns, request):
     num_samples_check = 2
     df["target_shifted"] = df["target"].shift(1)
     for i in range(num_samples_check):
-        df[f"target_shifted_scaled_{i}"] = df["target_shifted"] / weights[i]
         expected_sample = {
-            "encoder_real": df[[f"target_shifted_scaled_{i}", "regressor_float", "regressor_int"]]
+            "encoder_real": df[["target_shifted", "regressor_float", "regressor_int"]]
             .iloc[1 + i : encoder_length + i]
             .values,
-            "decoder_real": df[[f"target_shifted_scaled_{i}", "regressor_float", "regressor_int"]]
+            "decoder_real": df[["target_shifted", "regressor_float", "regressor_int"]]
             .iloc[encoder_length + i : encoder_length + decoder_length + i]
             .values,
             "encoder_categorical": {
@@ -138,6 +137,12 @@ def test_deepar_make_samples(df_name, scale, weights, cat_columns, request):
         assert ts_samples[i]["segment"] == "segment_1"
         for key in expected_sample:
             np.testing.assert_equal(ts_samples[i][key], expected_sample[key])
+            if "categorical" in key:
+                for column in ts_samples[i][key]:
+                    assert ts_samples[i][key][column].base is not None
+            else:
+                if key != "weight":
+                    assert ts_samples[i][key].base is not None
 
 
 @pytest.mark.parametrize("encoder_length", [1, 2, 10])
diff --git a/tests/test_models/test_nn/test_deepstate.py b/tests/test_models/test_nn/test_deepstate.py
index a56b2cd4a..78efb4370 100644
--- a/tests/test_models/test_nn/test_deepstate.py
+++ b/tests/test_models/test_nn/test_deepstate.py
@@ -139,6 +139,11 @@ def test_deepstate_make_samples(df_name, cat_columns, request):
         assert ts_samples[i]["segment"] == "segment_1"
         for key in expected_sample:
             np.testing.assert_equal(ts_samples[i][key], expected_sample[key])
+            if "categorical" in key:
+                for column in ts_samples[i][key]:
+                    assert ts_samples[i][key][column].base is not None
+            else:
+                assert ts_samples[i][key].base is not None
 
 
 def test_save_load(example_tsds):
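
Likewise for `DeepARNativeModel`: target scaling used to run sample by sample inside `make_samples` (forcing a `.copy()` per sample) and now runs as one broadcasted tensor operation per batch in `forward`/`step`. A toy version of the moved computation (random data; in the patch the weight is `1 + encoder_target.mean()` per sample when `scale=True`):

    import torch

    batch_size, encoder_length, input_size = 4, 8, 3
    encoder_real = torch.randn(batch_size, encoder_length, input_size)
    weights = torch.rand(batch_size) + 1  # stands in for batch["weight"]

    # One broadcasted division per batch instead of a copy per sample.
    encoder_real[:, :, 0] = encoder_real[:, :, 0] / weights.unsqueeze(1)
    print(encoder_real.shape)  # torch.Size([4, 8, 3])
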