Skip to content

Commit

Permalink
combine two tests in one
Browse files Browse the repository at this point in the history
  • Loading branch information
Egor Baturin committed Nov 6, 2024
1 parent 17b2199 commit 1cac8f5
Showing 1 changed file with 16 additions and 58 deletions.
74 changes: 16 additions & 58 deletions tests/test_transforms/test_encoders/test_mean_encoder_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,51 +121,29 @@ def expected_ts_begin_nan_smooth_2() -> TSDataset:


@pytest.fixture
def multiple_nan_target_new_category_ts() -> TSDataset:
    """Fixture with several timestamps with NaN target for a new category where there were no notna targets yet.

    The target covers 5 daily timestamps, while the categorical ``regressor``
    covers 6, so the regressor extends one timestamp past the end of the target.
    """
    df = generate_ar_df(n_segments=1, start_time="2001-01-01", periods=5)
    # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0.
    df["target"] = [np.nan, 1.5, np.nan, 3.0, 4.0]

    df_exog = generate_ar_df(n_segments=1, start_time="2001-01-01", periods=6)
    df_exog.rename(columns={"target": "regressor"}, inplace=True)
    # Category "A" sits on two NaN targets (positions 0 and 2) before its
    # first notna target (position 3); "C" appears only past the target's end.
    df_exog["regressor"] = ["A", "B", "A", "A", "B", "C"]

    ts = TSDataset(df=df, df_exog=df_exog, freq="D", known_future="all")

    return ts


@pytest.fixture
def expected_multiple_nan_target_new_category_ts() -> TSDataset:
    """Fixture with the expected ``regressor_mean`` values for ``multiple_nan_target_new_category_ts``.

    Holds a single ``regressor_mean`` column over 5 daily timestamps; only the
    last timestamp carries a notna value.
    """
    df = generate_ar_df(n_segments=1, start_time="2001-01-01", periods=5)
    df.rename(columns={"target": "regressor_mean"}, inplace=True)
    # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0.
    df["regressor_mean"] = [np.nan, np.nan, np.nan, np.nan, 1.5]

    ts = TSDataset(df=df, freq="D")

    return ts

def multiple_nan_target_category_ts() -> TSDataset:
"""Fixture with segment having multiple NaN targets:
@pytest.fixture
def multiple_nan_target_old_category_ts() -> TSDataset:
"""Fixture with several timestamp with NaN target for category where there was already a notna target."""
df = generate_ar_df(n_segments=1, start_time="2001-01-01", periods=7)
df["target"] = [np.nan, 1.5, np.nan, 3.0, 4.0, np.NaN, np.NaN]
* For `regressor="A"` set of NaN timestamp goes before first notna value
* For `regressor="B"` set of NaN timestamp goes after first notna value
"""
df = generate_ar_df(n_segments=1, start_time="2001-01-01", periods=8)
df["target"] = [np.nan, 1.5, np.nan, 3.0, 4.0, np.NaN, np.NaN, np.NaN]

df_exog = generate_ar_df(n_segments=1, start_time="2001-01-01", periods=8)
df_exog = generate_ar_df(n_segments=1, start_time="2001-01-01", periods=9)
df_exog.rename(columns={"target": "regressor"}, inplace=True)
df_exog["regressor"] = ["A", "B", "A", "A", "B", "B", "B", "C"]
df_exog["regressor"] = ["A", "B", "A", "A", "B", "B", "B", "A", "A"]

ts = TSDataset(df=df, df_exog=df_exog, freq="D", known_future="all")

return ts


@pytest.fixture
def expected_multiple_nan_target_old_category_ts() -> TSDataset:
df = generate_ar_df(n_segments=1, start_time="2001-01-01", periods=7)
def expected_multiple_nan_target_category_ts() -> TSDataset:
df = generate_ar_df(n_segments=1, start_time="2001-01-01", periods=8)
df.rename(columns={"target": "regressor_mean"}, inplace=True)
df["regressor_mean"] = [np.NaN, np.NaN, np.NaN, np.NaN, 1.5, 2.75, 2.75]
df["regressor_mean"] = [np.NaN, np.NaN, np.NaN, np.NaN, 1.5, 2.75, 2.75, 3.0]

ts = TSDataset(df=df, freq="D")

Expand Down Expand Up @@ -429,38 +407,18 @@ def test_mean_segment_encoder(mean_segment_encoder_ts, expected_mean_segment_enc
)


def test_multiple_nan_target_new_category(
    multiple_nan_target_new_category_ts, expected_multiple_nan_target_new_category_ts
):
    """Check the per-segment mean encoding of ``regressor`` against the expected ``regressor_mean`` frame."""
    encoder = MeanEncoderTransform(
        in_column="regressor",
        out_column="regressor_mean",
        mode="per-segment",
        handle_missing="category",
        smoothing=0,
    )
    # The transform must run before the dataset's frame is read below.
    encoder.fit_transform(multiple_nan_target_new_category_ts)

    # Select only the encoded column across every segment.
    encoded_columns = pd.IndexSlice[:, "regressor_mean"]
    actual_df = multiple_nan_target_new_category_ts.df.loc[:, encoded_columns]
    expected_df = expected_multiple_nan_target_new_category_ts.df

    assert_frame_equal(actual_df, expected_df, atol=0.01)


def test_multiple_nan_target_old_category(
multiple_nan_target_old_category_ts, expected_multiple_nan_target_old_category_ts
):
def test_multiple_nan_target_category_ts(multiple_nan_target_category_ts, expected_multiple_nan_target_category_ts):
mean_encoder = MeanEncoderTransform(
in_column="regressor",
mode="per-segment",
handle_missing="category",
smoothing=0,
out_column="regressor_mean",
)
mean_encoder.fit_transform(multiple_nan_target_old_category_ts)
mean_encoder.fit_transform(multiple_nan_target_category_ts)
assert_frame_equal(
multiple_nan_target_old_category_ts.df.loc[:, pd.IndexSlice[:, "regressor_mean"]],
expected_multiple_nan_target_old_category_ts.df,
multiple_nan_target_category_ts.df.loc[:, pd.IndexSlice[:, "regressor_mean"]],
expected_multiple_nan_target_category_ts.df,
atol=0.01,
)

Expand Down

0 comments on commit 1cac8f5

Please sign in to comment.