Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Parameter to transform data in experiments #322

Merged
merged 18 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ classifiers = [
requires-python = ">=3.9,<3.13"
dependencies = [
"aeon>=1.0.0,<1.1.0",
"tsml>=0.5.0,<0.6.0",
"tsml>=0.6.1,<0.7.0",
"scikit-learn>=1.0.0,<1.7.0",
"matplotlib",
"seaborn",
Expand All @@ -55,6 +55,8 @@ all_extras = [
"aeon[all_extras]",
"tsml[all_extras]",
"xgboost",
# temp
"esig>=0.9.7,<1.0.0; platform_system != 'Darwin' and python_version < '3.11'",
]
unstable_extras = [
"aeon[unstable_extras]",
Expand Down
3 changes: 1 addition & 2 deletions tsml_eval/estimators/clustering/consensus/ivc.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@ class IterativeVotingClustering(BaseEstimator, ClusterMixin):
>>> ivc = IterativeVotingClustering(n_clusters=3, random_state=0)
>>> ivc.fit(iris.data)
IterativeVotingClustering(...)
>>> rand_score(iris.target, ivc.labels_)
0.8737360178970918
>>> s = rand_score(iris.target, ivc.labels_)
"""

def __init__(
Expand Down
3 changes: 1 addition & 2 deletions tsml_eval/estimators/clustering/consensus/simple_vote.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ class SimpleVote(BaseEstimator, ClusterMixin):
>>> sv = SimpleVote(n_clusters=3, random_state=0)
>>> sv.fit(iris.data)
SimpleVote(...)
>>> rand_score(iris.target, sv.labels_)
0.8737360178970918
>>> s = rand_score(iris.target, sv.labels_)
"""

def __init__(self, clusterers=None, n_clusters=8, random_state=None):
Expand Down
7 changes: 6 additions & 1 deletion tsml_eval/evaluation/multiple_estimator_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1333,7 +1333,12 @@ def _figures_for_statistic(
)

scatter, _ = plot_pairwise_scatter(
scores[:, i], scores[:, n], est1, est2, metric=statistic_name.upper()
scores[:, i],
scores[:, n],
est1,
est2,
metric=statistic_name.upper(),
lower_better=not higher_better,
)
scatter.savefig(
f"{save_path}/{statistic_name}/figures/scatters/{est1}/"
Expand Down
3 changes: 1 addition & 2 deletions tsml_eval/evaluation/storage/classifier_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ class ClassifierResults(EstimatorResults):
... "/classification/ROCKET/Predictions/Chinatown/testResample0.csv"
... )
>>> cr.calculate_statistics()
>>> cr.accuracy
0.9795918367346939
>>> acc = cr.accuracy
"""

def __init__(
Expand Down
3 changes: 1 addition & 2 deletions tsml_eval/evaluation/storage/clusterer_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ class ClustererResults(EstimatorResults):
... "/clustering/KMeans/Predictions/Trace/trainResample0.csv"
... )
>>> cr.calculate_statistics()
>>> cr.clustering_accuracy
0.57
>>> acc = cr.clustering_accuracy
"""

def __init__(
Expand Down
3 changes: 1 addition & 2 deletions tsml_eval/evaluation/storage/forecaster_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@ class ForecasterResults(EstimatorResults):
... "/forecasting/NaiveForecaster/Predictions/Airline/testResample0.csv"
... )
>>> fr.calculate_statistics()
>>> fr.mean_absolute_percentage_error
0.19886711926999853
>>> mape = fr.mean_absolute_percentage_error
"""

def __init__(
Expand Down
4 changes: 1 addition & 3 deletions tsml_eval/evaluation/storage/regressor_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,7 @@ class RegressorResults(EstimatorResults):
... "/regression/ROCKET/Predictions/Covid3Month/testResample0.csv"
... )
>>> rr.calculate_statistics()
>>> rr.mean_squared_error
0.0015126663111567206

>>> mse = rr.mean_squared_error
"""

def __init__(
Expand Down
8 changes: 5 additions & 3 deletions tsml_eval/experiments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,18 @@
"get_classifier_by_name",
"get_clusterer_by_name",
"get_regressor_by_name",
"get_data_transform_by_name",
"run_timing_experiment",
"classification_cross_validation",
"classification_cross_validation_folds",
"regression_cross_validation",
"regression_cross_validation_folds",
]

from tsml_eval.experiments._get_classifier import get_classifier_by_name
from tsml_eval.experiments._get_clusterer import get_clusterer_by_name
from tsml_eval.experiments._get_data_transform import get_data_transform_by_name
from tsml_eval.experiments._get_regressor import get_regressor_by_name
from tsml_eval.experiments.cross_validation import (
classification_cross_validation,
classification_cross_validation_folds,
Expand All @@ -32,6 +37,3 @@
run_regression_experiment,
)
from tsml_eval.experiments.scalability import run_timing_experiment
from tsml_eval.experiments.set_classifier import get_classifier_by_name
from tsml_eval.experiments.set_clusterer import get_clusterer_by_name
from tsml_eval.experiments.set_regressor import get_regressor_by_name
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Set classifier function."""
"""Get classifier function."""

__maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"]

Expand All @@ -24,6 +24,7 @@
["inceptiontimeclassifier", "inceptiontime"],
["h-inceptiontimeclassifier", "h-inceptiontime"],
["litetimeclassifier", "litetime"],
"litetime-mv",
["individualliteclassifier", "individuallite"],
["disjointcnnclassifier", "disjointcnn"],
]
Expand Down Expand Up @@ -192,7 +193,7 @@ def get_classifier_by_name(
c, random_state, n_jobs, fit_contract, checkpoint, kwargs
)
else:
raise ValueError(f"UNKNOWN CLASSIFIER: {c} in set_classifier")
raise ValueError(f"UNKNOWN CLASSIFIER: {c} in get_classifier_by_name")


def _set_classifier_convolution_based(
Expand Down Expand Up @@ -304,6 +305,10 @@ def _set_classifier_deep_learning(
from aeon.classification.deep_learning import LITETimeClassifier

return LITETimeClassifier(random_state=random_state, **kwargs)
elif c == "litetime-mv":
from aeon.classification.deep_learning import LITETimeClassifier

return LITETimeClassifier(use_litemv=True, random_state=random_state, **kwargs)
elif c == "individualliteclassifier" or c == "individuallite":
from aeon.classification.deep_learning import IndividualLITEClassifier

Expand Down Expand Up @@ -765,7 +770,7 @@ def _set_classifier_shapelet_based(

def _set_classifier_vector(c, random_state, n_jobs, fit_contract, checkpoint, kwargs):
if c == "rotationforestclassifier" or c == "rotationforest" or c == "rotf":
from tsml.vector import RotationForestClassifier
from aeon.classification.sklearn import RotationForestClassifier

return RotationForestClassifier(
random_state=random_state,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Set classifier function."""
"""Get clusterer function."""

__maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"]

Expand Down Expand Up @@ -226,7 +226,7 @@ def get_clusterer_by_name(
c, random_state, n_jobs, fit_contract, checkpoint, kwargs
)
else:
raise ValueError(f"UNKNOWN CLUSTERER: {c} in set_clusterer")
raise ValueError(f"UNKNOWN CLUSTERER: {c} in get_clusterer_by_name")


def _set_clusterer_deep_learning(
Expand Down
71 changes: 71 additions & 0 deletions tsml_eval/experiments/_get_data_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""get data transformer function."""

__maintainer__ = ["MatthewMiddlehurst"]

from aeon.transformations.collection import Normalizer

from tsml_eval.utils.functions import str_in_nested_list

# Registry of names accepted by get_data_transform_by_name, which casefolds each
# requested name before looking it up here. A nested list groups alternative
# spellings of the same transformer.
transformers = [
["normalizer", "normaliser"],
"padder",
]


def get_data_transform_by_name(
    transformer_names,
    row_normalise=False,
    random_state=None,
    n_jobs=1,
):
    """Return the transformer(s) matching the given input name(s).

    Parameters
    ----------
    transformer_names : str, list of str, tuple of str or None
        Name or names of the transformer(s) to be returned. Names are casefolded
        before being matched. ``None`` or an empty sequence requests no named
        transformers.
    row_normalise : bool, default=False
        Adds a Normalizer to the front of the transformer list.
    random_state : int, RandomState instance or None, default=None
        Random seed or RandomState object to be used in the transformer if
        available.
    n_jobs : int, default=1
        The number of jobs to run in parallel for the transformer if available.

    Returns
    -------
    transformers : transformer, list of transformers or None
        The transformer(s) matching the input transformer name(s). Returns a list
        if more than one transformer is requested, the single transformer if
        exactly one is requested, and None if no transformer is requested.

    Raises
    ------
    ValueError
        If a requested name is not present in the ``transformers`` registry.
    """
    if transformer_names is None:
        transformer_names = []
    elif not isinstance(transformer_names, (list, tuple)):
        # A single name was given; treat it as a one-element sequence.
        transformer_names = [transformer_names]

    t_list = []
    if row_normalise:
        # Row normalisation always runs before any named transformer.
        t_list.append(Normalizer())

    for transformer_name in transformer_names:
        t = transformer_name.casefold()

        if not str_in_nested_list(transformers, t):
            raise ValueError(
                f"UNKNOWN TRANSFORMER: {t} in get_data_transform_by_name"
            )
        t_list.append(_set_transformer(t, random_state, n_jobs))

    # Nothing requested (None or an empty sequence, and no row normalisation):
    # report "no transform" instead of indexing into an empty list.
    if not t_list:
        return None

    return t_list if len(t_list) > 1 else t_list[0]


def _set_transformer(t, random_state, n_jobs):
if t == "normalizer" or t == "normaliser":
return Normalizer()
elif t == "padder":
from aeon.transformations.collection import Padder

return Padder()
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def get_forecaster_by_name(forecaster_name, random_state=None, n_jobs=1, **kwarg
elif str_in_nested_list(other_forecasters, f):
return _set_forecaster_other(f, random_state, n_jobs, kwargs)
else:
raise ValueError(f"UNKNOWN FORECASTER: {f} in set_forecaster")
raise ValueError(f"UNKNOWN FORECASTER: {f} in get_forecaster_by_name")


def _set_forecaster_stats(f, random_state, n_jobs, kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def get_regressor_by_name(
r, random_state, n_jobs, fit_contract, checkpoint, kwargs
)
else:
raise ValueError(f"UNKNOWN REGRESSOR: {r} in set_regressor")
raise ValueError(f"UNKNOWN REGRESSOR: {r} in get_regressor_by_name")


def _set_regressor_convolution_based(
Expand Down
26 changes: 22 additions & 4 deletions tsml_eval/experiments/classification_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
import numba
from aeon.utils.validation._dependencies import _check_soft_dependencies

from tsml_eval.experiments import load_and_run_classification_experiment
from tsml_eval.experiments.set_classifier import get_classifier_by_name
from tsml_eval.experiments import (
get_classifier_by_name,
get_data_transform_by_name,
load_and_run_classification_experiment,
)
from tsml_eval.experiments.tests import _CLASSIFIER_RESULTS_PATH
from tsml_eval.testing.testing_utils import _TEST_DATA_PATH
from tsml_eval.utils.arguments import parse_args
Expand Down Expand Up @@ -81,9 +84,18 @@ def run_experiment(args):
checkpoint=args.checkpoint,
**args.kwargs,
),
row_normalise=args.row_normalise,
classifier_name=args.estimator_name,
resample_id=args.resample_id,
data_transforms=get_data_transform_by_name(
args.data_transform_name,
row_normalise=args.row_normalise,
random_state=(
args.resample_id
if args.random_seed is None
else args.random_seed
),
n_jobs=1,
),
build_train_file=args.train_fold,
write_attributes=args.write_attributes,
att_max_shape=args.att_max_shape,
Expand All @@ -101,6 +113,7 @@ def run_experiment(args):
estimator_name = "ROCKET"
dataset_name = "MinimalChinatown"
row_normalise = False
transform_name = None
resample_id = 0
train_fold = False
write_attributes = True
Expand All @@ -120,16 +133,21 @@ def run_experiment(args):
checkpoint=checkpoint,
**kwargs,
)
transform = get_data_transform_by_name(
transform_name,
row_normalise=row_normalise,
random_state=resample_id,
)
print(f"Local Run of {estimator_name} ({classifier.__class__.__name__}).")

load_and_run_classification_experiment(
data_path,
results_path,
dataset_name,
classifier,
row_normalise=row_normalise,
classifier_name=estimator_name,
resample_id=resample_id,
data_transforms=transform,
build_train_file=train_fold,
write_attributes=write_attributes,
att_max_shape=att_max_shape,
Expand Down
26 changes: 22 additions & 4 deletions tsml_eval/experiments/clustering_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
import numba
from aeon.utils.validation._dependencies import _check_soft_dependencies

from tsml_eval.experiments import load_and_run_clustering_experiment
from tsml_eval.experiments.set_clusterer import get_clusterer_by_name
from tsml_eval.experiments import (
get_clusterer_by_name,
get_data_transform_by_name,
load_and_run_clustering_experiment,
)
from tsml_eval.experiments.tests import _CLUSTERER_RESULTS_PATH
from tsml_eval.testing.testing_utils import _TEST_DATA_PATH
from tsml_eval.utils.arguments import parse_args
Expand Down Expand Up @@ -88,10 +91,19 @@ def run_experiment(args):
row_normalise=args.row_normalise,
**args.kwargs,
),
row_normalise=args.row_normalise,
n_clusters=args.n_clusters,
clusterer_name=args.estimator_name,
resample_id=args.resample_id,
data_transforms=get_data_transform_by_name(
args.data_transform_name,
row_normalise=args.row_normalise,
random_state=(
args.resample_id
if args.random_seed is None
else args.random_seed
),
n_jobs=1,
),
build_test_file=args.test_fold,
write_attributes=args.write_attributes,
att_max_shape=args.att_max_shape,
Expand All @@ -110,6 +122,7 @@ def run_experiment(args):
estimator_name = "KMeans"
dataset_name = "MinimalChinatown"
row_normalise = False
transform_name = None
n_clusters = -1
resample_id = 0
test_fold = False
Expand All @@ -133,17 +146,22 @@ def run_experiment(args):
row_normalise=row_normalise,
**kwargs,
)
transform = get_data_transform_by_name(
transform_name,
row_normalise=row_normalise,
random_state=resample_id,
)
print(f"Local Run of {estimator_name} ({clusterer.__class__.__name__}).")

load_and_run_clustering_experiment(
data_path,
results_path,
dataset_name,
clusterer,
row_normalise=row_normalise,
n_clusters=n_clusters,
clusterer_name=estimator_name,
resample_id=resample_id,
data_transforms=transform,
build_test_file=test_fold,
write_attributes=write_attributes,
att_max_shape=att_max_shape,
Expand Down
Loading