time-series-machine-learning · MatthewMiddlehurst · Jan 15, 2025 · Dec 13, 2024 · Dec 13, 2024 · Dec 15, 2024
diff --git a/pyproject.toml b/pyproject.toml
@@ -42,7 +42,7 @@ classifiers = [
 requires-python = ">=3.9,<3.13"
 dependencies = [
     "aeon>=1.0.0,<1.1.0",
-    "tsml>=0.5.0,<0.6.0",
+    "tsml>=0.6.1,<0.7.0",
     "scikit-learn>=1.0.0,<1.7.0",
     "matplotlib",
     "seaborn",
@@ -55,6 +55,8 @@ all_extras = [
     "aeon[all_extras]",
     "tsml[all_extras]",
     "xgboost",
+    # temp
+    "esig>=0.9.7,<1.0.0; platform_system != 'Darwin' and python_version < '3.11'",
 ]
 unstable_extras = [
     "aeon[unstable_extras]",

diff --git a/tsml_eval/estimators/clustering/consensus/ivc.py b/tsml_eval/estimators/clustering/consensus/ivc.py
@@ -50,8 +50,7 @@ class IterativeVotingClustering(BaseEstimator, ClusterMixin):
     >>> ivc = IterativeVotingClustering(n_clusters=3, random_state=0)
     >>> ivc.fit(iris.data)
     IterativeVotingClustering(...)
-    >>> rand_score(iris.target, ivc.labels_)
-    0.8737360178970918
+    >>> s = rand_score(iris.target, ivc.labels_)
     """
 
     def __init__(

diff --git a/tsml_eval/estimators/clustering/consensus/simple_vote.py b/tsml_eval/estimators/clustering/consensus/simple_vote.py
@@ -40,8 +40,7 @@ class SimpleVote(BaseEstimator, ClusterMixin):
     >>> sv = SimpleVote(n_clusters=3, random_state=0)
     >>> sv.fit(iris.data)
     SimpleVote(...)
-    >>> rand_score(iris.target, sv.labels_)
-    0.8737360178970918
+    >>> s = rand_score(iris.target, sv.labels_)
     """
 
     def __init__(self, clusterers=None, n_clusters=8, random_state=None):

diff --git a/tsml_eval/evaluation/multiple_estimator_evaluation.py b/tsml_eval/evaluation/multiple_estimator_evaluation.py
@@ -1333,7 +1333,12 @@ def _figures_for_statistic(
             )
 
             scatter, _ = plot_pairwise_scatter(
-                scores[:, i], scores[:, n], est1, est2, metric=statistic_name.upper()
+                scores[:, i],
+                scores[:, n],
+                est1,
+                est2,
+                metric=statistic_name.upper(),
+                lower_better=not higher_better,
             )
             scatter.savefig(
                 f"{save_path}/{statistic_name}/figures/scatters/{est1}/"

diff --git a/tsml_eval/evaluation/storage/classifier_results.py b/tsml_eval/evaluation/storage/classifier_results.py
@@ -95,8 +95,7 @@ class ClassifierResults(EstimatorResults):
     ...     "/classification/ROCKET/Predictions/Chinatown/testResample0.csv"
     ... )
     >>> cr.calculate_statistics()
-    >>> cr.accuracy
-    0.9795918367346939
+    >>> acc = cr.accuracy
     """
 
     def __init__(

diff --git a/tsml_eval/evaluation/storage/clusterer_results.py b/tsml_eval/evaluation/storage/clusterer_results.py
@@ -92,8 +92,7 @@ class ClustererResults(EstimatorResults):
     ...     "/clustering/KMeans/Predictions/Trace/trainResample0.csv"
     ... )
     >>> cr.calculate_statistics()
-    >>> cr.clustering_accuracy
-    0.57
+    >>> acc = cr.clustering_accuracy
     """
 
     def __init__(

diff --git a/tsml_eval/evaluation/storage/forecaster_results.py b/tsml_eval/evaluation/storage/forecaster_results.py
@@ -66,8 +66,7 @@ class ForecasterResults(EstimatorResults):
     ...     "/forecasting/NaiveForecaster/Predictions/Airline/testResample0.csv"
     ... )
     >>> fr.calculate_statistics()
-    >>> fr.mean_absolute_percentage_error
-    0.19886711926999853
+    >>> mape = fr.mean_absolute_percentage_error
     """
 
     def __init__(

diff --git a/tsml_eval/evaluation/storage/regressor_results.py b/tsml_eval/evaluation/storage/regressor_results.py
@@ -89,9 +89,7 @@ class RegressorResults(EstimatorResults):
     ...     "/regression/ROCKET/Predictions/Covid3Month/testResample0.csv"
     ... )
     >>> rr.calculate_statistics()
-    >>> rr.mean_squared_error
-    0.0015126663111567206
-
+    >>> mse = rr.mean_squared_error
     """
 
     def __init__(

diff --git a/tsml_eval/experiments/__init__.py b/tsml_eval/experiments/__init__.py
@@ -10,13 +10,18 @@
     "get_classifier_by_name",
     "get_clusterer_by_name",
     "get_regressor_by_name",
+    "get_data_transform_by_name",
     "run_timing_experiment",
     "classification_cross_validation",
     "classification_cross_validation_folds",
     "regression_cross_validation",
     "regression_cross_validation_folds",
 ]
 
+from tsml_eval.experiments._get_classifier import get_classifier_by_name
+from tsml_eval.experiments._get_clusterer import get_clusterer_by_name
+from tsml_eval.experiments._get_data_transform import get_data_transform_by_name
+from tsml_eval.experiments._get_regressor import get_regressor_by_name
 from tsml_eval.experiments.cross_validation import (
     classification_cross_validation,
     classification_cross_validation_folds,
@@ -32,6 +37,3 @@
     run_regression_experiment,
 )
 from tsml_eval.experiments.scalability import run_timing_experiment
-from tsml_eval.experiments.set_classifier import get_classifier_by_name
-from tsml_eval.experiments.set_clusterer import get_clusterer_by_name
-from tsml_eval.experiments.set_regressor import get_regressor_by_name
diff --git a/tsml_eval/experiments/set_classifier.py → tsml_eval/experiments/_get_classifier.py b/tsml_eval/experiments/set_classifier.py → tsml_eval/experiments/_get_classifier.py
@@ -1,4 +1,4 @@
-"""Set classifier function."""
+"""Get classifier function."""
 
 __maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"]
 
@@ -24,6 +24,7 @@
     ["inceptiontimeclassifier", "inceptiontime"],
     ["h-inceptiontimeclassifier", "h-inceptiontime"],
     ["litetimeclassifier", "litetime"],
+    "litetime-mv",
     ["individualliteclassifier", "individuallite"],
     ["disjointcnnclassifier", "disjointcnn"],
 ]
@@ -192,7 +193,7 @@ def get_classifier_by_name(
             c, random_state, n_jobs, fit_contract, checkpoint, kwargs
         )
     else:
-        raise ValueError(f"UNKNOWN CLASSIFIER: {c} in set_classifier")
+        raise ValueError(f"UNKNOWN CLASSIFIER: {c} in get_classifier_by_name")
 
 
 def _set_classifier_convolution_based(
@@ -304,6 +305,10 @@ def _set_classifier_deep_learning(
         from aeon.classification.deep_learning import LITETimeClassifier
 
         return LITETimeClassifier(random_state=random_state, **kwargs)
+    elif c == "litetime-mv":
+        from aeon.classification.deep_learning import LITETimeClassifier
+
+        return LITETimeClassifier(use_litemv=True, random_state=random_state, **kwargs)
     elif c == "individualliteclassifier" or c == "individuallite":
         from aeon.classification.deep_learning import IndividualLITEClassifier
 
@@ -765,7 +770,7 @@ def _set_classifier_shapelet_based(
 
 def _set_classifier_vector(c, random_state, n_jobs, fit_contract, checkpoint, kwargs):
     if c == "rotationforestclassifier" or c == "rotationforest" or c == "rotf":
-        from tsml.vector import RotationForestClassifier
+        from aeon.classification.sklearn import RotationForestClassifier
 
         return RotationForestClassifier(
             random_state=random_state,

diff --git a/tsml_eval/experiments/set_clusterer.py → tsml_eval/experiments/_get_clusterer.py b/tsml_eval/experiments/set_clusterer.py → tsml_eval/experiments/_get_clusterer.py
@@ -1,4 +1,4 @@
-"""Set classifier function."""
+"""Get clusterer function."""
 
 __maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"]
 
@@ -226,7 +226,7 @@ def get_clusterer_by_name(
             c, random_state, n_jobs, fit_contract, checkpoint, kwargs
         )
     else:
-        raise ValueError(f"UNKNOWN CLUSTERER: {c} in set_clusterer")
+        raise ValueError(f"UNKNOWN CLUSTERER: {c} in get_clusterer_by_name")
 
 
 def _set_clusterer_deep_learning(

diff --git a/tsml_eval/experiments/_get_data_transform.py b/tsml_eval/experiments/_get_data_transform.py
@@ -0,0 +1,71 @@
+"""Get data transformer function."""
+
+__maintainer__ = ["MatthewMiddlehurst"]
+
+from aeon.transformations.collection import Normalizer
+
+from tsml_eval.utils.functions import str_in_nested_list
+
+transformers = [
+    ["normalizer", "normaliser"],
+    "padder",
+]
+
+
+def get_data_transform_by_name(
+    transformer_names,
+    row_normalise=False,
+    random_state=None,
+    n_jobs=1,
+):
+    """Return the transformer(s) matching the given input name(s).
+
+    Parameters
+    ----------
+    transformer_names : str, list of str or None
+        Case-insensitive name(s) of the transformer(s). Unknown names raise ValueError.
+    row_normalise : bool, default=False
+        Adds a Normalizer to the front of the transformer list.
+    random_state : int, RandomState instance or None, default=None
+        Random seed or RandomState object to be used in the transformer if available.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel for the transformer ``fit`` and
+        ``transform`` if available. `-1` means using all processors.
+
+    Returns
+    -------
+    transformers : transformer, list of transformers or None
+        A single transformer, or a list if more than one transformer results. None
+        if no transformer is requested.
+    """
+    t_list = []
+    if row_normalise:
+        t_list.append(Normalizer())
+
+    if transformer_names is not None:
+        if not isinstance(transformer_names, list):
+            transformer_names = [transformer_names]
+
+        for transformer_name in transformer_names:
+            t = transformer_name.casefold()
+
+            if str_in_nested_list(transformers, t):
+                t_list.append(_set_transformer(t, random_state, n_jobs))
+            else:
+                raise ValueError(
+                    f"UNKNOWN TRANSFORMER: {t} in get_data_transform_by_name"
+                )
+
+    # Nothing requested (None or an empty list without row_normalise): no transform.
+    if not t_list:
+        return None
+    return t_list if len(t_list) > 1 else t_list[0]
+
+
+def _set_transformer(t, random_state, n_jobs):
+    # random_state and n_jobs are accepted but unused by either transformer below.
+    if t == "padder":
+        from aeon.transformations.collection import Padder
+
+        return Padder()
+    return Normalizer() if t in ("normalizer", "normaliser") else None
diff --git a/tsml_eval/experiments/set_forecaster.py → tsml_eval/experiments/_get_forecaster.py b/tsml_eval/experiments/set_forecaster.py → tsml_eval/experiments/_get_forecaster.py
@@ -48,7 +48,7 @@ def get_forecaster_by_name(forecaster_name, random_state=None, n_jobs=1, **kwarg
     elif str_in_nested_list(other_forecasters, f):
         return _set_forecaster_other(f, random_state, n_jobs, kwargs)
     else:
-        raise ValueError(f"UNKNOWN FORECASTER: {f} in set_forecaster")
+        raise ValueError(f"UNKNOWN FORECASTER: {f} in get_forecaster_by_name")
 
 
 def _set_forecaster_stats(f, random_state, n_jobs, kwargs):

diff --git a/tsml_eval/experiments/set_regressor.py → tsml_eval/experiments/_get_regressor.py b/tsml_eval/experiments/set_regressor.py → tsml_eval/experiments/_get_regressor.py
@@ -165,7 +165,7 @@ def get_regressor_by_name(
             r, random_state, n_jobs, fit_contract, checkpoint, kwargs
         )
     else:
-        raise ValueError(f"UNKNOWN REGRESSOR: {r} in set_regressor")
+        raise ValueError(f"UNKNOWN REGRESSOR: {r} in get_regressor_by_name")
 
 
 def _set_regressor_convolution_based(

diff --git a/tsml_eval/experiments/classification_experiments.py b/tsml_eval/experiments/classification_experiments.py
@@ -18,8 +18,11 @@
 import numba
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
-from tsml_eval.experiments import load_and_run_classification_experiment
-from tsml_eval.experiments.set_classifier import get_classifier_by_name
+from tsml_eval.experiments import (
+    get_classifier_by_name,
+    get_data_transform_by_name,
+    load_and_run_classification_experiment,
+)
 from tsml_eval.experiments.tests import _CLASSIFIER_RESULTS_PATH
 from tsml_eval.testing.testing_utils import _TEST_DATA_PATH
 from tsml_eval.utils.arguments import parse_args
@@ -81,9 +84,18 @@ def run_experiment(args):
                     checkpoint=args.checkpoint,
                     **args.kwargs,
                 ),
-                row_normalise=args.row_normalise,
                 classifier_name=args.estimator_name,
                 resample_id=args.resample_id,
+                data_transforms=get_data_transform_by_name(
+                    args.data_transform_name,
+                    row_normalise=args.row_normalise,
+                    random_state=(
+                        args.resample_id
+                        if args.random_seed is None
+                        else args.random_seed
+                    ),
+                    n_jobs=1,
+                ),
                 build_train_file=args.train_fold,
                 write_attributes=args.write_attributes,
                 att_max_shape=args.att_max_shape,
@@ -101,6 +113,7 @@ def run_experiment(args):
         estimator_name = "ROCKET"
         dataset_name = "MinimalChinatown"
         row_normalise = False
+        transform_name = None
         resample_id = 0
         train_fold = False
         write_attributes = True
@@ -120,16 +133,21 @@ def run_experiment(args):
             checkpoint=checkpoint,
             **kwargs,
         )
+        transform = get_data_transform_by_name(
+            transform_name,
+            row_normalise=row_normalise,
+            random_state=resample_id,
+        )
         print(f"Local Run of {estimator_name} ({classifier.__class__.__name__}).")
 
         load_and_run_classification_experiment(
             data_path,
             results_path,
             dataset_name,
             classifier,
-            row_normalise=row_normalise,
             classifier_name=estimator_name,
             resample_id=resample_id,
+            data_transforms=transform,
             build_train_file=train_fold,
             write_attributes=write_attributes,
             att_max_shape=att_max_shape,

diff --git a/tsml_eval/experiments/clustering_experiments.py b/tsml_eval/experiments/clustering_experiments.py
@@ -18,8 +18,11 @@
 import numba
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
-from tsml_eval.experiments import load_and_run_clustering_experiment
-from tsml_eval.experiments.set_clusterer import get_clusterer_by_name
+from tsml_eval.experiments import (
+    get_clusterer_by_name,
+    get_data_transform_by_name,
+    load_and_run_clustering_experiment,
+)
 from tsml_eval.experiments.tests import _CLUSTERER_RESULTS_PATH
 from tsml_eval.testing.testing_utils import _TEST_DATA_PATH
 from tsml_eval.utils.arguments import parse_args
@@ -88,10 +91,19 @@ def run_experiment(args):
                     row_normalise=args.row_normalise,
                     **args.kwargs,
                 ),
-                row_normalise=args.row_normalise,
                 n_clusters=args.n_clusters,
                 clusterer_name=args.estimator_name,
                 resample_id=args.resample_id,
+                data_transforms=get_data_transform_by_name(
+                    args.data_transform_name,
+                    row_normalise=args.row_normalise,
+                    random_state=(
+                        args.resample_id
+                        if args.random_seed is None
+                        else args.random_seed
+                    ),
+                    n_jobs=1,
+                ),
                 build_test_file=args.test_fold,
                 write_attributes=args.write_attributes,
                 att_max_shape=args.att_max_shape,
@@ -110,6 +122,7 @@ def run_experiment(args):
         estimator_name = "KMeans"
         dataset_name = "MinimalChinatown"
         row_normalise = False
+        transform_name = None
         n_clusters = -1
         resample_id = 0
         test_fold = False
@@ -133,17 +146,22 @@ def run_experiment(args):
             row_normalise=row_normalise,
             **kwargs,
         )
+        transform = get_data_transform_by_name(
+            transform_name,
+            row_normalise=row_normalise,
+            random_state=resample_id,
+        )
         print(f"Local Run of {estimator_name} ({clusterer.__class__.__name__}).")
 
         load_and_run_clustering_experiment(
             data_path,
             results_path,
             dataset_name,
             clusterer,
-            row_normalise=row_normalise,
             n_clusters=n_clusters,
             clusterer_name=estimator_name,
             resample_id=resample_id,
+            data_transforms=transform,
             build_test_file=test_fold,
             write_attributes=write_attributes,
             att_max_shape=att_max_shape,