From 4cf098150f63275efed84e266b26ac206dece2f7 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Mon, 20 Nov 2023 17:23:09 +0100 Subject: [PATCH 01/20] add yaml to dict function and make it callable in api run --- src/neps/api.py | 28 ++++---- src/neps/search_spaces/search_space.py | 90 ++++++++++++++++++++++++-- 2 files changed, 100 insertions(+), 18 deletions(-) diff --git a/src/neps/api.py b/src/neps/api.py index 24efe7e8..0ef9e006 100644 --- a/src/neps/api.py +++ b/src/neps/api.py @@ -16,7 +16,8 @@ from .optimizers import BaseOptimizer, SearcherMapping from .plot.tensorboard_eval import tblogger from .search_spaces.parameter import Parameter -from .search_spaces.search_space import SearchSpace, pipeline_space_from_configspace +from .search_spaces.search_space import SearchSpace, pipeline_space_from_configspace, \ + pipeline_space_from_yaml from .status.status import post_run_csv from .utils.common import get_searcher_data from .utils.result_utils import get_loss @@ -204,22 +205,22 @@ def run( ) max_cost_total = searcher_kwargs["budget"] del searcher_kwargs["budget"] - + logger = logging.getLogger("neps") logger.info(f"Starting neps.run using root directory {root_directory}") - + if isinstance(searcher, BaseOptimizer): searcher_instance = searcher searcher_name = "custom" searcher_alg = searcher.whoami() user_defined_searcher = True else: - ( + ( searcher_name, - searcher_instance, - searcher_alg, - searcher_config, - searcher_info, + searcher_instance, + searcher_alg, + searcher_config, + searcher_info, user_defined_searcher ) = _run_args( pipeline_space=pipeline_space, @@ -277,7 +278,7 @@ def run( searcher_info["searcher_args_user_modified"] = False else: raise ValueError(f"Unrecognized `searcher`. 
Not str or BaseOptimizer.") - + metahyper.run( run_pipeline, searcher_instance, @@ -325,6 +326,9 @@ def _run_args( # Support pipeline space as ConfigurationSpace definition if isinstance(pipeline_space, CS.ConfigurationSpace): pipeline_space = pipeline_space_from_configspace(pipeline_space) + # Support pipeline space as YAML file + elif isinstance(pipeline_space, str): + pipeline_space = pipeline_space_from_yaml(pipeline_space) # Support pipeline space as mix of ConfigurationSpace and neps parameters new_pipeline_space: dict[str, Parameter] = dict() @@ -335,7 +339,7 @@ def _run_args( else: new_pipeline_space[key] = value pipeline_space = new_pipeline_space - + # Transform to neps internal representation of the pipeline space pipeline_space = SearchSpace(**pipeline_space) except TypeError as e: @@ -414,7 +418,7 @@ def _run_args( "ignore_errors": ignore_errors, } ) - + searcher_instance = instance_from_map( SearcherMapping, searcher_alg, "searcher", as_class=True )( @@ -422,5 +426,5 @@ def _run_args( budget=max_cost_total, # TODO: use max_cost_total everywhere **searcher_config, ) - + return searcher, searcher_instance, searcher_alg, searcher_config, searcher_info, user_defined_searcher diff --git a/src/neps/search_spaces/search_space.py b/src/neps/search_spaces/search_space.py index 48d0b252..a4b3e225 100644 --- a/src/neps/search_spaces/search_space.py +++ b/src/neps/search_spaces/search_space.py @@ -10,6 +10,7 @@ import ConfigSpace as CS import numpy as np import pandas as pd +import yaml from ..utils.common import has_instance from . import ( @@ -61,6 +62,83 @@ def pipeline_space_from_configspace( return pipeline_space +def pipeline_space_from_yaml(yaml_file_path): + """ + Reads configuration details from a YAML file and creates a dictionary of parameters. + + Args: + yaml_file_path (str): Path to the YAML file containing configuration details. + + Returns: + dict: A dictionary with parameter names as keys and parameter objects as values. 
+ + Raises: + KeyError: If any mandatory configuration for a parameter is missing in the YAML file. + ValueError: If an unknown parameter type is encountered. + ValueError: If YAML file is incorrectly constructed + """ + # Load the YAML file + try: + with open(yaml_file_path, 'r') as file: + config = yaml.safe_load(file) + except yaml.YAMLError as e: + raise ValueError(f"The file at {yaml_file_path} is not a valid YAML file.") from e + + # check for key config_space + if 'config_space' not in config: + raise ValueError( + "The YAML file is incorrectly constructed: 'config_space' key is missing.") + + # Initialize the pipeline space + pipeline_space = {} + # Iterate over the items in the YAML configuration + for name, details in config['config_space'].items(): + try: + param_type = details['type'] + # Handle different parameter types + if param_type == 'int': + pipeline_space[name] = IntegerParameter( + lower=details['lower'], + upper=details['upper'], + log=details.get('log', False), + is_fidelity=details.get('is_fidelity', False), + default=details.get('default', None), + default_confidence=details.get('default_confidence', 'low') + ) + elif param_type == 'float': + pipeline_space[name] = FloatParameter( + lower=details['lower'], + upper=details['upper'], + log=details.get('log', False), + is_fidelity=details.get('is_fidelity', False), + default=details.get('default', None), + default_confidence=details.get('default_confidence', 'low') + ) + elif param_type == 'cat': + pipeline_space[name] = CategoricalParameter( + choices=details['choices'], + is_fidelity=details.get('is_fidelity', False), + default=details.get('default', None), + default_confidence=details.get('default_confidence', 'low') + + ) + elif param_type == 'const': + pipeline_space[name] = ConstantParameter( + value=details['value'], + is_fidelity=details.get('is_fidelity', False) + ) + else: + # Handle unknown parameter types + supported_types = ['const', 'cat', 'int', 'float'] + raise 
ValueError(f"Unknown parameter type '{param_type}' for '{name}'. " + f"Supported types are: {', '.join(supported_types)}") + except KeyError as e: + # Handle missing mandatory arguments + raise KeyError(f"Mandatory configuration '{e.args[0]}' missing for parameter '{name}' in YAML file.") + + return pipeline_space + + class SearchSpace(collections.abc.Mapping): def __init__(self, **hyperparameters): self.hyperparameters = OrderedDict() @@ -88,7 +166,7 @@ def __init__(self, **hyperparameters): self.has_prior = True elif hasattr(hyperparameter, "has_prior") and hyperparameter.has_prior: self.has_prior = True - + # Variables for tabular bookkeeping self.custom_grid_table = None self.raw_tabular_space = None @@ -101,10 +179,10 @@ def set_custom_grid_space( ): """Set a custom grid space for the search space. - This function is used to set a custom grid space for the pipeline space. - NOTE: Only to be used if a custom set of hyperparameters from the search space - is to be sampled or used for acquisition functions. - WARNING: The type check and the table format requirement is loose and + This function is used to set a custom grid space for the pipeline space. + NOTE: Only to be used if a custom set of hyperparameters from the search space + is to be sampled or used for acquisition functions. + WARNING: The type check and the table format requirement is loose and can break certain components. """ self.custom_grid_table: pd.DataFrame | pd.Series = grid_table @@ -115,7 +193,7 @@ def set_custom_grid_space( if self.custom_grid_table is None or self.raw_tabular_space is None: raise ValueError( "Both grid_table and raw_space must be set!\n" - "A table or list of fixed configs must be supported with a " + "A table or list of fixed configs must be supported with a " "continuous space representing the type and bounds of each " "hyperparameter for accurate modeling." 
) From c1a0402664339070cfd22374eefb7a8895f6e632 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Sat, 25 Nov 2023 15:35:19 +0100 Subject: [PATCH 02/20] add tests, adapt pipeline_space_from_yaml(), fix some errors raised by pre-commit in search space --- src/neps/search_spaces/search_space.py | 99 ++++++++++--------- .../correct_config.yaml | 16 +++ .../correct_config_including_priors.yml | 23 +++++ .../inconsistent_types_config.yml | 17 ++++ .../incorrect_config.txt | 5 + .../missing_key_config.yml | 15 +++ .../test_search_space.py | 84 ++++++++++++++++ 7 files changed, 212 insertions(+), 47 deletions(-) create mode 100644 tests/test_yaml_search_space/correct_config.yaml create mode 100644 tests/test_yaml_search_space/correct_config_including_priors.yml create mode 100644 tests/test_yaml_search_space/inconsistent_types_config.yml create mode 100644 tests/test_yaml_search_space/incorrect_config.txt create mode 100644 tests/test_yaml_search_space/missing_key_config.yml create mode 100644 tests/test_yaml_search_space/test_search_space.py diff --git a/src/neps/search_spaces/search_space.py b/src/neps/search_spaces/search_space.py index a4b3e225..e0816652 100644 --- a/src/neps/search_spaces/search_space.py +++ b/src/neps/search_spaces/search_space.py @@ -74,67 +74,71 @@ def pipeline_space_from_yaml(yaml_file_path): Raises: KeyError: If any mandatory configuration for a parameter is missing in the YAML file. + ValueError: If lower and upper are not the same type of value + ValueError: if choices is not a list ValueError: If an unknown parameter type is encountered. 
ValueError: If YAML file is incorrectly constructed """ # Load the YAML file try: - with open(yaml_file_path, 'r') as file: + with open(yaml_file_path) as file: config = yaml.safe_load(file) except yaml.YAMLError as e: raise ValueError(f"The file at {yaml_file_path} is not a valid YAML file.") from e # check for key config_space - if 'config_space' not in config: + if "search_space" not in config: raise ValueError( - "The YAML file is incorrectly constructed: 'config_space' key is missing.") + "The YAML file is incorrectly constructed: 'config_space' key is missing." + ) # Initialize the pipeline space pipeline_space = {} # Iterate over the items in the YAML configuration - for name, details in config['config_space'].items(): - try: - param_type = details['type'] - # Handle different parameter types - if param_type == 'int': - pipeline_space[name] = IntegerParameter( - lower=details['lower'], - upper=details['upper'], - log=details.get('log', False), - is_fidelity=details.get('is_fidelity', False), - default=details.get('default', None), - default_confidence=details.get('default_confidence', 'low') - ) - elif param_type == 'float': - pipeline_space[name] = FloatParameter( - lower=details['lower'], - upper=details['upper'], - log=details.get('log', False), - is_fidelity=details.get('is_fidelity', False), - default=details.get('default', None), - default_confidence=details.get('default_confidence', 'low') + for name, details in config["search_space"].items(): + if "lower" in details and "upper" in details: + # Determine if it's an integer or float range parameter + if isinstance(details["lower"], int) and isinstance(details["upper"], int): + param_type = IntegerParameter + elif isinstance(details["lower"], float) and isinstance( + details["upper"], float + ): + param_type = FloatParameter + else: + raise ValueError( + f"Inconsistent types for 'lower' and 'upper' in '{name}'. " + f"Both must be either integers or floats." 
) - elif param_type == 'cat': - pipeline_space[name] = CategoricalParameter( - choices=details['choices'], - is_fidelity=details.get('is_fidelity', False), - default=details.get('default', None), - default_confidence=details.get('default_confidence', 'low') - ) - elif param_type == 'const': - pipeline_space[name] = ConstantParameter( - value=details['value'], - is_fidelity=details.get('is_fidelity', False) - ) - else: - # Handle unknown parameter types - supported_types = ['const', 'cat', 'int', 'float'] - raise ValueError(f"Unknown parameter type '{param_type}' for '{name}'. " - f"Supported types are: {', '.join(supported_types)}") - except KeyError as e: - # Handle missing mandatory arguments - raise KeyError(f"Mandatory configuration '{e.args[0]}' missing for parameter '{name}' in YAML file.") + pipeline_space[name] = param_type( + lower=details["lower"], + upper=details["upper"], + log=details.get("log", False), + is_fidelity=details.get("is_fidelity", False), + default=details.get("default", None), + default_confidence=details.get("default_confidence", "low"), + ) + elif "choices" in details: + # Categorical parameter + if not isinstance(details["choices"], list): + raise ValueError(f"The 'choices' for '{name}' must be a list.") + pipeline_space[name] = CategoricalParameter( + choices=details["choices"], + is_fidelity=details.get("is_fidelity", False), + default=details.get("default", None), + default_confidence=details.get("default_confidence", "low"), + ) + elif "value" in details: + # Constant parameter + pipeline_space[name] = ConstantParameter( + value=details["value"], is_fidelity=details.get("is_fidelity", False) + ) + else: + # Handle unknown parameter types + raise KeyError( + f"Unsupported parameter format for '{name}'. " + f"Expected keys not found in {details}." 
+ ) return pipeline_space @@ -175,7 +179,7 @@ def __init__(self, **hyperparameters): def set_custom_grid_space( self, grid_table: pd.Series | pd.DataFrame, - raw_space: SearchSpace | CS.ConfigurationSpace + raw_space: SearchSpace | CS.ConfigurationSpace, ): """Set a custom grid space for the search space. @@ -188,7 +192,8 @@ def set_custom_grid_space( self.custom_grid_table: pd.DataFrame | pd.Series = grid_table self.raw_tabular_space = ( SearchSpace(**raw_space) - if not isinstance(raw_space, SearchSpace) else raw_space + if not isinstance(raw_space, SearchSpace) + else raw_space ) if self.custom_grid_table is None or self.raw_tabular_space is None: raise ValueError( @@ -279,7 +284,7 @@ def _smbo_mutation(self, patience=50, **kwargs): new_config[hp_name] = hp.mutate(**kwargs) break except Exception as e: - self.logger.warning(f"{hp_name} FAILED!") + self.logger.warning(f"{hp_name} FAILED! Error: {e}") continue return new_config diff --git a/tests/test_yaml_search_space/correct_config.yaml b/tests/test_yaml_search_space/correct_config.yaml new file mode 100644 index 00000000..04464f7b --- /dev/null +++ b/tests/test_yaml_search_space/correct_config.yaml @@ -0,0 +1,16 @@ +search_space: + learning_rate: + lower: 0.00001 + upper: 0.1 + log: true + + num_epochs: + lower: 3 + upper: 30 + is_fidelity: True + + optimizer: + choices: ["adam", "sgd", "rmsprop"] + + dropout_rate: + value: 0.5 diff --git a/tests/test_yaml_search_space/correct_config_including_priors.yml b/tests/test_yaml_search_space/correct_config_including_priors.yml new file mode 100644 index 00000000..4bd10e9e --- /dev/null +++ b/tests/test_yaml_search_space/correct_config_including_priors.yml @@ -0,0 +1,23 @@ +search_space: + learning_rate: + lower: 0.00001 + upper: 0.1 + log: true + default: 0.001 + default_confidence: high + + num_epochs: + lower: 3 + upper: 30 + is_fidelity: True + default: 10 + default_confidence: medium + + optimizer: + choices: ["adam", "sgd", "rmsprop"] + default: "sgd" + 
default_confidence: medium + + dropout_rate: + value: 0.5 + is_fidelity: true diff --git a/tests/test_yaml_search_space/inconsistent_types_config.yml b/tests/test_yaml_search_space/inconsistent_types_config.yml new file mode 100644 index 00000000..3d5eb559 --- /dev/null +++ b/tests/test_yaml_search_space/inconsistent_types_config.yml @@ -0,0 +1,17 @@ +search_space: + learning_rate: + lower: "0.00001" # Lower is now a string + upper: 0.1 + log: true + + num_epochs: + lower: 3 + upper: 30 + is_fidelity: True + + optimizer: + choices: ["adam", "sgd", "rmsprop"] + + dropout_rate: + value: 0.5 + is_fidelity: True diff --git a/tests/test_yaml_search_space/incorrect_config.txt b/tests/test_yaml_search_space/incorrect_config.txt new file mode 100644 index 00000000..c80774ee --- /dev/null +++ b/tests/test_yaml_search_space/incorrect_config.txt @@ -0,0 +1,5 @@ +search_space # : is missing + learning_rate: + lower: 0.00001 + upper: 0.1 + log: true diff --git a/tests/test_yaml_search_space/missing_key_config.yml b/tests/test_yaml_search_space/missing_key_config.yml new file mode 100644 index 00000000..d6dbeb26 --- /dev/null +++ b/tests/test_yaml_search_space/missing_key_config.yml @@ -0,0 +1,15 @@ +search_space: + learning_rate: + lower: 0.00001 + log: true + + num_epochs: + lower: 3 + upper: 30 + is_fidelity: True + + optimizer: + choices: ["adam", "sgd", "rmsprop"] + + dropout_rate: + value: 0.5 diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py new file mode 100644 index 00000000..2eb8580c --- /dev/null +++ b/tests/test_yaml_search_space/test_search_space.py @@ -0,0 +1,84 @@ +import pytest + +from neps import CategoricalParameter, ConstantParameter, FloatParameter, IntegerParameter +from neps.search_spaces.search_space import pipeline_space_from_yaml + + +@pytest.mark.yaml_search_space +def test_correct_yaml_file(): + """Test the function with a correctly formatted YAML file.""" + pipeline_space = 
pipeline_space_from_yaml( + "tests/test_yaml_search_space/correct_config.yaml" + ) + assert isinstance(pipeline_space, dict) + assert isinstance(pipeline_space["learning_rate"], FloatParameter) + assert pipeline_space["learning_rate"].lower == 0.00001 + assert pipeline_space["learning_rate"].upper == 0.1 + assert pipeline_space["learning_rate"].log is True + assert pipeline_space["optimizer"].is_fidelity is False + assert pipeline_space["learning_rate"].default is None + assert pipeline_space["learning_rate"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["num_epochs"], IntegerParameter) + assert pipeline_space["num_epochs"].log is False + assert pipeline_space["num_epochs"].is_fidelity is True + assert pipeline_space["num_epochs"].default is None + assert pipeline_space["num_epochs"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["optimizer"], CategoricalParameter) + assert pipeline_space["optimizer"].is_fidelity is False + assert pipeline_space["optimizer"].default is None + assert pipeline_space["optimizer"].default_confidence_score == 2 + assert isinstance(pipeline_space["dropout_rate"], ConstantParameter) + assert pipeline_space["dropout_rate"].value == 0.5 + assert pipeline_space["dropout_rate"].is_fidelity is False + + +@pytest.mark.yaml_search_space +def test_correct_including_priors_yaml_file(): + """Test the function with a correctly formatted YAML file.""" + pipeline_space = pipeline_space_from_yaml( + "tests/test_yaml_search_space/correct_config_including_priors.yml" + ) + assert isinstance(pipeline_space, dict) + assert isinstance(pipeline_space["learning_rate"], FloatParameter) + assert pipeline_space["learning_rate"].lower == 0.00001 + assert pipeline_space["learning_rate"].upper == 0.1 + assert pipeline_space["learning_rate"].log is True + assert pipeline_space["learning_rate"].is_fidelity is False + assert pipeline_space["learning_rate"].default == 0.001 + assert 
pipeline_space["learning_rate"].default_confidence_score == 0.125 + assert isinstance(pipeline_space["num_epochs"], IntegerParameter) + assert pipeline_space["num_epochs"].log is False + assert pipeline_space["num_epochs"].is_fidelity is True + assert pipeline_space["num_epochs"].default == 10 + assert pipeline_space["num_epochs"].default_confidence_score == 0.25 + assert isinstance(pipeline_space["optimizer"], CategoricalParameter) + assert pipeline_space["optimizer"].is_fidelity is False + assert pipeline_space["optimizer"].default == "sgd" + assert pipeline_space["optimizer"].default_confidence_score == 4 + assert isinstance(pipeline_space["dropout_rate"], ConstantParameter) + assert pipeline_space["dropout_rate"].value == 0.5 + assert pipeline_space["dropout_rate"].is_fidelity is True + + +@pytest.mark.yaml_search_space +def test_incorrect_yaml_file(): + """Test the function with an incorrectly formatted YAML file.""" + with pytest.raises(ValueError): + pipeline_space_from_yaml("tests/test_yaml_search_space/incorrect_config.txt") + + +@pytest.mark.yaml_search_space +def test_yaml_file_with_missing_key(): + """Test the function with a YAML file missing a required key.""" + with pytest.raises(KeyError): + pipeline_space_from_yaml("tests/test_yaml_search_space/missing_key_config.yml") + + +@pytest.mark.yaml_search_space +def test_yaml_file_with_inconsistent_types(): + """Test the function with a YAML file having inconsistent types for + 'lower' and 'upper'.""" + with pytest.raises(ValueError): + pipeline_space_from_yaml( + "tests/test_yaml_search_space/inconsistent_types_config.yml" + ) From 65e340f4a13c03200e7b76fb03734f0265e98759 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Sat, 25 Nov 2023 15:38:02 +0100 Subject: [PATCH 03/20] enable to run tests for yaml --- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 19c4336f..6cd001d1 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -95,8 +95,9 @@ profile = 'black' line_length = 90 [tool.pytest.ini_options] -addopts = "--basetemp ./tests_tmpdir -m 'core_examples or yaml_api'" -markers = ["all_examples", "core_examples", "regression_all", "metahyper", "yaml_api", "summary_csv"] +addopts = "--basetemp ./tests_tmpdir -m 'core_examples or yaml_api or yaml_search_space'" +markers = ["all_examples", "core_examples", "regression_all", "metahyper", "yaml_api", "summary_csv", + "yaml_search_space"] filterwarnings = "ignore::DeprecationWarning:torch.utils.tensorboard.*:" [tool.mypy] From a62c1cc82b3018971a44827ea58f0df8ecb9b641 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Sat, 25 Nov 2023 18:11:40 +0100 Subject: [PATCH 04/20] adapt tests and documentation for yaml_search_space --- src/neps/search_spaces/search_space.py | 28 +++++++++++++------ .../test_search_space.py | 10 ++++++- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/neps/search_spaces/search_space.py b/src/neps/search_spaces/search_space.py index e0816652..0f6dea4e 100644 --- a/src/neps/search_spaces/search_space.py +++ b/src/neps/search_spaces/search_space.py @@ -74,10 +74,10 @@ def pipeline_space_from_yaml(yaml_file_path): Raises: KeyError: If any mandatory configuration for a parameter is missing in the YAML file. - ValueError: If lower and upper are not the same type of value + TypeError: If lower and upper are not the same type of value ValueError: if choices is not a list - ValueError: If an unknown parameter type is encountered. - ValueError: If YAML file is incorrectly constructed + KeyError: If an unknown parameter type is encountered. 
+ KeyError: If YAML file is incorrectly constructed """ # Load the YAML file try: @@ -86,16 +86,24 @@ def pipeline_space_from_yaml(yaml_file_path): except yaml.YAMLError as e: raise ValueError(f"The file at {yaml_file_path} is not a valid YAML file.") from e - # check for key config_space + # check for key search_space if "search_space" not in config: - raise ValueError( - "The YAML file is incorrectly constructed: 'config_space' key is missing." + raise KeyError( + "The YAML file is incorrectly constructed: the 'search_space:' " + "reference is missing at the top of the file." ) # Initialize the pipeline space pipeline_space = {} # Iterate over the items in the YAML configuration for name, details in config["search_space"].items(): + if not (isinstance(name, str) and isinstance(details, dict)): + raise KeyError( + f"Invalid format for {name} in YAML file. " + f"Expected 'name' as string and corresponding 'details' as a dictionary. " + f"Found 'name' type: {type(name).__name__}, 'details' type:" + f" {type(details).__name__}." + ) if "lower" in details and "upper" in details: # Determine if it's an integer or float range parameter if isinstance(details["lower"], int) and isinstance(details["upper"], int): @@ -105,7 +113,7 @@ def pipeline_space_from_yaml(yaml_file_path): ): param_type = FloatParameter else: - raise ValueError( + raise TypeError( f"Inconsistent types for 'lower' and 'upper' in '{name}'. " f"Both must be either integers or floats." ) @@ -136,8 +144,12 @@ def pipeline_space_from_yaml(yaml_file_path): else: # Handle unknown parameter types raise KeyError( - f"Unsupported parameter format for '{name}'. " + f"Unsupported parameter format for '{name}'." f"Expected keys not found in {details}." 
+ "Supported parameters:" + "Float and Integer: Expected keys: 'lower', 'upper'" + "Categorical: Expected keys: 'choices'" + "Constant: Expected keys: 'value'" ) return pipeline_space diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index 2eb8580c..0767912c 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -19,11 +19,14 @@ def test_correct_yaml_file(): assert pipeline_space["learning_rate"].default is None assert pipeline_space["learning_rate"].default_confidence_score == 0.5 assert isinstance(pipeline_space["num_epochs"], IntegerParameter) + assert pipeline_space["num_epochs"].lower == 3 + assert pipeline_space["num_epochs"].upper == 30 assert pipeline_space["num_epochs"].log is False assert pipeline_space["num_epochs"].is_fidelity is True assert pipeline_space["num_epochs"].default is None assert pipeline_space["num_epochs"].default_confidence_score == 0.5 assert isinstance(pipeline_space["optimizer"], CategoricalParameter) + assert pipeline_space["optimizer"].choices == ["adam", "sgd", "rmsprop"] assert pipeline_space["optimizer"].is_fidelity is False assert pipeline_space["optimizer"].default is None assert pipeline_space["optimizer"].default_confidence_score == 2 @@ -47,11 +50,14 @@ def test_correct_including_priors_yaml_file(): assert pipeline_space["learning_rate"].default == 0.001 assert pipeline_space["learning_rate"].default_confidence_score == 0.125 assert isinstance(pipeline_space["num_epochs"], IntegerParameter) + assert pipeline_space["num_epochs"].lower == 3 + assert pipeline_space["num_epochs"].upper == 30 assert pipeline_space["num_epochs"].log is False assert pipeline_space["num_epochs"].is_fidelity is True assert pipeline_space["num_epochs"].default == 10 assert pipeline_space["num_epochs"].default_confidence_score == 0.25 assert isinstance(pipeline_space["optimizer"], CategoricalParameter) + assert 
pipeline_space["optimizer"].choices == ["adam", "sgd", "rmsprop"] assert pipeline_space["optimizer"].is_fidelity is False assert pipeline_space["optimizer"].default == "sgd" assert pipeline_space["optimizer"].default_confidence_score == 4 @@ -78,7 +84,9 @@ def test_yaml_file_with_missing_key(): def test_yaml_file_with_inconsistent_types(): """Test the function with a YAML file having inconsistent types for 'lower' and 'upper'.""" - with pytest.raises(ValueError): + with pytest.raises(TypeError): pipeline_space_from_yaml( "tests/test_yaml_search_space/inconsistent_types_config.yml" ) + + From c41a346319dbf7ee35c823039310cb5084ee7237 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Sat, 25 Nov 2023 18:12:49 +0100 Subject: [PATCH 05/20] adapt tests and documentation for yaml_search_space --- tests/test_yaml_search_space/test_search_space.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index 0767912c..9d81c5f1 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -88,5 +88,3 @@ def test_yaml_file_with_inconsistent_types(): pipeline_space_from_yaml( "tests/test_yaml_search_space/inconsistent_types_config.yml" ) - - From 505519b5a5151fc0bbd22cbc6914a30c6b54cbf0 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Sat, 25 Nov 2023 21:29:20 +0100 Subject: [PATCH 06/20] add documentation for config_space --- docs/pipeline_space.md | 92 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/docs/pipeline_space.md b/docs/pipeline_space.md index e69de29b..3eaeb57e 100644 --- a/docs/pipeline_space.md +++ b/docs/pipeline_space.md @@ -0,0 +1,92 @@ +# Initializing the Search Space + +In NePS, defining the Search Space is one of two essential tasks. 
You can define it either through a Python dictionary +,YAML file or ConfigSpace. This section provides examples and instructions for both methods. + +## Option 1: Using a Python Dictionary + +To define the Search Space using a Python dictionary, follow these steps: + + +Create a Python dictionary that specifies the parameters and their respective ranges. For example: + +```python +search_space = { + "learning_rate": neps.FloatParameter(lower=0.00001, upper=0.1, log=True), + "num_epochs": neps.IntegerParameter(lower=3, upper=30, is_fidelity=True), + "optimizer": neps.CategoricalParameter(choices=["adam", "sgd", "rmsprop"]), + "dropout_rate": neps.FloatParameter(value=0.5) +} + +``` + +## Option 2: Using a YAML File +Create a YAML file (e.g., search_space.yaml) with the parameter definitions following this structure. + + +```yaml +search_space: # important to start with + learning_rate: + lower: 0.00001 + upper: 0.1 + log: true + + num_epochs: + lower: 3 + upper: 30 + is_fidelity: True + + optimizer: + choices: ["adam", "sgd", "rmsprop"] + + dropout_rate: + value: 0.5 +... +``` +Ensure your YAML file starts with `search_space:`. +This is the root key under which all parameter configurations are defined. + +## Option 3: Using ConfigSpace +For users familiar with the ConfigSpace library, can also define the Search Space through +ConfigurationSpace() +```python +from configspace import ConfigurationSpace, UniformFloatHyperparameter + +configspace = ConfigurationSpace() +configspace.add_hyperparameter(UniformFloatHyperparameter("learning_rate", 0.00001, 0.1, log=True)) +``` +Link: https://github.com/automl/ConfigSpace + +# Supported HyperParameter Types + +### FloatParameter and IntegerParameter +- **Expected Arguments:** + - `lower`: The minimum value of the parameter. + - `upper`: The maximum value of the parameter. +- **Optional Arguments:** + - `log`: Indicates if the parameter uses a logarithmic scale (default: False). 
+ - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). + - `default`: Sets a prior central value for the parameter (default: None. + - `default_confidence`: Specifies the confidence level of the default value, + indicating how strongly the prior + should be considered default: "low". + +### Categorical Parameter +- **Expected Arguments:** + - `choices`: A list of discrete options that the parameter can take. +- **Optional Arguments:** + - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). + - `default`: Sets a prior central value for the parameter (default: None. + - `default_confidence`: Specifies the confidence level of the default value, + indicating how strongly the prior + should be considered default: "low". + +### ConstantParameter +- **Expected Arguments:** + - `value`: The fixed value for the parameter. +- **Optional Arguments:** + - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). + +# Supported ArchitectureParameter Types + + From 5096129c1c74bd4c12f0ede08f7f71aac2a7dd1b Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Mon, 27 Nov 2023 17:36:08 +0100 Subject: [PATCH 07/20] clean up tests --- .github/workflows/tests.yaml | 2 +- pyproject.toml | 5 ++--- tests/test_yaml_search_space/test_search_space.py | 10 +++++----- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 0211dfbc..157c4afd 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -41,4 +41,4 @@ jobs: - name: Run pytest timeout-minutes: 15 - run: poetry run pytest -m "all_examples or metahyper or summary_csv" + run: poetry run pytest -m "all_examples or metahyper or summary_csv or yaml_api" diff --git a/pyproject.toml b/pyproject.toml index 4ab5188e..b552d705 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,9 +102,8 @@ profile = 'black' line_length = 90 
[tool.pytest.ini_options] -addopts = "--basetemp ./tests_tmpdir -m 'core_examples or yaml_api or yaml_search_space'" -markers = ["all_examples", "core_examples", "regression_all", "metahyper", "yaml_api", "summary_csv", - "yaml_search_space"] +addopts = "--basetemp ./tests_tmpdir -m 'core_examples or yaml_api'" +markers = ["all_examples", "core_examples", "regression_all", "metahyper", "yaml_api", "summary_csv"] filterwarnings = "ignore::DeprecationWarning:torch.utils.tensorboard.*:" [tool.mypy] diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index 9d81c5f1..b76488ef 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -4,7 +4,7 @@ from neps.search_spaces.search_space import pipeline_space_from_yaml -@pytest.mark.yaml_search_space +@pytest.mark.yaml_api def test_correct_yaml_file(): """Test the function with a correctly formatted YAML file.""" pipeline_space = pipeline_space_from_yaml( @@ -35,7 +35,7 @@ def test_correct_yaml_file(): assert pipeline_space["dropout_rate"].is_fidelity is False -@pytest.mark.yaml_search_space +@pytest.mark.yaml_api def test_correct_including_priors_yaml_file(): """Test the function with a correctly formatted YAML file.""" pipeline_space = pipeline_space_from_yaml( @@ -66,21 +66,21 @@ def test_correct_including_priors_yaml_file(): assert pipeline_space["dropout_rate"].is_fidelity is True -@pytest.mark.yaml_search_space +@pytest.mark.yaml_api def test_incorrect_yaml_file(): """Test the function with an incorrectly formatted YAML file.""" with pytest.raises(ValueError): pipeline_space_from_yaml("tests/test_yaml_search_space/incorrect_config.txt") -@pytest.mark.yaml_search_space +@pytest.mark.yaml_api def test_yaml_file_with_missing_key(): """Test the function with a YAML file missing a required key.""" with pytest.raises(KeyError): 
pipeline_space_from_yaml("tests/test_yaml_search_space/missing_key_config.yml") -@pytest.mark.yaml_search_space +@pytest.mark.yaml_api def test_yaml_file_with_inconsistent_types(): """Test the function with a YAML file having inconsistent types for 'lower' and 'upper'.""" From e8d586ceca14b9d08a65a5b207d75e95fbbe87b3 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Sun, 3 Dec 2023 23:26:57 +0100 Subject: [PATCH 08/20] add usage of e and 10^ for exponent format as input for yaml file + new tests for optional type argument --- .../hyperparameters/categorical.py | 5 +- .../search_spaces/hyperparameters/float.py | 3 +- src/neps/search_spaces/search_space.py | 350 ++++++++++++++---- .../config_including_unknown_types.yaml | 20 + .../config_including_wrong_types.yaml | 20 + .../correct_config.yaml | 1 + .../correct_config_including_types.yaml | 20 + .../test_search_space.py | 94 +++-- 8 files changed, 404 insertions(+), 109 deletions(-) create mode 100644 tests/test_yaml_search_space/config_including_unknown_types.yaml create mode 100644 tests/test_yaml_search_space/config_including_wrong_types.yaml create mode 100644 tests/test_yaml_search_space/correct_config_including_types.yaml diff --git a/src/neps/search_spaces/hyperparameters/categorical.py b/src/neps/search_spaces/hyperparameters/categorical.py index 39547321..6b5674b2 100644 --- a/src/neps/search_spaces/hyperparameters/categorical.py +++ b/src/neps/search_spaces/hyperparameters/categorical.py @@ -2,11 +2,10 @@ import random from copy import copy, deepcopy -from typing import Iterable +from typing import Iterable, Literal import numpy as np import numpy.typing as npt -from typing_extensions import Literal from ..parameter import Parameter @@ -32,9 +31,7 @@ def __init__( self.upper = default self.default_confidence_score = CATEGORICAL_CONFIDENCE_SCORES[default_confidence] self.has_prior = self.default is not None - self.is_fidelity = is_fidelity - self.choices = list(choices) 
self.num_choices = len(self.choices) self.probabilities: list[npt.NDArray] = list( diff --git a/src/neps/search_spaces/hyperparameters/float.py b/src/neps/search_spaces/hyperparameters/float.py index c35cafc2..2c8eaeb4 100644 --- a/src/neps/search_spaces/hyperparameters/float.py +++ b/src/neps/search_spaces/hyperparameters/float.py @@ -2,10 +2,10 @@ import math from copy import deepcopy +from typing import Literal import numpy as np import scipy.stats -from typing_extensions import Literal from .numerical import NumericalParameter @@ -37,7 +37,6 @@ def __init__( if self.lower >= self.upper: raise ValueError("Float parameter: bounds error (lower >= upper).") - self.log = log if self.log: diff --git a/src/neps/search_spaces/search_space.py b/src/neps/search_spaces/search_space.py index 0f6dea4e..1679f0ee 100644 --- a/src/neps/search_spaces/search_space.py +++ b/src/neps/search_spaces/search_space.py @@ -3,6 +3,7 @@ import collections.abc import pprint import random +import re from collections import OrderedDict from copy import deepcopy from itertools import product @@ -66,93 +67,298 @@ def pipeline_space_from_yaml(yaml_file_path): """ Reads configuration details from a YAML file and creates a dictionary of parameters. + This function parses a YAML file to extract configuration details and organizes them + into a dictionary. Each key in the dictionary corresponds to a parameter name, and + the value is an object representing the parameter configuration. + Args: - yaml_file_path (str): Path to the YAML file containing configuration details. + yaml_file_path (str): Path to the YAML file containing configuration details. Returns: - dict: A dictionary with parameter names as keys and parameter objects as values. + dict: A dictionary with parameter names as keys and parameter objects as values. Raises: - KeyError: If any mandatory configuration for a parameter is missing in the YAML file. 
- TypeError: If lower and upper are not the same type of value - ValueError: if choices is not a list - KeyError: If an unknown parameter type is encountered. - KeyError: If YAML file is incorrectly constructed + SearchSpaceFromYamlFileError: Wraps and re-raises exceptions (KeyError, TypeError, + ValueError) that occur during the initialization of the search space from the YAML + file. This custom exception class provides additional context about the error, + enhancing diagnostic clarity and simplifying error handling for function callers. + It includes the type of the original exception and a descriptive message, thereby + localizing error handling to this specific function and preventing the propagation + of these generic exceptions. + + Note: + The YAML file must be structured correctly with appropriate keys and values for + each parameter type. The function validates the structure and content of the YAML + file, raising specific errors for missing mandatory configuration details, type + mismatches, and unknown parameter types. + + Example: + Given a YAML file 'config.yaml', call the function as follows: + pipeline_space = pipeline_space_from_yaml('config.yaml') """ - # Load the YAML file try: - with open(yaml_file_path) as file: - config = yaml.safe_load(file) - except yaml.YAMLError as e: - raise ValueError(f"The file at {yaml_file_path} is not a valid YAML file.") from e - - # check for key search_space - if "search_space" not in config: - raise KeyError( - "The YAML file is incorrectly constructed: the 'search_space:' " - "reference is missing at the top of the file." - ) + # try to load the YAML file + try: + with open(yaml_file_path) as file: + config = yaml.safe_load(file) + except yaml.YAMLError as e: + raise ValueError( + f"The file at {yaml_file_path} is not a valid YAML file." 
+ ) from e - # Initialize the pipeline space - pipeline_space = {} - # Iterate over the items in the YAML configuration - for name, details in config["search_space"].items(): - if not (isinstance(name, str) and isinstance(details, dict)): + # check for init key search_space + if "search_space" not in config: raise KeyError( - f"Invalid format for {name} in YAML file. " - f"Expected 'name' as string and corresponding 'details' as a dictionary. " - f"Found 'name' type: {type(name).__name__}, 'details' type:" - f" {type(details).__name__}." + "The YAML file is incorrectly constructed: the 'search_space:' " + "reference is missing at the top of the file." ) - if "lower" in details and "upper" in details: - # Determine if it's an integer or float range parameter - if isinstance(details["lower"], int) and isinstance(details["upper"], int): - param_type = IntegerParameter - elif isinstance(details["lower"], float) and isinstance( - details["upper"], float - ): - param_type = FloatParameter + + # Initialize the pipeline space + pipeline_space = {} + + # Iterate over the items in the YAML configuration + for name, details in config["search_space"].items(): + if not (isinstance(name, str) and isinstance(details, dict)): + raise KeyError( + f"Invalid format for {name} in YAML file. " + f"Expected 'name' as string and corresponding 'details' as a " + f"dictionary. Found 'name' type: {type(name).__name__}, 'details' " + f"type: {type(details).__name__}." + ) + + # get parameter type + param_type, type_provided = ( + (details["type"], True) + if "type" in details + else (deduce_param_type(name, details), False) + ) + param_type = param_type.lower() + print("create parameter") + # init parameter by checking type + if param_type in ("int", "integer"): + # Integer Parameter + if type_provided: + if "lower" not in details or "upper" not in details: + raise KeyError( + f"Missing 'lower' or 'upper' for integer " + f"parameter '{name}'." 
+ ) + if not isinstance(details["lower"], int) or not isinstance( + details["upper"], int + ): + # for numbers like 1e2 + details["lower"] = int( + convert_scientific_notation(details["lower"]) + ) + details["upper"] = int( + convert_scientific_notation(details["upper"]) + ) + else: + raise TypeError( + f"'lower' and 'upper' must be integer for " + f"integer parameter '{name}'." + ) + + pipeline_space[name] = IntegerParameter( + lower=details["lower"], + upper=details["upper"], + log=details.get("log", False), + is_fidelity=details.get("is_fidelity", False), + default=details.get("default", None), + default_confidence=details.get("default_confidence", "low"), + ) + elif param_type == "float": + # Float Parameter + if type_provided: + if "lower" not in details or "upper" not in details: + raise KeyError( + f"Missing key 'lower' or 'upper' for float " + f"parameter '{name}'." + ) + if not isinstance(details["lower"], float) or not isinstance( + details["upper"], float + ): + # for numbers like 1e-5 + details["lower"] = convert_scientific_notation(details["lower"]) + details["upper"] = convert_scientific_notation(details["upper"]) + else: + raise TypeError( + f"'lower' and 'upper' must be integer for " + f"integer parameter '{name}'." 
+ ) + + pipeline_space[name] = FloatParameter( + lower=details["lower"], + upper=details["upper"], + log=details.get("log", False), + is_fidelity=details.get("is_fidelity", False), + default=details.get("default", None), + default_confidence=details.get("default_confidence", "low"), + ) + elif param_type in ("cat", "categorical"): + # Categorical parameter + if type_provided: + if "choices" not in details: + raise KeyError( + f"Missing key 'choices' for categorical " f"parameter {name}" + ) + if not isinstance(details["choices"], list): + raise TypeError(f"The 'choices' for '{name}' must be a list.") + + pipeline_space[name] = CategoricalParameter( + choices=details["choices"], + is_fidelity=details.get("is_fidelity", False), + default=details.get("default", None), + default_confidence=details.get("default_confidence", "low"), + ) + elif param_type in ("const", "constant"): + # Constant parameter + if type_provided: + if "value" not in details: + raise KeyError( + f"Missing key 'value' for constant parameter " f"{name}" + ) + + pipeline_space[name] = ConstantParameter( + value=details["value"], is_fidelity=details.get("is_fidelity", False) + ) else: + # Handle unknown parameter types raise TypeError( - f"Inconsistent types for 'lower' and 'upper' in '{name}'. " - f"Both must be either integers or floats." 
+ f"Unsupported parameter type{details['type']} for '{name}'.\n" + f"Supported Types for argument type are:\n" + "For integer parameter: int, integer\n" + "For float parameter: float\n" + "For categorical parameter: cat, categorical\n" + "For constant parameter: const, constant\n" ) + except (KeyError, TypeError, ValueError) as e: + raise SearchSpaceFromYamlFileError(e) from e + return pipeline_space - pipeline_space[name] = param_type( - lower=details["lower"], - upper=details["upper"], - log=details.get("log", False), - is_fidelity=details.get("is_fidelity", False), - default=details.get("default", None), - default_confidence=details.get("default_confidence", "low"), - ) - elif "choices" in details: - # Categorical parameter - if not isinstance(details["choices"], list): - raise ValueError(f"The 'choices' for '{name}' must be a list.") - pipeline_space[name] = CategoricalParameter( - choices=details["choices"], - is_fidelity=details.get("is_fidelity", False), - default=details.get("default", None), - default_confidence=details.get("default_confidence", "low"), - ) - elif "value" in details: - # Constant parameter - pipeline_space[name] = ConstantParameter( - value=details["value"], is_fidelity=details.get("is_fidelity", False) - ) + +def convert_scientific_notation(value, show_usage_flag=False): + """Check if the value is a string that matches scientific ^ + and convert it to float.""" + + e_notation_pattern = r"^-?\d+(\.\d+)?[eE]-?\d+$" + # Pattern for '10^' style notation, with optional base and multiplication symbol + ten_power_notation_pattern = r"^(-?\d+)?(\.\d+)?\*?10\^(-?\d+)$" + + if isinstance(value, str): + # Remove all whitespace from the string + value_no_space = value.replace(" ", "") + if re.match(e_notation_pattern, value_no_space): + if show_usage_flag is True: + return float(value), True + else: + return float(value) else: - # Handle unknown parameter types - raise KeyError( - f"Unsupported parameter format for '{name}'." 
- f"Expected keys not found in {details}." - "Supported parameters:" - "Float and Integer: Expected keys: 'lower', 'upper'" - "Categorical: Expected keys: 'choices'" - "Constant: Expected keys: 'value'" + match = re.match(ten_power_notation_pattern, value_no_space) + if match: + base, decimal, exponent = match.groups() + if decimal: + base = base + decimal + base = float(base) if base else 1 # Default to 1 if base is empty + value = base * (10 ** float(exponent)) + if show_usage_flag is True: + return float(value), True + else: + return float(value) + if show_usage_flag is True: + return float(value), False + else: + return float(value) + + +class SearchSpaceFromYamlFileError(Exception): + """ + Exception raised for errors occurring during the initialization of the search space + from a YAML file. + + Attributes: + exception_type (str): The type of the original exception. + message (str): A detailed message that includes the type of the original exception + and the error description. + + Args: + exception (Exception): The original exception that was raised during the + initialization of the search space from the YAML file. + + Example Usage: + try: + # Code to initialize search space from YAML file + except (KeyError, TypeError, ValueError) as e: + raise SearchSpaceFromYamlFileError(e) + """ + + def __init__(self, exception): + self.exception_type = type(exception).__name__ + self.message = ( + f"Error occurred during initialization of search space from " + f"YAML file.\n {self.exception_type}: {exception}" + ) + super().__init__(self.message) + + +def deduce_param_type(name, details): + """ + Deduces the parameter type based on the provided details. + + This function analyzes the provided details dictionary to determine the type of + parameter. It supports identifying integer, float, categorical, and constant + parameter types. + + Args: + name (str): The name of the parameter. + details (dict): A dictionary containing parameter specifications. 
+ + Returns: + str: The deduced parameter type ('int', 'float', 'categorical', or 'constant'). + + Raises: + TypeError: If the parameter type cannot be deduced from the details, or if the + provided details have inconsistent types for expected keys. + + Example: + param_type = deduce_param_type('example_param', {'lower': 0, 'upper': 10}) + """ + if "lower" in details and "upper" in details: + # Determine if it's an integer or float range parameter + if isinstance(details["lower"], int) and isinstance(details["upper"], int): + param_type = "int" + elif isinstance(details["lower"], float) and isinstance(details["upper"], float): + param_type = "float" + else: + details["lower"], flag_lower = convert_scientific_notation( + details["lower"], show_usage_flag=True ) + details["upper"], flag_upper = convert_scientific_notation( + details["upper"], show_usage_flag=True + ) + # check if one value is 10^format to convert it to float + if flag_lower or flag_upper: + param_type = "float" + else: + raise TypeError( + f"Inconsistent types for 'lower' and 'upper' in '{name}'. " + f"Both must be either integers or floats." 
+ ) - return pipeline_space + return param_type + elif "choices" in details: + return "categorical" + elif "value" in details: + return "constant" + else: + raise TypeError( + f"Unable to deduce parameter type from {name} " + f"with details {details}\n" + "Supported parameters:\n" + "Float and Integer: Expected keys: 'lower', 'upper'\n" + "Categorical: Expected keys: 'choices'\n" + "Constant: Expected keys: 'value'" + ) class SearchSpace(collections.abc.Mapping): @@ -535,8 +741,8 @@ def __str__(self): return pprint.pformat(self.hyperparameters) def is_equal_value(self, other, include_fidelity=True, on_decimal=8): - # This does NOT check that the entire SearchSpace is equal (and thus it is not a dunder method), - # but only checks the configuration + # This does NOT check that the entire SearchSpace is equal (and thus it is + # not a dunder method), but only checks the configuration if self.hyperparameters.keys() != other.hyperparameters.keys(): return False diff --git a/tests/test_yaml_search_space/config_including_unknown_types.yaml b/tests/test_yaml_search_space/config_including_unknown_types.yaml new file mode 100644 index 00000000..62abc0d9 --- /dev/null +++ b/tests/test_yaml_search_space/config_including_unknown_types.yaml @@ -0,0 +1,20 @@ +search_space: + learning_rate: + type: numerical + lower: 0.00001 + upper: 0.1 + log: true + + num_epochs: + type: numerical + lower: 3 + upper: 30 + is_fidelity: True + + optimizer: + type: numerical + choices: ["adam", "sgd", "rmsprop"] + + dropout_rate: + type: numerical + value: 0.5 diff --git a/tests/test_yaml_search_space/config_including_wrong_types.yaml b/tests/test_yaml_search_space/config_including_wrong_types.yaml new file mode 100644 index 00000000..7cd7deda --- /dev/null +++ b/tests/test_yaml_search_space/config_including_wrong_types.yaml @@ -0,0 +1,20 @@ +search_space: + learning_rate: + type: int + lower: 0.00001 + upper: 0.1 + log: true + + num_epochs: + type: float + lower: 3 + upper: 30 + is_fidelity: True 
+ + optimizer: + type: cat + choices: ["adam", "sgd", "rmsprop"] + + dropout_rate: + type: const + value: 0.5 diff --git a/tests/test_yaml_search_space/correct_config.yaml b/tests/test_yaml_search_space/correct_config.yaml index 04464f7b..48ebe477 100644 --- a/tests/test_yaml_search_space/correct_config.yaml +++ b/tests/test_yaml_search_space/correct_config.yaml @@ -7,6 +7,7 @@ search_space: num_epochs: lower: 3 upper: 30 + log: false is_fidelity: True optimizer: diff --git a/tests/test_yaml_search_space/correct_config_including_types.yaml b/tests/test_yaml_search_space/correct_config_including_types.yaml new file mode 100644 index 00000000..5fdb7400 --- /dev/null +++ b/tests/test_yaml_search_space/correct_config_including_types.yaml @@ -0,0 +1,20 @@ +search_space: + learning_rate: + type: float + lower: 0.00001 + upper: 0.1 + log: true + + num_epochs: + type: int + lower: 3 + upper: 30 + is_fidelity: True + + optimizer: + type: cat + choices: ["adam", "sgd", "rmsprop"] + + dropout_rate: + type: const + value: 0.5 diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index b76488ef..7bfbcf70 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -1,38 +1,45 @@ import pytest from neps import CategoricalParameter, ConstantParameter, FloatParameter, IntegerParameter -from neps.search_spaces.search_space import pipeline_space_from_yaml +from neps.search_spaces.search_space import ( + SearchSpaceFromYamlFileError, + pipeline_space_from_yaml, +) @pytest.mark.yaml_api -def test_correct_yaml_file(): - """Test the function with a correctly formatted YAML file.""" - pipeline_space = pipeline_space_from_yaml( - "tests/test_yaml_search_space/correct_config.yaml" +def test_correct_yaml_files(): + def test_correct_yaml_file(path): + """Test the function with a correctly formatted YAML file.""" + pipeline_space = pipeline_space_from_yaml(path) + assert 
isinstance(pipeline_space, dict) + assert isinstance(pipeline_space["learning_rate"], FloatParameter) + assert pipeline_space["learning_rate"].lower == 0.00001 + assert pipeline_space["learning_rate"].upper == 0.1 + assert pipeline_space["learning_rate"].log is True + assert pipeline_space["optimizer"].is_fidelity is False + assert pipeline_space["learning_rate"].default is None + assert pipeline_space["learning_rate"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["num_epochs"], IntegerParameter) + assert pipeline_space["num_epochs"].lower == 3 + assert pipeline_space["num_epochs"].upper == 30 + assert pipeline_space["num_epochs"].log is False + assert pipeline_space["num_epochs"].is_fidelity is True + assert pipeline_space["num_epochs"].default is None + assert pipeline_space["num_epochs"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["optimizer"], CategoricalParameter) + assert pipeline_space["optimizer"].choices == ["adam", "sgd", "rmsprop"] + assert pipeline_space["optimizer"].is_fidelity is False + assert pipeline_space["optimizer"].default is None + assert pipeline_space["optimizer"].default_confidence_score == 2 + assert isinstance(pipeline_space["dropout_rate"], ConstantParameter) + assert pipeline_space["dropout_rate"].value == 0.5 + assert pipeline_space["dropout_rate"].is_fidelity is False + + test_correct_yaml_file("tests/test_yaml_search_space/correct_config.yaml") + test_correct_yaml_file( + "tests/test_yaml_search_space/correct_config_including_types" ".yaml" ) - assert isinstance(pipeline_space, dict) - assert isinstance(pipeline_space["learning_rate"], FloatParameter) - assert pipeline_space["learning_rate"].lower == 0.00001 - assert pipeline_space["learning_rate"].upper == 0.1 - assert pipeline_space["learning_rate"].log is True - assert pipeline_space["optimizer"].is_fidelity is False - assert pipeline_space["learning_rate"].default is None - assert pipeline_space["learning_rate"].default_confidence_score 
== 0.5 - assert isinstance(pipeline_space["num_epochs"], IntegerParameter) - assert pipeline_space["num_epochs"].lower == 3 - assert pipeline_space["num_epochs"].upper == 30 - assert pipeline_space["num_epochs"].log is False - assert pipeline_space["num_epochs"].is_fidelity is True - assert pipeline_space["num_epochs"].default is None - assert pipeline_space["num_epochs"].default_confidence_score == 0.5 - assert isinstance(pipeline_space["optimizer"], CategoricalParameter) - assert pipeline_space["optimizer"].choices == ["adam", "sgd", "rmsprop"] - assert pipeline_space["optimizer"].is_fidelity is False - assert pipeline_space["optimizer"].default is None - assert pipeline_space["optimizer"].default_confidence_score == 2 - assert isinstance(pipeline_space["dropout_rate"], ConstantParameter) - assert pipeline_space["dropout_rate"].value == 0.5 - assert pipeline_space["dropout_rate"].is_fidelity is False @pytest.mark.yaml_api @@ -69,22 +76,47 @@ def test_correct_including_priors_yaml_file(): @pytest.mark.yaml_api def test_incorrect_yaml_file(): """Test the function with an incorrectly formatted YAML file.""" - with pytest.raises(ValueError): + with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml("tests/test_yaml_search_space/incorrect_config.txt") + assert str(excinfo.value.exception_type == "ValueError") @pytest.mark.yaml_api def test_yaml_file_with_missing_key(): """Test the function with a YAML file missing a required key.""" - with pytest.raises(KeyError): + with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml("tests/test_yaml_search_space/missing_key_config.yml") + assert str(excinfo.value.exception_type == "KeyError") @pytest.mark.yaml_api def test_yaml_file_with_inconsistent_types(): """Test the function with a YAML file having inconsistent types for 'lower' and 'upper'.""" - with pytest.raises(TypeError): + with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml( 
"tests/test_yaml_search_space/inconsistent_types_config.yml" ) + assert str(excinfo.value.exception_type == "TypeError") + + +@pytest.mark.yaml_api +def test_yaml_file_including_wrong_types(): + """Test the function with a YAML file that defines the wrong but existing type + int to float as an optional argument""" + with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: + pipeline_space_from_yaml( + "tests/test_yaml_search_space/config_including_wrong_types.yaml" + ) + assert str(excinfo.value.exception_type == "TypeError") + + +@pytest.mark.yaml_api +def test_yaml_file_including_unkown_types(): + """Test the function with a YAML file that defines an unknown type as an optional + argument""" + with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: + pipeline_space_from_yaml( + "tests/test_yaml_search_space/config_including_unknown_types.yaml" + ) + assert str(excinfo.value.exception_type == "TypeError") From f02e8da1c894e6f384c0d4e2447e4509fe74054a Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Mon, 4 Dec 2023 10:48:24 +0100 Subject: [PATCH 09/20] fix issue regarding tests and search space from yaml file --- docs/pipeline_space.md | 39 +++++++++++---- src/neps/search_spaces/search_space.py | 50 +++++++++++-------- .../config_including_wrong_types.yaml | 2 +- .../correct_config.yaml | 14 ++++++ .../correct_config_including_types.yaml | 19 ++++++- .../test_search_space.py | 21 ++++++++ 6 files changed, 110 insertions(+), 35 deletions(-) diff --git a/docs/pipeline_space.md b/docs/pipeline_space.md index 3eaeb57e..75148816 100644 --- a/docs/pipeline_space.md +++ b/docs/pipeline_space.md @@ -7,7 +7,6 @@ In NePS, defining the Search Space is one of two essential tasks. You can define To define the Search Space using a Python dictionary, follow these steps: - Create a Python dictionary that specifies the parameters and their respective ranges. 
For example: ```python @@ -15,19 +14,18 @@ search_space = { "learning_rate": neps.FloatParameter(lower=0.00001, upper=0.1, log=True), "num_epochs": neps.IntegerParameter(lower=3, upper=30, is_fidelity=True), "optimizer": neps.CategoricalParameter(choices=["adam", "sgd", "rmsprop"]), - "dropout_rate": neps.FloatParameter(value=0.5) + "dropout_rate": neps.ConstantParameter(value=0.5), } - ``` ## Option 2: Using a YAML File -Create a YAML file (e.g., search_space.yaml) with the parameter definitions following this structure. +Create a YAML file (e.g., search_space.yaml) with the parameter definitions following this structure. ```yaml search_space: # important to start with learning_rate: - lower: 0.00001 + lower: 2e-3 # or 2*10^-3 upper: 0.1 log: true @@ -43,50 +41,69 @@ search_space: # important to start with value: 0.5 ... ``` + Ensure your YAML file starts with `search_space:`. This is the root key under which all parameter configurations are defined. ## Option 3: Using ConfigSpace + For users familiar with the ConfigSpace library, can also define the Search Space through ConfigurationSpace() + ```python from configspace import ConfigurationSpace, UniformFloatHyperparameter configspace = ConfigurationSpace() -configspace.add_hyperparameter(UniformFloatHyperparameter("learning_rate", 0.00001, 0.1, log=True)) +configspace.add_hyperparameter( + UniformFloatHyperparameter("learning_rate", 0.00001, 0.1, log=True) +) ``` -Link: https://github.com/automl/ConfigSpace + +For additional information on ConfigSpace and its features, please visit the following link: +https://github.com/automl/ConfigSpace # Supported HyperParameter Types ### FloatParameter and IntegerParameter + - **Expected Arguments:** - `lower`: The minimum value of the parameter. - `upper`: The maximum value of the parameter. - **Optional Arguments:** + - `type`: Specifies the data type of the parameter. Accepted Values: 'int', 'integer', or 'float'.
- `log`: Indicates if the parameter uses a logarithmic scale (default: False). - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). - - `default`: Sets a prior central value for the parameter (default: None. + - `default`: Sets a prior central value for the parameter (default: None). - `default_confidence`: Specifies the confidence level of the default value, - indicating how strongly the prior - should be considered default: "low". + indicating how strongly the prior + should be considered default: "low". ### Categorical Parameter + - **Expected Arguments:** - `choices`: A list of discrete options that the parameter can take. - **Optional Arguments:** + - `type`: Specifies the data type of the parameter. Accepted Values: 'cat' or 'categorical'. - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). - `default`: Sets a prior central value for the parameter (default: None. - `default_confidence`: Specifies the confidence level of the default value, - indicating how strongly the prior + indicating how strongly the prior should be considered default: "low". ### ConstantParameter + - **Expected Arguments:** - `value`: The fixed value for the parameter. - **Optional Arguments:** + - `type`: Specifies the data type of the parameter. Accepted Values: 'const' or 'constant'. - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). # Supported ArchitectureParameter Types +**Note**: The definition of Search Space from a YAML file is limited to supporting only Hyperparameter Types. + +If you are interested in exploring Architecture, particularly Hierarchical parameters, you can find detailed examples and usage in the following resources: + +- [Basic Usage Examples](https://github.com/automl/neps/tree/master/neps_examples/basic_usage) - This link provides basic usage examples that can help you understand the fundamentals of Architecture parameters. 
+- [Experimental Examples](https://github.com/automl/neps/tree/master/neps_examples/experimental) - For more advanced and experimental use cases, including Hierarchical parameters, check out this collection of examples. diff --git a/src/neps/search_spaces/search_space.py b/src/neps/search_spaces/search_space.py index 1679f0ee..f7b449d9 100644 --- a/src/neps/search_spaces/search_space.py +++ b/src/neps/search_spaces/search_space.py @@ -146,18 +146,19 @@ def pipeline_space_from_yaml(yaml_file_path): if not isinstance(details["lower"], int) or not isinstance( details["upper"], int ): - # for numbers like 1e2 - details["lower"] = int( - convert_scientific_notation(details["lower"]) - ) - details["upper"] = int( - convert_scientific_notation(details["upper"]) - ) - else: - raise TypeError( - f"'lower' and 'upper' must be integer for " - f"integer parameter '{name}'." - ) + try: + # for numbers like 1e2 and 10^ + details["lower"] = int( + convert_scientific_notation(details["lower"]) + ) + details["upper"] = int( + convert_scientific_notation(details["upper"]) + ) + except ValueError as e: + raise TypeError( + f"'lower' and 'upper' must be integer for " + f"integer parameter '{name}'." + ) from e pipeline_space[name] = IntegerParameter( lower=details["lower"], @@ -178,14 +179,19 @@ def pipeline_space_from_yaml(yaml_file_path): if not isinstance(details["lower"], float) or not isinstance( details["upper"], float ): - # for numbers like 1e-5 - details["lower"] = convert_scientific_notation(details["lower"]) - details["upper"] = convert_scientific_notation(details["upper"]) - else: - raise TypeError( - f"'lower' and 'upper' must be integer for " - f"integer parameter '{name}'." 
- ) + try: + # for numbers like 1e-5 and 10^ + details["lower"] = convert_scientific_notation( + details["lower"] + ) + details["upper"] = convert_scientific_notation( + details["upper"] + ) + except ValueError as e: + raise TypeError( + f"'lower' and 'upper' must be integer for " + f"integer parameter '{name}'." + ) from e pipeline_space[name] = FloatParameter( lower=details["lower"], @@ -243,7 +249,7 @@ def convert_scientific_notation(value, show_usage_flag=False): e_notation_pattern = r"^-?\d+(\.\d+)?[eE]-?\d+$" # Pattern for '10^' style notation, with optional base and multiplication symbol - ten_power_notation_pattern = r"^(-?\d+)?(\.\d+)?\*?10\^(-?\d+)$" + ten_power_notation_pattern = r"^(-?\d+)?(\.\d+)?[xX*]?10\^(-?\d+)$" if isinstance(value, str): # Remove all whitespace from the string @@ -260,7 +266,7 @@ def convert_scientific_notation(value, show_usage_flag=False): if decimal: base = base + decimal base = float(base) if base else 1 # Default to 1 if base is empty - value = base * (10 ** float(exponent)) + value = format(base * (10 ** float(exponent)), "e") if show_usage_flag is True: return float(value), True else: diff --git a/tests/test_yaml_search_space/config_including_wrong_types.yaml b/tests/test_yaml_search_space/config_including_wrong_types.yaml index 7cd7deda..69ac631c 100644 --- a/tests/test_yaml_search_space/config_including_wrong_types.yaml +++ b/tests/test_yaml_search_space/config_including_wrong_types.yaml @@ -6,7 +6,7 @@ search_space: log: true num_epochs: - type: float + type: cat lower: 3 upper: 30 is_fidelity: True diff --git a/tests/test_yaml_search_space/correct_config.yaml b/tests/test_yaml_search_space/correct_config.yaml index 48ebe477..71a5b114 100644 --- a/tests/test_yaml_search_space/correct_config.yaml +++ b/tests/test_yaml_search_space/correct_config.yaml @@ -9,6 +9,20 @@ search_space: upper: 30 log: false is_fidelity: True + batch_size: + type: int + lower: 10^2 + upper: 3 * 10^4 + log: true + is_fidelity: false + + 
sec_learning_rate: + lower: 3.3e-5 + upper: 1E-1 + + parameter_ex: + lower: 3.3 x 10^-5 + upper: 3.2*10^1 optimizer: choices: ["adam", "sgd", "rmsprop"] diff --git a/tests/test_yaml_search_space/correct_config_including_types.yaml b/tests/test_yaml_search_space/correct_config_including_types.yaml index 5fdb7400..751f0db1 100644 --- a/tests/test_yaml_search_space/correct_config_including_types.yaml +++ b/tests/test_yaml_search_space/correct_config_including_types.yaml @@ -6,11 +6,28 @@ search_space: log: true num_epochs: - type: int + type: integer lower: 3 upper: 30 is_fidelity: True + batch_size: + type: "int" + lower: 10^2 + upper: 3 * 10^4 + log: true + is_fidelity: false + + sec_learning_rate: + type: "float" + lower: 3.3e-5 + upper: 1E-1 + + parameter_ex: + type: float + lower: 3.3 x 10^-5 + upper: 3.2*10^1 + optimizer: type: cat choices: ["adam", "sgd", "rmsprop"] diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index 7bfbcf70..4ec7814c 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -27,6 +27,27 @@ def test_correct_yaml_file(path): assert pipeline_space["num_epochs"].is_fidelity is True assert pipeline_space["num_epochs"].default is None assert pipeline_space["num_epochs"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["batch_size"], IntegerParameter) + assert pipeline_space["batch_size"].lower == 100 + assert pipeline_space["batch_size"].upper == 30000 + assert pipeline_space["batch_size"].log is True + assert pipeline_space["batch_size"].is_fidelity is False + assert pipeline_space["batch_size"].default is None + assert pipeline_space["batch_size"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["sec_learning_rate"], FloatParameter) + assert pipeline_space["sec_learning_rate"].lower == 3.3e-5 + assert pipeline_space["sec_learning_rate"].upper == 0.1 + assert 
pipeline_space["sec_learning_rate"].log is False + assert pipeline_space["sec_learning_rate"].is_fidelity is False + assert pipeline_space["sec_learning_rate"].default is None + assert pipeline_space["sec_learning_rate"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["parameter_ex"], FloatParameter) + assert pipeline_space["parameter_ex"].lower == 3.3e-5 + assert pipeline_space["parameter_ex"].upper == 32.0 + assert pipeline_space["parameter_ex"].log is False + assert pipeline_space["parameter_ex"].is_fidelity is False + assert pipeline_space["parameter_ex"].default is None + assert pipeline_space["parameter_ex"].default_confidence_score == 0.5 assert isinstance(pipeline_space["optimizer"], CategoricalParameter) assert pipeline_space["optimizer"].choices == ["adam", "sgd", "rmsprop"] assert pipeline_space["optimizer"].is_fidelity is False From 04288ff9331c33bcfdd1fc13c747a24c6ec51b5f Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Mon, 4 Dec 2023 11:21:04 +0100 Subject: [PATCH 10/20] resolve merge conflicts --- .github/workflows/tests.yaml | 1 - src/neps/api.py | 8 +++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 9314029f..8664b86b 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -42,4 +42,3 @@ jobs: - name: Run pytest timeout-minutes: 15 run: poetry run pytest -m "all_examples or metahyper or neps_api or summary_csv" - diff --git a/src/neps/api.py b/src/neps/api.py index f733c9ef..ff38489b 100644 --- a/src/neps/api.py +++ b/src/neps/api.py @@ -16,8 +16,11 @@ from .optimizers import BaseOptimizer, SearcherMapping from .plot.tensorboard_eval import tblogger from .search_spaces.parameter import Parameter -from .search_spaces.search_space import SearchSpace, pipeline_space_from_configspace, \ - pipeline_space_from_yaml +from .search_spaces.search_space import ( + SearchSpace, + 
pipeline_space_from_configspace, + pipeline_space_from_yaml, +) from .status.status import post_run_csv from .utils.common import get_searcher_data, get_value from .utils.result_utils import get_loss @@ -418,4 +421,3 @@ def _run_args( searcher_instance, searcher_info, ) - From 65a82e588d42ae6349427a635b774ab782cba182 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Tue, 5 Dec 2023 18:53:10 +0100 Subject: [PATCH 11/20] add examples + test adaptation to new functionalities + outsorcing utils for yaml search space in own file --- docs/pipeline_space.md | 66 ++++- .../hpo_usage_example.py | 36 +++ .../search_space_example.yaml | 26 ++ src/neps/api.py | 1 + src/neps/search_spaces/search_space.py | 208 +-------------- .../search_spaces/yaml_search_space_utils.py | 237 ++++++++++++++++++ .../config_including_wrong_types.yaml | 2 +- .../correct_config.yaml | 8 +- .../correct_config_including_priors.yml | 7 +- .../correct_config_including_types.yaml | 2 +- .../test_search_space.py | 4 +- 11 files changed, 374 insertions(+), 223 deletions(-) create mode 100644 neps_examples/basic_usage/defining_search_space/hpo_usage_example.py create mode 100644 neps_examples/basic_usage/defining_search_space/search_space_example.yaml create mode 100644 src/neps/search_spaces/yaml_search_space_utils.py diff --git a/docs/pipeline_space.md b/docs/pipeline_space.md index 75148816..d73f85ed 100644 --- a/docs/pipeline_space.md +++ b/docs/pipeline_space.md @@ -30,6 +30,7 @@ search_space: # important to start with log: true num_epochs: + type: int # or "integer" lower: 3 upper: 30 is_fidelity: True @@ -62,7 +63,7 @@ configspace.add_hyperparameter( For additional information on ConfigSpace and its features, please visit the following link: https://github.com/automl/ConfigSpace -# Supported HyperParameter Types +# Supported HyperParameter Types using a YAML File ### FloatParameter and IntegerParameter @@ -70,40 +71,85 @@ https://github.com/automl/ConfigSpace - 
`lower`: The minimum value of the parameter. - `upper`: The maximum value of the parameter. - **Optional Arguments:** - - `type`: Specifies the data type of the parameter. Accepted Values: 'int', 'integer', or 'float'. - - `log`: Indicates if the parameter uses a logarithmic scale (default: False). - - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). + - `type`: Specifies the data type of the parameter. + - **Accepted Values**: 'int', 'integer', or 'float'. + - **Note:** If type is not specified e and 10^ notation gets converted to float + - `log`: Boolean that indicates if the parameter uses a logarithmic scale (default: False) + - [Details on how YAML interpret Boolean Values](#important-note-on-yaml-string-and-boolean-interpretation) + - `is_fidelity`: Boolean that marks the parameter as a fidelity parameter (default: False). - `default`: Sets a prior central value for the parameter (default: None). + - **Note:** Currently, if you define a prior for one parameter, you must do so for all your variables. - `default_confidence`: Specifies the confidence level of the default value, indicating how strongly the prior - should be considered default: "low". + should be considered (default: "low"). + - **Accepted Values**: 'low', 'medium', or 'high'. ### Categorical Parameter - **Expected Arguments:** - `choices`: A list of discrete options that the parameter can take. - **Optional Arguments:** - - `type`: Specifies the data type of the parameter. Accepted Values: 'cat' or 'categorical'. + - `type`: Specifies the data type of the parameter. + - Accepted Values: 'cat' or 'categorical'. - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). - - `default`: Sets a prior central value for the parameter (default: None. + - [Details on how YAML interpret Boolean Values](#important-note-on-yaml-string-and-boolean-interpretation) + - `default`: Sets a prior central value for the parameter (default: None). 
+ - **Note:** Currently, if you define a prior for one parameter, you must do so for all your variables. - `default_confidence`: Specifies the confidence level of the default value, indicating how strongly the prior - should be considered default: "low". + should be considered (default: "low"). ### ConstantParameter - **Expected Arguments:** - `value`: The fixed value for the parameter. - **Optional Arguments:** - - `type`: Specifies the data type of the parameter. Accepted Values: 'const' or 'constant'. + - `type`: Specifies the data type of the parameter. + - Accepted Values: 'const' or 'constant'. - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). +## Important Note on YAML Data Type Interpretation + +When working with YAML files, it's essential to understand how the format interprets different data types: + +1. **Strings in Quotes:** + + - Any value enclosed in single (`'`) or double (`"`) quotes is treated as a string. + - Example: `"true"`, `'123'` are read as strings. + +1. **Boolean Interpretation:** + + - Specific unquoted values are interpreted as booleans. This includes: + - `true`, `True`, `TRUE` + - `false`, `False`, `FALSE` + - `on`, `On`, `ON` + - `off`, `Off`, `OFF` + - `yes`, `Yes`, `YES` + - `no`, `No`, `NO` + +1. **Numbers:** + + - Unquoted numeric values are interpreted as integers or floating-point numbers, depending on their format. + - Example: `123` is an integer, `4.56` is a float, `1e3` is a float in exponential form. + +1. **Empty Strings:** + + - An empty string `""` or a key with no value is always treated as `null` in YAML. + +1. **Unquoted Non-Boolean, Non-Numeric Strings:** + + - Unquoted values that don't match boolean patterns or numeric formats are treated as strings. + - Example: `example` is a string. + +Remember to use appropriate quotes and formats to ensure values are interpreted as intended. 
+ # Supported ArchitectureParameter Types **Note**: The definition of Search Space from a YAML file is limited to supporting only Hyperparameter Types. If you are interested in exploring Architecture, particularly Hierarchical parameters, you can find detailed examples and usage in the following resources: -- [Basic Usage Examples](https://github.com/automl/neps/tree/master/neps_examples/basic_usage) - This link provides basic usage examples that can help you understand the fundamentals of Architecture parameters. +- [Basic Usage Examples](https://github.com/automl/neps/tree/master/neps_examples/basic_usage) - Basic usage + examples that can help you understand the fundamentals of Architecture parameters. - [Experimental Examples](https://github.com/automl/neps/tree/master/neps_examples/experimental) - For more advanced and experimental use cases, including Hierarchical parameters, check out this collection of examples. diff --git a/neps_examples/basic_usage/defining_search_space/hpo_usage_example.py b/neps_examples/basic_usage/defining_search_space/hpo_usage_example.py new file mode 100644 index 00000000..cd9bff43 --- /dev/null +++ b/neps_examples/basic_usage/defining_search_space/hpo_usage_example.py @@ -0,0 +1,36 @@ +import logging +import time +import numpy as np + +import neps + + +def run_pipeline( + float_name1, + float_name2, + categorical_name1, + categorical_name2, + integer_name1, + integer_name2, +): + # neps optimize to find values that maximizes sum, for demonstration only + loss = -float( + np.sum( + [float_name1, float_name2, categorical_name1, integer_name1, integer_name2] + ) + ) + if categorical_name2 == "a": + loss += 1 + + time.sleep(2) # For demonstration purposes only + return loss + + +logging.basicConfig(level=logging.INFO) +neps.run( + run_pipeline=run_pipeline, + pipeline_space="search_space_example.yaml", + root_directory="results/hyperparameters_example", + post_run_summary=True, + max_evaluations_total=15, +) diff --git 
a/neps_examples/basic_usage/defining_search_space/search_space_example.yaml b/neps_examples/basic_usage/defining_search_space/search_space_example.yaml new file mode 100644 index 00000000..1b0953ee --- /dev/null +++ b/neps_examples/basic_usage/defining_search_space/search_space_example.yaml @@ -0,0 +1,26 @@ +search_space: + float_name1: + lower: 3e-5 + upper: 0.1 + + float_name2: + type: "float" # Optional, as neps infers type from 'lower' and 'upper' + lower: 1.7 + upper: 42.0 + log: true # Optional, default: False + + categorical_name1: + choices: [0, 1] + + categorical_name2: + type: cat + choices: ["a", "b", "c"] + + integer_name1: + lower: 32 + upper: 128 + fidelity: True # Optional, default: False + + integer_name2: + lower: -5 + upper: 5 diff --git a/src/neps/api.py b/src/neps/api.py index ff38489b..5b0cdac6 100644 --- a/src/neps/api.py +++ b/src/neps/api.py @@ -101,6 +101,7 @@ def run( run_pipeline: Callable, root_directory: str | Path, pipeline_space: dict[str, Parameter | CS.ConfigurationSpace] + | str | CS.ConfigurationSpace | None = None, overwrite_working_directory: bool = False, diff --git a/src/neps/search_spaces/search_space.py b/src/neps/search_spaces/search_space.py index f7b449d9..534ca041 100644 --- a/src/neps/search_spaces/search_space.py +++ b/src/neps/search_spaces/search_space.py @@ -3,7 +3,6 @@ import collections.abc import pprint import random -import re from collections import OrderedDict from copy import deepcopy from itertools import product @@ -23,6 +22,10 @@ ) from .architecture.graph import Graph from .parameter import Parameter +from .yaml_search_space_utils import ( + SearchSpaceFromYamlFileError, + deduce_and_validate_param_type, +) def pipeline_space_from_configspace( @@ -118,48 +121,12 @@ def pipeline_space_from_yaml(yaml_file_path): # Iterate over the items in the YAML configuration for name, details in config["search_space"].items(): - if not (isinstance(name, str) and isinstance(details, dict)): - raise KeyError( - f"Invalid 
format for {name} in YAML file. " - f"Expected 'name' as string and corresponding 'details' as a " - f"dictionary. Found 'name' type: {type(name).__name__}, 'details' " - f"type: {type(details).__name__}." - ) - # get parameter type - param_type, type_provided = ( - (details["type"], True) - if "type" in details - else (deduce_param_type(name, details), False) - ) - param_type = param_type.lower() - print("create parameter") + param_type = deduce_and_validate_param_type(name, details) + # init parameter by checking type if param_type in ("int", "integer"): # Integer Parameter - if type_provided: - if "lower" not in details or "upper" not in details: - raise KeyError( - f"Missing 'lower' or 'upper' for integer " - f"parameter '{name}'." - ) - if not isinstance(details["lower"], int) or not isinstance( - details["upper"], int - ): - try: - # for numbers like 1e2 and 10^ - details["lower"] = int( - convert_scientific_notation(details["lower"]) - ) - details["upper"] = int( - convert_scientific_notation(details["upper"]) - ) - except ValueError as e: - raise TypeError( - f"'lower' and 'upper' must be integer for " - f"integer parameter '{name}'." - ) from e - pipeline_space[name] = IntegerParameter( lower=details["lower"], upper=details["upper"], @@ -170,29 +137,6 @@ def pipeline_space_from_yaml(yaml_file_path): ) elif param_type == "float": # Float Parameter - if type_provided: - if "lower" not in details or "upper" not in details: - raise KeyError( - f"Missing key 'lower' or 'upper' for float " - f"parameter '{name}'." - ) - if not isinstance(details["lower"], float) or not isinstance( - details["upper"], float - ): - try: - # for numbers like 1e-5 and 10^ - details["lower"] = convert_scientific_notation( - details["lower"] - ) - details["upper"] = convert_scientific_notation( - details["upper"] - ) - except ValueError as e: - raise TypeError( - f"'lower' and 'upper' must be integer for " - f"integer parameter '{name}'." 
- ) from e - pipeline_space[name] = FloatParameter( lower=details["lower"], upper=details["upper"], @@ -203,14 +147,6 @@ def pipeline_space_from_yaml(yaml_file_path): ) elif param_type in ("cat", "categorical"): # Categorical parameter - if type_provided: - if "choices" not in details: - raise KeyError( - f"Missing key 'choices' for categorical " f"parameter {name}" - ) - if not isinstance(details["choices"], list): - raise TypeError(f"The 'choices' for '{name}' must be a list.") - pipeline_space[name] = CategoricalParameter( choices=details["choices"], is_fidelity=details.get("is_fidelity", False), @@ -219,17 +155,11 @@ def pipeline_space_from_yaml(yaml_file_path): ) elif param_type in ("const", "constant"): # Constant parameter - if type_provided: - if "value" not in details: - raise KeyError( - f"Missing key 'value' for constant parameter " f"{name}" - ) - pipeline_space[name] = ConstantParameter( value=details["value"], is_fidelity=details.get("is_fidelity", False) ) else: - # Handle unknown parameter types + # Handle unknown parameter type raise TypeError( f"Unsupported parameter type{details['type']} for '{name}'.\n" f"Supported Types for argument type are:\n" @@ -243,130 +173,6 @@ def pipeline_space_from_yaml(yaml_file_path): return pipeline_space -def convert_scientific_notation(value, show_usage_flag=False): - """Check if the value is a string that matches scientific ^ - and convert it to float.""" - - e_notation_pattern = r"^-?\d+(\.\d+)?[eE]-?\d+$" - # Pattern for '10^' style notation, with optional base and multiplication symbol - ten_power_notation_pattern = r"^(-?\d+)?(\.\d+)?[xX*]?10\^(-?\d+)$" - - if isinstance(value, str): - # Remove all whitespace from the string - value_no_space = value.replace(" ", "") - if re.match(e_notation_pattern, value_no_space): - if show_usage_flag is True: - return float(value), True - else: - return float(value) - else: - match = re.match(ten_power_notation_pattern, value_no_space) - if match: - base, decimal, exponent 
= match.groups() - if decimal: - base = base + decimal - base = float(base) if base else 1 # Default to 1 if base is empty - value = format(base * (10 ** float(exponent)), "e") - if show_usage_flag is True: - return float(value), True - else: - return float(value) - if show_usage_flag is True: - return float(value), False - else: - return float(value) - - -class SearchSpaceFromYamlFileError(Exception): - """ - Exception raised for errors occurring during the initialization of the search space - from a YAML file. - - Attributes: - exception_type (str): The type of the original exception. - message (str): A detailed message that includes the type of the original exception - and the error description. - - Args: - exception (Exception): The original exception that was raised during the - initialization of the search space from the YAML file. - - Example Usage: - try: - # Code to initialize search space from YAML file - except (KeyError, TypeError, ValueError) as e: - raise SearchSpaceFromYamlFileError(e) - """ - - def __init__(self, exception): - self.exception_type = type(exception).__name__ - self.message = ( - f"Error occurred during initialization of search space from " - f"YAML file.\n {self.exception_type}: {exception}" - ) - super().__init__(self.message) - - -def deduce_param_type(name, details): - """ - Deduces the parameter type based on the provided details. - - This function analyzes the provided details dictionary to determine the type of - parameter. It supports identifying integer, float, categorical, and constant - parameter types. - - Args: - name (str): The name of the parameter. - details (dict): A dictionary containing parameter specifications. - - Returns: - str: The deduced parameter type ('int', 'float', 'categorical', or 'constant'). - - Raises: - TypeError: If the parameter type cannot be deduced from the details, or if the - provided details have inconsistent types for expected keys. 
- - Example: - param_type = deduce_param_type('example_param', {'lower': 0, 'upper': 10}) - """ - if "lower" in details and "upper" in details: - # Determine if it's an integer or float range parameter - if isinstance(details["lower"], int) and isinstance(details["upper"], int): - param_type = "int" - elif isinstance(details["lower"], float) and isinstance(details["upper"], float): - param_type = "float" - else: - details["lower"], flag_lower = convert_scientific_notation( - details["lower"], show_usage_flag=True - ) - details["upper"], flag_upper = convert_scientific_notation( - details["upper"], show_usage_flag=True - ) - # check if one value is 10^format to convert it to float - if flag_lower or flag_upper: - param_type = "float" - else: - raise TypeError( - f"Inconsistent types for 'lower' and 'upper' in '{name}'. " - f"Both must be either integers or floats." - ) - - return param_type - elif "choices" in details: - return "categorical" - elif "value" in details: - return "constant" - else: - raise TypeError( - f"Unable to deduce parameter type from {name} " - f"with details {details}\n" - "Supported parameters:\n" - "Float and Integer: Expected keys: 'lower', 'upper'\n" - "Categorical: Expected keys: 'choices'\n" - "Constant: Expected keys: 'value'" - ) - - class SearchSpace(collections.abc.Mapping): def __init__(self, **hyperparameters): self.hyperparameters = OrderedDict() diff --git a/src/neps/search_spaces/yaml_search_space_utils.py b/src/neps/search_spaces/yaml_search_space_utils.py new file mode 100644 index 00000000..ceaf7fe8 --- /dev/null +++ b/src/neps/search_spaces/yaml_search_space_utils.py @@ -0,0 +1,237 @@ +import re + + +def convert_scientific_notation(value, show_usage_flag=False): + """Check if the value is a string that matches scientific ^ or e (specially numbers + like 3.3e-5 with a float value in front, which yaml can not interpret directly as + float) + and convert it to float.""" + + e_notation_pattern = r"^-?\d+(\.\d+)?[eE]-?\d+$" + # 
Pattern for '10^' style notation, with optional base and multiplication symbol + ten_power_notation_pattern = r"^(-?\d+)?(\.\d+)?[xX*]?10\^(-?\d+)$" + + flag = False # Check if e or 10^notation was detected + + if isinstance(value, str): + # Remove all whitespace from the string + value_no_space = value.replace(" ", "") + + # check for e notation + if re.match(e_notation_pattern, value_no_space): + flag = True + else: + # check for 10^ notation + match = re.match(ten_power_notation_pattern, value_no_space) + if match: + base, decimal, exponent = match.groups() + if decimal: + base = base + decimal + base = float(base) if base else 1 # Default to 1 if base is empty + value = format(base * (10 ** float(exponent)), "e") + flag = True + + if show_usage_flag is True: + return float(value), flag + else: + return float(value) + + +class SearchSpaceFromYamlFileError(Exception): + """ + Exception raised for errors occurring during the initialization of the search space + from a YAML file. + + Attributes: + exception_type (str): The type of the original exception. + message (str): A detailed message that includes the type of the original exception + and the error description. + + Args: + exception (Exception): The original exception that was raised during the + initialization of the search space from the YAML file. + + Example Usage: + try: + # Code to initialize search space from YAML file + except (KeyError, TypeError, ValueError) as e: + raise SearchSpaceFromYamlFileError(e) + """ + + def __init__(self, exception): + self.exception_type = type(exception).__name__ + self.message = ( + f"Error occurred during initialization of search space from " + f"YAML file.\n {self.exception_type}: {exception}" + ) + super().__init__(self.message) + + +def deduce_and_validate_param_type(name, details): + """ + Deduces the parameter type from details and validates them. + + Args: + name (str): The name of the parameter. + details (dict): A dictionary containing parameter specifications. 
+ + Returns: + str: The deduced parameter type ('int', 'float', 'categorical', or 'constant'). + + Raises: + TypeError: If the type cannot be deduced or the details don't align with expected + constraints. + """ + # Deduce type + if "type" in details: + param_type = details["type"].lower() + else: + # Logic to infer type if not explicitly provided + param_type = deduce_param_type(name, details) + + # Validate details based on deduced type + validate_param_details(name, param_type, details) + + return param_type + + +def deduce_param_type(name, details): + """Deduces the parameter type based on the provided details. + + This function analyzes the provided details dictionary to determine the type of + parameter. It supports identifying integer, float, categorical, and constant + parameter types. + + Args: + name (str): The name of the parameter. + details (dict): A dictionary containing parameter specifications. + + Returns: + str: The deduced parameter type ('int', 'float', 'categorical', or 'constant'). + + Raises: + TypeError: If the parameter type cannot be deduced from the details, or if the + provided details have inconsistent types for expected keys. 
+ + Example: + param_type = deduce_param_type('example_param', {'lower': 0, 'upper': 10})""" + # Logic to deduce type from details + if "lower" in details and "upper" in details: + # Determine if it's an integer or float range parameter + if isinstance(details["lower"], int) and isinstance(details["upper"], int): + param_type = "int" + elif isinstance(details["lower"], float) and isinstance(details["upper"], float): + param_type = "float" + else: + details["lower"], flag_lower = convert_scientific_notation( + details["lower"], show_usage_flag=True + ) + details["upper"], flag_upper = convert_scientific_notation( + details["upper"], show_usage_flag=True + ) + # check if one value is 10^format to convert it to float + if flag_lower or flag_upper: + param_type = "float" + else: + raise TypeError( + f"Inconsistent types for 'lower' and 'upper' in '{name}'. " + f"Both must be either integers or floats." + ) + + elif "choices" in details: + param_type = "categorical" + elif "value" in details: + param_type = "constant" + else: + raise TypeError( + f"Unable to deduce parameter type from {name} " + f"with details {details}\n" + "Supported parameters:\n" + "Float and Integer: Expected keys: 'lower', 'upper'\n" + "Categorical: Expected keys: 'choices'\n" + "Constant: Expected keys: 'value'" + ) + return param_type + + +def validate_param_details(name, param_type, details): + if not (isinstance(name, str) and isinstance(details, dict)): + raise KeyError( + f"Invalid format for {name} in YAML file. " + f"Expected 'name' as string and corresponding 'details' as a " + f"dictionary. Found 'name' type: {type(name).__name__}, 'details' " + f"type: {type(details).__name__}." + ) + param_type = param_type.lower() + # init parameter by checking type + if param_type in ("int", "integer"): + # Check Integer Parameter + if "lower" not in details or "upper" not in details: + raise KeyError( + f"Missing 'lower' or 'upper' for integer " f"parameter '{name}'." 
+ ) + if not isinstance(details["lower"], int) or not isinstance(details["upper"], int): + try: + # for numbers like 1e2 and 10^ + lower, flag_lower = convert_scientific_notation( + details["lower"], show_usage_flag=True + ) + upper, flag_upper = convert_scientific_notation( + details["upper"], show_usage_flag=True + ) + # check if one value format is e or 10^ and if its an integer + if flag_lower or flag_upper: + if lower == int(lower) and upper == int(upper): + details["lower"] = lower + details["upper"] = upper + else: + raise ValueError() + else: + raise ValueError() + except ValueError as e: + raise TypeError( + f"'lower' and 'upper' must be integer for " + f"integer parameter '{name}'." + ) from e + + elif param_type == "float": + # Check Float Parameter + if "lower" not in details or "upper" not in details: + raise KeyError( + f"Missing key 'lower' or 'upper' for float " f"parameter '{name}'." + ) + if not isinstance(details["lower"], float) or not isinstance( + details["upper"], float + ): + try: + # for numbers like 1e-5 and 10^ + details["lower"] = convert_scientific_notation(details["lower"]) + details["upper"] = convert_scientific_notation(details["upper"]) + except ValueError as e: + raise TypeError( + f"'lower' and 'upper' must be integer for " + f"integer parameter '{name}'." 
+ ) from e + + elif param_type in ("cat", "categorical"): + # Check Categorical parameter + if "choices" not in details: + raise KeyError(f"Missing key 'choices' for categorical " f"parameter {name}") + if not isinstance(details["choices"], (list, tuple)): + raise TypeError(f"The 'choices' for '{name}' must be a list or tuple.") + + elif param_type in ("const", "constant"): + # Check Constant parameter + if "value" not in details: + raise KeyError(f"Missing key 'value' for constant parameter " f"{name}") + else: + # Handle unknown parameter types + raise TypeError( + f"Unsupported parameter type{details['type']} for '{name}'.\n" + f"Supported Types for argument type are:\n" + "For integer parameter: int, integer\n" + "For float parameter: float\n" + "For categorical parameter: cat, categorical\n" + "For constant parameter: const, constant\n" + ) + return param_type diff --git a/tests/test_yaml_search_space/config_including_wrong_types.yaml b/tests/test_yaml_search_space/config_including_wrong_types.yaml index 69ac631c..206f3a5c 100644 --- a/tests/test_yaml_search_space/config_including_wrong_types.yaml +++ b/tests/test_yaml_search_space/config_including_wrong_types.yaml @@ -6,7 +6,7 @@ search_space: log: true num_epochs: - type: cat + type: int lower: 3 upper: 30 is_fidelity: True diff --git a/tests/test_yaml_search_space/correct_config.yaml b/tests/test_yaml_search_space/correct_config.yaml index 71a5b114..663c34bb 100644 --- a/tests/test_yaml_search_space/correct_config.yaml +++ b/tests/test_yaml_search_space/correct_config.yaml @@ -2,19 +2,19 @@ search_space: learning_rate: lower: 0.00001 upper: 0.1 - log: true + log: TRUE num_epochs: - lower: 3 + lower: -3 upper: 30 log: false - is_fidelity: True + is_fidelity: on batch_size: type: int lower: 10^2 upper: 3 * 10^4 log: true - is_fidelity: false + is_fidelity: FALSE sec_learning_rate: lower: 3.3e-5 diff --git a/tests/test_yaml_search_space/correct_config_including_priors.yml 
b/tests/test_yaml_search_space/correct_config_including_priors.yml index 4bd10e9e..5a4573e9 100644 --- a/tests/test_yaml_search_space/correct_config_including_priors.yml +++ b/tests/test_yaml_search_space/correct_config_including_priors.yml @@ -11,12 +11,11 @@ search_space: upper: 30 is_fidelity: True default: 10 - default_confidence: medium optimizer: - choices: ["adam", "sgd", "rmsprop"] - default: "sgd" - default_confidence: medium + choices: [adam, sgd, rmsprop] + default: sgd + default_confidence: "medium" dropout_rate: value: 0.5 diff --git a/tests/test_yaml_search_space/correct_config_including_types.yaml b/tests/test_yaml_search_space/correct_config_including_types.yaml index 751f0db1..103648dc 100644 --- a/tests/test_yaml_search_space/correct_config_including_types.yaml +++ b/tests/test_yaml_search_space/correct_config_including_types.yaml @@ -7,7 +7,7 @@ search_space: num_epochs: type: integer - lower: 3 + lower: -3 upper: 30 is_fidelity: True diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index a7a0cf79..efa28e3b 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -21,7 +21,7 @@ def test_correct_yaml_file(path): assert pipeline_space["learning_rate"].default is None assert pipeline_space["learning_rate"].default_confidence_score == 0.5 assert isinstance(pipeline_space["num_epochs"], IntegerParameter) - assert pipeline_space["num_epochs"].lower == 3 + assert pipeline_space["num_epochs"].lower == -3 assert pipeline_space["num_epochs"].upper == 30 assert pipeline_space["num_epochs"].log is False assert pipeline_space["num_epochs"].is_fidelity is True @@ -83,7 +83,7 @@ def test_correct_including_priors_yaml_file(): assert pipeline_space["num_epochs"].log is False assert pipeline_space["num_epochs"].is_fidelity is True assert pipeline_space["num_epochs"].default == 10 - assert 
pipeline_space["num_epochs"].default_confidence_score == 0.25 + assert pipeline_space["num_epochs"].default_confidence_score == 0.5 assert isinstance(pipeline_space["optimizer"], CategoricalParameter) assert pipeline_space["optimizer"].choices == ["adam", "sgd", "rmsprop"] assert pipeline_space["optimizer"].is_fidelity is False From 9e1bff8d20a1820a99d9e9ac81fae0ef46fa9687 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Tue, 5 Dec 2023 18:54:27 +0100 Subject: [PATCH 12/20] changes in yaml_search_space examples --- .../basic_usage/defining_search_space/hpo_usage_example.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neps_examples/basic_usage/defining_search_space/hpo_usage_example.py b/neps_examples/basic_usage/defining_search_space/hpo_usage_example.py index cd9bff43..a121d4df 100644 --- a/neps_examples/basic_usage/defining_search_space/hpo_usage_example.py +++ b/neps_examples/basic_usage/defining_search_space/hpo_usage_example.py @@ -1,5 +1,6 @@ import logging import time + import numpy as np import neps From 40ce160c29834374abb3c0f38d6b576369225ca0 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Tue, 5 Dec 2023 23:43:30 +0100 Subject: [PATCH 13/20] remove 10^ notation + introduce key checking for parameters + enable e notation usage for all parameters and arguments --- docs/pipeline_space.md | 11 +- .../search_space_example.yaml | 2 +- src/neps/api.py | 1 + .../search_spaces/yaml_search_space_utils.py | 123 +++++++++++++++--- .../correct_config.yaml | 18 +-- .../correct_config_including_priors.yml | 10 +- .../correct_config_including_types.yaml | 15 +-- .../not_allowed_key_config.yml | 26 ++++ .../test_search_space.py | 32 +++-- 9 files changed, 175 insertions(+), 63 deletions(-) create mode 100644 tests/test_yaml_search_space/not_allowed_key_config.yml diff --git a/docs/pipeline_space.md b/docs/pipeline_space.md index d73f85ed..8474dfcf 100644 --- a/docs/pipeline_space.md +++ 
b/docs/pipeline_space.md @@ -70,10 +70,11 @@ https://github.com/automl/ConfigSpace - **Expected Arguments:** - `lower`: The minimum value of the parameter. - `upper`: The maximum value of the parameter. + - **Accepted Values:** Int or Float depending on the specific parameter type one wishes to use. - **Optional Arguments:** - `type`: Specifies the data type of the parameter. - **Accepted Values**: 'int', 'integer', or 'float'. - - **Note:** If type is not specified e and 10^ notation gets converted to float + - **Note:** If type is not specified e notation gets converted to float - `log`: Boolean that indicates if the parameter uses a logarithmic scale (default: False) - [Details on how YAML interpret Boolean Values](#important-note-on-yaml-string-and-boolean-interpretation) - `is_fidelity`: Boolean that marks the parameter as a fidelity parameter (default: False). @@ -87,7 +88,7 @@ https://github.com/automl/ConfigSpace ### Categorical Parameter - **Expected Arguments:** - - `choices`: A list of discrete options that the parameter can take. + - `choices`: A list of discrete options(int | float | str) that the parameter can take. - **Optional Arguments:** - `type`: Specifies the data type of the parameter. - Accepted Values: 'cat' or 'categorical'. @@ -102,7 +103,7 @@ https://github.com/automl/ConfigSpace ### ConstantParameter - **Expected Arguments:** - - `value`: The fixed value for the parameter. + - `value`: The fixed value(int | float | str) for the parameter. - **Optional Arguments:** - `type`: Specifies the data type of the parameter. - Accepted Values: 'const' or 'constant'. @@ -130,7 +131,9 @@ When working with YAML files, it's essential to understand how the format interp 1. **Numbers:** - Unquoted numeric values are interpreted as integers or floating-point numbers, depending on their format. - - Example: `123` is an integer, `4.56` is a float, `1e3` is a float in exponential form. 
+ - Example: `123` is an integer, `4.56` is a float, `1e3` can be either an integer or a floating-point number, + depending on the type specified by the user. By default, 1e3 is treated as a floating-point number. + This interpretation is unique to our system. 1. **Empty Strings:** diff --git a/neps_examples/basic_usage/defining_search_space/search_space_example.yaml b/neps_examples/basic_usage/defining_search_space/search_space_example.yaml index 1b0953ee..27bd37c1 100644 --- a/neps_examples/basic_usage/defining_search_space/search_space_example.yaml +++ b/neps_examples/basic_usage/defining_search_space/search_space_example.yaml @@ -19,7 +19,7 @@ search_space: integer_name1: lower: 32 upper: 128 - fidelity: True # Optional, default: False + is_fidelity: True # Optional, default: False integer_name2: lower: -5 diff --git a/src/neps/api.py b/src/neps/api.py index 5b0cdac6..222f4b52 100644 --- a/src/neps/api.py +++ b/src/neps/api.py @@ -102,6 +102,7 @@ def run( root_directory: str | Path, pipeline_space: dict[str, Parameter | CS.ConfigurationSpace] | str + | Path | CS.ConfigurationSpace | None = None, overwrite_working_directory: bool = False, diff --git a/src/neps/search_spaces/yaml_search_space_utils.py b/src/neps/search_spaces/yaml_search_space_utils.py index ceaf7fe8..afb040f0 100644 --- a/src/neps/search_spaces/yaml_search_space_utils.py +++ b/src/neps/search_spaces/yaml_search_space_utils.py @@ -2,16 +2,13 @@ def convert_scientific_notation(value, show_usage_flag=False): - """Check if the value is a string that matches scientific ^ or e (specially numbers - like 3.3e-5 with a float value in front, which yaml can not interpret directly as - float) - and convert it to float.""" + """Check if the value is a string that matches scientific e notation and convert it + to float. 
(specially numbers like 3.3e-5 with a float value in front, which yaml + can not interpret directly as float).""" e_notation_pattern = r"^-?\d+(\.\d+)?[eE]-?\d+$" - # Pattern for '10^' style notation, with optional base and multiplication symbol - ten_power_notation_pattern = r"^(-?\d+)?(\.\d+)?[xX*]?10\^(-?\d+)$" - flag = False # Check if e or 10^notation was detected + flag = False # Check if e notation was detected if isinstance(value, str): # Remove all whitespace from the string @@ -20,16 +17,6 @@ def convert_scientific_notation(value, show_usage_flag=False): # check for e notation if re.match(e_notation_pattern, value_no_space): flag = True - else: - # check for 10^ notation - match = re.match(ten_power_notation_pattern, value_no_space) - if match: - base, decimal, exponent = match.groups() - if decimal: - base = base + decimal - base = float(base) if base else 1 # Default to 1 if base is empty - value = format(base * (10 ** float(exponent)), "e") - flag = True if show_usage_flag is True: return float(value), flag @@ -165,6 +152,20 @@ def validate_param_details(name, param_type, details): param_type = param_type.lower() # init parameter by checking type if param_type in ("int", "integer"): + # check if all keys are allowed + check_allowed_keys( + name, + details, + { + "lower", + "upper", + "type", + "log", + "is_fidelity", + "default", + "default_confidence", + }, + ) # Check Integer Parameter if "lower" not in details or "upper" not in details: raise KeyError( @@ -182,8 +183,8 @@ def validate_param_details(name, param_type, details): # check if one value format is e or 10^ and if its an integer if flag_lower or flag_upper: if lower == int(lower) and upper == int(upper): - details["lower"] = lower - details["upper"] = upper + details["lower"] = int(lower) + details["upper"] = int(upper) else: raise ValueError() else: @@ -193,8 +194,32 @@ def validate_param_details(name, param_type, details): f"'lower' and 'upper' must be integer for " f"integer parameter 
'{name}'." ) from e + if "default" in details: + if not isinstance(details["default"], int): + default = convert_scientific_notation(details["default"]) + if default == int(default): + details["default"] = int(default) + else: + raise TypeError( + f"default value {details['default']} " + f"must be integer for integer parameter {name}" + ) elif param_type == "float": + # check if all keys are allowed + check_allowed_keys( + name, + details, + { + "lower", + "upper", + "type", + "log", + "is_fidelity", + "default", + "default_confidence", + }, + ) # Check Float Parameter if "lower" not in details or "upper" not in details: raise KeyError( @@ -212,18 +237,66 @@ def validate_param_details(name, param_type, details): f"'lower' and 'upper' must be integer for " f"integer parameter '{name}'." ) from e + if "default" in details: + if not isinstance(details["default"], float): + try: + details["default"] = convert_scientific_notation(details["default"]) + except ValueError as e: + raise TypeError( + f" 'default' must be float for float parameter " f"{name} " + ) from e elif param_type in ("cat", "categorical"): + # check if all keys are allowed + check_allowed_keys( + name, + details, + {"choices", "type", "is_fidelity", "default", "default_confidence"}, + ) # Check Categorical parameter if "choices" not in details: raise KeyError(f"Missing key 'choices' for categorical " f"parameter {name}") if not isinstance(details["choices"], (list, tuple)): raise TypeError(f"The 'choices' for '{name}' must be a list or tuple.") - + for i, element in enumerate(details["choices"]): + try: + converted_value, e_flag = convert_scientific_notation( + element, show_usage_flag=True + ) + if e_flag: + details["choices"][ + i + ] = converted_value # Replace the element at the same position + except ValueError: + pass # If a ValueError occurs, simply continue to the next element + if "default" in details: + e_flag = False + try: + # check if e notation, if then convert to number + default, e_flag 
= convert_scientific_notation( + details["default"], show_usage_flag=True + ) + except ValueError: + pass + if e_flag is True: + details["default"] = default elif param_type in ("const", "constant"): + # check if all keys are allowed + check_allowed_keys(name, details, {"value", "type", "is_fidelity"}) # Check Constant parameter if "value" not in details: raise KeyError(f"Missing key 'value' for constant parameter " f"{name}") + else: + e_flag = False + try: + converted_value, e_flag = convert_scientific_notation( + details["value"], show_usage_flag=True + ) + except ValueError: + pass + if e_flag: + details["value"] = converted_value + else: # Handle unknown parameter types raise TypeError( @@ -235,3 +308,13 @@ def validate_param_details(name, param_type, details): "For constant parameter: const, constant\n" ) return param_type + + +def check_allowed_keys(name, my_dict, allowed_keys): + """ + Checks if all keys in 'my_dict' are contained in the set 'allowed_keys'. + If an unallowed key is found, an exception is raised. 
+ """ + for key in my_dict: + if key not in allowed_keys: + raise KeyError(f"This key is not allowed: '{key}' for parameter '{name}'") diff --git a/tests/test_yaml_search_space/correct_config.yaml b/tests/test_yaml_search_space/correct_config.yaml index 663c34bb..3cc10e1e 100644 --- a/tests/test_yaml_search_space/correct_config.yaml +++ b/tests/test_yaml_search_space/correct_config.yaml @@ -3,6 +3,7 @@ search_space: lower: 0.00001 upper: 0.1 log: TRUE + is_fidelity: off num_epochs: lower: -3 @@ -11,21 +12,20 @@ search_space: is_fidelity: on batch_size: type: int - lower: 10^2 - upper: 3 * 10^4 - log: true + lower: 1E2 + upper: 3e4 + log: on is_fidelity: FALSE sec_learning_rate: lower: 3.3e-5 - upper: 1E-1 - - parameter_ex: - lower: 3.3 x 10^-5 - upper: 3.2*10^1 + upper: 1.5E-1 optimizer: - choices: ["adam", "sgd", "rmsprop"] + choices: [2, "sgd", 10e-3] dropout_rate: value: 0.5 + + constant_value: + value: 1e3 diff --git a/tests/test_yaml_search_space/correct_config_including_priors.yml b/tests/test_yaml_search_space/correct_config_including_priors.yml index 5a4573e9..2b6d2e80 100644 --- a/tests/test_yaml_search_space/correct_config_including_priors.yml +++ b/tests/test_yaml_search_space/correct_config_including_priors.yml @@ -3,20 +3,20 @@ search_space: lower: 0.00001 upper: 0.1 log: true - default: 0.001 + default: 3.3E-2 default_confidence: high num_epochs: lower: 3 upper: 30 is_fidelity: True - default: 10 + default: 1e1 optimizer: - choices: [adam, sgd, rmsprop] - default: sgd + choices: [adam, 90E-3, rmsprop] + default: 90E-3 default_confidence: "medium" dropout_rate: - value: 0.5 + value: 1E3 is_fidelity: true diff --git a/tests/test_yaml_search_space/correct_config_including_types.yaml b/tests/test_yaml_search_space/correct_config_including_types.yaml index 103648dc..6ed51cfe 100644 --- a/tests/test_yaml_search_space/correct_config_including_types.yaml +++ b/tests/test_yaml_search_space/correct_config_including_types.yaml @@ -2,7 +2,7 @@ search_space: 
learning_rate: type: float lower: 0.00001 - upper: 0.1 + upper: 1e-1 log: true num_epochs: @@ -13,24 +13,19 @@ search_space: batch_size: type: "int" - lower: 10^2 - upper: 3 * 10^4 + lower: 1e2 + upper: 3E4 log: true is_fidelity: false sec_learning_rate: type: "float" lower: 3.3e-5 - upper: 1E-1 - - parameter_ex: - type: float - lower: 3.3 x 10^-5 - upper: 3.2*10^1 + upper: 1.5E-1 optimizer: type: cat - choices: ["adam", "sgd", "rmsprop"] + choices: [2, "sgd", 10E-3] dropout_rate: type: const diff --git a/tests/test_yaml_search_space/not_allowed_key_config.yml b/tests/test_yaml_search_space/not_allowed_key_config.yml new file mode 100644 index 00000000..b6703282 --- /dev/null +++ b/tests/test_yaml_search_space/not_allowed_key_config.yml @@ -0,0 +1,26 @@ +search_space: + float_name1: + lower: 3e-5 + upper: 0.1 + + float_name2: + type: "float" # Optional, as neps infers type from 'lower' and 'upper' + lower: 1.7 + upper: 42.0 + log: true + + categorical_name1: + choices: [0, 1] + + categorical_name2: + type: cat + choices: ["a", "b", "c"] + + integer_name1: + lower: 32 + upper: 128 + fidelity: True # error, fidelity instead of is_fidelity + + integer_name2: + lower: -5 + upper: 5 diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index efa28e3b..aecf617a 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -36,20 +36,13 @@ def test_correct_yaml_file(path): assert pipeline_space["batch_size"].default_confidence_score == 0.5 assert isinstance(pipeline_space["sec_learning_rate"], FloatParameter) assert pipeline_space["sec_learning_rate"].lower == 3.3e-5 - assert pipeline_space["sec_learning_rate"].upper == 0.1 + assert pipeline_space["sec_learning_rate"].upper == 0.15 assert pipeline_space["sec_learning_rate"].log is False assert pipeline_space["sec_learning_rate"].is_fidelity is False assert pipeline_space["sec_learning_rate"].default is 
None assert pipeline_space["sec_learning_rate"].default_confidence_score == 0.5 - assert isinstance(pipeline_space["parameter_ex"], FloatParameter) - assert pipeline_space["parameter_ex"].lower == 3.3e-5 - assert pipeline_space["parameter_ex"].upper == 32.0 - assert pipeline_space["parameter_ex"].log is False - assert pipeline_space["parameter_ex"].is_fidelity is False - assert pipeline_space["parameter_ex"].default is None - assert pipeline_space["parameter_ex"].default_confidence_score == 0.5 assert isinstance(pipeline_space["optimizer"], CategoricalParameter) - assert pipeline_space["optimizer"].choices == ["adam", "sgd", "rmsprop"] + assert pipeline_space["optimizer"].choices == [2, "sgd", 10e-3] assert pipeline_space["optimizer"].is_fidelity is False assert pipeline_space["optimizer"].default is None assert pipeline_space["optimizer"].default_confidence_score == 2 @@ -75,7 +68,7 @@ def test_correct_including_priors_yaml_file(): assert pipeline_space["learning_rate"].upper == 0.1 assert pipeline_space["learning_rate"].log is True assert pipeline_space["learning_rate"].is_fidelity is False - assert pipeline_space["learning_rate"].default == 0.001 + assert pipeline_space["learning_rate"].default == 3.3e-2 assert pipeline_space["learning_rate"].default_confidence_score == 0.125 assert isinstance(pipeline_space["num_epochs"], IntegerParameter) assert pipeline_space["num_epochs"].lower == 3 @@ -85,13 +78,13 @@ def test_correct_including_priors_yaml_file(): assert pipeline_space["num_epochs"].default == 10 assert pipeline_space["num_epochs"].default_confidence_score == 0.5 assert isinstance(pipeline_space["optimizer"], CategoricalParameter) - assert pipeline_space["optimizer"].choices == ["adam", "sgd", "rmsprop"] + assert pipeline_space["optimizer"].choices == ["adam", 90e-3, "rmsprop"] assert pipeline_space["optimizer"].is_fidelity is False - assert pipeline_space["optimizer"].default == "sgd" + assert pipeline_space["optimizer"].default == 90e-3 assert 
pipeline_space["optimizer"].default_confidence_score == 4 assert isinstance(pipeline_space["dropout_rate"], ConstantParameter) - assert pipeline_space["dropout_rate"].value == 0.5 - assert pipeline_space["dropout_rate"].is_fidelity is True + assert pipeline_space["dropout_rate"].value == 1e3 + assert pipeline_space["dropout_rate"].default == 1e3 @pytest.mark.neps_api @@ -141,3 +134,14 @@ def test_yaml_file_including_unkown_types(): "tests/test_yaml_search_space/config_including_unknown_types.yaml" ) assert str(excinfo.value.exception_type == "TypeError") + + +@pytest.mark.neps_api +def test_yaml_file_including_not_allowed_parameter_keys(): + """Test the function with a YAML file that defines an unknown type as an optional + argument""" + with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: + pipeline_space_from_yaml( + "tests/test_yaml_search_space/not_allowed_key_config.yml" + ) + assert str(excinfo.value.exception_type == "KeyError") From 2723c09d63d41371622ae6827c33111bf6766571 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Wed, 6 Dec 2023 15:04:27 +0100 Subject: [PATCH 14/20] made code more readable for validate parameter inputs + add tests + comment functions --- src/neps/search_spaces/search_space.py | 39 +- .../search_spaces/yaml_search_space_utils.py | 344 ++++++++++-------- .../inconsistent_types_config.yml | 4 +- .../inconsistent_types_config2.yml | 18 + .../test_search_space.py | 5 + 5 files changed, 239 insertions(+), 171 deletions(-) create mode 100644 tests/test_yaml_search_space/inconsistent_types_config2.yml diff --git a/src/neps/search_spaces/search_space.py b/src/neps/search_spaces/search_space.py index 534ca041..92856910 100644 --- a/src/neps/search_spaces/search_space.py +++ b/src/neps/search_spaces/search_space.py @@ -68,35 +68,38 @@ def pipeline_space_from_configspace( def pipeline_space_from_yaml(yaml_file_path): """ - Reads configuration details from a YAML file and creates a dictionary of 
parameters. + Reads configuration details from a YAML file and constructs a pipeline space + dictionary. - This function parses a YAML file to extract configuration details and organizes them - into a dictionary. Each key in the dictionary corresponds to a parameter name, and - the value is an object representing the parameter configuration. + This function extracts parameter configurations from a YAML file, validating and + translating them into corresponding parameter objects. The resulting dictionary + maps parameter names to their respective configuration objects. Args: - yaml_file_path (str): Path to the YAML file containing configuration details. + yaml_file_path (str): Path to the YAML file containing parameter configurations. Returns: - dict: A dictionary with parameter names as keys and parameter objects as values. + dict: A dictionary where keys are parameter names and values are parameter + objects (like IntegerParameter, FloatParameter, etc.). Raises: - SearchSpaceFromYamlFileError: Wraps and re-raises exceptions (KeyError, TypeError, - ValueError) that occur during the initialization of the search space from the YAML - file. This custom exception class provides additional context about the error, - enhancing diagnostic clarity and simplifying error handling for function callers. - It includes the type of the original exception and a descriptive message, thereby - localizing error handling to this specific function and preventing the propagation - of these generic exceptions. + SearchSpaceFromYamlFileError: This custom exception is raised if there are issues + with the YAML file's format or contents. It encapsulates underlying exceptions + (KeyError, TypeError, ValueError) that occur during the processing of the YAML + file. This approach localizes error handling, providing clearer context and + diagnostics. The raised exception includes the type of the original error and + a descriptive message. 
Note: - The YAML file must be structured correctly with appropriate keys and values for - each parameter type. The function validates the structure and content of the YAML - file, raising specific errors for missing mandatory configuration details, type - mismatches, and unknown parameter types. + The YAML file should be properly structured with valid keys and values as per the + expected parameter types. The function employs modular validation and type + deduction logic, ensuring each parameter's configuration adheres to expected + formats and constraints. Any deviation results in an appropriately raised error, + which is then captured by SearchSpaceFromYamlFileError for streamlined error + handling. Example: - Given a YAML file 'config.yaml', call the function as follows: + To use this function with a YAML file 'config.yaml', you can do: pipeline_space = pipeline_space_from_yaml('config.yaml') """ try: diff --git a/src/neps/search_spaces/yaml_search_space_utils.py b/src/neps/search_spaces/yaml_search_space_utils.py index afb040f0..b3789842 100644 --- a/src/neps/search_spaces/yaml_search_space_utils.py +++ b/src/neps/search_spaces/yaml_search_space_utils.py @@ -8,7 +8,7 @@ def convert_scientific_notation(value, show_usage_flag=False): e_notation_pattern = r"^-?\d+(\.\d+)?[eE]-?\d+$" - flag = False # Check if e notation was detected + flag = False # Flag if e notation was detected if isinstance(value, str): # Remove all whitespace from the string @@ -76,7 +76,7 @@ def deduce_and_validate_param_type(name, details): # Logic to infer type if not explicitly provided param_type = deduce_param_type(name, details) - # Validate details based on deduced type + # Validate details of a parameter based on (deduced) type validate_param_details(name, param_type, details) return param_type @@ -103,6 +103,8 @@ def deduce_param_type(name, details): Example: param_type = deduce_param_type('example_param', {'lower': 0, 'upper': 10})""" # Logic to deduce type from details + + # check 
for int and float conditions if "lower" in details and "upper" in details: # Determine if it's an integer or float range parameter if isinstance(details["lower"], int) and isinstance(details["upper"], int): @@ -116,7 +118,7 @@ def deduce_param_type(name, details): details["upper"], flag_upper = convert_scientific_notation( details["upper"], show_usage_flag=True ) - # check if one value is 10^format to convert it to float + # check if one value is e notation and if so convert it to float if flag_lower or flag_upper: param_type = "float" else: @@ -124,9 +126,11 @@ def deduce_param_type(name, details): f"Inconsistent types for 'lower' and 'upper' in '{name}'. " f"Both must be either integers or floats." ) - + # check for categorical condition elif "choices" in details: param_type = "categorical" + + # check for constant condition elif "value" in details: param_type = "constant" else: @@ -142,6 +146,33 @@ def deduce_param_type(name, details): def validate_param_details(name, param_type, details): + """ + Validates the details of a parameter based on its type. + + This function checks the format and type-specific details of a parameter + specified in a YAML file. It ensures that the 'name' of the parameter is a string + and its 'details' are provided as a dictionary. Depending on the parameter type, + it delegates the validation to the appropriate type-specific validation function. + + Parameters: + name (str): The name of the parameter. It should be a string. + param_type (str): The type of the parameter. Supported types are 'int' (or 'integer'), + 'float', 'cat' (or 'categorical'), and 'const' (or 'constant'). + details (dict): The detailed configuration of the parameter, which includes its + attributes like 'lower', 'upper', 'default', etc. + + Raises: + KeyError: If the 'name' is not a string or 'details' is not a dictionary, or if + the necessary keys in the 'details' are missing based on the parameter type. 
+ TypeError: If the 'param_type' is not one of the supported types. + + Returns: + str: The parameter type in lowercase. + + Example Usage: + validate_param_details("learning_rate", "float", {"lower": 0.01, "upper": 0.1, + "default": 0.05}) + """ if not (isinstance(name, str) and isinstance(details, dict)): raise KeyError( f"Invalid format for {name} in YAML file. " @@ -152,155 +183,20 @@ def validate_param_details(name, param_type, details): param_type = param_type.lower() # init parameter by checking type if param_type in ("int", "integer"): - # check if all keys are allowed - check_allowed_keys( - name, - details, - { - "lower", - "upper", - "type", - "log", - "is_fidelity", - "default", - "default_confidence", - }, - ) - # Check Integer Parameter - if "lower" not in details or "upper" not in details: - raise KeyError( - f"Missing 'lower' or 'upper' for integer " f"parameter '{name}'." - ) - if not isinstance(details["lower"], int) or not isinstance(details["upper"], int): - try: - # for numbers like 1e2 and 10^ - lower, flag_lower = convert_scientific_notation( - details["lower"], show_usage_flag=True - ) - upper, flag_upper = convert_scientific_notation( - details["upper"], show_usage_flag=True - ) - # check if one value format is e or 10^ and if its an integer - if flag_lower or flag_upper: - if lower == int(lower) and upper == int(upper): - details["lower"] = int(lower) - details["upper"] = int(upper) - else: - raise ValueError() - else: - raise ValueError() - except ValueError as e: - raise TypeError( - f"'lower' and 'upper' must be integer for " - f"integer parameter '{name}'." 
- ) from e - if "default" in details: - if not isinstance(details["default"], int): - default = convert_scientific_notation(details["default"]) - if default == int(default): - details["default"] = int(default) - else: - raise TypeError( - f"default value {details['default']} " - f"must be integer for integer parameter {name}" - ) + validate_integer_parameter(name, details) elif param_type == "float": - # check if all keys are allowed - check_allowed_keys( - name, - details, - { - "lower", - "upper", - "type", - "log", - "is_fidelity", - "default", - "default_confidence", - }, - ) - # Check Float Parameter - if "lower" not in details or "upper" not in details: - raise KeyError( - f"Missing key 'lower' or 'upper' for float " f"parameter '{name}'." - ) - if not isinstance(details["lower"], float) or not isinstance( - details["upper"], float - ): - try: - # for numbers like 1e-5 and 10^ - details["lower"] = convert_scientific_notation(details["lower"]) - details["upper"] = convert_scientific_notation(details["upper"]) - except ValueError as e: - raise TypeError( - f"'lower' and 'upper' must be integer for " - f"integer parameter '{name}'." 
- ) from e - if "default" in details: - if not isinstance(details["default"], float): - try: - details["default"] = convert_scientific_notation(details["default"]) - except ValueError as e: - raise TypeError( - f" 'default' must be float for float parameter " f"{name} " - ) from e + validate_float_parameter(name, details) elif param_type in ("cat", "categorical"): - # check if all keys are allowed - check_allowed_keys( - name, - details, - {"choices", "type", "is_fidelity", "default", "default_confidence"}, - ) - # Check Categorical parameter - if "choices" not in details: - raise KeyError(f"Missing key 'choices' for categorical " f"parameter {name}") - if not isinstance(details["choices"], (list, tuple)): - raise TypeError(f"The 'choices' for '{name}' must be a list or tuple.") - for i, element in enumerate(details["choices"]): - try: - converted_value, e_flag = convert_scientific_notation( - element, show_usage_flag=True - ) - if e_flag: - details["choices"][ - i - ] = converted_value # Replace the element at the same position - except ValueError: - pass # If a ValueError occurs, simply continue to the next element - if "default" in details: - e_flag = False - try: - # check if e notation, if then convert to number - default, e_flag = convert_scientific_notation( - details["default"], show_usage_flag=True - ) - except ValueError: - pass - if e_flag is True: - details["default"] = default - elif param_type in ("const", "constant"): - # check if all keys are allowed - check_allowed_keys(name, details, {"value", "type", "is_fidelity"}) - # Check Constant parameter - if "value" not in details: - raise KeyError(f"Missing key 'value' for constant parameter " f"{name}") - else: - e_flag = False - try: - converted_value, e_flag = convert_scientific_notation( - details["value"], show_usage_flag=True - ) - except ValueError: - pass - if e_flag: - details["value"] = converted_value + validate_categorical_parameter(name, details) + elif param_type in ("const", "constant"): + 
validate_constant_parameter(name, details) else: # Handle unknown parameter types raise TypeError( - f"Unsupported parameter type{details['type']} for '{name}'.\n" + f"Unsupported parameter type'{details['type']}' for '{name}'.\n" f"Supported Types for argument type are:\n" "For integer parameter: int, integer\n" "For float parameter: float\n" @@ -310,11 +206,157 @@ def validate_param_details(name, param_type, details): return param_type -def check_allowed_keys(name, my_dict, allowed_keys): +def validate_integer_parameter(name, details): + """validate int parameter and convert e notation values to int""" + # check if all keys are allowed to use and if the mandatory ones are provided + check_keys( + name, + details, + {"lower", "upper", "type", "log", "is_fidelity", "default", "default_confidence"}, + {"lower", "upper"}, + ) + + if not isinstance(details["lower"], int) or not isinstance(details["upper"], int): + try: + # for numbers like 1e2 and 10^ + lower, flag_lower = convert_scientific_notation( + details["lower"], show_usage_flag=True + ) + upper, flag_upper = convert_scientific_notation( + details["upper"], show_usage_flag=True + ) + # check if one value format is e notation and if its an integer + if flag_lower or flag_upper: + if lower == int(lower) and upper == int(upper): + details["lower"] = int(lower) + details["upper"] = int(upper) + else: + raise TypeError() + else: + raise TypeError() + except (ValueError, TypeError) as e: + raise TypeError( + f"'lower' and 'upper' must be integer for " f"integer parameter '{name}'." 
+ ) from e + if "default" in details: + if not isinstance(details["default"], int): + try: + # convert value can raise ValueError + default = convert_scientific_notation(details["default"]) + if default == int(default): + details["default"] = int(default) + else: + raise TypeError() # type of value is not int + except (ValueError, TypeError) as e: + raise TypeError( + f"default value {details['default']} " + f"must be integer for integer parameter {name}" + ) from e + + +def validate_float_parameter(name, details): + """validate float parameter and convert e notation values to float""" + # check if all keys are allowed to use and if the mandatory ones are provided + check_keys( + name, + details, + {"lower", "upper", "type", "log", "is_fidelity", "default", "default_confidence"}, + {"lower", "upper"}, + ) + + if not isinstance(details["lower"], float) or not isinstance(details["upper"], float): + try: + # for numbers like 1e-5 and 10^ + details["lower"] = convert_scientific_notation(details["lower"]) + details["upper"] = convert_scientific_notation(details["upper"]) + except ValueError as e: + raise TypeError( + f"'lower' and 'upper' must be integer for " f"integer parameter '{name}'." 
+ ) from e + if "default" in details: + if not isinstance(details["default"], float): + try: + details["default"] = convert_scientific_notation(details["default"]) + except ValueError as e: + raise TypeError( + f" default'{details['default']}' must be float for float " + f"parameter {name} " + ) from e + + +def validate_categorical_parameter(name, details): + """validate categorical parameter and convert e notation values to float""" + # check if all keys are allowed to use and if the mandatory ones are provided + check_keys( + name, + details, + {"choices", "type", "is_fidelity", "default", "default_confidence"}, + {"choices"}, + ) + + if not isinstance(details["choices"], list): + raise TypeError(f"The 'choices' for '{name}' must be a list.") + for i, element in enumerate(details["choices"]): + try: + converted_value, e_flag = convert_scientific_notation( + element, show_usage_flag=True + ) + if e_flag: + details["choices"][ + i + ] = converted_value # Replace the element at the same position + except ValueError: + pass # If a ValueError occurs, simply continue to the next element + if "default" in details: + e_flag = False + try: + # check if e notation, if then convert to number + default, e_flag = convert_scientific_notation( + details["default"], show_usage_flag=True + ) + except ValueError: + pass # if default value is not in a numeric format, Value Error occurs + if e_flag is True: + details["default"] = default + + +def validate_constant_parameter(name, details): + """Validate constant parameter and convert e notation to float""" + # check if all keys are allowed to use and if the mandatory ones are provided + check_keys(name, details, {"value", "type", "is_fidelity"}, {"value"}) + + # check for e notation and convert it to float + e_flag = False + try: + converted_value, e_flag = convert_scientific_notation( + details["value"], show_usage_flag=True + ) + except ValueError: + # if the value is not able to convert to float a ValueError get raised by + # 
convert_scientific_notation function + pass + if e_flag: + details["value"] = converted_value + + +def check_keys(name, my_dict, allowed_keys, mandatory_keys): """ - Checks if all keys in 'my_dict' are contained in the set 'allowed_keys'. - If an unallowed key is found, an exception is raised. + Checks if all keys in 'my_dict' are contained in the set 'allowed_keys' and + if all keys in 'mandatory_keys' are present in 'my_dict'. + Raises an exception if an unallowed key is found or if a mandatory key is missing. """ - for key in my_dict: - if key not in allowed_keys: - raise KeyError(f"This key is not allowed: '{key}' for parameter '{name}'") + # Check for unallowed keys + unallowed_keys = [key for key in my_dict if key not in allowed_keys] + if unallowed_keys: + unallowed_keys_str = ", ".join(unallowed_keys) + raise KeyError( + f"Unallowed key(s) '{unallowed_keys_str}' found for parameter '" f"{name}'." + ) + + # Check for missing mandatory keys + missing_mandatory_keys = [key for key in mandatory_keys if key not in my_dict] + if missing_mandatory_keys: + missing_keys_str = ", ".join(missing_mandatory_keys) + raise KeyError( + f"Missing mandatory key(s) '{missing_keys_str}' for parameter '" f"{name}'." 
+ ) diff --git a/tests/test_yaml_search_space/inconsistent_types_config.yml b/tests/test_yaml_search_space/inconsistent_types_config.yml index 3d5eb559..5c3182a2 100644 --- a/tests/test_yaml_search_space/inconsistent_types_config.yml +++ b/tests/test_yaml_search_space/inconsistent_types_config.yml @@ -1,7 +1,7 @@ search_space: learning_rate: - lower: "0.00001" # Lower is now a string - upper: 0.1 + lower: "string" # Lower is now a string + upper: 1e3 log: true num_epochs: diff --git a/tests/test_yaml_search_space/inconsistent_types_config2.yml b/tests/test_yaml_search_space/inconsistent_types_config2.yml new file mode 100644 index 00000000..5f205e92 --- /dev/null +++ b/tests/test_yaml_search_space/inconsistent_types_config2.yml @@ -0,0 +1,18 @@ +search_space: + learning_rate: + type: int + lower: 2.3 # float + upper: 1e3 + log: true + + num_epochs: + lower: 3 + upper: 30 + is_fidelity: True + + optimizer: + choices: ["adam", "sgd", "rmsprop"] + + dropout_rate: + value: 0.5 + is_fidelity: True diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index aecf617a..4624bd78 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -112,6 +112,11 @@ def test_yaml_file_with_inconsistent_types(): "tests/test_yaml_search_space/inconsistent_types_config.yml" ) assert str(excinfo.value.exception_type == "TypeError") + with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: + pipeline_space_from_yaml( + "tests/test_yaml_search_space/inconsistent_types_config2.yml" + ) + assert str(excinfo.value.exception_type == "TypeError") @pytest.mark.neps_api From 15d4cc885e44f673eaaa6fd2f1b1e16ec6ffaae2 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Wed, 6 Dec 2023 21:59:01 +0100 Subject: [PATCH 15/20] fix naming of parameters in test --- .../correct_config.yaml | 17 ++-- .../correct_config_including_types.yaml | 17 
++-- .../test_search_space.py | 79 ++++++++++--------- 3 files changed, 61 insertions(+), 52 deletions(-) diff --git a/tests/test_yaml_search_space/correct_config.yaml b/tests/test_yaml_search_space/correct_config.yaml index 3cc10e1e..5f0c641c 100644 --- a/tests/test_yaml_search_space/correct_config.yaml +++ b/tests/test_yaml_search_space/correct_config.yaml @@ -1,31 +1,32 @@ search_space: - learning_rate: + param_float1: lower: 0.00001 upper: 0.1 log: TRUE is_fidelity: off - num_epochs: + param_int1: lower: -3 upper: 30 log: false is_fidelity: on - batch_size: + param_int2: type: int lower: 1E2 upper: 3e4 - log: on + log: ON is_fidelity: FALSE - sec_learning_rate: + param_float2: lower: 3.3e-5 upper: 1.5E-1 - optimizer: + param_cat: choices: [2, "sgd", 10e-3] - dropout_rate: + param_const1: value: 0.5 - constant_value: + param_const2: value: 1e3 + is_fidelity: TRUE diff --git a/tests/test_yaml_search_space/correct_config_including_types.yaml b/tests/test_yaml_search_space/correct_config_including_types.yaml index 6ed51cfe..870d9574 100644 --- a/tests/test_yaml_search_space/correct_config_including_types.yaml +++ b/tests/test_yaml_search_space/correct_config_including_types.yaml @@ -1,32 +1,37 @@ search_space: - learning_rate: + param_float1: type: float lower: 0.00001 upper: 1e-1 log: true - num_epochs: + param_int1: type: integer lower: -3 upper: 30 is_fidelity: True - batch_size: + param_int2: type: "int" lower: 1e2 upper: 3E4 log: true is_fidelity: false - sec_learning_rate: + param_float2: type: "float" lower: 3.3e-5 upper: 1.5E-1 - optimizer: + param_cat: type: cat choices: [2, "sgd", 10E-3] - dropout_rate: + param_const1: type: const value: 0.5 + + param_const2: + type: const + value: 1e3 + is_fidelity: true diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index 4624bd78..5f01118d 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ 
b/tests/test_yaml_search_space/test_search_space.py @@ -1,11 +1,11 @@ import pytest - -from neps import CategoricalParameter, ConstantParameter, FloatParameter, IntegerParameter from neps.search_spaces.search_space import ( SearchSpaceFromYamlFileError, pipeline_space_from_yaml, ) +from neps import CategoricalParameter, ConstantParameter, FloatParameter, IntegerParameter + @pytest.mark.neps_api def test_correct_yaml_files(): @@ -13,42 +13,45 @@ def test_correct_yaml_file(path): """Test the function with a correctly formatted YAML file.""" pipeline_space = pipeline_space_from_yaml(path) assert isinstance(pipeline_space, dict) - assert isinstance(pipeline_space["learning_rate"], FloatParameter) - assert pipeline_space["learning_rate"].lower == 0.00001 - assert pipeline_space["learning_rate"].upper == 0.1 - assert pipeline_space["learning_rate"].log is True - assert pipeline_space["optimizer"].is_fidelity is False - assert pipeline_space["learning_rate"].default is None - assert pipeline_space["learning_rate"].default_confidence_score == 0.5 - assert isinstance(pipeline_space["num_epochs"], IntegerParameter) - assert pipeline_space["num_epochs"].lower == -3 - assert pipeline_space["num_epochs"].upper == 30 - assert pipeline_space["num_epochs"].log is False - assert pipeline_space["num_epochs"].is_fidelity is True - assert pipeline_space["num_epochs"].default is None - assert pipeline_space["num_epochs"].default_confidence_score == 0.5 - assert isinstance(pipeline_space["batch_size"], IntegerParameter) - assert pipeline_space["batch_size"].lower == 100 - assert pipeline_space["batch_size"].upper == 30000 - assert pipeline_space["batch_size"].log is True - assert pipeline_space["batch_size"].is_fidelity is False - assert pipeline_space["batch_size"].default is None - assert pipeline_space["batch_size"].default_confidence_score == 0.5 - assert isinstance(pipeline_space["sec_learning_rate"], FloatParameter) - assert pipeline_space["sec_learning_rate"].lower == 3.3e-5 - 
assert pipeline_space["sec_learning_rate"].upper == 0.15 - assert pipeline_space["sec_learning_rate"].log is False - assert pipeline_space["sec_learning_rate"].is_fidelity is False - assert pipeline_space["sec_learning_rate"].default is None - assert pipeline_space["sec_learning_rate"].default_confidence_score == 0.5 - assert isinstance(pipeline_space["optimizer"], CategoricalParameter) - assert pipeline_space["optimizer"].choices == [2, "sgd", 10e-3] - assert pipeline_space["optimizer"].is_fidelity is False - assert pipeline_space["optimizer"].default is None - assert pipeline_space["optimizer"].default_confidence_score == 2 - assert isinstance(pipeline_space["dropout_rate"], ConstantParameter) - assert pipeline_space["dropout_rate"].value == 0.5 - assert pipeline_space["dropout_rate"].is_fidelity is False + assert isinstance(pipeline_space["param_float1"], FloatParameter) + assert pipeline_space["param_float1"].lower == 0.00001 + assert pipeline_space["param_float1"].upper == 0.1 + assert pipeline_space["param_float1"].log is True + assert pipeline_space["param_float1"].is_fidelity is False + assert pipeline_space["param_float1"].default is None + assert pipeline_space["param_float1"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["param_int1"], IntegerParameter) + assert pipeline_space["param_int1"].lower == -3 + assert pipeline_space["param_int1"].upper == 30 + assert pipeline_space["param_int1"].log is False + assert pipeline_space["param_int1"].is_fidelity is True + assert pipeline_space["param_int1"].default is None + assert pipeline_space["param_int1"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["param_int2"], IntegerParameter) + assert pipeline_space["param_int2"].lower == 100 + assert pipeline_space["param_int2"].upper == 30000 + assert pipeline_space["param_int2"].log is True + assert pipeline_space["param_int2"].is_fidelity is False + assert pipeline_space["param_int2"].default is None + assert 
pipeline_space["param_int2"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["param_float2"], FloatParameter) + assert pipeline_space["param_float2"].lower == 3.3e-5 + assert pipeline_space["param_float2"].upper == 0.15 + assert pipeline_space["param_float2"].log is False + assert pipeline_space["param_float2"].is_fidelity is False + assert pipeline_space["param_float2"].default is None + assert pipeline_space["param_float2"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["param_cat"], CategoricalParameter) + assert pipeline_space["param_cat"].choices == [2, "sgd", 10e-3] + assert pipeline_space["param_cat"].is_fidelity is False + assert pipeline_space["param_cat"].default is None + assert pipeline_space["param_cat"].default_confidence_score == 2 + assert isinstance(pipeline_space["param_const1"], ConstantParameter) + assert pipeline_space["param_const1"].value == 0.5 + assert pipeline_space["param_const1"].is_fidelity is False + assert isinstance(pipeline_space["param_const2"], ConstantParameter) + assert pipeline_space["param_const2"].value == 1e3 + assert pipeline_space["param_const2"].is_fidelity is True test_correct_yaml_file("tests/test_yaml_search_space/correct_config.yaml") test_correct_yaml_file( From c70b85ee5c65ed178b84e7c11e94ed8110e2b6ab Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Wed, 6 Dec 2023 22:19:35 +0100 Subject: [PATCH 16/20] enable usage of Path object for yaml_file config_space --- neps/api.py | 2 +- neps/search_spaces/search_space.py | 5 +++-- tests/test_yaml_search_space/test_search_space.py | 8 ++++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/neps/api.py b/neps/api.py index 5cf870cd..58f3df01 100644 --- a/neps/api.py +++ b/neps/api.py @@ -318,7 +318,7 @@ def _run_args( if isinstance(pipeline_space, CS.ConfigurationSpace): pipeline_space = pipeline_space_from_configspace(pipeline_space) # Support pipeline space as YAML file - elif 
isinstance(pipeline_space, str): + elif isinstance(pipeline_space, (str, Path)): pipeline_space = pipeline_space_from_yaml(pipeline_space) # Support pipeline space as mix of ConfigurationSpace and neps parameters diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index 92856910..d43fd684 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -76,7 +76,8 @@ def pipeline_space_from_yaml(yaml_file_path): maps parameter names to their respective configuration objects. Args: - yaml_file_path (str): Path to the YAML file containing parameter configurations. + yaml_file_path (Union[str, Path]): Path to the YAML file containing parameter + configurations. Returns: dict: A dictionary where keys are parameter names and values are parameter @@ -109,7 +110,7 @@ def pipeline_space_from_yaml(yaml_file_path): config = yaml.safe_load(file) except yaml.YAMLError as e: raise ValueError( - f"The file at {yaml_file_path} is not a valid YAML file." + f"The file at {str(yaml_file_path)} is not a valid YAML file." 
) from e # check for init key search_space diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index 5f01118d..f354b128 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -1,3 +1,5 @@ +from pathlib import Path + import pytest from neps.search_spaces.search_space import ( SearchSpaceFromYamlFileError, @@ -94,7 +96,9 @@ def test_correct_including_priors_yaml_file(): def test_incorrect_yaml_file(): """Test the function with an incorrectly formatted YAML file.""" with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: - pipeline_space_from_yaml("tests/test_yaml_search_space/incorrect_config.txt") + pipeline_space_from_yaml( + Path("tests/test_yaml_search_space/incorrect_config.txt") + ) assert str(excinfo.value.exception_type == "ValueError") @@ -117,7 +121,7 @@ def test_yaml_file_with_inconsistent_types(): assert str(excinfo.value.exception_type == "TypeError") with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml( - "tests/test_yaml_search_space/inconsistent_types_config2.yml" + Path("tests/test_yaml_search_space/inconsistent_types_config2.yml") ) assert str(excinfo.value.exception_type == "TypeError") From 91d8a457db76cee1970fbabd6941325c656b758e Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Thu, 7 Dec 2023 00:08:25 +0100 Subject: [PATCH 17/20] add type specification for arguments + add more detailed DocStrings for paramter validation functions --- neps/search_spaces/search_space.py | 9 +- neps/search_spaces/yaml_search_space_utils.py | 165 ++++++++++++++---- 2 files changed, 140 insertions(+), 34 deletions(-) diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index d43fd684..bcf5b18b 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -6,6 +6,7 @@ from collections import OrderedDict 
from copy import deepcopy from itertools import product +from pathlib import Path import ConfigSpace as CS import numpy as np @@ -66,7 +67,11 @@ def pipeline_space_from_configspace( return pipeline_space -def pipeline_space_from_yaml(yaml_file_path): +def pipeline_space_from_yaml( + yaml_file_path: str | Path, +) -> dict[ + str, FloatParameter | IntegerParameter | CategoricalParameter | ConstantParameter +]: """ Reads configuration details from a YAML file and constructs a pipeline space dictionary. @@ -76,7 +81,7 @@ def pipeline_space_from_yaml(yaml_file_path): maps parameter names to their respective configuration objects. Args: - yaml_file_path (Union[str, Path]): Path to the YAML file containing parameter + yaml_file_path (str | Path): Path to the YAML file containing parameter configurations. Returns: diff --git a/neps/search_spaces/yaml_search_space_utils.py b/neps/search_spaces/yaml_search_space_utils.py index b3789842..fe278dd6 100644 --- a/neps/search_spaces/yaml_search_space_utils.py +++ b/neps/search_spaces/yaml_search_space_utils.py @@ -1,10 +1,36 @@ +from __future__ import annotations + import re -def convert_scientific_notation(value, show_usage_flag=False): - """Check if the value is a string that matches scientific e notation and convert it - to float. (specially numbers like 3.3e-5 with a float value in front, which yaml - can not interpret directly as float).""" +def convert_scientific_notation( + value: str | int | float, show_usage_flag=False +) -> float | (float, bool): + """ + Convert a given value to a float if it's a string that matches scientific e notation. + This is especially useful for numbers like "3.3e-5" which YAML parsers may not + directly interpret as floats. + + If the 'show_usage_flag' is set to True, the function returns a tuple of the float + conversion and a boolean flag indicating whether scientific notation was detected. + + Args: + value (str | int | float): The value to convert. 
Can be an integer, float, + or a string representing a number, possibly in + scientific notation. + show_usage_flag (bool): Optional; defaults to False. If True, the function + also returns a flag indicating whether scientific + notation was detected in the string. + + Returns: + float: The value converted to float if 'show_usage_flag' is False. + (float, bool): A tuple containing the value converted to float and a flag + indicating scientific notation detection if 'show_usage_flag' + is True. + + Raises: + ValueError: If the value is a string and does not represent a valid number. + """ e_notation_pattern = r"^-?\d+(\.\d+)?[eE]-?\d+$" @@ -54,7 +80,9 @@ def __init__(self, exception): super().__init__(self.message) -def deduce_and_validate_param_type(name, details): +def deduce_and_validate_param_type( + name: str, details: dict[str, str | int | float] +) -> str: """ Deduces the parameter type from details and validates them. @@ -82,16 +110,18 @@ def deduce_and_validate_param_type(name, details): return param_type -def deduce_param_type(name, details): +def deduce_param_type(name: str, details: dict[str, int | str | float]) -> str: """Deduces the parameter type based on the provided details. - This function analyzes the provided details dictionary to determine the type of - parameter. It supports identifying integer, float, categorical, and constant - parameter types. + The function interprets the 'details' dictionary to determine the parameter type. + The dictionary should include key-value pairs that describe the parameter's + characteristics, such as lower, upper, default value, or possible choices. + Args: name (str): The name of the parameter. - details (dict): A dictionary containing parameter specifications. + details ((dict[str, int | str | float])): A dictionary containing parameter + specifications. Returns: str: The deduced parameter type ('int', 'float', 'categorical', or 'constant'). 
@@ -112,12 +142,19 @@ def deduce_param_type(name, details): elif isinstance(details["lower"], float) and isinstance(details["upper"], float): param_type = "float" else: - details["lower"], flag_lower = convert_scientific_notation( - details["lower"], show_usage_flag=True - ) - details["upper"], flag_upper = convert_scientific_notation( - details["upper"], show_usage_flag=True - ) + try: + details["lower"], flag_lower = convert_scientific_notation( + details["lower"], show_usage_flag=True + ) + details["upper"], flag_upper = convert_scientific_notation( + details["upper"], show_usage_flag=True + ) + except ValueError as e: + raise TypeError( + f"Inconsistent types for 'lower' and 'upper' in '{name}'. " + f"Both must be either integers or floats." + ) from e + # check if one value is e notation and if so convert it to float if flag_lower or flag_upper: param_type = "float" @@ -145,7 +182,9 @@ def deduce_param_type(name, details): return param_type -def validate_param_details(name, param_type, details): +def validate_param_details( + name: str, param_type: str, details: dict[str, int | str | float] +): """ Validates the details of a parameter based on its type. @@ -166,9 +205,6 @@ def validate_param_details(name, param_type, details): the necessary keys in the 'details' are missing based on the parameter type. TypeError: If the 'param_type' is not one of the supported types. - Returns: - str: The parameter type in lowercase. 
- Example Usage: validate_param_details("learning_rate", "float", {"lower": 0.01, "upper": 0.1, "default": 0.05}) @@ -203,11 +239,28 @@ def validate_param_details(name, param_type, details): "For categorical parameter: cat, categorical\n" "For constant parameter: const, constant\n" ) - return param_type -def validate_integer_parameter(name, details): - """validate int parameter and convert e notation values to int""" +def validate_integer_parameter(name: str, details: dict[str, str | int | float]): + """ + Validates and processes an integer parameter's details, converting scientific + notation to integers where necessary. + + This function checks the type of 'lower' and 'upper', and the 'default' + value (if present) for an integer parameter. It also handles conversion of values + in scientific notation (e.g., 1e2) to integers. + + Args: + name (str): The name of the integer parameter. + details (dict[str, str | int | float]): A dictionary containing the parameter's + specifications. Expected keys include + 'lower', 'upper', and optionally 'default', + among others. + + Raises: + TypeError: If 'lower', 'upper', or 'default' are not valid integers or cannot + be converted from scientific notation to integers. + """ # check if all keys are allowed to use and if the mandatory ones are provided check_keys( name, @@ -254,8 +307,24 @@ def validate_integer_parameter(name, details): ) from e -def validate_float_parameter(name, details): - """validate float parameter and convert e notation values to float""" +def validate_float_parameter(name: str, details: dict[str, str | int | float]): + """ + Validates and processes a float parameter's details, converting scientific + notation values to float where necessary. + + This function checks the type of 'lower' and 'upper', and the 'default' + value (if present) for a float parameter. It handles conversion of values in + scientific notation (e.g., 1e-5) to float. + + Args: + name: The name of the float parameter. 
+ details: A dictionary containing the parameter's specifications. Expected keys + include 'lower', 'upper', and optionally 'default', among others. + + Raises: + TypeError: If 'lower', 'upper', or 'default' are not valid floats or cannot + be converted from scientific notation to floats. + """ # check if all keys are allowed to use and if the mandatory ones are provided check_keys( name, @@ -284,8 +353,23 @@ def validate_float_parameter(name, details): ) from e -def validate_categorical_parameter(name, details): - """validate categorical parameter and convert e notation values to float""" +def validate_categorical_parameter(name: str, details: dict[str, str | int | float]): + """ + Validates a categorical parameter, including conversion of scientific notation + values to floats within the choices. + + This function ensures that the 'choices' key in the details is a list and attempts + to convert any elements in scientific notation to floats. It also handles the + 'default' value, converting it from scientific notation if necessary. + + Args: + name: The name of the categorical parameter. + details: A dictionary containing the parameter's specifications. Required key + is 'choices', with 'default' being optional. + + Raises: + TypeError: If 'choices' is not a list + """ # check if all keys are allowed to use and if the mandatory ones are provided check_keys( name, @@ -320,8 +404,20 @@ def validate_categorical_parameter(name, details): details["default"] = default -def validate_constant_parameter(name, details): - """Validate constant parameter and convert e notation to float""" +def validate_constant_parameter(name: str, details: dict[str, str | int | float]): + """ + Validates a constant parameter, including conversion of values in scientific + notation to floats. + + This function checks the 'value' key in the details dictionary and converts any + value expressed in scientific notation to a float. 
It ensures that the mandatory + 'value' key is provided and appropriately formatted. + + Args: + name: The name of the constant parameter. + details: A dictionary containing the parameter's specifications. The required + key is 'value'. + """ # check if all keys are allowed to use and if the mandatory ones are provided check_keys(name, details, {"value", "type", "is_fidelity"}, {"value"}) @@ -339,14 +435,19 @@ def validate_constant_parameter(name, details): details["value"] = converted_value -def check_keys(name, my_dict, allowed_keys, mandatory_keys): +def check_keys( + name: str, + details: dict[str, str | int | float], + allowed_keys: set, + mandatory_keys: set, +): """ Checks if all keys in 'my_dict' are contained in the set 'allowed_keys' and if all keys in 'mandatory_keys' are present in 'my_dict'. Raises an exception if an unallowed key is found or if a mandatory key is missing. """ # Check for unallowed keys - unallowed_keys = [key for key in my_dict if key not in allowed_keys] + unallowed_keys = [key for key in details if key not in allowed_keys] if unallowed_keys: unallowed_keys_str = ", ".join(unallowed_keys) raise KeyError( @@ -354,7 +455,7 @@ def check_keys(name, my_dict, allowed_keys, mandatory_keys): ) # Check for missing mandatory keys - missing_mandatory_keys = [key for key in mandatory_keys if key not in my_dict] + missing_mandatory_keys = [key for key in mandatory_keys if key not in details] if missing_mandatory_keys: missing_keys_str = ", ".join(missing_mandatory_keys) raise KeyError( From 5d116390b8c616a37bf6bab97c3944359c45cece Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Thu, 7 Dec 2023 20:13:48 +0100 Subject: [PATCH 18/20] fix format of Pipeline Space Documentation for mkdocs --- docs/pipeline_space.md | 112 ++++++++++++++++++++--------------------- neps/api.py | 7 +-- 2 files changed, 58 insertions(+), 61 deletions(-) diff --git a/docs/pipeline_space.md b/docs/pipeline_space.md index 
8474dfcf..1ae2f63d 100644 --- a/docs/pipeline_space.md +++ b/docs/pipeline_space.md @@ -25,7 +25,7 @@ Create a YAML file (e.g., search_space.yaml) with the parameter definitions foll ```yaml search_space: # important to start with learning_rate: - lower: 2e-3 # or 2*10^-3 + lower: 2e-3 upper: 0.1 log: true @@ -63,90 +63,90 @@ configspace.add_hyperparameter( For additional information on ConfigSpace and its features, please visit the following link: https://github.com/automl/ConfigSpace -# Supported HyperParameter Types using a YAML File +## Supported Hyperparameter Types using a YAML File -### FloatParameter and IntegerParameter +### Float/Integer Parameter - **Expected Arguments:** - - `lower`: The minimum value of the parameter. - - `upper`: The maximum value of the parameter. - - **Accepted Values:** Int or Float depending on the specific parameter type one wishes to use. + - `lower`: The minimum value of the parameter. + - `upper`: The maximum value of the parameter. + - Accepted Values: Int or Float depending on the specific parameter type one wishes to use. - **Optional Arguments:** - - `type`: Specifies the data type of the parameter. - - **Accepted Values**: 'int', 'integer', or 'float'. - - **Note:** If type is not specified e notation gets converted to float - - `log`: Boolean that indicates if the parameter uses a logarithmic scale (default: False) - - [Details on how YAML interpret Boolean Values](#important-note-on-yaml-string-and-boolean-interpretation) - - `is_fidelity`: Boolean that marks the parameter as a fidelity parameter (default: False). - - `default`: Sets a prior central value for the parameter (default: None). - - **Note:** Currently, if you define a prior for one parameter, you must do so for all your variables. - - `default_confidence`: Specifies the confidence level of the default value, - indicating how strongly the prior - should be considered (default: "low"). - - **Accepted Values**: 'low', 'medium', or 'high'. 
+ - `type`: Specifies the data type of the parameter. + - Accepted Values: 'int', 'integer', or 'float'. + - Note: If `type` is not specified, values in e-notation are converted to float. + - `log`: Boolean that indicates if the parameter uses a logarithmic scale (default: False) + - [Details on how YAML interprets Boolean Values](#important-note-on-yaml-data-type-interpretation) + - `is_fidelity`: Boolean that marks the parameter as a fidelity parameter (default: False). + - `default`: Sets a prior central value for the parameter (default: None). + - Note: Currently, if you define a prior for one parameter, you must do so for all your variables. + - `default_confidence`: Specifies the confidence level of the default value, + indicating how strongly the prior + should be considered (default: "low"). + - Accepted Values: 'low', 'medium', or 'high'. ### Categorical Parameter - **Expected Arguments:** - - `choices`: A list of discrete options(int | float | str) that the parameter can take. + - `choices`: A list of discrete options (int | float | str) that the parameter can take. - **Optional Arguments:** - - `type`: Specifies the data type of the parameter. - - Accepted Values: 'cat' or 'categorical'. - - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). - - [Details on how YAML interpret Boolean Values](#important-note-on-yaml-string-and-boolean-interpretation) - - `default`: Sets a prior central value for the parameter (default: None). - - **Note:** Currently, if you define a prior for one parameter, you must do so for all your variables. - - `default_confidence`: Specifies the confidence level of the default value, - indicating how strongly the prior - should be considered (default: "low"). - -### ConstantParameter + - `type`: Specifies the data type of the parameter. + - Accepted Values: 'cat' or 'categorical'. + - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False).
+ - [Details on how YAML interprets Boolean Values](#important-note-on-yaml-data-type-interpretation) + - `default`: Sets a prior central value for the parameter (default: None). + - Note: Currently, if you define a prior for one parameter, you must do so for all your variables. + - `default_confidence`: Specifies the confidence level of the default value, + indicating how strongly the prior + should be considered (default: "low"). + +### Constant Parameter - **Expected Arguments:** - - `value`: The fixed value(int | float | str) for the parameter. + - `value`: The fixed value (int | float | str) for the parameter. - **Optional Arguments:** - - `type`: Specifies the data type of the parameter. - - Accepted Values: 'const' or 'constant'. - - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). + - `type`: Specifies the data type of the parameter. + - Accepted Values: 'const' or 'constant'. + - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). -## Important Note on YAML Data Type Interpretation +### Important Note on YAML Data Type Interpretation When working with YAML files, it's essential to understand how the format interprets different data types: 1. **Strings in Quotes:** - - Any value enclosed in single (`'`) or double (`"`) quotes is treated as a string. - - Example: `"true"`, `'123'` are read as strings. + - Any value enclosed in single (`'`) or double (`"`) quotes is treated as a string. + - Example: `"true"`, `'123'` are read as strings. -1. **Boolean Interpretation:** +2. **Boolean Interpretation:** - - Specific unquoted values are interpreted as booleans. - - `true`, `True`, `TRUE` - - `false`, `False`, `FALSE` - - `on`, `On`, `ON` - - `off`, `Off`, `OFF` - - `yes`, `Yes`, `YES` - - `no`, `No`, `NO` + - Specific unquoted values are interpreted as booleans. 
This includes: + - `true`, `True`, `TRUE` + - `false`, `False`, `FALSE` + - `on`, `On`, `ON` + - `off`, `Off`, `OFF` + - `yes`, `Yes`, `YES` + - `no`, `No`, `NO` -1. **Numbers:** +3. **Numbers:** - - Unquoted numeric values are interpreted as integers or floating-point numbers, depending on their format. - - Example: `123` is an integer, `4.56` is a float, `1e3` can be either an integer or a floating-point number, - depending on the type specified by the user. By default, 1e3 is treated as a floating-point number. - This interpretation is unique to our system. + - Unquoted numeric values are interpreted as integers or floating-point numbers, depending on their format. + - Example: `123` is an integer, `4.56` is a float, `1e3` can be either an integer or a floating-point number, + depending on the type specified by the user. By default, 1e3 is treated as a floating-point number. + This interpretation is unique to our system. -1. **Empty Strings:** +4. **Empty Strings:** - - An empty string `""` or a key with no value is always treated as `null` in YAML. + - A key with no value is treated as `null` in YAML, whereas a quoted empty string `""` remains an empty string. -1. **Unquoted Non-Boolean, Non-Numeric Strings:** +5. **Unquoted Non-Boolean, Non-Numeric Strings:** - - Unquoted values that don't match boolean patterns or numeric formats are treated as strings. - - Example: `example` is a string. + - Unquoted values that don't match boolean patterns or numeric formats are treated as strings. + - Example: `example` is a string. Remember to use appropriate quotes and formats to ensure values are interpreted as intended. -# Supported ArchitectureParameter Types +## Supported ArchitectureParameter Types **Note**: The definition of Search Space from a YAML file is limited to supporting only Hyperparameter Types. 
diff --git a/neps/api.py b/neps/api.py index 58f3df01..d035a5b5 100644 --- a/neps/api.py +++ b/neps/api.py @@ -98,11 +98,8 @@ def write_loss_and_config(file_handle, loss_, config_id_, config_): def run( run_pipeline: Callable, root_directory: str | Path, - pipeline_space: dict[str, Parameter | CS.ConfigurationSpace] - | str - | Path - | CS.ConfigurationSpace - | None = None, + pipeline_space: dict[str, Parameter | CS.ConfigurationSpace] | str | Path | + CS.ConfigurationSpace | None = None, overwrite_working_directory: bool = False, post_run_summary: bool = False, development_stage_id=None, From 075c79b67944a881b6153d2f1e79ac4633ef73ae Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Fri, 8 Dec 2023 12:44:43 +0100 Subject: [PATCH 19/20] fix test cases --- neps/search_spaces/yaml_search_space_utils.py | 7 +++---- tests/test_yaml_search_space/test_search_space.py | 12 ++++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/neps/search_spaces/yaml_search_space_utils.py b/neps/search_spaces/yaml_search_space_utils.py index fe278dd6..e0efe616 100644 --- a/neps/search_spaces/yaml_search_space_utils.py +++ b/neps/search_spaces/yaml_search_space_utils.py @@ -3,9 +3,8 @@ import re -def convert_scientific_notation( - value: str | int | float, show_usage_flag=False -) -> float | (float, bool): +def convert_scientific_notation(value: str | int | float, show_usage_flag=False) \ + -> float | (float, bool): """ Convert a given value to a float if it's a string that matches scientific e notation. 
This is especially useful for numbers like "3.3e-5" which YAML parsers may not @@ -171,7 +170,7 @@ def deduce_param_type(name: str, details: dict[str, int | str | float]) -> str: elif "value" in details: param_type = "constant" else: - raise TypeError( + raise KeyError( f"Unable to deduce parameter type from {name} " f"with details {details}\n" "Supported parameters:\n" diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index f354b128..c5cfed06 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -99,7 +99,7 @@ def test_incorrect_yaml_file(): pipeline_space_from_yaml( Path("tests/test_yaml_search_space/incorrect_config.txt") ) - assert str(excinfo.value.exception_type == "ValueError") + assert excinfo.value.exception_type == "ValueError" @pytest.mark.neps_api @@ -107,7 +107,7 @@ def test_yaml_file_with_missing_key(): """Test the function with a YAML file missing a required key.""" with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml("tests/test_yaml_search_space/missing_key_config.yml") - assert str(excinfo.value.exception_type == "KeyError") + assert excinfo.value.exception_type == "KeyError" @pytest.mark.neps_api @@ -123,7 +123,7 @@ def test_yaml_file_with_inconsistent_types(): pipeline_space_from_yaml( Path("tests/test_yaml_search_space/inconsistent_types_config2.yml") ) - assert str(excinfo.value.exception_type == "TypeError") + assert excinfo.value.exception_type == "TypeError" @pytest.mark.neps_api @@ -134,7 +134,7 @@ def test_yaml_file_including_wrong_types(): pipeline_space_from_yaml( "tests/test_yaml_search_space/config_including_wrong_types.yaml" ) - assert str(excinfo.value.exception_type == "TypeError") + assert excinfo.value.exception_type == "TypeError" @pytest.mark.neps_api @@ -145,7 +145,7 @@ def test_yaml_file_including_unkown_types(): pipeline_space_from_yaml( 
"tests/test_yaml_search_space/config_including_unknown_types.yaml" ) - assert str(excinfo.value.exception_type == "TypeError") + assert excinfo.value.exception_type == "TypeError" @pytest.mark.neps_api @@ -156,4 +156,4 @@ def test_yaml_file_including_not_allowed_parameter_keys(): pipeline_space_from_yaml( "tests/test_yaml_search_space/not_allowed_key_config.yml" ) - assert str(excinfo.value.exception_type == "KeyError") + assert excinfo.value.exception_type == "KeyError" From 48f8df1ad7e144b2be957bbf20bff46fab2a1793 Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:46:39 +0100 Subject: [PATCH 20/20] fix issue #44 --- neps/search_spaces/search_space.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index bcf5b18b..8f7335ed 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -204,8 +204,13 @@ def __init__(self, **hyperparameters): ) self.fidelity = hyperparameter - # Check if defaults exists to construct prior from - if hasattr(hyperparameter, "default") and hyperparameter.default is not None: + # Check if defaults exists to construct prior from, except of + # ConstantParameter because default gets init always by the given value + if ( + hasattr(hyperparameter, "default") + and hyperparameter.default is not None + and not isinstance(hyperparameter, ConstantParameter) + ): self.has_prior = True elif hasattr(hyperparameter, "has_prior") and hyperparameter.has_prior: self.has_prior = True