Skip to content

Commit

Permalink
rework of some of the code + comment and doc string changes
Browse files Browse the repository at this point in the history
  • Loading branch information
danrgll committed Mar 21, 2024
1 parent 6f764e7 commit 701ef7a
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 135 deletions.
239 changes: 107 additions & 132 deletions neps/utils/run_args_from_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

logger = logging.getLogger("neps")

# Define the name of the arguments as variable for easier code maintenance
# Define the name of the arguments as variables for easier code maintenance
RUN_PIPELINE = "run_pipeline"
PIPELINE_SPACE = "pipeline_space"
ROOT_DIRECTORY = "root_directory"
Expand All @@ -27,54 +27,37 @@
SEARCHER_KWARGS = "searcher_kwargs"
MAX_EVALUATIONS_PER_RUN = "max_evaluations_per_run"

EXPECTED_PARAMETERS = [
ROOT_DIRECTORY,
MAX_EVALUATIONS_TOTAL,
MAX_COST_TOTAL,
OVERWRITE_WORKING_DIRECTORY,
POST_RUN_SUMMARY,
DEVELOPMENT_STAGE_ID,
TASK_ID,
MAX_EVALUATIONS_PER_RUN,
CONTINUE_UNTIL_MAX_EVALUATION_COMPLETED,
LOSS_VALUE_ON_ERROR,
COST_VALUE_ON_ERROR,
IGNORE_ERROR,
SEARCHER_PATH,
]


def get_run_args_from_yaml(path):
"""
Loads NEPS optimization settings from a YAML file and validates them.
Load and validate NEPS run arguments from a specified YAML configuration file
provided via run_args.
Ensures all settings are valid NEPS.run() arguments with correct structure. Extracts
special configurations like 'searcher_kwargs', 'run_pipeline', and 'pre_load_hooks',
converting them as necessary. Raises an error for unrecognized parameters or incorrect
file structure.
This function reads a YAML file, extracts the arguments required by NEPS,
validates these arguments, and then returns them in a dictionary. It checks for the
presence and validity of expected parameters, and distinctively handles more complex
configurations, specifically those that are dictionaries(e.g. pipeline_space) or
objects(e.g. run_pipeline) requiring loading.
Args:
path (str): Path to the YAML configuration file.
Parameters:
path (str): The file path to the YAML configuration file.
Returns:
dict: Validated and processed run arguments.
A dictionary of validated run arguments.
Raises:
KeyError: For invalid parameters or missing 'path'/'name' in special
configurations.
KeyError: If any parameter name is invalid.
"""
# Load the YAML configuration file
config = config_loader(path)

# Initialize an empty dictionary to hold the extracted settings
settings = {}

# Define the allowed parameters based on the arguments of neps.run(), that have a
# simple value like a string or int type
# [searcher_kwargs, run_pipeline, preload_hooks, pipeline_space, searcher] require
# special handling due to their
# values necessitating distinct treatment, that's why they are not listed
EXPECTED_PARAMETERS = [
# List allowed NEPS run arguments with simple types (e.g., string, int). Parameters
# like 'searcher_kwargs', 'run_pipeline', 'preload_hooks', 'pipeline_space',
# and 'searcher' are excluded due to needing specialized processing.
expected_parameters = [
ROOT_DIRECTORY,
MAX_EVALUATIONS_TOTAL,
MAX_COST_TOTAL,
Expand All @@ -90,20 +73,22 @@ def get_run_args_from_yaml(path):
SEARCHER_PATH,
]

# Flatten yaml file and ignore hierarchical structure, only consider parameters(keys)
# with an explicit value
# Flatten the YAML file's structure to separate flat parameters (flat_config) and
# those needing special handling (special_configs).
flat_config, special_configs = extract_leaf_keys(config)

# Check if flatten dict just contains neps arguments
    # Check that the flattened dict (flat_config) contains only expected parameters
for parameter, value in flat_config.items():
if parameter in EXPECTED_PARAMETERS:
if parameter in expected_parameters:
settings[parameter] = value
else:
raise KeyError(f"Parameter '{parameter}' is not an argument of neps.run().")

# Process complex configurations (e.g., 'pipeline_space', 'searcher') and integrate
# them into 'settings'.
handle_special_argument_cases(settings, special_configs)

# check if arguments have legal types of provided
# check if all provided arguments have legal types
check_run_args(settings)

logger.debug(
Expand Down Expand Up @@ -138,10 +123,8 @@ def config_loader(path):
def extract_leaf_keys(d, special_keys=None):
"""
Recursive function to extract leaf keys and their values from a nested dictionary.
Special keys ('searcher_kwargs', 'run_pipeline', 'pre_load_hooks') are also
extracted if present and their corresponding values (dict) at any level in the
nested
structure.
Special keys (e.g. 'searcher_kwargs', 'run_pipeline') are also extracted if present
and their corresponding values (dict) at any level in the nested structure.
:param d: The dictionary to extract values from.
:param special_keys: A dictionary to store values of special keys.
Expand Down Expand Up @@ -183,126 +166,119 @@ def handle_special_argument_cases(settings, special_configs):
Parameters:
- settings (dict): The dictionary to be updated with processed configurations.
- special_configs (dict): A dictionary containing configuration keys and values
that require special handling.
that require special processing.
Returns:
- None: The function modifies 'settings' in place.
Raises:
- KeyError: If essential keys are missing for loading functions.
"""
# handle arguments that differ from a simple/single value/str
process_config_key(settings, special_configs, PIPELINE_SPACE)
process_config_key(settings, special_configs, SEARCHER)
# Load the value of each key from a dictionary specifying "path" and "name".
process_config_key(settings, special_configs, [PIPELINE_SPACE, SEARCHER,
RUN_PIPELINE])

# handle arguments that differ from a simple/single value/str
if special_configs[SEARCHER_KWARGS] is not None:
configs = {}

# important for handling yaml configs that provide the key but not a value in it,
# increase usability of a yaml template where just values but not the keys have
# to be removed
    # Check that each key's value is not None before adding the dict to settings.
    # This increases flexibility by allowing values of keys in the yaml file to be
    # left empty.
for key, value in special_configs[SEARCHER_KWARGS].items():
if value is not None:
configs[key] = value
if len(configs) != 0:
settings[SEARCHER_KWARGS] = configs

if special_configs[RUN_PIPELINE] is not None:
run_pipeline_dict = special_configs[RUN_PIPELINE]
# load function via path and name
try:
func = load_and_return_function(
run_pipeline_dict["path"], run_pipeline_dict["name"]
)
settings[RUN_PIPELINE] = func
except KeyError as e:
raise KeyError(
f"Missing key for argument run_pipeline: {e}. Expect 'path "
f"and 'name' as keys when loading 'run_pipeline' "
f"from 'run_args'"
) from e

if special_configs[PRE_LOAD_HOOKS] is not None:
# Loads functions and returns them in a list.
# Loads the pre_load_hooks functions and add them in a list to settings.
settings[PRE_LOAD_HOOKS] = load_hooks_from_config(
special_configs[PRE_LOAD_HOOKS]
)


def process_config_key(settings, special_configs, keys):
    """
    Insert the given keys from 'special_configs' into 'settings'.

    For every key in 'keys' that has a non-None entry in 'special_configs',
    either store its value directly (plain strings, allowed for every key
    except RUN_PIPELINE) or, when the value is a dictionary, load the
    referenced object via the dictionary's 'path' and 'name' entries and
    store the loaded object instead.

    Parameters:
    - settings (dict): Dictionary to update.
    - special_configs (dict): Contains keys and values for processing.
    - keys (list): List of keys to process in 'special_configs'.

    Raises:
    - KeyError: Missing 'path'/'name' for dictionaries.
    - TypeError: Incorrect type for key's value; RUN_PIPELINE must be a dict,
      others can be dict or string.
    """
    for key in keys:
        value = special_configs.get(key)
        if value is None:
            continue
        if isinstance(value, dict):
            # A dict must reference a loadable object (function, dict, class)
            # via its 'path' and 'name' entries.
            try:
                settings[key] = load_and_return_object(
                    value["path"], value["name"]
                )
            except KeyError as e:
                raise KeyError(
                    f"Missing key for argument {key}: {e}. Expect 'path' "
                    f"and 'name' as keys when loading '{key}' "
                    f"from 'run_args'"
                ) from e
        elif isinstance(value, str) and key != RUN_PIPELINE:
            # pipeline_space and searcher may also be given directly as strings.
            settings[key] = value
        elif key == RUN_PIPELINE:
            raise TypeError(
                f"Value for {key} must be a dictionary, but got "
                f"{type(value).__name__}.")
        else:
            raise TypeError(f"Value for {key} must be a string or a dictionary, "
                            f"but got {type(value).__name__}.")


def load_and_return_object(module_path, object_name):
    """
    Dynamically import an object from a specified module file path.

    This can import various types of objects from a module, including
    dictionaries, class instances, or functions, by specifying the module's
    file system path and the object's name within that module.

    Args:
        module_path (str): The file system path to the Python module.
        object_name (str): The name of the object to import from the module.

    Returns:
        object: The imported object, which can be of any type (e.g., dict,
            function, class).

    Raises:
        ImportError: If the module or object cannot be found.
    """
    try:
        # Derive a module name from the file path. Note: rstrip(".py") would
        # be wrong here because rstrip removes a *set of characters*, mangling
        # names such as "happy.py" -> "happ"; remove the suffix explicitly.
        module_name = module_path.replace("/", ".")
        if module_name.endswith(".py"):
            module_name = module_name[: -len(".py")]

        # Dynamically import the module from its file location.
        spec = importlib.util.spec_from_file_location(module_name, module_path)
        module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = module
        spec.loader.exec_module(module)

        # Retrieve the requested object from the freshly loaded module.
        imported_object = getattr(module, object_name)

    except FileNotFoundError as exc:
        raise ImportError(f"Module path '{module_path}' not found.") from exc

    except AttributeError as exc:
        raise ImportError(
            f"Object '{object_name}' not found in module '{module_name}'."
        ) from exc

    return imported_object


def load_hooks_from_config(pre_load_hooks_dict):
Expand All @@ -326,7 +302,7 @@ def load_hooks_from_config(pre_load_hooks_dict):
loaded_hooks = []
for hook_config in pre_load_hooks_dict.values():
if "path" in hook_config and "name" in hook_config:
hook_func = load_and_return_function(hook_config["path"], hook_config["name"])
hook_func = load_and_return_object(hook_config["path"], hook_config["name"])
loaded_hooks.append(hook_func)
else:
raise KeyError(
Expand All @@ -337,7 +313,7 @@ def load_hooks_from_config(pre_load_hooks_dict):

def check_run_args(settings):
"""
Validates the types of NEPS configuration settings.
Validates the types of NePS configuration settings.
Checks that each setting's value type matches its expected type. Raises
TypeError for type mismatches.
Expand All @@ -351,7 +327,7 @@ def check_run_args(settings):
# Mapping parameter names to their allowed types
# [task_id, development_stage_id, pre_load_hooks] require special handling of type,
# that's why they are not listed
ALLOWED_TYPES = {
expected_types = {
RUN_PIPELINE: Callable,
ROOT_DIRECTORY: str,
# TODO: Support CS.ConfigurationSpace for pipeline_space
Expand All @@ -378,7 +354,7 @@ def check_run_args(settings):
if not all(callable(item) for item in value):
raise TypeError("All items in 'pre_load_hooks' must be callable.")
else:
expected_type = ALLOWED_TYPES[param]
expected_type = expected_types[param]
if not isinstance(value, expected_type):
raise TypeError(
f"Parameter '{param}' expects a value of type {expected_type}, got "
Expand All @@ -390,24 +366,23 @@ def check_essential_arguments(
run_pipeline, root_directory, pipeline_space, max_cost_total, max_evaluation_total,
searcher):
"""
Verifies that essential NEPS arguments are provided.
Validates essential NEPS configuration arguments.
Checks for the presence of 'run_pipeline', 'root_directory', 'pipeline_space',
and at least one of 'max_cost_total' or 'max_evaluation_total'. If any of these
essential arguments are missing, raises a ValueError indicating the specific
missing argument.
Ensures 'run_pipeline', 'root_directory', 'pipeline_space', and either
'max_cost_total' or 'max_evaluation_total' are provided for NEPS execution.
Raises ValueError with missing argument details. Additionally, checks 'searcher'
is a BaseOptimizer if 'pipeline_space' is absent.
Args:
run_pipeline: The main function to run the pipeline.
root_directory (str): The root directory for storing experiment data.
pipeline_space: The space of the pipeline configurations.
max_cost_total: The maximum total cost allowed for the experiments.
max_evaluation_total: The maximum number of evaluations allowed.
searcher: The optimizer to use for the optimization procedure.
Parameters:
run_pipeline: Function for the pipeline execution.
root_directory (str): Directory path for data storage.
        pipeline_space: The search space for this run.
max_cost_total: Max allowed total cost for experiments.
max_evaluation_total: Max allowed evaluations.
searcher: Optimizer for the configuration space.
Raises:
ValueError: If any of the essential arguments are missing or both
'max_cost_total' and 'max_evaluation_total' are not provided.
ValueError: Missing or invalid essential arguments.
"""
if not run_pipeline:
raise ValueError("'run_pipeline' is required but was not provided.")
Expand Down
Loading

0 comments on commit 701ef7a

Please sign in to comment.