diff --git a/.github/workflows/pr-conventional-commit.yml b/.github/workflows/pr-conventional-commit.yml
new file mode 100644
index 000000000..86f79c0cc
--- /dev/null
+++ b/.github/workflows/pr-conventional-commit.yml
@@ -0,0 +1,15 @@
+name: PR Conventional Commit Validation
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, edited]
+
+jobs:
+  validate-pr-title:
+    runs-on: ubuntu-latest
+    steps:
+      - name: PR Conventional Commit Validation
+        uses: ytanikin/PRConventionalCommits@1.2.0
+        with:
+          task_types: '["feat","fix","docs","test","ci","refactor","perf","chore","revert"]'
+          add_label: 'false'
\ No newline at end of file
diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml
index 2eb93db13..33b2e805d 100644
--- a/.github/workflows/pre-commit.yaml
+++ b/.github/workflows/pre-commit.yaml
@@ -25,10 +25,15 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
+      - name: install the latest version of uv
+        uses: astral-sh/setup-uv@v3
         with:
-          python-version: 3.8
-      - run: pip install pre-commit
-      - run: pre-commit install
-      - run: pre-commit run --all-files
-
+          version: latest
+      - name: create virtual environment
+        run: uv venv --python '3.10'
+      - name: install pre-commit
+        run: uv pip install pre-commit
+      - name: install pre-commit hooks
+        run: uv run pre-commit install
+      - name: run pre-commit hooks
+        run: uv run pre-commit run --all-files
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index dd60a230f..a7b88cb2d 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11']
+        python-version: ['3.10', '3.11', '3.12']
         os: [ubuntu-latest, macos-latest, windows-latest]
     defaults:
       run:
@@ -25,12 +25,14 @@
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v4
-      - run: pipx install poetry
-      - uses: actions/setup-python@v5
+      - name: install the latest version of uv
+        uses: astral-sh/setup-uv@v3
         with:
-          python-version: ${{ matrix.python-version }}
-          cache: 'poetry'
-          cache-dependency-path: '**/pyproject.toml'
-      - run: poetry install
-      - run: poetry run pytest -m "" # Run all markers
-
+          version: latest
+      - name: create venv
+        run: uv venv --python ${{ matrix.python-version }}
+      - name: install dependencies
+        run: uv pip install -e ".[dev]"
+      - name: run tests
+        run: uv run pytest -m "" # Run all markers
+
diff --git a/.gitignore b/.gitignore
index e8be93e7b..651960faf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
 # Python
 __pycache__
 dist
+**/*.egg-info
+uv.lock

 # Log files
 *.out
@@ -19,9 +21,6 @@
 site/*
 .vscode/
 .idea/

-# Poetry
-poetry.lock
-
 # Misc
 *.sh
 *.model
@@ -47,3 +46,6 @@
 jahs_bench_data/

 # Yaml tests
 path
+
+# From example that uses MNIST
+.data
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 92ff23565..e7916aee9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ files: |
   )/.*\.py$
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: check-added-large-files
         files: ".*"
@@ -27,7 +27,7 @@
         files: '^src/.*\.py$'

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.11.1
+    rev: v1.14.1
     hooks:
       - id: mypy
         files: |
           (?x)^(
           )/.*\.py$
         additional_dependencies:
           - "types-pyyaml"
+          - "types-requests"
         args:
           - "--no-warn-return-any" # Disable this because it doesn't know about 3rd party imports
           - "--ignore-missing-imports"
           - "--show-traceback"

   - repo: https://github.com/python-jsonschema/check-jsonschema
-    rev: 0.29.1
+    rev: 0.31.0
     hooks:
       - id: check-github-workflows
-        files: '^github/workflows/.*\.ya?ml$'
+        files: '^\.github/workflows/.*\.ya?ml$'

       - id: check-dependabot
         files: '^\.github/dependabot\.ya?ml$'

@@ -51,7 +52,7 @@
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.5.5
+    rev: v0.8.6
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix, --no-cache]
diff --git a/CITATION.cff b/CITATION.cff
index f62a3157c..1536ffbed 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -21,13 +21,9 @@ authors:
     given-names: Carl
   - family-names: Binxin
     given-names: Ru
-  - family-names: Kober
-    given-names: Nils
-  - family-names: Vallaeys
-    given-names: Théophane
   - family-names: Hutter
     given-names: Frank
 title: "Neural Pipeline Search (NePS)"
-version: 0.12.1
-date-released: 2024-07-03
+version: 0.12.2
+date-released: 2024-07-09
 url: "https://github.com/automl/neps"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d311fece2..8c3d08ced 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -16,67 +16,49 @@
 Automatic checks are run on every pull request and on every commit to `master`.

-There are three required steps and one optional:
+There are four steps:

-1. Optional: Install miniconda and create an environment
-1. Install poetry
-1. Install the neps package using poetry
+1. Install uv
+1. Clone the neps repository
+1. Create a virtual environment and install the neps package
 1. Activate pre-commit for the repository

 For instructions see below.

-## 1. Optional: Install miniconda and create a virtual environment
+## 1. Install uv

-To manage python versions install e.g., miniconda with
+First, install uv, e.g., via

 ```bash
-wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O install_miniconda.sh
-bash install_miniconda.sh -b -p $HOME/.conda # Change to place of preference
-rm install_miniconda.sh
+# On macOS and Linux.
+curl -LsSf https://astral.sh/uv/install.sh | sh
 ```

-Consider running `~/.conda/bin/conda init` or `~/.conda/bin/conda init zsh` .
-
-Then finally create the environment and activate it
-
 ```bash
-conda create -n neps python=3.10
-conda activate neps
+# On Windows.
+powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
 ```

-## 2. Install poetry
-
-First, install poetry, e.g., via
+## 2. Clone the neps repository

 ```bash
-curl -sSL https://install.python-poetry.org | python3 -
-# or directly into your virtual env using `pip install poetry`
+git clone https://github.com/automl/neps.git
+cd neps
 ```

-Then consider appending
+## 3. Create a virtual environment and install the neps package

 ```bash
-export PATH="$HOME/.local/bin:$PATH"
-```
-
-to your `.zshrc` / `.bashrc` or alternatively simply running the export manually.
-
-## 3. Install the neps Package Using poetry
-
-Clone the repository, e.g.,
-
-```bash
-git clone https://github.com/automl/neps.git
-cd neps
+uv venv --python 3.11
+source .venv/bin/activate
 ```

 Then, inside the main directory of neps run

 ```bash
-poetry install
+uv pip install -e ".[dev]"
 ```

-This will installthe neps package but also additional dev dependencies.
+This will install the neps package as well as additional dev dependencies.

 ## 4. Activate pre-commit for the repository

 With the python environment used to install the neps package run in the main directory of neps

@@ -93,9 +75,6 @@
 your choice, e.g.
 [VSCode](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff),
 [PyCharm](https://plugins.jetbrains.com/plugin/20574-ruff).
- -# Checks and Tests - We have setup checks and tests at several points in the development flow: - At every commit we automatically run a suite of [pre-commit](https://pre-commit.com/) hooks that perform static code analysis, autoformating, and sanity checks. @@ -104,11 +83,13 @@ This is setup during our [installation process](https://automl.github.io/neps/co The tests correspond directly to examples in [neps_examples](https://github.com/automl/neps/tree/master/neps_examples) and only check for crash-causing errors. - At every push all integration tests and regression tests are run automatically using [github actions](https://github.com/automl/neps/actions). -## Linting (Ruff) +## Checks and tests + +### Linting (Ruff) For linting we use `ruff` for checking code quality. You can install it locally and use it as so: ```bash -pip install ruff +uv pip install ruff ruff check --fix neps # the --fix flag will try to fix issues it can automatically ``` @@ -127,11 +108,11 @@ The configuration of `ruff` is in the `pyproject.toml` file and we refer you to There you can find the documentation for all of the rules employed. -## Type Checking (Mypy) +### Type Checking (Mypy) For type checking we use `mypy`. You can install it locally and use it as so: ```bash -pip install mypy +uv pip install mypy mypy neps ``` @@ -159,22 +140,22 @@ or types defined from NePS, there is probably a good reason for a mypy error. If you have issues regarding typing, please feel free to reach out for help `@eddiebergman`. -## Examples and Integration Tests +### Examples and Integration Tests -We use examples in [neps_examples](https://github.com/automl/neps/tree/master/neps_examples) as integration tests, which we run from the main directory via +We use some examples in [neps_examples](https://github.com/automl/neps/tree/master/neps_examples) as integration tests, which we run from the main directory via ```bash pytest ``` -If tests fail for you on the master, please raise an issue on github, preferabbly with some informationon the error, +If tests fail for you on the master, please raise an issue on github, preferably with some information on the error, traceback and the environment in which you are running, i.e. python version, OS, etc. ## Regression Tests Regression tests are run on each push to the repository to assure the performance of the optimizers don't degrade. -Currently, regression runs are recorded on JAHS-Bench-201 data for 2 tasks: `cifar10` and `fashion_mnist` and only for optimizers: `random_search`, `bayesian_optimization`, `mf_bayesian_optimization`, `regularized_evolution`. +Currently, regression runs are recorded on JAHS-Bench-201 data for 2 tasks: `cifar10` and `fashion_mnist` and only for optimizers: `random_search`, `bayesian_optimization`, `mf_bayesian_optimization`. This information is stored in the `tests/regression_runner.py` as two lists: `TASKS`, `OPTIMIZERS`. The recorded results are stored as a json dictionary in the `tests/losses.json` file. @@ -212,11 +193,12 @@ In the case of regression test failure, try running it again first, if the probl You can also run tests locally by running: ``` -poetry run pytest -m regression_all +uv run pytest -m regression_all ``` ## Disabling and Skipping Checks etc. + ### Pre-commit: How to not run hooks? To commit without running `pre-commit` use `git commit --no-verify -m `. 
@@ -231,32 +213,31 @@ There are two options:

 code = "foo"  # type: ignore
 ```

-## Managing Dependencies
+### Managing Dependencies

-To manage dependencies and for package distribution we use [poetry](https://python-poetry.org/docs/) (replaces pip).
+To manage dependencies we use [uv](https://docs.astral.sh/uv/getting-started/), which replaces pip.

-## Add dependencies
+#### Add dependencies

 To install a dependency use

 ```bash
-poetry add dependency
+uv add dependency
 ```

 and commit the updated `pyproject.toml` to git.

-For more advanced dependency management see examples in `pyproject.toml` or have a look at the [poetry documentation](https://python-poetry.org/).
+For more advanced dependency management see examples in `pyproject.toml` or have a look at the [uv documentation](https://docs.astral.sh/uv/getting-started/).

-## Install dependencies added by others
+#### Install dependencies added by others

 When other contributors added dependencies to `pyproject.toml`, you can install them via

 ```bash
-poetry lock
-poetry install
+uv pip install -e ".[dev]"
 ```

-# Documentation
+## Documentation

 We use [MkDocs](https://www.mkdocs.org/getting-started/), more specifically [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) for documentation.
 To support documentation for multiple versions, we use the plugin [mike](https://github.com/jimporter/mike).
@@ -278,7 +259,7 @@ To publish the documentation run

 mike deploy 0.5.1 latest -p
 ```

-# Releasing a New Version
+## Releasing a New Version

 There are four steps to releasing a new version of neps:

@@ -288,42 +269,30 @@
 3. Update Documentation
 4. Publish on PyPI

-## 0. Understand Semantic Versioning
+### 0. Understand Semantic Versioning

 We follow the [semantic versioning](https://semver.org) scheme.

-## 1. Update the Package Version and CITATION.cff
+### 1. Run Tests

 ```bash
-poetry version v0.9.0
+uv run pytest
 ```

-and manually change the version specified in `CITATION.cff`.
-
-## 2. Commit with a Version Tag
-
-First commit and test
+### 2. Update the Package Version and CITATION.cff

 ```bash
-git add pyproject.toml
-git commit -m "Bump version from v0.8.4 to v0.9.0"
-pytest
+bump-my-version bump
 ```

-Then tag and push
-
-```bash
-git tag v0.9.0
-git push --tags
-git push
-```
+This will automatically update the version in `pyproject.toml` and `CITATION.cff`, tag the commit, and push it to the remote repository.

-## 3. Update Documentation
+### 3. Update Documentation

 First check if the documentation has any issues via

 ```bash
-mike deploy 0.9.0 latest -u
+mike deploy latest -u
 mike serve
 ```

 and then looking at it.

 Afterwards, publish it via

 ```bash
-mike deploy 0.9.0 latest -up
+mike deploy latest -up
 ```

-## 4. Publish on PyPI
+### 4. Publish on PyPI

 To publish to PyPI:

-1. Get publishing rights, e.g., asking Danny or Maciej or Neeratyoy.
+1. Get publishing rights, e.g., by asking Danny or Neeratyoy.
 2. Be careful, once on PyPI we can not change things.
 3. Run

 ```bash
-poetry publish --build
+uv build
+uv publish
 ```

 This will ask for your PyPI credentials.
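For context on the `bump-my-version bump` step above: `bump-my-version` reads its settings from a `[tool.bumpversion]` section in `pyproject.toml`. A minimal sketch of such a configuration follows; the concrete values and file entries are illustrative assumptions, not the project's actual settings.

```toml
# Sketch of a bump-my-version configuration in pyproject.toml.
# Values here are illustrative assumptions; the real settings may differ.
[tool.bumpversion]
current_version = "0.12.2"
commit = true                 # create the version-bump commit automatically
tag = true                    # tag that commit, e.g. v0.12.2
tag_name = "v{new_version}"

# Files rewritten on each bump.
[[tool.bumpversion.files]]
filename = "pyproject.toml"

[[tool.bumpversion.files]]
filename = "CITATION.cff"
search = "version: {current_version}"
replace = "version: {new_version}"
```

With a configuration along these lines, `bump-my-version bump patch` (or `minor`/`major`) performs the corresponding semantic-version increment.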
diff --git a/README.md b/README.md
index 8e24cd342..462587abd 100644
--- a/README.md
+++ b/README.md
@@ -5,25 +5,24 @@
 [![License](https://img.shields.io/pypi/l/neural-pipeline-search?color=informational)](LICENSE)
 [![Tests](https://github.com/automl/neps/actions/workflows/tests.yaml/badge.svg)](https://github.com/automl/neps/actions)

-Welcome to NePS, a powerful and flexible Python library for hyperparameter optimization (HPO) and neural architecture search (NAS) with its primary goal: **make HPO and NAS usable for deep learners in practice**.
+Welcome to NePS, a powerful and flexible Python library for hyperparameter optimization (HPO) and neural architecture search (NAS) that **makes HPO and NAS practical for deep learners**.

-NePS houses recently published and also well-established algorithms that can all be run massively parallel on distributed setups, with tools to analyze runs, restart runs, etc., all **tailored to the needs of deep learning experts**.
+NePS houses recently published and well-established algorithms that can all be run massively in parallel on distributed setups; in general, NePS is tailored to the needs of deep learning experts.

-Take a look at our [documentation](https://automl.github.io/neps/latest/) for all the details on how to use NePS!
+To learn about NePS, check out [the documentation](https://automl.github.io/neps/latest/), [our examples](neps_examples/), or a [Colab tutorial](https://colab.research.google.com/drive/11IOhkmMKsIUhWbHyMYzT0v786O9TPWlH?usp=sharing).

 ## Key Features

-In addition to the features offered by traditional HPO and NAS libraries, NePS, e.g., stands out with:
+In addition to the features offered by traditional HPO and NAS libraries, NePS stands out with:
-
-1. [**Hyperparameter Optimization (HPO) with Prior Knowledge and Cheap Proxies:**](neps_examples/template/priorband_template.py)

-NePS excels in efficiently tuning hyperparameters using algorithms that enable users to make use of their prior knowledge within the search space. This is leveraged by the insights presented in: - - [PriorBand: Practical Hyperparameter Optimization in the Age of Deep Learning](https://arxiv.org/abs/2306.12370) - - [πBO: Augmenting Acquisition Functions with User Beliefs for Bayesian Optimization](https://arxiv.org/abs/2204.11051)

-1. [**Neural Architecture Search (NAS) with General Search Spaces:**](neps_examples/basic_usage/architecture.py)

- NePS is equipped to handle context-free grammar search spaces, providing advanced capabilities for designing and optimizing architectures. this is leveraged by the insights presented in: - - [Construction of Hierarchical Neural Architecture Search Spaces based on Context-free Grammars](https://arxiv.org/abs/2211.01842)

-1. [**Easy Parallelization and Design Tailored to DL:**](https://automl.github.io/neps/latest/examples/efficiency/)

+1. **Hyperparameter Optimization (HPO) Efficient Enough for Deep Learning:**
+   NePS excels in efficiently tuning hyperparameters using algorithms that let users leverage their prior knowledge, while also drawing on many other efficiency boosters.
+    - [PriorBand: Practical Hyperparameter Optimization in the Age of Deep Learning (NeurIPS 2023)](https://arxiv.org/abs/2306.12370)
+    - [πBO: Augmenting Acquisition Functions with User Beliefs for Bayesian Optimization (ICLR 2022)](https://arxiv.org/abs/2204.11051)

+1. **Neural Architecture Search (NAS) with Expressive Search Spaces:**
+ NePS provides capabilities for optimizing DL architectures in an expressive and natural fashion. + - [Construction of Hierarchical Neural Architecture Search Spaces based on Context-free Grammars (NeurIPS 2023)](https://arxiv.org/abs/2211.01842)

+1. **Zero-effort Parallelization and an Experience Tailored to DL:**
NePS simplifies the process of parallelizing optimization tasks both on individual computers and in distributed computing environments. As NePS is made for deep learners, all technical choices are made with DL in mind and common DL tools such as Tensorboard are [embraced](https://automl.github.io/neps/latest/reference/analyse/#visualizing-results). @@ -36,12 +35,6 @@ To install the latest release from PyPI run pip install neural-pipeline-search ``` -To get the latest version from Github run - -```bash -pip install git+https://github.com/automl/neps.git -``` - ## Basic Usage Using `neps` always follows the same pattern: @@ -60,7 +53,7 @@ import logging # 1. Define a function that accepts hyperparameters and computes the validation error def run_pipeline( - hyperparameter_a: float, hyperparameter_b: int, architecture_parameter: str + hyperparameter_a: float, hyperparameter_b: int, architecture_parameter: str ) -> dict: # Create your model model = MyModel(architecture_parameter) @@ -74,14 +67,13 @@ def run_pipeline( # 2. Define a search space of parameters; use the same parameter names as in run_pipeline pipeline_space = dict( - hyperparameter_a=neps.FloatParameter( + hyperparameter_a=neps.Float( lower=0.001, upper=0.1, log=True # The search space is sampled in log space ), - hyperparameter_b=neps.IntegerParameter(lower=1, upper=42), - architecture_parameter=neps.CategoricalParameter(["option_a", "option_b"]), + hyperparameter_b=neps.Integer(lower=1, upper=42), + architecture_parameter=neps.Categorical(["option_a", "option_b"]), ) - # 3. Run the NePS optimization logging.basicConfig(level=logging.INFO) neps.run( diff --git a/docs/_code/api_generator.py b/docs/_code/api_generator.py index 1b9951bf3..b19f40a25 100644 --- a/docs/_code/api_generator.py +++ b/docs/_code/api_generator.py @@ -2,7 +2,7 @@ # https://mkdocstrings.github.io/recipes/ """ -from __future__ import annotations + import logging from pathlib import Path diff --git a/docs/_code/example_generator.py b/docs/_code/example_generator.py index ca866a0ec..6452bbda4 100644 --- a/docs/_code/example_generator.py +++ b/docs/_code/example_generator.py @@ -2,7 +2,6 @@ # https://mkdocstrings.github.io/recipes/ """ -from __future__ import annotations import logging from pathlib import Path @@ -16,7 +15,6 @@ EXAMPLE_FOLDER = ROOT / "neps_examples" TAB = " " - if not SRCDIR.exists(): raise FileNotFoundError( f"{SRCDIR} does not exist, make sure you are running this from the root of the repository." diff --git a/docs/citations.md b/docs/citations.md index b697c49fe..39b1759d2 100644 --- a/docs/citations.md +++ b/docs/citations.md @@ -7,19 +7,19 @@ For citing NePS, please refer to the following: ### APA Style ```apa -Stoll, D., Mallik, N., Schrodi, S., Janowski, M., Garibov, S., Abou Chakra, T., Rogalla, D., Bergman, E., Hvarfner, C., Binxin, R., Kober, N., Vallaeys, T., & Hutter, F. (2023). Neural Pipeline Search (NePS) (Version 0.11.0) [Computer software]. https://github.com/automl/neps +Stoll, D., Mallik, N., Schrodi, S., Bergman, E., Janowski, M., Garibov, S., Abou Chakra, T., Rogalla, D., Bergman, E., Hvarfner, C., Binxin, R., & Hutter, F. (2023). Neural Pipeline Search (NePS) (Version 0.12.2) [Computer software]. 
https://github.com/automl/neps ``` ### BibTex Style ```bibtex @software{Stoll_Neural_Pipeline_Search_2023, -author = {Stoll, Danny and Mallik, Neeratyoy and Schrodi, Simon and Janowski, Maciej and Garibov, Samir and Abou Chakra, Tarek and Rogalla, Daniel and Bergman, Eddie and Hvarfner, Carl and Binxin, Ru and Kober, Nils and Vallaeys, Théophane and Hutter, Frank}, +author = {Stoll, Danny and Mallik, Neeratyoy and Schrodi, Simon and Bergmann, Eddie and Janowski, Maciej and Garibov, Samir and Abou Chakra, Tarek and Rogalla, Daniel and Bergman, Eddie and Hvarfner, Carl and Binxin, Ru and Hutter, Frank}, month = oct, title = {{Neural Pipeline Search (NePS)}}, url = {https://github.com/automl/neps}, -version = {0.11.0}, -year = {2023} +version = {0.12.2}, +year = {2024} } ``` diff --git a/docs/dev_docs/roadmap.md b/docs/dev_docs/roadmap.md index c8bf95106..9972334ed 100644 --- a/docs/dev_docs/roadmap.md +++ b/docs/dev_docs/roadmap.md @@ -4,66 +4,39 @@ ### Features -- Improve handling of multi-fidelity for large scale (slurm script modification) -- Evaluate and maybe improve ease-of-use of NePS and DDP etc. -- Optimize dependencies -- Improved examples - -### Fixes - -- Acq search mutation for HPs potentially only mutates 1 parameter -- `ignore_errors` should work seamlessly with all optimizers - -### Refactoring - -- Rename: run_pipeline = evaluate_pipeline | evaluate_pipeline_error | compute_pipeline_error | train_and_evaluate -- Rename: loss = validation_error | error | pipeline_error -- Rename: XParameter = XSpace or just X? -- Rename: default-x to prior-x -- Rename: Use max_cost_total everywhere instead of budget +- Improve large scale experience + - Result saving function (Samir) + - Priorband default sampling / pass evaluated configs to neps.run (Samir) + - Document large scale + - Evaluate and maybe improve ease-of-use of NePS for DDP (Gopalji) +- Optimize dependencies (Anton) ### Documentation -- Keep citations doc up to date - -### Tests - -- Regression tests to run on each push - +- Remove templates (Danny) +- Add New Lightning example (Gopalji) +- Add DDP examples (Gopalji) +- Add some larger examples (Gopalji, Anton) +- Improve new lightning example by adding Tensorboard (Tarek) +- Add optimizer pages (Anton, Neeratyoy) ## Before 1.0.0 version ### Features +- Utility neps.clean to manage existing run results - Generate pdf plot after each evaluation - Finegrained control over user prior - Print search space upon run - Utility to generate code for best architecture - Core algorithmic feature set (research) -### Fixes - -- Contact https://pypi.org/project/neps/ to free up `pip install neps` - -### Refactoring - -- Improve neps.optimizers: - - Maintained vs unmaintained optimizers - - Remove unnecessary / broken optimizers - - Merge GP and hierarchical GP -- Break up search space and config aspect - ### Documentation -- NAS documentation - -## After 1.0.0 - -### Features - -- Utility neps.clean to manage existing run results -- Collect data optionally via phone-home to webserver +- Add example for spawning cloud instances via run pipeline +- Add NAS documentation +- Keep a changelog, add to it before each release -### Documentation +### Tests -- Keep a changelog +- Regression tests to run on cluster on each version release diff --git a/docs/doc_yamls/architecture_search_space.py b/docs/doc_yamls/architecture_search_space.py index 36f8bb381..66771cb3b 100644 --- a/docs/doc_yamls/architecture_search_space.py +++ b/docs/doc_yamls/architecture_search_space.py @@ -1,4 +1,4 @@ -from 
__future__ import annotations + from torch import nn import neps from neps.search_spaces.architecture import primitives as ops @@ -86,12 +86,12 @@ def set_recursive_attribute(op_name, predecessor_values): pipeline_space = dict( - architecture=neps.ArchitectureParameter( + architecture=neps.Architecture( set_recursive_attribute=set_recursive_attribute, structure=structure, primitives=primitives, ), - optimizer=neps.CategoricalParameter(choices=["sgd", "adam"]), - learning_rate=neps.FloatParameter(lower=10e-7, upper=10e-3, log=True), + optimizer=neps.Categorical(choices=["sgd", "adam"]), + learning_rate=neps.Float(lower=10e-7, upper=10e-3, log=True), ) diff --git a/docs/doc_yamls/customizing_neps_optimizer.yaml b/docs/doc_yamls/customizing_neps_optimizer.yaml index a176dc743..5d98140f1 100644 --- a/docs/doc_yamls/customizing_neps_optimizer.yaml +++ b/docs/doc_yamls/customizing_neps_optimizer.yaml @@ -1,6 +1,6 @@ # Customizing NePS Searcher -run_pipeline: - path: path/to/your/run_pipeline.py # Path to the function file +evaluate_pipeline: + path: path/to/your/evaluate_pipeline.py # Path to the function file name: example_pipeline # Function name within the file pipeline_space: @@ -19,8 +19,3 @@ searcher: name: "my_bayesian" # optional; changing the searcher_name for better recognition # Specific arguments depending on the searcher initial_design_size: 7 - surrogate_model: gp - acquisition: EI - acquisition_sampler: random - random_interleave_prob: 0.1 - diff --git a/docs/doc_yamls/defining_hooks.yaml b/docs/doc_yamls/defining_hooks.yaml index d63c3ce58..67032ab69 100644 --- a/docs/doc_yamls/defining_hooks.yaml +++ b/docs/doc_yamls/defining_hooks.yaml @@ -1,6 +1,6 @@ # Hooks -run_pipeline: - path: path/to/your/run_pipeline.py # Path to the function file +evaluate_pipeline: + path: path/to/your/evaluate_pipeline.py # Path to the function file name: example_pipeline # Function name within the file pipeline_space: diff --git a/docs/doc_yamls/full_configuration_template.yaml b/docs/doc_yamls/full_configuration_template.yaml index 6b28ccf43..6f90e77b1 100644 --- a/docs/doc_yamls/full_configuration_template.yaml +++ b/docs/doc_yamls/full_configuration_template.yaml @@ -1,6 +1,6 @@ # Full Configuration Template for NePS -run_pipeline: - path: path/to/your/run_pipeline.py # Path to the function file +evaluate_pipeline: + path: path/to/your/evaluate_pipeline.py # Path to the function file name: example_pipeline # Function name within the file pipeline_space: @@ -31,7 +31,7 @@ max_evaluations_per_run: continue_until_max_evaluation_completed: false # Error Handling -loss_value_on_error: +objective_to_minimize_value_on_error: cost_value_on_error: ignore_errors: diff --git a/docs/doc_yamls/loading_own_optimizer.yaml b/docs/doc_yamls/loading_own_optimizer.yaml index b23cd0823..5d656372f 100644 --- a/docs/doc_yamls/loading_own_optimizer.yaml +++ b/docs/doc_yamls/loading_own_optimizer.yaml @@ -1,6 +1,6 @@ # Loading Optimizer Class -run_pipeline: - path: path/to/your/run_pipeline.py # Path to the function file +evaluate_pipeline: + path: path/to/your/evaluate_pipeline.py # Path to the function file name: example_pipeline # Function name within the file pipeline_space: @@ -19,5 +19,3 @@ searcher: name: CustomOptimizer # class name within the file # Specific arguments depending on your searcher initial_design_size: 7 - surrogate_model: gp - acquisition: EI diff --git a/docs/doc_yamls/loading_pipeline_space_dict.yaml b/docs/doc_yamls/loading_pipeline_space_dict.yaml index 966438652..53ff9ec6a 100644 --- 
a/docs/doc_yamls/loading_pipeline_space_dict.yaml +++ b/docs/doc_yamls/loading_pipeline_space_dict.yaml @@ -1,6 +1,6 @@ # Loading pipeline space from a python dict -run_pipeline: - path: path/to/your/run_pipeline.py # Path to the function file +evaluate_pipeline: + path: path/to/your/evaluate_pipeline.py # Path to the function file name: example_pipeline # Function name within the file pipeline_space: diff --git a/docs/doc_yamls/outsourcing_optimizer.yaml b/docs/doc_yamls/outsourcing_optimizer.yaml index 6cdf3a2c7..1ca3a764f 100644 --- a/docs/doc_yamls/outsourcing_optimizer.yaml +++ b/docs/doc_yamls/outsourcing_optimizer.yaml @@ -1,6 +1,6 @@ # Optimizer settings from YAML configuration -run_pipeline: - path: path/to/your/run_pipeline.py # Path to the function file +evaluate_pipeline: + path: path/to/your/evaluate_pipeline.py # Path to the function file name: example_pipeline # Function name within the file pipeline_space: diff --git a/docs/doc_yamls/outsourcing_pipeline_space.yaml b/docs/doc_yamls/outsourcing_pipeline_space.yaml index c3d8eaeda..d3f7a65f8 100644 --- a/docs/doc_yamls/outsourcing_pipeline_space.yaml +++ b/docs/doc_yamls/outsourcing_pipeline_space.yaml @@ -1,6 +1,6 @@ # Pipeline space settings from separate YAML -run_pipeline: - path: path/to/your/run_pipeline.py # Path to the function file +evaluate_pipeline: + path: path/to/your/evaluate_pipeline.py # Path to the function file name: example_pipeline # Function name within the file pipeline_space: path/to/your/pipeline_space.yaml diff --git a/docs/doc_yamls/pipeline_space.yaml b/docs/doc_yamls/pipeline_space.yaml index 939a5358b..8f877b98f 100644 --- a/docs/doc_yamls/pipeline_space.yaml +++ b/docs/doc_yamls/pipeline_space.yaml @@ -3,8 +3,8 @@ learning_rate: lower: 1e-5 upper: 1e-1 log: true # Log scale for learning rate - default: 1e-2 - default_confidence: "medium" + prior: 1e-2 + prior_confidence: "medium" epochs: lower: 5 upper: 20 @@ -12,10 +12,10 @@ epochs: dropout_rate: lower: 0.1 upper: 0.5 - default: 0.2 - default_confidence: "high" + prior: 0.2 + prior_confidence: "high" optimizer: choices: [adam, sgd, adamw] - default: adam - # if default confidence is not defined it gets its default 'low' + prior: adam + # if prior confidence is not defined it gets its default 'low' batch_size: 64 diff --git a/docs/doc_yamls/run_pipeline.py b/docs/doc_yamls/run_pipeline.py index cfea0febb..29b33e723 100644 --- a/docs/doc_yamls/run_pipeline.py +++ b/docs/doc_yamls/run_pipeline.py @@ -2,6 +2,6 @@ def example_pipeline(learning_rate, optimizer, epochs): model = initialize_model() - training_loss = train_model(model, optimizer, learning_rate, epochs) - evaluation_loss = evaluate_model(model) - return {"loss": evaluation_loss, "training_loss": training_loss} + training_objective_to_minimize = train_model(model, optimizer, learning_rate, epochs) + evaluation_objective_to_minimize = evaluate_model(model) + return {"objective_to_minimize": evaluation_objective_to_minimize, "training_objective_to_minimize": training_objective_to_minimize} diff --git a/docs/doc_yamls/run_pipeline_architecture.py b/docs/doc_yamls/run_pipeline_architecture.py index edc12469f..bece59330 100644 --- a/docs/doc_yamls/run_pipeline_architecture.py +++ b/docs/doc_yamls/run_pipeline_architecture.py @@ -11,4 +11,4 @@ def example_pipeline(architecture, optimizer, learning_rate): model = architecture.to_pytorch() training_loss = train_model(model, optimizer, learning_rate) evaluation_loss = evaluate_model(model) - return {"loss": evaluation_loss, "training_loss": 
training_loss}
+    return {"objective_to_minimize": evaluation_loss, "training_loss": training_loss}
\ No newline at end of file
diff --git a/docs/doc_yamls/run_pipeline_big_search_space.py b/docs/doc_yamls/run_pipeline_big_search_space.py
index 542283f0e..346fe3bb4 100644
--- a/docs/doc_yamls/run_pipeline_big_search_space.py
+++ b/docs/doc_yamls/run_pipeline_big_search_space.py
@@ -1,6 +1,6 @@
 def example_pipeline(learning_rate, optimizer, epochs, batch_size, dropout_rate):
     model = initialize_model(dropout_rate)
-    training_loss = train_model(model, optimizer, learning_rate, epochs, batch_size)
-    evaluation_loss = evaluate_model(model)
-    return {"loss": evaluation_loss, "training_loss": training_loss}
+    training_objective_to_minimize = train_model(model, optimizer, learning_rate, epochs, batch_size)
+    evaluation_objective_to_minimize = evaluate_model(model)
+    return {"objective_to_minimize": evaluation_objective_to_minimize, "training_objective_to_minimize": training_objective_to_minimize}
diff --git a/docs/doc_yamls/run_pipeline_extended.py b/docs/doc_yamls/run_pipeline_extended.py
index c891f4d91..7a57f0719 100644
--- a/docs/doc_yamls/run_pipeline_extended.py
+++ b/docs/doc_yamls/run_pipeline_extended.py
@@ -1,6 +1,6 @@
 def example_pipeline(learning_rate, optimizer, epochs, batch_size):
     model = initialize_model()
-    training_loss = train_model(model, optimizer, learning_rate, epochs, batch_size)
-    evaluation_loss = evaluate_model(model)
-    return {"loss": evaluation_loss, "training_loss": training_loss}
+    training_objective_to_minimize = train_model(model, optimizer, learning_rate, epochs, batch_size)
+    evaluation_objective_to_minimize = evaluate_model(model)
+    return {"objective_to_minimize": evaluation_objective_to_minimize, "training_objective_to_minimize": training_objective_to_minimize}
diff --git a/docs/doc_yamls/set_up_optimizer.yaml b/docs/doc_yamls/set_up_optimizer.yaml
index f65af7431..94922d78a 100644
--- a/docs/doc_yamls/set_up_optimizer.yaml
+++ b/docs/doc_yamls/set_up_optimizer.yaml
@@ -1,11 +1,5 @@
 strategy: bayesian_optimization
 # Specific arguments depending on the searcher
 initial_design_size: 7
-surrogate_model: gp
-acquisition: EI
-log_prior_weighted: false
-acquisition_sampler: random
-random_interleave_prob: 0.1
-disable_priors: false
-prior_confidence: high
-sample_default_first: false
+use_priors: true
+sample_prior_first: false
diff --git a/docs/doc_yamls/simple_example_including_run_pipeline.yaml b/docs/doc_yamls/simple_example_including_run_pipeline.yaml
index afe8e9b83..ce9cc163a 100644
--- a/docs/doc_yamls/simple_example_including_run_pipeline.yaml
+++ b/docs/doc_yamls/simple_example_including_run_pipeline.yaml
@@ -1,6 +1,6 @@
-# Simple NePS configuration including run_pipeline
-run_pipeline:
-  path: path/to/your/run_pipeline.py  # Path to the function file
+# Simple NePS configuration including evaluate_pipeline
+evaluate_pipeline:
+  path: path/to/your/evaluate_pipeline.py  # Path to the function file
   name: example_pipeline  # Function name within the file

 pipeline_space:
diff --git a/docs/getting_started.md b/docs/getting_started.md
index 9f414c0a1..78cad7640 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -35,10 +35,11 @@ In code, the usage pattern can look like this:
 import neps
 import logging

-def run_pipeline( # (1)!
-    hyperparameter_a: float,
-    hyperparameter_b: int,
-    architecture_parameter: str,
+
+def run_pipeline(  # (1)!
+    hyperparameter_a: float,
+    hyperparameter_b: int,
+    architecture_parameter: str,
 ) -> dict:
     # insert here your own model
     model = MyModel(architecture_parameter)
@@ -49,7 +50,7 @@
     )

     return {
-        "loss": validation_error, #! (2)
+        "loss": validation_error,  # (2)!
         "info_dict": {
             "training_error": training_error
             # + Other metrics
@@ -58,9 +59,9 @@

 pipeline_space = {  # (3)!
-    "hyperparameter_b":neps.IntegerParameter(1, 42, is_fidelity=True), #! (4)
-    "hyperparameter_a":neps.FloatParameter(1e-3, 1e-1, log=True) #! (5)
-    "architecture_parameter": neps.CategoricalParameter(["option_a", "option_b", "option_c"]),
+    "hyperparameter_b": neps.Integer(1, 42, is_fidelity=True),  # (4)!
+    "hyperparameter_a": neps.Float(1e-3, 1e-1, log=True),  # (5)!
+    "architecture_parameter": neps.Categorical(["option_a", "option_b", "option_c"]),
 }

 if __name__ == "__main__":
diff --git a/docs/index.md b/docs/index.md
index 94168b41f..ec27c7193 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -11,17 +11,16 @@ NePS houses recently published and also well-established algorithms that can all

 ## Key Features

-In addition to the features offered by traditional HPO and NAS libraries, NePS, e.g., stands out with:
-
-
-1. [**Hyperparameter Optimization (HPO) with Prior Knowledge and Cheap Proxies:**](.examples/template/priorband_template.py)

-NePS excels in efficiently tuning hyperparameters using algorithms that enable users to make use of their prior knowledge within the search space. This is leveraged by the insights presented in: - - [PriorBand: Practical Hyperparameter Optimization in the Age of Deep Learning](https://arxiv.org/abs/2306.12370) - - [πBO: Augmenting Acquisition Functions with User Beliefs for Bayesian Optimization](https://arxiv.org/abs/2204.11051)

-1. [**Neural Architecture Search (NAS) with General Search Spaces:**](neps_examples/basic_usage/architecture.py)

- NePS is equipped to handle context-free grammar search spaces, providing advanced capabilities for designing and optimizing architectures. this is leveraged by the insights presented in: - - [Construction of Hierarchical Neural Architecture Search Spaces based on Context-free Grammars](https://arxiv.org/abs/2211.01842)

-1. [**Easy Parallelization and Design Tailored to DL:**](.examples/efficiency/)

+In addition to the features offered by traditional HPO and NAS libraries, NePS stands out with:
+
+1. **Hyperparameter Optimization (HPO) Efficient Enough for Deep Learning:**
+   NePS excels in efficiently tuning hyperparameters using algorithms that let users leverage their prior knowledge, while also drawing on many other efficiency boosters.
+    - [PriorBand: Practical Hyperparameter Optimization in the Age of Deep Learning (NeurIPS 2023)](https://arxiv.org/abs/2306.12370)
+    - [πBO: Augmenting Acquisition Functions with User Beliefs for Bayesian Optimization (ICLR 2022)](https://arxiv.org/abs/2204.11051)

+1. **Neural Architecture Search (NAS) with Expressive Search Spaces:**
+ NePS provides capabilities for designing and optimizing architectures in an expressive and natural fashion. + - [Construction of Hierarchical Neural Architecture Search Spaces based on Context-free Grammars (NeurIPS 2023)](https://arxiv.org/abs/2211.01842)

+1. **Zero-effort Parallelization and an Experience Tailored to DL:**
NePS simplifies the process of parallelizing optimization tasks both on individual computers and in distributed computing environments. As NePS is made for deep learners, all technical choices are made with DL in mind and common DL tools such as Tensorboard are [embraced](https://automl.github.io/neps/latest/reference/analyse/#visualizing-results). @@ -32,7 +31,8 @@ NePS excels in efficiently tuning hyperparameters using algorithms that enable u * [Reference documentation](./reference/neps_run.md) for a quick overview. * [API](./api/neps/api.md) for a more detailed reference. - * [Examples](./examples/template/basic_template.md) for copy-pastable code to get started. + * [Colab Tutorial](https://colab.research.google.com/drive/11IOhkmMKsIUhWbHyMYzT0v786O9TPWlH?usp=sharing) walking through NePS's main features. + * [Examples](./examples) for basic code snippets to get started. ## Installation @@ -42,12 +42,6 @@ To install the latest release from PyPI run pip install neural-pipeline-search ``` -To get the latest version from Github run - -```bash -pip install git+https://github.com/automl/neps.git -``` - ## Basic Usage Using `neps` always follows the same pattern: @@ -66,7 +60,7 @@ import logging # 1. Define a function that accepts hyperparameters and computes the validation error def run_pipeline( - hyperparameter_a: float, hyperparameter_b: int, architecture_parameter: str + hyperparameter_a: float, hyperparameter_b: int, architecture_parameter: str ) -> dict: # Create your model model = MyModel(architecture_parameter) @@ -80,14 +74,13 @@ def run_pipeline( # 2. Define a search space of parameters; use the same parameter names as in run_pipeline pipeline_space = dict( - hyperparameter_a=neps.FloatParameter( + hyperparameter_a=neps.Float( lower=0.001, upper=0.1, log=True # The search space is sampled in log space ), - hyperparameter_b=neps.IntegerParameter(lower=1, upper=42), - architecture_parameter=neps.CategoricalParameter(["option_a", "option_b"]), + hyperparameter_b=neps.Integer(lower=1, upper=42), + architecture_parameter=neps.Categorical(["option_a", "option_b"]), ) - # 3. Run the NePS optimization logging.basicConfig(level=logging.INFO) neps.run( diff --git a/docs/reference/cli.md b/docs/reference/cli.md new file mode 100644 index 000000000..86c23dce5 --- /dev/null +++ b/docs/reference/cli.md @@ -0,0 +1,270 @@ +# NePS Command Line Interface +This section provides a brief overview of the commands available in the NePS CLI. + +!!! note "Support of Development and Task ID" + The NePS arguments `development_stage_id` and `task_id` are only partially + supported. To retrieve results for a specific task or development stage, you must modify the `root_directory` to + point to the corresponding folder of your NePS results. For example, if you have task_id 1 and development_stage_id 4, + update your root_directory to root_directory/task_1/development_4. This can be done either by specifying the + --root-directory option in your command or by updating the root_directory in your corresponding `run_args` yaml + file. + + +--- +## **`init` Command** + +Generates a default `run_args` YAML configuration file, providing a template that you can customize for your experiments. + +**Arguments:** + + +- `-h, --help` (Optional): show this help message and exit +- `--config-path` (Optional): Optional custom path for generating the configuration file. Default is 'run_config.yaml'. +- `--template` (Optional): Optional, options between different templates. 
Required configs(basic) vs all neps configs (complete) +- `--database` (Optional): If set, creates the NePS database. This is required if you want to sample and report configurations using only CLI commands. Requires an existing config.yaml. + + +**Example Usage:** + +```bash +neps init --config-path custom/path/config.yaml --template complete +``` + +--- +## **`run` Command** + +Executes the optimization based on the provided configuration. This command serves as a CLI wrapper around `neps.run`, effectively mapping each CLI argument to a parameter in `neps.run`. It offers a flexible interface that allows you to override the existing settings specified in the YAML configuration file, facilitating dynamic adjustments for managing your experiments. + +**Arguments:** + + +- `-h, --help` (Optional): show this help message and exit +- `--run-args` (Optional): Path to the YAML configuration file. +- `--run-pipeline` (Optional): Optional: Provide the path to a Python file and a function name separated by a colon, e.g., 'path/to/module.py:function_name'. If provided, it overrides the run_pipeline setting from the YAML configuration. +- `--pipeline-space` (Optional): Path to the YAML file defining the search space for the optimization. This can be provided here or defined within the 'run_args' YAML file. +- `--root-directory` (Optional): The directory to save progress to. This is also used to synchronize multiple calls for parallelization. +- `--overwrite-working-directory` (Optional): If set, deletes the working directory at the start of the run. This is useful, for example, when debugging a run_pipeline function. +- `--development-stage-id` (Optional): Identifier for the current development stage, used in multi-stage projects. +- `--task-id` (Optional): Identifier for the current task, useful in projects with multiple tasks. +- `--post-run-summary` (Optional): Provide a summary of the results after running. +- `--no-post-run-summary` (Optional): Do not provide a summary of the results after running. +- `--max-evaluations-total` (Optional): Total number of evaluations to run. +- `--max-evaluations-per-run` (Optional): Number of evaluations a specific call should maximally do. +- `--continue-until-max-evaluation-completed` (Optional): If set, only stop after max-evaluations-total have been completed. This is only relevant in the parallel setting. +- `--max-cost-total` (Optional): No new evaluations will start when this cost is exceeded. Requires returning a cost + in the run_pipeline function. +- `--ignore-errors` (Optional): If set, ignore errors during the optimization process. +- `--loss-value-on-error` (Optional): Loss value to assume on error. +- `--cost-value-on-error` (Optional): Cost value to assume on error. +- `--searcher` (Optional): String key of searcher algorithm to use for optimization. +- `--searcher-kwargs` (Optional): Additional keyword arguments as key=value pairs for the searcher. + + +**Example Usage:** + +```bash +neps run --run-args path/to/config.yaml --max-evaluations-total 50 +``` + +--- +## **`status` Command** +Check the status of the NePS run. This command provides a summary of trials, including pending, evaluating, succeeded, and failed trials. You can filter the trials displayed based on their state. + +**Arguments:** + + +- `-h, --help` (Optional): show this help message and exit +- `--root-directory` (Optional): The path to your root_directory. If not provided, it will be loaded from run_config.yaml. +- `--pending` (Optional): Show only pending trials. 
+- `--evaluating` (Optional): Show only evaluating trials.
+- `--succeeded` (Optional): Show only succeeded trials.
+
+
+**Example Usage:**
+```bash
+neps status --root-directory path/to/directory --succeeded
+```
+
+---
+## **`info-config` Command**
+Provides detailed information about a specific configuration identified by its ID. This includes metadata, configuration values, and trial status.
+
+**Arguments:**
+
+
+- `id` (Required): The configuration ID to be used.
+
+
+- `-h, --help` (Optional): show this help message and exit
+- `--root-directory` (Optional): The path to your root_directory. If not provided, it will be loaded from run_config.yaml.
+
+
+**Example Usage:**
+```bash
+neps info-config 42 --root-directory path/to/directory
+```
+
+---
+## **`results` Command**
+Displays the results of the NePS run, listing all incumbent trials in reverse order (most recent first). Optionally, you can plot the progression of incumbents over trials, and you can dump all trials or only the incumbent trials to a file in the specified format.
+
+**Arguments:**
+
+
+- `-h, --help` (Optional): show this help message and exit
+- `--root-directory` (Optional): The path to your root_directory. If not provided, it will be loaded from run_config.yaml.
+- `--plot` (Optional): Plot the incumbents if set.
+- `--dump-all-configs` (Optional): Dump all information about the trials to a file in the specified format (csv, json, parquet).
+- `--dump-incumbents` (Optional): Dump only the information about the incumbent trials to a file in the specified format (csv, json, parquet).
+
+
+
+**Example Usage:**
+
+```bash
+neps results --root-directory path/to/directory --plot
+```
+
+---
+## **`errors` Command**
+Lists all errors found in the specified NePS run. This is useful for debugging or reviewing failed trials.
+
+**Arguments:**
+
+
+- `-h, --help` (Optional): show this help message and exit
+- `--root-directory` (Optional): The path to your root_directory. If not provided, it will be loaded from run_config.yaml.
+
+
+**Example Usage:**
+
+```bash
+neps errors --root-directory path/to/directory
+```
+
+
+---
+## **`sample-config` Command**
+The sample-config command allows users to generate new configurations based on the current state of the
+NePS optimizer. This is particularly useful when you need to manually intervene in the sampling process, such
+as allocating different computational resources to different configurations.
+
+!!! note "Note"
+    Before using the `sample-config` command, you need to initialize the database by running `neps init --database` if you haven't already executed `neps run`. Running `neps run` will also create a `NePsState`.
+
+**Arguments:**
+
+- `-h, --help` (Optional): show this help message and exit
+- `--worker-id` (Optional): The worker ID for which the configuration is being sampled.
+- `--run-args` (Optional): Path to the YAML configuration file. If not provided, it will search for run_config.yaml.
+- `--number-of-configs` (Optional): Number of configurations to sample (default: 1).
+
+
+**Example Usage:**
+
+
+```bash
+neps sample-config --worker-id worker_1 --number-of-configs 5
+```
+
+---
+## **`report-config` Command**
+The `report-config` command is the counterpart to `sample-config` and reports the outcome of a specific trial by updating its status and associated metrics in the NePS state. 
This command is crucial for manually managing the evaluation results of sampled configurations.
+
+**Arguments:**
+
+
+- `<trial-id>` (Required): ID of the trial to report
+- `<outcome>` (Required): Outcome of the trial
+
+
+- `-h, --help` (Optional): show this help message and exit
+- `--worker-id` (Optional): The worker ID for which the configuration is being sampled.
+- `--loss` (Optional): Loss value of the trial
+- `--run-args` (Optional): Path to the YAML file containing run configurations
+- `--cost` (Optional): Cost value of the trial
+- `--learning-curve` (Optional): Learning curve as a list of floats, provided like this --learning-curve 0.9 0.3 0.1
+- `--duration` (Optional): Duration of the evaluation in seconds
+- `--err` (Optional): Error message if any
+- `--tb` (Optional): Traceback information if any
+- `--time-end` (Optional): The time the trial ended as either a UNIX timestamp (float) or in 'YYYY-MM-DD HH:MM:SS' format
+
+
+**Example Usage:**
+
+
+```bash
+neps report-config 42 success --worker-id worker_1 --loss 0.95 --duration 120
+```
+
+---
+## **`help` Command**
+Displays help information for the NePS CLI, including a list of available commands and their descriptions.
+
+**Arguments:**
+
+
+- `-h, --help` (Optional): show this help message and exit
+
+
+**Example Usage:**
+
+```bash
+neps help --help
+```
+
+---
+## **Using NePS as a State Machine**
+
+NePS can function as a state machine, allowing you to manually sample and report configurations using CLI commands. This is particularly useful in scenarios like architecture search, where different configurations may require varying computational resources. To utilize NePS in this manner, follow these steps:
+
+### **Step 1: Initialize and Configure `run_config.yaml`**
+
+Begin by generating the `run_args` YAML configuration file. This file serves as the blueprint for your optimization experiments.
+
+
+```bash
+neps init
+```
+The `neps init` command creates run_config.yaml, which serves as the default configuration resource for all NePS commands.
+### **Step 2: Initialize the NePS Database**
+
+Set up the NePS database to enable the sampling and reporting of configurations via CLI commands.
+
+```bash
+neps init --database
+```
+This command initializes the NePS database, preparing the necessary folders and files required for managing your NePS run.
+
+
+### **Step 3: Sample Configurations**
+
+Generate new configurations based on the existing NePS state. This step allows you to create configurations that you can manually evaluate.
+
+```bash
+neps sample-config --worker-id worker_1 --number-of-configs 5
+```
+
+- **`--worker-id worker_1`**: Identifies the worker responsible for sampling configurations.
+- **`--number-of-configs 5`**: Specifies the number of configurations to sample.
+
+### **Step 4: Evaluate and Report Configurations**
+
+After evaluating each sampled configuration, report its outcome to update the NePS state.
+
+```bash
+neps report-config 42 success --worker-id worker_1 --loss 0.95 --duration 120
+```
+
+- **`42`**: The ID of the trial being reported.
+- **`success`**: The outcome of the trial (`success`, `failed`, `crashed`).
+- **`--worker-id worker_1`**: Identifies the worker reporting the configuration.
+- **`--loss 0.95`**: The loss value obtained from the trial.
+- **`--duration 120`**: The duration of the evaluation in seconds. 
+ + diff --git a/docs/reference/declarative_usage.md b/docs/reference/declarative_usage.md index 1afebad81..b324c0614 100644 --- a/docs/reference/declarative_usage.md +++ b/docs/reference/declarative_usage.md @@ -160,3 +160,71 @@ Define hooks in your YAML configuration to extend the functionality of your expe ```python --8<-- "docs/doc_yamls/run_pipeline_extended.py" ``` + +=== "run_neps.py" + ```python + import neps + neps.run(run_args="path/to/your/config.yaml") + ``` + +## CLI Usage +This section provides a brief overview of the primary commands available in the NePS CLI. +For additional command options, you can directly refer to the help documentation +provided by each command using --help. + + +### **`init` Command** + +Generates a default `run_args` YAML configuration file, providing a template that you can customize for your experiments. + +**Options:** + + - `--config-path `: *Optional*. Specify the custom path for generating the configuration file. Defaults to + `run_config.yaml` in the current working directory. + - `--template [basic|complete]`: *Optional*. Choose between a basic or complete template. The basic template includes only required settings, while the complete template includes all NePS configurations. + - `--state-machine`: *Optional*. Creates a NEPS state if set, which requires an existing `run_config.yaml`. + +**Example Usage:** +```bash +neps init --config-path custom/path/config.yaml --template complete +``` + +### **`run` Command** + +Executes the optimization based on the provided configuration. This command serves as a CLI wrapper around `neps.run`, effectively mapping each CLI argument to a parameter in `neps.run`. It offers a flexible interface that allows you to override the existing settings specified in the YAML configuration file, facilitating dynamic adjustments for managing your experiments. + +**Options:** + + - `--run-args `: Path to the YAML configuration file containing the run arguments. + - `--run-pipeline `: *Optional*. Specify the path to the Python module and function to use for running the pipeline. Overrides any settings in the YAML file. + - `--pipeline-space `: Path to the YAML file defining the search space for the optimization. + - `--root-directory `: *Optional*. Directory for saving progress and synchronizing multiple processes. Defaults to the `root_directory` from `run_config.yaml` if not provided. + - `--overwrite-working-directory`: *Optional*. If set, deletes the working directory at the start of the run. + - `--development-stage-id `: *Optional*. Identifier for the current development stage, useful for multi-stage projects. + - `--task-id `: *Optional*. Identifier for the current task, useful for managing projects with multiple tasks. + - `--post-run-summary/--no-post-run-summary`: *Optional*. Provides a summary of the run after execution. Enabled by default. + - `--max-evaluations-total `: *Optional*. Specifies the total number of evaluations to run. + - `--max-evaluations-per-run `: *Optional*. Number of evaluations to run per call. + - `--continue-until-max-evaluation-completed`: *Optional*. If set, ensures the run continues until `max-evaluations-total` has been reached. + - `--max-cost-total `: *Optional*. Specifies a cost threshold. No new evaluations will start if this cost is exceeded. + - `--ignore-errors`: *Optional*. If set, errors during the optimization will be ignored. + - `--loss-value-on-error `: *Optional*. Specifies the loss value to assume in case of an error. + - `--cost-value-on-error `: *Optional*. 
Specifies the cost value to assume in case of an error.
+  - `--searcher <key>`: Specifies the searcher algorithm for optimization.
+  - `--searcher-kwargs <key=value>...`: *Optional*. Additional keyword arguments for the searcher.

+**Example Usage:**
+```bash
+neps run --run-args path/to/config.yaml --max-evaluations-total 50
+```
+
+### **`status` Command**
+Checks the status of the NePS run, providing a summary of trials, including pending, evaluating, succeeded, and failed trials. See the `status` command in the CLI reference above for the available filtering options.
+
+**Example Usage:**
+```bash
+neps status --root-directory path/to/directory
+```
\ No newline at end of file
diff --git a/docs/reference/optimizers.md b/docs/reference/optimizers.md
index c85f73cf6..26e8a0d7c 100644
--- a/docs/reference/optimizers.md
+++ b/docs/reference/optimizers.md
@@ -66,11 +66,10 @@ initial_design_size: 7
 surrogate_model: gp
 acquisition: EI
 log_prior_weighted: false
-acquisition_sampler: random
 random_interleave_prob: 0.1
 disable_priors: false
 prior_confidence: high
-sample_default_first: false
+sample_prior_first: false
 ```

 ```python
diff --git a/docs/reference/pipeline_space.md b/docs/reference/pipeline_space.md
index 187413441..c1137e084 100644
--- a/docs/reference/pipeline_space.md
+++ b/docs/reference/pipeline_space.md
@@ -12,10 +12,10 @@ effectively incorporate various parameter types, ensuring that NePS can utilize
 ## Parameters
 NePS currently features 4 primary hyperparameter types:

-* [`CategoricalParameter`][neps.search_spaces.hyperparameters.categorical.CategoricalParameter]
-* [`FloatParameter`][neps.search_spaces.hyperparameters.float.FloatParameter]
-* [`IntegerParameter`][neps.search_spaces.hyperparameters.integer.IntegerParameter]
-* [`ConstantParameter`][neps.search_spaces.hyperparameters.constant.ConstantParameter]
+* [`Categorical`][neps.search_spaces.hyperparameters.categorical.Categorical]
+* [`Float`][neps.search_spaces.hyperparameters.float.Float]
+* [`Integer`][neps.search_spaces.hyperparameters.integer.Integer]
+* [`Constant`][neps.search_spaces.hyperparameters.constant.Constant]

 Using these types, you can define the parameters that NePS will optimize during the search process.
 The most basic way to pass these parameters is through a Python dictionary, where each key-value
@@ -25,32 +25,32 @@ for optimizing a deep learning model:

 ```python
 pipeline_space = {
-    "learning_rate": neps.FloatParameter(0.00001, 0.1, log=True),
-    "num_epochs": neps.IntegerParameter(3, 30, is_fidelity=True),
-    "optimizer": neps.CategoricalParameter(["adam", "sgd", "rmsprop"]),
-    "dropout_rate": neps.ConstantParameter(0.5),
+    "learning_rate": neps.Float(0.00001, 0.1, log=True),
+    "num_epochs": neps.Integer(3, 30, is_fidelity=True),
+    "optimizer": neps.Categorical(["adam", "sgd", "rmsprop"]),
+    "dropout_rate": neps.Constant(0.5),
 }

 neps.run(.., pipeline_space=pipeline_space)
 ```

??? 
example "Quick Parameter Reference" - === "`CategoricalParameter`" + === "`Categorical`" - ::: neps.search_spaces.hyperparameters.categorical.CategoricalParameter + ::: neps.search_spaces.hyperparameters.categorical.Categorical - === "`FloatParameter`" + === "`Float`" - ::: neps.search_spaces.hyperparameters.float.FloatParameter + ::: neps.search_spaces.hyperparameters.float.Float - === "`IntegerParameter`" + === "`Integer`" - ::: neps.search_spaces.hyperparameters.integer.IntegerParameter + ::: neps.search_spaces.hyperparameters.integer.Integer - === "`ConstantParameter`" + === "`Constant`" - ::: neps.search_spaces.hyperparameters.constant.ConstantParameter + ::: neps.search_spaces.hyperparameters.constant.Constant ## Using your knowledge, providing a Prior @@ -70,10 +70,10 @@ import neps neps.run( ..., pipeline_space={ - "learning_rate": neps.FloatParameter(1e-4, 1e-1, log=True, default=1e-2, default_confidence="medium"), - "num_epochs": neps.IntegerParameter(3, 30, is_fidelity=True), - "optimizer": neps.CategoricalParameter(["adam", "sgd", "rmsprop"], default="adam", default_confidence="low"), - "dropout_rate": neps.ConstantParameter(0.5), + "learning_rate": neps.Float(1e-4, 1e-1, log=True, default=1e-2, default_confidence="medium"), + "num_epochs": neps.Integer(3, 30, is_fidelity=True), + "optimizer": neps.Categorical(["adam", "sgd", "rmsprop"], default="adam", default_confidence="low"), + "dropout_rate": neps.Constant(0.5), } ) ``` diff --git a/mkdocs.yml b/mkdocs.yml index 9b6a4f6b0..464d6fc1b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -137,6 +137,7 @@ nav: - Optimizers: 'reference/optimizers.md' - Declarative Usage: 'reference/declarative_usage.md' - The Run Function: 'reference/run_pipeline.md' + - CLI Usage: 'reference/cli.md' - Analysing Runs: 'reference/analyse.md' - Examples: "examples/" # auto-generated - API: 'api/' # auto-generated diff --git a/neps/__init__.py b/neps/__init__.py index caca68e27..52ee91f73 100644 --- a/neps/__init__.py +++ b/neps/__init__.py @@ -1,41 +1,42 @@ from neps.api import run from neps.plot.plot import plot +from neps.plot.tensorboard_eval import tblogger from neps.search_spaces import ( + Architecture, ArchitectureParameter, + Categorical, CategoricalParameter, + Constant, ConstantParameter, + Float, FloatParameter, + Function, FunctionParameter, GraphGrammar, - GraphGrammarCell, - GraphGrammarRepetitive, + Integer, IntegerParameter, ) from neps.status.status import get_summary_dict, status -Integer = IntegerParameter -Float = FloatParameter -Categorical = CategoricalParameter -Constant = ConstantParameter -Architecture = ArchitectureParameter - __all__ = [ "Architecture", - "Integer", - "Float", - "Categorical", - "Constant", "ArchitectureParameter", + "Categorical", "CategoricalParameter", + "Constant", "ConstantParameter", + "Float", "FloatParameter", - "IntegerParameter", + "Function", "FunctionParameter", - "run", - "plot", - "get_summary_dict", - "status", "GraphGrammar", "GraphGrammarCell", "GraphGrammarRepetitive", + "Integer", + "IntegerParameter", + "get_summary_dict", + "plot", + "run", + "status", + "tblogger", ] diff --git a/neps/api.py b/neps/api.py index 196e371d8..ac80e4a34 100644 --- a/neps/api.py +++ b/neps/api.py @@ -1,7 +1,5 @@ """API for the neps package.""" -from __future__ import annotations - import inspect import logging import warnings @@ -28,14 +26,10 @@ def run( - run_pipeline: Callable | None = Default(None), + evaluate_pipeline: Callable | None = Default(None), root_directory: str | Path | None = Default(None), 
diff --git a/neps/api.py b/neps/api.py
index 196e371d8..ac80e4a34 100644
--- a/neps/api.py
+++ b/neps/api.py
@@ -1,7 +1,5 @@
 """API for the neps package."""
 
-from __future__ import annotations
-
 import inspect
 import logging
 import warnings
@@ -28,14 +26,10 @@ def run(
-    run_pipeline: Callable | None = Default(None),
+    evaluate_pipeline: Callable | None = Default(None),
     root_directory: str | Path | None = Default(None),
     pipeline_space: (
-        dict[str, Parameter | CS.ConfigurationSpace]
-        | str
-        | Path
-        | CS.ConfigurationSpace
-        | None
+        dict[str, Parameter] | str | Path | CS.ConfigurationSpace | None
     ) = Default(None),
     run_args: str | Path | None = Default(None),
     overwrite_working_directory: bool = Default(False),
@@ -47,9 +41,10 @@ def run(
     continue_until_max_evaluation_completed: bool = Default(False),
     max_cost_total: int | float | None = Default(None),
     ignore_errors: bool = Default(False),
-    loss_value_on_error: None | float = Default(None),
+    objective_to_minimize_value_on_error: None | float = Default(None),
     cost_value_on_error: None | float = Default(None),
     pre_load_hooks: Iterable | None = Default(None),
+    sample_batch_size: int | None = Default(None),
     searcher: (
         Literal[
             "default",
             "bayesian_optimization",
             "hyperband",
             "priorband",
             "mobster",
             "asha",
-            "regularized_evolution",
         ]
         | BaseOptimizer
         | Path
@@ -75,14 +69,14 @@ def run(
     the multiple calls to run(.) will be independent.
 
     Args:
-        run_pipeline: The objective function to minimize.
+        evaluate_pipeline: The objective function to minimize.
         pipeline_space: The search space to minimize over.
         root_directory: The directory to save progress to. This is also used to
             synchronize multiple calls to run(.) for parallelization.
         run_args: An option for providing the optimization settings e.g.
-            max_evaluation_total in a YAML file.
+            max_evaluations_total in a YAML file.
         overwrite_working_directory: If true, delete the working directory at the start of
-            the run. This is, e.g., useful when debugging a run_pipeline function.
+            the run. This is, e.g., useful when debugging an evaluate_pipeline function.
         post_run_summary: If True, creates a csv file after each worker is done,
             holding summary information about the configs and results.
         development_stage_id: ID for the current development stage. Only needed if
@@ -96,15 +90,17 @@ def run(
             max_evaluations_total have been completed. This is only relevant in the parallel
             setting.
         max_cost_total: No new evaluations will start when this cost is exceeded. Requires
-            returning a cost in the run_pipeline function, e.g.,
+            returning a cost in the evaluate_pipeline function, e.g.,
             `return dict(loss=loss, cost=cost)`.
         ignore_errors: Ignore hyperparameter settings that threw an error and do not raise
             an error. Error configs still count towards max_evaluations_total.
-        loss_value_on_error: Setting this and cost_value_on_error to any float will
-            supress any error and will use given loss value instead. default: None
-        cost_value_on_error: Setting this and loss_value_on_error to any float will
+        objective_to_minimize_value_on_error: Setting this and cost_value_on_error to any float will
+            suppress any error and will use given objective_to_minimize value instead. default: None
+        cost_value_on_error: Setting this and objective_to_minimize_value_on_error to any float will
            supress any error and will use given cost value instead. default: None
         pre_load_hooks: List of functions that will be called before load_results().
+        sample_batch_size: The number of samples to ask for in a single call to the
+            optimizer.
         searcher: Which optimizer to use. Can be a string identifier, an instance
             of BaseOptimizer, or a Path to a custom optimizer.
         **searcher_kwargs: Will be passed to the searcher.
            This is usually only needed by
@@ -118,15 +114,15 @@ def run(
     Example:
         >>> import neps
 
-        >>> def run_pipeline(some_parameter: float):
+        >>> def evaluate_pipeline(some_parameter: float):
         >>>     validation_error = -some_parameter
         >>>     return validation_error
 
-        >>> pipeline_space = dict(some_parameter=neps.FloatParameter(lower=0, upper=1))
+        >>> pipeline_space = dict(some_parameter=neps.Float(lower=0, upper=1))
 
         >>> logging.basicConfig(level=logging.INFO)
         >>> neps.run(
-        >>>    run_pipeline=run_pipeline,
+        >>>    evaluate_pipeline=evaluate_pipeline,
         >>>    pipeline_space=pipeline_space,
         >>>    root_directory="usage_example",
         >>>    max_evaluations_total=5,
@@ -200,7 +196,7 @@ def run(
             pipeline_space=settings.pipeline_space,
             max_cost_total=settings.max_cost_total,
             ignore_errors=settings.ignore_errors,
-            loss_value_on_error=settings.loss_value_on_error,
+            objective_to_minimize_value_on_error=settings.objective_to_minimize_value_on_error,
             cost_value_on_error=settings.cost_value_on_error,
             searcher=settings.searcher,
             **settings.searcher_kwargs,
@@ -230,7 +226,7 @@ def run(
     )
 
     _launch_runtime(
-        evaluation_fn=settings.run_pipeline,
+        evaluation_fn=settings.evaluate_pipeline,
         optimizer=searcher_instance,
         optimizer_info=searcher_info,
         max_cost_total=settings.max_cost_total,
@@ -238,16 +234,29 @@ def run(
         max_evaluations_total=settings.max_evaluations_total,
         max_evaluations_for_worker=settings.max_evaluations_per_run,
         continue_until_max_evaluation_completed=settings.continue_until_max_evaluation_completed,
-        loss_value_on_error=settings.loss_value_on_error,
+        objective_to_minimize_value_on_error=settings.objective_to_minimize_value_on_error,
         cost_value_on_error=settings.cost_value_on_error,
         ignore_errors=settings.ignore_errors,
         overwrite_optimization_dir=settings.overwrite_working_directory,
         pre_load_hooks=settings.pre_load_hooks,
+        sample_batch_size=settings.sample_batch_size,
     )
 
     if settings.post_run_summary:
         assert settings.root_directory is not None
-        post_run_csv(settings.root_directory)
+        config_data_path, run_data_path = post_run_csv(settings.root_directory)
+        logger.info(
+            "The post run summary has been created, which is a csv file with the "
+            "output of all data in the run."
+            f"\nYou can find a csv of all the configurations at: {config_data_path}."
+            f"\nYou can find a csv of results at: {run_data_path}."
+        )
+    else:
+        logger.info(
+            "Skipping the creation of the post run summary, which is a csv file with the "
+            "output of all data in the run."
+            "\nSet `post_run_summary=True` to enable it."
+        )
 
 
 def _run_args(
@@ -261,7 +270,7 @@ def _run_args(
     ) = None,
     max_cost_total: int | float | None = None,
     ignore_errors: bool = False,
-    loss_value_on_error: None | float = None,
+    objective_to_minimize_value_on_error: None | float = None,
     cost_value_on_error: None | float = None,
     searcher: (
         Literal[
             "default",
             "bayesian_optimization",
             "hyperband",
             "priorband",
             "mobster",
             "asha",
-            "regularized_evolution",
         ]
         | BaseOptimizer
+        | dict
     ) = "default",
     **searcher_kwargs,
 ) -> tuple[BaseOptimizer, dict]:
@@ -337,11 +346,11 @@ def _run_args(
     if searcher in ["default", None]:
         # NePS decides the searcher according to the pipeline space.
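+        # (Illustrative summary of the rule implemented below: prior & fidelity
+        # -> "priorband"; prior only -> "pibo"; fidelity only -> "hyperband";
+        # neither -> "bayesian_optimization".)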
if pipeline_space.has_prior: - searcher = "priorband" if pipeline_space.has_fidelity else "pibo" + searcher = "priorband" if len(pipeline_space.fidelities) > 0 else "pibo" else: searcher = ( "hyperband" - if pipeline_space.has_fidelity + if len(pipeline_space.fidelities) > 0 else "bayesian_optimization" ) searcher_info["searcher_selection"] = "neps-default" @@ -402,7 +411,7 @@ def _run_args( searcher_config.update( { - "loss_value_on_error": loss_value_on_error, + "objective_to_minimize_value_on_error": objective_to_minimize_value_on_error, "cost_value_on_error": cost_value_on_error, "ignore_errors": ignore_errors, } @@ -412,7 +421,7 @@ def _run_args( SearcherMapping, searcher_alg, "searcher", as_class=True )( pipeline_space=pipeline_space, - budget=max_cost_total, # TODO: use max_cost_total everywhere + max_cost_total=max_cost_total, # TODO: use max_cost_total everywhere **searcher_config, ) diff --git a/neps/env.py b/neps/env.py index 7cb8eadad..07b6c86c1 100644 --- a/neps/env.py +++ b/neps/env.py @@ -3,17 +3,23 @@ from __future__ import annotations import os -from typing import Callable, TypeVar +from collections.abc import Callable +from typing import Any, Literal, TypeVar T = TypeVar("T") V = TypeVar("V") +ENV_VARS_USED: dict[str, tuple[Any, Any]] = {} + def get_env(key: str, parse: Callable[[str], T], default: V) -> T | V: """Get an environment variable or return a default value.""" if (e := os.environ.get(key)) is not None: - return parse(e) + value = parse(e) + ENV_VARS_USED[key] = (e, value) + return value + ENV_VARS_USED[key] = (default, default) return default @@ -22,6 +28,38 @@ def is_nullable(e: str) -> bool: return e.lower() in ("none", "n", "null") +def yaml_or_json(e: str) -> Literal["yaml", "json"]: + """Check if an environment variable is either yaml or json.""" + if e.lower() in ("yaml", "json"): + return e.lower() # type: ignore + raise ValueError(f"Expected 'yaml' or 'json', got '{e}'.") + + +LINUX_FILELOCK_FUNCTION = get_env( + "NEPS_LINUX_FILELOCK_FUNCTION", + parse=str, + default="lockf", +) +MAX_RETRIES_GET_NEXT_TRIAL = get_env( + "NEPS_MAX_RETRIES_GET_NEXT_TRIAL", + parse=int, + default=10, +) +MAX_RETRIES_SET_EVALUATING = get_env( + "NEPS_MAX_RETRIES_SET_EVALUATING", + parse=int, + default=10, +) +MAX_RETRIES_CREATE_LOAD_STATE = get_env( + "NEPS_MAX_RETRIES_CREATE_LOAD_STATE", + parse=int, + default=10, +) +MAX_RETRIES_WORKER_CHECK_SHOULD_STOP = get_env( + "NEPS_MAX_RETRIES_WORKER_CHECK_SHOULD_STOP", + parse=int, + default=3, +) TRIAL_FILELOCK_POLL = get_env( "NEPS_TRIAL_FILELOCK_POLL", parse=float, @@ -30,53 +68,33 @@ def is_nullable(e: str) -> bool: TRIAL_FILELOCK_TIMEOUT = get_env( "NEPS_TRIAL_FILELOCK_TIMEOUT", parse=lambda e: None if is_nullable(e) else float(e), - default=None, -) - -JOBQUEUE_FILELOCK_POLL = get_env( - "NEPS_JOBQUEUE_FILELOCK_POLL", - parse=float, - default=0.05, -) -JOBQUEUE_FILELOCK_TIMEOUT = get_env( - "NEPS_JOBQUEUE_FILELOCK_TIMEOUT", - parse=lambda e: None if is_nullable(e) else float(e), - default=None, + default=120, ) - -SEED_SNAPSHOT_FILELOCK_POLL = get_env( - "NEPS_SEED_SNAPSHOT_FILELOCK_POLL", +FS_SYNC_GRACE_BASE = get_env( + "NEPS_FS_SYNC_GRACE_BASE", parse=float, - default=0.05, + default=0.00, # Keep it low initially to not punish synced os ) -SEED_SNAPSHOT_FILELOCK_TIMEOUT = get_env( - "NEPS_SEED_SNAPSHOT_FILELOCK_TIMEOUT", - parse=lambda e: None if is_nullable(e) else float(e), - default=None, -) - -OPTIMIZER_INFO_FILELOCK_POLL = get_env( - "NEPS_OPTIMIZER_INFO_FILELOCK_POLL", +FS_SYNC_GRACE_INC = get_env( + 
"NEPS_FS_SYNC_GRACE_INC", parse=float, - default=0.05, -) -OPTIMIZER_INFO_FILELOCK_TIMEOUT = get_env( - "NEPS_OPTIMIZER_INFO_FILELOCK_TIMEOUT", - parse=lambda e: None if is_nullable(e) else float(e), - default=None, + default=0.1, ) -OPTIMIZER_STATE_FILELOCK_POLL = get_env( - "NEPS_OPTIMIZER_STATE_FILELOCK_POLL", +# NOTE: We want this to be greater than the trials filelock, so that +# anything requesting to just update the trials is more likely to obtain it +# as those operations tend to be faster than something that requires optimizer +# state. +STATE_FILELOCK_POLL = get_env( + "NEPS_STATE_FILELOCK_POLL", parse=float, - default=0.05, + default=0.20, ) -OPTIMIZER_STATE_FILELOCK_TIMEOUT = get_env( - "NEPS_OPTIMIZER_STATE_FILELOCK_TIMEOUT", +STATE_FILELOCK_TIMEOUT = get_env( + "NEPS_STATE_FILELOCK_TIMEOUT", parse=lambda e: None if is_nullable(e) else float(e), - default=None, + default=120, ) - GLOBAL_ERR_FILELOCK_POLL = get_env( "NEPS_GLOBAL_ERR_FILELOCK_POLL", parse=float, @@ -85,5 +103,15 @@ def is_nullable(e: str) -> bool: GLOBAL_ERR_FILELOCK_TIMEOUT = get_env( "NEPS_GLOBAL_ERR_FILELOCK_TIMEOUT", parse=lambda e: None if is_nullable(e) else float(e), - default=None, + default=120, +) +TRIAL_CACHE_MAX_UPDATES_BEFORE_CONSOLIDATION = get_env( + "NEPS_TRIAL_CACHE_MAX_UPDATES_BEFORE_CONSOLIDATION", + parse=int, + default=10, +) +CONFIG_SERIALIZE_FORMAT: Literal["yaml", "json"] = get_env( # type: ignore + "NEPS_CONFIG_SERIALIZE_FORMAT", + parse=yaml_or_json, + default="yaml", ) diff --git a/neps/exceptions.py b/neps/exceptions.py index 597dfb1f0..c0a3e26a9 100644 --- a/neps/exceptions.py +++ b/neps/exceptions.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import Any + class NePSError(Exception): """Base class for all NePS exceptions. @@ -11,37 +13,32 @@ class NePSError(Exception): """ -class VersionMismatchError(NePSError): - """Raised when the version of a resource does not match the expected version.""" - - -class VersionedResourceAlreadyExistsError(NePSError): - """Raised when a version already exists when trying to create a new versioned - data. - """ - - -class VersionedResourceRemovedError(NePSError): - """Raised when a version already exists when trying to create a new versioned - data. - """ - - -class VersionedResourceDoesNotExistsError(NePSError): - """Raised when a versioned resource does not exist at a location.""" - - class LockFailedError(NePSError): """Raised when a lock cannot be acquired.""" -class TrialAlreadyExistsError(VersionedResourceAlreadyExistsError): +class TrialAlreadyExistsError(NePSError): """Raised when a trial already exists in the store.""" + def __init__(self, trial_id: str, *args: Any) -> None: + """Initialize the exception with the trial id.""" + super().__init__(trial_id, *args) + self.trial_id = trial_id + + def __str__(self) -> str: + return f"Trial with id {self.trial_id} already exists!" + -class TrialNotFoundError(VersionedResourceDoesNotExistsError): +class TrialNotFoundError(NePSError): """Raised when a trial already exists in the store.""" class WorkerFailedToGetPendingTrialsError(NePSError): """Raised when a worker failed to get pending trials.""" + + +class WorkerRaiseError(NePSError): + """Raised from a worker when an error is raised. 
+ + Includes additional information on how to recover + """ diff --git a/neps/optimizers/__init__.py b/neps/optimizers/__init__.py index 31cb4c4a5..b3ea4f3a0 100644 --- a/neps/optimizers/__init__.py +++ b/neps/optimizers/__init__.py @@ -1,49 +1,42 @@ -from __future__ import annotations - +from collections.abc import Callable, Mapping from functools import partial -from typing import Callable, Mapping -from .base_optimizer import BaseOptimizer -from .bayesian_optimization.cost_cooling import CostCooling -from .bayesian_optimization.mf_tpe import MultiFidelityPriorWeightedTreeParzenEstimator -from .bayesian_optimization.optimizer import BayesianOptimization -from .grid_search.optimizer import GridSearch -from .multi_fidelity.dyhpo import MFEIBO -from .multi_fidelity.hyperband import ( +from neps.optimizers.base_optimizer import BaseOptimizer +from neps.optimizers.bayesian_optimization.optimizer import BayesianOptimization +from neps.optimizers.grid_search.optimizer import GridSearch +from neps.optimizers.multi_fidelity import ( + IFBO, MOBSTER, - AsynchronousHyperband, - Hyperband, - HyperbandCustomDefault, -) -from .multi_fidelity.successive_halving import ( AsynchronousSuccessiveHalving, AsynchronousSuccessiveHalvingWithPriors, + Hyperband, + HyperbandCustomDefault, SuccessiveHalving, SuccessiveHalvingWithPriors, ) -from .multi_fidelity_prior.async_priorband import PriorBandAsha, PriorBandAshaHB -from .multi_fidelity_prior.priorband import PriorBand -from .random_search.optimizer import RandomSearch -from .regularized_evolution.optimizer import RegularizedEvolution +from neps.optimizers.multi_fidelity_prior import ( + PriorBand, + PriorBandAsha, + PriorBandAshaHB, +) +from neps.optimizers.random_search.optimizer import RandomSearch # TODO: Rename Searcher to Optimizer... 
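+# Illustrative only: a string passed as `searcher=` to `neps.run` is resolved through
+# this mapping, roughly as
+#   SearcherMapping["pibo"](pipeline_space=space, max_cost_total=..., **searcher_config)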
SearcherMapping: Mapping[str, Callable[..., BaseOptimizer]] = { - "bayesian_optimization": BayesianOptimization, - "pibo": partial(BayesianOptimization, disable_priors=False), - "cost_cooling_bayesian_optimization": CostCooling, + "bayesian_optimization": partial(BayesianOptimization, use_priors=False), + "pibo": partial(BayesianOptimization, use_priors=True), "random_search": RandomSearch, - "cost_cooling": CostCooling, - "regularized_evolution": RegularizedEvolution, - "assisted_regularized_evolution": partial(RegularizedEvolution, assisted=True), "grid_search": GridSearch, "successive_halving": SuccessiveHalving, "successive_halving_prior": SuccessiveHalvingWithPriors, "asha": AsynchronousSuccessiveHalving, "hyperband": Hyperband, "asha_prior": AsynchronousSuccessiveHalvingWithPriors, - "multifidelity_tpe": MultiFidelityPriorWeightedTreeParzenEstimator, "hyperband_custom_default": HyperbandCustomDefault, "priorband": PriorBand, + "priorband_bo": partial(PriorBand, model_based=True), + "priorband_asha": PriorBandAsha, + "priorband_asha_hyperband": PriorBandAshaHB, "mobster": MOBSTER, - "mf_ei_bo": MFEIBO, + "ifbo": IFBO, } diff --git a/neps/optimizers/base_optimizer.py b/neps/optimizers/base_optimizer.py index 348046269..06adef711 100644 --- a/neps/optimizers/base_optimizer.py +++ b/neps/optimizers/base_optimizer.py @@ -2,173 +2,166 @@ import logging from abc import abstractmethod -from typing import Any, Mapping +from collections.abc import Mapping +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from neps.state.trial import Report, Trial + +if TYPE_CHECKING: + from neps.search_spaces.search_space import SearchSpace + from neps.state.optimizer import BudgetInfo + from neps.utils.types import ERROR, ResultDict + + +def _get_objective_to_minimize( + result: ERROR | ResultDict | float, + objective_to_minimize_value_on_error: float | None = None, + *, + ignore_errors: bool = False, +) -> ERROR | float: + if result == "error": + if ignore_errors: + return "error" + + if objective_to_minimize_value_on_error is not None: + return objective_to_minimize_value_on_error + + raise ValueError( + "An error happened during the execution of your evaluate_pipeline function." + " You have three options: 1. If the error is expected and corresponds to" + " an objective_to_minimize value in your application (e.g., 0% accuracy)," + " you can set objective_to_minimize_value_on_error to some float. 2. If " + " sometimes your pipeline crashes randomly, you can set ignore_errors=True." + " 3. Fix your error." + ) + + if isinstance(result, dict): + return float(result["objective_to_minimize"]) + + assert isinstance(result, float) + return float(result) + + +def _get_cost( + result: ERROR | ResultDict | float, + cost_value_on_error: float | None = None, + *, + ignore_errors: bool = False, +) -> float | Any: + if result == "error": + if ignore_errors: + return "error" -from dataclasses import asdict, dataclass -from neps.state.optimizer import BudgetInfo -from neps.utils.types import ConfigResult, RawConfig, ERROR, ResultDict -from neps.search_spaces.search_space import SearchSpace -from neps.utils.data_loading import _get_cost, _get_learning_curve, _get_loss -from neps.state.trial import Trial + if cost_value_on_error is None: + raise ValueError( + "An error happened during the execution of your evaluate_pipeline" + " function. You have three options: 1. If the error is expected and" + " corresponds to a cost value in your application, you can set" + " cost_value_on_error to some float. 
2. If sometimes your pipeline" + " crashes randomly, you can set ignore_errors=True. 3. Fix your error." + ) + + return cost_value_on_error + + if isinstance(result, Mapping): + return float(result["cost"]) + + return float(result) @dataclass class SampledConfig: - id: Trial.ID + id: str config: Mapping[str, Any] - previous_config_id: Trial.ID | None + previous_config_id: str | None = None class BaseOptimizer: """Base sampler class. Implements all the low-level work.""" + # TODO: Remove a lot of these init params + # Ideally we just make this a `Protocol`, i.e. an interface + # and it has no functionality def __init__( self, + *, pipeline_space: SearchSpace, patience: int = 50, logger: logging.Logger | None = None, - budget: int | float | None = None, - loss_value_on_error: float | None = None, + max_cost_total: int | float | None = None, + objective_to_minimize_value_on_error: float | None = None, cost_value_on_error: float | None = None, learning_curve_on_error: float | list[float] | None = None, - ignore_errors=False, + ignore_errors: bool = False, ) -> None: if patience < 1: raise ValueError("Patience should be at least 1") - self.used_budget: float = 0.0 - self.budget = budget + self.max_cost_total = max_cost_total self.pipeline_space = pipeline_space self.patience = patience self.logger = logger or logging.getLogger("neps") - self.loss_value_on_error = loss_value_on_error + self.objective_to_minimize_value_on_error = objective_to_minimize_value_on_error self.cost_value_on_error = cost_value_on_error self.learning_curve_on_error = learning_curve_on_error self.ignore_errors = ignore_errors @abstractmethod - def load_optimization_state( - self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], - budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - raise NotImplementedError - - @abstractmethod - def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: - """Sample a new configuration - - Returns: - config: serializable object representing the configuration - config_id: unique identifier for the configuration - previous_config_id: if provided, id of a previous on which this - configuration is based - """ - raise NotImplementedError - def ask( self, trials: Mapping[str, Trial], budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> tuple[SampledConfig, dict[str, Any]]: - """Sample a new configuration - - !!! note - - The plan is this method replaces the two-step procedure of `load_optimization_state` - and `get_config_and_ids` in the future, replacing both with a single method `ask` - which would be easier for developer of NePS optimizers to implement. - - !!! note - - The `optimizer_state` right now is just a `dict` that optimizers are free to mutate - as desired. A `dict` is not ideal as its _stringly_ typed but this was the least - invasive way to add this at the moment. It's actually an existing feature no - optimizer uses except _cost-cooling_ which basically just took a value from - `budget_info`. - - Ideally an optimizer overwriting this can decide what to return instead of having - to rely on them mutating it, however this is the best work-around I could come up with - for now. + n: int | None = None, + ) -> SampledConfig | list[SampledConfig]: + """Sample a new configuration. Args: trials: All of the trials that are known about. 
- budget_info: information about the budget - optimizer_state: extra state the optimizer would like to keep between calls + budget_info: information about the budget constraints. Returns: - SampledConfig: a sampled configuration - dict: state the optimizer would like to keep between calls + The sampled configuration(s) """ - completed: dict[Trial.ID, ConfigResult] = {} - pending: dict[Trial.ID, SearchSpace] = {} - for trial_id, trial in trials.items(): - if trial.report is not None: - completed[trial_id] = ConfigResult( - id=trial_id, - config=self.pipeline_space.from_dict(trial.config), - result=trial.report, - # TODO: Better if we could just pass around this metadata - # object instead of converting to a dict each time. - metadata=asdict(trial.metadata), - ) - elif trial.state in ( - Trial.State.PENDING, - Trial.State.SUBMITTED, - Trial.State.EVALUATING, - ): - pending[trial_id] = self.pipeline_space.from_dict(trial.config) - - self.load_optimization_state( - previous_results=completed, - pending_evaluations=pending, - budget_info=budget_info, - optimizer_state=optimizer_state, - ) - config, config_id, previous_config_id = self.get_config_and_ids() - return SampledConfig( - id=config_id, config=config, previous_config_id=previous_config_id - ), optimizer_state - - def update_state_post_evaluation( - self, state: dict[str, Any], report: Trial.Report - ) -> dict[str, Any]: - # TODO: There's a slot in `OptimizerState` to store extra things - # required for the optimizer but is currently not used - # state["key"] = "value" - return state - - def get_loss( - self, result: ERROR | ResultDict | float | Trial.Report - ) -> float | ERROR: - """Calls result.utils.get_loss() and passes the error handling through. - Please use self.get_loss() instead of get_loss() in all optimizer classes.""" + ... + def get_objective_to_minimize( + self, result: ERROR | ResultDict | float | Report + ) -> float | ERROR: + """Calls result.utils.get_objective_to_minimize() and passes the error handling + through. Please use self.get_objective_to_minimize() instead of + get_objective_to_minimize() in all optimizer classes. + """ # TODO(eddiebergman): This is a forward change for whenever we can have optimizers - # use `Trial` and `Report`, they already take care of this and save having to do this - # `_get_loss` at every call. We can also then just use `None` instead of the string `"error"` - if isinstance(result, Trial.Report): - return result.loss if result.loss is not None else "error" - - return _get_loss( + # use `Trial` and `Report`, they already take care of this and save having to do + # this `_get_objective_to_minimize` at every call. We can also then just use + # `None` instead of the string `"error"` + if isinstance(result, Report): + return ( + result.objective_to_minimize + if result.objective_to_minimize is not None + else "error" + ) + + return _get_objective_to_minimize( result, - loss_value_on_error=self.loss_value_on_error, + objective_to_minimize_value_on_error=self.objective_to_minimize_value_on_error, ignore_errors=self.ignore_errors, ) - def get_cost( - self, result: ERROR | ResultDict | float | Trial.Report - ) -> float | ERROR: + def get_cost(self, result: ERROR | ResultDict | float | Report) -> float | ERROR: """Calls result.utils.get_cost() and passes the error handling through. - Please use self.get_cost() instead of get_cost() in all optimizer classes.""" + Please use self.get_cost() instead of get_cost() in all optimizer classes. 
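+
+        For illustration: `result` may be a plain float, a result dict such as
+        `{"objective_to_minimize": 0.3, "cost": 12.0}`, or the string "error",
+        mirroring what the module-level `_get_cost` above accepts.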
+ """ # TODO(eddiebergman): This is a forward change for whenever we can have optimizers - # use `Trial` and `Report`, they already take care of this and save having to do this - # `_get_loss` at every call - if isinstance(result, Trial.Report): - return result.loss if result.loss is not None else "error" + # use `Trial` and `Report`, they already take care of this and save having to do + # this `_get_objective_to_minimize` at every call + if isinstance(result, Report): + return ( + result.objective_to_minimize + if result.objective_to_minimize is not None + else "error" + ) return _get_cost( result, @@ -176,22 +169,5 @@ def get_cost( ignore_errors=self.ignore_errors, ) - def get_learning_curve( - self, result: str | dict | float | Trial.Report - ) -> list[float] | Any: - """Calls result.utils.get_loss() and passes the error handling through. - Please use self.get_loss() instead of get_loss() in all optimizer classes.""" - # TODO(eddiebergman): This is a forward change for whenever we can have optimizers - # use `Trial` and `Report`, they already take care of this and save having to do this - # `_get_loss` at every call - if isinstance(result, Trial.Report): - return result.learning_curve - - return _get_learning_curve( - result, - learning_curve_on_error=self.learning_curve_on_error, - ignore_errors=self.ignore_errors, - ) - def whoami(self) -> str: return type(self).__name__ diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py index 89cfb4fbf..31cb5b2b0 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py @@ -1,21 +1,16 @@ -from __future__ import annotations - +from collections.abc import Callable from functools import partial -from typing import Callable +from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( + BaseAcquisition, +) from neps.optimizers.bayesian_optimization.acquisition_functions.ei import ( ComprehensiveExpectedImprovement, ) -from neps.optimizers.bayesian_optimization.acquisition_functions.mf_ei import MFEI from neps.optimizers.bayesian_optimization.acquisition_functions.ucb import ( UpperConfidenceBound, - MF_UCB, -) -from neps.optimizers.bayesian_optimization.acquisition_functions.prior_weighted import ( - DecayingPriorWeightedAcquisition, ) - AcquisitionMapping: dict[str, Callable] = { "EI": partial( ComprehensiveExpectedImprovement, @@ -28,33 +23,23 @@ augmented_ei=False, log_ei=True, ), - # # Uses the augmented EI heuristic and changed the in-fill criterion to the best test location with - # # the highest *posterior mean*, which are preferred when the optimisation is noisy. + ## Uses the augmented EI heuristic and changed the in-fill criterion to the best test + ## location with the highest *posterior mean*, which are preferred when the + ## optimisation is noisy. 
"AEI": partial( ComprehensiveExpectedImprovement, in_fill="posterior", augmented_ei=True, ), - "MFEI": partial( - MFEI, - in_fill="best", - augmented_ei=False, - ), "UCB": partial( UpperConfidenceBound, maximize=False, ), - "MF-UCB": partial( - MF_UCB, - maximize=False, - ), } __all__ = [ "AcquisitionMapping", + "BaseAcquisition", "ComprehensiveExpectedImprovement", - "MFEI", "UpperConfidenceBound", - "MF_UCB", - "DecayingPriorWeightedAcquisition", ] diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/_ehvi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/_ehvi.py deleted file mode 100644 index 8722c545c..000000000 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/_ehvi.py +++ /dev/null @@ -1,213 +0,0 @@ -# from abc import ABC, abstractmethod -from itertools import product - -import torch -from torch import Tensor -from torch.distributions import Normal -from torch.nn import Module - -# class MultiObjectiveBaseAcqusition(ABC): -# def __init__(self, surrogate_models: dict): -# self.surrogate_models = surrogate_models -# -# def propose_location(self, *args): -# """Propose new locations for subsequent sampling -# This method should be overriden by respective acquisition function implementations.""" -# raise NotImplementedError -# -# def optimize(self): -# """This is the method that user should call for the Bayesian optimisation main loop.""" -# raise NotImplementedError -# -# @abstractmethod -# def eval(self, x, asscalar: bool = False): -# """Evaluate the acquisition function at point x2. This should be overridden by respective acquisition -# function implementations""" -# raise NotImplementedError -# -# def __call__(self, *args, **kwargs): -# return self.eval(*args, **kwargs) -# -# def reset_surrogate_model(self, surrogate_models: dict): -# for objective, surrogate_model in surrogate_models.items(): -# self.surrogate_models[objective] = surrogate_model -# - - -class ExpectedHypervolumeImprovement(Module): # , MultiObjectiveBaseAcqusition): - def __init__( - self, - model, - ref_point, - partitioning, - ) -> None: - r"""Expected Hypervolume Improvement supporting m>=2 outcomes. - - Implementation from BOtorch, adapted from - https://github.com/pytorch/botorch/blob/353f37649fa8d90d881e8ea20c11986b15723ef1/botorch/acquisition/multi_objective/analytic.py#L78 - - This implements the computes EHVI using the algorithm from [Yang2019]_, but - additionally computes gradients via auto-differentiation as proposed by - [Daulton2020qehvi]_. - - Note: this is currently inefficient in two ways due to the binary partitioning - algorithm that we use for the box decomposition: - - - We have more boxes in our decomposition - - If we used a box decomposition that used `inf` as the upper bound for - the last dimension *in all hypercells*, then we could reduce the number - of terms we need to compute from 2^m to 2^(m-1). [Yang2019]_ do this - by using DKLV17 and LKF17 for the box decomposition. - - TODO: Use DKLV17 and LKF17 for the box decomposition as in [Yang2019]_ for - greater efficiency. - - TODO: Add support for outcome constraints. - - Example: - >>> model = SingleTaskGP(train_X, train_Y) - >>> ref_point = [0.0, 0.0] - >>> EHVI = ExpectedHypervolumeImprovement(model, ref_point, partitioning) - >>> ehvi = EHVI(test_X) - - Args: - model: A fitted model. - ref_point: A list with `m` elements representing the reference point (in the - outcome space) w.r.t. to which compute the hypervolume. This is a - reference point for the objective values (i.e. 
after applying - `objective` to the samples). - partitioning: A `NondominatedPartitioning` module that provides the non- - dominated front and a partitioning of the non-dominated space in hyper- - rectangles. - objective: An `AnalyticMultiOutputObjective`. - """ - # TODO: we could refactor this __init__ logic into a - # HypervolumeAcquisitionFunction Mixin - if len(ref_point) != partitioning.num_outcomes: - raise ValueError( - "The length of the reference point must match the number of outcomes. " - f"Got ref_point with {len(ref_point)} elements, but expected " - f"{partitioning.num_outcomes}." - ) - ref_point = torch.tensor( - ref_point, - dtype=partitioning.pareto_Y.dtype, - device=partitioning.pareto_Y.device, - ) - better_than_ref = (partitioning.pareto_Y > ref_point).all(dim=1) - if not better_than_ref.any() and partitioning.pareto_Y.shape[0] > 0: - raise ValueError( - "At least one pareto point must be better than the reference point." - ) - super().__init__() - self.model = model - self.register_buffer("ref_point", ref_point) - self.partitioning = partitioning - cell_bounds = self.partitioning.get_hypercell_bounds() - self.register_buffer("cell_lower_bounds", cell_bounds[0]) - self.register_buffer("cell_upper_bounds", cell_bounds[1]) - # create indexing tensor of shape `2^m x m` - self._cross_product_indices = torch.tensor( - list(product(*[[0, 1] for _ in range(ref_point.shape[0])])), - dtype=torch.long, - device=ref_point.device, - ) - self.normal = Normal(0, 1) - - def psi(self, lower: Tensor, upper: Tensor, mu: Tensor, sigma: Tensor) -> None: - r"""Compute Psi function. - - For each cell i and outcome k: - - Psi(lower_{i,k}, upper_{i,k}, mu_k, sigma_k) = ( - sigma_k * PDF((upper_{i,k} - mu_k) / sigma_k) + ( - mu_k - lower_{i,k} - ) * (1 - CDF(upper_{i,k} - mu_k) / sigma_k) - ) - - See Equation 19 in [Yang2019]_ for more details. - - Args: - lower: A `num_cells x m`-dim tensor of lower cell bounds - upper: A `num_cells x m`-dim tensor of upper cell bounds - mu: A `batch_shape x 1 x m`-dim tensor of means - sigma: A `batch_shape x 1 x m`-dim tensor of standard deviations (clamped). - - Returns: - A `batch_shape x num_cells x m`-dim tensor of values. - """ - u = (upper - mu) / sigma - return sigma * self.normal.log_prob(u).exp() + (mu - lower) * ( - 1 - self.normal.cdf(u) - ) - - def nu(self, lower: Tensor, upper: Tensor, mu: Tensor, sigma: Tensor) -> None: - r"""Compute Nu function. - - For each cell i and outcome k: - - nu(lower_{i,k}, upper_{i,k}, mu_k, sigma_k) = ( - upper_{i,k} - lower_{i,k} - ) * (1 - CDF((upper_{i,k} - mu_k) / sigma_k)) - - See Equation 25 in [Yang2019]_ for more details. - - Args: - lower: A `num_cells x m`-dim tensor of lower cell bounds - upper: A `num_cells x m`-dim tensor of upper cell bounds - mu: A `batch_shape x 1 x m`-dim tensor of means - sigma: A `batch_shape x 1 x m`-dim tensor of standard deviations (clamped). - - Returns: - A `batch_shape x num_cells x m`-dim tensor of values. - """ - return (upper - lower) * (1 - self.normal.cdf((upper - mu) / sigma)) - - def forward(self, X: Tensor) -> Tensor: - posterior = [[_m.predict(_x) for _m in self.model] for _x in X] - mu = torch.tensor([[_m[0].item() for _m in _p] for _p in posterior])[:, None, :] - sigma = torch.tensor([[_s[1].item() for _s in _p] for _p in posterior])[ - :, None, : - ] - - # clamp here, since upper_bounds will contain `inf`s, which - # are not differentiable - cell_upper_bounds = self.cell_upper_bounds.clamp_max(1e8) - # Compute psi(lower_i, upper_i, mu_i, sigma_i) for i=0, ... 
m-2
-        psi_lu = self.psi(
-            lower=self.cell_lower_bounds, upper=cell_upper_bounds, mu=mu, sigma=sigma
-        )
-        # Compute psi(lower_m, lower_m, mu_m, sigma_m)
-        psi_ll = self.psi(
-            lower=self.cell_lower_bounds,
-            upper=self.cell_lower_bounds,
-            mu=mu,
-            sigma=sigma,
-        )
-        # Compute nu(lower_m, upper_m, mu_m, sigma_m)
-        nu = self.nu(
-            lower=self.cell_lower_bounds, upper=cell_upper_bounds, mu=mu, sigma=sigma
-        )
-        # compute the difference psi_ll - psi_lu
-        psi_diff = psi_ll - psi_lu
-
-        # this is batch_shape x num_cells x 2 x (m-1)
-        stacked_factors = torch.stack([psi_diff, nu], dim=-2)
-
-        # Take the cross product of psi_diff and nu across all outcomes
-        # e.g. for m = 2
-        # for each batch and cell, compute
-        # [psi_diff_0, psi_diff_1]
-        # [nu_0, psi_diff_1]
-        # [psi_diff_0, nu_1]
-        # [nu_0, nu_1]
-        # this tensor has shape: `batch_shape x num_cells x 2^m x m`
-        all_factors_up_to_last = stacked_factors.gather(
-            dim=-2,
-            index=self._cross_product_indices.expand(
-                stacked_factors.shape[:-2] + self._cross_product_indices.shape
-            ),
-        )
-        # compute product for all 2^m terms,
-        # sum across all terms and hypercells
-        return all_factors_up_to_last.prod(dim=-1).sum(dim=-1).sum(dim=-1)
diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/base_acquisition.py b/neps/optimizers/bayesian_optimization/acquisition_functions/base_acquisition.py
index 7249c0fd2..17a1a974f 100644
--- a/neps/optimizers/bayesian_optimization/acquisition_functions/base_acquisition.py
+++ b/neps/optimizers/bayesian_optimization/acquisition_functions/base_acquisition.py
@@ -1,17 +1,30 @@
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    import numpy as np
+    import torch
 
 
 class BaseAcquisition(ABC):
     def __init__(self):
-        self.surrogate_model = None
+        self.surrogate_model: Any | None = None
 
     @abstractmethod
-    def eval(self, x, asscalar: bool = False):
+    def eval(
+        self,
+        x: Iterable,
+        *,
+        asscalar: bool = False,
+    ) -> np.ndarray | torch.Tensor | float:
         """Evaluate the acquisition function at point x2."""
         raise NotImplementedError
 
-    def __call__(self, *args, **kwargs):
+    def __call__(self, *args: Any, **kwargs: Any) -> np.ndarray | torch.Tensor | float:
         return self.eval(*args, **kwargs)
 
-    def set_state(self, surrogate_model, **kwargs):
+    def set_state(self, surrogate_model: Any, **kwargs: Any) -> None:
         self.surrogate_model = surrogate_model
diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/cost_cooling.py b/neps/optimizers/bayesian_optimization/acquisition_functions/cost_cooling.py
index a45cd0510..46fe1309e 100644
--- a/neps/optimizers/bayesian_optimization/acquisition_functions/cost_cooling.py
+++ b/neps/optimizers/bayesian_optimization/acquisition_functions/cost_cooling.py
@@ -1,46 +1,54 @@
-from typing import Iterable, Union
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
 
-import numpy as np
 import torch
+from functools import partial
+
+from neps.optimizers.bayesian_optimization.acquisition_functions.weighted_acquisition import (  # noqa: E501
+    WeightedAcquisition,
+)
+
+if TYPE_CHECKING:
+    from botorch.acquisition import AcquisitionFunction
+    from botorch.acquisition.analytic import GPyTorchModel
+    from torch import Tensor
+
+
+def apply_cost_cooling(
+    acq_values: Tensor,
+    X: Tensor,
+    acq: AcquisitionFunction,
+    cost_model: GPyTorchModel,
+    alpha: float,
+) -> Tensor:
+    # NOTE: We expect **positive** costs from model
+ cost = cost_model.posterior(X).mean + cost = cost.squeeze(dim=-1) if cost_model.num_outputs == 1 else cost.sum(dim=-1) + + if acq._log: + # Take log of both sides, acq is already log scaled + # -- x = acq / cost^alpha + # -- log(x) = log(acq) - alpha * log(cost) + w = alpha * cost.log() + return acq_values - w + + # https://github.com/pytorch/botorch/discussions/2194 + w = cost.pow(alpha) + return torch.where(acq_values > 0, acq_values / w, acq_values * w) + -from .base_acquisition import BaseAcquisition -from .ei import ComprehensiveExpectedImprovement - - -class CostCooler(BaseAcquisition): - def __init__( - self, - base_acquisition: BaseAcquisition = ComprehensiveExpectedImprovement, - ): - self.base_acquisition = base_acquisition - self.cost_model = None - self.alpha = None - - def eval( - self, - x: Iterable, - **base_acquisition_kwargs, - ) -> Union[np.ndarray, torch.Tensor, float]: - base_acquisition_value = self.base_acquisition.eval( - x=x, **base_acquisition_kwargs - ) - costs, _ = self.cost_model.predict(x) - # if costs < 0.001: - # costs = 1 - if torch.is_tensor(costs): - cost_cooled = torch.zeros_like(costs) - index = 0 - for _, y in enumerate(costs.detach().numpy()): - if y < 0.0001: - cost_cooled[index] = base_acquisition_value[index] - else: - cost_cooled[index] = base_acquisition_value[index] / (y**self.alpha) - index += 1 - # return base_acquisition_value # / (costs**self.alpha).detach().numpy() - return cost_cooled - - def set_state(self, surrogate_model, alpha, cost_model, **kwargs): - super().set_state(surrogate_model=surrogate_model) - self.base_acquisition.set_state(surrogate_model=surrogate_model, **kwargs) - self.alpha = alpha - self.cost_model = cost_model +def cost_cooled_acq( + acq_fn: AcquisitionFunction, + model: GPyTorchModel, + used_max_cost_total_percentage: float, +) -> WeightedAcquisition: + assert 0 <= used_max_cost_total_percentage <= 1 + return WeightedAcquisition( + acq=acq_fn, + apply_weight=partial( + apply_cost_cooling, + cost_model=model, + alpha=1 - used_max_cost_total_percentage, + ), + ) diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py index ba5eb38bd..b8ee5f752 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/ei.py @@ -1,18 +1,23 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Sequence, Union -import numpy as np +from collections.abc import Sequence +from typing import TYPE_CHECKING, Any + import torch from torch.distributions import Normal from .base_acquisition import BaseAcquisition if TYPE_CHECKING: + import numpy as np + from neps.search_spaces import SearchSpace + class ComprehensiveExpectedImprovement(BaseAcquisition): def __init__( self, + *, augmented_ei: bool = False, xi: float = 0.0, in_fill: str = "best", @@ -47,34 +52,35 @@ def __init__( self.xi = xi self.in_fill = in_fill self.log_ei = log_ei - self.incumbent = None + self.incumbent: float | None = None self.optimize_on_max_fidelity = optimize_on_max_fidelity def eval( - self, x: Sequence[SearchSpace], asscalar: bool = False, - ) -> Union[np.ndarray, torch.Tensor, float]: - """ - Return the negative expected improvement at the query point x2 - """ + self, + x: Sequence[SearchSpace], + *, + asscalar: bool = False, + ) -> np.ndarray | torch.Tensor | float: + """Return the negative expected improvement at the query point x2.""" assert self.incumbent is not None, "EI 
function not fitted on model" + assert self.surrogate_model is not None - if x[0].has_fidelity and self.optimize_on_max_fidelity: - _x = [e.clone() for e in x] - for e in _x: - e.set_to_max_fidelity() + space = x[0] + if len(space.fidelities) > 0 and self.optimize_on_max_fidelity: + assert len(space.fidelities) == 1 + fid_name, fid = next(iter(space.fidelities.items())) + _x = [space.from_dict({**e._values, fid_name: fid.upper}) for e in x] else: - _x = x + _x = list(x) + + mu, cov = self.surrogate_model.predict(_x) - try: - mu, cov = self.surrogate_model.predict(_x) - except ValueError as e: - raise e - # return -1.0 # in case of error. return ei of -1 std = torch.sqrt(torch.diag(cov)) mu_star = self.incumbent + gauss = Normal(torch.zeros(1, device=mu.device), torch.ones(1, device=mu.device)) - # u = (mu - mu_star - self.xi) / std - # ei = std * updf + (mu - mu_star - self.xi) * ucdf + # > u = (mu - mu_star - self.xi) / std + # > ei = std * updf + (mu - mu_star - self.xi) * ucdf if self.log_ei: # we expect that f_min is in log-space f_min = mu_star - self.xi @@ -84,7 +90,15 @@ def eval( ) * gauss.cdf(v - std) else: u = (mu_star - mu - self.xi) / std - ucdf = gauss.cdf(u) + try: + ucdf = gauss.cdf(u) + except ValueError as e: + print(f"u: {u}") # noqa: T201 + print(f"mu_star: {mu_star}") # noqa: T201 + print(f"mu: {mu}") # noqa: T201 + print(f"std: {std}") # noqa: T201 + print(f"diag: {cov.diag()}") # noqa: T201 + raise e updf = torch.exp(gauss.log_prob(u)) ei = std * updf + (mu_star - mu - self.xi) * ucdf if self.augmented_ei: @@ -94,20 +108,21 @@ def eval( ) if isinstance(_x, list) and asscalar: return ei.detach().numpy() + if asscalar: ei = ei.detach().numpy().item() + return ei - def set_state(self, surrogate_model, **kwargs): + def set_state(self, surrogate_model: Any, **kwargs: Any) -> None: super().set_state(surrogate_model, **kwargs) + assert self.surrogate_model is not None # Compute incumbent if self.in_fill == "best": - # return torch.max(surrogate_model.y_) - self.incumbent = torch.min(self.surrogate_model.y_) + self.incumbent = float(torch.min(self.surrogate_model.y_)) else: x = self.surrogate_model.x mu_train, _ = self.surrogate_model.predict(x) - # incumbent_idx = torch.argmax(mu_train) incumbent_idx = torch.argmin(mu_train) self.incumbent = self.surrogate_model.y_[incumbent_idx] diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py deleted file mode 100644 index 3d19040df..000000000 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py +++ /dev/null @@ -1,205 +0,0 @@ -# type: ignore -from typing import Any, Iterable, Tuple, Union - -import numpy as np -import pandas as pd -import torch -from torch.distributions import Normal - -from ....optimizers.utils import map_real_hyperparameters_from_tabular_ids -from ....search_spaces.search_space import SearchSpace -from ...multi_fidelity.utils import MFObservedData -from .ei import ComprehensiveExpectedImprovement - - -class MFEI(ComprehensiveExpectedImprovement): - def __init__( - self, - pipeline_space: SearchSpace, - surrogate_model_name: str = None, - augmented_ei: bool = False, - xi: float = 0.0, - in_fill: str = "best", - log_ei: bool = False, - ): - super().__init__(augmented_ei, xi, in_fill, log_ei) - self.pipeline_space = pipeline_space - self.surrogate_model_name = surrogate_model_name - self.surrogate_model = None - self.observations = None - self.b_step = None - - def get_budget_level(self, config) -> 
int: - return int((config.fidelity.value - config.fidelity.lower) / self.b_step) - - def preprocess(self, x: pd.Series) -> Tuple[Iterable, Iterable]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point, as - required by the multi-fidelity Expected Improvement acquisition function. - """ - budget_list = [] - - if self.pipeline_space.has_tabular: - # preprocess tabular space differently - # expected input: IDs pertaining to the tabular data - # expected output: IDs pertaining to current observations and set of HPs - x = map_real_hyperparameters_from_tabular_ids(x, self.pipeline_space) - indices_to_drop = [] - for i, config in x.items(): - target_fidelity = config.fidelity.lower - if i <= max(self.observations.seen_config_ids): - # IMPORTANT to set the fidelity at which EI will be calculated only for - # the partial configs that have been observed already - target_fidelity = config.fidelity.value + self.b_step - - if np.less_equal(target_fidelity, config.fidelity.upper): - # only consider the configs with fidelity lower than the max fidelity - config.fidelity.set_value(target_fidelity) - budget_list.append(self.get_budget_level(config)) - else: - # if the target_fidelity higher than the max drop the configuration - indices_to_drop.append(i) - else: - config.fidelity.set_value(target_fidelity) - budget_list.append(self.get_budget_level(config)) - - # Drop unused configs - x.drop(labels=indices_to_drop, inplace=True) - - performances = self.observations.get_best_performance_for_each_budget() - inc_list = [] - for budget_level in budget_list: - if budget_level in performances.index: - inc = performances[budget_level] - else: - inc = self.observations.get_best_seen_performance() - inc_list.append(inc) - - return x, torch.Tensor(inc_list) - - def preprocess_gp(self, x: Iterable) -> Tuple[Iterable, Iterable]: - x, inc_list = self.preprocess(x) - return x.values.tolist(), inc_list - - def preprocess_deep_gp(self, x: Iterable) -> Tuple[Iterable, Iterable]: - x, inc_list = self.preprocess(x) - x_lcs = [] - for idx in x.index: - if idx in self.observations.df.index.levels[0]: - budget_level = self.get_budget_level(x[idx]) - lc = self.observations.extract_learning_curve(idx, budget_level) - else: - # initialize a learning curve with a place holder - # This is later padded accordingly for the Conv1D layer - lc = [0.0] - x_lcs.append(lc) - self.surrogate_model.set_prediction_learning_curves(x_lcs) - return x.values.tolist(), inc_list - - def preprocess_pfn(self, x: Iterable) -> Tuple[Iterable, Iterable, Iterable]: - """Prepares the configurations for appropriate EI calculation. - - Takes a set of points and computes the budget and incumbent for each point, as - required by the multi-fidelity Expected Improvement acquisition function. 
- """ - _x, inc_list = self.preprocess(x.copy()) - _x_tok = self.observations.tokenize(_x, as_tensor=True) - len_partial = len(self.observations.seen_config_ids) - z_min = x[0].fidelity.lower - # converting fidelity to the discrete budget level - # STRICT ASSUMPTION: fidelity is the first dimension - _x_tok[:len_partial, 0] = ( - _x_tok[:len_partial, 0] + self.b_step - z_min - ) / self.b_step - return _x_tok, _x, inc_list - - def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]: - # _x = x.copy() # preprocessing needs to change the reference x Series so we don't copy here - if self.surrogate_model_name == "pfn": - _x_tok, _x, inc_list = self.preprocess_pfn( - x.copy() - ) # IMPORTANT change from vanilla-EI - ei = self.eval_pfn_ei(_x_tok, inc_list) - elif self.surrogate_model_name == "deep_gp": - _x, inc_list = self.preprocess_deep_gp( - x.copy() - ) # IMPORTANT change from vanilla-EI - ei = self.eval_gp_ei(_x, inc_list) - _x = pd.Series(_x, index=np.arange(len(_x))) - else: - _x, inc_list = self.preprocess_gp( - x.copy() - ) # IMPORTANT change from vanilla-EI - ei = self.eval_gp_ei(_x, inc_list) - _x = pd.Series(_x, index=np.arange(len(_x))) - - if ei.is_cuda: - ei = ei.cpu() - if len(x) > 1 and asscalar: - return ei.detach().numpy(), _x - else: - return ei.detach().numpy().item(), _x - - def eval_pfn_ei( - self, x: Iterable, inc_list: Iterable - ) -> Union[np.ndarray, torch.Tensor, float]: - """PFN-EI modified to preprocess samples and accept list of incumbents.""" - # x, inc_list = self.preprocess(x) # IMPORTANT change from vanilla-EI - # _x = x.copy() - ei = self.surrogate_model.get_ei(x.to(self.surrogate_model.device), inc_list) - if len(ei.shape) == 2: - ei = ei.flatten() - return ei - - def eval_gp_ei( - self, x: Iterable, inc_list: Iterable - ) -> Union[np.ndarray, torch.Tensor, float]: - """Vanilla-EI modified to preprocess samples and accept list of incumbents.""" - # x, inc_list = self.preprocess(x) # IMPORTANT change from vanilla-EI - _x = x.copy() - try: - mu, cov = self.surrogate_model.predict(_x) - except ValueError as e: - raise e - # return -1.0 # in case of error. 
return ei of -1 - std = torch.sqrt(torch.diag(cov)) - - mu_star = inc_list.to(mu.device) # IMPORTANT change from vanilla-EI - - gauss = Normal(torch.zeros(1, device=mu.device), torch.ones(1, device=mu.device)) - # u = (mu - mu_star - self.xi) / std - # ei = std * updf + (mu - mu_star - self.xi) * ucdf - if self.log_ei: - # we expect that f_min is in log-space - f_min = mu_star - self.xi - v = (f_min - mu) / std - ei = torch.exp(f_min) * gauss.cdf(v) - torch.exp( - 0.5 * torch.diag(cov) + mu - ) * gauss.cdf(v - std) - else: - u = (mu_star - mu - self.xi) / std - ucdf = gauss.cdf(u) - updf = torch.exp(gauss.log_prob(u)) - ei = std * updf + (mu_star - mu - self.xi) * ucdf - if self.augmented_ei: - sigma_n = self.surrogate_model.likelihood - ei *= 1.0 - torch.sqrt(torch.tensor(sigma_n, device=mu.device)) / torch.sqrt( - sigma_n + torch.diag(cov) - ) - return ei - - def set_state( - self, - pipeline_space: SearchSpace, - surrogate_model: Any, - observations: MFObservedData, - b_step: Union[int, float], - **kwargs, - ): - # overload to select incumbent differently through observations - self.pipeline_space = pipeline_space - self.surrogate_model = surrogate_model - self.observations = observations - self.b_step = b_step - return diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/pibo.py b/neps/optimizers/bayesian_optimization/acquisition_functions/pibo.py new file mode 100644 index 000000000..3cba54e56 --- /dev/null +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/pibo.py @@ -0,0 +1,65 @@ +"""# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +Prior-Guided Acquisition Functions + +References: + +.. [Hvarfner2022] + C. Hvarfner, D. Stoll, A. Souza, M. Lindauer, F. Hutter, L. Nardi. PiBO: + Augmenting Acquisition Functions with User Beliefs for Bayesian Optimization. + ICLR 2022. 
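+
+In short (restating the paper rather than this module): the acquisition is
+reweighted as acq(x) * pi(x)^(beta / n), or in log space
+log_acq(x) + (beta / n) * log pi(x); `apply_pibo_acquisition_weight` below
+implements this weighting, with `prior_exponent` playing the role of beta / n.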
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from botorch.acquisition.logei import partial + +from neps.optimizers.bayesian_optimization.acquisition_functions.weighted_acquisition import ( # noqa: E501 + WeightedAcquisition, +) + +if TYPE_CHECKING: + from botorch.acquisition.acquisition import AcquisitionFunction + from torch import Tensor + + from neps.sampling.priors import Prior + from neps.search_spaces.domain import Domain + from neps.search_spaces.encoding import ConfigEncoder + + +def apply_pibo_acquisition_weight( + acq_values: Tensor, + X: Tensor, + acq: AcquisitionFunction, + *, + prior: Prior, + x_domain: Domain | list[Domain] | ConfigEncoder, + prior_exponent: float, +) -> Tensor: + if acq._log: + weighted_log_probs = prior.log_pdf(X, frm=x_domain) + prior_exponent + return acq_values + weighted_log_probs + + weighted_probs = prior.pdf(X, frm=x_domain).pow(prior_exponent) + return acq_values * weighted_probs + + +def pibo_acquisition( + acq_fn: AcquisitionFunction, + prior: Prior, + prior_exponent: float, + x_domain: Domain | list[Domain] | ConfigEncoder, +) -> WeightedAcquisition: + return WeightedAcquisition( + acq=acq_fn, + apply_weight=partial( + apply_pibo_acquisition_weight, + prior=prior, + x_domain=x_domain, + prior_exponent=prior_exponent, + ), + ) diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/prior_weighted.py b/neps/optimizers/bayesian_optimization/acquisition_functions/prior_weighted.py deleted file mode 100644 index ca3a3f5b2..000000000 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/prior_weighted.py +++ /dev/null @@ -1,61 +0,0 @@ -from typing import Iterable, Union - -import numpy as np -import torch - -from .base_acquisition import BaseAcquisition - - -class DecayingPriorWeightedAcquisition(BaseAcquisition): - def __init__( - self, - base_acquisition, - pibo_beta=10, - log: bool = False, - ): - super().__init__() - self.pibo_beta = pibo_beta - self.base_acquisition = base_acquisition - self.log = log - self.decay_t = 0.0 - - def eval( - self, - x: Iterable, - **base_acquisition_kwargs, - ) -> Union[np.ndarray, torch.Tensor, float]: - acquisition = self.base_acquisition(x, **base_acquisition_kwargs) - - if self.log: - min_acq_val = abs(min(acquisition)) if min(acquisition) < 0 else 0 - - for i, candidate in enumerate(x): - prior_weight = candidate.compute_prior(log=self.log) - if prior_weight != 1.0: - if self.log: - # for log -> the smaller the prior_weight, - # the more unlikely it is from the prior - # also shift acquisition values to avoid negativ values - acquisition[i] = ( - np.log(acquisition[i] + min_acq_val + 1e-12) - + (self.pibo_beta / self.decay_t) * prior_weight - ) - else: - acquisition[i] *= np.power( - prior_weight + 1e-12, self.pibo_beta / self.decay_t - ) - return acquisition - - def set_state(self, surrogate_model, **kwargs): - if "decay_t" in kwargs: - decay_t = kwargs.pop("decay_t") - else: - train_x = surrogate_model.x - if train_x[0].has_fidelity: - decay_t = np.sum( - [float(_x.fidelity.value >= _x.fidelity.upper) for _x in train_x] - ) - else: - decay_t = len(train_x) - self.decay_t = decay_t - self.base_acquisition.set_state(surrogate_model, **kwargs) diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py b/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py index adf572665..52587a7a8 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/ucb.py +++ 
@@ -1,13 +1,21 @@
-from typing import Iterable, Union
+from __future__ import annotations
+
+import logging
+from collections.abc import Iterable
+from typing import Any
 
 import numpy as np
 import torch
 
-from .base_acquisition import BaseAcquisition
+from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import (
+    BaseAcquisition,
+)
+
+logger = logging.getLogger(__name__)
 
 
 class UpperConfidenceBound(BaseAcquisition):
-    def __init__(self, beta: float=1.0, maximize: bool=False):
+    def __init__(self, *, beta: float = 1.0, maximize: bool = False):
         """Upper Confidence Bound (UCB) acquisition function.
 
         Args:
@@ -18,22 +26,26 @@ def __init__(self, beta: float=1.0, maximize: bool=False):
         super().__init__()
         self.beta = beta  # can be updated as part of the state for dynamism or a schedule
         self.maximize = maximize
-
+        # to be initialized as part of the state
         self.surrogate_model = None
 
-    def set_state(self, surrogate_model, **kwargs):
+    def set_state(self, surrogate_model: Any, **kwargs: Any) -> None:
         super().set_state(surrogate_model)
         self.surrogate_model = surrogate_model
         if "beta" in kwargs:
-            if not isinstance(kwargs["beta"], (list, np.array)):
+            if not isinstance(kwargs["beta"], list | np.ndarray):
                 self.beta = kwargs["beta"]
             else:
-                self.logger.warning("Beta is a list, not updating beta value!")
-
+                logger.warning("Beta is a list, not updating beta value!")
+
     def eval(
-        self, x: Iterable, asscalar: bool = False
-    ) -> Union[np.ndarray, torch.Tensor, float]:
+        self,
+        x: Iterable,
+        *,
+        asscalar: bool = False,
+    ) -> np.ndarray | torch.Tensor | float:
+        assert self.surrogate_model is not None, "Surrogate model is not set."
         try:
             mu, cov = self.surrogate_model.predict(x)
             std = torch.sqrt(torch.diag(cov))
@@ -41,20 +53,5 @@ def eval(
             raise e
         sign = 1 if self.maximize else -1  # LCB is performed if minimize=True
         ucb_scores = mu + sign * np.sqrt(self.beta) * std
-        # if LCB, minimize acquisition, or maximize -acquisition
-        ucb_scores = ucb_scores.detach().numpy() * sign
-
-        return ucb_scores
-
-
-class MF_UCB(UpperConfidenceBound):
-
-    def preprocess(self, x: Iterable) -> Iterable:
-        performances = self.observations.get_best_performance_for_each_budget()
-        pass
-
-    def eval(
-        self, x: Iterable, asscalar: bool = False
-    ) -> Union[np.ndarray, torch.Tensor, float]:
-        x = self.preprocess(x)
-        return self.eval(x, asscalar=asscalar)
+        # if LCB, minimize acquisition, or maximize -acquisition
+        return ucb_scores.detach().numpy() * sign
diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/weighted_acquisition.py b/neps/optimizers/bayesian_optimization/acquisition_functions/weighted_acquisition.py
new file mode 100644
index 000000000..fd23d3319
--- /dev/null
+++ b/neps/optimizers/bayesian_optimization/acquisition_functions/weighted_acquisition.py
@@ -0,0 +1,153 @@
+"""This module provides most of the functionality we require in NePS for now,
+i.e., we need the ability to apply an arbitrary weight to an acquisition function.
+
+I spent some time understanding the meaning of the various dimensions of botorch/gpytorch.
+
+The two primary dimensions to consider are:
+
+* `d` - The dimensionality of the design space, i.e. how many hyperparameters.
+* `batch` - The number of independent evaluations to make, i.e. how many times to
+    evaluate the acquisition function.
+
+There are two extra dimensions which are special cases and need to be accounted for.
+
+* `q` - Comes from the `qXXX` variants of acquisition, these will add an extra dimension
+    `q` to each `batch`, where instead of a `batch` representing a single config to get
+    the acquisition of, we might instead be getting the acquisition of 5 configs together,
+    representing the joint utility of evaluating these 5 configs, relative to other sets
+    of 5 configs. This dimension is _reduced_ away in the final step of the acquisition
+    when deciding which group of 5 configs to suggest.
+
+* `mc_samples` - Comes from the `SampleReducingXXX` variants of acquisition, will add an
+    extra dimension `mc_samples` which represents the number of Monte Carlo samples used
+    to estimate the acquisition. These will eventually be _reduced_ away but are present
+    in the intermediate steps. These variants also seem to have `q` variants implicitly
+    and so you are likely to see the `q` dimension wherever you see the `mc_samples`
+    dimension, even if it is just `q=1`.
+
+* `m` - The number of objectives in the multi-objective case. We will
+    specifically ignore this for now, however it exists as the last dimension (after `d`)
+    and is the first to be reduced away. They are also used in _constrained_ settings
+    which we will also ignore for now.
+
+The most expanded tensor shape is the following, with the usual order of reduction being
+the following below. If you are not using a SampleReducing variant, you will not see
+`mc_samples` and if you are not using a `q` variant, you will not see `q`. The simplest
+case is then `acq(tensor: batch x d)`.
+
+* `batch x q x d`.
+    reduce(..., d) = Config -> Single number (!!!Acq applies here!!!)
+* `batch x q`.
+    expand(mc_samples, ...) = MC Sampling from posterior (I think)
+* `mc_samples x batch x q`.
+    reduce(..., q) = Joint-Config-Group -> Single number.
+* `mc_samples x batch`
+    reduce(mc_samples, ...) = MC-samples -> statistical estimate
+* `batch`
+
+Finally we get out a batch of values we can argmax over, used to index into either a
+single configuration or a single index into a joint-group of `q` configurations.
+
+!!! tip
+
+    The `mc_samples` dimension is not of concern to the `WeightedAcquisition` below,
+    and broadcasting can be used; as a result, the `apply_weight` function only needs
+    to be able to handle:
+
+    * (X: batch x q x d, acq_values: batch x q, acq: A) -> batch x q
+
+    If utilizing the configurations `X` for weighting, you effectively will want
+    to reduce the `d` dimension.
+
+As a result of this, acquisition functions need to be able to handle arbitrary dimensions
+and act accordingly.
+
+This module mostly follows the structure of the
+`PriorGuidedAcquisitionFunction` which weights the acquisition function by a prior.
+
+* https://botorch.org/api/_modules/botorch/acquisition/prior_guided.html#PriorGuidedAcquisitionFunction
+
+We use this to create a more generic `WeightedAcquisition` which follows the required
+structure to make new weightings easier to implement, but also to serve as an educational
+reference.
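+
+!!! example
+
+    A minimal sketch of a weight function (illustrative only; `my_weight` and
+    `base_acq` below are hypothetical, not part of this module):
+
+        def my_weight(acq_values, X, acq):
+            # acq_values: batch x q, X: batch x q x d
+            # (an `mc_samples` dimension, if present, broadcasts through)
+            w = X.mean(dim=-1)  # reduce the `d` dimension -> batch x q
+            return acq_values * w
+
+        weighted_acq = WeightedAcquisition(acq=base_acq, apply_weight=my_weight)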
+""" + +from __future__ import annotations + +from collections.abc import Callable +from typing import TYPE_CHECKING, TypeVar + +from botorch.acquisition import SampleReducingMCAcquisitionFunction +from botorch.acquisition.analytic import AcquisitionFunction, t_batch_mode_transform +from botorch.acquisition.monte_carlo import concatenate_pending_points + +if TYPE_CHECKING: + from torch import Tensor + +A = TypeVar("A", bound=AcquisitionFunction) + + +class WeightedAcquisition(AcquisitionFunction): + """Class for weighting acquisition functions. + + Please see module docstring for more information. + """ + + def __init__( + self, + acq: A, + apply_weight: Callable[[Tensor, Tensor, A], Tensor], + ) -> None: + """Initialize the weighted acquisition function. + + Args: + acq: The base acquisition function. + apply_weight: A function that takes the acquisition function values, the + design points and the acquisition function itself and returns the + weighted acquisition function values. + + Please see the module docstring for more information on the dimensions + and how to handle them. + """ + super().__init__(model=acq.model) + # NOTE: We remove the X_pending from the base acquisition function as we will get + # it in our own forward with `@concatenate_pending_points` and pass that forward. + # This avoids possible duplicates. Also important to explicitly set it to None + # even if it does not exist as otherwise the attribute does not exists -_- + if (X_pending := getattr(acq, "X_pending", None)) is not None: + acq.set_X_pending(None) + self.set_X_pending(X_pending) + else: + acq.set_X_pending(None) + self.set_X_pending(None) + + self.apply_weight = apply_weight + self.acq = acq + self._log = acq._log + + # Taken from PiBO implementation in botorch (PriorGuidedAcquisitionFunction). + @concatenate_pending_points + @t_batch_mode_transform() # type: ignore + def forward(self, X: Tensor) -> Tensor: + """Evaluate a weighted acquisition function on the candidate set X. + + Args: + X: A tensor of size `batch_shape x q x d`-dim tensor of `q` `d`-dim + design points. + + Returns: + A tensor with the `d` dimension reduced away, representing the + weighted acquisition function values at the given design points `X`. 
+ """ + if isinstance(self.acq, SampleReducingMCAcquisitionFunction): + # shape: mc_samples x batch x q-candidates + acq_values = self.acq._non_reduced_forward(X) + weighted_acq_values = self.apply_weight(acq_values, X, self.acq) + q_reduced_acq = self.acq._q_reduction(weighted_acq_values) + sample_reduced_acq = self.acq._sample_reduction(q_reduced_acq) + return sample_reduced_acq.squeeze(-1) + + # shape: batch x q-candidates + acq_values = self.acq(X).unsqueeze(-1) + weighted_acq_values = self.apply_weight(acq_values, X, self.acq) + return weighted_acq_values.squeeze(-1) diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/__init__.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/__init__.py deleted file mode 100644 index e3b125725..000000000 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .evolution_sampler import EvolutionSampler -from .freeze_thaw_sampler import FreezeThawSampler -from .mutation_sampler import MutationSampler -from .random_sampler import RandomSampler - -AcquisitionSamplerMapping = { - "random": RandomSampler, - "mutation": MutationSampler, - "evolution": EvolutionSampler, - "freeze-thaw": FreezeThawSampler, -} diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py deleted file mode 100644 index adf47b825..000000000 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import annotations - -from abc import abstractmethod -from typing import TYPE_CHECKING, Sequence, Callable - -if TYPE_CHECKING: - import numpy as np - import torch - from neps.search_spaces.search_space import SearchSpace - - -class AcquisitionSampler: - def __init__(self, pipeline_space: SearchSpace, patience: int = 50): - if patience < 1: - raise ValueError("Patience should be at least 1") - - self.pipeline_space = pipeline_space - self.acquisition_function = None - self.x: list[SearchSpace] = [] - self.y: Sequence[float] | np.ndarray | torch.Tensor = [] - self.patience = patience - - @abstractmethod - def sample(self, acquisition_function: Callable) -> SearchSpace: - raise NotImplementedError - - def sample_batch( - self, acquisition_function: Callable, batch: int - ) -> list[SearchSpace]: - return [self.sample(acquisition_function) for _ in range(batch)] - - def set_state( - self, x: list[SearchSpace], y: Sequence[float] | np.ndarray | torch.Tensor - ) -> None: - self.x = x - self.y = y diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/evolution_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/evolution_sampler.py deleted file mode 100644 index 6a76dcfc9..000000000 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/evolution_sampler.py +++ /dev/null @@ -1,236 +0,0 @@ -import random -from heapq import nlargest -from typing import List, Tuple - -import numpy as np - -from ....search_spaces.search_space import SearchSpace -from .base_acq_sampler import AcquisitionSampler -from .random_sampler import RandomSampler - - -class EvolutionSampler(AcquisitionSampler): - def __init__( - self, - pipeline_space: SearchSpace, - pool_size: int = 200, - num_evolutions: int = 10, - p_tournament: float = 0.2, - p_crossover: float = 0.5, - p_self_crossover: float = 0.5, - dynamic: bool = True, - max_iters: int = 50, - initial_history_best: int = 10, - 
initial_history_last: int = 0, - initial_history_acq_best: int = 0, - allow_isomorphism: bool = True, - check_isomorphism_history: bool = False, - patience: int = 50, - ): - super().__init__(pipeline_space=pipeline_space, patience=patience) - self.pool_size = pool_size - self.num_evolutions = num_evolutions - self.p_tournament = p_tournament - self.p_crossover = p_crossover - self.p_self_crossover = p_self_crossover - self.dynamic = dynamic - self.max_iters = max_iters - self.initial_history_last = initial_history_last - self.initial_history_best = initial_history_best - self.initial_history_acq_best = initial_history_acq_best - self.allow_isomorphism = allow_isomorphism - # check for isomorphisms also in previous graphs - self.check_isomorphism_history = check_isomorphism_history - - self.random_sampling = RandomSampler( - pipeline_space=pipeline_space, patience=self.patience - ) - - def set_state(self, x, y) -> None: - super().set_state(x, y) - self.random_sampling.set_state(x, y) - - def _mutate(self, parent): - for _ in range(self.patience): - try: - # needs to throw an Exception if config is not valid, e.g., empty graph etc.! - return parent.mutate() - except Exception: - continue - return False - - def _crossover(self, parent1, parent2): - for _ in range(self.patience): - try: - # needs to throw an Exception if config is not valid, e.g., empty graph etc.! - return parent1.crossover(parent2) - except Exception: - continue - return False, False - - def _tournament_selection(self, population: list, fitness: np.ndarray): - size = int(len(population) * self.p_tournament) - contender_indices = np.random.randint(len(population), size=size) - contender_fitness = fitness[contender_indices] - indices = nlargest( - 2, range(len(contender_fitness)), key=lambda idx: contender_fitness[idx] - ) - return contender_indices[indices] - - def _evolve(self, population, fitness): - new_pop = [] - while len(new_pop) < len(population): - # sample parents - best, second_best = self._tournament_selection(population, fitness) - parent1 = population[best] - parent2 = population[second_best] - - if random.random() < self.p_crossover: - if random.random() < self.p_self_crossover: - child1, child2 = self._crossover(parent1, parent1) - else: - child1, child2 = self._crossover(parent1, parent2) - if child1 is False: - continue - if not self.allow_isomorphism and child1 in new_pop: - continue - new_pop.append(child1) - if len(new_pop) < len(population): - if not self.allow_isomorphism and child2 in new_pop: - continue - new_pop.append(child2) - else: - child1 = self._mutate(parent1) - if child1 is False: - continue - if not self.allow_isomorphism and child1 in new_pop: - continue - new_pop.append(child1) - return new_pop - - def evolution( - self, - acquisition_function, - previous_samples: list, - population_size: int, - batch_size: int = None, - ): - def inner_loop(population, fitness, X_max, acq_max): - try: - fitness_standardized = fitness / np.sum(fitness) - except Exception: - fitness_standardized = 1 / len(fitness) - population = self._evolve(population, fitness_standardized) - # recalc fitness by also evaluating previous best configs - fitness = acquisition_function(X_max + population, asscalar=True) - # update best config & score - indices = nlargest( - batch_size, range(len(fitness)), key=lambda idx: fitness[idx] - ) - X_max = [ - population[idx - batch_size] if idx >= batch_size else X_max[idx] - for idx in indices - ] - acq_max = [float(fitness[idx]) for idx in indices] - return population, fitness, 
X_max, acq_max - - if batch_size is None: - batch_size = 1 - - new_pop = ( - self.x - if not self.allow_isomorphism and self.check_isomorphism_history - else [] - ) - population: List[SearchSpace] = [] - remaining_patience = self.patience - while ( - population_size - len(previous_samples) > len(population) - and remaining_patience > 0 - ): - population.extend( - [ - p_member - for p_member in self.random_sampling.sample_batch( - acquisition_function, - population_size - len(previous_samples) - len(population), - ) - if p_member not in new_pop - ] - ) - remaining_patience -= 1 - if ( - remaining_patience == 0 - and (population_size - len(previous_samples)) - len(population) > 0 - ): - population += self.random_sampling.sample( - population_size - len(previous_samples) - len(population) - ) - population.extend(previous_samples) - fitness = acquisition_function(population, asscalar=True) - - # initialize best config & score - indices = nlargest(batch_size, range(len(fitness)), key=lambda idx: fitness[idx]) - X_max = [population[idx] for idx in indices] - acq_max = [fitness[idx] for idx in indices] - iterations_best = [acq_max] - for _ in range(self.num_evolutions): - population, fitness, X_max, acq_max = inner_loop( - population, fitness, X_max, acq_max - ) - iterations_best.append(acq_max) - if self.dynamic: - i = self.num_evolutions - while i < self.max_iters: - population, fitness, X_max, acq_max = inner_loop( - population, fitness, X_max, acq_max - ) - if all( - all(np.isclose(x, l) for l in list(zip(*iterations_best[-5:]))[j]) - for j, x in enumerate(acq_max) - ): - break - iterations_best.append(acq_max) - i += 1 - - return X_max, population, acq_max - - def sample(self, acquisition_function) -> Tuple[list, list, list]: - population: List[SearchSpace] = [] - if self.initial_history_last > 0 and len(self.x) >= self.initial_history_last: - population = self.x[-self.initial_history_last :] - if self.initial_history_best > 0 and len(self.x) >= self.initial_history_best: - if len(self.y) > self.initial_history_best: - indices = np.argpartition(self.y, self.initial_history_best) - else: - indices = list(range(len(self.y))) - for idx in indices[: self.initial_history_best]: - population.append(self.x[idx]) - if ( - self.initial_history_acq_best > 0 - and len(self.x) >= self.initial_history_acq_best - ): - acq_vals = acquisition_function(self.x, asscalar=True) - indices = np.argpartition(acq_vals, -self.initial_history_acq_best) - for idx in indices[-self.initial_history_acq_best :]: - population.append(self.x[idx]) - if ( - len(population) - < self.initial_history_last - + self.initial_history_best - + self.initial_history_acq_best - ): - population += list( - random.sample( - self.x, - k=min( - self.initial_history_last - + self.initial_history_best - + self.initial_history_acq_best - - len(population), - len(self.x), - ), - ) - ) - X_max, _, _ = self.evolution(acquisition_function, population, self.pool_size, 1) - return X_max[0] diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py deleted file mode 100644 index 89b7d9d3e..000000000 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py +++ /dev/null @@ -1,182 +0,0 @@ -# type: ignore -from __future__ import annotations - -import warnings -from copy import deepcopy - -import numpy as np -import pandas as pd - -from ....search_spaces.search_space import SearchSpace -from 
...multi_fidelity.utils import MFObservedData -from .base_acq_sampler import AcquisitionSampler - - -class FreezeThawSampler(AcquisitionSampler): - - SAMPLES_TO_DRAW = 100 # number of random samples to draw at lowest fidelity - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.observations = None - self.b_step = None - self.n = None - self.pipeline_space = None - # args to manage tabular spaces/grid - self.is_tabular = False - self.sample_full_table = None - self.set_sample_full_tabular(True) # sets flag that samples full table - - def set_sample_full_tabular(self, flag: bool=False): - if self.is_tabular: - self.sample_full_table = flag - - def _sample_new( - self, index_from: int, n: int = None, ignore_fidelity: bool = False - ) -> pd.Series: - n = n if n is not None else self.SAMPLES_TO_DRAW - new_configs = [ - self.pipeline_space.sample( - patience=self.patience, user_priors=False, ignore_fidelity=ignore_fidelity - ) - for _ in range(n) - ] - - return pd.Series( - new_configs, index=range(index_from, index_from + len(new_configs)) - ) - - def _sample_new_unique( - self, - index_from: int, - n: int = None, - patience: int = 10, - ignore_fidelity: bool = False, - ) -> pd.Series: - n = n if n is not None else self.SAMPLES_TO_DRAW - assert ( - patience > 0 and n > 0 - ), "Patience and SAMPLES_TO_DRAW must be larger than 0" - - existing_configs = self.observations.all_configs_list() - new_configs = [] - for _ in range(n): - # Sample patience times for an unobserved configuration - for _ in range(patience): - _config = self.pipeline_space.sample( - patience=self.patience, - user_priors=False, - ignore_fidelity=ignore_fidelity, - ) - # # Convert continuous into tabular if the space is tabular - # _config = continuous_to_tabular(_config, self.tabular_space) - # Iterate over all observed configs - for config in existing_configs: - if _config.is_equal_value( - config, include_fidelity=not ignore_fidelity - ): - # if the sampled config already exists - # do the next iteration of patience - break - else: - # If the new sample is not equal to any previous - # then it's a new config - new_config = _config - break - else: - # TODO: use logger.warn here instead (karibbov) - warnings.warn( - f"Couldn't find an unobserved configuration in {patience} " - f"iterations. 
Using an observed config instead" - ) - # patience budget exhausted use the last sampled config anyway - new_config = _config - - # append the new config to the list - new_configs.append(new_config) - - return pd.Series( - new_configs, index=range(index_from, index_from + len(new_configs)) - ) - - def sample( - self, - acquisition_function=None, - n: int = None, - set_new_sample_fidelity: int | float = None, - ) -> list(): - """Samples a new set and returns the total set of observed + new configs.""" - partial_configs = self.observations.get_partial_configs_at_max_seen() - new_configs = self._sample_new( - index_from=self.observations.next_config_id(), n=n, ignore_fidelity=False - ) - - def __sample_single_new_tabular(index: int): - """ - A function to use in a list comprehension to slightly speed up - the sampling process when self.SAMPLE_TO_DRAW is large - """ - config = self.pipeline_space.sample( - patience=self.patience, user_priors=False, ignore_fidelity=False - ) - config["id"].set_value(_new_configs[index]) - config.fidelity.set_value(set_new_sample_fidelity) - return config - - if self.is_tabular: - _n = n if n is not None else self.SAMPLES_TO_DRAW - _partial_ids = {conf["id"].value for conf in partial_configs} - _all_ids = set(self.pipeline_space.custom_grid_table.index.values) - - # accounting for unseen configs only, samples remaining table if flag is set - max_n = len(_all_ids) + 1 if self.sample_full_table else _n - _n = min(max_n, len(_all_ids - _partial_ids)) - - _new_configs = np.random.choice( - list(_all_ids - _partial_ids), size=_n, replace=False - ) - new_configs = [__sample_single_new_tabular(i) for i in range(_n)] - new_configs = pd.Series( - new_configs, - index=np.arange( - len(partial_configs), len(partial_configs) + len(new_configs) - ), - ) - - elif set_new_sample_fidelity is not None: - for config in new_configs: - config.fidelity.set_value(set_new_sample_fidelity) - - # Deep copy configs for fidelity updates - partial_configs_list = [] - index_list = [] - for idx, config in partial_configs.items(): - _config = config.clone() - partial_configs_list.append(_config) - index_list.append(idx) - - # We build a new series of partial configs to avoid - # incrementing fidelities multiple times due to pass-by-reference - partial_configs = pd.Series(partial_configs_list, index=index_list) - - configs = pd.concat([partial_configs, new_configs]) - - return configs - - def set_state( - self, - pipeline_space: SearchSpace, - observations: MFObservedData, - b_step: int, - n: int = None, - ): - # overload to select incumbent differently through observations - self.pipeline_space = pipeline_space - self.observations = observations - self.b_step = b_step - self.n = n if n is not None else self.SAMPLES_TO_DRAW - if ( - hasattr(self.pipeline_space, "custom_grid_table") - and self.pipeline_space.custom_grid_table is not None - ): - self.is_tabular = True diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py deleted file mode 100644 index 4c6b17df0..000000000 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Callable, Sequence - -import numpy as np -import torch -from more_itertools import first -from typing_extensions import override - -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - 
AcquisitionSampler, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers.random_sampler import ( - RandomSampler, -) - -if TYPE_CHECKING: - from neps.search_spaces.search_space import SearchSpace - - -def _propose_location( - acquisition_function: Callable, - candidates: list[SearchSpace], - top_n: int = 5, - return_distinct: bool = True, -) -> tuple[list[SearchSpace], np.ndarray | torch.Tensor, np.ndarray]: - """top_n: return the top n candidates wrt the acquisition function.""" - if return_distinct: - eis = acquisition_function(candidates, asscalar=True) # faster - eis_, unique_idx = np.unique(eis, return_index=True) - try: - i = np.argpartition(eis_, -top_n)[-top_n:] - indices = np.array([unique_idx[j] for j in i]) - except ValueError: - eis = torch.tensor([acquisition_function(c) for c in candidates]) - _, indices = eis.topk(top_n) - else: - eis = torch.tensor([acquisition_function(c) for c in candidates]) - _, indices = eis.topk(top_n) - - xs = [candidates[int(i)] for i in indices] - return xs, eis, indices - - -class MutationSampler(AcquisitionSampler): - def __init__( - self, - pipeline_space, - pool_size: int = 250, - n_best: int = 10, - mutate_size: int | None = None, - allow_isomorphism: bool = False, - check_isomorphism_history: bool = True, - patience: int = 50, - ): - super().__init__(pipeline_space=pipeline_space, patience=patience) - self.pool_size = pool_size - self.n_best = n_best - self.mutate_size = mutate_size - self.allow_isomorphism = allow_isomorphism - self.check_isomorphism_history = ( - check_isomorphism_history # check for isomorphisms also in previous graphs - ) - - self.random_sampling = RandomSampler( - pipeline_space=pipeline_space, patience=patience - ) - - @override - def set_state( - self, x: list[SearchSpace], y: Sequence[float] | np.ndarray | torch.Tensor - ) -> None: - super().set_state(x, y) - self.random_sampling.set_state(x, y) - - @override - def sample(self, acquisition_function: Callable) -> SearchSpace: - return first(self.sample_batch(acquisition_function, batch=1)) - - @override - def sample_batch( - self, - acquisition_function: Callable, - batch: int, - ) -> list[SearchSpace]: - pool = self.create_pool(acquisition_function, self.pool_size) - - samples, _, _ = _propose_location( - acquisition_function=acquisition_function, - top_n=batch, - candidates=pool, - ) - return samples - - def create_pool( - self, - acquisition_function: Callable, - pool_size: int, - ) -> list[SearchSpace]: - if len(self.x) == 0: - return self.random_sampling.sample_batch(acquisition_function, pool_size) - - mutate_size = ( - int(0.5 * pool_size) if self.mutate_size is None else self.mutate_size - ) - assert ( - pool_size >= mutate_size - ), " pool_size must be larger or equal to mutate_size" - - n_best = len(self.x) if len(self.x) < self.n_best else self.n_best - best_configs = [ - x for (_, x) in sorted(zip(self.y, self.x), key=lambda pair: pair[0]) - ][:n_best] - - seen: set[int] = set() - - def _hash(_config: SearchSpace) -> int: - return hash(_config.hp_values().values()) - - evaluation_pool = [] - per_arch = mutate_size // n_best - - for config in best_configs: - remaining_patience = self.patience - for _ in range(per_arch): - while remaining_patience: - try: - # needs to throw an Exception if config is not valid, e.g., empty graph etc.! 
- child = config.mutate() - except Exception: - remaining_patience -= 1 - continue - hash_child = _hash(child) - - if not self.allow_isomorphism: - # if disallow isomorphism, we enforce that each time, we mutate n distinct graphs. - # For now we do not check the isomorphism in all of the previous graphs though - if child == config or hash_child in seen: - remaining_patience -= 1 - continue - - evaluation_pool.append(child) - seen.add(hash_child) - break - - # Fill missing pool with random samples - nrandom_archs = max(pool_size - len(evaluation_pool), 0) - if nrandom_archs: - random_evaluation_pool = self.random_sampling.sample_batch( - acquisition_function, nrandom_archs - ) - evaluation_pool += random_evaluation_pool - - return evaluation_pool diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py deleted file mode 100644 index e3b755158..000000000 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/random_sampler.py +++ /dev/null @@ -1,14 +0,0 @@ -from __future__ import annotations - -from ....search_spaces.search_space import SearchSpace -from .base_acq_sampler import AcquisitionSampler - - -class RandomSampler(AcquisitionSampler): - def __init__(self, pipeline_space: SearchSpace, patience: int = 100): - super().__init__(pipeline_space=pipeline_space, patience=patience) - - def sample(self, acquisition_function=None) -> list[SearchSpace]: - return self.pipeline_space.sample( - patience=self.patience, user_priors=False, ignore_fidelity=False - ) diff --git a/neps/optimizers/bayesian_optimization/cost_cooling.py b/neps/optimizers/bayesian_optimization/cost_cooling.py deleted file mode 100644 index f2878fe90..000000000 --- a/neps/optimizers/bayesian_optimization/cost_cooling.py +++ /dev/null @@ -1,258 +0,0 @@ -from __future__ import annotations - -from typing import Any -from typing_extensions import override - -from neps.state.optimizer import BudgetInfo -from neps.utils.types import ConfigResult -from neps.utils.common import instance_from_map -from neps.optimizers.bayesian_optimization.acquisition_functions.cost_cooling import ( - CostCooler, -) -from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.bayesian_optimization.acquisition_functions import AcquisitionMapping -from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( - BaseAcquisition, -) -from neps.optimizers.bayesian_optimization.acquisition_functions.prior_weighted import ( - DecayingPriorWeightedAcquisition, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers import ( - AcquisitionSamplerMapping, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - AcquisitionSampler, -) -from neps.optimizers.bayesian_optimization.kernels.get_kernels import get_kernels -from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping -from neps.optimizers.bayesian_optimization.optimizer import BayesianOptimization - - -class CostCooling(BayesianOptimization): - """Implements a basic cost-cooling as described in - "Cost-aware Bayesian Optimization" (https://arxiv.org/abs/2003.10870) by Lee et al.""" - - def __init__( - self, - pipeline_space: SearchSpace, - initial_design_size: int = 10, - surrogate_model: str | Any = "gp", - cost_model: str | Any = "gp", - surrogate_model_args: dict = None, - cost_model_args: dict = None, - optimal_assignment: bool = False, - domain_se_kernel: str = 
None, - graph_kernels: list = None, - hp_kernels: list = None, - acquisition: str | BaseAcquisition = "EI", - log_prior_weighted: bool = False, - acquisition_sampler: str | AcquisitionSampler = "mutation", - random_interleave_prob: float = 0.0, - patience: int = 100, - budget: None | int | float = None, - ignore_errors: bool = False, - loss_value_on_error: None | float = None, - cost_value_on_error: None | float = None, - logger=None, - ): - """Initialise the BO loop. - - Args: - pipeline_space: Space in which to search - initial_design_size: Number of 'x' samples that need to be evaluated before - selecting a sample using a strategy instead of randomly. - surrogate_model: Surrogate model - cost_model: Cost model - surrogate_model_args: Arguments that will be given to the surrogate model - (the Gaussian processes model). - cost_model_args: Arguments that will be given to the cost model - (the Gaussian processes model). - optimal_assignment: whether the optimal assignment kernel should be used. - domain_se_kernel: Stationary kernel name - graph_kernels: Kernels for NAS - hp_kernels: Kernels for HPO - acquisition: Acquisition strategy - log_prior_weighted: if to use log for prior - acquisition_sampler: Acquisition function fetching strategy - random_interleave_prob: Frequency at which random configurations are sampled - instead of configurations from the acquisition strategy. - patience: How many times we try something that fails before giving up. - budget: Maximum budget - ignore_errors: Ignore hyperparameter settings that threw an error and do not - raise an error. Error configs still count towards max_evaluations_total. - loss_value_on_error: Setting this and cost_value_on_error to any float will - supress any error during bayesian optimization and will use given loss - value instead. default: None - cost_value_on_error: Setting this and loss_value_on_error to any float will - supress any error during bayesian optimization and will use given cost - value instead. 
default: None - logger: logger object, or None to use the neps logger - - Raises: - ValueError: if patience < 1 - ValueError: if initial_design_size < 1 - ValueError: if random_interleave_prob is not between 0.0 and 1.0 - ValueError: if no kernel is provided - """ - super().__init__( - pipeline_space=pipeline_space, - patience=patience, - logger=logger, - budget=budget, - ignore_errors=ignore_errors, - loss_value_on_error=loss_value_on_error, - cost_value_on_error=cost_value_on_error, - ) - - if initial_design_size < 1: - raise ValueError( - "BayesianOptimization needs initial_design_size to be at least 1" - ) - if not 0 <= random_interleave_prob <= 1: - raise ValueError("random_interleave_prob should be between 0.0 and 1.0") - - self._initial_design_size = initial_design_size - self._random_interleave_prob = random_interleave_prob - self._num_train_x: int = 0 - self._pending_evaluations: list = [] - self._model_update_failed: bool = False - - if ignore_errors: - self.logger.warning( - "ignore_errors was set, but this optimizer does not support it" - ) - - surrogate_model_args = surrogate_model_args or {} - cost_model_args = cost_model_args or {} - graph_kernels, hp_kernels = get_kernels( - self.pipeline_space, - domain_se_kernel, - graph_kernels, - hp_kernels, - optimal_assignment, - ) - if "graph_kernels" not in surrogate_model_args: - surrogate_model_args["graph_kernels"] = graph_kernels - if "hp_kernels" not in surrogate_model_args: - surrogate_model_args["hp_kernels"] = hp_kernels - - if ( - not surrogate_model_args["graph_kernels"] - and not surrogate_model_args["hp_kernels"] - ): - raise ValueError("No kernels are provided!") - - if "vectorial_features" not in surrogate_model_args: - surrogate_model_args["vectorial_features"] = ( - self.pipeline_space.get_vectorial_dim() - ) - - self.surrogate_model = instance_from_map( - SurrogateModelMapping, - surrogate_model, - name="surrogate model", - kwargs=surrogate_model_args, - ) - - if "graph_kernels" not in cost_model_args: - cost_model_args["graph_kernels"] = graph_kernels - if "hp_kernels" not in cost_model_args: - cost_model_args["hp_kernels"] = hp_kernels - - if not cost_model_args["graph_kernels"] and not cost_model_args["hp_kernels"]: - raise ValueError("No kernels are provided!") - - if "vectorial_features" not in cost_model_args: - cost_model_args["vectorial_features"] = ( - self.pipeline_space.get_vectorial_dim() - ) - - self.cost_model = instance_from_map( - SurrogateModelMapping, - cost_model, - name="cost model", # does changing this string work? 
- kwargs=cost_model_args, - ) - - orig_acquisition = instance_from_map( - AcquisitionMapping, - acquisition, - name="acquisition function", - ) - - self.acquisition = CostCooler(orig_acquisition) - - if self.pipeline_space.has_prior: - self.acquisition = DecayingPriorWeightedAcquisition( - self.acquisition, log=log_prior_weighted - ) - - self.acquisition_sampler = instance_from_map( - AcquisitionSamplerMapping, - acquisition_sampler, - name="acquisition sampler function", - kwargs={"patience": self.patience, "pipeline_space": self.pipeline_space}, - ) - - @override - def load_optimization_state( - self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], - budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - # TODO(Jan): read out cost and fit cost model - if budget_info is None: - raise ValueError( - "Used budget is not set in the optimizer state but is required" - " for cost cooling, please return a `'cost'` when you return results" - " and/or a `max_cost_budget` when running NePS!" - ) - self.used_budget = budget_info.used_cost_budget - - train_x = [el.config for el in previous_results.values()] - train_y = [self.get_loss(el.result) for el in previous_results.values()] - train_cost = [self.get_cost(el.result) for el in previous_results.values()] - self._num_train_x = len(train_x) - self._pending_evaluations = [el for el in pending_evaluations.values()] - if self._num_train_x >= self._initial_design_size: - try: - if len(self._pending_evaluations) > 0: - # We want to use hallucinated results for the evaluations that have - # not finished yet. For this we fit a model on the finished - # evaluations and add these to the other results to fit another model. - self.surrogate_model.fit(train_x, train_y) - self.cost_model.fit(train_x, train_cost) - ys, _ = self.surrogate_model.predict(self._pending_evaluations) - zs, _ = self.cost_model.predict(self._pending_evaluations) - train_x += self._pending_evaluations - train_y += list(ys.detach().numpy()) - train_cost += list(zs.detach().numpy()) - - self.surrogate_model.fit(train_x, train_y) - self.cost_model.fit(train_x, train_cost) - # TODO: set acquisition state - self.acquisition.set_state( - self.surrogate_model, - alpha=1 - - (budget_info.used_cost_budget / budget_info.max_cost_budget), - cost_model=self.cost_model, - ) - self.acquisition_sampler.set_state(x=train_x, y=train_y) - - self._model_update_failed = False - except RuntimeError as runtime_error: - self.logger.exception( - "Model could not be updated due to below error. Sampling will not use" - " the model." 
- ) - if self.loss_value_on_error is None or self.cost_value_on_error is None: - raise ValueError( - "A RuntimeError happened and " - "loss_value_on_error or cost_value_on_error " - "value is not provided, please fix the error or " - "provide the values to continue without " - "updating the model" - ) from runtime_error - self._model_update_failed = True diff --git a/neps/optimizers/bayesian_optimization/kernels/__init__.py b/neps/optimizers/bayesian_optimization/kernels/__init__.py deleted file mode 100644 index 8d11ea81e..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import annotations - -from functools import partial -from typing import Callable - -from .encoding import NASBOTDistance -from .vectorial_kernels import HammingKernel, Matern32Kernel, Matern52Kernel, RBFKernel -from .weisfilerlehman import WeisfilerLehman - -StationaryKernelMapping: dict[str, Callable] = { - "m52": Matern52Kernel, - "m32": Matern32Kernel, - "rbf": RBFKernel, - "hm": HammingKernel, -} - -GraphKernelMapping: dict[str, Callable] = { - "wl": partial( - WeisfilerLehman, - h=2, - oa=False, - ), - "vh": partial( - WeisfilerLehman, - h=0, - oa=False, - ), - "nasbot": NASBOTDistance, -} diff --git a/neps/optimizers/bayesian_optimization/kernels/combine_kernels.py b/neps/optimizers/bayesian_optimization/kernels/combine_kernels.py deleted file mode 100644 index 0e464713b..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/combine_kernels.py +++ /dev/null @@ -1,215 +0,0 @@ -import logging - -import torch - -from .utils import extract_configs -from .vectorial_kernels import HammingKernel, Stationary -from .weisfilerlehman import GraphKernels - - -def _select_dimensions(k): - if isinstance(k, HammingKernel): - return "categorical" - return "continuous" - - -class CombineKernel: - def __init__( - self, - combined_by="sum", - *kernels: list, - **kwargs, - ): - if combined_by not in ["sum", "product"]: - raise ValueError(f"Invalid value for combined_by ({combined_by})") - - self.has_graph_kernels = False - self.has_vector_kernels = False - self.lengthscale_bounds = (None, None) - for k in kernels: - if isinstance(k, GraphKernels): - self.has_graph_kernels = True - if not isinstance(k, GraphKernels): - self.has_vector_kernels = True - self.lengthscale_bounds = k.lengthscale_bounds - self.kernels = kernels - # Store the training graphs and vector features.. 
- self._gram = None - self.gr, self.x = None, None - self.combined_by = combined_by - - def fit_transform( - self, - weights: torch.Tensor, - configs: list, - normalize: bool = True, - rebuild_model: bool = True, - save_gram_matrix: bool = True, - gp_fit: bool = True, - feature_lengthscale: list = None, - **kwargs, - ): - N = len(configs) - K = torch.zeros(N, N) if self.combined_by == "sum" else torch.ones(N, N) - - gr1, x1 = extract_configs(configs) - - for i, k in enumerate(self.kernels): - if isinstance(k, GraphKernels) and None not in gr1: - update_val = weights[i] * k.fit_transform( - [g[i] for g in gr1] if isinstance(gr1[0], (list, tuple)) else gr1, - rebuild_model=rebuild_model, - save_gram_matrix=save_gram_matrix, - gp_fit=gp_fit, - **kwargs, - ) - - elif isinstance(k, Stationary) and None not in x1: - key = _select_dimensions(k) - update_val = ( - weights[i] - * k.fit_transform( - [x_[key] for x_ in x1], - l=feature_lengthscale[key] - if isinstance(feature_lengthscale, dict) - else None, - rebuild_model=rebuild_model, - save_gram_matrix=save_gram_matrix, - ) - ).double() - - else: - raise NotImplementedError( - "For now, only the Stationary custom built kernel_operators are " - "supported! " - ) - - if self.combined_by == "sum": - K += update_val - elif self.combined_by == "product": - K *= update_val - - if normalize: - K_diag = torch.sqrt(torch.diag(K)) - K /= torch.ger(K_diag, K_diag) - if save_gram_matrix: - self._gram = K.clone() - - return K - - def transform( - self, - weights: torch.Tensor, - configs: list, - x=None, - feature_lengthscale=None, - ): - if self._gram is None: - raise ValueError( - "The kernel has not been fitted. Call fit_transform first to generate " - "the training Gram matrix." - ) - gr, x = extract_configs(configs) - # K is in shape of len(Y), len(X) - size = len(configs) - K = ( - torch.zeros(size, self._gram.shape[0]) - if self.combined_by == "sum" - else torch.ones(size, self._gram.shape[0]) - ) - - for i, k in enumerate(self.kernels): - if isinstance(k, GraphKernels) and None not in gr: - update_val = weights[i] * k.transform( - [g[i] for g in gr] if isinstance(gr, list) else gr - ) - elif isinstance(k, Stationary) and None not in x: - key = _select_dimensions(k) - update_val = ( - weights[i] - * k.transform( - [x_[key] for x_ in x], - l=feature_lengthscale[key] - if isinstance(feature_lengthscale, dict) - else None, - ).double() - ) - else: - raise NotImplementedError( - "For now, only the Stationary custom built kernel_operators are " - "supported! " - ) - - if self.combined_by == "sum": - K += update_val - elif self.combined_by == "product": - K *= update_val - - return K.t() - - def clamp_theta_vector(self, theta_vector): - if theta_vector is None: - return None - - [ - t_.clamp_(self.lengthscale_bounds[0], self.lengthscale_bounds[1]) - if t_ is not None and t_.is_leaf - else None - for t_ in theta_vector.values() - ] - return theta_vector - - -class SumKernel(CombineKernel): - def __init__(self, *kernels, **kwargs): - super().__init__("sum", *kernels, **kwargs) - - def forward_t( - self, - weights: torch.Tensor, - gr2: list, - x2=None, - gr1: list = None, - x1=None, - feature_lengthscale=None, - ): - """ - Compute the kernel gradient w.r.t the feature vector - Parameters - ---------- - feature_lengthscale - x2 - x1 - gr1 - weights - gr2 - - Returns ------- grads: k list of 2-tuple. (K, x2) where K is the weighted Gram - matrix of that matrix, x2 is the leaf variable on which Jacobian-vector product - to be computed. 
- - """ - grads = [] - for i, k in enumerate(self.kernels): - if isinstance(k, GraphKernels): - handle = k.forward_t(gr2, gr1=gr1) - grads.append((weights[i] * handle[0], handle[1], handle[2])) - elif isinstance(k, Stationary): - key = _select_dimensions(k) - handle = k.forward_t(x2=x2[key], x1=x1[key], l=feature_lengthscale[i]) - grads.append((weights[i] * handle[0], handle[1], handle[2])) - else: - logging.warning( - "Gradient not implemented for kernel type" + str(k.__name__) - ) - grads.append((None, None)) - assert len(grads) == len(self.kernels) - return grads - - -class ProductKernel(CombineKernel): - def __init__(self, *kernels, **kwargs): - super().__init__("product", *kernels, **kwargs) - - def dk_dphi(self, weights, gr: list = None, x=None, feature_lengthscale=None): - raise NotImplementedError diff --git a/neps/optimizers/bayesian_optimization/kernels/combine_kernels_hierarchy.py b/neps/optimizers/bayesian_optimization/kernels/combine_kernels_hierarchy.py deleted file mode 100644 index b35b9d915..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/combine_kernels_hierarchy.py +++ /dev/null @@ -1,248 +0,0 @@ -import logging - -import numpy as np -import torch - -from .utils import extract_configs_hierarchy -from .vectorial_kernels import HammingKernel, Stationary -from .weisfilerlehman import GraphKernels - - -# normalise weights in front of additive kernels -def transform_weights(weights): - return torch.exp(weights) / torch.sum(torch.exp(weights)) - - -def _select_dimensions(k): - if isinstance(k, HammingKernel): - return "categorical" - return "continuous" - - -class CombineKernel: - def __init__( - self, - combined_by="sum", - *kernels: list, - **kwargs, - ): - if combined_by not in ["sum", "product"]: - raise ValueError(f"Invalid value for combined_by ({combined_by})") - - self.has_graph_kernels = False - self.has_vector_kernels = False - self.hierarchy_consider = kwargs["hierarchy_consider"] - self.d_graph_features = kwargs["d_graph_features"] - # if use global graph features of the final architecture graph, prepare for normalising - # them based on training data - if self.d_graph_features > 0: - self.train_graph_feature_mean = None - self.train_graph_feature_std = None - - self.lengthscale_bounds = (None, None) - for k in kernels: - if isinstance(k, GraphKernels): - self.has_graph_kernels = True - if not isinstance(k, GraphKernels): - self.has_vector_kernels = True - self.lengthscale_bounds = k.lengthscale_bounds - self.kernels = kernels - # Store the training graphs and vector features.. 
- self._gram = None - self.gr, self.x = None, None - self.combined_by = combined_by - - def fit_transform( - self, - weights: torch.Tensor, - configs: list, - normalize: bool = True, - rebuild_model: bool = True, - save_gram_matrix: bool = True, - gp_fit: bool = True, - feature_lengthscale: list = None, - **kwargs, - ): - weights = transform_weights(weights.clone()) - N = len(configs) - K = torch.zeros(N, N) if self.combined_by == "sum" else torch.ones(N, N) - - gr1, x1 = extract_configs_hierarchy( - configs, - d_graph_features=self.d_graph_features, - hierarchy_consider=self.hierarchy_consider, - ) - - # normalise the global graph features if we plan to use them - if self.d_graph_features > 0: - if gp_fit: - # compute the mean and std based on training data - self.train_graph_feature_mean = np.mean(x1, 0) - self.train_graph_feature_std = np.std(x1, 0) - x1 = (x1 - self.train_graph_feature_mean) / self.train_graph_feature_std - # k_values = [] # for debug - # k_features = [] # for debug - for i, k in enumerate(self.kernels): - if isinstance(k, GraphKernels) and None not in gr1: - if len(gr1) == N and self.hierarchy_consider is None: - # only the final graph is used - k_i = k.fit_transform( - [g[i] for g in gr1] if isinstance(gr1[0], (list, tuple)) else gr1, - rebuild_model=rebuild_model, - save_gram_matrix=save_gram_matrix, - gp_fit=gp_fit, - **kwargs, - ) - if normalize: - K_i_diag = torch.sqrt(torch.diag(k_i)) - k_i /= torch.ger(K_i_diag, K_i_diag) - update_val = weights[i] * k_i - - else: - # graphs in the early hierarchies are also used; - # assume the combined kernel list always start with graph kernels i.e. kernels=[graph kernels, hp kernels] - gr1_i = gr1[i] - k_i = k.fit_transform( - [g[i] for g in gr1_i] - if isinstance(gr1_i[0], (list, tuple)) - else gr1_i, - rebuild_model=rebuild_model, - save_gram_matrix=save_gram_matrix, - gp_fit=gp_fit, - **kwargs, - ) - if normalize: - K_i_diag = torch.sqrt(torch.diag(k_i)) - k_i /= torch.ger(K_i_diag, K_i_diag) - - update_val = weights[i] * k_i - # k_features.append([value.X.shape[1] for key, value in k.kern.X.items()]) - - elif isinstance(k, Stationary) and None not in x1: - k_i = k.fit_transform( - x1, - rebuild_model=rebuild_model, - save_gram_matrix=save_gram_matrix, - l=feature_lengthscale, - ) - update_val = (weights[i] * k_i).double() - else: - raise NotImplementedError( - " For now, only the Stationary custom built kernel_operators are supported!" 
- ) - - # k_values.append(k_i) # for debug - - if self.combined_by == "sum": - K += update_val - elif self.combined_by == "product": - K *= update_val - - # self.k_values = k_values # for debug - # self.k_features = k_features # for debug - # self.weights_trans = weights # for debug - # if not normalize: - # K_diag = torch.sqrt(torch.diag(K)) - # K /= torch.ger(K_diag, K_diag) - - if save_gram_matrix: - self._gram = K.clone() - - return K - - def fit_transform_single_hierarchy( - self, - weights: torch.Tensor, - configs: list, - hierarchy_id: int, - normalize: bool = True, - rebuild_model: bool = True, - gp_fit: bool = True, - **kwargs, - ): - weights = transform_weights(weights.clone()) - # N = len(configs) - # K = torch.zeros(N, N) if self.combined_by == "sum" else torch.ones(N, N) - - gr1, _ = extract_configs_hierarchy( - configs, - d_graph_features=self.d_graph_features, - hierarchy_consider=self.hierarchy_consider, - ) - # get the corresponding graph kernel and hierarchy graph data - graph_kernel_list = [k for k in self.kernels if isinstance(k, GraphKernels)] - # first graph kernel is on the final architecture graph - k_single_hierarchy = graph_kernel_list[int(hierarchy_id + 1)] - gr1_single_hierarchy = gr1[int(hierarchy_id + 1)] - weight_single_hierarchy = weights[int(hierarchy_id + 1)] - k_raw = k_single_hierarchy.fit_transform( - gr1_single_hierarchy, - rebuild_model=rebuild_model, - gp_fit=gp_fit, - **kwargs, - ) - k_raw = k_raw.to(torch.float32) - if normalize: - K_diag = torch.sqrt(torch.diag(k_raw)) - k_raw /= torch.ger(K_diag, K_diag) - - K = weight_single_hierarchy * k_raw - - return K - - -class SumKernel(CombineKernel): - def __init__(self, *kernels, **kwargs): - super().__init__("sum", *kernels, **kwargs) - - def forward_t( - self, - weights: torch.Tensor, - gr2: list, - x2=None, - gr1: list = None, - x1=None, - feature_lengthscale=None, - ): - """ - Compute the kernel gradient w.r.t the feature vector - Parameters - ---------- - feature_lengthscale - x2 - x1 - gr1 - weights - gr2 - - Returns - ------- - grads: k list of 2-tuple. - (K, x2) where K is the weighted Gram matrix of that matrix, x2 is the leaf variable on which Jacobian-vector - product to be computed. 
- - """ - weights = transform_weights(weights.clone()) - grads = [] - for i, k in enumerate(self.kernels): - if isinstance(k, GraphKernels): - handle = k.forward_t(gr2, gr1=gr1) - grads.append((weights[i] * handle[0], handle[1], handle[2])) - elif isinstance(k, Stationary): - handle = k.forward_t(x2=x2, x1=x1, l=feature_lengthscale) - grads.append((weights[i] * handle[0], handle[1], handle[2])) - else: - logging.warning( - "Gradient not implemented for kernel type" + str(k.__name__) - ) - grads.append((None, None)) - assert len(grads) == len(self.kernels) - return grads - - -class ProductKernel(CombineKernel): - def __init__(self, *kernels, **kwargs): - super().__init__("product", *kernels, **kwargs) - - def dk_dphi(self, weights, gr: list = None, x=None, feature_lengthscale=None): - raise NotImplementedError diff --git a/neps/optimizers/bayesian_optimization/kernels/encoding.py b/neps/optimizers/bayesian_optimization/kernels/encoding.py deleted file mode 100644 index 419b6926d..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/encoding.py +++ /dev/null @@ -1,277 +0,0 @@ -# Code from https://github.com/xingchenwan/nasbowl - -import networkx as nx -import numpy as np -import torch - -from .graph_kernel import GraphKernels - -INPUT = "input" -OUTPUT = "output" -CONV3X3 = "conv3x3-bn-relu" -CONV1X1 = "conv1x1-bn-relu" -MAXPOOL3X3 = "maxpool3x3" -OPS = [INPUT, CONV3X3, CONV1X1, MAXPOOL3X3, OUTPUT] -OPS_EX = [ - CONV3X3, - CONV1X1, - MAXPOOL3X3, -] - -OPS_201 = ["avg_pool_3x3", "nor_conv_1x1", "nor_conv_3x3", "none", "skip_connect"] - -NUM_VERTICES = 7 -OP_SPOTS = NUM_VERTICES - 2 -MAX_EDGES = 9 - - -def get_op_list(string): - # given a string, get the list of operations - tokens = string.split("|") - ops = [t.split("~")[0] for i, t in enumerate(tokens) if i not in [0, 2, 5, 9]] - return ops - - -def edit_distance(g1, g2): - g1_ops = get_op_list(g1.name) - g2_ops = get_op_list(g2.name) - return np.sum([1 for i in range(len(g1_ops)) if g1_ops[i] != g2_ops[i]]) - - -class NASBOTDistance(GraphKernels): - """NASBOT OATMANN distance according to BANANAS paper""" - - def __init__( - self, - node_name="op_name", - include_op_list=None, - exclude_op_list=None, - lengthscale=3.0, - normalize=True, - max_size=None, - **kwargs, - ): - super().__init__(**kwargs) - self.node_name = node_name - self.include_op_list = include_op_list if include_op_list is not None else OPS - self.exclude_op_list = exclude_op_list if exclude_op_list is not None else [] - self.normalize = normalize - self.lengthscale = lengthscale - self.max_size = max_size - self._gram = None - - def _compute_kernel(self, dist, l=None): - if dist is None: - return 0.0 - if l is None: - l = self.lengthscale - return np.exp(-dist / (l**2)) - - def _compute_dist( - self, - g1: nx.Graph, - g2: nx.Graph, - ): - # if cell-based nasbench201 - if "~" in g1.name: - g1_ops = get_op_list(g1.name) - g2_ops = get_op_list(g2.name) - - g1_counts = [g1_ops.count(op) for op in OPS_201] - g2_counts = [g2_ops.count(op) for op in OPS_201] - ops_dist = np.sum(np.abs(np.subtract(g1_counts, g2_counts))) - edit_dist = edit_distance(g1, g2) - return ops_dist + edit_dist - else: - # adjacency matrices - a1 = nx.to_numpy_array(g1) - a2 = nx.to_numpy_array(g2) - row_sums = sorted(np.array(a1).sum(axis=0)) - col_sums = sorted(np.array(a1).sum(axis=1)) - - other_row_sums = sorted(np.array(a2).sum(axis=0)) - other_col_sums = sorted(np.array(a2).sum(axis=1)) - - row_sums_arr = np.atleast_2d(row_sums) - col_sums_arr = np.atleast_2d(col_sums) - - other_row_sums_arr 
= np.atleast_2d(other_row_sums) - other_col_sums_arr = np.atleast_2d(other_col_sums) - row_dist = np.sum( - np.abs(np.diag(np.subtract(row_sums_arr, other_row_sums_arr.T))) - ) - col_dist = np.sum( - np.abs(np.diag(np.subtract(col_sums_arr, other_col_sums_arr.T))) - ) - counts = [0] * len(self.include_op_list) - other_counts = [0] * len(self.include_op_list) - for _, attrs in g1.nodes(data=True): - op_name = attrs[self.node_name] - if op_name not in self.exclude_op_list: - idx = self.include_op_list.index(op_name) - counts[idx] += 1 - for _, attrs in g2.nodes(data=True): - op_name = attrs[self.node_name] - if op_name not in self.exclude_op_list: - idx = self.include_op_list.index(op_name) - other_counts[idx] += 1 - - ops_dist = np.sum(np.abs(np.subtract(counts, other_counts))) - return (row_dist + col_dist + ops_dist) + 0.0 - - def forward(self, *graphs: nx.Graph, l: float = None): - n = len(graphs) - K = torch.zeros((n, n)) - for i in range(n): - for j in range(i, n): - K[i, j] = self._compute_kernel( - self._compute_dist(graphs[i], graphs[j]), l - ) - K[j, i] = K[i, j] - if self.normalize: - K = self.normalize_gram(K) - return K - - def fit_transform( - self, - gr: list, - l: float = None, - rebuild_model: bool = False, - save_gram_matrix: bool = False, - **kwargs, - ): - if not rebuild_model and self._gram is not None: - return self._gram - K = self.forward(*gr, l=l) - if save_gram_matrix: - self._gram = K.clone() - self._train_x = gr[:] - return K - - def transform(self, gr: list, l: float = None, **kwargs): - if self._gram is None: - raise ValueError("The kernel has not been fitted. Run fit_transform first") - n = len(gr) - K = torch.zeros((len(self._train_x), n)) - for i, _ in enumerate(self._train_x): - for j in range(n): - K[i, j] = self._compute_kernel( - self._compute_dist(self._train_x[i], gr[j]), l - ) - return K - - -class AdjacencyDistance( - NASBOTDistance, -): - def _compute_dist(self, g1: nx.Graph, g2: nx.Graph): - # adjacency matrices - a1 = nx.to_numpy_array(g1) - a2 = nx.to_numpy_array(g2) - x1 = np.array([attrs[self.node_name] for node, attrs in g1.nodes(data=True)]) - x2 = np.array([attrs[self.node_name] for node, attrs in g2.nodes(data=True)]) - graph_dist = np.sum(a1 != a2) - ops_dist = np.sum(x1 != x2) - return (graph_dist + ops_dist) + 0.0 - - -class PathDistance(NASBOTDistance): - def get_paths(self, g: nx.Graph): - """ - return all paths from input to output - """ - paths: list = [] - matrix = nx.to_numpy_array(g) - ops: list = [] - for _, attr in g.nodes(data=True): - ops.append(attr[self.node_name]) - for j in range(0, NUM_VERTICES): - if matrix[0][j]: - paths.append([[]]) - else: - paths.append([]) - - # create paths sequentially - for i in range(1, NUM_VERTICES - 1): - for j in range(1, NUM_VERTICES): - if matrix[i][j]: - for path in paths[i]: - paths[j].append([*path, ops[i]]) - return paths[-1] - - def get_path_indices(self, g: nx.Graph): - """ - compute the index of each path - There are 3^0 + ... + 3^5 paths total. - (Paths can be length 0 to 5, and for each path, for each node, there - are three choices for the operation.) 
- """ - paths = self.get_paths(g) - mapping = {CONV3X3: 0, CONV1X1: 1, MAXPOOL3X3: 2} - path_indices = [] - - for path in paths: - index = 0 - for i in range(NUM_VERTICES - 1): - if i == len(path): - path_indices.append(index) - break - else: - index += len(OPS_EX) ** i * (mapping[path[i]] + 1) - - return tuple(path_indices) - - @staticmethod - def get_paths_201(g: nx.Graph): - """ - return all paths from input to output - """ - path_blueprints = [[3], [0, 4], [1, 5], [0, 2, 5]] - ops = get_op_list(g.name) - paths = [] - for blueprint in path_blueprints: - paths.append([ops[node] for node in blueprint]) - - return paths - - def get_path_indices_201(self, g: nx.Graph): - """ - compute the index of each path - """ - paths = self.get_paths_201(g) - path_indices = [] - NUM_OPS = len(OPS_201) - for i, path in enumerate(paths): - if i == 0: - index = 0 - elif i in [1, 2]: - index = NUM_OPS - else: - index = NUM_OPS + NUM_OPS**2 - for j, op in enumerate(path): - index += OPS_201.index(op) * NUM_OPS**j - path_indices.append(index) - - return tuple(path_indices) - - def encode_paths(self, g: nx.Graph): - """output one-hot encoding of paths""" - if "~" in g.name: - LONGEST_PATH_LENGTH = 3 - num_paths = sum(len(OPS_201) ** i for i in range(1, LONGEST_PATH_LENGTH + 1)) - path_indices = self.get_path_indices_201(g) - elif "101" in g.name: - num_paths = sum(len(OPS_EX) ** i for i in range(OP_SPOTS + 1)) - path_indices = self.get_path_indices(g) - else: - num_paths = sum(len(self.op_list) ** i for i in range(self.max_size - 1)) - path_indices = self.get_paths(g) - path_encoding = np.zeros(num_paths) - for index in path_indices: - path_encoding[index] = 1 - return path_encoding - - def _compute_dist(self, g1: nx.Graph, g2: nx.Graph): - encode1 = self.encode_paths(g1) - encode2 = self.encode_paths(g2) - return np.sum(np.array(encode1 != np.array(encode2))) diff --git a/neps/optimizers/bayesian_optimization/kernels/get_kernels.py b/neps/optimizers/bayesian_optimization/kernels/get_kernels.py deleted file mode 100644 index f606f4426..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/get_kernels.py +++ /dev/null @@ -1,40 +0,0 @@ -from __future__ import annotations - -from neps.utils.common import instance_from_map -from ....search_spaces.architecture.core_graph_grammar import CoreGraphGrammar -from ....search_spaces.hyperparameters.categorical import CategoricalParameter -from ....search_spaces.hyperparameters.float import FloatParameter -from ....search_spaces.hyperparameters.integer import IntegerParameter -from ....utils.common import has_instance -from . 
import GraphKernelMapping, StationaryKernelMapping - - -def get_kernels( - pipeline_space, domain_se_kernel, graph_kernels, hp_kernels, optimal_assignment -): - if not graph_kernels: - graph_kernels = [] - if has_instance(pipeline_space.values(), CoreGraphGrammar): - graph_kernels.append("wl") - if not hp_kernels: - hp_kernels = [] - if has_instance(pipeline_space.values(), FloatParameter, IntegerParameter): - hp_kernels.append("m52") - if has_instance(pipeline_space.values(), CategoricalParameter): - hp_kernels.append("hm") - graph_kernels = [ - instance_from_map(GraphKernelMapping, kernel, "kernel", as_class=True)( - oa=optimal_assignment, - se_kernel=instance_from_map( - StationaryKernelMapping, domain_se_kernel, "se kernel" - ), - ) - for kernel in graph_kernels - ] - hp_kernels = [ - instance_from_map(StationaryKernelMapping, kernel, "kernel") - for kernel in hp_kernels - ] - if not graph_kernels and not hp_kernels: - raise ValueError("No kernels are provided!") - return graph_kernels, hp_kernels diff --git a/neps/optimizers/bayesian_optimization/kernels/grakel_replace/edge_histogram.py b/neps/optimizers/bayesian_optimization/kernels/grakel_replace/edge_histogram.py deleted file mode 100644 index f643dcc8e..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/grakel_replace/edge_histogram.py +++ /dev/null @@ -1,128 +0,0 @@ -"""The Edge Histogram kernel as defined in :cite:`sugiyama2015halting`.""" -from collections import Counter -from collections.abc import Iterable -from warnings import warn - -from grakel.graph import Graph -from numpy import zeros -from scipy.sparse import csr_matrix - -from .vertex_histogram import VertexHistogram - - -class EdgeHistogram(VertexHistogram): - """Edge Histogram kernel as found in :cite:`sugiyama2015halting`. - - Parameters - ---------- - sparse : bool, or 'auto', default='auto' - Defines if the data will be stored in a sparse format. - Sparse format is slower, but less memory consuming and in some cases the only solution. - If 'auto', uses a sparse matrix when the number of zeros is more than the half of the matrix size. - In all cases if the dense matrix doesn't fit system memory, I sparse approach will be tried. - - Attributes - ---------- - None. - - """ - - def parse_input(self, X: Iterable, **kwargs): - """Parse and check the given input for EH kernel. - - Parameters - ---------- - X : iterable - For the input to pass the test, we must have: - Each element must be an iterable with at most three features and at - least one. The first that is obligatory is a valid graph structure - (adjacency matrix or edge_dictionary) while the second is - node_labels and the third edge_labels (that fitting the given graph - format). - - Returns - ------- - out : np.array, shape=(len(X), n_labels) - A np array for frequency (cols) histograms for all Graphs (rows). 
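# A minimal sketch (not part of the diff) of the rows/cols/data bookkeeping
# used by parse_input below: one row per graph, one column per distinct edge
# label, cell values are label frequencies. Labels and counts are made up.
from collections import Counter
from scipy.sparse import csr_matrix

edge_labels_per_graph = [["a", "a", "b"], ["b", "c"]]
labels, rows, cols, data = {}, [], [], []
for row, edge_labels in enumerate(edge_labels_per_graph):
    for label, freq in Counter(edge_labels).items():
        col = labels.setdefault(label, len(labels))  # index new labels on the fly
        rows.append(row)
        cols.append(col)
        data.append(freq)
features = csr_matrix((data, (rows, cols)), shape=(len(edge_labels_per_graph), len(labels)))
# features.toarray() -> [[2, 1, 0], [0, 1, 1]]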
- - """ - if not isinstance(X, Iterable): - raise TypeError("input must be an iterable\n") - else: - rows, cols, data = list(), list(), list() - if self._method_calling in [1, 2]: - labels = dict() - self._labels = labels - elif self._method_calling == 3: - labels = dict(self._labels) - ni = 0 - for i, x in enumerate(iter(X)): - is_iter = isinstance(x, Iterable) - if is_iter: - x = list(x) - if is_iter and len(x) in [0, 3]: - if len(x) == 0: - warn("Ignoring empty element on index: " + str(i)) - continue - else: - # Our element is an iterable of at least 2 elements - L = x[2] - elif isinstance(x, Graph): - # get labels in any existing format - L = x.get_labels(purpose="any", label_type="edge") - else: - raise TypeError( - "each element of X must be either a " - + "graph object or a list with at least " - + "a graph like object and node labels " - + "dict \n" - ) - - if L is None: - raise ValueError("Invalid graph entry at location " + str(i) + "!") - # construct the data input for the numpy array - for label, frequency in Counter(L.values()).items(): - # for the row that corresponds to that graph - rows.append(ni) - - # and to the value that this label is indexed - col_idx = labels.get(label, None) - if col_idx is None: - # if not indexed, add the new index (the next) - col_idx = len(labels) - labels[label] = col_idx - - # designate the certain column information - cols.append(col_idx) - - # as well as the frequency value to data - data.append(frequency) - ni += 1 - - # Initialise the feature matrix - if self._method_calling in [1, 2]: - if self.sparse == "auto": - self.sparse_ = ( - len(cols) / float(ni * len(labels)) <= 0.5 - ) - else: - self.sparse_ = bool(self.sparse) - - if self.sparse_: - features = csr_matrix( - (data, (rows, cols)), shape=(ni, len(labels)), copy=False - ) - else: - # Initialise the feature matrix - try: - features = zeros(shape=(ni, len(labels))) - features[rows, cols] = data - except MemoryError: - warn("memory-error: switching to sparse") - self.sparse_, features = True, csr_matrix( - (data, (rows, cols)), shape=(ni, len(labels)), copy=False - ) - - if ni == 0: - raise ValueError("parsed input is empty") - return features diff --git a/neps/optimizers/bayesian_optimization/kernels/grakel_replace/utils.py b/neps/optimizers/bayesian_optimization/kernels/grakel_replace/utils.py deleted file mode 100644 index b34b2c791..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/grakel_replace/utils.py +++ /dev/null @@ -1,64 +0,0 @@ -import torch - - -def calculate_kernel_matrix_as_tensor( - X, Y=None, oa=False, se_kernel=None, normalize=True -) -> torch.Tensor: - """ - Same as calculate kernel matrix, but in pytorch framework and uses autodiff to compute the gradient of - the kernel function with respect to the feature vector. - - This function is taken out of the class to facilitate derivative computation. - - One difference is that to prevent the un-differentiable point at the min operation if optimal assignment - kernel is used, we replace the hard-min with a soft-min differentiable approximation that uses the x-norm - approximation. - - Parameters - ---------- - X, Y: the feature vectors (X: train, Y: test). When Y is not supplied, the kernel matrix is computed with - respect to itself. - - oa: bool: whether the optimal assignment kernel should be used. - - se_kernel: Defines any successive embedding kernel to be applied over the inner produce of X and Y. 
If none, - a simple - - normalize: bool: Whether to normalize the GP covariance matrix to the range of [0, 1]. Default is True. - - Returns - ------- - K: pytorch tensor, shape = [n_targets, n_inputs] - dK_dY: pytorch tensor, of the same shape of K. The derivative of the value of the kernel function with - respect to each of the X. If Y is None, the derivative is instead taken at the *training point* (i.e. X). - """ - - if Y is None: - if se_kernel is not None: - K = se_kernel.forward(X, X) - else: - K = X @ X.t() - if normalize: - K_diag = torch.sqrt(torch.diag(K)) - K_diag_outer = torch.ger(K_diag, K_diag) - return K / K_diag_outer - else: - assert Y.shape[1] == X.shape[1], ( - "got Y shape " + str(Y.shape[1]) + " but X shape " + str(X.shape[1]) - ) - if se_kernel is not None: - K = se_kernel.forward(X, Y) - else: - K = Y @ X.t() - if normalize: - Kxx = calculate_kernel_matrix_as_tensor( - X, X, oa=oa, se_kernel=se_kernel, normalize=False - ) - Kyy = calculate_kernel_matrix_as_tensor( - Y, Y, oa=oa, se_kernel=se_kernel, normalize=False - ) - K_diag_outer = torch.ger( - torch.sqrt(torch.diag(Kyy)), torch.sqrt(torch.diag(Kxx)) - ) - return K / K_diag_outer - return K diff --git a/neps/optimizers/bayesian_optimization/kernels/grakel_replace/vertex_histogram.py b/neps/optimizers/bayesian_optimization/kernels/grakel_replace/vertex_histogram.py deleted file mode 100644 index 285b067ce..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/grakel_replace/vertex_histogram.py +++ /dev/null @@ -1,448 +0,0 @@ -"""The vertex kernel as defined in :cite:`sugiyama2015halting`.""" -import logging -from collections import Counter -from collections.abc import Iterable -from warnings import warn - -import numpy as np -import torch -from grakel.graph import Graph -from grakel.kernels import Kernel -from numpy import array, einsum, squeeze, zeros -from scipy.sparse import csr_matrix -from sklearn.exceptions import NotFittedError -from sklearn.utils.validation import check_is_fitted - -from ..vectorial_kernels import Stationary - - -class VertexHistogram(Kernel): - """Vertex Histogram kernel as found in :cite:`sugiyama2015halting`. - - Parameters - ---------- - sparse : bool, or 'auto', default='auto' - Defines if the data will be stored in a sparse format. - Sparse format is slower, but less memory consuming and in some cases the only solution. - If 'auto', uses a sparse matrix when the number of zeros is more than the half of the matrix size. - In all cases if the dense matrix doesn't fit system memory, I sparse approach will be tried. - - oa: bool: default=True - Defines whether optimal assignment variant of the kernel should be used. - - se_kernel: default=None - The standard vectorial kernel to be used for successive embedding (i.e. after the transformation from graph - to the vector embedding, whether to use an additional kernel to compute the vector similarity. - - se_kernel_params: dict, default=None - Any parameters to be passed to the se_kernel - - mahalanobis_precision: np.array: - If supplied, the Malahanobis distance with the precision matrix as supplied will be computed in the dot - product, instead of the vanilla dot product. - - Attributes - ---------- - None. - - """ - - def __init__( - self, - n_jobs=None, - normalize=False, - sparse="auto", - oa=False, - mahalanobis_precision=None, - se_kernel: Stationary = None, - requires_ordered_features: bool = False, - as_tensor: bool = True, - ): - """Initialise a vertex histogram kernel. 
- - require_ordered_features: bool - Whether the ordering of the features in the feature matrix matters. - If True, the features will be parsed in the same order as the WL - node label. - - Note that if called directly (not from Weisfiler Lehman kernel), turning - this option on could break the code, as the label in general is non-int. - - """ - super().__init__(n_jobs=n_jobs, normalize=normalize) - self.as_tensor = as_tensor - if self.as_tensor: - self.sparse = False - else: - self.sparse = sparse - self.oa = oa - self.se_kernel = se_kernel - self._initialized.update({"sparse": True}) - self.mahalanobis_precision = mahalanobis_precision - self.require_ordered_features = requires_ordered_features - - self._X_diag = None - self.X_tensor = None - self.Y_tensor = None - - self._labels = None - self.sparse_ = None - self._method_calling = None - self._Y = None - self._is_transformed = None - self.X = None - - def initialize(self): - """Initialize all transformer arguments, needing initialization.""" - if not self._initialized["n_jobs"]: - if self.n_jobs is not None: - warn("no implemented parallelization for VertexHistogram") - self._initialized["n_jobs"] = True - if not self._initialized["sparse"]: - if self.sparse not in ["auto", False, True]: - TypeError("sparse could be False, True or auto") - self._initialized["sparse"] = True - - def parse_input(self, X, label_start_idx=0, label_end_idx=None): - """Parse and check the given input for VH kernel. - - Parameters - ---------- - X : iterable - For the input to pass the test, we must have: - Each element must be an iterable with at most three features and at - least one. The first that is obligatory is a valid graph structure - (adjacency matrix or edge_dictionary) while the second is - node_labels and the third edge_labels (that fitting the given graph - format). - - - - Returns - ------- - out : np.array, shape=(len(X), n_labels) - A np.array for frequency (cols) histograms for all Graphs (rows). - - """ - if self.require_ordered_features: - if label_start_idx is None or label_end_idx is None: - raise ValueError( - "When requires_ordered_features flag is True, you must supply the start and end" - "indices of the feature matrix to have consistent feature dimensions!" - ) - assert ( - label_end_idx > label_start_idx - ), "End index must be larger than the start index!" 
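# A minimal sketch (not part of the diff) of what the ordered-features mode
# just validated above buys us: WL labels are globally-numbered integers, and
# each iteration's block of the feature matrix is addressed by subtracting
# that iteration's start index. All values here are illustrative.
label_start_idx, label_end_idx = 7, 12        # labels 7..11 belong to this WL iteration
wl_labels_in_graph = [7, 9, 9, 11]
cols = [int(label) - label_start_idx for label in wl_labels_in_graph]
assert cols == [0, 2, 2, 4]
assert max(cols) < label_end_idx - label_start_idx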
- - if not isinstance(X, Iterable): - raise TypeError("input must be an iterable\n") - else: - rows, cols, data = list(), list(), list() - if self._method_calling in [0, 1, 2]: - labels = dict() - self._labels = labels - elif self._method_calling == 3: - labels = dict(self._labels) - ni = 0 - for i, x in enumerate(iter(X)): - is_iter = isinstance(x, Iterable) - if is_iter: - x = list(x) - if is_iter and len(x) in [0, 2, 3]: - if len(x) == 0: - warn("Ignoring empty element on index: " + str(i)) - continue - else: - # Our element is an iterable of at least 2 elements - L = x[1] - elif isinstance(x, Graph): - # get labels in any existing format - L = x.get_labels(purpose="any") - else: - raise TypeError( - "each element of X must be either a " - "graph object or a list with at least " - "a graph like object and node labels " - "dict \n" - ) - - # construct the data input for the numpy array - for label, frequency in Counter(L.values()).items(): - # for the row that corresponds to that graph - rows.append(ni) - - # and to the value that this label is indexed - if self.require_ordered_features: - try: - col_idx = int(label) - label_start_idx # Offset - except ValueError: - logging.error( - "Failed to convert label to a valid integer. Check whether all labels are" - "numeric, and whether you called this kernel directly instead of from the" - "Weisfiler-Lehman kernel. Falling back to the default unordered feature" - "matrix." - ) - self.require_ordered_features = False - if not self.require_ordered_features: - col_idx = labels.get(label, None) - if col_idx is None: - # if not indexed, add the new index (the next) - col_idx = len(labels) - labels[label] = col_idx - - # designate the certain column information - cols.append(col_idx) - - # as well as the frequency value to data - data.append(frequency) - ni += 1 - - if self.require_ordered_features: - label_length = max(label_end_idx - label_start_idx, max(cols)) + 1 - else: - label_length = len(labels) - - if self._method_calling in [0, 1, 2]: - if self.sparse == "auto": - self.sparse_ = len(cols) / float(ni * label_length) <= 0.5 - else: - self.sparse_ = bool(self.sparse) - - if self.sparse_: - features = csr_matrix( - (data, (rows, cols)), shape=(ni, label_length), copy=False - ) - else: - # Initialise the feature matrix - try: - features = zeros(shape=(ni, label_length)) - features[rows, cols] = data - - except MemoryError: - warn("memory-error: switching to sparse") - self.sparse_, features = True, csr_matrix( - (data, (rows, cols)), shape=(ni, label_length), copy=False - ) - - if ni == 0: - raise ValueError("parsed input is empty") - return features - - def _calculate_kernel_matrix(self, Y=None): - """Calculate the kernel matrix given a target_graph and a kernel. - - Each a matrix is calculated between all elements of Y on the rows and - all elements of X on the columns. - - Parameters - ---------- - Y : np.array, default=None - The array between samples and features. - - Returns - ------- - K : numpy array, shape = [n_targets, n_inputs] - The kernel matrix: a calculation between all pairs of graphs - between targets and inputs. If Y is None targets and inputs - are the taken from self.X. Otherwise Y corresponds to targets - and self.X to inputs. 
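# A minimal sketch (not part of the diff): the `oa=True` branch below is a
# histogram-intersection kernel, k(x, y) = sum_d min(x_d, y_d), computed
# pairwise over the label-count feature rows. Toy histograms for illustration.
import numpy as np

X = np.array([[2.0, 1.0, 0.0],
              [0.0, 1.0, 1.0]])
K = np.zeros((2, 2))
for i in range(2):
    for j in range(2):
        K[i, j] = np.minimum(X[i], X[j]).sum()
# K == [[3., 1.], [1., 2.]]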
- - """ - if Y is None: - if self.oa: - K = np.zeros((self.X.shape[0], self.X.shape[0])) - for i in range(self.X.shape[0]): - for j in range(i, self.X.shape[0]): - K[i, j] = np.sum(np.minimum(self.X[i, :], self.X[j, :])) - K[j, i] = K[i, j] - else: - if self.se_kernel is not None: - K = self.se_kernel.forward(self.X, self.X) - else: - K = self.X @ self.X.T - else: - if self.oa: - K = np.zeros((Y.shape[0], self.X.shape[0])) - for i in range(Y.shape[0]): - for j in range(self.X.shape[0]): - K[i, j] = np.sum( - np.minimum(self.X[j, :], Y[i, : self.X.shape[1]]) - ) - else: - if self.se_kernel is not None: - K = self.se_kernel.forward(self.X, Y) - else: - K = Y[:, : self.X.shape[1]] @ self.X.T - - if self.sparse_: - return K.toarray() - else: - return K - - def diagonal(self, use_tensor=False): - """Calculate the kernel matrix diagonal of the fitted data. - - Parameters - ---------- - None. - - Returns - ------- - X_diag : np.array - The diagonal of the kernel matrix, of the fitted. This consists - of each element calculated with itself. - - use_tensor: bool: - The flag to use whether return tensor instead of numpy array. All other operations are the same - - """ - # Check is fit had been called - check_is_fitted(self, ["X", "sparse_"]) - try: - check_is_fitted(self, ["_X_diag"]) - except NotFittedError: - # Calculate diagonal of X - if use_tensor: - self._X_diag = torch.einsum("ij,ij->i", [self.X_tensor, self.X_tensor]) - else: - if self.sparse_: - self._X_diag = squeeze(array(self.X.multiply(self.X).sum(axis=1))) - else: - self._X_diag = einsum("ij,ij->i", self.X, self.X) - try: - check_is_fitted(self, ["_Y"]) - if use_tensor: - Y_diag = torch.einsum("ij, ij->i", [self.Y_tensor, self.Y_tensor]) - return self._X_diag, Y_diag - else: - if self.sparse_: - Y_diag = squeeze(array(self._Y.multiply(self._Y).sum(axis=1))) - else: - Y_diag = einsum("ij,ij->i", self._Y, self._Y) - return self._X_diag, Y_diag - except NotFittedError: - return self._X_diag - - def transform(self, X, return_embedding_only=False, **kwargs): - """Calculate the kernel matrix, between given and fitted dataset. - - Parameters - ---------- - X : iterable - Each element must be an iterable with at most three features and at - least one. The first that is obligatory is a valid graph structure - (adjacency matrix or edge_dictionary) while the second is - node_labels and the third edge_labels (that fitting the given graph - format). If None the kernel matrix is calculated upon fit data. - The test samples. - - return_embedding_only: bool - Whether returns the vector embedding of the kernel only (without actually - computing the kernel function). This is used when computing the derivative - of the kernel w.r.t. 
the test points/ - - Returns - ------- - K : numpy array, shape = [n_targets, n_input_graphs] - corresponding to the kernel matrix, a calculation between - all pairs of graphs between target an features - - """ - self._method_calling = 3 - # Check is fit had been called - check_is_fitted(self, ["X"]) - - # Input validation and parsing - if X is None: - raise ValueError("`transform` input cannot be None") - else: - Y = self.parse_input(X, **kwargs) - if return_embedding_only: - return Y - - self._Y = Y - self._is_transformed = True - - # Transform - calculate kernel matrix - km = self._calculate_kernel_matrix(Y) - # Self transform must appear before the diagonal call on normilization - if self.normalize: - X_diag, Y_diag = self.diagonal() - km /= np.sqrt(np.outer(Y_diag, X_diag)) - if self.as_tensor: - km = torch.tensor(km) - return km - - def fit_transform(self, X, **kwargs): - """Fit and transform, on the same dataset. - - Parameters - ---------- - X : iterable - Each element must be an iterable with at most three features and at - least one. The first that is obligatory is a valid graph structure - (adjacency matrix or edge_dictionary) while the second is - node_labels and the third edge_labels (that fitting the given graph - format). If None the kernel matrix is calculated upon fit data. - The test samples. - - y : None - There is no need of a target in a transformer, yet the pipeline API - requires this parameter. - - Returns - ------- - K : numpy array, shape = [n_targets, n_input_graphs] - corresponding to the kernel matrix, a calculation between - all pairs of graphs between target an features - - """ - self._method_calling = 2 - self.fit(X, **kwargs) - - # Transform - calculate kernel matrix - km = self._calculate_kernel_matrix() - - self._X_diag = np.diagonal(km) - if self.normalize: - km = km / np.sqrt(np.outer(self._X_diag, self._X_diag)) - if self.as_tensor: - km = torch.tensor(km) - return km - - def fit(self, X, y=None, **kwargs): - """Fit a dataset, for a transformer. - - Parameters - ---------- - X : iterable - Each element must be an iterable with at most three features and at - least one. The first that is obligatory is a valid graph structure - (adjacency matrix or edge_dictionary) while the second is - node_labels and the third edge_labels (that fitting the given graph - format). The train samples. - - y : None - There is no need of a target in a transformer, yet the pipeline API - requires this parameter. - - Returns - ------- - self : object - Returns self. 
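# An editorial usage sketch, assuming the class above and grakel's Kernel base
# behaviour; the toy graphs are (edge_dictionary, node_label_dict) pairs, the
# input shape parse_input expects. Graphs, labels and flags are illustrative.
g1 = ({0: {1: 1.0}, 1: {0: 1.0}}, {0: "conv", 1: "pool"})
g2 = ({0: {1: 1.0}, 1: {0: 1.0}}, {0: "conv", 1: "conv"})
vh = VertexHistogram(normalize=False, as_tensor=False)
K_train = vh.fit_transform([g1, g2])  # 2x2 Gram of label-count dot products
K_test = vh.transform([g2])           # 1x2 cross-kernel against the fitted graphs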
- - """ - self._is_transformed = False - self._method_calling = 1 - - # Parameter initialization - self.initialize() - - # Input validation and parsing - if X is None: - raise ValueError("`fit` input cannot be None") - else: - self.X = self.parse_input(X, **kwargs) - - # Return the transformer - return self diff --git a/neps/optimizers/bayesian_optimization/kernels/grakel_replace/weisfeiler_lehman.py b/neps/optimizers/bayesian_optimization/kernels/grakel_replace/weisfeiler_lehman.py deleted file mode 100644 index 890c2c8d3..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/grakel_replace/weisfeiler_lehman.py +++ /dev/null @@ -1,781 +0,0 @@ -"""The weisfeiler lehman kernel :cite:`shervashidze2011weisfeiler`.""" - -import collections -import collections.abc -import logging -import warnings -from ast import literal_eval -from collections import OrderedDict -from copy import deepcopy -from typing import Iterable - -import numpy as np -import torch -from grakel.graph import Graph -from grakel.kernels import Kernel -from sklearn.exceptions import NotFittedError -from sklearn.utils.validation import check_is_fitted - -from .vertex_histogram import VertexHistogram - -warnings.filterwarnings("ignore", message="Importing from numpy.matlib is deprecated ") - -class WeisfeilerLehman(Kernel): - """Compute the Weisfeiler Lehman Kernel. - - See :cite:`shervashidze2011weisfeiler`. - - Parameters - ---------- - h : int, default=5 - The number of iterations. - - base_graph_kernel : `grakel.kernel_operators.Kernel` or tuple, default=None - If tuple it must consist of a valid kernel object and a - dictionary of parameters. General parameters concerning - normalization, concurrency, .. will be ignored, and the - ones of given on `__init__` will be passed in case it is needed. - Default `base_graph_kernel` is `VertexHistogram`. - - node_weights: iterable - If not None, the nodes will be assigned different weights according - to this vector. Must be a dictionary with the following format: - {'node_name1': weight1, 'node_name2': weight2 ... } - Must be of the same length as the number of different node attributes - - Attributes - ---------- - X : dict - Holds a dictionary of fitted subkernel modules for all levels. - - _nx : number - Holds the number of inputs. - - _h : int - Holds the number, of iterations. - - _base_graph_kernel : function - A void function that initializes a base kernel object. - - _inv_labels : dict - An inverse dictionary, used for relabeling on each iteration. - - """ - - _graph_format = "dictionary" - - def __init__( - self, - n_jobs=None, - normalize: bool = False, - h: int = 5, - base_graph_kernel=VertexHistogram, - node_weights=None, - layer_weights=None, - as_tensor: bool = True, - ): - """Initialise a `weisfeiler_lehman` kernel.""" - super().__init__(n_jobs=n_jobs, normalize=normalize) - - self.h = h - self.base_graph_kernel = base_graph_kernel - self._initialized.update( - {"h": False, "base_graph_kernel": False, "layer_weights": False} - ) - self._base_graph_kernel = None - self.weights = None - self.node_weights = node_weights - self.as_tensor = as_tensor - self.layer_weights = layer_weights # The weights of each layer. 
If None, each WL iteration has same weight - self.feature_dims = [ - 0, - ] # Record the dimensions of the vectors of each WL iteration - self._params = None - self._h = None - self._nx = None - self._inv_labels = None - self._inv_label_node_attr = None - self._label_node_attr = None - self._feature_weight = None - self._method_calling = None - self._is_transformed = None - self.X = None - self._X_diag = None - - self.X_fit = dict() - self.K_precomputed = dict() - self.base_graph_kernel_precomputed = dict() - - def initialize(self): - """Initialize all transformer arguments, needing initialization.""" - super().initialize() - if not self._initialized["base_graph_kernel"]: - base_graph_kernel = self.base_graph_kernel - if base_graph_kernel is None: - base_graph_kernel, params = VertexHistogram, dict() - # TODO: make sure we're always passing like this - elif type(base_graph_kernel) is type and issubclass( - base_graph_kernel, Kernel - ): - params = dict() - else: - try: - base_graph_kernel, params = base_graph_kernel - except Exception as _error: - raise TypeError( - "Base kernel was not formulated in " - "the correct way. " - "Check documentation." - ) from _error - - if not ( - type(base_graph_kernel) is type - and issubclass(base_graph_kernel, Kernel) - ): - raise TypeError( - "The first argument must be a valid " - "grakel.kernel.kernel Object" - ) - if not isinstance(params, dict): - raise ValueError( - "If the second argument of base " - "kernel exists, it must be a diction" - "ary between parameters names and " - "values" - ) - params.pop("normalize", None) - - params["normalize"] = False - params["n_jobs"] = None - self._base_graph_kernel = base_graph_kernel - self._params = params - self._initialized["base_graph_kernel"] = True - - if not self._initialized["h"]: - if not isinstance(self.h, int) or self.h < 0: - raise TypeError( - "'h' must be a non-negative integer. Got h:" + str(self.h) - ) - self._h = self.h + 1 - self._initialized["h"] = True - - if self.layer_weights is None or self.layer_weights.shape[0] != self._h: - self.layer_weights = np.ones((self._h,)) - if self.as_tensor and not isinstance(self.layer_weights, torch.Tensor): - self.layer_weights = torch.tensor(self.layer_weights) - - self._initialized["h"] = True - self._initialized["layer_weights"] = True - - def change_se_kernel(self, se_kernel): - if self._base_graph_kernel is None: - self.initialize() - self._params["se_kernel"] = se_kernel - logging.info("Base kernel changed") - - def parse_input( - self, X: Iterable, return_embedding_only: bool = False, gp_fit: bool = True - ): - """Parse input for weisfeiler lehman. - - Parameters - ---------- - X : iterable - For the input to pass the test, we must have: - Each element must be an iterable with at most three features and at - least one. The first that is obligatory is a valid graph structure - (adjacency matrix or edge_dictionary) while the second is - node_labels and the third edge_labels (that correspond to the given - graph format). A valid input also consists of graph type objects. - - return_embedding_only: bool - Whether to return the embedding of the graphs only, instead of computing the kernel all - the way to the end. - - gp_fit: bool - If False use precomputed vals for first N values, else compute them and save them - - Returns - ------- - base_graph_kernel : object - Returns base_graph_kernel. - - if requires_grad is enabled and we call fit_transform or transform, an additional torch tensor - K_grad is returned as well. 
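# A minimal sketch (not part of the diff) of how the kernel is assembled
# below: a weighted sum over WL iterations, K = sum_i layer_weights[i] * K_i,
# so with h = 1 (two iterations) and unit weights the per-iteration subtree
# kernels simply add up. The base-kernel matrices here are illustrative.
import torch

K0 = torch.tensor([[4.0, 2.0], [2.0, 4.0]])   # iteration-0 base kernel
K1 = torch.tensor([[3.0, 0.0], [0.0, 3.0]])   # iteration-1 base kernel
layer_weights = torch.tensor([1.0, 1.0])
K = torch.stack([layer_weights[0] * K0, layer_weights[1] * K1], dim=0).sum(dim=0)
# K == [[7., 2.], [2., 7.]]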
- - """ - if self._method_calling not in [1, 2]: - raise ValueError( - "method call must be called either from fit " + "or fit-transform" - ) - elif hasattr(self, "_X_diag"): - # Clean _X_diag value - delattr(self, "_X_diag") - - # skip kernel computation if we have already computed the corresponding kernel - if self._h in self.K_precomputed.keys() and self.X_fit[self._h] == X: - K = self.K_precomputed[self._h] - base_graph_kernel = self.base_graph_kernel_precomputed[self._h] - else: - # Input validation and parsing - if not isinstance(X, collections.abc.Iterable): - raise TypeError("input must be an iterable\n") - else: - nx = 0 - Gs_ed, L, distinct_values, extras = dict(), dict(), set(), dict() - for idx, x in enumerate(iter(X)): - is_iter = isinstance(x, collections.abc.Iterable) - if is_iter: - x = list(x) - if is_iter and (len(x) == 0 or len(x) >= 2): - if len(x) == 0: - warnings.warn("Ignoring empty element on index: " + str(idx)) - continue - else: - if len(x) > 2: - extra = tuple() - if len(x) > 3: - extra = tuple(x[3:]) - x = Graph( - x[0], x[1], x[2], graph_format=self._graph_format - ) - extra = ( - x.get_labels( - purpose=self._graph_format, - label_type="edge", - return_none=True, - ), - ) + extra - else: - x = Graph(x[0], x[1], {}, graph_format=self._graph_format) - extra = tuple() - - elif isinstance(x, Graph): - x.desired_format(self._graph_format) - el = x.get_labels( - purpose=self._graph_format, - label_type="edge", - return_none=True, - ) - if el is None: - extra = tuple() - else: - extra = (el,) - - else: - raise TypeError( - "each element of X must be either a " - + "graph object or a list with at least " - + "a graph like object and node labels " - + "dict \n" - ) - Gs_ed[nx] = x.get_edge_dictionary() - L[nx] = x.get_labels(purpose="dictionary") - extras[nx] = extra - distinct_values |= set(L[nx].values()) - nx += 1 - if nx == 0: - raise ValueError("parsed input is empty") - - # Save the number of "fitted" graphs. 
- self._nx = nx - WL_labels_inverse = OrderedDict() - - # assign a number to each label - label_count = 0 - for dv in sorted(list(distinct_values)): - WL_labels_inverse[dv] = label_count - label_count += 1 - - # Initalize an inverse dictionary of labels for all iterations - self._inv_labels = ( - OrderedDict() - ) # Inverse dictionary of labels, in term of the *previous layer* - self._inv_labels[0] = deepcopy(WL_labels_inverse) - self.feature_dims.append( - len(WL_labels_inverse) - ) # Update the zeroth iteration feature dim - - self._inv_label_node_attr = ( - OrderedDict() - ) # Inverse dictionary of labels, in term of the *node attribute* - self._label_node_attr = ( - OrderedDict() - ) # Same as above, but with key and value inverted - self._label_node_attr[0], self._inv_label_node_attr[0] = self.translate_label( - WL_labels_inverse, 0 - ) - - if self.node_weights is not None: - self._feature_weight = OrderedDict() - # Ensure the order is the same - self._feature_weight[0] = self._compute_feature_weight( - self.node_weights, 0, WL_labels_inverse - )[1] - else: - self._feature_weight = None - - def generate_graphs(label_count: int, WL_labels_inverse): - new_graphs = list() - for j in range(self._nx): - new_labels = dict() - for k in L[j].keys(): - new_labels[k] = WL_labels_inverse[L[j][k]] - L[j] = new_labels - # add new labels - new_graphs.append((Gs_ed[j], new_labels) + extras[j]) - yield new_graphs - - for i in range(1, self._h): - label_set, WL_labels_inverse, L_temp = set(), dict(), dict() - for j in range(nx): - # Find unique labels and sort - # them for both graphs - # Keep for each node the temporary - L_temp[j] = dict() - for v in Gs_ed[j].keys(): - credential = ( - str(L[j][v]) - + "," - + str(sorted(L[j][n] for n in Gs_ed[j][v].keys())) - ) - L_temp[j][v] = credential - label_set.add(credential) - - label_list = sorted(list(label_set)) - for dv in label_list: - WL_labels_inverse[dv] = label_count - label_count += 1 - - # Recalculate labels - new_graphs = list() - for j in range(nx): - new_labels = dict() - for k in L_temp[j].keys(): - new_labels[k] = WL_labels_inverse[L_temp[j][k]] - L[j] = new_labels - # relabel - new_graphs.append((Gs_ed[j], new_labels) + extras[j]) - self._inv_labels[i] = WL_labels_inverse - # Compute the translated inverse node label - ( - self._label_node_attr[i], - self._inv_label_node_attr[i], - ) = self.translate_label( - WL_labels_inverse, i, self._label_node_attr[i - 1] - ) - self.feature_dims.append( - self.feature_dims[-1] + len(self._label_node_attr[i]) - ) - # Compute the feature weight of the current layer - if self.node_weights is not None: - self._feature_weight[i] = self._compute_feature_weight( - self.node_weights, i, self._inv_label_node_attr[i] - )[1] - # assert len(self._feature_weight[i] == len(WL_labels_inverse)) - yield new_graphs - - # Initialise the base graph kernel. 
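# A minimal sketch (not part of the diff) of one WL refinement step as coded
# above: each node's new "credential" is its old label plus the sorted
# multiset of neighbour labels, and credentials are then compressed to fresh
# integers. The tiny graph below is illustrative.
neighbours = {0: [1, 2], 1: [0], 2: [0]}
L = {0: 0, 1: 1, 2: 1}                          # current integer labels
credentials = {
    v: str(L[v]) + "," + str(sorted(L[n] for n in neighbours[v]))
    for v in neighbours
}
# credentials == {0: '0,[1, 1]', 1: '1,[0]', 2: '1,[0]'}
relabel = {c: i for i, c in enumerate(sorted(set(credentials.values())))}
new_L = {v: relabel[c] for v, c in credentials.items()}
# new_L == {0: 0, 1: 1, 2: 1}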
- base_graph_kernel = {} - - K = [] - for i, g in enumerate(generate_graphs(label_count, WL_labels_inverse)): - param = self._params - if self._feature_weight is not None: - param.update({"mahalanobis_precision": self._feature_weight[i]}) - base_graph_kernel.update({i: self._base_graph_kernel(**param)}) - if return_embedding_only: - K.append( - base_graph_kernel[i].parse_input( - g, - label_start_idx=self.feature_dims[i], - label_end_idx=self.feature_dims[i + 1], - ) - ) - else: - if self._method_calling == 1: - base_graph_kernel[i].fit( - g, - label_start_idx=self.feature_dims[i], - label_end_idx=self.feature_dims[i + 1], - ) - else: - K.append( - self.layer_weights[i] - * base_graph_kernel[i].fit_transform( - g, - label_start_idx=self.feature_dims[i], - label_end_idx=self.feature_dims[i + 1], - ) - ) - - if gp_fit: - self.X_fit[self._h] = X - self.K_precomputed[self._h] = K - self.base_graph_kernel_precomputed[self._h] = base_graph_kernel - - if return_embedding_only: - return K - elif self._method_calling == 1: - return base_graph_kernel - elif self._method_calling == 2: - if self.as_tensor: - K = torch.stack(K, dim=0).sum(dim=0) - return K, base_graph_kernel - return np.sum(K, axis=0), base_graph_kernel - - def fit_transform( - self, X: Iterable, y=None, gp_fit: bool = True - ): - """Fit and transform, on the same dataset. - - Parameters - ---------- - X : iterable - Each element must be an iterable with at most three features and at - least one. The first that is obligatory is a valid graph structure - (adjacency matrix or edge_dictionary) while the second is - node_labels and the third edge_labels (that fitting the given graph - format). If None the kernel matrix is calculated upon fit data. - The test samples. - - y : Object, default=None - Ignored argument, added for the pipeline. - - Returns - ------- - K : numpy array, shape = [n_targets, n_input_graphs] - corresponding to the kernel matrix, a calculation between - all pairs of graphs between target an features - - """ - self._method_calling = 2 - self._is_transformed = False - self.initialize() - self.feature_dims = [ - 0, - ] # Flush the feature dimensions - if X is None: - raise ValueError("transform input cannot be None") - else: - km, self.X = self.parse_input(X, gp_fit=gp_fit) - - return km - - def transform(self, X: Iterable, return_embedding_only: bool = True): - """Calculate the kernel matrix, between given and fitted dataset. - - Parameters - ---------- - X : iterable - Each element must be an iterable with at most three features and at - least one. The first that is obligatory is a valid graph structure - (adjacency matrix or edge_dictionary) while the second is - node_labels and the third edge_labels (that fitting the given graph - format). If None the kernel matrix is calculated upon fit data. - The test samples. - - return_embedding_only: bool - Whether to return the embedding of the graphs only, instead of computing the kernel all - the way to the end. 
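# A minimal sketch (not part of the diff): at transform time, node labels
# never seen during fit are appended after the nl fitted labels, mirroring
# the WL_labels_inverse construction further below. Vocabulary values are
# illustrative.
inv_labels_0 = {"conv": 0, "pool": 1}                      # fitted vocabulary
test_values = {"conv", "skip", "zero"}                     # labels in the test graphs
nl = len(inv_labels_0)
unseen = sorted(v for v in test_values if v not in inv_labels_0)
WL_labels_inverse = {dv: idx for idx, dv in enumerate(unseen, nl)}
# WL_labels_inverse == {"skip": 2, "zero": 3}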
- Returns - ------- - K : numpy array, shape = [n_targets, n_input_graphs] - corresponding to the kernel matrix, a calculation between - all pairs of graphs between target an features - - """ - self._method_calling = 3 - # Check is fit had been called - check_is_fitted(self, ["X", "_nx", "_inv_labels"]) - - # Input validation and parsing - if X is None: - raise ValueError("transform input cannot be None") - else: - if not isinstance(X, collections.abc.Iterable): - raise ValueError("input must be an iterable\n") - else: - nx = 0 - distinct_values = set() - Gs_ed, L = dict(), dict() - for i, x in enumerate(iter(X)): - is_iter = isinstance(x, collections.abc.Iterable) - if is_iter: - x = list(x) - if is_iter and len(x) in [0, 2, 3]: - if len(x) == 0: - warnings.warn("Ignoring empty element on index: " + str(i)) - continue - - elif len(x) in [2, 3]: - x = Graph(x[0], x[1], {}, self._graph_format) - elif isinstance(x, Graph): - x.desired_format("dictionary") - else: - raise ValueError( - "each element of X must have at " - + "least one and at most 3 elements\n" - ) - Gs_ed[nx] = x.get_edge_dictionary() - L[nx] = x.get_labels(purpose="dictionary") - - # Hold all the distinct values - distinct_values |= { - v for v in L[nx].values() if v not in self._inv_labels[0] - } - nx += 1 - if nx == 0: - raise ValueError("parsed input is empty") - - nl = len(self._inv_labels[0]) - WL_labels_inverse = { - dv: idx for (idx, dv) in enumerate(sorted(list(distinct_values)), nl) - } - WL_labels_inverse = OrderedDict(WL_labels_inverse) - - def generate_graphs_transform(WL_labels_inverse, nl): - # calculate the kernel matrix for the 0 iteration - new_graphs = list() - for j in range(nx): - new_labels = dict() - for k, v in L[j].items(): - if v in self._inv_labels[0]: - new_labels[k] = self._inv_labels[0][v] - else: - new_labels[k] = WL_labels_inverse[v] - L[j] = new_labels - # produce the new graphs - new_graphs.append([Gs_ed[j], new_labels]) - yield new_graphs - - for i in range(1, self._h): - new_graphs = list() - L_temp, label_set = dict(), set() - nl += len(self._inv_labels[i]) - for j in range(nx): - # Find unique labels and sort them for both graphs - # Keep for each node the temporary - L_temp[j] = dict() - for v in Gs_ed[j].keys(): - credential = ( - str(L[j][v]) - + "," - + str(sorted(L[j][n] for n in Gs_ed[j][v].keys())) - ) - L_temp[j][v] = credential - if credential not in self._inv_labels[i]: - label_set.add(credential) - - # Calculate the new label_set - WL_labels_inverse = dict() - if len(label_set) > 0: - for dv in sorted(list(label_set)): - idx = len(WL_labels_inverse) + nl - WL_labels_inverse[dv] = idx - - # Recalculate labels - new_graphs = list() - for j in range(nx): - new_labels = dict() - for k, v in L_temp[j].items(): - if v in self._inv_labels[i]: - new_labels[k] = self._inv_labels[i][v] - else: - new_labels[k] = WL_labels_inverse[v] - L[j] = new_labels - # Create the new graphs with the new labels. 
- new_graphs.append([Gs_ed[j], new_labels]) - yield new_graphs - - if return_embedding_only: - K = [] - for i, g in enumerate(generate_graphs_transform(WL_labels_inverse, nl)): - K.append( - self.X[i].transform( - g, - label_start_idx=self.feature_dims[i], - label_end_idx=self.feature_dims[i + 1], - return_embedding_only=True, - ) - ) - return K - - # Calculate the kernel matrix without parallelization - if self.as_tensor: - summand = [ - self.layer_weights[i] - * self.X[i].transform( - g, - label_start_idx=self.feature_dims[i], - label_end_idx=self.feature_dims[i + 1], - ) - for i, g in enumerate(generate_graphs_transform(WL_labels_inverse, nl)) - ] - K = torch.stack(summand, dim=0).sum(dim=0) - else: - K = np.sum( - ( - self.layer_weights[i] - * self.X[i].transform( - g, - label_start_idx=self.feature_dims[i], - label_end_idx=self.feature_dims[i + 1], - ) - for (i, g) in enumerate( - generate_graphs_transform(WL_labels_inverse, nl) - ) - ), - axis=0, - ) - - self._is_transformed = True - if self.normalize: - X_diag, Y_diag = self.diagonal() - if self.as_tensor: - div_ = torch.sqrt(torch.ger(Y_diag, X_diag)) - K /= div_ - else: - old_settings = np.seterr(divide="ignore") - K = np.nan_to_num(np.divide(K, np.sqrt(np.outer(Y_diag, X_diag)))) - np.seterr(**old_settings) - - return K - - def diagonal(self): - """Calculate the kernel matrix diagonal for fitted data. - - A funtion called on transform on a seperate dataset to apply - normalization on the exterior. - - Parameters - ---------- - None. - - Returns - ------- - X_diag : np.array - The diagonal of the kernel matrix, of the fitted data. - This consists of kernel calculation for each element with itself. - - Y_diag : np.array - The diagonal of the kernel matrix, of the transformed data. - This consists of kernel calculation for each element with itself. - - """ - # Check if fit had been called - check_is_fitted(self, ["X"]) - try: - check_is_fitted(self, ["_X_diag"]) - if self._is_transformed: - Y_diag = self.X[0].diagonal()[1] - for i in range(1, self._h): - Y_diag += self.X[i].diagonal()[1] - except NotFittedError: - # Calculate diagonal of X - if self._is_transformed: - X_diag, Y_diag = self.X[0].diagonal() - # X_diag is considered a mutable and should not affect the kernel matrix itself. - X_diag.flags.writeable = True - for i in range(1, self._h): - x, y = self.X[i].diagonal() - X_diag += x - Y_diag += y - self._X_diag = X_diag - - # case sub kernel is only fitted - X_diag = self.X[0].diagonal() - # X_diag is considered a mutable and should not affect the kernel matrix itself. - X_diag.flags.writeable = True - for i in range(1, self._n_iter): - x = self.X[i].diagonal() - X_diag += x - self._X_diag = X_diag - - if self.as_tensor: - self._X_diag = torch.tensor(self._X_diag) - if Y_diag is not None: - Y_diag = torch.tensor(Y_diag) - if self._is_transformed: - return self._X_diag, Y_diag - else: - return self._X_diag - - @staticmethod - def translate_label(curr_layer: dict, h: int, prev_layer: dict = None): - """Translate the label to be in terms of the node attributes - curr_layer: the WL_label_inverse object. 
A dictionary with element of the format of - {pattern: encoding} - - return: - label_in_node_attr: in terms of {encoding: pattern}, but pattern is always in term of the node attribute - inv_label_in_node_attr: in terms of {pattern: encoding} - - """ - if h == 0: - return {v: str(k) for k, v in curr_layer.items()}, curr_layer - else: - assert prev_layer is not None - label_in_node_attr, inv_label_in_node_attr = OrderedDict(), OrderedDict() - for pattern, encoding in curr_layer.items(): - # current pattern is in terms of the encoding previous layer. Find the pattern from the prev_layer - root, leaf = literal_eval(pattern) - root_ = prev_layer[root] - leaf_ = [prev_layer[i] for i in leaf] - label_in_node_attr.update({encoding: "~".join([root_] + leaf_)}) - inv_label_in_node_attr.update({"~".join([root_] + leaf_): encoding}) - return label_in_node_attr, inv_label_in_node_attr - - @staticmethod - def _compute_feature_weight( - node_weight: OrderedDict, h: int, inv_label_node_attr: OrderedDict - ): - """ - Compute the feature weight, based on the average weight of the constituent node attributes. - Return: - feature_weights: a dictionary with h layers, each of which is a dictionary of the format of - {tuple1: weight1; tuplr2, weight2 ...} where tuplex is the tuple representation of the learned graph feature - - feature_weight_flattened: same as above, but in a flattened np format. - """ - feature_weights = OrderedDict() - feature_weights_flattened = [] - if h == 0: - feature_weight = OrderedDict( - {k: (node_weight[k]) ** 2 for k in inv_label_node_attr.keys()} - ) - feature_weights_flattened = np.array(list(feature_weight.values())).flatten() - else: - for k, _ in inv_label_node_attr.items(): - # k is the pattern, v is the encoding - k_sep = k.split("~") - average_weight = np.mean([(node_weight[i]) ** 2 for i in k_sep]) - feature_weights.update({k: average_weight}) - feature_weights_flattened.append(average_weight) - feature_weights_flattened = np.array(feature_weights_flattened).flatten() - assert len(feature_weights_flattened) == len(inv_label_node_attr) - return feature_weights, feature_weights_flattened - - def dK_dX(self, X_test: None): - """ - Do additional forward and backward pass, compute the kernel derivative wrt the testing location. 
- If no test locations are provided, the derivatives are evaluated at the training points - Returns - ------- - - """ - - -def efit(obj, data): - """Fit an object on data.""" - obj.fit(data) - - -def efit_transform(obj, data): - """Fit-Transform an object on data.""" - return obj.fit_transform(data) - - -def etransform(obj, data): - """Transform an object on data.""" - return obj.transform(data) diff --git a/neps/optimizers/bayesian_optimization/kernels/graph_kernel.py b/neps/optimizers/bayesian_optimization/kernels/graph_kernel.py deleted file mode 100644 index b9d10102f..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/graph_kernel.py +++ /dev/null @@ -1,35 +0,0 @@ -import torch - - -class GraphKernels: - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.n_hyperparameters = 0 - self.rbf_lengthscale = False - self.kern = None - self.__name__ = "GraphKernelBase" - - @staticmethod - def normalize_gram(K: torch.Tensor): - K_diag = torch.sqrt(torch.diag(K)) - K_diag_outer = torch.ger(K_diag, K_diag) - return K / K_diag_outer - - def fit_transform( - self, gr: list, rebuild_model=False, save_gram_matrix=False, **kwargs - ): - raise NotImplementedError - - def transform( - self, - gr: list, - ): - raise NotImplementedError - - def forward_t(self, gr2, gr1: list = None): - """ - Compute the derivative of the kernel function k(phi, phi*) with respect to phi* (the training point) - """ - raise NotImplementedError( - "The kernel gradient is not implemented for the graph kernel called!" - ) diff --git a/neps/optimizers/bayesian_optimization/kernels/utils.py b/neps/optimizers/bayesian_optimization/kernels/utils.py deleted file mode 100644 index 92ee1817b..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/utils.py +++ /dev/null @@ -1,151 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Tuple - -import networkx as nx -import numpy as np - -if TYPE_CHECKING: - from neps.search_spaces.search_space import SearchSpace - - -def transform_to_undirected(gr: list): - """Transform a list of directed graphs by undirected graphs.""" - undirected_gr = [] - for g in gr: - if not isinstance(g, nx.Graph): - continue - if isinstance(g, nx.DiGraph): - undirected_gr.append(g.to_undirected()) - else: - undirected_gr.append(g) - return undirected_gr - - -def extract_configs(configs: list[SearchSpace]) -> Tuple[list, list]: - """Extracts graph & HPs from configs objects - - Args: - configs (list): Object holding graph and/or HPs - - Returns: - Tuple[list, list]: list of graphs, list of HPs - """ - config_hps = [conf.get_normalized_hp_categories() for conf in configs] - graphs = [hps["graphs"] for hps in config_hps] - # Don't call np.array on structured objects - # https://github.com/numpy/numpy/issues/24546#issuecomment-1693913119 - # _nested_graphs = np.array(graphs, dtype=object) - # if _nested_graphs.ndim == 3 - # graphs = _nested_graphs[:, :, 0].reshape(-1).tolist() - # Long hand way of doing the above - if (len(graphs) > 0 and isinstance(graphs[0], list) - and len(graphs[0]) > 0 and isinstance(graphs[0][0], list)): - res = [_list for list_of_list in graphs for _list in list_of_list] - graphs = res - return graphs, config_hps - - -def graph_metrics(graph, metric=None, directed=True): - if directed: - G = graph - else: - G = graph.to_undirected() - - # global metrics - if metric == "avg_path_length": - avg_path_length = nx.average_shortest_path_length(G) - metric_score = avg_path_length - - elif metric == "density": - density = nx.density(G) - 
metric_score = density - - else: - raise NotImplementedError - - return metric_score - - -def extract_configs_hierarchy( - configs: list, d_graph_features: int, hierarchy_consider=None -) -> Tuple[list, list]: - """Extracts graph & graph features from configs objects - Args: - configs (list): Object holding graph and/or graph features - d_graph_features (int): Number of global graph features used; if d_graph_features=0, indicate not using global graph features - hierarchy_consider (list or None): Specify graphs at which earlier hierarchical levels to be considered - Returns: - Tuple[list, list]: list of graphs, list of HPs - """ - N = len(configs) - - config_hps = [conf.get_normalized_hp_categories() for conf in configs] - combined_graphs = [hps["graphs"] for hps in config_hps] - if N > 0 and hierarchy_consider is not None and combined_graphs[0]: - # graphs = list( - # map( - # list, - # zip( - # *[ - # [g[0][0]] - # + [g[0][1][hierarchy_id] for hierarchy_id in hierarchy_consider] - # for g in combined_graphs - # ] - # ), - # ) - # ) - graphs = list( - map( - list, - zip( - *[ - [g[0][0]] - + [ - g[0][1][hierarchy_id] - if hierarchy_id in g[0][1] - else g[0][1][max(g[0][1].keys())] - for hierarchy_id in hierarchy_consider - ] - for g in combined_graphs - ] - ), - ) - ) - ### full graph, 0th hierarchy (high-level, smallest), 1st hierarchy, 2nd hierarchy, 3rd hierarchy, ... - ### graph gets bigger of hierarchies - ### list shape: (1+4) x N - - # modify the node attribute labels on earlier hierarchy graphs e.g. - # note the node feature for graph in earlier hierarchical level should be more coarse - # e.g. {'op_name': '(Cell diamond (OPS id) (OPS avg_pool) (OPS id) (OPS avg_pool))'} -> {'op_name': 'Cell diamond '} - for hg_list in graphs[1:]: - for G in hg_list: - original_node_labels = nx.get_node_attributes(G, "op_name") - new_node_labels = { - k: v.split("(")[1] - for k, v in original_node_labels.items() - if "(" in v and ")" in v - } - nx.set_node_attributes(G, new_node_labels, name="op_name") - else: - # graphs = [g[0][0] for g in combined_graphs] - graphs = combined_graphs - - if N > 0 and d_graph_features > 0: - # graph_features = [c['metafeature'] for c in configs] - # these feature values are normalised between 0 and 1 - # the two graph features used are 'avg_path_length', 'density' - graph_features = [ - [ - graph_metrics(g[0][0], metric="avg_path_length"), - graph_metrics(g[0][0], metric="density"), - ] - for g in combined_graphs - ] - graph_features_array = np.vstack(graph_features) # shape n_archs x 2 (nx(2+d_hp)) - else: - # if not using global graph features of the final architectures, set them to None - graph_features_array = [None] * N - - return graphs, graph_features_array diff --git a/neps/optimizers/bayesian_optimization/kernels/vectorial_kernels.py b/neps/optimizers/bayesian_optimization/kernels/vectorial_kernels.py deleted file mode 100644 index 6e0b20526..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/vectorial_kernels.py +++ /dev/null @@ -1,271 +0,0 @@ -from copy import deepcopy -from math import sqrt -from typing import Tuple, Union - -import numpy as np -import torch - - -class Stationary: - """Here we follow the structure of GPy to build a sub class of stationary kernel. - All the classes (i.e. 
the class of stationary kernel_operators) derived from this - class use the scaled distance to compute the Gram matrix.""" - - def __init__( - self, - lengthscale: Union[float, Tuple[float, ...]] = 1.0, - lengthscale_bounds: Tuple[float, float] = ( - np.exp(-6.754111155189306), - np.exp(0.0858637988771976), - ), - outputscale=1.0, - **kwargs, - ): - super().__init__(**kwargs) - self.lengthscale = lengthscale - self.lengthscale_bounds = lengthscale_bounds - self.outputscale = outputscale - - self._gram = None - self._train = None - - def forward(self, x1, x2=None, l=None, **params): - if l is not None: - return _scaled_distance(l, x1, x2) - return _scaled_distance(self.lengthscale, x1, x2) - - def fit_transform( - self, - x1, - l=None, - rebuild_model=True, - save_gram_matrix=True, - ): - if not rebuild_model and self._gram is not None: - return self._gram - K = self.forward(x1, l=l) - if save_gram_matrix: - self._train = deepcopy(x1) - assert isinstance(K, torch.Tensor), "it doesnt work with np arrays.." - self._gram = K.clone() - return K - - def transform( - self, - x1, - l=None, - ): - if self._gram is None: - raise ValueError("The kernel has not been fitted. Run fit_transform first") - return self.forward(self._train, x1, l=l) - - def __call__(self, *args, **kwargs): - return self.forward(*args, **kwargs) - - def forward_t(self, x2, x1=None, l=None): - if x1 is None: - x1 = torch.tensor(self._train) - x2 = torch.tensor(x2).requires_grad_(True) - K = self.forward(x1, x2, l) - return K, x2 - - def update_hyperparameters(self, lengthscale): - self.lengthscale = [ - l_.clamp(self.lengthscale_bounds[0], self.lengthscale_bounds[1]).item() - for l_ in lengthscale - ] - - -class RBFKernel(Stationary): - def forward(self, x1, x2=None, l=None, **kwargs): - if l is None: - dist_sq = _scaled_distance(self.lengthscale, x1, x2, sq_dist=True) - else: - dist_sq = _scaled_distance(l, x1, x2, sq_dist=True) - if isinstance(dist_sq, torch.Tensor): - return self.outputscale * torch.exp(-0.5 * dist_sq) - return self.outputscale * np.exp(-0.5 * dist_sq) - - -class LayeredRBFKernel(RBFKernel): - """ - Same as the conventional RBF kernel, but adapted in a way as a midway between - spherical RBF and ARD RBF. In this case, one weight is assigned to each - Weisfiler-Lehman iteration only (e.g. one weight for h=0, another for h=1 and etc.) 
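# A minimal sketch (not part of the diff) of the expansion performed in
# forward() below: one lengthscale per WL iteration is repeated over that
# iteration's feature block, with ard_dims holding the block sizes. The
# dimensions and lengthscales here are illustrative.
import torch

ard_dims = torch.tensor([2, 3])                 # features per WL iteration
l = torch.tensor([0.5, 2.0])                    # one lengthscale per iteration
M = torch.cat([torch.ones(int(ard_dims[i])) * l[i] for i in range(ard_dims.shape[0])])
# M == tensor([0.5000, 0.5000, 2.0000, 2.0000, 2.0000])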
- """ - - def forward(self, ard_dims, x1, x2=None, l=None, **kwargs): - l = l if l is not None else self.lengthscale - assert l.shape[0] == ard_dims.shape[0], ( - "LayeredRBF expects the lengthscale vector to have the same " - "dimensionality as the " - "number of WL iterations, but got lengthscale vector of shape" - + str(l.shape[0]) - + "and WL iteration of shape " - + str(ard_dims.shape[0]) - ) - if not isinstance(ard_dims, torch.Tensor): - ard_dims = torch.tensor(ard_dims) - M = torch.cat( - [torch.ones(int(ard_dims[i])) * l[i] for i in range(ard_dims.shape[0])] - ) - return super().forward(x1, x2, M, **kwargs) - - -class Matern32Kernel(Stationary): - def forward(self, x1, x2=None, l=None, **kwargs): - if l is None: - l = self.lengthscale - dist = _scaled_distance(l, x1, x2) - if isinstance(dist, torch.Tensor): - return ( - self.outputscale * (1 + sqrt(3.0) * dist) * torch.exp(-sqrt(3.0) * dist) - ) - return self.outputscale * (1 + sqrt(3.0) * dist) * np.exp(-sqrt(3.0) * dist) - - -class Matern52Kernel(Stationary): - def forward(self, x1, x2=None, l=None, **kwargs): - if l is None: - l = self.lengthscale - dist = _scaled_distance(l, x1, x2) - sq_dist = dist**2 - if isinstance(dist, torch.Tensor): - return ( - self.outputscale - * (1 + sqrt(5.0) * dist + 5.0 / 3.0 * sq_dist) - * torch.exp(-sqrt(5.0) * dist) - ) - return ( - self.outputscale - * (1 + sqrt(5.0) * dist + 5.0 / 3.0 * sq_dist) - * np.exp(-sqrt(5.0) * dist) - ) - - def update_hyperparameters(self, lengthscale): - if lengthscale is None or "continuous" not in lengthscale.keys(): - return - lengthscale = lengthscale["continuous"] - super().update_hyperparameters(lengthscale=lengthscale) - - -class HammingKernel(Stationary): - def forward(self, x1, x2=None, l=None, **kwargs): - if l is None: - dist = _hamming_distance( - self.lengthscale, - x1, - x2, - ) - else: - dist = _hamming_distance( - l, - x1, - x2, - ) - return self.outputscale * dist - - def update_hyperparameters(self, lengthscale): - if lengthscale is None or "categorical" not in lengthscale.keys(): - return - lengthscale = lengthscale["categorical"] - super().update_hyperparameters(lengthscale=lengthscale) - - -class RationalQuadraticKernel(Stationary): - def __init__(self, lengthscale, outputscale=1.0, power=2.0, **kwargs): - super().__init__(lengthscale, outputscale, **kwargs) - self.power = power - - def forward(self, x1, x2=None, **kwargs): - dist_sq = _scaled_distance(self.lengthscale, x1, x2, sq_dist=True) - return self.outputscale * (1 + dist_sq / 2.0) ** (-self.power) - - -def _unscaled_distance(X, X2=None, sq_dist=False): - """The unscaled distance between X and X2. 
if x2 is not supplied, then the squared Euclidean distance is - computed within X""" - if isinstance(X, torch.Tensor): - assert X.ndimension() == 2 - if X2 is not None: - assert isinstance(X2, torch.Tensor) - assert X2.ndimension() == 2 - if X2 is None: - Xsq = torch.sum(X**2, 1) - r2 = -2 * X @ X.t() + Xsq[:, None] + Xsq[None, :] - else: - X1sq = torch.sum(X**2, 1) - X2sq = torch.sum(X2**2, 1) - r2 = -2 * X @ X2.t() + X1sq[:, None] + X2sq[None, :] - r2 += 1e-8 - r2 = torch.maximum(r2, torch.tensor(0)) - if not sq_dist: - r2 = torch.sqrt(r2) - else: - assert X.ndim == 2 - if X2 is not None: - assert X2.ndim == 2 - if X2 is None: - Xsq = np.sum(X**2, 1) - r2 = -2 * X @ X.transpose() + Xsq[:, None] + Xsq[None, :] - else: - X1sq = np.sum(X**2, 1) - X2sq = np.sum(X2**2, 1) - r2 = -2 * X @ X2.transpose() + X1sq[:, None] + X2sq[None, :] - if not sq_dist: - r2 = np.sqrt(r2) - return r2 - - -def _scaled_distance(lengthscale, X, X2=None, sq_dist=False): - """Compute the *scaled* distance between X and x2 (or, if X2 is not supplied, - the distance between X and itself) by the lengthscale. if a scalar (float) or a - dim=1 lengthscale vector is supplied, then it is assumed that we use one - lengthscale for all dimensions. Otherwise, we have an ARD kernel and in which case - the length of the lengthscale vector must be the same as the dimensionality of the - problem.""" - X = torch.tensor(X, dtype=torch.float64) - if X2 is None: - X2 = X - if isinstance(lengthscale, float) or len(lengthscale) == 1: - return ( - _unscaled_distance(X, X2) / lengthscale - if sq_dist is False - else _unscaled_distance(X, X2, sq_dist=True) / (lengthscale**2) - ) - else: - # ARD kernel - one lengthscale per dimension - _check_lengthscale(lengthscale, X) - dist = _unscaled_distance(X / lengthscale, X2 / lengthscale) - return dist if not sq_dist else dist**2 - - -def _hamming_distance(lengthscale, X, X2=None): - if X2 is None: - X2 = X - - def _distance(X, X2, lengthscale=1.0): - if isinstance(lengthscale, torch.Tensor): - lengthscale = lengthscale.detach().numpy() - indicator = np.expand_dims(X, axis=1) != X2 - K = (-1 / (2 * lengthscale**2) * indicator).sum(axis=2) - K = np.exp(K) - return torch.from_numpy(K) - - if isinstance(lengthscale, float) or len(lengthscale) == 1: - return _distance(X, X2) / lengthscale - else: - _check_lengthscale(lengthscale, X) - return _distance(X, X2, lengthscale) - - -def _check_lengthscale(lengthscale, X): - x_shape = len(X[0]) if isinstance(X, list) else X.shape[1] - assert len(lengthscale) == x_shape, ( - "For a non-scaler theta, it needs to be of the same length as the dim" - "of the " - "input data, but got input dim of " - + str(x_shape) - + " and lengthscale dimension of " - + str(lengthscale.shape[0]) - ) diff --git a/neps/optimizers/bayesian_optimization/kernels/weisfilerlehman.py b/neps/optimizers/bayesian_optimization/kernels/weisfilerlehman.py deleted file mode 100644 index e6550d651..000000000 --- a/neps/optimizers/bayesian_optimization/kernels/weisfilerlehman.py +++ /dev/null @@ -1,335 +0,0 @@ -import logging - -import numpy as np -import torch -from grakel.kernels import ShortestPathAttr -from grakel.utils import graph_from_networkx - -from .grakel_replace.edge_histogram import EdgeHistogram -from .grakel_replace.utils import calculate_kernel_matrix_as_tensor -from .grakel_replace.vertex_histogram import VertexHistogram -from .grakel_replace.weisfeiler_lehman import WeisfeilerLehman as _WL -from .graph_kernel import GraphKernels -from .utils import transform_to_undirected -from 
.vectorial_kernels import Stationary - - -class WeisfilerLehman(GraphKernels): - """Weisfiler Lehman kernel using grakel functions""" - - def __init__( - self, - h: int = 0, - base_type: str = "subtree", - se_kernel: Stationary = None, - layer_weights=None, - node_weights=None, - oa: bool = False, - node_label: str = "op_name", - edge_label: tuple = "op_name", - n_jobs: int = None, - return_tensor: bool = True, - requires_grad: bool = False, - undirected: bool = False, - **kwargs, - ): - """ - - Parameters - ---------- - h: int: The number of Weisfeiler-Lehman iterations - base_type: str: defines the base kernel of WL iteration. Possible types are 'subtree' (default), 'sp': shortest path - and 'edge' (The latter two are untested) - se_kernel: Stationary. defines a stationary vector kernel to be used for successive embedding (i.e. the kernel - function on which the vector embedding inner products are computed). if None, use the default linear kernel - node_weights - oa: whether the optimal assignment variant of the Weisfiler-Lehman kernel should be used - node_label: the node_label defining the key node attribute. - edge_label: the edge label defining the key edge attribute. only relevant when base_type == 'edge' - n_jobs: Parallisation to be used. *current version does not support parallel computing' - return_tensor: whether return a torch tensor. If False, a numpy array will be returned. - kwargs - """ - super().__init__(**kwargs) - if se_kernel is not None and oa: - raise ValueError( - "Only one or none of se (successive embedding) and oa (optimal assignment) may be true!" - ) - self.h = h - self.oa = oa - self.node_label = node_label - self.edge_label = edge_label - self.layer_weights = layer_weights - self.se = se_kernel - self.requires_grad = requires_grad - self.undirected = undirected - - if base_type not in ["subtree", "sp", "edge"]: - raise ValueError(f"Invalid value for base_type ({base_type})") - if base_type == "subtree": - base_kernel = VertexHistogram, { - "sparse": False, - "requires_ordered_features": requires_grad, - } - if oa: - base_kernel = VertexHistogram, { - "oa": True, - "sparse": False, - "requires_ordered_features": requires_grad, - } - elif se_kernel is not None: - base_kernel = VertexHistogram, { - "se_kernel": se_kernel, - "sparse": False, - "requires_ordered_features": requires_grad, - } - elif base_type == "edge": - base_kernel = EdgeHistogram, {"sparse": False} - if oa: - base_kernel = EdgeHistogram, { - "oa": True, - "sparse": False, - "requires_ordered_features": requires_grad, - } - elif se_kernel is not None: - base_kernel = EdgeHistogram, { - "se_kernel": se_kernel, - "sparse": False, - "requires_ordered_features": requires_grad, - } - - elif base_type == "sp": - base_kernel = ShortestPathAttr, {} - else: - raise NotImplementedError( - "The selected WL base kernel type" - + str(base_type) - + " is not implemented." - ) - self.base_type = base_type - self.kern = _WL( - n_jobs, - h=h, - base_graph_kernel=base_kernel, - normalize=True, - layer_weights=self.layer_weights, - node_weights=node_weights, - ) - self.return_tensor = return_tensor - self._gram = None - self._train, self._train_transformed = None, None - self.__name__ = "WeisfeilerLehman" - - def change_se_params(self, params: dict): - """Change the kernel parameter of the successive embedding kernel.""" - if self.se is None: - logging.warning("SE kernel is None. 
change_se_params action voided.") - return - for k, v in params.items(): - try: - setattr(self.se, k, v) - except AttributeError: - logging.warning( - str(k) + " is not a valid attribute name of the SE kernel." - ) - continue - self.kern.change_se_kernel(self.se) - - def get_info_se_kernel(self): - return self.se.lengthscale, self.kern.X[0].X.shape[1] - - def change_kernel_params(self, params: dict): - for k, v in params.items(): - try: - getattr(self.kern, k) - setattr(self.kern, k, v) - except AttributeError: - logging.warning(str(k) + " is not a valid attribute name of this kernel.") - continue - try: - setattr(self, k, v) - except AttributeError: - pass - for k in self.kern._initialized.keys(): - self.kern._initialized[k] = False - - self.kern.initialize() - - def fit_transform( - self, - gr: list, - rebuild_model: bool = False, - save_gram_matrix: bool = True, - layer_weights=None, - gp_fit: bool = True, - **kwargs, - ): - # Transform into GraKeL graph format - if rebuild_model is False and self._gram is not None: - return self._gram - if self.undirected: - gr = transform_to_undirected(gr) - if self.base_type == "edge": - if not all([g.graph_type == "edge_attr" for g in gr]): - raise ValueError( - "One or more graphs passed are not edge-attributed graphs. You need all graphs to be" - "in edge format to use 'edge' type Weisfiler-Lehman kernel." - ) - - gr_ = list(graph_from_networkx(gr, self.node_label, self.edge_label)) - else: - gr_ = list( - graph_from_networkx( - gr, - self.node_label, - ) - ) - - if rebuild_model or self._gram is None: - self._train = gr[:] - self._train_transformed = gr_[:] - - if layer_weights is not None and layer_weights is not self.layer_weights: - self.change_kernel_params({"layer_weights": layer_weights}) - self.layer_weights = layer_weights - - K = self.kern.fit_transform(gr_, gp_fit=gp_fit) - if self.return_tensor and not isinstance(K, torch.Tensor): - K = torch.tensor(K) - if save_gram_matrix: - self._gram = K.clone() - self.layer_weights = self.kern.layer_weights - return K - - def transform( - self, - gr: list, - ): - """transpose: by default, the grakel produces output in shape of len(y) * len(x2). Use transpose to - reshape that to a more conventional shape..""" - if self.undirected: - gr = transform_to_undirected(gr) - if self.base_type == "edge": - if not all([g.graph_type == "edge_attr" for g in gr]): - raise ValueError( - "One or more graphs passed are not edge-attributed graphs. You need all graphs to be" - "in edge format to use 'edge' type Weisfiler-Lehman kernel." - ) - gr_ = graph_from_networkx(gr, self.node_label, self.edge_label) - else: - gr_ = graph_from_networkx( - gr, - self.node_label, - ) - - K = self.kern.transform(gr_) - if self.return_tensor and not isinstance(K, torch.Tensor): - K = torch.tensor(K) - return K - - def forward_t(self, gr2, gr1=None): - """ - Forward pass, but in tensor format. - - Parameters - ---------- - gr1: single networkx graph - - Returns - ------- - K: the kernel matrix - x2 or y: the leaf variable(s) with requires_grad enabled. - This allows future Jacobian-vector product to be efficiently computed. 
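`forward_t` returns the kernel matrix together with the test inputs as differentiable leaf tensors, precisely so that gradients can later flow back through the kernel. A hedged sketch of why that matters, with a plain RBF standing in for the graph kernel (all names here are illustrative, not the deleted API):

```python
# Sketch of the forward_t idea: keep the test inputs as a differentiable
# leaf so gradients of the kernel (and anything built on it) reach them.
import torch

def rbf(x1: torch.Tensor, x2: torch.Tensor, lengthscale: float = 1.0) -> torch.Tensor:
    d = torch.cdist(x1 / lengthscale, x2 / lengthscale)
    return torch.exp(-0.5 * d**2)

x_train = torch.rand(10, 4, dtype=torch.float64)
x_test = torch.rand(3, 4, dtype=torch.float64).requires_grad_(True)  # leaf variable

K = rbf(x_train, x_test)  # shape (10, 3)
score = K.sum()           # stand-in for e.g. an acquisition value
(grad,) = torch.autograd.grad(score, x_test)
print(grad.shape)         # (3, 4): d(score)/d(x_test)
```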
- """ - if self.undirected: - gr2 = transform_to_undirected(gr2) - - # Convert into GraKel compatible graph format - if self.base_type == "edge": - gr2 = graph_from_networkx(gr2, self.node_label, self.edge_label) - else: - gr2 = graph_from_networkx(gr2, self.node_label) - - if gr1 is None: - gr1 = self._train_transformed - else: - if self.undirected: - gr1 = transform_to_undirected(gr1) - if self.base_type == "edge": - gr1 = graph_from_networkx(gr1, self.node_label, self.edge_label) - else: - gr1 = graph_from_networkx(gr1, self.node_label) - - x_ = torch.tensor( - np.concatenate(self.kern.transform(gr1, return_embedding_only=True), axis=1) - ) - y_ = torch.tensor( - np.concatenate(self.kern.transform(gr2, return_embedding_only=True), axis=1) - ) - - # Note that the vector length of the WL procedure is indeterminate, and thus dim(Y) != dim(X) in general. - # However, since the newly observed features in the test data is always concatenated at the end of the feature - # matrix, these features will not matter for the inference, and as such we can safely truncate the feature - # matrix for the test data so that only those appearing in both the training and testing datasets are included. - - x_.requires_grad_() - y_ = y_[:, : x_.shape[1]].requires_grad_() - K = calculate_kernel_matrix_as_tensor(x_, y_, oa=self.oa, se_kernel=self.se) - return K, y_, x_ - - def feature_map(self, flatten=True): - """ - Get the feature map in term of encoding (position in the feature index): the feature string. - Parameters - ---------- - flatten: whether flatten the dict (originally, the result is layered in term of h (the number of WL iterations). - - Returns - ------- - - """ - if not self.requires_grad: - logging.warning( - "Requires_grad flag is off -- in this case, there is risk that the element order in the " - "feature map DOES NOT correspond to the order in the feature matrix. To suppress this warning," - "when initialising the WL kernel, do WeisfilerLehman(requires_grad=True)" - ) - if self._gram is None: - return None - if not flatten: - return self.kern._label_node_attr - else: - res = {} - for _, map_ in self.kern._label_node_attr.items(): - for k, v in map_.items(): - res.update({k: v}) - return res - - def feature_value(self, X_s): - """Given a list of architectures X_s, compute their WL embedding of size N_s x D, where N_s is the length - of the list and D is the number of training set features. - - Returns: - embedding: torch.Tensor of shape N_s x D, described above - names: list of shape D, which has 1-to-1 correspondence to each element of the embedding matrix above - """ - if not self.requires_grad: - logging.warning( - "Requires_grad flag is off -- in this case, there is risk that the element order in the " - "feature map DOES NOT correspond to the order in the feature matrix. 
To suppress this warning," - "when initialising the WL kernel, do WeisfilerLehman(requires_grad=True)" - ) - feat_map = self.feature_map(flatten=False) - len_feat_map = [len(f) for f in feat_map.values()] - X_s = graph_from_networkx( - X_s, - self.node_label, - ) - embedding = self.kern.transform(X_s, return_embedding_only=True) - for j, em in enumerate(embedding): - # Remove some of the spurious features that pop up sometimes - embedding[j] = em[:, : len_feat_map[j]] - - # Generate the final embedding - embedding = torch.tensor(np.concatenate(embedding, axis=1)) - return embedding, list(self.feature_map(flatten=True).values()) diff --git a/neps/optimizers/bayesian_optimization/mf_tpe.py b/neps/optimizers/bayesian_optimization/mf_tpe.py deleted file mode 100644 index 45e4adc48..000000000 --- a/neps/optimizers/bayesian_optimization/mf_tpe.py +++ /dev/null @@ -1,719 +0,0 @@ -from __future__ import annotations - -import random -from copy import deepcopy -from typing import Any, Iterable - -import numpy as np -import torch -from scipy.stats import spearmanr -from typing_extensions import Literal, override - -from neps.state.optimizer import BudgetInfo, OptimizationState -from neps.utils.types import ConfigResult, RawConfig -from neps.utils.common import instance_from_map -from neps.search_spaces import ( - CategoricalParameter, - ConstantParameter, - FloatParameter, - IntegerParameter, - SearchSpace, -) -from neps.optimizers.base_optimizer import BaseOptimizer -from neps.optimizers.bayesian_optimization.acquisition_samplers import ( - AcquisitionSamplerMapping, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - AcquisitionSampler, -) -from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping - -CUSTOM_FLOAT_CONFIDENCE_SCORES = dict(FloatParameter.DEFAULT_CONFIDENCE_SCORES) -CUSTOM_FLOAT_CONFIDENCE_SCORES.update({"ultra": 0.05}) - -CUSTOM_CATEGORICAL_CONFIDENCE_SCORES = dict( - CategoricalParameter.DEFAULT_CONFIDENCE_SCORES -) -CUSTOM_CATEGORICAL_CONFIDENCE_SCORES.update({"ultra": 8}) - - -class MultiFidelityPriorWeightedTreeParzenEstimator(BaseOptimizer): - def __init__( - self, - pipeline_space: SearchSpace, - use_priors: bool = True, - prior_num_evals: float = 2.5, - good_fraction: float = 0.3334, - random_interleave_prob: float = 0.0, - initial_design_size: int = 0, - prior_as_samples: bool = True, - pending_as_bad: bool = True, - fidelity_weighting: Literal["linear", "spearman"] = "spearman", - surrogate_model: str = "kde", - good_model_bw_factor: int = 1.5, - joint_kde_modelling: bool = False, - threshold_improvement: bool = True, - promote_from_acq: bool = True, - acquisition_sampler: str | AcquisitionSampler = "mutation", - prior_draws: int = 1000, - prior_confidence: Literal["low", "medium", "high"] = "medium", - surrogate_model_args: dict = None, - soft_promotion: bool = True, - patience: int = 50, - logger=None, - budget: None | int | float = None, - loss_value_on_error: None | float = None, - cost_value_on_error: None | float = None, - ): - """[summary] - - Args: - pipeline_space: Space in which to search - prior_num_evals (float, optional): [description]. Defaults to 2.5. - good_fraction (float, optional): [description]. Defaults to 0.333. - random_interleave_prob: Frequency at which random configurations are sampled - instead of configurations from the acquisition strategy. - initial_design_size: Number of 'x' samples that are to be evaluated before - selecting a sample using a strategy instead of randomly. 
If there is a
-                user prior, we can rely on the model from the very first iteration.
-            prior_as_samples: Whether to sample from the KDE and incorporate that way, or
-                just have the distribution be a linear combination of the KDE and the prior.
-                Should be True if the prior happens to be unnormalized.
-            pending_as_bad: Whether to treat pending observations as bad, assigning them to
-                the bad KDE to encourage diversity among samples queried in parallel.
-            prior_draws: The number of samples drawn from the prior if there is one. This
-                does not affect the strength of the prior, just how accurately it
-                is reconstructed by the KDE.
-            patience: How many times we try something that fails before giving up.
-            budget: Maximum budget.
-            loss_value_on_error: Setting this and cost_value_on_error to any float will
-                suppress any error during bayesian optimization and will use given loss
-                value instead. default: None
-            cost_value_on_error: Setting this and loss_value_on_error to any float will
-                suppress any error during bayesian optimization and will use given cost
-                value instead. default: None
-            logger: logger object, or None to use the neps logger
-        """
-        super().__init__(
-            pipeline_space=pipeline_space,
-            patience=patience,
-            logger=logger,
-            budget=budget,
-            loss_value_on_error=loss_value_on_error,
-            cost_value_on_error=cost_value_on_error,
-        )
-        self.pipeline_space = pipeline_space
-        self.good_fraction = good_fraction
-        if self.pipeline_space.has_fidelity:
-            self.min_fidelity = pipeline_space.fidelity.lower
-            self.max_fidelity = pipeline_space.fidelity.upper
-            self.rung_map, self.inverse_rung_map = self._get_rung_maps()
-            self.min_rung = 0
-            self.max_rung = len(self.rung_map) - 1
-
-        else:
-            self.min_rung = 0
-            self.max_rung = 0
-            self.min_fidelity = 1
-            self.max_fidelity = 1
-            self.rung_map, self.inverse_rung_map = self._get_rung_maps()
-
-        if initial_design_size == 0:
-            self._initial_design_size = len(self.pipeline_space) * np.round(
-                1 / self.good_fraction
-            ).astype(int)
-        else:
-            self._initial_design_size = initial_design_size
-        self.promote_from_acq = promote_from_acq
-
-        self.num_rungs = len(self.rung_map)
-        self.use_priors = use_priors
-        self.prior_num_evals = prior_num_evals
-        self._random_interleave_prob = random_interleave_prob
-        self._pending_as_bad = pending_as_bad
-        self.prior_draws = prior_draws
-        self._has_promotable_configs = False
-        self.soft_promotion = soft_promotion
-        self.joint_kde_modelling = joint_kde_modelling
-        # if we use priors, we don't add configurations as good until it is within the top fraction
-        # This heuristic has not been tried further, but makes sense in the context when we have priors
-        self.round_up = not use_priors
-        self.fidelity_weighting = fidelity_weighting
-        self.threshold_improvement = threshold_improvement
-        # TODO have this read in as part of load_results - it cannot be saved as an attribute when
-        # running parallel instances of the algorithm (since the old configs are shared, not instance-specific)
-        self.old_configs_per_fid = [[] for i in range(self.num_rungs)]
-        # We assume that the information conveyed per fidelity (and the cost) is linear in the
-        # fidelity levels if nothing else is specified
-        if surrogate_model != "kde":
-            raise NotImplementedError(
-                "Only supports KDEs for now. Could (maybe?) support binary classification in the future."
- ) - self.acquisition_sampler = instance_from_map( - AcquisitionSamplerMapping, - acquisition_sampler, - name="acquisition sampler function", - kwargs={"patience": self.patience, "pipeline_space": self.pipeline_space}, - ) - self.prior_confidence = prior_confidence - self._enhance_priors() - surrogate_model_args = surrogate_model_args or {} - - param_types, num_options, logged_params, is_fidelity = self._get_types() - surrogate_model_args["param_types"] = param_types - surrogate_model_args["num_options"] = num_options - surrogate_model_args["is_fidelity"] = is_fidelity - surrogate_model_args["logged_params"] = logged_params - good_model_args = deepcopy(surrogate_model_args) - good_model_args["bandwidth_factor"] = good_model_bw_factor - if self.pipeline_space.has_prior and use_priors: - if prior_as_samples: - self.prior_samples = [ - self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=False - ) - for idx in range(self.prior_draws) - ] - else: - pass - # TODO work out affine combination - else: - self.prior_samples = [] - - self.surrogate_models = { - "good": instance_from_map( - SurrogateModelMapping, - surrogate_model, - name="surrogate model", - kwargs=good_model_args, - ), - "bad": instance_from_map( - SurrogateModelMapping, - surrogate_model, - name="surrogate model", - kwargs=surrogate_model_args, - ), - "all": instance_from_map( - SurrogateModelMapping, - surrogate_model, - name="surrogate model", - kwargs=surrogate_model_args, - ), - } - self.acquisition = self - self.acquisition_sampler = instance_from_map( - AcquisitionSamplerMapping, - acquisition_sampler, - name="acquisition sampler function", - kwargs={"patience": self.patience, "pipeline_space": self.pipeline_space}, - ) - - def _enhance_priors(self): - """Only applicable when priors are given along with a confidence.""" - if not self.use_priors and self.prior_confidence is None: - return - for k in self.pipeline_space.keys(): - if self.pipeline_space[k].is_fidelity: - continue - elif isinstance(self.pipeline_space[k], (FloatParameter, IntegerParameter)): - confidence = CUSTOM_FLOAT_CONFIDENCE_SCORES[self.prior_confidence] - self.pipeline_space[k].default_confidence_score = confidence - elif isinstance(self.pipeline_space[k], CategoricalParameter): - confidence = CUSTOM_CATEGORICAL_CONFIDENCE_SCORES[self.prior_confidence] - self.pipeline_space[k].default_confidence_score = confidence - - def _get_rung_maps(self, s: int = 0) -> dict: - """Maps rungs (0,1,...,k) to a fidelity value based on fidelity bounds, eta, s.""" - eta = round(1 / self.good_fraction) - new_min_budget = self.min_fidelity * (1 / eta**s) - nrungs = ( - np.floor(np.log(self.max_fidelity / new_min_budget) / np.log(eta)).astype(int) - + 1 - ) - _max_budget = self.max_fidelity - rung_map = dict() - inverse_rung_map = dict() - for i in reversed(range(nrungs)): - # TODO: add +s to keys and TEST - rung_value = ( - int(_max_budget) - if isinstance(self.pipeline_space.fidelity, IntegerParameter) - else _max_budget - ) - - rung_map[i + s] = rung_value - inverse_rung_map[rung_value] = i + s - _max_budget /= eta - return rung_map, inverse_rung_map - - def _get_types(self): - """extracts the needed types from the configspace for faster retrival later - - type = 0 - numerical (continuous or integer) parameter - type >=1 - categorical parameter - - TODO: figure out a way to properly handle ordinal parameters - - """ - types = [] - num_values = [] - logs = [] - is_fidelity = [] - for _, hp in self.pipeline_space.items(): - 
is_fidelity.append(hp.is_fidelity)
-            if isinstance(hp, CategoricalParameter):
-                # u as in unordered - used to play nice with the statsmodels KDE implementation
-                types.append("u")
-                logs.append(False)
-                num_values.append(len(hp.choices))
-            elif isinstance(hp, IntegerParameter):
-                # o as in ordered
-                types.append("o")
-                logs.append(False)
-                num_values.append(hp.upper - hp.lower + 1)
-            elif isinstance(hp, FloatParameter):
-                # f as in float (continuous)
-                types.append("f")
-                logs.append(hp.log)
-                num_values.append(np.inf)
-            elif isinstance(hp, ConstantParameter):
-                # c as in constant
-                types.append("c")
-                logs.append(False)
-                num_values.append(1)
-
-            else:
-                raise ValueError("Unsupported parameter type %s" % type(hp))
-
-        return types, num_values, logs, is_fidelity
-
-    def __call__(
-        self,
-        x: Iterable,
-        asscalar: bool = False,
-        only_lowest_fidelity=True,
-        only_good=False,
-    ) -> np.ndarray | torch.Tensor | float:
-        """
-        Return the log density ratio of the good and bad KDEs at the query point
-        (the TPE acquisition value; higher is better).
-        """
-        # this is to only make the lowest fidelity viable
-        # TODO have this as a setting in the acq_sampler instead
-        if only_lowest_fidelity:
-            is_lowest_fidelity = (
-                np.array([x_.fidelity.value for x_ in x]) == self.rung_map[self.min_rung]
-            )
-            return np.log(self.surrogate_models["good"].pdf(x)) - np.log(
-                self.surrogate_models["bad"].pdf(x)
-            )
-        else:
-            return np.log(self.surrogate_models["good"].pdf(x)) - np.log(
-                self.surrogate_models["bad"].pdf(x)
-            )
-
-    def _split_by_fidelity(self, configs, losses):
-        if self.pipeline_space.has_fidelity:
-            configs_per_fidelity = [[] for i in range(self.num_rungs)]
-            losses_per_fidelity = [[] for i in range(self.num_rungs)]
-            # per fidelity, add a list to make it a nested list of lists
-            # [[config_A at fid1, config_B at fid1], [config_C at fid2], ...]
-            for config, loss in zip(configs, losses):
-                rung = self.inverse_rung_map[int(config.fidelity.value)]
-                configs_per_fidelity[rung].append(config)
-                losses_per_fidelity[rung].append(loss)
-            return configs_per_fidelity, losses_per_fidelity
-        else:
-            return [configs], [losses]
-
-    def _split_configs(
-        self, configs_per_fid, losses_per_fid, weight_per_fidelity, good_fraction=None
-    ):
-        """Splits configs into good and bad for the KDEs.
-
-        Args:
-            configs ([type]): [description]
-            losses ([type]): [description]
-            round_up (bool, optional): [description]. Defaults to True.
- - Returns: - [type]: [description] - """ - if good_fraction is None: - good_fraction = self.good_fraction - - good_configs, bad_configs = [], [] - good_configs_weights, bad_configs_weights = [], [] - - for fid, (configs_fid, losses_fid) in enumerate( - zip(configs_per_fid, losses_per_fid) - ): - if self.round_up: - num_good_configs = np.ceil(len(configs_fid) * good_fraction).astype(int) - else: - num_good_configs = np.floor(len(configs_fid) * good_fraction).astype(int) - - ordered_loss_indices = np.argsort(losses_fid) - good_indices = ordered_loss_indices[0:num_good_configs] - bad_indices = ordered_loss_indices[num_good_configs:] - good_configs_fid = [configs_fid[idx] for idx in good_indices] - bad_configs_fid = [configs_fid[idx] for idx in bad_indices] - good_configs.extend(good_configs_fid) - bad_configs.extend(bad_configs_fid) - - if self.threshold_improvement: - good_configs_weights.extend( - self._compute_improvement_weights( - losses_fid, num_good_configs, weight_per_fidelity[fid] - ) - ) - else: - good_configs_weights.extend( - [weight_per_fidelity[fid]] * len(good_configs_fid) - ) - bad_configs_weights.extend([weight_per_fidelity[fid]] * len(bad_configs_fid)) - return good_configs, bad_configs, good_configs_weights, bad_configs_weights - - def _compute_improvement_weights(self, losses, num_good_configs, max_weight): - if num_good_configs == 0: - return [] - - ordered_losses = np.sort(losses) - best_bad_loss = ordered_losses[num_good_configs] - good_losses = ordered_losses[0:num_good_configs] - relative_improvements = (best_bad_loss - good_losses) / ( - best_bad_loss - good_losses.min() - ) - improvement_weights = max_weight * relative_improvements - return improvement_weights - - def compute_fidelity_weights(self, configs_per_fid, losses_per_fid) -> list: - # TODO consider pending configurations - will default to a linear weighting - # which is not necessarily correct - if self.fidelity_weighting == "linear": - weight_per_fidelity = self._compute_linear_weights() - elif self.fidelity_weighting == "spearman": - weight_per_fidelity = self._compute_spearman_weights( - configs_per_fid, losses_per_fid - ) - else: - raise ValueError( - f"No weighting scheme {self.fidelity_weighting} is available." 
-            )
-        return weight_per_fidelity
-
-    def _compute_linear_weights(self):
-        return (1 + np.arange(self.min_rung, self.max_rung + 1)) / self.num_rungs
-
-    def _compute_spearman_weights(self, configs_per_fid, losses_per_fid) -> list:
-        min_number_samples = np.round(1 / self.good_fraction).astype(int)
-        samples_per_fid = np.array([len(cfgs_fid) for cfgs_fid in configs_per_fid])
-        max_comparable_fid = (
-            self.max_rung - np.argmax(np.flip(samples_per_fid) >= min_number_samples)
-        ).astype(int)
-        if max_comparable_fid == 0:
-            # if we cannot compare to any other fidelity, return default
-            return self._compute_linear_weights()
-        else:
-            # compare the rankings of the existing configurations to the ranking
-            # of the same configurations at lower rungs
-            spearman = np.ones(self.num_rungs)
-            for fid_idx, (cfgs, losses) in enumerate(
-                zip(configs_per_fid, losses_per_fid)
-            ):
-                if fid_idx >= max_comparable_fid:
-                    spearman[fid_idx] = 1
-
-                else:
-                    comp_losses = losses_per_fid[fid_idx + 1]
-                    comp_configs = configs_per_fid[fid_idx + 1]
-
-                    lower_fid_configs = [None] * len(comp_configs)
-                    lower_fid_losses = [None] * len(comp_configs)
-                    for cfg, loss in zip(cfgs, losses):
-                        # check if the config at the lower fidelity level is in the comparison set
-                        # TODO make this more efficient - probably embarrassingly slow for now
-                        # with the triple-nested loop (although number of configs per level is pretty low)
-                        is_equal_config = [
-                            cfg.is_equal_value(comp_cfg, include_fidelity=False)
-                            for comp_cfg in comp_configs
-                        ]
-                        if any(is_equal_config):
-                            equal_index = np.argmax(is_equal_config)
-                            lower_fid_configs[equal_index] = cfg
-                            lower_fid_losses[equal_index] = loss
-
-                    spearman[fid_idx] = spearmanr(
-                        lower_fid_losses, comp_losses
-                    ).correlation
-
-            spearman = np.clip(spearman, a_min=0, a_max=1)
-            # The correlation with Z_max at fidelity Z-k cannot be larger than at Z-k+1
-            spearman = np.flip(np.multiply.accumulate(np.flip(spearman)))
-            fidelity_weights = spearman * (max_comparable_fid + 1) / (self.max_rung + 1)
-            return fidelity_weights
-
-    def is_init_phase(self) -> bool:
-        """Decides if optimization is still in the warmstart phase or model-based search."""
-        if self._num_train_x >= self._initial_design_size:
-            return False
-        return True
-
-    @override
-    def load_optimization_state(
-        self,
-        previous_results: dict[str, ConfigResult],
-        pending_evaluations: dict[str, SearchSpace],
-        budget_info: BudgetInfo | None,
-        optimizer_state: dict[str, Any],
-    ) -> None:
-        # TODO remove doubles from previous results
-        train_y = [self.get_loss(el.result) for el in previous_results.values()]
-
-        train_x_configs = [el.config for el in previous_results.values()]
-        pending_configs = list(pending_evaluations.values())
-
-        filtered_configs, filtered_indices = self._filter_old_configs(train_x_configs)
-        filtered_y = np.array(train_y)[filtered_indices].tolist()
-
-        self.train_x_configs = train_x_configs
-        self.train_y = train_y
-
-        self._pending_evaluations = pending_evaluations
-        self._num_train_x = len(self.train_x_configs)
-        if not self.is_init_phase():
-            # This is to extract the configurations as numpy arrays in the format num_data x num_dim
-            # TODO when a config is removed in the filtering process, that means that some other
-            # configuration at the lower fidelity will become good, that was previously bad. This
-            # may be good or bad, but I'm not sure.
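The Spearman weighting above can be reproduced in isolation: rank-correlate the losses of the same configurations at adjacent rungs, clip to [0, 1], then take a cumulative product from the top rung down so that a lower rung is never trusted more than the one above it. A toy sketch with made-up losses (two rungs only; the real method also rescales by the highest comparable fidelity):

```python
# Toy sketch of the fidelity-weighting idea; the loss values are invented
# purely for illustration.
import numpy as np
from scipy.stats import spearmanr

losses_low = [0.9, 0.7, 0.5, 0.6, 0.3]     # same 5 configs at the low rung
losses_high = [0.8, 0.75, 0.55, 0.4, 0.2]  # ... and at the rung above

rho = spearmanr(losses_low, losses_high).correlation
spearman = np.array([rho, 1.0])  # the top rung correlates perfectly with itself
spearman = np.clip(spearman, 0.0, 1.0)
# cumulative product from the top down enforces monotonicity across rungs
weights = np.flip(np.multiply.accumulate(np.flip(spearman)))
print(weights)  # [0.9 1. ]
```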
/ Carl - configs_per_fid, losses_per_fid = self._split_by_fidelity( - train_x_configs, train_y - ) - filtered_configs_per_fid, filtered_losses_per_fid = self._split_by_fidelity( - filtered_configs, filtered_y - ) - weight_per_fidelity = self.compute_fidelity_weights( - configs_per_fid, losses_per_fid - ) - - good_configs, bad_configs, good_weights, bad_weights = self._split_configs( - filtered_configs_per_fid, filtered_losses_per_fid, weight_per_fidelity - ) - if self.use_priors: - num_prior_configs = len(self.prior_samples) - good_configs.extend(self.prior_samples) - prior_sample_constant = self.prior_num_evals / num_prior_configs - good_weights.extend([prior_sample_constant] * num_prior_configs) - - fixed_bw = None - self.surrogate_models["all"].fit(filtered_configs) - if self.joint_kde_modelling: - fixed_bw = self.surrogate_models["all"].bw - - self.surrogate_models["good"].fit( - good_configs, fixed_bw=fixed_bw, config_weights=good_weights - ) - if self._pending_as_bad: - # This is only to compute the weights of the pending configs - _, pending_configs, _, pending_weights = self._split_configs( - pending_configs, - [np.inf] * len(pending_configs), - weight_per_fidelity, - good_fraction=0.0, - ) - bad_configs.extend(pending_configs) - bad_weights.extend(pending_weights) - - self.surrogate_models["bad"].fit( - bad_configs, fixed_bw=fixed_bw, config_weights=bad_weights - ) - # self.visualize_acq(previous_results, weight_per_fidelity) - - def _filter_old_configs(self, configs): - new_configs = [] - new_indices = [] - old_configs_flat = [] - for cfgs in self.old_configs_per_fid: - old_configs_flat.extend(cfgs) - - for idx, cfg in enumerate(configs): - if any([cfg.is_equal_value(old_cfg) for old_cfg in old_configs_flat]): - # If true, configs are equal and shouldn't be added - continue - else: - new_configs.append(cfg) - new_indices.append(idx) - return new_configs, new_indices - - def _get_promotable_configs(self, configs): - if self.soft_promotion: - configs_for_promotion = self._get_soft_promotable(configs) - else: - configs_for_promotion = self._get_hard_promotable(configs) - return configs_for_promotion - - def _get_hard_promotable(self, configs): - # count the number of configs that are at or above any given rung - configs_per_rung = np.zeros(self.num_rungs) - # check the number of configs per fidelity level - for config in configs: - rung = self.inverse_rung_map[int(config.fidelity.value)] - configs_per_rung[rung] += 1 - - cumulative_per_rung = np.flip(np.cumsum(np.flip(configs_per_rung))) - cumulative_above = np.append(np.flip(np.cumsum(np.flip(configs_per_rung[1:]))), 0) - # then check which one can make the most informed decision on promotions - rungs_to_promote = cumulative_per_rung * self.good_fraction - cumulative_above - - # this defaults to max_fidelity if there is no promotable config (cannot promote from) - # the top fidelity anyway - fid_to_promote = self.num_rungs - np.argmax(np.flip(rungs_to_promote) > 1) - - # TODO check if this returns empty when it needs to - if fid_to_promote == self.max_rung: - return [] - return [cfg for cfg in configs if cfg.fidelity.value == fid_to_promote] - - def _get_soft_promotable(self, configs): - # TODO implement - # count the number of configs that are at or above any given rung - new_configs, _ = self._filter_old_configs(configs) - configs_per_rung = np.zeros(self.num_rungs) - - # check the number of configs per fidelity level - for config in new_configs: - rung = self.inverse_rung_map[int(config.fidelity.value)] - configs_per_rung[rung] 
+= 1
-
-        # The square root means that we keep the approximate distribution between
-        # rungs as in HyperBand
-        rungs_to_promote = configs_per_rung * np.power(
-            self.good_fraction, np.flip(np.sqrt(np.arange(self.num_rungs)))
-        )
-        rungs_to_promote[-1] = 0
-        next_rung_to_promote = np.arange(self.num_rungs)[rungs_to_promote > 1]
-        if len(next_rung_to_promote) == 0:
-            return []
-
-        next_fid_to_promote = self.rung_map[next_rung_to_promote[0]]
-        return [cfg for cfg in new_configs if cfg.fidelity.value == next_fid_to_promote]
-
-    def _promote_existing(self, configs_for_promotion):
-        # TODO we still need to REMOVE the observation at the lower fidelity
-        # i.e. give it zero weight in the KDE, and ensure the count is correct
-        assert len(configs_for_promotion) > 0, "No promotable configurations"
-        if self.promote_from_acq:
-            acq_values = self.__call__(configs_for_promotion, only_lowest_fidelity=False)
-        else:
-            acq_values = self.__call__(
-                configs_for_promotion, only_lowest_fidelity=False, only_good=True
-            )
-
-        next_config = configs_for_promotion[np.argmax(acq_values)]
-        current_rung = self.inverse_rung_map[next_config.fidelity.value]
-        self.old_configs_per_fid[current_rung].append(next_config.copy())
-        new_fidelity = self.rung_map[current_rung + 1]
-        next_config.fidelity.set_value(new_fidelity)
-        return next_config
-
-    def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]:
-        if self._num_train_x == 0 and self._initial_design_size >= 1:
-            # TODO only at lowest fidelity
-            config = self.pipeline_space.sample(
-                patience=self.patience, user_priors=True, ignore_fidelity=False
-            )
-            config.fidelity.set_value(self.rung_map[self.min_rung])
-
-        elif self.is_init_phase():
-            config = self.pipeline_space.sample(
-                patience=self.patience, user_priors=True, ignore_fidelity=True
-            )
-            config.fidelity.set_value(self.rung_map[self.min_rung])
-
-        elif random.random() < self._random_interleave_prob:
-            # TODO only at lowest fidelity
-            config = self.pipeline_space.sample(
-                patience=self.patience, ignore_fidelity=False, user_priors=False
-            )
-            config.fidelity.set_value(self.rung_map[self.min_rung])
-        elif len(self._get_promotable_configs(self.train_x_configs)) > 0:
-            configs_for_promotion = self._get_promotable_configs(self.train_x_configs)
-            config = self._promote_existing(configs_for_promotion)
-
-        else:
-            config = self.acquisition_sampler.sample(self.acquisition)
-            config.fidelity.set_value(self.rung_map[self.min_rung])
-
-        config_id = str(self._num_train_x + len(self._pending_evaluations) + 1)
-        return config.hp_values(), config_id, None
-
-    def visualize_2d(
-        self, ax, previous_results, grid_points: int = 101, color: str = "k"
-    ):
-        X1 = np.linspace(0, 1, grid_points)
-        X2 = np.linspace(0, 1, grid_points)
-        X1, X2 = np.meshgrid(X1, X2)
-        X = np.append(X1.reshape(-1, 1), X2.reshape(-1, 1), axis=1)
-        Z = self.surrogate_models["good"]._pdf(X) / self.surrogate_models["bad"]._pdf(X)
-        Z_min, Z_max = -np.abs(Z).max(), np.abs(Z).max()
-
-        Z = Z.reshape(grid_points, grid_points)
-
-        c = ax.pcolormesh(X1, X2, Z, cmap=color, vmin=Z_min, vmax=Z_max)
-        ax.set_title("pcolormesh")
-        # set the limits of the plot to the limits of the data
-        ax.axis([0, 1, 0, 1])
-        train_x_configs = [el.config for el in previous_results.values()]
-        np_X = self.surrogate_models["good"]._convert_configs_to_numpy(train_x_configs)
-        ax.scatter(np_X[:, 0], np_X[:, 1], s=100)
-        # ax.scatter(np_X[-1, 0], np_X[-1, 1], s=100, c='yellow')
-
-        return ax
-
-    def visualize_acq(self, previous_results, weights_per_fidelity):
-        import
matplotlib.pyplot as plt - - train_x_configs = [el.config for el in previous_results.values()] - train_y = [self.get_loss(el.result) for el in previous_results.values()] - - filtered_configs, filtered_indices = self._filter_old_configs(train_x_configs) - configs_per_fid, losses_per_fid = self._split_by_fidelity( - train_x_configs, train_y - ) - filtered_y = np.array(train_y)[filtered_indices].tolist() - filtered_configs_per_fid, filtered_losses_per_fid = self._split_by_fidelity( - filtered_configs, filtered_y - ) - weight_per_fidelity = self.compute_fidelity_weights( - configs_per_fid, losses_per_fid - ) - good_configs, bad_configs, good_weights, bad_weights = self._split_configs( - filtered_configs_per_fid, filtered_losses_per_fid, weight_per_fidelity - ) - good_configs_np = self.surrogate_models["all"]._convert_configs_to_numpy( - good_configs - ) - bad_configs_np = self.surrogate_models["all"]._convert_configs_to_numpy( - bad_configs - ) - - fig, axes = plt.subplots(1, 3, figsize=(16, 9)) - axes[0] = self.surrogate_models["good"].visualize_2d(axes[0], color="RdBu") - axes[0].scatter( - good_configs_np[:, 0], - good_configs_np[:, 1], - c=good_weights, - cmap="spring", - s=50, - marker="x", - ) - axes[1] = self.surrogate_models["bad"].visualize_2d(axes[1], color="RdBu_r") - axes[1].scatter( - bad_configs_np[:, 0], - bad_configs_np[:, 1], - c=bad_weights, - s=50, - cmap="spring", - marker="x", - ) - axes[2] = self.visualize_2d(axes[2], previous_results, color="BrBG") - plt.show() diff --git a/neps/optimizers/bayesian_optimization/models/__init__.py b/neps/optimizers/bayesian_optimization/models/__init__.py index c76bedfd4..034049a33 100755 --- a/neps/optimizers/bayesian_optimization/models/__init__.py +++ b/neps/optimizers/bayesian_optimization/models/__init__.py @@ -1,21 +1,4 @@ -from neps.utils.common import MissingDependencyError +from neps.optimizers.bayesian_optimization.models.ftpfn import FTPFNSurrogate +from neps.optimizers.bayesian_optimization.models.gp import make_default_single_obj_gp -from .gp import ComprehensiveGP -from .gp_hierarchy import ComprehensiveGPHierarchy - -try: - from .deepGP import DeepGP -except ImportError as e: - DeepGP = MissingDependencyError("gpytorch", e) - -try: - from .pfn import PFN_SURROGATE # only if available locally -except Exception as e: - PFN_SURROGATE = MissingDependencyError("pfn", e) - -SurrogateModelMapping = { - "deep_gp": DeepGP, - "gp": ComprehensiveGP, - "gp_hierarchy": ComprehensiveGPHierarchy, - "pfn": PFN_SURROGATE, -} +__all__ = ["FTPFNSurrogate", "make_default_single_obj_gp"] diff --git a/neps/optimizers/bayesian_optimization/models/deepGP.py b/neps/optimizers/bayesian_optimization/models/deepGP.py deleted file mode 100644 index d51450437..000000000 --- a/neps/optimizers/bayesian_optimization/models/deepGP.py +++ /dev/null @@ -1,634 +0,0 @@ -from __future__ import annotations - -import logging -import os -from copy import deepcopy -from pathlib import Path - -import gpytorch -import numpy as np -import torch -import torch.nn as nn - -from ....search_spaces.search_space import ( - CategoricalParameter, - FloatParameter, - IntegerParameter, - SearchSpace, -) - - -def count_non_improvement_steps(root_directory: Path | str) -> int: - root_directory = Path(root_directory) - - all_losses_file = root_directory / "all_losses_and_configs.txt" - best_loss_fiel = root_directory / "best_loss_trajectory.txt" - - # Read all losses from the file in the order they are explored - losses = [ - float(line[6:]) - for line in 
all_losses_file.read_text(encoding="utf-8").splitlines() - if "Loss: " in line - ] - # Get the best seen loss value - best_loss = float(best_loss_fiel.read_text(encoding="utf-8").splitlines()[-1].strip()) - - # Count the non-improvement - count = 0 - for loss in reversed(losses): - if np.greater(loss, best_loss): - count += 1 - else: - break - - return count - - -class NeuralFeatureExtractor(nn.Module): - """ - Neural network to be used in the DeepGP - """ - - def __init__(self, input_size: int, **kwargs): - super().__init__() - - # Set number of hyperparameters - self.input_size = input_size - - self.n_layers = kwargs.get("n_layers", 2) - self.activation = nn.LeakyReLU() - - layer1_units = kwargs.get("layer1_units", 128) - self.fc1 = nn.Linear(input_size, layer1_units) - self.bn1 = nn.BatchNorm1d(layer1_units) - - previous_layer_units = layer1_units - for i in range(2, self.n_layers): - next_layer_units = kwargs.get(f"layer{i}_units", 256) - setattr( - self, - f"fc{i}", - nn.Linear(previous_layer_units, next_layer_units), - ) - setattr( - self, - f"bn{i}", - nn.BatchNorm1d(next_layer_units), - ) - previous_layer_units = next_layer_units - - setattr( - self, - f"fc{self.n_layers}", - nn.Linear( - previous_layer_units + kwargs.get("cnn_nr_channels", 4), - # accounting for the learning curve features - kwargs.get(f"layer{self.n_layers}_units", 256), - ), - ) - self.cnn = nn.Sequential( - nn.Conv1d( - in_channels=1, - kernel_size=(kwargs.get("cnn_kernel_size", 3),), - out_channels=4, - ), - nn.AdaptiveMaxPool1d(1), - ) - - def forward(self, x, budgets, learning_curves): - # add an extra dimensionality for the budget - # making it nr_rows x 1. - budgets = torch.unsqueeze(budgets, dim=1) - # concatenate budgets with examples - x = torch.cat((x, budgets), dim=1) - x = self.fc1(x) - x = self.activation(self.bn1(x)) - - for i in range(2, self.n_layers): - x = self.activation(getattr(self, f"bn{i}")(getattr(self, f"fc{i}")(x))) - - # add an extra dimensionality for the learning curve - # making it nr_rows x 1 x lc_values. - learning_curves = torch.unsqueeze(learning_curves, 1) - lc_features = self.cnn(learning_curves) - # revert the output from the cnn into nr_rows x nr_kernels. - lc_features = torch.squeeze(lc_features, 2) - - # put learning curve features into the last layer along with the higher level features. - x = torch.cat((x, lc_features), dim=1) - x = self.activation(getattr(self, f"fc{self.n_layers}")(x)) - - return x - - -class GPRegressionModel(gpytorch.models.ExactGP): - """ - A simple GP model. - """ - - def __init__( - self, - train_x: torch.Tensor, - train_y: torch.Tensor, - likelihood: gpytorch.likelihoods.GaussianLikelihood, - ): - """ - Constructor of the GPRegressionModel. - - Args: - train_x: The initial train examples for the GP. - train_y: The initial train labels for the GP. - likelihood: The likelihood to be used. 
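`GPRegressionModel` is the standard gpytorch exact-GP skeleton; `DeepGP` below pairs it with `NeuralFeatureExtractor` so the GP operates on learned features rather than raw inputs. A minimal sketch of that deep-kernel pattern, assuming gpytorch's usual `ExactGP` API (the toy network, data, and training step are illustrative, not the deleted class):

```python
# Minimal deep-kernel sketch: a small network projects raw inputs into a
# feature space, and an exact GP with an RBF kernel is fit on the features.
import torch
import gpytorch

class TinyGP(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x), self.covar_module(x)
        )

net = torch.nn.Sequential(torch.nn.Linear(5, 16), torch.nn.ReLU(), torch.nn.Linear(16, 8))
x, y = torch.rand(20, 5), torch.rand(20)

likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = TinyGP(net(x), y, likelihood)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# One joint optimisation step over GP and network parameters.
model.train()
likelihood.train()
net.train()
opt = torch.optim.Adam(list(model.parameters()) + list(net.parameters()), lr=1e-3)
opt.zero_grad()
z = net(x)  # recompute features each step so gradients reach the network
model.set_train_data(inputs=z, targets=y, strict=False)
loss = -mll(model(z), y)
loss.backward()
opt.step()
```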
- """ - super().__init__(train_x, train_y, likelihood) - - self.mean_module = gpytorch.means.ConstantMean() - self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()) - - def forward(self, x): - mean_x = self.mean_module(x) - covar_x = self.covar_module(x) - - return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) - - -class DeepGP: - """ - Gaussian process with a deep kernel - """ - - def __init__( - self, - pipeline_space: SearchSpace, - neural_network_args: dict | None = None, - logger=None, - surrogate_model_fit_args: dict | None = None, - # IMPORTANT: Checkpointing does not use file locking, - # IMPORTANT: hence, it is not suitable for multiprocessing settings - checkpointing: bool = False, - root_directory: Path | str | None = None, - checkpoint_file: Path | str = "surrogate_checkpoint.pth", - refine_epochs: int = 50, - **kwargs, - ): - self.surrogate_model_fit_args = ( - surrogate_model_fit_args if surrogate_model_fit_args is not None else {} - ) - - self.checkpointing = checkpointing - self.refine_epochs = refine_epochs - if checkpointing: - assert ( - root_directory is not None - ), "neps root_directory must be provided for the checkpointing" - self.root_dir = Path(os.getcwd(), root_directory) - self.checkpoint_path = Path(os.getcwd(), root_directory, checkpoint_file) - - super().__init__() - self.__preprocess_search_space(pipeline_space) - # set the categories array for the encoder - self.categories_array = np.array(self.categories) - - if neural_network_args is None: - neural_network_args = {} - self.nn_args = neural_network_args - - self.device = ( - torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") - ) - # self.device = torch.device("cpu") - - # Save the NN args, necessary for preprocessing - self.cnn_kernel_size = neural_network_args.get("cnn_kernel_size", 3) - self.model, self.likelihood, self.mll = self.__initialize_gp_model( - neural_network_args.get("n_layers", 2) - ) - - # build the neural network - self.nn = NeuralFeatureExtractor(self.input_size, **neural_network_args) - - self.logger = logger or logging.getLogger("neps") - - def __initialize_gp_model( - self, - train_size: int, - ) -> tuple[ - GPRegressionModel, - gpytorch.likelihoods.GaussianLikelihood, - gpytorch.mlls.ExactMarginalLogLikelihood, - ]: - """ - Called when the surrogate is first initialized or restarted. - - Args: - train_size: The size of the current training set. - - Returns: - model, likelihood, mll - The GP model, the likelihood and - the marginal likelihood. 
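`__encode_config` further below turns a configuration into one shared one-hot block for all categorical choices plus min-max-normalised continuous values, concatenated into a single vector. A toy standalone version of that encoding; the names, the example search space, and the `encode` helper are made up for illustration:

```python
# Toy version of the encoding scheme: categoricals become a shared one-hot
# block, continuous values are min-max normalised, and both are concatenated.
import numpy as np

categories = np.array(["relu", "tanh", "adam", "sgd"])  # all choices of all categorical hps

def encode(config: dict, bounds: dict) -> np.ndarray:
    one_hot = np.zeros(len(categories))
    continuous = []
    for name, value in config.items():
        if isinstance(value, str):  # categorical: mark its slot in the shared block
            one_hot[np.argwhere(categories == value)] = 1
        else:  # continuous: min-max normalise with the parameter's bounds
            lo, hi = bounds[name]
            continuous.append((value - lo) / (hi - lo))
    return np.concatenate([one_hot, np.array(continuous)])

vec = encode({"act": "relu", "opt": "sgd", "lr": 0.01}, bounds={"lr": (1e-4, 1.0)})
print(vec)  # [1. 0. 0. 1. 0.0099...]
```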
- """ - train_x = torch.ones(train_size, train_size).to(self.device) - train_y = torch.ones(train_size).to(self.device) - - likelihood = gpytorch.likelihoods.GaussianLikelihood().to(self.device) - model = GPRegressionModel( - train_x=train_x, train_y=train_y, likelihood=likelihood - ).to(self.device) - mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model).to(self.device) - return model, likelihood, mll - - def __preprocess_search_space(self, pipeline_space: SearchSpace): - self.categories = [] - self.categorical_hps = [] - - parameter_count = 0 - for hp_name, hp in pipeline_space.items(): - # Collect all categories in a list for the encoder - if isinstance(hp, CategoricalParameter): - self.categorical_hps.append(hp_name) - self.categories.extend(hp.choices) - parameter_count += len(hp.choices) - else: - parameter_count += 1 - - # add 1 for budget - self.input_size = parameter_count - self.continuous_params_size = self.input_size - len(self.categories) - self.min_fidelity = pipeline_space.fidelity.lower - self.max_fidelity = pipeline_space.fidelity.upper - - def __encode_config(self, config: SearchSpace): - categorical_encoding = np.zeros_like(self.categories_array) - continuous_values = [] - - for hp_name, hp in config.items(): - if hp.is_fidelity: - continue # Ignore fidelity - if hp_name in self.categorical_hps: - label = hp.value - categorical_encoding[np.argwhere(self.categories_array == label)] = 1 - else: - continuous_values.append(hp.value_to_normalized(hp.value)) - - continuous_encoding = np.array(continuous_values) - - encoding = np.concatenate([categorical_encoding, continuous_encoding]) - return encoding - - def __extract_budgets( - self, x_train: list[SearchSpace], normalized: bool = True - ) -> np.ndarray: - budgets = np.array([config.fidelity.value for config in x_train], dtype=np.single) - if normalized: - normalized_budgets = (budgets - self.min_fidelity) / ( - self.max_fidelity - self.min_fidelity - ) - budgets = normalized_budgets - return budgets - - def __preprocess_learning_curves( - self, learning_curves: list[list[float]], padding_value: float = 0.0 - ) -> np.ndarray: - # Add padding to the learning curves to make them the same size - - # Get max learning_curve length - max_length = 0 - for lc in learning_curves: - length = len(lc) - if length > max_length: - max_length = length - - for lc in learning_curves: - # add padding to the learning curve to fit the cnn kernel or - # the max_length depending on which is the largest - padding_length = max([max_length - len(lc), self.cnn_kernel_size - len(lc)]) - lc.extend([padding_value] * padding_length) - - # TODO: check if the lc values are within bounds [0, 1] (karibbov) - # TODO: add normalize_lcs option in the future - - return np.array(learning_curves, dtype=np.single) - - def __reset_xy( - self, - x_train: list[SearchSpace], - y_train: list[float], - learning_curves: list[list[float]], - normalize_y: bool = False, - normalize_budget: bool = True, - ): - self.normalize_budget = normalize_budget - self.normalize_y = normalize_y - - x_train, train_budgets, learning_curves = self._preprocess_input( - x_train, learning_curves, normalize_budget - ) - - y_train = self._preprocess_y(y_train, normalize_y) - - self.x_train = x_train - self.train_budgets = train_budgets - self.learning_curves = learning_curves - self.y_train = y_train - - def _preprocess_input( - self, - x: list[SearchSpace], - learning_curves: list[list[float]], - normalize_budget: bool = True, - ): - budgets = self.__extract_budgets(x, 
normalize_budget) - learning_curves = self.__preprocess_learning_curves(learning_curves) - - x = np.array([self.__encode_config(config) for config in x], dtype=np.single) - - x = torch.tensor(x).to(device=self.device) - budgets = torch.tensor(budgets).to(device=self.device) - learning_curves = torch.tensor(learning_curves).to(device=self.device) - - return x, budgets, learning_curves - - def _preprocess_y(self, y_train: list[float], normalize_y: bool = False): - y_train_array = np.array(y_train, dtype=np.single) - self.min_y = y_train_array.min() - self.max_y = y_train_array.max() - if normalize_y: - y_train_array = (y_train_array - self.min_y) / (self.max_y - self.min_y) - y_train_array = torch.tensor(y_train_array).to(device=self.device) - return y_train_array - - def fit( - self, - x_train: list[SearchSpace], - y_train: list[float], - learning_curves: list[list[float]], - ): - self._fit(x_train, y_train, learning_curves, **self.surrogate_model_fit_args) - - def _fit( - self, - x_train: list[SearchSpace], - y_train: list[float], - learning_curves: list[list[float]], - normalize_y: bool = False, - normalize_budget: bool = True, - n_epochs: int = 1000, - batch_size: int = 64, - optimizer_args: dict | None = None, - early_stopping: bool = True, - patience: int = 10, - perf_patience: int = 10, - ): - self.__reset_xy( - x_train, - y_train, - learning_curves, - normalize_y=normalize_y, - normalize_budget=normalize_budget, - ) - self.model, self.likelihood, self.mll = self.__initialize_gp_model(len(y_train)) - self.nn = NeuralFeatureExtractor(self.input_size, **self.nn_args) - self.model.to(self.device) - self.likelihood.to(self.device) - self.nn.to(self.device) - - if self.checkpointing and self.checkpoint_path.exists(): - non_improvement_steps = count_non_improvement_steps(self.root_dir) - # If checkpointing and patience is not exhausted load a partial model - if non_improvement_steps < perf_patience: - n_epochs = self.refine_epochs - self.load_checkpoint() - self.logger.debug(f"No improvement for: {non_improvement_steps} evaulations") - self.logger.debug(f"N Epochs for the full training: {n_epochs}") - - initial_state = self.get_state() - try: - self.__train_model( - self.x_train, - self.train_budgets, - self.learning_curves, - self.y_train, - n_epochs=n_epochs, - batch_size=batch_size, - optimizer_args=optimizer_args, - early_stopping=early_stopping, - patience=patience, - ) - if self.checkpointing: - self.save_checkpoint() - except gpytorch.utils.errors.NotPSDError: - self.logger.info("Model training failed loading the untrained model") - self.load_checkpoint(initial_state) - # Delete checkpoint to restart training - self.delete_checkpoint() - - def __train_model( - self, - x_train: torch.Tensor, - train_budgets: torch.Tensor, - learning_curves: torch.Tensor, - y_train: torch.Tensor, - n_epochs: int = 1000, - batch_size: int = 64, - optimizer_args: dict | None = None, - early_stopping: bool = True, - patience: int = 10, - ): - if optimizer_args is None: - optimizer_args = {"lr": 0.001} - - self.model.train() - self.likelihood.train() - self.nn.train() - self.optimizer = torch.optim.Adam( - [ - dict({"params": self.model.parameters()}, **optimizer_args), - dict({"params": self.nn.parameters()}, **optimizer_args), - ] - ) - - count_down = patience - min_avg_loss_val = np.inf - average_loss: float = 0.0 - - for epoch_nr in range(0, n_epochs): - if early_stopping and count_down == 0: - self.logger.info( - f"Epoch: {epoch_nr - 1} surrogate training stops due to early " - f"stopping with the 
patience: {patience} and " - f"the minimum average loss of {min_avg_loss_val} and " - f"the final average loss of {average_loss}" - ) - break - - n_examples_batch = x_train.size(dim=0) - - # get a random permutation for mini-batches - permutation = torch.randperm(n_examples_batch) - - # optimize over mini-batches - total_scaled_loss = 0.0 - for batch_idx, start_index in enumerate( - range(0, n_examples_batch, batch_size) - ): - end_index = start_index + batch_size - if end_index > n_examples_batch: - end_index = n_examples_batch - indices = permutation[start_index:end_index] - batch_x, batch_budget, batch_lc, batch_y = ( - x_train[indices], - train_budgets[indices], - learning_curves[indices], - y_train[indices], - ) - - minibatch_size = end_index - start_index - # if only one example in the batch, skip the batch. - # Otherwise, the code will fail because of batchnorm - if minibatch_size <= 1: - continue - - # Zero backprop gradients - self.optimizer.zero_grad() - - projected_x = self.nn(batch_x, batch_budget, batch_lc) - self.model.set_train_data(projected_x, batch_y, strict=False) - output = self.model(projected_x) - - # try: - # Calc loss and backprop derivatives - loss = -self.mll(output, self.model.train_targets) - episodic_loss_value: float = loss.detach().to("cpu").item() - # weighted sum over losses in the batch - total_scaled_loss = ( - total_scaled_loss + episodic_loss_value * minibatch_size - ) - - mse = gpytorch.metrics.mean_squared_error( - output, self.model.train_targets - ) - self.logger.debug( - f"Epoch {epoch_nr} Batch {batch_idx} - MSE {mse:.5f}, " - f"Loss: {episodic_loss_value:.3f}, " - f"lengthscale: {self.model.covar_module.base_kernel.lengthscale.item():.3f}, " - f"noise: {self.model.likelihood.noise.item():.3f}, " - ) - - loss.backward() - self.optimizer.step() - - # Get average weighted loss over every batch - average_loss = total_scaled_loss / n_examples_batch - if average_loss < min_avg_loss_val: - min_avg_loss_val = average_loss - count_down = patience - elif early_stopping: - self.logger.debug( - f"No improvement over the minimum loss value of {min_avg_loss_val} " - f"for the past {patience - count_down} epochs " - f"the training will stop in {count_down} epochs" - ) - count_down -= 1 - # except Exception as training_error: - # self.logger.error( - # f'The following error happened while training: {training_error}') - # # An error has happened, trigger the restart of the optimization and restart - # # the model with default hyperparameters. 
- # self.restart = True - # training_errored = True - # break - - def set_prediction_learning_curves(self, learning_curves: list[list[float]]): - self.prediction_learning_curves = learning_curves - - def predict( - self, x: list[SearchSpace], learning_curves: list[list[float]] | None = None - ): - # Preprocess input - if learning_curves is None: - learning_curves = self.prediction_learning_curves - x_test, test_budgets, learning_curves = self._preprocess_input( - x, learning_curves, self.normalize_budget - ) - - self.model.eval() - self.nn.eval() - self.likelihood.eval() - - with torch.no_grad(): - projected_train_x = self.nn( - self.x_train, self.train_budgets, self.learning_curves - ) - self.model.set_train_data( - inputs=projected_train_x, targets=self.y_train, strict=False - ) - - projected_test_x = self.nn(x_test, test_budgets, learning_curves) - - preds = self.likelihood(self.model(projected_test_x)) - - means = preds.mean.detach().cpu() - - if self.normalize_y: - means = (means + self.min_y) * (self.max_y - self.min_y) - - cov = torch.diag(torch.pow(preds.stddev.detach(), 2)).cpu() - - return means, cov - - def load_checkpoint(self, state: dict | None = None): - """ - Load the state from a previous checkpoint. - """ - if state is None: - checkpoint = torch.load(self.checkpoint_path) - else: - checkpoint = state - self.model.load_state_dict(checkpoint["gp_state_dict"]) - self.nn.load_state_dict(checkpoint["nn_state_dict"]) - self.likelihood.load_state_dict(checkpoint["likelihood_state_dict"]) - - self.model.to(self.device) - self.likelihood.to(self.device) - self.nn.to(self.device) - - def save_checkpoint(self, state: dict | None = None): - """ - Save the given state or the current state in a - checkpoint file. - - Args: - checkpoint_path: path to the checkpoint file - state: The state to save, if none, it will - save the current state. - """ - - if state is None: - torch.save( - self.get_state(), - self.checkpoint_path, - ) - else: - torch.save( - state, - self.checkpoint_path, - ) - - def get_state(self) -> dict[str, dict]: - """ - Get the current state of the surrogate. - - Returns: - current_state: A dictionary that represents - the current state of the surrogate model. 
- """ - current_state = { - "gp_state_dict": deepcopy(self.model.state_dict()), - "nn_state_dict": deepcopy(self.nn.state_dict()), - "likelihood_state_dict": deepcopy(self.likelihood.state_dict()), - } - - return current_state - - def delete_checkpoint(self): - self.checkpoint_path.unlink(missing_ok=True) diff --git a/neps/optimizers/bayesian_optimization/models/ftpfn.py b/neps/optimizers/bayesian_optimization/models/ftpfn.py new file mode 100644 index 000000000..2990b095b --- /dev/null +++ b/neps/optimizers/bayesian_optimization/models/ftpfn.py @@ -0,0 +1,407 @@ +from __future__ import annotations + +from collections.abc import Callable, Mapping +from pathlib import Path +from typing import TYPE_CHECKING, Any + +import torch +from ifbo import FTPFN + +from neps.sampling.priors import Prior + +if TYPE_CHECKING: + from neps.sampling.samplers import Sampler + from neps.search_spaces.domain import Domain + from neps.search_spaces.encoding import ConfigEncoder + from neps.search_spaces.search_space import SearchSpace + from neps.state.trial import Trial + + +def _keep_highest_budget_evaluation( + x: torch.Tensor, + id_col: int = 0, + budget_col: int = 1, +) -> torch.Tensor: + # Does a lexsort, same as if we sorted by (config_id, budget), where + # theyre are sorted according to increasing config_id and then increasing budget. + # x[i2] -> sorted by config id and budget + i1 = torch.argsort(x[:, budget_col]) + i2 = i1[torch.argsort(x[i1][:, id_col], stable=True)] + sorted_x = x[i2] + + # Now that it's sorted, we want to count the occurence of each id into counts + _, counts = torch.unique_consecutive(sorted_x[:, id_col], return_counts=True) + + # Now we can use these counts to get to the last occurence of each id + # The -1 is because we want to index from 0 but sum starts at 1. + ii = counts.cumsum(0) - 1 + return sorted_x[ii] + + +def _download_workaround_for_ifbo_issue_10(path: Path | None, version: str) -> Path: + # TODO: https://github.com/automl/ifBO/issues/10 + import requests + from ifbo.download import FILE_URL, FILENAME + + target_path = Path(path) if path is not None else Path.cwd().resolve() / ".model" + target_path.mkdir(parents=True, exist_ok=True) + + _target_zip_path = target_path / FILENAME(version) + + # Just a heuristic check to determine if the model already exists. + # Kind of hard to know what the name of the extracted file will be + # Basically we just check if the tar.gz file is there and unpacked. + # If there is a new version, then it wont exist and we will download it. + if _target_zip_path.exists() and any( + p.name.endswith(".pt") for p in target_path.iterdir() + ): + return target_path + + _file_url = FILE_URL(version) + + # Download the tar.gz file and decompress it + response = requests.get(_file_url, allow_redirects=True, timeout=10) + if response.status_code != 200: + raise ValueError( + f"Failed to download the surrogate model from {_file_url}." + f" Got status code: {response.status_code}" + ) + + with Path(_target_zip_path).open("wb") as f: + try: + f.write(response.content) + except Exception as e: + raise ValueError( + f"Failed to write the surrogate model to {_target_zip_path}." + ) from e + + # Decompress the .tar.gz file using tarfile + import tarfile + + try: + with tarfile.open(_target_zip_path, "r:gz") as tar: + # NOTE: There is a filter available from 3.12, + # Ideally this should be fixed upstream in ifbo. 
+ # Essentially we'd like to only extract the .pt files + # and not allow absolute paths + tar.extractall(path=target_path) # noqa: S202 + except Exception as e: + raise ValueError( + f"Failed to decompress the surrogate model at {_target_zip_path}." + ) from e + + return target_path + + +def _cast_tensor_shapes(x: torch.Tensor) -> torch.Tensor: + if len(x.shape) == 3 and x.shape[1] == 1: + return x + if len(x.shape) == 2: + return x.reshape(x.shape[0], 1, x.shape[1]) + if len(x.shape) == 1: + return x.reshape(x.shape[0], 1) + raise ValueError(f"Shape not recognized: {x.shape}") + + +# NOTE: Ifbo was trained using 32 bit +FTPFN_DTYPE = torch.float32 + + +def encode_ftpfn( + trials: Mapping[str, Trial], + space: SearchSpace, + budget_domain: Domain, + encoder: ConfigEncoder, + *, + device: torch.device | None = None, + dtype: torch.dtype = FTPFN_DTYPE, + error_value: float = 0.0, + pending_value: float = torch.nan, +) -> tuple[torch.Tensor, torch.Tensor]: + """Encode the trials into a format that the FTPFN model can understand. + + !!! warning "Pending trials" + + For trials which do not have a loss reported yet, they are considered pending. + By default this is torch.nan and we recommend fantasizing these values. + + !!! warning "Error values" + + The FTPFN model requires that all loss values lie in the interval [0, 1]. + By default, using the value of `error_value=0.0`, we encode crashed configurations + as having an error value of 0. + + Args: + trials: The trials to encode + encoder: The encoder to use + space: The search space + budget_domain: The domain to use for the budgets of the FTPFN + device: The device to use + dtype: The dtype to use + + Returns: + The encoded trials and their corresponding **scores** + """ + # Select all trials which have something we can actually use for modelling + # The absence of a report signifies pending + selected = dict(trials.items()) + assert space.fidelity_name is not None + assert space.fidelity is not None + assert 0 <= error_value <= 1 + train_configs = encoder.encode( + [t.config for t in selected.values()], device=device, dtype=dtype + ) + ids = torch.tensor( + [int(config_id.split("_", maxsplit=1)[0]) for config_id in selected], + device=device, + dtype=dtype, + ) + # PFN uses `0` id for test configurations + ids = ids + 1 + + train_fidelities = torch.tensor( + [t.config[space.fidelity_name] for t in selected.values()], + device=device, + dtype=dtype, + ) + train_max_cost_total = budget_domain.cast( + train_fidelities, frm=space.fidelity.domain, dtype=dtype + ) + + # TODO: Document that it's on the user to ensure these are already all bounded + # We could possibly include some bounded transform to assert this. + minimize_ys = torch.tensor( + [ + pending_value + if trial.report is None + else ( + error_value + if trial.report.objective_to_minimize is None + else trial.report.objective_to_minimize + ) + for trial in trials.values() + ], + device=device, + dtype=dtype, + ) + if minimize_ys.max() > 1 or minimize_ys.min() < 0: + raise RuntimeError( + "ifBO requires that all loss values reported lie in the interval [0, 1]" + " but recieved loss value outside of that range!" 
+ f"\n{minimize_ys}" + ) + maximize_ys = 1 - minimize_ys + x_train = torch.cat( + [ids.unsqueeze(1), train_max_cost_total.unsqueeze(1), train_configs], dim=1 + ) + return x_train, maximize_ys + + +def decode_ftpfn_data( + x: torch.Tensor, + encoder: ConfigEncoder, + budget_domain: Domain, + fidelity_domain: Domain, +) -> list[tuple[int | None, int | float, dict[str, Any]]]: + if x.ndim == 1: + x = x.unsqueeze(0) + + _raw_ids = x[:, 0].tolist() + # Subtract 1 to get the real id, otherwise if it was a test ID, we say it had None + real_ids = [None if _id == 0 else int(_id) - 1 for _id in _raw_ids] + fidelities = fidelity_domain.cast(x[:, 1], frm=budget_domain).tolist() + configs = encoder.decode(x[:, 2:]) + return list(zip(real_ids, fidelities, configs, strict=False)) + + +def acquire_next_from_ftpfn( + *, + ftpfn: FTPFNSurrogate, + continuation_samples: torch.Tensor, + encoder: ConfigEncoder, + budget_domain: Domain, + initial_samplers: list[tuple[Sampler, int]], + local_search_sample_size: int = 128, + local_search_confidence: float = 0.95, # [0, 1] + acq_function: Callable[[torch.Tensor], torch.Tensor], + seed: torch.Generator | None = None, + dtype: torch.dtype | None = FTPFN_DTYPE, +) -> torch.Tensor: + # 1. Remove duplicate configurations from continuation_samples, + # keeping only the most recent eval + acq_existing = _keep_highest_budget_evaluation( + continuation_samples, id_col=0, budget_col=1 + ) + + # 2. Remove configs that have been fully evaluated + acq_existing = acq_existing[acq_existing[:, 1] < budget_domain.upper] + if len(acq_existing) != 0: + # Get the best configuration for continuation + acq_scores = acq_function(acq_existing) + best_ix = acq_scores.argmax() + + best_score = acq_scores[best_ix].item() + best_row = acq_existing[best_ix].clone().detach() + del acq_existing + del acq_scores + else: + best_score = -float("inf") + best_row = torch.tensor([]) + + # We'll be re-using 0 id and min budget alot, just create them once and re-use + _N = max(*(s[1] for s in initial_samplers), local_search_sample_size) + ids = torch.zeros((_N, 1), dtype=dtype, device=ftpfn.device) + min_budget = torch.full( + size=(_N, 1), fill_value=budget_domain.lower, dtype=dtype, device=ftpfn.device + ) + + # Acquisition maximization by sampling from samplers and performing an additional + # round of local sampling around the best point + local_sample_confidence = [local_search_confidence] * len(encoder.domains) + for sampler, size in initial_samplers: + # 1. Use provided sampler and eval samples with acq + samples = sampler.sample( + size, to=encoder.domains, seed=seed, device=ftpfn.device, dtype=dtype + ) + _N = len(samples) + X_test = torch.cat([ids[:_N], min_budget[:_N], samples], dim=1) + acq_scores = acq_function(X_test) + + # ... update best if needed + sample_best_ix = acq_scores.argmax() + sample_best_score = acq_scores[sample_best_ix] + sample_best_row = X_test[sample_best_ix].clone().detach() + if sample_best_score > best_score: + best_score = sample_best_score + best_row = sample_best_row + + # 2. Sample around best point from above samples and eval acq. 
+ _mode = sample_best_row[2:] + local_sampler = Prior.from_domains_and_centers( + centers=list(zip(_mode.tolist(), local_sample_confidence, strict=False)), + domains=encoder.domains, + ) + samples = local_sampler.sample( + local_search_sample_size, + to=encoder.domains, + seed=seed, + device=ftpfn.device, + dtype=dtype, + ) + _N = len(samples) + X_test = torch.cat([ids[:_N], min_budget[:_N], samples], dim=1) + acq_scores = acq_function(X_test) + + local_best_ix = acq_scores.argmax() + local_best_score = acq_scores[local_best_ix].clone().detach() + if local_best_score > best_score: + best_score = local_best_score + best_row = X_test[local_best_ix].clone().detach() + + # Finally, if the best + return best_row + + +_CACHED_FTPFN_MODEL: dict[tuple[str, str], FTPFN] = {} + + +class FTPFNSurrogate: + """Wrapper around the IfBO model.""" + + def __init__( + self, + target_path: Path | None = None, + version: str = "0.0.1", + device: torch.device | None = None, + ): + if target_path is None: + # TODO: We also probably want to link this to the actual root directory + # or some shared directory between runs as relying on the path of the initial + # python invocation is likely to lead to issues somewhere. + # TODO: ifbo support for windows has issues with decompression + # We basically just do the same thing they do but manually + target_path = _download_workaround_for_ifbo_issue_10(target_path, version) + + key = (str(target_path), version) + ftpfn = _CACHED_FTPFN_MODEL.get(key) + if ftpfn is None: + ftpfn = FTPFN(target_path=target_path, version=version, device=device) + _CACHED_FTPFN_MODEL[key] = ftpfn + + self.ftpfn = ftpfn + self.device = self.ftpfn.device + + def _get_logits( + self, train_x: torch.Tensor, train_y: torch.Tensor, test_x: torch.Tensor + ) -> torch.Tensor: + return self.ftpfn.model( + _cast_tensor_shapes(train_x), + _cast_tensor_shapes(train_y), + _cast_tensor_shapes(test_x), + ) + + @torch.no_grad() # type: ignore + def get_mean_performance( + self, + train_x: torch.Tensor, + train_y: torch.Tensor, + test_x: torch.Tensor, + ) -> torch.Tensor: + logits = self._get_logits(train_x, train_y, test_x).squeeze() + return self.ftpfn.model.criterion.mean(logits) + + @torch.no_grad() # type: ignore + def get_pi( + self, + train_x: torch.Tensor, + train_y: torch.Tensor, + test_x: torch.Tensor, + y_best: torch.Tensor | float, + ) -> torch.Tensor: + logits = self._get_logits(train_x, train_y, test_x) + return self.ftpfn.model.criterion.pi(logits.squeeze(), best_f=y_best) + + @torch.no_grad() # type: ignore + def get_ei( + self, + train_x: torch.Tensor, + train_y: torch.Tensor, + test_x: torch.Tensor, + y_best: torch.Tensor | float, + ) -> torch.Tensor: + logits = self._get_logits(train_x, train_y, test_x) + return self.ftpfn.model.criterion.ei(logits.squeeze(), best_f=y_best) + + @torch.no_grad() # type: ignore + def get_lcb( + self, + train_x: torch.Tensor, + train_y: torch.Tensor, + test_x: torch.Tensor, + beta: float = (1 - 0.682) / 2, + ) -> torch.Tensor: + logits = self._get_logits(train_x, train_y, test_x) + return self.ftpfn.model.criterion.ucb( + logits=logits, + best_f=None, + rest_prob=beta, + # IMPORTANT to be False, calculate the LCB using lower-bound ICDF as per beta + maximize=False, + ) + + @torch.no_grad() # type: ignore + def get_ucb( + self, + train_x: torch.Tensor, + train_y: torch.Tensor, + test_x: torch.Tensor, + beta: float = (1 - 0.682) / 2, + ) -> torch.Tensor: + logits = self._get_logits(train_x, train_y, test_x) + return self.ftpfn.model.criterion.ucb( + 
logits=logits, + best_f=None, + rest_prob=beta, + # IMPORTANT to be True, calculate the UCB using upper-bound ICDF as per beta + maximize=True, + ) diff --git a/neps/optimizers/bayesian_optimization/models/gp.py b/neps/optimizers/bayesian_optimization/models/gp.py index 73ecf019d..2210e44b4 100644 --- a/neps/optimizers/bayesian_optimization/models/gp.py +++ b/neps/optimizers/bayesian_optimization/models/gp.py @@ -1,668 +1,418 @@ +"""Gaussian Process models for Bayesian Optimization.""" + +from __future__ import annotations + import logging -from copy import deepcopy -from typing import Iterable, Union +from collections.abc import Mapping +from dataclasses import dataclass +from functools import reduce +from itertools import product +from typing import TYPE_CHECKING, Any, TypeVar -import numpy as np +import gpytorch.constraints import torch - -from ..kernels.combine_kernels import ProductKernel, SumKernel - -# GP model as a weighted average between the vanilla vectorial GP and the graph GP -from ..kernels.graph_kernel import GraphKernels -from ..kernels.utils import extract_configs -from ..kernels.vectorial_kernels import Stationary -from ..kernels.weisfilerlehman import WeisfilerLehman - - -class ComprehensiveGP: - def __init__( - self, - graph_kernels: Iterable, - hp_kernels: Iterable, - likelihood: float = 1e-3, - weights=None, - vectorial_features: list = None, - combined_kernel: str = "sum", - logger=None, - surrogate_model_fit_args: dict = None, - ): - self.likelihood = likelihood - self.surrogate_model_fit_args = surrogate_model_fit_args or {} - - self.domain_kernels: list = [] - if bool(graph_kernels): - self.domain_kernels += list(graph_kernels) - if bool(hp_kernels): - self.domain_kernels += list(hp_kernels) - - self.n_kernels: int = len(self.domain_kernels) - self.n_graph_kernels: int = len( - [i for i in self.domain_kernels if isinstance(i, GraphKernels)] +from botorch.fit import fit_gpytorch_mll +from botorch.models import SingleTaskGP +from botorch.models.gp_regression import Log, get_covar_module_with_dim_scaled_prior +from botorch.models.gp_regression_mixed import CategoricalKernel, OutcomeTransform +from botorch.models.transforms.outcome import ChainedOutcomeTransform, Standardize +from botorch.optim import optimize_acqf, optimize_acqf_mixed +from gpytorch import ExactMarginalLogLikelihood +from gpytorch.kernels import ScaleKernel + +from neps.optimizers.bayesian_optimization.acquisition_functions.cost_cooling import ( + cost_cooled_acq, +) +from neps.optimizers.bayesian_optimization.acquisition_functions.pibo import ( + pibo_acquisition, +) +from neps.search_spaces.encoding import CategoricalToIntegerTransformer, ConfigEncoder + +if TYPE_CHECKING: + from botorch.acquisition import AcquisitionFunction + + from neps.sampling.priors import Prior + from neps.search_spaces.search_space import SearchSpace + from neps.state.trial import Trial + +logger = logging.getLogger(__name__) + + +T = TypeVar("T") + + +@dataclass +class GPEncodedData: + """Tensor data of finished configurations.""" + + x: torch.Tensor + y: torch.Tensor + cost: torch.Tensor | None = None + x_pending: torch.Tensor | None = None + + +def default_categorical_kernel( + N: int, + active_dims: tuple[int, ...] 
| None = None, +) -> ScaleKernel: + """Default Categorical kernel for the GP.""" + # Following BoTorches implementation of the MixedSingleTaskGP + return ScaleKernel( + CategoricalKernel( + ard_num_dims=N, + active_dims=active_dims, + lengthscale_constraint=gpytorch.constraints.GreaterThan(1e-6), ) - self.n_vector_kernels: int = self.n_kernels - self.n_graph_kernels - - self.vectorial_features = vectorial_features - - if weights is not None: - self.fixed_weights = True - if weights is not None: - assert len(weights) == len(self.n_kernels), ( - "the weights vector, if supplied, needs to have the same length as " - "the number of kernel_operators!" - ) - self.init_weights = ( - weights - if isinstance(weights, torch.Tensor) - else torch.tensor(weights).flatten() - ) - else: - self.fixed_weights = False - # Initialise the domain kernel weights to uniform - self.init_weights = torch.tensor( - [1.0 / self.n_kernels] * self.n_kernels, - ) - self.weights = self.init_weights.clone() + ) - if combined_kernel == "product": - self.combined_kernel = ProductKernel( - *self.domain_kernels, weights=self.weights - ) - elif combined_kernel == "sum": - self.combined_kernel = SumKernel(*self.domain_kernels, weights=self.weights) - else: - raise NotImplementedError( - f'Combining kernel {combined_kernel} is not yet implemented! Only "sum" ' - f'or "product" are currently supported. ' - ) - self.logger = logger or logging.getLogger("neps") - # Cache the Gram matrix inverse and its log-determinant - self.K, self.K_i, self.logDetK = [None] * 3 - self.theta_vector = None - self.layer_weights = None - self.nlml = None - - self.x_configs: list = None - self.y: torch.Tensor = None - self.y_: torch.Tensor = None - self.y_mean: torch.Tensor = None - self.y_std: torch.Tensor = None - self.n: int = None - - def _optimize_graph_kernels(self, h_: int, lengthscale_): - graphs, _ = extract_configs(self.x_configs) - for i, k in enumerate(self.combined_kernel.kernels): - if not isinstance(k, GraphKernels): - continue - elif isinstance(k, WeisfilerLehman): - _grid_search_wl_kernel( - k, - h_, - [x[i] for x in graphs] - if isinstance(graphs[0], list) - else [c for c in graphs], - self.y, - self.likelihood, - lengthscales=lengthscale_, - ) - else: - self.logger.warning( - "(Graph) kernel optimisation for " - + type(k).__name__ - + " not implemented yet." 
- ) - - def fit(self, train_x, train_y): - self._fit(train_x, train_y, **self.surrogate_model_fit_args) - - def _fit( - self, - train_x, - train_y, - iters: int = 20, - optimizer: str = "adam", - wl_subtree_candidates: tuple = tuple(range(5)), - wl_lengthscales: tuple = tuple(np.e**i for i in range(-2, 3)), - optimize_lik: bool = True, - max_lik: float = 0.01, - optimize_wl_layer_weights: bool = False, - optimizer_kwargs: dict = None, - ): - """Called by self.fit""" - self._reset_XY(train_x, train_y) - - # Get the node weights, if needed - - if optimizer_kwargs is None: - optimizer_kwargs = {"lr": 0.1} - if len(wl_subtree_candidates) > 0: - self._optimize_graph_kernels( - wl_subtree_candidates, - wl_lengthscales, - ) +def make_default_single_obj_gp( + x: torch.Tensor, + y: torch.Tensor, + encoder: ConfigEncoder, + *, + y_transform: OutcomeTransform | None = None, +) -> SingleTaskGP: + """Default GP for single objective optimization.""" + if y.ndim == 1: + y = y.unsqueeze(-1) + + if y_transform is None: + y_transform = Standardize(m=1) + + numerics: list[int] = [] + categoricals: list[int] = [] + for hp_name, transformer in encoder.transformers.items(): + if isinstance(transformer, CategoricalToIntegerTransformer): + categoricals.append(encoder.index_of[hp_name]) + else: + numerics.append(encoder.index_of[hp_name]) + + # Purely vectorial + if len(categoricals) == 0: + return SingleTaskGP(train_X=x, train_Y=y, outcome_transform=y_transform) + + # Purely categorical + if len(numerics) == 0: + return SingleTaskGP( + train_X=x, + train_Y=y, + covar_module=default_categorical_kernel(len(categoricals)), + outcome_transform=y_transform, + ) - weights = self.init_weights.clone() + # Mixed + numeric_kernel = get_covar_module_with_dim_scaled_prior( + ard_num_dims=len(numerics), + active_dims=tuple(numerics), + ) + cat_kernel = default_categorical_kernel( + len(categoricals), active_dims=tuple(categoricals) + ) - if (not self.fixed_weights) and len(self.domain_kernels) > 1: - weights.requires_grad_(True) + # WARNING: I previously tried SingleTaskMixedGp which does the following: + # + # x K((x1, c1), (x2, c2)) = + # x K_cont_1(x1, x2) + K_cat_1(c1, c2) + + # x K_cont_2(x1, x2) * K_cat_2(c1, c2) + # + # In a toy example with a single binary categorical which acted like F * {0, 1}, + # the model collapsed to always predicting `0`. Causing all parameters defining F + # to essentially be guess at random. This is a lot more stable but likely not as + # good... + # TODO: Figure out how to improve stability of this. 
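For intuition, the additive structure assembled below (`numeric_kernel + cat_kernel`) pairs an ARD kernel over the numeric columns with a Hamming-style categorical kernel over the integer-encoded categorical columns. A minimal sketch using the same botorch/gpytorch pieces this file imports; the column layout (two numeric dims, one categorical column at index 2 with three choices) is hypothetical:

```python
import torch
from botorch.models.gp_regression import get_covar_module_with_dim_scaled_prior
from botorch.models.gp_regression_mixed import CategoricalKernel
from gpytorch.kernels import ScaleKernel

# Hypothetical layout: columns 0-1 numeric, column 2 integer-encoded categorical.
numeric_kernel = get_covar_module_with_dim_scaled_prior(
    ard_num_dims=2, active_dims=(0, 1)
)
cat_kernel = ScaleKernel(CategoricalKernel(ard_num_dims=1, active_dims=(2,)))
kernel = numeric_kernel + cat_kernel  # additive, as in make_default_single_obj_gp

X = torch.rand(4, 3)
X[:, 2] = torch.randint(0, 3, (4,)).to(X)  # categorical column as float codes
print(kernel(X).to_dense().shape)  # torch.Size([4, 4]) (.evaluate() on older gpytorch)
```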
+ kernel = numeric_kernel + cat_kernel + + return SingleTaskGP( + train_X=x, train_Y=y, covar_module=kernel, outcome_transform=y_transform + ) - theta_vector = get_theta_vector(vectorial_features=self.vectorial_features) - # Whether to include the likelihood (jitter or noise variance) as a hyperparameter - likelihood = torch.tensor( - self.likelihood, +def optimize_acq( + acq_fn: AcquisitionFunction, + encoder: ConfigEncoder, + *, + n_candidates_required: int = 1, + num_restarts: int = 20, + n_intial_start_points: int | None = None, + acq_options: Mapping[str, Any] | None = None, + maximum_allowed_categorical_combinations: int = 30, +) -> tuple[torch.Tensor, torch.Tensor]: + """Optimize the acquisition function.""" + acq_options = acq_options or {} + + lower = [domain.lower for domain in encoder.domains] + upper = [domain.upper for domain in encoder.domains] + bounds = torch.tensor([lower, upper], dtype=torch.float64) + + cat_transformers = { + name: t for name, t in encoder.transformers.items() if t.domain.is_categorical + } + if not any(cat_transformers): + # Small heuristic to increase the number of candidates as our dimensionality + # increases... we apply a cap. + if n_intial_start_points is None: + # TODO: Need to investigate how num_restarts is used in botorch to inform + # this proxy. + + # Cap out at 4096 when len(bounds) >= 8 + n_intial_start_points = min(64 * len(bounds) ** 2, 4096) + + return optimize_acqf( + acq_function=acq_fn, + bounds=bounds, + q=n_candidates_required, + num_restarts=num_restarts, + raw_samples=n_intial_start_points, + **acq_options, ) - if optimize_lik: - likelihood.requires_grad_(True) - - layer_weights = None - if optimize_wl_layer_weights: - for k in self.domain_kernels: - if isinstance(k, WeisfilerLehman): - layer_weights = torch.ones(k.h + 1).requires_grad_(True) - if layer_weights.shape[0] <= 1: - layer_weights = None - else: - break - - # Linking the optimizer variables to the sum kernel - optim_vars = [] - for a in [weights, likelihood, layer_weights]: - if a is not None and a.is_leaf and a.requires_grad: - optim_vars.append(a) - - if theta_vector is not None: - for a in theta_vector.values(): - if a is not None and a.requires_grad: - optim_vars.append(a) - nlml = None - if len(optim_vars) == 0: # Skip optimisation - K = self.combined_kernel.fit_transform( - weights, - self.x_configs, - feature_lengthscale=theta_vector, - layer_weights=layer_weights, - rebuild_model=True, - ) - K_i, logDetK = compute_pd_inverse(K, likelihood) - else: - # Select the optimizer - assert optimizer.lower() in ["adam", "sgd"] - if optimizer.lower() == "adam": - optim = torch.optim.Adam(optim_vars, **optimizer_kwargs) - else: - optim = torch.optim.SGD(optim_vars, **optimizer_kwargs) - - K = None - for i in range(iters): - optim.zero_grad() - K = self.combined_kernel.fit_transform( - weights, - self.x_configs, - feature_lengthscale=theta_vector, - layer_weights=layer_weights, - rebuild_model=True, - save_gram_matrix=True, - ) - K_i, logDetK = compute_pd_inverse(K, likelihood) - nlml = -compute_log_marginal_likelihood(K_i, logDetK, self.y) - nlml.backward() - if i % 10 == 0: - self.logger.debug( - f"Iteration: {i}/{iters} " - f"Negative log-marginal likelihood:" - f"{nlml.item()} {theta_vector} {weights} {likelihood}" - ) - optim.step() # TODO - with torch.no_grad(): - - weights.clamp_( - 0.0, 1.0 - ) if weights is not None and weights.is_leaf else None - theta_vector = self.combined_kernel.clamp_theta_vector(theta_vector) - likelihood.clamp_( - 1e-5, max_lik - ) if 
likelihood is not None and likelihood.is_leaf else None - layer_weights.clamp_( - 0.0, 1.0 - ) if layer_weights is not None and layer_weights.is_leaf else None - - - optim.zero_grad(set_to_none=True) - - K_i, logDetK = compute_pd_inverse(K, likelihood) - - # Apply the optimal hyperparameters - self.weights = weights.clone() / torch.sum(weights) - self.K_i = K_i.clone() - self.K = K.clone() - self.logDetK = logDetK.clone() - self.likelihood = likelihood.item() - self.theta_vector = theta_vector - self.layer_weights = layer_weights - self.nlml = nlml.detach().cpu() if nlml is not None else None - - for k in self.combined_kernel.kernels: - if isinstance(k, Stationary): - k.update_hyperparameters(lengthscale=theta_vector) - - self.combined_kernel.weights = weights.clone() - - self.logger.debug("Optimisation summary: ") - self.logger.debug( - f"Optimal NLML: {nlml}", + + # We need to generate the product of all possible combinations of categoricals, + # first we do a sanity check + n_combos = reduce( + lambda x, y: x * y, # type: ignore + [t.domain.cardinality for t in cat_transformers.values()], + 1, + ) + if n_combos > maximum_allowed_categorical_combinations: + raise ValueError( + "The number of fixed categorical dimensions is too high. " + "This will lead to an explosion in the number of possible " + f"combinations. Got: {n_combos} while the setting for the function" + f" is: {maximum_allowed_categorical_combinations=}. Consider reducing the " + "dimensions or consider encoding your categoricals in some other format." ) - self.logger.debug(f"Lengthscales: {theta_vector}") - try: - self.logger.debug( - f"Optimal h: {self.domain_kernels[0]._h}", - ) - except AttributeError: - pass - self.logger.debug(f"Weights: {self.weights}") - self.logger.debug(f"Lik: {self.likelihood}") - self.logger.debug(f"Optimal layer weights {layer_weights}") - def predict(self, x_configs, preserve_comp_graph: bool = False): - """Kriging predictions""" + # Right, now we generate all possible combinations + # First, just collect the possible values per cat column + # NOTE: Botorchs optim requires them to be as floats + cats: dict[int, list[float]] = { + encoder.index_of[name]: [ + float(i) + for i in range(transformer.domain.cardinality) # type: ignore + ] + for name, transformer in cat_transformers.items() + } + + # Second, generate all possible combinations + fixed_cats: list[dict[int, float]] + if len(cats) == 1: + col, choice_indices = next(iter(cats.items())) + fixed_cats = [{col: i} for i in choice_indices] + else: + fixed_cats = [ + dict(zip(cats.keys(), combo, strict=False)) + for combo in product(*cats.values()) + ] + + # TODO: we should deterministically shuffle the fixed_categoricals + # as the underlying function does not. + return optimize_acqf_mixed( + acq_function=acq_fn, + bounds=bounds, + num_restarts=min(num_restarts // n_combos, 2), + raw_samples=n_intial_start_points, + q=n_candidates_required, + fixed_features_list=fixed_cats, + **acq_options, + ) - if not isinstance(x_configs, list): - # Convert a single input X_s to a singleton list - x_configs = [x_configs] - if self.K_i is None or self.logDetK is None: - raise ValueError( - "Inverse of Gram matrix is not instantiated. Please call the optimize " - "function to fit on the training data first!" 
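The `fixed_cats` construction above enumerates every combination of categorical values so that `optimize_acqf_mixed` can optimize the continuous dimensions with the categoricals pinned. A small standalone sketch of that enumeration, with hypothetical column indices and cardinalities:

```python
from itertools import product

# Hypothetical: two categorical columns at encoded indices 2 and 4, with
# cardinalities 3 and 2 -> 3 * 2 = 6 fixed-feature dicts. Botorch wants
# the category codes as floats.
cats = {2: [0.0, 1.0, 2.0], 4: [0.0, 1.0]}
fixed_cats = [dict(zip(cats.keys(), combo)) for combo in product(*cats.values())]

print(len(fixed_cats))  # 6
print(fixed_cats[0])    # {2: 0.0, 4: 0.0}
```

This is also why `maximum_allowed_categorical_combinations` exists: the list grows as the product of all cardinalities, so the guard fails fast before the enumeration explodes.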
- ) +def encode_trials_for_gp( + trials: Mapping[str, Trial], + space: SearchSpace, + *, + encoder: ConfigEncoder | None = None, + device: torch.device | None = None, +) -> tuple[GPEncodedData, ConfigEncoder]: + train_configs: list[Mapping[str, Any]] = [] + train_losses: list[float] = [] + train_costs: list[float] = [] + pending_configs: list[Mapping[str, Any]] = [] + + if encoder is None: + encoder = ConfigEncoder.from_space( + space=space, + include_constants_when_decoding=True, + ) - # Concatenate the full list - X_configs_all = self.x_configs + x_configs + for trial in trials.values(): + if trial.report is None: + pending_configs.append(trial.config) + continue - # Make a copy of the sum_kernels for this step, to avoid breaking the autodiff - # if grad guided mutation is used - if preserve_comp_graph: - combined_kernel_copy = deepcopy(self.combined_kernel) - else: - combined_kernel_copy = self.combined_kernel - - K_full = combined_kernel_copy.fit_transform( - self.weights, - X_configs_all, - layer_weights=self.layer_weights, - feature_lengthscale=self.theta_vector, - rebuild_model=True, - save_gram_matrix=False, - gp_fit=False, + train_configs.append(trial.config) + + objective_to_minimize = trial.report.objective_to_minimize + train_losses.append( + torch.nan if objective_to_minimize is None else objective_to_minimize ) - K_s = K_full[: self.n :, self.n :] + cost = trial.report.cost + train_costs.append(torch.nan if cost is None else cost) + + x_train = encoder.encode(train_configs, device=device) + y_train = torch.tensor(train_losses, dtype=torch.float64, device=device) + + # OPTIM: The issue here is that the error could be a bug, in which case + # if the user restarts, we don't want to too heavily penalize that area. + # On the flip side, if the configuration is actually what's causing the + # crashes, then we just want to ensure that the GP is discouraged from + # visiting that area. Setting to the median also ensures that the GP does + # not end up with a highly skewed function apprxoimation, for example, + # setting tiny lengthscales, to ensure it can model the sharp change + # in the performance around the crashed config. + fill_value = torch.nanmedian(y_train).item() + y_train = torch.nan_to_num(y_train, nan=fill_value) + + cost_train = torch.tensor(train_costs, dtype=torch.float64, device=device) + if len(pending_configs) > 0: + x_pending = encoder.encode(pending_configs, device=device) + else: + x_pending = None + + data = GPEncodedData(x=x_train, y=y_train, cost=cost_train, x_pending=x_pending) + return data, encoder + + +def fit_and_acquire_from_gp( + *, + gp: SingleTaskGP, + x_train: torch.Tensor, + encoder: ConfigEncoder, + acquisition: AcquisitionFunction, + prior: Prior | None = None, + pibo_exp_term: float | None = None, + cost_gp: SingleTaskGP | None = None, + costs: torch.Tensor | None = None, + cost_percentage_used: float | None = None, + costs_on_log_scale: bool = True, + seed: int | None = None, + n_candidates_required: int | None = None, + num_restarts: int = 20, + n_initial_start_points: int = 256, + maximum_allowed_categorical_combinations: int = 30, + acq_options: Mapping[str, Any] | None = None, +) -> torch.Tensor: + """Acquire the next configuration to evaluate using a GP. 
+
+    Please see the following for:
+
+    * Making a GP to pass in:
+        [`make_default_single_obj_gp`][neps.optimizers.bayesian_optimization.models.gp.make_default_single_obj_gp]
+    * Encoding configurations:
+        [`encode_trials_for_gp`][neps.optimizers.bayesian_optimization.models.gp.encode_trials_for_gp]
+
+    Args:
+        gp: The GP model to use.
+        x_train: The encoded configurations that have already been evaluated.
+        encoder: The encoder used for encoding the configurations.
+        acquisition: The acquisition function to use.
+
+            A good default is `qLogNoisyExpectedImprovement` which can
+            handle pending configurations gracefully without fantasization.
+
+        prior: The prior to use over configurations. If this is provided, the
+            acquisition function will be further weighted using the piBO acquisition.
+        pibo_exp_term: The exponential term for the piBO acquisition. If `None` is
+            provided, one will be estimated.
+        costs: The costs of evaluating the configurations. If this is provided,
+            then a secondary GP will be used to estimate the cost of a given
+            configuration and factor into the weighting during the acquisition of a new
+            configuration.
+        cost_percentage_used: The percentage of the budget used so far. This is used to
+            determine the strength of the cost cooling. Should be between 0 and 1.
+            Must be provided if costs is provided.
+        costs_on_log_scale: Whether the costs are on a log scale.
+        seed: The seed to use.
+        n_candidates_required: The number of candidates to return. If left
+            as `None`, only the best candidate will be returned. Otherwise
+            a list of candidates will be returned.
+        num_restarts: The number of restarts to use during optimization.
+        n_initial_start_points: The number of initial start points to use during
+            optimization.
+        maximum_allowed_categorical_combinations: The maximum number of categorical
+            combinations to allow. If the number of combinations exceeds this, an error
+            will be raised.
+        acq_options: Additional options to pass to the botorch `optimize_acqf` function.
+
+    Returns:
+        The encoded next configuration(s) to evaluate. Use the encoder you provided
+        to decode the configuration.
+ """ + if seed is not None: + raise NotImplementedError("Seed is not implemented yet for gps") - K_ss = K_full[self.n :, self.n :] + self.likelihood * torch.eye( - len(x_configs), - ) + fit_gpytorch_mll(ExactMarginalLogLikelihood(likelihood=gp.likelihood, model=gp)) - mu_s = K_s.t() @ self.K_i @ self.y - cov_s = K_ss - K_s.t() @ self.K_i @ K_s - cov_s = torch.clamp(cov_s, self.likelihood, np.inf) - mu_s = unnormalize_y(mu_s, self.y_mean, self.y_std) - std_s = torch.sqrt(cov_s) - std_s = unnormalize_y(std_s, None, self.y_std, True) - cov_s = std_s**2 - if preserve_comp_graph: - del combined_kernel_copy - return mu_s, cov_s - - @property - def x(self): - return self.x_configs - - def _reset_XY(self, train_x: Iterable, train_y: Union[Iterable, torch.Tensor]): - self.x_configs = train_x - self.n = len(self.x_configs) - train_y_tensor = ( - train_y - if isinstance(train_y, torch.Tensor) - else torch.tensor(train_y, dtype=torch.get_default_dtype()) - ) - self.y_ = train_y_tensor - self.y, self.y_mean, self.y_std = normalize_y(train_y_tensor) - # The Gram matrix of the training data - self.K_i, self.logDetK = None, None - - def dmu_dphi( - self, - X_s=None, - # compute_grad_var=False, - average_across_features=True, - average_across_occurrences=False, - ): - r""" - Compute the derivative of the GP posterior mean at the specified input location with respect to the - *vector embedding* of the graph (e.g., if using WL-subtree, this function computes the gradient wrt - each subtree pattern) - - The derivative is given by - $ - \frac{\partial \mu^*}{\partial \phi ^*} = \frac{\partial K(\phi, \phi^*)}{\partial \phi ^ *}K(\phi, \phi)^{-1} - \mathbf{y} - $ - - which derives directly from the GP posterior mean formula, and since the term $K(\phi, \phi)^{-1} and \mathbf{y} - are both independent of the testing points (X_s, or \phi^*}, the posterior gradient is simply the matrix - produce of the kernel gradient with the inverse Gram and the training label vector. - - Parameters - ---------- - X_s: The locations on which the GP posterior mean derivatives should be evaluated. If left blank, the - derivatives will be evaluated at the training points. - - compute_grad_var: bool. If true, also compute the gradient variance. - - The derivative of GP is also a GP, and thus the predictive distribution of the posterior gradient is Gaussian. - The posterior mean is given above, and the posterior variance is: - $ - \mathbb{V}[\frac{\partial f^*}{\partial \phi^*}]= \frac{\partial^2k(\phi^*, \phi^*)}{\partial \phi^*^2} - - \frac{\partial k(\phi^*, \Phi)}{\partial \phi^*}K(X, X)^{-1}\frac{\partial k{(\Phi, \phi^*)}}{\partial \phi^*} - $ - - Returns - ------- - list of K torch.Tensor of the shape N x2 D, where N is the length of the X_s list (each element of which is a - networkx graph), K is the number of kernel_operators in the combined kernel and D is the dimensionality of the - feature vector (this is determined by the specific graph kernel. - - OR - - list of K torch.Tensor of shape D, if averaged_over_samples flag is enabled. - """ - if self.K_i is None or self.logDetK is None: + if prior: + if pibo_exp_term is None: raise ValueError( - "Inverse of Gram matrix is not instantiated. Please call the optimize " - "function to fit on the training data first!" 
- ) - if self.n_vector_kernels: - if X_s is not None: - V_s = self._get_vectorial_features(X_s, self.vectorial_feactures) - V_s, _, _ = standardize_x(V_s, self.x_features_min, self.x_features_max) - else: - V_s = self.x_features - X_s = self.x[:] - else: - V_s = None - X_s = X_s if X_s is not None else self.x[:] - - alpha = (self.K_i @ self.y).double().reshape(1, -1) - dmu_dphi = [] - # dmu_dphi_var = [] if compute_grad_var else None - - Ks_handles = [] - feature_matrix = [] - for j, x_s in enumerate(X_s): - jacob_vecs = [] - if V_s is None: - handles = self.combined_kernel.forward_t( - self.weights, - [x_s], - ) - else: - handles = self.combined_kernel.forward_t(self.weights, [x_s], V_s[j]) - Ks_handles.append(handles) - # Each handle is a 2-tuple. first element is the Gram matrix, second element is the leaf variable - feature_vectors = [] - for handle in handles: - k_s, y, _ = handle - # k_s is output, leaf is input, alpha is the K_i @ y term which is constant. - # When compute_grad_var is not required, computational graphs do not need to be saved. - jacob_vecs.append( - torch.autograd.grad( - outputs=k_s, inputs=y, grad_outputs=alpha, retain_graph=False - )[0] - ) - feature_vectors.append(y) - feature_matrix.append(feature_vectors) - jacob_vecs = torch.cat(jacob_vecs) - dmu_dphi.append(jacob_vecs) - - feature_matrix = torch.cat([f[0] for f in feature_matrix]) - if average_across_features: - dmu_dphi = torch.cat(dmu_dphi) - # compute the weighted average of the gradient across N_t. - # feature matrix is of shape N_t x K x D - avg_mu, avg_var, incidences = get_grad( - dmu_dphi, feature_matrix, average_across_occurrences + "If providing a prior, you must provide the `pibo_exp_term`." ) - return avg_mu, avg_var, incidences - return ( - dmu_dphi, - None, - feature_matrix.sum(dim=0) if average_across_occurrences else feature_matrix, - ) + acquisition = pibo_acquisition( + acquisition, + prior=prior, + prior_exponent=pibo_exp_term, + x_domain=encoder.domains, + ) -def get_grad(grad_matrix, feature_matrix, average_occurrences=False): - r""" - Average across the samples via a Monte Carlo sampling scheme. Also estimates the - empirical variance. :param average_occurrences: if True, do a weighted summation - based on the frequency distribution of the occurrence to compute a gradient *per - each feature*. Otherwise, each different occurrence (\phi_i = k) will get a - different gradient estimate. - """ - assert grad_matrix.shape == feature_matrix.shape - # Prune out the all-zero columns that pop up sometimes - valid_cols = [] - for col_idx in range(feature_matrix.size(1)): - if not torch.all(feature_matrix[:, col_idx] == 0): - valid_cols.append(col_idx) - feature_matrix = feature_matrix[:, valid_cols] - grad_matrix = grad_matrix[:, valid_cols] - - _, D = feature_matrix.shape - if average_occurrences: - avg_grad = torch.zeros(D) - avg_grad_var = torch.zeros(D) - for d in range(D): - current_feature = feature_matrix[:, d].clone().detach() - instances, indices, counts = torch.unique( - current_feature, return_inverse=True, return_counts=True + if costs is not None: + if cost_percentage_used is None: + raise ValueError( + "If providing costs, you must provide `cost_percentage_used`." 
+            )
-            weight_vector = torch.tensor([counts[i] for i in indices]).type(torch.float)
-            weight_vector /= weight_vector.sum()
-            mean = torch.sum(weight_vector * grad_matrix[:, d])
-            # Compute the empirical variance of gradients
-            variance = torch.sum(weight_vector * grad_matrix[:, d] ** 2) - mean**2
-            avg_grad[d] = mean
-            avg_grad_var[d] = variance
-        return avg_grad, avg_grad_var, feature_matrix.sum(dim=0)
-    else:
+
+        # We simply ignore missing costs when training the cost GP,
+        # but we do need at least one real cost to train on.
+        missing_costs = torch.isnan(costs)
+        if missing_costs.all():
+            raise ValueError(
+                "Must have at least some configurations reported with a cost"
+                " if using costs with a GP."
+            )
-        # The maximum number possible occurrences -- 7 is an example, if problem occurs, maybe we can increase this
-        # number. But for now, for both NAS-Bench datasets, this should be more than enough!
-        max_occur = 7
-        avg_grad = torch.zeros(D, max_occur)
-        avg_grad_var = torch.zeros(D, max_occur)
-        incidences = torch.zeros(D, max_occur)
-        for d in range(D):
-            current_feature = feature_matrix[:, d].clone().detach()
-            instances, indices, counts = torch.unique(
-                current_feature, return_inverse=True, return_counts=True
-            )
-            for i, val in enumerate(instances):
-                # Find index of all feature counts that are equal to the current val
-                feature_at_val = grad_matrix[current_feature == val]
-                avg_grad[d, int(val)] = torch.mean(feature_at_val)
-                avg_grad_var[d, int(val)] = torch.var(feature_at_val)
-                incidences[d, int(val)] = counts[i]
-        return avg_grad, avg_grad_var, incidences
+
+        if missing_costs.any():
+            not_missing_mask = ~missing_costs
+            x_train_cost = x_train[not_missing_mask]
+            y_train_cost = costs[not_missing_mask]
+        else:
+            x_train_cost = x_train
+            y_train_cost = costs
-
-
-# Optimize Graph kernel
-def getBack(var_grad_fn, logger):
-    logger.debug(var_grad_fn)
-    for n in var_grad_fn.next_functions:
-        if n[0]:
-            try:
-                tensor = getattr(n[0], "variable")
-                logger.debug(n[0])
-                logger.debug(f"Tensor with grad found: {tensor}")
-                logger.debug(f" - gradient: {tensor.grad}")
-            except AttributeError:
-                getBack(n[0], logger)
-
-
-def _grid_search_wl_kernel(
-    k: WeisfilerLehman,
-    subtree_candidates,
-    train_x: list,
-    train_y: torch.Tensor,
-    lik: float,
-    subtree_prior=None,
-    lengthscales=None,
-    lengthscales_prior=None,
-):
-    """Optimize the *discrete hyperparameters* of Weisfeiler Lehman kernel.
- k: a Weisfeiler-Lehman kernel instance - hyperparameter_candidate: list of candidate hyperparameter to try - train_x: the train data - train_y: the train label - lik: likelihood - lengthscale: if using RBF kernel for successive embedding, the list of lengthscale to be grid searched over - """ - # lik = 1e-6 - assert len(train_x) == len(train_y) - best_nlml = torch.tensor(np.inf) - best_subtree_depth = None - best_lengthscale = None - best_K = None - if lengthscales is not None and k.se is not None: - candidates = [(h_, l_) for h_ in subtree_candidates for l_ in lengthscales] - else: - candidates = [(h_, None) for h_ in subtree_candidates] - - for i in candidates: - if k.se is not None: - k.change_se_params({"lengthscale": i[1]}) - k.change_kernel_params({"h": i[0]}) - K = k.fit_transform(train_x, rebuild_model=True, save_gram_matrix=True) - # self.logger.debug(K) - K_i, logDetK = compute_pd_inverse(K, lik) - # self.logger.debug(train_y) - nlml = -compute_log_marginal_likelihood(K_i, logDetK, train_y) - # self.logger.debug(f"{i} {nlml}") - if nlml < best_nlml: - best_nlml = nlml - best_subtree_depth, best_lengthscale = i - best_K = torch.clone(K) - # self.logger.debug(f"h: {best_subtree_depth} theta: {best_lengthscale}") - # self.logger.debug(best_subtree_depth) - k.change_kernel_params({"h": best_subtree_depth}) - if k.se is not None: - k.change_se_params({"lengthscale": best_lengthscale}) - k._gram = best_K - - -def get_theta_vector(vectorial_features): - if vectorial_features is None: - return None - theta_vector = {} - for key, dim in vectorial_features.items(): - t = torch.ones(dim) - if t.shape[0] > 1: - t.requires_grad_(True) - theta_vector[key] = t - return theta_vector - - -def normalize_y(y: torch.Tensor): - y_mean = torch.mean(y) if isinstance(y, torch.Tensor) else np.mean(y) - y_std = torch.std(y) if isinstance(y, torch.Tensor) else np.std(y) - if y_std == 0: - y_std = 1 - y = (y - y_mean) / y_std - return y, y_mean, y_std - - -def unnormalize_y(y, y_mean, y_std, scale_std=False): - """Similar to the undoing of the pre-processing step above, but on the output predictions""" - if not scale_std: - return y * y_std + y_mean - else: - return y * y_std + if missing_costs.any(): + not_missing_mask = ~missing_costs + x_train_cost = costs[not_missing_mask] + y_train_cost = x_train[not_missing_mask] + else: + x_train_cost = x_train + y_train_cost = costs -def standardize_x( - x: torch.Tensor, x_min: torch.Tensor = None, x_max: torch.Tensor = None -): - """Standardize the vectorial input into a d-dimensional hypercube [0, 1]^d, where d is the number of features. - if x_min ond x_max are supplied, x2 will be standardised using these instead. This is used when standardising the - validation/test inputs. - """ - if (x_min is not None and x_max is None) or (x_min is None and x_max is not None): - raise ValueError( - "Either *both* or *neither* of x_min, x_max need to be supplied!" 
+ if costs_on_log_scale: + transform = ChainedOutcomeTransform( + log=Log(), + standardize=Standardize(m=1), + ) + else: + transform = Standardize(m=1) + + cost_gp = make_default_single_obj_gp( + x_train_cost, + y_train_cost, + encoder=encoder, + y_transform=transform, + ) + fit_gpytorch_mll( + ExactMarginalLogLikelihood(likelihood=cost_gp.likelihood, model=cost_gp) + ) + acquisition = cost_cooled_acq( + acq_fn=acquisition, + model=cost_gp, + used_max_cost_total_percentage=cost_percentage_used, ) - if x_min is None: - x_min = torch.min(x, 0)[0] - x_max = torch.max(x, 0)[0] - x = (x - x_min) / (x_max - x_min) - return x, x_min, x_max + _n = n_candidates_required if n_candidates_required is not None else 1 -def compute_log_marginal_likelihood( - K_i: torch.Tensor, - logDetK: torch.Tensor, - y: torch.Tensor, - normalize: bool = True, - log_prior_dist=None, -): - """Compute the zero mean Gaussian process log marginal likelihood given the inverse of Gram matrix K(x2,x2), its - log determinant, and the training label vector y. - Option: - - normalize: normalize the log marginal likelihood by the length of the label vector, as per the gpytorch - routine. - - prior: A pytorch distribution object. If specified, the hyperparameter prior will be taken into consideration and - we use Type-II MAP instead of Type-II MLE (compute log_posterior instead of log_evidence) - """ - lml = ( - -0.5 * y.t() @ K_i @ y - + 0.5 * logDetK - - y.shape[0] - / 2.0 - * torch.log( - 2 - * torch.tensor( - np.pi, - ) - ) + candidates, _scores = optimize_acq( + acquisition, + encoder, + n_candidates_required=_n, + num_restarts=num_restarts, + n_intial_start_points=n_initial_start_points, + acq_options=acq_options, + maximum_allowed_categorical_combinations=maximum_allowed_categorical_combinations, ) - if log_prior_dist is not None: - lml -= log_prior_dist - return lml / y.shape[0] if normalize else lml - - -def compute_pd_inverse(K: torch.tensor, jitter: float = 1e-6): - """Compute the inverse of a postive-(semi)definite matrix K using Cholesky inversion.""" - n = K.shape[0] - assert ( - isinstance(jitter, float) or jitter.ndim == 0 - ), "only homoscedastic noise variance is allowed here!" 
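Stepping back from the hunk above: a hedged sketch of how the new helpers in this file are meant to compose, using only names defined here. `trials` and `space` are assumed to come from a neps run's state, and `qLogNoisyExpectedImprovement` is the acquisition the docstring suggests:

```python
from botorch.acquisition import qLogNoisyExpectedImprovement

# Assumed inputs: `trials` is a Mapping[str, Trial] and `space` a SearchSpace,
# both taken from an ongoing neps run.
data, encoder = encode_trials_for_gp(trials, space)
gp = make_default_single_obj_gp(data.x, data.y, encoder=encoder)

acq = qLogNoisyExpectedImprovement(
    model=gp,
    X_baseline=data.x,
    prune_baseline=True,
)

# Fits the GP via its marginal log-likelihood, optionally applies piBO /
# cost-cooling weighting, and optimizes the acquisition over the encoded space.
candidate = fit_and_acquire_from_gp(
    gp=gp,
    x_train=data.x,
    encoder=encoder,
    acquisition=acq,
)
config = encoder.decode(candidate)[0]  # back to a {hyperparameter: value} dict
```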
- is_successful = False - fail_count = 0 - max_fail = 3 - while fail_count < max_fail and not is_successful: - try: - jitter_diag = jitter * torch.eye(n, device=K.device) * 10**fail_count - K_ = K + jitter_diag - try: - Kc = torch.linalg.cholesky(K_) - except AttributeError: # For torch < 1.8.0 - Kc = torch.cholesky(K_) - is_successful = True - except RuntimeError: - fail_count += 1 - if not is_successful: - raise RuntimeError(f"Gram matrix not positive definite despite of jitter:\n{K}") - logDetK = -2 * torch.sum(torch.log(torch.diag(Kc))) - K_i = torch.cholesky_inverse(Kc) - return K_i.to(torch.get_default_dtype()), logDetK.to(torch.get_default_dtype()) + return candidates diff --git a/neps/optimizers/bayesian_optimization/models/gp_hierarchy.py b/neps/optimizers/bayesian_optimization/models/gp_hierarchy.py deleted file mode 100644 index a359b9370..000000000 --- a/neps/optimizers/bayesian_optimization/models/gp_hierarchy.py +++ /dev/null @@ -1,1115 +0,0 @@ -import itertools -import logging -import warnings -from copy import deepcopy -from typing import Iterable, Union - -import numpy as np -import torch - -from ..kernels.combine_kernels_hierarchy import ProductKernel, SumKernel - -# GP model as a weighted average between the vanilla vectorial GP and the graph GP -from ..kernels.graph_kernel import GraphKernels -from ..kernels.utils import extract_configs_hierarchy -from ..kernels.vectorial_kernels import Stationary -from ..kernels.weisfilerlehman import WeisfilerLehman - -import logging - -logger = logging.getLogger(__name__) - - -# Code for psd_safe_cholesky from gypytorch -class _value_context: - _global_value = None - - @classmethod - def value(cls): - return cls._global_value - - @classmethod - def _set_value(cls, value): - cls._global_value = value - - def __init__(self, value): - self._orig_value = self.__class__.value() - self._instance_value = value - - def __enter__( - self, - ): - self.__class__._set_value(self._instance_value) - - def __exit__(self, *args): - self.__class__._set_value(self._orig_value) - return False - - -class _dtype_value_context: - _global_float_value = None - _global_double_value = None - _global_half_value = None - - @classmethod - def value(cls, dtype): - if torch.is_tensor(dtype): - dtype = dtype.dtype - if dtype == torch.float: - return cls._global_float_value - elif dtype == torch.double: - return cls._global_double_value - elif dtype == torch.half: - return cls._global_half_value - else: - raise RuntimeError(f"Unsupported dtype for {cls.__name__}.") - - @classmethod - def _set_value(cls, float_value, double_value, half_value): - if float_value is not None: - cls._global_float_value = float_value - if double_value is not None: - cls._global_double_value = double_value - if half_value is not None: - cls._global_half_value = half_value - - def __init__(self, float=None, double=None, half=None): - self._orig_float_value = self.__class__.value() - self._instance_float_value = float - self._orig_double_value = self.__class__.value() - self._instance_double_value = double - self._orig_half_value = self.__class__.value() - self._instance_half_value = half - - def __enter__( - self, - ): - self.__class__._set_value( - self._instance_float_value, - self._instance_double_value, - self._instance_half_value, - ) - - def __exit__(self, *args): - self.__class__._set_value( - self._orig_float_value, self._orig_double_value, self._orig_half_value - ) - return False - - -class cholesky_jitter(_dtype_value_context): - """ - The jitter value used by `psd_safe_cholesky` 
when using cholesky solves. - - Default for `float`: 1e-6 - - Default for `double`: 1e-8 - """ - - _global_float_value = 1e-6 # type: ignore[assignment] - _global_double_value = 1e-8 # type: ignore[assignment] - - @classmethod - def value(cls, dtype=None): - if dtype is None: - # Deprecated in 1.4: remove in 1.5 - warnings.warn( - "cholesky_jitter is now a _dtype_value_context and should be called with a dtype argument", - DeprecationWarning, - ) - return cls._global_float_value - return super().value(dtype=dtype) - - -class _feature_flag: - r"""Base class for feature flag settings with global scope. - The default is set via the `_default` class attribute. - """ - - _default = False - _state = None - - @classmethod - def is_default(cls): - return cls._state is None - - @classmethod - def on(cls): - if cls.is_default(): - return cls._default - return cls._state - - @classmethod - def off(cls): - return not cls.on() - - @classmethod - def _set_state(cls, state): - cls._state = state - - def __init__(self, state=True): - self.prev = self.__class__._state - self.state = state - - def __enter__(self): - self.__class__._set_state(self.state) - - def __exit__(self, *args): - self.__class__._set_state(self.prev) - return False - - -class verbose_linalg(_feature_flag): - """ - Print out information whenever running an expensive linear algebra routine (e.g. Cholesky, CG, Lanczos, CIQ, etc.) - (Default: False) - """ - - _default = False - - # Create a global logger - logger = logging.getLogger("LinAlg (Verbose)") - logger.setLevel(logging.DEBUG) - - # Output logging results to the stdout stream - ch = logging.StreamHandler() - ch.setLevel(logging.DEBUG) - formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s") - ch.setFormatter(formatter) - logger.addHandler(ch) - - -class cholesky_max_tries(_value_context): - """ - The max_tries value used by `psd_safe_cholesky` when using cholesky solves. - (Default: 3) - """ - - _global_value = 3 # type: ignore[assignment] - - -class NumericalWarning(RuntimeWarning): - """ - Warning thrown when convergence criteria are not met, or when comptuations require extra stability. - """ - - pass - - -class NanError(RuntimeError): - pass - - -class NotPSDError(RuntimeError): - pass - - -def _psd_safe_cholesky(A, out=None, jitter=None, max_tries=None): - # Maybe log - if verbose_linalg.on(): - verbose_linalg.logger.debug(f"Running Cholesky on a matrix of size {A.shape}.") - - if out is not None: - out = (out, torch.empty(A.shape[:-2], dtype=torch.int32, device=out.device)) - - L, info = torch.linalg.cholesky_ex(A, out=out) - if not torch.any(info): - return L - - isnan = torch.isnan(A) - if isnan.any(): - raise NanError( - f"cholesky_cpu: {isnan.sum().item()} of {A.numel()} elements of the {A.shape} tensor are NaN." 
- ) - - if jitter is None: - jitter = cholesky_jitter.value(A.dtype) - if max_tries is None: - max_tries = cholesky_max_tries.value() - Aprime = A.clone() - jitter_prev = 0 - for i in range(max_tries): - jitter_new = jitter * (10**i) - # add jitter only where needed - diag_add = ( - ((info > 0) * (jitter_new - jitter_prev)) - .unsqueeze(-1) - .expand(*Aprime.shape[:-1]) - ) - Aprime.diagonal(dim1=-1, dim2=-2).add_(diag_add) - jitter_prev = jitter_new - warnings.warn( - f"A not p.d., added jitter of {jitter_new:.1e} to the diagonal", - NumericalWarning, - ) - L, info = torch.linalg.cholesky_ex(Aprime, out=out) - if not torch.any(info): - return L - raise NotPSDError( - f"Matrix not positive definite after repeatedly adding jitter up to {jitter_new:.1e}." - ) - - -def psd_safe_cholesky(A, upper=False, out=None, jitter=None, max_tries=None): - """Compute the Cholesky decomposition of A. If A is only p.s.d, add a small jitter to the diagonal. - Args: - A (Tensor): - The tensor to compute the Cholesky decomposition of - upper (bool, optional): - See torch.cholesky - out (Tensor, optional): - See torch.cholesky - jitter (float, optional): - The jitter to add to the diagonal of A in case A is only p.s.d. If omitted, - uses settings.cholesky_jitter.value() - max_tries (int, optional): - Number of attempts (with successively increasing jitter) to make before raising an error. - """ - L = _psd_safe_cholesky(A, out=out, jitter=jitter, max_tries=max_tries) - if upper: - if out is not None: - out = out.transpose_(-1, -2) - else: - L = L.transpose(-1, -2) - return L - - -# Code for psd_safe_cholesky from gypytorch - - -class ComprehensiveGPHierarchy: - def __init__( - self, - graph_kernels: Iterable, - hp_kernels: Iterable, - likelihood: float = 1e-3, - weights=None, - learn_all_h=False, - graph_feature_ard=True, - d_graph_features: int = 0, - normalize_combined_kernel=True, - hierarchy_consider: list = None, # or a list of integers e.g. [0,1,2,3] - vectorial_features: list = None, - combined_kernel: str = "sum", - verbose: bool = False, - surrogate_model_fit_args: dict = None, - gpytorch_kinv: bool = False, - ): - self.likelihood = likelihood - self.surrogate_model_fit_args = surrogate_model_fit_args or {} - self.learn_all_h = learn_all_h - self.hierarchy_consider = hierarchy_consider - self.normalize_combined_kernel = normalize_combined_kernel - if self.hierarchy_consider is None: - self.learn_all_h = False - self.domain_kernels: list = [] - if bool(graph_kernels): - self.domain_kernels += list(graph_kernels) - if bool(hp_kernels): - self.domain_kernels += list(hp_kernels) - - self.hp_kernels = hp_kernels # impose on scalar graph features - self.n_kernels: int = len(self.domain_kernels) - self.n_graph_kernels: int = len( - [i for i in self.domain_kernels if isinstance(i, GraphKernels)] - ) - self.n_vector_kernels: int = self.n_kernels - self.n_graph_kernels - self.graph_feature_ard = graph_feature_ard - self.vectorial_features = vectorial_features - self.d_graph_features = d_graph_features - - if weights is not None: - self.fixed_weights = True - if weights is not None: - assert len(weights) == self.n_kernels, ( - "the weights vector, if supplied, needs to have the same length as " - "the number of kernel_operators!" 
- ) - self.init_weights = ( - weights - if isinstance(weights, torch.Tensor) - else torch.tensor(weights).flatten() - ) - else: - self.fixed_weights = False - # Initialise the domain kernel weights to uniform - self.init_weights = torch.tensor( - [1.0 / self.n_kernels] * self.n_kernels, - ) - - self.weights = self.init_weights.clone() - - if combined_kernel == "product": - self.combined_kernel = ProductKernel( - *self.domain_kernels, - weights=self.weights, - hierarchy_consider=self.hierarchy_consider, - d_graph_features=self.d_graph_features, - ) - elif combined_kernel == "sum": - self.combined_kernel = SumKernel( - *self.domain_kernels, - weights=self.weights, - hierarchy_consider=self.hierarchy_consider, - d_graph_features=self.d_graph_features, - ) - else: - raise NotImplementedError( - f'Combining kernel {combined_kernel} is not yet implemented! Only "sum" ' - f'or "product" are currently supported. ' - ) - # Verbose mode - self.verbose = verbose - # Cache the Gram matrix inverse and its log-determinant - self.K, self.K_i, self.logDetK = [None] * 3 - self.layer_weights = None - self.nlml = None - - self.x_configs: list = None # type: ignore[assignment] - self.y: torch.Tensor = None - self.y_: torch.Tensor = None - self.y_mean: torch.Tensor = None - self.y_std: torch.Tensor = None - self.n: int = None # type: ignore[assignment] - - self.gpytorch_kinv = gpytorch_kinv - - def _optimize_graph_kernels(self, h_: int, lengthscale_): - weights = self.init_weights.clone() - if self.hierarchy_consider is None: - graphs, _ = extract_configs_hierarchy( - self.x_configs, - d_graph_features=self.d_graph_features, - hierarchy_consider=self.hierarchy_consider, - ) - for i, k in enumerate(self.combined_kernel.kernels): - if not isinstance(k, GraphKernels): - continue - elif isinstance(k, WeisfilerLehman): - _grid_search_wl_kernel( - k, - h_, - [x[i] for x in graphs] - if isinstance(graphs[0], list) - else [c for c in graphs], - self.y, - self.likelihood, - lengthscales=lengthscale_, - gpytorch_kinv=self.gpytorch_kinv, - ) - else: - logging.warning( - "(Graph) kernel optimisation for " - + type(k).__name__ - + " not implemented yet." 
- ) - else: - if self.learn_all_h: - best_nlml = torch.tensor(np.inf) - best_subtree_depth_combo = None - best_K = None - train_y = self.y - h_combo_candidates = generate_h_combo_candidates(self.hierarchy_consider) - - for h_combo in h_combo_candidates: - for i, k in enumerate(self.combined_kernel.kernels): - if isinstance(k, WeisfilerLehman): - k.change_kernel_params({"h": h_combo[i]}) - K = self.combined_kernel.fit_transform( - weights, - self.x_configs, - normalize=self.normalize_combined_kernel, - layer_weights=None, - rebuild_model=True, - save_gram_matrix=True, - ) - K_i, logDetK = compute_pd_inverse( - K, self.likelihood, self.gpytorch_kinv - ) - nlml = -compute_log_marginal_likelihood(K_i, logDetK, train_y) - if nlml < best_nlml: - best_nlml = nlml - best_subtree_depth_combo = h_combo - best_K = torch.clone(K) - for i, k in enumerate(self.combined_kernel.kernels): - if isinstance(k, WeisfilerLehman): - k.change_kernel_params({"h": best_subtree_depth_combo[i]}) # type: ignore[index] - self.combined_kernel._gram = best_K - else: - best_nlml = torch.tensor(np.inf) - best_subtree_depth = None - best_K = None - train_y = self.y - - for h_i in list(h_): # type: ignore[call-overload] - # only optimize h in wl kernel - if isinstance(self.combined_kernel.kernels[0], WeisfilerLehman): - self.combined_kernel.kernels[0].change_kernel_params({"h": h_i}) - K = self.combined_kernel.fit_transform( - weights, - self.x_configs, - normalize=self.normalize_combined_kernel, - layer_weights=None, - rebuild_model=True, - save_gram_matrix=True, - ) - K_i, logDetK = compute_pd_inverse( - K, self.likelihood, self.gpytorch_kinv - ) - nlml = -compute_log_marginal_likelihood(K_i, logDetK, train_y) - if nlml < best_nlml: - best_nlml = nlml - best_subtree_depth = h_i - best_K = torch.clone(K) - if isinstance(self.combined_kernel.kernels[0], WeisfilerLehman): - self.combined_kernel.kernels[0].change_kernel_params( - {"h": best_subtree_depth} - ) - self.combined_kernel._gram = best_K - - def fit(self, train_x: Iterable, train_y: Union[Iterable, torch.Tensor]): - self._fit(train_x, train_y, **self.surrogate_model_fit_args) - - def _fit( - self, - train_x: Iterable, - train_y: Union[Iterable, torch.Tensor], - iters: int = 20, - optimizer: str = "adam", - wl_subtree_candidates: tuple = tuple(range(5)), - wl_lengthscales: tuple = tuple( - np.e**i - for i in range(-2, 3) # type: ignore[name-defined] - ), - optimize_lik: bool = True, - max_lik: float = 0.5, - optimize_wl_layer_weights: bool = False, - optimizer_kwargs: dict = None, - ): - # Called by self._fit - self._reset_XY(train_x, train_y) - - # Get the node weights, if needed - if optimizer_kwargs is None: - optimizer_kwargs = {"lr": 0.1} - if len(wl_subtree_candidates) > 0: - self._optimize_graph_kernels( - wl_subtree_candidates, # type: ignore[arg-type] - wl_lengthscales, - ) - - weights = self.init_weights.clone() - - if (not self.fixed_weights) and len(self.domain_kernels) > 1: - weights.requires_grad_(True) - - # set the prior values for the lengthscales of the two global features of the final architecture graph - if self.graph_feature_ard: - theta_vector = torch.log(torch.tensor([0.6, 0.6])) - else: - theta_vector = torch.log(torch.tensor([0.6])) - - # if use continuous graph properties and we set to use stationary kernels - if self.d_graph_features > 0 and len(self.hp_kernels) > 0: # type: ignore[arg-type] - # TODO modify the code on theta_vector betlow to be compatibale with HPO - # theta in this case are the lengthscales for the two global property of - # 
the final architecture graph - # theta_vector = get_theta_vector(vectorial_features=self.vectorial_features) - theta_vector.requires_grad_(True) - - # Whether to include the likelihood (jitter or noise variance) as a hyperparameter - likelihood = torch.tensor( - self.likelihood, - ) - if optimize_lik: - likelihood.requires_grad_(True) - - layer_weights = None - if optimize_wl_layer_weights: - for k in self.domain_kernels: - if isinstance(k, WeisfilerLehman): - layer_weights = torch.ones(k.h + 1).requires_grad_(True) - if layer_weights.shape[0] <= 1: - layer_weights = None - else: - break - - # Linking the optimizer variables to the sum kernel - optim_vars = [] - # if theta_vector is not None: # TODO used for HPO - # for a in theta_vector.values(): - # if a is not None and a.requires_grad: - # optim_vars.append(a) - # if we use graph features, we will optimize the corresponding stationary kernel lengthscales - if self.d_graph_features > 0 and theta_vector.requires_grad: - optim_vars.append(theta_vector) - - for a in [weights, likelihood, layer_weights]: - if a is not None and a.is_leaf and a.requires_grad: - optim_vars.append(a) - - nlml = None - if len(optim_vars) == 0: # Skip optimisation - K = self.combined_kernel.fit_transform( - weights, - self.x_configs, - normalize=self.normalize_combined_kernel, - feature_lengthscale=torch.exp(theta_vector), - layer_weights=layer_weights, - rebuild_model=True, - ) - K_i, logDetK = compute_pd_inverse(K, likelihood, self.gpytorch_kinv) - else: - # Select the optimizer - assert optimizer.lower() in ["adam", "sgd"] - if optimizer.lower() == "adam": - optim = torch.optim.Adam(optim_vars, **optimizer_kwargs) - else: - optim = torch.optim.SGD(optim_vars, **optimizer_kwargs) - - K = None - optim_vars_list = [] - nlml_list = [] - for i in range(iters): - optim.zero_grad() - K = self.combined_kernel.fit_transform( - weights, - self.x_configs, - normalize=self.normalize_combined_kernel, - feature_lengthscale=torch.exp(theta_vector), - layer_weights=layer_weights, - rebuild_model=True, - save_gram_matrix=True, - ) - K_i, logDetK = compute_pd_inverse(K, likelihood, self.gpytorch_kinv) - nlml = -compute_log_marginal_likelihood(K_i, logDetK, self.y) - nlml.backward(create_graph=True) - if self.verbose and i % 10 == 0: - logger.info( - "Iteration:", - i, - "/", - iters, - "Negative log-marginal likelihood:", - nlml.item(), - theta_vector, - weights, - likelihood, - ) - optim.step() - - with torch.no_grad(): - likelihood.clamp_( - 1e-5, max_lik - ) if likelihood is not None and likelihood.is_leaf else None - - optim_vars_list.append( - [ - theta_vector.clone().detach(), - weights.clone().detach(), - likelihood.clone().detach(), - ] - ) - nlml_list.append(nlml.item()) - - optim.zero_grad(set_to_none=True) - - theta_vector, weights, likelihood = optim_vars_list[np.argmin(nlml_list)] - K = self.combined_kernel.fit_transform( - weights, - self.x_configs, - normalize=self.normalize_combined_kernel, - feature_lengthscale=torch.exp(theta_vector), - layer_weights=layer_weights, - rebuild_model=True, - save_gram_matrix=True, - ) - K_i, logDetK = compute_pd_inverse(K, likelihood, self.gpytorch_kinv) - - # Apply the optimal hyperparameters - # transform the weights in the combine_kernel function - self.weights = weights - self.K_i = K_i.clone() - self.K = K.clone() - self.logDetK = logDetK.clone() - self.likelihood = likelihood.item() - self.theta_vector = theta_vector - self.layer_weights = layer_weights - self.nlml = nlml.detach().cpu() if nlml is not None else None - - for 
k in self.combined_kernel.kernels: - if isinstance(k, Stationary): - k.update_hyperparameters(lengthscale=torch.exp(theta_vector)) - - self.combined_kernel.weights = weights.clone() - - def predict(self, x_configs, preserve_comp_graph: bool = False): - """Kriging predictions""" - - if not isinstance(x_configs, list): - # Convert a single input X_s to a singleton list - x_configs = [x_configs] - - if self.K_i is None or self.logDetK is None: - raise ValueError( - "Inverse of Gram matrix is not instantiated. Please call the optimize " - "function to fit on the training data first!" - ) - - # Concatenate the full list - X_configs_all = self.x_configs + x_configs - - # Make a copy of the sum_kernels for this step, to avoid breaking the autodiff - # if grad guided mutation is used - if preserve_comp_graph: - combined_kernel_copy = deepcopy(self.combined_kernel) - else: - combined_kernel_copy = self.combined_kernel - - K_full = combined_kernel_copy.fit_transform( - self.weights, - X_configs_all, - layer_weights=self.layer_weights, - normalize=self.normalize_combined_kernel, - feature_lengthscale=torch.exp(self.theta_vector), - rebuild_model=True, - save_gram_matrix=False, - gp_fit=False, - ) - - K_s = K_full[: self.n :, self.n :] - - K_ss = K_full[self.n :, self.n :] + self.likelihood * torch.eye( - len(x_configs), - ) - - mu_s = K_s.t() @ self.K_i @ self.y - cov_s = K_ss - K_s.t() @ self.K_i @ K_s - # TODO not taking the diag? - cov_s = torch.clamp(cov_s, self.likelihood, np.inf) - mu_s = unnormalize_y(mu_s, self.y_mean, self.y_std) - std_s = torch.sqrt(cov_s) - std_s = unnormalize_y(std_s, None, self.y_std, True) - cov_s = std_s**2 - if preserve_comp_graph: - del combined_kernel_copy - return mu_s, cov_s - - def predict_single_hierarchy( - self, x_configs, hierarchy_id=0, preserve_comp_graph: bool = False - ): - """Kriging predictions""" - - if not isinstance(x_configs, list): - # Convert a single input X_s to a singleton list - x_configs = [x_configs] - - if self.K_i is None or self.logDetK is None: - raise ValueError( - "Inverse of Gram matrix is not instantiated. Please call the optimize function to " - "fit on the training data first!" 
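Stripped of the kernel machinery, the Kriging equations implemented by `predict` above reduce to the following standalone sketch (Gram values are made up for the example):

    import torch

    K = torch.tensor([[1.0, 0.5], [0.5, 1.0]])  # K(train, train)
    K_s = torch.tensor([[0.8], [0.3]])          # K(train, test)
    K_ss = torch.tensor([[1.0]])                # K(test, test)
    y = torch.tensor([0.2, -0.4])
    likelihood = 1e-3                           # noise variance on the diagonal

    K_i = torch.linalg.inv(K + likelihood * torch.eye(2))
    mu_s = K_s.T @ K_i @ y            # posterior mean at the test point
    cov_s = K_ss - K_s.T @ K_i @ K_s  # posterior covariance at the test point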
- ) - - # Concatenate the full list - X_configs_all = self.x_configs + x_configs - - # Make a copy of the sum_kernels for this step, to avoid breaking the autodiff if grad guided mutation is used - if preserve_comp_graph: - combined_kernel_copy = deepcopy(self.combined_kernel) - else: - combined_kernel_copy = self.combined_kernel - - K_sub_full = combined_kernel_copy.fit_transform_single_hierarchy( - self.weights, - X_configs_all, - normalize=self.normalize_combined_kernel, - hierarchy_id=hierarchy_id, - feature_lengthscale=torch.exp(self.theta_vector), - layer_weights=self.layer_weights, - rebuild_model=True, - save_gram_matrix=False, - gp_fit=False, - ) - - K_s = K_sub_full[: self.n :, self.n :] - K_ss = K_sub_full[self.n :, self.n :] - mu_s = K_s.t() @ self.K_i @ self.y - cov_s_full = K_ss - K_s.t() @ self.K_i @ K_s - cov_s = torch.clamp(cov_s_full, self.likelihood, np.inf) - mu_s = unnormalize_y(mu_s, self.y_mean, self.y_std) - std_s = torch.sqrt(cov_s) - std_s = unnormalize_y(std_s, None, self.y_std, True) - cov_s = std_s**2 - if preserve_comp_graph: - del combined_kernel_copy - return mu_s, cov_s - - @property - def x(self): - return self.x_configs - - def _reset_XY(self, train_x: Iterable, train_y: Union[Iterable, torch.Tensor]): - self.x_configs = train_x # type: ignore[assignment] - self.n = len(self.x_configs) - train_y_tensor = ( - train_y - if isinstance(train_y, torch.Tensor) - else torch.tensor(train_y, dtype=torch.get_default_dtype()) - ) - self.y_ = train_y_tensor - self.y, self.y_mean, self.y_std = normalize_y(train_y_tensor) - # The Gram matrix of the training data - self.K_i, self.logDetK = None, None - - def dmu_dphi( - self, - X_s=None, - # compute_grad_var=False, - average_across_features=True, - average_across_occurrences=False, - ): - r""" - Compute the derivative of the GP posterior mean at the specified input location with respect to the - *vector embedding* of the graph (e.g., if using WL-subtree, this function computes the gradient wrt - each subtree pattern) - - The derivative is given by - $ - \frac{\partial \mu^*}{\partial \phi ^*} = \frac{\partial K(\phi, \phi^*)}{\partial \phi ^ *}K(\phi, \phi)^{-1} - \mathbf{y} - $ - - which derives directly from the GP posterior mean formula, and since the term $K(\phi, \phi)^{-1} and \mathbf{y} - are both independent of the testing points (X_s, or \phi^*}, the posterior gradient is simply the matrix - produce of the kernel gradient with the inverse Gram and the training label vector. - - Parameters - ---------- - X_s: The locations on which the GP posterior mean derivatives should be evaluated. If left blank, the - derivatives will be evaluated at the training points. - - compute_grad_var: bool. If true, also compute the gradient variance. - - The derivative of GP is also a GP, and thus the predictive distribution of the posterior gradient is Gaussian. - The posterior mean is given above, and the posterior variance is: - $ - \mathbb{V}[\frac{\partial f^*}{\partial \phi^*}]= \frac{\partial^2k(\phi^*, \phi^*)}{\partial \phi^*^2} - - \frac{\partial k(\phi^*, \Phi)}{\partial \phi^*}K(X, X)^{-1}\frac{\partial k{(\Phi, \phi^*)}}{\partial \phi^*} - $ - - Returns - ------- - list of K torch.Tensor of the shape N x2 D, where N is the length of the X_s list (each element of which is a - networkx graph), K is the number of kernel_operators in the combined kernel and D is the dimensionality of the - feature vector (this is determined by the specific graph kernel. 
- - OR - - list of K torch.Tensor of shape D, if averaged_over_samples flag is enabled. - """ - if self.K_i is None or self.logDetK is None: - raise ValueError( - "Inverse of Gram matrix is not instantiated. Please call the optimize " - "function to fit on the training data first!" - ) - if self.n_vector_kernels: - if X_s is not None: - V_s = self._get_vectorial_features(X_s, self.vectorial_feactures) - V_s, _, _ = standardize_x(V_s, self.x_features_min, self.x_features_max) - else: - V_s = self.x_features - X_s = self.x[:] - else: - V_s = None - X_s = X_s if X_s is not None else self.x[:] - - alpha = (self.K_i @ self.y).double().reshape(1, -1) - dmu_dphi = [] - # dmu_dphi_var = [] if compute_grad_var else None - - Ks_handles = [] - feature_matrix = [] - for j, x_s in enumerate(X_s): - jacob_vecs = [] - if V_s is None: - handles = self.combined_kernel.forward_t( - self.weights, - [x_s], - ) - else: - handles = self.combined_kernel.forward_t(self.weights, [x_s], V_s[j]) - Ks_handles.append(handles) - # Each handle is a 2-tuple. first element is the Gram matrix, second element is the leaf variable - feature_vectors = [] - for handle in handles: - k_s, y, _ = handle - # k_s is output, leaf is input, alpha is the K_i @ y term which is constant. - # When compute_grad_var is not required, computational graphs do not need to be saved. - jacob_vecs.append( - torch.autograd.grad( - outputs=k_s, inputs=y, grad_outputs=alpha, retain_graph=False - )[0] - ) - feature_vectors.append(y) - feature_matrix.append(feature_vectors) - jacob_vecs = torch.cat(jacob_vecs) - dmu_dphi.append(jacob_vecs) - - feature_matrix = torch.cat([f[0] for f in feature_matrix]) - if average_across_features: - dmu_dphi = torch.cat(dmu_dphi) - # compute the weighted average of the gradient across N_t. - # feature matrix is of shape N_t x K x D - avg_mu, avg_var, incidences = get_grad( - dmu_dphi, feature_matrix, average_across_occurrences - ) - return avg_mu, avg_var, incidences - return ( - dmu_dphi, - None, - feature_matrix.sum(dim=0) if average_across_occurrences else feature_matrix, - ) - - -def get_grad(grad_matrix, feature_matrix, average_occurrences=False): - r""" - Average across the samples via a Monte Carlo sampling scheme. Also estimates the - empirical variance. :param average_occurrences: if True, do a weighted summation - based on the frequency distribution of the occurrence to compute a gradient *per - each feature*. Otherwise, each different occurrence (\phi_i = k) will get a - different gradient estimate. 
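    For example, a self-contained sketch of the occurrence-weighted branch with made-up values:

        import torch

        grads = torch.tensor([0.2, 0.4, 0.4])  # gradient d mu / d phi_d per sample
        feats = torch.tensor([1.0, 1.0, 2.0])  # feature value phi_d per sample

        _, idx, counts = torch.unique(feats, return_inverse=True, return_counts=True)
        w = counts[idx].float()
        w = w / w.sum()                        # occurrence weights: [0.4, 0.4, 0.2]
        mean = (w * grads).sum()               # weighted mean gradient: 0.32
        var = (w * grads**2).sum() - mean**2   # empirical variance: ~0.0096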
- """ - assert grad_matrix.shape == feature_matrix.shape - # Prune out the all-zero columns that pop up sometimes - valid_cols = [] - for col_idx in range(feature_matrix.size(1)): - if not torch.all(feature_matrix[:, col_idx] == 0): - valid_cols.append(col_idx) - feature_matrix = feature_matrix[:, valid_cols] - grad_matrix = grad_matrix[:, valid_cols] - - _, D = feature_matrix.shape - if average_occurrences: - avg_grad = torch.zeros(D) - avg_grad_var = torch.zeros(D) - for d in range(D): - current_feature = feature_matrix[:, d].clone().detach() - instances, indices, counts = torch.unique( - current_feature, return_inverse=True, return_counts=True - ) - weight_vector = torch.tensor([counts[i] for i in indices]).type(torch.float) - weight_vector /= weight_vector.sum() - mean = torch.sum(weight_vector * grad_matrix[:, d]) - # Compute the empirical variance of gradients - variance = torch.sum(weight_vector * grad_matrix[:, d] ** 2) - mean**2 - avg_grad[d] = mean - avg_grad_var[d] = variance - return avg_grad, avg_grad_var, feature_matrix.sum(dim=0) - else: - # The maximum number possible occurrences -- 7 is an example, if problem occurs, maybe we can increase this - # number. But for now, for both NAS-Bench datasets, this should be more than enough! - max_occur = 7 - avg_grad = torch.zeros(D, max_occur) - avg_grad_var = torch.zeros(D, max_occur) - incidences = torch.zeros(D, max_occur) - for d in range(D): - current_feature = feature_matrix[:, d].clone().detach() - instances, indices, counts = torch.unique( - current_feature, return_inverse=True, return_counts=True - ) - for i, val in enumerate(instances): - # Find index of all feature counts that are equal to the current val - feature_at_val = grad_matrix[current_feature == val] - avg_grad[d, int(val)] = torch.mean(feature_at_val) - avg_grad_var[d, int(val)] = torch.var(feature_at_val) - incidences[d, int(val)] = counts[i] - return avg_grad, avg_grad_var, incidences - - -# Optimize Graph kernel -def getBack(var_grad_fn, logger): - logger.debug(var_grad_fn) - for n in var_grad_fn.next_functions: - if n[0]: - try: - tensor = getattr(n[0], "variable") - logger.debug(n[0]) - logger.debug(f"Tensor with grad found: {tensor}") - logger.debug(f" - gradient: {tensor.grad}") - except AttributeError: - getBack(n[0], logger) - - -def _grid_search_wl_kernel( - k: WeisfilerLehman, - subtree_candidates, - train_x: list, - train_y: torch.Tensor, - lik: float, - subtree_prior=None, - lengthscales=None, - lengthscales_prior=None, - gpytorch_kinv: bool = False, -): - """Optimize the *discrete hyperparameters* of Weisfeiler Lehman kernel. 
- k: a Weisfeiler-Lehman kernel instance - hyperparameter_candidate: list of candidate hyperparameter to try - train_x: the train data - train_y: the train label - lik: likelihood - lengthscale: if using RBF kernel for successive embedding, the list of lengthscale to be grid searched over - """ - # lik = 1e-6 - assert len(train_x) == len(train_y) - best_nlml = torch.tensor(np.inf) - best_subtree_depth = None - best_lengthscale = None - best_K = None - if lengthscales is not None and k.se is not None: - candidates = [(h_, l_) for h_ in subtree_candidates for l_ in lengthscales] - else: - candidates = [(h_, None) for h_ in subtree_candidates] - - for i in candidates: - if k.se is not None: - k.change_se_params({"lengthscale": i[1]}) - k.change_kernel_params({"h": i[0]}) - K = k.fit_transform(train_x, rebuild_model=True, save_gram_matrix=True) - K_i, logDetK = compute_pd_inverse(K, lik, gpytorch_kinv) - nlml = -compute_log_marginal_likelihood(K_i, logDetK, train_y) - if nlml < best_nlml: - best_nlml = nlml - best_subtree_depth, best_lengthscale = i - best_K = torch.clone(K) - k.change_kernel_params({"h": best_subtree_depth}) - if k.se is not None: - k.change_se_params({"lengthscale": best_lengthscale}) - k._gram = best_K - - -def get_theta_vector(vectorial_features): - if vectorial_features is None: - return None - theta_vector = {} - for key, dim in vectorial_features.items(): - t = torch.ones(dim) - if t.shape[0] > 1: - t.requires_grad_(True) - theta_vector[key] = t - return theta_vector - - -def normalize_y(y: torch.Tensor): - y_mean = torch.mean(y) if isinstance(y, torch.Tensor) else np.mean(y) - y_std = torch.std(y) if isinstance(y, torch.Tensor) else np.std(y) - if y_std == 0: - y_std = 1 - y = (y - y_mean) / y_std - return y, y_mean, y_std - - -def unnormalize_y(y, y_mean, y_std, scale_std=False): - """Similar to the undoing of the pre-processing step above, but on the output predictions""" - if not scale_std: - y = y * y_std + y_mean - else: - y *= y_std - return y - - -def standardize_x( - x: torch.Tensor, x_min: torch.Tensor = None, x_max: torch.Tensor = None -): - """Standardize the vectorial input into a d-dimensional hypercube [0, 1]^d, where d is the number of features. - if x_min ond x_max are supplied, x2 will be standardised using these instead. This is used when standardising the - validation/test inputs. - """ - if (x_min is not None and x_max is None) or (x_min is None and x_max is not None): - raise ValueError( - "Either *both* or *neither* of x_min, x_max need to be supplied!" - ) - if x_min is None: - x_min = torch.min(x, 0)[0] - x_max = torch.max(x, 0)[0] - x = (x - x_min) / (x_max - x_min) - return x, x_min, x_max - - -def compute_log_marginal_likelihood( - K_i: torch.Tensor, - logDetK: torch.Tensor, - y: torch.Tensor, - normalize: bool = True, - log_prior_dist=None, -): - """Compute the zero mean Gaussian process log marginal likelihood given the inverse of Gram matrix K(x2,x2), its - log determinant, and the training label vector y. - Option: - - normalize: normalize the log marginal likelihood by the length of the label vector, as per the gpytorch - routine. - - prior: A pytorch distribution object. 
If specified, the hyperparameter prior will be taken into consideration and - we use Type-II MAP instead of Type-II MLE (compute log_posterior instead of log_evidence) - """ - lml = ( - -0.5 * y.t() @ K_i @ y - + 0.5 * logDetK - - y.shape[0] - / 2.0 - * torch.log( - 2 - * torch.tensor( - np.pi, - ) - ) - ) - if log_prior_dist is not None: - lml -= log_prior_dist - return lml / y.shape[0] if normalize else lml - - -def generate_h_combo_candidates(hierarchy_consider): - h_range_all_hierarchy = [range(min(hier + 2, 4)) for hier in hierarchy_consider] - h_range_all_hierarchy = [range(5)] + h_range_all_hierarchy - h_combo_all = list(itertools.product(*h_range_all_hierarchy)) - h_combo_sub = [] - for h_combo in h_combo_all: - sorted_h_combo = sorted(h_combo) - if sorted_h_combo not in h_combo_sub: - h_combo_sub.append(sorted_h_combo) - return h_combo_sub - - -def compute_pd_inverse( - K: torch.tensor, jitter: float = 1e-5, gpytorch_kinv: bool = False -): - """Compute the inverse of a postive-(semi)definite matrix K using Cholesky inversion.""" - if gpytorch_kinv: - Kc = psd_safe_cholesky(K) - try: - Kc.required_grad = True - except Exception: - Kc = torch.Tensor(Kc) - else: - n = K.shape[0] - assert ( - isinstance(jitter, float) or jitter.ndim == 0 - ), "only homoscedastic noise variance is allowed here!" - is_successful = False - fail_count = 0 - max_fail = 3 - while fail_count < max_fail and not is_successful: - try: - jitter_diag = jitter * torch.eye(n, device=K.device) * 10**fail_count - K_ = K + jitter_diag - Kc = torch.linalg.cholesky(K_) - is_successful = True - except RuntimeError: - fail_count += 1 - if not is_successful: - raise RuntimeError( - f"Gram matrix not positive definite despite of jitter:\n{K}" - ) - - logDetK = -2 * torch.sum(torch.log(torch.diag(Kc))) - K_i = torch.cholesky_inverse(Kc) - return K_i.to(torch.get_default_dtype()), logDetK.to(torch.get_default_dtype()) diff --git a/neps/optimizers/bayesian_optimization/optimizer.py b/neps/optimizers/bayesian_optimization/optimizer.py index 9fc3aeaec..471fd0706 100644 --- a/neps/optimizers/bayesian_optimization/optimizer.py +++ b/neps/optimizers/bayesian_optimization/optimizer.py @@ -1,46 +1,57 @@ from __future__ import annotations -import random -from typing import Any, TYPE_CHECKING, Literal +import math +from collections.abc import Mapping +from typing import TYPE_CHECKING, Any from typing_extensions import override -from neps.state.optimizer import BudgetInfo, OptimizationState -from neps.utils.types import ConfigResult, RawConfig -from neps.utils.common import instance_from_map -from neps.search_spaces import ( - CategoricalParameter, - ConstantParameter, - FloatParameter, - IntegerParameter, - SearchSpace, -) -from neps.optimizers.base_optimizer import BaseOptimizer -from neps.optimizers.bayesian_optimization.acquisition_functions import ( - AcquisitionMapping, - DecayingPriorWeightedAcquisition, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers import ( - AcquisitionSamplerMapping, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - AcquisitionSampler, +import torch +from botorch.acquisition import LinearMCObjective +from botorch.acquisition.logei import qLogNoisyExpectedImprovement + +from neps.optimizers.base_optimizer import BaseOptimizer, SampledConfig +from neps.optimizers.bayesian_optimization.models.gp import ( + encode_trials_for_gp, + fit_and_acquire_from_gp, + make_default_single_obj_gp, ) -from 
neps.optimizers.bayesian_optimization.kernels.get_kernels import get_kernels
-from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping
+from neps.optimizers.initial_design import make_initial_design
+from neps.sampling import Prior
+from neps.search_spaces.encoding import ConfigEncoder

 if TYPE_CHECKING:
-    from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import (
-        BaseAcquisition,
-    )
-
-# TODO(eddiebergman): Why not just include in the definition of the parameters.
-CUSTOM_FLOAT_CONFIDENCE_SCORES = dict(FloatParameter.DEFAULT_CONFIDENCE_SCORES)
-CUSTOM_FLOAT_CONFIDENCE_SCORES.update({"ultra": 0.05})
-
-CUSTOM_CATEGORICAL_CONFIDENCE_SCORES = dict(
-    CategoricalParameter.DEFAULT_CONFIDENCE_SCORES
-)
-CUSTOM_CATEGORICAL_CONFIDENCE_SCORES.update({"ultra": 8})
+    from neps.search_spaces import SearchSpace
+    from neps.state import BudgetInfo, Trial
+
+
+def _pibo_exp_term(
+    n_sampled_already: int,
+    ndims: int,
+    initial_design_size: int,
+) -> float:
+    # pibo paper
+    # https://arxiv.org/pdf/2204.11051
+    #
+    # they use some constant determined from max problem budget. seems impractical,
+    # given we might not know the final budget (i.e. imagine you iteratively increase
+    # the budget as you go along).
+    #
+    # instead, we base it on the fact that in lower dimensions, we don't need to rely
+    # on the prior for too long as the amount of space you need to cover around the
+    # prior is fairly low. effectively, since the gp needs few samples to
+    # model pretty effectively in low dimensions, we can derive the utility from
+    # the prior pretty quickly.
+    #
+    # however, for high dimensional settings, we want to rely on the prior
+    # for longer as the number of samples needed to model the area around the prior
+    # is much larger, and deriving the utility will take longer.
+    #
+    # in the end, we would like some curve going from 1->0 as n->inf, where `n` is
+    # the number of samples we have done so far.
+    # the easiest function that does this is `exp(-n)`, with some discounting of `n`
+    # dependent on the number of dimensions.
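+    #
+    # for intuition, with initial_design_size=5 and n_sampled_already=10,
+    # i.e. 5 bo samples past the initial design: ndims=2 gives
+    # exp(-5/2) ~= 0.08 (the prior has mostly faded), while ndims=20 gives
+    # exp(-5/20) ~= 0.78 (the prior is still trusted).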
+ n_bo_samples = n_sampled_already - initial_design_size + return math.exp(-n_bo_samples / ndims) class BayesianOptimization(BaseOptimizer): @@ -49,271 +60,188 @@ class BayesianOptimization(BaseOptimizer): def __init__( self, pipeline_space: SearchSpace, - initial_design_size: int = 10, - surrogate_model: str | Any = "gp", - surrogate_model_args: dict = None, - optimal_assignment: bool = False, - domain_se_kernel: str = None, - graph_kernels: list = None, - hp_kernels: list = None, - acquisition: str | BaseAcquisition = "EI", - log_prior_weighted: bool = False, - acquisition_sampler: str | AcquisitionSampler = "mutation", - random_interleave_prob: float = 0.0, - patience: int = 100, - budget: None | int | float = None, - ignore_errors: bool = False, - loss_value_on_error: None | float = None, - cost_value_on_error: None | float = None, - logger=None, - disable_priors: bool = False, - prior_confidence: Literal["low", "medium", "high"] = None, - sample_default_first: bool = False, + *, + initial_design_size: int | None = None, + use_priors: bool = False, + use_cost: bool = False, + cost_on_log_scale: bool = True, + sample_prior_first: bool = False, + device: torch.device | None = None, + encoder: ConfigEncoder | None = None, + seed: int | None = None, + max_cost_total: Any | None = None, # TODO: remove + surrogate_model: Any | None = None, # TODO: remove + objective_to_minimize_value_on_error: Any | None = None, # TODO: remove + cost_value_on_error: Any | None = None, # TODO: remove + ignore_errors: Any | None = None, # TODO: remove ): """Initialise the BO loop. Args: pipeline_space: Space in which to search - initial_design_size: Number of 'x' samples that need to be evaluated before - selecting a sample using a strategy instead of randomly. - surrogate_model: Surrogate model - surrogate_model_args: Arguments that will be given to the surrogate model - (the Gaussian processes model). - optimal_assignment: whether the optimal assignment kernel should be used. - domain_se_kernel: Stationary kernel name - graph_kernels: Kernels for NAS - hp_kernels: Kernels for HPO - acquisition: Acquisition strategy - log_prior_weighted: if to use log for prior - acquisition_sampler: Acquisition function fetching strategy - random_interleave_prob: Frequency at which random configurations are sampled - instead of configurations from the acquisition strategy. - patience: How many times we try something that fails before giving up. - budget: Maximum budget - ignore_errors: Ignore hyperparameter settings that threw an error and do not - raise an error. Error configs still count towards max_evaluations_total. - loss_value_on_error: Setting this and cost_value_on_error to any float will - supress any error during bayesian optimization and will use given loss - value instead. default: None - cost_value_on_error: Setting this and loss_value_on_error to any float will - supress any error during bayesian optimization and will use given cost - value instead. default: None - logger: logger object, or None to use the neps logger - disable_priors: allows to choose between BO and piBO regardless the search - space definition - sample_default_first: if True and a default prior exists, the first sampel is - the default configuration + initial_design_size: Number of samples used before using the surrogate model. + If None, it will use the number of parameters in the search space. + use_priors: Whether to use priors set on the hyperparameters during search. 
+            use_cost: Whether to consider reported "cost" from configurations in decision
+                making. If True, the optimizer will weigh potential candidates by how much
+                they cost, incentivising the optimizer to explore cheap, well-performing
+                configurations. This amount is modified over time.
+
+                !!! warning
+
+                    If using `cost`, cost must be provided in the reports of the trials.
+
+            cost_on_log_scale: Whether to use the log of the cost when using cost.
+            sample_prior_first: Whether to sample the default configuration first.
+            seed: Seed to use for the random number generator of samplers.
+            device: Device to use for the optimization.
+            encoder: Encoder to use for encoding the configurations. If None, it
+                will use the default encoder.

         Raises:
-            ValueError: if patience < 1
             ValueError: if initial_design_size < 1
-            ValueError: if random_interleave_prob is not between 0.0 and 1.0
             ValueError: if no kernel is provided
         """
-        if disable_priors:
-            pipeline_space.has_prior = False
-            self.prior_confidence = None
-        else:
-            self.prior_confidence = prior_confidence
-
-        super().__init__(
-            pipeline_space=pipeline_space,
-            patience=patience,
-            logger=logger,
-            budget=budget,
-            loss_value_on_error=loss_value_on_error,
-            cost_value_on_error=cost_value_on_error,
-            ignore_errors=ignore_errors,
-        )
-
-        if initial_design_size < 1:
+        if seed is not None:
+            raise NotImplementedError(
+                "Seed is not implemented yet for BayesianOptimization"
+            )
+        if any(pipeline_space.graphs):
+            raise NotImplementedError("Only supports flat search spaces for now!")
+        if any(pipeline_space.fidelities):
             raise ValueError(
-                "BayesianOptimization needs initial_design_size to be at least 1"
+                "Fidelities are not supported for BayesianOptimization."
+                " Please consider setting the fidelity to a constant value."
+ f" Got: {pipeline_space.fidelities}" ) - if not 0 <= random_interleave_prob <= 1: - raise ValueError("random_interleave_prob should be between 0.0 and 1.0") - self._initial_design_size = initial_design_size - self._random_interleave_prob = random_interleave_prob - self._num_train_x: int = 0 - self._num_error_evaluations: int = 0 - self._pending_evaluations: list = [] - self._model_update_failed: bool = False - self.sample_default_first = sample_default_first + super().__init__(pipeline_space=pipeline_space) - surrogate_model_args = surrogate_model_args or {} - graph_kernels, hp_kernels = get_kernels( - self.pipeline_space, - domain_se_kernel, - graph_kernels, - hp_kernels, - optimal_assignment, + self.encoder = encoder or ConfigEncoder.from_space( + space=pipeline_space, + include_constants_when_decoding=True, ) - if "graph_kernels" not in surrogate_model_args: - surrogate_model_args["graph_kernels"] = graph_kernels - if "hp_kernels" not in surrogate_model_args: - surrogate_model_args["hp_kernels"] = hp_kernels - - if ( - not surrogate_model_args["graph_kernels"] - and not surrogate_model_args["hp_kernels"] - ): - raise ValueError("No kernels are provided!") - - if "vectorial_features" not in surrogate_model_args: - surrogate_model_args["vectorial_features"] = ( - self.pipeline_space.get_vectorial_dim() + self.prior = Prior.from_space(pipeline_space) if use_priors is True else None + self.use_cost = use_cost + self.use_priors = use_priors + self.cost_on_log_scale = cost_on_log_scale + self.device = device + self.sample_prior_first = sample_prior_first + + if initial_design_size is not None: + self.n_initial_design = initial_design_size + else: + self.n_initial_design = len(pipeline_space.numerical) + len( + pipeline_space.categoricals ) - self.surrogate_model = instance_from_map( - SurrogateModelMapping, - surrogate_model, - name="surrogate model", - kwargs=surrogate_model_args, - ) - - self.acquisition = instance_from_map( - AcquisitionMapping, - acquisition, - name="acquisition function", + @override + def ask( + self, + trials: Mapping[str, Trial], + budget_info: BudgetInfo | None = None, + n: int | None = None, + ) -> SampledConfig | list[SampledConfig]: + _n = 1 if n is None else n + n_sampled = len(trials) + config_ids = iter(str(i + 1) for i in range(n_sampled, n_sampled + _n)) + space = self.pipeline_space + + sampled_configs: list[SampledConfig] = [] + + # If the amount of configs evaluated is less than the initial design + # requirement, keep drawing from initial design + n_evaluated = sum( + 1 + for trial in trials.values() + if trial.report is not None and trial.report.objective_to_minimize is not None ) - if self.pipeline_space.has_prior: - self.acquisition = DecayingPriorWeightedAcquisition( - self.acquisition, log=log_prior_weighted + if n_evaluated < self.n_initial_design: + design_samples = make_initial_design( + space=space, + encoder=self.encoder, + sample_prior_first=self.sample_prior_first if n_sampled == 0 else False, + sampler=self.prior if self.prior is not None else "uniform", + seed=None, # TODO: Seeding + sample_size=_n, + sample_fidelity="max", ) - self.acquisition_sampler = instance_from_map( - AcquisitionSamplerMapping, - acquisition_sampler, - name="acquisition sampler function", - kwargs={"patience": self.patience, "pipeline_space": self.pipeline_space}, - ) - self._enhance_priors() - - def _enhance_priors(self, confidence_score: dict = None) -> None: - """Only applicable when priors are given along with a confidence. 
- - Args: - confidence_score: dict - The confidence scores for the 2 major variable types. - Example: {"categorical": 5.2, "numeric": 0.15} - """ - if self.prior_confidence is None: - return - if ( - hasattr(self.pipeline_space, "has_prior") - and not self.pipeline_space.has_prior - ): - return - for k, v in self.pipeline_space.items(): - if v.is_fidelity or isinstance(v, ConstantParameter): - continue - elif isinstance(v, (FloatParameter, IntegerParameter)): - if confidence_score is None: - confidence = CUSTOM_FLOAT_CONFIDENCE_SCORES[self.prior_confidence] - else: - confidence = confidence_score["numeric"] - self.pipeline_space[k].default_confidence_score = confidence - elif isinstance(v, CategoricalParameter): - if confidence_score is None: - confidence = CUSTOM_CATEGORICAL_CONFIDENCE_SCORES[ - self.prior_confidence - ] - else: - confidence = confidence_score["categorical"] - self.pipeline_space[k].default_confidence_score = confidence - return - - def is_init_phase(self) -> bool: - """Decides if optimization is still under the warmstart phase/model-based search.""" - if self._num_train_x >= self._initial_design_size: - return False - return True + sampled_configs.extend( + [ + SampledConfig(id=config_id, config=config) + for config_id, config in zip( + config_ids, + design_samples, + strict=False, + ) + ] + ) + if len(sampled_configs) == _n: + if n is None: + return sampled_configs[0] - @override - def load_optimization_state( - self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], - budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - train_x = [el.config for el in previous_results.values()] - train_y = [self.get_loss(el.result) for el in previous_results.values()] - if self.ignore_errors: - train_x = [x for x, y in zip(train_x, train_y) if y != "error"] - train_y_no_error = [y for y in train_y if y != "error"] - self._num_error_evaluations = len(train_y) - len(train_y_no_error) - train_y = train_y_no_error - self._num_train_x = len(train_x) - self._pending_evaluations = [el for el in pending_evaluations.values()] - if not self.is_init_phase(): - try: - if len(self._pending_evaluations) > 0: - # We want to use hallucinated results for the evaluations that have - # not finished yet. For this we fit a model on the finished - # evaluations and add these to the other results to fit another model. - self.surrogate_model.fit(train_x, train_y) - ys, _ = self.surrogate_model.predict(self._pending_evaluations) - train_x += self._pending_evaluations - train_y += list(ys.detach().numpy()) + return sampled_configs - self.surrogate_model.fit(train_x, train_y) - self.acquisition.set_state(self.surrogate_model) - self.acquisition_sampler.set_state(x=train_x, y=train_y) + # Otherwise, we encode trials and setup to fit and acquire from a GP + data, encoder = encode_trials_for_gp( + trials, space, device=self.device, encoder=self.encoder + ) - self._model_update_failed = False - except RuntimeError as runtime_error: - self.logger.exception( - "Model could not be updated due to below error. Sampling will not use" - " the model." + cost_percent = None + if self.use_cost: + if budget_info is None: + raise ValueError( + "Must provide a 'cost' to configurations if using cost" + " with BayesianOptimization." 
                )
-                if self.loss_value_on_error is None or self.cost_value_on_error is None:
-                    raise ValueError(
-                        "A RuntimeError happened and "
-                        "loss_value_on_error or cost_value_on_error "
-                        "value is not provided, please fix the error or "
-                        "provide the values to continue without "
-                        "updating the model"
-                    ) from runtime_error
-                self._model_update_failed = True
-
-    def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]:
-        if (
-            self._num_train_x == 0
-            and self.sample_default_first
-            and self.pipeline_space.has_prior
-        ):
-            config = self.pipeline_space.sample_default_configuration(
-                patience=self.patience, ignore_fidelity=False
-            )
-        elif self._num_train_x == 0 and self._initial_design_size >= 1:
-            config = self.pipeline_space.sample(
-                patience=self.patience, user_priors=True, ignore_fidelity=False
-            )
-        elif random.random() < self._random_interleave_prob:
-            config = self.pipeline_space.sample(
-                patience=self.patience, ignore_fidelity=False
-            )
-        elif self.is_init_phase() or self._model_update_failed:
-            # initial design space
-            config = self.pipeline_space.sample(
-                patience=self.patience, user_priors=True, ignore_fidelity=False
+            if budget_info.max_cost_total is None:
+                raise ValueError("Cost budget must be set if using cost")
+            cost_percent = budget_info.used_cost_budget / budget_info.max_cost_total
+
+        # If we should use the prior, weight the acquisition function by
+        # the probability of it being sampled from the prior.
+        pibo_exp_term = None
+        prior = None
+        if self.prior:
+            pibo_exp_term = _pibo_exp_term(
+                n_sampled, encoder.ncols, self.n_initial_design
             )
-        else:
-            for _ in range(self.patience):
-                config = self.acquisition_sampler.sample(self.acquisition)
-                if config not in self._pending_evaluations:
-                    break
-            else:
-                config = self.pipeline_space.sample(
-                    patience=self.patience, user_priors=True, ignore_fidelity=False
-                )
+            # If the exp term is insignificant, skip prior acq. weighting
+            prior = None if pibo_exp_term < 1e-4 else self.prior
+
+        gp = make_default_single_obj_gp(x=data.x, y=data.y, encoder=encoder)
+        candidates = fit_and_acquire_from_gp(
+            gp=gp,
+            x_train=data.x,
+            encoder=encoder,
+            acquisition=qLogNoisyExpectedImprovement(
+                model=gp,
+                X_baseline=data.x,
+                # Unfortunately, there's no option to indicate that we minimize
+                # the AcqFunction so we need to do some kind of transformation.
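+                # For example, with weights=[-1.0] the observed losses
+                # [0.3, 0.1] are scored as [-0.3, -0.1], so maximizing the
+                # objective treats the smallest loss as the incumbent.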
+                # https://github.com/pytorch/botorch/issues/2316#issuecomment-2085964607
+                objective=LinearMCObjective(weights=torch.tensor([-1.0])),
+                X_pending=data.x_pending,
+                prune_baseline=True,
+            ),
+            prior=prior,
+            n_candidates_required=_n,
+            pibo_exp_term=pibo_exp_term,
+            costs=data.cost if self.use_cost else None,
+            cost_percentage_used=cost_percent,
+            costs_on_log_scale=self.cost_on_log_scale,
+        )

-        config_id = str(
-            self._num_train_x
-            + self._num_error_evaluations
-            + len(self._pending_evaluations)
-            + 1
+        configs = encoder.decode(candidates)
+        sampled_configs.extend(
+            [
+                SampledConfig(id=config_id, config=config)
+                for config_id, config in zip(config_ids, configs, strict=True)
+            ]
         )
-        return config.hp_values(), config_id, None
+
+        if n is None:
+            return sampled_configs[0]
+
+        return sampled_configs
diff --git a/neps/optimizers/default_searchers/asha.yaml b/neps/optimizers/default_searchers/asha.yaml
index 0b140484b..5a4fcc82b 100644
--- a/neps/optimizers/default_searchers/asha.yaml
+++ b/neps/optimizers/default_searchers/asha.yaml
@@ -5,8 +5,8 @@ early_stopping_rate: 0
 initial_design_type: max_budget
 use_priors: false
 random_interleave_prob: 0.0
-sample_default_first: false
-sample_default_at_target: false
+sample_prior_first: false
+sample_prior_at_target: false

 # Arguments that can not be modified by the user
 # sampling_policy: RandomUniformPolicy
diff --git a/neps/optimizers/default_searchers/asha_prior.yaml b/neps/optimizers/default_searchers/asha_prior.yaml
index 95bacb6cc..4122c7972 100644
--- a/neps/optimizers/default_searchers/asha_prior.yaml
+++ b/neps/optimizers/default_searchers/asha_prior.yaml
@@ -5,8 +5,8 @@ early_stopping_rate: 0
 initial_design_type: max_budget
 prior_confidence: medium # or {"low", "high"}
 random_interleave_prob: 0.0
-sample_default_first: false
-sample_default_at_target: false
+sample_prior_first: false
+sample_prior_at_target: false

 # Arguments that can not be modified by the user
 # sampling_policy: FixedPriorPolicy
diff --git a/neps/optimizers/default_searchers/bayesian_optimization.yaml b/neps/optimizers/default_searchers/bayesian_optimization.yaml
index cf3717ab3..49e9fbae6 100644
--- a/neps/optimizers/default_searchers/bayesian_optimization.yaml
+++ b/neps/optimizers/default_searchers/bayesian_optimization.yaml
@@ -1,17 +1,7 @@
 strategy: bayesian_optimization
 # Arguments that can be modified by the user
-initial_design_size: 10
-surrogate_model: gp # or {"gp_hierarchy"}
-acquisition: EI # or {"LogEI", "AEI"}
-log_prior_weighted: false
-acquisition_sampler: mutation # or {"random", "evolution"}
-random_interleave_prob: 0.0
-disable_priors: true
-sample_default_first: false
-
-# Other arguments:
-# surrogate_model_args: None # type: dict
-# optimal_assignment: false # type: bool
-# domain_se_kernel: None # type: str
-# graph_kernels: None # type: list
-# hp_kernels: None # type: list
+initial_design_size: null # Defaults to a value depending on the number of hyperparameters
+use_cost: false # Whether to factor in cost when selecting new configurations
+use_priors: false # Whether to use user-set priors in optimization
+sample_prior_first: false # Whether to sample the default configuration first
+device: null # Device to load the Gaussian process model on with torch
diff --git a/neps/optimizers/default_searchers/hyperband.yaml b/neps/optimizers/default_searchers/hyperband.yaml
index b560af485..77bfd5a88 100644
--- a/neps/optimizers/default_searchers/hyperband.yaml
+++ b/neps/optimizers/default_searchers/hyperband.yaml
@@ -4,8 +4,8 @@ eta: 3
 initial_design_type: max_budget
 use_priors: false
 random_interleave_prob: 0.0
-sample_default_first: false
-sample_default_at_target: false
+sample_prior_first: false
+sample_prior_at_target: false

 # Arguments that can not be modified by the user
 # sampling_policy: RandomUniformPolicy
diff --git a/neps/optimizers/default_searchers/ifbo.yaml b/neps/optimizers/default_searchers/ifbo.yaml
new file mode 100644
index 000000000..3e9ecb2ba
--- /dev/null
+++ b/neps/optimizers/default_searchers/ifbo.yaml
@@ -0,0 +1,11 @@
+strategy: ifbo
+surrogate_model_args:
+  version: "0.0.1"
+  target_path: null # Defaults to current_working_directory/.model
+step_size: 1 # Step size to use for partial evaluations
+use_priors: false # Whether to use priors set through `prior` and `prior_confidence`
+sample_prior_first: false # Whether to sample the default configuration first
+sample_prior_at_target: false # Whether to evaluate the default at the maximum fidelity or not
+initial_design_size: "ndim" # How many initial samples to try before using the model
+n_acquisition_new_configs: 1_000 # Number of new configs to sample during acquisition
+device: null # Device to load the model on with torch
diff --git a/neps/optimizers/default_searchers/mobster.yaml b/neps/optimizers/default_searchers/mobster.yaml
index 9ce821b3d..d1f0ed0a8 100644
--- a/neps/optimizers/default_searchers/mobster.yaml
+++ b/neps/optimizers/default_searchers/mobster.yaml
@@ -4,14 +4,13 @@ eta: 3
 initial_design_type: max_budget
 use_priors: false
 random_interleave_prob: 0.0
-sample_default_first: false
-sample_default_at_target: false
+sample_prior_first: false
+sample_prior_at_target: false

 # arguments for model
-surrogate_model: gp # or {"gp_hierarchy"}
+surrogate_model: gp
 acquisition: EI # or {"LogEI", "AEI"}
 log_prior_weighted: false
-acquisition_sampler: random # or {"mutation", "evolution"}

 # Arguments that can not be modified by the user
 # sampling_policy: RandomUniformPolicy
diff --git a/neps/optimizers/default_searchers/pibo.yaml b/neps/optimizers/default_searchers/pibo.yaml
index 9c386069b..eb44b8b2b 100644
--- a/neps/optimizers/default_searchers/pibo.yaml
+++ b/neps/optimizers/default_searchers/pibo.yaml
@@ -1,18 +1,7 @@
 strategy: pibo
 # Arguments that can be modified by the user
-initial_design_size: 10
-surrogate_model: gp # or {"gp_hierarchy"}
-acquisition: EI # or {"LogEI", "AEI"}
-log_prior_weighted: false
-acquisition_sampler: mutation # or {"random", "evolution"}
-random_interleave_prob: 0.0
-disable_priors: false
-prior_confidence: medium # or {"low", "high"}
-sample_default_first: false
-
-# Other arguments:
-# surrogate_model_args: None # type: dict
-# optimal_assignment: false # type: bool
-# domain_se_kernel: None # type: str
-# graph_kernels: None # type: list
-# hp_kernels: None # type: list
+initial_design_size: null # Defaults to a value depending on the number of hyperparameters
+use_cost: false # Whether to factor in cost when selecting new configurations
+use_priors: true # Whether to use user-set priors in optimization
+sample_prior_first: true # Whether to sample the default configuration first
+device: null # Device to load the Gaussian process model on with torch
diff --git a/neps/optimizers/default_searchers/priorband.yaml b/neps/optimizers/default_searchers/priorband.yaml
index 5d9dac86e..3bb2dcc55 100644
--- a/neps/optimizers/default_searchers/priorband.yaml
+++ b/neps/optimizers/default_searchers/priorband.yaml
@@ -4,8 +4,8 @@ eta: 3
 initial_design_type: max_budget
 prior_confidence: medium # or {"low", "high"}
 random_interleave_prob: 
0.0 -sample_default_first: true -sample_default_at_target: false +sample_prior_first: true +sample_prior_at_target: false prior_weight_type: geometric inc_sample_type: mutation inc_mutation_rate: 0.5 diff --git a/neps/optimizers/default_searchers/priorband_bo.yaml b/neps/optimizers/default_searchers/priorband_bo.yaml index 5a9fd3a98..49083df25 100644 --- a/neps/optimizers/default_searchers/priorband_bo.yaml +++ b/neps/optimizers/default_searchers/priorband_bo.yaml @@ -4,8 +4,8 @@ eta: 3 initial_design_type: max_budget prior_confidence: medium # or {"low", "high"} random_interleave_prob: 0.0 -sample_default_first: true -sample_default_at_target: false +sample_prior_first: true +sample_prior_at_target: false prior_weight_type: geometric inc_sample_type: mutation inc_mutation_rate: 0.5 @@ -16,10 +16,9 @@ inc_style: dynamic model_based: true # crucial argument to set to allow model-search modelling_type: joint initial_design_size: 10 -surrogate_model: gp # or {"gp_hierarchy"} +surrogate_model: gp acquisition: EI # or {"LogEI", "AEI"} log_prior_weighted: false -acquisition_sampler: mutation # or {"random", "evolution"} # Arguments that can not be modified by the user # sampling_policy: EnsemblePolicy diff --git a/neps/optimizers/default_searchers/regularized_evolution.yaml b/neps/optimizers/default_searchers/regularized_evolution.yaml deleted file mode 100644 index 040c0b413..000000000 --- a/neps/optimizers/default_searchers/regularized_evolution.yaml +++ /dev/null @@ -1,9 +0,0 @@ -strategy: regularized_evolution -# Arguments that can be modified by the user -population_size: 30 -sample_size: 10 -assisted: false - -# Other arguments -# assisted_zero_cost_proxy: None # type: Callable -# assisted_init_population_dir: None # type: str | Path diff --git a/neps/optimizers/default_searchers/successive_halving.yaml b/neps/optimizers/default_searchers/successive_halving.yaml index d7d20c9f2..038e56efe 100644 --- a/neps/optimizers/default_searchers/successive_halving.yaml +++ b/neps/optimizers/default_searchers/successive_halving.yaml @@ -5,8 +5,8 @@ early_stopping_rate: 0 initial_design_type: max_budget use_priors: false random_interleave_prob: 0.0 -sample_default_first: false -sample_default_at_target: false +sample_prior_first: false +sample_prior_at_target: false # Arguments that can not be modified by the user # sampling_policy: RandomUniformPolicy diff --git a/neps/optimizers/default_searchers/successive_halving_prior.yaml b/neps/optimizers/default_searchers/successive_halving_prior.yaml index 7778fffdd..2b198f7b4 100644 --- a/neps/optimizers/default_searchers/successive_halving_prior.yaml +++ b/neps/optimizers/default_searchers/successive_halving_prior.yaml @@ -5,8 +5,8 @@ early_stopping_rate: 0 initial_design_type: max_budget prior_confidence: medium # or {"low", "high"} random_interleave_prob: 0.0 -sample_default_first: false -sample_default_at_target: false +sample_prior_first: false +sample_prior_at_target: false # Arguments that can not be modified by the user # sampling_policy: FixedPriorPolicy diff --git a/neps/optimizers/grid_search/optimizer.py b/neps/optimizers/grid_search/optimizer.py index 4f5ff24ef..9bef62baa 100644 --- a/neps/optimizers/grid_search/optimizer.py +++ b/neps/optimizers/grid_search/optimizer.py @@ -1,40 +1,115 @@ from __future__ import annotations import random -from typing import Any +from collections.abc import Mapping +from itertools import product +from typing import TYPE_CHECKING, Any from typing_extensions import override -from neps.state.optimizer import 
BudgetInfo -from neps.utils.types import ConfigResult, RawConfig -from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.base_optimizer import BaseOptimizer +import torch + +from neps.optimizers.base_optimizer import BaseOptimizer, SampledConfig +from neps.search_spaces import Categorical, Constant, Float, Integer +from neps.search_spaces.architecture.graph_grammar import GraphParameter +from neps.search_spaces.domain import UNIT_FLOAT_DOMAIN + +if TYPE_CHECKING: + from neps.search_spaces.search_space import SearchSpace + from neps.state.optimizer import BudgetInfo + from neps.state.trial import Trial + + +def _make_grid( + space: SearchSpace, + *, + size_per_numerical_hp: int = 10, +) -> list[dict[str, Any]]: + """Get a grid of configurations from the search space. + + For [`Numerical`][neps.search_spaces.Numerical] hyperparameters, + the parameter `size_per_numerical_hp=` is used to determine a grid. If there are + any duplicates, e.g. for an + [`Integer`][neps.search_spaces.Integer], then we will + remove duplicates. + + For [`Categorical`][neps.search_spaces.Categorical] + hyperparameters, we include all the choices in the grid. + + For [`Constant`][neps.search_spaces.Constant] hyperparameters, + we include the constant value in the grid. + + !!! note "TODO" + + Does not support graph parameters currently. + + !!! note "TODO" + + Include default hyperparameters in the grid. + If all HPs have a `default` then add a single configuration. + If only partial HPs have defaults then add all combinations of defaults, but + only to the end of the list of configs. + + Args: + size_per_numerical_hp: The size of the grid for each numerical hyperparameter. + + Returns: + A list of configurations from the search space. + """ + param_ranges: dict[str, list[Any]] = {} + for name, hp in space.hyperparameters.items(): + match hp: + # NOTE(eddiebergman): This is a temporary fix to avoid graphs + # If this is resolved, please update the docstring! 
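+            # For illustration: an Integer(1, 4) has cardinality 4, so
+            # steps = min(size_per_numerical_hp, 4) = 4 and its grid is
+            # exactly [1, 2, 3, 4]; a Float(0.0, 1.0) with the default
+            # size_per_numerical_hp=10 contributes torch.linspace(0, 1, 10).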
+ case GraphParameter(): + raise ValueError("Trying to create a grid for graphs!") + case Categorical(): + param_ranges[name] = list(hp.choices) + case Constant(): + param_ranges[name] = [hp.value] + case Integer() | Float(): + if hp.is_fidelity: + param_ranges[name] = [hp.upper] + continue + + if hp.domain.cardinality is None: + steps = size_per_numerical_hp + else: + steps = min(size_per_numerical_hp, hp.domain.cardinality) + + xs = torch.linspace(0, 1, steps=steps) + numeric_values = hp.domain.cast(xs, frm=UNIT_FLOAT_DOMAIN) + uniq_values = torch.unique(numeric_values).tolist() + param_ranges[name] = uniq_values + case _: + raise NotImplementedError(f"Unknown Parameter type: {type(hp)}\n{hp}") + + values = product(*param_ranges.values()) + keys = list(space.hyperparameters.keys()) + + return [dict(zip(keys, p, strict=False)) for p in values] class GridSearch(BaseOptimizer): - def __init__( - self, pipeline_space: SearchSpace, grid_step_size: int = 10, **optimizer_kwargs - ): - super().__init__(pipeline_space=pipeline_space, **optimizer_kwargs) - self._num_previous_configs: int = 0 - self.configs_list = self.pipeline_space.get_search_space_grid( - size_per_numerical_hp=grid_step_size, - include_endpoints=True, - ) - random.shuffle(self.configs_list) + def __init__(self, pipeline_space: SearchSpace, seed: int | None = None): + super().__init__(pipeline_space=pipeline_space) + self.configs_list = _make_grid(pipeline_space) + self.seed = seed @override - def load_optimization_state( + def ask( self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], + trials: Mapping[str, Trial], budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - self._num_previous_configs = len(previous_results) + len(pending_evaluations) - - def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: - if self._num_previous_configs > len(self.configs_list) - 1: + n: int | None = None, + ) -> SampledConfig: + assert n is None, "TODO" + _num_previous_configs = len(trials) + if _num_previous_configs > len(self.configs_list) - 1: raise ValueError("Grid search exhausted!") - config = self.configs_list[self._num_previous_configs] - config_id = str(self._num_previous_configs) - return config.hp_values(), config_id, None + + rng = random.Random(self.seed) + configs = rng.sample(self.configs_list, len(self.configs_list)) + + config = configs[_num_previous_configs] + config_id = str(_num_previous_configs) + return SampledConfig(config=config, id=config_id, previous_config_id=None) diff --git a/neps/optimizers/info.py b/neps/optimizers/info.py index 7088f341a..b2494c20c 100644 --- a/neps/optimizers/info.py +++ b/neps/optimizers/info.py @@ -1,31 +1,29 @@ from __future__ import annotations -import os +from pathlib import Path import yaml +HERE = Path(__file__).parent.resolve() + class SearcherConfigs: - """ - This class provides methods to access default configuration details + """This class provides methods to access default configuration details for NePS optimizers. """ @staticmethod - def _get_searchers_folder_path() -> str: - """ - Helper method to get the folder path for default searchers. + def _get_searchers_folder_path() -> Path: + """Helper method to get the folder path for default searchers. Returns: str: The absolute path to the default searchers folder. 
""" - script_directory = os.path.dirname(os.path.abspath(__file__)) - return os.path.join(script_directory, "default_searchers") + return HERE / "default_searchers" @staticmethod def get_searchers() -> list[str]: - """ - List all the searcher names that can be used in neps run. + """List all the searcher names that can be used in neps run. Returns: list[str]: A list of searcher names. @@ -33,17 +31,15 @@ def get_searchers() -> list[str]: folder_path = SearcherConfigs._get_searchers_folder_path() searchers = [] - for file_name in os.listdir(folder_path): - if file_name.endswith(".yaml"): - searcher_name = os.path.splitext(file_name)[0] - searchers.append(searcher_name) + for file in folder_path.iterdir(): + if file.suffix == ".yaml": + searchers.append(file.stem) return searchers @staticmethod def get_available_algorithms() -> list[str]: - """ - List all available algorithms used by NePS searchers. + """List all available algorithms used by NePS searchers. Returns: list[str]: A list of algorithm names. @@ -51,11 +47,10 @@ def get_available_algorithms() -> list[str]: folder_path = SearcherConfigs._get_searchers_folder_path() prev_algorithms = set() - for filename in os.listdir(folder_path): - if filename.endswith(".yaml"): - file_path = os.path.join(folder_path, filename) - with open(file_path) as file: - searcher_config = yaml.safe_load(file) + for file in folder_path.iterdir(): + if file.suffix == ".yaml": + with file.open("r") as f: + searcher_config = yaml.safe_load(f) algorithm = searcher_config.get("strategy") if algorithm: prev_algorithms.add(algorithm) @@ -64,8 +59,7 @@ def get_available_algorithms() -> list[str]: @staticmethod def get_searcher_from_algorithm(algorithm: str) -> list[str]: - """ - Get all NePS searchers that use a specific searching algorithm. + """Get all NePS searchers that use a specific searching algorithm. Args: algorithm (str): The name of the algorithm needed for the search. @@ -76,20 +70,18 @@ def get_searcher_from_algorithm(algorithm: str) -> list[str]: folder_path = SearcherConfigs._get_searchers_folder_path() searchers = [] - for filename in os.listdir(folder_path): - if filename.endswith(".yaml"): - file_path = os.path.join(folder_path, filename) - with open(file_path) as file: - searcher_config = yaml.safe_load(file) + for file in folder_path.iterdir(): + if file.suffix == ".yaml": + with file.open("r") as f: + searcher_config = yaml.safe_load(f) if searcher_config.get("strategy") == algorithm: - searchers.append(os.path.splitext(filename)[0]) + searchers.append(file.stem) return searchers @staticmethod def get_searcher_kwargs(searcher: str) -> str: - """ - Get the kwargs and algorithm setup for a specific searcher. + """Get the kwargs and algorithm setup for a specific searcher. Args: searcher (str): The name of the searcher to check the details of. @@ -99,10 +91,10 @@ def get_searcher_kwargs(searcher: str) -> str: """ folder_path = SearcherConfigs._get_searchers_folder_path() - for filename in os.listdir(folder_path): - if filename.endswith(".yaml") and filename.startswith(searcher): - file_path = os.path.join(folder_path, filename) - with open(file_path) as file: - searcher_config = file.read() + for file in folder_path.iterdir(): + if file.suffix == ".yaml" and file.stem.startswith(searcher): + return file.read_text() - return searcher_config + raise FileNotFoundError( + f"Searcher {searcher} not found in default searchers folder." 
+ ) diff --git a/neps/optimizers/initial_design.py b/neps/optimizers/initial_design.py new file mode 100644 index 000000000..6de3e5fb5 --- /dev/null +++ b/neps/optimizers/initial_design.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +from collections.abc import Mapping +from typing import TYPE_CHECKING, Any, Literal + +import torch + +from neps.sampling import Sampler +from neps.sampling.priors import Prior + +if TYPE_CHECKING: + from neps.search_spaces.encoding import ConfigEncoder + from neps.search_spaces.search_space import SearchSpace + + +def make_initial_design( # noqa: PLR0912, C901 + *, + space: SearchSpace, + encoder: ConfigEncoder, + sampler: Literal["sobol", "prior", "uniform"] | Sampler, + sample_size: int | Literal["ndim"] | None = "ndim", + sample_prior_first: bool = True, + sample_fidelity: ( + Literal["min", "max", True] | int | float | dict[str, int | float] + ) = True, + seed: torch.Generator | None = None, +) -> list[dict[str, Any]]: + """Generate the initial design of the optimization process. + + Args: + space: The search space to use. + encoder: The encoder to use for encoding/decoding configurations. + sampler: The sampler to use for the initial design. + + If set to "sobol", a Sobol sequence will be used. + If set to "uniform", a uniform random sampler will be used. + If set to "prior", a prior sampler will be used, based on the defaults, + and confidence scores of the hyperparameters. + If set to a custom sampler, the sampler will be used directly. + + sample_size: + The number of configurations to sample. + + If "ndim", the number of configs will be equal to the number of dimensions. + If None, no configurations will be sampled. + + sample_prior_first: Whether to sample the prior configuration first. + sample_fidelity: + At what fidelity to sample the configurations, including the prior. + + If set to "min" or "max", the configuration will be sampled + at the minimum or maximum fidelity, respectively. If set to an integer + or a float, the configuration will be sampled at that fidelity. + When specified as a dictionary, the keys should be the names of the + fidelity parameters and the values should be the target fidelities. + If set to `True`, the configuration will have its fidelity randomly sampled. + seed: The seed to use for the random number generation. + + """ + configs: list[dict[str, Any]] = [] + + # First, we establish what fidelity to apply to them. + # This block essentially is in charge of creating a fids() function that can + # be called to get the fidelities for each sample. Some are constant, some will + # sample per config. + match sample_fidelity: + case "min": + _fids = {name: fid.lower for name, fid in space.fidelities.items()} + fids = lambda: _fids + case "max": + _fids = {name: fid.upper for name, fid in space.fidelities.items()} + fids = lambda: _fids + case True: + fids = lambda: { + name: hp.sample_value() for name, hp in space.fidelities.items() + } + case int() | float(): + if len(space.fidelities) != 1: + raise ValueError( + "The target fidelity should be specified as a dictionary" + " if there are multiple fidelities or no fidelity should" + " be specified." 
+                    " Current search space has fidelities: "
+                    f"{list(space.fidelities.keys())}"
+                )
+            name = next(iter(space.fidelities.keys()))
+            fids = lambda: {name: sample_fidelity}
+        case Mapping():
+            missing_keys = set(space.fidelities.keys()) - set(sample_fidelity.keys())
+            if missing_keys:
+                raise ValueError(
+                    "Missing target values for the following fidelities: "
+                    f"{missing_keys}"
+                )
+            fids = lambda: sample_fidelity
+        case _:
+            raise ValueError(
+                "Invalid value for `sample_fidelity`. "
+                "Expected 'min', 'max', True, int, float, or dict."
+            )
+
+    if sample_prior_first:
+        configs.append({**space.prior_config, **fids()})
+
+    ndims = len(space.numerical) + len(space.categoricals)
+    if sample_size == "ndim":
+        sample_size = ndims
+    elif sample_size is not None and sample_size <= 0:
+        raise ValueError(
+            "The sample size should be a positive integer if passing an int."
+        )
+
+    if sample_size is not None:
+        match sampler:
+            case "sobol":
+                sampler = Sampler.sobol(ndim=ndims)
+            case "uniform":
+                sampler = Sampler.uniform(ndim=ndims)
+            case "prior":
+                sampler = Prior.from_space(space, include_fidelity=False)
+            case _:
+                pass
+
+        encoded_configs = sampler.sample(sample_size * 2, to=encoder.domains, seed=seed)
+        uniq_x = torch.unique(encoded_configs, dim=0)
+        sample_configs = encoder.decode(uniq_x[:sample_size])
+        configs.extend([{**config, **fids()} for config in sample_configs])
+
+    return configs
diff --git a/neps/optimizers/multi_fidelity/__init__.py b/neps/optimizers/multi_fidelity/__init__.py
index e69de29bb..02e29dc96 100644
--- a/neps/optimizers/multi_fidelity/__init__.py
+++ b/neps/optimizers/multi_fidelity/__init__.py
@@ -0,0 +1,29 @@
+from neps.optimizers.multi_fidelity.hyperband import (
+    MOBSTER,
+    AsynchronousHyperband,
+    AsynchronousHyperbandWithPriors,
+    Hyperband,
+    HyperbandCustomDefault,
+    HyperbandWithPriors,
+)
+from neps.optimizers.multi_fidelity.ifbo import IFBO
+from neps.optimizers.multi_fidelity.successive_halving import (
+    AsynchronousSuccessiveHalving,
+    AsynchronousSuccessiveHalvingWithPriors,
+    SuccessiveHalving,
+    SuccessiveHalvingWithPriors,
+)
+
+__all__ = [
+    "IFBO",
+    "MOBSTER",
+    "AsynchronousHyperband",
+    "AsynchronousHyperbandWithPriors",
+    "AsynchronousSuccessiveHalving",
+    "AsynchronousSuccessiveHalvingWithPriors",
+    "Hyperband",
+    "HyperbandCustomDefault",
+    "HyperbandWithPriors",
+    "SuccessiveHalving",
+    "SuccessiveHalvingWithPriors",
+]
diff --git a/neps/optimizers/multi_fidelity/_dyhpo.py b/neps/optimizers/multi_fidelity/_dyhpo.py
deleted file mode 100644
index da3e36bf9..000000000
--- a/neps/optimizers/multi_fidelity/_dyhpo.py
+++ /dev/null
@@ -1,409 +0,0 @@
-from __future__ import annotations
-
-from typing import Any, List, Union
-from typing_extensions import override
-
-import numpy as np
-
-from neps.state.optimizer import BudgetInfo, OptimizationState
-from neps.utils.types import ConfigResult, RawConfig
-from neps.search_spaces.search_space import FloatParameter, IntegerParameter, SearchSpace
-from neps.optimizers.base_optimizer import BaseOptimizer
-from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import (
-    BaseAcquisition,
-)
-from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import (
-    AcquisitionSampler,
-)
-from neps.optimizers.multi_fidelity.promotion_policy import PromotionPolicy
-from neps.optimizers.multi_fidelity.sampling_policy import (
-    BaseDynamicModelPolicy,
-    ModelPolicy,
-    RandomPromotionDynamicPolicy,
-    SamplingPolicy,
-)
-from 
neps.optimizers.multi_fidelity.utils import MFObservedData - - -class MFEIBO(BaseOptimizer): - """Base class for MF-BO algorithms that use DyHPO like acquisition and budgeting.""" - - acquisition: str = "EI" - - def __init__( - self, - pipeline_space: SearchSpace, - budget: int, - step_size: Union[int, float] = 1, - optimal_assignment: bool = False, - use_priors: bool = False, - sample_default_first: bool = False, - sample_default_at_target: bool = False, - sampling_policy: Any = None, - promotion_policy: Any = None, - sample_policy_args: Union[dict, None] = None, - promotion_policy_args: Union[dict, None] = None, - promotion_type: str = "model", - sample_type: str = "model", - sampling_args: Union[dict, None] = None, - loss_value_on_error: Union[None, float] = None, - cost_value_on_error: Union[None, float] = None, - patience: int = 100, - ignore_errors: bool = False, - logger=None, - # arguments for model - surrogate_model: Union[str, Any] = "gp", - surrogate_model_args: dict = None, - domain_se_kernel: str = None, - graph_kernels: list = None, - hp_kernels: list = None, - acquisition: Union[str, BaseAcquisition] = acquisition, - acquisition_sampler: Union[str, AcquisitionSampler] = "mutation", - model_policy: Any = RandomPromotionDynamicPolicy, - log_prior_weighted: bool = False, - initial_design_size: int = 10, - model_policy_args: Union[dict, None] = None, - ): - """Initialise - - Args: - pipeline_space: Space in which to search - budget: Maximum budget - use_priors: Allows random samples to be generated from a default - Samples generated from a Gaussian centered around the default value - sampling_policy: The type of sampling procedure to use - promotion_policy: The type of promotion procedure to use - loss_value_on_error: Setting this and cost_value_on_error to any float will - supress any error during bayesian optimization and will use given loss - value instead. default: None - cost_value_on_error: Setting this and loss_value_on_error to any float will - supress any error during bayesian optimization and will use given cost - value instead. default: None - logger: logger object, or None to use the neps logger - sample_default_first: Whether to sample the default configuration first - """ - super().__init__( - pipeline_space=pipeline_space, - budget=budget, - patience=patience, - loss_value_on_error=loss_value_on_error, - cost_value_on_error=cost_value_on_error, - ignore_errors=ignore_errors, - logger=logger, - ) - self._budget_list: List[Union[int, float]] = [] - self.step_size: Union[int, float] = step_size - self._initial_design_size = initial_design_size - self._model_update_failed = False - self.sample_default_first = sample_default_first - self.sample_default_at_target = sample_default_at_target - - self.promotion_type = promotion_type - self.sample_type = sample_type - self.sampling_args = {} if sampling_args is None else sampling_args - self.use_priors = use_priors - self.total_fevals: int = 0 - - # TODO: Use initialized objects where possible instead of ..._args parameters. - # This will also make it easier to write new policies for users. 
- if model_policy_args is None: - model_policy_args = dict() - if sample_policy_args is None: - sample_policy_args = dict() - if promotion_policy_args is None: - promotion_policy_args = dict() - - self.observed_configs = MFObservedData( - columns=["config", "perf"], - index_names=["config_id", "budget_id"], - ) - - if model_policy is not None: - model_params = dict( - pipeline_space=pipeline_space, - surrogate_model=surrogate_model, - domain_se_kernel=domain_se_kernel, - hp_kernels=hp_kernels, - graph_kernels=graph_kernels, - surrogate_model_args=surrogate_model_args, - acquisition=acquisition, - use_priors=use_priors, - log_prior_weighted=log_prior_weighted, - acquisition_sampler=acquisition_sampler, - logger=logger, - ) - model_params.update(model_policy_args) - if issubclass(model_policy, BaseDynamicModelPolicy): - self.model_policy = model_policy( - observed_configs=self.observed_configs, **model_params - ) - elif issubclass(model_policy, ModelPolicy): - self.model_policy = model_policy(**model_params) - elif issubclass(model_policy, SamplingPolicy): - self.model_policy = model_policy( - pipeline_space=pipeline_space, - patience=patience, - logger=logger, - **model_policy_args, - ) - else: - raise ValueError( - f"Model policy can't be {model_policy}. " - f"It must subclass one of the predefined base classes" - ) - - if sampling_policy is not None: - sampling_params = dict( - pipeline_space=pipeline_space, patience=patience, logger=logger - ) - if issubclass(sampling_policy, SamplingPolicy): - sampling_params.update(sample_policy_args) - self.sampling_policy = sampling_policy(**sampling_params) - else: - raise ValueError( - f"Sampling policy {sampling_policy} must inherit from " - f"SamplingPolicy base class" - ) - - if promotion_policy is not None: - if issubclass(promotion_policy, PromotionPolicy): - promotion_params = dict(eta=3) - promotion_params.update(promotion_policy_args) - self.promotion_policy = promotion_policy(**promotion_params) - else: - raise ValueError( - f"Promotion policy {promotion_policy} must inherit from " - f"PromotionPolicy base class" - ) - - def get_budget_level(self, config: SearchSpace) -> int: - return int((config.fidelity.value - config.fidelity.lower) / self.step_size) - - def get_budget_value(self, budget_level: Union[int, float]) -> Union[int, float]: - if isinstance(self.pipeline_space.fidelity, IntegerParameter): - budget_val = int( - self.step_size * budget_level + self.pipeline_space.fidelity.lower - ) - elif isinstance(self.pipeline_space.fidelity, FloatParameter): - budget_val = ( - self.step_size * budget_level + self.pipeline_space.fidelity.lower - ) - else: - raise NotImplementedError( - f"Fidelity parameter: {self.pipeline_space.fidelity}" - f"must be one of the types: " - f"[IntegerParameter, FloatParameter], but is type:" - f"{type(self.pipeline_space.fidelity)}" - ) - self._budget_list.append(budget_val) - return budget_val - - @property - def is_init_phase(self) -> bool: - if self.num_train_configs < self._initial_design_size: - return True - return False - - @property - def num_train_configs(self): - return len(self.observed_configs.completed_runs) - - @override - def load_optimization_state( - self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], - budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - """This is basically the fit method. 
- - Args: - previous_results (dict[str, ConfigResult]): [description] - pending_evaluations (dict[str, ConfigResult]): [description] - """ - - # previous optimization run exists and needs to be loaded - self._load_previous_observations(previous_results) - self.total_fevals = len(previous_results) + len(pending_evaluations) - - # account for pending evaluations - self._handle_pending_evaluations(pending_evaluations) - - self.observed_configs.df.sort_index( - level=self.observed_configs.df.index.names, inplace=True - ) - self.model_policy.observed_configs = self.observed_configs - # fit any model/surrogates - - if not self.is_init_phase: - self._fit_models() - - def _load_previous_observations(self, previous_results): - for config_id, config_val in previous_results.items(): - _config, _budget_level = config_id.split("_") - perf = self.get_loss(config_val.result) - index = (int(_config), int(_budget_level)) - self.observed_configs.add_data([config_val.config, perf], index=index) - - if not np.isclose( - self.observed_configs.df.loc[index, self.observed_configs.perf_col], - perf, - ): - self.observed_configs.update_data( - { - self.observed_configs.config_col: config_val.config, - self.observed_configs.perf_col: perf, - }, - index=index, - ) - - def _handle_pending_evaluations(self, pending_evaluations): - for config_id, config_val in pending_evaluations.items(): - _config, _budget_level = config_id.split("_") - index = (int(_config), int(_budget_level)) - - if index not in self.observed_configs.df.index: - self.observed_configs.add_data([config_val.config, np.nan], index=index) - else: - self.observed_configs.update_data( - { - self.observed_configs.config_col: config_val.config, - self.observed_configs.perf_col: np.nan, - }, - index=index, - ) - - def _fit_models(self): - # TODO: Once done with development catch the model update exceptions - # and skip model based suggestions if failed (karibbov) - self.model_policy.update_model() - - def is_promotable(self, promotion_type: str = "model") -> Union[int, None]: - """ - Check if there are any configurations to promote, if yes then return the integer - ID of the promoted configuration, else return None. - """ - if promotion_type == "model": - config_id = self.model_policy.sample(is_promotion=True, **self.sampling_args) - elif promotion_type == "policy": - config_id = self.promotion_policy.retrieve_promotions() - elif promotion_type is None: - config_id = None - else: - raise ValueError( - f"'{promotion_type}' based promotion is not possible, please" - f"use either 'model', 'policy' or None as promotion_type" - ) - - return config_id - - def sample_new_config( - self, - sample_type: str = "model", - **kwargs, - ) -> SearchSpace: - """ - Sample completely new configuration that - hasn't been observed in any fidelity before. - Your model_policy and/or sampling_policy must satisfy this constraint - """ - if sample_type == "model": - config = self.model_policy.sample(**self.sampling_args) - elif sample_type == "policy": - config = self.sampling_policy.sample(**self.sampling_args) - elif sample_type is None: - config = self.pipeline_space.sample( - patience=self.patience, - user_priors=self.use_priors, - ignore_fidelity=True, - ) - else: - raise ValueError( - f"'{sample_type}' based sampling is not possible, please" - f"use either 'model', 'policy' or None as sampling_type" - ) - - return config - - def get_config_and_ids(self) -> tuple[RawConfig, str, Union[str, None]]: - """...and this is the method that decides which point to query. 
- - Returns: - [type]: [description] - """ - _config_id = None - fidelity_value_set = False - if ( - self.num_train_configs == 0 - and self.sample_default_first - and self.pipeline_space.has_prior - ): - config = self.pipeline_space.sample_default_configuration( - patience=self.patience, ignore_fidelity=False - ) - elif ( - (self.num_train_configs == 0 and self._initial_design_size >= 1) - or self.is_init_phase - or self._model_update_failed - ): - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=False - ) - else: - for _ in range(self.patience): - promoted_config_id = self.is_promotable( - promotion_type=self.promotion_type - ) - if ( - promoted_config_id is not None - and promoted_config_id in self.observed_configs.df.index.levels[0] - ): - current_budget = self.observed_configs.df.loc[ - (promoted_config_id,) - ].index[-1] - next_budget = current_budget + 1 - config = self.observed_configs.df.loc[ - (promoted_config_id, current_budget), - self.observed_configs.config_col, - ] - if np.less_equal( - self.get_budget_value(next_budget), config.fidelity.upper - ): - config.fidelity.set_value(self.get_budget_value(next_budget)) - _config_id = promoted_config_id - fidelity_value_set = True - break - elif promoted_config_id is not None: - self.logger.warn( - f"Configuration ID: '{promoted_config_id}' is " - f"not promotable because it doesn't exist in " - f"the observed configuration IDs: " - f"{self.observed_configs.df.index.levels[0]}.\n\n" - f"Trying to sample again..." - ) - else: - # sample_new_config must return a completely new configuration that - # hasn't been observed in any fidelity before - config = self.sample_new_config(sample_type=self.sample_type) - break - - # if the returned config already observed, - # set the fidelity to the next budget level if not max already - # else set the fidelity to the minimum budget level - else: - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=False - ) - - if not fidelity_value_set: - config.fidelity.set_value(self.get_budget_value(0)) - - if _config_id is None: - _config_id = ( - self.observed_configs.df.index.get_level_values(0).max() + 1 - if len(self.observed_configs.df.index.get_level_values(0)) > 0 - else 0 - ) - config_id = f"{_config_id}_{self.get_budget_level(config)}" - return config.hp_values(), config_id, None diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py deleted file mode 100755 index 598046378..000000000 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ /dev/null @@ -1,473 +0,0 @@ -from __future__ import annotations - -from typing import Any -from typing_extensions import override - -import numpy as np - -from neps.state.optimizer import BudgetInfo -from neps.utils.types import ConfigResult, RawConfig -from neps.utils.common import instance_from_map -from neps.search_spaces.search_space import FloatParameter, IntegerParameter, SearchSpace -from neps.optimizers.base_optimizer import BaseOptimizer -from neps.optimizers.bayesian_optimization.acquisition_functions import AcquisitionMapping -from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( - BaseAcquisition, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers import ( - AcquisitionSamplerMapping, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - AcquisitionSampler, -) -from neps.optimizers.bayesian_optimization.kernels.get_kernels import 
get_kernels -from neps.optimizers.multi_fidelity.mf_bo import FreezeThawModel, PFNSurrogate -from neps.optimizers.multi_fidelity.utils import MFObservedData - - -class MFEIBO(BaseOptimizer): - """Base class for MF-BO algorithms that use DyHPO-like acquisition and budgeting.""" - - acquisition: str = "MFEI" - - def __init__( - self, - pipeline_space: SearchSpace, - budget: int | None = None, - step_size: int | float = 1, - optimal_assignment: bool = False, - use_priors: bool = False, - sample_default_first: bool = False, - sample_default_at_target: bool = False, - loss_value_on_error: None | float = None, - cost_value_on_error: None | float = None, - patience: int = 100, - ignore_errors: bool = False, - logger=None, - # arguments for model - surrogate_model: str | Any = "deep_gp", - surrogate_model_args: dict | None = None, - domain_se_kernel: str | None = None, - graph_kernels: list | None = None, - hp_kernels: list | None = None, - acquisition: str | BaseAcquisition = acquisition, - acquisition_args: dict | None = None, - acquisition_sampler: str | AcquisitionSampler = "freeze-thaw", - acquisition_sampler_args: dict | None = None, - model_policy: Any = FreezeThawModel, - initial_design_fraction: float = 0.75, - initial_design_size: int = 10, - initial_design_budget: int | None = None, - ): - """Initialise - - Args: - pipeline_space: Space in which to search - budget: Maximum budget - use_priors: Allows random samples to be generated from a default - Samples generated from a Gaussian centered around the default value - sampling_policy: The type of sampling procedure to use - promotion_policy: The type of promotion procedure to use - loss_value_on_error: Setting this and cost_value_on_error to any float will - supress any error during bayesian optimization and will use given loss - value instead. default: None - cost_value_on_error: Setting this and loss_value_on_error to any float will - supress any error during bayesian optimization and will use given cost - value instead. 
default: None - logger: logger object, or None to use the neps logger - sample_default_first: Whether to sample the default configuration first - """ - super().__init__( - pipeline_space=pipeline_space, - budget=budget, - patience=patience, - loss_value_on_error=loss_value_on_error, - cost_value_on_error=cost_value_on_error, - ignore_errors=ignore_errors, - logger=logger, - ) - self.raw_tabular_space = None # placeholder, can be populated using pre_load_hook - self._budget_list: list[int | float] = [] - self.step_size: int | float = step_size - self.min_budget = self.pipeline_space.fidelity.lower - # TODO: generalize this to work with real data (not benchmarks) - self.max_budget = self.pipeline_space.fidelity.upper - - self._initial_design_fraction = initial_design_fraction - ( - self._initial_design_size, - self._initial_design_budget, - ) = self._set_initial_design( - initial_design_size, initial_design_budget, self._initial_design_fraction - ) - # TODO: Write use cases for these parameters - self._model_update_failed = False - self.sample_default_first = sample_default_first - self.sample_default_at_target = sample_default_at_target - - self.surrogate_model_name = surrogate_model - - self.use_priors = use_priors - self.total_fevals: int = 0 - - self.observed_configs = MFObservedData( - columns=["config", "perf", "learning_curves"], - index_names=["config_id", "budget_id"], - ) - - # Preparing model - self.graph_kernels, self.hp_kernels = get_kernels( - pipeline_space=pipeline_space, - domain_se_kernel=domain_se_kernel, - graph_kernels=graph_kernels, - hp_kernels=hp_kernels, - optimal_assignment=optimal_assignment, - ) - self.surrogate_model_args = ( - {} if surrogate_model_args is None else surrogate_model_args - ) - self._prep_model_args(self.hp_kernels, self.graph_kernels, pipeline_space) - - # TODO: Better solution than branching based on the surrogate name is needed - if surrogate_model in ["deep_gp", "gp"]: - model_policy = FreezeThawModel - elif surrogate_model == "pfn": - model_policy = PFNSurrogate - else: - raise ValueError("Invalid model option selected!") - - # The surrogate model is initalized here - self.model_policy = model_policy( - pipeline_space=pipeline_space, - surrogate_model=surrogate_model, - surrogate_model_args=self.surrogate_model_args, - ) - self.acquisition_args = {} if acquisition_args is None else acquisition_args - self.acquisition_args.update( - { - "pipeline_space": self.pipeline_space, - "surrogate_model_name": self.surrogate_model_name, - } - ) - self.acquisition = instance_from_map( - AcquisitionMapping, - acquisition, - name="acquisition function", - kwargs=self.acquisition_args, - ) - self.acquisition_sampler_args = ( - {} if acquisition_sampler_args is None else acquisition_sampler_args - ) - self.acquisition_sampler_args.update( - {"patience": self.patience, "pipeline_space": self.pipeline_space} - ) - self.acquisition_sampler = instance_from_map( - AcquisitionSamplerMapping, - acquisition_sampler, - name="acquisition sampler function", - kwargs=self.acquisition_sampler_args, - ) - self.count = 0 - - def _prep_model_args(self, hp_kernels, graph_kernels, pipeline_space): - if self.surrogate_model_name in ["gp", "gp_hierarchy"]: - # setup for GP implemented in NePS - self.surrogate_model_args.update( - dict( - # domain_se_kernel=domain_se_kernel, - hp_kernels=hp_kernels, - graph_kernels=graph_kernels, - ) - ) - if not self.surrogate_model_args["hp_kernels"]: - raise ValueError("No kernels are provided!") - # if "vectorial_features" not in 
self.surrogate_model_args: - self.surrogate_model_args["vectorial_features"] = ( - pipeline_space.raw_tabular_space.get_vectorial_dim() - if pipeline_space.has_tabular - else pipeline_space.get_vectorial_dim() - ) - - def _set_initial_design( - self, - initial_design_size: int = None, - initial_design_budget: int = None, - initial_design_fraction: float = 0.75, - ) -> tuple[int | float, int | float]: - """Sets the initial design size and budget.""" - - # user specified initial_design_size takes precedence - if initial_design_budget is not None: - _initial_design_budget = initial_design_budget - else: - _initial_design_budget = self.max_budget - - # user specified initial_design_size takes precedence - _initial_design_size = np.inf - if initial_design_size is not None: - _initial_design_size = initial_design_size - if ( - initial_design_size is None - or _initial_design_size * self.min_budget > _initial_design_budget - ): - # if the initial design budget is less than the budget spent on sampling - # the initial design at the minimum budget (fidelity) - # 2 choices here: - # 1. Reduce initial_design_size - # 2. Increase initial_design_budget - # we choose to reduce initial_design_size - _init_budget = initial_design_fraction * self.max_budget - # number of min budget evaluations fitting within initial design budget - _initial_design_size = _init_budget // self.min_budget - - self.logger.info( - f"\n\ninitial_design_size: {_initial_design_size}\n" - f"initial_design_budget: {_initial_design_budget}\n" - f"min_budget: {self.min_budget}\n\n" - ) - return _initial_design_size, _initial_design_budget - - def get_budget_level(self, config: SearchSpace) -> int: - return int( - np.ceil((config.fidelity.value - config.fidelity.lower) / self.step_size) - ) - - def get_budget_value(self, budget_level: int | float) -> int | float: - if isinstance(self.pipeline_space.fidelity, IntegerParameter): - budget_val = int( - self.step_size * budget_level + self.pipeline_space.fidelity.lower - ) - elif isinstance(self.pipeline_space.fidelity, FloatParameter): - budget_val = ( - self.step_size * budget_level + self.pipeline_space.fidelity.lower - ) - else: - raise NotImplementedError( - f"Fidelity parameter: {self.pipeline_space.fidelity}" - f"must be one of the types: " - f"[IntegerParameter, FloatParameter], but is type:" - f"{type(self.pipeline_space.fidelity)}" - ) - self._budget_list.append(budget_val) - return budget_val - - def total_budget_spent(self) -> int | float: - """Calculates the toal budget spent so far. - - This is calculated as a function of the fidelity range provided, that takes into - account the minimum budget and the step size. 
- """ - if len(self.observed_configs.df) == 0: - return 0 - - n_configs = len(self.observed_configs.seen_config_ids) - total_budget_level = sum(self.observed_configs.seen_budget_levels) - total_initial_budget_spent = n_configs * self.pipeline_space.fidelity.lower - total_budget_spent = ( - total_initial_budget_spent + total_budget_level * self.step_size - ) - - return total_budget_spent - - def is_init_phase(self, budget_based: bool = True) -> bool: - if budget_based: - # Check if we are still in the initial design phase based on - # either the budget spent so far or the number of configurations evaluated - if self.total_budget_spent() < self._initial_design_budget: - return True - else: - if self.num_train_configs < self._initial_design_size: - return True - return False - - @property - def num_train_configs(self): - return len(self.observed_configs.completed_runs) - - @override - def load_optimization_state( - self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], - budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - """This is basically the fit method. - - Args: - previous_results (dict[str, ConfigResult]): [description] - pending_evaluations (dict[str, ConfigResult]): [description] - """ - self.observed_configs = MFObservedData( - columns=["config", "perf", "learning_curves"], - index_names=["config_id", "budget_id"], - ) - - # previous optimization run exists and needs to be loaded - self._load_previous_observations(previous_results) - self.total_fevals = len(previous_results) + len(pending_evaluations) - - # account for pending evaluations - self._handle_pending_evaluations(pending_evaluations) - - # an aesthetic choice more than a functional choice - self.observed_configs.df.sort_index( - level=self.observed_configs.df.index.names, inplace=True - ) - - # TODO: can we do better than keeping a copy of the observed configs? 
- # TODO: can we not hide this in load_results and have something that pops out - # more, like a set_state or policy_args - self.model_policy.observed_configs = self.observed_configs - # fit any model/surrogates - init_phase = self.is_init_phase() - if not init_phase: - self._fit_models() - - @classmethod - def _get_config_id_split(cls, config_id: str) -> tuple[str, str]: - # assumes config IDs of the format `[unique config int ID]_[int rung ID]` - ids = config_id.split("_") - _config, _budget = ids[0], ids[1] - return _config, _budget - - def _load_previous_observations(self, previous_results): - def index_data_split(config_id: str, config_val): - _config_id, _budget_id = MFEIBO._get_config_id_split(config_id) - index = int(_config_id), int(_budget_id) - _data = [ - config_val.config, - self.get_loss(config_val.result), - self.get_learning_curve(config_val.result), - ] - return index, _data - - if len(previous_results) > 0: - index_row = [ - tuple(index_data_split(config_id, config_val)) - for config_id, config_val in previous_results.items() - ] - indices, rows = zip(*index_row) - self.observed_configs.add_data(data=list(rows), index=list(indices)) - - def _handle_pending_evaluations(self, pending_evaluations): - for config_id, config_val in pending_evaluations.items(): - _config, _budget_level = config_id.split("_") - index = (int(_config), int(_budget_level)) - - if index not in self.observed_configs.df.index: - # TODO: Validate this - self.observed_configs.add_data( - [config_val, np.nan, [np.nan]], index=index - ) - else: - self.observed_configs.update_data( - { - self.observed_configs.config_col: config_val, - self.observed_configs.perf_col: np.nan, - self.observed_configs.lc_col_name: [np.nan], - }, - index=index, - ) - - def _fit_models(self): - # TODO: Once done with development catch the model update exceptions - # and skip model based suggestions if failed (karibbov) - self._prep_model_args(self.hp_kernels, self.graph_kernels, self.pipeline_space) - self.model_policy.set_state(self.pipeline_space, self.surrogate_model_args) - self.model_policy.update_model() - self.acquisition.set_state( - self.pipeline_space, - self.model_policy.surrogate_model, - self.observed_configs, - self.step_size, - ) - self.acquisition_sampler.set_state( - self.pipeline_space, self.observed_configs, self.step_size - ) - - def _randomly_promote(self) -> tuple[SearchSpace, int]: - """Samples the initial design. - - With an unbiased coin toss (p=0.5) it decides whether to sample a new - configuration or continue a partial configuration, until initial_design_size - configurations have been sampled. - """ - # sampling a configuration ID from the observed ones - _config_ids = np.unique( - self.observed_configs.df.index.get_level_values("config_id").values - ) - _config_id = np.random.choice(_config_ids) - # extracting the config - config = self.observed_configs.df.loc[ - _config_id, self.observed_configs.config_col - ].iloc[0] - # extracting the budget level - budget = self.observed_configs.df.loc[_config_id].index.values[-1] - # calculating fidelity value - new_fidelity = self.get_budget_value(budget + 1) - # settingt the config fidelity - config.fidelity.set_value(new_fidelity) - return config, _config_id - - def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: - """...and this is the method that decides which point to query. 
- - Returns: - [type]: [description] - """ - config_id = None - previous_config_id = None - if self.is_init_phase(budget_based=False): - # sample a new config till initial design size is satisfied - self.logger.info("sampling...") - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=False - ) - assert config.fidelity is not None - config.fidelity.set_value(self.min_budget) - - _config_id = self.observed_configs.next_config_id() - elif self.is_init_phase(budget_based=True) or self._model_update_failed: - # promote a config randomly if initial design size is satisfied but the - # initial design budget has not been exhausted - self.logger.info("promoting...") - config, _config_id = self._randomly_promote() - else: - if self.count == 0: - self.logger.info("\nPartial learning curves as initial design:\n") - self.logger.info(f"{self.observed_configs.get_learning_curves()}\n") - self.count += 1 - # main acquisition call here after initial design is turned off - self.logger.info("acquiring...") - # generates candidate samples for acquisition calculation - assert self.pipeline_space.fidelity is not None - samples = self.acquisition_sampler.sample( - set_new_sample_fidelity=self.pipeline_space.fidelity.lower - ) # fidelity values here should be the observations or min. fidelity - # calculating acquisition function values for the candidate samples - acq, _samples = self.acquisition.eval( # type: ignore[attr-defined] - x=samples, asscalar=True - ) - # maximizing acquisition function - _idx = np.argsort(acq)[-1] - # extracting the config ID for the selected maximizer - _config_id = samples.index[_samples.index.values[_idx]] - # `_samples` should have new configs with fidelities set to as required - # NOTE: len(samples) need not be equal to len(_samples) as `samples` contain - # all (partials + new) configurations obtained from the sampler, but - # in `_samples`, configs are removed that have reached maximum epochs allowed - # NOTE: `samples` and `_samples` should share the same index values, hence, - # avoid using `.iloc` and work with `.loc` on pandas DataFrame/Series - - # Is this "config = _samples.loc[_config_id]"? 
- config = samples.loc[_config_id] - config.fidelity.set_value(_samples.loc[_config_id].fidelity.value) - # generating correct IDs - if _config_id in self.observed_configs.seen_config_ids: - config_id = f"{_config_id}_{self.get_budget_level(config)}" - previous_config_id = f"{_config_id}_{self.get_budget_level(config) - 1}" - else: - config_id = f"{self.observed_configs.next_config_id()}_{self.get_budget_level(config)}" - - return config.hp_values(), config_id, previous_config_id diff --git a/neps/optimizers/multi_fidelity/hyperband.py b/neps/optimizers/multi_fidelity/hyperband.py index dde96c562..ce78dd200 100644 --- a/neps/optimizers/multi_fidelity/hyperband.py +++ b/neps/optimizers/multi_fidelity/hyperband.py @@ -1,21 +1,15 @@ from __future__ import annotations -import typing +from abc import abstractmethod +from collections.abc import Mapping from copy import deepcopy -from typing import Any +from typing import TYPE_CHECKING, Any, Literal +from typing_extensions import override import numpy as np -from typing_extensions import Literal, override +import pandas as pd -from neps.state.optimizer import BudgetInfo -from neps.utils.types import ConfigResult, RawConfig -from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( - BaseAcquisition, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - AcquisitionSampler, -) +from neps.optimizers.base_optimizer import SampledConfig from neps.optimizers.multi_fidelity.mf_bo import MFBOBase from neps.optimizers.multi_fidelity.promotion_policy import ( AsyncPromotionPolicy, @@ -32,6 +26,16 @@ SuccessiveHalving, SuccessiveHalvingBase, ) +from neps.sampling.priors import Prior + +if TYPE_CHECKING: + from neps.optimizers.bayesian_optimization.acquisition_functions import ( + BaseAcquisition, + ) + from neps.search_spaces.search_space import SearchSpace + from neps.state.optimizer import BudgetInfo + from neps.state.trial import Trial + from neps.utils.types import ConfigResult, RawConfig class HyperbandBase(SuccessiveHalvingBase): @@ -41,57 +45,51 @@ class HyperbandBase(SuccessiveHalvingBase): def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", use_priors: bool = False, - sampling_policy: typing.Any = RandomUniformPolicy, - promotion_policy: typing.Any = SyncPromotionPolicy, - loss_value_on_error: None | float = None, + sampling_policy: Any = RandomUniformPolicy, + promotion_policy: Any = SyncPromotionPolicy, + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, - prior_confidence: Literal["low", "medium", "high"] = None, + prior_confidence: Literal["low", "medium", "high"] | None = None, random_interleave_prob: float = 0.0, - sample_default_first: bool = False, - sample_default_at_target: bool = False, + sample_prior_first: bool = False, + sample_prior_at_target: bool = False, ): - args = dict( - pipeline_space=pipeline_space, - budget=budget, - eta=eta, - early_stopping_rate=self.early_stopping_rate, # HB subsumes this param of SH - initial_design_type=initial_design_type, - use_priors=use_priors, - sampling_policy=sampling_policy, - promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, - cost_value_on_error=cost_value_on_error, - ignore_errors=ignore_errors, - logger=logger, - 
prior_confidence=prior_confidence, - random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, - ) + args = { + "pipeline_space": pipeline_space, + "max_cost_total": max_cost_total, + "eta": eta, + "early_stopping_rate": self.early_stopping_rate, # HB subsumes this from SH + "initial_design_type": initial_design_type, + "use_priors": use_priors, + "sampling_policy": sampling_policy, + "promotion_policy": promotion_policy, + "objective_to_minimize_value_on_error": objective_to_minimize_value_on_error, + "cost_value_on_error": cost_value_on_error, + "ignore_errors": ignore_errors, + "prior_confidence": prior_confidence, + "random_interleave_prob": random_interleave_prob, + "sample_prior_first": sample_prior_first, + "sample_prior_at_target": sample_prior_at_target, + } super().__init__(**args) # stores the flattened sequence of SH brackets to loop over - the HB heuristic # for (n,r) pairing, i.e., (num. configs, fidelity) self.full_rung_trace = [] - self.sh_brackets = {} + self.sh_brackets: dict[int, SuccessiveHalvingBase] = {} for s in range(self.max_rung + 1): args.update({"early_stopping_rate": s}) self.sh_brackets[s] = SuccessiveHalving(**args) # `full_rung_trace` contains the index of SH bracket to run sequentially self.full_rung_trace.extend([s] * len(self.sh_brackets[s].full_rung_trace)) # book-keeping variables - self.current_sh_bracket = None # type: ignore - self.old_history_len = None - - def _update_state_counter(self) -> None: - # TODO: get rid of this dependency - self._counter += 1 + self.current_sh_bracket: int = 0 def _update_sh_bracket_state(self) -> None: # `load_results()` for each of the SH bracket objects are not called as they are @@ -102,17 +100,17 @@ def _update_sh_bracket_state(self) -> None: # `clean_active_brackets` takes care of setting rung information and promotion # for the current SH bracket in HB # TODO: can we avoid copying full observation history - bracket = self.sh_brackets[self.current_sh_bracket] # type: ignore + bracket = self.sh_brackets[self.current_sh_bracket] bracket.observed_configs = self.observed_configs.copy() - def clear_old_brackets(self): + def clear_old_brackets(self) -> None: """Enforces reset at each new bracket.""" # unlike synchronous SH, the state is not reset at each rung and a configuration # is promoted if the rung has eta configs if it is the top performing # base class allows for retaining the whole optimization state return - def _handle_promotions(self): + def _handle_promotions(self) -> None: self.promotion_policy.set_state( max_rung=self.max_rung, members=self.rung_members, @@ -122,25 +120,58 @@ def _handle_promotions(self): # promotions are handled by the individual SH brackets which are explicitly # called in the _update_sh_bracket_state() function # overloaded function disables the need for retrieving promotions for HB overall - return @override - def load_optimization_state( + def ask( self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], + trials: Mapping[str, Trial], budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - super().load_optimization_state( - previous_results=previous_results, - pending_evaluations=pending_evaluations, - budget_info=budget_info, - optimizer_state=optimizer_state - ) + n: int | None = None, + ) -> SampledConfig: + assert n is None, "TODO" + completed: dict[str, ConfigResult] = { + trial_id: 
trial.into_config_result(self.pipeline_space.from_dict) + for trial_id, trial in trials.items() + if trial.report is not None + } + pending: dict[str, SearchSpace] = { + trial_id: self.pipeline_space.from_dict(trial.config) + for trial_id, trial in trials.items() + if trial.report is None + } + + self.rung_histories = { + rung: {"config": [], "perf": []} + for rung in range(self.min_rung, self.max_rung + 1) + } + + self.observed_configs = pd.DataFrame([], columns=("config", "rung", "perf")) + + # previous optimization run exists and needs to be loaded + self._load_previous_observations(completed) + + # account for pending evaluations + self._handle_pending_evaluations(pending) + + # process optimization state and bucket observations per rung + self._get_rungs_state() + + # filter/reset old SH brackets + self.clear_old_brackets() + + # identifying promotion list per rung + self._handle_promotions() + + # fit any model/surrogates + self._fit_models() + # important for the global HB to run the right SH self._update_sh_bracket_state() + config, _id, previous_id = self.get_config_and_ids() + return SampledConfig(id=_id, config=config, previous_config_id=previous_id) + + @abstractmethod def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: """...and this is the method that decides which point to query. @@ -151,7 +182,7 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: class Hyperband(HyperbandBase): - def clear_old_brackets(self): + def clear_old_brackets(self) -> None: """Enforces reset at each new bracket. The _get_rungs_state() function creates the `rung_promotions` dict mapping which @@ -170,7 +201,7 @@ def clear_old_brackets(self): _min_rung = self.sh_brackets[self.current_sh_bracket].min_rung end = self.sh_brackets[self.current_sh_bracket].config_map[_min_rung] - if self.sample_default_first and self.sample_default_at_target: + if self.sample_prior_first and self.sample_prior_at_target: start += 1 end += 1 @@ -185,6 +216,7 @@ def clear_old_brackets(self): # for the SH bracket in start-end, calculate total SH budget used, from the # correct SH bracket object to make the right budget calculations + assert isinstance(sh_bracket, SuccessiveHalving) bracket_budget_used = sh_bracket._calc_budget_used_in_bracket( deepcopy(self.observed_configs.rung.values[start:end]) ) @@ -229,7 +261,7 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: [type]: [description] """ config, config_id, previous_config_id = self.sh_brackets[ - self.current_sh_bracket # type: ignore + self.current_sh_bracket ].get_config_and_ids() return config, config_id, previous_config_id @@ -241,37 +273,36 @@ class HyperbandWithPriors(Hyperband): def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", - sampling_policy: typing.Any = FixedPriorPolicy, - promotion_policy: typing.Any = SyncPromotionPolicy, - loss_value_on_error: None | float = None, + sampling_policy: Any = FixedPriorPolicy, + promotion_policy: Any = SyncPromotionPolicy, + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, prior_confidence: Literal["low", "medium", "high"] = "medium", random_interleave_prob: float = 0.0, - sample_default_first: bool = False, - sample_default_at_target: bool = False, + sample_prior_first: bool = False, + sample_prior_at_target: bool = False, ): super().__init__( 
pipeline_space=pipeline_space, - budget=budget, + max_cost_total=max_cost_total, eta=eta, initial_design_type=initial_design_type, use_priors=self.use_priors, # key change to the base HB class sampling_policy=sampling_policy, promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, + objective_to_minimize_value_on_error=objective_to_minimize_value_on_error, cost_value_on_error=cost_value_on_error, ignore_errors=ignore_errors, - logger=logger, prior_confidence=prior_confidence, random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, + sample_prior_first=sample_prior_first, + sample_prior_at_target=sample_prior_at_target, ) @@ -280,36 +311,35 @@ class HyperbandCustomDefault(HyperbandWithPriors): def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", - sampling_policy: typing.Any = EnsemblePolicy, - promotion_policy: typing.Any = SyncPromotionPolicy, - loss_value_on_error: None | float = None, + sampling_policy: Any = EnsemblePolicy, + promotion_policy: Any = SyncPromotionPolicy, + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, prior_confidence: Literal["low", "medium", "high"] = "medium", random_interleave_prob: float = 0.0, - sample_default_first: bool = False, - sample_default_at_target: bool = False, + sample_prior_first: bool = False, + sample_prior_at_target: bool = False, ): super().__init__( pipeline_space=pipeline_space, - budget=budget, + max_cost_total=max_cost_total, eta=eta, initial_design_type=initial_design_type, sampling_policy=sampling_policy, promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, + objective_to_minimize_value_on_error=objective_to_minimize_value_on_error, cost_value_on_error=cost_value_on_error, ignore_errors=ignore_errors, - logger=logger, prior_confidence=prior_confidence, random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, + sample_prior_first=sample_prior_first, + sample_prior_at_target=sample_prior_at_target, ) self.sampling_args = { "inc": None, @@ -331,42 +361,41 @@ class AsynchronousHyperband(HyperbandBase): def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", use_priors: bool = False, - sampling_policy: typing.Any = RandomUniformPolicy, - promotion_policy: typing.Any = AsyncPromotionPolicy, - loss_value_on_error: None | float = None, + sampling_policy: Any = RandomUniformPolicy, + promotion_policy: Any = AsyncPromotionPolicy, + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, - prior_confidence: Literal["low", "medium", "high"] = None, + prior_confidence: Literal["low", "medium", "high"] | None = None, random_interleave_prob: float = 0.0, - sample_default_first: bool = False, - sample_default_at_target: bool = False, + sample_prior_first: bool = False, + sample_prior_at_target: bool = False, ): - args = dict( - pipeline_space=pipeline_space, - budget=budget, - eta=eta, - initial_design_type=initial_design_type, - use_priors=use_priors, - sampling_policy=sampling_policy, - 
promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, - cost_value_on_error=cost_value_on_error, - ignore_errors=ignore_errors, - logger=logger, - prior_confidence=prior_confidence, - random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, - ) + args = { + "pipeline_space": pipeline_space, + "max_cost_total": max_cost_total, + "eta": eta, + "initial_design_type": initial_design_type, + "use_priors": use_priors, + "sampling_policy": sampling_policy, + "promotion_policy": promotion_policy, + "objective_to_minimize_value_on_error": objective_to_minimize_value_on_error, + "cost_value_on_error": cost_value_on_error, + "ignore_errors": ignore_errors, + "prior_confidence": prior_confidence, + "random_interleave_prob": random_interleave_prob, + "sample_prior_first": sample_prior_first, + "sample_prior_at_target": sample_prior_at_target, + } super().__init__(**args) # overwrite parent class SH brackets with Async SH brackets - self.sh_brackets = {} + self.sh_brackets: dict[int, SuccessiveHalvingBase] = {} for s in range(self.max_rung + 1): args.update({"early_stopping_rate": s}) # key difference from vanilla HB where it runs synchronous SH brackets @@ -388,7 +417,7 @@ def _update_sh_bracket_state(self) -> None: bracket.rung_promotions = bracket.promotion_policy.retrieve_promotions() bracket.observed_configs = self.observed_configs.copy() - def _get_bracket_to_run(self): + def _get_bracket_to_run(self) -> int: """Samples the ASHA bracket to run. The selected bracket always samples at its minimum rung. Thus, selecting a bracket @@ -404,8 +433,7 @@ def _get_bracket_to_run(self): self.eta ** (K - s) * (K + 1) / (K - s + 1) for s in range(self.max_rung + 1) ] bracket_probs = np.array(bracket_probs) / sum(bracket_probs) - bracket_next = np.random.choice(range(self.max_rung + 1), p=bracket_probs) - return bracket_next + return int(np.random.choice(range(self.max_rung + 1), p=bracket_probs)) def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: """...and this is the method that decides which point to query. 
@@ -418,7 +446,7 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: config, config_id, previous_config_id = self.sh_brackets[ bracket_to_run ].get_config_and_ids() - return config, config_id, previous_config_id # type: ignore + return config, config_id, previous_config_id class AsynchronousHyperbandWithPriors(AsynchronousHyperband): @@ -428,37 +456,36 @@ class AsynchronousHyperbandWithPriors(AsynchronousHyperband): def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", - sampling_policy: typing.Any = FixedPriorPolicy, - promotion_policy: typing.Any = AsyncPromotionPolicy, - loss_value_on_error: None | float = None, + sampling_policy: Any = FixedPriorPolicy, + promotion_policy: Any = AsyncPromotionPolicy, + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, prior_confidence: Literal["low", "medium", "high"] = "medium", random_interleave_prob: float = 0.0, - sample_default_first: bool = False, - sample_default_at_target: bool = False, + sample_prior_first: bool = False, + sample_prior_at_target: bool = False, ): super().__init__( pipeline_space=pipeline_space, - budget=budget, + max_cost_total=max_cost_total, eta=eta, initial_design_type=initial_design_type, use_priors=self.use_priors, # key change to the base Async HB class sampling_policy=sampling_policy, promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, + objective_to_minimize_value_on_error=objective_to_minimize_value_on_error, cost_value_on_error=cost_value_on_error, ignore_errors=ignore_errors, - logger=logger, prior_confidence=prior_confidence, random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, + sample_prior_first=sample_prior_first, + sample_prior_at_target=sample_prior_at_target, ) @@ -468,61 +495,51 @@ class MOBSTER(MFBOBase, AsynchronousHyperband): def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", use_priors: bool = False, - sampling_policy: typing.Any = RandomUniformPolicy, - promotion_policy: typing.Any = AsyncPromotionPolicy, - loss_value_on_error: None | float = None, + sampling_policy: Any = RandomUniformPolicy, + promotion_policy: Any = AsyncPromotionPolicy, + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, - prior_confidence: Literal["low", "medium", "high"] = None, + prior_confidence: Literal["low", "medium", "high"] | None = None, random_interleave_prob: float = 0.0, - sample_default_first: bool = False, - sample_default_at_target: bool = False, + sample_prior_first: bool = False, + sample_prior_at_target: bool = False, # new arguments for model - model_policy: typing.Any = ModelPolicy, - surrogate_model: str | Any = "gp", - domain_se_kernel: str = None, - hp_kernels: list = None, - surrogate_model_args: dict = None, - acquisition: str | BaseAcquisition = "EI", - log_prior_weighted: bool = False, - acquisition_sampler: str | AcquisitionSampler = "random", + model_policy: Any = ModelPolicy, + surrogate_model: str | Any = "gp", # TODO: Remove + domain_se_kernel: str | None = None, # TODO: Remove + hp_kernels: list | None = None, # TODO: 
Remove + surrogate_model_args: dict | None = None, # TODO: Remove + acquisition: str | BaseAcquisition = "EI", # TODO: Remove + log_prior_weighted: bool = False, # TODO: Remove + acquisition_sampler: str = "random", # TODO: Remove ): - hb_args = dict( - pipeline_space=pipeline_space, - budget=budget, - eta=eta, - initial_design_type=initial_design_type, - use_priors=use_priors, - sampling_policy=sampling_policy, - promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, - cost_value_on_error=cost_value_on_error, - ignore_errors=ignore_errors, - logger=logger, - prior_confidence=prior_confidence, - random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, - ) + hb_args = { + "pipeline_space": pipeline_space, + "max_cost_total": max_cost_total, + "eta": eta, + "initial_design_type": initial_design_type, + "use_priors": use_priors, + "sampling_policy": sampling_policy, + "promotion_policy": promotion_policy, + "objective_to_minimize_value_on_error": objective_to_minimize_value_on_error, + "cost_value_on_error": cost_value_on_error, + "ignore_errors": ignore_errors, + "prior_confidence": prior_confidence, + "random_interleave_prob": random_interleave_prob, + "sample_prior_first": sample_prior_first, + "sample_prior_at_target": sample_prior_at_target, + } super().__init__(**hb_args) self.pipeline_space.has_prior = self.use_priors - bo_args = dict( - surrogate_model=surrogate_model, - domain_se_kernel=domain_se_kernel, - hp_kernels=hp_kernels, - surrogate_model_args=surrogate_model_args, - acquisition=acquisition, - log_prior_weighted=log_prior_weighted, - acquisition_sampler=acquisition_sampler, - ) # counting non-fidelity dimensions in search space ndims = sum( 1 @@ -531,11 +548,17 @@ def __init__( ) n_min = ndims + 1 self.init_size = n_min + 1 # in BOHB: init_design >= N_min + 2 - self.model_policy = model_policy(pipeline_space, **bo_args) + + if self.use_priors: + prior = Prior.from_space(self.pipeline_space, include_fidelity=False) + else: + prior = None + + self.model_policy = model_policy(pipeline_space=pipeline_space, prior=prior) for _, sh in self.sh_brackets.items(): - sh.model_policy = self.model_policy - sh.sample_new_config = self.sample_new_config + sh.model_policy = self.model_policy # type: ignore + sh.sample_new_config = self.sample_new_config # type: ignore # TODO: TrulyAsyncHyperband diff --git a/neps/optimizers/multi_fidelity/ifbo.py b/neps/optimizers/multi_fidelity/ifbo.py new file mode 100755 index 000000000..ae7dfc411 --- /dev/null +++ b/neps/optimizers/multi_fidelity/ifbo.py @@ -0,0 +1,293 @@ +from __future__ import annotations + +from collections.abc import Mapping +from typing import TYPE_CHECKING, Any, Literal +from typing_extensions import override + +import numpy as np +import torch + +from neps.optimizers.base_optimizer import BaseOptimizer, SampledConfig +from neps.optimizers.bayesian_optimization.models.ftpfn import ( + FTPFNSurrogate, + acquire_next_from_ftpfn, + decode_ftpfn_data, + encode_ftpfn, +) +from neps.optimizers.initial_design import make_initial_design +from neps.sampling.priors import Prior +from neps.sampling.samplers import Sampler +from neps.search_spaces.domain import Domain +from neps.search_spaces.encoding import CategoricalToUnitNorm, ConfigEncoder +from neps.search_spaces.search_space import Float, Integer, SearchSpace + +if TYPE_CHECKING: + from neps.state.optimizer import BudgetInfo + from neps.state.trial import Trial + +# NOTE: Ifbo 
was trained using 32 bit
+FTPFN_DTYPE = torch.float32
+
+
+def _adjust_pipeline_space_to_match_stepsize(
+    pipeline_space: SearchSpace,
+    step_size: int | float,
+) -> tuple[SearchSpace, int]:
+    """Adjust the pipeline space to be evenly divisible by the step size.
+
+    This is done by incrementing the lower bound of the fidelity domain to the
+    smallest value that enables this.
+
+    Args:
+        pipeline_space: The pipeline space to adjust
+        step_size: The size of the step to take in the fidelity domain.
+
+    Returns:
+        The adjusted pipeline space and the number of bins it can be divided into
+    """
+    fidelity = pipeline_space.fidelity
+    fidelity_name = pipeline_space.fidelity_name
+    assert fidelity_name is not None
+    assert isinstance(fidelity, Float | Integer)
+    if fidelity.log:
+        raise NotImplementedError("Log fidelity not yet supported")
+
+    # Can't use mod since it's quite inaccurate for floats
+    # Use the fact that we can always write x = n*k + r
+    # where k = stepsize and x = (fid_upper - fid_lower)
+    # > x = n*k + r
+    # > n = x // k
+    # > r = x - n*k
+    x = fidelity.upper - fidelity.lower
+    n = int(x // step_size)
+
+    if n <= 0:
+        raise ValueError(
+            f"Step size ({step_size}) is too large for the fidelity domain {fidelity}."
+            " Consider lowering this parameter of ifBO."
+        )
+
+    r = x - n * step_size
+    new_lower = fidelity.lower + r
+    new_fid = fidelity.__class__(
+        lower=new_lower,
+        upper=fidelity.upper,
+        log=fidelity.log,
+        prior=fidelity.prior,
+        is_fidelity=True,
+        prior_confidence=fidelity.prior_confidence_choice,
+    )
+    return (
+        SearchSpace(**{**pipeline_space.hyperparameters, fidelity_name: new_fid}),
+        n,
+    )
+
+
+class IFBO(BaseOptimizer):
+    """Base class for MF-BO algorithms that use DyHPO-like acquisition and budgeting."""
+
+    def __init__(
+        self,
+        *,
+        pipeline_space: SearchSpace,
+        step_size: int | float = 1,
+        use_priors: bool = False,
+        sample_prior_first: bool = False,
+        sample_prior_at_target: bool = False,
+        surrogate_model_args: dict | None = None,
+        initial_design_size: int | Literal["ndim"] = "ndim",
+        n_acquisition_new_configs: int = 1_000,
+        device: torch.device | None = None,
+        max_cost_total: int | float | None = None,  # TODO: Remove
+        objective_to_minimize_value_on_error: float | None = None,  # TODO: Remove
+        cost_value_on_error: float | None = None,  # TODO: Remove
+        ignore_errors: bool = False,  # TODO: Remove
+    ):
+        """Initialise.
+
+        Args:
+            pipeline_space: Space in which to search
+            step_size: The size of the step to take in the fidelity domain.
+            sample_prior_first: Whether to sample the prior configuration first
+            initial_design_size: Number of configs to sample before starting
+                optimization. If "ndim", the number of configs will be equal to
+                the number of dimensions.
+            device: Device to use for the model
+        """
+        # TODO: I'm not sure how this might affect tables, whose lowest fidelity
+        # might be below the possibly increased lower bound.
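+        # As an aside, a worked instance of the x = n*k + r adjustment performed
+        # by _adjust_pipeline_space_to_match_stepsize (illustrative numbers only):
+        #   fidelity in [1.0, 10.0] with step_size = 2.5
+        #   x = 10.0 - 1.0 = 9.0; n = int(9.0 // 2.5) = 3; r = 9.0 - 3 * 2.5 = 1.5
+        #   new_lower = 1.0 + 1.5 = 2.5, so [2.5, 10.0] splits into exactly
+        #   n = 3 steps of size 2.5.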
+        space, fid_bins = _adjust_pipeline_space_to_match_stepsize(
+            pipeline_space, step_size
+        )
+        assert space.fidelity is not None
+        assert isinstance(space.fidelity_name, str)
+
+        super().__init__(pipeline_space=space)
+        self.step_size = step_size
+        self.use_priors = use_priors
+        self.sample_prior_first = sample_prior_first
+        self.sample_prior_at_target = sample_prior_at_target
+        self.device = device
+        self.n_initial_design: int | Literal["ndim"] = initial_design_size
+        self.n_acquisition_new_configs = n_acquisition_new_configs
+        self.surrogate_model_args = surrogate_model_args or {}
+
+        self._min_budget: int | float = space.fidelity.lower
+        self._max_budget: int | float = space.fidelity.upper
+        self._fidelity_name: str = space.fidelity_name
+        self._initial_design: list[dict[str, Any]] | None = None
+
+        self._prior: Prior | None
+        if use_priors:
+            self._prior = Prior.from_space(space, include_fidelity=False)
+        else:
+            self._prior = None
+
+        self._config_encoder: ConfigEncoder = ConfigEncoder.from_space(
+            space=space,
+            include_constants_when_decoding=True,
+            # FTPFN doesn't support categoricals and we were recommended
+            # to just evenly distribute in the unit norm
+            custom_transformers={
+                cat_name: CategoricalToUnitNorm(choices=cat.choices)
+                for cat_name, cat in space.categoricals.items()
+            },
+        )
+
+        # Domain of fidelity values, i.e. what is given in the configs that we
+        # give to the user to evaluate at.
+        self._fid_domain = space.fidelity.domain
+
+        # Domain in which we should pass budgets to ifbo model
+        self._budget_domain = Domain.floating(1 / self._max_budget, 1)
+
+        # Domain from which we assign an index to each budget
+        self._budget_ix_domain = Domain.indices(fid_bins)
+
+    @override
+    def ask(
+        self,
+        trials: Mapping[str, Trial],
+        budget_info: BudgetInfo | None = None,
+        n: int | None = None,
+    ) -> SampledConfig:
+        assert n is None, "TODO"
+        ids = [int(config_id.split("_", maxsplit=1)[0]) for config_id in trials]
+        new_id = max(ids) + 1 if len(ids) > 0 else 0
+
+        # If we haven't passed the initial design phase
+        if self._initial_design is None:
+            self._initial_design = make_initial_design(
+                space=self.pipeline_space,
+                encoder=self._config_encoder,
+                sample_prior_first=self.sample_prior_first,
+                sampler="sobol" if self._prior is None else self._prior,
+                seed=None,  # TODO:
+                sample_fidelity="min",
+                sample_size=self.n_initial_design,
+            )
+
+        if new_id < len(self._initial_design):
+            config = self._initial_design[new_id]
+            config[self._fidelity_name] = self._min_budget
+            return SampledConfig(id=f"{new_id}_0", config=config)
+
+        # Otherwise, we proceed to surrogate phase
+        ftpfn = FTPFNSurrogate(
+            target_path=self.surrogate_model_args.get("target_path", None),
+            version=self.surrogate_model_args.get("version", "0.0.1"),
+            device=self.device,
+        )
+        X, y = encode_ftpfn(
+            trials=trials,
+            space=self.pipeline_space,
+            encoder=self._config_encoder,
+            budget_domain=self._budget_domain,
+            device=self.device,
+            pending_value=torch.nan,
+        )
+
+        # Fantasize if needed
+        pending_mask = torch.isnan(y)
+        if pending_mask.any():
+            not_pending_mask = ~pending_mask
+            not_pending_X = X[not_pending_mask]
+            y[pending_mask] = ftpfn.get_mean_performance(
+                train_x=not_pending_X,
+                train_y=y[not_pending_mask],
+                test_x=X[pending_mask],
+            )
+        else:
+            not_pending_X = X
+
+        # NOTE: Can't really abstract this, requires knowledge that:
+        # 1. The encoding is such that the encoded performance is
+        #    (1 - objective_to_minimize)
+        # 2. The budget is the second column
+        # 3. The budget is encoded between 1/max_fid and 1
+        rng = np.random.RandomState(len(trials))
+        # Cast a random budget index into the ftpfn budget domain
+        horizon_increment = self._budget_domain.cast_one(
+            rng.randint(*self._budget_ix_domain.bounds) + 1,
+            frm=self._budget_ix_domain,
+        )
+        f_best = y.max().item()
+        threshold = f_best + (10 ** rng.uniform(-4, -1)) * (1 - f_best)
+
+        def _mfpi_random(samples: torch.Tensor) -> torch.Tensor:
+            # HACK: Because we are modifying the samples inplace, we add the
+            # horizon increment and then undo the addition afterwards
+            original_budget_column = samples[..., 1].clone()
+            samples[..., 1].add_(horizon_increment).clamp_max_(self._budget_domain.upper)
+
+            scores = ftpfn.get_pi(X, y, samples, y_best=threshold)
+
+            samples[..., 1] = original_budget_column
+            return scores
+
+        # Do acquisition on ftpfn
+        sample_dims = self._config_encoder.ncols
+        best_row = acquire_next_from_ftpfn(
+            ftpfn=ftpfn,
+            # How to encode
+            encoder=self._config_encoder,
+            budget_domain=self._budget_domain,
+            # Acquisition function
+            acq_function=_mfpi_random,
+            # Which acquisition samples to consider for continuation
+            continuation_samples=not_pending_X,
+            # How to generate some initial samples
+            initial_samplers=[
+                (Sampler.sobol(ndim=sample_dims), 512),
+                (Sampler.uniform(ndim=sample_dims), 512),
+                (Sampler.borders(ndim=sample_dims), 256),
+            ],
+            seed=None,  # TODO: Seeding
+            # A follow-up local sampling step around the best point found by
+            # the initial_samplers
+            local_search_sample_size=256,
+            local_search_confidence=0.95,
+        )
+        _id, fid, config = decode_ftpfn_data(
+            best_row,
+            self._config_encoder,
+            budget_domain=self._budget_domain,
+            fidelity_domain=self._fid_domain,
+        )[0]
+
+        if _id is None:
+            config[self._fidelity_name] = fid
+            return SampledConfig(id=f"{new_id}_0", config=config)
+        # Convert fidelity to budget index, bump by 1 and convert back
+        budget_ix = self._budget_ix_domain.cast_one(fid, frm=self._fid_domain)
+        next_ix = budget_ix + 1
+        next_fid = self._fid_domain.cast_one(next_ix, frm=self._budget_ix_domain)
+
+        config[self._fidelity_name] = next_fid
+        return SampledConfig(
+            id=f"{_id}_{next_ix}",
+            config=config,
+            previous_config_id=f"{_id}_{budget_ix}",
+        )
diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py
index a24c9d1bd..f4355585e 100755
--- a/neps/optimizers/multi_fidelity/mf_bo.py
+++ b/neps/optimizers/multi_fidelity/mf_bo.py
@@ -1,17 +1,28 @@
-# type: ignore
 from __future__ import annotations
 
+import logging
 from copy import deepcopy
+from typing import TYPE_CHECKING, Any, Literal
 
-import numpy as np
-import pandas as pd
-import torch
+from neps.search_spaces.functions import sample_one_old
 
-from neps.utils.common import instance_from_map
-from ..bayesian_optimization.models import SurrogateModelMapping
-from ..multi_fidelity.utils import normalize_vectorize_config
-from ..multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity
-from ..utils import map_real_hyperparameters_from_tabular_ids
+
+def update_fidelity(config: SearchSpace, fidelity: int | float) -> SearchSpace:
+    assert config.fidelity is not None
+    config.fidelity.set_value(fidelity)
+    # TODO: Place holder until we can get rid of passing around search spaces
+    # as configurations
+    assert config.fidelity_name is not None
+    config._values[config.fidelity_name] = fidelity
+    return config
+
+
+if TYPE_CHECKING:
+    import pandas as pd
+
+    from neps.search_spaces import SearchSpace
+
+logger = logging.getLogger(__name__)
 
 
 class MFBOBase:
@@ -20,9 +31,25 @@
Requires certain strict assumptions about fidelities and rung maps. """ - def _fit_models(self): + # TODO: Make pure function... + model_based: bool + pipeline_space: SearchSpace + observed_configs: pd.DataFrame + rung_map: dict + max_budget: float + modelling_type: Literal["rung", "joint"] + rung_histories: dict + min_rung: int + max_rung: int + model_policy: Any + sampling_args: dict + sampling_policy: Any + patience: int + use_priors: bool + init_size: int + + def _fit_models(self) -> None: """Performs necessary procedures to build and use models.""" - if not self.model_based: # do nothing here if the algorithm has model-based search disabled return @@ -32,9 +59,9 @@ def _fit_models(self): if self.pipeline_space.has_prior: # PriorBand + BO - total_resources = calc_total_resources_spent( - self.observed_configs, self.rung_map - ) + valid_perf_mask = self.observed_configs["perf"].notna() + rungs = self.observed_configs.loc[valid_perf_mask, "rung"] + total_resources = sum(self.rung_map[r] for r in rungs) decay_t = total_resources / self.max_budget else: # Mobster @@ -56,7 +83,7 @@ def _fit_models(self): raise ValueError( "Returned rung is None. Should not be so when not init phase." ) - self.logger.info(f"Building model at rung {rung}") + logger.info(f"Building model at rung {rung}") # collecting finished evaluations at `rung` train_df = self.observed_configs.loc[ self.rung_histories[rung]["config"] @@ -72,7 +99,7 @@ def _fit_models(self): train_y = deepcopy(self.rung_histories[rung]["perf"]) # extract only the pending configurations that are at `rung` pending_df = pending_df[pending_df.rung == rung] - pending_x = deepcopy(pending_df.config.values.tolist()) + pending_x = deepcopy(pending_df["config"].values.tolist()) # update fidelity fidelities = [self.rung_map[rung]] * len(pending_x) pending_x = list(map(update_fidelity, pending_x, fidelities)) @@ -109,8 +136,8 @@ def _fit_models(self): # and set the acquisition states self.model_policy.update_model(train_x, train_y, pending_x, decay_t=decay_t) - def _active_rung(self): - """Returns the highest rung that can fit a model, `None` if no rung is eligible.""" + def _active_rung(self) -> int | None: + """The highest rung that can fit a model, `None` if no rung is eligible.""" rung = self.max_rung while rung >= self.min_rung: if len(self.rung_histories[rung]["config"]) >= self.init_size: @@ -131,8 +158,10 @@ def is_init_phase(self) -> bool: # builds a model across all fidelities with the fidelity as a dimension # in this case, calculate the total number of function evaluations spent # and in vanilla BO fashion use that to compare with the initital design size - resources = calc_total_resources_spent(self.observed_configs, self.rung_map) - resources /= self.max_budget + valid_perf_mask = self.observed_configs["perf"].notna() + rungs = self.observed_configs.loc[valid_perf_mask, "rung"] + total_resources = sum(self.rung_map[r] for r in rungs) + resources = total_resources / self.max_budget if resources < self.init_size: return True else: @@ -141,9 +170,9 @@ def is_init_phase(self) -> bool: def sample_new_config( self, - rung: int = None, - **kwargs, - ): + rung: int | None = None, + **kwargs: Any, + ) -> SearchSpace: """Samples configuration from policies or random.""" if self.model_based and not self.is_init_phase(): incumbent = None @@ -158,7 +187,7 @@ def sample_new_config( # IMPORTANT step for correct 2-step acquisition incumbent = min(self.rung_histories[rung]["perf"]) else: - fidelity = active_max_fidelity = None + raise ValueError("Choice 
of modelling_type not in 'rung', 'joint'") assert ( (fidelity is None and active_max_fidelity is not None) or (active_max_fidelity is None and fidelity is not None) @@ -173,180 +202,10 @@ def sample_new_config( elif self.sampling_policy is not None: config = self.sampling_policy.sample(**self.sampling_args) else: - config = self.pipeline_space.sample( + config = sample_one_old( + self.pipeline_space, patience=self.patience, user_priors=self.use_priors, ignore_fidelity=True, ) return config - - -class FreezeThawModel: - """Designed to work with model search in unit step multi-fidelity algorithms.""" - - def __init__( - self, - pipeline_space, - surrogate_model: str = "deep_gp", - surrogate_model_args: dict = None, - ): - self.observed_configs = None - self.pipeline_space = pipeline_space - self.surrogate_model_name = surrogate_model - self.surrogate_model_args = ( - surrogate_model_args if surrogate_model_args is not None else {} - ) - if self.surrogate_model_name in ["deep_gp", "pfn"]: - self.surrogate_model_args.update({"pipeline_space": pipeline_space}) - - # instantiate the surrogate model - self.surrogate_model = instance_from_map( - SurrogateModelMapping, - self.surrogate_model_name, - name="surrogate model", - kwargs=self.surrogate_model_args, - ) - - def _fantasize_pending(self, train_x, train_y, pending_x): - # Select configs that are neither pending nor resulted in error - completed_configs = self.observed_configs.completed_runs.copy(deep=True) - # IMPORTANT: preprocess observations to get appropriate training data - train_x, train_lcs, train_y = self.observed_configs.get_training_data_4DyHPO( - completed_configs, self.pipeline_space - ) - pending_condition = self.observed_configs.pending_condition - if pending_condition.any(): - pending_configs = self.observed_configs.df.loc[pending_condition] - pending_x, pending_lcs, _ = self.observed_configs.get_training_data_4DyHPO( - pending_configs - ) - self._fit(train_x, train_y, train_lcs) - _y, _ = self._predict(pending_x, pending_lcs) - _y = _y.tolist() - - train_x.extend(pending_x) - train_y.extend(_y) - train_lcs.extend(pending_lcs) - - return train_x, train_y, train_lcs - - def _fit(self, train_x, train_y, train_lcs): - if self.surrogate_model_name in ["gp", "gp_hierarchy"]: - self.surrogate_model.fit(train_x, train_y) - elif self.surrogate_model_name in ["deep_gp", "pfn"]: - self.surrogate_model.fit(train_x, train_y, train_lcs) - else: - # check neps/optimizers/bayesian_optimization/models/__init__.py for options - raise ValueError( - f"Surrogate model {self.surrogate_model_name} not supported!" - ) - - def _predict(self, test_x, test_lcs): - if self.surrogate_model_name in ["gp", "gp_hierarchy"]: - return self.surrogate_model.predict(test_x) - elif self.surrogate_model_name in ["deep_gp", "pfn"]: - return self.surrogate_model.predict(test_x, test_lcs) - else: - # check neps/optimizers/bayesian_optimization/models/__init__.py for options - raise ValueError( - f"Surrogate model {self.surrogate_model_name} not supported!" 
- ) - - def set_state( - self, - pipeline_space, - surrogate_model_args, - **kwargs, - ): - self.pipeline_space = pipeline_space - self.surrogate_model_args = ( - surrogate_model_args if surrogate_model_args is not None else {} - ) - # only to handle tabular spaces - if self.pipeline_space.has_tabular: - if self.surrogate_model_name in ["deep_gp", "pfn"]: - self.surrogate_model_args.update( - {"pipeline_space": self.pipeline_space.raw_tabular_space} - ) - # instantiate the surrogate model, again, with the new pipeline space - self.surrogate_model = instance_from_map( - SurrogateModelMapping, - self.surrogate_model_name, - name="surrogate model", - kwargs=self.surrogate_model_args, - ) - - def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None): - if train_x is None: - train_x = [] - if train_y is None: - train_y = [] - if pending_x is None: - pending_x = [] - - if decay_t is None: - decay_t = len(train_x) - train_x, train_y, train_lcs = self._fantasize_pending( - train_x, train_y, pending_x - ) - self._fit(train_x, train_y, train_lcs) - - return self.surrogate_model, decay_t - - -class PFNSurrogate(FreezeThawModel): - """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.train_x = None - self.train_y = None - - def _fit(self, *args): - assert self.surrogate_model_name == "pfn" - self.preprocess_training_set() - self.surrogate_model.fit(self.train_x, self.train_y) - - def preprocess_training_set(self): - _configs = self.observed_configs.df.config.values.copy() - - # onlf if tabular space is present - if self.pipeline_space.has_tabular: - # placeholder index, will be driooed - _idxs = np.arange(len(_configs)) - # mapping the (id, epoch) space of tabular configs to the actual HPs - _configs = map_real_hyperparameters_from_tabular_ids( - pd.Series(_configs, index=_idxs), self.pipeline_space - ).values - - device = self.surrogate_model.device - # TODO: fix or make consistent with `tokenize`` - configs, idxs, performances = self.observed_configs.get_tokenized_data( - self.observed_configs.df.copy().assign(config=_configs) - ) - # TODO: account for fantasization - self.train_x = torch.Tensor(np.hstack([idxs, configs])).to(device) - self.train_y = torch.Tensor(performances).to(device) - - def preprocess_test_set(self, test_x): - _len = len(self.observed_configs.all_configs_list()) - device = self.surrogate_model.device - - new_idxs = np.arange(_len, len(test_x)) - base_fidelity = np.array([1] * len(new_idxs)) - new_token_ids = np.hstack( - (new_idxs.T.reshape(-1, 1), base_fidelity.T.reshape(-1, 1)) - ) - # the following operation takes each element in the array and stacks it vertically - # in this case, should convert a (n,) array to (n, 2) by flattening the elements - existing_token_ids = np.vstack(self.observed_configs.token_ids).astype(int) - token_ids = np.vstack((existing_token_ids, new_token_ids)) - - configs = np.array([normalize_vectorize_config(c) for c in test_x]) - test_x = torch.Tensor(np.hstack([token_ids, configs])).to(device) - return test_x - - def _predict(self, test_x, test_lcs): - assert self.surrogate_model_name == "pfn" - test_x = self.preprocess_test_set(test_x) - return self.surrogate_model.predict(self.train_x, self.train_y, test_x) diff --git a/neps/optimizers/multi_fidelity/promotion_policy.py b/neps/optimizers/multi_fidelity/promotion_policy.py index 41b251769..b94cbf0e6 100644 --- a/neps/optimizers/multi_fidelity/promotion_policy.py 
+++ b/neps/optimizers/multi_fidelity/promotion_policy.py
@@ -1,27 +1,28 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
+from typing import Any
 
 import numpy as np
 
 
 class PromotionPolicy(ABC):
-    """Base class for implementing a sampling straregy for SH and its subclasses"""
+    """Base class for implementing a sampling strategy for SH and its subclasses."""
 
     def __init__(self, eta: int):
         self.rung_members: dict = {}
         self.rung_members_performance: dict = {}
         self.rung_promotions: dict = {}
-        self.eta = eta  # type: int
-        self.max_rung: int = None
+        self.eta: int = eta
+        self.max_rung: int | None = None
 
     def set_state(
         self,
-        *,  # allows only keyword args
+        *,
         max_rung: int,
         members: dict,
         performances: dict,
-        **kwargs,
+        **kwargs: Any,
    ) -> None:
         self.max_rung = max_rung
         self.rung_members = members
@@ -38,30 +39,30 @@ class SyncPromotionPolicy(PromotionPolicy):
     Promotes only when all predefined number of config slots are full.
     """
 
-    def __init__(self, eta, **kwargs):
+    def __init__(self, eta: int, **kwargs: Any):
         super().__init__(eta, **kwargs)
-        self.config_map: dict = None
-        self.rung_promotions = None
+        self.config_map: dict | None = None
+        self.rung_promotions: dict | None = None
 
     def set_state(
         self,
-        *,  # allows only keyword args
+        *,
         max_rung: int,
         members: dict,
         performances: dict,
         config_map: dict,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         super().set_state(max_rung=max_rung, members=members, performances=performances)
         self.config_map = config_map
 
     def retrieve_promotions(self) -> dict:
-        """Returns the top 1/eta configurations per rung if enough configurations seen"""
+        """Returns the top 1/eta configurations per rung if enough configurations seen."""
         assert self.config_map is not None
-        self.rung_promotions = {rung: [] for rung in self.config_map.keys()}
+        self.rung_promotions = {rung: [] for rung in self.config_map}
         total_rung_evals = 0
-        for rung in reversed(sorted(self.config_map.keys())):
+        for rung in sorted(self.config_map.keys(), reverse=True):
             total_rung_evals += len(self.rung_members[rung])
             if (
                 total_rung_evals >= self.config_map[rung]
@@ -91,11 +92,12 @@ class AsyncPromotionPolicy(PromotionPolicy):
     Promotes whenever a higher fidelity has at least eta configurations.
""" - def __init__(self, eta, **kwargs): + def __init__(self, eta: int, **kwargs: Any): super().__init__(eta, **kwargs) def retrieve_promotions(self) -> dict: - """Returns the top 1/eta configurations per rung if enough configurations seen""" + """Returns the top 1/eta configurations per rung if enough configurations seen.""" + assert self.max_rung is not None for rung in range(self.max_rung + 1): if rung == self.max_rung: # cease promotions for the highest rung (configs at max budget) diff --git a/neps/optimizers/multi_fidelity/sampling_policy.py b/neps/optimizers/multi_fidelity/sampling_policy.py index 9321633c7..dd510c1c6 100644 --- a/neps/optimizers/multi_fidelity/sampling_policy.py +++ b/neps/optimizers/multi_fidelity/sampling_policy.py @@ -1,72 +1,75 @@ -# mypy: disable-error-code = assignment from __future__ import annotations import logging from abc import ABC, abstractmethod -from typing import Any +from typing import TYPE_CHECKING, Any, Literal import numpy as np import pandas as pd import torch - -from neps.utils.common import instance_from_map -from ...search_spaces.search_space import SearchSpace -from ..bayesian_optimization.acquisition_functions import AcquisitionMapping -from ..bayesian_optimization.acquisition_functions.base_acquisition import ( - BaseAcquisition, -) -from ..bayesian_optimization.acquisition_functions.prior_weighted import ( - DecayingPriorWeightedAcquisition, -) -from ..bayesian_optimization.acquisition_samplers import AcquisitionSamplerMapping -from ..bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - AcquisitionSampler, +from botorch.acquisition import ( + AcquisitionFunction, + LinearMCObjective, + qLogNoisyExpectedImprovement, ) -from ..bayesian_optimization.kernels.get_kernels import get_kernels -from ..bayesian_optimization.models import SurrogateModelMapping -from ..multi_fidelity_prior.utils import ( - compute_config_dist, - custom_crossover, - local_mutation, - update_fidelity, +from botorch.fit import fit_gpytorch_mll +from gpytorch import ExactMarginalLogLikelihood + +from neps.optimizers.bayesian_optimization.acquisition_functions.pibo import ( + pibo_acquisition, ) +from neps.optimizers.bayesian_optimization.models.gp import make_default_single_obj_gp +from neps.sampling.priors import Prior +from neps.sampling.samplers import Sampler +from neps.search_spaces.encoding import ConfigEncoder +from neps.search_spaces.functions import sample_one_old + +if TYPE_CHECKING: + from botorch.acquisition.analytic import SingleTaskGP + + from neps.search_spaces.search_space import SearchSpace TOLERANCE = 1e-2 # 1% SAMPLE_THRESHOLD = 1000 # num samples to be rejected for increasing hypersphere radius DELTA_THRESHOLD = 1e-2 # 1% TOP_EI_SAMPLE_COUNT = 10 +logger = logging.getLogger(__name__) + + +def update_fidelity(config: SearchSpace, fidelity: int | float) -> SearchSpace: + assert config.fidelity is not None + config.fidelity.set_value(fidelity) + return config + class SamplingPolicy(ABC): - """Base class for implementing a sampling strategy for SH and its subclasses""" + """Base class for implementing a sampling strategy for SH and its subclasses.""" - def __init__(self, pipeline_space: SearchSpace, patience: int = 100, logger=None): + def __init__(self, pipeline_space: SearchSpace, patience: int = 100): self.pipeline_space = pipeline_space self.patience = patience - self.logger = logger or logging.getLogger("neps") @abstractmethod - def sample(self, *args, **kwargs) -> SearchSpace: - pass + def sample(self, *args: Any, **kwargs: Any) 
-> SearchSpace: ...
 
 
 class RandomUniformPolicy(SamplingPolicy):
-    """A random policy for sampling configuration, i.e. the default for SH / hyperband
+    """A random policy for sampling configuration, i.e. the default for SH / hyperband.
 
     Args:
         SamplingPolicy ([type]): [description]
     """
 
-    def __init__(
-        self,
-        pipeline_space: SearchSpace,
-        logger=None,
-    ):
-        super().__init__(pipeline_space=pipeline_space, logger=logger)
+    def __init__(self, pipeline_space: SearchSpace):
+        super().__init__(pipeline_space=pipeline_space)
 
-    def sample(self, *args, **kwargs) -> SearchSpace:
-        return self.pipeline_space.sample(
-            patience=self.patience, user_priors=False, ignore_fidelity=True
+    def sample(self, *args: Any, **kwargs: Any) -> SearchSpace:
+        return sample_one_old(
+            self.pipeline_space,
+            patience=self.patience,
+            user_priors=False,
+            ignore_fidelity=True,
         )
 
@@ -75,15 +78,13 @@ class FixedPriorPolicy(SamplingPolicy):
     a fixed fraction from the prior.
     """
 
-    def __init__(
-        self, pipeline_space: SearchSpace, fraction_from_prior: float = 1, logger=None
-    ):
-        super().__init__(pipeline_space=pipeline_space, logger=logger)
+    def __init__(self, pipeline_space: SearchSpace, fraction_from_prior: float = 1):
+        super().__init__(pipeline_space=pipeline_space)
         assert 0 <= fraction_from_prior <= 1
         self.fraction_from_prior = fraction_from_prior
 
-    def sample(self, *args, **kwargs) -> SearchSpace:
-        """Samples from the prior with a certain probabiliyu
+    def sample(self, *args: Any, **kwargs: Any) -> SearchSpace:
+        """Samples from the prior with a certain probability.
 
         Returns:
             SearchSpace: [description]
@@ -91,10 +92,13 @@ def sample(self, *args, **kwargs) -> SearchSpace:
         user_priors = False
         if np.random.uniform() < self.fraction_from_prior:
             user_priors = True
-        config = self.pipeline_space.sample(
-            patience=self.patience, user_priors=user_priors, ignore_fidelity=True
+
+        return sample_one_old(
+            self.pipeline_space,
+            patience=self.patience,
+            user_priors=user_priors,
+            ignore_fidelity=True,
         )
-        return config
 
 
 class EnsemblePolicy(SamplingPolicy):
@@ -107,8 +111,9 @@ class EnsemblePolicy(SamplingPolicy):
     def __init__(
         self,
         pipeline_space: SearchSpace,
-        inc_type: str = "mutation",
-        logger=None,
+        inc_type: Literal[
+            "hypersphere", "gaussian", "crossover", "mutation"
+        ] = "mutation",
     ):
         """Samples a policy as per its weights and performs the selected sampling.
@@ -124,19 +129,30 @@ def __init__( 50% (mutation_rate=0.5) probability of selecting each hyperparmeter for perturbation, sampling a deviation N(value, mutation_std=0.5)) """ - super().__init__(pipeline_space=pipeline_space, logger=logger) + super().__init__(pipeline_space=pipeline_space) self.inc_type = inc_type # setting all probabilities uniformly self.policy_map = {"random": 0.33, "prior": 0.34, "inc": 0.33} - def sample_neighbour(self, incumbent, distance, tolerance=TOLERANCE): + def sample_neighbour( + self, + incumbent: SearchSpace, + distance: float, + tolerance: float = TOLERANCE, + ) -> SearchSpace: """Samples a config from around the `incumbent` within radius as `distance`.""" # TODO: how does tolerance affect optimization on landscapes of different scale sample_counter = 0 + from neps.optimizers.multi_fidelity_prior.utils import ( + compute_config_dist, + ) + while True: - # sampling a config - config = self.pipeline_space.sample( - patience=self.patience, user_priors=False, ignore_fidelity=False + config = sample_one_old( + self.pipeline_space, + patience=self.patience, + user_priors=False, + ignore_fidelity=False, ) # computing distance from incumbent d = compute_config_dist(config, incumbent) @@ -153,33 +169,42 @@ def sample_neighbour(self, incumbent, distance, tolerance=TOLERANCE): # end of while return config - def sample( - self, inc: SearchSpace = None, weights: dict[str, float] = None, *args, **kwargs + def sample( # noqa: PLR0912, C901, PLR0915 + self, + inc: SearchSpace | None = None, + weights: dict[str, float] | None = None, + *args: Any, + **kwargs: Any, ) -> SearchSpace: - """Samples from the prior with a certain probability + """Samples from the prior with a certain probability. Returns: SearchSpace: [description] """ + from neps.optimizers.multi_fidelity_prior.utils import ( + custom_crossover, + local_mutation, + ) + if weights is not None: for key, value in sorted(weights.items()): self.policy_map[key] = value else: - self.logger.info(f"Using default policy weights: {self.policy_map}") + logger.info(f"Using default policy weights: {self.policy_map}") prob_weights = [v for _, v in sorted(self.policy_map.items())] policy_idx = np.random.choice(range(len(prob_weights)), p=prob_weights) policy = sorted(self.policy_map.keys())[policy_idx] - self.logger.info( - f"Sampling from {policy} with weights (i, p, r)={prob_weights}" - ) + logger.info(f"Sampling from {policy} with weights (i, p, r)={prob_weights}") if policy == "prior": - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=True + config = sample_one_old( + self.pipeline_space, + patience=self.patience, + user_priors=True, + ignore_fidelity=True, ) elif policy == "inc": - if ( hasattr(self.pipeline_space, "has_prior") and self.pipeline_space.has_prior @@ -189,8 +214,8 @@ def sample( user_priors = False if inc is None: - inc = self.pipeline_space.sample_default_configuration().clone() - self.logger.warning( + inc = self.pipeline_space.from_dict(self.pipeline_space.prior_config) + logger.warning( "No incumbent config found, using default as the incumbent." 
             )
@@ -198,24 +223,29 @@ def sample(
             distance = kwargs["distance"]
             config = self.sample_neighbour(inc, distance)
         elif self.inc_type == "gaussian":
-            # use inc to set the defaults of the configuration
-            _inc = inc.clone()
-            _inc.set_defaults_to_current_values()
-            # then sample with prior=True from that configuration
-            # since the defaults are treated as the prior
-            config = _inc.sample(
-                patience=self.patience,
-                user_priors=user_priors,
-                ignore_fidelity=True,
+            # TODO: These could be lifted higher, ideally we pass
+            # down the encoder we want, where we want it. Also passing
+            # around a `Prior` should be the evidence that we want to use
+            # a prior, not whether the searchspace has a flag active or not.
+            encoder = ConfigEncoder.from_space(inc)
+            sampler = (
+                Prior.from_space(inc)
+                if user_priors
+                else Sampler.uniform(ndim=encoder.ncols)
             )
+
+            config_tensor = sampler.sample(1, to=encoder.domains)
+            config_dict = encoder.decode(config_tensor)[0]
+            _fids = {fid_name: fid.value for fid_name, fid in inc.fidelities.items()}
+
+            config = inc.from_dict({**config_dict, **_fids})
+
         elif self.inc_type == "crossover":
             # choosing the configuration for crossover with incumbent
             # the weight distributed across prior and inc
             _w_priors = 1 - self.policy_map["random"]
             # re-calculate normalized score ratio for prior-inc
-            w_prior = np.clip(
-                self.policy_map["prior"] / _w_priors, a_min=0, a_max=1
-            )
+            w_prior = np.clip(self.policy_map["prior"] / _w_priors, a_min=0, a_max=1)
             w_inc = np.clip(self.policy_map["inc"] / _w_priors, a_min=0, a_max=1)
             # calculating difference of prior and inc score
             score_diff = np.abs(w_prior - w_inc)
@@ -223,17 +253,19 @@ def sample(
             # if the score difference is small, crossover between incumbent and prior
             # if the score difference is large, crossover between incumbent and random
             probs = [1 - score_diff, score_diff]  # the order is [prior, random]
-            user_priors = np.random.choice([True, False], p=probs)
             if (
                 hasattr(self.pipeline_space, "has_prior")
                 and not self.pipeline_space.has_prior
             ):
                 user_priors = False
-            self.logger.info(
+            else:
+                user_priors = np.random.choice([True, False], p=probs)
+            logger.info(
                 f"Crossing over with user_priors={user_priors} with p={probs}"
             )
             # sampling a configuration either randomly or from a prior
-            _config = self.pipeline_space.sample(
+            _config = sample_one_old(
+                self.pipeline_space,
                 patience=self.patience,
                 user_priors=user_priors,
                 ignore_fidelity=True,
@@ -256,15 +288,17 @@ def sample(
                     f"{{'mutation', 'crossover', 'hypersphere', 'gaussian'}}"
                 )
         else:
-            # random
-            config = self.pipeline_space.sample(
-                patience=self.patience, user_priors=False, ignore_fidelity=True
+            config = sample_one_old(
+                self.pipeline_space,
+                patience=self.patience,
+                user_priors=False,
+                ignore_fidelity=True,
             )
         return config
 
 
 class ModelPolicy(SamplingPolicy):
-    """A policy for sampling configuration, i.e. the default for SH / hyperband
+    """A model-based policy for sampling configurations.
     Args:
         SamplingPolicy ([type]): [description]
@@ -272,93 +306,77 @@
     def __init__(
         self,
+        *,
         pipeline_space: SearchSpace,
-        surrogate_model: str | Any = "gp",
-        domain_se_kernel: str = None,
-        graph_kernels: list = None,
-        hp_kernels: list = None,
-        surrogate_model_args: dict = None,
-        acquisition: str | BaseAcquisition = "EI",
-        log_prior_weighted: bool = False,
-        acquisition_sampler: str | AcquisitionSampler = "random",
-        patience: int = 100,
-        logger=None,
+        prior: Prior | None = None,
+        use_cost: bool = False,
+        device: torch.device | None = None,
     ):
-        super().__init__(pipeline_space=pipeline_space, logger=logger)
-
-        surrogate_model_args = surrogate_model_args or {}
-
-        graph_kernels, hp_kernels = get_kernels(
-            pipeline_space=pipeline_space,
-            domain_se_kernel=domain_se_kernel,
-            graph_kernels=graph_kernels,
-            hp_kernels=hp_kernels,
-            optimal_assignment=False,
-        )
-        if "graph_kernels" not in surrogate_model_args:
-            surrogate_model_args["graph_kernels"] = None
-        if "hp_kernels" not in surrogate_model_args:
-            surrogate_model_args["hp_kernels"] = hp_kernels
-        if not surrogate_model_args["hp_kernels"]:
-            raise ValueError("No kernels are provided!")
-        if "vectorial_features" not in surrogate_model_args:
-            surrogate_model_args[
-                "vectorial_features"
-            ] = pipeline_space.get_vectorial_dim()
-
-        self.surrogate_model = instance_from_map(
-            SurrogateModelMapping,
-            surrogate_model,
-            name="surrogate model",
-            kwargs=surrogate_model_args,
+        if use_cost:
+            raise NotImplementedError("Cost is not implemented yet.")
+
+        super().__init__(pipeline_space=pipeline_space)
+        self.device = device
+        self.prior = prior
+        self._encoder = ConfigEncoder.from_space(
+            pipeline_space,
+            include_constants_when_decoding=True,
         )
+        self._model: SingleTaskGP | None = None
+        self._acq: AcquisitionFunction | None = None
 
-        self.acquisition = instance_from_map(
-            AcquisitionMapping,
-            acquisition,
-            name="acquisition function",
+    def update_model(
+        self,
+        train_x: list[SearchSpace],
+        train_y: list[float],
+        pending_x: list[SearchSpace],
+        decay_t: float | None = None,
+    ) -> None:
+        x_train = self._encoder.encode([config._values for config in train_x])
+        x_pending = self._encoder.encode([config._values for config in pending_x])
+        y_train = torch.tensor(train_y, dtype=torch.float64, device=self.device)
+
+        # TODO: Most of this just copies BO and the duplication can be replaced
+        # once we don't have the two stage `update_model()` and `sample()`
+        y_model = make_default_single_obj_gp(x_train, y_train, encoder=self._encoder)
+
+        fit_gpytorch_mll(
+            ExactMarginalLogLikelihood(likelihood=y_model.likelihood, model=y_model),
        )
-
-        # TODO: Enable only when a flag exists to toggle prior-based decaying of AF
-        # if pipeline_space.has_prior:
-        #     self.acquisition = DecayingPriorWeightedAcquisition(
-        #         self.acquisition, log=log_prior_weighted
-        #     )
-
-        self.acquisition_sampler = instance_from_map(
-            AcquisitionSamplerMapping,
-            acquisition_sampler,
-            name="acquisition sampler function",
-            kwargs={"patience": patience, "pipeline_space": pipeline_space},
+        acq = qLogNoisyExpectedImprovement(
+            y_model,
+            X_baseline=x_train,
+            X_pending=x_pending,
+            # Unfortunately, there's no option to indicate that we minimize
+            # the AcqFunction so we need to do some kind of transformation.
+            # https://github.com/pytorch/botorch/issues/2316#issuecomment-2085964607
+            objective=LinearMCObjective(weights=torch.tensor([-1.0])),
+        )
-        self.sampling_args: dict = {}
-
-    def _fantasize_pending(self, train_x, train_y, pending_x):
-        if len(pending_x) == 0:
-            return train_x, train_y
-        # fit model on finished evaluations
-        self.surrogate_model.fit(train_x, train_y)
-        # hallucinating: predict for the pending evaluations
-        _y, _ = self.surrogate_model.predict(pending_x)
-        _y = _y.detach().numpy().tolist()
-        # appending to training data
-        train_x.extend(pending_x)
-        train_y.extend(_y)
-        return train_x, train_y
-
-    def update_model(self, train_x, train_y, pending_x, decay_t=None):
-        if decay_t is None:
-            decay_t = len(train_x)
-        train_x, train_y = self._fantasize_pending(train_x, train_y, pending_x)
-        self.surrogate_model.fit(train_x, train_y)
-        self.acquisition.set_state(self.surrogate_model, decay_t=decay_t)
-        # TODO: set_state should generalize to all options
-        # no needed to set state of sampler when using `random`
-        # self.acquisition_sampler.set_state(x=train_x, y=train_y)
+        # If we have a prior, wrap the above acquisition with a prior weighting
+        if self.prior is not None:
+            assert decay_t is not None
+            # TODO: Ideally we have something based on budget and dimensions, not an
+            # arbitrary term. This 10 is extracted from the old DecayingWeightedPrior
+            pibo_exp_term = 10 / decay_t
+            significant_lower_bound = 1e-4  # No significant impact beyond this point
+            if pibo_exp_term > significant_lower_bound:
+                acq = pibo_acquisition(
+                    acq,
+                    prior=self.prior,
+                    prior_exponent=pibo_exp_term,
+                    x_domain=self._encoder.domains,
+                )
+
+        self._y_model = y_model
+        self._acq = acq
 
+    # TODO: rework with MFBO
     def sample(
-        self, active_max_fidelity: int = None, fidelity: int = None, **kwargs
+        self,
+        active_max_fidelity: int | None = None,
+        fidelity: int | None = None,
+        **kwargs: Any,
     ) -> SearchSpace:
         """Performs the equivalent of optimizing the acquisition function.
@@ -373,11 +391,9 @@ def sample(
         variable set to the same value. This value is same as that of the fidelity
         value of the configs in the training data.
         """
-        self.logger.info("Acquiring...")
-
         # sampling random configurations
         samples = [
-            self.pipeline_space.sample(user_priors=False, ignore_fidelity=True)
+            sample_one_old(self.pipeline_space, user_priors=False, ignore_fidelity=True)
             for _ in range(SAMPLE_THRESHOLD)
         ]
 
@@ -411,173 +427,4 @@ def sample(
         # computes the EI for all `samples`
         eis = self.acquisition.eval(x=samples, asscalar=True)
         # extracting the highest scored sample
-        config = samples[np.argmax(eis)]
-        # TODO: can generalize s.t.
sampler works for all types, currently, - # random sampler in NePS does not do what is required here - # return self.acquisition_sampler.sample(self.acquisition) - return config - - -class BaseDynamicModelPolicy(SamplingPolicy): - def __init__( - self, - pipeline_space: SearchSpace, - observed_configs: Any = None, - surrogate_model: str | Any = "gp", - domain_se_kernel: str = None, - hp_kernels: list = None, - graph_kernels: list = None, - surrogate_model_args: dict = None, - acquisition: str | BaseAcquisition = "EI", - use_priors: bool = False, - log_prior_weighted: bool = False, - acquisition_sampler: str | AcquisitionSampler = "random", - patience: int = 100, - logger=None, - ): - super().__init__(pipeline_space=pipeline_space, logger=logger) - - surrogate_model_args = surrogate_model_args or {} - - graph_kernels, hp_kernels = get_kernels( - pipeline_space=pipeline_space, - domain_se_kernel=domain_se_kernel, - graph_kernels=graph_kernels, - hp_kernels=hp_kernels, - optimal_assignment=False, - ) - if "graph_kernels" not in surrogate_model_args: - surrogate_model_args["graph_kernels"] = graph_kernels - if "hp_kernels" not in surrogate_model_args: - surrogate_model_args["hp_kernels"] = hp_kernels - if not surrogate_model_args["hp_kernels"]: - raise ValueError("No kernels are provided!") - if "vectorial_features" not in surrogate_model_args: - surrogate_model_args[ - "vectorial_features" - ] = pipeline_space.get_vectorial_dim() - - self.surrogate_model = instance_from_map( - SurrogateModelMapping, - surrogate_model, - name="surrogate model", - kwargs=surrogate_model_args, - ) - - self.acquisition = instance_from_map( - AcquisitionMapping, - acquisition, - name="acquisition function", - ) - - if use_priors and pipeline_space.has_prior: - self.acquisition = DecayingPriorWeightedAcquisition( - self.acquisition, log=log_prior_weighted - ) - - self.acquisition_sampler = instance_from_map( - AcquisitionSamplerMapping, - acquisition_sampler, - name="acquisition sampler function", - kwargs={"patience": patience, "pipeline_space": pipeline_space}, - ) - - self.sampling_args: dict = {} - - self.observed_configs = observed_configs - - def _fantasize_pending(self, train_x, train_y, pending_x): - if len(pending_x) == 0: - return train_x, train_y - # fit model on finished evaluations - self.surrogate_model.fit(train_x, train_y) - # hallucinating: predict for the pending evaluations - _y, _ = self.surrogate_model.predict(pending_x) - _y = _y.detach().numpy().tolist() - # appending to training data - train_x.extend(pending_x) - train_y.extend(_y) - return train_x, train_y - - def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None): - if train_x is None: - train_x = [] - if train_y is None: - train_y = [] - if pending_x is None: - pending_x = [] - - if decay_t is None: - decay_t = len(train_x) - train_x, train_y = self._fantasize_pending(train_x, train_y, pending_x) - self.surrogate_model.fit(train_x, train_y) - self.acquisition.set_state(self.surrogate_model, decay_t=decay_t) - self.acquisition_sampler.set_state(x=train_x, y=train_y) - - @abstractmethod - def sample(self, *args, **kwargs) -> tuple[int, SearchSpace]: - pass - - -class RandomPromotionDynamicPolicy(BaseDynamicModelPolicy): - def __init__(self, *args, **kwargs): - self.num_train_configs = 0 - - super().__init__(*args, **kwargs) - - def _fantasize_pending(self, *args, **kwargs): - pending_configs = [] - - # Select configs that are neither pending nor resulted in error - completed_configs = 
self.observed_configs.completed_runs.copy(deep=True) - - # Get the config, performance values for the maximum budget runs that are completed - max_budget_samples = completed_configs.sort_index().groupby(level=0).last() - max_budget_configs = max_budget_samples[ - self.observed_configs.config_col - ].to_list() - max_budget_perf = max_budget_samples[self.observed_configs.perf_col].to_list() - - pending_condition = self.observed_configs.pending_condition - if pending_condition.any(): - pending_configs = ( - self.observed_configs.df[pending_condition] - .loc[(), self.observed_configs.config_col] - .unique() - .to_list() - ) - return super()._fantasize_pending( - max_budget_configs, max_budget_perf, pending_configs - ) - - def sample(self, rand_promotion_prob=0.5, seed=777, is_promotion=False, **kwargs): - promoted = False - # np.random.seed(seed) - if np.random.random_sample() < rand_promotion_prob: - config_id = ( - self.observed_configs.df[~self.observed_configs.error_condition] - .sample(1) - .index[0][0] - ) - max_budget_id = self.observed_configs.df.loc[(config_id,)].index[-1] - config = self.observed_configs.df.loc[ - (config_id, max_budget_id), self.observed_configs.config_col - ] - promoted = True - - else: - config_id = len(self.observed_configs.df.index.levels[0]) - config = self.acquisition_sampler.sample(self.acquisition) - - if is_promotion and promoted: - return config_id - elif is_promotion: - return None - else: - return config - - # def sample(self, **kwargs): - # return self._sample(is_promotion=False, **kwargs) - # - # def retrieve_promotions(self, **kwargs): - # return self._sample(is_promotion=True, **kwargs) + return samples[np.argmax(eis)] diff --git a/neps/optimizers/multi_fidelity/successive_halving.py b/neps/optimizers/multi_fidelity/successive_halving.py index a936b0a23..08d8c8d1f 100644 --- a/neps/optimizers/multi_fidelity/successive_halving.py +++ b/neps/optimizers/multi_fidelity/successive_halving.py @@ -1,24 +1,16 @@ -# type: ignore - from __future__ import annotations +import logging import random -import typing +from collections.abc import Mapping from copy import deepcopy +from typing import TYPE_CHECKING, Any, Literal +from typing_extensions import override import numpy as np import pandas as pd -from typing_extensions import Literal, override -from neps.utils.types import ConfigResult, RawConfig -from neps.search_spaces import ( - CategoricalParameter, - ConstantParameter, - FloatParameter, - IntegerParameter, - SearchSpace, -) -from neps.optimizers.base_optimizer import BaseOptimizer +from neps.optimizers.base_optimizer import BaseOptimizer, SampledConfig from neps.optimizers.multi_fidelity.promotion_policy import ( AsyncPromotionPolicy, SyncPromotionPolicy, @@ -27,13 +19,26 @@ FixedPriorPolicy, RandomUniformPolicy, ) +from neps.search_spaces import ( + Categorical, + Constant, + Float, + Integer, + SearchSpace, +) +from neps.search_spaces.functions import sample_one_old + +if TYPE_CHECKING: + from neps.state.optimizer import BudgetInfo + from neps.state.trial import Trial + from neps.utils.types import ConfigResult, RawConfig -CUSTOM_FLOAT_CONFIDENCE_SCORES = dict(FloatParameter.DEFAULT_CONFIDENCE_SCORES) +logger = logging.getLogger(__name__) + +CUSTOM_FLOAT_CONFIDENCE_SCORES = dict(Float.DEFAULT_CONFIDENCE_SCORES) CUSTOM_FLOAT_CONFIDENCE_SCORES.update({"ultra": 0.05}) -CUSTOM_CATEGORICAL_CONFIDENCE_SCORES = dict( - CategoricalParameter.DEFAULT_CONFIDENCE_SCORES -) +CUSTOM_CATEGORICAL_CONFIDENCE_SCORES = dict(Categorical.PRIOR_CONFIDENCE_SCORES) 
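+# For intuition (hypothetical numbers, not the library's actual tables): a lower
+# float confidence score means a tighter prior std. dev, while a higher
+# categorical score puts more mass on the prior category; "ultra" extends both
+# built-in tables beyond "high", e.g.
+#   floats:       {"low": 0.5, "medium": 0.25, "high": 0.125, "ultra": 0.05}
+#   categoricals: {"low": 2, "medium": 4, "high": 6, "ultra": 8}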
 CUSTOM_CATEGORICAL_CONFIDENCE_SCORES.update({"ultra": 8})
@@ -42,28 +47,28 @@ class SuccessiveHalvingBase(BaseOptimizer):
     def __init__(
         self,
+        *,
         pipeline_space: SearchSpace,
-        budget: int = None,
+        max_cost_total: int | None = None,
         eta: int = 3,
         early_stopping_rate: int = 0,
         initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget",
         use_priors: bool = False,
-        sampling_policy: typing.Any = RandomUniformPolicy,
-        promotion_policy: typing.Any = SyncPromotionPolicy,
-        loss_value_on_error: None | float = None,
+        sampling_policy: Any = RandomUniformPolicy,
+        promotion_policy: Any = SyncPromotionPolicy,
+        objective_to_minimize_value_on_error: None | float = None,
         cost_value_on_error: None | float = None,
         ignore_errors: bool = False,
-        logger=None,
-        prior_confidence: Literal["low", "medium", "high"] = None,
+        prior_confidence: Literal["low", "medium", "high"] | None = None,
         random_interleave_prob: float = 0.0,
-        sample_default_first: bool = False,
-        sample_default_at_target: bool = False,
+        sample_prior_first: bool = False,
+        sample_prior_at_target: bool = False,
     ):
         """Initialise an SH bracket.
 
         Args:
             pipeline_space: Space in which to search
-            budget: Maximum budget
+            max_cost_total: Maximum budget
             eta: The reduction factor used by SH
             early_stopping_rate: Determines the number of rungs in an SH bracket
                 Choosing 0 creates maximal rungs given the fidelity bounds
@@ -73,44 +78,41 @@ def __init__(
                 Samples generated from a Gaussian centered around the default value
             sampling_policy: The type of sampling procedure to use
             promotion_policy: The type of promotion procedure to use
-            loss_value_on_error: Setting this and cost_value_on_error to any float will
-                supress any error during bayesian optimization and will use given loss
-                value instead. default: None
-            cost_value_on_error: Setting this and loss_value_on_error to any float will
-                supress any error during bayesian optimization and will use given cost
-                value instead. default: None
-            logger: logger object, or None to use the neps logger
+            objective_to_minimize_value_on_error: Setting this and cost_value_on_error to
+                any float will suppress any error during Bayesian optimization and will
+                use given objective_to_minimize value instead. default: None
+            cost_value_on_error: Setting this and objective_to_minimize_value_on_error to
+                any float will suppress any error during Bayesian optimization and will
+                use given cost value instead.
default: None prior_confidence: The range of confidence to have on the prior The higher the confidence, the smaller is the standard deviation of the prior distribution centered around the default random_interleave_prob: Chooses the fraction of samples from random vs prior - sample_default_first: Whether to sample the default configuration first - sample_default_at_target: Whether to evaluate the default configuration at + sample_prior_first: Whether to sample the prior configuration first + sample_prior_at_target: Whether to evaluate the prior configuration at the target fidelity or max budget """ super().__init__( pipeline_space=pipeline_space, - budget=budget, - loss_value_on_error=loss_value_on_error, + max_cost_total=max_cost_total, + objective_to_minimize_value_on_error=objective_to_minimize_value_on_error, cost_value_on_error=cost_value_on_error, ignore_errors=ignore_errors, - logger=logger, ) if random_interleave_prob < 0 or random_interleave_prob > 1: raise ValueError("random_interleave_prob should be in [0.0, 1.0]") self.random_interleave_prob = random_interleave_prob - self.sample_default_first = sample_default_first - self.sample_default_at_target = sample_default_at_target + self.sample_prior_first = sample_prior_first + self.sample_prior_at_target = sample_prior_at_target + assert self.pipeline_space.fidelity is not None, "Fidelity parameter not set." self.min_budget = self.pipeline_space.fidelity.lower self.max_budget = self.pipeline_space.fidelity.upper self.eta = eta # SH implicitly sets early_stopping_rate to 0 # the parameter is exposed to allow HB to call SH with different stopping rates self.early_stopping_rate = early_stopping_rate - self.sampling_policy = sampling_policy( - pipeline_space=self.pipeline_space, logger=self.logger - ) + self.sampling_policy = sampling_policy(pipeline_space=self.pipeline_space) self.promotion_policy = promotion_policy(self.eta) # `max_budget_init` checks for the number of configurations that have been @@ -142,13 +144,11 @@ def __init__( # crucial data structure used for determining promotion candidates self.observed_configs = pd.DataFrame([], columns=("config", "rung", "perf")) # stores which configs occupy each rung at any time - self.rung_members: dict = dict() # stores config IDs per rung - self.rung_members_performance: dict = dict() # performances recorded per rung - self.rung_promotions: dict = dict() # records a promotable config per rung - self.total_fevals = 0 + self.rung_members: dict = {} # stores config IDs per rung + self.rung_members_performance: dict = {} # performances recorded per rung + self.rung_promotions: dict = {} # records a promotable config per rung # setup SH state counter - self._counter = 0 self.full_rung_trace = SuccessiveHalving._get_rung_trace( self.rung_map, self.config_map ) @@ -159,7 +159,9 @@ def __init__( # the std. 
dev or peakiness of distribution self.prior_confidence = prior_confidence self._enhance_priors() - self.rung_histories = None + self.rung_histories: dict[ + int, dict[Literal["config", "perf"], list[int | float]] + ] = {} @classmethod def _get_rung_trace(cls, rung_map: dict, config_map: dict) -> list[int]: @@ -169,12 +171,6 @@ def _get_rung_trace(cls, rung_map: dict, config_map: dict) -> list[int]: rung_trace.extend([rung] * config_map[rung]) return rung_trace - def get_incumbent_score(self): - y_star = np.inf # minimizing optimizer - if len(self.observed_configs): - y_star = self.observed_configs.perf.values.min() - return y_star - def _get_rung_map(self, s: int = 0) -> dict: """Maps rungs (0,1,...,k) to a fidelity value based on fidelity bounds, eta, s.""" assert s <= self.stopping_rate_limit @@ -186,18 +182,18 @@ def _get_rung_map(self, s: int = 0) -> dict: + 1 ) _max_budget = self.max_budget - rung_map = dict() + rung_map = {} for i in reversed(range(nrungs)): rung_map[i + s] = ( int(_max_budget) - if isinstance(self.pipeline_space.fidelity, IntegerParameter) + if isinstance(self.pipeline_space.fidelity, Integer) else _max_budget ) _max_budget /= self.eta return rung_map def _get_config_map(self, s: int = 0) -> dict: - """Maps rungs (0,1,...,k) to the number of configs for each fidelity""" + """Maps rungs (0,1,...,k) to the number of configs for each fidelity.""" assert s <= self.stopping_rate_limit new_min_budget = self.min_budget * (self.eta**s) nrungs = ( @@ -210,7 +206,7 @@ def _get_config_map(self, s: int = 0) -> dict: _s = self.stopping_rate_limit - s # L2 from Alg 1 in https://arxiv.org/pdf/1603.06560.pdf _n_config = np.floor(s_max / (_s + 1)) * self.eta**_s - config_map = dict() + config_map = {} for i in range(nrungs): config_map[i + s] = int(_n_config) _n_config //= self.eta @@ -223,11 +219,12 @@ def _get_config_id_split(cls, config_id: str) -> tuple[str, str]: return _config, _rung def _load_previous_observations( - self, previous_results: dict[str, ConfigResult] + self, + previous_results: dict[str, ConfigResult], ) -> None: for config_id, config_val in previous_results.items(): _config, _rung = self._get_config_id_split(config_id) - perf = self.get_loss(config_val.result) + perf = self.get_objective_to_minimize(config_val.result) if int(_config) in self.observed_configs.index: # config already recorded in dataframe rung_recorded = self.observed_configs.at[int(_config), "rung"] @@ -242,18 +239,20 @@ def _load_previous_observations( columns=self.observed_configs.columns, index=pd.Series(int(_config)), # key for config_id ) - self.observed_configs = pd.concat( - (self.observed_configs, _df) - ).sort_index() + if self.observed_configs.empty: + self.observed_configs = _df + else: + self.observed_configs = pd.concat( + (self.observed_configs, _df) + ).sort_index() # for efficiency, redefining the function to have the # `rung_histories` assignment inside the for loop # rung histories are collected only for `previous` and not `pending` configs self.rung_histories[int(_rung)]["config"].append(int(_config)) self.rung_histories[int(_rung)]["perf"].append(perf) - return def _handle_pending_evaluations( - self, pending_evaluations: dict[str, ConfigResult] + self, pending_evaluations: dict[str, SearchSpace] ) -> None: # iterates over all pending evaluations and updates the list of observed # configs with the rung and performance as None @@ -271,14 +270,13 @@ def _handle_pending_evaluations( else: self.observed_configs.at[int(_config), "rung"] = int(_rung) 
self.observed_configs.at[int(_config), "perf"] = np.nan - return - def clean_rung_information(self): - self.rung_members = {k: [] for k in self.rung_map.keys()} - self.rung_members_performance = {k: [] for k in self.rung_map.keys()} - self.rung_promotions = {k: [] for k in self.rung_map.keys()} + def clean_rung_information(self) -> None: + self.rung_members = {k: [] for k in self.rung_map} + self.rung_members_performance = {k: [] for k in self.rung_map} + self.rung_promotions = {k: [] for k in self.rung_map} - def _get_rungs_state(self, observed_configs=None): + def _get_rungs_state(self, observed_configs: pd.DataFrame | None = None) -> None: """Collects info on configs at a rung and their performance there.""" # to account for incomplete evaluations from being promoted --> working on a copy observed_configs = ( @@ -288,8 +286,8 @@ def _get_rungs_state(self, observed_configs=None): ) # remove the default from being part of a Successive-Halving bracket if ( - self.sample_default_first - and self.sample_default_at_target + self.sample_prior_first + and self.sample_prior_at_target and 0 in observed_configs.index.values ): observed_configs = observed_configs.drop(index=0) @@ -300,9 +298,8 @@ def _get_rungs_state(self, observed_configs=None): idxs = observed_configs.rung == _rung self.rung_members[_rung] = observed_configs.index[idxs].values self.rung_members_performance[_rung] = observed_configs.perf[idxs].values - return - def _handle_promotions(self): + def _handle_promotions(self) -> None: self.promotion_policy.set_state( max_rung=self.max_rung, members=self.rung_members, @@ -311,23 +308,34 @@ def _handle_promotions(self): ) self.rung_promotions = self.promotion_policy.retrieve_promotions() - def clear_old_brackets(self): + def clear_old_brackets(self) -> None: return - def _fit_models(self): + def _fit_models(self) -> None: # define any model or surrogate training and acquisition function state setting # if adding model-based search to the basic multi-fidelity algorithm return @override - def load_optimization_state( + def ask( self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], + trials: Mapping[str, Trial], budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: + n: int | None = None, + ) -> SampledConfig | list[SampledConfig]: """This is basically the fit method.""" + assert n is None, "TODO" + completed: dict[str, ConfigResult] = { + trial_id: trial.into_config_result(self.pipeline_space.from_dict) + for trial_id, trial in trials.items() + if trial.report is not None + } + pending: dict[str, SearchSpace] = { + trial_id: self.pipeline_space.from_dict(trial.config) + for trial_id, trial in trials.items() + if trial.report is None + } + self.rung_histories = { rung: {"config": [], "perf": []} for rung in range(self.min_rung, self.max_rung + 1) @@ -336,11 +344,10 @@ def load_optimization_state( self.observed_configs = pd.DataFrame([], columns=("config", "rung", "perf")) # previous optimization run exists and needs to be loaded - self._load_previous_observations(previous_results) - self.total_fevals = len(previous_results) + len(pending_evaluations) + self._load_previous_observations(completed) # account for pending evaluations - self._handle_pending_evaluations(pending_evaluations) + self._handle_pending_evaluations(pending) # process optimization state and bucket observations per rung self._get_rungs_state() @@ -354,37 +361,38 @@ def load_optimization_state( # fit any model/surrogates self._fit_models() - return + 
config, _id, previous_id = self.get_config_and_ids() + return SampledConfig(id=_id, config=config, previous_config_id=previous_id) def is_init_phase(self) -> bool: return True def sample_new_config( self, - rung: int = None, - **kwargs, - ): + rung: int | None = None, + **kwargs: Any, + ) -> SearchSpace: # Samples configuration from policy or random if self.sampling_policy is None: - config = self.pipeline_space.sample( + return sample_one_old( + self.pipeline_space, patience=self.patience, user_priors=self.use_priors, ignore_fidelity=True, ) - else: - config = self.sampling_policy.sample(**self.sampling_args) - return config - def _generate_new_config_id(self): - return self.observed_configs.index.max() + 1 if len(self.observed_configs) else 0 + return self.sampling_policy.sample(**self.sampling_args) - def get_default_configuration(self): - pass + def _generate_new_config_id(self) -> int: + if len(self.observed_configs) == 0: + return 0 + + _max = self.observed_configs.index.max() + return int(_max) + 1 # type: ignore def is_promotable(self) -> int | None: """Returns an int if a rung can be promoted, else a None.""" rung_to_promote = None - # # iterates starting from the highest fidelity promotable to the lowest fidelity for rung in reversed(range(self.min_rung, self.max_rung)): if len(self.rung_promotions[rung]) > 0: @@ -400,6 +408,9 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: Returns: [type]: [description] """ + fidelity_name = self.pipeline_space.fidelity_name + assert fidelity_name is not None + rung_to_promote = self.is_promotable() if rung_to_promote is not None: # promotes the first recorded promotable config in the argsort-ed rung @@ -407,27 +418,31 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: config = row["config"].clone() rung = rung_to_promote + 1 # assigning the fidelity to evaluate the config at - config.fidelity.set_value(self.rung_map[rung]) + + config_values = config._values + config_values[fidelity_name] = self.rung_map[rung] + # updating config IDs previous_config_id = f"{row.name}_{rung_to_promote}" config_id = f"{row.name}_{rung}" else: rung_id = self.min_rung # using random instead of np.random to be consistent with NePS BO + rng = random.Random(None) # TODO: Seeding if ( self.use_priors - and self.sample_default_first + and self.sample_prior_first and len(self.observed_configs) == 0 ): - if self.sample_default_at_target: + if self.sample_prior_at_target: # sets the default config to be evaluated at the target fidelity rung_id = self.max_rung - self.logger.info("Next config will be evaluated at target fidelity.") - self.logger.info("Sampling the default configuration...") - config = self.pipeline_space.sample_default_configuration() - - elif random.random() < self.random_interleave_prob: - config = self.pipeline_space.sample( + logger.info("Next config will be evaluated at target fidelity.") + logger.info("Sampling the default configuration...") + config = self.pipeline_space.from_dict(self.pipeline_space.prior_config) + elif rng.random() < self.random_interleave_prob: + config = sample_one_old( + self.pipeline_space, patience=self.patience, user_priors=False, # sample uniformly random ignore_fidelity=True, @@ -436,14 +451,15 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: config = self.sample_new_config(rung=rung_id) fidelity_value = self.rung_map[rung_id] - config.fidelity.set_value(fidelity_value) + config_values = config._values + config_values[fidelity_name] = fidelity_value 
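# Both branches above share the same bookkeeping: a config id is
# "<config>_<rung>", the numeric part stays fixed for a configuration's whole
# lifetime, and the fidelity an evaluation runs at is simply `rung_map[rung]`.
# A standalone sketch, with an assumed rung map (what `_get_rung_map` yields
# for example bounds 1..27 and eta=3):
rung_map = {0: 1, 1: 3, 2: 9, 3: 27}  # rung -> fidelity, geometric in eta

def promote_id(config_id: str) -> tuple[str, str]:
    # mirrors `_get_config_id_split` plus the rung bump applied on promotion
    config, rung = config_id.split("_")
    return config_id, f"{config}_{int(rung) + 1}"

previous_id, new_id = promote_id("4_1")  # ("4_1", "4_2")
fidelity_for_new_eval = rung_map[int(new_id.split("_")[1])]  # evaluate at 9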
previous_config_id = None config_id = f"{self._generate_new_config_id()}_{rung_id}" - return config.hp_values(), config_id, previous_config_id # type: ignore + return config_values, config_id, previous_config_id - def _enhance_priors(self, confidence_score=None): + def _enhance_priors(self, confidence_score: dict[str, float] | None = None) -> None: """Only applicable when priors are given along with a confidence. Args: @@ -451,38 +467,39 @@ def _enhance_priors(self, confidence_score=None): The confidence scores for the types. Example: {"categorical": 5.2, "numeric": 0.15} """ - if not self.use_priors and self.prior_confidence is None: + if not self.use_priors or self.prior_confidence is None: return + for k, v in self.pipeline_space.items(): - if v.is_fidelity or isinstance(v, ConstantParameter): + if v.is_fidelity or isinstance(v, Constant): continue - elif isinstance(v, (FloatParameter, IntegerParameter)): + if isinstance(v, Float | Integer): if confidence_score is None: confidence = CUSTOM_FLOAT_CONFIDENCE_SCORES[self.prior_confidence] else: confidence = confidence_score["numeric"] - self.pipeline_space[k].default_confidence_score = confidence - elif isinstance(v, CategoricalParameter): + self.pipeline_space[k].prior_confidence_score = confidence + elif isinstance(v, Categorical): if confidence_score is None: confidence = CUSTOM_CATEGORICAL_CONFIDENCE_SCORES[ self.prior_confidence ] else: confidence = confidence_score["categorical"] - self.pipeline_space[k].default_confidence_score = confidence + self.pipeline_space[k].prior_confidence_score = confidence class SuccessiveHalving(SuccessiveHalvingBase): - def _calc_budget_used_in_bracket(self, config_history: list[int]): - budget = 0 - for rung in self.config_map.keys(): + def _calc_budget_used_in_bracket(self, config_history: list[int]) -> int: + max_cost_total = 0 + for rung in self.config_map: count = sum(config_history == rung) # `range(min_rung, rung+1)` counts the black-box cost of promotions since # SH budgets assume each promotion involves evaluation from scratch - budget += count * sum(np.arange(self.min_rung, rung + 1)) - return budget + max_cost_total += count * sum(np.arange(self.min_rung, rung + 1)) + return max_cost_total - def clear_old_brackets(self): + def clear_old_brackets(self) -> None: """Enforces reset at each new bracket. 
The _get_rungs_state() function creates the `rung_promotions` dict mapping which @@ -497,7 +514,7 @@ def clear_old_brackets(self): # indexes to mark separate brackets start = 0 end = self.config_map[self.min_rung] # length of lowest rung in a bracket - if self.sample_default_at_target and self.sample_default_first: + if self.sample_prior_at_target and self.sample_prior_first: start += 1 end += 1 # iterates over the different SH brackets which span start-end by index @@ -542,39 +559,38 @@ class SuccessiveHalvingWithPriors(SuccessiveHalving): def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, early_stopping_rate: int = 0, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", - sampling_policy: typing.Any = FixedPriorPolicy, - promotion_policy: typing.Any = SyncPromotionPolicy, - loss_value_on_error: None | float = None, + sampling_policy: Any = FixedPriorPolicy, + promotion_policy: Any = SyncPromotionPolicy, + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, prior_confidence: Literal["low", "medium", "high"] = "medium", # medium = 0.25 random_interleave_prob: float = 0.0, - sample_default_first: bool = False, - sample_default_at_target: bool = False, + sample_prior_first: bool = False, + sample_prior_at_target: bool = False, ): super().__init__( pipeline_space=pipeline_space, - budget=budget, + max_cost_total=max_cost_total, eta=eta, early_stopping_rate=early_stopping_rate, initial_design_type=initial_design_type, use_priors=self.use_priors, sampling_policy=sampling_policy, promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, + objective_to_minimize_value_on_error=objective_to_minimize_value_on_error, cost_value_on_error=cost_value_on_error, ignore_errors=ignore_errors, - logger=logger, prior_confidence=prior_confidence, random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, + sample_prior_first=sample_prior_first, + sample_prior_at_target=sample_prior_at_target, ) @@ -583,40 +599,39 @@ class AsynchronousSuccessiveHalving(SuccessiveHalvingBase): def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, early_stopping_rate: int = 0, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", use_priors: bool = False, - sampling_policy: typing.Any = RandomUniformPolicy, - promotion_policy: typing.Any = AsyncPromotionPolicy, # key difference from SH - loss_value_on_error: None | float = None, + sampling_policy: Any = RandomUniformPolicy, + promotion_policy: Any = AsyncPromotionPolicy, # key difference from SH + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, - prior_confidence: Literal["low", "medium", "high"] = None, + prior_confidence: Literal["low", "medium", "high"] | None = None, random_interleave_prob: float = 0.0, - sample_default_first: bool = False, - sample_default_at_target: bool = False, + sample_prior_first: bool = False, + sample_prior_at_target: bool = False, ): super().__init__( pipeline_space=pipeline_space, - budget=budget, + max_cost_total=max_cost_total, eta=eta, early_stopping_rate=early_stopping_rate, initial_design_type=initial_design_type, use_priors=use_priors, sampling_policy=sampling_policy, promotion_policy=promotion_policy, 
- loss_value_on_error=loss_value_on_error, + objective_to_minimize_value_on_error=objective_to_minimize_value_on_error, cost_value_on_error=cost_value_on_error, ignore_errors=ignore_errors, - logger=logger, prior_confidence=prior_confidence, random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, + sample_prior_first=sample_prior_first, + sample_prior_at_target=sample_prior_at_target, ) @@ -627,41 +642,36 @@ class AsynchronousSuccessiveHalvingWithPriors(AsynchronousSuccessiveHalving): def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, early_stopping_rate: int = 0, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", - sampling_policy: typing.Any = FixedPriorPolicy, - promotion_policy: typing.Any = AsyncPromotionPolicy, # key difference from SH - loss_value_on_error: None | float = None, + sampling_policy: Any = FixedPriorPolicy, + promotion_policy: Any = AsyncPromotionPolicy, # key difference from SH + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, prior_confidence: Literal["low", "medium", "high"] = "medium", random_interleave_prob: float = 0.0, - sample_default_first: bool = False, - sample_default_at_target: bool = False, + sample_prior_first: bool = False, + sample_prior_at_target: bool = False, ): super().__init__( pipeline_space=pipeline_space, - budget=budget, + max_cost_total=max_cost_total, eta=eta, early_stopping_rate=early_stopping_rate, initial_design_type=initial_design_type, use_priors=self.use_priors, sampling_policy=sampling_policy, promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, + objective_to_minimize_value_on_error=objective_to_minimize_value_on_error, cost_value_on_error=cost_value_on_error, ignore_errors=ignore_errors, - logger=logger, prior_confidence=prior_confidence, random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, + sample_prior_first=sample_prior_first, + sample_prior_at_target=sample_prior_at_target, ) - - -if __name__ == "__main__": - pass diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index dd36e4892..bbc6557f5 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -1,47 +1,15 @@ -# type: ignore from __future__ import annotations -from typing import Any, Sequence +from collections.abc import Sequence +from copy import deepcopy +from typing import Any import numpy as np import pandas as pd -import torch - -from ...optimizers.utils import map_real_hyperparameters_from_tabular_ids -from ...search_spaces.search_space import SearchSpace - - -def continuous_to_tabular( - config: SearchSpace, categorical_space: SearchSpace -) -> SearchSpace: - """ - Convert the continuous parameters in the config into categorical ones based on - the categorical_space provided - """ - result = config.clone() - for hp_name, _ in config.items(): - if hp_name in categorical_space.keys(): - choices = np.array(categorical_space[hp_name].choices) - diffs = choices - config[hp_name].value - # NOTE: in case of a tie the first value in the choices array will be returned - closest = choices[np.abs(diffs).argmin()] - result[hp_name].set_value(closest) - - return result - - -def normalize_vectorize_config( - config: 
SearchSpace, ignore_fidelity: bool = True -) -> np.ndarray: - _new_vector = [] - for _, hp_list in config.get_normalized_hp_categories(ignore_fidelity).items(): - _new_vector.extend(hp_list) - return np.array(_new_vector) class MFObservedData: - """ - (Under development) + """(Under development). This module is used to unify the data access across different Multi-Fidelity optimizers. It stores column names and index names. Possible optimizations @@ -55,6 +23,7 @@ class MFObservedData: default_config_col = "config" default_perf_col = "perf" default_lc_col = "learning_curves" + # TODO: deepcopy all the mutable outputs from the dataframe def __init__( self, @@ -79,17 +48,18 @@ def __init__( self.config_idx = index_names[0] self.budget_idx = index_names[1] + self.index_names = index_names index = pd.MultiIndex.from_tuples([], names=index_names) self.df = pd.DataFrame([], columns=columns, index=index) @property - def pending_condition(self): - return self.df[self.perf_col].isnull() + def pending_condition(self) -> pd.Series: + return self.df[self.perf_col].isna() @property - def error_condition(self): + def error_condition(self) -> pd.Series: return self.df[self.perf_col] == "error" @property @@ -102,24 +72,30 @@ def seen_budget_levels(self) -> list: return self.df.index.levels[1].to_list() @property - def completed_runs(self): + def pending_runs_index(self) -> pd.Index | pd.MultiIndex: + return self.df.loc[self.pending_condition].index + + @property + def completed_runs(self) -> pd.DataFrame: return self.df[~(self.pending_condition | self.error_condition)] + @property + def completed_runs_index(self) -> pd.Index | pd.MultiIndex: + return self.completed_runs.index + def next_config_id(self) -> int: if len(self.seen_config_ids): return max(self.seen_config_ids) + 1 - else: - return 0 + return 0 def add_data( self, data: list[Any] | list[list[Any]], index: tuple[int, ...] | Sequence[tuple[int, ...]] | Sequence[int] | int, + *, error: bool = False, - ): - """ - Add data only if none of the indices are already existing in the DataFrame - """ + ) -> None: + """Add data only if none of the indices are already existing in the DataFrame.""" # TODO: If index is only config_id extend it if not isinstance(index, list): index_list = [index] @@ -129,8 +105,9 @@ def add_data( data_list = data if not self.df.index.isin(index_list).any(): - _df = pd.DataFrame(data_list, columns=self.df.columns, index=index_list) - self.df = pd.concat((self.df, _df)) + index = pd.MultiIndex.from_tuples(index_list, names=self.index_names) + _df = pd.DataFrame(data_list, columns=self.df.columns, index=index) + self.df = _df.copy() if self.df.empty else pd.concat((self.df, _df)) elif error: raise ValueError( f"Data with at least one of the given indices already " @@ -142,18 +119,14 @@ def update_data( self, data_dict: dict[str, list[Any]], index: tuple[int, ...] 
| Sequence[tuple[int, ...]] | Sequence[int] | int, + *, error: bool = False, - ): - """ - Update data if all the indices already exist in the DataFrame - """ - if not isinstance(index, list): - index_list = [index] - else: - index_list = index + ) -> None: + """Update data if all the indices already exist in the DataFrame.""" + index_list = [index] if not isinstance(index, list) else index if self.df.index.isin(index_list).sum() == len(index_list): - column_names, data = zip(*data_dict.items()) - data = list(zip(*data)) + column_names, data = zip(*data_dict.items(), strict=False) + data = list(zip(*data, strict=False)) self.df.loc[index_list, list(column_names)] = data elif error: @@ -163,7 +136,7 @@ def update_data( f"Given indices: {index_list}" ) - def get_learning_curves(self): + def get_learning_curves(self) -> pd.DataFrame: return self.df.pivot_table( index=self.df.index.names[0], columns=self.df.index.names[1], @@ -171,43 +144,10 @@ def get_learning_curves(self): ) def all_configs_list(self) -> list[Any]: - return self.df.loc[:, self.config_col].values.tolist() - - def get_incumbents_for_budgets(self, maximize: bool = False): - """ - Returns a series object with the best partial configuration for each budget id - - Note: this will always map the best lowest ID if two configurations - has the same performance at the same fidelity - """ - learning_curves = self.get_learning_curves() - if maximize: - config_ids = learning_curves.idxmax(axis=0) - else: - config_ids = learning_curves.idxmin(axis=0) - - indices = list(zip(config_ids.values.tolist(), config_ids.index.to_list())) - partial_configs = self.df.loc[indices, self.config_col].to_list() - return pd.Series(partial_configs, index=config_ids.index, name=self.config_col) - - def get_best_performance_for_each_budget(self, maximize: bool = False): - """ - Returns a series object with the best partial configuration for each budget id + return self.df.loc[:, self.config_col].sort_index().values.tolist() - Note: this will always map the best lowest ID if two configurations - has the same performance at the same fidelity - """ - learning_curves = self.get_learning_curves() - if maximize: - performance = learning_curves.max(axis=0) - else: - performance = learning_curves.min(axis=0) - - return performance - - def get_best_learning_curve_id(self, maximize: bool = False): - """ - Returns a single configuration id of the best observed performance + def get_best_learning_curve_id(self, *, maximize: bool = False) -> int: + """Returns a single configuration id of the best observed performance. 
        Note: this will always return the single best lowest ID if two configurations
        have the same performance
@@ -215,33 +155,40 @@
         learning_curves = self.get_learning_curves()
         if maximize:
             return learning_curves.max(axis=1).idxmax()
-        else:
-            return learning_curves.min(axis=1).idxmin()
+        return learning_curves.min(axis=1).idxmin()

-    def get_best_seen_performance(self, maximize: bool = False):
+    def get_best_seen_performance(self, *, maximize: bool = False) -> float:
         learning_curves = self.get_learning_curves()
         if maximize:
             return learning_curves.max(axis=1).max()
-        else:
-            return learning_curves.min(axis=1).min()
+        return learning_curves.min(axis=1).min()

-    def add_budget_column(self):
+    def add_budget_column(self) -> pd.DataFrame:
         combined_df = self.df.reset_index(level=1)
-        combined_df.set_index(
-            keys=[self.budget_idx], drop=False, append=True, inplace=True
-        )
-        return combined_df
+        return combined_df.set_index(keys=[self.budget_idx], drop=False, append=True)

-    def reduce_to_max_seen_budgets(self):
-        self.df.sort_index(inplace=True)
+    def reduce_to_max_seen_budgets(self) -> pd.DataFrame:
+        self.df = self.df.sort_index()
         combined_df = self.add_budget_column()
         return combined_df.groupby(level=0).last()

-    def get_partial_configs_at_max_seen(self):
+    def get_partial_configs_at_max_seen(self) -> pd.Series:
         return self.reduce_to_max_seen_budgets()[self.config_col]

-    def extract_learning_curve(self, config_id: int, budget_id: int) -> list[float]:
-        # reduce budget_id to discount the current validation loss
+    def extract_learning_curve(
+        self, config_id: int, budget_id: int | None = None
+    ) -> list[float]:
+        if budget_id is None:
+            # budget_id only None when predicting
+            # extract full observed learning curve for prediction pipeline
+            budget_id = (
+                max(self.df.loc[config_id].index.get_level_values("budget_id").values) + 1
+            )
+
+        # For the first epoch we have no learning curve available
+        if budget_id == 0:
+            return []
+        # reduce budget_id to discount the current validation objective value
        # both during training and prediction phase
         budget_id = max(0, budget_id - 1)
         if self.lc_col_name in self.df.columns:
@@ -249,53 +196,29 @@ def extract_learning_curve(self, config_id: int, budget_id: int) -> list[float]:
         else:
             lcs = self.get_learning_curves()
             lc = lcs.loc[config_id, :budget_id].values.flatten().tolist()
-        return lc
-
-    def get_training_data_4DyHPO(
-        self, df: pd.DataFrame, pipeline_space: SearchSpace | None = None
-    ):
-        configs = []
-        learning_curves = []
-        performance = []
-        for idx, row in df.iterrows():
-            config_id = idx[0]
-            budget_id = idx[1]
-            if pipeline_space.has_tabular:
-                _row = pd.Series([row[self.config_col]], index=[config_id])
-                _row = map_real_hyperparameters_from_tabular_ids(_row, pipeline_space)
-                configs.append(_row.values[0])
-            else:
-                configs.append(row[self.config_col])
-            performance.append(row[self.perf_col])
-            learning_curves.append(self.extract_learning_curve(config_id, budget_id))
-        return configs, learning_curves, performance
-
-    def get_tokenized_data(self, df: pd.DataFrame):
-        idxs = df.index.values
-        idxs = np.array([list(idx) for idx in idxs])
-        idxs[:, 1] += 1  # all fidelity IDs begin with 0 in NePS
-        performances = df.perf.values
-        configs = df.config.values
-        configs = np.array([normalize_vectorize_config(c) for c in configs])
-
-        return configs, idxs, performances
-
-    def tokenize(self, df: pd.DataFrame, as_tensor: bool = False):
-        """Function to format data for PFN."""
-        configs =
np.array([normalize_vectorize_config(c) for c in df]) - fidelity = np.array([c.fidelity.value for c in df]).reshape(-1, 1) - idx = df.index.values.reshape(-1, 1) - - data = np.hstack([idx, fidelity, configs]) - - if as_tensor: - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - data = torch.Tensor(data).to(device) - return data + return deepcopy(lc) + + def get_best_performance_per_config(self, *, maximize: bool = False) -> pd.Series: + """Returns the best score recorded per config across fidelities seen.""" + op = np.max if maximize else np.min + return ( + self.df.sort_values( + "budget_id", ascending=False + ) # sorts with largest budget first + .groupby("config_id") # retains only config_id + .first() # retrieves the largest budget seen for each config_id + .learning_curves.apply( # extracts all values seen till largest budget + op + ) # finds the minimum over per-config learning curve + ) - @property - def token_ids(self) -> np.ndarray: - return self.df.index.values + def get_max_observed_fidelity_level_per_config(self) -> pd.Series: + """Returns the highest fidelity level recorded per config seen.""" + max_z_observed = { + _id: self.df.loc[_id, :].index.sort_values()[-1] + for _id in self.df.index.get_level_values("config_id").sort_values() + } + return pd.Series(max_z_observed) if __name__ == "__main__": @@ -305,7 +228,8 @@ def token_ids(self) -> np.ndarray: """ data = MFObservedData(["config", "perf"], index_names=["config_id", "budget_id"]) - # When adding multiple indices data should be list of rows(lists) and the index should be list of tuples + # When adding multiple indices data should be list of rows(lists) and the + # index should be list of tuples data.add_data( [["conf1", 0.5], ["conf2", 0.7], ["conf1", 0.6], ["conf2", 0.4]], index=[(0, 0), (1, 1), (0, 3), (1, 0)], @@ -315,33 +239,13 @@ def token_ids(self) -> np.ndarray: index=[(0, 2), (1, 2), (0, 1)], ) - print(data.df) - print(data.get_learning_curves()) - print( - "Mapping of budget IDs into best performing configurations at each fidelity:\n", - data.get_incumbents_for_budgets(), - ) - print( - "Best Performance at each budget level:\n", - data.get_best_performance_for_each_budget(), - ) - print( - "Configuration ID of the best observed performance so far: ", - data.get_best_learning_curve_id(), - ) - print(data.extract_learning_curve(0, 2)) - # data.df.sort_index(inplace=True) - print(data.get_partial_configs_at_max_seen()) - - # When updating multiple indices at a time both the values in the data dictionary and the indices should be lists + # When updating multiple indices at a time both the values in the data dictionary + # and the indices should be lists data.update_data({"perf": [1.8, 1.5]}, index=[(1, 1), (0, 0)]) - print(data.df) data = MFObservedData(["config", "perf"], index_names=["config_id", "budget_id"]) # when adding a single row second level list is not necessary data.add_data(["conf1", 0.5], index=(0, 0)) - print(data.df) data.update_data({"perf": [1.8], "budget_col": [5]}, index=(0, 0)) - print(data.df) diff --git a/neps/optimizers/multi_fidelity_prior/__init__.py b/neps/optimizers/multi_fidelity_prior/__init__.py index e69de29bb..f272be75b 100644 --- a/neps/optimizers/multi_fidelity_prior/__init__.py +++ b/neps/optimizers/multi_fidelity_prior/__init__.py @@ -0,0 +1,11 @@ +from neps.optimizers.multi_fidelity_prior.async_priorband import ( + PriorBandAsha, + PriorBandAshaHB, +) +from neps.optimizers.multi_fidelity_prior.priorband import PriorBand + +__all__ = [ + "PriorBand", + 
"PriorBandAsha", + "PriorBandAshaHB", +] diff --git a/neps/optimizers/multi_fidelity_prior/async_priorband.py b/neps/optimizers/multi_fidelity_prior/async_priorband.py index 40f6cb290..c664eeb07 100644 --- a/neps/optimizers/multi_fidelity_prior/async_priorband.py +++ b/neps/optimizers/multi_fidelity_prior/async_priorband.py @@ -1,19 +1,13 @@ from __future__ import annotations -import typing +from collections.abc import Mapping +from typing import TYPE_CHECKING, Any, Literal +from typing_extensions import override import numpy as np -from typing_extensions import Literal, override +import pandas as pd -from neps.state.optimizer import BudgetInfo, OptimizationState -from neps.utils.types import ConfigResult, RawConfig -from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( - BaseAcquisition, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - AcquisitionSampler, -) +from neps.optimizers.base_optimizer import SampledConfig from neps.optimizers.multi_fidelity.mf_bo import MFBOBase from neps.optimizers.multi_fidelity.promotion_policy import AsyncPromotionPolicy from neps.optimizers.multi_fidelity.sampling_policy import EnsemblePolicy, ModelPolicy @@ -21,6 +15,16 @@ AsynchronousSuccessiveHalvingWithPriors, ) from neps.optimizers.multi_fidelity_prior.priorband import PriorBandBase +from neps.sampling.priors import Prior + +if TYPE_CHECKING: + from neps.optimizers.bayesian_optimization.acquisition_functions import ( + BaseAcquisition, + ) + from neps.search_spaces.search_space import SearchSpace + from neps.state.optimizer import BudgetInfo + from neps.state.trial import Trial + from neps.utils.types import ConfigResult, RawConfig class PriorBandAsha(MFBOBase, PriorBandBase, AsynchronousSuccessiveHalvingWithPriors): @@ -28,55 +32,57 @@ class PriorBandAsha(MFBOBase, PriorBandBase, AsynchronousSuccessiveHalvingWithPr def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, early_stopping_rate: int = 0, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", - sampling_policy: typing.Any = EnsemblePolicy, # key difference to ASHA - promotion_policy: typing.Any = AsyncPromotionPolicy, # key difference from SH - loss_value_on_error: None | float = None, + sampling_policy: Any = EnsemblePolicy, # key difference to ASHA + promotion_policy: Any = AsyncPromotionPolicy, # key difference from SH + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, prior_confidence: Literal["low", "medium", "high"] = "medium", random_interleave_prob: float = 0.0, - sample_default_first: bool = True, - sample_default_at_target: bool = True, - prior_weight_type: str = "geometric", # could also be {"linear", "50-50"} - inc_sample_type: str = "mutation", # or {"crossover", "gaussian", "hypersphere"} + sample_prior_first: bool = True, + sample_prior_at_target: bool = True, + prior_weight_type: Literal["geometric", "linear", "50-50"] = "geometric", + inc_sample_type: Literal[ + "crossover", "gaussian", "hypersphere", "mutation" + ] = "mutation", inc_mutation_rate: float = 0.5, inc_mutation_std: float = 0.25, - inc_style: str = "dynamic", # could also be {"decay", "constant"} + inc_style: Literal["dynamic", "constant", "decay"] = "dynamic", # arguments for model model_based: bool = False, # crucial argument to set to allow model-search - 
modelling_type: str = "joint", # could also be {"rung"} - initial_design_size: int = None, - model_policy: typing.Any = ModelPolicy, - surrogate_model: str | typing.Any = "gp", - domain_se_kernel: str = None, - hp_kernels: list = None, - surrogate_model_args: dict = None, + modelling_type: Literal["joint", "rung"] = "joint", + initial_design_size: int | None = None, + model_policy: Any = ModelPolicy, + # TODO: Remove these when fixing model policy + surrogate_model: str | Any = "gp", + domain_se_kernel: str | None = None, + hp_kernels: list | None = None, + surrogate_model_args: dict | None = None, acquisition: str | BaseAcquisition = "EI", log_prior_weighted: bool = False, - acquisition_sampler: str | AcquisitionSampler = "random", + acquisition_sampler: str = "random", ): super().__init__( pipeline_space=pipeline_space, - budget=budget, + max_cost_total=max_cost_total, eta=eta, early_stopping_rate=early_stopping_rate, initial_design_type=initial_design_type, sampling_policy=sampling_policy, promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, + objective_to_minimize_value_on_error=objective_to_minimize_value_on_error, cost_value_on_error=cost_value_on_error, ignore_errors=ignore_errors, - logger=logger, prior_confidence=prior_confidence, random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, + sample_prior_first=sample_prior_first, + sample_prior_at_target=sample_prior_at_target, ) self.prior_weight_type = prior_weight_type self.inc_sample_type = inc_sample_type @@ -96,15 +102,6 @@ def __init__( }, } - bo_args = dict( - surrogate_model=surrogate_model, - domain_se_kernel=domain_se_kernel, - hp_kernels=hp_kernels, - surrogate_model_args=surrogate_model_args, - acquisition=acquisition, - log_prior_weighted=log_prior_weighted, - acquisition_sampler=acquisition_sampler, - ) self.model_based = model_based self.modelling_type = modelling_type self.initial_design_size = initial_design_size @@ -118,7 +115,9 @@ def __init__( self.init_size = n_min + 1 # in BOHB: init_design >= N_dim + 2 if self.modelling_type == "joint" and self.initial_design_size is not None: self.init_size = self.initial_design_size - self.model_policy = model_policy(pipeline_space, **bo_args) + + prior_dist = Prior.from_space(self.pipeline_space) + self.model_policy = model_policy(pipeline_space=pipeline_space, prior=prior_dist) def get_config_and_ids( self, @@ -129,11 +128,8 @@ def get_config_and_ids( [type]: [description] """ rung_to_promote = self.is_promotable() - if rung_to_promote is not None: - rung = rung_to_promote + 1 - else: - rung = self.min_rung - self.set_sampling_weights_and_inc(rung=rung) + rung = rung_to_promote + 1 if rung_to_promote is not None else self.min_rung + self._set_sampling_weights_and_inc(rung=rung) # performs standard ASHA but sampling happens as per the EnsemblePolicy return super().get_config_and_ids() @@ -145,65 +141,58 @@ class PriorBandAshaHB(PriorBandAsha): def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", - sampling_policy: typing.Any = EnsemblePolicy, # key difference to ASHA - promotion_policy: typing.Any = AsyncPromotionPolicy, # key difference from PB - loss_value_on_error: None | float = None, + sampling_policy: Any = EnsemblePolicy, # key difference to ASHA + promotion_policy: Any = AsyncPromotionPolicy, # key difference from PB + 
objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, prior_confidence: Literal["low", "medium", "high"] = "medium", random_interleave_prob: float = 0.0, - sample_default_first: bool = True, - sample_default_at_target: bool = True, - prior_weight_type: str = "geometric", # could also be {"linear", "50-50"} - inc_sample_type: str = "mutation", # or {"crossover", "gaussian", "hypersphere"} + sample_prior_first: bool = True, + sample_prior_at_target: bool = True, + prior_weight_type: Literal["geometric", "linear", "50-50"] = "geometric", + inc_sample_type: Literal[ + "crossover", "gaussian", "hypersphere", "mutation" + ] = "mutation", inc_mutation_rate: float = 0.5, inc_mutation_std: float = 0.25, - inc_style: str = "dynamic", # could also be {"decay", "constant"} + inc_style: Literal["dynamic", "constant", "decay"] = "dynamic", # arguments for model model_based: bool = False, # crucial argument to set to allow model-search - modelling_type: str = "joint", # could also be {"rung"} - initial_design_size: int = None, - model_policy: typing.Any = ModelPolicy, - surrogate_model: str | typing.Any = "gp", - domain_se_kernel: str = None, - hp_kernels: list = None, - surrogate_model_args: dict = None, + modelling_type: Literal["joint", "rung"] = "joint", + initial_design_size: int | None = None, + model_policy: Any = ModelPolicy, + # TODO: Remove these when fixing model policy + surrogate_model: str | Any = "gp", + domain_se_kernel: str | None = None, + hp_kernels: list | None = None, + surrogate_model_args: dict | None = None, acquisition: str | BaseAcquisition = "EI", log_prior_weighted: bool = False, - acquisition_sampler: str | AcquisitionSampler = "random", + acquisition_sampler: str = "random", ): # collecting arguments required by ASHA - args = dict( - pipeline_space=pipeline_space, - budget=budget, - eta=eta, - early_stopping_rate=self.early_stopping_rate, - initial_design_type=initial_design_type, - sampling_policy=sampling_policy, - promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, - cost_value_on_error=cost_value_on_error, - ignore_errors=ignore_errors, - logger=logger, - prior_confidence=prior_confidence, - random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, - ) - bo_args = dict( - surrogate_model=surrogate_model, - domain_se_kernel=domain_se_kernel, - hp_kernels=hp_kernels, - surrogate_model_args=surrogate_model_args, - acquisition=acquisition, - log_prior_weighted=log_prior_weighted, - acquisition_sampler=acquisition_sampler, - ) + args: dict[str, Any] = { + "pipeline_space": pipeline_space, + "max_cost_total": max_cost_total, + "eta": eta, + "early_stopping_rate": self.early_stopping_rate, + "initial_design_type": initial_design_type, + "sampling_policy": sampling_policy, + "promotion_policy": promotion_policy, + "objective_to_minimize_value_on_error": objective_to_minimize_value_on_error, + "cost_value_on_error": cost_value_on_error, + "ignore_errors": ignore_errors, + "prior_confidence": prior_confidence, + "random_interleave_prob": random_interleave_prob, + "sample_prior_first": sample_prior_first, + "sample_prior_at_target": sample_prior_at_target, + } super().__init__( **args, prior_weight_type=prior_weight_type, @@ -215,7 +204,6 @@ def __init__( modelling_type=modelling_type, initial_design_size=initial_design_size, model_policy=model_policy, - **bo_args, ) # Creating the 
ASHA (SH) brackets that Hyperband iterates over @@ -226,8 +214,8 @@ def __init__( self.sh_brackets[s] = AsynchronousSuccessiveHalvingWithPriors(**args) self.sh_brackets[s].sampling_policy = self.sampling_policy self.sh_brackets[s].sampling_args = self.sampling_args - self.sh_brackets[s].model_policy = self.model_policy - self.sh_brackets[s].sample_new_config = self.sample_new_config + self.sh_brackets[s].model_policy = self.model_policy # type: ignore + self.sh_brackets[s].sample_new_config = self.sample_new_config # type: ignore def _update_sh_bracket_state(self) -> None: # `load_results()` for each of the SH bracket objects are not called as they are @@ -247,23 +235,57 @@ def _update_sh_bracket_state(self) -> None: bracket.rung_histories = self.rung_histories @override - def load_optimization_state( + def ask( self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], + trials: Mapping[str, Trial], budget_info: BudgetInfo | None, - optimizer_state: dict[str, typing.Any], - ) -> None: - super().load_optimization_state( - previous_results=previous_results, - pending_evaluations=pending_evaluations, - budget_info=budget_info, - optimizer_state=optimizer_state - ) + n: int | None = None, + ) -> SampledConfig: + """This is basically the fit method.""" + assert n is None, "TODO" + completed: dict[str, ConfigResult] = { + trial_id: trial.into_config_result(self.pipeline_space.from_dict) + for trial_id, trial in trials.items() + if trial.report is not None + } + pending: dict[str, SearchSpace] = { + trial_id: self.pipeline_space.from_dict(trial.config) + for trial_id, trial in trials.items() + if trial.report is None + } + + self.rung_histories = { + rung: {"config": [], "perf": []} + for rung in range(self.min_rung, self.max_rung + 1) + } + + self.observed_configs = pd.DataFrame([], columns=("config", "rung", "perf")) + + # previous optimization run exists and needs to be loaded + self._load_previous_observations(completed) + + # account for pending evaluations + self._handle_pending_evaluations(pending) + + # process optimization state and bucket observations per rung + self._get_rungs_state() + + # filter/reset old SH brackets + self.clear_old_brackets() + + # identifying promotion list per rung + self._handle_promotions() + + # fit any model/surrogates + self._fit_models() + # important for the global HB to run the right SH self._update_sh_bracket_state() - def _get_bracket_to_run(self): + config, _id, previous_id = self.get_config_and_ids() + return SampledConfig(id=_id, config=config, previous_config_id=previous_id) + + def _get_bracket_to_run(self) -> int: """Samples the ASHA bracket to run. The selected bracket always samples at its minimum rung. Thus, selecting a bracket @@ -279,8 +301,7 @@ def _get_bracket_to_run(self): self.eta ** (K - s) * (K + 1) / (K - s + 1) for s in range(self.max_rung + 1) ] bracket_probs = np.array(bracket_probs) / sum(bracket_probs) - bracket_next = np.random.choice(range(self.max_rung + 1), p=bracket_probs) - return bracket_next + return int(np.random.choice(range(self.max_rung + 1), p=bracket_probs)) def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: """...and this is the method that decides which point to query. 
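# A self-contained rendering of the bracket distribution computed in
# `_get_bracket_to_run` above (assuming, as the surrounding code suggests,
# that K is the maximum rung). With eta=3 and K=3, the lower brackets, which
# start at low fidelity and promote aggressively, dominate the draw:
import numpy as np

eta, K = 3, 3
bracket_probs = np.array(
    [eta ** (K - s) * (K + 1) / (K - s + 1) for s in range(K + 1)]
)
bracket_probs /= bracket_probs.sum()
print(bracket_probs.round(3))  # [0.551 0.245 0.122 0.082]

bracket_to_run = int(np.random.choice(range(K + 1), p=bracket_probs))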
@@ -291,9 +312,9 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: # the rung to sample at bracket_to_run = self._get_bracket_to_run() - self.set_sampling_weights_and_inc(rung=bracket_to_run) + self._set_sampling_weights_and_inc(rung=bracket_to_run) self.sh_brackets[bracket_to_run].sampling_args = self.sampling_args config, config_id, previous_config_id = self.sh_brackets[ bracket_to_run ].get_config_and_ids() - return config, config_id, previous_config_id # type: ignore + return config, config_id, previous_config_id diff --git a/neps/optimizers/multi_fidelity_prior/priorband.py b/neps/optimizers/multi_fidelity_prior/priorband.py index 614ad4b0f..bcbd8c9c4 100644 --- a/neps/optimizers/multi_fidelity_prior/priorband.py +++ b/neps/optimizers/multi_fidelity_prior/priorband.py @@ -1,64 +1,84 @@ from __future__ import annotations -import typing +import logging +from typing import TYPE_CHECKING, Any, Literal import numpy as np -from typing_extensions import Literal -from neps.utils.types import RawConfig -from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( - BaseAcquisition, -) -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( - AcquisitionSampler, -) from neps.optimizers.multi_fidelity.hyperband import HyperbandCustomDefault from neps.optimizers.multi_fidelity.mf_bo import MFBOBase from neps.optimizers.multi_fidelity.promotion_policy import SyncPromotionPolicy from neps.optimizers.multi_fidelity.sampling_policy import EnsemblePolicy, ModelPolicy +from neps.optimizers.multi_fidelity.successive_halving import SuccessiveHalvingBase from neps.optimizers.multi_fidelity_prior.utils import ( - calc_total_resources_spent, compute_config_dist, compute_scores, get_prior_weight_for_decay, ) +from neps.sampling.priors import Prior +from neps.search_spaces.search_space import SearchSpace + +if TYPE_CHECKING: + import pandas as pd + + from neps.optimizers.bayesian_optimization.acquisition_functions import ( + BaseAcquisition, + ) + from neps.utils.types import RawConfig + +logger = logging.getLogger(__name__) +# TODO: We should just make these functions... class PriorBandBase: """Class that defines essential properties needed by PriorBand. Designed to work with the topmost parent class as SuccessiveHalvingBase. 
""" - def find_all_distances_from_incumbent(self, incumbent): + # TODO: Dependant variables which should just be made into functions + observed_configs: pd.DataFrame + eta: int + pipeline_space: SearchSpace + inc_sample_type: Literal["hypersphere", "mutation", "crossover", "gaussian"] + inc_mutation_rate: float + inc_mutation_std: float + rung_histories: dict[int, dict[Literal["config", "perf"], list[int | float]]] + max_rung: int + min_rung: int + rung_map: dict + prior_weight_type: Literal["geometric", "linear", "50-50"] + sampling_args: dict[str, Any] + inc_style: Literal["dynamic", "decay", "constant"] + min_budget: int | float + max_budget: int | float + + def find_all_distances_from_incumbent(self, incumbent: SearchSpace) -> list[float]: """Finds the distance to the nearest neighbour.""" dist = lambda x: compute_config_dist(incumbent, x) # computing distance of incumbent from all seen points in history distances = [dist(config) for config in self.observed_configs.config] # ensuring the distances exclude 0 or the distance from itself - distances = [d for d in distances if d > 0] - return distances + return [d for d in distances if d > 0] - def find_1nn_distance_from_incumbent(self, incumbent): + def find_1nn_distance_from_incumbent(self, incumbent: SearchSpace) -> float: """Finds the distance to the nearest neighbour.""" distances = self.find_all_distances_from_incumbent(incumbent) - distance = min(distances) - return distance + return min(distances) - def find_incumbent(self, rung: int = None) -> SearchSpace: + def find_incumbent(self, rung: int | None = None) -> SearchSpace: """Find the best performing configuration seen so far.""" rungs = self.observed_configs.rung.values idxs = self.observed_configs.index.values while rung is not None: # enters this scope is `rung` argument passed and not left empty or None if rung not in rungs: - self.logger.warn(f"{rung} not in {np.unique(idxs)}") + logger.warning(f"{rung} not in {np.unique(idxs)}") # type: ignore # filtering by rung based on argument passed idxs = self.observed_configs.rung.values == rung # checking width of current rung if len(idxs) < self.eta: - self.logger.warn( + logger.warn( f"Selecting incumbent from a rung with width less than {self.eta}" ) # extracting the incumbent configuration @@ -67,17 +87,18 @@ def find_incumbent(self, rung: int = None) -> SearchSpace: _perfs = self.observed_configs.loc[idxs].perf.values inc_idx = np.nanargmin([np.nan if t is None else t for t in _perfs]) inc = self.observed_configs.loc[idxs].iloc[inc_idx].config + assert isinstance(inc, SearchSpace) else: # THIS block should not ever execute, but for runtime anomalies, if no # incumbent can be extracted, the prior is treated as the incumbent - inc = self.pipeline_space.sample_default_configuration() - self.logger.warn( + inc = self.pipeline_space.from_dict(self.pipeline_space.prior_config) + logger.warning( "Treating the prior as the incumbent. " "Please check if this should not happen." 
) return inc - def set_sampling_weights_and_inc(self, rung: int): + def _set_sampling_weights_and_inc(self, rung: int) -> dict: sampling_args = self.calc_sampling_args(rung) if not self.is_activate_inc(): sampling_args["prior"] += sampling_args["inc"] @@ -110,24 +131,30 @@ def is_activate_inc(self) -> bool: activate_inc = False # calculate total resource cost required for the first SH bracket in HB - if hasattr(self, "sh_brackets") and len(self.sh_brackets) > 1: + sh_brackets = getattr(self, "sh_brackets", None) + if sh_brackets is not None and len(sh_brackets) > 1: # for HB or AsyncHB which invokes multiple SH brackets - bracket = self.sh_brackets[self.min_rung] + bracket = sh_brackets[self.min_rung] else: # for SH or ASHA which do not invoke multiple SH brackets bracket = self + + assert isinstance(bracket, SuccessiveHalvingBase) + # calculating the total resources spent in the first SH bracket, taking into # account the continuations, that is, the resources spent on a promoted config is # not fidelity[rung] but (fidelity[rung] - fidelity[rung - 1]) continuation_resources = bracket.rung_map[bracket.min_rung] resources = bracket.config_map[bracket.min_rung] * continuation_resources for r in range(1, len(bracket.rung_map)): - rung = sorted(list(bracket.rung_map.keys()), reverse=False)[r] + rung = sorted(bracket.rung_map.keys(), reverse=False)[r] continuation_resources = bracket.rung_map[rung] - bracket.rung_map[rung - 1] resources += bracket.config_map[rung] * continuation_resources # find resources spent so far for all finished evaluations - resources_used = calc_total_resources_spent(self.observed_configs, self.rung_map) + valid_perf_mask = self.observed_configs["perf"].notna() + rungs = self.observed_configs.loc[valid_perf_mask, "rung"] + resources_used = sum(self.rung_map[r] for r in rungs) if resources_used >= resources and len( self.rung_histories[self.max_rung]["config"] @@ -139,7 +166,7 @@ def is_activate_inc(self) -> bool: activate_inc = True return activate_inc - def calc_sampling_args(self, rung) -> dict: + def calc_sampling_args(self, rung: int) -> dict: """Sets the weights for each of the sampling techniques.""" if self.prior_weight_type == "geometric": _w_random = 1 @@ -176,39 +203,39 @@ def calc_sampling_args(self, rung) -> dict: w_inc = _w_inc * w_prior w_prior = _w_prior * w_prior - sampling_args = { + return { "prior": w_prior, "inc": w_inc, "random": w_random, } - return sampling_args - def prior_to_incumbent_ratio(self) -> float | float: + def prior_to_incumbent_ratio(self) -> tuple[float, float]: """Calculates the normalized weight distribution between prior and incumbent. Sum of the weights should be 1. 
""" if self.inc_style == "constant": return self._prior_to_incumbent_ratio_constant() - elif self.inc_style == "decay": - resources = calc_total_resources_spent(self.observed_configs, self.rung_map) + if self.inc_style == "decay": + valid_perf_mask = self.observed_configs["perf"].notna() + rungs = self.observed_configs.loc[valid_perf_mask, "rung"] + resources = sum(self.rung_map[r] for r in rungs) return self._prior_to_incumbent_ratio_decay( resources, self.eta, self.min_budget, self.max_budget ) - elif self.inc_style == "dynamic": + if self.inc_style == "dynamic": return self._prior_to_incumbent_ratio_dynamic(self.max_rung) - else: - raise ValueError(f"Invalid option {self.inc_style}") + raise ValueError(f"Invalid option {self.inc_style}") def _prior_to_incumbent_ratio_decay( - self, resources: float, eta: int, min_budget, max_budget - ) -> float | float: + self, resources: float, eta: int, min_budget: int | float, max_budget: int | float + ) -> tuple[float, float]: """Decays the prior weightage and increases the incumbent weightage.""" w_prior = get_prior_weight_for_decay(resources, eta, min_budget, max_budget) w_inc = 1 - w_prior return w_prior, w_inc - def _prior_to_incumbent_ratio_constant(self) -> float | float: + def _prior_to_incumbent_ratio_constant(self) -> tuple[float, float]: """Fixes the weightage of incumbent sampling to 1/eta of prior sampling.""" # fixing weight of incumbent to 1/eta of prior _w_prior = self.eta @@ -217,7 +244,7 @@ def _prior_to_incumbent_ratio_constant(self) -> float | float: w_inc = _w_inc / (_w_prior + _w_inc) return w_prior, w_inc - def _prior_to_incumbent_ratio_dynamic(self, rung: int) -> float | float: + def _prior_to_incumbent_ratio_dynamic(self, rung: int) -> tuple[float, float]: """Dynamically determines the ratio of weights for prior and incumbent sampling. Finds the highest rung with eta configurations recorded. 
Picks the top-1/eta @@ -232,7 +259,7 @@ def _prior_to_incumbent_ratio_dynamic(self, rung: int) -> float | float: # requires at least eta completed configurations to begin computing scores if len(self.rung_histories[rung]["config"]) >= self.eta: # retrieve the prior - prior = self.pipeline_space.sample_default_configuration() + prior = self.pipeline_space.from_dict(self.pipeline_space.prior_config) # retrieve the global incumbent inc = self.find_incumbent() # subsetting the top 1/eta configs from the rung @@ -258,71 +285,70 @@ def _prior_to_incumbent_ratio_dynamic(self, rung: int) -> float | float: # normalizing scores to be weighted ratios w_prior = prior_score / sum(weighted_top_config_scores) w_inc = inc_score / sum(weighted_top_config_scores) + elif rung == self.min_rung: + # setting `w_inc = eta * w_prior` as default till score calculation begins + w_prior = self.eta / (1 + self.eta) + w_inc = 1 / (1 + self.eta) else: - # if eta-configurations NOT recorded yet - # check if it is the base rung - if rung == self.min_rung: - # setting `w_inc = eta * w_prior` as default till score calculation begins - w_prior = self.eta / (1 + self.eta) - w_inc = 1 / (1 + self.eta) - else: - # if rung > min.rung then the lower rung could already have enough - # configurations and thus can be recursively queried till the base rung - return self._prior_to_incumbent_ratio_dynamic(rung - 1) + # if rung > min.rung then the lower rung could already have enough + # configurations and thus can be recursively queried till the base rung + return self._prior_to_incumbent_ratio_dynamic(rung - 1) return w_prior, w_inc # order of inheritance (method resolution order) extremely essential for correct behaviour class PriorBand(MFBOBase, HyperbandCustomDefault, PriorBandBase): + """PriorBand optimizer for multi-fidelity optimization.""" + def __init__( self, + *, pipeline_space: SearchSpace, - budget: int, + max_cost_total: int, eta: int = 3, initial_design_type: Literal["max_budget", "unique_configs"] = "max_budget", - sampling_policy: typing.Any = EnsemblePolicy, - promotion_policy: typing.Any = SyncPromotionPolicy, - loss_value_on_error: None | float = None, + sampling_policy: Any = EnsemblePolicy, + promotion_policy: Any = SyncPromotionPolicy, + objective_to_minimize_value_on_error: None | float = None, cost_value_on_error: None | float = None, ignore_errors: bool = False, - logger=None, prior_confidence: Literal["low", "medium", "high"] = "medium", random_interleave_prob: float = 0.0, - sample_default_first: bool = True, - sample_default_at_target: bool = True, - prior_weight_type: str = "geometric", # could also be {"linear", "50-50"} - inc_sample_type: str = "mutation", # or {"crossover", "gaussian", "hypersphere"} + sample_prior_first: bool = True, + sample_prior_at_target: bool = True, + prior_weight_type: Literal["geometric", "linear", "50-50"] = "geometric", + inc_sample_type: Literal[ + "hypersphere", "mutation", "crossover", "gaussian" + ] = "mutation", inc_mutation_rate: float = 0.5, inc_mutation_std: float = 0.25, - inc_style: str = "dynamic", # could also be {"decay", "constant"} + inc_style: Literal["dynamic", "decay", "constant"] = "dynamic", # arguments for model model_based: bool = False, # crucial argument to set to allow model-search - modelling_type: str = "joint", # could also be {"rung"} - initial_design_size: int = None, - model_policy: typing.Any = ModelPolicy, - surrogate_model: str | typing.Any = "gp", - domain_se_kernel: str = None, - hp_kernels: list = None, - surrogate_model_args: dict = 
None, - acquisition: str | BaseAcquisition = "EI", - log_prior_weighted: bool = False, - acquisition_sampler: str | AcquisitionSampler = "random", + modelling_type: Literal["joint", "rung"] = "joint", + initial_design_size: int | None = None, + model_policy: Any = ModelPolicy, + # TODO: Remove these when fixing ModelPolicy + surrogate_model: str | Any = "gp", + surrogate_model_args: dict | None = None, # TODO: Remove + acquisition: str | BaseAcquisition = "EI", # TODO: Remove + log_prior_weighted: bool = False, # TODO: Remove + acquisition_sampler: str = "random", # TODO: Remove ): super().__init__( pipeline_space=pipeline_space, - budget=budget, + max_cost_total=max_cost_total, eta=eta, initial_design_type=initial_design_type, sampling_policy=sampling_policy, promotion_policy=promotion_policy, - loss_value_on_error=loss_value_on_error, + objective_to_minimize_value_on_error=objective_to_minimize_value_on_error, cost_value_on_error=cost_value_on_error, ignore_errors=ignore_errors, - logger=logger, prior_confidence=prior_confidence, random_interleave_prob=random_interleave_prob, - sample_default_first=sample_default_first, - sample_default_at_target=sample_default_at_target, + sample_prior_first=sample_prior_first, + sample_prior_at_target=sample_prior_at_target, ) self.prior_weight_type = prior_weight_type self.inc_sample_type = inc_sample_type @@ -333,7 +359,7 @@ def __init__( ) # determines the kind of trade-off between incumbent and prior weightage self.inc_style = inc_style # used by PriorBandBase - self.sampling_args = { + self.sampling_args: dict[str, Any] = { "inc": None, "weights": { "prior": 1, # begin with only prior sampling @@ -342,15 +368,6 @@ def __init__( }, } - bo_args = dict( - surrogate_model=surrogate_model, - domain_se_kernel=domain_se_kernel, - hp_kernels=hp_kernels, - surrogate_model_args=surrogate_model_args, - acquisition=acquisition, - log_prior_weighted=log_prior_weighted, - acquisition_sampler=acquisition_sampler, - ) self.model_based = model_based self.modelling_type = modelling_type self.initial_design_size = initial_design_size @@ -364,13 +381,18 @@ def __init__( self.init_size = n_min + 1 # in BOHB: init_design >= N_min + 2 if self.modelling_type == "joint" and self.initial_design_size is not None: self.init_size = self.initial_design_size - self.model_policy = model_policy(pipeline_space, **bo_args) + + # TODO: We also create a prior later inside of `compute_scores()`, + # in which we should really just pass in the prior dist as it does not move + # around in the space. + prior_dist = Prior.from_space(self.pipeline_space) + self.model_policy = model_policy(pipeline_space=pipeline_space, prior=prior_dist) for _, sh in self.sh_brackets.items(): sh.sampling_policy = self.sampling_policy sh.sampling_args = self.sampling_args - sh.model_policy = self.model_policy - sh.sample_new_config = self.sample_new_config + sh.model_policy = self.model_policy # type: ignore + sh.sample_new_config = self.sample_new_config # type: ignore def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: """...and this is the method that decides which point to query. 
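# How the three sampling weights above combine, sketched standalone. The
# prior/incumbent split follows the "constant" style shown earlier (the
# incumbent gets 1/eta of the prior's share); `w_random` is an assumed
# example value, since the geometric schedule is partly elided here:
eta = 3
w_random = 0.25  # assumed random-sampling share, for illustration only
w_prior = (1 - w_random) * eta / (eta + 1)  # 0.5625
w_inc = (1 - w_random) * 1 / (eta + 1)  # 0.1875

sampling_args = {
    "inc": None,  # the incumbent config is filled in once one exists
    "weights": {"prior": w_prior, "inc": w_inc, "random": w_random},
}

# until the incumbent is activated (see `is_activate_inc`), its sampling
# mass is handed back to the prior, mirroring `_set_sampling_weights_and_inc`:
inc_active = False
if not inc_active:
    sampling_args["weights"]["prior"] += sampling_args["weights"]["inc"]
    sampling_args["weights"]["inc"] = 0.0

assert abs(sum(sampling_args["weights"].values()) - 1.0) < 1e-12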
@@ -378,44 +400,8 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: Returns: [type]: [description] """ - self.set_sampling_weights_and_inc(rung=self.current_sh_bracket) + self._set_sampling_weights_and_inc(rung=self.current_sh_bracket) for _, sh in self.sh_brackets.items(): sh.sampling_args = self.sampling_args return super().get_config_and_ids() - - -class PriorBandNoIncToPrior(PriorBand): - """Disables incumbent sampling to replace with prior-based sampling. - - This is equivalent to running HyperBand with Prior and Random sampling, where their - relationship is controlled by the `prior_weight_type` argument. - """ - - def set_sampling_weights_and_inc(self, rung: int): - super().set_sampling_weights_and_inc(rung) - # distributing the inc weight to the prior entirely - self.sampling_args["weights"]["prior"] += self.sampling_args["weights"]["inc"] - self.sampling_args["weights"]["inc"] = 0 - - return self.sampling_args - - -class PriorBandNoPriorToInc(PriorBand): - """Disables prior based sampling to replace with incumbent-based sampling.""" - - def __init__(self, **kwargs): - super().__init__(**kwargs) - # cannot use prior in this version - self.pipeline_space.has_prior = False - - def set_sampling_weights_and_inc(self, rung: int): - super().set_sampling_weights_and_inc(rung) - # distributing the prior weight to the incumbent entirely - if self.sampling_args["weights"]["inc"] > 0: - self.sampling_args["weights"]["inc"] += self.sampling_args["weights"]["prior"] - self.sampling_args["weights"]["prior"] = 0 - else: - self.sampling_args["weights"]["random"] = 1 - self.sampling_args["weights"]["prior"] = 0 - return self.sampling_args diff --git a/neps/optimizers/multi_fidelity_prior/utils.py b/neps/optimizers/multi_fidelity_prior/utils.py index edbbadc79..c8a8c7c78 100644 --- a/neps/optimizers/multi_fidelity_prior/utils.py +++ b/neps/optimizers/multi_fidelity_prior/utils.py @@ -1,72 +1,88 @@ from __future__ import annotations +from typing import Any + import numpy as np -import pandas as pd -import scipy +import torch +from neps.sampling.priors import Prior from neps.search_spaces import ( - CategoricalParameter, - ConstantParameter, - NumericalParameter, - Parameter, + Categorical, + Constant, GraphParameter, + Float, + Integer, SearchSpace, ) +from neps.search_spaces.encoding import ConfigEncoder +from neps.search_spaces.functions import sample_one_old, pairwise_dist -def update_fidelity(config, fidelity): +def update_fidelity(config: SearchSpace, fidelity: int | float) -> SearchSpace: + assert config.fidelity is not None config.fidelity.set_value(fidelity) return config -# TODO(eddiebergman): Previously this just ignored graphs, -# now it will likely raise if it encounters one... +# TODO(eddiebergman): This would be much faster +# if done in a vectorized manner... 
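As a rough sketch of the vectorized direction the TODO above points at (everything below is an illustrative assumption operating on unit-normalized values, not part of this diff), the per-parameter loop in `local_mutation` could in principle become a single batched draw:

    import torch

    def mutate_unit_values(
        values: torch.Tensor, std: float = 0.25, mutation_rate: float = 0.5
    ) -> torch.Tensor:
        # One batched draw decides which entries mutate and by how much,
        # instead of looping over hyperparameters one at a time.
        mutate_mask = torch.rand_like(values) < mutation_rate
        perturbed = (values + torch.randn_like(values) * std).clamp(0.0, 1.0)
        return torch.where(mutate_mask, perturbed, values)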
 def local_mutation(
     config: SearchSpace,
     std: float = 0.25,
     mutation_rate: float = 0.5,
     patience: int = 50,
-    mutate_categoricals: bool = True,
-    mutate_graphs: bool = True,
 ) -> SearchSpace:
     """Performs a local search by mutating randomly chosen hyperparameters."""
-    for _ in range(patience):
-        new_config: dict[str, Parameter] = {}
-
-        for hp_name, hp in config.items():
-
-            if hp.is_fidelity or np.random.uniform() > mutation_rate:
-                new_config[hp_name] = hp.clone()
-
-            elif isinstance(hp, CategoricalParameter):
-                if mutate_categoricals:
-                    new_config[hp_name] = hp.mutate(mutation_strategy="local_search")
-                else:
-                    new_config[hp_name] = hp.clone()
-
-            elif isinstance(hp, GraphParameter):
-                if mutate_graphs:
-                    new_config[hp_name] = hp.mutate(mutation_strategy="bananas")
-                else:
-                    new_config[hp_name] = hp.clone()
-
-            elif isinstance(hp, NumericalParameter):
-                new_config[hp_name] = hp.mutate(
-                    mutation_strategy="local_search",
-                    std=std,
+    # TODO: Seeding
+    space = config
+    parameters_to_keep = {}
+    parameters_to_mutate = {}
+
+    for name, parameter in space.hyperparameters.items():
+        if (
+            parameter.is_fidelity
+            or isinstance(parameter, Constant)
+            or np.random.uniform() > mutation_rate
+        ):
+            parameters_to_keep[name] = parameter.value
+        else:
+            parameters_to_mutate[name] = parameter
+
+    if len(parameters_to_mutate) == 0:
+        return space.from_dict(parameters_to_keep)
+
+    new_config: dict[str, Any] = {}
+
+    for hp_name, hp in parameters_to_mutate.items():
+        match hp:
+            case Categorical():
+                assert hp._value_index is not None
+                # Draw a random choice that differs from the current one.
+                perm: list[int] = torch.randperm(len(hp.choices)).tolist()
+                ix = perm[0] if perm[0] != hp._value_index else perm[1]
+                new_config[hp_name] = hp.choices[ix]
+            case GraphParameter():
+                new_config[hp_name] = hp.mutate(mutation_strategy="bananas")
+            case Integer() | Float():
+                prior = Prior.from_parameters(
+                    {hp_name: hp},
+                    confidence_values={hp_name: (1 - std)},
                 )
-            elif isinstance(hp, ConstantParameter):
-                new_config[hp_name] = hp.clone()
-            else:
+                for _ in range(patience):
+                    sample = prior.sample(1, to=hp.domain).item()
+                    if sample != hp.value:
+                        new_config[hp_name] = sample
+                        break
+                else:
+                    raise ValueError(
+                        f"Exhausted patience trying to mutate parameter '{hp_name}'"
+                        f" with value {hp.value}"
+                    )
+            case _:
                 raise NotImplementedError(f"Unknown hp type for {hp_name}: {type(hp)}")
 
-    # if the new config doesn't differ from the original config then regenerate
-    _new_ss = SearchSpace(**new_config)
-    if not config.is_equal_value(_new_ss, include_fidelity=False):
-        return _new_ss
-
-    return config.clone()
+    # Re-attach the untouched parameters so the returned config is complete.
+    return space.from_dict({**parameters_to_keep, **new_config})
 
 
 def custom_crossover(
@@ -80,20 +96,24 @@ def custom_crossover(
     Returns a configuration where each HP in config1 has `crossover_prob`% chance of
     getting config2's value of the corresponding HP. By default, crossover rate is 50%.
""" - for _ in range(patience): + _existing = config1._values - child_config = config1.clone() + for _ in range(patience): + child_config = {} for key, hyperparameter in config1.items(): if not hyperparameter.is_fidelity and np.random.random() < crossover_prob: - child_config[key].set_value(config2[key].value) + child_config[key] = config2[key].value + else: + child_config[key] = hyperparameter.value - if not child_config.is_equal_value(config1): - return SearchSpace(**child_config) + if _existing != child_config: + return config1.from_dict(child_config) # fail safe check to handle edge cases where config1=config2 or # config1 extremely local to config2 such that crossover fails to # generate new config in a discrete (sub-)space - return config1.sample( + return sample_one_old( + config1, patience=patience, user_priors=False, ignore_fidelity=True, @@ -108,77 +128,45 @@ def compute_config_dist(config1: SearchSpace, config2: SearchSpace) -> float: Distance returned is the sum of the Euclidean distance of the continous subspace and the Hamming distance of the categorical subspace. """ - config1 = config1.get_normalized_hp_categories(ignore_fidelity=True) - config2 = config2.get_normalized_hp_categories(ignore_fidelity=True) - - # adding a dim with 0 to all subspaces in case the search space is not mixed type - - # computing euclidean distance over the continuous subspace - diff = np.array(config1["continuous"] + [0]) - np.array(config2["continuous"] + [0]) - d_cont = np.linalg.norm(diff, ord=2) - - # TODO: can we consider the number of choices per dimension - # computing hamming distance over the categorical subspace - d_cat = scipy.spatial.distance.hamming( - config1["categorical"] + [0], config2["categorical"] + [0] - ) - - distance = d_cont + d_cat - return distance + encoder = ConfigEncoder.from_parameters({**config1.numerical, **config1.categoricals}) + configs = encoder.encode([config1._values, config2._values]) + dist = pairwise_dist(configs, encoder, square_form=False) + return float(dist.item()) def compute_scores( config: SearchSpace, prior: SearchSpace, inc: SearchSpace, + *, + include_fidelity: bool = False, ) -> tuple[float, float]: """Scores the config by a Gaussian around the prior and the incumbent.""" - _prior = prior.clone() - _prior.set_hyperparameters_from_dict(config.hp_values(), defaults=False) - # compute the score of config if it was sampled from the prior (as the default) - prior_score = _prior.compute_prior() - - _inc = inc.clone() - # setting the default to be the incumbent - _inc.set_defaults_to_current_values() - _inc.set_hyperparameters_from_dict(config.hp_values(), defaults=False) - # compute the score of config if it was sampled from the inc (as the default) - inc_score = _inc.compute_prior() + # TODO: This could lifted up and just done in the class itself + # in a vectorized form. 
+ encoder = ConfigEncoder.from_space(config, include_fidelity=include_fidelity) + encoded_config = encoder.encode([config._values]) + + prior_dist = Prior.from_space( + prior, + center_values=prior._values, + include_fidelity=include_fidelity, + ) + inc_dist = Prior.from_space( + inc, + center_values=inc._values, + include_fidelity=include_fidelity, + ) + prior_score = prior_dist.pdf(encoded_config, frm=encoder).item() + inc_score = inc_dist.pdf(encoded_config, frm=encoder).item() return prior_score, inc_score -def calc_total_resources_spent(observed_configs: pd.DataFrame, rung_map: dict) -> float: - # collects a list of fidelities/rungs reached by configurations that are not pending - rungs_used = [ - observed_configs.at[i, "rung"] - for i in range(len(observed_configs)) - if not np.isnan(observed_configs.at[i, "perf"]) - ] - total_resources = sum(rung_map[r] for r in rungs_used) - return total_resources - - -# def get_prior_weight_for_decay( -# resources_used: float, eta: int, min_budget, max_budget -# ) -> float: -# nrungs = np.floor(np.log(max_budget / min_budget) / np.log(eta)).astype(int) + 1 -# unit_HB_resources = nrungs * eta * max_budget -# idx = resources_used // unit_HB_resources -# start_weight = 1 / eta**idx -# end_weight = start_weight / eta -# _resources = resources_used / unit_HB_resources - idx -# -# # equation for line in the idx-th HB bracket in terms of resource usage -# y = (end_weight - start_weight) * _resources + start_weight -# -# return y - - def get_prior_weight_for_decay( - resources_used: float, eta: int, min_budget, max_budget + resources_used: float, eta: int, min_budget: int | float, max_budget: int | float ) -> float: - """Creates a step function schedule for the prior weight decay. + r"""Creates a step function schedule for the prior weight decay. The prior weight ratio is decayed every time the total resources used is equivalent to the cost of one successive halving bracket within the HB schedule. 
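A quick worked instance of this step schedule, using the `decay = 2` and `unit_resources = eta * max_budget` shown in the hunk below (the numbers are only illustrative):

    eta, max_budget = 3, 9
    unit_resources = eta * max_budget   # 27 resources per decay step
    for resources_used in (0.0, 27.0, 60.0):
        idx = resources_used // unit_resources
        print(1 / 2**idx)               # 1.0, then 0.5, then 0.25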
@@ -188,5 +176,4 @@ def get_prior_weight_for_decay( decay = 2 unit_resources = eta * max_budget idx = resources_used // unit_resources - weight = 1 / decay**idx - return weight + return 1 / decay**idx diff --git a/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py b/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py deleted file mode 100644 index 845552eae..000000000 --- a/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py +++ /dev/null @@ -1,86 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any -from typing_extensions import override - -from neps.state.optimizer import BudgetInfo, OptimizationState -from neps.utils.types import ConfigResult, RawConfig -from neps.search_spaces.search_space import SearchSpace -from neps.utils.data_loading import read_tasks_and_dev_stages_from_disk -from neps.optimizers.base_optimizer import BaseOptimizer - - -# TODO: Test if anything breaks after the recent changes -class KnowledgeSampling(BaseOptimizer): - def load_prev_dev_tasks(self): - self.prev_task_dev_results = read_tasks_and_dev_stages_from_disk( - self.paths_prev_task_and_dev - ) - - def __init__( - self, - paths_prev_task_and_dev: list[str], - user_prior: dict, - **optimizer_kwargs, - ): - super().__init__(**optimizer_kwargs) - self.prev_task_dev_search_space = self.pipeline_space.clone() - self._num_previous_configs: int = 0 - self.paths_prev_task_and_dev = paths_prev_task_and_dev - self.prev_task_dev_results = None - self.prior_search_spaces: dict[int, Any] = {} - self.load_prev_dev_tasks() - self.calculate_defaults() - self.pipeline_space.set_hyperparameters_from_dict( - user_prior, delete_previous_defaults=True, delete_previous_values=True - ) - - def calculate_defaults(self): - configs = self.prev_task_dev_results[self.prev_task_dev_id[0]][ - self.prev_task_dev_id[1] - ] - hp_values = configs[0].config - self.prev_task_dev_search_space.set_hyperparameters_from_dict( - hp_values, delete_previous_defaults=True, delete_previous_values=True - ) - - @override - def load_optimization_state( - self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], - budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - self._num_previous_configs = len(previous_results) + len(pending_evaluations) - - def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: - config = None - i = self._num_previous_configs - if i == 0: - # User prior - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=False - ) - logging.info("Sampling from user prior") - elif i == 1: - # Tasks / dev steps - config = self.prev_task_dev_search_space.sample( - patience=self.patience, user_priors=True, ignore_fidelity=False - ) - logging.info("Sampling in mode tasks from previous tasks / dev stage") - else: - # Random search - config = self.pipeline_space.sample( - patience=self.patience, user_priors=False, ignore_fidelity=False - ) - logging.info("Sampling from random search") - - hp_values = config.hp_values() - - config_id = str(self._num_previous_configs + 1) - logging.info("Config-ID: " + config_id) - logging.info("Config:") - logging.info(hp_values) - return hp_values, config_id, None diff --git a/neps/optimizers/random_search/optimizer.py b/neps/optimizers/random_search/optimizer.py index 5aeaff33e..a5df59ad1 100644 --- a/neps/optimizers/random_search/optimizer.py +++ b/neps/optimizers/random_search/optimizer.py @@ -1,35 +1,77 @@ +"""Random 
search optimizer.""" + from __future__ import annotations -from typing import Any + +from collections.abc import Mapping +from typing import TYPE_CHECKING, Any from typing_extensions import override -from neps.state.optimizer import BudgetInfo, OptimizationState -from neps.utils.types import ConfigResult, RawConfig -from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.base_optimizer import BaseOptimizer +from neps.optimizers.base_optimizer import BaseOptimizer, SampledConfig +from neps.sampling.priors import UniformPrior +from neps.search_spaces.encoding import ConfigEncoder + +if TYPE_CHECKING: + from neps.search_spaces.search_space import SearchSpace + from neps.state.optimizer import BudgetInfo + from neps.state.trial import Trial class RandomSearch(BaseOptimizer): - def __init__(self, use_priors=False, ignore_fidelity=True, **optimizer_kwargs): - super().__init__(**optimizer_kwargs) - self._num_previous_configs: int = 0 + """A simple random search optimizer.""" + + def __init__( + self, + *, + pipeline_space: SearchSpace, + use_priors: bool = False, + ignore_fidelity: bool = True, + seed: int | None = None, + **kwargs: Any, # TODO: Remove + ): + """Initialize the random search optimizer. + + Args: + pipeline_space: The search space to sample from. + use_priors: Whether to use priors when sampling. + ignore_fidelity: Whether to ignore fidelity when sampling. + In this case, the max fidelity is always used. + seed: The seed for the random number generator. + """ + super().__init__(pipeline_space=pipeline_space) self.use_priors = use_priors self.ignore_fidelity = ignore_fidelity + if seed is not None: + raise NotImplementedError("Seed is not implemented yet for RandomSearch") + + self.seed = seed + self.encoder = ConfigEncoder.from_space( + pipeline_space, + include_fidelity=False, + include_constants_when_decoding=True, + ) + self.sampler = UniformPrior(ndim=self.encoder.ncols) @override - def load_optimization_state( + def ask( self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], + trials: Mapping[str, Trial], budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - self._num_previous_configs = len(previous_results) + len(pending_evaluations) - - def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: - config = self.pipeline_space.sample( - patience=self.patience, - user_priors=self.use_priors, - ignore_fidelity=self.ignore_fidelity, - ) - config_id = str(self._num_previous_configs + 1) - return config.hp_values(), config_id, None + n: int | None = None, + ) -> SampledConfig | list[SampledConfig]: + n_trials = len(trials) + _n = 1 if n is None else n + configs = self.sampler.sample(_n, to=self.encoder.domains) + config_dicts = self.encoder.decode(configs) + if n == 1: + config = config_dicts[0] + config_id = str(n_trials + 1) + return SampledConfig(config=config, id=config_id, previous_config_id=None) + + return [ + SampledConfig( + config=config, + id=str(n_trials + i + 1), + previous_config_id=None, + ) + for i, config in enumerate(config_dicts) + ] diff --git a/neps/optimizers/regularized_evolution/optimizer.py b/neps/optimizers/regularized_evolution/optimizer.py deleted file mode 100644 index 0860ba1ce..000000000 --- a/neps/optimizers/regularized_evolution/optimizer.py +++ /dev/null @@ -1,139 +0,0 @@ -from __future__ import annotations - -import math -import os -import random -from pathlib import Path -from typing import Any, Callable -from typing_extensions import override - 
-import numpy as np -import yaml - -from neps.state.optimizer import BudgetInfo, OptimizationState -from neps.utils.types import ConfigResult, RawConfig - -from neps.search_spaces.search_space import SearchSpace -from neps.optimizers.base_optimizer import BaseOptimizer - - -class RegularizedEvolution(BaseOptimizer): - def __init__( - self, - pipeline_space: SearchSpace, - population_size: int = 30, - sample_size: int = 10, - patience: int = 100, - budget: None | int | float = None, - logger=None, - assisted: bool = False, - assisted_zero_cost_proxy: Callable | None = None, - assisted_init_population_dir: str | Path | None = None, - **optimizer_kwargs, - ): - super().__init__( - pipeline_space=pipeline_space, - patience=patience, - logger=logger, - budget=budget, - **optimizer_kwargs, - ) - - if population_size < 1: - raise ValueError("RegularizedEvolution needs a population size >= 1") - self.population_size = population_size - self.sample_size = sample_size - self.population: list = [] - self.pending_evaluations: list = [] - self.num_train_x: int = 0 - - self.assisted = assisted - assert not assisted or (assisted and assisted_zero_cost_proxy is not None) - self.assisted_zero_cost_proxy = assisted_zero_cost_proxy - if assisted_init_population_dir is not None: - self.assisted_init_population_dir = Path(assisted_init_population_dir) - self.assisted_init_population_dir.mkdir(exist_ok=True) - - @override - def load_optimization_state( - self, - previous_results: dict[str, ConfigResult], - pending_evaluations: dict[str, SearchSpace], - budget_info: BudgetInfo | None, - optimizer_state: dict[str, Any], - ) -> None: - train_x = [el.config for el in previous_results.values()] - train_y = [self.get_loss(el.result) for el in previous_results.values()] - self.num_train_x = len(train_x) - self.population = [ - (x, y) - for x, y in zip( - train_x[-self.population_size :], train_y[-self.population_size :] - ) - ] - self.pending_evaluations = [el for el in pending_evaluations.values()] - - def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: - if len(self.population) < self.population_size: - if self.assisted: - if 0 == len(os.listdir(self.assisted_init_population_dir)): - cur_population_size = self.population_size - len(self.population) - configs = [ - self.pipeline_space.sample( - patience=self.patience, user_priors=True - ) - for _ in range(cur_population_size * 2) - ] - if self.assisted_zero_cost_proxy is not None: - zero_cost_proxy_values = self.assisted_zero_cost_proxy(x=configs) # type: ignore[misc] - else: - raise Exception("Zero cost proxy function is not defined!") - indices = np.argsort(zero_cost_proxy_values)[-cur_population_size:][ - ::-1 - ] - for idx, config_idx in enumerate(indices): - filename = str(idx).zfill( - int(math.log10(cur_population_size)) + 1 - ) - with open( - self.assisted_init_population_dir / f"{filename}.yaml", - "w", - encoding="utf-8", - ) as f: - yaml.dump(configs[config_idx].serialize(), f) - config_yaml = sorted(os.listdir(self.assisted_init_population_dir))[0] - with open( - self.assisted_init_population_dir / config_yaml, encoding="utf-8" - ) as f: - config_identifier = yaml.safe_load(f) - config = self.pipeline_space.clone() - config.load_from(config_identifier) - os.remove(self.assisted_init_population_dir / config_yaml) - else: - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True - ) - else: - candidates = [random.choice(self.population) for _ in range(self.sample_size)] - parent = min(candidates, key=lambda c: c[1])[0] 
- patience = self.patience - while patience > 0: - config = self._mutate(parent) - if config is False: - config = self.pipeline_space.sample( - patience=self.patience, user_priors=True - ) - if config not in self.pending_evaluations: - break - patience -= 1 - config_id = str(self.num_train_x + len(self.pending_evaluations) + 1) - return config.hp_values(), config_id, None - - def _mutate(self, parent): - for _ in range(self.patience): - try: - # needs to throw an Exception if config is not valid, e.g., empty graph etc.! - return parent.mutate() - except Exception: - continue - return False diff --git a/neps/optimizers/utils.py b/neps/optimizers/utils.py deleted file mode 100644 index c203f4dbe..000000000 --- a/neps/optimizers/utils.py +++ /dev/null @@ -1,47 +0,0 @@ -import pandas as pd - -from ..search_spaces.search_space import SearchSpace - - -# def map_real_hyperparameters_from_tabular_ids( -# ids: pd.Series, pipeline_space: SearchSpace -# ) -> pd.Series: -# return x - - -def map_real_hyperparameters_from_tabular_ids( - x: pd.Series, pipeline_space: SearchSpace -) -> pd.Series: - """ Maps the tabular IDs to the actual HPs from the pipeline space. - - Args: - x (pd.Series): A pandas series with the tabular IDs. - TODO: Mention expected format of the series. - pipeline_space (SearchSpace): The pipeline space. - - Returns: - pd.Series: A pandas series with the actual HPs. - TODO: Mention expected format of the series. - """ - if len(x) == 0: - return x - # extract fid name - _x = x.iloc[0].hp_values() - _x.pop("id") - fid_name = list(_x.keys())[0] - for i in x.index.values: - # extracting actual HPs from the tabular space - _config = pipeline_space.custom_grid_table.loc[x.loc[i]["id"].value].to_dict() - # updating fidelities as per the candidate set passed - _config.update({fid_name: x.loc[i][fid_name].value}) - # placeholder config from the raw tabular space - config = pipeline_space.raw_tabular_space.sample( - patience=100, - user_priors=True, - ignore_fidelity=True # True allows fidelity to appear in the sample - ) - # copying values from table to placeholder config of type SearchSpace - config.load_from(_config) - # replacing the ID in the candidate set with the actual HPs of the config - x.loc[i] = config - return x diff --git a/neps/plot/__main__.py b/neps/plot/__main__.py index 04aaffd2a..e94e65593 100644 --- a/neps/plot/__main__.py +++ b/neps/plot/__main__.py @@ -12,7 +12,7 @@ Optional arguments: -h, --help Show this help message and exit --scientific_mode If true, plot from a tree-structured root_directory: benchmark={}/algorithm={}/seed={} - --key_to_extract The metric to be used on the x-axis (if active, make sure run_pipeline returns the metric in the info_dict) + --key_to_extract The metric to be used on the x-axis (if active, make sure evaluate_pipeline returns the metric in the info_dict) --benchmarks List of benchmarks to plot --algorithms List of algorithms to plot --consider_continuations If true, toggle calculation of continuation costs @@ -57,7 +57,7 @@ parser.add_argument( "--key_to_extract", help="The metric to be used on the x-axis (if " - "active, make sure run_pipeline returns " + "active, make sure evaluate_pipeline returns " "the metric in the info_dict)") parser.add_argument( "--benchmarks", diff --git a/neps/plot/plot.py b/neps/plot/plot.py index 020242a46..c36f18424 100644 --- a/neps/plot/plot.py +++ b/neps/plot/plot.py @@ -36,7 +36,7 @@ def plot( # noqa: C901, PLR0913 scientific_mode: If true, plot from a tree-structured root_directory: 
benchmark={}/algorithm={}/seed={} key_to_extract: The metric to be used on the x-axis - (if active, make sure run_pipeline returns the metric in the info_dict) + (if active, make sure evaluate_pipeline returns the metric in the info_dict) benchmarks: List of benchmarks to plot algorithms: List of algorithms to plot consider_continuations: If true, toggle calculation of continuation costs diff --git a/neps/plot/plot3D.py b/neps/plot/plot3D.py new file mode 100644 index 000000000..e0d835988 --- /dev/null +++ b/neps/plot/plot3D.py @@ -0,0 +1,260 @@ +"""Plot a 3D landscape of learning curves for a given run.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +import numpy as np +import pandas as pd +from matplotlib import ( + cm, + pyplot as plt, +) +from matplotlib.collections import LineCollection +from matplotlib.colors import Normalize +from mpl_toolkits.mplot3d.art3d import Line3DCollection + +# Copied from plot.py +HERE = Path(__file__).parent.absolute() + + +@dataclass +class Plotter3D: + """Plot a 3d landscape of learning curves for a given run.""" + + objective_to_minimize_key: str = "Objective to minimize" + fidelity_key: str = "epochs" + run_path: str | Path | None = None + scatter: bool = True + footnote: bool = True + alpha: float = 0.9 + scatter_size: float | int = 3 + bck_color_2d: tuple[float, float, float] = (0.8, 0.82, 0.8) + view_angle: tuple[float, float] = (15, -70) + + def __post_init__(self) -> None: + if self.run_path is not None: + assert ( + Path(self.run_path).absolute().is_dir() + ), f"Path {self.run_path} is not a directory" + self.data_path = ( + Path(self.run_path).absolute() / "summary_csv" / "config_data.csv" + ) + assert self.data_path.exists(), f"File {self.data_path} does not exist" + self.df = pd.read_csv( + self.data_path, + index_col=0, + float_precision="round_trip", + ) + + # Assigned at prep_df stage + self.objective_to_minimize_range: tuple[float, float] | None = None + self.epochs_range: tuple[float, float] | None = None + + @staticmethod + def get_x(df: pd.DataFrame) -> np.ndarray: + """Get the x-axis values for the plot.""" + return df["epochID"].to_numpy() + + @staticmethod + def get_y(df: pd.DataFrame) -> np.ndarray: + """Get the y-axis values for the plot.""" + y_ = df["configID"].to_numpy() + return np.ones_like(y_) * y_[0] + + @staticmethod + def get_z(df: pd.DataFrame) -> np.ndarray: + """Get the z-axis values for the plot.""" + return df["result.objective_to_minimize"].to_numpy() + + @staticmethod + def get_color(df: pd.DataFrame) -> np.ndarray: + """Get the color values for the plot.""" + return df.index.to_numpy() + + def prep_df(self, df: pd.DataFrame | None = None) -> pd.DataFrame: + """Prepare the dataframe for plotting.""" + df = self.df if df is None else df + + _fid_key = f"config.{self.fidelity_key}" + self.objective_to_minimize_range = ( + df["result.objective_to_minimize"].min(), + df["result.objective_to_minimize"].max(), + ) # type: ignore + self.epochs_range = (df[_fid_key].min(), df[_fid_key].max()) # type: ignore + + split_values = np.array([[*index.split("_")] for index in df.index]) + df[["configID", "epochID"]] = split_values + df.configID = df.configID.astype(int) + df.epochID = df.epochID.astype(int) + if df.epochID.min() == 0: + df.epochID += 1 + + # indices become sampling order + time_cols = ["metadata.time_started", "metadata.time_end"] + return df.sort_values(by=time_cols).reset_index(drop=True) + + def plot3D( # noqa: N802, PLR0915 + self, + data: pd.DataFrame 
| None = None, + save_path: str | Path | None = None, + filename: str = "freeze_thaw", + ) -> None: + """Plot the 3D landscape of learning curves.""" + data = self.prep_df(data) + + # Create the figure and the axes for the plot + fig, (ax3D, ax, cax) = plt.subplots( + 1, 3, figsize=(12, 5), width_ratios=(20, 20, 1) + ) + + # remove a 2D axis and replace with a 3D projection one + ax3D.remove() + ax3D = fig.add_subplot(131, projection="3d") + + # Create the normalizer to normalize the color values + norm = Normalize(self.get_color(data).min(), self.get_color(data).max()) + + # Counters to keep track of the configurations run for only a single fidelity + n_lines = 0 + n_points = 0 + + data_groups = data.groupby("configID", sort=False) + + for idx, (_configID, data_) in enumerate(data_groups): + x = self.get_x(data_) + y = self.get_y(data_) + z = self.get_z(data_) + + y = np.ones_like(y) * idx + color = self.get_color(data_) + + if len(x) < 2: + n_points += 1 + if self.scatter: + # 3D points + ax3D.scatter( + y, + z, + s=self.scatter_size, + zs=0, + zdir="x", + c=color, + cmap="RdYlBu_r", + norm=norm, + alpha=self.alpha * 0.8, + ) + # 2D points + ax.scatter( + x, + z, + s=self.scatter_size, + c=color, + cmap="RdYlBu_r", + norm=norm, + alpha=self.alpha * 0.8, + ) + else: + n_lines += 1 + + # Plot 3D + # Get segments for all lines + points3D = np.array([x, y, z]).T.reshape(-1, 1, 3) + segments3D = np.concatenate([points3D[:-1], points3D[1:]], axis=1) + + # Construct lines from segments + lc3D = Line3DCollection( + segments3D, # type: ignore + cmap="RdYlBu_r", + norm=norm, + alpha=self.alpha, + ) + lc3D.set_array(color) + + # Draw lines + ax3D.add_collection3d(lc3D) # type: ignore + + # Plot 2D + # Get segments for all lines + points = np.array([x, z]).T.reshape(-1, 1, 2) + segments = np.concatenate([points[:-1], points[1:]], axis=1) + + # Construct lines from segments + lc = LineCollection( + segments, # type: ignore + cmap="RdYlBu_r", + norm=norm, + alpha=self.alpha, # type: ignore + ) + lc.set_array(color) + + # Draw lines + ax.add_collection(lc) + + assert self.objective_to_minimize_range is not None + assert self.epochs_range is not None + + ax3D.axes.set_xlim3d(left=self.epochs_range[0], right=self.epochs_range[1]) # type: ignore + ax3D.axes.set_ylim3d(bottom=0, top=data_groups.ngroups) # type: ignore + ax3D.axes.set_zlim3d( + bottom=self.objective_to_minimize_range[0], + top=self.objective_to_minimize_range[1], + ) # type: ignore + + ax3D.set_xlabel("Epochs") + ax3D.set_ylabel("Iteration sampled") + ax3D.set_zlabel(f"{self.objective_to_minimize_key}") # type: ignore + + # set view angle + ax3D.view_init(elev=self.view_angle[0], azim=self.view_angle[1]) # type: ignore + + ax.autoscale_view() + ax.set_xlabel(self.fidelity_key) + ax.set_ylabel(f"{self.objective_to_minimize_key}") + ax.set_facecolor(self.bck_color_2d) + fig.suptitle("ifBO run") + + if self.footnote: + fig.text( + 0.01, + 0.02, + f"Total {n_lines + n_points} configs evaluated; for multiple budgets: " + f"{n_lines}, for single budget: {n_points}", + ha="left", + va="bottom", + fontsize=10, + ) + + plt.colorbar( + cm.ScalarMappable(norm=norm, cmap="RdYlBu_r"), + cax=cax, + label="Iteration", + use_gridspec=True, + alpha=self.alpha, + ) + fig.tight_layout() + + self.save(save_path, filename) + plt.close(fig) + + def save( + self, + save_path: str | Path | None = None, + filename: str = "freeze_thaw", + ) -> None: + """Save the plot to a file.""" + path = save_path if save_path is not None else self.run_path + assert path is not 
None
+
+        run_path = Path(path)
+        run_path.mkdir(parents=True, exist_ok=True)
+        assert run_path.is_dir()
+        plot_path = run_path / f"Plot3D_{filename}.png"
+
+        plt.savefig(plot_path, bbox_inches="tight")
+
+
+if __name__ == "__main__":
+    plotter = Plotter3D(run_path="./results", fidelity_key="epochs")
+    plotter.plot3D()
diff --git a/neps/plot/read_results.py b/neps/plot/read_results.py
index 06ff3f1ca..24b8a2880 100644
--- a/neps/plot/read_results.py
+++ b/neps/plot/read_results.py
@@ -62,9 +62,9 @@ def get_cost(idx: str) -> float:
         else:
             config_cost = config_result.metadata["time_end"] - global_start
 
-        # TODO(eddiebergman): Assumes it never crashed and there's a loss available,
-        # not fixing now but it should be addressed
-        losses.append(config_result.result["loss"])  # type: ignore
+        # TODO(eddiebergman): Assumes it never crashed and there's an
+        # objective_to_minimize available, not fixing now but it should be addressed
+        losses.append(config_result.result["objective_to_minimize"])  # type: ignore
         costs.append(config_cost)
 
     return list(np.minimum.accumulate(losses)), costs, max_cost
diff --git a/neps/plot/tensorboard_eval.py b/neps/plot/tensorboard_eval.py
index e77329b4a..6a32542d3 100644
--- a/neps/plot/tensorboard_eval.py
+++ b/neps/plot/tensorboard_eval.py
@@ -3,8 +3,9 @@
 from __future__ import annotations
 
 import math
+from collections.abc import Mapping
 from pathlib import Path
-from typing import Any, ClassVar, Mapping
+from typing import TYPE_CHECKING, Any, ClassVar
 from typing_extensions import override
 
 import numpy as np
@@ -12,10 +13,17 @@
 from torch.utils.tensorboard.summary import hparams
 from torch.utils.tensorboard.writer import SummaryWriter
 
-from neps.runtime import get_in_progress_trial, get_workers_neps_state
+from neps.runtime import (
+    get_in_progress_trial,
+    get_workers_neps_state,
+    register_notify_trial_end,
+)
 from neps.status.status import get_summary_dict
 from neps.utils.common import get_initial_directory
 
+if TYPE_CHECKING:
+    from neps.state.trial import Trial
+
 
 class SummaryWriter_(SummaryWriter):  # noqa: N801
     """This class inherits from the base SummaryWriter class and provides
@@ -27,7 +35,7 @@ class SummaryWriter_(SummaryWriter):  # noqa: N801
     - Ensures all logs are stored in the same 'tfevent' directory for better
       organization.
     - Updates metric keys to have a consistent 'Summary/' prefix for clarity.
-    - Improves the display of 'Loss' or 'Accuracy' on the Summary file.
+    - Improves the display of 'objective_to_minimize' or 'Accuracy' on the Summary file.
Methods: - add_hparams: Overrides the base method to log hyperparameters and @@ -66,11 +74,7 @@ class tblogger: # noqa: N801 disable_logging: ClassVar[bool] = False - logger_bool: ClassVar[bool] = False - """logger_bool is true only if tblogger.log is used by the user, this - allows to always capturing the configuration data.""" - - loss: ClassVar[float | None] = None + objective_to_minimize: ClassVar[float | None] = None current_epoch: ClassVar[int | None] = None write_incumbent: ClassVar[bool | None] = None @@ -87,8 +91,10 @@ def _initiate_internal_configurations() -> None: trial = get_in_progress_trial() neps_state = get_workers_neps_state() + register_notify_trial_end("NEPS_TBLOGGER", tblogger.end_of_config) + # We are assuming that neps state is all filebased here - root_dir = Path(neps_state.location) + root_dir = Path(neps_state.path) assert root_dir.exists() tblogger.config_working_directory = Path(trial.metadata.location) @@ -97,12 +103,12 @@ def _initiate_internal_configurations() -> None: if trial.metadata.previous_trial_location is not None else None ) + tblogger.config_id = trial.metadata.id tblogger.optimizer_dir = root_dir tblogger.config = trial.config @staticmethod def _is_initialized() -> bool: - # Returns 'True' if config_writer is already initialized. 'False' otherwise return tblogger.config_writer is not None @staticmethod @@ -110,7 +116,7 @@ def _initialize_writers() -> None: # This code runs only once per config, to assign that config a config_writer. if ( tblogger.config_previous_directory is None - and tblogger.config_working_directory + and tblogger.config_working_directory is not None ): # If no fidelities are there yet, define the writer via the config_id tblogger.config_id = str(tblogger.config_working_directory).rsplit( @@ -120,8 +126,9 @@ def _initialize_writers() -> None: tblogger.config_working_directory / "tbevents" ) return + # Searching for the initial directory where tensorboard events are stored. - if tblogger.config_working_directory: + if tblogger.config_working_directory is not None: init_dir = get_initial_directory( pipeline_directory=tblogger.config_working_directory ) @@ -135,7 +142,7 @@ def _initialize_writers() -> None: ) @staticmethod - def end_of_config() -> None: + def end_of_config(trial: Trial) -> None: # noqa: ARG004 """Closes the writer.""" if tblogger.config_writer: # Close and reset previous config writers for consistent logging. @@ -324,10 +331,7 @@ def _write_image_config( if tblogger.current_epoch >= 0 and tblogger.current_epoch % counter == 0: # Log every multiple of "counter" - if num_images > len(image): - # If the number of images requested by the user - # is more than the ones available. - num_images = len(image) + num_images = min(num_images, len(image)) if random_images is False: subset_images = image[:num_images] @@ -338,7 +342,7 @@ def _write_image_config( # We do not interfere with any randomness from the pipeline num_total_images = len(image) indices = seed.choice(num_total_images, num_images, replace=False) - subset_images = image[indices] # type: ignore + subset_images = image[indices] resized_images = torch.nn.functional.interpolate( subset_images, @@ -373,20 +377,20 @@ def _write_hparam_config() -> None: TensorBoard writer is initialized at the correct directory. 
It also depends on the following global variables: - - tblogger.loss (float) + - tblogger.objective_to_minimize (float) - tblogger.config_writer (SummaryWriter_) - tblogger.config (dict) - tblogger.current_epoch (int) The function will log hyperparameter configurations along - with a metric value (either accuracy or loss) to TensorBoard + with a metric value (either accuracy or objective_to_minimize) to TensorBoard based on the given configurations. """ if not tblogger._is_initialized(): tblogger._initialize_writers() - str_name = "Loss" - str_value = tblogger.loss + str_name = "Objective to minimize" + str_value = tblogger.objective_to_minimize values = {str_name: str_value} # Just an extra safety measure @@ -407,7 +411,8 @@ def _write_hparam_config() -> None: @staticmethod def _tracking_incumbent_api() -> None: - """Track the incumbent (best) loss and log it in the TensorBoard summary. + """Track the incumbent (best) objective_to_minimize and log it in the TensorBoard + summary. Note: The function relies on the following global variables: @@ -420,7 +425,7 @@ def _tracking_incumbent_api() -> None: summary_dict = get_summary_dict(tblogger.optimizer_dir, add_details=True) incum_tracker = summary_dict["num_evaluated_configs"] - incum_val = summary_dict["best_loss"] + incum_val = summary_dict["best_objective_to_minimize"] if tblogger.summary_writer is None and tblogger.optimizer_dir is not None: tblogger.summary_writer = SummaryWriter_(tblogger.optimizer_dir / "summary") @@ -474,7 +479,7 @@ def get_status() -> bool: @staticmethod def log( - loss: float, + objective_to_minimize: float, current_epoch: int, *, writer_config_scalar: bool = True, @@ -486,28 +491,27 @@ def log( hyperparameters, and images. Args: - loss: Current loss value. + objective_to_minimize: Current objective_to_minimize value. current_epoch: Current epoch of the experiment (used as the global step). - writer_config_scalar: Displaying the loss or accuracy + writer_config_scalar: Displaying the objective_to_minimize or accuracy curve on tensorboard (default: True) writer_config_hparam: Write hyperparameters logging of the configs. write_summary_incumbent: Set to `True` for a live incumbent trajectory. extra_data: Additional experiment data for logging. 
""" if tblogger.disable_logging: - tblogger.logger_bool = False return - tblogger.logger_bool = True - tblogger.current_epoch = current_epoch - tblogger.loss = loss + tblogger.objective_to_minimize = objective_to_minimize tblogger.write_incumbent = write_summary_incumbent tblogger._initiate_internal_configurations() if writer_config_scalar: - tblogger._write_scalar_config(tag="Loss", value=loss) + tblogger._write_scalar_config( + tag="objective_to_minimize", value=objective_to_minimize + ) if writer_config_hparam: tblogger._write_hparam_config() diff --git a/neps/runtime.py b/neps/runtime.py index c9988f700..c4c8bff70 100644 --- a/neps/runtime.py +++ b/neps/runtime.py @@ -7,35 +7,45 @@ import os import shutil import time +from collections.abc import Callable, Iterable, Iterator, Mapping from contextlib import contextmanager from dataclasses import dataclass from pathlib import Path from typing import ( TYPE_CHECKING, Any, - Callable, + ClassVar, Generic, - Iterable, - Iterator, Literal, - Mapping, TypeVar, ) +from portalocker import portalocker + +from neps.env import ( + FS_SYNC_GRACE_BASE, + FS_SYNC_GRACE_INC, + LINUX_FILELOCK_FUNCTION, + MAX_RETRIES_CREATE_LOAD_STATE, + MAX_RETRIES_GET_NEXT_TRIAL, + MAX_RETRIES_WORKER_CHECK_SHOULD_STOP, +) from neps.exceptions import ( NePSError, - VersionMismatchError, + TrialAlreadyExistsError, WorkerFailedToGetPendingTrialsError, + WorkerRaiseError, ) from neps.state._eval import evaluate_trial -from neps.state.filebased import create_or_load_filebased_neps_state +from neps.state.neps_state import NePSState from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo +from neps.state.seed_snapshot import SeedSnapshot from neps.state.settings import DefaultReportValues, OnErrorPossibilities, WorkerSettings from neps.state.trial import Trial +from neps.utils.common import gc_disabled if TYPE_CHECKING: from neps.optimizers.base_optimizer import BaseOptimizer - from neps.state.neps_state import NePSState logger = logging.getLogger(__name__) @@ -45,12 +55,8 @@ def _default_worker_name() -> str: return f"{os.getpid()}-{isoformat}" -N_FAILED_GET_NEXT_PENDING_ATTEMPTS_BEFORE_ERROR = 10 -N_FAILED_TO_SET_TRIAL_STATE = 10 - Loc = TypeVar("Loc") - # NOTE: As each NEPS process is only ever evaluating a single trial, this global can # be retrieved in NePS and refers to what this process is currently evaluating. # Note that before `_set_in_progress_trial` is called, this should be cleared @@ -63,19 +69,19 @@ def _default_worker_name() -> str: # TODO: This only works with a filebased nepsstate -def get_workers_neps_state() -> NePSState[Path]: +def get_workers_neps_state() -> NePSState: """Get the worker's NePS state.""" if _WORKER_NEPS_STATE is None: raise RuntimeError( "The worker's NePS state has not been set! This should only be called" - " from within a `run_pipeline` context. If you are not running a pipeline" - " and you did not call this function (`get_workers_neps_state`) yourself," - " this is a bug and should be reported to NePS." + " from within a `evaluate_pipeline` context. If you are not running a" + " pipeline and you did not call this function (`get_workers_neps_state`)" + " yourself, this is a bug and should be reported to NePS." 
) return _WORKER_NEPS_STATE -def _set_workers_neps_state(state: NePSState[Path]) -> None: +def _set_workers_neps_state(state: NePSState) -> None: global _WORKER_NEPS_STATE # noqa: PLW0603 _WORKER_NEPS_STATE = state @@ -85,13 +91,21 @@ def get_in_progress_trial() -> Trial: if _CURRENTLY_RUNNING_TRIAL_IN_PROCESS is None: raise RuntimeError( "The worker's NePS state has not been set! This should only be called" - " from within a `run_pipeline` context. If you are not running a pipeline" - " and you did not call this function (`get_workers_neps_state`) yourself," - " this is a bug and should be reported to NePS." + " from within a `evaluate_pipeline` context. If you are not running a" + " pipeline and you did not call this function (`get_workers_neps_state`)" + " yourself, this is a bug and should be reported to NePS." ) return _CURRENTLY_RUNNING_TRIAL_IN_PROCESS +_TRIAL_END_CALLBACKS: dict[str, Callable[[Trial], None]] = {} + + +def register_notify_trial_end(key: str, callback: Callable[[Trial], None]) -> None: + """Register a callback to be called when a trial ends.""" + _TRIAL_END_CALLBACKS[key] = callback + + @contextmanager def _set_global_trial(trial: Trial) -> Iterator[None]: global _CURRENTLY_RUNNING_TRIAL_IN_PROCESS # noqa: PLW0603 @@ -106,6 +120,8 @@ def _set_global_trial(trial: Trial) -> Iterator[None]: ) _CURRENTLY_RUNNING_TRIAL_IN_PROCESS = trial yield + for _key, callback in _TRIAL_END_CALLBACKS.items(): + callback(trial) _CURRENTLY_RUNNING_TRIAL_IN_PROCESS = None @@ -145,6 +161,8 @@ class DefaultWorker(Generic[Loc]): worker_cumulative_evaluation_time_seconds: float = 0.0 """The time spent evaluating configurations by this worker.""" + _GRACE: ClassVar = FS_SYNC_GRACE_BASE + @classmethod def new( cls, @@ -166,27 +184,7 @@ def new( _pre_sample_hooks=_pre_sample_hooks, ) - def _get_next_trial_from_state(self) -> Trial: - nxt_trial = self.state.get_next_pending_trial() - - # If we have a trial, we will use it - if nxt_trial is not None: - logger.info( - f"Worker '{self.worker_id}' got previosly sampled trial: {nxt_trial}" - ) - - # Otherwise sample a new one - else: - nxt_trial = self.state.sample_trial( - worker_id=self.worker_id, - optimizer=self.optimizer, - _sample_hooks=self._pre_sample_hooks, - ) - logger.info(f"Worker '{self.worker_id}' sampled a new trial: {nxt_trial}") - - return nxt_trial - - def _check_if_should_stop( # noqa: C901, PLR0912, PLR0911 + def _check_worker_local_settings( self, *, time_monotonic_start: float, @@ -194,8 +192,6 @@ def _check_if_should_stop( # noqa: C901, PLR0912, PLR0911 ) -> str | Literal[False]: # NOTE: Sorry this code is kind of ugly but it's pretty straightforward, just a # lot of conditional checking and making sure to check cheaper conditions first. - # It would look a little nicer with a match statement but we've got to wait - # for python 3.10 for that. # First check for stopping criterion for this worker in particular as it's # cheaper and doesn't require anything from the state. @@ -205,15 +201,27 @@ def _check_if_should_stop( # noqa: C901, PLR0912, PLR0911 OnErrorPossibilities.STOP_WORKER_ERROR, OnErrorPossibilities.STOP_ANY_ERROR, ): + msg = ( + "Error occurred while evaluating a configuration with this worker and" + f" the worker is set to stop with {self.settings.on_error}." + "\n" + "\n" + "If this was a bug in the evaluation code while you were developing your" + " pipeline and you have set ignore_errors=True, please delete" + " your results folder and fix the error before re-running." 
+ "\n" + "If this is an issue specifically with the configuration, considering" + " setting `ignore_errors=False` to allow the worker to continue" + " evaluating other configurations, even if this one failed." + "\n" + "\n" + ) if self.settings.on_error in ( OnErrorPossibilities.RAISE_WORKER_ERROR, OnErrorPossibilities.RAISE_ANY_ERROR, ): - raise error_from_this_worker - return ( - "Error occurred while evaluating a configuration with this worker and" - f" the worker is set to stop with {self.settings.on_error}." - ) + raise WorkerRaiseError(msg) from error_from_this_worker + return msg if ( self.settings.max_evaluations_for_worker is not None @@ -257,46 +265,51 @@ def _check_if_should_stop( # noqa: C901, PLR0912, PLR0911 f", given by `{self.settings.max_evaluation_time_for_worker_seconds=}`." ) + return False + + def _check_shared_error_stopping_criterion(self) -> str | Literal[False]: # We check this global error stopping criterion as it's much # cheaper than sweeping the state from all trials. if self.settings.on_error in ( OnErrorPossibilities.RAISE_ANY_ERROR, OnErrorPossibilities.STOP_ANY_ERROR, ): - err = self.state._shared_errors.synced().latest_err_as_raisable() + err = self.state.lock_and_get_errors().latest_err_as_raisable() if err is not None: - if self.settings.on_error == OnErrorPossibilities.RAISE_ANY_ERROR: - raise err - - return ( + msg = ( "An error occurred in another worker and this worker is set to stop" f" with {self.settings.on_error}." - "\n To allow more evaluations, use a different stopping criterion." + "\n" + "If this was a bug in the evaluation code while you were developing" + " your pipeline and you have set ignore_errors=True, please delete" + " your results folder and fix the error before re-running." + "\n" + "If this is an issue specifically with the configuration, considering" + " setting `ignore_errors=False` to allow the worker to continue" + " evaluating other configurations, even if any worker fails." + "\n" ) + if self.settings.on_error == OnErrorPossibilities.RAISE_ANY_ERROR: + raise WorkerRaiseError(msg) from err - # If there are no global stopping criterion, we can no just return early. - if ( - self.settings.max_evaluations_total is None - and self.settings.max_cost_total is None - and self.settings.max_evaluation_time_total_seconds is None - ): - return False - - # At this point, if we have some global stopping criterion, we need to sweep - # the current state of trials to determine if we should stop - # NOTE: If these `sum` turn out to somehow be a bottleneck, these could - # be precomputed and accumulated over time. This would have to be handled - # in the `NePSState` class. - trials = self.state.get_all_trials() + return msg + + return False + + def _check_global_stopping_criterion( + self, + trials: Mapping[str, Trial], + ) -> str | Literal[False]: if self.settings.max_evaluations_total is not None: if self.settings.include_in_progress_evaluations_towards_maximum: count = sum( 1 for _, trial in trials.items() - if trial.report is not None - or trial.state in (Trial.State.EVALUATING, Trial.State.SUBMITTED) + if trial.metadata.state + not in (Trial.State.PENDING, Trial.State.SUBMITTED) ) else: + # This indicates they have completed. 
+    def _check_global_stopping_criterion(
+        self,
+        trials: Mapping[str, Trial],
+    ) -> str | Literal[False]:
         if self.settings.max_evaluations_total is not None:
             if self.settings.include_in_progress_evaluations_towards_maximum:
                 count = sum(
                     1
                     for _, trial in trials.items()
-                    if trial.report is not None
-                    or trial.state in (Trial.State.EVALUATING, Trial.State.SUBMITTED)
+                    if trial.metadata.state
+                    not in (Trial.State.PENDING, Trial.State.SUBMITTED)
                 )
             else:
+                # This indicates they have completed.
                 count = sum(1 for _, trial in trials.items() if trial.report is not None)
 
             if count >= self.settings.max_evaluations_total:
@@ -337,7 +350,124 @@ def _check_if_should_stop(  # noqa: C901, PLR0912, PLR0911
 
         return False
 
-    def run(self) -> None:  # noqa: C901, PLR0915
+    @property
+    def _requires_global_stopping_criterion(self) -> bool:
+        return (
+            self.settings.max_evaluations_total is not None
+            or self.settings.max_cost_total is not None
+            or self.settings.max_evaluation_time_total_seconds is not None
+        )
+
+    def _get_next_trial(self) -> Trial | Literal["break"]:
+        # Any global stopping criterion is checked in here, while holding the lock.
+        with self.state._optimizer_lock.lock(worker_id=self.worker_id):
+            # NOTE: It's important not to hold the trial lock while sampling,
+            # as otherwise any other service, such as reporting the result of
+            # a trial, would be blocked. Hence we do not lock these together
+            # with the above.
+            # OPTIM: We try to prevent garbage collection from happening in here to
+            # minimize time spent holding on to the lock.
+            with self.state._trial_lock.lock(worker_id=self.worker_id), gc_disabled():
+                # Give the file-system some time to sync if we encountered out-of-order
+                # issues with this worker.
+                if self._GRACE > 0:
+                    time.sleep(self._GRACE)
+
+                trials = self.state._trial_repo.latest()
+
+                if self._requires_global_stopping_criterion:
+                    should_stop = self._check_global_stopping_criterion(trials)
+                    if should_stop is not False:
+                        logger.info(should_stop)
+                        return "break"
+
+                pending_trials = [
+                    trial
+                    for trial in trials.values()
+                    if trial.metadata.state == Trial.State.PENDING
+                ]
+
+                if len(pending_trials) > 0:
+                    earliest_pending = sorted(
+                        pending_trials,
+                        key=lambda t: t.metadata.time_sampled,
+                    )[0]
+                    earliest_pending.set_evaluating(
+                        time_started=time.time(),
+                        worker_id=self.worker_id,
+                    )
+                    self.state._trial_repo.update_trial(
+                        earliest_pending, hints="metadata"
+                    )
+                    logger.info(
+                        "Worker '%s' picked up pending trial: %s.",
+                        self.worker_id,
+                        earliest_pending.id,
+                    )
+                    return earliest_pending
+
+            sampled_trials = self.state._sample_trial(
+                optimizer=self.optimizer,
+                worker_id=self.worker_id,
+                trials=trials,
+                n=self.settings.batch_size,
+            )
+            if isinstance(sampled_trials, Trial):
+                this_workers_trial = sampled_trials
+            else:
+                this_workers_trial = sampled_trials[0]
+
+            with self.state._trial_lock.lock(worker_id=self.worker_id), gc_disabled():
+                this_workers_trial.set_evaluating(
+                    time_started=time.time(),
+                    worker_id=self.worker_id,
+                )
+                try:
+                    self.state._trial_repo.store_new_trial(sampled_trials)
+                    if isinstance(sampled_trials, Trial):
+                        logger.info(
+                            "Worker '%s' sampled new trial: %s.",
+                            self.worker_id,
+                            this_workers_trial.id,
+                        )
+                    else:
+                        logger.info(
+                            "Worker '%s' sampled new trials: %s.",
+                            self.worker_id,
+                            ",".join(trial.id for trial in sampled_trials),
+                        )
+                    return this_workers_trial
+                except TrialAlreadyExistsError as e:
+                    if e.trial_id in trials:
+                        logger.error(
+                            "The new sampled trial was given an id of '%s', yet this"
+                            " already exists in the trials loaded in and given to the"
+                            " optimizer. This indicates a bug with the optimizer's"
+                            " allocation of ids.",
+                            e.trial_id,
+                        )
+                    else:
+                        _grace = DefaultWorker._GRACE
+                        _inc = FS_SYNC_GRACE_INC
+                        logger.warning(
+                            "The new sampled trial was given an id of '%s', which is not"
+                            " one that was loaded in by the optimizer. This is usually"
+                            " an indication that the file-system you are running on"
+                            " is not atomic in synchronizing file operations."
+                            " We have attempted to stabilize this but mileage may vary."
+                            " We are incrementing a grace period for file-locks from"
+                            " '%s's to '%s's. You can control the initial"
+                            " grace with 'NEPS_FS_SYNC_GRACE_BASE' and the increment with"
+                            " 'NEPS_FS_SYNC_GRACE_INC'.",
+                            e.trial_id,
+                            _grace,
+                            _grace + _inc,
+                        )
+                        DefaultWorker._GRACE = _grace + _inc
+                    raise e
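The grace period referenced in this warning is controlled by the two environment variables named in the message (imported from `neps.env` at the top of this file). A small sketch of tuning them; the exact point at which they are read is an assumption here:

    import os

    # Assumed to be read by `neps.env` at import time, so set them before importing neps.
    os.environ["NEPS_FS_SYNC_GRACE_BASE"] = "0.5"  # initial wait before reading trials
    os.environ["NEPS_FS_SYNC_GRACE_INC"] = "0.5"   # extra wait added after each id clash

    import neps  # noqa: E402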
+ " We have attempted to stabalize this but milage may vary." + " We are incrementing a grace period for file-locks from" + " '%s's to '%s's. You can control the initial" + " grace with 'NEPS_FS_SYNC_GRACE_BASE' and the increment with" + " 'NEPS_FS_SYNC_GRACE_INC'.", + e.trial_id, + _grace, + _grace + _inc, + ) + DefaultWorker._GRACE = _grace + FS_SYNC_GRACE_INC + raise e + + # Forgive me lord, for I have sinned, this function is atrocious but complicated + # due to locking. + def run(self) -> None: # noqa: C901, PLR0912, PLR0915 """Run the worker. Will keep running until one of the criterion defined by the `WorkerSettings` @@ -351,68 +481,84 @@ def run(self) -> None: # noqa: C901, PLR0915 _error_from_evaluation: Exception | None = None _repeated_fail_get_next_trial_count = 0 + n_repeated_failed_check_should_stop = 0 while True: - # NOTE: We rely on this function to do logging and raising errors if it should - should_stop = self._check_if_should_stop( - time_monotonic_start=_time_monotonic_start, - error_from_this_worker=_error_from_evaluation, - ) - if should_stop is not False: - logger.info(should_stop) - break - try: - trial_to_eval = self._get_next_trial_from_state() - _repeated_fail_get_next_trial_count = 0 - except Exception as e: - _repeated_fail_get_next_trial_count += 1 - logger.error( - "Error while trying to get the next trial to evaluate.", exc_info=True + # First check local worker settings + should_stop = self._check_worker_local_settings( + time_monotonic_start=_time_monotonic_start, + error_from_this_worker=_error_from_evaluation, ) - - # NOTE: This is to prevent any infinite loops if we can't get a trial + if should_stop is not False: + logger.info(should_stop) + break + + # Next check global errs having occured + should_stop = self._check_shared_error_stopping_criterion() + if should_stop is not False: + logger.info(should_stop) + break + + except WorkerRaiseError as e: + # If we raise a specific error, we should stop the worker + raise e + except Exception as e: + # An unknown exception, check our retry countk + n_repeated_failed_check_should_stop += 1 if ( - _repeated_fail_get_next_trial_count - >= N_FAILED_GET_NEXT_PENDING_ATTEMPTS_BEFORE_ERROR + n_repeated_failed_check_should_stop + >= MAX_RETRIES_WORKER_CHECK_SHOULD_STOP ): - raise WorkerFailedToGetPendingTrialsError( - "Worker '%s' failed to get pending trials %d times in a row." - " Bailing!" + raise WorkerRaiseError( + f"Worker {self.worker_id} failed to check if it should stop" + f" {MAX_RETRIES_WORKER_CHECK_SHOULD_STOP} times in a row. Bailing" ) from e - continue - - # If we can't set this working to evaluating, then just retry the loop - try: - trial_to_eval.set_evaluating( - time_started=time.time(), - worker_id=self.worker_id, - ) - self.state.put_updated_trial(trial_to_eval) - n_failed_set_trial_state = 0 - except VersionMismatchError: - n_failed_set_trial_state += 1 - logger.debug( - f"Another worker has managed to change trial '{trial_to_eval.id}'" - " to evaluate and put back into state. This is fine and likely means" - " the other worker is evaluating it.", - exc_info=True, - ) - except Exception: - n_failed_set_trial_state += 1 logger.error( - f"Error trying to set trial '{trial_to_eval.id}' to evaluating.", + "Unexpected error from worker '%s' while checking if it should stop.", + self.worker_id, exc_info=True, ) + time.sleep(1) # Help stagger retries + continue - # NOTE: This is to prevent infinite looping if it somehow keeps getting - # the same trial and can't set it to evaluating. 
+
+    # Forgive me lord, for I have sinned, this function is atrocious but complicated
+    # due to locking.
+    def run(self) -> None:  # noqa: C901, PLR0912, PLR0915
         """Run the worker.
 
         Will keep running until one of the criterion defined by the `WorkerSettings`
@@ -351,68 +481,84 @@ def run(self) -> None:  # noqa: C901, PLR0915
         _error_from_evaluation: Exception | None = None
 
         _repeated_fail_get_next_trial_count = 0
+        n_repeated_failed_check_should_stop = 0
         while True:
-            # NOTE: We rely on this function to do logging and raising errors if it should
-            should_stop = self._check_if_should_stop(
-                time_monotonic_start=_time_monotonic_start,
-                error_from_this_worker=_error_from_evaluation,
-            )
-            if should_stop is not False:
-                logger.info(should_stop)
-                break
-
             try:
-                trial_to_eval = self._get_next_trial_from_state()
-                _repeated_fail_get_next_trial_count = 0
-            except Exception as e:
-                _repeated_fail_get_next_trial_count += 1
-                logger.error(
-                    "Error while trying to get the next trial to evaluate.", exc_info=True
+                # First check local worker settings
+                should_stop = self._check_worker_local_settings(
+                    time_monotonic_start=_time_monotonic_start,
+                    error_from_this_worker=_error_from_evaluation,
                 )
-
-                # NOTE: This is to prevent any infinite loops if we can't get a trial
+                if should_stop is not False:
+                    logger.info(should_stop)
+                    break
+
+                # Next, check whether any global errors have occurred
+                should_stop = self._check_shared_error_stopping_criterion()
+                if should_stop is not False:
+                    logger.info(should_stop)
+                    break
+
+            except WorkerRaiseError as e:
+                # If we raise a specific error, we should stop the worker
+                raise e
+            except Exception as e:
+                # An unknown exception, check our retry count
+                n_repeated_failed_check_should_stop += 1
                 if (
-                    _repeated_fail_get_next_trial_count
-                    >= N_FAILED_GET_NEXT_PENDING_ATTEMPTS_BEFORE_ERROR
+                    n_repeated_failed_check_should_stop
+                    >= MAX_RETRIES_WORKER_CHECK_SHOULD_STOP
                 ):
-                    raise WorkerFailedToGetPendingTrialsError(
-                        "Worker '%s' failed to get pending trials %d times in a row."
-                        " Bailing!"
+                    raise WorkerRaiseError(
+                        f"Worker {self.worker_id} failed to check if it should stop"
+                        f" {MAX_RETRIES_WORKER_CHECK_SHOULD_STOP} times in a row. Bailing"
                     ) from e
-                continue
-
-            # If we can't set this working to evaluating, then just retry the loop
-            try:
-                trial_to_eval.set_evaluating(
-                    time_started=time.time(),
-                    worker_id=self.worker_id,
-                )
-                self.state.put_updated_trial(trial_to_eval)
-                n_failed_set_trial_state = 0
-            except VersionMismatchError:
-                n_failed_set_trial_state += 1
-                logger.debug(
-                    f"Another worker has managed to change trial '{trial_to_eval.id}'"
-                    " to evaluate and put back into state. This is fine and likely means"
-                    " the other worker is evaluating it.",
-                    exc_info=True,
-                )
-            except Exception:
-                n_failed_set_trial_state += 1
                 logger.error(
-                    f"Error trying to set trial '{trial_to_eval.id}' to evaluating.",
+                    "Unexpected error from worker '%s' while checking if it should stop.",
+                    self.worker_id,
                     exc_info=True,
                 )
+                time.sleep(1)  # Help stagger retries
+                continue
 
-            # NOTE: This is to prevent infinite looping if it somehow keeps getting
-            # the same trial and can't set it to evaluating.
-            if n_failed_set_trial_state != 0:
-                if n_failed_set_trial_state >= N_FAILED_TO_SET_TRIAL_STATE:
-                    raise WorkerFailedToGetPendingTrialsError(
-                        "Worker '%s' failed to set trial to evaluating %d times in a row."
-                        " Bailing!"
+            # From here, we now begin sampling or getting the next pending trial.
+            # As the global stopping criterion requires us to check all trials, it
+            # needs to be checked in lock-step with sampling, which is why both are
+            # done inside _get_next_trial.
+            try:
+                trial_to_eval = self._get_next_trial()
+                if trial_to_eval == "break":
+                    break
+                _repeated_fail_get_next_trial_count = 0
+            except Exception as e:
+                _repeated_fail_get_next_trial_count += 1
+                if isinstance(e, portalocker.exceptions.LockException):
+                    logger.debug(
+                        "Worker '%s': Timeout while trying to get the next trial to"
+                        " evaluate. If you are using a model based optimizer, such as"
+                        " Bayesian Optimization, this can occur as the number of"
+                        " configurations gets large. There's not much to do here"
+                        " and we will retry obtaining the lock.",
+                        self.worker_id,
+                        exc_info=True,
+                    )
+                else:
+                    logger.debug(
+                        "Worker '%s': Error while trying to get the next trial to"
+                        " evaluate.",
+                        self.worker_id,
+                        exc_info=True,
                     )
+                time.sleep(1)  # Help stagger retries
+                # NOTE: This is to prevent any infinite loops if we can't get a trial
+                if _repeated_fail_get_next_trial_count >= MAX_RETRIES_GET_NEXT_TRIAL:
+                    raise WorkerFailedToGetPendingTrialsError(
+                        f"Worker {self.worker_id} failed to get pending trials"
+                        f" {MAX_RETRIES_GET_NEXT_TRIAL} times in"
+                        " a row. Bailing!"
+                    ) from e
+
+                continue
 
             # We (this worker) has managed to set it to evaluating, now we can evaluate it
@@ -432,7 +578,7 @@ def run(self) -> None:  # noqa: C901, PLR0915
                 "Worker '%s' evaluated trial: %s as %s.",
                 self.worker_id,
                 evaluated_trial.id,
-                evaluated_trial.state,
+                evaluated_trial.metadata.state,
             )
 
             if report.cost is not None:
@@ -446,16 +592,19 @@ def run(self) -> None:  # noqa: C901, PLR0915
                 logger.exception(report.err)
                 _error_from_evaluation = report.err
 
-            self.state.report_trial_evaluation(
-                optimizer=self.optimizer,
-                trial=evaluated_trial,
-                report=report,
-                worker_id=self.worker_id,
-            )
+            # We do not retry this, as if some other worker has
+            # managed to manipulate this trial in the meantime,
+            # then something has gone wrong
+            with self.state._trial_lock.lock(worker_id=self.worker_id):
+                self.state._report_trial_evaluation(
+                    trial=evaluated_trial,
+                    report=report,
+                    worker_id=self.worker_id,
+                )
 
             logger.debug("Config %s: %s", evaluated_trial.id, evaluated_trial.config)
-            logger.debug("Loss %s: %s", evaluated_trial.id, report.loss)
-            logger.debug("Cost %s: %s", evaluated_trial.id, report.loss)
+            logger.debug("Loss %s: %s", evaluated_trial.id, report.objective_to_minimize)
+            logger.debug("Cost %s: %s", evaluated_trial.id, report.cost)
             logger.debug(
                 "Learning Curve %s: %s", evaluated_trial.id, report.learning_curve
             )
@@ -471,12 +620,13 @@ def _launch_runtime(  # noqa: PLR0913
     optimization_dir: Path,
     max_cost_total: float | None,
     ignore_errors: bool = False,
-    loss_value_on_error: float | None,
+    objective_to_minimize_value_on_error: float | None,
     cost_value_on_error: float | None,
     continue_until_max_evaluation_completed: bool,
     overwrite_optimization_dir: bool,
     max_evaluations_total: int | None,
     max_evaluations_for_worker: int | None,
+    sample_batch_size: int | None,
     pre_load_hooks: Iterable[Callable[[BaseOptimizer], BaseOptimizer]] | None,
 ) -> None:
     if overwrite_optimization_dir and optimization_dir.exists():
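The `NePSState.create_or_load` call in the next hunk is wrapped in Python's `for`/`else` retry idiom: the `else` branch runs only if the loop never executed `break`, i.e. every attempt failed. A small self-contained sketch of that idiom; `flaky_create` is a hypothetical stand-in for the real state creation.

```python
import time

MAX_RETRIES = 3
_attempts = {"n": 0}


def flaky_create() -> str:
    # Stand-in for an operation that fails transiently, e.g. on NFS.
    _attempts["n"] += 1
    if _attempts["n"] < 3:
        raise OSError("transient file-system error")
    return "state"


for _retry in range(MAX_RETRIES):
    try:
        state = flaky_create()
        break  # Success: the else-branch below is skipped.
    except Exception:
        time.sleep(0.5)  # Back off briefly before the next attempt.
else:
    # Only reached if no attempt ever hit `break`.
    raise RuntimeError(f"Failed after {MAX_RETRIES} attempts. Bailing!")

print(state)  # "state"
```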
@@ -486,18 +636,38 @@ def _launch_runtime(  # noqa: PLR0913
         )
         shutil.rmtree(optimization_dir)
 
-    neps_state = create_or_load_filebased_neps_state(
-        directory=optimization_dir,
-        optimizer_info=OptimizerInfo(optimizer_info),
-        optimizer_state=OptimizationState(
-            budget=(
-                BudgetInfo(max_cost_budget=max_cost_total, used_cost_budget=0)
-                if max_cost_total is not None
-                else None
-            ),
-            shared_state={},  # TODO: Unused for the time being...
-        ),
-    )
+    for _retry_count in range(MAX_RETRIES_CREATE_LOAD_STATE):
+        try:
+            neps_state = NePSState.create_or_load(
+                path=optimization_dir,
+                load_only=False,
+                optimizer_info=OptimizerInfo(optimizer_info),
+                optimizer_state=OptimizationState(
+                    seed_snapshot=SeedSnapshot.new_capture(),
+                    budget=(
+                        BudgetInfo(
+                            max_cost_total=max_cost_total,
+                            used_cost_budget=0,
+                            max_evaluations=max_evaluations_total,
+                            used_evaluations=0,
+                        )
+                    ),
+                    shared_state=None,  # TODO: Unused for the time being...
+                ),
+            )
+            break
+        except Exception:  # noqa: BLE001
+            time.sleep(0.5)
+            logger.debug(
+                "Error while trying to create or load the NePS state. Retrying...",
+                exc_info=True,
+            )
+    else:
+        raise RuntimeError(
+            "Failed to create or load the NePS state after"
+            f" {MAX_RETRIES_CREATE_LOAD_STATE} attempts. Bailing!"
+            " Please enable debug logging to see the errors that occurred."
+        )
 
     settings = WorkerSettings(
         on_error=(
@@ -505,12 +675,14 @@ def _launch_runtime(  # noqa: PLR0913
             if ignore_errors
             else OnErrorPossibilities.RAISE_ANY_ERROR
         ),
+        batch_size=sample_batch_size,
         default_report_values=DefaultReportValues(
-            loss_value_on_error=loss_value_on_error,
+            objective_to_minimize_value_on_error=objective_to_minimize_value_on_error,
             cost_value_on_error=cost_value_on_error,
             cost_if_not_provided=None,  # TODO: User can't specify yet
             learning_curve_on_error=None,  # TODO: User can't specify yet
-            learning_curve_if_not_provided="loss",  # report the loss as single value LC
+            learning_curve_if_not_provided="objective_to_minimize",  # report the
+            # objective_to_minimize as single value LC
        ),
         max_evaluations_total=max_evaluations_total,
         include_in_progress_evaluations_towards_maximum=(
@@ -524,6 +696,27 @@ def _launch_runtime(  # noqa: PLR0913
         max_cost_for_worker=None,  # TODO: User can't specify yet
     )
 
+    # HACK: Due to NFS file-systems, locking with the default `flock()` is not reliable.
+    # Hence, we overwrite `portalocker`'s lock call to use `lockf()` instead.
+    # Their source code comments that this is an option to use; however, it's not
+    # directly advertised as a parameter/env variable or otherwise.
+    import portalocker.portalocker as portalocker_lock_module
+
+    try:
+        import fcntl
+
+        if LINUX_FILELOCK_FUNCTION.lower() == "flock":
+            setattr(portalocker_lock_module, "LOCKER", fcntl.flock)
+        elif LINUX_FILELOCK_FUNCTION.lower() == "lockf":
+            setattr(portalocker_lock_module, "LOCKER", fcntl.lockf)
+        else:
+            raise ValueError(
+                f"Unknown file-locking function '{LINUX_FILELOCK_FUNCTION}'."
+                " Must be one of 'flock' or 'lockf'."
+ ) + except ImportError: + pass + worker = DefaultWorker.new( state=neps_state, optimizer=optimizer, diff --git a/neps/sampling/__init__.py b/neps/sampling/__init__.py new file mode 100644 index 000000000..032290d6c --- /dev/null +++ b/neps/sampling/__init__.py @@ -0,0 +1,4 @@ +from neps.sampling.priors import CenteredPrior, Prior, UniformPrior +from neps.sampling.samplers import Sampler, Sobol + +__all__ = ["CenteredPrior", "Prior", "Sampler", "Sobol", "UniformPrior"] diff --git a/neps/sampling/distributions.py b/neps/sampling/distributions.py new file mode 100644 index 000000000..e9c93b0aa --- /dev/null +++ b/neps/sampling/distributions.py @@ -0,0 +1,283 @@ +"""Custom distributions for NEPS.""" + +from __future__ import annotations + +import math +from collections.abc import Mapping +from dataclasses import dataclass +from numbers import Number +from typing import TYPE_CHECKING, ClassVar +from typing_extensions import override + +import torch +from torch.distributions import Distribution, Uniform, constraints +from torch.distributions.utils import broadcast_all + +from neps.search_spaces.domain import Domain + +if TYPE_CHECKING: + from torch.distributions.constraints import Constraint + +CONST_SQRT_2 = torch.tensor(math.sqrt(2), dtype=torch.float64) +CONST_INV_SQRT_2PI = torch.tensor(1 / math.sqrt(2 * math.pi), dtype=torch.float64) +CONST_INV_SQRT_2 = torch.tensor(1 / math.sqrt(2), dtype=torch.float64) +CONST_LOG_INV_SQRT_2PI = torch.tensor(math.log(CONST_INV_SQRT_2PI), dtype=torch.float64) +CONST_LOG_SQRT_2PI_E = torch.tensor( + 0.5 * math.log(2 * math.pi * math.e), + dtype=torch.float64, +) + +# from https://github.com/toshas/torch_truncnorm + + +class TruncatedStandardNormal(Distribution): + """Truncated Standard Normal distribution. + + Source: https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + """ + + arg_constraints: ClassVar[Mapping[str, Constraint]] = { + "a": constraints.real, + "b": constraints.real, + } # type: ignore + has_rsample: ClassVar[bool] = True + eps: ClassVar[float] = 1e-6 + + def __init__( + self, + a: torch.Tensor, + b: torch.Tensor, + validate_args: bool | None = None, + device: torch.device | None = None, + ): + """Initialize a truncated standard normal distribution. + + Args: + a: Lower truncation bound. + b: Upper truncation bound. + validate_args: Whether to validate input. + device: Device to use. 
+ """ + self.a, self.b = broadcast_all(a, b) + self.a = self.a.to(device) + self.b = self.b.to(device) + + if isinstance(a, Number) and isinstance(b, Number): + batch_shape = torch.Size() + else: + batch_shape = self.a.size() + + super().__init__(batch_shape, validate_args=validate_args) + + if self.a.dtype != self.b.dtype: + raise ValueError("Truncation bounds types are different") + + if any((self.a >= self.b).view(-1).tolist()): + raise ValueError("Incorrect truncation range") + + eps = self.eps + self._dtype_min_gt_0 = eps + self._dtype_max_lt_1 = 1 - eps + self._little_phi_a = self._little_phi(self.a) + self._little_phi_b = self._little_phi(self.b) + self._big_phi_a = self._big_phi(self.a) + self._big_phi_b = self._big_phi(self.b) + self._Z = (self._big_phi_b - self._big_phi_a).clamp(eps, 1 - eps) + self._log_Z = self._Z.log() + little_phi_coeff_a = torch.nan_to_num(self.a, nan=math.nan) + little_phi_coeff_b = torch.nan_to_num(self.b, nan=math.nan) + self._lpbb_m_lpaa_d_Z = ( + self._little_phi_b * little_phi_coeff_b + - self._little_phi_a * little_phi_coeff_a + ) / self._Z + self._mean = -(self._little_phi_b - self._little_phi_a) / self._Z + self._variance = ( + 1 + - self._lpbb_m_lpaa_d_Z + - ((self._little_phi_b - self._little_phi_a) / self._Z) ** 2 + ) + self._entropy = CONST_LOG_SQRT_2PI_E + self._log_Z - 0.5 * self._lpbb_m_lpaa_d_Z + + @constraints.dependent_property # type: ignore + @override # type: ignore + def support(self) -> constraints._Interval: + return constraints.interval(self.a, self.b) + + @property + @override # type: ignore + def mean(self) -> torch.Tensor: + return self._mean + + @property + @override # type: ignore + def variance(self) -> torch.Tensor: + return self._variance + + @override # type: ignore + def entropy(self) -> torch.Tensor: + return self._entropy + + @staticmethod + def _little_phi(x: torch.Tensor) -> torch.Tensor: + return (-(x**2) * 0.5).exp() * CONST_INV_SQRT_2PI + + def _big_phi(self, x: torch.Tensor) -> torch.Tensor: + phi = 0.5 * (1 + (x * CONST_INV_SQRT_2).erf()) + return phi.clamp(self.eps, 1 - self.eps) + + @staticmethod + def _inv_big_phi(x: torch.Tensor) -> torch.Tensor: + return CONST_SQRT_2 * (2 * x - 1).erfinv() + + @override # type: ignore + def cdf(self, value: torch.Tensor) -> torch.Tensor: + if self._validate_args: + self._validate_sample(value) + return ((self._big_phi(value) - self._big_phi_a) / self._Z).clamp(0, 1) + + @override # type: ignore + def icdf(self, value: torch.Tensor) -> torch.Tensor: + y = self._big_phi_a + value * self._Z + y = y.clamp(self.eps, 1 - self.eps) + return self._inv_big_phi(y) + + @override # type: ignore + def log_prob(self, value: torch.Tensor) -> torch.Tensor: + if self._validate_args: + self._validate_sample(value) + return CONST_LOG_INV_SQRT_2PI - self._log_Z - (value**2) * 0.5 + + @override # type: ignore + def rsample(self, sample_shape: torch.Size | None = None) -> torch.Tensor: + if sample_shape is None: + sample_shape = torch.Size([]) + shape = self._extended_shape(sample_shape) + p = torch.empty(shape, device=self.a.device).uniform_( + self._dtype_min_gt_0, self._dtype_max_lt_1 + ) + return self.icdf(p) + + +class TruncatedNormal(TruncatedStandardNormal): + """Truncated Normal distribution. 
+
+    https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
+    """
+
+    def __init__(
+        self,
+        loc: float | torch.Tensor,
+        scale: float | torch.Tensor,
+        a: float | torch.Tensor,
+        b: float | torch.Tensor,
+        validate_args: bool | None = None,
+        device: torch.device | None = None,
+    ):
+        """Initialize a truncated normal distribution.
+
+        Args:
+            loc: The mean of the distribution.
+            scale: The std of the distribution.
+            a: The lower bound of the distribution.
+            b: The upper bound of the distribution.
+            validate_args: Whether to validate input.
+            device: Device to use.
+        """
+        scale = torch.as_tensor(scale, device=device)
+        scale = scale.clamp_min(self.eps)
+
+        self.loc, self.scale, a, b = broadcast_all(loc, scale, a, b)
+        a = a.to(device)  # type: ignore
+        b = b.to(device)  # type: ignore
+        self._non_std_a = a
+        self._non_std_b = b
+        a = (a - self.loc) / self.scale
+        b = (b - self.loc) / self.scale
+        super().__init__(a, b, validate_args=validate_args)  # type: ignore
+        self._log_scale = self.scale.log()
+        self._mean = self._mean * self.scale + self.loc
+        self._variance = self._variance * self.scale**2
+        self._entropy += self._log_scale
+
+    def _to_std_rv(self, value: torch.Tensor) -> torch.Tensor:
+        return (value - self.loc) / self.scale
+
+    def _from_std_rv(self, value: torch.Tensor) -> torch.Tensor:
+        return value * self.scale + self.loc
+
+    @override
+    def cdf(self, value: torch.Tensor) -> torch.Tensor:
+        return super().cdf(self._to_std_rv(value))
+
+    @override
+    def icdf(self, value: torch.Tensor) -> torch.Tensor:
+        sample = self._from_std_rv(super().icdf(value))
+
+        # clamp data but keep gradients
+        sample_clip = torch.stack(
+            [sample.detach(), self._non_std_a.detach().expand_as(sample)], 0
+        ).max(0)[0]
+        sample_clip = torch.stack(
+            [sample_clip, self._non_std_b.detach().expand_as(sample)], 0
+        ).min(0)[0]
+        sample.data.copy_(sample_clip)
+        return sample
+
+    @override
+    def log_prob(self, value: torch.Tensor) -> torch.Tensor:
+        value = self._to_std_rv(value)
+        return super().log_prob(value) - self._log_scale
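A brief usage sketch of the `TruncatedNormal` defined above, since the centered priors later in this PR instantiate it over the unit interval; the specific numbers here are illustrative only.

```python
import torch

# A prior centered at 0.8, truncated to [0, 1].
dist = TruncatedNormal(loc=0.8, scale=0.25, a=0.0, b=1.0)

samples = dist.rsample(torch.Size([1000]))
assert bool(((samples >= 0.0) & (samples <= 1.0)).all())  # respects the bounds

# Density is highest at the center and falls off towards the bounds.
assert dist.log_prob(torch.tensor(0.8)) > dist.log_prob(torch.tensor(0.1))
```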
+
+
+class UniformWithUpperBound(Uniform):
+    """Uniform distribution with upper bound inclusive.
+
+    This is mostly a hack because torch's version of Uniform does not include
+    the upper bound, which only causes a problem when considering the log_prob.
+    Otherwise the upper bound works with every other method.
+    """
+
+    # OPTIM: This could probably be optimized a lot but I'm not sure how it affects
+    # gradients. Could probably do a different path depending on if `value` requires
+    # gradients or not.
+    @override  # type: ignore
+    def log_prob(self, value: torch.Tensor) -> torch.Tensor:
+        if self._validate_args:
+            self._validate_sample(value)
+
+        lb = self.low.le(value).type_as(self.low)
+        ub = self.high.ge(value).type_as(self.low)  # The main change: this is `gt` in the original
+        return torch.log(lb.mul(ub)) - torch.log(self.high - self.low)
+
+
+@dataclass
+class TorchDistributionWithDomain:
+    """A torch distribution with an associated domain it samples over."""
+
+    distribution: Distribution
+    domain: Domain
+
+
+UNIT_UNIFORM_DIST = TorchDistributionWithDomain(
+    distribution=UniformWithUpperBound(0, 1),
+    domain=Domain.unit_float(),
+)
+
+if __name__ == "__main__":
+    loc = 0.95
+    for confidence in torch.linspace(0.0, 0.8, 8):
+        scale = 1 - confidence
+        dist = TruncatedNormal(
+            loc=loc,
+            scale=scale,
+            a=0.0,
+            b=1.0,
+        )
+        xs = torch.linspace(0, 1, 100)
+        ys = dist.log_prob(xs)
+        import matplotlib.pyplot as plt
+
+        plt.plot(xs, ys, label=f"confidence={confidence}")
+        plt.plot(loc, dist.log_prob(torch.tensor(loc)), "ro")
+    plt.legend()
+    plt.show()
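Before the priors module, a quick illustration of why `UniformWithUpperBound` exists: torch's stock `Uniform` excludes the upper bound in `log_prob`, which would otherwise assign `-inf` to values sitting exactly on a domain boundary. A minimal sketch:

```python
import torch
from torch.distributions import Uniform

from neps.sampling.distributions import UniformWithUpperBound

value = torch.tensor(1.0)
print(Uniform(0.0, 1.0, validate_args=False).log_prob(value))  # tensor(-inf)
print(UniformWithUpperBound(0.0, 1.0).log_prob(value))         # tensor(0.)
```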
diff --git a/neps/sampling/priors.py b/neps/sampling/priors.py
new file mode 100644
index 000000000..be43cb01e
--- /dev/null
+++ b/neps/sampling/priors.py
@@ -0,0 +1,495 @@
+"""Priors for search spaces.
+
+Loosely speaking, they are joint distributions over multiple independent
+variables, i.e. each column of a tensor is assumed to be independent and
+can be acted on independently.
+
+See the class doc description of [`Prior`][neps.priors.Prior] for more details.
+"""
+
+from __future__ import annotations
+
+from abc import abstractmethod
+from collections.abc import Iterable, Mapping, Sequence
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any
+from typing_extensions import override
+
+import torch
+
+from neps.sampling.distributions import (
+    UNIT_UNIFORM_DIST,
+    TorchDistributionWithDomain,
+    TruncatedNormal,
+)
+from neps.sampling.samplers import Sampler
+from neps.search_spaces import Categorical
+from neps.search_spaces.domain import UNIT_FLOAT_DOMAIN, Domain
+from neps.search_spaces.encoding import ConfigEncoder
+
+if TYPE_CHECKING:
+    from torch.distributions import Distribution
+
+    from neps.search_spaces import Float, Integer, SearchSpace
+
+
+class Prior(Sampler):
+    """A protocol for priors over search spaces.
+
+    Extends from the [`Sampler`][neps.samplers.Sampler] protocol.
+
+    At its core, the two methods that need to be implemented are
+    `log_pdf` and `sample`. The `log_pdf` method should return the
+    log probability of a given tensor of samples under its distribution.
+    The `sample` method should return a tensor of samples from the distribution.
+
+    All values given to the `log_pdf` and the ones returned from the
+    `sample` method are assumed to be in the value domain of the prior,
+    i.e. the [`.domains`][neps.priors.Prior] attribute.
+
+    !!! warning
+
+        The domain from which samples are actually drawn does not necessarily
+        need to match the value domain. For example, the
+        [`UniformPrior`][neps.priors.UniformPrior] class uses a unit uniform
+        distribution to sample from the unit interval before converting
+        samples to the value domain.
+
+        **As a result, the `log_pdf` and `pdf` methods may not give the same
+        values as you might expect for a distribution over the value domain.**
+
+        For example, consider a value domain `[0, 1e9]`. You might expect
+        the `pdf` to be `1e-9` (1 / 1e9) for any given value inside the domain.
+        However, since the `UniformPrior` samples from the unit interval, the `pdf` will
+        actually be `1` (1 / 1) for any value inside the domain.
+    """
+
+    @abstractmethod
+    def log_pdf(
+        self,
+        x: torch.Tensor,
+        *,
+        frm: ConfigEncoder | list[Domain] | Domain,
+    ) -> torch.Tensor:
+        """Compute the log pdf of values in `x` under a prior.
+
+        The last dimension of `x` is assumed to be independent, such that the
+        log pdf of the entire tensor is the sum of the log
+        pdf of each element in that dimension.
+
+        For example, if `x` is of shape `(n_samples, n_dims)`, then
+        you will be given back a tensor of shape `(n_samples,)` with
+        each entry being the log pdf of the corresponding sample.
+
+        Args:
+            x: Tensor of shape (..., n_dims)
+                In the case of a 1D tensor, the shape is assumed to be (n_dims,)
+            frm: The domain of the values in `x`. If a single domain, then all the
+                values are assumed to be from that domain, otherwise each column
+                `n_dims` in (n_samples, n_dims) is from the corresponding domain.
+                If a `ConfigEncoder` is passed in, it will just take its domains
+                for use.
+
+        Returns:
+            Tensor of shape (...,), with the last dimension reduced out. In the
+            case that only a single-dimensional tensor is passed, the returned
+            value is a scalar.
+        """
+
+    def pdf(
+        self, x: torch.Tensor, *, frm: ConfigEncoder | Domain | list[Domain]
+    ) -> torch.Tensor:
+        """Compute the pdf of values in `x` under a prior.
+
+        See [`log_pdf()`][neps.priors.Prior.log_pdf] for details on shapes.
+        """
+        return torch.exp(self.log_pdf(x, frm=frm))
+
+    @classmethod
+    def uniform(cls, ncols: int) -> UniformPrior:
+        """Create a uniform prior over a given number of columns.
+
+        Args:
+            ncols: The number of columns in the tensor to sample.
+        """
+        return UniformPrior(ndim=ncols)
+
+    @classmethod
+    def from_parameters(
+        cls,
+        parameters: Mapping[str, Categorical | Float | Integer],
+        *,
+        center_values: Mapping[str, Any] | None = None,
+        confidence_values: Mapping[str, float] | None = None,
+    ) -> CenteredPrior:
+        """Please refer to [`from_space()`][neps.priors.Prior.from_space]
+        for more details.
+        """
+        # TODO: This needs to be moved to the search space class, however
+        # to not break the current prior based APIs used elsewhere, we can
+        # just manually create this here.
+        # We use confidence here where `0` means no confidence and `1` means
+        # absolute confidence. This gets translated into stds and weights
+        # accordingly in a `CenteredPrior`
+        _mapping = {"low": 0.25, "medium": 0.5, "high": 0.75}
+
+        center_values = center_values or {}
+        confidence_values = confidence_values or {}
+        domains: list[Domain] = []
+        centers: list[tuple[Any, float] | None] = []
+        for name, hp in parameters.items():
+            domains.append(hp.domain)
+
+            default = center_values.get(name, hp.prior)
+            if default is None:
+                centers.append(None)
+                continue
+
+            confidence_score = confidence_values.get(
+                name,
+                _mapping[hp.prior_confidence_choice],
+            )
+            center = hp.choices.index(default) if isinstance(hp, Categorical) else default
+            centers.append((center, confidence_score))
+
+        return Prior.from_domains_and_centers(domains=domains, centers=centers)
+
+    @classmethod
+    def from_domains_and_centers(
+        cls,
+        domains: Iterable[Domain] | ConfigEncoder,
+        centers: Iterable[None | tuple[int | float, float]],
+        *,
+        device: torch.device | None = None,
+    ) -> CenteredPrior:
+        """Create a prior for a given list of domains.
+
+        Will use a `TruncatedNormal` distribution for all parameters,
+        except those that have a domain marked with `is_categorical=True`,
+        using a `Categorical` distribution instead.
+        If the center for a given domain is `None`, a uniform prior
+        will be used instead.
+
+        For non-categoricals, this will be interpreted as the mean and
+        std `(1 - confidence)` for a truncnorm. For categorical values,
+        the _center_ will contain a probability mass of `confidence` with
+        the remaining `(1 - confidence)` probability mass distributed uniformly
+        amongst the other choices.
+
+        The order of the items in `domains` matters and should align
+        with any tensors that you will use to evaluate from the prior.
+        I.e. the first domain in `domains` will be the first column
+        of a tensor that this prior can be used on.
+
+        Args:
+            domains: domains over which to have a centered prior.
+            centers: centers for the priors, i.e. the mode of the prior for that
+                domain, along with the confidence of that mode, which gets
+                re-interpreted as the std of the truncnorm or the probability
+                mass for the categorical.
+
+                If `None`, a uniform prior will be used.
+
+                !!! warning
+
+                    The values contained in centers should be contained within the
+                    domain. All confidence levels should be within the `[0, 1]` range.
+
+            confidence: The confidence level for the center. Entries containing `None`
+                should match with `centers` that are `None`. If not, this is considered an
+                error.
+            device: Device to place the tensors on for distributions.
+
+        Returns:
+            A prior for the search space.
+        """
+        match domains:
+            case ConfigEncoder():
+                domains = domains.domains
+            case _:
+                domains = list(domains)
+
+        distributions: list[TorchDistributionWithDomain] = []
+        for domain, center_conf in zip(domains, centers, strict=True):
+            # If the center is None, we use a uniform distribution. We try to match
+            # the distributions to all be unit uniform as it can speed up sampling when
+            # consistently the same. This still works for categoricals.
+            if center_conf is None:
+                distributions.append(UNIT_UNIFORM_DIST)
+                continue
+
+            center, conf = center_conf
+            assert 0 <= conf <= 1
+
+            # If categorical, treat it as a weighted distribution over integers
+            if domain.is_categorical:
+                domain_as_ints = domain.as_integer_domain()
+                assert domain_as_ints.cardinality is not None
+
+                weight_for_choice = conf
+                remaining_weight = 1 - weight_for_choice
+
+                distributed_weight = remaining_weight / (domain_as_ints.cardinality - 1)
+                weights = torch.full(
+                    (domain_as_ints.cardinality,),
+                    distributed_weight,
+                    device=device,
+                    dtype=torch.float64,
+                )
+                center_index = domain_as_ints.cast_one(center, frm=domain)
+                weights[int(center_index)] = conf
+
+                dist = TorchDistributionWithDomain(
+                    distribution=torch.distributions.Categorical(
+                        probs=weights, validate_args=False
+                    ),
+                    domain=domain,
+                )
+                distributions.append(dist)
+                continue
+
+            # Otherwise, we use a continuous truncnorm
+            unit_center = domain.to_unit_one(center)
+            scale = torch.tensor(1 - conf, device=device, dtype=torch.float64)
+            a = torch.tensor(0.0, device=device, dtype=torch.float64)
+            b = torch.tensor(1.0, device=device, dtype=torch.float64)
+            dist = TorchDistributionWithDomain(
+                distribution=TruncatedNormal(
+                    loc=unit_center,
+                    scale=scale,
+                    a=a,
+                    b=b,
+                    device=device,
+                    validate_args=False,
+                ),
+                domain=UNIT_FLOAT_DOMAIN,
+            )
+            distributions.append(dist)
+
+        return CenteredPrior(distributions=distributions)
+
+    @classmethod
+    def from_space(
+        cls,
+        space: SearchSpace,
+        *,
+        center_values: Mapping[str, Any] | None = None,
+        confidence_values: Mapping[str, float] | None = None,
+        include_fidelity: bool = False,
+    ) -> CenteredPrior:
+        """Create a prior distribution from a search space.
+
+        Takes care to insert things in the correct order.
+
+        Args:
+            space: The search space to create a prior from. Will look
+                at the `.default` and `.prior_confidence` of the parameters
+                to create a truncated normal.
+                Any parameters that do not have a `.default` will be covered by
+                a uniform distribution.
+            center_values: Any additional values that should be used
+                for centering the prior. Overwrites whatever is set by default
+                in the `space`.
+            confidence_values: Any additional values that should be
+                used for determining the strength of the prior. Values should
+                be between 0 and 1. Overwrites whatever is set by default in
+                the `space`.
+            include_fidelity: Whether to include computing the prior over the
+                fidelity of the search space.
+
+        Returns:
+            The prior distribution
+        """
+        params = {**space.numerical, **space.categoricals}
+        if include_fidelity:
+            params.update(space.fidelities)
+
+        return Prior.from_parameters(
+            params,
+            center_values=center_values,
+            confidence_values=confidence_values,
+        )
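To make the classmethods above concrete, here is a hypothetical sketch that builds a centered prior over two unit-float parameters, one with a prior center and one without, and scores samples under it. `Domain.unit_float()` appears elsewhere in this PR; the numbers themselves are made up.

```python
import torch

from neps.sampling.priors import Prior
from neps.search_spaces.domain import Domain

domains = [Domain.unit_float(), Domain.unit_float()]
prior = Prior.from_domains_and_centers(
    domains=domains,
    centers=[(0.5, 0.8), None],  # (center, confidence), or None for uniform
)

xs = prior.sample(5, to=domains)             # shape (5, 2), in the value domains
log_scores = prior.log_pdf(xs, frm=domains)  # shape (5,)
print(log_scores)
```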
+
+
+@dataclass
+class CenteredPrior(Prior):
+    """A prior that is centered around a given value with a given confidence.
+
+    This prior is useful for creating priors for search spaces where the
+    values are centered around a given value with a given confidence level.
+
+    You can use a `torch.distribution.Uniform` for any values which do
+    not have a center and confidence level, i.e. no prior information.
+
+    You can create this class more easily using
+    [`Prior.from_domains_and_centers()`][neps.priors.Prior.from_domains_and_centers].
+    """
+
+    distributions: list[TorchDistributionWithDomain]
+    """Distributions along with the corresponding domains they sample from."""
+
+    _distribution_domains: list[Domain] = field(init=False)
+
+    # OPTIM: These are used for an optimization in `log_pdf`
+    _meaningful_ixs: list[int] = field(init=False)
+    _meaningful_doms: list[Domain] = field(init=False)
+    _meaningful_dists: list[Distribution] = field(init=False)
+
+    def __post_init__(self) -> None:
+        self._distribution_domains = [dist.domain for dist in self.distributions]
+
+        rest: list[tuple[int, Domain, Distribution]] = []
+        for i, dist in enumerate(self.distributions):
+            if dist != UNIT_UNIFORM_DIST:
+                rest.append((i, dist.domain, dist.distribution))
+
+        if len(rest) == 0:
+            self._meaningful_ixs = []
+            self._meaningful_doms = []
+            self._meaningful_dists = []
+            return
+
+        self._meaningful_ixs, self._meaningful_doms, self._meaningful_dists = zip(  # type: ignore
+            *rest, strict=True
+        )
+
+    @property
+    @override
+    def ncols(self) -> int:
+        return len(self.distributions)
+
+    @override
+    def log_pdf(
+        self, x: torch.Tensor, *, frm: list[Domain] | Domain | ConfigEncoder
+    ) -> torch.Tensor:
+        if x.ndim == 0:
+            raise ValueError("Expected a tensor of shape (..., ncols).")
+
+        if x.ndim == 1:
+            x = x.unsqueeze(0)
+
+        if x.shape[-1] != len(self.distributions):
+            raise ValueError(
+                f"Got a tensor `x` whose last dimension (the hyperparameter dimension)"
+                f" is of length {x.shape[-1]=} but"
+                f" the CenteredPrior being called has {len(self.distributions)=}"
+                " distributions to use for calculating the `log_pdf`. Perhaps"
+                " the config or the prior have a mismatch as one includes a"
+                " fidelity?"
+            )
+
+        # OPTIM: We can actually just skip elements that are distributed uniformly as
+        # **assuming** they are all correctly in bounds, their log_pdf will be 0 and
+        # contribute nothing.
+        # It also helps numeric stability to avoid useless computations.
+        if len(self._meaningful_ixs) == 0:
+            return torch.zeros(x.shape[:-1], dtype=torch.float64, device=x.device)
+
+        match frm:
+            case Domain():
+                pass
+            case ConfigEncoder():
+                frm = [frm.domains[i] for i in self._meaningful_ixs]
+            case Sequence():
+                frm = [frm[i] for i in self._meaningful_ixs]
+            case _:
+                raise TypeError(f"Unexpected type {type(frm)=}")
+
+        # Cast all values from the value domains to the domain of the sampler.
+        translated_x = Domain.translate(
+            x[..., self._meaningful_ixs],
+            frm=frm,
+            to=self._meaningful_doms,
+        )
+
+        # Calculate the log probabilities of the sample domain tensors under their
+        # respective distributions.
+        # NOTE: There's no guarantee these are actually probabilities and so we
+        # treat them as unnormalized log pdfs
+        itr = iter(zip(self._meaningful_ixs, self._meaningful_dists, strict=False))
+        first_i, first_dist = next(itr)
+        log_pdfs = first_dist.log_prob(translated_x[..., first_i])
+
+        for i, dist in itr:
+            log_pdfs = log_pdfs + dist.log_prob(translated_x[..., i])
+
+        return log_pdfs
+
+    @override
+    def sample(
+        self,
+        n: int | torch.Size,
+        *,
+        to: Domain | list[Domain] | ConfigEncoder,
+        seed: torch.Generator | None = None,
+        device: torch.device | None = None,
+        dtype: torch.dtype | None = None,
+    ) -> torch.Tensor:
+        if seed is not None:
+            raise NotImplementedError("Seeding is not yet implemented.")
+
+        _out_shape = (
+            torch.Size((n, self.ncols))
+            if isinstance(n, int)
+            else torch.Size((*n, self.ncols))
+        )
+        _n = torch.Size((n,)) if isinstance(n, int) else n
+
+        out = torch.empty(_out_shape, device=device, dtype=dtype)
+        for i, dist in enumerate(self.distributions):
+            out[..., i] = dist.distribution.sample(_n)
+
+        return Domain.translate(out, frm=self._distribution_domains, to=to, dtype=dtype)
+
+
+@dataclass
+class UniformPrior(Prior):
+    """A prior that is uniform over a given domain.
+
+    Uses a unit uniform distribution under the hood before converting to the
+    value domain.
+    """
+
+    ndim: int
+    """The number of columns in the tensor to sample from."""
+
+    @property
+    @override
+    def ncols(self) -> int:
+        return self.ndim
+
+    @override
+    def log_pdf(
+        self,
+        x: torch.Tensor,
+        *,
+        frm: Domain | list[Domain] | ConfigEncoder,
+    ) -> torch.Tensor:
+        # NOTE: We just assume everything is in bounds...
+        shape = x.shape[:-1]  # Select everything up to last dimension (configuration)
+        return torch.zeros(shape, dtype=torch.float64, device=x.device)
+
+    @override
+    def sample(
+        self,
+        n: int | torch.Size,
+        *,
+        to: Domain | list[Domain] | ConfigEncoder,
+        seed: torch.Generator | None = None,
+        device: torch.device | None = None,
+        dtype: torch.dtype | None = None,
+    ) -> torch.Tensor:
+        if seed is not None:
+            raise NotImplementedError("Seeding is not yet implemented.")
+
+        _n = (
+            torch.Size((n, self.ndim))
+            if isinstance(n, int)
+            else torch.Size((*n, self.ndim))
+        )
+        # Doesn't like integer dtypes
+        if dtype is not None and dtype.is_floating_point:
+            samples = torch.rand(_n, device=device, dtype=dtype)
+        else:
+            samples = torch.rand(_n, device=device)
+
+        return Domain.translate(samples, frm=UNIT_FLOAT_DOMAIN, to=to, dtype=dtype)
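The `UniformPrior` above makes the docstring's earlier warning concrete: its `log_pdf` is taken over the unit sampling domain rather than the value domain, so it returns `0.0` no matter how wide the value domain is. A sketch, where `Domain.floating` is assumed to be the constructor for a bounded float domain:

```python
import torch

from neps.sampling.priors import UniformPrior
from neps.search_spaces.domain import Domain

big = Domain.floating(0.0, 1e9)  # assumed constructor for a float domain
prior = UniformPrior(ndim=1)

x = prior.sample(4, to=big)
print(prior.log_pdf(x, frm=big))  # tensor([0., 0., 0., 0.]): density 1, not 1e-9
```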
+""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from collections.abc import Sequence +from dataclasses import dataclass, field +from functools import reduce +from typing import TYPE_CHECKING +from typing_extensions import override + +import torch +from more_itertools import all_equal + +from neps.search_spaces.domain import UNIT_FLOAT_DOMAIN, Domain +from neps.search_spaces.encoding import ConfigEncoder + +if TYPE_CHECKING: + from neps.sampling.priors import UniformPrior + + +class Sampler(ABC): + """A protocol for sampling tensors and vonerting them to a given domain.""" + + @property + @abstractmethod + def ncols(self) -> int: + """The number of columns in the samples produced by this sampler.""" + ... + + @abstractmethod + def sample( + self, + n: int | torch.Size, + *, + to: Domain | list[Domain] | ConfigEncoder, + seed: torch.Generator | None = None, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> torch.Tensor: + """Sample `n` points and convert them to the given domain. + + Args: + n: The number of points to sample. If a torch.Size, an additional dimension + will be added with [`.ncols`][neps.samplers.Sampler.ncols]. + For example, if `n = 5`, the output will be `(5, ncols)`. If + `n = (5, 3)`, the output will be `(5, 3, ncols)`. + to: If a single domain, `.ncols` columns will be produced form that one + domain. If a list of domains, then it must have the same length as the + number of columns, with each column being in the corresponding domain. + seed: The seed generator + dtype: The dtype of the output tensor. + device: The device to cast the samples to. + + Returns: + A tensor of (n, ndim) points sampled cast to the given domain. + """ + + def sample_one( + self, + *, + to: Domain | list[Domain] | ConfigEncoder, + seed: torch.Generator | None = None, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> torch.Tensor: + """Sample a single point and convert it to the given domain. + + The configuration will be a single dimensional tensor of shape + `(ncols,)`. + + Please see [`sample`][neps.samplers.Sampler.sample] for more details. + """ + return self.sample(1, to=to, seed=seed, device=device, dtype=dtype).squeeze(0) + + @classmethod + def sobol(cls, ndim: int, *, scramble: bool = True) -> Sobol: + """Create a Sobol sampler. + + Args: + ndim: The number of columns to sample. + scramble: Whether to scramble the Sobol sequence. + + Returns: + A Sobol sampler. + """ + return Sobol(ndim=ndim, scramble=scramble) + + @classmethod + def uniform(cls, ndim: int) -> UniformPrior: + """Create a uniform sampler. + + Args: + ndim: The number of columns to sample. + + Returns: + A uniform sampler. + """ + from neps.sampling.priors import UniformPrior + + return UniformPrior(ndim=ndim) + + @classmethod + def borders(cls, ndim: int) -> BorderSampler: + """Create a border sampler. + + Args: + ndim: The number of dimensions to sample. + + Returns: + A border sampler. + """ + return BorderSampler(ndim=ndim) + + +# Technically this could be a prior with a uniform distribution +@dataclass +class Sobol(Sampler): + """Sample from a Sobol sequence.""" + + ndim: int + """The number of dimensions to sample for.""" + + scramble: bool = True + """Whether to scramble the Sobol sequence.""" + + def __post_init__(self) -> None: + if self.ndim < 1: + raise ValueError( + "The number of dimensions must be at least 1." + f" Got {self.ndim} dimensions." 
+
+
+# Technically this could be a prior with a uniform distribution
+@dataclass
+class Sobol(Sampler):
+    """Sample from a Sobol sequence."""
+
+    ndim: int
+    """The number of dimensions to sample for."""
+
+    scramble: bool = True
+    """Whether to scramble the Sobol sequence."""
+
+    def __post_init__(self) -> None:
+        if self.ndim < 1:
+            raise ValueError(
+                "The number of dimensions must be at least 1."
+                f" Got {self.ndim} dimensions."
+            )
+
+    @property
+    @override
+    def ncols(self) -> int:
+        return self.ndim
+
+    @override
+    def sample(
+        self,
+        n: int | torch.Size,
+        *,
+        to: Domain | list[Domain] | ConfigEncoder,
+        seed: torch.Generator | None = None,
+        device: torch.device | None = None,
+        dtype: torch.dtype | None = None,
+    ) -> torch.Tensor:
+        if seed is not None:
+            raise NotImplementedError("Setting the seed is not supported yet")
+
+        # Sobol can only produce 2d tensors. To handle batches or arbitrary
+        # dimensions, we get a count of the total number of samples needed
+        # and reshape the output tensor to the desired shape, if needed.
+        _n = n if isinstance(n, int) else reduce(lambda x, y: x * y, n)
+
+        _seed = (
+            None if seed is None else torch.randint(0, 2**31, (1,), generator=seed).item()
+        )
+        sobol = torch.quasirandom.SobolEngine(
+            dimension=self.ndim, scramble=self.scramble, seed=_seed
+        )
+
+        # If integer dtype, sobol will refuse, so we draw as float and cast afterwards
+        if dtype is not None and not dtype.is_floating_point:
+            x = sobol.draw(_n, dtype=torch.float64)
+            x = x.to(dtype=dtype, device=device)
+        else:
+            x = sobol.draw(_n, dtype=dtype)
+
+        # If we got extra dimensions, such as batch dimensions, we need to
+        # reshape the tensor to the desired shape.
+        if isinstance(n, torch.Size):
+            x = x.view(*n, self.ncols)
+
+        return Domain.translate(x, frm=UNIT_FLOAT_DOMAIN, to=to)
+
+
+@dataclass
+class WeightedSampler(Sampler):
+    """A sampler that samples from a weighted combination of samplers."""
+
+    samplers: Sequence[Sampler]
+    """The samplers to sample from."""
+
+    weights: torch.Tensor
+    """The weights for each sampler."""
+
+    sampler_probabilities: torch.Tensor = field(init=False, repr=False)
+    """The probabilities for each sampler. Normalized weights."""
+
+    def __post_init__(self) -> None:
+        if len(self.samplers) < 2:
+            raise ValueError(
+                f"At least two samplers must be given. Got {len(self.samplers)}"
+            )
+
+        if self.weights.ndim != 1:
+            raise ValueError("Weights must be a 1D tensor.")
+
+        if len(self.samplers) != len(self.weights):
+            raise ValueError("The number of samplers and weights must be the same.")
+
+        ncols = [sampler.ncols for sampler in self.samplers]
+        if not all_equal(ncols):
+            raise ValueError(
+                "All samplers must have the same number of columns." f" Got {ncols}."
+ ) + + self._ncols = ncols[0] + self.sampler_probabilities = self.weights / self.weights.sum() + + @property + @override + def ncols(self) -> int: + return self._ncols + + @override + def sample( + self, + n: int | torch.Size, + *, + to: Domain | list[Domain] | ConfigEncoder, + seed: torch.Generator | None = None, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> torch.Tensor: + if dtype is None: + match to: + case Domain(): + dtype = to.preffered_dtype + case ConfigEncoder(): + dtype = ( + torch.float64 + if any(d.preffered_dtype.is_floating_point for d in to.domains) + else torch.int64 + ) + case _: + dtype = ( + torch.float64 + if any(d.preffered_dtype.is_floating_point for d in to) + else torch.int64 + ) + + if seed is not None: + raise NotImplementedError("Seeding is not yet implemented.") + + # Calculate the total number of samples required + if isinstance(n, int): + total_samples = n + output_shape = (n, self.ncols) + else: + total_samples = reduce(lambda x, y: x * y, n) + output_shape = (*n, self.ncols) + + # Randomly select which sampler to sample from for each of the total_samples + chosen_samplers = torch.empty((total_samples,), device=device, dtype=torch.int64) + chosen_samplers = torch.multinomial( + self.sampler_probabilities, + total_samples, + replacement=True, + generator=seed, + out=chosen_samplers, + ) + + # Create an empty tensor to hold all samples + output_samples = torch.empty( + (total_samples, self.ncols), + device=device, + dtype=dtype, + ) + + # Loop through each sampler and its associated indices + for i, sampler in enumerate(self.samplers): + # Find indices where the chosen sampler is i + _i = torch.tensor(i, dtype=torch.int64, device=device) + indices = torch.where(chosen_samplers == _i)[0] + + if len(indices) > 0: + # Sample from the sampler for the required number of indices + samples_from_sampler = sampler.sample( + len(indices), + to=to, + seed=seed, + device=device, + dtype=dtype, + ) + output_samples[indices] = samples_from_sampler + + # Reshape to the output shape including ncols dimension + return output_samples.view(output_shape) + + +@dataclass +class BorderSampler(Sampler): + """A sampler that samples from the border of a hypercube.""" + + ndim: int + + @property + @override + def ncols(self) -> int: + return self.ndim + + @property + def n_possible(self) -> int: + """The amount of possible border configurations.""" + return 2**self.ndim + + @override + def sample( + self, + n: int | torch.Size, + *, + to: Domain | list[Domain] | ConfigEncoder, + seed: torch.Generator | None = None, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> torch.Tensor: + dtype = dtype or torch.float64 + + _arange = torch.arange(self.n_possible, device=device, dtype=torch.int32) + # Calculate the total number of samples required + if isinstance(n, int): + total_samples = min(n, self.n_possible) + output_shape = (total_samples, self.ncols) + else: + total_samples = reduce(lambda x, y: x * y, n) + if total_samples > self.n_possible: + raise ValueError( + f"The shape of samples requested (={n}) is more than the number of " + f"possible border configurations (={self.n_possible})." 
+ ) + output_shape = (*n, self.ncols) + + if self.n_possible <= total_samples: + configs = _arange + else: + # Otherwise, we take a random sample of the 2**n possible border configs + rand_ix = torch.randperm(self.n_possible, generator=seed, device=device)[ + :total_samples + ] + configs = _arange[rand_ix] + + # https://stackoverflow.com/a/63546308/5332072 + bit_masks = 2 ** _arange[: self.ndim] + configs = configs.unsqueeze(1).bitwise_and(bit_masks).ne(0).to(dtype) + # Reshape to the output shape including ncols dimension + configs = configs.view(output_shape) + return Domain.translate(configs, frm=UNIT_FLOAT_DOMAIN, to=to) diff --git a/neps/search_spaces/__init__.py b/neps/search_spaces/__init__.py index 7eb4332ab..b726b8ae1 100644 --- a/neps/search_spaces/__init__.py +++ b/neps/search_spaces/__init__.py @@ -1,38 +1,48 @@ -from neps.search_spaces.architecture.api import ArchitectureParameter, FunctionParameter +from neps.search_spaces.architecture.api import ( + Architecture, + ArchitectureParameter, + Function, + FunctionParameter, +) from neps.search_spaces.architecture.graph_grammar import ( + CoreGraphGrammar, GraphGrammar, - GraphGrammarCell, - GraphGrammarRepetitive, GraphParameter, ) from neps.search_spaces.hyperparameters import ( + Categorical, CategoricalParameter, + Constant, ConstantParameter, + Float, FloatParameter, + Integer, IntegerParameter, + Numerical, NumericalParameter, ) -from neps.search_spaces.parameter import ( - MutatableParameter, - Parameter, - ParameterWithPrior, -) +from neps.search_spaces.parameter import Parameter, ParameterWithPrior from neps.search_spaces.search_space import SearchSpace __all__ = [ + "Architecture", "ArchitectureParameter", + "Categorical", "CategoricalParameter", + "Constant", "ConstantParameter", + "CoreGraphGrammar", + "Float", "FloatParameter", + "Function", "FunctionParameter", "GraphGrammar", - "GraphGrammarCell", - "GraphGrammarRepetitive", "GraphParameter", + "Integer", "IntegerParameter", + "Numerical", "NumericalParameter", "Parameter", "ParameterWithPrior", - "MutatableParameter", "SearchSpace", ] diff --git a/neps/search_spaces/architecture/api.py b/neps/search_spaces/architecture/api.py index 98af2f14c..9521bd7aa 100644 --- a/neps/search_spaces/architecture/api.py +++ b/neps/search_spaces/architecture/api.py @@ -1,18 +1,20 @@ from __future__ import annotations import inspect -from typing import Callable +from typing import TYPE_CHECKING, Callable import networkx as nx -from torch import nn from .cfg import Grammar from .cfg_variants.constrained_cfg import ConstrainedGrammar -from .graph_grammar import GraphGrammar, GraphGrammarMultipleRepetitive +from .graph_grammar import GraphGrammar + +if TYPE_CHECKING: + from torch import nn def _dict_structure_to_str( - structure: dict, primitives: dict, repetitive_mapping: dict = None + structure: dict, primitives: dict, repetitive_mapping: dict | None = None ) -> str: def _save_replace(string: str, __old: str, __new: str): while string.count(__old) > 0: @@ -25,18 +27,18 @@ def _save_replace(string: str, __old: str, __new: str): grammar = grammar.replace("(", " ") grammar = grammar.replace(")", "") grammar = grammar.replace(",", "") - for primitive in primitives.keys(): + for primitive in primitives: grammar = _save_replace(grammar, f" {primitive} ", f' "{primitive}" ') grammar = _save_replace(grammar, f" {primitive}\n", f' "{primitive}"\n') if repetitive_mapping is not None: - for placeholder in repetitive_mapping.keys(): + for placeholder in repetitive_mapping: grammar = 
_save_replace(grammar, f" {placeholder} ", f' "{placeholder}" ') grammar = _save_replace(grammar, f" {placeholder}\n", f' "{placeholder}"\n') return grammar def _build(graph, set_recursive_attribute): - in_node = [n for n in graph.nodes if graph.in_degree(n) == 0][0] + in_node = next(n for n in graph.nodes if graph.in_degree(n) == 0) for n in nx.topological_sort(graph): for pred in graph.predecessors(n): e = (pred, n) @@ -44,20 +46,17 @@ def _build(graph, set_recursive_attribute): if pred == in_node: predecessor_values = None else: - pred_pred = list(graph.predecessors(pred))[0] + pred_pred = next(iter(graph.predecessors(pred))) predecessor_values = graph.edges[(pred_pred, pred)] graph.edges[e].update(set_recursive_attribute(op_name, predecessor_values)) -def ArchitectureParameter(**kwargs): - """Factory function""" - +def Architecture(**kwargs): + """Factory function.""" if "structure" not in kwargs: raise ValueError("Factory function requires structure") if not isinstance(kwargs["structure"], list) or len(kwargs["structure"]) == 1: base = GraphGrammar - else: - base = GraphGrammarMultipleRepetitive class _FunctionParameter(base): def __init__( @@ -71,6 +70,8 @@ def __init__( | dict | list[dict], primitives: dict, + # TODO: Follow this rabbit hole for `constraint_kwargs`, + # it can all be deleted my friend constraint_kwargs: dict | None = None, name: str = "ArchitectureParameter", set_recursive_attribute: Callable | None = None, @@ -89,9 +90,9 @@ def __init__( _dict_structure_to_str( st, primitives, - repetitive_mapping=kwargs["terminal_to_sublanguage_map"] - if "terminal_to_sublanguage_map" in kwargs - else None, + repetitive_mapping=kwargs.get( + "terminal_to_sublanguage_map", None + ), ) if isinstance(st, dict) else st @@ -144,9 +145,7 @@ def to_pytorch(self) -> nn.Module: self.prune_graph() if self._set_recursive_attribute: - m = _build( - self, self._set_recursive_attribute - ) + m = _build(self, self._set_recursive_attribute) if m is not None: return m @@ -155,16 +154,52 @@ def to_pytorch(self) -> nn.Module: self.update_op_names() return super().to_pytorch() # create PyTorch model - def to_tensorflow(self, inputs): - composed_function = self.compose_functions(flatten_graph=False) - return composed_function(inputs) - def create_new_instance_from_id(self, identifier: str): - g = ArchitectureParameter(**self.input_kwargs) # type: ignore[arg-type] + g = Architecture(**self.input_kwargs) # type: ignore[arg-type] g.load_from(identifier) return g return _FunctionParameter(**kwargs) -FunctionParameter = ArchitectureParameter +def ArchitectureParameter(**kwargs): + """Deprecated: Use `Architecture` instead of `ArchitectureParameter`. + + This function remains for backward compatibility and will raise a deprecation + warning if used. + """ + import warnings + + warnings.warn( + ( + "Usage of 'neps.ArchitectureParameter' is deprecated and will be removed in" + " future releases. Please use 'neps.Architecture' instead." + ), + DeprecationWarning, + stacklevel=2, + ) + + return Architecture(**kwargs) + + +Function = Architecture + + +def FunctionParameter(**kwargs): + """Deprecated: Use `Function` instead of `FunctionParameter`. + + This function remains for backward compatibility and will raise a deprecation + warning if used. + """ + import warnings + + warnings.warn( + ( + "Usage of 'neps.FunctionParameter' is deprecated and will be removed in" + " future releases. Please use 'neps.Function' instead." 
+ ), + DeprecationWarning, + stacklevel=2, + ) + + return Function(**kwargs) diff --git a/neps/search_spaces/architecture/cfg.py b/neps/search_spaces/architecture/cfg.py index f7815f6d4..392f56353 100644 --- a/neps/search_spaces/architecture/cfg.py +++ b/neps/search_spaces/architecture/cfg.py @@ -1,25 +1,18 @@ from __future__ import annotations -import itertools + import math -import sys -from collections import defaultdict, deque -from functools import partial -from queue import LifoQueue -from typing import Deque, Tuple, Hashable +from typing import Hashable import numpy as np from nltk import CFG, Production from nltk.grammar import Nonterminal -from scipy.integrate._ivp.radau import P -from torch import Value class Grammar(CFG): - """ - Extended context free grammar (CFG) class from the NLTK python package + """Extended context free grammar (CFG) class from the NLTK python package We have provided functionality to sample from the CFG. We have included generation capability within the class (before it was an external function) - Also allow sampling to return whole trees (not just the string of terminals) + Also allow sampling to return whole trees (not just the string of terminals). """ def __init__(self, *args, **kwargs): @@ -36,18 +29,15 @@ def __init__(self, *args, **kwargs): {i for i in non_unique_nonterminals if non_unique_nonterminals.count(i) > 1} ) - self.max_sampling_level = 2 - - self.convergent = False self._prior = None - self.check_grammar() - - def set_convergent(self): - self.convergent = True - - def set_unconstrained(self): - self.convergent = False + if len(set(self.terminals).intersection(set(self.nonterminals))) > 0: + raise Exception( + f"Same terminal and nonterminal symbol: {set(self.terminals).intersection(set(self.nonterminals))}!" + ) + for nt in self.nonterminals: + if len(self.productions(Nonterminal(nt))) == 0: + raise Exception(f"There is no production for nonterminal {nt}") @property def prior(self): @@ -76,100 +66,13 @@ def _check_prior(value: dict): _check_prior(value) self._prior = value - def check_grammar(self): - if len(set(self.terminals).intersection(set(self.nonterminals))) > 0: - raise Exception( - f"Same terminal and nonterminal symbol: {set(self.terminals).intersection(set(self.nonterminals))}!" - ) - for nt in self.nonterminals: - if len(self.productions(Nonterminal(nt))) == 0: - raise Exception(f"There is no production for nonterminal {nt}") - - @property - def compute_space_size(self) -> int: - """Computes the size of the space described by the grammar. - - Args: - primitive_nonterminal (str, optional): The primitive nonterminal of the grammar. Defaults to "OPS". - - Returns: - int: size of space described by grammar. 
- """ - - def recursive_worker(nonterminal: Nonterminal, memory_bank: dict = None) -> int: - if memory_bank is None: - memory_bank = {} - - potential_productions = self.productions(lhs=nonterminal) - _possibilites = 0 - for potential_production in potential_productions: - edges_nonterminals = [ - rhs_sym - for rhs_sym in potential_production.rhs() - if str(rhs_sym) in self.nonterminals - ] - possibilities_per_edge = [ - memory_bank[str(e_nonterminal)] - if str(e_nonterminal) in memory_bank.keys() - else recursive_worker(e_nonterminal, memory_bank) - for e_nonterminal in edges_nonterminals - ] - memory_bank.update( - { - str(e_nonterminal): possibilities_per_edge[i] - for i, e_nonterminal in enumerate(edges_nonterminals) - } - ) - product = 1 - for p in possibilities_per_edge: - product *= p - _possibilites += product - return _possibilites - - return recursive_worker(self.start()) - - def generator(self, n=1, depth=5): - # return the first n strings generated by the CFG of a maximum depth - sequences = [] - for sentence in self._generate(n=n, depth=depth): - sequences.append(" ".join(sentence)) - return sequences - - def sampler_restricted(self, n, max_length=5, cfactor=0.1, min_length=0): - # sample n unqiue sequences from the CFG - # such that the number of terminals is between min_length and max_length - # cfactor controls the avg length of sampled sequence (see self.sampler) - # setting smaller cfactor can reduce number of samples required to find n of specified size - - # store in a dict fr quick look up when seeing if its a unique sample - sequences_dict = {} - sequences = [[]] * n - i = 0 - while i < n: - sample = self._convergent_sampler(symbol=self.start(), cfactor=cfactor) - # split up words, depth and num productions - tree = sample[0] + ")" - # count number of terminals - length = 0 - for t in self.terminals: - length += tree.count(t + ")") - # check satisfies depth restrictions - if (length <= max_length) and (length >= min_length): - # check not already in samples - if tree not in sequences_dict: - sequences_dict[tree] = "true" - sequences[i] = tree - i += 1 - return sequences - def sampler( self, n=1, - start_symbol: str = None, + start_symbol: str | None = None, user_priors: bool = False, ): # sample n sequences from the CFG - # convergent: avoids very long sequences (we advise setting True) # cfactor: the factor to downweight productions (cfactor=1 returns to naive sampling strategy) # smaller cfactor provides smaller sequences (on average) @@ -178,24 +81,20 @@ def sampler( # less likely it is to terminate. Therefore, we set the default sampler (setting convergent=True) to # downweight frequent productions when traversing the grammar. 
# see https://eli.thegreenplace.net/2010/01/28/generating-random-sentences-from-a-context-free-236grammar - if start_symbol is None: - start_symbol = self.start() - else: - start_symbol = Nonterminal(start_symbol) + start_symbol = self.start() if start_symbol is None else Nonterminal(start_symbol) - if self.convergent: - cfactor = 0.1 - return [ - f"{self._convergent_sampler(symbol=start_symbol, cfactor=cfactor)[0]})" - for i in range(0, n) - ] - else: - return [ - f"{self._sampler(symbol=start_symbol, user_priors=user_priors)})" - for i in range(0, n) - ] + return [ + f"{self._sampler(symbol=start_symbol, user_priors=user_priors)})" + for _ in range(n) + ] - def _sampler(self, symbol=None, user_priors: bool = False, *, _cache: dict[Hashable, str] | None = None): + def _sampler( + self, + symbol=None, + user_priors: bool = False, + *, + _cache: dict[Hashable, str] | None = None, + ): # simple sampler where each production is sampled uniformly from all possible productions # Tree choses if return tree or list of terminals # recursive implementation @@ -205,14 +104,14 @@ def _sampler(self, symbol=None, user_priors: bool = False, *, _cache: dict[Hasha # init the sequence tree = "(" + str(symbol) # collect possible productions from the starting symbol - productions = self.productions(lhs=symbol) + productions = list(self.productions(lhs=symbol)) # sample - if 0 == len(productions): + if len(productions) == 0: raise Exception(f"Nonterminal {symbol} has no productions!") if user_priors and self._prior is not None: - production = choice(productions, probs=self._prior[str(symbol)]) + production = np.random.choice(productions, p=self._prior[str(symbol)]) else: - production = choice(productions) + production = np.random.choice(productions) for sym in production.rhs(): if isinstance(sym, str): @@ -228,81 +127,13 @@ def _sampler(self, symbol=None, user_priors: bool = False, *, _cache: dict[Hasha return tree - def sampler_maxMin_func(self, symbol: str = None, largest: bool = True): - tree = "(" + str(symbol) - # collect possible productions from the starting symbol - productions = self.productions(lhs=symbol) - # sample - production = productions[-1 if largest else 0] - for sym in production.rhs(): - if isinstance(sym, str): - # if terminal then add string to sequence - tree = tree + " " + sym - else: - tree = tree + " " + self.sampler_maxMin_func(sym, largest=largest) + ")" - return tree - - def _convergent_sampler( - self, cfactor, symbol=None, pcount=defaultdict(int) - ): - # sampler that down-weights the probability of selcting the same production many times - # ensuring that the sampled trees are not 'too' long (size to be controlled by cfactor) - # - # recursive implementation - #:pcount: storage for the productions used in the current branch - - # init the sequence - tree = "(" + str(symbol) - # init counter of tree depth and number of production rules - depth, num_prod = 1, 1 - # collect possible productions from the starting symbol - productions = self.productions(lhs=symbol) - # init sampling weights - weights = [] - # calc weights for the possible productions - for prod in productions: - if prod in pcount: - # if production already occured in branch then downweight - weights.append(cfactor ** (pcount[prod])) - else: - # otherwise set to be 1 - weights.append(1.0) - # normalize weights to get probabilities - norm = sum(weights) - probs = [weight / norm for weight in weights] - # sample - production = choice(productions, probs) - # update counts - pcount[production] += 1 - depths = [] - for 
sym in production.rhs(): - if isinstance(sym, str): - # if terminal then add string to sequence - tree = tree + " " + sym - else: - # otherwise keep generating the sequence - recursion = self._convergent_sampler( - symbol=sym, cfactor=cfactor, pcount=pcount - ) - depths.append(recursion[1]) - num_prod += recursion[2] - tree = tree + " " + recursion[0] + ")" - # count the maximum depth and update - - if len(depths) > 0: - depth = max(depths) + 1 - # update counts - pcount[production] -= 1 - return tree, depth, num_prod - def compute_prior(self, string_tree: str, log: bool = True) -> float: prior_prob = 1.0 if not log else 0.0 symbols = self.nonterminals + self.terminals q_production_rules: list[tuple[list, int]] = [] non_terminal_productions: dict[str, list[Production]] = { - sym: self.productions(lhs=Nonterminal(sym)) - for sym in self.nonterminals + sym: self.productions(lhs=Nonterminal(sym)) for sym in self.nonterminals } _symbols_by_size = sorted(symbols, key=len, reverse=True) @@ -322,11 +153,11 @@ def compute_prior(self, string_tree: str, log: bool = True) -> float: continue # special case: "(" is (part of) a terminal - if string_tree[i - 1: i + 2] != " ( ": + if string_tree[i - 1 : i + 2] != " ( ": i += 1 continue - if char == ")" and not string_tree[i - 1] == " ": + if char == ")" and string_tree[i - 1] != " ": # closing symbol of production production = q_production_rules.pop()[0][0] lhs_production = production.lhs() @@ -336,7 +167,7 @@ def compute_prior(self, string_tree: str, log: bool = True) -> float: prior_prob += np.log(self.prior[(lhs_production)][idx] + 1e-15) else: prior_prob *= self.prior[str(lhs_production)][idx] - i+=1 + i += 1 continue _s = string_tree[i : i + _longest] @@ -344,7 +175,9 @@ def compute_prior(self, string_tree: str, log: bool = True) -> float: if _s.startswith(sym): break else: - raise RuntimeError(f"Terminal or nonterminal at position {i} does not exist") + raise RuntimeError( + f"Terminal or nonterminal at position {i} does not exist" + ) i += len(sym) - 1 @@ -362,8 +195,7 @@ def compute_prior(self, string_tree: str, log: bool = True) -> float: new_productions = [ production for production in _productions - if str(production.rhs()[_count]) - == sym + if str(production.rhs()[_count]) == sym ] q_production_rules[-1] = (new_productions, _count + 1) @@ -377,64 +209,6 @@ def compute_prior(self, string_tree: str, log: bool = True) -> float: return prior_prob - def _generate(self, start=None, depth=None, n=None): - """ - see https://www.nltk.org/_modules/nltk/parse/generate.html - Generates an iterator of all sentences from a CFG. - - :param grammar: The Grammar used to generate sentences. - :param start: The Nonterminal from which to start generate sentences. - :param depth: The maximal depth of the generated tree. - :param n: The maximum number of sentences to return. - :return: An iterator of lists of terminal tokens. - """ - if not start: - start = self.start() - if depth is None: - depth = sys.maxsize - - iter_prod = self._generate_all([start], depth) - - if n: - iter_prod = itertools.islice(iter_prod, n) - - return iter_prod - - def _generate_all(self, items, depth): - # see https://www.nltk.org/_modules/nltk/parse/generate.html - if items: - try: - for frag1 in self._generate_one(items[0], depth): - for frag2 in self._generate_all(items[1:], depth): - yield frag1 + frag2 - except RuntimeError as _error: - if _error.message == "maximum recursion depth exceeded": - # Helpful error message while still showing the recursion stack. 
- raise RuntimeError( - "The grammar has rule(s) that yield infinite recursion!!" - ) from _error - else: - raise - else: - yield [] - - def _generate_one(self, item, depth): - # see https://www.nltk.org/_modules/nltk/parse/generate.html - if depth > 0: - if isinstance(item, Nonterminal): - for prod in self.productions(lhs=item): - yield from self._generate_all(prod.rhs(), depth - 1) - else: - yield [item] - - @staticmethod - def _remove_empty_spaces(child): - while child[0] == " ": - child = child[1:] - while child[-1] == " ": - child = child[:-1] - return child - def mutate( self, parent: str, subtree_index: int, subtree_node: str, patience: int = 50 ) -> str: @@ -461,52 +235,9 @@ def mutate( break _patience -= 1 - child = self._remove_empty_spaces(child) - - return child - - def crossover( - self, - parent1: str, - parent2: str, - patience: int = 50, - return_crossover_subtrees: bool = False, - ): - # randomly swap subtrees in two trees - # if no suitiable subtree exists then return False - subtree_node, subtree_index = self.rand_subtree(parent1) - # chop out subtree - pre, sub, post = self.remove_subtree(parent1, subtree_index) - _patience = patience - while _patience > 0: - # sample subtree from donor - donor_subtree_index = self.rand_subtree_fixed_head(parent2, subtree_node) - # if no subtrees with right head node return False - if not donor_subtree_index: - _patience -= 1 - else: - donor_pre, donor_sub, donor_post = self.remove_subtree( - parent2, donor_subtree_index - ) - # return the two new tree - child1 = pre + donor_sub + post - child2 = donor_pre + sub + donor_post - - child1 = self._remove_empty_spaces(child1) - child2 = self._remove_empty_spaces(child2) - - if return_crossover_subtrees: - return ( - child1, - child2, - (pre, sub, post), - (donor_pre, donor_sub, donor_post), - ) - return child1, child2 + return child.strip() - return False, False - - def rand_subtree(self, tree: str) -> Tuple[str, int]: + def rand_subtree(self, tree: str) -> tuple[str, int]: """Helper function to choose a random subtree in a given parse tree. Runs a single pass through the tree (stored as string) to look for the location of swappable nonterminal symbols. 
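To make the string-based subtree machinery above concrete, here is a self-contained toy sketch (the grammar and variable names are illustrative, not NePS API): serialized parse trees are split on spaces, and every token starting with `(` names the head nonterminal of a subtree, so candidate swap points can be collected in a single pass, mirroring `rand_subtree`:

```python
import numpy as np

tree = "(S (S (T 2)) (ADD +) (T 1))"
swappable_nonterminals = ["S", "ADD", "T"]

split_tree = tree.split(" ")
swappable_indices = [
    i
    for i, token in enumerate(split_tree)
    if token.startswith("(") and token[1:] in swappable_nonterminals
]
# index 0 heads the whole tree; like rand_subtree, start at 1 so the
# root itself is never selected for swapping
r = np.random.randint(1, len(swappable_indices))
subtree_index = swappable_indices[r]
print(split_tree[subtree_index][1:], subtree_index)  # e.g. ADD 4
```

The matching `remove_subtree` then splits the string at such an index into the text before, the removed subtree, and the text after, which is exactly what `mutate` stitches back together around a freshly sampled subtree.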
@@ -520,7 +251,7 @@ def rand_subtree(self, tree: str) -> Tuple[str, int]:
         split_tree = tree.split(" ")
         swappable_indices = [
             i
-            for i in range(0, len(split_tree))
+            for i in range(len(split_tree))
             if split_tree[i][1:] in self.swappable_nonterminals
         ]
         r = np.random.randint(1, len(swappable_indices))
@@ -530,7 +261,7 @@ def rand_subtree(self, tree: str) -> Tuple[str, int]:

     @staticmethod
     def rand_subtree_fixed_head(
-        tree: str, head_node: str, swappable_indices: list = None
+        tree: str, head_node: str, swappable_indices: list | None = None
     ) -> int:
         # helper function to choose a random subtree from a given tree with a specific head node
         # if no such subtree then return False, otherwise return the index of the subtree
@@ -539,7 +270,7 @@ def rand_subtree_fixed_head(
         if swappable_indices is None:
             split_tree = tree.split(" ")
             swappable_indices = [
-                i for i in range(0, len(split_tree)) if split_tree[i][1:] == head_node
+                i for i in range(len(split_tree)) if split_tree[i][1:] == head_node
             ]
         if not isinstance(swappable_indices, list):
             raise TypeError("Expected list for swappable indices!")
@@ -553,15 +284,14 @@ def rand_subtree_fixed_head(
             if len(swappable_indices) > 1
             else 0
         )
-        chosen_non_terminal_index = swappable_indices[r]
-        return chosen_non_terminal_index
+        return swappable_indices[r]

     @staticmethod
-    def remove_subtree(tree: str, index: int) -> Tuple[str, str, str]:
+    def remove_subtree(tree: str, index: int) -> tuple[str, str, str]:
         """Helper function to remove a subtree from a parse tree given its index.
         E.g. '(S (S (T 2)) (ADD +) (T 1))'
-        becomes '(S (S (T 2)) ', '(T 1))' after removing (ADD +)
+        becomes '(S (S (T 2)) ', '(T 1))' after removing (ADD +).

         Args:
             tree (str): parse tree
@@ -588,276 +318,3 @@ def remove_subtree(tree: str, index: int) -> Tuple[str, str, str]:
         post_subtree = right[current_index + 1 :]
         removed = "".join(split_tree[index]) + " " + right[: current_index + 1]
         return (pre_subtree, removed, post_subtree)
-
-    @staticmethod
-    def unparse_tree(tree: str):
-        string = []
-        temp = ""
-        # perform single pass of tree
-        for char in tree:
-            if char == " ":
-                temp = ""
-            elif char == ")":
-                if temp[-1] != ")":
-                    string.append(temp)
-                temp += char
-            else:
-                temp += char
-        return " ".join(string)
-
-
-class DepthConstrainedGrammar(Grammar):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.depth_constraints = None
-
-    def set_depth_constraints(self, depth_constraints):
-        self.depth_constraints = depth_constraints
-        if not all(k in self.nonterminals for k in self.depth_constraints.keys()):
-            raise Exception(
-                f"Nonterminal {set(self.depth_constraints.keys())-set(self.nonterminals)} does not exist in grammar"
-            )
-
-    @staticmethod
-    def is_depth_constrained():
-        return True
-
-    def sampler(  # type: ignore[override]
-        self,
-        n: int = 1,
-        start_symbol: str = None,
-        depth_information: dict = None,
-    ):
-        if self.depth_constraints is None:
-            raise ValueError("Depth constraints are not set!")
-
-        if start_symbol is None:
-            start_symbol = self.start()
-        else:
-            start_symbol = Nonterminal(start_symbol)
-
-        if depth_information is None:
-            depth_information = {}
-        return [
-            f"{self._depth_constrained_sampler(symbol=start_symbol, depth_information=depth_information)})"
-            for i in range(0, n)
-        ]
-
-    def _compute_depth_information_for_pre(self, tree: str) -> dict:
-        depth_information = {nt: 0 for nt in self.nonterminals}
-        q_nonterminals: Deque = deque()
-        for split in tree.split(" "):
-            if split == "":
-                continue
-            elif split[0] == "(":
- 
q_nonterminals.append(split[1:]) - depth_information[split[1:]] += 1 - continue - while split[-1] == ")": - nt = q_nonterminals.pop() - depth_information[nt] -= 1 - split = split[:-1] - return depth_information - - def _compute_depth_information(self, tree: str) -> tuple: - split_tree = tree.split(" ") - depth_information = [0] * len(split_tree) - subtree_depth = [0] * len(split_tree) - helper_subtree_depth = [0] * len(split_tree) - helper_dict_depth_information = {nt: 0 for nt in self.nonterminals} - helper_dict_subtree_depth: dict = {nt: deque() for nt in self.nonterminals} - q_nonterminals: Deque = deque() - for i, split in enumerate(split_tree): - if split == "": - continue - elif split[0] == "(": - nt = split[1:] - q_nonterminals.append(nt) - depth_information[i] = helper_dict_depth_information[nt] + 1 - helper_dict_depth_information[nt] += 1 - helper_dict_subtree_depth[nt].append(i) - for j in helper_dict_subtree_depth[nt]: - subtree_depth[j] = max(subtree_depth[j], helper_subtree_depth[j] + 1) - helper_subtree_depth[j] += 1 - continue - while split[-1] == ")": - nt = q_nonterminals.pop() - helper_dict_depth_information[nt] -= 1 - for j in helper_dict_subtree_depth[nt]: - helper_subtree_depth[j] -= 1 - _ = helper_dict_subtree_depth[nt].pop() - split = split[:-1] - return depth_information, subtree_depth - - def _compute_max_depth(self, tree: str, subtree_node: str) -> int: - max_depth = 0 - depth_information = {nt: 0 for nt in self.nonterminals} - q_nonterminals: Deque = deque() - for split in tree.split(" "): - if split == "": - continue - elif split[0] == "(": - q_nonterminals.append(split[1:]) - depth_information[split[1:]] += 1 - if split[1:] == subtree_node and depth_information[split[1:]] > max_depth: - max_depth = depth_information[split[1:]] - continue - while split[-1] == ")": - nt = q_nonterminals.pop() - depth_information[nt] -= 1 - split = split[:-1] - return max_depth - - def _depth_constrained_sampler(self, symbol=None, depth_information: dict = None): - if depth_information is None: - depth_information = {} - # init the sequence - tree = "(" + str(symbol) - # collect possible productions from the starting symbol & filter if constraints are violated - lhs = str(symbol) - if lhs in depth_information.keys(): - depth_information[lhs] += 1 - else: - depth_information[lhs] = 1 - if ( - lhs in self.depth_constraints.keys() - and depth_information[lhs] >= self.depth_constraints[lhs] - ): - productions = [ - production - for production in self.productions(lhs=symbol) - if lhs - not in [str(sym) for sym in production.rhs() if not isinstance(sym, str)] - ] - else: - productions = self.productions(lhs=symbol) - - if len(productions) == 0: - raise Exception( - "There can be no word sampled! This is due to the grammar and/or constraints." 
- ) - - # sample - production = choice(productions) - for sym in production.rhs(): - if isinstance(sym, str): - # if terminal then add string to sequence - tree = tree + " " + sym - else: - tree = ( - tree - + " " - + self._depth_constrained_sampler(sym, depth_information) - + ")" - ) - depth_information[lhs] -= 1 - return tree - - def mutate( - self, parent: str, subtree_index: int, subtree_node: str, patience: int = 50 - ) -> str: - # chop out subtree - pre, _, post = self.remove_subtree(parent, subtree_index) - _patience = patience - while _patience > 0: - # only sample subtree -> avoids full sampling of large parse trees - depth_information = self._compute_depth_information_for_pre(pre) - new_subtree = self.sampler( - 1, start_symbol=subtree_node, depth_information=depth_information - )[0] - child = pre + new_subtree + post - if parent != child: # ensure that parent is really mutated - break - _patience -= 1 - child = self._remove_empty_spaces(child) - return child - - def crossover( - self, - parent1: str, - parent2: str, - patience: int = 50, - return_crossover_subtrees: bool = False, - ): - # randomly swap subtrees in two trees - # if no suitiable subtree exists then return False - subtree_node, subtree_index = self.rand_subtree(parent1) - # chop out subtree - pre, sub, post = self.remove_subtree(parent1, subtree_index) - head_node_depth = self._compute_depth_information_for_pre(pre)[subtree_node] + 1 - sub_depth = self._compute_max_depth(sub, subtree_node) - _patience = patience - while _patience > 0: - # sample subtree from donor - donor_subtree_index = self._rand_subtree_fixed_head( - parent2, subtree_node, head_node_depth, sub_depth=sub_depth - ) - # if no subtrees with right head node return False - if not donor_subtree_index: - _patience -= 1 - else: - donor_pre, donor_sub, donor_post = self.remove_subtree( - parent2, donor_subtree_index - ) - # return the two new tree - child1 = pre + donor_sub + post - child2 = donor_pre + sub + donor_post - child1 = self._remove_empty_spaces(child1) - child2 = self._remove_empty_spaces(child2) - - if return_crossover_subtrees: - return ( - child1, - child2, - (pre, sub, post), - (donor_pre, donor_sub, donor_post), - ) - return child1, child2 - - return False, False - - def _rand_subtree_fixed_head( - self, - tree: str, - head_node: str, - head_node_depth: int = 0, - sub_depth: int = 0, - ) -> int: - # helper function to choose a random subtree from a given tree with a specific head node - # if no such subtree then return False, otherwise return the index of the subtree - - # single pass through tree (stored as string) to look for the location of swappable_non_terminmals - if head_node in self.depth_constraints: - depth_information, subtree_depth = self._compute_depth_information(tree) - split_tree = tree.split(" ") - swappable_indices = [ - i - for i in range(len(split_tree)) - if split_tree[i][1:] == head_node - and head_node_depth - 1 + subtree_depth[i] - <= self.depth_constraints[head_node] - and depth_information[i] - 1 + sub_depth - <= self.depth_constraints[head_node] - ] - else: - swappable_indices = None - return super().rand_subtree_fixed_head( - tree=tree, head_node=head_node, swappable_indices=swappable_indices - ) - - -# helper function for quickly getting a single sample from multinomial with probs -def choice(options, probs=None): - x = np.random.rand() - if probs is None: - # then uniform probs - num = len(options) - probs = [1 / num] * num - cum = 0 - choice = -1 - for i, p in enumerate(probs): - cum += p - if x < cum: - 
choice = i - break - return options[choice] diff --git a/neps/search_spaces/architecture/cfg_variants/cfg_resolution.py b/neps/search_spaces/architecture/cfg_variants/cfg_resolution.py deleted file mode 100644 index 5bc8fb5e2..000000000 --- a/neps/search_spaces/architecture/cfg_variants/cfg_resolution.py +++ /dev/null @@ -1,385 +0,0 @@ -from collections import deque -from typing import Deque - -import networkx as nx -import numpy as np -from nltk.grammar import Nonterminal - -from ..cfg import Grammar, choice - - -class ResolutionGrammar(Grammar): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.n_downsamples = None - self.terminal_to_graph_map = None - self.downsampling_lhs = None - self.downsample_terminal = None - self.depth_constraints = None - - def set_resolution_constraints( - self, - n_downsamples: int, - terminal_to_graph: dict, - downsampling_lhs: str, - downsample_terminal: str = "downsample", - depth_constraints: dict = None, - ): - self.n_downsamples = n_downsamples - - terminal_to_graph_map: dict = {} - for k, v in terminal_to_graph.items(): - terminal_to_graph_map[k] = {} - terminal_to_graph_map[k]["edge_list"] = v - - G = nx.DiGraph() - G.add_edges_from(v) - src = [n for n, d in G.in_degree() if d == 0][0] - tgt = [n for n, d in G.out_degree() if d == 0][0] - terminal_to_graph_map[k]["paths"] = { - k: [] for k in range(1, nx.dag_longest_path_length(G) + 1) - } - for path in nx.all_simple_edge_paths(G, source=src, target=tgt): - terminal_to_graph_map[k]["paths"][len(path)].append(path[::-1]) - - self.terminal_to_graph_map = terminal_to_graph_map - - self.downsampling_lhs = downsampling_lhs - self.swappable_nonterminals.remove(self.downsampling_lhs) - - self.downsample_terminal = downsample_terminal - - if depth_constraints is not None: - self.depth_constraints = depth_constraints - if not all(k in self.nonterminals for k in self.depth_constraints.keys()): - raise Exception( - f"Nonterminal {set(self.depth_constraints.keys())-set(self.nonterminals)} does not exist in grammar" - ) - else: - self.depth_constraints = {} - - @staticmethod - def is_resolution_constrained(): - return True - - def sampler( - self, - n=1, - start_symbol: str = None, - n_downsamples: int = None, - depth_information: dict = None, - ): - if start_symbol is None: - start_symbol = self.start() - else: - start_symbol = Nonterminal(start_symbol) - - if depth_information is None: - depth_information = {} - if n_downsamples is None: - n_downsamples = self.n_downsamples - return [ - f"{self._resolution_constrained_sampler(symbol=start_symbol, n_downsamples=n_downsamples, depth_information=depth_information)})" - for _ in range(n) - ] - - def _compute_depth_information_for_pre(self, tree: str) -> dict: - depth_information = {nt: 0 for nt in self.nonterminals} - q_nonterminals: Deque = deque() - for split in tree.split(" "): - if split == "": - continue - elif split[0] == "(": - q_nonterminals.append(split[1:]) - depth_information[split[1:]] += 1 - continue - while split[-1] == ")": - nt = q_nonterminals.pop() - depth_information[nt] -= 1 - split = split[:-1] - return depth_information - - def _compute_depth_information(self, tree: str) -> tuple: - split_tree = tree.split(" ") - depth_information = [0] * len(split_tree) - subtree_depth = [0] * len(split_tree) - helper_subtree_depth = [0] * len(split_tree) - helper_dict_depth_information = {nt: 0 for nt in self.nonterminals} - helper_dict_subtree_depth: dict = {nt: deque() for nt in self.nonterminals} - q_nonterminals: Deque = 
deque() - for i, split in enumerate(split_tree): - if split == "": - continue - elif split[0] == "(": - nt = split[1:] - q_nonterminals.append(nt) - depth_information[i] = helper_dict_depth_information[nt] + 1 - helper_dict_depth_information[nt] += 1 - helper_dict_subtree_depth[nt].append(i) - for j in helper_dict_subtree_depth[nt]: - subtree_depth[j] = max(subtree_depth[j], helper_subtree_depth[j] + 1) - helper_subtree_depth[j] += 1 - continue - while split[-1] == ")": - nt = q_nonterminals.pop() - helper_dict_depth_information[nt] -= 1 - for j in helper_dict_subtree_depth[nt]: - helper_subtree_depth[j] -= 1 - _ = helper_dict_subtree_depth[nt].pop() - split = split[:-1] - return depth_information, subtree_depth - - def _compute_max_depth(self, tree: str, subtree_node: str) -> int: - max_depth = 0 - depth_information = {nt: 0 for nt in self.nonterminals} - q_nonterminals: Deque = deque() - for split in tree.split(" "): - if split == "": - continue - elif split[0] == "(": - q_nonterminals.append(split[1:]) - depth_information[split[1:]] += 1 - if split[1:] == subtree_node and depth_information[split[1:]] > max_depth: - max_depth = depth_information[split[1:]] - continue - while split[-1] == ")": - nt = q_nonterminals.pop() - depth_information[nt] -= 1 - split = split[:-1] - return max_depth - - @staticmethod - def assign_downsamples(edge_list, paths, n_downsamples): - if n_downsamples == 0: - return [0] * len(edge_list) - edge_list_to_downsamples = {e: 0 for e in edge_list} - - if max(paths.keys()) >= n_downsamples: - for path in paths[n_downsamples]: - for e in path: - edge_list_to_downsamples[e] = 1 - - for k in reversed(sorted(paths.keys())): - k_paths = paths[k] - if len(k_paths) == 0 or k == n_downsamples: - continue - tmp_indices = list(range(len(k_paths))) - np.random.shuffle(tmp_indices) - for idx in tmp_indices: - path = k_paths[idx] - already_set_n_downsamples = sum(edge_list_to_downsamples[e] for e in path) - if already_set_n_downsamples == n_downsamples: - continue - _path = [e for e in path if edge_list_to_downsamples[e] == 0] - - _n_downsamples = n_downsamples - already_set_n_downsamples - if len(_path) == 1: - edge_list_to_downsamples[path[0]] = _n_downsamples - elif len(_path) < _n_downsamples: - indices = np.random.choice( - list(range(len(_path))), - size=n_downsamples // len(_path), - replace=False, - ) - for i, e in enumerate(_path): - edge_list_to_downsamples[e] = ( - n_downsamples // len(_path) + 1 - if i in indices - else n_downsamples // len(_path) - ) - else: - indices = np.random.choice( - list(range(len(_path))), - size=_n_downsamples, - replace=False, - ) - for i in indices: - edge_list_to_downsamples[_path[i]] = 1 - - return [edge_list_to_downsamples[e] for e in edge_list] - - def _resolution_constrained_sampler( - self, symbol=None, n_downsamples: int = 0, depth_information: dict = None - ): - if depth_information is None: - depth_information = {} - - # init the sequence - tree = "(" + str(symbol) - - lhs = str(symbol) - if lhs in depth_information.keys(): - depth_information[lhs] += 1 - else: - depth_information[lhs] = 1 - - # collect possible productions from the starting symbol & filter if constraints are violated - if lhs == self.downsampling_lhs: - productions = [ - production - for production in self.productions(lhs=symbol) - if sum(str(x) == self.downsample_terminal for x in production.rhs()) - == n_downsamples - ] - elif ( - lhs in self.depth_constraints.keys() - and depth_information[lhs] < self.depth_constraints[lhs]["min"]["number"] - ): - 
productions = [ - production - for production in self.productions(lhs=symbol) - if not ( - len(production.rhs()) == 1 - and str(production.rhs()[0]) - in self.depth_constraints[lhs]["min"]["exclude_rhs"] - ) - ] - elif ( - lhs in self.depth_constraints.keys() - and depth_information[lhs] >= self.depth_constraints[lhs]["max"]["number"] - ): - productions = [ - production - for production in self.productions(lhs=symbol) - if lhs - not in [str(sym) for sym in production.rhs() if not isinstance(sym, str)] - ] - else: - productions = self.productions(lhs=symbol) - - if len(productions) == 0: - raise Exception( - "There can be no word sampled! This is due to the grammar and/or constraints." - ) - - # sample - production = choice(productions) - n_downsamples_per_edge = [] - counter = 0 - for sym in production.rhs(): - if isinstance(sym, str): - tree = tree + " " + sym - if sym in self.terminal_to_graph_map.keys(): - n_downsamples_per_edge = self.assign_downsamples( - self.terminal_to_graph_map[sym]["edge_list"], - self.terminal_to_graph_map[sym]["paths"], - n_downsamples, - ) - else: - if counter < len(n_downsamples_per_edge): - _n_downsamples = n_downsamples_per_edge[counter] - elif ( - len(production.rhs()) == 1 - and str(production.rhs()[0]) == self.downsampling_lhs - ): - _n_downsamples = n_downsamples - else: - _n_downsamples = 0 - tree = ( - tree - + " " - + self._resolution_constrained_sampler( - sym, - n_downsamples=_n_downsamples, - depth_information=depth_information, - ) - + ")" - ) - counter += 1 - - depth_information[lhs] -= 1 - return tree - - def mutate( - self, parent: str, subtree_index: int, subtree_node: str, patience: int = 50 - ) -> str: - # chop out subtree - pre, _, post = self.remove_subtree(parent, subtree_index) - _patience = patience - while _patience > 0: - # only sample subtree -> avoids full sampling of large parse trees - depth_information = self._compute_depth_information_for_pre(pre) - new_subtree = self.sampler( - 1, start_symbol=subtree_node, depth_information=depth_information - )[0] - child = pre + new_subtree + post - if parent != child: # ensure that parent is really mutated - break - _patience -= 1 - child = self._remove_empty_spaces(child) - return child - - def crossover( - self, - parent1: str, - parent2: str, - patience: int = 50, - return_crossover_subtrees: bool = False, - ): - # randomly swap subtrees in two trees - # if no suitiable subtree exists then return False - subtree_node, subtree_index = self.rand_subtree(parent1) - # chop out subtree - pre, sub, post = self.remove_subtree(parent1, subtree_index) - head_node_depth = self._compute_depth_information_for_pre(pre)[subtree_node] + 1 - sub_depth = self._compute_max_depth(sub, subtree_node) - _patience = patience - while _patience > 0: - # sample subtree from donor - donor_subtree_index = self._rand_subtree_fixed_head( - parent2, subtree_node, head_node_depth, sub_depth=sub_depth - ) - # if no subtrees with right head node return False - if not donor_subtree_index: - _patience -= 1 - else: - donor_pre, donor_sub, donor_post = self.remove_subtree( - parent2, donor_subtree_index - ) - # return the two new tree - child1 = pre + donor_sub + post - child2 = donor_pre + sub + donor_post - - child1 = self._remove_empty_spaces(child1) - child2 = self._remove_empty_spaces(child2) - - if return_crossover_subtrees: - return ( - child1, - child2, - (pre, sub, post), - (donor_pre, donor_sub, donor_post), - ) - - return child1, child2 - - return False, False - - def _rand_subtree_fixed_head( - self, - tree: 
str, - head_node: str, - head_node_depth: int = 0, - sub_depth: int = 0, - ) -> int: - # helper function to choose a random subtree from a given tree with a specific head node - # if no such subtree then return False, otherwise return the index of the subtree - - # single pass through tree (stored as string) to look for the location of swappable_non_terminmals - if head_node in self.depth_constraints: - depth_information, subtree_depth = self._compute_depth_information(tree) - split_tree = tree.split(" ") - swappable_indices = [ - i - for i in range(len(split_tree)) - if split_tree[i][1:] == head_node - and head_node_depth - 1 + subtree_depth[i] - <= self.depth_constraints[head_node] - and depth_information[i] - 1 + sub_depth - <= self.depth_constraints[head_node] - ] - else: - swappable_indices = None - return super().rand_subtree_fixed_head( - tree=tree, head_node=head_node, swappable_indices=swappable_indices - ) diff --git a/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py b/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py index a79ce212d..aa1e05eac 100644 --- a/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py +++ b/neps/search_spaces/architecture/cfg_variants/constrained_cfg.py @@ -1,21 +1,18 @@ from __future__ import annotations -import itertools import math from collections import deque -from copy import deepcopy from functools import partial from queue import LifoQueue -from typing import Deque import numpy as np from nltk.grammar import Nonterminal -from ..cfg import Grammar, choice +from neps.search_spaces.architecture.cfg import Grammar class Constraint: - def __init__(self, current_derivation: str = None) -> None: + def __init__(self, current_derivation: str | None = None) -> None: self.current_derivation = current_derivation @staticmethod @@ -28,9 +25,6 @@ def get_not_allowed_productions(self, productions: str) -> list[bool] | bool: def update_context(self, new_part: str) -> None: raise NotImplementedError - def get_all_potential_productions(self, production) -> list: - raise NotImplementedError - def mutate_not_allowed_productions( self, nonterminal: str, before: str, after: str, possible_productions: list ) -> list: @@ -38,22 +32,18 @@ def mutate_not_allowed_productions( class ConstrainedGrammar(Grammar): + def set_constraints(self, constraints: dict, none_operation: str | None = None): + self.constraints = constraints + self.none_operation = none_operation + self.constraint_is_class = isinstance(self.constraints, Constraint) + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.constraints = None self.none_operation = None self.constraint_is_class: bool = False - self._prior: dict = None - - def set_constraints(self, constraints: dict, none_operation: str = None): - self.constraints = constraints - self.none_operation = none_operation - self.constraint_is_class = isinstance(self.constraints, Constraint) - - @staticmethod - def is_constrained(): - return True + self._prior: dict | None = None @property def prior(self): @@ -85,14 +75,11 @@ def _check_prior(value: dict): def sampler( # type: ignore[override] self, n=1, - start_symbol: str = None, + start_symbol: str | None = None, not_allowed_productions=None, user_priors: bool = False, ): - if start_symbol is None: - start_symbol = self.start() - else: - start_symbol = Nonterminal(start_symbol) + start_symbol = self.start() if start_symbol is None else Nonterminal(start_symbol) return [ self._constrained_sampler( @@ -130,7 +117,7 @@ def _constrained_sampler( 
# init the sequence tree = "(" + str(symbol) # collect possible productions from the starting symbol - productions = self.productions(lhs=symbol) + productions = list(self.productions(lhs=symbol)) if not_allowed_productions is not None and len(not_allowed_productions) > 0: productions = list( filter( @@ -154,12 +141,12 @@ def _constrained_sampler( probs = [p for i, p in enumerate(probs) if i not in not_allowed_indices] # rescale s.t. probs sum up to one cur_prob_sum = sum(probs) - probs = list(map(lambda x: x / cur_prob_sum, probs)) + probs = [x / cur_prob_sum for x in probs] assert len(probs) == len(productions) - production = choice(productions, probs=probs) + production = np.random.choice(productions, p=probs) else: - production = choice(productions) + production = np.random.choice(productions) counter = 0 if self.constraint_is_class: constraints = self.constraints.initialize_constraints(production.rhs()[0]) @@ -216,9 +203,7 @@ def skip_char(char: str) -> bool: and string_tree[i + 1] == " " ): return False - if char == "(": - return True - return False + return char == "(" def find_longest_match( i: int, string_tree: str, symbols: list, max_match: int @@ -253,7 +238,7 @@ def find_longest_match( char = string_tree[i] if skip_char(char): pass - elif char == ")" and not string_tree[i - 1] == " ": + elif char == ")" and string_tree[i - 1] != " ": # closing symbol of production production = q_production_rules.get(block=False)[0][0] idx = self.productions(production.lhs()).index(production) @@ -264,9 +249,9 @@ def find_longest_match( ): outer_production = q_production_rules.queue[-1][0][0] if len(q_production_rules.queue) not in current_derivations: - current_derivations[ - len(q_production_rules.queue) - ] = self.constraints(outer_production.rhs()[0]) + current_derivations[len(q_production_rules.queue)] = ( + self.constraints(outer_production.rhs()[0]) + ) context_information = self.constraints( outer_production.rhs()[0], current_derivations[len(q_production_rules.queue)], @@ -303,7 +288,7 @@ def find_longest_match( ] # rescale s.t. 
prior sum up to one cur_prob_sum = sum(prior) - prior = list(map(lambda x: x / cur_prob_sum, prior)) + prior = [x / cur_prob_sum for x in prior] idx -= sum(idx > i for i in not_allowed_indices) prior = prior[idx] @@ -343,7 +328,7 @@ def find_longest_match( return prior_prob def _compute_current_context(self, pre_subtree: str, post_subtree: str): - q_nonterminals: Deque = deque() + q_nonterminals: deque = deque() for sym in pre_subtree.split(" "): if sym == "": continue @@ -371,7 +356,7 @@ def _compute_current_context(self, pre_subtree: str, post_subtree: str): if len(productions) == 0: raise Exception("Cannot find corresponding production!") - q_context: Deque = deque() + q_context: deque = deque() current_derivation = [] rhs_counter = 0 tmp_str = "" @@ -399,7 +384,7 @@ def _compute_current_context(self, pre_subtree: str, post_subtree: str): q_context.pop() s = s[:-1] if len(q_context) == 0: - tmp_str = self._remove_empty_spaces(tmp_str) + tmp_str = tmp_str.strip() current_derivation.append(tmp_str) if len(productions) == 1 and len(current_derivation) == len( self.constraints(productions[0].rhs()[0]) @@ -441,7 +426,7 @@ def _compute_current_context(self, pre_subtree: str, post_subtree: str): q_context.pop() s = s[:-1] if len(q_context) == 0: - tmp_str = self._remove_empty_spaces(tmp_str) + tmp_str = tmp_str.strip() current_derivation.append(tmp_str) if len(productions) == 1 and len(current_derivation) == len( self.constraints(productions[0].rhs()[0]) @@ -476,9 +461,9 @@ def mutate( not_allowed_productions = self._get_not_allowed_productions( self.productions(lhs=Nonterminal(subtree_node)), context_information[ - [i for i, cd in enumerate(current_derivation) if cd is None][ - 0 - ] + next( + i for i, cd in enumerate(current_derivation) if cd is None + ) ], ) elif isinstance(context_information, bool): @@ -508,248 +493,4 @@ def mutate( ): break _patience -= 1 - child = self._remove_empty_spaces(child) - return child - - def crossover( - self, - parent1: str, - parent2: str, - patience: int = 50, - return_crossover_subtrees: bool = False, - ): - if self.constraint_is_class: - raise NotImplementedError - _patience = patience - while _patience > 0: - subtree_node, subtree_index = self.rand_subtree(parent1) - # chop out subtree - pre, sub, post = self.remove_subtree(parent1, subtree_index) - rhs, current_derivation = self._compute_current_context(pre, post) - context_information = self.constraints( - rhs, - current_derivation, - ) - if isinstance(context_information, list): - parent1_not_allowed_productions = self._get_not_allowed_productions( - self.productions(lhs=Nonterminal(subtree_node)), - context_information[ - [i for i, cd in enumerate(current_derivation) if cd is None][0] - ], - ) - elif isinstance(context_information, bool): - parent1_not_allowed_productions = self._get_not_allowed_productions( - self.productions(lhs=Nonterminal(subtree_node)), context_information - ) - else: - raise NotImplementedError - first_try = True - while first_try or _patience % 10 != 0: - first_try = False - # sample subtree from donor - donor_subtree_index = self.rand_subtree_fixed_head(parent2, subtree_node) - # if no subtrees with right head node return False - if not donor_subtree_index: - _patience -= 1 - else: - donor_pre, donor_sub, donor_post = self.remove_subtree( - parent2, donor_subtree_index - ) - if sub == donor_sub: # ensure that there is really a crossover - _patience -= 1 - continue - rhs, current_derivation = self._compute_current_context( - donor_pre, donor_post - ) - context_information = 
self.constraints( - rhs, - current_derivation, - ) - if isinstance(context_information, list): - parent2_not_allowed_productions = ( - self._get_not_allowed_productions( - self.productions(lhs=Nonterminal(subtree_node)), - context_information[ - [ - i - for i, cd in enumerate(current_derivation) - if cd is None - ][0] - ], - ) - ) - elif isinstance(context_information, bool): - parent2_not_allowed_productions = ( - self._get_not_allowed_productions( - self.productions(lhs=Nonterminal(subtree_node)), - context_information, - ) - ) - else: - raise NotImplementedError - if ( - any( - prod.rhs()[0] == "zero" - for prod in parent1_not_allowed_productions - ) - and "zero" in donor_sub - and donor_sub.count("(") == 1 - and donor_sub.count(")") == 1 - ): - _patience -= 1 - continue - if ( - any( - prod.rhs()[0] == "zero" - for prod in parent2_not_allowed_productions - ) - and "zero" in sub - and sub.count("(") == 1 - and sub.count(")") == 1 - ): - _patience -= 1 - continue - # return the two new tree - child1 = pre + donor_sub + post - child2 = donor_pre + sub + donor_post - child1 = self._remove_empty_spaces(child1) - child2 = self._remove_empty_spaces(child2) - - if return_crossover_subtrees: - return ( - child1, - child2, - (pre, sub, post), - (donor_pre, donor_sub, donor_post), - ) - return child1, child2 - - raise Exception("Cannot do crossover") - - @property - def compute_space_size(self) -> int: - """Computes the size of the space described by the grammar. - - Args: - primitive_nonterminal (str, optional): The primitive nonterminal of the grammar. Defaults to "OPS". - - Returns: - int: size of space described by grammar. - """ - - def recursive_worker(nonterminal: Nonterminal, memory_bank: dict = None) -> int: - def _get_all_variants(production): - variants = [production] - nonterminals = [ - i - for i, sym in enumerate(production.rhs()) - if isinstance(sym, Nonterminal) - ] - max_zero_op = len(nonterminals) - for n_zero_op in range(1, max_zero_op): - for zero_combination in itertools.combinations( - nonterminals, n_zero_op - ): - current_derivation = self.constraints(production.rhs()[0]) - counter = 0 - valid_production = True - for i, sym in enumerate(production.rhs()): - if not isinstance(sym, str): - context_information = self.constraints( - production.rhs()[0], - current_derivation, - ) - if isinstance(context_information, list): - not_allowed_productions = ( - self._get_not_allowed_productions( - self.productions(lhs=sym), - context_information[counter], - ) - ) - elif isinstance(context_information, bool): - not_allowed_productions = ( - self._get_not_allowed_productions( - self.productions(lhs=sym), - context_information, - ) - ) - else: - raise NotImplementedError - if ( - i in zero_combination - and len(not_allowed_productions) > 0 - ): - valid_production = False - break - if i >= max(zero_combination): - break - if i in zero_combination: - current_derivation[counter] = self.none_operation - counter += 1 - if valid_production: - new_production = deepcopy(production) - rhs = list(new_production.rhs()) - new_production._rhs = tuple( - self.none_operation if i in zero_combination else r - for i, r in enumerate(rhs) - ) - variants.append(new_production) - return variants - - if memory_bank is None: - memory_bank = {} - - _potential_productions = self.productions(lhs=nonterminal) - potential_productions = [] - for potential_production in _potential_productions: - nonterminals = list( - { - sym - for sym in potential_production.rhs() - if isinstance(sym, Nonterminal) - } - ) - if 
self.constraint_is_class: - potential_productions += ( - self.constraints.get_all_potential_productions( - potential_production - ) - ) - else: - if any( - production.rhs()[0] == self.none_operation - for nonterminal in nonterminals - for production in self.productions(nonterminal) - ): - potential_productions += _get_all_variants(potential_production) - elif not ( - len(potential_production.rhs()) == 1 - and potential_production.rhs()[0] == self.none_operation - ): - potential_productions.append(potential_production) - _possibilites = 0 - for potential_production in potential_productions: - nonterminals = [ - rhs_sym - for rhs_sym in potential_production.rhs() - if isinstance(rhs_sym, Nonterminal) - ] - possibilities_per_edge = [ - memory_bank[str(e_nonterminal)] - if str(e_nonterminal) in memory_bank.keys() - else recursive_worker(e_nonterminal, memory_bank) - for e_nonterminal in nonterminals - ] - memory_bank.update( - { - str(e_nonterminal): possibilities_per_edge[i] - for i, e_nonterminal in enumerate(nonterminals) - } - ) - product = 1 - for p in possibilities_per_edge: - product *= p - _possibilites += product - return _possibilites - - return recursive_worker(self.start()) + return child.strip() diff --git a/neps/search_spaces/architecture/core_graph_grammar.py b/neps/search_spaces/architecture/core_graph_grammar.py index 277ae9fcd..dfd39292a 100644 --- a/neps/search_spaces/architecture/core_graph_grammar.py +++ b/neps/search_spaces/architecture/core_graph_grammar.py @@ -1,12 +1,9 @@ -from __future__ import annotations - import collections import inspect import queue from abc import abstractmethod from copy import deepcopy from functools import partial -from typing import Deque import networkx as nx import numpy as np @@ -48,13 +45,13 @@ def __init__( self, grammars: list[Grammar] | Grammar, terminal_to_op_names: dict, - terminal_to_graph_edges: dict = None, + terminal_to_graph_edges: dict | None = None, edge_attr: bool = True, edge_label: str = "op_name", - zero_op: list = None, - identity_op: list = None, - name: str = None, - scope: str = None, + zero_op: list | None = None, + identity_op: list | None = None, + name: str | None = None, + scope: str | None = None, return_all_subgraphs: bool = False, return_graph_per_hierarchy: bool = False, ): @@ -90,12 +87,9 @@ def __init__( self.return_all_subgraphs = return_all_subgraphs self.return_graph_per_hierarchy = return_graph_per_hierarchy - def get_grammars(self) -> list[Grammar]: - return self.grammars - def clear_graph(self): while len(self.nodes()) != 0: - self.remove_node(list(self.nodes())[0]) + self.remove_node(next(iter(self.nodes()))) @abstractmethod def id_to_string_tree(self, identifier: str): @@ -118,656 +112,6 @@ def _check_graph(graph: nx.DiGraph): if len(graph) == 0 or graph.number_of_edges() == 0: raise ValueError("Invalid DAG") - def prune_tree( - self, - tree: nx.DiGraph, - terminal_to_torch_map_keys: collections.abc.KeysView, - node_label: str = "op_name", - ) -> nx.DiGraph: - """Prunes unnecessary parts of parse tree, i.e., only one child - - Args: - tree (nx.DiGraph): Parse tree - - Returns: - nx.DiGraph: Pruned parse tree - """ - - def dfs(visited: set, tree: nx.DiGraph, node: int) -> nx.DiGraph: - if node not in visited: - visited.add(node) - - i = 0 - while i < len(tree.nodes[node]["children"]): - former_len = len(tree.nodes[node]["children"]) - child = tree.nodes[node]["children"][i] - tree = dfs( - visited, - tree, - child, - ) - if former_len == len(tree.nodes[node]["children"]): - i += 1 - - if 
len(tree.nodes[node]["children"]) == 1: - predecessor = list(tree.pred[node]) - if len(predecessor) > 0: - tree.add_edge(predecessor[0], tree.nodes[node]["children"][0]) - old_children = tree.nodes[predecessor[0]]["children"] - idx = [i for i, c in enumerate(old_children) if c == node][0] - tree.nodes[predecessor[0]]["children"] = ( - old_children[: idx + 1] - + [tree.nodes[node]["children"][0]] - + old_children[idx + 1 :] - ) - tree.nodes[predecessor[0]]["children"].remove(node) - - tree.remove_node(node) - elif ( - tree.nodes[node]["terminal"] - and tree.nodes[node][node_label] not in terminal_to_torch_map_keys - ): - predecessor = list(tree.pred[node])[0] - tree.nodes[predecessor]["children"].remove(node) - tree.remove_node(node) - return tree - - return dfs(set(), tree, self._find_root(tree)) - - @staticmethod - def _dfs_preorder_nodes(G: nx.DiGraph, source: str = None) -> list[int]: - """Generates nodes in DFS pre-ordering starting at source. - Note that after pruning we cannot reconstruct the associated string tree! - - Args: - G (nx.DiGraph): NetworkX DAG - source (str, optional): Starting node for DFS. Defaults to None. - - Returns: - generator: List of nodes in a DFS pre-ordering. - """ - edges = nx.dfs_labeled_edges(G, source=source) - return list(v for _, v, d in edges if d == "forward") - - @staticmethod - def _find_leafnodes(G): - leafnode = [] - for i in G.nodes: - head = [] - if nx.descendants(G, i) == set(): # find all leaf nodes - for a in nx.ancestors(G, i): # get all ancestors for leaf node - if ( - nx.ancestors(G, a) == set() - ): # Determine if ancestor is a head node - head.append(a) - if len(head) == 1: # if this leaf had only one head then append to leafnode - leafnode.append(i) - return leafnode - - @staticmethod - def _get_neighbors_from_parse_tree(tree: nx.DiGraph, node: int) -> list[int]: - return tree.nodes[node]["children"] - - @staticmethod - def _find_root(G): - return [n for n, d in G.in_degree() if d == 0][0] - - @staticmethod - def _relabel_nodes(G: nx.DiGraph, mapping: dict) -> nx.DiGraph: - """Relabels the nodes and adjusts children list accordingly. - - Args: - G (nx.DiGraph): graph to relabel - mapping (dict): node mapping - - Returns: - nx.DiGraph: relabeled graph (copied) - """ - # recreation of graph is faster - tree_relabeled = nx.DiGraph() - tree_relabeled.add_nodes_from( - [ - ( - mapping[n[0]], - { - k: v if k != "children" else [mapping[_n] for _n in v] - for k, v in n[1].items() - }, - ) - for n in G.nodes(data=True) - ] - ) - tree_relabeled.add_edges_from([(mapping[e[0]], mapping[e[1]]) for e in G.edges()]) - return tree_relabeled - - def assemble_trees( - self, - base_tree: str | nx.DiGraph, - motif_trees: list[str] | list[nx.DiGraph], - terminal_to_sublanguage_map: dict = None, - node_label: str = "op_name", - ) -> str | nx.DiGraph: - """Assembles the base parse tree with the motif parse trees - - Args: - base_tree (nx.DiGraph): Base parse tree - motif_trees (List[nx.DiGraph]): List of motif parse trees - node_label (str, optional): node label key. Defaults to "op_name". 
- - Returns: - nx.DiGraph: Assembled parse tree - """ - if not all([isinstance(base_tree, type(tree)) for tree in motif_trees]): - raise ValueError("All trees must be of the same type!") - if isinstance(base_tree, str): - ensembled_tree_string = base_tree - if terminal_to_sublanguage_map is None: - raise NotImplementedError - - for motif, replacement in zip( - terminal_to_sublanguage_map.keys(), motif_trees - ): - if motif in ensembled_tree_string: - ensembled_tree_string = ensembled_tree_string.replace( - motif, replacement - ) - return ensembled_tree_string - elif isinstance(base_tree, nx.DiGraph): - raise NotImplementedError - leafnodes = self._find_leafnodes(base_tree) - root_nodes = [self._find_root(G) for G in motif_trees] - root_op_names = np.array( - [ - motif_tree.nodes[root_node][node_label] - for motif_tree, root_node in zip(motif_trees, root_nodes) - ] - ) - largest_node_number = max(base_tree.nodes()) - # ensembled_tree = base_tree.copy() - # recreation is slightly faster - ensembled_tree: nx.DiGraph = nx.DiGraph() - ensembled_tree.add_nodes_from(base_tree.nodes(data=True)) - ensembled_tree.add_edges_from(base_tree.edges()) - for leafnode in leafnodes: - idx = np.where(base_tree.nodes[leafnode][node_label] == root_op_names)[0] - if len(idx) == 0: - continue - if len(idx) > 1: - raise ValueError( - "More than two similar terminal/start symbols are not supported!" - ) - - tree = motif_trees[idx[0]] - # generate mapping - mapping = { - n: n_new - for n, n_new in zip( - tree.nodes(), - range( - largest_node_number + 1, - largest_node_number + 1 + len(tree), - ), - ) - } - largest_node_number = largest_node_number + 1 + len(tree) - tree_relabeled = self._relabel_nodes(G=tree, mapping=mapping) - - # compose trees - predecessor_in_base_tree = list(ensembled_tree.pred[leafnode])[0] - motif_tree_root_node = self._find_root(tree_relabeled) - successors_in_motif_tree = tree_relabeled.nodes[motif_tree_root_node][ - "children" - ] - - # delete unnecessary edges - ensembled_tree.remove_node(leafnode) - tree_relabeled.remove_node(motif_tree_root_node) - # add new edges - tree_relabeled.add_node(predecessor_in_base_tree) - for n in successors_in_motif_tree: - tree_relabeled.add_edge(predecessor_in_base_tree, n) - - ensembled_tree.update( - edges=tree_relabeled.edges(data=True), - nodes=tree_relabeled.nodes(data=True), - ) - - idx = np.where( - np.array(ensembled_tree.nodes[predecessor_in_base_tree]["children"]) - == leafnode - )[0][0] - old_children = ensembled_tree.nodes[predecessor_in_base_tree]["children"] - ensembled_tree.nodes[predecessor_in_base_tree]["children"] = ( - old_children[: idx + 1] - + successors_in_motif_tree - + old_children[idx + 1 :] - ) - ensembled_tree.nodes[predecessor_in_base_tree]["children"].remove( - leafnode - ) - return ensembled_tree - else: - raise NotImplementedError( - f"Assembling of trees of type {type(base_tree)} is not supported!" - ) - - def build_graph_from_tree( - self, - tree: nx.DiGraph, - terminal_to_torch_map: dict, - node_label: str = "op_name", - flatten_graph: bool = True, - return_cell: bool = False, - ) -> None | Graph: - """Builds the computational graph from a parse tree. - - Args: - tree (nx.DiGraph): parse tree. - terminal_to_torch_map (dict): Mapping from terminal symbols to primitives or topologies. - node_label (str, optional): Key to access terminal symbol. Defaults to "op_name". - return_cell (bool, optional): Whether to return a cell. Is only needed if cell is repeated multiple times. - Defaults to False. 
- - Returns: - Tuple[Union[None, Graph]]: computational graph (self) or cell. - """ - - def _build_graph_from_tree( - visited: set, - tree: nx.DiGraph, - node: int, - terminal_to_torch_map: dict, - node_label: str, - is_primitive: bool = False, - ): - """Recursive DFS-esque function to build computational graph from parse tree - - Args: - visited (set): set of visited nodes. - tree (nx.DiGraph): parse tree. - node (int): node index. - terminal_to_torch_map (dict): mapping from terminal symbols to primitives or topologies. - node_label (str): key to access operation name - - Raises: - Exception: primitive or topology is unknown, i.e., it is probably missing in the terminal to - torch mapping - Exception: leftmost children can only be primitive, topology or have one child - - Returns: - [type]: computational graph. - """ - if node not in visited: - subgraphs = [] - primitive_hps = [] - if len(tree.out_edges(node)) == 0: - if is_primitive: - return tree.nodes[node][node_label] - else: - if ( - tree.nodes[node][node_label] - not in terminal_to_torch_map.keys() - ): - raise Exception( - f"Unknown primitive or topology: {tree.nodes[node][node_label]}" - ) - return deepcopy( - terminal_to_torch_map[tree.nodes[node][node_label]] - ) - if len(tree.out_edges(node)) == 1: - return _build_graph_from_tree( - visited, - tree, - list(tree.neighbors(node))[0], - terminal_to_torch_map, - node_label, - is_primitive, - ) - # for idx, neighbor in enumerate(tree.neighbors(node)): - for idx, neighbor in enumerate( - self._get_neighbors_from_parse_tree(tree, node) - ): - if idx == 0: # topology or primitive - n = neighbor - while not tree.nodes[n]["terminal"]: - if len(tree.out_edges(n)) != 1: - raise Exception( - "Leftmost Child can only be primitive, topology or recursively have one child!" - ) - n = next(tree.neighbors(n)) - if is_primitive: - primitive_hp_key = tree.nodes[n][node_label] - primitive_hp_dict = {primitive_hp_key: None} - is_primitive_op = True - else: - if ( - tree.nodes[n][node_label] - not in terminal_to_torch_map.keys() - ): - raise Exception( - f"Unknown primitive or topology: {tree.nodes[n][node_label]}" - ) - graph_el = terminal_to_torch_map[tree.nodes[n][node_label]] - is_primitive_op = issubclass( - graph_el.func - if isinstance(graph_el, partial) - else graph_el, - AbstractPrimitive, - ) - elif not tree.nodes[neighbor][ - "terminal" - ]: # exclude '[' ']' ... symbols - if is_primitive: - primitive_hp_dict[primitive_hp_key] = _build_graph_from_tree( - visited, - tree, - neighbor, - terminal_to_torch_map, - node_label, - is_primitive_op, - ) - elif is_primitive_op: - primitive_hps.append( - _build_graph_from_tree( - visited, - tree, - neighbor, - terminal_to_torch_map, - node_label, - is_primitive_op, - ) - ) - else: - subgraphs.append( - _build_graph_from_tree( - visited, - tree, - neighbor, - terminal_to_torch_map, - node_label, - is_primitive_op, - ) - ) - elif ( - tree.nodes[neighbor][node_label] in terminal_to_torch_map.keys() - ): # exclude '[' ']' ... symbols - # TODO check if there is a potential bug here? 
- subgraphs.append( - deepcopy( - terminal_to_torch_map[tree.nodes[neighbor][node_label]] - ) - ) - - if is_primitive: - return primitive_hp_dict - elif is_primitive_op: - return dict( - collections.ChainMap(*([{"op": graph_el}] + primitive_hps)) - ) - else: - return graph_el(*subgraphs) - - def _flatten_graph( - graph, - flattened_graph, - start_node: int = None, - end_node: int = None, - ): - nodes: dict = {} - for u, v, data in graph.edges(data=True): - if u in nodes.keys(): - _u = nodes[u] - else: - _u = ( - 1 - if len(flattened_graph.nodes.keys()) == 0 - else max(flattened_graph.nodes.keys()) + 1 - ) - _u = ( - start_node - if graph.in_degree(u) == 0 and start_node is not None - else _u - ) - nodes[u] = _u - if _u not in flattened_graph.nodes.keys(): - flattened_graph.add_node(_u) - - if v in nodes.keys(): - _v = nodes[v] - else: - _v = max(flattened_graph.nodes.keys()) + 1 - _v = ( - end_node - if graph.out_degree(v) == 0 and end_node is not None - else _v - ) - nodes[v] = _v - if _v not in flattened_graph.nodes.keys(): - flattened_graph.add_node(_v) - - if isinstance(data["op"], Graph): - flattened_graph = _flatten_graph( - data["op"], flattened_graph, start_node=_u, end_node=_v - ) - else: - flattened_graph.add_edge(_u, _v) - flattened_graph.edges[_u, _v].update(data) - - return flattened_graph - - root_node = self._find_root(tree) - graph = _build_graph_from_tree( - set(), tree, root_node, terminal_to_torch_map, node_label - ) - self._check_graph(graph) - if return_cell: - cell = ( - _flatten_graph(graph, flattened_graph=Graph()) if flatten_graph else graph - ) - return cell - else: - if flatten_graph: - _flatten_graph(graph, flattened_graph=self) - else: - self.add_edge(0, 1) - self.edges[0, 1].set("op", graph) - return None - - def to_graph_repr(self, graph: Graph, edge_attr: bool) -> nx.DiGraph: - """Transforms NASLib-esque graph to NetworkX graph. - - Args: - graph (Graph): NASLib-esque graph. - edge_attr (bool): Transform to edge attribution or node attribution. - - Returns: - nx.DiGraph: edge- or node-attributed representation of computational graph. 
- """ - if edge_attr: - g = nx.DiGraph() - g.add_nodes_from(graph.nodes()) - for u, v in graph.edges(): - if isinstance(graph.edges[u, v]["op"], Graph): - g.add_edge(u, v, op_name=graph.edges[u, v]["op"].name) - else: - g.add_edge( - u, v, **{self.edge_label: graph.edges[u, v][self.edge_label]} - ) - g.graph_type = "edge_attr" - else: - g = nx.DiGraph() - src = [n for n in graph.nodes() if graph.in_degree(n) == 0][0] - tgt = [n for n in graph.nodes() if graph.out_degree(n) == 0][0] - nof_edges = graph.size() - g.add_nodes_from( - [ - (0, {self.edge_label: "input"}), - (nof_edges + 1, {self.edge_label: "output"}), - ] - ) - node_counter = 1 - open_edge: dict = {} - for node in nx.topological_sort(graph): - for edge in graph.out_edges(node): - g.add_node( - node_counter, - **{self.edge_label: graph.edges[edge][self.edge_label]}, - ) - - u, v = edge - if u == src: # special case for input node - g.add_edge(0, node_counter) - if v == tgt: # special case of output node - g.add_edge(node_counter, nof_edges + 1) - if ( - u in open_edge.keys() - ): # add edge between already seen nodes and new node - for node_count in open_edge[u]: - g.add_edge(node_count, node_counter) - - if v in open_edge.keys(): - open_edge[v].append(node_counter) - else: - open_edge[v] = [node_counter] - node_counter += 1 - g.graph_type = "node_attr" - - self._check_graph(g) - - return g - - @staticmethod - def from_stringTree_to_nxTree( - string_tree: str, grammar: Grammar, sym_name: str = "op_name" - ) -> nx.DiGraph: - """Transforms a parse tree from string representation to NetworkX representation. - - Args: - string_tree (str): parse tree. - grammar (Grammar): context-free grammar which generated the parse tree in string represenation. - sym_name (str, optional): Key to save the terminal symbols. Defaults to "op_name". - - Returns: - nx.DiGraph: parse tree as NetworkX representation. 
- """ - - def skip_char(char: str) -> bool: - if char in [" ", "\t", "\n"]: - return True - # special case: "(" is (part of) a terminal - if ( - i != 0 - and char == "(" - and string_tree[i - 1] == " " - and string_tree[i + 1] == " " - ): - return False - if char == "(": - return True - return False - - def find_longest_match( - i: int, string_tree: str, symbols: list[str], max_match: int - ) -> int: - # search for longest matching symbol and add it - # assumes that the longest match is the true match - j = min(i + max_match, len(string_tree) - 1) - while j > i and j < len(string_tree): - if string_tree[i:j] in symbols: - break - j -= 1 - if j == i: - raise Exception(f"Terminal or nonterminal at position {i} does not exist") - return j - - if isinstance(grammar, list) and len(grammar) > 1: - full_grammar = deepcopy(grammar[0]) - rules = full_grammar.productions() - nonterminals = full_grammar.nonterminals - terminals = full_grammar.terminals - for g in grammar[1:]: - rules.extend(g.productions()) - nonterminals.extend(g.nonterminals) - terminals.extend(g.terminals) - grammar = full_grammar - raise NotImplementedError("TODO check implementation") - - symbols = grammar.nonterminals + grammar.terminals - max_match = max(map(len, symbols)) - find_longest_match_func = partial( - find_longest_match, - string_tree=string_tree, - symbols=symbols, - max_match=max_match, - ) - - G = nx.DiGraph() - q: queue.LifoQueue = queue.LifoQueue() - q_children: queue.LifoQueue = queue.LifoQueue() - node_number = 0 - i = 0 - while i < len(string_tree): - char = string_tree[i] - if skip_char(char): - pass - elif char == ")" and not string_tree[i - 1] == " ": - # closing symbol of production - _node_number = q.get(block=False) - _node_children = q_children.get(block=False) - G.nodes[_node_number]["children"] = _node_children - else: - j = find_longest_match_func(i) - sym = string_tree[i:j] - i = j - 1 - node_number += 1 - G.add_node( - node_number, - **{ - sym_name: sym, - "terminal": sym in grammar.terminals, - "children": [], - }, - ) - if not q.empty(): - G.add_edge(q.queue[-1], node_number) - q_children.queue[-1].append(node_number) - if sym in grammar.nonterminals: - q.put(node_number) - q_children.put([]) - i += 1 - - if len(q.queue) != 0: - raise Exception("Invalid string_tree") - return G - - def from_nxTree_to_stringTree( - self, nxTree: nx.DiGraph, node_label: str = "op_name" - ) -> str: - """Transforms parse tree represented as NetworkX DAG to string representation. - - Args: - nxTree (nx.DiGraph): parse tree. - node_label (str, optional): key to access operation names. Defaults to "op_name". - - Returns: - str: parse tree represented as string. 
- """ - - def dfs(visited, graph, node): - if node not in visited: - visited.add(node) - if graph.nodes[node]["terminal"]: - return f"{graph.nodes[node][node_label]}" - tmp_str = f"{f'({graph.nodes[node][node_label]}'}" + " " - # for neighbor in graph.neighbors(node): - for neighbor in self._get_neighbors_from_parse_tree(graph, node): - tmp_str += dfs(visited, graph, neighbor) + " " - tmp_str = tmp_str[:-1] + ")" - return tmp_str - return "" - - return dfs(set(), nxTree, node=self._find_root(nxTree)) - def update_op_names(self): # update op names for u, v in self.edges(): @@ -785,8 +129,8 @@ def from_stringTree_to_graph_repr( sym_name: str = "op_name", prune: bool = True, add_subtree_map: bool = False, - return_all_subgraphs: bool = None, - return_graph_per_hierarchy: bool = None, + return_all_subgraphs: bool | None = None, + return_graph_per_hierarchy: bool | None = None, ) -> nx.DiGraph | tuple[nx.DiGraph, collections.OrderedDict]: """Generates graph from parse tree in string representation. Note that we ignore primitive HPs! @@ -821,17 +165,17 @@ def get_node_labels(graph: nx.DiGraph): def get_hierarchicy_dict( string_tree: str, subgraphs: dict, - hierarchy_dict: dict = None, + hierarchy_dict: dict | None = None, hierarchy_level_counter: int = 0, ): if hierarchy_dict is None: hierarchy_dict = {} - if hierarchy_level_counter not in hierarchy_dict.keys(): + if hierarchy_level_counter not in hierarchy_dict: hierarchy_dict[hierarchy_level_counter] = [] hierarchy_dict[hierarchy_level_counter].append(string_tree) node_labels = get_node_labels(subgraphs[string_tree]) for _, node_label in node_labels: - if node_label in subgraphs.keys(): + if node_label in subgraphs: hierarchy_dict = get_hierarchicy_dict( node_label, subgraphs, hierarchy_dict, hierarchy_level_counter + 1 ) @@ -916,15 +260,13 @@ def to_node_attributed_edge_list( if v == tgt: node_list.append((ni, 1)) - for e_ in filter( - lambda e: (e[1] == u), edge_list - ): + for e_ in filter(lambda e: (e[1] == u), edge_list): node_list.append((edge_to_node_map[e_], ni)) return node_list, edge_to_node_map def skip_char(char: str) -> bool: - return True if char in [" ", "\t", "\n", "[", "]"] else False + return char in [" ", "\t", "\n", "[", "]"] if prune: add_subtree_map = False @@ -937,14 +279,14 @@ def skip_char(char: str) -> bool: G = nx.DiGraph() if add_subtree_map: - q_nonterminals: Deque = collections.deque() + q_nonterminals: collections.deque = collections.deque() if compute_subgraphs: - q_subtrees: Deque = collections.deque() - q_subgraphs: Deque = collections.deque() + q_subtrees: collections.deque = collections.deque() + q_subgraphs: collections.deque = collections.deque() subgraphs_dict = collections.OrderedDict() if edge_attr: node_offset = 0 - q_el: Deque = collections.deque() # edge-attr + q_el: collections.deque = collections.deque() # edge-attr terminal_to_graph = self.terminal_to_graph_edges else: # node-attributed G.add_node(0, **{sym_name: "input"}) @@ -1261,16 +603,18 @@ def get_graph_representation( ) -> nx.DiGraph: """This functions takes an identifier and constructs the (multi-variate) composition of the functions it describes. + Args: identifier (str): identifier grammar (Grammar): grammar flatten_graph (bool, optional): Whether to flatten the graph. Defaults to True. + Returns: - nx.DiGraph: (multi-variate) composition of functions + nx.DiGraph: (multi-variate) composition of functions. 
""" def _skip_char(char: str) -> bool: - return True if char in [" ", "\t", "\n", "[", "]"] else False + return char in [" ", "\t", "\n", "[", "]"] def _get_sym_from_split(split: str) -> str: start_idx, end_idx = 0, len(split) @@ -1298,9 +642,7 @@ def to_node_attributed_edge_list( if v in tgt: node_list.append((ni, v)) - for e_ in filter( - lambda e: (e[1] == u), edge_list - ): + for e_ in filter(lambda e: (e[1] == u), edge_list): node_list.append((edge_to_node_map[e_], ni)) return node_list, edge_to_node_map @@ -1329,12 +671,11 @@ def to_node_attributed_edge_list( if sym in grammar.terminals: is_topology = False - if inspect.isclass(self.terminal_to_op_names[sym]) and issubclass( - self.terminal_to_op_names[sym], AbstractTopology - ): - is_topology = True - elif isinstance(self.terminal_to_op_names[sym], partial) and issubclass( - self.terminal_to_op_names[sym].func, AbstractTopology + if ( + inspect.isclass(self.terminal_to_op_names[sym]) + and issubclass(self.terminal_to_op_names[sym], AbstractTopology) + or isinstance(self.terminal_to_op_names[sym], partial) + and issubclass(self.terminal_to_op_names[sym].func, AbstractTopology) ): is_topology = True @@ -1360,10 +701,7 @@ def to_node_attributed_edge_list( if ( topology in terminal_to_graph and terminal_to_graph[topology] is not None - ): - raise NotImplementedError - # edges = terminal_to_graph[topology] - elif isinstance(topology, partial): + ) or isinstance(topology, partial): raise NotImplementedError else: composed_function = topology(*primitives) @@ -1435,27 +773,27 @@ def prune_graph(self, graph: nx.DiGraph | Graph = None, edge_attr: bool = True): graph.remove_edges_from(remove_edge_list) else: for n in list(nx.topological_sort(graph)): - if n in graph.nodes(): - if ( - graph.nodes[n]["op_name"] in self.zero_op - or graph.nodes[n]["op_name"] in self.identity_op - ): - if graph.nodes[n]["op_name"] in self.identity_op: - # reconnect edges for removed nodes with 'skip_connect' - graph.add_edges_from( - [ - (e_i[0], e_o[1]) - for e_i in graph.in_edges(n) - for e_o in graph.out_edges(n) - ] - ) - # remove nodes with 'skip_connect' or 'none' label - graph.remove_node(n) + if n in graph.nodes() and ( + graph.nodes[n]["op_name"] in self.zero_op + or graph.nodes[n]["op_name"] in self.identity_op + ): + if graph.nodes[n]["op_name"] in self.identity_op: + # reconnect edges for removed nodes with 'skip_connect' + graph.add_edges_from( + [ + (e_i[0], e_o[1]) + for e_i in graph.in_edges(n) + for e_o in graph.out_edges(n) + ] + ) + # remove nodes with 'skip_connect' or 'none' label + graph.remove_node(n) graph = self.prune_unconnected_parts(graph, src_node, tgt_node) if not use_self: return graph + return None @staticmethod def prune_unconnected_parts(graph, src_node, tgt_node): @@ -1486,35 +824,18 @@ def _backtrack_remove(graph, node: int): graph = _backtrack_remove(graph, n) return graph - def _sampler_maxMin(self, largest: bool = True) -> str | list[str]: - """Samples new parse tree(s) based on grammars. - Assumes that the first rule of each production leads to - smallest DAG and last to largest DAG! - - Args: - largest (bool, optional): To find largest DAG, set to True. For smallest DAG set to False. Defaults to True. 
- - Returns: - Union[str, List[str]]: Parse tree or list of parse trees - """ - trees = [ - grammar.sampler_maxMin_func(grammar.start(), largest) + ")" - for grammar in self.grammars - ] - return trees if len(trees) > 1 else trees[0] - @staticmethod def flatten_graph( graph: nx.DiGraph, flattened_graph: Graph = None, - start_node: int = None, - end_node: int = None, + start_node: int | None = None, + end_node: int | None = None, ): if flattened_graph is None: flattened_graph = Graph() nodes: dict = {} for u, v, data in graph.edges(data=True): - if u in nodes.keys(): + if u in nodes: _u = nodes[u] else: _u = ( @@ -1528,17 +849,17 @@ def flatten_graph( else _u ) nodes[u] = _u - if _u not in flattened_graph.nodes.keys(): # type: ignore[union-attr] + if _u not in flattened_graph.nodes: # type: ignore[union-attr] flattened_graph.add_node(_u) # type: ignore[union-attr] flattened_graph.nodes[_u].update(graph.nodes[u]) # type: ignore[union-attr] - if v in nodes.keys(): + if v in nodes: _v = nodes[v] else: _v = max(flattened_graph.nodes.keys()) + 1 # type: ignore[union-attr] _v = end_node if graph.out_degree(v) == 0 and end_node is not None else _v nodes[v] = _v - if _v not in flattened_graph.nodes.keys(): # type: ignore[union-attr] + if _v not in flattened_graph.nodes: # type: ignore[union-attr] flattened_graph.add_node(_v) # type: ignore[union-attr] flattened_graph.nodes[_v].update( # type: ignore[union-attr] graph.nodes[v] @@ -1587,7 +908,7 @@ def _compose_functions( char = descriptor[i] if skip_char(char, descriptor, i): pass - elif char == ")" and not descriptor[i - 1] == " ": + elif char == ")" and descriptor[i - 1] != " ": # closing symbol of production if q_nonterminals.qsize() == q_topologies.qsize(): topology, number_of_primitives = q_topologies.get(block=False) @@ -1606,14 +927,13 @@ def _compose_functions( if sym in grammar.terminals and descriptor[i - 1] != "(": is_topology = False - if inspect.isclass(self.terminal_to_op_names[sym]) and issubclass( - self.terminal_to_op_names[sym], AbstractTopology - ): - is_topology = True - elif isinstance( - self.terminal_to_op_names[sym], partial - ) and issubclass( - self.terminal_to_op_names[sym].func, AbstractTopology + if ( + inspect.isclass(self.terminal_to_op_names[sym]) + and issubclass(self.terminal_to_op_names[sym], AbstractTopology) + or isinstance(self.terminal_to_op_names[sym], partial) + and issubclass( + self.terminal_to_op_names[sym].func, AbstractTopology + ) ): is_topology = True @@ -1640,7 +960,7 @@ def _compose_functions( return composed_function def graph_to_self(self, graph: nx.DiGraph, clear_self: bool = True) -> None: - """Copies graph to self + """Copies graph to self. 
Args: graph (nx.DiGraph): graph @@ -1653,84 +973,6 @@ def graph_to_self(self, graph: nx.DiGraph, clear_self: bool = True) -> None: for n, data in graph.nodes(data=True): self.nodes[n].update(**data) - def _unparse_tree( - self, identifier: str, grammar: Grammar, as_composition: bool = True, - ): - descriptor = self.id_to_string_tree(identifier) - - symbols = grammar.nonterminals + grammar.terminals - max_match = max(map(len, symbols)) - find_longest_match_func = partial( - find_longest_match, - descriptor=descriptor, - symbols=symbols, - max_match=max_match, - ) - - q_nonterminals: queue.LifoQueue = queue.LifoQueue() - q_topologies: queue.LifoQueue = queue.LifoQueue() - q_primitives: queue.LifoQueue = queue.LifoQueue() - i = 0 - while i < len(descriptor): - char = descriptor[i] - if skip_char(char, descriptor, i): - pass - elif char == ")" and not descriptor[i - 1] == " ": - # closing symbol of production - if q_nonterminals.qsize() == q_topologies.qsize(): - topology, number_of_primitives = q_topologies.get(block=False) - primitives = [ - q_primitives.get(block=False) for _ in range(number_of_primitives) - ][::-1] - if as_composition: - if topology == "Linear1": - composed_function = primitives[0] - else: - composed_function = ( - topology + "(" + ", ".join(primitives) + ")" - ) - # composed_function = topology + "(" + ", ".join(primitives) + ")" - else: - composed_function = " ".join([topology] + primitives) - if not q_topologies.empty(): - q_primitives.put(composed_function) - q_topologies.queue[-1][1] += 1 - _ = q_nonterminals.get(block=False) - else: - j = find_longest_match_func(i) - sym = descriptor[i:j] - i = j - 1 - - if sym in grammar.terminals: - is_topology = False - if inspect.isclass(self.terminal_to_op_names[sym]) and issubclass( - self.terminal_to_op_names[sym], AbstractTopology - ): - is_topology = True - elif isinstance( - self.terminal_to_op_names[sym], partial - ) and issubclass( - self.terminal_to_op_names[sym].func, AbstractTopology - ): - is_topology = True - - if is_topology: - q_topologies.put([sym, 0]) - else: # is primitive operation - q_primitives.put(sym) - q_topologies.queue[-1][1] += 1 # count number of primitives - elif sym in grammar.nonterminals: - q_nonterminals.put(sym) - else: - raise Exception(f"Unknown symbol {sym}") - - i += 1 - - if not q_topologies.empty(): - raise Exception("Invalid descriptor") - - return composed_function - def skip_char(char: str, descriptor: str, i: int) -> bool: if char in [" ", "\t", "\n"]: @@ -1738,9 +980,7 @@ def skip_char(char: str, descriptor: str, i: int) -> bool: # special case: "(" is (part of) a terminal if i != 0 and char == "(" and descriptor[i - 1] == " " and descriptor[i + 1] == " ": return False - if char == "(": - return True - return False + return char == "(" def find_longest_match( diff --git a/neps/search_spaces/architecture/crossover.py b/neps/search_spaces/architecture/crossover.py deleted file mode 100644 index 83e104a12..000000000 --- a/neps/search_spaces/architecture/crossover.py +++ /dev/null @@ -1,179 +0,0 @@ -import random -from typing import Callable, List, Tuple - -import numpy as np - -from .cfg import Grammar - - -def simple_crossover( - parent1: str, - parent2: str, - grammar: Grammar, - patience: int = 50, - return_crossover_subtrees: bool = False, -) -> Tuple[str, str]: - if return_crossover_subtrees: - return grammar.crossover( - parent1=parent1, - parent2=parent2, - patience=patience, - return_crossover_subtrees=return_crossover_subtrees, - ) - return grammar.crossover( - parent1=parent1, - 
parent2=parent2, - patience=patience, - ) - - -def repetitive_search_space_crossover( - base_parent: Tuple[str, str], - motif_parents: Tuple[List[str], List[str]], - base_grammar: Grammar, - motif_grammars: List[Grammar], - terminal_to_sublanguage_map: dict, - number_of_repetitive_motifs_per_grammar: list, - inner_crossover_strategy: Callable, - fixed_macro_parent: bool = False, - multiple_repetitive: bool = False, -): - def _motifs_in_base_tree(base_parent, terminal_to_sublanguage_map): - return [ - i + 1 - for i, k in enumerate(terminal_to_sublanguage_map.keys()) - if k in base_parent - ] - - child1_string_trees = [base_parent[0]] + motif_parents[0] - child2_string_trees = [base_parent[1]] + motif_parents[1] - parent1_potential_motif_candidates = _motifs_in_base_tree( - base_parent[0], terminal_to_sublanguage_map - ) - parent2_potential_motif_candidates = _motifs_in_base_tree( - base_parent[1], terminal_to_sublanguage_map - ) - - random_draw = random.randint( - 1 if fixed_macro_parent else 0, - min( - len(parent1_potential_motif_candidates), - len(parent2_potential_motif_candidates), - ), - ) - if random_draw == 0: # crossover high level grammar, but keep repetitive motifs fixed - # parent1_motifs = _motifs_in_base_tree( - # child1_string_trees[0], terminal_to_sublanguage_map - # ) - # parent2_motifs = _motifs_in_base_tree( - # child2_string_trees[0], terminal_to_sublanguage_map - # ) - ( - _, - _, - subtrees_child1, - subtrees_child2, - ) = inner_crossover_strategy( - child1_string_trees[0], - child2_string_trees[0], - base_grammar, - return_crossover_subtrees=True, - ) - subtrees_child1 = list(subtrees_child1) - subtrees_child2 = list(subtrees_child2) - # new_child1_motifs = _motifs_in_base_tree( - # subtrees_child2[1], terminal_to_sublanguage_map - # ) - # new_child2_motifs = _motifs_in_base_tree( - # subtrees_child1[1], terminal_to_sublanguage_map - # ) - - # old_child1_string_trees = deepcopy(child1_string_trees) - # tmp = number_of_repetitive_motifs_per_grammar[1] - # free_motifs = list(set(range(1, tmp + 1)) - set(parent1_motifs)) - # if len(free_motifs) > 0: - # substitute_terminals = list(terminal_to_sublanguage_map.keys()) - # if len(new_child1_motifs) > len(free_motifs): # too many new child motifs - # new_child1_motifs = random.sample( - # new_child1_motifs, - # k=len(free_motifs), - # ) - # elif len(new_child1_motifs) < len( - # free_motifs - # ): # more free spots than necessary - # free_motifs = random.sample( - # free_motifs, - # k=len(new_child1_motifs), - # ) - # for fm, nm in zip(free_motifs, new_child1_motifs): - # child1_string_trees[fm] = child2_string_trees[nm].replace( - # substitute_terminals[nm], substitute_terminals[fm] - # ) - # subtrees_child2[1] = subtrees_child2[1].replace( - # substitute_terminals[nm], substitute_terminals[fm] - # ) - child1_string_trees[0] = ( - subtrees_child1[0] + subtrees_child2[1] + subtrees_child1[2] - ) - - # free_motifs = list(set(range(1, tmp + 1)) - set(parent2_motifs)) - # if len(free_motifs) > 0: - # substitute_terminals = list(terminal_to_sublanguage_map.keys()) - # if len(new_child2_motifs) > len(free_motifs): - # new_child2_motifs = random.sample( - # new_child2_motifs, - # k=len(free_motifs), - # ) - # elif len(new_child2_motifs) < len(free_motifs): - # free_motifs = random.sample( - # free_motifs, - # k=len(new_child2_motifs), - # ) - # for fm, nm in zip(free_motifs, new_child2_motifs): - # child2_string_trees[fm] = old_child1_string_trees[nm].replace( - # substitute_terminals[nm], substitute_terminals[fm] - # ) - # 
subtrees_child1[1] = subtrees_child1[1].replace( - # substitute_terminals[nm], substitute_terminals[fm] - # ) - child2_string_trees[0] = ( - subtrees_child2[0] + subtrees_child1[1] + subtrees_child2[2] - ) - else: - if multiple_repetitive: - # TODO more general procedure - coin_toss = random.randint(1, len(child1_string_trees) - 1) - motif_grammar_idx = next( - i - for i, x in enumerate(np.cumsum(number_of_repetitive_motifs_per_grammar)) - if x >= coin_toss - ) - ( - child1_string_trees[coin_toss], - child2_string_trees[coin_toss], - ) = inner_crossover_strategy( - child1_string_trees[coin_toss], - child2_string_trees[coin_toss], - motif_grammars[motif_grammar_idx], - ) - else: - parent1_random_draw = random.randint( - 0, len(parent1_potential_motif_candidates) - 1 - ) - parent2_random_draw = random.randint( - 0, len(parent2_potential_motif_candidates) - 1 - ) - ( - child1_string_trees[parent1_random_draw + 1], - child2_string_trees[parent2_random_draw + 1], - ) = inner_crossover_strategy( - child1_string_trees[parent1_random_draw + 1], - child2_string_trees[parent2_random_draw + 1], - motif_grammars[0], - ) - - if any(not st for st in child1_string_trees) or any( - not st for st in child2_string_trees - ): - return False, False - return child1_string_trees, child2_string_trees diff --git a/neps/search_spaces/architecture/graph.py b/neps/search_spaces/architecture/graph.py index f776b231f..b7dd5b5ec 100644 --- a/neps/search_spaces/architecture/graph.py +++ b/neps/search_spaces/architecture/graph.py @@ -1,106 +1,25 @@ +from __future__ import annotations + import copy import inspect import logging -import os import random -import sys -from collections import Counter -from typing import Callable -from typing import Counter as CounterType import types +from pathlib import Path +from more_itertools import collapse + import networkx as nx import torch from networkx.algorithms.dag import lexicographical_topological_sort -from pathlib import Path from torch import nn -from neps.utils.types import AttrDict from .primitives import AbstractPrimitive, Identity - -def log_formats(x): - if isinstance(x, torch.Tensor): - return x.shape - if isinstance(x, dict): - return {k: log_formats(v) for k, v in x.items()} - else: - return x - - -def _find_caller(): - """ - Returns: - str: module name of the caller - tuple: a hashable key to be used to identify different callers - """ - frame = sys._getframe(2) - while frame: - code = frame.f_code - if os.path.join("utils", "logger.") not in code.co_filename: - mod_name = frame.f_globals["__name__"] - if mod_name == "__main__": - mod_name = "detectron2" - return mod_name, (code.co_filename, frame.f_lineno, code.co_name) - frame = frame.f_back - - -_LOG_COUNTER: CounterType = Counter() -_LOG_TIMER: dict = {} - - -def log_first_n(lvl, msg, n=1, *, name=None, key="caller"): - """ - Log only for the first n times. - Args: - lvl (int): the logging level - msg (str): - n (int): - name (str): name of the logger to use. Will use the caller's module by default. - key (str or tuple[str]): the string(s) can be one of "caller" or - "message", which defines how to identify duplicated logs. - For example, if called with `n=1, key="caller"`, this function - will only log the first call from the same caller, regardless of - the message content. - If called with `n=1, key="message"`, this function will log the - same content only once, even if they are called from different places. 
- If called with `n=1, key=("caller", "message")`, this function - will not log only if the same caller has logged the same message before. - """ - if isinstance(key, str): - key = (key,) - assert len(key) > 0 - - caller_module, caller_key = _find_caller() - hash_key = () - if "caller" in key: - hash_key = hash_key + caller_key - if "message" in key: - hash_key = hash_key + (msg,) - - _LOG_COUNTER[hash_key] += 1 - if _LOG_COUNTER[hash_key] <= n: - logging.getLogger(name or caller_module).log(lvl, msg) - - -def iter_flatten(iterable): - """ - Flatten a potentially deeply nested python list - """ - # taken from https://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html - it = iter(iterable) - for e in it: - if isinstance(e, (list, tuple)): - yield from iter_flatten(e) - else: - yield e - - logger = logging.getLogger(__name__) class Graph(torch.nn.Module, nx.DiGraph): - """ - Base class for defining a search space. Add nodes and edges + """Base class for defining a search space. Add nodes and edges as for a directed acyclic graph in `networkx`. Nodes can contain graphs as children, also edges can contain graphs as operations. @@ -134,7 +53,7 @@ class Graph(torch.nn.Module, nx.DiGraph): >>> graph = getFancySearchSpace() >>> graph.parse() >>> logits = graph(data) - >>> optimizer.min(loss(logits, target)) + >>> optimizer.min(objective_to_minimize(logits, target)) To update the pytorch module representation (e.g. after removing or adding some new edges), you have to unparse. Beware that this is not fast, so it should @@ -163,19 +82,18 @@ class Graph(torch.nn.Module, nx.DiGraph): """ QUERYABLE = False - def __init__(self, name: str = None, scope: str = None): - """ - Initialise a graph. The edges are automatically filled with an EdgeData object + def __init__(self, name: str | None = None, scope: str | None = None): + """Initialise a graph. The edges are automatically filled with an EdgeData object which defines the default operation as Identity. The default combination operation is set as sum. Note: - When inheriting form `Graph` note that `__init__()` cannot take any parameters. - This is due to the way how networkx is implemented, i.e. graphs are reconstructed - internally and no parameters for init are considered. + When inheriting form `Graph` note that `__init__()` cannot take any + parameters. This is due to the way how networkx is implemented, i.e. graphs + are reconstructed internally and no parameters for init are considered. - Our recommended solution is to create static attributes before initialization and - then load them dynamically in `__init__()`. + Our recommended solution is to create static attributes before initialization + and then load them dynamically in `__init__()`. >>> def __init__(self): >>> num_classes = self.NUM_CLASSES @@ -207,7 +125,7 @@ def __init__(self, name: str = None, scope: str = None): # `input` is required for storing the results of incoming edges. # self._nxgraph.node_attr_dict_factory = lambda: dict({'input': {}, 'comb_op': sum}) - self.node_attr_dict_factory = lambda: dict({"input": {}, "comb_op": sum}) + self.node_attr_dict_factory = lambda: {"input": {}, "comb_op": sum} # remember to add all members also in `unparse()` self.name = name @@ -220,8 +138,7 @@ def __eq__(self, other): return self.name == other.name and self.scope == other.scope def __hash__(self): - """ - As it is very complicated to compare graphs (i.e. check all edge + """As it is very complicated to compare graphs (i.e. 
check all edge attributes, do the have shared attributes, ...) use just the name for comparison. @@ -234,27 +151,10 @@ def __hash__(self): return h def __repr__(self): - return "Graph {}-{:.07f}, scope {}, {} nodes".format( - self.name, self._id, self.scope, self.number_of_nodes() - ) - - def modules_str(self): - """ - Once the graph has been parsed, prints the modules as they appear in pytorch. - """ - if self.is_parsed: - result = "" - for g in self._get_child_graphs(single_instances=True) + [self]: - result += "Graph {}:\n {}\n==========\n".format( - g.name, torch.nn.Module.__repr__(g) - ) - return result - else: - return self.__repr__() + return f"Graph {self.name}-{self._id:.07f}, scope {self.scope}, {self.number_of_nodes()} nodes" def set_scope(self, scope: str, recursively=True): - """ - Sets the scope of this instance of the graph. + """Sets the scope of this instance of the graph. The function should be used in a builder-like pattern `'subgraph'=Graph().set_scope("scope")`. @@ -274,8 +174,7 @@ def set_scope(self, scope: str, recursively=True): return self def add_node(self, node_index, **attr): - """ - Adds a node to the graph. + """Adds a node to the graph. Note that adding a node using an index that has been used already will override its attributes. @@ -288,8 +187,7 @@ def add_node(self, node_index, **attr): nx.DiGraph.add_node(self, node_index, **attr) def copy(self): - """ - Copy as defined in networkx, i.e. a shallow copy. + """Copy as defined in networkx, i.e. a shallow copy. Just handling recursively nested graphs seperately. """ @@ -301,7 +199,7 @@ def copy_dict(d): copied_dict[k] = v.copy() elif isinstance(v, list): copied_dict[k] = [i.copy() if isinstance(i, Graph) else i for i in v] - elif isinstance(v, torch.nn.Module) or isinstance(v, AbstractPrimitive): + elif isinstance(v, (AbstractPrimitive, torch.nn.Module)): copied_dict[k] = copy.deepcopy(v) return copied_dict @@ -317,181 +215,6 @@ def copy_dict(d): G.name = self.name return G - def set_input(self, node_idxs: list): - """ - Route the input from specific parent edges to the input nodes of - this subgraph. Inputs are assigned in lexicographical order. - - Example: - - Parent node (i.e. node where `self` is located on) has two - incoming edges from nodes 3 and 5. - - `self` has two input nodes 1 and 2 (i.e. nodes without - an incoming edge) - - `node_idxs = [5, 3]` - Then input of node 5 is routed to node 1 and input of node 3 - is routed to node 2. - - Similarly, if `node_idxs = [5, 5]` then input of node 5 is routed - to both node 1 and 2. Warning: In this case the output of another - incoming edge is ignored! - - Should be used in a builder-like pattern: `'subgraph'=Graph().set_input([5, 3])` - - Args: - node_idx (list): The index of the nodes where the data is coming from. - - Returns: - Graph: self with input node indices set. - - """ - num_innodes = sum(self.in_degree(n) == 0 for n in self.nodes) - assert num_innodes == len( - node_idxs - ), "Expecting node index for every input node. Excpected {}, got {}".format( - num_innodes, len(node_idxs) - ) - self.input_node_idxs = node_idxs # type: ignore[assignment] - return self - - def num_input_nodes(self) -> int: - """ - The number of input nodes, i.e. the nodes without an - incoming edge. - - Returns: - int: Number of input nodes. - """ - return sum(self.in_degree(n) == 0 for n in self.nodes) - - def _assign_x_to_nodes(self, x): - """ - Assign x to the input nodes of self. Depending whether on - edge or nodes. 
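The builder pattern from the set_scope docstring is how one cell definition gets reused across stages; copy() above keeps nested graphs intact while set_scope relabels the copy. A small sketch (the names are invented):

# One cell graph, reused in two scopes.
cell = Graph(name="cell")
stage_1 = cell.copy().set_scope("stage_1")
stage_2 = cell.copy().set_scope("stage_2")
# Per __eq__ above, the two copies compare unequal (same name,
# different scope), while __hash__ deliberately uses the name only.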
- - Performs also several sanity checks of the input. - - Args: - x (Tensor or dict): Input to be assigned. - """ - # We need dict in case of cell and int in case of motif - assert isinstance(x, dict) or isinstance(x, torch.Tensor) - - if self.input_node_idxs is None: - assert ( - self.num_input_nodes() == 1 - ), "There are more than one input nodes but input indeces are not defined." - input_node = [n for n in self.nodes if self.in_degree(n) == 0][0] - assert ( - len(list(self.predecessors(input_node))) == 0 - ), "Expecting node 1 to be the parent." - assert ( - "subgraph" not in self.nodes[input_node].keys() - ), "Expecting node 1 not to have a subgraph as it serves as input node." - assert isinstance(x, torch.Tensor) - self.nodes[input_node]["input"] = {0: x} - else: - # assign the input to the corresponding nodes - assert all( - [i in x.keys() for i in self.input_node_idxs] - ), "got x from an unexpected input edge" - if self.num_input_nodes() > len(x): - # here is the case where the same input is assigned to more than one node - # this can happen when there are cells with two inputs but at the very first - # layer of the network, there is just one output (i.e. the data inputed to the - # makro input node). Handle it and log a Info. This should happen only rarly - logger.debug( - f"We are using the same x for two inputs in graph {self.name}" - ) - input_node_iterator = iter(self.input_node_idxs) - for node_idx in lexicographical_topological_sort(self): - if self.in_degree(node_idx) == 0: - self.nodes[node_idx]["input"] = {0: x[next(input_node_iterator)]} - - def forward(self, x, *args): - """ - Forward some data through the graph. This is done recursively - in case there are graphs defined on nodes or as 'op' on edges. - - Args: - x (Tensor or dict): The input. If the graph sits on a node the - input can be a dict with {source_idx: Tensor} to be routed - to the defined input nodes. If the graph sits on an edge, - x is the feature tensor. - args: This is only required to handle cases where the graph sits - on an edge and receives an EdgeData object which will be ignored - """ - logger.debug(f"Graph {self.name} called. Input {log_formats(x)}.") - - # Assign x to the corresponding input nodes - self._assign_x_to_nodes(x) - - for node_idx in lexicographical_topological_sort(self): - node = self.nodes[node_idx] - logger.debug( - "Node {}-{}, current data {}, start processing...".format( - self.name, node_idx, log_formats(node) - ) - ) - - # node internal: process input if necessary - if ("subgraph" in node and "comb_op" not in node) or ( - "comb_op" in node and "subgraph" not in node - ): - log_first_n( - logging.WARN, "Comb_op is ignored if subgraph is defined!", n=1 - ) - # TODO: merge 'subgraph' and 'comb_op'. It is basicallly the same thing. Also in parse() - if "subgraph" in node: - x = node["subgraph"].forward(node["input"]) - else: - if len(node["input"].values()) == 1: - x = list(node["input"].values())[0] - else: - x = node["comb_op"]( - [node["input"][k] for k in sorted(node["input"].keys())] - ) - node["input"] = {} # clear the input as we have processed it - - if ( - len(list(self.neighbors(node_idx))) == 0 - and node_idx < list(lexicographical_topological_sort(self))[-1] - ): - # We have more than one output node. This is e.g. the case for - # auxillary losses. Attach them to the graph, handling must done - # by the user. - logger.debug( - "Graph {} has more then one output node. 
Storing output of non-maximum index node {} at graph dict".format( - self, node_idx - ) - ) - self.graph[f"out_from_{node_idx}"] = x - else: - # outgoing edges: process all outgoing edges - for neigbor_idx in self.neighbors(node_idx): - edge_data = self.get_edge_data(node_idx, neigbor_idx) - # inject edge data only for AbstractPrimitive, not Graphs - if isinstance(edge_data.op, Graph): - edge_output = edge_data.op.forward(x) - elif isinstance(edge_data.op, AbstractPrimitive): - logger.debug( - "Processing op {} at edge {}-{}".format( - edge_data.op, node_idx, neigbor_idx - ) - ) - edge_output = edge_data.op.forward(x) - else: - raise ValueError( - "Unknown class as op: {}. Expected either Graph or AbstactPrimitive".format( - edge_data.op - ) - ) - self.nodes[neigbor_idx]["input"].update({node_idx: edge_output}) - - logger.debug(f"Node {self.name}-{node_idx}, processing done.") - - logger.debug(f"Graph {self.name} exiting. Output {log_formats(x)}.") - return x - def to_pytorch(self, **kwargs) -> nn.Module: return self._to_pytorch(**kwargs) @@ -504,7 +227,7 @@ def _import_code(code: str, name: str): if not self.is_parsed: self.parse() - input_node = [n for n in self.nodes if self.in_degree(n) == 0][0] + input_node = next(n for n in self.nodes if self.in_degree(n) == 0) input_name = "x0" self.nodes[input_node]["input"] = {0: input_name} @@ -522,7 +245,7 @@ def _import_code(code: str, name: str): input_name = f"x{max_xidx + 1}" used_input_names.append(max_xidx + 1) forward_f.append(_forward_f) - x = f"x{max_xidx+1}" + x = f"x{max_xidx + 1}" else: if len(node["input"].values()) == 1: x = next(iter(node["input"].values())) @@ -532,7 +255,7 @@ def _import_code(code: str, name: str): "__name__" in dir(node["comb_op"]) and node["comb_op"].__name__ == "sum" ): - _forward_f = f"x{max_xidx+1}=sum([" + _forward_f = f"x{max_xidx + 1}=sum([" elif isinstance(node["comb_op"], torch.nn.Module): submodule_list.append(node["comb_op"]) _forward_f = f"x{max_xidx + 1}=self.module_list[{len(submodule_list) - 1}]([" @@ -543,7 +266,7 @@ def _import_code(code: str, name: str): _forward_f += inp + "," _forward_f = _forward_f[:-1] + "])" forward_f.append(_forward_f) - x = f"x{max_xidx+1}" + x = f"x{max_xidx + 1}" if int(x[1:]) not in used_input_names: used_input_names.append(int(x[1:])) node["input"] = {} # clear the input as we have processed it @@ -579,9 +302,7 @@ def _import_code(code: str, name: str): forward_f.append(_forward_f) else: raise ValueError( - "Unknown class as op: {}. Expected either Graph or AbstactPrimitive".format( - edge_data.op - ) + f"Unknown class as op: {edge_data.op}. Expected either Graph or AbstactPrimitive" ) self.nodes[neigbor_idx]["input"].update({node_idx: input_name}) @@ -608,15 +329,14 @@ def _import_code(code: str, name: str): model.set_module_list(submodule_list) if write_out: - tmp_path = Path(os.path.dirname(os.path.realpath(__file__))) / "model.py" + tmp_path = Path(__file__).parent.resolve() / "model.py" with open(tmp_path, "w", encoding="utf-8") as outfile: outfile.write(model_file) return model def parse(self): - """ - Convert the graph into a neural network which can then + """Convert the graph into a neural network which can then be optimized by pytorch. 
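Taken together with compile() further down (which instantiates the ops stored on the edges), the intended lifecycle is roughly the following; getFancySearchSpace is the hypothetical subclass from the class docstring, and the exact call order here is an assumption, not something this file fixes:

graph = getFancySearchSpace()  # hypothetical Graph subclass
graph.compile()                # instantiate ops declared on the edges
graph.parse()                  # register everything as pytorch modules
model = graph.to_pytorch()     # standalone nn.Module for training
logits = model(data)           # data: a torch.Tensor batch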
""" for node_idx in lexicographical_topological_sort(self): @@ -626,12 +346,11 @@ def parse(self): f"{self.name}-subgraph_at({node_idx})", self.nodes[node_idx]["subgraph"], ) - else: - if isinstance(self.nodes[node_idx]["comb_op"], torch.nn.Module): - self.add_module( - f"{self.name}-comb_op_at({node_idx})", - self.nodes[node_idx]["comb_op"], - ) + elif isinstance(self.nodes[node_idx]["comb_op"], torch.nn.Module): + self.add_module( + f"{self.name}-comb_op_at({node_idx})", + self.nodes[node_idx]["comb_op"], + ) for neigbor_idx in self.neighbors(node_idx): edge_data = self.get_edge_data(node_idx, neigbor_idx) @@ -649,8 +368,7 @@ def parse(self): self.is_parsed = True def unparse(self): - """ - Undo the pytorch parsing by reconstructing the graph uusing the + """Undo the pytorch parsing by reconstructing the graph uusing the networkx data structures. This is done recursively also for child graphs. @@ -689,8 +407,7 @@ def unparse(self): return g def _get_child_graphs(self, single_instances: bool = False) -> list: - """ - Get all child graphs of the current graph. + """Get all child graphs of the current graph. Args: single_instances (bool): Whether to return multiple instances @@ -730,9 +447,7 @@ def _get_child_graphs(self, single_instances: bool = False) -> list: graphs.append(child_op._get_child_graphs()) else: logger.debug( - "Got embedded op, but is neither a graph nor a list: {}".format( - embedded_ops - ) + f"Got embedded op, but is neither a graph nor a list: {embedded_ops}" ) elif inspect.isclass(edge_data.op): assert not issubclass( @@ -744,7 +459,7 @@ def _get_child_graphs(self, single_instances: bool = False) -> list: else: raise ValueError(f"Unknown format of op: {edge_data.op}") - graphs = [g for g in iter_flatten(graphs)] + graphs = list(collapse(graphs)) if single_instances: single: list = [] @@ -755,50 +470,9 @@ def _get_child_graphs(self, single_instances: bool = False) -> list: else: return sorted(graphs, key=lambda g: g.name) - def get_all_edge_data( - self, key: str, scope="all", private_edge_data: bool = False - ) -> list: - """ - Get edge attributes of this graph and all child graphs in one go. - - Args: - key (str): The key of the attribute - scope (str): The scope to be applied - private_edge_data (bool): Whether to return data from graph copies as well. - - Returns: - list: All data in a list. 
- """ - assert scope is not None - result = [] - for graph in self._get_child_graphs(single_instances=not private_edge_data) + [ - self - ]: - if ( - scope == "all" - or graph.scope == scope - or (isinstance(scope, list) and graph.scope in scope) - ): - for _, _, edge_data in graph.edges.data(): - if edge_data.has(key): - result.append(edge_data[key]) - return result - - def set_at_edges(self, key, value, shared=False): - """ - Sets the attribute for all edges in this and any child graph - """ - for graph in self._get_child_graphs(single_instances=shared) + [self]: - logger.debug(f"Updating edges of graph {graph.name}") - for _, _, edge_data in graph.edges.data(): - if not edge_data.is_final(): - edge_data.set(key, value, shared) - def compile(self): - """ - Instanciates the ops at the edges using the arguments specified at the edges - """ - for graph in self._get_child_graphs(single_instances=False) + [self]: + """Instanciates the ops at the edges using the arguments specified at the edges.""" + for graph in [*self._get_child_graphs(single_instances=False), self]: logger.debug(f"Compiling graph {graph.name}") for _, v, edge_data in graph.edges.data(): if not edge_data.is_final(): @@ -830,237 +504,17 @@ def compile(self): else: raise ValueError(f"Unkown format of op: {op}") - @staticmethod - def _verify_update_function(update_func: Callable, private_edge_data: bool): - """ - Verify that the update function actually modifies only - shared/private edge data attributes based on setting of - `private_edge_data`. - - Args: - update_func (callable): callable that expects one argument - named `current_edge_data`. - private_edge_data (bool): Whether the update function is applied - to all graph instances including copies or just to one instance - per graph - """ - - test = EdgeData() - test.set("shared", True, shared=True) - test.set("op", [True]) - - try: - result = test.clone() - update_func(current_edge_data=result) - except Exception: - log_first_n( - logging.WARN, - "Update function could not be veryfied. Be cautious with the " - "setting of `private_edge_data` in `update_edges()`", - n=5, - ) - return - - assert isinstance( - result, EdgeData - ), "Update function does not return the edge data object." - - if private_edge_data: - assert result._shared == test._shared, ( - "The update function changes shared data although `private_edge_data` set to True. " - "This is not the indended use of `update_edges`. The update function should only modify " - "private edge data." - ) - else: - assert result._private == test._private, ( - "The update function changes private data although `private_edge_data` set to False. " - "This is not the indended use of `update_edges`. The update function should only modify " - "shared edge data." - ) - - def update_edges( - self, update_func: Callable, scope="all", private_edge_data: bool = False - ): - """ - This updates the edge data of this graph and all child graphs. - This is the preferred way to manipulate the edges after the definition - of the graph, e.g. by optimizers who want to insert their own op. - `update_func(current_edge_data)`. This way optimizers - can initialize and store necessary information at edges. - - Note that edges marked as 'final' will not be updated here. - - Args: - update_func (callable): Function which accepts one argument called `current_edge_data`. - and returns the modified EdgeData object. - scope (str or list(str)): Can be "all" or list of scopes to be updated. 
- private_edge_data (bool): If set to true, this means update_func will be - applied to all edges. THIS IS NOT RECOMMENDED FOR SHARED - ATTRIBUTES. Shared attributes should be set only once, we - take care it is syncronized across all copies of this graph. - - The only usecase for setting it to true is when actually changing - `op` during the initialization of the optimizer (e.g. replacing it - with MixedOp or SampleOp) - """ - Graph._verify_update_function(update_func, private_edge_data) - assert scope is not None - for graph in self._get_child_graphs(single_instances=not private_edge_data) + [ - self - ]: - if ( - scope == "all" - or scope == graph.scope - or (isinstance(scope, list) and graph.scope in scope) - ): - logger.debug(f"Updating edges of graph {graph.name}") - for u, v, edge_data in graph.edges.data(): - if not edge_data.is_final(): - edge = AttrDict(head=u, tail=v, data=edge_data) - update_func(edge=edge) - self._delete_flagged_edges() - - def update_nodes( - self, update_func: Callable, scope="all", single_instances: bool = True - ): - """ - Update the nodes of the graph and its incoming and outgoing edges by iterating over the - graph and applying `update_func` to each of it. This is the - preferred way to change the search space once it has been defined. - - Note that edges marked as 'final' will not be updated here. - - Args: - update_func (callable): Function that accepts three incoming parameters named - `node, in_edges, out_edges`. - - `node` is a tuple (int, dict) containing the - index and the attributes of the current node. - - `in_edges` is a list of tuples with the index of - the tail of the edge and its EdgeData. - - `out_edges is a list of tuples with the index of - the head of the edge and its EdgeData. - scope (str or list(str)): Can be "all" or list of scopes to be updated. Only graphs - and child graphs with the specified scope are considered - single_instance (bool): If set to false, this means update_func will be - applied to nodes of all copies of a graphs. THIS IS NOT RECOMMENDED FOR SHARED - ATTRIBUTES, i.e. when manipulating the shared data of incoming or outgoing edges. - Shared attributes should be set only once, we take care it is syncronized across - all copies of this graph. - - The only usecase for setting it to true is when actually changing - `op` during the initialization of the optimizer (e.g. replacing it - with MixedOp or SampleOp) - """ - assert scope is not None - for graph in self._get_child_graphs(single_instances) + [self]: - if ( - scope == "all" - or graph.scope == scope - or (isinstance(scope, list) and graph.scope in scope) - ): - logger.debug(f"Updating nodes of graph {graph.name}") - for node_idx in lexicographical_topological_sort(graph): - node = (node_idx, graph.nodes[node_idx]) - in_edges = list(graph.in_edges(node_idx, data=True)) # (v, u, data) - in_edges = [ - (v, data) for v, u, data in in_edges if not data.is_final() - ] # u is same for all - out_edges = list(graph.out_edges(node_idx, data=True)) # (v, u, data) - out_edges = [ - (u, data) for v, u, data in out_edges if not data.is_final() - ] # v is same for all - update_func(node=node, in_edges=in_edges, out_edges=out_edges) - self._delete_flagged_edges() - - def _delete_flagged_edges(self): - """ - Delete edges which associated EdgeData is flagged as deleted. 
- """ - for graph in self._get_child_graphs(single_instances=False) + [ - self - ]: # we operate on shallow copies - to_remove = [] - for u, v, edge_data in graph.edges.data(): - if edge_data.is_deleted(): - to_remove.append((u, v)) - if to_remove: - # logger.info("Removing edges {} from graph {}".format(to_remove, graph)) - graph.remove_edges_from(to_remove) - def clone(self): - """ - Deep copy of the current graph. + """Deep copy of the current graph. Returns: Graph: Deep copy of the graph. """ return copy.deepcopy(self) - def reset_weights(self, inplace: bool = False): - """ - Resets the weights for the 'op' at all edges. - - Args: - inplace (bool): Do the operation in place or - return a modified copy. - Returns: - Graph: Returns the modified version of the graph. - """ - - def weight_reset(m): - if isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.Linear): - m.reset_parameters() - - if inplace: - graph = self - else: - graph = self.clone() - - graph.apply(weight_reset) - - return graph - - def prepare_discretization(self): - """ - In some cases the search space is manipulated before the final - discretization is happening, e.g. DARTS. In such chases this should - be defined in the search space, so all optimizers can call it. - """ - - def prepare_evaluation(self): - """ - In some cases the evaluation architecture does not match the searched - one. An example is where the makro_model is extended to increase the - parameters. This is done here. - """ - - def get_dense_edges(self): - """ - Returns the edge indices (i, j) that would make a fully connected - DAG without circles such that i < j and i != j. Assumes nodes are - already created. - - Returns: - list: list of edge indices. - """ - edges = [] - nodes = sorted(list(self.nodes())) - for i in nodes: - for j in nodes: - if i != j and j > i: - edges.append((i, j)) - return edges - - def add_edges_densly(self): - """ - Adds edges to get a fully connected DAG without cycles - """ - self.add_edges_from(self.get_dense_edges()) - class EdgeData: - """ - Class that holds data for each edge. + """Class that holds data for each edge. Data can be shared between instances of the graph where the edges lives in. @@ -1071,10 +525,9 @@ class EdgeData: in a dict-like fashion with `[key]`. To set a new item use `.set()`. """ - def __init__(self, data: dict = None): - """ - Initializes a new EdgeData object. - 'op' is set as Identity() and private by default + def __init__(self, data: dict | None = None): + """Initializes a new EdgeData object. + 'op' is set as Identity() and private by default. Args: data (dict): Inject some initial data. Will be always private. @@ -1093,20 +546,6 @@ def __init__(self, data: dict = None): for k, v in data.items(): self.set(k, v, shared=False) - def has(self, key: str): - """ - Checks whether `key` exists. - - Args: - key (str): The key to check. - - Returns: - bool: True if key exists, False otherwise. - - """ - assert not key.startswith("_"), "Access to private keys not allowed!" - return key in self._private.keys() or key in self._shared.keys() - def __getitem__(self, key: str): assert not str(key).startswith("_"), "Access to private keys not allowed!" return self.__getattr__(str(key)) @@ -1119,7 +558,7 @@ def get(self, key: str, default): def __getattr__(self, key: str): if key.startswith("__"): # Required for deepcopy, not sure why - raise AttributeError(key) # + raise AttributeError(key) assert not key.startswith("_"), "Access to private keys not allowed!" 
         if key in self._private:
            return self._private[key]
@@ -1135,14 +574,13 @@ def __setattr__(self, name: str, val):
            raise ValueError("not allowed. use set().")

    def __str__(self):
-        return f"private: <{str(self._private)}>, shared: <{str(self._shared)}>"
+        return f"private: <{self._private!s}>, shared: <{self._shared!s}>"

    def __repr__(self):
        return self.__str__()

    def update(self, data):
-        """
-        Update the data in here. If the data is added as dict,
+        """Update the data in here. If the data is added as dict,
        then all variables will be handled as private.

        Args:
@@ -1159,8 +597,7 @@ def update(self, data):
            raise ValueError(f"Unsupported type {data}")

    def remove(self, key: str):
-        """
-        Removes an item from the EdgeData
+        """Removes an item from the EdgeData.

        Args:
            key (str): The key for the item to be removed.
@@ -1173,8 +610,7 @@ def remove(self, key: str):
            raise KeyError(f"Tried to delete unkown key {key}")

    def copy(self):
-        """
-        When a graph is copied to get multiple instances (e.g. when
+        """When a graph is copied to get multiple instances (e.g. when
        reusing subgraphs at more than one location) then this function
        will be called for all edges.

@@ -1204,8 +640,7 @@ def copy(self):
        return new_self

    def set(self, key: str, value, shared=False):
-        """
-        Used to assign a new item to the EdgeData object.
+        """Used to assign a new item to the EdgeData object.

        Args:
            key (str): The key.
@@ -1214,9 +649,7 @@ def set(self, key: str, value, shared=False):
                be a shallow copy between different instances of EdgeData
                (and consequently between different instances of Graph).
        """
-        assert isinstance(key, str), "Accepting only string keys, got {}".format(
-            type(key)
-        )
+        assert isinstance(key, str), f"Accepting only string keys, got {type(key)}"
        assert not key.startswith("_"), "Access to private keys not allowed!"
        assert not self.is_final(), "Trying to change finalized edge!"
        if shared:
            if key in self._private:
                raise ValueError("Key {} alredy defined as non-shared")
            else:
                self._shared[key] = value
+        elif key in self._shared:
+            raise ValueError(f"Key {key} already defined as shared")
        else:
-            if key in self._shared:
-                raise ValueError(f"Key {key} alredy defined as shared")
-            else:
-                self._private[key] = value
+            self._private[key] = value

    def clone(self):
-        """
-        Return a true deep copy of EdgeData. Even shared
+        """Return a true deep copy of EdgeData. Even shared
        items are not shared anymore.

        Returns:
@@ -1240,31 +671,9 @@ def clone(self):
        """
        return copy.deepcopy(self)

-    def delete(self):
-        """
-        Flag to delete the edge where this instance is attached to.
-        """
-        self._shared["_deleted"] = True
-
-    def is_deleted(self):
-        """
-        Returns true if the edge is flagged to be deleted
-        """
-        return self._shared["_deleted"]
-
-    def finalize(self):
-        """
-        Sets this edge as final. This means it cannot be changed
-        anymore and will also not appear in the update functions
-        of the graph.
-        """
-        self._private["_final"] = True
-        return self
-
    def is_final(self):
-        """
-        Returns:
-            bool: True if the edge was finalized, False else
+        """Returns:
+        bool: True if the edge was finalized, False else.
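The private/shared split documented above drives how edges behave when a graph is duplicated: set() stores privately unless shared=True, copy() is the per-edge hook used when graphs are reused, and clone() severs even shared state. A short sketch with invented keys:

edge = EdgeData()
edge.set("stride", 2)                        # private to this instance
edge.set("alpha", [0.3, 0.7], shared=True)   # shallow-shared across copies
twin = edge.copy()     # private data copied; shared data still shared
assert twin.stride == 2
deep = edge.clone()    # true deep copy: shared items no longer shared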
""" return self._private["_final"] diff --git a/neps/search_spaces/architecture/graph_grammar.py b/neps/search_spaces/architecture/graph_grammar.py index e21e94d8a..e8ce9e906 100644 --- a/neps/search_spaces/architecture/graph_grammar.py +++ b/neps/search_spaces/architecture/graph_grammar.py @@ -1,23 +1,23 @@ from __future__ import annotations from abc import abstractmethod -from collections import OrderedDict from copy import deepcopy -from functools import partial from typing import Any, ClassVar, Mapping from typing_extensions import override, Self -from neps.utils.types import NotSet, _NotSet +from neps.utils.types import NotSet +from typing import TYPE_CHECKING, Any, ClassVar, Mapping +from typing_extensions import Self, override import networkx as nx -import numpy as np -from nltk import Nonterminal -from ..parameter import ParameterWithPrior, MutatableParameter -from .cfg import Grammar -from .cfg_variants.constrained_cfg import ConstrainedGrammar +from neps.search_spaces.parameter import ParameterWithPrior +from neps.utils.types import NotSet + from .core_graph_grammar import CoreGraphGrammar -from .crossover import repetitive_search_space_crossover, simple_crossover -from .mutations import bananas_mutate, repetitive_search_space_mutation, simple_mutate +from .mutations import bananas_mutate, simple_mutate + +if TYPE_CHECKING: + from .cfg import Grammar # TODO(eddiebergman): This is a halfway solution, but essentially a lot @@ -28,7 +28,9 @@ # The problem here is that the `Parameter` expects the `load_from` # and the `.value` to be the same type, which is not the case for # graph based parameters. -class GraphParameter(ParameterWithPrior[nx.DiGraph, str], MutatableParameter): +class GraphParameter( # noqa: D101 + ParameterWithPrior[nx.DiGraph, str] +): # NOTE(eddiebergman): What I've managed to learn so far is that # these hyperparameters work mostly with strings externally, # i.e. setting the value through `load_from` or `set_value` should be a string. @@ -38,9 +40,10 @@ class GraphParameter(ParameterWithPrior[nx.DiGraph, str], MutatableParameter): # At serialization time, it doesn't actually serialize the .value but instead # relies on the string it was passed initially, I'm not actually sure if there's # a way to go from the graph object to the string in this code... - # Essentially on the outside, we need to ensure we don't pass ih the graph object itself + # Essentially on the outside, we need to ensure we don't pass ih the graph object + # itself DEFAULT_CONFIDENCE_SCORES: ClassVar[Mapping[str, float]] = {"not_in_use": 1.0} - default_confidence_choice = "not_in_use" + prior_confidence_choice = "not_in_use" has_prior: bool input_kwargs: dict[str, Any] @@ -80,8 +83,8 @@ def set_value(self, value: str | None) -> None: # `self.value = None` if not isinstance(value, str): raise ValueError( - f"Expected a string for setting value a `GraphParameter`", - f" got {type(value)}" + "Expected a string for setting value a `GraphParameter`", + f" got {type(value)}", ) self.reset() self.normalized_value = value @@ -91,12 +94,6 @@ def set_value(self, value: str | None) -> None: self.create_from_id(value) - @override - def set_default(self, default: str | None) -> None: - # TODO(eddiebergman): Could find no mention of the word 'default' in the - # GraphGrammers' hence... 
well this is all I got - self.default = default - @override def sample_value(self, *, user_priors: bool = False) -> nx.DiGraph: # TODO(eddiebergman): This could definitely be optimized @@ -104,57 +101,30 @@ def sample_value(self, *, user_priors: bool = False) -> nx.DiGraph: # of it. return self.sample(user_priors=user_priors).value - @classmethod - def serialize_value(cls, value: nx.DiGraph) -> str: - """Functionality relying on this for GraphParameters should - special case and use `self.id`. - - !!! warning - - Graph parameters don't directly support serialization. - Instead they rely on holding on to the original string value - from which they were created from. - """ - raise NotImplementedError - - @classmethod - def deserialize_value(cls, value: str) -> nx.DiGraph: - """Functionality relying on this for GraphParameters should - special case for whever this is needed... - - !!! warning - - Graph parameters don't directly support serialization. - Instead they rely on holding on to the original string value - from which they were created from. - """ - raise NotImplementedError - @override - def load_from(self, value: str | Self) -> None: - if isinstance(value, GraphParameter): - value = value.id - self.create_from_id(value) + def load_from(self, value: Any) -> None: + match value: + case GraphParameter(): + value = value.id + case str(): + self.create_from_id(value) + case _: + raise TypeError(f"Unrecognized type {type(value)}") @abstractmethod - def mutate(self, parent: Self | None = None, *, mutation_strategy: str = "bananas") -> Self: ... + def mutate( # noqa: D102 + self, parent: Self | None = None, *, mutation_strategy: str = "bananas" + ) -> Self: ... - @abstractmethod - def crossover(self, parent1: Self, parent2: Self | None = None) -> tuple[Self, Self]: - ... 
- - def _get_non_unique_neighbors(self, num_neighbours: int) -> list[Self]: - raise NotImplementedError - - def value_to_normalized(self, value: nx.DiGraph) -> float: + def value_to_normalized(self, value: nx.DiGraph) -> float: # noqa: D102 raise NotImplementedError - def normalized_to_value(self, normalized_value: float) -> nx.DiGraph: + def normalized_to_value(self, normalized_value: float) -> nx.DiGraph: # noqa: D102 raise NotImplementedError @override def clone(self) -> Self: - new_self = self.__class__(**self.input_kwargs) + new_self = self.__class__(**self.input_kwargs) # HACK(eddiebergman): It seems the subclasses all have these and # so we just copy over those attributes, deepcloning anything that is mutable @@ -163,7 +133,6 @@ def clone(self) -> Self: ("_value", True), ("string_tree", False), ("string_tree_list", False), - ("nxTree", False), ("_function_id", False), ) for _attr, is_mutable in _attrs_that_subclasses_use_to_reoresent_a_value: @@ -178,24 +147,29 @@ def clone(self) -> Self: return new_self + class GraphGrammar(GraphParameter, CoreGraphGrammar): hp_name = "graph_grammar" - def __init__( + def __init__( # noqa: D107, PLR0913 self, grammar: Grammar, terminal_to_op_names: dict, - prior: dict = None, - terminal_to_graph_edges: dict = None, - edge_attr: bool = True, + prior: dict | None = None, + terminal_to_graph_edges: dict | None = None, + edge_attr: bool = True, # noqa: FBT001, FBT002 edge_label: str = "op_name", - zero_op: list = ["Zero", "zero"], - identity_op: list = ["Identity", "id"], - new_graph_repr_func: bool = False, - name: str = None, - scope: str = None, + zero_op: list | None = None, + identity_op: list | None = None, + new_graph_repr_func: bool = False, # noqa: FBT001, FBT002 + name: str | None = None, + scope: str | None = None, **kwargs, ): + if identity_op is None: + identity_op = ["Identity", "id"] + if zero_op is None: + zero_op = ["Zero", "zero"] if isinstance(grammar, list) and len(grammar) != 1: raise NotImplementedError("Does not support multiple grammars") @@ -212,11 +186,10 @@ def __init__( scope=scope, **kwargs, ) - GraphParameter.__init__(self, value=None, default=None, is_fidelity=False) + GraphParameter.__init__(self, value=None, prior=None, is_fidelity=False) self.string_tree: str = "" self._function_id: str = "" - self.nxTree: nx.DiGraph | None = None self.new_graph_repr_func = new_graph_repr_func if prior is not None: @@ -227,7 +200,9 @@ def __init__( def sample(self, *, user_priors: bool = False) -> Self: copy_self = self.clone() copy_self.reset() - copy_self.string_tree = copy_self.grammars[0].sampler(1, user_priors=user_priors)[0] + copy_self.string_tree = copy_self.grammars[0].sampler(1, user_priors=user_priors)[ + 0 + ] _ = copy_self.value # required for checking if graph is valid! 
return copy_self @@ -285,29 +260,12 @@ def mutate( self.string_tree_to_id(child_string_tree) ) - @override - def crossover(self, parent1: Self, parent2: Self | None = None) -> tuple[Self, Self]: - if parent2 is None: - parent2 = self - parent1_string_tree = parent1.string_tree - parent2_string_tree = parent2.string_tree - children = simple_crossover( - parent1_string_tree, parent2_string_tree, self.grammars[0] - ) - if all(not c for c in children): - raise Exception("Cannot create crossover") - - return tuple( - parent2.create_new_instance_from_id(self.string_tree_to_id(child)) - for child in children - ) - @override def compute_prior(self, *, log: bool = True) -> float: return self.grammars[0].compute_prior(self.string_tree, log=log) @property - def id(self) -> str: + def id(self) -> str: # noqa: D102 if self._function_id is None or self._function_id == "": if self.string_tree == "": raise ValueError("Cannot infer identifier!") @@ -318,7 +276,7 @@ def id(self) -> str: def id(self, value: str) -> None: self._function_id = value - def create_from_id(self, identifier: str) -> None: + def create_from_id(self, identifier: str) -> None: # noqa: D102 self.reset() self._function_id = identifier self.id = identifier @@ -326,723 +284,25 @@ def create_from_id(self, identifier: str) -> None: _ = self.value # required for checking if graph is valid! @staticmethod - def id_to_string_tree(identifier: str) -> str: + def id_to_string_tree(identifier: str) -> str: # noqa: D102 return identifier @staticmethod - def string_tree_to_id(string_tree: str) -> str: + def string_tree_to_id(string_tree: str) -> str: # noqa: D102 return string_tree - @property - def search_space_size(self) -> int: - return self.grammars[0].compute_space_size - @abstractmethod - def create_new_instance_from_id(self, identifier: str): + def create_new_instance_from_id(self, identifier: str): # noqa: D102 raise NotImplementedError - def reset(self) -> None: + def reset(self) -> None: # noqa: D102 self.clear_graph() self.string_tree = "" - self.nxTree = None self._value = None self._function_id = "" - def compose_functions(self, flatten_graph: bool = True) -> nx.DiGraph: - return self._compose_functions(self.id, self.grammars[0], flatten_graph) - - def unparse_tree(self, identifier: str, as_composition: bool = True): - return self._unparse_tree(identifier, self.grammars[0], as_composition) - - def get_dictionary(self) -> dict[str, str]: - return {"graph_grammar": self.id} - - def create_nx_tree(self, string_tree: str) -> nx.DiGraph: - nxTree = self.from_stringTree_to_nxTree(string_tree, self.grammars[0]) - return self.prune_tree( - nxTree, terminal_to_torch_map_keys=self.terminal_to_op_names.keys() - ) - - -class GraphGrammarCell(GraphGrammar): - hp_name = "graph_grammar_cell" - - def __init__( - self, - grammar: Grammar, - terminal_to_op_names: dict, - terminal_to_graph_edges: dict = None, - edge_attr: bool = True, - edge_label: str = "op_name", - zero_op: list = ["Zero", "zero"], - identity_op: list = ["Identity", "id"], - name: str = None, - scope: str = None, - **kwargs, - ): - super().__init__( - grammar, - terminal_to_op_names, - terminal_to_graph_edges, - edge_attr=edge_attr, - edge_label=edge_label, - zero_op=zero_op, - identity_op=identity_op, - name=name, - scope=scope, - **kwargs, - ) - - self.cell = None - - def reset(self) -> None: - super().reset() - self.cell = None - - @abstractmethod - def create_graph_from_string(self, child: str): - raise NotImplementedError - - -class GraphGrammarRepetitive(GraphParameter, 
CoreGraphGrammar): - hp_name = "graph_grammar_repetitive" - - def __init__( - self, - grammars: list[Grammar], - terminal_to_op_names: dict, - terminal_to_sublanguage_map: dict, - number_of_repetitive_motifs: int, - terminal_to_graph_edges: dict = None, - edge_attr: bool = True, - edge_label: str = "op_name", - zero_op: list = ["Zero", "zero"], - identity_op: list = ["Identity", "id"], - name: str = None, - scope: str = None, - ): - CoreGraphGrammar.__init__( - self, - grammars=grammars, - terminal_to_op_names=terminal_to_op_names, - terminal_to_graph_edges=terminal_to_graph_edges, - edge_attr=edge_attr, - edge_label=edge_label, - zero_op=zero_op, - identity_op=identity_op, - name=name, - scope=scope, - ) - GraphParameter.__init__(self, value=None, default=None, is_fidelity=False) - - self.id: str = "" - self.string_tree: str = "" - self.string_tree_list: list[str] = [] - self.nxTree: nx.DiGraph | None = None - self._value: nx.DiGraph | None = None - - self.full_grammar = self.get_full_grammar(self.grammars) - self.terminal_to_sublanguage_map = terminal_to_sublanguage_map - self.number_of_repetitive_motifs = number_of_repetitive_motifs - - @override - def mutate( - self, - parent: Self | None = None, - mutation_rate: float = 1.0, - mutation_strategy: str = "bananas", - ) -> Self: - raise NotImplementedError - if parent is None: - parent = self - - # bananas mutate - if mutation_strategy == "bananas": - inner_mutation_strategy = partial(bananas_mutate, mutation_rate=mutation_rate) - child_string_tree_list, is_same = repetitive_search_space_mutation( - base_parent=parent.string_tree_list[0], - motif_parents=parent.string_tree_list[1:], - base_grammar=self.grammars[0], - motif_grammars=self.grammars[1:], - terminal_to_sublanguage_map=self.terminal_to_sublanguage_map, - inner_mutation_strategy=inner_mutation_strategy, - ) - else: - child_string_tree_list, is_same = repetitive_search_space_mutation( - base_parent=parent.string_tree_list[0], - motif_parents=parent.string_tree_list[1:], - base_grammar=self.grammars[0], - motif_grammars=self.grammars[1:], - terminal_to_sublanguage_map=self.terminal_to_sublanguage_map, - inner_mutation_strategy=super().mutate, - ) - - if all(is_same): - raise ValueError("Parent is the same as child!") - - return self.create_graph_from_string(child_string_tree_list) - - @override - def crossover( - self, - parent1: Self, - parent2: Self | None = None, - ) -> tuple[Self, Self]: - raise NotImplementedError - if parent2 is None: - parent2 = self - children = repetitive_search_space_crossover( - base_parent=(parent1.string_tree_list[0], parent2.string_tree_list[0]), - motif_parents=(parent1.string_tree_list[1:], parent2.string_tree_list[1:]), - base_grammar=self.grammars[0], - motif_grammars=self.grammars[1:], - terminal_to_sublanguage_map=self.terminal_to_sublanguage_map, - inner_crossover_strategy=simple_crossover, - ) - if all(not c for c in children): - raise Exception("Cannot create crossover") - return [parent2.create_graph_from_string(child) for child in children] - - @override - def sample(self, *, user_priors: bool = False) -> Self: - copy_self = self.clone() - copy_self.reset() - copy_self.string_tree_list = [grammar.sampler(1)[0] for grammar in copy_self.grammars] - copy_self.string_tree = copy_self.assemble_trees( - copy_self.string_tree_list[0], - copy_self.string_tree_list[1:], - terminal_to_sublanguage_map=copy_self.terminal_to_sublanguage_map, - ) - copy_self.id = "\n".join(copy_self.string_tree_list) - _ = copy_self.value # required for checking if 
graph is valid! - return copy_self - - @property - @override - def value(self) -> nx.DiGraph: - if self._value is None: - _val = self.from_stringTree_to_graph_repr( - self.string_tree, - self.full_grammar, - valid_terminals=self.terminal_to_op_names.keys(), - edge_attr=self.edge_attr, - ) - assert isinstance(_val, nx.DiGraph) - self._value = _val - return self._value - - @override - def compute_prior(self, *, log: bool = True) -> float: - prior_probs = [ - g.compute_prior(st, log=log) - for g, st in zip(self.grammars, self.string_tree_list) - ] - if log: - return sum(prior_probs) - else: - return np.prod(prior_probs) - - def __eq__(self, other: Any) -> bool: - if not isinstance(other, GraphGrammarRepetitive): - return NotImplemented - - return self.id == other.id - - def reset(self) -> None: - self.clear_graph() - self.string_tree_list = [] - self.string_tree = "" - self.nxTree = None - self._value = None - self.id = "" - - @staticmethod - def get_full_grammar(grammars): - full_grammar = deepcopy(grammars[0]) - rules = full_grammar.productions() - nonterminals = full_grammar.nonterminals - terminals = full_grammar.terminals - for g in grammars[1:]: - rules.extend(g.productions()) - nonterminals.extend(g.nonterminals) - terminals.extend(g.terminals) - return full_grammar - - @abstractmethod - def create_graph_from_string(self, child: list[str]): - raise NotImplementedError - - def get_dictionary(self) -> dict[str, str]: - return {"graph_grammar": "\n".join(self.string_tree_list)} - - def create_nx_tree(self, string_tree: str) -> nx.DiGraph: - nxTree = self.from_stringTree_to_nxTree(string_tree, self.full_grammar) - return self.prune_tree( - nxTree, terminal_to_torch_map_keys=self.terminal_to_op_names.keys() - ) - - def create_from_id(self, identifier: str | list[str]) -> None: - self.reset() - self.string_tree_list = ( - identifier.split("\n") if isinstance(identifier, str) else identifier - ) - self.string_tree = self.assemble_trees( - self.string_tree_list[0], - self.string_tree_list[1:], - terminal_to_sublanguage_map=self.terminal_to_sublanguage_map, - ) - self.id = "\n".join(self.string_tree_list) - _ = self.value # required for checking if graph is valid! 
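The `compute_prior` implementations in this file, both the removed one above and the variants kept on the other grammar classes, combine independent per-grammar priors either as a raw product (`log=False`) or as a sum in log space (`log=True`). A small numeric sketch of the identity this relies on, with made-up probabilities:

```python
# log(p1 * p2 * ... * pn) == log(p1) + ... + log(pn); summing logs avoids
# numeric underflow when many small prior probabilities are multiplied.
import math

prior_probs = [0.25, 0.5, 0.1]  # hypothetical per-grammar priors

as_product = math.prod(prior_probs)                 # the log=False branch
as_log_sum = sum(math.log(p) for p in prior_probs)  # the log=True branch

assert math.isclose(math.exp(as_log_sum), as_product)
```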
- - @property - def search_space_size(self) -> int: - def recursive_worker( - nonterminal: Nonterminal, grammar, lower_level_motifs: int = 0 - ) -> int: - primitive_nonterminal = "OPS" - if str(nonterminal) == primitive_nonterminal: - return ( - lower_level_motifs * self.number_of_repetitive_motifs - + len(grammar.productions(lhs=Nonterminal(primitive_nonterminal))) - - self.number_of_repetitive_motifs - ) - potential_productions = grammar.productions(lhs=nonterminal) - _possibilites = 0 - for potential_production in potential_productions: - edges_nonterminals = [ - rhs_sym - for rhs_sym in potential_production.rhs() - if str(rhs_sym) in grammar.nonterminals - ] - possibilities_per_edge = [ - recursive_worker(e_nonterminal, grammar, lower_level_motifs) - for e_nonterminal in edges_nonterminals - ] - product = 1 - for p in possibilities_per_edge: - product *= p - _possibilites += product - return _possibilites - - lower_level_motifs = recursive_worker(self.grammars[1].start(), self.grammars[1]) - return recursive_worker( - self.grammars[0].start(), - self.grammars[0], - lower_level_motifs=lower_level_motifs, - ) - - -class GraphGrammarMultipleRepetitive(GraphParameter, CoreGraphGrammar): - hp_name = "graph_grammar_multiple_repetitive" - - def __init__( + def compose_functions( # noqa: D102 self, - grammars: list[Grammar] | list[ConstrainedGrammar], - terminal_to_op_names: dict, - terminal_to_sublanguage_map: dict, - prior: list[dict] = None, - terminal_to_graph_edges: dict = None, - fixed_macro_grammar: bool = False, - edge_attr: bool = True, - edge_label: str = "op_name", - zero_op: list = ["Zero", "zero"], - identity_op: list = ["Identity", "id"], - name: str = None, - scope: str = None, - **kwargs, - ): - def _check_mapping(macro_grammar, motif_grammars, terminal_to_sublanguage_map): - for terminal, start_symbol in terminal_to_sublanguage_map.items(): - if terminal not in macro_grammar.terminals: - raise Exception(f"Terminal {terminal} not defined in macro grammar") - if not any( - start_symbol == str(grammar.start()) for grammar in motif_grammars - ): - raise Exception( - f"Start symbol {start_symbol} not defined in motif grammar" - ) - - def _identify_macro_grammar(grammar, terminal_to_sublanguage_map): - grammars = deepcopy(grammar) - motif_grammars = [] - for start_symbol in terminal_to_sublanguage_map.values(): - motif_grammars += [ - grammar - for grammar in grammars - if start_symbol == str(grammar.start()) - ] - grammars = [ - grammar - for grammar in grammars - if start_symbol != str(grammar.start()) - ] - if len(grammars) != 1: - raise Exception("Cannot identify macro grammar") - return grammars[0], motif_grammars - - if prior is not None: - assert len(grammars) == len( - prior - ), "At least one of the grammars has no prior defined!" 
- for g, p in zip(grammars, prior): - g.prior = p - self.has_prior = prior is not None - - self.macro_grammar, grammars = _identify_macro_grammar( - grammars, terminal_to_sublanguage_map - ) - _check_mapping(self.macro_grammar, grammars, terminal_to_sublanguage_map) - - self.fixed_macro_grammar = fixed_macro_grammar - if not self.fixed_macro_grammar: - grammars.insert(0, self.macro_grammar) - - self.terminal_to_sublanguage_map = OrderedDict(terminal_to_sublanguage_map) - if any( - k in terminal_to_op_names for k in self.terminal_to_sublanguage_map.keys() - ): - raise Exception( - f"Terminals {[k for k in self.terminal_to_sublanguage_map.keys()]} already defined in primitives mapping and cannot be used for repetitive substitutions" - ) - self.number_of_repetitive_motifs_per_grammar = [ - sum( - map( - (str(grammar.start())).__eq__, - self.terminal_to_sublanguage_map.values(), - ) - ) - if str(grammar.start()) in self.terminal_to_sublanguage_map.values() - else 1 - for grammar in grammars - ] - - CoreGraphGrammar.__init__( - self, - grammars=grammars, - terminal_to_op_names={ - **terminal_to_op_names, - **self.terminal_to_sublanguage_map, - }, - terminal_to_graph_edges=terminal_to_graph_edges, - edge_attr=edge_attr, - edge_label=edge_label, - zero_op=zero_op, - identity_op=identity_op, - name=name, - scope=scope, - **kwargs, - ) - GraphParameter.__init__(self, value=None, default=None, is_fidelity=False) - - self._function_id: str = "" - self.string_tree: str = "" - self.string_tree_list: list[str] = [] - self.nxTree: nx.DiGraph | None = None - self._value: nx.DiGraph | None = None - - if self.fixed_macro_grammar: - self.fixed_macro_string_tree = self.macro_grammar.sampler(1)[0] - - if self.fixed_macro_grammar: - self.full_grammar = self.get_full_grammar( - [self.macro_grammar] + self.grammars - ) - else: - self.full_grammar = self.get_full_grammar(self.grammars) - - @override - def sample(self, *, user_priors: bool = False) -> Self: - copy_self = self.clone() - copy_self.reset() - copy_self.string_tree_list = [ - grammar.sampler(1, user_priors=user_priors)[0] - for grammar, number_of_motifs in zip( - copy_self.grammars, copy_self.number_of_repetitive_motifs_per_grammar - ) - for _ in range(number_of_motifs) - ] - copy_self.string_tree = copy_self.assemble_string_tree(copy_self.string_tree_list) - _ = copy_self.value # required for checking if graph is valid! 
- return copy_self - - @property - @override - def value(self) -> nx.DiGraph: - if self._value is None: - if self.fixed_macro_grammar: - self._value = [] - string_list_idx = 0 - for grammar, number_of_motifs in zip( - self.grammars, self.number_of_repetitive_motifs_per_grammar - ): - for _ in range(number_of_motifs): - self._value.append( - self.from_stringTree_to_graph_repr( - self.string_tree_list[string_list_idx], - grammar, - valid_terminals=self.terminal_to_op_names.keys(), - edge_attr=self.edge_attr, - ) - ) - string_list_idx += 1 - self._value = self._value[0] # TODO trick - else: - self._value = self.from_stringTree_to_graph_repr( - self.string_tree, - self.full_grammar, - valid_terminals=self.terminal_to_op_names.keys(), - edge_attr=self.edge_attr, - ) - motif_trees = self.string_tree_list[1:] - repetitive_mapping = { - replacement: motif - for motif, replacement in zip( - self.terminal_to_sublanguage_map.keys(), motif_trees - ) - } - for subgraph in self._value[1].values(): - old_node_attributes = nx.get_node_attributes(subgraph, "op_name") - new_node_labels = { - k: (repetitive_mapping[v] if v in motif_trees else v) - for k, v in old_node_attributes.items() - } - nx.set_node_attributes(subgraph, new_node_labels, name="op_name") - return self._value - - @override - def mutate( - self, - parent: Self | None = None, - mutation_rate: float = 1.0, - mutation_strategy: str = "bananas", - ) -> Self: - if parent is None: - parent = self - - bananas_inner_mutation = partial(bananas_mutate, mutation_rate=mutation_rate) - child_string_tree_list, is_same = repetitive_search_space_mutation( - base_parent=self.fixed_macro_string_tree - if self.fixed_macro_grammar - else parent.string_tree_list[0], - motif_parents=parent.string_tree_list - if self.fixed_macro_grammar - else parent.string_tree_list[1:], - base_grammar=self.macro_grammar, - motif_grammars=self.grammars - if self.fixed_macro_grammar - else self.grammars[1:], - terminal_to_sublanguage_map=self.terminal_to_sublanguage_map, - number_of_repetitive_motifs_per_grammar=self.number_of_repetitive_motifs_per_grammar, - inner_mutation_strategy=bananas_inner_mutation - if mutation_strategy == "bananas" - else super().mutate, - fixed_macro_parent=self.fixed_macro_grammar, - ) - - if all(is_same): - raise ValueError("Parent is the same as child!") - - if self.fixed_macro_grammar: - child_string_tree_list = child_string_tree_list[1:] - - return self.create_new_instance_from_id( - self.string_tree_list_to_id(child_string_tree_list) - ) - - @override - def crossover( - self, - parent1: Self, - parent2: Self | None = None, - ) -> tuple[Self, Self]: - if parent2 is None: - parent2 = self - children = repetitive_search_space_crossover( - base_parent=(parent1.fixed_macro_string_tree, parent2.fixed_macro_string_tree) - if self.fixed_macro_grammar - else (parent1.string_tree_list[0], parent2.string_tree_list[0]), - motif_parents=(parent1.string_tree_list, parent2.string_tree_list) - if self.fixed_macro_grammar - else (parent1.string_tree_list[1:], parent2.string_tree_list[1:]), - base_grammar=self.macro_grammar, - motif_grammars=self.grammars - if self.fixed_macro_grammar - else self.grammars[1:], - terminal_to_sublanguage_map=self.terminal_to_sublanguage_map, - number_of_repetitive_motifs_per_grammar=self.number_of_repetitive_motifs_per_grammar, - inner_crossover_strategy=simple_crossover, - fixed_macro_parent=self.fixed_macro_grammar, - multiple_repetitive=True, - ) - if all(not c for c in children): - raise Exception("Cannot create crossover") - - 
return tuple( - parent2.create_new_instance_from_id( - self.string_tree_list_to_id( - child[1:] if self.fixed_macro_grammar else child - ) - ) - for child in children - ) - - @override - def compute_prior(self, *, log: bool = True) -> float: - prior_probs = [ - g.compute_prior(st, log=log) - for g, st in zip(self.grammars, self.string_tree_list) - ] - if log: - return sum(prior_probs) - else: - return np.prod(prior_probs) - - @property - def id(self) -> str: - if self._function_id is None or self._function_id == "": - if len(self.string_tree_list) == 0: - raise ValueError("Cannot infer identifier") - self._function_id = self.string_tree_list_to_id(self.string_tree_list) - return self._function_id - - @id.setter - def id(self, value: str) -> None: - self._function_id = value - - @staticmethod - def id_to_string_tree_list(identifier: str) -> list[str]: - return identifier.split("\n") - - def id_to_string_tree(self, identifier: str) -> str: - string_tree_list = self.id_to_string_tree_list(identifier) - return self.assemble_string_tree(string_tree_list) - - @staticmethod - def string_tree_list_to_id(string_tree_list: list[str]) -> str: - return "\n".join(string_tree_list) - - def string_tree_to_id(self, string_tree: str) -> str: - raise NotImplementedError - - def assemble_string_tree(self, string_tree_list: list[str]) -> str: - if self.fixed_macro_grammar: - string_tree = self.assemble_trees( - self.fixed_macro_string_tree, - string_tree_list, - terminal_to_sublanguage_map=self.terminal_to_sublanguage_map, - ) - else: - string_tree = self.assemble_trees( - string_tree_list[0], - string_tree_list[1:], - terminal_to_sublanguage_map=self.terminal_to_sublanguage_map, - ) - return string_tree - - def __eq__(self, other: Any) -> bool: - if not isinstance(other, GraphGrammarMultipleRepetitive): - return NotImplemented - return self.id == other.id - - def reset(self) -> None: - self.clear_graph() - self.string_tree_list = [] - self.string_tree = "" - self.nxTree = None - self._value = None - self._function_id = "" - - def compose_functions(self, flatten_graph: bool = True): - return self._compose_functions(self.id, self.full_grammar, flatten_graph) - - def unparse_tree(self, identifier: str, as_composition: bool = True): - return self._unparse_tree(identifier, self.full_grammar, as_composition) - - @staticmethod - def get_full_grammar(grammars): - full_grammar = deepcopy(grammars[0]) - rules = full_grammar.productions() - nonterminals = full_grammar.nonterminals - terminals = full_grammar.terminals - for g in grammars[1:]: - rules.extend(g.productions()) - nonterminals.extend(g.nonterminals) - terminals.extend(g.terminals) - return full_grammar - - @abstractmethod - def create_new_instance_from_id(self, child: str): - raise NotImplementedError - - def get_dictionary(self) -> dict[str, str]: - return {"graph_grammar": self.id} - - def create_nx_tree(self, string_tree: str) -> nx.DiGraph: - nxTree = self.from_stringTree_to_nxTree(string_tree, self.full_grammar) - return self.prune_tree( - nxTree, terminal_to_torch_map_keys=self.terminal_to_op_names.keys() - ) - - def create_from_id(self, identifier: str) -> None: - self.reset() - self.id = identifier - self.string_tree_list = self.id_to_string_tree_list(self.id) - self.string_tree = self.id_to_string_tree(self.id) - _ = self.value # required for checking if graph is valid! 
- - @property - def search_space_size(self) -> int: - def recursive_worker( - nonterminal: Nonterminal, grammar, lower_level_motifs: dict = None - ) -> int: - if lower_level_motifs is None: - lower_level_motifs = {} - potential_productions = grammar.productions(lhs=nonterminal) - _possibilites = 0 - for potential_production in potential_productions: - edges_nonterminals = [ - rhs_sym - for rhs_sym in potential_production.rhs() - if str(rhs_sym) in grammar.nonterminals - ] - possibilities_per_edge = [ - recursive_worker(e_nonterminal, grammar, lower_level_motifs) - for e_nonterminal in edges_nonterminals - ] - possibilities_per_edge += [ - lower_level_motifs[str(rhs_sym)] - for rhs_sym in potential_production.rhs() - if str(rhs_sym) in lower_level_motifs.keys() - ] - product = 1 - for p in possibilities_per_edge: - product *= p - _possibilites += product - return _possibilites - - if self.fixed_macro_grammar: - if len(self.grammars) > 1: - raise Exception( - "Compute space size for fixed macro only works for one repetitive level" - ) - return np.prod( - [ - grammar.compute_space_size - for grammar, n_grammar in zip( - self.grammars, self.number_of_repetitive_motifs_per_grammar - ) - for _ in range(n_grammar) - ] - ) - else: - if len(self.grammars) > 2: - raise Exception( - "Compute space size for no fixed macro only works for one repetitive level" - ) - macro_space_size = self.grammars[0].compute_space_size - motif_space_size = self.grammars[1].compute_space_size - return ( - macro_space_size - // self.number_of_repetitive_motifs_per_grammar[1] - * motif_space_size - ) + flatten_graph: bool = True, # noqa: FBT001, FBT002 + ) -> nx.DiGraph: + return self._compose_functions(self.id, self.grammars[0], flatten_graph) diff --git a/neps/search_spaces/architecture/mutations.py b/neps/search_spaces/architecture/mutations.py index f07e3347e..21660afe1 100644 --- a/neps/search_spaces/architecture/mutations.py +++ b/neps/search_spaces/architecture/mutations.py @@ -1,10 +1,13 @@ +from __future__ import annotations + import random -from typing import Callable, List, Tuple +from typing import TYPE_CHECKING -from .cfg import Grammar +if TYPE_CHECKING: + from .cfg import Grammar -def simple_mutate(parent_string_tree: str, grammar: Grammar) -> Tuple[str, bool]: +def simple_mutate(parent_string_tree: str, grammar: Grammar) -> tuple[str, bool]: # noqa: D103 # works if there is only one grammar # randomly choose a subtree from the parent and replace # with a new randomly generated subtree @@ -19,17 +22,17 @@ def simple_mutate(parent_string_tree: str, grammar: Grammar) -> Tuple[str, bool] return child_string_tree, parent_string_tree == child_string_tree -def bananas_mutate( +def bananas_mutate( # noqa: D103 parent_string_tree: str, grammar: Grammar, mutation_rate: float = 1.0, - mutation_prob: float = None, + mutation_prob: float | None = None, patience: int = 50, -) -> Tuple[str, bool]: +) -> tuple[str, bool]: split_tree = parent_string_tree.split(" ") swappable_indices = [ i - for i in range(0, len(split_tree)) + for i in range(len(split_tree)) if split_tree[i][1:] in grammar.swappable_nonterminals ] _mutation_prob = ( @@ -40,7 +43,7 @@ def bananas_mutate( idx = 0 while idx < len(swappable_indices): swap_idx = swappable_indices[idx] - if random.random() < _mutation_prob: + if random.random() < _mutation_prob: # noqa: S311 subtree_node = split_tree[swap_idx][1:] subtree_idx = swap_idx child_string_tree = grammar.mutate( @@ -54,7 +57,7 @@ def bananas_mutate( split_tree = child_string_tree.split(" ") 
swappable_indices = [ i - for i in range(0, len(split_tree)) + for i in range(len(split_tree)) if split_tree[i][1:] in grammar.swappable_nonterminals ] _mutation_prob = ( @@ -65,73 +68,3 @@ def bananas_mutate( idx += 1 return child_string_tree, child_string_tree == parent_string_tree - - -def repetitive_search_space_mutation( - base_parent: str, - motif_parents: List[str], - base_grammar: Grammar, - motif_grammars: List[Grammar], - terminal_to_sublanguage_map: dict, - number_of_repetitive_motifs_per_grammar: list, - inner_mutation_strategy: Callable, - mutation_rate: float = 1.0, - mutation_prob: float = None, - fixed_macro_parent: bool = False, -) -> Tuple[List[str], List[bool]]: - def _motifs_in_base_tree(base_parent, terminal_to_sublanguage_map): - return [ - i - for i, k in enumerate(terminal_to_sublanguage_map.keys()) - if k in base_parent - ] - - indices = _motifs_in_base_tree(base_parent, terminal_to_sublanguage_map) - if fixed_macro_parent: - mutation_prob = ( - mutation_rate / len(indices) if mutation_prob is None else mutation_prob - ) - else: - mutation_prob = ( - mutation_rate / (len(indices) + 1) if mutation_prob is None else mutation_prob - ) - - child_string_trees = [] - if not fixed_macro_parent and random.random() < mutation_prob: - child_string_trees.append(inner_mutation_strategy(base_parent, base_grammar)) - indices = _motifs_in_base_tree(base_parent, terminal_to_sublanguage_map) - mutation_prob = ( - mutation_rate / (len(indices) + 1) if mutation_prob is None else mutation_prob - ) - else: - child_string_trees.append((base_parent, True)) - - parent_string_idx = 0 - _number_of_repetitive_motifs_per_grammar = ( - number_of_repetitive_motifs_per_grammar[1:] - if not fixed_macro_parent - else number_of_repetitive_motifs_per_grammar - ) - for grammar, number_of_motifs in zip( - motif_grammars, _number_of_repetitive_motifs_per_grammar - ): - for _ in range(number_of_motifs): - if parent_string_idx in indices and random.random() < mutation_prob: - child_string_trees.append( - inner_mutation_strategy(motif_parents[parent_string_idx], grammar) - ) - else: - child_string_trees.append((motif_parents[parent_string_idx], True)) - parent_string_idx += 1 - # child_string_trees.extend( - # [ - # inner_mutation_strategy(parent_string_tree, grammar) - # if i in indices and random.random() < mutation_prob - # else (parent_string_tree, True) - # for i, (parent_string_tree, grammar) in enumerate( - # zip(motif_parents, motif_grammars) - # ) - # ] - # ) - - return [c[0] for c in child_string_trees], [c[1] for c in child_string_trees] diff --git a/neps/search_spaces/architecture/primitives.py b/neps/search_spaces/architecture/primitives.py index eebb828d8..18c11d283 100644 --- a/neps/search_spaces/architecture/primitives.py +++ b/neps/search_spaces/architecture/primitives.py @@ -1,3 +1,5 @@ +from __future__ import annotations # noqa: D100 + from abc import ABCMeta, abstractmethod import torch @@ -5,34 +7,21 @@ class _AbstractPrimitive(nn.Module, metaclass=ABCMeta): - """ - Use this class when creating new operations for edges. + """Use this class when creating new operations for edges. This is required because we are agnostic to operations at the edges. As a consequence, they can contain subgraphs which requires naslib to detect and properly process them. 
""" - def __init__(self, kwargs): - super().__init__() - - self.init_params = { - k: v - for k, v in kwargs.items() - if k != "self" and not k.startswith("_") and k != "kwargs" - } - @abstractmethod def forward(self, x): - """ - The forward processing of the operation. - """ + """The forward processing of the operation.""" raise NotImplementedError @abstractmethod def get_embedded_ops(self): - """ - Return any embedded ops so that they can be + """Return any embedded ops so that they can be analysed whether they contain a child graph, e.g. a 'motif' in the hierachical search space. @@ -46,86 +35,88 @@ def get_op_name(self): return type(self).__name__ -class AbstractPrimitive(_AbstractPrimitive): - def forward(self, x): +class AbstractPrimitive(_AbstractPrimitive): # noqa: D101 + def forward(self, x): # noqa: D102 raise NotImplementedError - def get_embedded_ops(self): + def get_embedded_ops(self): # noqa: D102 return None class Identity(AbstractPrimitive): - """ - An implementation of the Identity operation. - """ + """An implementation of the Identity operation.""" - def __init__(self, **kwargs): + def __init__(self, **kwargs): # noqa: D107 super().__init__(locals()) - def forward(self, x): + def forward(self, x: object) -> object: # noqa: D102 return x class Zero(AbstractPrimitive): - """ - Implementation of the zero operation. It removes + """Implementation of the zero operation. It removes the connection by multiplying its input with zero. """ def __init__(self, stride, **kwargs): - """ - When setting stride > 1 then it is assumed that the + """When setting stride > 1 then it is assumed that the channels must be doubled. """ super().__init__(locals()) self.stride = int(stride) - def forward(self, x): + def forward(self, x): # noqa: D102 if self.stride == 1: return x.mul(0.0) - else: - return x[:, :, :: self.stride, :: self.stride].mul(0.0) + + return x[:, :, :: self.stride, :: self.stride].mul(0.0) def __repr__(self): return f"" class Zero1x1(AbstractPrimitive): - """ - Implementation of the zero operation. It removes + """Implementation of the zero operation. It removes the connection by multiplying its input with zero. """ def __init__(self, stride, **kwargs): - """ - When setting stride > 1 then it is assumed that the + """When setting stride > 1 then it is assumed that the channels must be doubled. """ super().__init__(locals()) self.stride = int(stride) - def forward(self, x): + def forward(self, x): # noqa: D102 if self.stride == 1: return x.mul(0.0) - else: - x = x[:, :, :: self.stride, :: self.stride].mul(0.0) - return torch.cat([x, x], dim=1) # double the channels TODO: ugly as hell + + x = x[:, :, :: self.stride, :: self.stride].mul(0.0) + return torch.cat([x, x], dim=1) # double the channels TODO: ugly as hell def __repr__(self): return f"" class SepConv(AbstractPrimitive): - """ - Implementation of Separable convolution operation as + """Implementation of Separable convolution operation as in the DARTS paper, i.e. 2 sepconv directly after another. 
""" - def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True, **kwargs): + def __init__( # noqa: D107 + self, + c_in: int, + c_out: int, + kernel_size: int, + stride: int, + padding: int, + affine: bool = True, # noqa: FBT001, FBT002 + **kwargs, + ): super().__init__(locals()) - C_in = int(C_in) - C_out = int(C_out) + c_in = int(c_in) + c_out = int(c_out) kernel_size = int(kernel_size) stride = int(stride) padding = int(padding) @@ -135,53 +126,60 @@ def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True, **kwa self.op = nn.Sequential( nn.ReLU(inplace=False), nn.Conv2d( - C_in, - C_in, + c_in, + c_in, kernel_size=kernel_size, stride=stride, padding=padding, - groups=C_in, + groups=c_in, bias=False, ), - nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False), - nn.BatchNorm2d(C_in, affine=affine), + nn.Conv2d(c_in, c_in, kernel_size=1, padding=0, bias=False), + nn.BatchNorm2d(c_in, affine=affine), nn.ReLU(inplace=False), nn.Conv2d( - C_in, - C_in, + c_in, + c_in, kernel_size=kernel_size, stride=1, padding=padding, - groups=C_in, + groups=c_in, bias=False, ), - nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False), - nn.BatchNorm2d(C_out, affine=affine), + nn.Conv2d(c_in, c_out, kernel_size=1, padding=0, bias=False), + nn.BatchNorm2d(c_out, affine=affine), ) - def forward(self, x): + def forward(self, x): # noqa: D102 return self.op(x) @property - def get_op_name(self): + def get_op_name(self): # noqa: D102 op_name = super().get_op_name op_name += f"{self.kernel_size}x{self.kernel_size}" return op_name class DilConv(AbstractPrimitive): - """ - Implementation of a dilated separable convolution as + """Implementation of a dilated separable convolution as used in the DARTS paper. """ - def __init__( - self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True, **kwargs + def __init__( # noqa: D107 + self, + c_in: int, + c_out: int, + kernel_size: int, + stride: int, + padding: int, + dilation: int, + affine: bool = True, # noqa: FBT001, FBT002 + **kwargs, ): super().__init__(locals()) - C_in = int(C_in) - C_out = int(C_out) + c_in = int(c_in) + c_out = int(c_out) kernel_size = int(kernel_size) stride = int(stride) padding = int(padding) @@ -192,68 +190,66 @@ def __init__( self.op = nn.Sequential( nn.ReLU(inplace=False), nn.Conv2d( - C_in, - C_in, + c_in, + c_in, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, - groups=C_in, + groups=c_in, bias=False, ), - nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False), - nn.BatchNorm2d(C_out, affine=affine), + nn.Conv2d(c_in, c_out, kernel_size=1, padding=0, bias=False), + nn.BatchNorm2d(c_out, affine=affine), ) - def forward(self, x): + def forward(self, x): # noqa: D102 return self.op(x) @property - def get_op_name(self): + def get_op_name(self): # noqa: D102 op_name = super().get_op_name op_name += f"{self.kernel_size}x{self.kernel_size}" return op_name class Stem(AbstractPrimitive): - """ - This is used as an initial layer directly after the + """This is used as an initial layer directly after the image input. 
""" - def __init__(self, C_out, C_in=3, **kwargs): + def __init__(self, c_out: int, c_in: int = 3, **kwargs): # noqa: D107 super().__init__(locals()) - C_out = int(C_out) + c_out = int(c_out) self.seq = nn.Sequential( - nn.Conv2d(C_in, C_out, 3, padding=1, bias=False), nn.BatchNorm2d(C_out) + nn.Conv2d(c_in, c_out, 3, padding=1, bias=False), nn.BatchNorm2d(c_out) ) - def forward(self, x): + def forward(self, x): # noqa: D102 return self.seq(x) class Sequential(AbstractPrimitive): - """ - Implementation of `torch.nn.Sequential` to be used + """Implementation of `torch.nn.Sequential` to be used as op on edges. """ - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs): # noqa: D107 super().__init__(locals()) self.primitives = args self.op = nn.Sequential(*args) - def forward(self, x): + def forward(self, x): # noqa: D102 return self.op(x) - def get_embedded_ops(self): + def get_embedded_ops(self): # noqa: D102 return list(self.primitives) -class MaxPool(AbstractPrimitive): - def __init__(self, kernel_size, stride, **kwargs): +class MaxPool(AbstractPrimitive): # noqa: D101 + def __init__(self, kernel_size: int, stride: int, **kwargs): # noqa: D107 super().__init__(locals()) kernel_size = int(kernel_size) @@ -261,35 +257,42 @@ def __init__(self, kernel_size, stride, **kwargs): self.maxpool = nn.MaxPool2d(kernel_size, stride=stride, padding=1) - def forward(self, x): - x = self.maxpool(x) - return x + def forward(self, x): # noqa: D102 + return self.maxpool(x) class MaxPool1x1(AbstractPrimitive): - """ - Implementation of MaxPool with an optional 1x1 convolution + """Implementation of MaxPool with an optional 1x1 convolution in case stride > 1. The 1x1 convolution is required to increase the number of channels. """ - def __init__(self, kernel_size, stride, C_in, C_out, affine=True, **kwargs): + def __init__( # noqa: D107 + self, + kernel_size: int, + stride: int, + c_in: int, + c_out: int, + affine: bool = True, # noqa: FBT001, FBT002 + **kwargs, + ): super().__init__(locals()) kernel_size = int(kernel_size) stride = int(stride) - C_in = int(C_in) - C_out = int(C_out) + c_in = int(c_in) + c_out = int(c_out) affine = bool(affine) self.stride = stride self.maxpool = nn.MaxPool2d(kernel_size, stride=stride, padding=1) if stride > 1: - assert C_in is not None and C_out is not None - self.conv = nn.Conv2d(C_in, C_out, 1, stride=1, padding=0, bias=False) - self.bn = nn.BatchNorm2d(C_out, affine=affine) + assert c_in is not None + assert c_out is not None + self.conv = nn.Conv2d(c_in, c_out, 1, stride=1, padding=0, bias=False) + self.bn = nn.BatchNorm2d(c_out, affine=affine) - def forward(self, x): + def forward(self, x): # noqa: D102 x = self.maxpool(x) if self.stride > 1: x = self.conv(x) @@ -298,34 +301,32 @@ def forward(self, x): class AvgPool(AbstractPrimitive): - """ - Implementation of Avergae Pooling. 
- """ + """Implementation of Avergae Pooling.""" - def __init__(self, kernel_size, stride, **kwargs): + def __init__(self, kernel_size: int, stride: int, **kwargs): # noqa: D107 stride = int(stride) super().__init__(locals()) - self.avgpool = nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False) + self.avgpool = nn.AvgPool2d( + kernel_size=3, stride=stride, padding=1, count_include_pad=False + ) - def forward(self, x): - x = self.avgpool(x) - return x + def forward(self, x): # noqa: D102 + return self.avgpool(x) class AvgPool1x1(AbstractPrimitive): - """ - Implementation of Avergae Pooling with an optional + """Implementation of Avergae Pooling with an optional 1x1 convolution afterwards. The convolution is required to increase the number of channels if stride > 1. """ - def __init__( + def __init__( # noqa: D107 self, - kernel_size, - stride, - C_in, - C_out, - affine=True, + kernel_size: int, + stride: int, + c_in: int, + c_out: int, + affine: bool = True, # noqa: FBT001, FBT002 **kwargs, ): super().__init__(locals()) @@ -333,11 +334,12 @@ def __init__( self.stride = int(stride) self.avgpool = nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False) if stride > 1: - assert C_in is not None and C_out is not None - self.conv = nn.Conv2d(C_in, C_out, 1, stride=1, padding=0, bias=False) - self.bn = nn.BatchNorm2d(C_out, affine=affine) + assert c_in is not None + assert c_out is not None + self.conv = nn.Conv2d(c_in, c_out, 1, stride=1, padding=0, bias=False) + self.bn = nn.BatchNorm2d(c_out, affine=affine) - def forward(self, x): + def forward(self, x): # noqa: D102 x = self.avgpool(x) if self.stride > 1: x = self.conv(x) @@ -345,8 +347,16 @@ def forward(self, x): return x -class ReLUConvBN(AbstractPrimitive): - def __init__(self, C_in, C_out, kernel_size, stride=1, affine=True, **kwargs): +class ReLUConvBN(AbstractPrimitive): # noqa: D101 + def __init__( # noqa: D107 + self, + c_in: int, + c_out: int, + kernel_size: int, + stride: int = 1, + affine: bool = True, # noqa: FBT001, FBT002 + **kwargs, + ): super().__init__(locals()) kernel_size = int(kernel_size) stride = int(stride) @@ -355,113 +365,134 @@ def __init__(self, C_in, C_out, kernel_size, stride=1, affine=True, **kwargs): pad = 0 if int(stride) == 1 and kernel_size == 1 else 1 self.op = nn.Sequential( nn.ReLU(inplace=False), - nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=pad, bias=False), - nn.BatchNorm2d(C_out, affine=affine), + nn.Conv2d(c_in, c_out, kernel_size, stride=stride, padding=pad, bias=False), + nn.BatchNorm2d(c_out, affine=affine), ) - def forward(self, x): + def forward(self, x): # noqa: D102 return self.op(x) @property - def get_op_name(self): + def get_op_name(self): # noqa: D102 op_name = super().get_op_name op_name += f"{self.kernel_size}x{self.kernel_size}" return op_name class ConvBnReLU(AbstractPrimitive): - """ - Implementation of 2d convolution, followed by 2d batch normalization and ReLU activation. + """Implementation of 2d convolution, followed by 2d batch normalization and + ReLU activation. 
""" - def __init__(self, C_in, C_out, kernel_size, stride=1, affine=True, **kwargs): + def __init__( # noqa: D107 + self, + c_in: int, + c_out: int, + kernel_size: int, + stride: int = 1, + affine: bool = True, # noqa: FBT001, FBT002 + **kwargs, + ): super().__init__(locals()) self.kernel_size = kernel_size pad = 0 if stride == 1 and kernel_size == 1 else 1 self.op = nn.Sequential( - nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=pad, bias=False), - nn.BatchNorm2d(C_out, affine=affine), + nn.Conv2d(c_in, c_out, kernel_size, stride=stride, padding=pad, bias=False), + nn.BatchNorm2d(c_out, affine=affine), nn.ReLU(inplace=False), ) - def forward(self, x): + def forward(self, x): # noqa: D102 return self.op(x) @property - def get_op_name(self): + def get_op_name(self): # noqa: D102 op_name = super().get_op_name op_name += f"{self.kernel_size}x{self.kernel_size}" return op_name class ConvBn(AbstractPrimitive): - """ - Implementation of 2d convolution, followed by 2d batch normalization and ReLU activation. + """Implementation of 2d convolution, followed by 2d batch normalization and ReLU + activation. """ - def __init__(self, C_in, C_out, kernel_size, stride=1, affine=True, **kwargs): + def __init__( # noqa: D107 + self, + c_in: int, + c_out: int, + kernel_size: int, + stride=1, + affine: bool = True, # noqa: FBT001, FBT002 + **kwargs, + ): super().__init__(locals()) self.kernel_size = kernel_size pad = 0 if stride == 1 and kernel_size == 1 else 1 self.op = nn.Sequential( - nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=pad, bias=False), - nn.BatchNorm2d(C_out, affine=affine), + nn.Conv2d(c_in, c_out, kernel_size, stride=stride, padding=pad, bias=False), + nn.BatchNorm2d(c_out, affine=affine), ) - def forward(self, x): + def forward(self, x): # noqa: D102 return self.op(x) @property - def get_op_name(self): + def get_op_name(self): # noqa: D102 op_name = super().get_op_name op_name += f"{self.kernel_size}x{self.kernel_size}" return op_name class Concat1x1(AbstractPrimitive): - """ - Implementation of the channel-wise concatination followed by a 1x1 convolution + """Implementation of the channel-wise concatination followed by a 1x1 convolution to retain the channel dimension. """ - def __init__( - self, num_in_edges, C_out, affine=True, **kwargs + def __init__( # noqa: D107 + self, + num_in_edges: int, + c_out: int, + affine: bool = True, # noqa: FBT001, FBT002 + **kwargs, ): super().__init__(locals()) self.conv = nn.Conv2d( - num_in_edges * C_out, C_out, kernel_size=1, stride=1, padding=0, bias=False + num_in_edges * c_out, c_out, kernel_size=1, stride=1, padding=0, bias=False ) - self.bn = nn.BatchNorm2d(C_out, affine=affine) + self.bn = nn.BatchNorm2d(c_out, affine=affine) def forward(self, x): - """ - Expecting a list of input tensors. Stacking them channel-wise - and applying 1x1 conv + """Expecting a list of input tensors. Stacking them channel-wise + and applying 1x1 conv. 
""" x = torch.cat(x, dim=1) x = self.conv(x) - x = self.bn(x) - return x + return self.bn(x) -class ResNetBasicblock(AbstractPrimitive): - def __init__( - self, C_in, C_out, stride, affine=True, **kwargs +class ResNetBasicblock(AbstractPrimitive): # noqa: D101 + def __init__( # noqa: D107 + self, + c_in: int, + c_out: int, + stride: int, + affine: bool = True, # noqa: FBT001, FBT002 + **kwargs, ): super().__init__(locals()) - assert stride == 1 or stride == 2, f"invalid stride {stride}" - self.conv_a = ReLUConvBN(C_in, C_out, 3, stride) - self.conv_b = ReLUConvBN(C_out, C_out, 3) + assert stride in (1, 2), f"invalid stride {stride}" + self.conv_a = ReLUConvBN(c_in, c_out, 3, stride) + self.conv_b = ReLUConvBN(c_out, c_out, 3) if stride == 2: self.downsample = nn.Sequential( - # nn.AvgPool2d(kernel_size=2, stride=2, padding=0), - nn.Conv2d(C_in, C_out, kernel_size=1, stride=2, padding=0, bias=False), - nn.BatchNorm2d(C_out), + nn.Conv2d(c_in, c_out, kernel_size=1, stride=2, padding=0, bias=False), + nn.BatchNorm2d(c_out), ) else: self.downsample = None - def forward(self, x): + def forward(self, x): # noqa: D102 basicblock = self.conv_a(x) basicblock = self.conv_b(basicblock) residual = self.downsample(x) if self.downsample is not None else x diff --git a/neps/search_spaces/architecture/topologies.py b/neps/search_spaces/architecture/topologies.py index b45db8321..431b3d3c7 100644 --- a/neps/search_spaces/architecture/topologies.py +++ b/neps/search_spaces/architecture/topologies.py @@ -1,27 +1,31 @@ +from __future__ import annotations # noqa: D100 + import inspect import queue -from abc import ABCMeta, abstractmethod +from abc import ABCMeta from functools import partial from typing import Callable from .graph import Graph -class AbstractTopology(Graph, metaclass=ABCMeta): - edge_list: list = [] +class AbstractTopology(Graph, metaclass=ABCMeta): # noqa: D101 + edge_list: list = [] # noqa: RUF012 - def __init__(self, name: str = None, scope: str = None, merge_fn: Callable = sum): + def __init__( # noqa: D107 + self, name: str | None = None, scope: str | None = None, merge_fn: Callable = sum + ): super().__init__(name=name, scope=scope) self.merge_fn = merge_fn - def mutate(self): + def mutate(self): # noqa: D102 pass - def sample(self): + def sample(self): # noqa: D102 pass - def create_graph(self, vals: dict): + def create_graph(self, vals: dict): # noqa: C901, D102 def get_args_and_defaults(func): signature = inspect.signature(func) return list(signature.parameters.keys()), { @@ -36,18 +40,18 @@ def get_op_name_from_dict(val: dict): args: dict = {} arg_names, default_args = get_args_and_defaults(op) for arg_name in arg_names: - if arg_name == "self" or arg_name == "kwargs" or arg_name in args.keys(): + if arg_name in ("self", "kwargs") or arg_name in args: continue - if arg_name in val.keys(): + if arg_name in val: args[arg_name] = val[arg_name] - elif arg_name in default_args.keys(): + elif arg_name in default_args: args[arg_name] = default_args[arg_name] else: args[arg_name] = 42 if "groups" in args and args["groups"] != 1: - args["C_in"] = args["groups"] - args["C_out"] = args["groups"] + args["c_in"] = args["groups"] + args["c_out"] = args["groups"] return op(**args).get_op_name @@ -57,24 +61,23 @@ def get_op_name_from_dict(val: dict): if isinstance(val, dict): _val = val _val["op_name"] = get_op_name_from_dict(val) + elif isinstance(val, int): # for synthetic benchmarks + _val = {"op": val, "op_name": val} + elif hasattr(val, "get_op_name"): + _val = {"op": val, "op_name": 
val.get_op_name} + elif callable(val): + _val = {"op": val, "op_name": val.__name__} else: - if isinstance(val, int): # for synthetic benchmarks - _val = {"op": val, "op_name": val} - elif hasattr(val, "get_op_name"): - _val = {"op": val, "op_name": val.get_op_name} - elif callable(val): - _val = {"op": val, "op_name": val.__name__} - else: - raise Exception(f"Cannot extract op name from {val}") + raise Exception(f"Cannot extract op name from {val}") self.edges[u, v].update(_val) @property - def get_op_name(self): + def get_op_name(self): # noqa: D102 return type(self).__name__ - def __call__(self, x): - cur_node_idx = [node for node in self.nodes if self.in_degree(node) == 0][0] + def __call__(self, x): # noqa: D102 + cur_node_idx = next(node for node in self.nodes if self.in_degree(node) == 0) predecessor_inputs = {cur_node_idx: [x]} next_successors = queue.Queue() next_successors.put(cur_node_idx) @@ -103,18 +106,8 @@ def __call__(self, x): return inputs -class AbstractVariableTopology(AbstractTopology): - def __init__(self, name: str = None, scope: str = None, **kwargs): - super().__init__(name, scope, **kwargs) - - @staticmethod - @abstractmethod - def get_edge_list(**kwargs): - raise NotImplementedError - - class _SequentialNEdge(AbstractTopology): - edge_list: list = [] + edge_list: list = [] # noqa: RUF012 def __init__(self, *edge_vals, number_of_edges: int, **kwargs): super().__init__(**kwargs) @@ -132,18 +125,18 @@ def get_edge_list(number_of_edges: int): LinearNEdge = _SequentialNEdge -def get_sequential_n_edge(number_of_edges: int): +def get_sequential_n_edge(number_of_edges: int): # noqa: D103 return partial(_SequentialNEdge, number_of_edges=number_of_edges) -class Residual(AbstractTopology): - edge_list = [ +class Residual(AbstractTopology): # noqa: D101 + edge_list = [ # noqa: RUF012 (1, 2), (1, 3), (2, 3), ] - def __init__(self, *edge_vals, **kwargs): + def __init__(self, *edge_vals, **kwargs): # noqa: D107 super().__init__(**kwargs) self.name = "residual" @@ -151,10 +144,10 @@ def __init__(self, *edge_vals, **kwargs): self.set_scope(self.name) -class Diamond(AbstractTopology): - edge_list = [(1, 2), (1, 3), (2, 4), (3, 4)] +class Diamond(AbstractTopology): # noqa: D101 + edge_list = [(1, 2), (1, 3), (2, 4), (3, 4)] # noqa: RUF012 - def __init__(self, *edge_vals, **kwargs): + def __init__(self, *edge_vals, **kwargs): # noqa: D107 super().__init__(**kwargs) self.name = "diamond" @@ -162,10 +155,10 @@ def __init__(self, *edge_vals, **kwargs): self.set_scope(self.name) -class DiamondMid(AbstractTopology): - edge_list = [(1, 2), (1, 3), (2, 3), (2, 4), (3, 4)] +class DiamondMid(AbstractTopology): # noqa: D101 + edge_list = [(1, 2), (1, 3), (2, 3), (2, 4), (3, 4)] # noqa: RUF012 - def __init__(self, *edge_vals, **kwargs): + def __init__(self, *edge_vals, **kwargs): # noqa: D107 super().__init__(**kwargs) self.name = "diamond_mid" @@ -174,7 +167,7 @@ def __init__(self, *edge_vals, **kwargs): class _DenseNNodeDAG(AbstractTopology): - edge_list: list = [] + edge_list: list = [] # noqa: RUF012 def __init__(self, *edge_vals, number_of_nodes: int, **kwargs): super().__init__(**kwargs) @@ -190,5 +183,5 @@ def get_edge_list(number_of_nodes: int): return [(i + 1, j + 1) for j in range(number_of_nodes) for i in range(j)] -def get_dense_n_node_dag(number_of_nodes: int): +def get_dense_n_node_dag(number_of_nodes: int): # noqa: D103 return partial(_DenseNNodeDAG, number_of_nodes=number_of_nodes) diff --git a/neps/search_spaces/domain.py b/neps/search_spaces/domain.py new file mode 100644 
index 000000000..5d1a76286 --- /dev/null +++ b/neps/search_spaces/domain.py @@ -0,0 +1,452 @@ +"""A class representing a domain, a range for a value + properties. + +Some properties include: + +* The lower and upper bounds of the domain. +* Whether the domain is a log domain. +* Whether the domain is float/int. +* Whether the domain is split into bins. + +With that, the primary method of a domain is to be able to +[`cast()`][neps.search_spaces.domain.Domain.cast] a tensor of +values from one domain to another, +e.g. `values_a = domain_a.cast(values_b, frm=domain_b)`. + +This can be used to convert float samples to integers, integers +to log space, etc. + +The core method to do so is to be able to cast +[`to_unit()`][neps.search_spaces.domain.Domain.to_unit] which takes +values to a unit interval [0, 1], and then to be able to cast values in [0, 1] +to the new domain with [`from_unit()`][neps.search_spaces.domain.Domain.from_unit]. + +There are some shortcuts implemented in `cast`, such as skipping going through +the unit interval if the domains are the same, as no transformation is needed. + +The primary methods for creating a domain are + +* [`Domain.float(l, u, ...)`][neps.search_spaces.domain.Domain.float] - + Used for modelling float ranges +* [`Domain.int(l, u, ...)`][neps.search_spaces.domain.Domain.int] - + Used for modelling integer ranges +* [`Domain.indices(n)`][neps.search_spaces.domain.Domain.indices] - + Primarily used to model categorical choices + +If you have a tensor of values, where each column corresponds to a different domain, +you can take a look at [`Domain.translate()`][neps.search_spaces.domain.Domain.translate] + +If you need a unit-interval domain, please use the +[`Domain.unit_float()`][neps.search_spaces.domain.Domain.unit_float] +or `UNIT_FLOAT_DOMAIN` constant. +""" + +from __future__ import annotations + +import math +from collections.abc import Iterable +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Generic, TypeVar + +import torch +from torch import Tensor + +if TYPE_CHECKING: + from neps.search_spaces.encoding import ConfigEncoder + +Number = int | float +V = TypeVar("V", int, float) + + +@dataclass(frozen=True) +class Domain(Generic[V]): + """A domain for a value. + + The primary methods for creating a domain are + + * [`Domain.float(l, u, ...)`][neps.search_spaces.domain.Domain.float] - + Used for modelling float ranges + * [`Domain.int(l, u, ...)`][neps.search_spaces.domain.Domain.int] - + Used for modelling integer ranges + * [`Domain.indices(n)`][neps.search_spaces.domain.Domain.indices] - + Primarily used to model categorical choices + """ + + lower: V + """The lower bound of the domain.""" + + upper: V + """The upper bound of the domain.""" + + round: bool + """Whether to round the values to the nearest integer.""" + + log_bounds: tuple[float, float] | None = None + """The log bounds of the domain, if the domain is in log space.""" + + bins: int | None = None + """The number of discrete bins to split the domain into. + + Includes both endpoints of the domain and values are rounded to the nearest bin + value. + """ + + is_categorical: bool = False + """Whether the domain is representing a categorical. + + The domain does not use this information directly, but it can be useful for external + classes that consume Domain objects. This can only be set to `True` if the + `cardinality` of the domain is finite, i.e. `bins` is not `None` OR `round` + is `True` or the boundaries are both integers.
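The module docstring above lays out the `Domain` API. A brief usage sketch, assuming the `floating`, `integer`, and `indices` constructors and the `from_unit` method defined further down in this file:

```python
import torch

lr = Domain.floating(1e-4, 1e-1, log=True)       # float range on a log scale
batch = Domain.integer(16, 512)                  # inclusive integer range
choice = Domain.indices(3, is_categorical=True)  # indices 0, 1, 2

# Unit-interval samples can be lifted into any of these domains:
u = torch.tensor([0.0, 0.5, 1.0])
print(lr.from_unit(u))      # ~ tensor([1.0e-4, 3.2e-3, 1.0e-1])
print(batch.from_unit(u))   # tensor([ 16, 264, 512])
print(choice.from_unit(u))  # tensor([0, 1, 2])
```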
+ """ + + is_unit_float: bool = field(init=False, repr=False) + is_int: bool = field(init=False, repr=False) + length: V = field(init=False, repr=False) + cardinality: int | None = field(init=False, repr=False) + bounds: tuple[V, V] = field(init=False, repr=False) + preffered_dtype: torch.dtype = field(init=False, repr=False) + + def __post_init__(self) -> None: + assert isinstance(self.lower, type(self.upper)) + is_int = isinstance(self.lower, int) + object.__setattr__(self, "is_int", is_int) + object.__setattr__( + self, + "is_unit_float", + self.lower == 0 and self.upper == 1 and is_int and not self.round, + ) + object.__setattr__(self, "length", self.upper - self.lower) + + if self.bins: + cardinality = self.bins + elif self.round: + cardinality = int(self.upper - self.lower + 1) + else: + cardinality = None + if self.is_categorical: + raise ValueError( + "Categorical domain must have finite cardinality but" + " `bins` is `None` and `round` is `False` and" + " boundaries are not integers." + ) + object.__setattr__(self, "cardinality", cardinality) + + preferred_dtype = torch.int64 if is_int else torch.float64 + object.__setattr__(self, "preffered_dtype", preferred_dtype) + + mid = self.from_unit(torch.tensor(0.5)).item() + if is_int: + mid = int(round(mid)) + + object.__setattr__(self, "bounds", (self.lower, self.upper)) + + @classmethod + def floating( + cls, + lower: Number, + upper: Number, + *, + log: bool = False, + bins: int | None = None, + is_categorical: bool = False, + ) -> Domain[float]: + """Create a domain for a range of float values. + + Args: + lower: The lower bound of the domain. + upper: The upper bound of the domain. + log: Whether the domain is in log space. + bins: The number of discrete bins to split the domain into. + is_categorical: Whether the domain is representing a categorical. + + Returns: + A domain for a range of float values. + """ + return Domain( + lower=float(lower), + upper=float(upper), + log_bounds=(math.log(lower), math.log(upper)) if log else None, + bins=bins, + round=False, + is_categorical=is_categorical, + ) + + @classmethod + def integer( + cls, + lower: Number, + upper: Number, + *, + log: bool = False, + bins: int | None = None, + is_categorical: bool = False, + ) -> Domain[int]: + """Create a domain for a range of integer values. + + Args: + lower: The lower bound of the domain. + upper: The upper bound of the domain (inclusive). + log: Whether the domain is in log space. + bins: The number of discrete bins to split the domain into. + is_categorical: Whether the domain is representing a categorical. + + Returns: + A domain for a range of integer values. + """ + return Domain( + lower=int(round(lower)), + upper=int(round(upper)), + log_bounds=(math.log(lower), math.log(upper)) if log else None, + round=True, + bins=bins, + is_categorical=is_categorical, + ) + + @classmethod + def indices(cls, n: int, *, is_categorical: bool = False) -> Domain[int]: + """Create a domain for a range of indices. + + Like range based functions this domain is inclusive of the lower bound + and exclusive of the upper bound. + + Args: + n: The number of indices. + is_categorical: Whether the domain is representing a categorical. + + Returns: + A domain for a range of indices. + """ + return Domain.integer(0, n - 1, is_categorical=is_categorical) + + def to_unit(self, x: Tensor, *, dtype: torch.dtype | None = None) -> Tensor: + """Transform a tensor of values from this domain to the unit interval [0, 1]. + + Args: + x: Tensor of values in this domain to convert. 
+ dtype: The dtype to convert to + + Returns: + Same shape tensor with the values normalized to the unit interval [0, 1]. + """ + if dtype is None: + dtype = torch.float64 + elif not dtype.is_floating_point: + raise ValueError(f"Unit interval only allows floating dtypes, got {dtype}.") + + q = self.cardinality + if self.is_unit_float and q is None: + return x.to(dtype) + + if self.log_bounds is not None: + x = torch.log(x) + lower, upper = self.log_bounds + else: + lower, upper = self.lower, self.upper + + x = (x - lower) / (upper - lower) + + if q is not None: + quantization_levels = torch.floor(x * q).clip(0, q - 1) + x = quantization_levels / (q - 1) + + return x.type(dtype) + + def from_unit(self, x: Tensor, *, dtype: torch.dtype | None = None) -> Tensor: + """Transform a tensor of values from the unit interval [0, 1] to this domain. + + Args: + x: A tensor of values in the unit interval [0, 1] to convert. + dtype: The dtype to convert to + + Returns: + Same shape tensor with the values lifted into this domain. + """ + dtype = dtype or self.preffered_dtype + if self.is_unit_float: + return x.to(dtype) + + q = self.cardinality + if q is not None: + quantization_levels = torch.floor(x * q).clip(0, q - 1) + x = quantization_levels / (q - 1) + + # Now we scale to the new domain + if self.log_bounds is not None: + lower, upper = self.log_bounds + x = x * (upper - lower) + lower + x = torch.exp(x) + else: + lower, upper = self.lower, self.upper + x = x * (upper - lower) + lower + + if self.round: + x = torch.round(x) + + return x.type(dtype) + + def cast(self, x: Tensor, frm: Domain, *, dtype: torch.dtype | None = None) -> Tensor: + """Cast a tensor of values from the domain `frm` to this domain. + + If you need to cast a tensor of mixed domains, use + [`Domain.translate()`][neps.search_spaces.domain.Domain.translate]. + + Args: + x: Tensor of values in the `frm` domain to cast to this domain. + frm: The domain to cast from. + dtype: The dtype to convert to + + Returns: + Same shape tensor with the values cast to this domain. + """ + dtype = dtype or self.preffered_dtype + # NOTE: In general, we should always be able to go through the unit interval + # [0, 1] to be able to transform between domains. However, sometimes we can + # bypass some steps, dependent on the domains, hence the ugliness... + + # Shortcut 1. (Same Domain) + # We can shortcut out going through normalized space if all the boundaries match + # and they live on the same scale. However, if their bins don't line up, we will + # have to go through unit space to figure out the bins + same_bounds = self.lower == frm.lower and self.upper == frm.upper + same_log_bounds = self.log_bounds == frm.log_bounds + same_cardinality = self.cardinality == frm.cardinality + if same_bounds and same_log_bounds and same_cardinality: + if self.round: + x = torch.round(x) + return x.type(dtype) + + # Shortcut 2. (From normalized) + # The domain we are coming from is already normalized, we only need to lift + if frm.is_unit_float: + return self.from_unit(x, dtype=dtype) # type: ignore + + # Shortcut 3. (Log lift) + # We can also shortcut out if the only difference is that we are coming from the + # log bounds of this domain.
We don't care whether where we came from was binned or not, + # we just lift it up with `torch.exp` and round if needed + if (self.lower, self.upper) == frm.log_bounds and self.cardinality is None: + x = torch.exp(x) + if self.round: + x = torch.round(x) + return x.type(dtype) + + # Otherwise, through the unit interval we go + lift = self.from_unit(frm.to_unit(x), dtype=dtype) + return lift # noqa: RET504 + + @classmethod + def unit_float(cls) -> Domain[float]: + """Get a domain for the unit interval [0, 1].""" + return UNIT_FLOAT_DOMAIN + + @classmethod + def translate( + cls, + x: Tensor, + frm: Domain | Iterable[Domain] | ConfigEncoder, + to: Domain | Iterable[Domain] | ConfigEncoder, + *, + dtype: torch.dtype | None = None, + ) -> Tensor: + """Cast a tensor of mixed domains to a new set of mixed domains. + + Args: + x: Tensor of shape (..., n_dims) with each dim `i` corresponding + to the domain `frm[i]`. + frm: List of domains to cast from. If a list, it must be of length `n_dims`, + otherwise we assume the single domain provided is the one to be used + across all dimensions. + to: List of domains to cast to. If a list, it must be of length `n_dims`, + otherwise we assume the single domain provided is the one to be used + across all dimensions. + dtype: The dtype of the converted tensor + + Returns: + Tensor of the same shape as `x` with the last dimension cast + from the domain `frm[i]` to the domain `to[i]`. + """ + if x.ndim == 0: + raise ValueError("Expected a tensor with at least one dimension.") + + if x.ndim == 1: + x = x.unsqueeze(0) + + ndims = x.shape[-1] + + # If both are not a list, we can just cast the whole tensor + if isinstance(frm, Domain) and isinstance(to, Domain): + return to.cast(x, frm=frm, dtype=dtype) + + from neps.search_spaces.encoding import ConfigEncoder + + frm = ( + [frm] * ndims + if isinstance(frm, Domain) + else (frm.domains if isinstance(frm, ConfigEncoder) else list(frm)) + ) + to = ( + [to] * ndims + if isinstance(to, Domain) + else (to.domains if isinstance(to, ConfigEncoder) else list(to)) + ) + + if len(frm) != ndims: + raise ValueError( + "The number of domains in `frm` must match the number of dimensions" + " if provided as a list." + f" Expected {ndims} from last dimension of {x.shape}, got {len(frm)}." + ) + + if len(to) != ndims: + raise ValueError( + "The number of domains in `to` must match the number of dimensions" + " if provided as a list." + f" Expected {ndims} from last dimension of {x.shape}, got {len(to)}." + ) + + out = torch.empty_like(x, dtype=dtype) + for i, (f, t) in enumerate(zip(frm, to, strict=False)): + out[..., i] = t.cast(x[..., i], frm=f, dtype=dtype) + + return out + + def cast_one(self, x: float | int, frm: Domain) -> float | int: + """Cast a single value from the domain `frm` to this domain. + + Args: + x: Value in the `frm` domain to cast to this domain. + frm: The domain to cast from. + + Returns: + Value cast to this domain. + """ + return self.cast(torch.tensor(x), frm=frm).item() + + def to_unit_one(self, x: float | int) -> float: + """Transform a single value from this domain to the unit interval [0, 1]. + + Args: + x: Value in this domain to convert. + + Returns: + Value normalized to the unit interval [0, 1]. + """ + return self.to_unit(torch.tensor(x)).item() + + def as_integer_domain(self) -> Domain: + """Get the integer version of this domain. + + !!!
warning + + This is only possible if this domain has a finite cardinality + """ + if self.cardinality is None: + raise ValueError( + "Cannot get integer representation of this domain as its" + " cardinality is non-finite." + ) + + return Domain.indices(self.cardinality, is_categorical=self.is_categorical) + + +UNIT_FLOAT_DOMAIN = Domain.floating(0.0, 1.0) diff --git a/neps/search_spaces/encoding.py b/neps/search_spaces/encoding.py new file mode 100644 index 000000000..d95464993 --- /dev/null +++ b/neps/search_spaces/encoding.py @@ -0,0 +1,566 @@ +"""Encoding of hyperparameter configurations into tensors. + +For the most part, you can just use +[`ConfigEncoder.from_space()`][neps.search_spaces.encoding.ConfigEncoder.from_space] +to create an encoder over a list of hyperparameters, along with any constants you +want to include when decoding configurations. +""" + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any, Generic, TypeVar +from typing_extensions import Protocol, override + +import torch + +from neps.search_spaces.domain import UNIT_FLOAT_DOMAIN, Domain +from neps.search_spaces.hyperparameters.categorical import Categorical +from neps.search_spaces.hyperparameters.float import Float +from neps.search_spaces.hyperparameters.integer import Integer + +if TYPE_CHECKING: + from neps.search_spaces.parameter import Parameter + from neps.search_spaces.search_space import SearchSpace + +V = TypeVar("V", int, float) + + +class TensorTransformer(Protocol): + """A protocol for encoding and decoding hyperparameter values into tensors.""" + + domain: Domain + + def encode( + self, + x: Sequence[Any], + *, + out: torch.Tensor | None = None, + dtype: torch.dtype | None = None, + device: torch.device | None = None, + ) -> torch.Tensor: + """Encode a sequence of hyperparameter values into a tensor. + + Args: + x: A sequence of hyperparameter values. + out: An optional tensor to write the encoded values to. + dtype: The dtype of the tensor. + device: The device of the tensor. + + Returns: + The encoded tensor. + """ + ... + + def decode(self, x: torch.Tensor) -> list[Any]: + """Decode a tensor of hyperparameter values into a sequence of values. + + Args: + x: A tensor of hyperparameter values. + + Returns: + A sequence of hyperparameter values. + """ + ... 
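To make the `Domain` casting machinery above concrete, here is a small usage sketch. It is not part of the diff; it only exercises the constructors and methods defined in `neps/search_spaces/domain.py` above, and the variable names are illustrative.

```python
import torch

from neps.search_spaces.domain import Domain

# A log-scaled float range and a plain integer range.
learning_rate = Domain.floating(1e-4, 1e-1, log=True)
num_layers = Domain.integer(1, 16)

# Uniform samples in the unit interval [0, 1].
u = torch.rand(5, dtype=torch.float64)

# `from_unit` lifts unit-interval values into a domain.
lrs = learning_rate.from_unit(u)  # log-uniform floats in [1e-4, 1e-1]
layers = num_layers.from_unit(u)  # rounded integers in [1, 16]

# `cast` converts between domains, going through the unit interval
# unless one of the shortcuts described in `cast` applies.
as_layers = num_layers.cast(lrs, frm=learning_rate)
```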
+ + +@dataclass +class CategoricalToIntegerTransformer(TensorTransformer): + """A transformer that encodes categorical values into integers.""" + + choices: Sequence[Any] + + domain: Domain = field(init=False) + _lookup: dict[Any, int] | None = field(init=False) + + def __post_init__(self) -> None: + assert len(self.choices) > 0 + + self.domain = Domain.indices(len(self.choices), is_categorical=True) + self._lookup = None + if len(self.choices) > 3: + try: + self._lookup = {c: i for i, c in enumerate(self.choices)} + except TypeError: + self._lookup = None + + @override + def encode( + self, + x: Sequence[Any], + *, + out: torch.Tensor | None = None, + dtype: torch.dtype | None = None, + device: torch.device | None = None, + ) -> torch.Tensor: + if dtype is None: + dtype = torch.int if out is None else out.dtype + + values = ( + [self._lookup[c] for c in x] + if self._lookup + else [self.choices.index(c) for c in x] + ) + + tensor = torch.tensor(values, dtype=torch.int64, device=device) + if out is None: + return tensor.to(dtype) + + out.copy_(tensor.to(out.dtype)).round_() + return out + + @override + def decode(self, x: torch.Tensor) -> list[Any]: + return [self.choices[int(i)] for i in torch.round(x).tolist()] + + +@dataclass +class CategoricalToUnitNorm(TensorTransformer): + """A transformer that encodes categorical values into a unit normalized tensor. + + If there are `n` choices, the tensor will have `n` bins between `0` and `1`. + """ + + choices: Sequence[Any] + + domain: Domain = field(init=False) + _integer_transformer: CategoricalToIntegerTransformer = field(init=False) + + def __post_init__(self) -> None: + self.domain = Domain.floating( + 0.0, + 1.0, + bins=len(self.choices), + is_categorical=True, + ) + self._integer_transformer = CategoricalToIntegerTransformer(self.choices) + + @override + def encode( + self, + x: Sequence[Any], + *, + out: torch.Tensor | None = None, + dtype: torch.dtype | None = None, + device: torch.device | None = None, + ) -> torch.Tensor: + integers = self._integer_transformer.encode( + x, + dtype=dtype if dtype is not None else torch.float64, + device=device, + ) + binned_floats = self.domain.cast( + integers, frm=self._integer_transformer.domain, dtype=dtype + ) + if out is not None: + return out.copy_(binned_floats) + + return binned_floats + + @override + def decode(self, x: torch.Tensor) -> list[Any]: + x = torch.round(x * (len(self.choices) - 1)).type(torch.int64) + return self._integer_transformer.decode(x) + + +# TODO: Maybe add a shift argument, could be useful to have `0` as midpoint +# and `-0.5` as lower bound with `0.5` as upper bound. 
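A quick round-trip sketch of the categorical transformers just defined (again, not part of the diff; the choices are illustrative):

```python
import torch

from neps.search_spaces.encoding import CategoricalToIntegerTransformer

t = CategoricalToIntegerTransformer(choices=["adam", "sgd", "rmsprop"])

# Encode choices as their indices; request a float dtype explicitly here.
encoded = t.encode(["sgd", "adam", "sgd"], dtype=torch.float64)  # tensor([1., 0., 1.])
decoded = t.decode(encoded)
assert decoded == ["sgd", "adam", "sgd"]

# t.domain is Domain.indices(3, is_categorical=True), i.e. the integers
# 0..2, which downstream code can recognize as categorical indices.
```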
+@dataclass +class MinMaxNormalizer(TensorTransformer, Generic[V]): + """A transformer that normalizes values to the unit interval.""" + + original_domain: Domain[V] + bins: int | None = None + + domain: Domain[float] = field(init=False) + + def __post_init__(self) -> None: + if self.bins is None: + self.domain = UNIT_FLOAT_DOMAIN + else: + self.domain = Domain.floating(0.0, 1.0, bins=self.bins) + + @override + def encode( + self, + x: Sequence[V], + *, + out: torch.Tensor | None = None, + dtype: torch.dtype | None = None, + device: torch.device | None = None, + ) -> torch.Tensor: + if out is not None: + dtype = out.dtype + device = out.device + else: + dtype = torch.float64 if dtype is None else dtype + + values = torch.tensor(x, dtype=dtype, device=device) + values = self.domain.cast(values, frm=self.original_domain) + if out is None: + return values + + out.copy_(values) + return out + + @override + def decode(self, x: torch.Tensor) -> list[V]: + values = self.original_domain.from_unit(x) + return values.tolist() + + +@dataclass +class ConfigEncoder: + """An encoder for hyperparameter configurations. + + This class is used to encode and decode hyperparameter configurations into tensors + and back. Its main uses currently are to support surrogate models that require + tensors. + + The primary methods/properties to be aware of are: + * [`from_space()`][neps.search_spaces.encoding.ConfigEncoder.from_space]: Create a + default encoder over a list of hyperparameters. Please see the method docs for + more details on how it encodes different types of hyperparameters. + * [`encode()`][neps.search_spaces.encoding.ConfigEncoder.encode]: Encode a list of + configurations into a single tensor using the transforms of the encoder. + * [`decode()`][neps.search_spaces.encoding.ConfigEncoder.decode]: Decode a 2d tensor + of length `N` into a list of `N` configurations. + * [`domains`][neps.search_spaces.encoding.ConfigEncoder.domains]: The + [`Domain`][neps.search_spaces.domain.Domain] that each hyperparameter is encoded + into. This is useful in combination with classes like + [`Sampler`][neps.sampling.samplers.Sampler], + [`Prior`][neps.sampling.priors.Prior], and + [`TorchDistributionWithDomain`][neps.sampling.distributions.TorchDistributionWithDomain], + which require knowledge of the + domains of each column of the tensor, for example, to sample values directly + in the encoded space or to get log probabilities of the encoded values. + * [`ncols`][neps.search_spaces.encoding.ConfigEncoder.ncols]: The number of columns + in the encoded tensor, useful for initializing some `Sampler`s.
+ """ + + transformers: dict[str, TensorTransformer] + constants: Mapping[str, Any] = field(default_factory=dict) + + index_of: dict[str, int] = field(init=False) + domain_of: dict[str, Domain] = field(init=False) + n_numerical: int = field(init=False) + n_categorical: int = field(init=False) + categorical_slice: slice | None = field(init=False) + numerical_slice: slice | None = field(init=False) + numerical_domains: list[Domain] = field(init=False) + categorical_domains: list[Domain] = field(init=False) + + def __post_init__(self) -> None: + # Sort such that numericals are sorted first and categoricals after, + # with sorting within each group being done by name + transformers = sorted( + self.transformers.items(), + key=lambda t: (t[1].domain.is_categorical, t[0]), + ) + self.transformers = dict(transformers) + + n_numerical = 0 + n_categorical = 0 + for _, transformer in transformers: + if transformer.domain.is_categorical: + n_categorical += 1 + else: + n_numerical += 1 + + self.index_of = {name: i for i, name in enumerate(self.transformers.keys())} + self.domain_of = {name: t.domain for name, t in self.transformers.items()} + self.n_numerical = n_numerical + self.n_categorical = n_categorical + self.numerical_domains = [ + t.domain for t in self.transformers.values() if not t.domain.is_categorical + ] + self.categorical_domains = [ + t.domain for t in self.transformers.values() if t.domain.is_categorical + ] + self.numerical_slice = slice(0, n_numerical) if n_numerical > 0 else None + self.categorical_slice = ( + slice(n_numerical, n_numerical + n_categorical) if n_categorical > 0 else None + ) + + def select_categorical(self, x: torch.Tensor) -> torch.Tensor | None: + """Select the categorical columns from a tensor. + + Args: + x: A tensor of shape `(N, ncols)`. + + Returns: + A tensor of shape `(N, n_categorical)`. + """ + if self.categorical_slice is None: + return None + + return x[:, self.categorical_slice] + + def select_numerical(self, x: torch.Tensor) -> torch.Tensor | None: + """Select the numerical columns from a tensor. + + Args: + x: A tensor of shape `(N, ncols)`. + + Returns: + A tensor of shape `(N, n_numerical)`. + """ + if self.numerical_slice is None: + return None + + return x[:, self.numerical_slice] + + def pdist( + self, + x: torch.Tensor, + *, + numerical_ord: int = 2, + categorical_ord: int = 0, + dtype: torch.dtype = torch.float64, + square_form: bool = False, + ) -> torch.Tensor: + """Compute the pairwise distance between rows of a tensor. + + Will sum the results of the numerical and categorical distances. + The encoding will be normalized such that all numericals lie within the unit + cube, and categoricals will by default, have a `p=0` norm, which is equivalent + to the Hamming distance. + + Args: + x: A tensor of shape `(N, ncols)`. + numerical_ord: The order of the norm to use for the numerical columns. + categorical_ord: The order of the norm to use for the categorical columns. + dtype: The dtype of the output tensor. + square_form: If `True`, the output will be a square matrix of shape + `(N, N)`. If `False`, the output will be a single dim tensor of shape + `1/2 * N * (N - 1)`. + + Returns: + The distances, shaped according to `square_form`. 
+ """ + categoricals = self.select_categorical(x) + numericals = self.select_numerical(x) + + dists: torch.Tensor | None = None + if numericals is not None: + # Ensure they are all within the unit cube + numericals = Domain.translate( + numericals, + frm=self.numerical_domains, + to=UNIT_FLOAT_DOMAIN, + ) + + dists = torch.nn.functional.pdist(numericals, p=numerical_ord) + + if categoricals is not None: + cat_dists = torch.nn.functional.pdist(categoricals, p=categorical_ord) + if dists is None: + dists = cat_dists + else: + dists += cat_dists + + if dists is None: + raise ValueError("No columns to compute distances on.") + + if not square_form: + return dists + + # Turn the single dimensional vector into a square matrix + N = len(x) + sq = torch.zeros((N, N), dtype=dtype) + row_ix, col_ix = torch.triu_indices(N, N, offset=1) + sq[row_ix, col_ix] = dists + sq[col_ix, row_ix] = dists + return sq + + @property + def ncols(self) -> int: + """The number of columns in the encoded tensor.""" + return len(self.transformers) + + @property + def domains(self) -> list[Domain]: + """The domains of the encoded hyperparameters.""" + return list(self.domain_of.values()) + + def encode( + self, + x: Sequence[Mapping[str, Any]], + *, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> torch.Tensor: + """Encode a list of hyperparameter configurations into a tensor. + + !!! warning "Constants" + + Constants included in configurations will not be encoded into the tensor, + but are included when decoding. + + !!! warning "Parameters with no transformers" + + Any parameters in the configurations, whos key is not in + `self.transformers`, will be ignored. + + Args: + x: A list of hyperparameter configurations. + device: The device of the tensor. + dtype: The dtype of the tensor. + + Returns: + A tensor of shape `(len(x), ncols)` containing the encoded configurations. + """ + dtype = torch.float64 if dtype is None else dtype + width = len(self.transformers) + buffer = torch.empty((len(x), width), dtype=dtype, device=device) + + for hp_name, transformer in self.transformers.items(): + values = [conf[hp_name] for conf in x] + lookup = self.index_of[hp_name] + + # Encode directly into buffer + transformer.encode( + values, + out=buffer[:, lookup], + dtype=dtype, + device=device, + ) + + return buffer + + def decode_one(self, x: torch.Tensor) -> dict[str, Any]: + """Decode a tensor representing one configuration into a dict.""" + if x.ndim == 1: + x = x.unsqueeze(0) + return self.decode(x)[0] + + def decode(self, x: torch.Tensor) -> list[dict[str, Any]]: + """Decode a tensor of hyperparameter configurations into a list of configurations. + + Args: + x: A tensor of shape `(N, ncols)` containing the encoded configurations. + + Returns: + A list of `N` configurations, including any constants that were included + when creating the encoder. 
+ """ + values: dict[str, list[Any]] = {} + N = len(x) + for hp_name, transformer in self.transformers.items(): + lookup = self.index_of[hp_name] + tensor = x[:, lookup] + values[hp_name] = transformer.decode(tensor) + + constants = {name: [v] * N for name, v in self.constants.items()} + values.update(constants) + + keys = list(values.keys()) + return [ + dict(zip(keys, vals, strict=False)) + for vals in zip(*values.values(), strict=False) + ] + + @classmethod + def from_space( + cls, + space: SearchSpace, + *, + include_fidelity: bool = False, + include_constants_when_decoding: bool = True, + custom_transformers: dict[str, TensorTransformer] | None = None, + ) -> ConfigEncoder: + """Create a default encoder over a list of hyperparameters. + + This method creates a default encoder over a list of hyperparameters. It + automatically creates transformers for each hyperparameter based on its type. + The transformers are as follows: + + * `Float` and `Integer` are normalized to the unit interval. + * `Categorical` is transformed into an integer. + + Args: + space: The search space to build an encoder for + include_constants_when_decoding: Whether to include constants in the encoder. + These will not be present in the encoded tensors obtained in + [`encode()`][neps.search_spaces.encoding.ConfigEncoder.encode] + but will present when using + [`decode()`][neps.search_spaces.encoding.ConfigEncoder.decode]. + include_fidelity: Whether to include fidelities in the encoding + custom_transformers: A mapping of hyperparameter names + to custom transformers to use + + Returns: + A `ConfigEncoder` instance + """ + parameters = {**space.numerical, **space.categoricals} + if include_fidelity: + parameters.update(space.fidelities) + + return ConfigEncoder.from_parameters( + parameters=parameters, + constants=space.constants if include_constants_when_decoding else None, + custom_transformers=custom_transformers, + ) + + @classmethod + def from_parameters( + cls, + parameters: Mapping[str, Parameter], + constants: Mapping[str, Any] | None = None, + *, + custom_transformers: dict[str, TensorTransformer] | None = None, + ) -> ConfigEncoder: + """Create a default encoder over a list of hyperparameters. + + This method creates a default encoder over a list of hyperparameters. It + automatically creates transformers for each hyperparameter based on its type. + The transformers are as follows: + + * `Float` and `Integer` are normalized to the unit interval. + * `Categorical` is transformed into an integer. + + Args: + parameters: A mapping of hyperparameter names to hyperparameters. + constants: A mapping of constant hyperparameters to include when decoding. + custom_transformers: A mapping of hyperparameter names to custom transformers. 
+ + Returns: + A `ConfigEncoder` instance + """ + if constants is not None: + overlap = set(parameters) & set(constants) + if any(overlap): + raise ValueError( + "`constants=` and `parameters=` cannot have overlapping" + f" keys: {overlap=}" + ) + if custom_transformers is not None: + overlap = set(custom_transformers) & set(constants) + if any(overlap): + raise ValueError( + f"Can not apply `custom_transformers=`" + f" to `constants=`: {overlap=}" + ) + else: + constants = {} + + custom = custom_transformers or {} + transformers: dict[str, TensorTransformer] = {} + for name, hp in parameters.items(): + if name in custom: + transformers[name] = custom[name] + continue + + match hp: + case Float() | Integer(): + transformers[name] = MinMaxNormalizer(hp.domain) # type: ignore + case Categorical(): + transformers[name] = CategoricalToIntegerTransformer(hp.choices) + case _: + raise ValueError( + f"Unsupported parameter type: {type(hp)}. If hp is a constant, " + " please provide it as `constants=`." + ) + + return cls(transformers, constants=constants) diff --git a/neps/search_spaces/functions.py b/neps/search_spaces/functions.py new file mode 100644 index 000000000..ba8b6bc9c --- /dev/null +++ b/neps/search_spaces/functions.py @@ -0,0 +1,133 @@ +"""Functions for working with search spaces.""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +import torch + +from neps.search_spaces.domain import UNIT_FLOAT_DOMAIN, Domain +from neps.search_spaces.parameter import Parameter, ParameterWithPrior +from neps.search_spaces.search_space import SearchSpace + +if TYPE_CHECKING: + from neps.search_spaces.encoding import ConfigEncoder + +logger = logging.getLogger(__name__) + + +def pairwise_dist( + x: torch.Tensor, + encoder: ConfigEncoder, + *, + numerical_ord: int = 2, + categorical_ord: int = 0, + dtype: torch.dtype = torch.float64, + square_form: bool = False, +) -> torch.Tensor: + """Compute the pairwise distance between rows of a tensor. + + Will sum the results of the numerical and categorical distances. + The encoding will be normalized such that all numericals lie within the unit + cube, and categoricals will by default, have a `p=0` norm, which is equivalent + to the Hamming distance. + + Args: + x: A tensor of shape `(N, ncols)`. + encoder: The encoder used to encode the configs into the tensor. + numerical_ord: The order of the norm to use for the numerical columns. + categorical_ord: The order of the norm to use for the categorical columns. + dtype: The dtype of the output tensor. + square_form: If `True`, the output will be a square matrix of shape + `(N, N)`. If `False`, the output will be a single dim tensor of shape + `1/2 * N * (N - 1)`. + + Returns: + The distances, shaped according to `square_form`. 
+ """ + categoricals = encoder.select_categorical(x) + numericals = encoder.select_numerical(x) + + dists: torch.Tensor | None = None + if numericals is not None: + # Ensure they are all within the unit cube + numericals = Domain.translate( + numericals, + frm=encoder.numerical_domains, + to=UNIT_FLOAT_DOMAIN, + ) + + dists = torch.nn.functional.pdist(numericals, p=numerical_ord) + + if categoricals is not None: + # Does Hamming distance + cat_dists = torch.nn.functional.pdist(categoricals, p=categorical_ord) + if dists is None: + dists = cat_dists + else: + dists += cat_dists + + if dists is None: + raise ValueError("No columns to compute distances on.") + + if not square_form: + return dists + + # Turn the single dimensional vector into a square matrix + N = len(x) + sq = torch.zeros((N, N), dtype=dtype) + row_ix, col_ix = torch.triu_indices(N, N, offset=1) + sq[row_ix, col_ix] = dists + sq[col_ix, row_ix] = dists + return sq + + +def sample_one_old( + space: SearchSpace, + *, + user_priors: bool = False, + patience: int = 1, + ignore_fidelity: bool = True, +) -> SearchSpace: + """Sample a configuration from the search space. + + Args: + space: The search space to sample from. + user_priors: Whether to use user priors when sampling. + patience: The number of times to try to sample a valid value for a + hyperparameter. + ignore_fidelity: Whether to ignore the fidelity parameter when sampling. + + Returns: + A sampled configuration from the search space. + """ + sampled_hps: dict[str, Parameter] = {} + + for name, hp in space.hyperparameters.items(): + if hp.is_fidelity and ignore_fidelity: + sampled_hps[name] = hp.clone() + continue + + for attempt in range(patience): + try: + if user_priors and isinstance(hp, ParameterWithPrior): + sampled_hps[name] = hp.sample(user_priors=user_priors) + else: + sampled_hps[name] = hp.sample() + break + except Exception as e: # noqa: BLE001 + logger.warning( + f"Attempt {attempt + 1}/{patience} failed for" + f" sampling {name}: {e!s}" + ) + else: + logger.error( + f"Failed to sample valid value for {name} after {patience} attempts" + ) + raise ValueError( + f"Could not sample valid value for hyperparameter {name}" + f" in {patience} tries!" 
+ ) + + return SearchSpace(**sampled_hps) diff --git a/neps/search_spaces/hyperparameters/__init__.py b/neps/search_spaces/hyperparameters/__init__.py index 98ed1aa9f..14e7ce792 100644 --- a/neps/search_spaces/hyperparameters/__init__.py +++ b/neps/search_spaces/hyperparameters/__init__.py @@ -1,13 +1,21 @@ -from neps.search_spaces.hyperparameters.categorical import CategoricalParameter -from neps.search_spaces.hyperparameters.constant import ConstantParameter -from neps.search_spaces.hyperparameters.float import FloatParameter -from neps.search_spaces.hyperparameters.integer import IntegerParameter -from neps.search_spaces.hyperparameters.numerical import NumericalParameter +from neps.search_spaces.hyperparameters.categorical import ( + Categorical, + CategoricalParameter, +) +from neps.search_spaces.hyperparameters.constant import Constant, ConstantParameter +from neps.search_spaces.hyperparameters.float import Float, FloatParameter +from neps.search_spaces.hyperparameters.integer import Integer, IntegerParameter +from neps.search_spaces.hyperparameters.numerical import Numerical, NumericalParameter __all__ = [ + "Categorical", "CategoricalParameter", + "Constant", "ConstantParameter", - "IntegerParameter", + "Float", "FloatParameter", + "Integer", + "IntegerParameter", + "Numerical", "NumericalParameter", ] diff --git a/neps/search_spaces/hyperparameters/categorical.py b/neps/search_spaces/hyperparameters/categorical.py index 447d1f906..1d0a13d30 100644 --- a/neps/search_spaces/hyperparameters/categorical.py +++ b/neps/search_spaces/hyperparameters/categorical.py @@ -2,45 +2,36 @@ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Any, - ClassVar, - Iterable, - Literal, - Mapping, - Union, -) -from typing_extensions import Self, TypeAlias, override +from collections.abc import Iterable, Mapping +from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeAlias +from typing_extensions import Self, override import numpy as np import numpy.typing as npt from more_itertools import all_unique -from neps.search_spaces.parameter import MutatableParameter, ParameterWithPrior +from neps.search_spaces.domain import Domain +from neps.search_spaces.parameter import ParameterWithPrior if TYPE_CHECKING: from neps.utils.types import f64 -CategoricalTypes: TypeAlias = Union[float, int, str] +CategoricalTypes: TypeAlias = float | int | str -class CategoricalParameter( - ParameterWithPrior[CategoricalTypes, CategoricalTypes], - MutatableParameter, -): +class Categorical(ParameterWithPrior[CategoricalTypes, CategoricalTypes]): """A list of **unordered** choices for a parameter. This kind of [`Parameter`][neps.search_spaces.parameter] is used to represent hyperparameters that can take on a discrete set of unordered values. For example, the `optimizer` hyperparameter in a neural network - search space can be a `CategoricalParameter` with choices like + search space can be a `Categorical` with choices like `#!python ["adam", "sgd", "rmsprop"]`. ```python import neps - optimizer_choice = neps.CategoricalParameter( + optimizer_choice = neps.Categorical( ["adam", "sgd", "rmsprop"], default="adam" ) @@ -48,11 +39,10 @@ class CategoricalParameter( Please see the [`Parameter`][neps.search_spaces.parameter], [`ParameterWithPrior`][neps.search_spaces.parameter.ParameterWithPrior], - [`MutatableParameter`][neps.search_spaces.parameter.MutatableParameter] classes for more details on the methods available for this class. 
""" - DEFAULT_CONFIDENCE_SCORES: ClassVar[Mapping[str, Any]] = { + PRIOR_CONFIDENCE_SCORES: ClassVar[Mapping[str, Any]] = { "low": 2, "medium": 4, "high": 6, @@ -62,26 +52,26 @@ def __init__( self, choices: Iterable[float | int | str], *, - default: float | int | str | None = None, - default_confidence: Literal["low", "medium", "high"] = "low", + prior: float | int | str | None = None, + prior_confidence: Literal["low", "medium", "high"] = "low", ): - """Create a new `CategoricalParameter`. + """Create a new `Categorical`. Args: choices: choices for the hyperparameter. - default: default value for the hyperparameter, must be in `choices=` + prior: prior value for the hyperparameter, must be in `choices=` if provided. - default_confidence: confidence score for the default value, used when + prior_confidence: confidence score for the prior value, used when considering prior based optimization. """ choices = list(choices) if len(choices) <= 1: raise ValueError("Categorical choices must have more than one value.") - super().__init__(value=None, is_fidelity=False, default=default) + super().__init__(value=None, is_fidelity=False, prior=prior) for choice in choices: - if not isinstance(choice, (float, int, str)): + if not isinstance(choice, float | int | str): raise TypeError( f'Choice "{choice}" is not of a valid type (float, int, str)' ) @@ -89,9 +79,9 @@ def __init__( if not all_unique(choices): raise ValueError(f"Choices must be unique but got duplicates.\n{choices}") - if default is not None and default not in choices: + if prior is not None and prior not in choices: raise ValueError( - f"Default value {default} is not in the provided choices {choices}" + f"Default value {prior} is not in the provided choices {choices}" ) self.choices = list(choices) @@ -101,22 +91,23 @@ def __init__( # currently we do a list.index() operation which is O(n). # However for small sized categoricals this is likely faster than # a lookup table. - # For now we can just cache the index of the value and default. + # For now we can just cache the index of the value and prior. 
self._value_index: int | None = None - self.default_confidence_choice = default_confidence - self.default_confidence_score = self.DEFAULT_CONFIDENCE_SCORES[default_confidence] - self.has_prior = self.default is not None - self._default_index: int | None = ( - self.choices.index(default) if default is not None else None + self.prior_confidence_choice = prior_confidence + self.prior_confidence_score = self.PRIOR_CONFIDENCE_SCORES[prior_confidence] + self.has_prior = self.prior is not None + self._prior_index: int | None = ( + self.choices.index(prior) if prior is not None else None ) + self.domain = Domain.indices(len(self.choices)) @override def clone(self) -> Self: clone = self.__class__( choices=self.choices, - default=self.default, - default_confidence=self.default_confidence_choice, # type: ignore + prior=self.prior, + prior_confidence=self.prior_confidence_choice, # type: ignore ) if self.value is not None: clone.set_value(self.value) @@ -131,113 +122,30 @@ def __eq__(self, other: Any) -> bool: self.choices == other.choices and self.value == other.value and self.is_fidelity == other.is_fidelity - and self.default == other.default + and self.prior == other.prior and self.has_prior == other.has_prior - and self.default_confidence_score == other.default_confidence_score + and self.prior_confidence_score == other.prior_confidence_score ) def __repr__(self) -> str: return f"<Categorical, choices: {self.choices}, value: {self.value}>" def _compute_user_prior_probabilities(self) -> npt.NDArray[f64]: - # The default value should have "default_confidence_score" more probability + # The prior value should have "prior_confidence_score" more probability # than all the other values. - assert self._default_index is not None + assert self._prior_index is not None probabilities = np.ones(len(self.choices)) - probabilities[self._default_index] = self.default_confidence_score + probabilities[self._prior_index] = self.prior_confidence_score return probabilities / np.sum(probabilities) - @override - def compute_prior(self, log: bool = False) -> float: - assert self._value_index is not None - - probabilities = self._compute_user_prior_probabilities() - return float( - np.log(probabilities[self._value_index] + 1e-12) - if log - else probabilities[self._value_index] - ) - @override def sample_value(self, *, user_priors: bool = False) -> Any: - if user_priors and self.default is not None: - probabilities = self._compute_user_prior_probabilities() - return np.random.choice(self.choices, p=probabilities) - - return np.random.choice(self.choices) - - @override - def mutate( - self, - parent: Self | None = None, - mutation_rate: float = 1.0, - mutation_strategy: str = "local_search", - **kwargs: Any, - ) -> Self: - if self.is_fidelity: - raise ValueError("Trying to mutate fidelity param!") - - if parent is None: - parent = self - - if mutation_strategy == "simple": - child = parent.sample() - elif mutation_strategy == "local_search": - child = self._get_non_unique_neighbors(num_neighbours=1)[0] - else: - raise NotImplementedError - - if parent.value == child.value: - raise ValueError("Parent is the same as child!") - - return child - - @override - def crossover(self, parent1: Self, parent2: Self | None = None) -> tuple[Self, Self]: - if self.is_fidelity: - raise ValueError("Trying to crossover fidelity param!") - - if parent2 is None: - parent2 = self - - assert parent1.value is not None - assert parent2.value is not None - - child1 = parent1.clone() - child1.set_value(parent2.value) - - child2 = parent2.clone() - child2.set_value(parent1.value) - - return child1, child2
- - @override - def _get_non_unique_neighbors( - self, - num_neighbours: int, - *, - std: float = 0.2, - ) -> list[Self]: - assert self._value_index is not None - indices = np.arange(len(self.choices)) - bot = indices[: self._value_index] - top = indices[self._value_index + 1 :] - available_neighbours = np.concatenate([bot, top]) - - selected_indices = np.random.choice( - available_neighbours, - size=num_neighbours, - replace=True, - ) - - new_neighbours: list[Self] = [] - for value_index in selected_indices: - new_param = self.clone() - new_param.set_value(self.choices[value_index]) - new_neighbours.append(new_param) + indices = np.arange(len(self.choices)) + if user_priors and self.prior is not None: + probabilities = self._compute_user_prior_probabilities() + return self.choices[np.random.choice(indices, p=probabilities)] - return new_neighbours + return self.choices[np.random.choice(indices)] @override def value_to_normalized(self, value: Any) -> float: @@ -247,23 +155,6 @@ def normalized_to_value(self, normalized_value: float) -> Any: return self.choices[int(np.rint(normalized_value))] - @override - def set_default(self, default: Any | None) -> None: - if default is None: - self.default = None - self._default_index = None - self.has_prior = False - return - - if default not in self.choices: - raise ValueError( - f"Default value {default} is not in the provided choices {self.choices}" - ) - - self.default = default - self._default_index = self.choices.index(default) - self.has_prior = True - @override def set_value(self, value: Any | None) -> None: if value is None: @@ -276,12 +167,46 @@ def set_value(self, value: Any | None) -> None: self._value_index = self.choices.index(value) self.normalized_value = float(self._value_index) - @override - @classmethod - def serialize_value(cls, value: CategoricalTypes) -> CategoricalTypes: - return value - @override - @classmethod - def deserialize_value(cls, value: CategoricalTypes) -> CategoricalTypes: - return value +class CategoricalParameter(Categorical): + """Deprecated: Use `Categorical` instead of `CategoricalParameter`. + + This class remains for backward compatibility and will raise a deprecation + warning if used. + """ + + def __init__( + self, + choices: Iterable[float | int | str], + *, + prior: float | int | str | None = None, + prior_confidence: Literal["low", "medium", "high"] = "low", + ): + """Initialize a deprecated `CategoricalParameter`. + + Args: + choices: choices for the hyperparameter. + prior: prior value for the hyperparameter, must be in `choices=` + if provided. + prior_confidence: confidence score for the prior value, used when + considering prior-based optimization. + + Raises: + DeprecationWarning: A warning indicating that `neps.CategoricalParameter` is + deprecated and `neps.Categorical` should be used instead. + """ + import warnings + + warnings.warn( + ( + "Usage of 'neps.CategoricalParameter' is deprecated and will be removed " + "in future releases. Please use 'neps.Categorical' instead."
+ ), + DeprecationWarning, + stacklevel=2, + ) + super().__init__( + choices=choices, + prior=prior, + prior_confidence=prior_confidence, + ) diff --git a/neps/search_spaces/hyperparameters/constant.py b/neps/search_spaces/hyperparameters/constant.py index e35647b6a..155ddc9e2 100644 --- a/neps/search_spaces/hyperparameters/constant.py +++ b/neps/search_spaces/hyperparameters/constant.py @@ -10,43 +10,42 @@ T = TypeVar("T", int, float, str) -class ConstantParameter(Parameter[T, T]): +class Constant(Parameter[T, T]): """A constant value for a parameter. This kind of [`Parameter`][neps.search_spaces.parameter] is used to represent hyperparameters with values that should not change during optimization. For example, the `batch_size` hyperparameter in a neural - network search space can be a `ConstantParameter` with a value of `32`. + network search space can be a `Constant` with a value of `32`. ```python import neps - batch_size = neps.ConstantParameter(32) + batch_size = neps.Constant(32) ``` !!! note - As the name suggests, the value of a `ConstantParameter` only have one - value and so its [`.default`][neps.search_spaces.parameter.Parameter.default] + As the name suggests, the value of a `Constant` only have one + value and so its [`.prior`][neps.search_spaces.parameter.Parameter.prior] and [`.value`][neps.search_spaces.parameter.Parameter.value] should always be the same. This also implies that the - [`.default`][neps.search_spaces.parameter.Parameter.default] can never be `None`. + [`.prior`][neps.search_spaces.parameter.Parameter.prior] can never be `None`. Please use - [`.set_constant_value()`][neps.search_spaces.hyperparameters.constant.ConstantParameter.set_constant_value] + [`.set_constant_value()`][neps.search_spaces.hyperparameters.constant.Constant.set_constant_value] if you need to change the value of the constant parameter. - """ def __init__(self, value: T): - """Create a new `ConstantParameter`. + """Create a new `Constant`. Args: value: value for the hyperparameter. """ - super().__init__(value=value, default=value, is_fidelity=False) # type: ignore + super().__init__(value=value, prior=value, is_fidelity=False) # type: ignore self._value: T = value # type: ignore @override @@ -72,33 +71,6 @@ def __repr__(self) -> str: def sample_value(self) -> T: return self.value - @override - def set_default(self, default: T | None) -> None: - """Set the default of the constant parameter. - - !!! note - - This method is a no-op but will raise a `ValueError` if the default - is different from the current default. - - Please see - [`.set_constant_value()`][neps.search_spaces.hyperparameters.constant.ConstantParameter.set_constant_value] - which can be used to set both the - [`.value`][neps.search_spaces.parameter.Parameter.value] - and the [`.default`][neps.search_spaces.parameter.Parameter.default] at once - - Args: - default: value to set the default to. - - Raises: - ValueError: if the default is different from the current default. - """ - if default != self.default: - raise ValueError( - f"Constant does not allow changing the default value. " - f"Tried to set default to {default}, but it is already {self.default}" - ) - @override def set_value(self, value: T | None) -> None: """Set the value of the constant parameter. @@ -109,10 +81,10 @@ def set_value(self, value: T | None) -> None: is different from the current value. 
Please see - [`.set_constant_value()`][neps.search_spaces.hyperparameters.constant.ConstantParameter.set_constant_value] + [`.set_constant_value()`][neps.search_spaces.hyperparameters.constant.Constant.set_constant_value] which can be used to set both the [`.value`][neps.search_spaces.parameter.Parameter.value] - and the [`.default`][neps.search_spaces.parameter.Parameter.default] at once + and the [`.prior`][neps.search_spaces.parameter.Parameter.prior] at once Args: value: value to set the parameter to. @@ -122,23 +94,10 @@ def set_value(self, value: T | None) -> None: """ if value != self._value: raise ValueError( - f"Constant does not allow chaning the set value. " + f"Constant does not allow changing the set value. " f"Tried to set value to {value}, but it is already {self.value}" ) - def set_constant_value(self, value: T) -> None: - """Set the value of the constant parameter. - - !!! note - - This method is used to set the - [`.value`][neps.search_spaces.parameter.Parameter.value] - including the [`.default`][neps.search_spaces.parameter.Parameter.default] - It is used internally and should not be used by the user. - """ - self._value = value - self.default = value - @override def value_to_normalized(self, value: T) -> float: return 1.0 if value == self._value else 0.0 @@ -147,21 +106,32 @@ def value_to_normalized(self, value: T) -> float: def normalized_to_value(self, normalized_value: float) -> T: return self._value - @override - def _get_non_unique_neighbors( - self, - num_neighbours: int, - *, - std: float = 0.2, - ) -> list[Self]: - raise ValueError("ConstantParameter have no neighbours") - @override - @classmethod - def serialize_value(cls, value: T) -> T: - return value +class ConstantParameter(Constant): + """Deprecated: Use `Constant` instead of `ConstantParameter`. - @override - @classmethod - def deserialize_value(cls, value: T) -> T: - return value + This class remains for backward compatibility and will raise a deprecation + warning if used. + """ + + def __init__(self, value: T): + """Initialize a deprecated `ConstantParameter`. + + Args: + value: value for the hyperparameter. + + Raises: + DeprecationWarning: A warning indicating that `neps.ConstantParameter` is + deprecated and `neps.Constant` should be used instead. + """ + import warnings + + warnings.warn( + ( + "Usage of 'neps.ConstantParameter' is deprecated and will be removed in" + " future releases. Please use 'neps.Constant' instead." + ), + DeprecationWarning, + stacklevel=2, + ) + super().__init__(value=value) diff --git a/neps/search_spaces/hyperparameters/float.py b/neps/search_spaces/hyperparameters/float.py index b780f3ff4..fef72f684 100644 --- a/neps/search_spaces/hyperparameters/float.py +++ b/neps/search_spaces/hyperparameters/float.py @@ -3,18 +3,20 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING, ClassVar, Literal, Mapping +from collections.abc import Mapping +from typing import TYPE_CHECKING, ClassVar, Literal from typing_extensions import Self, override import numpy as np -from neps.search_spaces.hyperparameters.numerical import NumericalParameter +from neps.search_spaces.domain import Domain +from neps.search_spaces.hyperparameters.numerical import Numerical if TYPE_CHECKING: from neps.utils.types import Number -class FloatParameter(NumericalParameter[float]): +class Float(Numerical[float]): """A float value for a parameter. 
This kind of [`Parameter`][neps.search_spaces.parameter] is used @@ -22,17 +24,17 @@ class Float(Numerical[float]): it exists on a log scale. For example, `l2_norm` could be a value in `(0, 1)`, while the `learning_rate` - hyperparameter in a neural network search space can be a `FloatParameter` + hyperparameter in a neural network search space can be a `Float` with a range of `(0.0001, 0.1)` but on a log scale. ```python import neps - l2_norm = neps.FloatParameter(0, 1) - learning_rate = neps.FloatParameter(1e-4, 1e-1, log=True) + l2_norm = neps.Float(0, 1) + learning_rate = neps.Float(1e-4, 1e-1, log=True) ``` - Please see the [`NumericalParameter`][neps.search_spaces.numerical.NumericalParameter] + Please see the [`Numerical`][neps.search_spaces.numerical.Numerical] class for more details on the methods available for this class. """ @@ -49,27 +51,28 @@ def __init__( *, log: bool = False, is_fidelity: bool = False, - default: Number | None = None, - default_confidence: Literal["low", "medium", "high"] = "low", + prior: Number | None = None, + prior_confidence: Literal["low", "medium", "high"] = "low", ): - """Create a new `FloatParameter`. + """Create a new `Float`. Args: lower: lower bound for the hyperparameter. upper: upper bound for the hyperparameter. log: whether the hyperparameter is on a log scale. is_fidelity: whether the hyperparameter is fidelity. - default: default value for the hyperparameter. - default_confidence: confidence score for the default value, used when + prior: prior value for the hyperparameter. + prior_confidence: confidence score for the prior value, used when considering prior-based optimization. """ super().__init__( lower=float(lower), upper=float(upper), log=log, - default=float(default) if default is not None else None, - default_confidence=default_confidence, + prior=float(prior) if prior is not None else None, + prior_confidence=prior_confidence, is_fidelity=is_fidelity, + domain=Domain.floating(lower, upper, log=log), ) @override @@ -79,47 +82,25 @@ def clone(self) -> Self: upper=self.upper, log=self.log, is_fidelity=self.is_fidelity, - default=self.default, - default_confidence=self.default_confidence_choice, + prior=self.prior, + prior_confidence=self.prior_confidence_choice, ) if self.value is not None: clone.set_value(self.value) return clone - @override - def set_default(self, default: float | None) -> None: - if default is None: - self.default = None - self.has_prior = False - self.log_default = None - return - - if not self.lower <= default <= self.upper: - cls_name = self.__class__.__name__ - raise ValueError( - f"{cls_name} parameter: default bounds error. Expected lower <= default" - f" <= upper, but got lower={self.lower}, default={default}," - f" upper={self.upper}" - ) - - self.default = float(default) - self.has_prior = True - if self.log: - self.log_default = np.log(self.default) - @override def set_value(self, value: float | None) -> None: if value is None: self._value = None self.normalized_value = None - self.log_value = None return if not self.lower <= value <= self.upper: cls_name = self.__class__.__name__ raise ValueError( - f"{cls_name} parameter: default bounds error. Expected lower <= default" + f"{cls_name} parameter: value bounds error.
Expected lower <= value" f" <= upper, but got lower={self.lower}, value={value}," f" upper={self.upper}" ) @@ -127,21 +108,19 @@ def set_value(self, value: float | None) -> None: value = float(value) self._value = value self.normalized_value = self.value_to_normalized(value) - if self.log: - self.log_value = np.log(value) @override def sample_value(self, *, user_priors: bool = False) -> float: if self.log: assert self.log_bounds is not None low, high = self.log_bounds - default = self.log_default + prior = self.log_prior else: - low, high, default = self.lower, self.upper, self.default + low, high, prior = self.lower, self.upper, self.prior if user_priors and self.has_prior: dist, std = self._get_truncnorm_prior_and_std() - value = dist.rvs() * std + default + value = dist.rvs() * std + prior else: value = np.random.uniform(low=low, high=high) @@ -173,33 +152,58 @@ def normalized_to_value(self, normalized_value: float) -> float: _value = np.exp(normalized_value) if self.log else normalized_value return float(_value) - @override - def _get_non_unique_neighbors( - self, - num_neighbours: int, - *, - std: float = 0.2, - ) -> list[Self]: - neighbours: list[Self] = [] + def __repr__(self) -> str: + float_repr = f"{self.value:.07f}" if self.value is not None else "None" + return f"<Float, range: [{self.lower}, {self.upper}], value: {float_repr}>" - assert self.value is not None - vectorized_val = self.value_to_normalized(self.value) - # TODO(eddiebergman): This whole thing can be vectorized, not sure - # if we ever have enough num_neighbours to make it worth it - while len(neighbours) < num_neighbours: - n_val = np.random.normal(vectorized_val, std) - if n_val < 0 or n_val > 1: - continue +class FloatParameter(Float): + """Deprecated: Use `Float` instead of `FloatParameter`. - sampled_value = self.normalized_to_value(n_val) + This class remains for backward compatibility and will raise a deprecation + warning if used. + """ - neighbour = self.clone() - neighbour.set_value(sampled_value) - neighbours.append(neighbour) + def __init__( + self, + lower: Number, + upper: Number, + *, + log: bool = False, + is_fidelity: bool = False, + prior: Number | None = None, + prior_confidence: Literal["low", "medium", "high"] = "low", + ): + """Initialize a deprecated `FloatParameter`. - return neighbours + Args: + lower: lower bound for the hyperparameter. + upper: upper bound for the hyperparameter. + log: whether the hyperparameter is on a log scale. + is_fidelity: whether the hyperparameter is fidelity. + prior: prior value for the hyperparameter. + prior_confidence: confidence score for the prior value, used when + considering prior-based optimization. - def __repr__(self) -> str: - float_repr = f"{self.value:.07f}" if self.value is not None else "None" - return f"<Float, range: [{self.lower}, {self.upper}], value: {float_repr}>" + Raises: + DeprecationWarning: A warning indicating that `neps.FloatParameter` is + deprecated and `neps.Float` should be used instead. + """ + import warnings + + warnings.warn( + ( + "Usage of 'neps.FloatParameter' is deprecated and will be removed in" + " future releases. Please use 'neps.Float' instead."
+ ), + DeprecationWarning, + stacklevel=2, + ) + super().__init__( + lower=lower, + upper=upper, + log=log, + is_fidelity=is_fidelity, + prior=prior, + prior_confidence=prior_confidence, + ) diff --git a/neps/search_spaces/hyperparameters/integer.py b/neps/search_spaces/hyperparameters/integer.py index 6462cc63d..0386032d9 100644 --- a/neps/search_spaces/hyperparameters/integer.py +++ b/neps/search_spaces/hyperparameters/integer.py @@ -2,33 +2,35 @@ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar, Literal, Mapping +from collections.abc import Mapping +from typing import TYPE_CHECKING, Any, ClassVar, Literal from typing_extensions import Self, override import numpy as np -from neps.search_spaces.hyperparameters.float import FloatParameter -from neps.search_spaces.hyperparameters.numerical import NumericalParameter +from neps.search_spaces.domain import Domain +from neps.search_spaces.hyperparameters.float import Float +from neps.search_spaces.hyperparameters.numerical import Numerical if TYPE_CHECKING: from neps.utils.types import Number -class IntegerParameter(NumericalParameter[int]): +class Integer(Numerical[int]): """An integer value for a parameter. This kind of [`Parameter`][neps.search_spaces.parameter] is used to represent hyperparameters with continuous integer values, optionally specifying if it exists on a log scale. For example, `batch_size` could be a value in `(32, 128)`, while the `num_layers` - hyperparameter in a neural network search space can be a `IntegerParameter` + hyperparameter in a neural network search space can be an `Integer` with a range of `(1, 1000)` but on a log scale. ```python import neps - batch_size = neps.IntegerParameter(32, 128) - num_layers = neps.IntegerParameter(1, 1000, log=True) + batch_size = neps.Integer(32, 128) + num_layers = neps.Integer(1, 1000, log=True) ``` """ @@ -45,18 +47,18 @@ def __init__( *, log: bool = False, is_fidelity: bool = False, - default: Number | None = None, - default_confidence: Literal["low", "medium", "high"] = "low", + prior: Number | None = None, + prior_confidence: Literal["low", "medium", "high"] = "low", ): - """Create a new `IntegerParameter`. + """Create a new `Integer`. Args: lower: lower bound for the hyperparameter. upper: upper bound for the hyperparameter. log: whether the hyperparameter is on a log scale. is_fidelity: whether the hyperparameter is fidelity. - default: default value for the hyperparameter. - default_confidence: confidence score for the default value, used when + prior: prior value for the hyperparameter. + prior_confidence: confidence score for the prior value, used when considering prior-based optimization. """ lower = int(np.rint(lower)) upper = int(np.rint(upper)) _size = upper - lower + 1 if _size <= 1: raise ValueError( - f"IntegerParameter: expected at least 2 possible values in the range," + f"Integer: expected at least 2 possible values in the range," f" got upper={upper}, lower={lower}." ) @@ -73,20 +75,21 @@ upper=int(np.rint(upper)), log=log, is_fidelity=is_fidelity, - default=int(np.rint(default)) if default is not None else None, - default_confidence=default_confidence, + prior=int(np.rint(prior)) if prior is not None else None, + prior_confidence=prior_confidence, + domain=Domain.integer(lower, upper, log=log), ) # We subtract/add 0.499999 from lower/upper bounds respectively, such that # sampling in the float space gives equal probability for all integer values, # i.e.
[x - 0.499999, x + 0.499999] - self.float_hp = FloatParameter( + self.float_hp = Float( lower=self.lower - 0.499999, upper=self.upper + 0.499999, log=self.log, is_fidelity=is_fidelity, - default=default, - default_confidence=default_confidence, + prior=prior, + prior_confidence=prior_confidence, ) def __repr__(self) -> str: @@ -99,8 +102,8 @@ def clone(self) -> Self: upper=self.upper, log=self.log, is_fidelity=self.is_fidelity, - default=self.default, - default_confidence=self.default_confidence_choice, + prior=self.prior, + prior_confidence=self.prior_confidence_choice, ) if self.value is not None: clone.set_value(self.value) @@ -108,34 +111,21 @@ def clone(self) -> Self: return clone @override - def load_from(self, value: Number) -> None: + def load_from(self, value: Any) -> None: self._value = int(np.rint(value)) - @override - def set_default(self, default: int | None) -> None: - if default is None: - self.default = None - self.has_prior = False - self.float_hp.set_default(None) - else: - _default = int(round(default)) - self.default = _default - self.has_prior = True - self.float_hp.set_default(_default) - @override def set_value(self, value: int | None) -> None: if value is None: self._value = None self.normalized_value = None - self.log_value = None self.float_hp.set_value(None) return if not self.lower <= value <= self.upper: cls_name = self.__class__.__name__ raise ValueError( - f"{cls_name} parameter: default bounds error. Expected lower <= default" + f"{cls_name} parameter: prior bounds error. Expected lower <= prior" f" <= upper, but got lower={self.lower}, value={value}," f" upper={self.upper}" ) @@ -145,8 +135,6 @@ def set_value(self, value: int | None) -> None: self.float_hp.set_value(value) self._value = value self.normalized_value = self.value_to_normalized(value) - if self.log: - self.log_value = np.log(value) @override def sample_value(self, *, user_priors: bool = False) -> int: @@ -161,36 +149,54 @@ def value_to_normalized(self, value: int) -> float: def normalized_to_value(self, normalized_value: float) -> int: return int(np.rint(self.float_hp.normalized_to_value(normalized_value))) - @override - def set_default_confidence_score(self, default_confidence: str) -> None: - self.float_hp.set_default_confidence_score(default_confidence) - super().set_default_confidence_score(default_confidence) - - @override - def _get_non_unique_neighbors( - self, - num_neighbours: int, - *, - std: float = 0.2, - ) -> list[Self]: - neighbours: list[Self] = [] - assert self.value is not None - vectorized_val = self.value_to_normalized(self.value) +class IntegerParameter(Integer): + """Deprecated: Use `Integer` instead of `IntegerParameter`. - # TODO(eddiebergman): This whole thing can be vectorized, not sure - # if we ever have enough num_neighbours to make it worth it - while len(neighbours) < num_neighbours: - n_val = np.random.normal(vectorized_val, std) - if n_val < 0 or n_val > 1: - continue + This class remains for backward compatibility and will raise a deprecation + warning if used. + """ - sampled_value = self.normalized_to_value(n_val) - if sampled_value == self.value: - continue + def __init__( + self, + lower: Number, + upper: Number, + *, + log: bool = False, + is_fidelity: bool = False, + prior: Number | None = None, + prior_confidence: Literal["low", "medium", "high"] = "low", + ): + """Initialize a deprecated `IntegerParameter`. - neighbour = self.clone() - neighbour.set_value(sampled_value) - neighbours.append(neighbour) + Args: + lower: lower bound for the hyperparameter. 
+            upper: upper bound for the hyperparameter.
+            log: whether the hyperparameter is on a log scale.
+            is_fidelity: whether the hyperparameter is fidelity.
+            prior: prior value for the hyperparameter.
+            prior_confidence: confidence score for the prior value, used when
+                considering prior-based optimization.

-        return neighbours

+        Raises:
+            DeprecationWarning: A warning indicating that `neps.IntegerParameter` is
+                deprecated and `neps.Integer` should be used instead.
+        """
+        import warnings
+
+        warnings.warn(
+            (
+                "Usage of 'neps.IntegerParameter' is deprecated and will be removed in"
+                " future releases. Please use 'neps.Integer' instead."
+            ),
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        super().__init__(
+            lower=lower,
+            upper=upper,
+            log=log,
+            is_fidelity=is_fidelity,
+            prior=prior,
+            prior_confidence=prior_confidence,
+        )
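The `±0.499999` padding introduced above for `Integer.float_hp` is what keeps rounding from under-weighting the endpoint integers. A self-contained sketch of the idea (plain NumPy, not the NePS API; bounds are illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)
lower, upper = 1, 4

# Sample uniformly in [lower - 0.499999, upper + 0.499999], then round:
samples = rng.uniform(lower - 0.499999, upper + 0.499999, size=100_000)
values, counts = np.unique(np.rint(samples).astype(int), return_counts=True)

# Each integer gets (almost exactly) probability 1 / (upper - lower + 1).
# Rounding uniform samples over the unpadded [lower, upper] would instead
# give the endpoints only half the weight of the interior values.
print(dict(zip(values.tolist(), np.round(counts / counts.sum(), 3).tolist())))
```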
""" from __future__ import annotations +from collections.abc import Mapping from functools import lru_cache -from typing import TYPE_CHECKING, Any, ClassVar, Literal, Mapping, TypeVar -from typing_extensions import Self, override +from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeVar +from typing_extensions import override import numpy as np import scipy -from neps.search_spaces.parameter import MutatableParameter, ParameterWithPrior +from neps.search_spaces.parameter import ParameterWithPrior if TYPE_CHECKING: - from neps.search_spaces.hyperparameters.float import FloatParameter - from neps.search_spaces.hyperparameters.integer import IntegerParameter + from neps.search_spaces.domain import Domain from neps.utils.types import TruncNorm T = TypeVar("T", int, float) @@ -47,27 +44,26 @@ def _get_truncnorm_prior_and_std( low: int | float, high: int | float, - default: int | float, + prior: int | float, confidence_score: float, ) -> tuple[TruncNorm, float]: std = (high - low) * confidence_score - a, b = (low - default) / std, (high - default) / std + a, b = (low - prior) / std, (high - prior) / std return scipy.stats.truncnorm(a, b), float(std) -class NumericalParameter(ParameterWithPrior[T, T], MutatableParameter): +class Numerical(ParameterWithPrior[T, T]): """A numerical hyperparameter is bounded by a lower and upper value. Attributes: lower: The lower bound of the numerical hyperparameter. upper: The upper bound of the numerical hyperparameter. log: Whether the hyperparameter is in log space. - log_value: The log value of the hyperparameter, if `log=True`. log_bounds: The log bounds of the hyperparameter, if `log=True`. - log_default: The log default value of the hyperparameter, if `log=True` - and a `default` is set. - default_confidence_choice: The default confidence choice. - default_confidence_score: The default confidence score. + log_prior: The log prior value of the hyperparameter, if `log=True` + and a `prior` is set. + prior_confidence_choice: The prior confidence choice. + prior_confidence_score: The prior confidence score. has_prior: Whether the hyperparameter has a prior. """ @@ -79,9 +75,10 @@ def __init__( upper: T, *, log: bool = False, - default: T | None, + prior: T | None, is_fidelity: bool, - default_confidence: Literal["low", "medium", "high"] = "low", + domain: Domain[T], + prior_confidence: Literal["low", "medium", "high"] = "low", ): """Initialize the numerical hyperparameter. @@ -89,11 +86,12 @@ def __init__( lower: The lower bound of the numerical hyperparameter. upper: The upper bound of the numerical hyperparameter. log: Whether the hyperparameter is in log space. - default: The default value of the hyperparameter. + prior: The prior value of the hyperparameter. is_fidelity: Whether the hyperparameter is a fidelity parameter. - default_confidence: The default confidence choice. + domain: The domain of the hyperparameter. + prior_confidence: The prior confidence choice. """ - super().__init__(value=None, default=default, is_fidelity=is_fidelity) # type: ignore + super().__init__(value=None, prior=prior, is_fidelity=is_fidelity) # type: ignore _cls_name = self.__class__.__name__ if lower >= upper: raise ValueError( @@ -107,18 +105,24 @@ def __init__( f" Actual values: lower={lower}, upper={upper}" ) - if default is not None and not lower <= default <= upper: + if prior is not None and not lower <= prior <= upper: raise ValueError( - f"Float parameter: default bounds error. 
Expected lower <= default" - f" <= upper, but got lower={lower}, default={default}," + f"Float parameter: prior bounds error. Expected lower <= prior" + f" <= upper, but got lower={lower}, prior={prior}," f" upper={upper}" ) - if default_confidence not in self.DEFAULT_CONFIDENCE_SCORES: + if prior_confidence not in self.DEFAULT_CONFIDENCE_SCORES: raise ValueError( - f"{_cls_name} parameter: default confidence score error. Expected one of " + f"{_cls_name} parameter: prior confidence score error. Expected one of " f"{list(self.DEFAULT_CONFIDENCE_SCORES.keys())}, but got " - f"{default_confidence}" + f"{prior_confidence}" + ) + + if is_fidelity and (lower <= 0 or upper <= 0): + raise ValueError( + f"{_cls_name} parameter: fidelity parameter bounds error (log scale " + f"can't have bounds <= 0). Actual values: lower={lower}, upper={upper}" ) # Validate 'log' and 'is_fidelity' types to prevent configuration errors @@ -133,23 +137,19 @@ def __init__( self.lower: T = lower self.upper: T = upper self.log: bool = log - self.log_value: float | None = None + self.domain: Domain[T] = domain self.log_bounds: tuple[float, float] | None = None - self.log_default: float | None = None + self.log_prior: float | None = None if self.log: self.log_bounds = (float(np.log(lower)), float(np.log(upper))) - self.log_default = ( - float(np.log(self.default)) if self.default is not None else None - ) + self.log_prior = float(np.log(self.prior)) if self.prior is not None else None - self.default_confidence_choice: Literal["low", "medium", "high"] = ( - default_confidence - ) + self.prior_confidence_choice: Literal["low", "medium", "high"] = prior_confidence - self.default_confidence_score: float = self.DEFAULT_CONFIDENCE_SCORES[ - default_confidence + self.prior_confidence_score: float = self.DEFAULT_CONFIDENCE_SCORES[ + prior_confidence ] - self.has_prior: bool = self.default is not None + self.has_prior: bool = self.prior is not None @override def __eq__(self, other: Any) -> bool: @@ -162,147 +162,77 @@ def __eq__(self, other: Any) -> bool: and self.log == other.log and self.is_fidelity == other.is_fidelity and self.value == other.value - and self.default == other.default - and self.default_confidence_score == other.default_confidence_score + and self.prior == other.prior + and self.prior_confidence_score == other.prior_confidence_score ) - @override - def compute_prior(self, *, log: bool = False) -> float: - default = self.log_default if self.log else self.default - - assert self.value is not None - assert default is not None - - value = np.log(self.value) if self.log else self.value - value -= default - dist, std = self._get_truncnorm_prior_and_std() - value /= std - prior = np.log(dist.pdf(value) + 1e-12) if log else dist.pdf(value) - return float(prior) - - @override - def mutate( - self, - parent: Self | None = None, - mutation_rate: float = 1.0, - mutation_strategy: str = "local_search", - **kwargs: Any, - ) -> Self: - if self.is_fidelity: - raise ValueError("Trying to mutate fidelity param!") - - if parent is None: - parent = self - - if mutation_strategy == "simple": - child = self.clone() - child.sample() - elif mutation_strategy == "local_search" and "std" in kwargs: - child = self._get_non_unique_neighbors(std=kwargs["std"], num_neighbours=1)[0] - elif mutation_strategy == "local_search": - child = self._get_non_unique_neighbors(num_neighbours=1)[0] - else: - raise NotImplementedError - - if parent.value == child.value: - raise ValueError("Parent is the same as child!") - - return child - - @override - 
def crossover(self, parent1: Self, parent2: Self | None = None) -> tuple[Self, Self]: - if self.is_fidelity: - raise ValueError("Trying to crossover fidelity param!") - - if parent2 is None: - parent2 = self - - assert parent1.value is not None - assert parent2.value is not None - - crossover_value = (parent1.value + parent2.value) / 2 - - proxy_self = self.clone() - proxy_self.set_value(crossover_value) # type: ignore - - tt = tuple(proxy_self._get_non_unique_neighbors(std=0.1, num_neighbours=2)) - assert len(tt) == 2 - return tt - def _get_truncnorm_prior_and_std(self) -> tuple[TruncNorm, float]: if self.log: assert self.log_bounds is not None low, high = self.log_bounds - default = self.log_default + prior = self.log_prior else: low, high = self.lower, self.upper - default = self.default + prior = self.prior - assert default is not None + assert prior is not None return _get_truncnorm_prior_and_std( low=low, high=high, - default=default, - confidence_score=self.default_confidence_score, + prior=prior, + confidence_score=self.prior_confidence_score, ) - def to_integer(self) -> IntegerParameter: - """Convert the numerical hyperparameter to an integer hyperparameter.""" - from neps.search_spaces.hyperparameters.integer import IntegerParameter - - as_int = lambda x: int(np.rint(x)) - int_hp = IntegerParameter( - lower=as_int(self.lower), - upper=as_int(self.upper), - is_fidelity=self.is_fidelity, - default=as_int(self.default) if self.default is not None else None, - default_confidence=self.default_confidence_choice, # type: ignore - ) - int_hp.set_value(as_int(self.value) if self.value is not None else None) - return int_hp - - def to_float(self) -> FloatParameter: - """Convert the numerical hyperparameter to a float hyperparameter.""" - from neps.search_spaces.hyperparameters.integer import FloatParameter - - float_hp = FloatParameter( - lower=float(self.lower), - upper=float(self.upper), - is_fidelity=self.is_fidelity, - default=float(self.default) if self.default is not None else None, - default_confidence=self.default_confidence_choice, # type: ignore - ) - float_hp.set_value(float(self.value) if self.value is not None else None) - return float_hp - - def grid(self, *, size: int, include_endpoint: bool = True) -> list[T]: - """Generate a grid of values for the numerical hyperparameter. +class NumericalParameter(Numerical): + """Deprecated: Use `Numerical` instead of `NumericalParameter`. - !!! note "Duplicates" + This class remains for backward compatibility and will raise a deprecation + warning if used. + """ - The grid may contain duplicates if the hyperparameter is an integer, - for example if the lower bound is `0` and the upper bound is `10`, but - `size=20`. + def __init__( + self, + lower: T, + upper: T, + *, + log: bool = False, + prior: T | None, + is_fidelity: bool, + domain: Domain[T], + prior_confidence: Literal["low", "medium", "high"] = "low", + ): + """Initialize a deprecated `NumericalParameter`. Args: - size: The number of values to generate. - include_endpoint: Whether to include the upper bound in the grid. + lower: The lower bound of the numerical hyperparameter. + upper: The upper bound of the numerical hyperparameter. + log: Whether the hyperparameter is in log space. + prior: The prior value of the hyperparameter. + is_fidelity: Whether the hyperparameter is a fidelity parameter. + domain: The domain of the hyperparameter. + prior_confidence: The prior confidence choice. - Returns: - A list of values for the numerical hyperparameter. 
+ Raises: + DeprecationWarning: A warning indicating that `neps.NumericalParameter` is + deprecated and `neps.Numerical` should be used instead. """ - return [ - self.normalized_to_value(x) - for x in np.linspace(0, 1, num=size, endpoint=include_endpoint) - ] - - @override - @classmethod - def serialize_value(cls, value: T) -> T: - return value - - @override - @classmethod - def deserialize_value(cls, value: T) -> T: - return value + import warnings + + warnings.warn( + ( + "Usage of 'neps.NumericalParameter' is deprecated and will be removed in" + " future releases. Please use 'neps.Numerical' instead." + ), + DeprecationWarning, + stacklevel=2, + ) + super().__init__( + lower=lower, + upper=upper, + log=log, + prior=prior, + is_fidelity=is_fidelity, + domain=domain, + prior_confidence=prior_confidence, + ) diff --git a/neps/search_spaces/parameter.py b/neps/search_spaces/parameter.py index a2f6c09c0..f8b763cba 100644 --- a/neps/search_spaces/parameter.py +++ b/neps/search_spaces/parameter.py @@ -4,11 +4,6 @@ holds a [`.value`][neps.search_spaces.Parameter.value] which can be set or empty, in which case it is `None`. -!!! tip - - A `Parameter` which allows for mutations and crossovers should implement - the [`MutatableParameter`][neps.search_spaces.MutatableParameter] protocol. - !!! tip A `Parameter` which allows for defining a @@ -23,8 +18,9 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any, ClassVar, Generic, Mapping, TypeVar, runtime_checkable -from typing_extensions import Protocol, Self +from collections.abc import Mapping +from typing import Any, ClassVar, Generic, TypeVar +from typing_extensions import Self ValueT = TypeVar("ValueT") SerializedT = TypeVar("SerializedT") @@ -34,7 +30,7 @@ class Parameter(ABC, Generic[ValueT, SerializedT]): """A base class for hyperparameters. Attributes: - default: default value for the hyperparameter. This value + prior: default value for the hyperparameter. This value is used as a prior to inform algorithms about a decent default value for the hyperparameter, as well as use attributes from [`ParameterWithPrior`][neps.search_spaces.ParameterWithPrior], @@ -48,17 +44,17 @@ def __init__( self, *, value: ValueT | None, - default: ValueT | None, + prior: ValueT | None, is_fidelity: bool, ): """Create a new `Parameter`. Args: value: value for the hyperparameter. - default: default value for the hyperparameter. + prior: default value for the hyperparameter. is_fidelity: whether the hyperparameter is fidelity. """ - self.default = default + self.prior = prior self.is_fidelity = is_fidelity # TODO(eddiebergman): The reason to have this not as a straight alone @@ -72,6 +68,9 @@ def __init__( self.value_to_normalized(value) if value is not None else None ) + # TODO: Pass in through subclasses + self.prior_confidence_score: float + # TODO(eddiebergman): All this does is just check values which highly unlikely # what we want. However this needs to be tackled in a seperate PR. # @@ -83,7 +82,7 @@ def __eq__(self, other: Any) -> bool: return NotImplemented if self.value is not None and other.value is not None: - return self.serialize_value(self.value) == self.serialize_value(other.value) + return self.value == other.value return False @@ -114,17 +113,6 @@ def sample(self) -> Self: def sample_value(self) -> ValueT: """Sample a new value.""" - @abstractmethod - def set_default(self, default: ValueT | None) -> None: - """Set the default value for the hyperparameter. 
- - The `default=` is used as a prior and used to inform - algorithms about a decent default value for the hyperparameter. - - Args: - default: default value for the hyperparameter. - """ - @abstractmethod def set_value(self, value: ValueT | None) -> None: """Set the value for the hyperparameter. @@ -141,10 +129,10 @@ def value_to_normalized(self, value: ValueT) -> float: but roughly refers to numeric values. * `(0, 1)` scaling in the case of - a [`NumericalParameter`][neps.search_spaces.NumericalParameter], - * `{0.0, 1.0}` for a [`ConstantParameter`][neps.search_spaces.ConstantParameter], + a [`Numerical`][neps.search_spaces.Numerical], + * `{0.0, 1.0}` for a [`Constant`][neps.search_spaces.Constant], * `[0, 1, ..., n]` for a - [`Categorical`][neps.search_spaces.CategoricalParameter]. + [`Categorical`][neps.search_spaces.Categorical]. Args: value: value to convert. @@ -164,72 +152,31 @@ def normalized_to_value(self, normalized_value: float) -> ValueT: The value. """ - @abstractmethod - def _get_non_unique_neighbors( - self, - num_neighbours: int, - *, - std: float = 0.2, - ) -> list[Self]: ... - - def _get_single_neighbor(self, *, std: float = 0.2) -> Self: - """Override this if a faster implementation is possible.""" - return self._get_non_unique_neighbors(num_neighbours=1, std=std)[0] - - @classmethod - @abstractmethod - def serialize_value(cls, value: ValueT) -> SerializedT: - """Ensure the hyperparameter value is in a serializable format. - - - Returns: - A serializable version of the hyperparameter value - """ - - @classmethod - @abstractmethod - def deserialize_value(cls, value: SerializedT) -> ValueT: - """Deserialize a serialized value into the hyperparameter's value. - - Args: - value: value to deserialize. - """ - - def load_from(self, value: SerializedT) -> None: + def load_from(self, value: Any) -> None: """Load a serialized value into the hyperparameter's value. Args: value: value to load. """ - deserialized_value = self.deserialize_value(value) - self.set_value(deserialized_value) + self.set_value(value) class ParameterWithPrior(Parameter[ValueT, SerializedT]): """A base class for hyperparameters with priors. Attributes: - default_confidence_choice: The choice of how confident any algorithm should - be in the default value being a good value. - default_confidence_score: A score used by algorithms to utilize the default value. + prior_confidence_choice: The choice of how confident any algorithm should + be in the prior value being a good value. + prior_confidence_score: A score used by algorithms to utilize the prior value. has_prior: whether the hyperparameter has a prior that can be used by an - algorithm. In many cases, this refers to having a default value. + algorithm. In many cases, this refers to having a prior value. """ DEFAULT_CONFIDENCE_SCORES: ClassVar[Mapping[str, float]] - default_confidence_choice: str - default_confidence_score: float + prior_confidence_choice: str + prior_confidence_score: float has_prior: bool - @abstractmethod - def compute_prior(self, *, log: bool = True) -> float: - """Compute the likelihood of the currently set value from - the sampling distribution of the hyperparameter. - - Args: - log: whether to return the log likelihood. - """ - # NOTE(eddiebergman): Like the normal `Parameter.sample` but with `user_priors`. @abstractmethod def sample_value(self, *, user_priors: bool = False) -> ValueT: @@ -248,26 +195,6 @@ def sample_value(self, *, user_priors: bool = False) -> ValueT: The sampled value. 
""" - def set_default_confidence_score(self, default_confidence: str) -> None: - """Set the default confidence score for the hyperparameter. - - Args: - default_confidence: the choice of how confident any algorithm should - be in the default value being a good value. - - Raises: - ValueError: if the confidence score is not a valid choice. - """ - if default_confidence not in self.DEFAULT_CONFIDENCE_SCORES: - cls_name = self.__class__.__name__ - raise ValueError( - f"Invalid default confidence score: {default_confidence}" - f" for {cls_name}. Expected one of:" - f" {list(self.DEFAULT_CONFIDENCE_SCORES.keys())}" - ) - - self.default_confidence_score = self.DEFAULT_CONFIDENCE_SCORES[default_confidence] - def sample(self, *, user_priors: bool = False) -> Self: """Sample a new version of this `Parameter` with a random value. @@ -287,39 +214,3 @@ def sample(self, *, user_priors: bool = False) -> Self: copy_self = self.clone() copy_self.set_value(value) return copy_self - - -@runtime_checkable -class MutatableParameter(Protocol): - """A protocol for hyperparameters that can be mutated. - - Particpating parameters must implement the - [`mutate()`][neps.search_spaces.MutatableParameter.mutate] method - and the [`crossover()`][neps.search_spaces.MutatableParameter.crossover] - method. - """ - - def mutate(self, parent: Self | None = None) -> Self: - """Mutate the hyperparameter. - - Args: - parent: the parent hyperparameter to mutate from. - - Returns: - The mutated hyperparameter. - """ - ... - - def crossover(self, parent1: Self, parent2: Self | None = None) -> tuple[Self, Self]: - """Crossover the hyperparameter with another hyperparameter. - - Args: - parent1: the first parent hyperparameter. - parent2: the second parent hyperparameter. - If left as `None`, this hyperparameter will be used as the second parent - to crossover with. - - Returns: - A tuple of the two crossovered hyperparameters. - """ - ... 
diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index 992709c6c..3cb69b855 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -5,26 +5,24 @@ from __future__ import annotations import logging -import operator import pprint -from itertools import product +from collections.abc import Iterator, Mapping from pathlib import Path -from typing import TYPE_CHECKING, Any, Hashable, Iterator, Literal, Mapping -from typing_extensions import Self +from typing import Any import ConfigSpace as CS -import numpy as np import yaml from neps.search_spaces.architecture.graph_grammar import GraphParameter +from neps.search_spaces.domain import UNIT_FLOAT_DOMAIN from neps.search_spaces.hyperparameters import ( - CategoricalParameter, - ConstantParameter, - FloatParameter, - IntegerParameter, - NumericalParameter, + Categorical, + Constant, + Float, + Integer, + Numerical, ) -from neps.search_spaces.parameter import MutatableParameter, Parameter, ParameterWithPrior +from neps.search_spaces.parameter import Parameter, ParameterWithPrior from neps.search_spaces.yaml_search_space_utils import ( SearchSpaceFromYamlFileError, deduce_type, @@ -33,10 +31,6 @@ formatting_float, formatting_int, ) -from neps.utils.types import NotSet, _NotSet - -if TYPE_CHECKING: - import pandas as pd logger = logging.getLogger(__name__) @@ -63,30 +57,30 @@ def pipeline_space_from_configspace( for hyperparameter in configspace.get_hyperparameters(): if isinstance(hyperparameter, CS.Constant): - parameter = ConstantParameter(value=hyperparameter.value) + parameter = Constant(value=hyperparameter.value) elif isinstance(hyperparameter, CS.CategoricalHyperparameter): - parameter = CategoricalParameter( + parameter = Categorical( hyperparameter.choices, - default=hyperparameter.default_value, + prior=hyperparameter.default_value, ) elif isinstance(hyperparameter, CS.OrdinalHyperparameter): - parameter = CategoricalParameter( + parameter = Categorical( hyperparameter.sequence, - default=hyperparameter.default_value, + prior=hyperparameter.default_value, ) elif isinstance(hyperparameter, CS.UniformIntegerHyperparameter): - parameter = IntegerParameter( + parameter = Integer( lower=hyperparameter.lower, upper=hyperparameter.upper, log=hyperparameter.log, - default=hyperparameter.default_value, + prior=hyperparameter.default_value, ) elif isinstance(hyperparameter, CS.UniformFloatHyperparameter): - parameter = FloatParameter( + parameter = Float( lower=hyperparameter.lower, upper=hyperparameter.upper, log=hyperparameter.log, - default=hyperparameter.default_value, + prior=hyperparameter.default_value, ) else: raise ValueError(f"Unknown hyperparameter type {hyperparameter}") @@ -111,7 +105,7 @@ def pipeline_space_from_yaml( # noqa: C901 format, contents, or if the dictionary is invalid. 
""" try: - if isinstance(config, (str, Path)): + if isinstance(config, str | Path): # try to load the YAML file try: yaml_file_path = Path(config) @@ -134,21 +128,23 @@ def pipeline_space_from_yaml( # noqa: C901 pipeline_space: dict[str, Parameter] = {} - for name, details in config.items(): + if len(config) == 1 and "pipeline_space" in config: + config = config["pipeline_space"] + for name, details in config.items(): # type: ignore param_type = deduce_type(name, details) if param_type in ("int", "integer"): formatted_details = formatting_int(name, details) - pipeline_space[name] = IntegerParameter(**formatted_details) + pipeline_space[name] = Integer(**formatted_details) elif param_type == "float": formatted_details = formatting_float(name, details) - pipeline_space[name] = FloatParameter(**formatted_details) + pipeline_space[name] = Float(**formatted_details) elif param_type in ("cat", "categorical"): formatted_details = formatting_cat(name, details) - pipeline_space[name] = CategoricalParameter(**formatted_details) + pipeline_space[name] = Categorical(**formatted_details) elif param_type == "const": const_details = formatting_const(details) - pipeline_space[name] = ConstantParameter(const_details) + pipeline_space[name] = Constant(const_details) # type: ignore else: # Handle unknown parameter type raise TypeError( @@ -186,7 +182,7 @@ class SearchSpace(Mapping[str, Any]): know better what to document. """ - def __init__(self, **hyperparameters: Parameter): + def __init__(self, **hyperparameters: Parameter): # noqa: C901, PLR0912 """Initialize the SearchSpace with hyperparameters. Args: @@ -194,7 +190,7 @@ def __init__(self, **hyperparameters: Parameter): """ # Ensure a consistent ordering for uses throughout the lib _hyperparameters = sorted(hyperparameters.items(), key=lambda x: x[0]) - _fidelity_param: NumericalParameter | None = None + _fidelity_param: Numerical | None = None _fidelity_name: str | None = None _has_prior: bool = False @@ -207,9 +203,9 @@ def __init__(self, **hyperparameters: Parameter): "multiple is_fidelity=True)" ) - if not isinstance(hp, NumericalParameter): + if not isinstance(hp, Numerical): raise ValueError( - "neps only suport float and integer fidelity parameters" + f"Only float and integer fidelities supported, got {hp}" ) _fidelity_param = hp @@ -219,459 +215,62 @@ def __init__(self, **hyperparameters: Parameter): _has_prior = True self.hyperparameters: dict[str, Parameter] = dict(_hyperparameters) - self.fidelity: NumericalParameter | None = _fidelity_param + self.fidelity: Numerical | None = _fidelity_param self.fidelity_name: str | None = _fidelity_name self.has_prior: bool = _has_prior - # TODO(eddiebergman): This should be a seperate thing most likely and not - # in a `SearchSpace`. - # Variables for tabular bookkeeping - self.custom_grid_table: pd.Series | pd.DataFrame | None = None - self.raw_tabular_space: SearchSpace | None = None - self.has_tabular: bool = False - - def set_custom_grid_space( - self, - grid_table: pd.Series | pd.DataFrame, - raw_space: SearchSpace | CS.ConfigurationSpace, - ) -> None: - """Set a custom grid space for the search space. - - This function is used to set a custom grid space for the pipeline space. - - !!! warning - - The type check and the table format requirement is loose and - can break certain components. - - Note: - Only to be used if a custom set of hyperparameters from the search space - is to be sampled or used for acquisition functions. 
- """ - if grid_table is None or raw_space is None: - raise ValueError( - "Both grid_table and raw_space must be set!\n" - "A table or list of fixed configs must be supported with a " - "continuous space representing the type and bounds of each " - "hyperparameter for accurate modeling." - ) - - self.custom_grid_table = grid_table - self.raw_tabular_space = ( - SearchSpace(**raw_space) - if not isinstance(raw_space, SearchSpace) - else raw_space - ) - self.has_tabular = True - - @property - def has_fidelity(self) -> bool: - """Check if the search space has a fidelity parameter.""" - return self.fidelity is not None - - def compute_prior(self, *, log: bool = False, ignore_fidelity: bool = False) -> float: - """Compute the prior probability of the search space. - - This is better know as the `pdf` of the configuration in the search space, or a - relative measure of how likely this configuration is under the search space. - - Args: - log: Whether to compute the log of the prior. - ignore_fidelity: Whether to ignore the fidelity parameter when - computing the prior. - - - Returns: - The likelihood of the configuration in the search space. - """ - density_value = 0.0 if log else 1.0 - op = operator.add if log else operator.mul - - prior_hps = ( - hp - for hp in self.hyperparameters.values() - if isinstance(hp, ParameterWithPrior) and hp.has_prior - ) - - for hyperparameter in prior_hps: - if ignore_fidelity and hyperparameter.is_fidelity: - continue - - hp_prior = hyperparameter.compute_prior(log=log) - density_value = op(density_value, hp_prior) - - return density_value - - def sample( - self, - *, - user_priors: bool = False, - patience: int = 1, - ignore_fidelity: bool = True, - ) -> SearchSpace: - """Sample a configuration from the search space. - - Args: - user_priors: Whether to use user priors when sampling. - patience: The number of times to try to sample a valid value for a - hyperparameter. - ignore_fidelity: Whether to ignore the fidelity parameter when sampling. - - Returns: - A sampled configuration from the search space. - """ - sampled_hps: dict[str, Parameter] = {} - - for name, hp in self.hyperparameters.items(): - if hp.is_fidelity and ignore_fidelity: - sampled_hps[name] = hp.clone() - continue - - for _ in range(patience): - try: - if user_priors and isinstance(hp, ParameterWithPrior): - sampled_hps[name] = hp.sample(user_priors=user_priors) - else: - sampled_hps[name] = hp.sample() - break - except ValueError: - logger.warning( - f"Could not sample valid value for hyperparameter {name}!" - ) - else: - raise ValueError( - f"Could not sample valid value for hyperparameter {name}" - f" in {patience} tries!" - ) - - return SearchSpace(**sampled_hps) - - def mutate( - self, - *, - parent: SearchSpace | None = None, - mutation_rate: float = 1.0, - mutation_strategy: Literal["smbo"] = "smbo", - patience: int = 50, - **kwargs: Any, - ) -> SearchSpace: - """Mutate the search space. - - Args: - parent: The parent configuration to mutate from. - mutation_rate: The rate at which to mutate the search space. - mutation_strategy: The strategy to use for mutation. - patience: The number of times to try to mutate a valid value for a - hyperparameter. - **kwargs: Additional keyword arguments to pass to the mutation strategy. - - Returns: - The mutated search space. 
- """ - if mutation_strategy == "smbo": - args = { - "parent": parent, - "mutation_rate": mutation_rate, - "mutation_strategy": "local_search", # fixing property for SMBO mutation - } - kwargs.update(args) - new_config = self._smbo_mutation(patience=patience, **kwargs) - else: - raise NotImplementedError("No such mutation strategy!") - - return SearchSpace(**new_config) - - # TODO(eddiebergman): This function seems very weak, i.e. it's only mutating - # one hyperparamter and copying the rest, very expensive for little gain. - def _smbo_mutation(self, *, patience: int = 5, **kwargs: Any) -> Self: - non_fidelity_mutatable_params = { - hp_name: hp - for hp_name, hp in self.hyperparameters.items() - if not hp.is_fidelity and isinstance(hp, MutatableParameter) - } - - for _ in range(patience): - chosen_hp_name = np.random.choice(list(non_fidelity_mutatable_params)) - hp = non_fidelity_mutatable_params[chosen_hp_name] - - try: - mutated_param = hp.mutate(**kwargs) - except Exception as e: # noqa: BLE001 - logger.warning(f"{chosen_hp_name} failed to mutate! Error: {e}, {kwargs}") + self.prior_config = {} + for name, hp in _hyperparameters: + if hp.prior is not None: + self.prior_config[name] = hp.prior continue - new_params = { - hp_name: hp.clone() if hp_name != chosen_hp_name else mutated_param - for hp_name, hp in self.hyperparameters.items() - } - return self.__class__(**new_params) - - raise ValueError( - f"Could not mutate valid value for hyperparameter in {patience} tries!" - ) - - def crossover( - self, - config2: SearchSpace, - crossover_probability_per_hyperparameter: float = 1.0, - patience: int = 50, - crossover_strategy: str = "simple", - ) -> tuple[SearchSpace, SearchSpace]: - """Crossover this configuration with another. - - Args: - config2: The other search space to crossover with. - crossover_probability_per_hyperparameter: The probability of crossing over - each hyperparameter. - patience: The number of times to try to crossover a valid value for a - hyperparameter. - crossover_strategy: The strategy to use for crossover. - - Returns: - A tuple of the two new configurations. 
- """ - if crossover_strategy == "simple": - new_config1, new_config2 = self._simple_crossover( - config2=config2, - crossover_probability_per_hyperparameter=crossover_probability_per_hyperparameter, - patience=patience, - ) - else: - raise NotImplementedError("No such crossover strategy!") - - if len(self.hyperparameters.keys()) != len(new_config1): - raise Exception("Cannot crossover") - - return SearchSpace(**new_config1), SearchSpace(**new_config2) - - def _simple_crossover( - self, - config2: SearchSpace, - crossover_probability_per_hyperparameter: float = 1.0, - patience: int = 50, - ) -> tuple[dict[str, Parameter], dict[str, Parameter]]: - new_config1: dict[str, Parameter] = {} - new_config2: dict[str, Parameter] = {} - - for key, hyperparameter in self.hyperparameters.items(): - other_hp = config2.hyperparameters[key] - if ( - isinstance(hyperparameter, MutatableParameter) - and not hyperparameter.is_fidelity - and np.random.random() < crossover_probability_per_hyperparameter - ): - for _ in range(patience): - try: - child1, child2 = hyperparameter.crossover(other_hp) # type: ignore - new_config1[key] = child1 - new_config2[key] = child2 - except Exception: # noqa: S112, BLE001 + match hp: + case Categorical(): + first_choice = hp.choices[0] + self.prior_config[name] = first_choice + case Integer() | Float(): + if hp.is_fidelity: + self.prior_config[name] = hp.upper continue - else: - break - else: - new_config1[key] = hyperparameter.clone() - new_config2[key] = other_hp.clone() - - return new_config1, new_config2 - - def get_normalized_hp_categories( - self, - *, - ignore_fidelity: bool = False, - ) -> dict[Literal["continuous", "categorical", "graphs"], list[Any]]: - """Get the normalized values for each hyperparameter in the configuration. - - Args: - ignore_fidelity: Whether to ignore the fidelity parameter when getting the - normalized values. - Returns: - A dictionary of the normalized values for each hyperparameter, - separated by type. 
- """ - hps: dict[Literal["continuous", "categorical", "graphs"], list[Any]] = { - "continuous": [], - "categorical": [], - "graphs": [], + midpoint = hp.domain.cast_one(0.5, frm=UNIT_FLOAT_DOMAIN) + self.prior_config[name] = midpoint + case Constant(): + self.prior_config[name] = hp.value + case GraphParameter(): + self.prior_config[name] = hp.prior + case _: + raise TypeError(f"Unknown hyperparameter type {hp}") + + self.categoricals: Mapping[str, Categorical] = { + k: hp for k, hp in _hyperparameters if isinstance(hp, Categorical) } - for hp in self.values(): - if ignore_fidelity and hp.is_fidelity: - continue - - if isinstance(hp, ConstantParameter): - continue - - # TODO(eddiebergman): Not sure this covers all graph parameters but a search - # for `def value` that have a property decorator is all that could have - # worked previously for graphs - if isinstance(hp, GraphParameter): - hps["graphs"].append(hp.value) - - elif isinstance(hp, CategoricalParameter): - assert hp.value is not None - hp_value = hp.value_to_normalized(hp.value) - hps["categorical"].append(hp_value) - - # TODO(eddiebergman): Technically integer is not continuous - elif isinstance(hp, NumericalParameter): - assert hp.value is not None - hp_value = hp.value_to_normalized(hp.value) - hps["continuous"].append(hp_value) - else: - raise NotImplementedError(f"Unknown Parameter type: {type(hp)}\n{hp}") - - return hps - - def hp_values(self) -> dict[str, Any]: - """Get the values for each hyperparameter in this configuration.""" - return { + self.numerical: Mapping[str, Integer | Float] = { + k: hp + for k, hp in _hyperparameters + if isinstance(hp, Integer | Float) and not hp.is_fidelity + } + self.graphs: Mapping[str, GraphParameter] = { + k: hp for k, hp in _hyperparameters if isinstance(hp, GraphParameter) + } + self.constants: Mapping[str, Any] = { + k: hp.value for k, hp in _hyperparameters if isinstance(hp, Constant) + } + # NOTE: For future of multiple fidelities + self.fidelities: Mapping[str, Integer | Float] = {} + if _fidelity_param is not None and _fidelity_name is not None: + assert isinstance(_fidelity_param, Integer | Float) + self.fidelities = {_fidelity_name: _fidelity_param} + + # TODO: Deprecate out, ideally configs are just dictionaries, + # not attached to this space object + self._values = { hp_name: hp if isinstance(hp, GraphParameter) else hp.value for hp_name, hp in self.hyperparameters.items() } - def add_hyperparameter(self, name: str, hp: Parameter) -> None: - """Add a hyperparameter to the search space. - - Args: - name: The name of the hyperparameter. - hp: The hyperparameter to add. - """ - self.hyperparameters[str(name)] = hp - self.hyperparameters = dict( - sorted(self.hyperparameters.items(), key=lambda x: x[0]) - ) - - def get_vectorial_dim(self) -> dict[Literal["continuous", "categorical"], int] | None: - """Get the vectorial dimension of the search space. - - The count of [`NumericalParameter`][neps.search_spaces.NumericalParameter] - are put under the key `#!python "continuous"` and the count of - [`CategoricalParameter`][neps.search_spaces.CategoricalParameter] are put under - the key `#!python "categorical"` in the return dict. - - If there are no numerical or categorical hyperparameters **or constant** - parameters, then `None` is returned. 
- - Returns: - The vectorial dimension - """ - if not any( - isinstance(hp, (NumericalParameter, CategoricalParameter, ConstantParameter)) - for hp in self.values() - ): - return None - - features: dict[Literal["continuous", "categorical"], int] = { - "continuous": 0, - "categorical": 0, - } - for hp in self.values(): - if isinstance(hp, ConstantParameter): - pass - elif isinstance(hp, GraphParameter): - # TODO(eddiebergman): This was what the old behaviour would do... - pass - elif isinstance(hp, CategoricalParameter): - features["categorical"] += 1 - elif isinstance(hp, NumericalParameter): - features["continuous"] += 1 - else: - raise NotImplementedError(f"Unknown Parameter type: {type(hp)}\n{hp}") - - return features - - def set_to_max_fidelity(self) -> None: - """Set the configuration to the maximum fidelity.""" - if self.fidelity is None: - raise ValueError("No fidelity parameter in the search space!") - - self.fidelity.set_value(self.fidelity.upper) - - def get_search_space_grid( - self, - *, - size_per_numerical_hp: int = 10, - include_endpoints: bool = True, - ) -> list[SearchSpace]: - """Get a grid of configurations from the search space. - - For [`NumericalParameter`][neps.search_spaces.NumericalParameter] hyperparameters, - the parameter `size_per_numerical_hp=` is used to determine a grid. If there are - any duplicates, e.g. for an - [`IntegerParameter`][neps.search_spaces.IntegerParameter], then we will - remove duplicates. - - For [`CategoricalParameter`][neps.search_spaces.CategoricalParameter] - hyperparameters, we include all the choices in the grid. - - For [`ConstantParameter`][neps.search_spaces.ConstantParameter] hyperparameters, - we include the constant value in the grid. - - !!! note "TODO" - - Does not support graph parameters currently. - - Args: - size_per_numerical_hp: The size of the grid for each numerical hyperparameter. - include_endpoints: Whether to include the endpoints of the grid. - - Returns: - A list of configurations from the search space. - """ - param_ranges = [] - for hp in self.hyperparameters.values(): - # NOTE(eddiebergman): This is a temporary fix to avoid graphs - # If this is resolved, please update the docstring! - if isinstance(hp, GraphParameter): - raise ValueError("Trying to create a grid for graphs!") - - if isinstance(hp, CategoricalParameter): - param_ranges.append(hp.choices) - continue - - if isinstance(hp, ConstantParameter): - param_ranges.append([hp.value]) - continue - - if isinstance(hp, NumericalParameter): - grid = hp.grid( - size=size_per_numerical_hp, - include_endpoint=include_endpoints, - ) - _grid = np.clip(grid, hp.lower, hp.upper).astype(np.float64) - _grid = ( - _grid.astype(np.int64) if isinstance(hp, IntegerParameter) else _grid - ) - _grid = np.unique(grid).tolist() - param_ranges.append(grid) - continue - - raise NotImplementedError(f"Unknown Parameter type: {type(hp)}\n{hp}") - - full_grid = product(*param_ranges) - - return [ - SearchSpace( - **{ - name: ConstantParameter(value=value) # type: ignore - for name, value in zip(self.hyperparameters.keys(), config_values) - } - ) - for config_values in full_grid - ] - - def serialize(self) -> dict[str, Hashable]: - """Serialize the configuration to a dictionary that can be written to disk.""" - serialized_config = {} - for name, hp in self.hyperparameters.items(): - if hp.value is None: - raise ValueError( - f"Hyperparameter {name} has no value set and can't" " be serialized!" 
- ) - serialized_config[name] = hp.serialize_value(hp.value) - return serialized_config - + # TODO: Deprecate and remove def from_dict(self, config: Mapping[str, Any | GraphParameter]) -> SearchSpace: """Create a new instance of this search space with parameters set from the config. @@ -681,148 +280,13 @@ def from_dict(self, config: Mapping[str, Any | GraphParameter]) -> SearchSpace: new = self.clone() for name, val in config.items(): new.hyperparameters[name].load_from(val) + new._values[name] = new.hyperparameters[name].value return new - def clone(self, *, _with_tabular: bool = False) -> SearchSpace: + def clone(self) -> SearchSpace: """Create a copy of the search space.""" - new_copy = self.__class__( - **{k: v.clone() for k, v in self.hyperparameters.items()} - ) - if _with_tabular and self.has_tabular: - assert self.custom_grid_table is not None - assert self.raw_tabular_space is not None - new_copy.set_custom_grid_space( - grid_table=self.custom_grid_table, - raw_space=self.raw_tabular_space, - ) - - return new_copy - - def sample_default_configuration( - self, - *, - patience: int = 1, - ignore_fidelity: bool = True, - ignore_missing_defaults: bool = False, - ) -> SearchSpace: - """Sample the default configuration from the search space. - - By default, if there is no default set for a hyperparameter, an error will be - raised. If `ignore_missing_defaults=True`, then a sampled value will be used - instead. - - Args: - patience: The number of times to try to sample a valid value for a - hyperparameter. - ignore_fidelity: Whether to ignore the fidelity parameter when sampling. - ignore_missing_defaults: Whether to ignore missing defaults when setting - the default configuration. - - Returns: - The default configuration. - """ - # Sample a random config and then set the defaults if there are any - config = self.sample(patience=patience, ignore_fidelity=ignore_fidelity) - for hp_name, hp in self.hyperparameters.items(): - if hp.is_fidelity and ignore_fidelity: - continue - - if hp.default is None: - if not ignore_missing_defaults: - raise ValueError(f"No defaults specified for {hp} in the space.") - - # Use the sampled value instead - else: - config[hp_name].set_value(hp.default) - - return config - - def set_defaults_to_current_values(self) -> None: - """Update the configuration/search space to use the current values as defaults.""" - for hp in self.hyperparameters.values(): - if isinstance(hp, NumericalParameter): - hp.set_default(hp.value) - - def set_hyperparameters_from_dict( # noqa: C901 - self, - hyperparameters: Mapping[str, Any], - *, - defaults: bool = True, - values: bool = True, - # TODO(eddiebergman): The existence of this makes me think - # all hyperparameters that accept confidence should use the same keys - confidence: str = "low", - delete_previous_defaults: bool = False, - delete_previous_values: bool = False, - overwrite_constants: bool = False, - ) -> None: - """Set the hyperparameters from a dictionary of values. - - !!! note "Constant Hyperparameters" - - [`ConstantParameter`][neps.search_spaces.ConstantParameter] hyperparameters - have only a single possible value and hence only a single possible default. - If `overwrite_constants=` is `False`, then it will remain unchanged and - ignore the new value. - - If `overwrite_constants=` is `True`, then the constant hyperparameter will - be updated, requiring both `defaults=True` and `values=True` to be set. 
- - The arguments `delete_previous_defaults` and `delete_previous_values` are - ignored for [`ConstantParameter`][neps.search_spaces.ConstantParameter]. - - Args: - hyperparameters: The dictionary of hyperparameters to set with values. - defaults: Whether to set the defaults to these values. - values: Whether to set the value of the hyperparameters to these values. - confidence: The confidence score to use when setting the default. - Only applies if `defaults=True`. - delete_previous_defaults: Whether to delete the previous defaults. - delete_previous_values: Whether to delete the previous values. - overwrite_constants: Whether to overwrite constant hyperparameters. - - Raises: - ValueError: If the value is invalid for the hyperparameter. - """ - if values is False and defaults is False: - raise ValueError("At least one of `values` or `defaults` must be True.") - - for hp_key, current_hp in self.hyperparameters.items(): - new_hp_value = hyperparameters.get(hp_key, NotSet) - if isinstance(new_hp_value, _NotSet): - continue - - # Handle constants specially as they have particular logic which - # is different from the other hyperparameters - if isinstance(current_hp, ConstantParameter): - if not overwrite_constants: - continue - - if not (defaults and values): - raise ValueError( - "Cannot have a constant parameter with a seperate default and" - " and value. Please provide both `values=True` and" - " `defaults=True` if passing `overwrite_constants=True`" - f" with a new value for the constant '{hp_key}'." - ) - - current_hp.set_constant_value(new_hp_value) - continue - - if delete_previous_defaults: - current_hp.set_default(None) - - if delete_previous_values: - current_hp.set_value(None) - - if defaults: - current_hp.set_default(new_hp_value) - if isinstance(current_hp, ParameterWithPrior): - current_hp.set_default_confidence_score(confidence) - - if values: - current_hp.set_value(new_hp_value) + return self.__class__(**{k: v.clone() for k, v in self.hyperparameters.items()}) def __getitem__(self, key: str) -> Parameter: return self.hyperparameters[key] @@ -835,46 +299,3 @@ def __len__(self) -> int: def __str__(self) -> str: return pprint.pformat(self.hyperparameters) - - def is_equal_value( - self, - other: SearchSpace, - *, - include_fidelity: bool = True, - on_decimal: int = 8, - ) -> bool: - """Check if the configuration is equal to another configuration. - - !!! warning - - This does **NOT** check that the entire `SearchSpace` is equal (and thus it is - not a dunder method), but only checks the configuration values. - - Args: - other: The other configuration to compare to. - include_fidelity: Whether to include the fidelity parameter in the comparison. - on_decimal: The decimal to round to when comparing float values. - - Returns: - Whether the configuration values are equal. 
- """ - if self.hyperparameters.keys() != other.hyperparameters.keys(): - return False - - for hp_key, this_hp in self.hyperparameters.items(): - if this_hp.is_fidelity and (not include_fidelity): - continue - - other_hp = other.hyperparameters[hp_key] - if not isinstance(other_hp, type(this_hp)): - return False - - if isinstance(this_hp.value, float): - this_norm = this_hp.value_to_normalized(this_hp.value) - other_norm = other_hp.value_to_normalized(other_hp.value) # type: ignore - if np.round(this_norm - other_norm, on_decimal) != 0: - return False - elif this_hp.value != other_hp.value: - return False - - return True diff --git a/neps/search_spaces/yaml_search_space_utils.py b/neps/search_spaces/yaml_search_space_utils.py index 8b25b1b04..ff6d72ad8 100644 --- a/neps/search_spaces/yaml_search_space_utils.py +++ b/neps/search_spaces/yaml_search_space_utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging import re from typing import Literal, overload diff --git a/neps/state/__init__.py b/neps/state/__init__.py index 6508dba2c..b8eb55af3 100644 --- a/neps/state/__init__.py +++ b/neps/state/__init__.py @@ -1,19 +1,11 @@ -from neps.state.protocols import ( - Locker, - ReaderWriter, - Synced, - VersionedResource, - Versioner, -) +from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo from neps.state.seed_snapshot import SeedSnapshot from neps.state.trial import Trial __all__ = [ - "Locker", + "BudgetInfo", + "OptimizationState", + "OptimizerInfo", "SeedSnapshot", - "Synced", "Trial", - "ReaderWriter", - "Versioner", - "VersionedResource", ] diff --git a/neps/state/_eval.py b/neps/state/_eval.py index 0d08dfdd7..915e38235 100644 --- a/neps/state/_eval.py +++ b/neps/state/_eval.py @@ -4,10 +4,9 @@ import logging import time import traceback +from collections.abc import Callable, Mapping from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Literal, Mapping, TypeVar - -from neps.exceptions import NePSError +from typing import TYPE_CHECKING, Any, Literal, TypeVar if TYPE_CHECKING: from neps.state.settings import DefaultReportValues @@ -19,36 +18,6 @@ _notset = object() -class GotNonePendingTrialForEvalautionError(NePSError): - """Raised when trying to evaluate a trial that is not in a pending state.""" - - def __init__( - self, - trial_id: Trial.ID, - state: Trial.State, - worker_id: str, - *args: Any, - ): - """Initialize the error. - - Args: - trial_id: The ID of the trial that was not in a pending state. - state: The state of the trial. - worker_id: The ID of the worker that picked up this trial. - *args: Additional arguments to pass to the parent class. - """ - super().__init__(trial_id, state, worker_id, *args) - self.trial_id = trial_id - self.state = state - self.worker_id = worker_id - - def __str__(self) -> str: - return ( - f"Trial '{self.trial_id}' is not in a pending state but in '{self.state}'." - f"This trial was picked up for evaluation by worker '{self.worker_id}'." 
- ) - - def _check_float(value: Any, name: str) -> float: try: return float(value) @@ -63,15 +32,17 @@ def parse_user_result( user_result: float | dict[str, Any], *, default_cost_value: float | None = None, - default_learning_curve: Literal["loss"] | list[float] | None = None, + default_learning_curve: Literal["objective_to_minimize"] | list[float] | None = None, ) -> tuple[float, float | None, list[float] | None, dict[str, Any]]: """Check if the trial has succeeded.""" if isinstance(user_result, Mapping): - extracted_loss = user_result.pop("loss", _notset) - if extracted_loss is _notset: + extracted_objective_to_minimize = user_result.pop( + "objective_to_minimize", _notset + ) + if extracted_objective_to_minimize is _notset: raise KeyError( - "The 'loss' should be provided in the evaluation result if providing" - " a dictionary." + "The 'objective_to_minimize' should be provided in the evaluation result" + " if providing a dictionary." ) extracted_cost = user_result.pop("cost", default_cost_value) @@ -87,30 +58,32 @@ def parse_user_result( else: extracted_learning_curve = default_learning_curve - if extracted_learning_curve == "loss": - extracted_learning_curve = [extracted_loss] + if extracted_learning_curve == "objective_to_minimize": + extracted_learning_curve = [extracted_objective_to_minimize] extra = user_result else: - extracted_loss = user_result + extracted_objective_to_minimize = user_result extracted_learning_curve = ( None if default_learning_curve is None else [user_result] - if default_learning_curve == "loss" + if default_learning_curve == "objective_to_minimize" else default_learning_curve ) extracted_cost = default_cost_value extra = {} - loss = _check_float(extracted_loss, "loss") + objective_to_minimize = _check_float( + extracted_objective_to_minimize, "objective_to_minimize" + ) cost = _check_float(extracted_cost, "cost") if extracted_cost is not None else None learning_curve = ( [float(v) for v in extracted_learning_curve] if extracted_learning_curve is not None else None ) - return loss, cost, learning_curve, extra + return objective_to_minimize, cost, learning_curve, extra def _eval_trial( @@ -131,7 +104,7 @@ def _eval_trial( logger.exception(e) report = trial.set_complete( report_as="crashed", - loss=default_report_values.loss_value_on_error, + objective_to_minimize=default_report_values.objective_to_minimize_value_on_error, cost=default_report_values.cost_value_on_error, learning_curve=default_report_values.learning_curve_on_error, extra=None, @@ -145,14 +118,14 @@ def _eval_trial( time_end = time.time() logger.info(f"Successful evaluation of '{trial.id}': {user_result}.") - loss, cost, learning_curve, extra = parse_user_result( + objective_to_minimize, cost, learning_curve, extra = parse_user_result( dict(user_result) if isinstance(user_result, Mapping) else user_result, default_cost_value=default_report_values.cost_if_not_provided, default_learning_curve=default_report_values.learning_curve_if_not_provided, ) report = trial.set_complete( report_as="success", - loss=loss, + objective_to_minimize=objective_to_minimize, cost=cost, learning_curve=learning_curve, err=None, diff --git a/neps/state/err_dump.py b/neps/state/err_dump.py index 167ab48fd..9f50ddefd 100644 --- a/neps/state/err_dump.py +++ b/neps/state/err_dump.py @@ -73,5 +73,5 @@ def empty(self) -> bool: def latest_err_as_raisable(self) -> SerializedError | None: """Get the latest error.""" if self.errs: - return self.errs[-1].as_raisable() + return self.errs[-1].as_raisable() # type: ignore return None 
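The `loss` → `objective_to_minimize` rename in `parse_user_result` above changes the contract for user evaluation functions. A sketch of a conforming return value (the function name and metric are illustrative, not part of this diff):

```python
def evaluate_pipeline(learning_rate: float) -> dict:
    validation_error = (learning_rate - 1e-2) ** 2  # dummy metric

    return {
        "objective_to_minimize": validation_error,  # previously the "loss" key
        "cost": 1.0,  # optional, falls back to default_cost_value as before
        "extra_info": "any remaining keys end up in the `extra` dict",
    }
```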
diff --git a/neps/state/filebased.py b/neps/state/filebased.py index 6940016d5..6ea08bdf5 100644 --- a/neps/state/filebased.py +++ b/neps/state/filebased.py @@ -1,49 +1,22 @@ -"""This module houses the implementation of a NePSState that -does everything on the filesystem, i.e. locking, versioning and -storing/loading. - -The main components are: -* [`FileVersioner`][neps.state.filebased.FileVersioner]: A versioner that - stores a version tag on disk, usually for a resource like a Trial. -* [`FileLocker`][neps.state.filebased.FileLocker]: A locker that uses a file - to lock between processes. -* [`TrialRepoInDirectory`][neps.state.filebased.TrialRepoInDirectory]: A - repository of Trials that are stored in a directory. -* `ReaderWriterXXX`: Reader/writers for various resources NePSState needs -* [`load_filebased_neps_state`][neps.state.filebased.load_filebased_neps_state]: - A function to load a NePSState from a directory. -* [`create_filebased_neps_state`][neps.state.filebased.create_filebased_neps_state]: - A function to create a new NePSState in a directory. -""" +"""TODO.""" from __future__ import annotations +import contextlib import json import logging +import pprint +import time +from collections.abc import Iterable, Iterator from contextlib import contextmanager -from dataclasses import asdict, dataclass, field +from dataclasses import asdict, dataclass from pathlib import Path -from typing import ClassVar, Iterable, Iterator, TypeVar -from typing_extensions import override -from uuid import uuid4 +from typing import Literal, TypeAlias, TypeVar -import numpy as np import portalocker as pl -from neps.env import ( - GLOBAL_ERR_FILELOCK_POLL, - GLOBAL_ERR_FILELOCK_TIMEOUT, - SEED_SNAPSHOT_FILELOCK_POLL, - SEED_SNAPSHOT_FILELOCK_TIMEOUT, - TRIAL_FILELOCK_POLL, - TRIAL_FILELOCK_TIMEOUT, -) -from neps.exceptions import NePSError +from neps.env import CONFIG_SERIALIZE_FORMAT, ENV_VARS_USED from neps.state.err_dump import ErrDump -from neps.state.neps_state import NePSState -from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo -from neps.state.protocols import Locker, ReaderWriter, Synced, TrialRepo, Versioner -from neps.state.seed_snapshot import SeedSnapshot from neps.state.trial import Trial from neps.utils.files import deserialize, serialize @@ -51,387 +24,135 @@ K = TypeVar("K") T = TypeVar("T") - -def make_sha() -> str: - """Generate a str hex sha.""" - return uuid4().hex - - -@dataclass -class FileVersioner(Versioner): - """A versioner that stores a version tag on disk.""" - - version_file: Path - - @override - def current(self) -> str | None: - if not self.version_file.exists(): - return None - return self.version_file.read_text() - - @override - def bump(self) -> str: - sha = make_sha() - self.version_file.write_text(sha) - return sha +TrialWriteHint: TypeAlias = Literal["metadata", "report", "config"] @dataclass -class TrialRepoInDirectory(TrialRepo[Path]): - """A repository of Trials that are stored in a directory.""" - - directory: Path - _cache: dict[Trial.ID, Synced[Trial, Path]] = field(default_factory=dict) - - @override - def all_trial_ids(self) -> set[Trial.ID]: - """List all the trial ids in this trial Repo.""" - return { - config_path.name.replace("config_", "") - for config_path in self.directory.iterdir() - if config_path.name.startswith("config_") and config_path.is_dir() - } - - @override - def get_by_id( - self, - trial_id: Trial.ID, - *, - lock_poll: float = TRIAL_FILELOCK_POLL, - lock_timeout: float | None = TRIAL_FILELOCK_TIMEOUT, - ) -> 
Synced[Trial, Path]: - """Get a Trial by its ID. - - !!! note - - This will **not** explicitly sync the trial and it is up to the caller - to do so. Most of the time, the caller should be a NePSState - object which will do that for you. However if the trial is not in the - cache, then it will be loaded from disk which requires syncing. - - Args: - trial_id: The ID of the trial to get. - lock_poll: The poll time for the file lock. - lock_timeout: The timeout for the file lock. - - Returns: - The trial with the given ID. - """ - trial = self._cache.get(trial_id) - if trial is not None: - return trial - - config_path = self.directory / f"config_{trial_id}" - if not config_path.exists(): - raise TrialRepo.TrialNotFoundError(trial_id, config_path) - - trial = Synced.load( - location=config_path, - locker=FileLocker( - lock_path=config_path / ".lock", - poll=lock_poll, - timeout=lock_timeout, - ), - versioner=FileVersioner(version_file=config_path / ".version"), - reader_writer=ReaderWriterTrial(), - ) - self._cache[trial_id] = trial - return trial - - @override - def get_by_ids(self, trial_ids: Iterable[Trial.ID]) -> dict[str, Synced[Trial, Path]]: - """Get multiple Trials by their IDs. - - !!! note - See [`get_by_id()`][neps.state.filebased.TrialRepoInDirectory.get_by_id] - for notes on the trials syncing. - - Args: - trial_ids: The IDs of the trials to get. - - Returns: - A dictionary of the trials with the given IDs. - - Raises: - TrialRepo.TrialNotFoundError: If a trial is not found. - """ - return {trial_id: self.get_by_id(trial_id) for trial_id in trial_ids} - - @override - def put_new( - self, - trial: Trial, - *, - lock_poll: float = TRIAL_FILELOCK_POLL, - lock_timeout: float | None = TRIAL_FILELOCK_TIMEOUT, - ) -> Synced[Trial, Path]: - """Put a new Trial into the repository. - - Args: - trial: The trial to put. - lock_poll: The poll time for the file lock. - lock_timeout: The timeout for the file lock. - - Returns: - The synced trial. - - Raises: - TrialRepo.TrialAlreadyExistsError: If the trial already exists in the - repository. - """ - config_path = self.directory / f"config_{trial.metadata.id}" - if config_path.exists(): - raise TrialRepo.TrialAlreadyExistsError( - f"Trial '{trial.metadata.id}' already exists as '{config_path}'." - ) - - # HACK: We do this here as there is no way to know where a Trial will - # be located when it's created... - trial.metadata.location = str(config_path) - shared_trial = Synced.new( - data=trial, - location=config_path, - locker=FileLocker( - lock_path=config_path / ".lock", - poll=lock_poll, - timeout=lock_timeout, - ), - versioner=FileVersioner(version_file=config_path / ".version"), - reader_writer=ReaderWriterTrial(), - ) - self._cache[trial.metadata.id] = shared_trial - return shared_trial - - @override - def all(self) -> dict[Trial.ID, Synced[Trial, Path]]: - """Get a dictionary of all the Trials in the repository. - - !!! note - See [`get_by_id()`][neps.state.filebased.TrialRepoInDirectory.get_by_id] - for notes on the trials syncing. 
- """ - return {trial_id: self.get_by_id(trial_id) for trial_id in self.all_trial_ids()} +class ReaderWriterTrial: + """ReaderWriter for Trial objects.""" - @override - def pending(self) -> Iterable[tuple[Trial.ID, Synced[Trial, Path]]]: - pending = [ - (_id, t, trial.metadata.time_sampled) - for (_id, t) in self.all().items() - if (trial := t.synced()).state == Trial.State.PENDING - ] - return iter((_id, t) for _id, t, _ in sorted(pending, key=lambda x: x[2])) + # Report and config are kept as yaml since they are most likely to be + # read + CONFIG_FILENAME = f"config.{CONFIG_SERIALIZE_FORMAT}" + REPORT_FILENAME = f"report.{CONFIG_SERIALIZE_FORMAT}" + # Metadata is put as json as it's more likely to be machine read and + # is much faster. + METADATA_FILENAME = "metadata.json" -@dataclass -class ReaderWriterTrial(ReaderWriter[Trial, Path]): - """ReaderWriter for Trial objects.""" - - CONFIG_FILENAME = "config.yaml" - METADATA_FILENAME = "metadata.yaml" - STATE_FILENAME = "state.txt" - REPORT_FILENAME = "report.yaml" PREVIOUS_TRIAL_ID_FILENAME = "previous_trial_id.txt" - @override @classmethod def read(cls, directory: Path) -> Trial: + """Read a trial from a directory.""" config_path = directory / cls.CONFIG_FILENAME metadata_path = directory / cls.METADATA_FILENAME - state_path = directory / cls.STATE_FILENAME report_path = directory / cls.REPORT_FILENAME + with metadata_path.open("r") as f: + metadata = json.load(f) + + metadata["state"] = Trial.State(metadata["state"]) + return Trial( - config=deserialize(config_path), - metadata=Trial.MetaData(**deserialize(metadata_path)), - state=Trial.State(state_path.read_text(encoding="utf-8").strip()), + config=deserialize(config_path, file_format=CONFIG_SERIALIZE_FORMAT), + metadata=Trial.MetaData(**metadata), report=( - Trial.Report(**deserialize(report_path)) if report_path.exists() else None + Trial.Report( + **deserialize(report_path, file_format=CONFIG_SERIALIZE_FORMAT), + ) + if report_path.exists() + else None ), ) - @override @classmethod - def write(cls, trial: Trial, directory: Path) -> None: + def write( # noqa: C901, PLR0912 + cls, + trial: Trial, + directory: Path, + *, + hints: Iterable[TrialWriteHint] | TrialWriteHint | None = None, + ) -> None: + """Write a trial to a directory. + + Args: + trial: The trial to write. + directory: The directory to write the trial to. + hints: What to write. If None, write everything. + """ config_path = directory / cls.CONFIG_FILENAME metadata_path = directory / cls.METADATA_FILENAME - state_path = directory / cls.STATE_FILENAME - - serialize(trial.config, config_path) - serialize(asdict(trial.metadata), metadata_path) - state_path.write_text(trial.state.value, encoding="utf-8") - - if trial.metadata.previous_trial_id is not None: - previous_trial_path = directory / cls.PREVIOUS_TRIAL_ID_FILENAME - previous_trial_path.write_text(trial.metadata.previous_trial_id) - - if trial.report is not None: - report_path = directory / cls.REPORT_FILENAME - serialize(asdict(trial.report), report_path) - - -@dataclass -class ReaderWriterSeedSnapshot(ReaderWriter[SeedSnapshot, Path]): - """ReaderWriter for SeedSnapshot objects.""" - - # It seems like they're all uint32 but I can't be sure. 
- PY_RNG_STATE_DTYPE: ClassVar = np.int64 - - PY_RNG_TUPLE_FILENAME: ClassVar = "py_rng.npy" - NP_RNG_STATE_FILENAME: ClassVar = "np_rng_state.npy" - TORCH_RNG_STATE_FILENAME: ClassVar = "torch_rng_state.pt" - TORCH_CUDA_RNG_STATE_FILENAME: ClassVar = "torch_cuda_rng_state.pt" - SEED_INFO_FILENAME: ClassVar = "seed_info.json" - - @override - @classmethod - def read(cls, directory: Path) -> SeedSnapshot: - seedinfo_path = directory / cls.SEED_INFO_FILENAME - py_rng_path = directory / cls.PY_RNG_TUPLE_FILENAME - np_rng_path = directory / cls.NP_RNG_STATE_FILENAME - torch_rng_path = directory / cls.TORCH_RNG_STATE_FILENAME - torch_cuda_rng_path = directory / cls.TORCH_CUDA_RNG_STATE_FILENAME - - # Load and set pythons rng - py_rng_state = tuple( - int(x) for x in np.fromfile(py_rng_path, dtype=cls.PY_RNG_STATE_DTYPE) - ) - np_rng_state = np.fromfile(np_rng_path, dtype=np.uint32) - seed_info = deserialize(seedinfo_path) - - torch_exists = torch_rng_path.exists() or torch_cuda_rng_path.exists() - - # By specifying `weights_only=True`, it disables arbitrary object loading - torch_rng_state = None - torch_cuda_rng = None - if torch_exists: - import torch - - if torch_rng_path.exists(): - torch_rng_state = torch.load(torch_rng_path, weights_only=True) - - if torch_cuda_rng_path.exists(): - # By specifying `weights_only=True`, it disables arbitrary object loading - torch_cuda_rng = torch.load(torch_cuda_rng_path, weights_only=True) - - return SeedSnapshot( - np_rng=( - seed_info["np_rng_kind"], - np_rng_state, - seed_info["np_pos"], - seed_info["np_has_gauss"], - seed_info["np_cached_gauss"], - ), - py_rng=( - seed_info["py_rng_version"], - py_rng_state, - seed_info["py_guass_next"], - ), - torch_rng=torch_rng_state, - torch_cuda_rng=torch_cuda_rng, - ) - - @override - @classmethod - def write(cls, snapshot: SeedSnapshot, directory: Path) -> None: - seedinfo_path = directory / cls.SEED_INFO_FILENAME - py_rng_path = directory / cls.PY_RNG_TUPLE_FILENAME - np_rng_path = directory / cls.NP_RNG_STATE_FILENAME - torch_rng_path = directory / cls.TORCH_RNG_STATE_FILENAME - torch_cuda_rng_path = directory / cls.TORCH_CUDA_RNG_STATE_FILENAME - - py_rng_version, py_rng_state, py_guass_next = snapshot.py_rng - - np.array(py_rng_state, dtype=cls.PY_RNG_STATE_DTYPE).tofile(py_rng_path) - - seed_info = { - "np_rng_kind": snapshot.np_rng[0], - "np_pos": snapshot.np_rng[2], - "np_has_gauss": snapshot.np_rng[3], - "np_cached_gauss": snapshot.np_rng[4], - "py_rng_version": py_rng_version, - "py_guass_next": py_guass_next, - } - serialize(seed_info, seedinfo_path) - np_rng_state = snapshot.np_rng[1] - np_rng_state.tofile(np_rng_path) - - if snapshot.torch_rng is not None: - import torch - - torch.save(snapshot.torch_rng, torch_rng_path) - - if snapshot.torch_cuda_rng is not None: - import torch - - torch.save(snapshot.torch_cuda_rng, torch_cuda_rng_path) - - -@dataclass -class ReaderWriterOptimizerInfo(ReaderWriter[OptimizerInfo, Path]): - """ReaderWriter for OptimizerInfo objects.""" - - INFO_FILENAME: ClassVar = "info.yaml" - @override - @classmethod - def read(cls, directory: Path) -> OptimizerInfo: - info_path = directory / cls.INFO_FILENAME - return OptimizerInfo(info=deserialize(info_path)) - - @override - @classmethod - def write(cls, optimizer_info: OptimizerInfo, directory: Path) -> None: - info_path = directory / cls.INFO_FILENAME - serialize(optimizer_info.info, info_path) + if isinstance(hints, str): + match hints: + case "config": + serialize( + trial.config, + config_path, + check_serialized=False, + 
file_format=CONFIG_SERIALIZE_FORMAT, + ) + case "metadata": + data = asdict(trial.metadata) + data["state"] = data["state"].value + with metadata_path.open("w") as f: + json.dump(data, f) + + if trial.metadata.previous_trial_id is not None: + previous_trial_path = directory / cls.PREVIOUS_TRIAL_ID_FILENAME + previous_trial_path.write_text(trial.metadata.previous_trial_id) + case "report": + if trial.report is None: + raise ValueError( + "Cannot write report 'hint' when report is None." + ) + + report_path = directory / cls.REPORT_FILENAME + _report = asdict(trial.report) + if (err := _report.get("err")) is not None: + _report["err"] = str(err) + + serialize( + _report, + report_path, + check_serialized=False, + file_format=CONFIG_SERIALIZE_FORMAT, + ) + case _: + raise ValueError(f"Invalid hint: {hints}") + elif isinstance(hints, Iterable): + for hint in hints: + cls.write(trial, directory, hints=hint) # type: ignore + elif hints is None: + # We don't know, write everything + cls.write(trial, directory, hints=["config", "metadata"]) + + if trial.report is not None: + cls.write(trial, directory, hints="report") + else: + raise ValueError(f"Invalid hint: {hints}") -# TODO(eddiebergman): If an optimizer wants to store some hefty state, i.e. a numpy array -# or something, this is horribly inefficient and we would need to adapt OptimizerState to -# handle this. -# TODO(eddiebergman): May also want to consider serializing budget into a seperate entity @dataclass -class ReaderWriterOptimizationState(ReaderWriter[OptimizationState, Path]): - """ReaderWriter for OptimizationState objects.""" - - STATE_FILE_NAME: ClassVar = "state.yaml" - - @override - @classmethod - def read(cls, directory: Path) -> OptimizationState: - state_path = directory / cls.STATE_FILE_NAME - state = deserialize(state_path) - budget_info = state.get("budget") - budget = BudgetInfo(**budget_info) if budget_info is not None else None - return OptimizationState( - shared_state=state.get("shared_state") or {}, - budget=budget, - ) - - @override - @classmethod - def write(cls, info: OptimizationState, directory: Path) -> None: - info_path = directory / cls.STATE_FILE_NAME - serialize(asdict(info), info_path) - - -@dataclass -class ReaderWriterErrDump(ReaderWriter[ErrDump, Path]): +class ReaderWriterErrDump: """ReaderWriter for shared error lists.""" - name: str + @classmethod + def read(cls, path: Path) -> ErrDump: + """Read an error dump from a file.""" + if not path.exists(): + return ErrDump([]) - @override - def read(self, directory: Path) -> ErrDump: - errors_path = directory / f"{self.name}-errors.jsonl" - with errors_path.open("r") as f: + with path.open("r") as f: data = [json.loads(line) for line in f] return ErrDump([ErrDump.SerializableTrialError(**d) for d in data]) - @override - def write(self, err_dump: ErrDump, directory: Path) -> None: - errors_path = directory / f"{self.name}-errors.jsonl" - with errors_path.open("w") as f: + @classmethod + def write(cls, err_dump: ErrDump, path: Path) -> None: + """Write an error dump to a file.""" + with path.open("w") as f: lines = [json.dumps(asdict(trial_err)) for trial_err in err_dump.errs] f.write("\n".join(lines)) @@ -440,7 +161,7 @@ def write(self, err_dump: ErrDump, directory: Path) -> None: @dataclass -class FileLocker(Locker): +class FileLocker: """File-based locker using `portalocker`. 
[`FileLocker`][neps.state.locker.file.FileLocker] implements @@ -455,218 +176,49 @@ class FileLocker(Locker): def __post_init__(self) -> None: self.lock_path = self.lock_path.resolve().absolute() - - @override - def is_locked(self) -> bool: - if not self.lock_path.exists(): - return False - try: - with self.lock(fail_if_locked=True): - pass - return False - except pl.exceptions.LockException: - return True - - @override - @contextmanager - def lock( - self, - *, - fail_if_locked: bool = False, - ) -> Iterator[None]: - self.lock_path.parent.mkdir(parents=True, exist_ok=True) - self.lock_path.touch(exist_ok=True) - logger.debug("Acquiring lock on %s", self.lock_path) - with pl.Lock( + self._lock = pl.Lock( self.lock_path, check_interval=self.poll, timeout=self.timeout, flags=FILELOCK_EXCLUSIVE_NONE_BLOCKING, - fail_when_locked=fail_if_locked, - ): - yield - logger.debug("Released lock on %s", self.lock_path) - - -def load_filebased_neps_state(directory: Path) -> NePSState[Path]: - """Load a NePSState from a directory. - - Args: - directory: The directory to load the state from. - - Returns: - The loaded NePSState. - - Raises: - FileNotFoundError: If no NePSState is found at the given directory. - """ - if not directory.exists(): - raise FileNotFoundError(f"No NePSState found at '{directory}'.") - directory.mkdir(parents=True, exist_ok=True) - config_dir = directory / "configs" - config_dir.mkdir(parents=True, exist_ok=True) - seed_dir = directory / ".seed_state" - seed_dir.mkdir(parents=True, exist_ok=True) - error_dir = directory / ".errors" - error_dir.mkdir(parents=True, exist_ok=True) - optimizer_state_dir = directory / ".optimizer_state" - optimizer_state_dir.mkdir(parents=True, exist_ok=True) - optimizer_info_dir = directory / ".optimizer_info" - optimizer_info_dir.mkdir(parents=True, exist_ok=True) - - return NePSState( - location=str(directory.absolute().resolve()), - _trials=TrialRepoInDirectory(config_dir), - _optimizer_info=Synced.load( - location=optimizer_info_dir, - versioner=FileVersioner(version_file=optimizer_info_dir / ".version"), - locker=FileLocker( - lock_path=optimizer_info_dir / ".lock", - poll=0.01, - timeout=None, - ), - reader_writer=ReaderWriterOptimizerInfo(), - ), - _seed_state=Synced.load( - location=seed_dir, - reader_writer=ReaderWriterSeedSnapshot(), - versioner=FileVersioner(version_file=seed_dir / ".version"), - locker=FileLocker( - lock_path=seed_dir / ".lock", - poll=SEED_SNAPSHOT_FILELOCK_POLL, - timeout=SEED_SNAPSHOT_FILELOCK_TIMEOUT, - ), - ), - _shared_errors=Synced.load( - location=error_dir, - reader_writer=ReaderWriterErrDump("all"), - versioner=FileVersioner(version_file=error_dir / ".all.version"), - locker=FileLocker( - lock_path=error_dir / ".all.lock", - poll=GLOBAL_ERR_FILELOCK_POLL, - timeout=GLOBAL_ERR_FILELOCK_TIMEOUT, - ), - ), - _optimizer_state=Synced.load( - location=optimizer_state_dir, - reader_writer=ReaderWriterOptimizationState(), - versioner=FileVersioner(version_file=optimizer_state_dir / ".version"), - locker=FileLocker( - lock_path=optimizer_state_dir / ".lock", - poll=GLOBAL_ERR_FILELOCK_POLL, - timeout=GLOBAL_ERR_FILELOCK_TIMEOUT, - ), - ), - ) - - -def create_or_load_filebased_neps_state( - directory: Path, - *, - optimizer_info: OptimizerInfo, - optimizer_state: OptimizationState, -) -> NePSState[Path]: - """Create a new NePSState in a directory or load the existing one - if it already exists. - - !!! warning - - We check that the optimizer info in the NePSState on disk matches - the one that is passed. 
However we do not lock this check so it - is possible that if two processes try to create a NePSState at the - same time, both with different optimizer infos, that one will fail - to create the NePSState. This is a limitation of the current design. - - In principal, we could allow multiple optimizers to be run and share - the same set of trials. + ) - Args: - directory: The directory to create the state in. - optimizer_info: The optimizer info to use. - optimizer_state: The optimizer state to use. + @contextmanager + def lock(self, *, worker_id: str | None = None) -> Iterator[None]: + """Lock the file. - Returns: - The NePSState. + Args: + worker_id: The id of the worker trying to acquire the lock. - Raises: - NePSError: If the optimizer info on disk does not match the one provided. - """ - is_new = not directory.exists() - directory.mkdir(parents=True, exist_ok=True) - config_dir = directory / "configs" - config_dir.mkdir(parents=True, exist_ok=True) - seed_dir = directory / ".seed_state" - seed_dir.mkdir(parents=True, exist_ok=True) - error_dir = directory / ".errors" - error_dir.mkdir(parents=True, exist_ok=True) - optimizer_state_dir = directory / ".optimizer_state" - optimizer_state_dir.mkdir(parents=True, exist_ok=True) - optimizer_info_dir = directory / ".optimizer_info" - optimizer_info_dir.mkdir(parents=True, exist_ok=True) - - # We have to do one bit of sanity checking to ensure that the optimzier - # info on disk manages the one we have recieved, otherwise we are unsure which - # optimizer is being used. - # NOTE: We assume that we do not have to worry about a race condition - # here where we have two different NePSState objects with two different optimizer - # infos trying to be created at the same time. This avoids the need to lock to - # check the optimizer info. If this assumption changes, then we would have - # to first lock before we do this check - optimizer_info_reader_writer = ReaderWriterOptimizerInfo() - if not is_new: - existing_info = optimizer_info_reader_writer.read(optimizer_info_dir) - if existing_info != optimizer_info: - raise NePSError( - "The optimizer info on disk does not match the one provided." - f"\nOn disk: {existing_info}\nProvided: {optimizer_info}" - f"\n\nLoaded the one on disk from {optimizer_info_dir}." 
- ) - - return NePSState( - location=str(directory.absolute().resolve()), - _trials=TrialRepoInDirectory(config_dir), - _optimizer_info=Synced.new_or_load( - data=optimizer_info, # type: ignore - location=optimizer_info_dir, - versioner=FileVersioner(version_file=optimizer_info_dir / ".version"), - locker=FileLocker( - lock_path=optimizer_info_dir / ".lock", - poll=0.01, - timeout=None, - ), - reader_writer=ReaderWriterOptimizerInfo(), - ), - _seed_state=Synced.new_or_load( - data=SeedSnapshot.new_capture(), - location=seed_dir, - reader_writer=ReaderWriterSeedSnapshot(), - versioner=FileVersioner(version_file=seed_dir / ".version"), - locker=FileLocker( - lock_path=seed_dir / ".lock", - poll=SEED_SNAPSHOT_FILELOCK_POLL, - timeout=SEED_SNAPSHOT_FILELOCK_TIMEOUT, - ), - ), - _shared_errors=Synced.new_or_load( - data=ErrDump(), - location=error_dir, - reader_writer=ReaderWriterErrDump("all"), - versioner=FileVersioner(version_file=error_dir / ".all.version"), - locker=FileLocker( - lock_path=error_dir / ".all.lock", - poll=GLOBAL_ERR_FILELOCK_POLL, - timeout=GLOBAL_ERR_FILELOCK_TIMEOUT, - ), - ), - _optimizer_state=Synced.new_or_load( - data=optimizer_state, - location=optimizer_state_dir, - reader_writer=ReaderWriterOptimizationState(), - versioner=FileVersioner(version_file=optimizer_state_dir / ".version"), - locker=FileLocker( - lock_path=optimizer_state_dir / ".lock", - poll=GLOBAL_ERR_FILELOCK_POLL, - timeout=GLOBAL_ERR_FILELOCK_TIMEOUT, - ), - ), - ) + Used for debug messaging purposes. + """ + try: + with self._lock: + if worker_id is not None: + logger.debug( + "Worker %s acquired lock on %s at %s", + worker_id, + self.lock_path, + time.time(), + ) + + yield + except pl.exceptions.LockException as e: + raise pl.exceptions.LockException( + f"Failed to acquire lock after timeout of {self.timeout} seconds." + " This most likely indicates that another process has crashed while" + " holding the lock." 
+ f"\n\nLock path: {self.lock_path}" + "\n\nIf you belive this is not the case, you can set some of these" + " environment variables to increase the timeout:" + f"\n\n{pprint.pformat(ENV_VARS_USED)}" + ) from e + finally: + if worker_id is not None: + with contextlib.suppress(Exception): + logger.debug( + "Worker %s released lock on %s at %s", + worker_id, + self.lock_path, + time.time(), + ) diff --git a/neps/state/neps_state.py b/neps/state/neps_state.py index 8afaee628..92bd2e5ad 100644 --- a/neps/state/neps_state.py +++ b/neps/state/neps_state.py @@ -10,109 +10,380 @@ from __future__ import annotations +import io import logging +import pickle import time +from collections.abc import Callable, Iterable from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Callable, Generic, TypeVar, overload - -from more_itertools import take - +from pathlib import Path +from typing import ( + Literal, + TypeAlias, + TypeVar, + overload, +) + +from neps.env import ( + GLOBAL_ERR_FILELOCK_POLL, + GLOBAL_ERR_FILELOCK_TIMEOUT, + STATE_FILELOCK_POLL, + STATE_FILELOCK_TIMEOUT, + TRIAL_CACHE_MAX_UPDATES_BEFORE_CONSOLIDATION, + TRIAL_FILELOCK_POLL, + TRIAL_FILELOCK_TIMEOUT, +) +from neps.exceptions import NePSError, TrialAlreadyExistsError, TrialNotFoundError +from neps.optimizers.base_optimizer import BaseOptimizer from neps.state.err_dump import ErrDump +from neps.state.filebased import ( + FileLocker, + ReaderWriterErrDump, + ReaderWriterTrial, + TrialWriteHint, +) from neps.state.optimizer import OptimizationState, OptimizerInfo -from neps.state.trial import Trial - -if TYPE_CHECKING: - from neps.optimizers.base_optimizer import BaseOptimizer - from neps.state.protocols import Synced, TrialRepo - from neps.state.seed_snapshot import SeedSnapshot +from neps.state.trial import Report, Trial +from neps.utils.files import atomic_write, deserialize, serialize logger = logging.getLogger(__name__) + # TODO: Technically we don't need the same Location type for all shared objects. Loc = TypeVar("Loc") T = TypeVar("T") +Version: TypeAlias = str + +Resource: TypeAlias = Literal[ + "optimizer_info", "optimizer_state", "seed_state", "errors", "configs" +] + + +N_UNSAFE_RETRIES = 10 + +CONFIG_PREFIX_LEN = len("config_") + +# TODO: Ergonomics of this class sucks @dataclass -class NePSState(Generic[Loc]): - """The main state object that holds all the shared state objects.""" +class TrialRepo: + """A repository for trials that are stored on disk. + + !!! warning + + This class does not implement locking and it is up to the caller to ensure + there are no race conflicts. 
+ """ + + CACHE_FILE_NAME = ".trial_cache.pkl" + UPDATE_CONSOLIDATION_LIMIT = TRIAL_CACHE_MAX_UPDATES_BEFORE_CONSOLIDATION + + directory: Path + cache_path: Path = field(init=False) + + def __post_init__(self) -> None: + self.directory.mkdir(parents=True, exist_ok=True) + self.cache_path = self.directory / self.CACHE_FILE_NAME - location: str + def list_trial_ids(self) -> list[str]: + """List all the trial ids on disk.""" + return [ + config_path.name[CONFIG_PREFIX_LEN:] + for config_path in self.directory.iterdir() + if config_path.name.startswith("config_") and config_path.is_dir() + ] - _trials: TrialRepo[Loc] = field(repr=False) - _optimizer_info: Synced[OptimizerInfo, Loc] - _seed_state: Synced[SeedSnapshot, Loc] = field(repr=False) - _optimizer_state: Synced[OptimizationState, Loc] - _shared_errors: Synced[ErrDump, Loc] = field(repr=False) + def _read_pkl_and_maybe_consolidate( + self, + *, + consolidate: bool | None = None, + ) -> dict[str, Trial]: + with self.cache_path.open("rb") as f: + _bytes = f.read() + + buffer = io.BytesIO(_bytes) + trials: dict[str, Trial] = {} + updates: list[Trial] = [] + while True: + try: + datum = pickle.load(buffer) # noqa: S301 + + # If it's a `dict`, this is the whol trials cache + if isinstance(datum, dict): + assert len(trials) == 0, "Multiple caches present." + trials = datum + + # If it's a `list`, these are multiple updates + elif isinstance(datum, list): + updates.extend(datum) + + # Otherwise it's a single update + else: + assert isinstance(datum, Trial), "Not a trial." + updates.append(datum) + except EOFError: + break + + trials.update({trial.id: trial for trial in updates}) + if consolidate is True or ( + len(updates) > self.UPDATE_CONSOLIDATION_LIMIT and consolidate is None + ): + logger.debug( + "Consolidating trial cache with %d trials and %d updates.", + len(trials), + len(updates), + ) + pickle_bytes = pickle.dumps(trials, protocol=pickle.HIGHEST_PROTOCOL) + with atomic_write(self.cache_path, "wb") as f: + f.write(pickle_bytes) + + return trials + + def latest(self) -> dict[str, Trial]: + """Get the latest trials from the cache.""" + if not self.cache_path.exists(): + # If we end up with no cache but there are trials on disk, we need to read in. + if any(path.name.startswith("config_") for path in self.directory.iterdir()): + trial_ids = self.list_trial_ids() + trials = { + trial_id: self.load_trial_from_disk(trial_id) + for trial_id in trial_ids + } + pickle_bytes = pickle.dumps(trials, protocol=pickle.HIGHEST_PROTOCOL) + with atomic_write(self.cache_path, "wb") as f: + f.write(pickle_bytes) + + return {} + + return self._read_pkl_and_maybe_consolidate() + + def store_new_trial(self, trial: Trial | list[Trial]) -> None: + """Write a new trial to disk. - def put_updated_trial(self, trial: Trial, /) -> None: - """Update the trial with the new information. + Raises: + TrialAlreadyExistsError: If the trial already exists on disk. 
+ """ + if isinstance(trial, Trial): + config_path = self.directory / f"config_{trial.id}" + if config_path.exists(): + raise TrialAlreadyExistsError(trial.id, config_path) + + bytes_ = pickle.dumps(trial, protocol=pickle.HIGHEST_PROTOCOL) + with atomic_write(self.cache_path, "ab") as f: + f.write(bytes_) + + config_path.mkdir(parents=True, exist_ok=True) + ReaderWriterTrial.write( + trial, + self.directory / f"config_{trial.id}", + hints=["config", "metadata"], + ) + else: + for child_trial in trial: + config_path = self.directory / f"config_{child_trial.id}" + if config_path.exists(): + raise TrialAlreadyExistsError(child_trial.id, config_path) + config_path.mkdir(parents=True, exist_ok=True) + + bytes_ = pickle.dumps(trial, protocol=pickle.HIGHEST_PROTOCOL) + with atomic_write(self.cache_path, "ab") as f: + f.write(bytes_) + + for child_trial in trial: + ReaderWriterTrial.write( + child_trial, + self.directory / f"config_{child_trial.id}", + hints=["config", "metadata"], + ) + + def update_trial( + self, + trial: Trial, + *, + hints: Iterable[TrialWriteHint] | TrialWriteHint | None = ("report", "metadata"), + ) -> None: + """Update a trial on disk. Args: trial: The trial to update. + hints: The hints to use when updating the trial. Defines what files need + to be updated. + If you don't know, leave `None`, this is a micro-optimization. + """ + bytes_ = pickle.dumps(trial, protocol=pickle.HIGHEST_PROTOCOL) + with atomic_write(self.cache_path, "ab") as f: + f.write(bytes_) + + ReaderWriterTrial.write(trial, self.directory / f"config_{trial.id}", hints=hints) + + def load_trial_from_disk(self, trial_id: str) -> Trial: + """Load a trial from disk. Raises: - VersionMismatchError: If the trial has been updated since it was last - fetched by the worker using this state. This indicates that some other - worker has updated the trial in the meantime and the changes from - this worker are rejected. + TrialNotFoundError: If the trial is not found on disk. """ - shared_trial = self._trials.get_by_id(trial.id) - shared_trial.put(trial) + config_path = self.directory / f"config_{trial_id}" + if not config_path.exists(): + raise TrialNotFoundError( + f"Trial {trial_id} not found at expected path of {config_path}." + ) - def get_trial_by_id(self, trial_id: str, /) -> Trial: - """Get a trial by its id.""" - return self._trials.get_by_id(trial_id).synced() + return ReaderWriterTrial.read(config_path) + + +@dataclass +class NePSState: + """The main state object that holds all the shared state objects.""" + + path: Path + + _trial_lock: FileLocker = field(repr=False) + _trial_repo: TrialRepo = field(repr=False) + + _optimizer_lock: FileLocker = field(repr=False) + + _optimizer_info_path: Path = field(repr=False) + _optimizer_info: OptimizerInfo = field(repr=False) + + _optimizer_state_path: Path = field(repr=False) + _optimizer_state: OptimizationState = field(repr=False) + + _err_lock: FileLocker = field(repr=False) + _shared_errors_path: Path = field(repr=False) + _shared_errors: ErrDump = field(repr=False) + + def lock_and_read_trials(self) -> dict[str, Trial]: + """Acquire the state lock and read the trials.""" + with self._trial_lock.lock(): + return self._trial_repo.latest() + + @overload + def lock_and_sample_trial( + self, optimizer: BaseOptimizer, *, worker_id: str, n: None = None + ) -> Trial: ... + @overload + def lock_and_sample_trial( + self, optimizer: BaseOptimizer, *, worker_id: str, n: int + ) -> list[Trial]: ... 
+ + def lock_and_sample_trial( + self, optimizer: BaseOptimizer, *, worker_id: str, n: int | None = None + ) -> Trial | list[Trial]: + """Acquire the state lock and sample a trial.""" + with self._optimizer_lock.lock(): + with self._trial_lock.lock(): + trials_ = self._trial_repo.latest() + + trials = self._sample_trial( + optimizer, + trials=trials_, + worker_id=worker_id, + n=n, + ) + + with self._trial_lock.lock(): + self._trial_repo.store_new_trial(trials) + + return trials + + def lock_and_report_trial_evaluation( + self, + trial: Trial, + report: Report, + *, + worker_id: str, + ) -> None: + """Acquire the state lock and report the trial evaluation.""" + with self._trial_lock.lock(), self._err_lock.lock(): + self._report_trial_evaluation(trial, report, worker_id=worker_id) - def get_trials_by_ids(self, trial_ids: list[str], /) -> dict[str, Trial | None]: - """Get trials by their ids.""" - return { - _id: shared_trial.synced() - for _id, shared_trial in self._trials.get_by_ids(trial_ids).items() - } + @overload + def _sample_trial( + self, + optimizer: BaseOptimizer, + *, + worker_id: str, + trials: dict[str, Trial], + n: int, + _sample_hooks: list[Callable] | None = ..., + ) -> list[Trial]: ... - def sample_trial( + @overload + def _sample_trial( self, optimizer: BaseOptimizer, *, worker_id: str, + trials: dict[str, Trial], + n: None, + _sample_hooks: list[Callable] | None = ..., + ) -> Trial: ... + + def _sample_trial( + self, + optimizer: BaseOptimizer, + *, + worker_id: str, + trials: dict[str, Trial], + n: int | None, _sample_hooks: list[Callable] | None = None, - ) -> Trial: + ) -> Trial | list[Trial]: """Sample a new trial from the optimizer. + !!! warning + + Responsibility of locking is on caller. + Args: optimizer: The optimizer to sample the trial from. worker_id: The worker that is sampling the trial. + n: The number of trials to sample. + trials: The current trials. _sample_hooks: A list of hooks to apply to the optimizer before sampling. Returns: The new trial. """ - with self._optimizer_state.acquire() as ( - opt_state, - put_opt, - ), self._seed_state.acquire() as (seed_state, put_seed_state): - trials: dict[Trial.ID, Trial] = {} - for trial_id, shared_trial in self._trials.all().items(): - trial = shared_trial.synced() - trials[trial_id] = trial - - seed_state.set_as_global_seed_state() - - # TODO: Not sure if any existing pre_load hooks required - # it to be done after `load_results`... I hope not. - if _sample_hooks is not None: - for hook in _sample_hooks: - optimizer = hook(optimizer) - - # NOTE: We don't want optimizers mutating this before serialization - budget = opt_state.budget.clone() if opt_state.budget is not None else None - sampled_config, new_opt_state = optimizer.ask( - trials=trials, - budget_info=budget, - optimizer_state=opt_state.shared_state, + with self._optimizer_state_path.open("rb") as f: + opt_state: OptimizationState = pickle.load(f) # noqa: S301 + + opt_state.seed_snapshot.set_as_global_seed_state() + + # TODO: Not sure if any existing pre_load hooks required + # it to be done after `load_results`... I hope not. 
+ if _sample_hooks is not None: + for hook in _sample_hooks: + optimizer = hook(optimizer)  # type: ignore + + assert isinstance(optimizer, BaseOptimizer) + if opt_state.budget is not None: + # NOTE: All other values of budget are ones that should remain + # constant, there are currently only these two which are dynamic as + # optimization unfolds + opt_state.budget.used_cost_budget = sum( + trial.report.cost + for trial in trials.values() + if trial.report is not None and trial.report.cost is not None ) + opt_state.budget.used_evaluations = len(trials) + + sampled_configs = optimizer.ask( + trials=trials, + budget_info=opt_state.budget.clone() + if opt_state.budget is not None + else None, + n=n, + ) + if not isinstance(sampled_configs, list): + sampled_configs = [sampled_configs] + + # TODO: Not implemented yet. + shared_state = opt_state.shared_state + + sampled_trials: list[Trial] = [] + for sampled_config in sampled_configs: if sampled_config.previous_config_id is not None: previous_trial = trials.get(sampled_config.previous_config_id) if previous_trial is None: @@ -125,27 +396,30 @@ def sample_trial( trial = Trial.new( trial_id=sampled_config.id, - location="",  # HACK: This will be set by the `TrialRepo` + location="",  # HACK: This will be set by the `TrialRepo` in `store_new_trial` config=sampled_config.config, previous_trial=sampled_config.previous_config_id, previous_trial_location=previous_trial_location, time_sampled=time.time(), worker_id=worker_id, ) - shared_trial = self._trials.put_new(trial) - seed_state.recapture() - put_seed_state(seed_state) - put_opt( - OptimizationState(budget=opt_state.budget, shared_state=new_opt_state) - ) + sampled_trials.append(trial) + + opt_state.shared_state = shared_state + opt_state.seed_snapshot.recapture() + with self._optimizer_state_path.open("wb") as f: + pickle.dump(opt_state, f, protocol=pickle.HIGHEST_PROTOCOL) - return trial + if n is None: + assert len(sampled_trials) == 1 + return sampled_trials[0] - def report_trial_evaluation( + return sampled_trials + + def _report_trial_evaluation( self, trial: Trial, - report: Trial.Report, - optimizer: BaseOptimizer, + report: Report, *, worker_id: str, ) -> None: @@ -158,74 +432,217 @@ def report_trial_evaluation( optimizer: The optimizer to update and get the state from worker_id: The worker that evaluated the trial. """ - shared_trial = self._trials.get_by_id(trial.id) - # TODO: This would fail if some other worker has already updated the trial. - IMPORTANT: We need to attach the report to the trial before updating the things. trial.report = report - shared_trial.put(trial) - logger.debug("Updated trial '%s' with status '%s'", trial.id, trial.state) - with self._optimizer_state.acquire() as (opt_state, put_opt_state): - optimizer.update_state_post_evaluation(opt_state.shared_state, report) - - # TODO: If an optimizer doesn't use the state, this is a waste of time. - # Update the budget if we have one.
- if opt_state.budget is not None: - budget_info = opt_state.budget - - if report.cost is not None: - budget_info.used_cost_budget += report.cost - put_opt_state(opt_state) + self._trial_repo.update_trial(trial, hints=["report", "metadata"]) if report.err is not None: - with self._shared_errors.acquire() as (errs, put_errs): - trial_err = ErrDump.SerializableTrialError( - trial_id=trial.id, - worker_id=worker_id, - err_type=type(report.err).__name__, - err=str(report.err), - tb=report.tb, + with self._err_lock.lock(): + err_dump = ReaderWriterErrDump.read(self._shared_errors_path) + err_dump.errs.append( + ErrDump.SerializableTrialError( + trial_id=trial.id, + worker_id=worker_id, + err_type=type(report.err).__name__, + err=str(report.err), + tb=report.tb, + ) ) - errs.append(trial_err) - put_errs(errs) + ReaderWriterErrDump.write(err_dump, self._shared_errors_path) - def get_errors(self) -> ErrDump: + def all_trial_ids(self) -> list[str]: + """Get all the trial ids.""" + return self._trial_repo.list_trial_ids() + + def lock_and_get_errors(self) -> ErrDump: """Get all the errors that have occurred during the optimization.""" - return self._shared_errors.synced() + with self._err_lock.lock(): + return ReaderWriterErrDump.read(self._shared_errors_path) + + def lock_and_get_optimizer_info(self) -> OptimizerInfo: + """Get the optimizer information.""" + with self._optimizer_lock.lock(): + return OptimizerInfo(info=deserialize(self._optimizer_info_path)) + + def lock_and_get_optimizer_state(self) -> OptimizationState: + """Get the optimizer state.""" + with self._optimizer_lock.lock(): # noqa: SIM117 + with self._optimizer_state_path.open("rb") as f: + obj = pickle.load(f) # noqa: S301 + assert isinstance(obj, OptimizationState) + return obj + + def lock_and_get_trial_by_id(self, trial_id: str) -> Trial: + """Get a trial by its id.""" + with self._trial_lock.lock(): + return self._trial_repo.load_trial_from_disk(trial_id) + + def unsafe_retry_get_trial_by_id(self, trial_id: str) -> Trial: + """Get a trial by id but use unsafe retries.""" + for _ in range(N_UNSAFE_RETRIES): + try: + return self._trial_repo.load_trial_from_disk(trial_id) + except TrialNotFoundError as e: + raise e + except Exception as e: # noqa: BLE001 + logger.warning( + "Failed to get trial '%s' due to an error: %s", trial_id, e + ) + time.sleep(0.1) + continue + + raise NePSError( + f"Failed to get trial '{trial_id}' after {N_UNSAFE_RETRIES} retries." + ) + + def put_updated_trial( + self, + trial: Trial, + *, + hints: list[TrialWriteHint] | TrialWriteHint | None = None, + ) -> None: + """Update the trial. + + Args: + trial: The trial to update. + hints: The hints to use when updating the trial. Defines what files need + to be updated. + If you don't know, leave `None`, this is a micro-optimization. + """ + with self._trial_lock.lock(): + self._trial_repo.update_trial(trial, hints=hints) @overload - def get_next_pending_trial(self) -> Trial | None: ... + def lock_and_get_next_pending_trial(self) -> Trial | None: ... + @overload - def get_next_pending_trial(self, n: int | None = None) -> list[Trial]: ... + def lock_and_get_next_pending_trial(self, n: int | None = None) -> list[Trial]: ... - def get_next_pending_trial(self, n: int | None = None) -> Trial | list[Trial] | None: - """Get the next pending trial to evaluate. 
+ def lock_and_get_next_pending_trial( + self, + n: int | None = None, + ) -> Trial | list[Trial] | None: + """Get the next pending trial.""" + with self._trial_lock.lock(): + trials = self._trial_repo.latest() + pendings = sorted( + [ + trial + for trial in trials.values() + if trial.metadata.state == Trial.State.PENDING + ], + key=lambda t: t.metadata.time_sampled, + ) + if n is None: + return pendings[0] if pendings else None + return pendings[:n] + + @classmethod + def create_or_load( + cls, + path: Path, + *, + load_only: bool = False, + optimizer_info: OptimizerInfo | None = None, + optimizer_state: OptimizationState | None = None, + ) -> NePSState: + """Create a new NePSState in a directory or load the existing one + if it already exists, depending on the argument. - Args: - n: The number of trials to get. If `None`, get the next trial. !!! warning - Returns: - The next trial or a list of trials if `n` is not `None`. - """ - _pending_itr = ( - shared_trial.synced() for _, shared_trial in self._trials.pending() - ) - if n is not None: - return take(n, _pending_itr) - return next(_pending_itr, None) We check that the optimizer info in the NePSState on disk matches + the one that is passed. However we do not lock this check so it + is possible that if two processes try to create a NePSState at the + same time, both with different optimizer infos, that one will fail - def all_trial_ids(self) -> set[Trial.ID]: - """Get all the trial ids that are known about.""" - return self._trials.all_trial_ids() to create the NePSState. This is a limitation of the current design. - def get_all_trials(self) -> dict[Trial.ID, Trial]: - """Get all the trials that are known about.""" - return {_id: trial.synced() for _id, trial in self._trials.all().items()} In principle, we could allow multiple optimizers to be run and share - the same set of trials. - def optimizer_info(self) -> OptimizerInfo: - """Get the optimizer information.""" - return self._optimizer_info.synced() + Args: + path: The directory to create the state in. + load_only: If True, only load the state and do not create a new one. + optimizer_info: The optimizer info to use. + optimizer_state: The optimizer state to use. - def optimizer_state(self) -> OptimizationState: - """Get the optimizer state.""" - return self._optimizer_state.synced() Returns: + The NePSState. - Raises: + NePSError: If the optimizer info on disk does not match the one provided. + """ + is_new = not path.exists() + if load_only: + if is_new: + raise FileNotFoundError(f"No NePSState found at '{path}'.") + else: + assert optimizer_info is not None + assert optimizer_state is not None + + path.mkdir(parents=True, exist_ok=True) + config_dir = path / "configs" + config_dir.mkdir(parents=True, exist_ok=True) + + optimizer_info_path = path / "optimizer_info.yaml" + optimizer_state_path = path / "optimizer_state.pkl" + shared_errors_path = path / "shared_errors.jsonl" + + # We have to do one bit of sanity checking to ensure that the optimizer + # info on disk matches the one we have received, otherwise we are unsure which + # optimizer is being used. + # NOTE: We assume that we do not have to worry about a race condition + # here where we have two different NePSState objects with two different optimizer + # infos trying to be created at the same time. This avoids the need to lock to
If this assumption changes, then we would have + # to first lock before we do this check + if not is_new: + existing_info = OptimizerInfo(info=deserialize(optimizer_info_path)) + if not load_only and existing_info != optimizer_info: + raise NePSError( + "The optimizer info on disk does not match the one provided." + f"\nOn disk: {existing_info}\nProvided: {optimizer_info}" + f"\n\nLoaded the one on disk from {path}." + ) + with optimizer_state_path.open("rb") as f: + optimizer_state = pickle.load(f) # noqa: S301 + + optimizer_info = existing_info + error_dump = ReaderWriterErrDump.read(shared_errors_path) + else: + assert optimizer_info is not None + assert optimizer_state is not None + + serialize(optimizer_info.info, path=optimizer_info_path) + with optimizer_state_path.open("wb") as f: + pickle.dump(optimizer_state, f, protocol=pickle.HIGHEST_PROTOCOL) + + error_dump = ErrDump([]) + + return NePSState( + path=path, + _trial_repo=TrialRepo(config_dir), + # Locks, + _trial_lock=FileLocker( + lock_path=path / ".configs.lock", + poll=TRIAL_FILELOCK_POLL, + timeout=TRIAL_FILELOCK_TIMEOUT, + ), + _optimizer_lock=FileLocker( + lock_path=path / ".optimizer.lock", + poll=STATE_FILELOCK_POLL, + timeout=STATE_FILELOCK_TIMEOUT, + ), + _err_lock=FileLocker( + lock_path=path / ".errors.lock", + poll=GLOBAL_ERR_FILELOCK_POLL, + timeout=GLOBAL_ERR_FILELOCK_TIMEOUT, + ), + # State + _optimizer_info_path=optimizer_info_path, + _optimizer_info=optimizer_info, + _optimizer_state_path=optimizer_state_path, + _optimizer_state=optimizer_state, # type: ignore + _shared_errors_path=shared_errors_path, + _shared_errors=error_dump, + ) diff --git a/neps/state/optimizer.py b/neps/state/optimizer.py index f4000b074..ffe90b3cc 100644 --- a/neps/state/optimizer.py +++ b/neps/state/optimizer.py @@ -2,28 +2,26 @@ from __future__ import annotations -from dataclasses import dataclass -from typing import Any, Mapping +from collections.abc import Mapping +from dataclasses import dataclass, replace +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from neps.state.seed_snapshot import SeedSnapshot @dataclass class BudgetInfo: """Information about the budget of an optimizer.""" - max_cost_budget: float - used_cost_budget: float - - @property - def remaining_cost_budget(self) -> float: - """The remaining budget.""" - return self.max_cost_budget - self.used_cost_budget + max_cost_total: float | None = None + used_cost_budget: float = 0.0 + max_evaluations: int | None = None + used_evaluations: int = 0 def clone(self) -> BudgetInfo: - """Clone the budget info.""" - return BudgetInfo( - max_cost_budget=self.max_cost_budget, - used_cost_budget=self.used_cost_budget, - ) + """Create a copy of the budget info.""" + return replace(self) @dataclass @@ -33,7 +31,10 @@ class OptimizationState: budget: BudgetInfo | None """Information regarind the budget used by the optimization trajectory.""" - shared_state: dict[str, Any] + seed_snapshot: SeedSnapshot + """The state of the random number generators at the time of the last sample.""" + + shared_state: dict[str, Any] | None """Any information the optimizer wants to store between calls to sample and post evaluations. diff --git a/neps/state/protocols.py b/neps/state/protocols.py deleted file mode 100644 index 78fcee0d3..000000000 --- a/neps/state/protocols.py +++ /dev/null @@ -1,560 +0,0 @@ -"""This module defines the protocols used by -[`NePSState`][neps.state.neps_state.NePSState] and -[`Synced`][neps.state.synced.Synced] to ensure atomic operations to the state itself. 
-""" - -from __future__ import annotations - -import logging -from contextlib import contextmanager -from copy import deepcopy -from dataclasses import dataclass -from typing import TYPE_CHECKING, Callable, ClassVar, Generic, Iterable, Iterator, TypeVar -from typing_extensions import Protocol, Self - -from neps.exceptions import ( - LockFailedError, - TrialAlreadyExistsError, - TrialNotFoundError, - VersionedResourceAlreadyExistsError, - VersionedResourceDoesNotExistsError, - VersionedResourceRemovedError, - VersionMismatchError, -) - -if TYPE_CHECKING: - from neps.state import Trial - -logger = logging.getLogger(__name__) - -T = TypeVar("T") -K = TypeVar("K") - -# https://github.com/MaT1g3R/option/issues/40 -K2 = TypeVar("K2") -T2 = TypeVar("T2") - -Loc_contra = TypeVar("Loc_contra", contravariant=True) - - -class Versioner(Protocol): - """A versioner that can bump the version of a resource. - - It should have some [`current()`][neps.state.protocols.Versioner.current] method - to give the current version tag of a resource and a - [`bump()`][neps.state.protocols.Versioner.bump] method to provide a new version tag. - - These [`current()`][neps.state.protocols.Versioner.current] and - [`bump()`][neps.state.protocols.Versioner.bump] methods do not need to be atomic - but they should read/write to external state, i.e. file-system, database, etc. - """ - - def current(self) -> str | None: - """Return the current version as defined by the external state, i.e. - the version of the tag on disk. - - Returns: - The current version if there is one written. - """ - ... - - def bump(self) -> str: - """Create a new external version tag. - - Returns: - The new version tag. - """ - ... - - -class Locker(Protocol): - """A locker that can be used to communicate between workers.""" - - LockFailedError: ClassVar = LockFailedError - - @contextmanager - def lock(self) -> Iterator[None]: - """Initiate the lock as a context manager, releasing it when done.""" - ... - - def is_locked(self) -> bool: - """Check if lock is...well, locked. - - Should return True if the resource is locked, even if the lock is held by the - current worker/process. - """ - ... - - -class ReaderWriter(Protocol[T, Loc_contra]): - """A reader-writer that can read and write some resource T with location Loc. - - For example, a `ReaderWriter[Trial, Path]` indicates a class that can read and write - trials, given some `Path`. - """ - - def read(self, loc: Loc_contra, /) -> T: - """Read the resource at the given location.""" - ... - - def write(self, value: T, loc: Loc_contra, /) -> None: - """Write the resource at the given location.""" - ... - - -class TrialRepo(Protocol[K]): - """A repository of trials. - - The primary purpose of this protocol is to ensure consistent access to trial, - the ability to put in a new trial and know about the trials that are stored there. - """ - - TrialAlreadyExistsError: ClassVar = TrialAlreadyExistsError - TrialNotFoundError: ClassVar = TrialNotFoundError - - def all_trial_ids(self) -> set[Trial.ID]: - """List all the trial ids in this trial Repo.""" - ... - - def get_by_id(self, trial_id: Trial.ID) -> Synced[Trial, K]: - """Get a trial by its id.""" - ... - - def get_by_ids(self, trial_ids: list[Trial.ID]) -> dict[str, Synced[Trial, K]]: - """Get trials by their ids.""" - ... - - def put_new(self, trial: Trial) -> Synced[Trial, K]: - """Put a new trial in the repo.""" - ... - - def all(self) -> dict[Trial.ID, Synced[Trial, K]]: - """Get all trials in the repo.""" - ... 
- - def pending(self) -> Iterable[tuple[Trial.ID, Synced[Trial, K]]]: - """Get all pending trials in the repo. - - !!! note - This should return trials in the order in which they should be next evaluated, - usually the order in which they were put in the repo. - """ - ... - - -@dataclass -class VersionedResource(Generic[T, K]): - """A resource that will be read if it needs to update to the latest version. - - Relies on 3 main components: - * A [`Versioner`][neps.state.protocols.Versioner] to manage the versioning of the - resource. - * A [`ReaderWriter`][neps.state.protocols.ReaderWriter] to read and write the - resource. - * The location of the resource that can be used for the reader-writer. - """ - - VersionMismatchError: ClassVar = VersionMismatchError - VersionedResourceDoesNotExistsError: ClassVar = VersionedResourceDoesNotExistsError - VersionedResourceAlreadyExistsError: ClassVar = VersionedResourceAlreadyExistsError - VersionedResourceRemovedError: ClassVar = VersionedResourceRemovedError - - _current: T - _location: K - _version: str - _versioner: Versioner - _reader_writer: ReaderWriter[T, K] - - @staticmethod - def new( - *, - data: T2, - location: K2, - versioner: Versioner, - reader_writer: ReaderWriter[T2, K2], - ) -> VersionedResource[T2, K2]: - """Create a new VersionedResource. - - This will create a new resource if it doesn't exist, otherwise, - if it already exists, it will raise an error. - - Use [`load()`][neps.state.protocols.VersionedResource.load] if you want to - load an existing resource. - - Args: - data: The data to be stored. - location: The location where the data will be stored. - versioner: The versioner to be used. - reader_writer: The reader-writer to be used. - - Returns: - A new VersionedResource - - Raises: - VersionedResourceAlreadyExistsError: If a versioned resource already exists - at the given location. - """ - current_version = versioner.current() - if current_version is not None: - raise VersionedResourceAlreadyExistsError( - f"A versioend resource already already exists at '{location}'" - f" with version '{current_version}'" - ) - - version = versioner.bump() - reader_writer.write(data, location) - return VersionedResource( - _current=data, - _location=location, - _version=version, - _versioner=versioner, - _reader_writer=reader_writer, - ) - - @classmethod - def load( - cls, - *, - location: K2, - versioner: Versioner, - reader_writer: ReaderWriter[T2, K2], - ) -> VersionedResource[T2, K2]: - """Load an existing VersionedResource. - - This will load an existing resource if it exists, otherwise, it will raise an - error. - - Use [`new()`][neps.state.protocols.VersionedResource.new] if you want to - create a new resource. - - Args: - location: The location of the resource. - versioner: The versioner to be used. - reader_writer: The reader-writer to be used. - - Returns: - A VersionedResource - - Raises: - VersionedResourceDoesNotExistsError: If no versioned resource exists at - the given location. - """ - version = versioner.current() - if version is None: - raise cls.VersionedResourceDoesNotExistsError( - f"No versioned resource exists at '{location}'." 
- ) - data = reader_writer.read(location) - return VersionedResource( - _current=data, - _location=location, - _version=version, - _versioner=versioner, - _reader_writer=reader_writer, - ) - - def sync_and_get(self) -> T: - """Get the data and version of the resource.""" - self.sync() - return self._current - - def sync(self) -> None: - """Sync the resource with the latest version.""" - current_version = self._versioner.current() - if current_version is None: - raise self.VersionedResourceRemovedError( - f"Versioned resource at '{self._location}' has been removed!" - f" Last known version was '{self._version}'." - ) - - if self._version != current_version: - self._current = self._reader_writer.read(self._location) - self._version = current_version - - def put(self, data: T) -> None: - """Put the data and version of the resource. - - Raises: - VersionMismatchError: If the version of the resource is not the same as the - current version. This implies that the resource has been updated by - another worker. - """ - current_version = self._versioner.current() - if self._version != current_version: - raise self.VersionMismatchError( - f"Version mismatch - ours: '{self._version}', remote: '{current_version}'" - f" Tried to put data at '{self._location}'. Doing so would overwrite" - " changes made by another worker. The solution is to pull the latest" - " version of the resource and try again." - " The most possible reasons for this error is that a lock was not" - " utilized when getting this resource before putting it back." - ) - - self._reader_writer.write(data, self._location) - self._current = data - self._version = self._versioner.bump() - - def current(self) -> T: - """Get the current data of the resource.""" - return self._current - - def is_stale(self) -> bool: - """Check if the resource is stale.""" - return self._version != self._versioner.current() - - def location(self) -> K: - """Get the location of the resource.""" - return self._location - - -@dataclass -class Synced(Generic[T, K]): - """Manages a versioned resource but it's methods also implement locking procedures - for accessing it. - - Its types are parametrized by two type variables: - - * `T` is the type of the data stored in the resource. - * `K` is the type of the location of the resource, for example `Path` - - This wraps a [`VersionedResource`][neps.state.protocols.VersionedResource] and - additionally provides utility to perform atmoic operations on it using a - [`Locker`][neps.state.protocols.Locker]. - - This is used by [`NePSState`][neps.state.neps_state.NePSState] to manage the state - of trials and other shared resources. - - It consists of 2 main components: - - * A [`VersionedResource`][neps.state.protocols.VersionedResource] to manage the - versioning of the resource. - * A [`Locker`][neps.state.protocols.Locker] to manage the locking of the resource. - - The primary methods to interact with a resource that is behined a `Synced` are: - - * [`synced()`][neps.state.protocols.Synced.synced] to get the data of the resource - after syncing it to it's latest verison. - * [`acquire()`][neps.state.protocols.Synced.acquire] context manager to get latest - version of the data while also mainting a lock on it. This additionally provides - a `put()` operation to put the data back. This can primarily be used to get the - data, perform some mutation on it and then put it back, while not allowing other - workers access to the data. 
- """ - - LockFailedError: ClassVar = Locker.LockFailedError - VersionedResourceRemovedError: ClassVar = ( - VersionedResource.VersionedResourceRemovedError - ) - VersionMismatchError: ClassVar = VersionedResource.VersionMismatchError - VersionedResourceAlreadyExistsError: ClassVar = ( - VersionedResource.VersionedResourceAlreadyExistsError - ) - VersionedResourceDoesNotExistsError: ClassVar = ( - VersionedResource.VersionedResourceDoesNotExistsError - ) - - _resource: VersionedResource[T, K] - _locker: Locker - - @classmethod - def new( - cls, - *, - locker: Locker, - data: T2, - location: K2, - versioner: Versioner, - reader_writer: ReaderWriter[T2, K2], - ) -> Synced[T2, K2]: - """Create a new Synced resource. - - This will create a new resource if it doesn't exist, otherwise, - if it already exists, it will raise an error. - - Use [`load()`][neps.state.protocols.Synced.load] if you want to load an existing - resource. Use [`new_or_load()`][neps.state.protocols.Synced.new_or_load] if you - want to create a new resource if it doesn't exist, otherwise load an existing - resource. - - Args: - locker: The locker to be used. - data: The data to be stored. - location: The location where the data will be stored. - versioner: The versioner to be used. - reader_writer: The reader-writer to be used. - - Returns: - A new Synced resource. - - Raises: - VersionedResourceAlreadyExistsError: If a versioned resource already exists - at the given location. - """ - with locker.lock(): - vr = VersionedResource.new( - data=data, - location=location, - versioner=versioner, - reader_writer=reader_writer, - ) - return Synced(_resource=vr, _locker=locker) - - @classmethod - def load( - cls, - *, - locker: Locker, - location: K2, - versioner: Versioner, - reader_writer: ReaderWriter[T2, K2], - ) -> Synced[T2, K2]: - """Load an existing Synced resource. - - This will load an existing resource if it exists, otherwise, it will raise an - error. - - Use [`new()`][neps.state.protocols.Synced.new] if you want to create a new - resource. Use [`new_or_load()`][neps.state.protocols.Synced.new_or_load] if you - want to create a new resource if it doesn't exist, otherwise load an existing - resource. - - Args: - locker: The locker to be used. - location: The location of the resource. - versioner: The versioner to be used. - reader_writer: The reader-writer to be used. - - Returns: - A Synced resource. - - Raises: - VersionedResourceDoesNotExistsError: If no versioned resource exists at - the given location. - """ - with locker.lock(): - return Synced( - _resource=VersionedResource.load( - location=location, - versioner=versioner, - reader_writer=reader_writer, - ), - _locker=locker, - ) - - @classmethod - def new_or_load( - cls, - *, - locker: Locker, - data: T2, - location: K2, - versioner: Versioner, - reader_writer: ReaderWriter[T2, K2], - ) -> Synced[T2, K2]: - """Create a new Synced resource if it doesn't exist, otherwise load it. - - This will create a new resource if it doesn't exist, otherwise, it will load - an existing resource. - - Use [`new()`][neps.state.protocols.Synced.new] if you want to create a new - resource and fail otherwise. Use [`load()`][neps.state.protocols.Synced.load] - if you want to load an existing resource and fail if it doesn't exist. - - Args: - locker: The locker to be used. - data: The data to be stored. - - !!! warning - - This will be ignored if the data already exists. - - location: The location where the data will be stored. - versioner: The versioner to be used. 
- reader_writer: The reader-writer to be used. - - Returns: - A Synced resource. - """ - try: - return Synced.new( - locker=locker, - data=data, - location=location, - versioner=versioner, - reader_writer=reader_writer, - ) - except VersionedResourceAlreadyExistsError: - return Synced.load( - locker=locker, - location=location, - versioner=versioner, - reader_writer=reader_writer, - ) - - def synced(self) -> T: - """Get the data of the resource atomically.""" - with self._locker.lock(): - return self._resource.sync_and_get() - - def location(self) -> K: - """Get the location of the resource.""" - return self._resource.location() - - def put(self, data: T) -> None: - """Update the data atomically.""" - with self._locker.lock(): - self._resource.put(data) - - @contextmanager - def acquire(self) -> Iterator[tuple[T, Callable[[T], None]]]: - """Acquire the lock and get the data of the resource. - - This is a context manager that returns the data of the resource and a function - to put the data back. - - !!! note - This is the primary way to get the resource, mutate it and put it back. - Otherwise you likely want [`synced()`][neps.state.protocols.Synced.synced] - or [`put()`][neps.state.protocols.Synced.put]. - - Yields: - A tuple containing the data of the resource and a function to put the data - back. - """ - with self._locker.lock(): - self._resource.sync() - yield self._resource.current(), self._put_unsafe - - def deepcopy(self) -> Self: - """Create a deep copy of the shared resource.""" - return deepcopy(self) - - def _components(self) -> tuple[T, K, Versioner, ReaderWriter[T, K], Locker]: - """Get the components of the shared resource.""" - return ( - self._resource.current(), - self._resource.location(), - self._resource._versioner, - self._resource._reader_writer, - self._locker, - ) - - def _unsynced(self) -> T: - """Get the current data of the resource **without** locking and syncing it.""" - return self._resource.current() - - def _is_stale(self) -> bool: - """Check if the data held currently is not the latest version.""" - return self._resource.is_stale() - - def _is_locked(self) -> bool: - """Check if the resource is locked.""" - return self._locker.is_locked() - - def _put_unsafe(self, data: T) -> None: - """Put the data without checking for staleness or acquiring the lock. - - !!! warning - This should only really be called if you know what you're doing. 
- """ - self._resource.put(data) diff --git a/neps/state/seed_snapshot.py b/neps/state/seed_snapshot.py index 0f9fad87c..4a26370b0 100644 --- a/neps/state/seed_snapshot.py +++ b/neps/state/seed_snapshot.py @@ -5,18 +5,17 @@ import contextlib import random from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, List, Tuple, Union -from typing_extensions import TypeAlias +from typing import TYPE_CHECKING, Any, TypeAlias import numpy as np if TYPE_CHECKING: import torch - NP_RNG_STATE: TypeAlias = Tuple[str, np.ndarray, int, int, float] - PY_RNG_STATE: TypeAlias = Tuple[int, Tuple[int, ...], Union[int, None]] + NP_RNG_STATE: TypeAlias = tuple[str, np.ndarray, int, int, float] + PY_RNG_STATE: TypeAlias = tuple[int, tuple[int, ...], int | None] TORCH_RNG_STATE: TypeAlias = torch.Tensor - TORCH_CUDA_RNG_STATE: TypeAlias = List[torch.Tensor] + TORCH_CUDA_RNG_STATE: TypeAlias = list[torch.Tensor] @dataclass @@ -105,7 +104,7 @@ def __eq__(self, other: Any, /) -> bool: # noqa: PLR0911 if not all( torch.equal(a, b) - for a, b in zip(self.torch_cuda_rng, other.torch_cuda_rng) + for a, b in zip(self.torch_cuda_rng, other.torch_cuda_rng, strict=False) ): return False diff --git a/neps/state/settings.py b/neps/state/settings.py index f34a94353..148d8c277 100644 --- a/neps/state/settings.py +++ b/neps/state/settings.py @@ -11,8 +11,8 @@ class DefaultReportValues: """Values to use when an error occurs.""" - loss_value_on_error: float | None = None - """The value to use for the loss when an error occurs.""" + objective_to_minimize_value_on_error: float | None = None + """The value to use for the objective_to_minimize when an error occurs.""" cost_value_on_error: float | None = None """The value to use for the cost when an error occurs.""" @@ -23,11 +23,13 @@ class DefaultReportValues: learning_curve_on_error: list[float] | None = None """The value to use for the learning curve when an error occurs. - If `'loss'`, the learning curve will be set to the loss value but as - a list with a single value. + If `'objective_to_minimize'`, the learning curve will be set to the + objective_to_minimize value but as a list with a single value. """ - learning_curve_if_not_provided: Literal["loss"] | list[float] | None = None + learning_curve_if_not_provided: ( + Literal["objective_to_minimize"] | list[float] | None + ) = None """The value to use for the learning curve when the evaluation function does not provide one.""" @@ -72,6 +74,9 @@ class WorkerSettings: default_report_values: DefaultReportValues """Values to use when an error occurs or was not specified.""" + batch_size: int | None + """The number of configurations to sample in a single batch.""" + # --------- Global Stopping Criterion --------- max_evaluations_total: int | None """The maximum number of evaluations to run in total. 
diff --git a/neps/state/trial.py b/neps/state/trial.py
index 862e2bbb8..0ead5a21c 100644
--- a/neps/state/trial.py
+++ b/neps/state/trial.py
@@ -3,20 +3,19 @@
 from __future__ import annotations
 
 import logging
+from collections.abc import Callable, Mapping
 from dataclasses import asdict, dataclass
 from enum import Enum
-from typing import TYPE_CHECKING, Any, Callable, ClassVar, Literal, Mapping
+from typing import TYPE_CHECKING, Any, ClassVar, Literal
 from typing_extensions import Self
 
 import numpy as np
 
 from neps.exceptions import NePSError
-from neps.utils.types import ConfigResult
 
 if TYPE_CHECKING:
     from neps.search_spaces import SearchSpace
-    from neps.utils.types import ERROR, RawConfig
-
+    from neps.utils.types import ERROR, ConfigResult, RawConfig
 
 logger = logging.getLogger(__name__)
 
@@ -44,8 +43,10 @@ class MetaData:
 
     id: str
     location: str
-    previous_trial_id: Trial.ID | None
+    state: State
+    previous_trial_id: str | None
     previous_trial_location: str | None
+    sampling_worker_id: str
     time_sampled: float
 
@@ -61,8 +62,8 @@ class MetaData:
 class Report:
     """A failed report of the evaluation of a configuration."""
 
-    trial_id: Trial.ID
-    loss: float | None
+    trial_id: str
+    objective_to_minimize: float | None
     cost: float | None
     learning_curve: list[float] | None  # TODO: Serializing a large list into yaml sucks!
     extra: Mapping[str, Any]
@@ -78,12 +79,16 @@ def __post_init__(self) -> None:
     def to_deprecate_result_dict(self) -> dict[str, Any] | ERROR:
         """Return the report as a dictionary."""
         if self.reported_as == "success":
-            d = {"loss": self.loss, "cost": self.cost, **self.extra}
+            d = {
+                "objective_to_minimize": self.objective_to_minimize,
+                "cost": self.cost,
+                **self.extra,
+            }
 
             # HACK: Backwards compatibility. Not sure how much this is needed
             # but it should be removed once optimizers stop calling the
-            # `get_loss`, `get_cost`, `get_learning_curve` methods of `BaseOptimizer`
-            # and just use the `Report` directly.
+            # `get_objective_to_minimize`, `get_cost`, `get_learning_curve` methods of
+            # `BaseOptimizer` and just use the `Report` directly.
             if "info_dict" not in d or "learning_curve" not in d["info_dict"]:
                 d.setdefault("info_dict", {})["learning_curve"] = self.learning_curve
             return d
@@ -91,8 +96,8 @@ def to_deprecate_result_dict(self) -> dict[str, Any] | ERROR:
         return "error"
 
     def __eq__(self, value: Any, /) -> bool:
-        # HACK : Since it could be probably that one of loss or cost is nan,
-        # we need a custom comparator for this object
+        # HACK: Since it is possible that one of objective_to_minimize or cost is
+        # NaN, we need a custom comparator for this object.
         # HACK : We also have to skip over the `Err` object since when it's deserialized,
         # we can not recover the original object/type.
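For context on the custom `__eq__` in this hunk: NaN never compares equal to itself, so a naive field-by-field comparison would flag two otherwise-identical reports as different. A small illustration of the NaN rule the comparator applies below (`values_match` is a hypothetical helper written for this note, not part of `Report`):

```python
import numpy as np

# Plain equality breaks down for NaN objectives/costs:
assert float("nan") != float("nan")


def values_match(a: float | None, b: float | None) -> bool:
    """Treat two values as equal when both are NaN, mirroring Report.__eq__."""
    if a is not None and np.isnan(a):
        return b is not None and bool(np.isnan(b))
    return a == b


assert values_match(float("nan"), float("nan"))
assert not values_match(float("nan"), 0.5)
assert values_match(None, None)
```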
if not isinstance(value, Report): @@ -107,7 +112,7 @@ def __eq__(self, value: Any, /) -> bool: if k == "err": if str(v) != str(other_v): return False - elif k in ("loss", "cost"): + elif k in ("objective_to_minimize", "cost"): if v is not None and np.isnan(v): if other_v is None or not np.isnan(other_v): return False @@ -123,7 +128,6 @@ def __eq__(self, value: Any, /) -> bool: class Trial: """A trial is a configuration and it's associated data.""" - ID: ClassVar = str State: ClassVar = State Report: ClassVar = Report MetaData: ClassVar = MetaData @@ -131,17 +135,16 @@ class Trial: config: Mapping[str, Any] metadata: MetaData - state: State report: Report | None @classmethod def new( cls, *, - trial_id: Trial.ID, + trial_id: str, config: Mapping[str, Any], location: str, - previous_trial: Trial.ID | None, + previous_trial: str | None, previous_trial_location: str | None, time_sampled: float, worker_id: int | str, @@ -149,10 +152,10 @@ def new( """Create a new trial object that was just sampled.""" worker_id = str(worker_id) return cls( - state=State.PENDING, config=config, metadata=MetaData( id=trial_id, + state=State.PENDING, location=location, time_sampled=time_sampled, previous_trial_id=previous_trial, @@ -163,7 +166,7 @@ def new( ) @property - def id(self) -> Trial.ID: + def id(self) -> str: """Return the id of the trial.""" return self.metadata.id @@ -174,12 +177,13 @@ def into_config_result( """Convert the trial and report to a `ConfigResult` object.""" if self.report is None: raise self.NotReportedYetError("The trial has not been reported yet.") + from neps.utils.types import ConfigResult result: dict[str, Any] | ERROR if self.report.reported_as == "success": result = { **self.report.extra, - "loss": self.report.loss, + "objective_to_minimize": self.report.objective_to_minimize, "cost": self.report.cost, } else: @@ -195,20 +199,20 @@ def into_config_result( def set_submitted(self, *, time_submitted: float) -> None: """Set the trial as submitted.""" self.metadata.time_submitted = time_submitted - self.state = State.SUBMITTED + self.metadata.state = State.SUBMITTED def set_evaluating(self, *, time_started: float, worker_id: int | str) -> None: """Set the trial as in progress.""" self.metadata.time_started = time_started self.metadata.evaluating_worker_id = str(worker_id) - self.state = State.EVALUATING + self.metadata.state = State.EVALUATING def set_complete( self, *, report_as: Literal["success", "failed", "crashed"], time_end: float, - loss: float | None, + objective_to_minimize: float | None, cost: float | None, learning_curve: list[float] | None, err: Exception | None, @@ -218,11 +222,11 @@ def set_complete( ) -> Report: """Set the report for the trial.""" if report_as == "success": - self.state = State.SUCCESS + self.metadata.state = State.SUCCESS elif report_as == "failed": - self.state = State.FAILED + self.metadata.state = State.FAILED elif report_as == "crashed": - self.state = State.CRASHED + self.metadata.state = State.CRASHED else: raise ValueError(f"Invalid report_as: '{report_as}'") @@ -231,7 +235,9 @@ def set_complete( extra = {} if extra is None else extra - loss = float(loss) if loss is not None else None + objective_to_minimize = ( + float(objective_to_minimize) if objective_to_minimize is not None else None + ) cost = float(cost) if cost is not None else None if learning_curve is not None: learning_curve = [float(v) for v in learning_curve] @@ -240,7 +246,7 @@ def set_complete( trial_id=self.metadata.id, reported_as=report_as, 
             evaluation_duration=evaluation_duration,
-            loss=loss,
+            objective_to_minimize=objective_to_minimize,
             cost=cost,
             learning_curve=learning_curve,
             extra=extra,
@@ -250,40 +256,16 @@ def set_corrupted(self) -> None:
         """Set the trial as corrupted."""
-        self.state = State.CORRUPTED
+        self.metadata.state = State.CORRUPTED
 
     def reset(self) -> None:
         """Reset the trial to a pending state."""
-        self.state = State.PENDING
         self.metadata = MetaData(
             id=self.metadata.id,
+            state=State.PENDING,
             location=self.metadata.location,
             previous_trial_id=self.metadata.previous_trial_id,
             previous_trial_location=self.metadata.previous_trial_location,
             time_sampled=self.metadata.time_sampled,
             sampling_worker_id=self.metadata.sampling_worker_id,
         )
-
-
-def to_config_result(
-    trial: Trial,
-    report: Report,
-    config_to_search_space: Callable[[RawConfig], SearchSpace],
-) -> ConfigResult:
-    """Convert the trial and report to a `ConfigResult` object."""
-    result: dict[str, Any] | ERROR
-    if report.reported_as == "success":
-        result = {
-            **report.extra,
-            "loss": report.loss,
-            "cost": report.cost,
-        }
-    else:
-        result = "error"
-
-    return ConfigResult(
-        trial.id,
-        config=config_to_search_space(trial.config),
-        result=result,
-        metadata=asdict(trial.metadata),
-    )
diff --git a/neps/status/__main__.py b/neps/status/__main__.py
index 744a3417b..b1266937e 100644
--- a/neps/status/__main__.py
+++ b/neps/status/__main__.py
@@ -1,18 +1,20 @@
 """Displays status information about a working directory of a neps.run.
 
 Usage:
-    python -m neps.status [-h] [--best_losses] [--best_configs] [--all_configs]
-    working_directory
+    python -m neps.status [-h] [--best_objective_to_minimizees] [--best_configs]
+    [--all_configs] working_directory
 
 Positional arguments:
     working_directory  The working directory given to neps.run
 
 Optional arguments:
     -h, --help         show this help message and exit
-    --best_losses      Show the trajectory of the best loss across evaluations
-    --best_configs     Show the trajectory of the best configs and their losses
+    --best_objective_to_minimizees  Show the trajectory of the best
+                       objective_to_minimize across evaluations
+    --best_configs     Show the trajectory of the best configs and their
+                       objective_to_minimize values across evaluations
 
-    --all_configs      Show all configs and their losses
+    --all_configs      Show all configs and their objective_to_minimize values
 
 Note:
     We have to use the __main__.py construct due to the issues explained in
@@ -36,14 +38,15 @@
 parser.add_argument("root_directory", type=Path, help="The working directory given to neps.run")
 parser.add_argument(
-    "--best_losses",
+    "--best_objective_to_minimizees",
     action="store_true",
-    help="Show the trajectory of the best loss across evaluations",
+    help="Show the trajectory of the best objective_to_minimize across evaluations",
 )
 parser.add_argument(
     "--best_configs",
     action="store_true",
-    help="Show the trajectory of the best configs and their losses across evaluations",
+    help="Show the trajectory of the best configs and their losses "
+    "across evaluations",
 )
 parser.add_argument(
     "--all_configs",
diff --git a/neps/status/status.py b/neps/status/status.py
index e2f43eb6a..2283730bb 100644
--- a/neps/status/status.py
+++ b/neps/status/status.py
@@ -9,9 +9,10 @@
 
 import pandas as pd
 
-from neps.state.filebased import load_filebased_neps_state
+from neps.runtime import get_workers_neps_state
+from neps.state.filebased import FileLocker
+from neps.state.neps_state import NePSState
 from neps.state.trial import Trial
-from neps.utils._locker import Locker
 from
neps.utils.types import ConfigID, _ConfigResultForStats if TYPE_CHECKING: @@ -37,9 +38,12 @@ def get_summary_dict( # NOTE: We don't lock the shared state since we are just reading and don't need to # make decisions based on the state - shared_state = load_filebased_neps_state(root_directory) + try: + shared_state = get_workers_neps_state() + except RuntimeError: + shared_state = NePSState.create_or_load(root_directory) - trials = shared_state.get_all_trials() + trials = shared_state.lock_and_read_trials() evaluated: dict[ConfigID, _ConfigResultForStats] = {} @@ -58,12 +62,12 @@ def get_summary_dict( in_progress = { trial.id: trial.config for trial in trials.values() - if trial.State == Trial.State.EVALUATING + if trial.metadata.state == Trial.State.EVALUATING } pending = { trial.id: trial.config for trial in trials.values() - if trial.State == Trial.State.PENDING + if trial.metadata.state == Trial.State.PENDING } summary: dict[str, Any] = {} @@ -77,7 +81,7 @@ def get_summary_dict( summary["num_pending_configs"] = len(in_progress) + len(pending) summary["num_pending_configs_with_worker"] = len(in_progress) - summary["best_loss"] = float("inf") + summary["best_objective_to_minimize"] = float("inf") summary["best_config_id"] = None summary["best_config_metadata"] = None summary["best_config"] = None @@ -85,9 +89,12 @@ def get_summary_dict( for evaluation in evaluated.values(): if evaluation.result == "error": summary["num_error"] += 1 - loss = evaluation.loss - if isinstance(loss, float) and loss < summary["best_loss"]: - summary["best_loss"] = loss + objective_to_minimize = evaluation.objective_to_minimize + if ( + isinstance(objective_to_minimize, float) + and objective_to_minimize < summary["best_objective_to_minimize"] + ): + summary["best_objective_to_minimize"] = objective_to_minimize summary["best_config"] = evaluation.config summary["best_config_id"] = evaluation.id summary["best_config_metadata"] = evaluation.metadata @@ -107,7 +114,8 @@ def status( Args: root_directory: The root directory given to neps.run. 
- best_losses: If true, show the trajectory of the best loss across evaluations + best_losses: If true, show the trajectory of the best objective_to_minimize + across evaluations best_configs: If true, show the trajectory of the best configs and their losses across evaluations all_configs: If true, show all configs and their losses @@ -133,34 +141,40 @@ def status( return summary["previous_results"], summary["pending_configs"] print() - print(f"Best loss: {summary['best_loss']}") + print(f"Best objective_to_minimize: {summary['best_objective_to_minimize']}") print(f"Best config id: {summary['best_config_id']}") print(f"Best config: {summary['best_config']}") if best_losses: print() - print("Best loss across evaluations:") - best_loss_trajectory = root_directory / "best_loss_trajectory.txt" - print(best_loss_trajectory.read_text(encoding="utf-8")) + print("Best objective_to_minimize across evaluations:") + best_objective_to_minimize_trajectory = ( + root_directory / "best_objective_to_minimize_trajectory.txt" + ) + print(best_objective_to_minimize_trajectory.read_text(encoding="utf-8")) if best_configs: print() print("Best configs and their losses across evaluations:") print(79 * "-") - best_loss_config = root_directory / "best_loss_with_config_trajectory.txt" - print(best_loss_config.read_text(encoding="utf-8")) + best_objective_to_minimize_config = ( + root_directory / "best_objective_to_minimize_with_config_trajectory.txt" + ) + print(best_objective_to_minimize_config.read_text(encoding="utf-8")) if all_configs: print() print("All evaluated configs and their losses:") print(79 * "-") - all_loss_config = root_directory / "all_losses_and_configs.txt" - print(all_loss_config.read_text(encoding="utf-8")) + all_objective_to_minimize_config = ( + root_directory / "all_losses_and_configs.txt" + ) + print(all_objective_to_minimize_config.read_text(encoding="utf-8")) return summary["previous_results"], summary["pending_configs"] -def _initiate_summary_csv(root_directory: str | Path) -> tuple[Path, Path, Locker]: +def _initiate_summary_csv(root_directory: str | Path) -> tuple[Path, Path, FileLocker]: """Initializes a summary CSV and an associated locker for file access control. Args: @@ -181,7 +195,7 @@ def _initiate_summary_csv(root_directory: str | Path) -> tuple[Path, Path, Locke csv_config_data = summary_csv_directory / "config_data.csv" csv_run_data = summary_csv_directory / "run_status.csv" - csv_locker = Locker(summary_csv_directory / ".csv_lock") + csv_locker = FileLocker(summary_csv_directory / ".csv_lock", poll=2, timeout=600) return ( csv_config_data, @@ -264,7 +278,7 @@ def _get_dataframes_from_summary( "num_evaluated_configs": summary["num_evaluated_configs"], "num_pending_configs": summary["num_pending_configs"], "num_pending_configs_with_worker": summary["num_pending_configs_with_worker"], - "best_loss": summary["best_loss"], + "best_objective_to_minimize": summary["best_objective_to_minimize"], "best_config_id": summary["best_config_id"], "num_error": summary["num_error"], } @@ -282,7 +296,7 @@ def _get_dataframes_from_summary( def _save_data_to_csv( config_data_file_path: Path, run_data_file_path: Path, - locker: Locker, + locker: FileLocker, config_data_df: pd.DataFrame, run_data_df: pd.DataFrame, ) -> None: @@ -299,7 +313,7 @@ def _save_data_to_csv( config_data_df: The DataFrame containing configuration data. run_data_df: The DataFrame containing additional run data. 
""" - with locker(poll=2): + with locker.lock(): try: pending_configs = run_data_df.loc["num_pending_configs", "value"] pending_configs_with_worker = run_data_df.loc[ @@ -309,7 +323,7 @@ def _save_data_to_csv( # Represents the last worker if int(pending_configs) == 0 and int(pending_configs_with_worker) == 0: config_data_df = config_data_df.sort_values( - by="result.loss", + by="result.objective_to_minimize", ascending=True, ) config_data_df.to_csv(config_data_file_path, index=False, mode="w") @@ -330,7 +344,7 @@ def _save_data_to_csv( # check if the current worker has more evaluated configs than the previous if int(num_evaluated_configs_csv) < num_evaluated_configs_run.iloc[0]: config_data_df = config_data_df.sort_values( - by="result.loss", + by="result.objective_to_minimize", ascending=True, ) config_data_df.to_csv(config_data_file_path, index=False, mode="w") @@ -338,7 +352,7 @@ def _save_data_to_csv( # Represents the first worker to be evaluated else: config_data_df = config_data_df.sort_values( - by="result.loss", + by="result.objective_to_minimize", ascending=True, ) config_data_df.to_csv(config_data_file_path, index=False, mode="w") @@ -347,13 +361,18 @@ def _save_data_to_csv( raise RuntimeError(f"Error during data saving: {e}") from e -def post_run_csv(root_directory: str | Path) -> None: +def post_run_csv(root_directory: str | Path) -> tuple[Path, Path]: """Create CSV files summarizing the run data. Args: root_directory: The root directory of the NePS run. + + Returns: + The paths to the configuration data CSV and the run data CSV. """ csv_config_data, csv_rundata, csv_locker = _initiate_summary_csv(root_directory) + csv_config_data = Path(csv_config_data).absolute().resolve() + csv_rundata = Path(csv_rundata).absolute().resolve() df_config_data, df_run_data = _get_dataframes_from_summary( root_directory, @@ -369,13 +388,4 @@ def post_run_csv(root_directory: str | Path) -> None: df_config_data, df_run_data, ) - - -# TODO(eddiebergman): This function name is misleading as it doesn't get anything. -def get_run_summary_csv(root_directory: str | Path) -> None: - """Create CSV files summarizing the run data. - - Args: - root_directory: The root directory of the NePS run. 
- """ - post_run_csv(root_directory=root_directory) + return csv_config_data, csv_rundata diff --git a/neps/utils/_locker.py b/neps/utils/_locker.py deleted file mode 100644 index 0b9a92d58..000000000 --- a/neps/utils/_locker.py +++ /dev/null @@ -1,61 +0,0 @@ -from __future__ import annotations - -from contextlib import contextmanager -from pathlib import Path -from typing import IO, Iterator - -import portalocker as pl - -EXCLUSIVE_NONE_BLOCKING = pl.LOCK_EX | pl.LOCK_NB - - -class Locker: - FailedToAcquireLock = pl.exceptions.LockException - - def __init__(self, lock_path: Path): - self.lock_path = lock_path - self.lock_path.touch(exist_ok=True) - - @contextmanager - def try_lock(self) -> Iterator[bool]: - try: - with self.acquire(fail_when_locked=True): - yield True - except self.FailedToAcquireLock: - yield False - - def is_locked(self) -> bool: - with self.try_lock() as acquired_lock: - return not acquired_lock - - @contextmanager - def __call__( - self, - poll: float = 1, - *, - timeout: float | None = None, - fail_when_locked: bool = False, - ) -> Iterator[IO]: - with pl.Lock( - self.lock_path, - check_interval=poll, - timeout=timeout, - flags=EXCLUSIVE_NONE_BLOCKING, - fail_when_locked=fail_when_locked, - ) as fh: - yield fh # We almost never use it but nothing better to yield - - @contextmanager - def acquire( - self, - poll: float = 1.0, - *, - timeout: float | None = None, - fail_when_locked: bool = False, - ) -> Iterator[IO]: - with self( - poll, - timeout=timeout, - fail_when_locked=fail_when_locked, - ) as fh: - yield fh diff --git a/neps/utils/cli.py b/neps/utils/cli.py index b5492c50b..901b2ac75 100644 --- a/neps/utils/cli.py +++ b/neps/utils/cli.py @@ -1,27 +1,177 @@ +# flake8: noqa """This module provides a command-line interface (CLI) for NePS.""" from __future__ import annotations - +import warnings +from typing import Tuple +from datetime import timedelta, datetime +import seaborn as sns +import matplotlib.pyplot as plt +import os +import numpy as np import argparse import logging +import yaml from pathlib import Path - +from typing import Optional, List import neps from neps.api import Default +from neps.state.seed_snapshot import SeedSnapshot +from neps.status.status import post_run_csv +import pandas as pd +from neps.utils.run_args import ( + RUN_ARGS, + EVALUATE_PIPELINE, + ROOT_DIRECTORY, + POST_RUN_SUMMARY, + MAX_EVALUATIONS_PER_RUN, + MAX_EVALUATIONS_TOTAL, + MAX_COST_TOTAL, + PIPELINE_SPACE, + DEVELOPMENT_STAGE_ID, + TASK_ID, + SEARCHER, + SEARCHER_KWARGS, + IGNORE_ERROR, + OBJECTIVE_TO_MINIMIZE_VALUE_ON_ERROR, + COST_VALUE_ON_ERROR, + CONTINUE_UNTIL_MAX_EVALUATION_COMPLETED, + OVERWRITE_WORKING_DIRECTORY, + get_run_args_from_yaml, +) +from neps.optimizers.base_optimizer import BaseOptimizer +from neps.utils.run_args import load_and_return_object +from neps.state.neps_state import NePSState +from neps.state.trial import Trial +from neps.exceptions import TrialNotFoundError +from neps.status.status import get_summary_dict +from neps.api import _run_args +from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo + +# Suppress specific warnings +warnings.filterwarnings("ignore", category=UserWarning, module="torch._utils") + + +def validate_directory(path: Path) -> bool: + """ + Validates whether the given path exists and is a directory. + + Args: + path (Path): The path to validate. + + Returns: + bool: True if valid, False otherwise. 
+ """ + if not path.exists(): + print(f"Error: The directory '{path}' does not exist.") + return False + if not path.is_dir(): + print(f"Error: The path '{path}' exists but is not a directory.") + return False + return True + + +def get_root_directory(args: argparse.Namespace) -> Optional[Path]: + # Command-line argument handling + if args.root_directory: + root_dir = Path(args.root_directory) + if validate_directory(root_dir): + return root_dir + else: + return None + + # Configuration file handling + config_path = Path("run_config.yaml").resolve() + if config_path.exists(): + try: + with config_path.open("r") as file: + config = yaml.safe_load(file) + except yaml.YAMLError as e: + print(f"Error parsing '{config_path}': {e}") + return None + + root_directory = config.get(ROOT_DIRECTORY) + if root_directory: + root_directory_path = Path(root_directory) + if validate_directory(root_directory_path): + return root_directory_path + else: + return None + else: + print( + "Error: The 'run_config.yaml' file exists but does not contain the " + "'root_directory' key." + ) + return None + else: + print( + "Error: 'root_directory' must be provided as a command-line argument " + "or defined in 'run_config.yaml'." + ) + return None def init_config(args: argparse.Namespace) -> None: """Creates a 'run_args' configuration YAML file template if it does not already exist. """ - config_path = Path(args.config_path) if args.config_path else Path("config.yaml") - if not config_path.exists(): + config_path = ( + Path(args.config_path).resolve() + if args.config_path + else Path("run_config.yaml").resolve() + ) + + if args.database: + if config_path.exists(): + run_args = get_run_args_from_yaml(config_path) + max_cost_total = run_args.get(MAX_COST_TOTAL) + # Create the optimizer + _, optimizer_info = load_optimizer(run_args) + if optimizer_info is None: + return + + try: + directory = run_args.get(ROOT_DIRECTORY) + if directory is None: + return + else: + directory = Path(directory) + is_new = not directory.exists() + _ = NePSState.create_or_load( + path=directory, + optimizer_info=OptimizerInfo(optimizer_info), + optimizer_state=OptimizationState( + seed_snapshot=SeedSnapshot.new_capture(), + budget=( + BudgetInfo(max_cost_total=max_cost_total, used_cost_budget=0) + if max_cost_total is not None + else None + ), + shared_state=None, # TODO: Unused for the time being... + ), + ) + if is_new: + print("NePS state was successfully created.") + else: + print("NePS state was already created.") + except Exception as e: + print(f"Error creating neps state: {e}") + else: + print( + f"{config_path} does not exist. Make sure that your configuration " + f"file already exists if you don't have specified your own path. 
" + f"Run 'neps init' to create run_config.yaml" + ) + + elif not config_path.exists(): with config_path.open("w") as file: - file.write( - """# Add your NEPS configuration settings here + template = args.template if args.template else "basic" + if template == "basic": + file.write( + """# Add your NEPS configuration settings here -run_pipeline: - path: "path/to/your/run_pipeline.py" +evaluate_pipeline: + path: "path/to/your/evaluate_pipeline.py" name: name_of_your_pipeline_function pipeline_space: @@ -42,9 +192,56 @@ def init_config(args: argparse.Namespace) -> None: max_evaluations_total: overwrite_working_directory: """ - ) + ) + elif template == "complete": + file.write( + """# Full Configuration Template for NePS + +evaluate_pipeline: + path: path/to/your/evaluate_pipeline.py # Path to the function file + name: example_pipeline # Function name within the file + +pipeline_space: + learning_rate: + lower: 1e-5 + upper: 1e-1 + log: true + epochs: + lower: 5 + upper: 20 + is_fidelity: true + optimizer: + choices: [adam, sgd, adamw] + batch_size: 64 + +root_directory: path/to/results # Directory for result storage +max_evaluations_total: 20 # Budget +max_cost_total: + +# Debug and Monitoring +overwrite_working_directory: false +post_run_summary: true +development_stage_id: +task_id: + +# Parallelization Setup +max_evaluations_per_run: +continue_until_max_evaluation_completed: true + +# Error Handling +objective_to_minimize_value_on_error: +cost_value_on_error: +ignore_errors: + +# Customization Options +searcher: hyperband # Internal key to select a NePS optimizer. + +# Hooks +pre_load_hooks: +""" + ) else: - pass + print(f"Path {config_path} does already exist.") def parse_kv_pairs(kv_list: list[str]) -> dict: @@ -83,44 +280,885 @@ def run_optimization(args: argparse.Namespace) -> None: """Collects arguments from the parser and runs the NePS optimization. Args: args (argparse.Namespace): Parsed command-line arguments. 
""" + if isinstance(args.run_args, Default): + run_args = Path("run_config.yaml") + else: + run_args = args.run_args + if not isinstance(args.evaluate_pipeline, Default): + module_path, function_name = args.evaluate_pipeline.split(":") + evaluate_pipeline = load_and_return_object( + module_path, function_name, EVALUATE_PIPELINE + ) + + else: + evaluate_pipeline = args.evaluate_pipeline + kwargs = {} if args.searcher_kwargs: kwargs = parse_kv_pairs(args.searcher_kwargs) # convert kwargs # Collect arguments from args and prepare them for neps.run options = { - "run_args": args.run_args, - "pipeline_space": args.pipeline_space, - "root_directory": args.root_directory, - "overwrite_working_directory": args.overwrite_working_directory, - "post_run_summary": args.post_run_summary, - "development_stage_id": args.development_stage_id, - "task_id": args.task_id, - "max_evaluations_total": args.max_evaluations_total, - "max_evaluations_per_run": args.max_evaluations_per_run, - "continue_until_max_evaluation_completed": ( + RUN_ARGS: run_args, + EVALUATE_PIPELINE: evaluate_pipeline, + PIPELINE_SPACE: args.pipeline_space, + ROOT_DIRECTORY: args.root_directory, + OVERWRITE_WORKING_DIRECTORY: args.overwrite_working_directory, + POST_RUN_SUMMARY: args.post_run_summary, + DEVELOPMENT_STAGE_ID: args.development_stage_id, + TASK_ID: args.task_id, + MAX_EVALUATIONS_TOTAL: args.max_evaluations_total, + MAX_EVALUATIONS_PER_RUN: args.max_evaluations_per_run, + CONTINUE_UNTIL_MAX_EVALUATION_COMPLETED: ( args.continue_until_max_evaluation_completed ), - "max_cost_total": args.max_cost_total, - "ignore_errors": args.ignore_errors, - "loss_value_on_error": args.loss_value_on_error, - "cost_value_on_error": args.cost_value_on_error, - "searcher": args.searcher, + MAX_COST_TOTAL: args.max_cost_total, + IGNORE_ERROR: args.ignore_errors, + OBJECTIVE_TO_MINIMIZE_VALUE_ON_ERROR: args.objective_to_minimize_value_on_error, + COST_VALUE_ON_ERROR: args.cost_value_on_error, + SEARCHER: args.searcher, **kwargs, } logging.basicConfig(level=logging.INFO) neps.run(**options) +def info_config(args: argparse.Namespace) -> None: + """Handles the info-config command by providing information based on directory + and id.""" + directory_path = get_root_directory(args) + if directory_path is None: + return + config_id = args.id + + neps_state = load_neps_state(directory_path) + if neps_state is None: + return + try: + trial = neps_state.unsafe_retry_get_trial_by_id(config_id) + except TrialNotFoundError: + print(f"No trial found with ID {config_id}.") + return + + print("Trial Information:") + print(f" Trial ID: {trial.metadata.id}") + print(f" State: {trial.metadata.state}") + print(f" Configurations:") + for key, value in trial.config.items(): + print(f" {key}: {value}") + + print("\nMetadata:") + print(f" Location: {trial.metadata.location}") + print(f" Previous Trial ID: {trial.metadata.previous_trial_id}") + print(f" Sampling Worker ID: {trial.metadata.sampling_worker_id}") + print(f" Time Sampled: {convert_timestamp(trial.metadata.time_sampled)}") + print(f" Evaluating Worker ID: {trial.metadata.evaluating_worker_id}") + print(f" Evaluation Duration: {format_duration(trial.metadata.evaluation_duration)}") + print(f" Time Started: {convert_timestamp(trial.metadata.time_started)}") + print(f" Time End: {convert_timestamp(trial.metadata.time_end)}") + + if trial.report is not None: + print("\nReport:") + print(f" Objective_to_minimize: {trial.report.objective_to_minimize}") + print(f" Cost: {trial.report.cost}") + print(f" Reported 
As: {trial.report.reported_as}") + error = trial.report.err + if error is not None: + print(f" Error Type: {type(error).__name__}") + print(f" Error Message: {str(error)}") + print(f" Traceback:") + print(f" {trial.report.tb}") + else: + print("No report available.") + + +def load_neps_errors(args: argparse.Namespace) -> None: + """Handles the 'errors' command by loading errors from the neps_state.""" + directory_path = get_root_directory(args) + if directory_path is None: + return + + neps_state = load_neps_state(directory_path) + if neps_state is None: + return + errors = neps_state.lock_and_get_errors() + + if not errors.errs: + print("No errors found.") + return + + # Print out the errors in a human-readable format + print(f"Loaded Errors from directory: {directory_path}\n") + + for error in errors.errs: + print(f"Error in Trial ID: {error.trial_id}") + print(f" Worker ID: {error.worker_id}") + print(f" Error Type: {error.err_type}") + print(f" Error Message: {error.err}") + print(f" Traceback:") + print(f"{error.tb}") + print("\n" + "-" * 50 + "\n") + + +def sample_config(args: argparse.Namespace) -> None: + """Handles the sample-config command which samples configurations from the NePS + state.""" + # Load run_args from the provided path or default to run_config.yaml + if args.run_args: + run_args_path = Path(args.run_args) + else: + run_args_path = Path("run_config.yaml") + + if not run_args_path.exists(): + print(f"Error: run_args file {run_args_path} does not exist.") + return + + run_args = get_run_args_from_yaml(run_args_path) + + # Get root_directory from the run_args + root_directory = run_args.get(ROOT_DIRECTORY) + if not root_directory: + print("Error: 'root_directory' is not specified in the run_args file.") + return + + root_directory = Path(root_directory) + if not root_directory.exists(): + print(f"Error: The directory {root_directory} does not exist.") + return + + neps_state = load_neps_state(root_directory) + if neps_state is None: + return + + # Get the worker_id and number_of_configs from arguments + worker_id = args.worker_id + num_configs = args.number_of_configs if args.number_of_configs else 1 + + optimizer, _ = load_optimizer(run_args) + if optimizer is None: + return + + # Sample trials + for _ in range(num_configs): + try: + trial = neps_state.lock_and_sample_trial(optimizer, worker_id=worker_id) + except Exception as e: + print(f"Error during configuration sampling: {e}") + continue # Skip to the next iteration + + print(f"Sampled configuration with Trial ID: {trial.id}") + print(f"Location: {trial.metadata.location}") + print("Configuration:") + for key, value in trial.config.items(): + print(f" {key}: {value}") + print("\n") + + +def convert_timestamp(timestamp: float | None) -> str: + """Convert a UNIX timestamp to a human-readable datetime string.""" + if timestamp is None: + return "None" + return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S") + + +def format_duration(seconds: float | None) -> str: + """Convert duration in seconds to a h:min:sec format.""" + if seconds is None: + return "None" + duration = str(timedelta(seconds=seconds)) + # Remove milliseconds for alignment + if "." 
in duration: + duration = duration.split(".")[0] + return duration + + +def compute_duration(start_time: float) -> str: + """Compute duration from start_time to current time.""" + return format_duration(datetime.now().timestamp() - start_time) + + +def status(args: argparse.Namespace) -> None: + """Handles the status command, providing a summary of the NEPS run.""" + # Get the root_directory from args or load it from run_config.yaml + directory_path = get_root_directory(args) + if directory_path is None: + return + + neps_state = load_neps_state(directory_path) + if neps_state is None: + return + + summary = get_summary_dict(directory_path, add_details=True) + + # Calculate the number of trials in different states + trials = neps_state.lock_and_read_trials() + evaluating_trials_count = sum( + 1 for trial in trials.values() if trial.metadata.state == Trial.State.EVALUATING + ) + pending_trials_count = summary["num_pending_configs"] + succeeded_trials_count = summary["num_evaluated_configs"] - summary["num_error"] + failed_trials_count = summary["num_error"] + + # Print summary + print("NePS Status:") + print("-----------------------------") + print(f"Optimizer: {neps_state.lock_and_get_optimizer_info().info['searcher_alg']}") + print(f"Succeeded Trials: {succeeded_trials_count}") + print(f"Failed Trials (Errors): {failed_trials_count}") + print(f"Active Trials: {evaluating_trials_count}") + print(f"Pending Trials: {pending_trials_count}") + print(f"Best Objective_to_minimize Achieved: {summary['best_objective_to_minimize']}") + + print("\nLatest Trials:") + print("-----------------------------") + + # Retrieve and sort the trials by time_sampled + sorted_trials = sorted( + trials.values(), key=lambda t: t.metadata.time_sampled, reverse=True + ) + + # Filter trials based on state + if args.pending: + filtered_trials = [ + trial for trial in sorted_trials if trial.metadata.state.name == "PENDING" + ] + elif args.evaluating: + filtered_trials = [ + trial for trial in sorted_trials if trial.metadata.state.name == "EVALUATING" + ] + elif args.succeeded: + filtered_trials = [ + trial for trial in sorted_trials if trial.metadata.state.name == "SUCCESS" + ] + else: + filtered_trials = sorted_trials[:7] + + # Define column headers with fixed width + header_format = "{:<20} {:<10} {:<10} {:<40} {:<12} {:<10}" + row_format = "{:<20} {:<10} {:<10} {:<40} {:<12} {:<10}" + + # Print the header + print( + header_format.format( + "Sampled Time", + "Duration", + "Trial ID", + "Worker ID", + "State", + "Objective_to_minimize", + ) + ) + + # Print the details of the filtered trials + for trial in filtered_trials: + time_sampled = convert_timestamp(trial.metadata.time_sampled) + if trial.metadata.state.name in ["PENDING", "EVALUATING"]: + duration = compute_duration(trial.metadata.time_sampled) + else: + duration = ( + format_duration(trial.metadata.evaluation_duration) + if trial.metadata.evaluation_duration + else "N/A" + ) + trial_id = trial.id + worker_id = trial.metadata.sampling_worker_id + state = trial.metadata.state.name + objective_to_minimize = ( + f"{trial.report.objective_to_minimize:.6f}" + if (trial.report and trial.report.objective_to_minimize is not None) + else "N/A" + ) + + print( + row_format.format( + time_sampled, duration, trial_id, worker_id, state, objective_to_minimize + ) + ) + + # If no specific filter is applied, print the best trial and optimizer info + if not args.pending and not args.evaluating and not args.succeeded: + if summary["best_config_id"] is not None: + print("\nBest 
Trial:") + print("-----------------------------") + print(f"ID: {summary['best_config_id']}") + print(f"Loss: {summary['best_objective_to_minimize']}") + print("Config:") + for key, value in summary["best_config"].items(): + print(f" {key}: {value}") + + print( + f"\nMore details: neps info-config {summary['best_config_id']} " + f"(use --root-directory if not using run_config.yaml)" + ) + else: + print("\nBest Trial:") + print("-----------------------------") + print("\nNo successful trial found.") + + # Display optimizer information + optimizer_info = neps_state.lock_and_get_optimizer_info().info + searcher_name = optimizer_info.get("searcher_name", "N/A") + searcher_alg = optimizer_info.get("searcher_alg", "N/A") + searcher_args = optimizer_info.get("searcher_args", {}) + + print("\nOptimizer Information:") + print("-----------------------------") + print(f"Name: {searcher_name}") + print(f"Algorithm: {searcher_alg}") + print("Parameter:") + for arg, value in searcher_args.items(): + print(f" {arg}: {value}") + + print("-----------------------------") + + +def results(args: argparse.Namespace) -> None: + """Handles the 'results' command by displaying incumbents, optionally plotting, + and dumping results to files based on the specified options.""" + directory_path = get_root_directory(args) + if directory_path is None: + return + + # Attempt to generate the summary CSV + try: + csv_config_data_path, _ = post_run_csv(directory_path) + except Exception as e: + print(f"Error generating summary CSV: {e}") + return + + summary_csv_dir = csv_config_data_path.parent # 'summary_csv' directory + + # Load NePS state + neps_state = load_neps_state(directory_path) + if neps_state is None: + return + + def sort_trial_id(trial_id: str) -> List[int]: + parts = trial_id.split("_") # Split the ID by '_' + # Convert each part to an integer for proper numeric sorting + return [int(part) for part in parts] + + trials = neps_state.lock_and_read_trials() + sorted_trials = sorted(trials.values(), key=lambda x: sort_trial_id(x.id)) + + # Compute incumbents + incumbents = compute_incumbents(sorted_trials) + incumbents_ids = [trial.id for trial in incumbents] + + # Handle Dump Options + if args.dump_all_configs or args.dump_incumbents: + if args.dump_all_configs: + dump_all_configs(csv_config_data_path, summary_csv_dir, args.dump_all_configs) + return + + if args.dump_incumbents: + dump_incumbents( + csv_config_data_path, + summary_csv_dir, + args.dump_incumbents, + incumbents_ids, + ) + return + + # Display Results + display_results(directory_path, incumbents) + + # Handle Plotting + if args.plot: + plot_path = plot_incumbents(sorted_trials, incumbents, summary_csv_dir) + print(f"Plot saved to '{plot_path}'.") + + +def load_neps_state(directory_path: Path) -> Optional[NePSState]: + """Load the NePS state with error handling.""" + try: + return NePSState.create_or_load(directory_path, load_only=True) + except Exception as e: + print(f"Unexpected error loading NePS state: {e}") + return None + + +def compute_incumbents(sorted_trials: List[Trial]) -> List[Trial]: + """Compute the list of incumbent trials based on the best objective_to_minimize.""" + best_objective_to_minimize = float("inf") + incumbents = [] + for trial in sorted_trials: + if ( + trial.report is not None + and trial.report.objective_to_minimize is not None + and trial.report.objective_to_minimize < best_objective_to_minimize + ): + best_objective_to_minimize = trial.report.objective_to_minimize + incumbents.append(trial) + return incumbents[::-1] # 
Reverse for most recent first + + +def dump_all_configs( + csv_config_data_path: Path, summary_csv_dir: Path, dump_format: str +) -> None: + """Dump all configurations to the specified format.""" + dump_format = dump_format.lower() + supported_formats = ["csv", "json", "parquet"] + if dump_format not in supported_formats: + print( + f"Unsupported dump format: '{dump_format}'. " + f"Supported formats are: {supported_formats}." + ) + return + + base_name = csv_config_data_path.stem # 'config_data' + + if dump_format == "csv": + # CSV is already available + print( + f"All trials successfully dumped to '{summary_csv_dir}/{base_name}.{dump_format}'." + ) + else: + # Define output file path with desired extension + output_file_name = f"{base_name}.{dump_format}" + output_file_path = summary_csv_dir / output_file_name + + try: + # Read the existing CSV into DataFrame + df = pd.read_csv(csv_config_data_path) + + # Save to the desired format + if dump_format == "json": + df.to_json(output_file_path, orient="records", indent=4) + elif dump_format == "parquet": + df.to_parquet(output_file_path, index=False) + + print(f"All trials successfully dumped to '{output_file_path}'.") + except Exception as e: + print(f"Error dumping all trials to '{dump_format}': {e}") + + +def dump_incumbents( + csv_config_data_path: Path, + summary_csv_dir: Path, + dump_format: str, + incumbents_ids: List[str], +) -> None: + """Dump incumbent trials to the specified format.""" + dump_format = dump_format.lower() + supported_formats = ["csv", "json", "parquet"] + if dump_format not in supported_formats: + print( + f"Unsupported dump format: '{dump_format}'. Supported formats are: {supported_formats}." + ) + return + + base_name = "incumbents" # Name for incumbents file + + if not incumbents_ids: + print("No incumbent trials found to dump.") + return + + try: + # Read the existing CSV into DataFrame + df = pd.read_csv(csv_config_data_path) + + # Filter DataFrame for incumbent IDs + df_incumbents = df[df["config_id"].isin(incumbents_ids)] + + if df_incumbents.empty: + print("No incumbent trials found in the summary CSV.") + return + + # Define output file path with desired extension + output_file_name = f"{base_name}.{dump_format}" + output_file_path = summary_csv_dir / output_file_name + + # Save to the desired format + if dump_format == "csv": + df_incumbents.to_csv(output_file_path, index=False) + elif dump_format == "json": + df_incumbents.to_json(output_file_path, orient="records", indent=4) + elif dump_format == "parquet": + df_incumbents.to_parquet(output_file_path, index=False) + + print(f"Incumbent trials successfully dumped to '{output_file_path}'.") + except Exception as e: + print(f"Error dumping incumbents to '{dump_format}': {e}") + + +def display_results(directory_path: Path, incumbents: List[Trial]) -> None: + """Display the results of the NePS run.""" + print(f"Results for NePS run: {directory_path}") + print("--------------------") + print("All Incumbent Trials:") + header = f"{'ID':<6} {'Loss':<12} {'Config':<60}" + print(header) + print("-" * len(header)) + if incumbents: + for trial in incumbents: + if ( + trial.report is not None + and trial.report.objective_to_minimize is not None + ): + config = ", ".join(f"{k}: {v}" for k, v in trial.config.items()) + print( + f"{trial.id:<6} {trial.report.objective_to_minimize:<12.6f} {config:<60}" + ) + else: + print(f"Trial {trial.id} has no valid objective_to_minimize.") + else: + print("No Incumbent Trials found.") + + +def plot_incumbents( + all_trials: 
List[Trial], incumbents: List[Trial], directory_path: Path +) -> str: + """Plot the evolution of incumbent trials over the total number of trials.""" + id_to_index = {trial.id: idx + 1 for idx, trial in enumerate(all_trials)} + + # Collect data for plotting + x_values = [id_to_index[incumbent.id] for incumbent in incumbents] + y_values = [ + incumbent.report.objective_to_minimize + for incumbent in incumbents + if incumbent.report is not None + and incumbent.report.objective_to_minimize is not None + ] + + plt.figure(figsize=(12, 6)) + sns.lineplot( + x=x_values, + y=y_values, + marker="o", + linestyle="-", + markersize=8, + color="dodgerblue", + ) + plt.xlabel("Number of Trials") + plt.ylabel("Loss") + plt.title("Evolution of Incumbents Over Trials") + + # Dynamically set x-axis ticks based on the number of trials + num_trials = len(all_trials) + if num_trials < 20: + tick_spacing = 1 # Every trial is labeled if fewer than 20 trials + else: + tick_spacing = max( + 5, round(num_trials / 10 / 5) * 5 + ) # Round to nearest multiple of 5 + + ticks = np.arange(0, num_trials + 1, tick_spacing) + ticks[0] = 1 + plt.xticks(ticks) + + sns.set_style("whitegrid") + plt.grid(True, linestyle="--", linewidth=0.5) + plt.tight_layout() + + # Save the figure + plot_file_name = "incumbents_evolution.png" + plot_path = os.path.join(directory_path, plot_file_name) + plt.savefig(plot_path) + plt.close() + + return plot_path + + +def print_help(args: Optional[argparse.Namespace] = None) -> None: + """Prints help information for the NEPS CLI.""" + help_text = """ +Usage: neps [COMMAND] [OPTIONS] + +Available Commands: +------------------- + +neps init [OPTIONS] + Generates a 'run_args' YAML template file. + Options: + --config-path (Optional: Specify the path for the config + file. Default is run_config.yaml) + --template [basic|complete] (Optional: Choose between a basic or complete template.) + --database (Optional: Creates a NEPS state. Requires an existing config.yaml.) + +neps run [OPTIONS] + Runs a neural pipeline search. + Options: + --run-args (Path to the YAML configuration file.) + --run-pipeline (Path and function for the pipeline.) + --pipeline-space (Path to the YAML defining the search space.) + --root-directory (Optional: Directory for saving progress and + synchronization. Default is 'root_directory' from run_config.yaml if not provided.) + --overwrite-working-directory (Deletes the working directory at the start of the run.) + --development-stage-id (Identifier for the development stage.) + --task-id (Identifier for the task.) + --post-run-summary/--no-post-run-summary (Toggle summary after running.) + --max-evaluations-total (Total number of evaluations to run.) + --max-evaluations-per-run (Max evaluations per run call.) + --continue-until-max-evaluation-completed (Continue until max evaluations are completed.) + --max-cost-total (Max cost before halting new evaluations.) + --ignore-errors (Ignore errors during optimization.) + --objective_to_minimize-value-on-error (Assumed objective_to_minimize value on error.) + --cost-value-on-error (Assumed cost value on error.) + --searcher (Searcher algorithm key for optimization.) + --searcher-kwargs ... (Additional kwargs for the searcher.) + +neps info-config [OPTIONS] + Provides detailed information about a specific configuration by its ID. + Options: + --root-directory (Optional: Path to your root_directory. Default is + 'root_directory' from run_config.yaml if not provided.) + +neps errors [OPTIONS] + Lists all errors from the specified NePS run. 
+    Options:
+    --root-directory (Optional: Path to your root_directory. Default is
+    'root_directory' from run_config.yaml if not provided.)
+
+neps sample-config [OPTIONS]
+    Sample a configuration from the existing NePS state.
+    Options:
+    --root-directory (Optional: Path to your root_directory. Default is
+    'root_directory' from run_config.yaml if not provided.)
+
+neps status [OPTIONS]
+    Check the status of the NePS run.
+    Options:
+    --root-directory (Optional: Path to your root_directory. Default is
+    'root_directory' from run_config.yaml if not provided.)
+    --pending (Show only pending trials.)
+    --evaluating (Show only evaluating trials.)
+    --succeeded (Show only succeeded trials.)
+
+neps results [OPTIONS]
+    Display results of the NePS run.
+    Options:
+    --root-directory (Optional: Path to your root_directory. Default is
+    'root_directory' from run_config.yaml if not provided.)
+    --plot (Plot the results if set.)
+
+neps help
+    Displays this help message.
+    """
+    print(help_text)
+
+
+def generate_markdown_from_parser(parser: argparse.ArgumentParser, filename: str) -> None:
+    lines = []
+
+    # Add the general parser description
+    if parser.description:
+        lines.append(f"# {parser.description}")
+        lines.append("\n")
+
+    # Extract subparsers (if they exist)
+    subcommands = {}
+    for action in parser._actions:
+        if isinstance(action, argparse._SubParsersAction):
+            for cmd, subparser in action.choices.items():
+                subcommands[cmd] = subparser
+
+    # Order subcommands: init, run, status, help (always last), followed by any others
+    order = ["init", "run", "status"]
+    sorted_subcommands = [cmd for cmd in order if cmd in subcommands]
+    sorted_subcommands += [
+        cmd for cmd in subcommands if cmd not in order and cmd != "help"
+    ]
+    if "help" in subcommands:
+        sorted_subcommands.append("help")
+
+    # Iterate through sorted subcommands and generate the documentation
+    for cmd in sorted_subcommands:
+        subparser = subcommands[cmd]
+
+        # Command header
+        lines.append(f"## **`{cmd}` Command**")
+        lines.append("\n")
+
+        # Command description
+        if subparser.description:
+            lines.append(f"{subparser.description}")
+            lines.append("\n")
+
+        # Extract and list options (Required and Optional)
+        lines.append("**Arguments:**")
+        lines.append("\n")
+
+        required_args = []
+        optional_args = []
+
+        for sub_action in subparser._actions:
+            option_strings = ", ".join(sub_action.option_strings)
+            option_help = sub_action.help or "No description available."
+ # Categorize based on whether the argument is required + if sub_action.required: + required_args.append(f"- `{option_strings}` (Required): {option_help}") + else: + optional_args.append(f"- `{option_strings}` (Optional): {option_help}") + + # Add Required arguments section + if required_args: + lines.extend(required_args) + lines.append("\n") + + # Add Optional arguments section + if optional_args: + lines.extend(optional_args) + lines.append("\n") + + # Add Example Usage + lines.append(f"**Example Usage:**") + lines.append("\n") + lines.append("```bash") + lines.append(f"neps {cmd} --help") + lines.append("```") + lines.append("\n") + + # Write the lines to the specified markdown file + with open(filename, "w") as f: + f.write("\n".join(lines)) + + +def handle_report_config(args: argparse.Namespace) -> None: + """Handles the report-config command which updates reports for + trials in the NePS state.""" + # Load run_args from the provided path or default to run_config.yaml + if args.run_args: + run_args_path = Path(args.run_args) + else: + run_args_path = Path("run_config.yaml") + if not run_args_path.exists(): + print(f"Error: run_args file {run_args_path} does not exist.") + return + + run_args = get_run_args_from_yaml(run_args_path) + + # Get root_directory from run_args + root_directory = run_args.get("root_directory") + if not root_directory: + print("Error: 'root_directory' is not specified in the run_args file.") + return + + root_directory = Path(root_directory) + if not root_directory.exists(): + print(f"Error: The directory {root_directory} does not exist.") + return + + neps_state = load_neps_state(root_directory) + if neps_state is None: + return + + # Load the existing trial by ID + try: + trial = neps_state.unsafe_retry_get_trial_by_id(args.trial_id) + if not trial: + print(f"No trial found with ID {args.trial_id}") + return + except Exception as e: + print(f"Error fetching trial with ID {args.trial_id}: {e}") + return None + + # Update state of the trial and create report + report = trial.set_complete( + report_as=args.reported_as, + time_end=args.time_end, + objective_to_minimize=args.objective_to_minimize, + cost=args.cost, + learning_curve=args.learning_curve, + err=Exception(args.err) if args.err else None, + tb=args.tb, + evaluation_duration=args.duration, + extra={}, + ) + + # Update NePS state + try: + neps_state._report_trial_evaluation( + trial=trial, report=report, worker_id=args.worker_id + ) + except Exception as e: + print(f"Error updating the report for trial {args.trial_id}: {e}") + return None + + print(f"Report for trial ID {trial.metadata.id} has been successfully updated.") + + print("\n--- Report Summary ---") + print(f"Trial ID: {trial.metadata.id}") + print(f"Reported As: {report.reported_as}") + print(f"Time Ended: {convert_timestamp(trial.metadata.time_end)}") + print( + f"Loss: {report.objective_to_minimize if report.objective_to_minimize is not None else 'N/A'}" + ) + print(f"Cost: {report.cost if report.cost is not None else 'N/A'}") + print(f"Evaluation Duration: {format_duration(report.evaluation_duration)}") + + if report.learning_curve: + print(f"Learning Curve: {' '.join(map(str, report.learning_curve))}") + else: + print("Learning Curve: N/A") + + if report.err: + print(f"Error Type: {type(report.err).__name__}") + print(f"Error Message: {str(report.err)}") + print("Traceback:") + print(report.tb if report.tb else "N/A") + else: + print("Error: None") + + print("----------------------\n") + + +def load_optimizer(run_args: dict) -> 
Tuple[Optional[BaseOptimizer], Optional[dict]]: + """Create an optimizer.""" + try: + searcher_info = { + "searcher_name": "", + "searcher_alg": "", + "searcher_selection": "", + "neps_decision_tree": True, + "searcher_args": {}, + } + + # Call _run_args() to create the optimizer + optimizer, searcher_info = _run_args( + searcher_info=searcher_info, + pipeline_space=run_args.get(PIPELINE_SPACE), + max_cost_total=run_args.get(MAX_COST_TOTAL, None), + ignore_errors=run_args.get(IGNORE_ERROR, False), + objective_to_minimize_value_on_error=run_args.get( + OBJECTIVE_TO_MINIMIZE_VALUE_ON_ERROR, None + ), + cost_value_on_error=run_args.get(COST_VALUE_ON_ERROR, None), + searcher=run_args.get(SEARCHER, "default"), + **run_args.get(SEARCHER_KWARGS, {}), + ) + return optimizer, searcher_info + except Exception as e: + print(f"Error creating optimizer: {e}") + return None, None + + +def parse_time_end(time_str: str) -> float: + """Parses a UNIX timestamp or a human-readable time string + and returns a UNIX timestamp.""" + try: + # First, try to interpret the input as a UNIX timestamp + return float(time_str) + except ValueError: + pass + + try: + # If that fails, try to interpret it as a human-readable datetime + # string (YYYY-MM-DD HH:MM:SS) + dt = datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S") + return dt.timestamp() # Convert to UNIX timestamp (float) + except ValueError: + raise argparse.ArgumentTypeError( + f"Invalid time format: '{time_str}'. " + f"Use UNIX timestamp or 'YYYY-MM-DD HH:MM:SS'." + ) + + def main() -> None: """CLI entry point. This function sets up the command-line interface (CLI) for NePS using argparse. It defines the available subcommands and their respective arguments. - - Available commands: - - init: Generates a 'run_args' YAML template file. - - run: Runs the optimization with specified configuration. """ parser = argparse.ArgumentParser(description="NePS Command Line Interface") subparsers = parser.add_subparsers( @@ -128,13 +1166,26 @@ def main() -> None: ) # Subparser for "init" command - parser_init = subparsers.add_parser("init", help="Generate 'run_args' YAML file") + parser_init = subparsers.add_parser("init", help="Generate 'run_args' " "YAML file") parser_init.add_argument( "--config-path", type=str, default=None, help="Optional custom path for generating the configuration file. " - "Default is 'config.yaml'.", + "Default is 'run_config.yaml'.", + ) + parser_init.add_argument( + "--template", + type=str, + choices=["basic", "complete"], + default="basic", + help="Optional: choose between templates. Required configs (basic) vs. " + "all NePS configs (complete).", + ) + parser_init.add_argument( + "--database", + action="store_true", + help="If set, creates a NEPS state. Requires an existing config.yaml.", ) parser_init.set_defaults(func=init_config) @@ -142,11 +1193,21 @@ def main() -> None: parser_run = subparsers.add_parser("run", help="Run a neural pipeline search.") # Adding arguments to the 'run' subparser with defaults parser_run.add_argument( - "run_args", + "--run-args", type=str, - help="Path to the YAML configuration file. For CLI usage this file must include " - "the 'run_pipeline' settings.", + help="Path to the YAML configuration file.", + default=Default(None), ) + parser_run.add_argument( + "--run-pipeline", + type=str, + help="Optional: Provide the path to a Python file and a function name separated " + "by a colon, e.g., 'path/to/module.py:function_name'. 
" + "If provided, it overrides the evaluate_pipeline setting from the YAML " + "configuration.", + default=Default(None), + ) + parser_run.add_argument( "--pipeline-space", type=str, @@ -167,7 +1228,7 @@ def main() -> None: action="store_true", default=Default(False), # noqa: FBT003 help="If set, deletes the working directory at the start of the run. " - "This is useful, for example, when debugging a run_pipeline function. " + "This is useful, for example, when debugging a evaluate_pipeline function. " "(default: %(default)s)", ) parser_run.add_argument( @@ -223,8 +1284,8 @@ def main() -> None: type=float, default=Default(None), help="No new evaluations will start when this cost is exceeded. Requires " - "returning a cost in the run_pipeline function, e.g., `return dict(" - "loss=loss, cost=cost)`. (default: %(default)s)", + "returning a cost in the evaluate_pipeline function, e.g., `return dict(" + "objective_to_minimize=objective_to_minimize, cost=cost)`. (default: %(default)s)", ) parser_run.add_argument( "--ignore-errors", @@ -234,7 +1295,7 @@ def main() -> None: "default)s)", ) parser_run.add_argument( - "--loss-value-on-error", + "--objective_to_minimize-value-on-error", type=float, default=Default(None), help="Loss value to assume on error. (default: %(default)s)", @@ -263,6 +1324,161 @@ def main() -> None: parser_run.set_defaults(func=run_optimization) + # Subparser for "info-config" command + parser_info_config = subparsers.add_parser( + "info-config", help="Provides information about " "specific config." + ) + parser_info_config.add_argument( + "id", type=str, help="The configuration ID to be used." + ) + parser_info_config.add_argument( + "--root-directory", + type=str, + help="Optional: The path to your root_directory. If not provided, " + "it will be loaded from run_config.yaml.", + ) + parser_info_config.set_defaults(func=info_config) + + # Subparser for "errors" command + parser_errors = subparsers.add_parser("errors", help="List all errors.") + parser_errors.add_argument( + "--root-directory", + type=str, + help="Optional: The path to your " + "root_directory. If not provided, it will be " + "loaded from run_config.yaml.", + ) + parser_errors.set_defaults(func=load_neps_errors) + + # Subparser for "sample-config" command + parser_sample_config = subparsers.add_parser( + "sample-config", help="Sample configurations from the existing NePS state." 
+ ) + parser_sample_config.add_argument( + "--worker-id", + type=str, + default="cli", + help="The worker ID for which the configuration is being sampled.", + ) + parser_sample_config.add_argument( + "--run-args", + type=str, + help="Optional: Path to the YAML configuration file.", + ) + parser_sample_config.add_argument( + "--number-of-configs", + type=int, + default=1, + help="Optional: Number of configurations to sample (default: 1).", + ) + parser_sample_config.set_defaults(func=sample_config) + + report_parser = subparsers.add_parser( + "report-config", help="Report the outcome of a specific trial" + ) + report_parser.add_argument("trial_id", type=str, help="ID of the trial to report") + report_parser.add_argument( + "reported_as", + type=str, + choices=["success", "failed", "crashed"], + help="Outcome of the trial", + ) + report_parser.add_argument( + "--worker-id", + type=str, + default="cli", + help="The worker ID for which the report is being submitted.", + ) + report_parser.add_argument( + "--objective_to_minimize", type=float, help="Objective value of the trial" + ) + report_parser.add_argument( + "--run-args", type=str, help="Path to the YAML file containing run configurations" + ) + report_parser.add_argument( + "--cost", type=float, help="Cost value of the trial (optional)" + ) + report_parser.add_argument( + "--learning-curve", + type=float, + nargs="+", + help="Learning curve as a list of floats (optional), provided like this " + "--learning-curve 0.9 0.3 0.1", + ) + report_parser.add_argument( + "--duration", type=float, help="Duration of the evaluation in sec (optional)" + ) + report_parser.add_argument("--err", type=str, help="Error message if any (optional)") + report_parser.add_argument( + "--tb", type=str, help="Traceback information if any (optional)" + ) + report_parser.add_argument( + "--time-end", + type=parse_time_end, # Using the custom parser function + help="The time the trial ended as either a " + "UNIX timestamp (float) or in 'YYYY-MM-DD HH:MM:SS' format", + ) + report_parser.set_defaults(func=handle_report_config) + + # Subparser for "status" command + parser_status = subparsers.add_parser( + "status", help="Check the status of the NePS run." + ) + parser_status.add_argument( + "--root-directory", + type=str, + help="Optional: The path to your root_directory. If not provided, " + "it will be loaded from run_config.yaml.", + ) + parser_status.add_argument( + "--pending", action="store_true", help="Show only pending trials." + ) + parser_status.add_argument( + "--evaluating", action="store_true", help="Show only evaluating trials." + ) + parser_status.add_argument( + "--succeeded", action="store_true", help="Show only succeeded trials." + ) + parser_status.set_defaults(func=status) + + # Subparser for "results" command + parser_results = subparsers.add_parser( + "results", help="Display results of the NePS run." + ) + parser_results.add_argument( + "--root-directory", + type=str, + help="Optional: The path to your root_directory. If not provided, " + "it will be loaded from run_config.yaml.", + ) + parser_results.add_argument( + "--plot", action="store_true", help="Plot the results if set."
+ ) + + # Create a mutually exclusive group for dump options + dump_group = parser_results.add_mutually_exclusive_group() + dump_group.add_argument( + "--dump-all-configs", + type=str, + choices=["csv", "json", "parquet"], + help="Dump all trials to a file in the specified format (csv, json, parquet).", + ) + dump_group.add_argument( + "--dump-incumbents", + type=str, + choices=["csv", "json", "parquet"], + help="Dump incumbent trials to a file in the specified format " + "(csv, json, parquet).", + ) + + parser_results.set_defaults(func=results) + + # Subparser for "help" command + parser_help = subparsers.add_parser("help", help="Displays help information.") + parser_help.set_defaults(func=print_help) + + # Regenerate the CLI markdown documentation + generate_markdown_from_parser(parser, "cli.md") args = parser.parse_args() if hasattr(args, "func"): diff --git a/neps/utils/common.py b/neps/utils/common.py index 2a9ca586d..6fd4a10ab 100644 --- a/neps/utils/common.py +++ b/neps/utils/common.py @@ -2,16 +2,17 @@ from __future__ import annotations +import gc import inspect +from collections.abc import Iterator, Mapping, Sequence +from contextlib import contextmanager from functools import partial from pathlib import Path -from typing import Any, Iterable, Mapping, Sequence +from typing import Any import torch import yaml -from neps.runtime import get_in_progress_trial, get_workers_neps_state - # TODO(eddiebergman): I feel like this function should throw an error if it can't # find anything to load, rather than returning None. In this case, we should provide @@ -34,6 +35,8 @@ def load_checkpoint( A dictionary containing the checkpoint values, or None if the checkpoint file does not exist hence no checkpointing was previously done. """ + from neps.runtime import get_in_progress_trial + if directory is None: trial = get_in_progress_trial() directory = trial.metadata.previous_trial_location @@ -47,7 +50,7 @@ def load_checkpoint( if not checkpoint_path.exists(): return None - checkpoint = torch.load(checkpoint_path) + checkpoint = torch.load(checkpoint_path, weights_only=True) if model is not None and "model_state_dict" in checkpoint: model.load_state_dict(checkpoint["model_state_dict"]) @@ -74,6 +77,8 @@ def save_checkpoint( optimizer: The optimizer to save. checkpoint_name: The name of the checkpoint file. """ + from neps.runtime import get_in_progress_trial + if directory is None: in_progress_trial = get_in_progress_trial() directory = in_progress_trial.metadata.location @@ -112,6 +117,8 @@ def load_lightning_checkpoint( A tuple containing the checkpoint path (str) and the loaded checkpoint data (dict) or (None, None) if no checkpoint files are found in the directory. """ + from neps.runtime import get_in_progress_trial + if previous_pipeline_directory is None: trial = get_in_progress_trial() previous_pipeline_directory = trial.metadata.previous_trial_location @@ -135,10 +142,13 @@ def load_lightning_checkpoint( assert len(ckpt_files) == 1 checkpoint_path = ckpt_files[0] - checkpoint = torch.load(checkpoint_path) + checkpoint = torch.load(checkpoint_path, weights_only=True) return checkpoint_path, checkpoint +_INITIAL_DIRECTORY_CACHE: dict[str, Path] = {} + + # TODO: We should have a better way to have a shared folder between trials. # Right now, the fidelity lineage is linear, however this will be a difficulty # when/if we have a tree structure.
""" + from neps.runtime import get_in_progress_trial, get_workers_neps_state + neps_state = get_workers_neps_state() if pipeline_directory is not None: - pipeline_directory = Path(pipeline_directory) # TODO: Hard coded assumption - config_id = pipeline_directory.name.split("_", maxsplit=1)[-1] - trial = neps_state.get_trial_by_id(config_id) + config_id = Path(pipeline_directory).name.split("_", maxsplit=1)[-1] + trial = neps_state.unsafe_retry_get_trial_by_id(config_id) else: trial = get_in_progress_trial() + if trial.metadata.id in _INTIAL_DIRECTORY_CACHE: + return _INTIAL_DIRECTORY_CACHE[trial.metadata.id] + # Recursively find the initial directory while (prev_trial_id := trial.metadata.previous_trial_id) is not None: - trial = neps_state.get_trial_by_id(prev_trial_id) + trial = neps_state.unsafe_retry_get_trial_by_id(prev_trial_id) initial_dir = trial.metadata.location # TODO: Hard coded assumption that we are operating in a filebased neps assert isinstance(initial_dir, str) - return Path(initial_dir) + path = Path(initial_dir) + + _INTIAL_DIRECTORY_CACHE[trial.metadata.id] = path + return path def get_searcher_data( @@ -236,7 +253,7 @@ def get_value(obj: Any) -> Any: """Honestly, don't know why you would use this. Please try not to.""" if obj is None: return None - if isinstance(obj, (str, int, float, bool)): + if isinstance(obj, str | int | float | bool): return obj if isinstance(obj, dict): return {key: get_value(value) for key, value in obj.items()} @@ -246,34 +263,6 @@ def get_value(obj: Any) -> Any: return obj.__name__ -def has_instance(itr: Iterable[Any], *types: type) -> bool: - """Check if any instance in the collection is of the given types.""" - return any(isinstance(el, types) for el in itr) - - -def filter_instances(itr: Iterable[Any], *types: type) -> list[Any]: - """Filter instances of a collection by the given types.""" - return [el for el in itr if isinstance(el, types)] - - -class MissingDependencyError(ImportError): - """Raise when a dependency is missing for an optional feature.""" - - def __init__(self, dep: str, cause: Exception, *args: Any): - """Initialize the error with the missing dependency and the original error.""" - super().__init__(dep, cause, *args) - self.dep = dep - self.__cause__ = cause # This is what `raise a from b` does - - def __str__(self) -> str: - return ( - f"Some required dependency-({self.dep}) to use this optional feature is " - f"missing. Please, include neps[experimental] dependency group in your " - f"installation of neps to be able to use all the optional features." 
- f" Otherwise, just install ({self.dep})" - ) - - def is_partial_class(obj: Any) -> bool: """Check if the object is a (partial) class, or an instance.""" if isinstance(obj, partial): @@ -281,7 +270,7 @@ def is_partial_class(obj: Any) -> bool: return inspect.isclass(obj) -def instance_from_map( # noqa: C901, PLR0912 +def instance_from_map( # noqa: C901 mapping: dict[str, Any], request: str | list | tuple | type, name: str = "mapping", @@ -332,9 +321,6 @@ def instance_from_map( # noqa: C901, PLR0912 else: raise ValueError(f"Object {request} invalid key for {name}") - if isinstance(instance, MissingDependencyError): - raise instance - # Check if the request is a class if it is mandatory if (args_dict or as_class) and not is_partial_class(instance): raise ValueError( @@ -355,3 +341,17 @@ def instance_from_map( # noqa: C901, PLR0912 raise TypeError(f"{e} when calling {instance} with {args_dict}") from e return instance + + +@contextmanager +def gc_disabled() -> Iterator[None]: + """Context manager to disable garbage collection for a block. + + We specifically put this around file I/O operations to minimize the time + spend garbage collecting while having the file handle open. + """ + gc.disable() + try: + yield + finally: + gc.enable() diff --git a/neps/utils/data_loading.py b/neps/utils/data_loading.py deleted file mode 100644 index a0f86210e..000000000 --- a/neps/utils/data_loading.py +++ /dev/null @@ -1,451 +0,0 @@ -"""Utility functions for loading data from disk.""" - -from __future__ import annotations - -import json -import os -import re -from dataclasses import asdict -from itertools import chain -from pathlib import Path -from typing import Any, Mapping, TypedDict - -import numpy as np -import yaml - -from neps.state.filebased import load_filebased_neps_state -from neps.utils.types import ERROR, ConfigID, ResultDict, _ConfigResultForStats - - -def _get_loss( - result: ERROR | ResultDict | float, - loss_value_on_error: float | None = None, - *, - ignore_errors: bool = False, -) -> ERROR | float: - if result == "error": - if ignore_errors: - return "error" - - if loss_value_on_error is not None: - return loss_value_on_error - - raise ValueError( - "An error happened during the execution of your run_pipeline function." - " You have three options: 1. If the error is expected and corresponds to" - " a loss value in your application (e.g., 0% accuracy), you can set" - " loss_value_on_error to some float. 2. If sometimes your pipeline" - " crashes randomly, you can set ignore_errors=True. 3. Fix your error." - ) - - if isinstance(result, dict): - return float(result["loss"]) - - assert isinstance(result, float) - return float(result) - - -def _get_cost( - result: ERROR | ResultDict | float, - cost_value_on_error: float | None = None, - *, - ignore_errors: bool = False, -) -> float | Any: - if result == "error": - if ignore_errors: - return "error" - - if cost_value_on_error is None: - raise ValueError( - "An error happened during the execution of your run_pipeline function." - " You have three options: 1. If the error is expected and corresponds to" - " a cost value in your application, you can set" - " cost_value_on_error to some float. 2. If sometimes your pipeline" - " crashes randomly, you can set ignore_errors=True. 3. Fix your error." 
- ) - - return cost_value_on_error - - if isinstance(result, Mapping): - return float(result["cost"]) - - return float(result) - - -def _get_learning_curve( - result: str | dict | float, - learning_curve_on_error: list[float] | float | None = None, - *, - ignore_errors: bool = False, -) -> list[float] | Any: - if result == "error": - if ignore_errors: - return "error" - - if learning_curve_on_error is None: - raise ValueError( - "An error happened during the execution of your run_pipeline function." - " You have three options: 1. If the error is expected and corresponds to" - " a learning curve value in your application, you can set" - " learning_curve_on_error to some float or list of floats." - " 2. If sometimes your pipeline" - " crashes randomly, you can set ignore_errors=True. 3. Fix your error." - ) - - if isinstance(learning_curve_on_error, float): - learning_curve_on_error = [learning_curve_on_error] - - return learning_curve_on_error - - if isinstance(result, dict): - return result["info_dict"]["learning_curve"] - - return float(result) - - -def read_tasks_and_dev_stages_from_disk( - paths: list[str | Path], -) -> dict[int, dict[int, dict[str, _ConfigResultForStats]]]: - """Reads the given tasks and dev stages from the disk. - - Args: - paths: List of paths to the previous runs. - - Returns: - dict[task_id, dict[dev_stage, dict[config_id, ConfigResult]]. - """ - path_iter = chain.from_iterable(Path(path).iterdir() for path in paths) - - results: dict[int, dict[int, dict[str, _ConfigResultForStats]]] = {} - - for task_dir_path in path_iter: - if not is_valid_task_path(task_dir_path): - continue - - task_id = get_id_from_path(task_dir_path) - if task_id is None: - continue - - results[task_id] = {} - - for dev_dir_path in task_dir_path.iterdir(): - if not is_valid_dev_path(dev_dir_path): - continue - - dev_id = get_id_from_path(dev_dir_path) - if dev_id is None: - continue - - state = load_filebased_neps_state(Path(dev_dir_path)) - trials = state.get_all_trials() - - evaluated: dict[ConfigID, _ConfigResultForStats] = {} - - for trial in trials.values(): - if trial.report is None: - continue - - _result_for_stats = _ConfigResultForStats( - trial.id, - trial.config, - trial.report.to_deprecate_result_dict(), - asdict(trial.metadata), - ) - evaluated[trial.id] = _result_for_stats - - results[task_id][dev_id] = evaluated - - return results - - -def read_user_prior_results_from_disk( - path: str | Path, -) -> dict[str, dict[str, _ConfigResultForStats]]: - """Reads the user prior results from the disk. - - Args: - path: Path to the user prior results. - - Returns: - dict[prior_dir_name, dict[config_id, ConfigResult]]. - """ - path = Path(path) - if not path.is_dir(): - raise ValueError(f"Path '{path}' is not a directory.") - - results = {} - for prior_dir in path.iterdir(): - if not prior_dir.is_dir(): - continue - - state = load_filebased_neps_state(Path(prior_dir)) - trials = state.get_all_trials() - evaluated: dict[ConfigID, _ConfigResultForStats] = {} - - for trial in trials.values(): - if trial.report is None: - continue - - assert trial.report is not None - _result_for_stats = _ConfigResultForStats( - trial.id, - trial.config, - trial.report.to_deprecate_result_dict(), - asdict(trial.metadata), - ) - evaluated[trial.id] = _result_for_stats - - results[prior_dir.name] = evaluated - - return results - - -_VALID_TASK_PATH_PATTERN = re.compile(r".*task_\d+") - - -def is_valid_task_path(path: str | Path | None) -> bool: - """Checks if the given path is a valid task path. 
- - It follows the pattern task_00000, where 00000 is replaced by the task id. - """ - if path is None: - return False - - return ( - _VALID_TASK_PATH_PATTERN.fullmatch(str(path)) is not None and Path(path).is_dir() - ) - - -def is_valid_dev_path(path: str | Path | None) -> bool: - """Checks if the given path is a valid path to development stages. - - It follows the pattern task_00000/dev_00000, where 00000 is replaced by the - task and development stage ids. - """ - if path is None: - return False - - # TODO: Test for \ and | in the path, not only any non-alphanumerical character. - # Currently, false positives are possible. - # This regex expression does not work: ".*task_\d+[\/\\]dev_\d+" - pattern = re.compile(r".*task_\d+\Wdev_\d+") - return pattern.fullmatch(str(path)) is not None and Path(path).is_dir() - - -def is_valid_seed_path(path: str | Path | None) -> bool: - """Checks if the given path is a valid path to a seed. - - It follows the pattern seed_00000, where 00000 is replaced by the seed. - """ - if path is None: - return False - path = Path(path) - - if not path.is_dir(): - return False - - return path.name.startswith("seed") - - -def get_id_from_path(path: str | Path | None) -> int | None: - """Extracts the id from the given path. - - The id is the last part of the path, which is a multiple digit number. - - Note: - I think this refers to task ids and not config ids!!! - """ - if path is None: - return None - numbers = re.findall(r"\d+", str(path)) - if len(numbers) == 0: - return None - - return int(numbers[-1]) - - -class BestLossesDict(TypedDict): - """Summary of the best losses over multiple seeds.""" - - best_loss_mean: float - best_loss_std: float - best_loss_std_err: float - best_loss_min: float - best_loss_max: float - best_loss_median: float - best_loss_quantile_25: float - best_loss_quantile_75: float - - -# TODO(unknown): Implement summarize results for nested working directories -# with multiple experiments -def summarize_results( # noqa: C901 - working_dir: str | Path, - final_task_id: int | None = None, - final_dev_id: int | None = None, - sub_dir: str = "", - *, - write_to_file: bool = True, -) -> BestLossesDict: - """Summarizes the results of the given working directory. - - This includes runs over multiple seeds. - The results are saved in the working directory. - - Args: - working_dir: path to the working directory that contains directories for all seeds - final_task_id: id of the tasks whose results should be summarized. - If None, all tasks are summarized. - final_dev_id: if of the development stage whose results should be summarized. - If None, all development stages are summarized. - sub_dir: subdirectory to look into for specific seeds. - * If subdir is provided: `working_dir/something/` - * Otherwise: `working_dir/something` - write_to_file: if True, the results are written to a file in the working - directory, using the latest taks and dev stage ids. 
- `summary_task__dev_.yaml` - """ - working_dir = Path(working_dir) - - best_losses = [] - for seed_dir in working_dir.iterdir(): - if not is_valid_seed_path(seed_dir): - continue - - if sub_dir: - seed_dir = seed_dir / sub_dir # noqa: PLW2901 - - final_results: dict[ConfigID, _ConfigResultForStats] - if final_task_id is not None and final_dev_id is not None: - results = read_tasks_and_dev_stages_from_disk([seed_dir]) - - # TODO(unknown): only use IDs if provided - final_results = results[final_task_id][final_dev_id] - else: - state = load_filebased_neps_state(Path(seed_dir)) - trials = state.get_all_trials() - - final_results = {} - for trial in trials.values(): - if trial.report is None: - continue - - assert trial.report is not None - _result_for_stats = _ConfigResultForStats( - trial.id, - trial.config, - trial.report.to_deprecate_result_dict(), - asdict(trial.metadata), - ) - final_results[trial.id] = _result_for_stats - - # This part is copied from neps.status() - best_loss: float = float("inf") - num_error = 0 - for _, evaluation in final_results.items(): - if evaluation.result == "error": - num_error += 1 - loss = _get_loss(evaluation.result, ignore_errors=True) - if isinstance(loss, float) and loss < best_loss: - best_loss = loss - - best_losses.append(best_loss) - - if len(best_losses) == 0: - raise ValueError(f"No results found in directort {working_dir}.") - - best_losses_dict = BestLossesDict( - best_loss_mean=float(np.mean(best_losses)), - best_loss_std=float(np.std(best_losses)), - best_loss_std_err=float(np.std(best_losses) / np.sqrt(np.size(best_losses))), - best_loss_min=float(np.min(best_losses)), - best_loss_max=float(np.max(best_losses)), - best_loss_median=float(np.median(best_losses)), - best_loss_quantile_25=float(np.quantile(best_losses, 0.25)), - best_loss_quantile_75=float(np.quantile(best_losses, 0.75)), - ) - - if write_to_file: - task_id_str = str(final_task_id).zfill(5) - dev_id_str = str(final_dev_id).zfill(5) - file_path = working_dir / ("summary_task_" + task_id_str + "_dev_" + dev_id_str) - - with file_path.with_suffix(".yaml").open("w") as f: - yaml.dump(best_losses_dict, f, default_flow_style=False) - - with file_path.with_suffix(".json").open("w") as f: - json.dump(best_losses_dict, f) - - return best_losses_dict - - -def summarize_results_all_tasks_all_devs( - path: str | Path, - sub_dir: str = "", - file_name: str = "summary", - user_prior_dir: str | Path | None = None, -) -> Any: - """Summarizes the results of all tasks and all development stages. - - This includes runs overrmultiple seeds. The results are saved in - the working directory. 
- """ - # go into the first seed directory and read the tasks and dev stages - path = Path(path) - os.scandir(path) - - # TODO(eddiebergman): Please see issue #80 - for seed_dir in path.iterdir(): - if not is_valid_seed_path(seed_dir): - continue - - seed_dir_path = seed_dir / sub_dir if sub_dir else seed_dir - results = read_tasks_and_dev_stages_from_disk([seed_dir_path]) - break - else: - raise ValueError(f"No results found in directory {path}.") - - summary = {} - for task_id, task in results.items(): - for dev_id, _ in task.items(): - summary[(task_id, dev_id)] = summarize_results( - path, - final_task_id=task_id, - final_dev_id=dev_id, - sub_dir=sub_dir, - write_to_file=False, - ) - - summary_user_prior = {} - # TODO(eddiebergman): Please see issue #80, figure out what user_prior_dir is - if user_prior_dir is not None: - user_prior_dir = Path(user_prior_dir) - - if sub_dir: - previously_inferred_path = os.path.join(sub_dir, str(user_prior_dir)) # noqa: PTH118 - raise NotImplementedError( - "Sorry, don't know what should have been done here but we now explicitly" - "raise instead of silently summarizing what would be a non-existant path" - f"before. Previously inferred path was: {previously_inferred_path}" - ) - - user_prior_results = read_user_prior_results_from_disk(user_prior_dir) - for prior_name, _ in user_prior_results.items(): - summary_user_prior[prior_name] = summarize_results( - working_dir=path, - sub_dir=str(user_prior_dir / prior_name), - write_to_file=False, - ) - - with (path / file_name).with_suffix(".jsonl").open("w") as f: - # write jsonl file with one line per task and dev stage - for (task_id, dev_id), metrics in summary.items(): - f.write( - json.dumps( - {"IDs": {"task_id": task_id, "dev_id": dev_id}, "metrics": metrics} - ) - ) - f.write("\n") - for prior_name, metrics in summary_user_prior.items(): - f.write(json.dumps({"IDs": {"prior_name": prior_name}, "metrics": metrics})) - f.write("\n") diff --git a/neps/utils/files.py b/neps/utils/files.py index ddb0627c8..b49c53011 100644 --- a/neps/utils/files.py +++ b/neps/utils/files.py @@ -3,12 +3,39 @@ from __future__ import annotations import dataclasses +import io +import os +from collections.abc import Iterable, Iterator, Mapping +from contextlib import contextmanager from enum import Enum from pathlib import Path -from typing import Any, Iterable, Mapping +from typing import IO, Any, Literal import yaml +try: + from yaml import ( + CDumper as YamlDumper, # type: ignore + CSafeLoader as SafeLoader, # type: ignore + ) +except ImportError: + from yaml import SafeLoader, YamlDumper # type: ignore + + +@contextmanager +def atomic_write(file_path: Path | str, *args: Any, **kwargs: Any) -> Iterator[IO]: + """Write to a file atomically. + + This means that the file will be flushed to disk and explicitly ask the operating + systems to sync the contents to disk. This ensures that other processes that read + from this file should see the contents immediately. 
+ """ + with open(file_path, *args, **kwargs) as file_stream: # noqa: PTH123 + yield file_stream + file_stream.flush() + os.fsync(file_stream.fileno()) + file_stream.close() + def serializable_format(data: Any) -> Any: # noqa: PLR0911 """Format data to be serializable.""" @@ -40,24 +67,53 @@ def serializable_format(data: Any) -> Any: # noqa: PLR0911 return data -def serialize(data: Any, path: Path | str, *, sort_keys: bool = True) -> None: +def serialize( + data: Any, + path: Path, + *, + check_serialized: bool = True, + file_format: Literal["json", "yaml"] = "yaml", + sort_keys: bool = True, +) -> None: """Serialize data to a yaml file.""" - data = serializable_format(data) - path = Path(path) - with path.open("w") as file_stream: + if check_serialized: + data = serializable_format(data) + + buf = io.StringIO() + if file_format == "yaml": try: - return yaml.safe_dump(data, file_stream, sort_keys=sort_keys) + yaml.dump(data, buf, YamlDumper, sort_keys=sort_keys) except yaml.representer.RepresenterError as e: raise TypeError( "Could not serialize to yaml! The object " f"{e.args[1]} of type {type(e.args[1])} is not." ) from e + elif file_format == "json": + import json + json.dump(data, buf, sort_keys=sort_keys) + else: + raise ValueError(f"Unknown format: {file_format}") -def deserialize(path: Path | str) -> dict[str, Any]: + _str = buf.getvalue() + path.write_text(_str) + + +def deserialize( + path: Path | str, + *, + file_format: Literal["json", "yaml"] = "yaml", +) -> dict[str, Any]: """Deserialize data from a yaml file.""" with Path(path).open("r") as file_stream: - data = yaml.full_load(file_stream) # type: ignore + if file_format == "json": + import json + + data = json.load(file_stream) + elif file_format == "yaml": + data = yaml.load(file_stream, SafeLoader) + else: + raise ValueError(f"Unknown format: {file_format}") if not isinstance(data, dict): raise TypeError( @@ -66,8 +122,3 @@ def deserialize(path: Path | str) -> dict[str, Any]: ) return data - - -def empty_file(file_path: Path) -> bool: - """Check if a file does not exist, or if it does, if it is empty.""" - return not file_path.exists() or file_path.stat().st_size <= 0 diff --git a/neps/utils/run_args.py b/neps/utils/run_args.py index 9d7f64457..5a94ebc7e 100644 --- a/neps/utils/run_args.py +++ b/neps/utils/run_args.py @@ -7,8 +7,9 @@ import importlib.util import logging import sys +from collections.abc import Callable from pathlib import Path -from typing import Any, Callable +from typing import Any import yaml @@ -19,7 +20,7 @@ # Define the name of the arguments as variables for easier code maintenance RUN_ARGS = "run_args" -RUN_PIPELINE = "run_pipeline" +EVALUATE_PIPELINE = "evaluate_pipeline" PIPELINE_SPACE = "pipeline_space" ROOT_DIRECTORY = "root_directory" MAX_EVALUATIONS_TOTAL = "max_evaluations_total" @@ -29,7 +30,7 @@ DEVELOPMENT_STAGE_ID = "development_stage_id" TASK_ID = "task_id" CONTINUE_UNTIL_MAX_EVALUATION_COMPLETED = "continue_until_max_evaluation_completed" -LOSS_VALUE_ON_ERROR = "loss_value_on_error" +OBJECTIVE_TO_MINIMIZE_VALUE_ON_ERROR = "objective_to_minimize_value_on_error" COST_VALUE_ON_ERROR = "cost_value_on_error" IGNORE_ERROR = "ignore_errors" SEARCHER = "searcher" @@ -40,7 +41,7 @@ MAX_EVALUATIONS_PER_RUN = "max_evaluations_per_run" -def get_run_args_from_yaml(path: str) -> dict: +def get_run_args_from_yaml(path: str | Path) -> dict: """Load and validate NEPS run arguments from a specified YAML configuration file provided via run_args. 
@@ -48,7 +49,7 @@ def get_run_args_from_yaml(path: str) -> dict: validates these arguments, and then returns them in a dictionary. It checks for the presence and validity of expected parameters, and distinctively handles more complex configurations, specifically those that are dictionaries(e.g. pipeline_space) or - objects(e.g. run_pipeline) requiring loading. + objects(e.g. evaluate_pipeline) requiring loading. Args: path (str): The file path to the YAML configuration file. @@ -66,7 +67,7 @@ def get_run_args_from_yaml(path: str) -> dict: settings = {} # List allowed NePS run arguments with simple types (e.g., string, int). Parameters - # like 'run_pipeline', 'preload_hooks', 'pipeline_space', + # like 'evaluate_pipeline', 'preload_hooks', 'pipeline_space', # and 'searcher' are excluded due to needing specialized processing. expected_parameters = [ ROOT_DIRECTORY, @@ -78,7 +79,7 @@ def get_run_args_from_yaml(path: str) -> dict: TASK_ID, MAX_EVALUATIONS_PER_RUN, CONTINUE_UNTIL_MAX_EVALUATION_COMPLETED, - LOSS_VALUE_ON_ERROR, + OBJECTIVE_TO_MINIMIZE_VALUE_ON_ERROR, COST_VALUE_ON_ERROR, IGNORE_ERROR, ] @@ -97,7 +98,7 @@ def get_run_args_from_yaml(path: str) -> dict: f"provided via run_args." f"See here all valid arguments:" f" {', '.join(expected_parameters)}, " - f"'run_pipeline', 'preload_hooks', 'pipeline_space'" + f"'evaluate_pipeline', 'preload_hooks', 'pipeline_space'" ) # Process complex configurations (e.g., 'pipeline_space', 'searcher') and integrate @@ -115,7 +116,7 @@ def get_run_args_from_yaml(path: str) -> dict: return settings -def config_loader(path: str) -> dict: +def config_loader(path: str | Path) -> dict: """Loads a YAML file and returns the contents under the 'run_args' key. Args: @@ -145,7 +146,7 @@ def config_loader(path: str) -> dict: def extract_leaf_keys(d: dict, special_keys: dict | None = None) -> tuple[dict, dict]: """Recursive function to extract leaf keys and their values from a nested dictionary. - Special keys (e.g.'run_pipeline') are also extracted if present + Special keys (e.g.'evaluate_pipeline') are also extracted if present and their corresponding values (dict) at any level in the nested structure. Args: @@ -158,7 +159,7 @@ def extract_leaf_keys(d: dict, special_keys: dict | None = None) -> tuple[dict, """ if special_keys is None: special_keys = { - RUN_PIPELINE: None, + EVALUATE_PIPELINE: None, PRE_LOAD_HOOKS: None, SEARCHER: None, PIPELINE_SPACE: None, @@ -193,7 +194,7 @@ def handle_special_argument_cases(settings: dict, special_configs: dict) -> None """ # process special configs - process_run_pipeline(RUN_PIPELINE, special_configs, settings) + process_evaluate_pipeline(EVALUATE_PIPELINE, special_configs, settings) process_pipeline_space(PIPELINE_SPACE, special_configs, settings) process_searcher(SEARCHER, special_configs, settings) @@ -273,7 +274,7 @@ def process_searcher(key: str, special_configs: dict, settings: dict) -> None: settings[SEARCHER_KWARGS] = searcher searcher = load_and_return_object(path, name, key) - elif isinstance(searcher, (str, Path)): + elif isinstance(searcher, str | Path): pass else: raise TypeError( @@ -283,7 +284,7 @@ def process_searcher(key: str, special_configs: dict, settings: dict) -> None: settings[key] = searcher -def process_run_pipeline(key: str, special_configs: dict, settings: dict) -> None: +def process_evaluate_pipeline(key: str, special_configs: dict, settings: dict) -> None: """Processes the run pipeline configuration and updates the settings dictionary. 
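# Editor's note: a hypothetical minimal run_config.yaml reflecting the renamed
# keys that get_run_args_from_yaml now validates (run_pipeline ->
# evaluate_pipeline, loss_value_on_error ->
# objective_to_minimize_value_on_error); values are illustrative and parsed
# here only to show the expected shape under the top-level 'run_args' key.
import yaml

config_text = """
run_args:
  root_directory: results/example
  pipeline_space: pipeline_space.yaml
  max_evaluations_total: 20
  objective_to_minimize_value_on_error: 1.0
  ignore_errors: true
"""
run_args = yaml.safe_load(config_text)["run_args"]
assert run_args["max_evaluations_total"] == 20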
Args: @@ -404,7 +405,7 @@ def check_run_args(settings: dict) -> None: # [task_id, development_stage_id, pre_load_hooks] require special handling of type, # that's why they are not listed expected_types = { - RUN_PIPELINE: Callable, + EVALUATE_PIPELINE: Callable, ROOT_DIRECTORY: str, # TODO: Support CS.ConfigurationSpace for pipeline_space PIPELINE_SPACE: (str, dict), @@ -414,7 +415,7 @@ def check_run_args(settings: dict) -> None: MAX_COST_TOTAL: (int, float), MAX_EVALUATIONS_PER_RUN: int, CONTINUE_UNTIL_MAX_EVALUATION_COMPLETED: bool, - LOSS_VALUE_ON_ERROR: float, + OBJECTIVE_TO_MINIMIZE_VALUE_ON_ERROR: float, COST_VALUE_ON_ERROR: float, IGNORE_ERROR: bool, SEARCHER_KWARGS: dict, @@ -428,7 +429,7 @@ def check_run_args(settings: dict) -> None: if not all(callable(item) for item in value): raise TypeError("All items in 'pre_load_hooks' must be callable.") elif param == SEARCHER: - if not (isinstance(value, (str, dict)) or issubclass(value, BaseOptimizer)): + if not (isinstance(value, str | dict) or issubclass(value, BaseOptimizer)): raise TypeError( "Parameter 'searcher' must be a string or a class that is a subclass " "of BaseOptimizer." @@ -446,33 +447,33 @@ def check_run_args(settings: dict) -> None: def check_essential_arguments( - run_pipeline: Callable | None, + evaluate_pipeline: Callable | None, root_directory: str | None, pipeline_space: dict | None, max_cost_total: int | None, - max_evaluation_total: int | None, + max_evaluations_total: int | None, searcher: BaseOptimizer | dict | str | None, ) -> None: """Validates essential NePS configuration arguments. - Ensures 'run_pipeline', 'root_directory', 'pipeline_space', and either - 'max_cost_total' or 'max_evaluation_total' are provided for NePS execution. + Ensures 'evaluate_pipeline', 'root_directory', 'pipeline_space', and either + 'max_cost_total' or 'max_evaluations_total' are provided for NePS execution. Raises ValueError with missing argument details. Additionally, checks 'searcher' is a BaseOptimizer if 'pipeline_space' is absent. Args: - run_pipeline: Function for the pipeline execution. + evaluate_pipeline: Function for the pipeline execution. root_directory (str): Directory path for data storage. pipeline_space: search space for this run. max_cost_total: Max allowed total cost for experiments. - max_evaluation_total: Max allowed evaluations. + max_evaluations_total: Max allowed evaluations. searcher: Optimizer for the configuration space. Raises: ValueError: Missing or invalid essential arguments. """ - if not run_pipeline: - raise ValueError("'run_pipeline' is required but was not provided.") + if not evaluate_pipeline: + raise ValueError("'evaluate_pipeline' is required but was not provided.") if not root_directory: raise ValueError("'root_directory' is required but was not provided.") if not pipeline_space and not isinstance(searcher, BaseOptimizer): @@ -480,9 +481,9 @@ def check_essential_arguments( # provide the search_space because it's the argument of the searcher. raise ValueError("'pipeline_space' is required but was not provided.") - if not max_evaluation_total and not max_cost_total: + if not max_evaluations_total and not max_cost_total: raise ValueError( - "'max_evaluation_total' or 'max_cost_total' is required but " + "'max_evaluations_total' or 'max_cost_total' is required but " "both were not provided." ) @@ -505,7 +506,7 @@ class Settings: arguments (run_args (yaml) and neps func_args). 
""" - def __init__(self, func_args: dict, yaml_args: str | Default | None = None): + def __init__(self, func_args: dict, yaml_args: Path | str | Default | None = None): """Initializes the Settings object by merging function arguments with YAML configuration settings and assigning them to class attributes. It checks for necessary configurations and handles default values where specified. @@ -514,7 +515,7 @@ def __init__(self, func_args: dict, yaml_args: str | Default | None = None): func_args (dict): The function arguments directly passed to NePS. yaml_args (dict | None): Optional. YAML file arguments provided via run_args. """ - self.run_pipeline = UNSET + self.evaluate_pipeline = UNSET self.root_directory = UNSET self.pipeline_space = UNSET self.overwrite_working_directory = UNSET @@ -526,10 +527,11 @@ def __init__(self, func_args: dict, yaml_args: str | Default | None = None): self.continue_until_max_evaluation_completed = UNSET self.max_cost_total = UNSET self.ignore_errors = UNSET - self.loss_value_on_error = UNSET + self.objective_to_minimize_value_on_error = UNSET self.cost_value_on_error = UNSET self.pre_load_hooks = UNSET self.searcher = UNSET + self.sample_batch_size = UNSET self.searcher_kwargs = UNSET if not isinstance(yaml_args, Default) and yaml_args is not None: @@ -597,7 +599,7 @@ def check(self) -> None: f"{', '.join(unassigned_attributes)}" ) check_essential_arguments( - self.run_pipeline, # type: ignore + self.evaluate_pipeline, # type: ignore self.root_directory, # type: ignore self.pipeline_space, # type: ignore self.max_cost_total, # type: ignore diff --git a/neps/utils/types.py b/neps/utils/types.py index a6b6c5404..e5fd343a1 100644 --- a/neps/utils/types.py +++ b/neps/utils/types.py @@ -2,23 +2,22 @@ from __future__ import annotations +from collections.abc import Mapping from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Dict, Literal, Mapping, Union -from typing_extensions import TypeAlias +from typing import TYPE_CHECKING, Any, Literal, TypeAlias import numpy as np if TYPE_CHECKING: from neps.search_spaces.search_space import SearchSpace - from neps.state.trial import Trial + from neps.state.trial import Report # TODO(eddiebergman): We can turn this to an enum at some # point to prevent having to isinstance and str match ERROR: TypeAlias = Literal["error"] -Number: TypeAlias = Union[int, float, np.number] +Number: TypeAlias = int | float ConfigID: TypeAlias = str RawConfig: TypeAlias = Mapping[str, Any] -Metadata: TypeAlias = Dict[str, Any] ResultDict: TypeAlias = Mapping[str, Any] # NOTE(eddiebergman): Getting types for scipy distributions sucks @@ -33,9 +32,7 @@ def __repr__(self) -> str: NotSet = _NotSet() - f64 = np.float64 -i64 = np.int64 # TODO(eddiebergman): Ideally, use `Trial` objects which can carry a lot more @@ -51,7 +48,7 @@ class ConfigResult: config: SearchSpace """Configuration that was evaluated.""" - result: Trial.Report | ResultDict | ERROR + result: Report | ResultDict | ERROR """Some dictionary of results.""" metadata: dict @@ -71,18 +68,7 @@ class _ConfigResultForStats: metadata: dict @property - def loss(self) -> float | ERROR: + def objective_to_minimize(self) -> float | ERROR: if isinstance(self.result, dict): - return float(self.result["loss"]) + return float(self.result["objective_to_minimize"]) return "error" - - -# NOTE: Please try to avoid using this class and prefer a dict if its dynamic -# or make a dataclass if the fields are known and are static -class AttrDict(dict): - """Dictionary that allows access to 
keys as attributes.""" - - def __init__(self, *args: Any, **kwargs: Any): - """Initialize like a dict.""" - super().__init__(*args, **kwargs) - self.__dict__ = self diff --git a/neps/utils/validation.py b/neps/utils/validation.py deleted file mode 100644 index 884df0c5f..000000000 --- a/neps/utils/validation.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Validation utilities for the NePS package.""" - -from __future__ import annotations - -import inspect -from typing import Any, Callable - -from neps.exceptions import NePSError - - -class DeprecatedArgumentError(NePSError): - """Raised when a deprecated argument is used.""" - - -def validate_run_pipeline_arguments(f: Callable[..., Any]) -> None: - """Validate the arguments of a run pipeline function to see if deprcated arguments - are used. - """ - evaluation_fn_params = inspect.signature(f).parameters - if "previous_working_directory" in evaluation_fn_params: - raise RuntimeError( - "the argument: 'previous_working_directory' was deprecated. " - f"In the function: '{f.__name__}', please, " - "use 'previous_pipeline_directory' instead. ", - ) - if "working_directory" in evaluation_fn_params: - raise RuntimeError( - "the argument: 'working_directory' was deprecated. " - f"In the function: '{f.__name__}', please, " - "use 'pipeline_directory' instead. ", - ) diff --git a/neps_examples/README.md b/neps_examples/README.md index 9eb04e5d7..b0b642cee 100644 --- a/neps_examples/README.md +++ b/neps_examples/README.md @@ -2,17 +2,11 @@ 1. **Basic usage examples** demonstrate fundamental usage. Learn how to perform Hyperparameter Optimization (HPO), Neural Architecture Search (NAS), and Joint Architecture and Hyperparameter Search (JAHS). -Understand how to analyze runs on a basic level, emphasizing that no neural network training is involved at this stage; the search is performed on functions to introduce NePS. +Understand how to analyze runs on a basic level. 2. **Efficiency examples** showcase how to enhance efficiency in NePS. Learn about expert priors, multi-fidelity, and parallelization to streamline your pipeline and optimize search processes. -3. **Convenience examples** show tensorboard compatibility and its integration, explore the compatibility with PyTorch Lightning, and understand file management within the run pipeline function used in NePS. +3. **Convenience examples** show tensorboard compatibility and its integration, explore compatibility with PyTorch Lightning, demonstrate the declarative API, and explain file management within the run pipeline function used in NePS. -4. **Experimental examples** tailored for NePS contributors. -These examples provide insights and practices for experimental scenarios. - -5. **Templates** to find a basic fill-in template to kickstart your hyperparameter search with NePS. -Use this template as a foundation for your projects, saving time and ensuring a structured starting point. - -6. **YAML usage examples** to define NePS configurations and search spaces with YAML files, streamlining the setup and execution of experiments. +4. **Experimental examples** tailored for NePS contributors. These examples provide insights and practices for experimental scenarios. 
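# Editor's note: a condensed version of the basic hyperparameters example the
# README's first category refers to, mirroring
# neps_examples/basic_usage/hyperparameters.py; the search space and budget
# are trimmed for brevity.
import neps

def evaluate_pipeline(float1, integer1):
    # NePS minimizes this return value.
    return -(float1 + integer1)

pipeline_space = dict(
    float1=neps.Float(lower=0, upper=1),
    integer1=neps.Integer(lower=0, upper=10),
)

neps.run(
    evaluate_pipeline=evaluate_pipeline,
    pipeline_space=pipeline_space,
    root_directory="results/readme_example",
    max_evaluations_total=5,
)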
diff --git a/neps_examples/__init__.py b/neps_examples/__init__.py index df7d3589f..dc7468928 100644 --- a/neps_examples/__init__.py +++ b/neps_examples/__init__.py @@ -1,7 +1,22 @@ all_main_examples = { # Used for printing in python -m neps_examples - "basic_usage": ["analyse", "architecture", "architecture_and_hyperparameters", "hpo_usage_example", "hyperparameters"], - "convenience": ["logging_additional_info", "neps_tblogger_tutorial", "running_on_slurm_scripts", "neps_x_lightning", "working_directory_per_pipeline"], - "efficiency": ["expert_priors_for_hyperparameters", "multi_fidelity", "multi_fidelity_and_expert_priors"], + "basic_usage": [ + "analyse", + "architecture", + "architecture_and_hyperparameters", + "hyperparameters", + ], + "convenience": [ + "logging_additional_info", + "neps_tblogger_tutorial", + "running_on_slurm_scripts", + "neps_x_lightning", + "working_directory_per_pipeline", + ], + "efficiency": [ + "expert_priors_for_hyperparameters", + "multi_fidelity", + "multi_fidelity_and_expert_priors", + ], } core_examples = [ # Run locally and on github actions @@ -15,7 +31,6 @@ "basic_usage/architecture_and_hyperparameters", "experimental/hierarchical_architecture", "efficiency/expert_priors_for_hyperparameters", - "experimental/hierarchical_architecture_hierarchical_GP", "convenience/logging_additional_info", "convenience/working_directory_per_pipeline", "convenience/neps_tblogger_tutorial", diff --git a/neps_examples/basic_usage/architecture.py b/neps_examples/basic_usage/architecture.py index 57ac74cc2..adca3544f 100644 --- a/neps_examples/basic_usage/architecture.py +++ b/neps_examples/basic_usage/architecture.py @@ -1,4 +1,7 @@ -from __future__ import annotations +raise NotImplementedError( + "Support for graphs was temporarily removed, if you'd like to use a version" + " of NePS that supports graphs, please use version v0.12.2" +) import logging @@ -111,7 +114,7 @@ def run_pipeline(architecture): pipeline_space = dict( - architecture=neps.ArchitectureParameter( + architecture=neps.Architecture( set_recursive_attribute=set_recursive_attribute, structure=structure, primitives=primitives, diff --git a/neps_examples/basic_usage/architecture_and_hyperparameters.py b/neps_examples/basic_usage/architecture_and_hyperparameters.py index e0b63fe45..b7f4bd637 100644 --- a/neps_examples/basic_usage/architecture_and_hyperparameters.py +++ b/neps_examples/basic_usage/architecture_and_hyperparameters.py @@ -1,3 +1,8 @@ +raise NotImplementedError( + "Support for graphs was temporarily removed, if you'd like to use a version" + " of NePS that supports graphs, please use version v0.12.2" +) + import logging from torch import nn @@ -104,13 +109,13 @@ def run_pipeline(**config): pipeline_space = dict( - architecture=neps.ArchitectureParameter( + architecture=neps.Architecture( set_recursive_attribute=set_recursive_attribute, structure=structure, primitives=primitives, ), - optimizer=neps.CategoricalParameter(choices=["sgd", "adam"]), - learning_rate=neps.FloatParameter(lower=10e-7, upper=10e-3, log=True), + optimizer=neps.Categorical(choices=["sgd", "adam"]), + learning_rate=neps.Float(lower=10e-7, upper=10e-3, log=True), ) logging.basicConfig(level=logging.INFO) diff --git a/neps_examples/basic_usage/hpo_usage_example.py b/neps_examples/basic_usage/hpo_usage_example.py deleted file mode 100644 index f1c585735..000000000 --- a/neps_examples/basic_usage/hpo_usage_example.py +++ /dev/null @@ -1,35 +0,0 @@ -import logging -import time - -import numpy as np - 
-import neps - -def run_pipeline( - float_name1, - float_name2, - categorical_name1, - categorical_name2, - integer_name1, - integer_name2, -): - # neps optimize to find values that maximizes sum, for demonstration only - loss = -float( - np.sum( - [float_name1, float_name2, categorical_name1, integer_name1, integer_name2] - ) - ) - if categorical_name2 == "a": - loss += 1 - - return loss - - -logging.basicConfig(level=logging.INFO) -neps.run( - run_pipeline=run_pipeline, - pipeline_space="search_space_example.yaml", - root_directory="results/hyperparameters_example", - post_run_summary=True, - max_evaluations_total=15, -) diff --git a/neps_examples/basic_usage/hyperparameters.py b/neps_examples/basic_usage/hyperparameters.py index 164b49cbd..f86a5ae41 100644 --- a/neps_examples/basic_usage/hyperparameters.py +++ b/neps_examples/basic_usage/hyperparameters.py @@ -1,28 +1,32 @@ import logging import time +from warnings import warn import numpy as np import neps +def run_pipeline(float1, float2, categorical, integer1, integer2): + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning) + return evaluate_pipeline(float1, float2, categorical, integer1, integer2) -def run_pipeline(float1, float2, categorical, integer1, integer2): - loss = -float(np.sum([float1, float2, int(categorical), integer1, integer2])) +def evaluate_pipeline(float1, float2, categorical, integer1, integer2): + objective_to_minimize = -float(np.sum([float1, float2, int(categorical), integer1, integer2])) # time.sleep(0.7) # For demonstration purposes - return loss + return objective_to_minimize pipeline_space = dict( - float1=neps.FloatParameter(lower=0, upper=1), - float2=neps.FloatParameter(lower=-10, upper=10), - categorical=neps.CategoricalParameter(choices=[0, 1]), - integer1=neps.IntegerParameter(lower=0, upper=1), - integer2=neps.IntegerParameter(lower=1, upper=1000, log=True), + float1=neps.Float(lower=0, upper=1), + float2=neps.Float(lower=-10, upper=10), + categorical=neps.Categorical(choices=[0, 1]), + integer1=neps.Integer(lower=0, upper=1), + integer2=neps.Integer(lower=1, upper=1000, log=True), ) logging.basicConfig(level=logging.INFO) neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space, root_directory="results/hyperparameters_example", post_run_summary=True, diff --git a/neps_examples/declarative_usage/README.md b/neps_examples/convenience/declarative_usage/README.md similarity index 100% rename from neps_examples/declarative_usage/README.md rename to neps_examples/convenience/declarative_usage/README.md diff --git a/neps_examples/declarative_usage/config.yaml b/neps_examples/convenience/declarative_usage/config.yaml similarity index 100% rename from neps_examples/declarative_usage/config.yaml rename to neps_examples/convenience/declarative_usage/config.yaml diff --git a/neps_examples/declarative_usage/hpo_example.py b/neps_examples/convenience/declarative_usage/hpo_example.py similarity index 91% rename from neps_examples/declarative_usage/hpo_example.py rename to neps_examples/convenience/declarative_usage/hpo_example.py index 8af8af2e3..0e599440b 100644 --- a/neps_examples/declarative_usage/hpo_example.py +++ b/neps_examples/convenience/declarative_usage/hpo_example.py @@ -60,7 +60,7 @@ def training_pipeline(num_layers, num_neurons, epochs, learning_rate, optimizer) optimizer (str): Name of the optimizer to use ('adam' or 'sgd'). Returns: - float: The average loss over the validation set after training. 
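# Editor's note: the deprecation pattern the examples above converge on --
# keep the old name as a thin shim that warns and forwards to the new one.
# A minimal sketch with a toy objective, not taken verbatim from the repo.
from warnings import warn

def evaluate_pipeline(float1):  # new entry point
    return -float(float1)

def run_pipeline(float1):  # old entry point, kept as a compatibility shim
    warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning)
    return evaluate_pipeline(float1)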
+ float: The average objective_to_minimize over the validation set after training. Raises: KeyError: If the specified optimizer is not supported. @@ -101,20 +101,20 @@ def training_pipeline(num_layers, num_neurons, epochs, learning_rate, optimizer) for batch_idx, (data, target) in enumerate(train_loader): optimizer.zero_grad() output = model(data) - loss = criterion(output, target) - loss.backward() + objective_to_minimize = criterion(output, target) + objective_to_minimize.backward() optimizer.step() # Validation loop model.eval() - val_loss = 0 + val_objective_to_minimize = 0 with torch.no_grad(): for data, target in val_loader: output = model(data) - val_loss += criterion(output, target).item() + val_objective_to_minimize += criterion(output, target).item() - val_loss /= len(val_loader.dataset) - return val_loss + val_objective_to_minimize /= len(val_loader.dataset) + return val_objective_to_minimize if __name__ == "__main__": diff --git a/neps_examples/convenience/logging_additional_info.py b/neps_examples/convenience/logging_additional_info.py index 5bde13c6c..70b2681c8 100644 --- a/neps_examples/convenience/logging_additional_info.py +++ b/neps_examples/convenience/logging_additional_info.py @@ -1,17 +1,21 @@ import logging import time +from warnings import warn import numpy as np import neps - def run_pipeline(float1, float2, categorical, integer1, integer2): + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning) + return evaluate_pipeline(float1, float2, categorical, integer1, integer2) + +def evaluate_pipeline(float1, float2, categorical, integer1, integer2): start = time.time() - loss = -float(np.sum([float1, float2, int(categorical), integer1, integer2])) + objective_to_minimize = -float(np.sum([float1, float2, int(categorical), integer1, integer2])) end = time.time() return { - "loss": loss, + "objective_to_minimize": objective_to_minimize, "info_dict": { # Optionally include additional information as an info_dict "train_time": end - start, }, @@ -19,16 +23,16 @@ def run_pipeline(float1, float2, categorical, integer1, integer2): pipeline_space = dict( - float1=neps.FloatParameter(lower=0, upper=1), - float2=neps.FloatParameter(lower=-10, upper=10), - categorical=neps.CategoricalParameter(choices=[0, 1]), - integer1=neps.IntegerParameter(lower=0, upper=1), - integer2=neps.IntegerParameter(lower=1, upper=1000, log=True), + float1=neps.Float(lower=0, upper=1), + float2=neps.Float(lower=-10, upper=10), + categorical=neps.Categorical(choices=[0, 1]), + integer1=neps.Integer(lower=0, upper=1), + integer2=neps.Integer(lower=1, upper=1000, log=True), ) logging.basicConfig(level=logging.INFO) neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space, root_directory="results/logging_additional_info", max_evaluations_total=5, diff --git a/neps_examples/convenience/neps_tblogger_tutorial.py b/neps_examples/convenience/neps_tblogger_tutorial.py index 724ac4e3e..f3fd83220 100644 --- a/neps_examples/convenience/neps_tblogger_tutorial.py +++ b/neps_examples/convenience/neps_tblogger_tutorial.py @@ -51,6 +51,7 @@ import random import time from typing import Tuple +from warnings import warn import numpy as np import torch @@ -155,7 +156,7 @@ def forward(self, x: torch.Tensor): # misclassified images. -def loss_ev(model: nn.Module, data_loader: DataLoader) -> float: +def objective_to_minimize_ev(model: nn.Module, data_loader: DataLoader) -> float: # Set the model in evaluation mode (no gradient computation). 
model.eval() @@ -208,22 +209,22 @@ def training( for x, y in train_loader: optimizer.zero_grad() output = model(x) - loss = criterion(output, y) - loss.backward() + objective_to_minimize = criterion(output, y) + objective_to_minimize.backward() optimizer.step() predicted_labels = torch.argmax(output, dim=1) incorrect_mask = predicted_labels != y incorrect_images.append(x[incorrect_mask]) - # Calculate validation loss using the loss_ev function. - validation_loss = loss_ev(model, validation_loader) + # Calculate validation objective_to_minimize using the objective_to_minimize_ev function. + validation_objective_to_minimize = objective_to_minimize_ev(model, validation_loader) # Return the misclassified image by during model training. if len(incorrect_images) > 0: incorrect_images = torch.cat(incorrect_images, dim=0) - return (validation_loss, incorrect_images) + return (validation_objective_to_minimize, incorrect_images) ############################################################# @@ -232,9 +233,9 @@ def training( def pipeline_space() -> dict: pipeline = dict( - lr=neps.FloatParameter(lower=1e-5, upper=1e-1, log=True), - optim=neps.CategoricalParameter(choices=["Adam", "SGD"]), - weight_decay=neps.FloatParameter(lower=1e-4, upper=1e-1, log=True), + lr=neps.Float(lower=1e-5, upper=1e-1, log=True), + optim=neps.Categorical(choices=["Adam", "SGD"]), + weight_decay=neps.Float(lower=1e-4, upper=1e-1, log=True), ) return pipeline @@ -243,8 +244,13 @@ def pipeline_space() -> dict: ############################################################# # Implement the pipeline run search. - def run_pipeline(lr, optim, weight_decay): + # Deprecated function, use evaluate_pipeline instead + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning) + return evaluate_pipeline(lr, optim, weight_decay) + + +def evaluate_pipeline(lr, optim, weight_decay): # Create the network model. model = MLP() @@ -268,7 +274,7 @@ def run_pipeline(lr, optim, weight_decay): criterion = nn.CrossEntropyLoss() for i in range(max_epochs): - loss, miss_img = training( + objective_to_minimize, miss_img = training( optimizer=optimizer, model=model, criterion=criterion, @@ -295,10 +301,10 @@ def run_pipeline(lr, optim, weight_decay): # 4. First two layer gradients passed as scalar configs. tblogger.log( - loss=loss, + objective_to_minimize=objective_to_minimize, current_epoch=i, write_summary_incumbent=False, # Set to `True` for a live incumbent trajectory. - writer_config_scalar=True, # Set to `True` for a live loss trajectory for each config. + writer_config_scalar=True, # Set to `True` for a live objective_to_minimize trajectory for each config. writer_config_hparam=True, # Set to `True` for live parallel coordinate, scatter plot matrix, and table view. # Appending extra data extra_data={ @@ -313,15 +319,15 @@ def run_pipeline(lr, optim, weight_decay): scheduler.step() - print(f" Epoch {i + 1} / {max_epochs} Val Error: {loss} ") + print(f" Epoch {i + 1} / {max_epochs} Val Error: {objective_to_minimize} ") # Calculate training and test accuracy. - train_accuracy = loss_ev(model, train_loader) - test_accuracy = loss_ev(model, test_loader) + train_accuracy = objective_to_minimize_ev(model, train_loader) + test_accuracy = objective_to_minimize_ev(model, test_loader) # Return a dictionary with relevant metrics and information. 
return { - "loss": loss, + "objective_to_minimize": objective_to_minimize, "info_dict": { "train_accuracy": train_accuracy, "test_accuracy": test_accuracy, @@ -351,7 +357,7 @@ def run_pipeline(lr, optim, weight_decay): # tblogger.get_status() run_args = dict( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space(), root_directory="results/neps_tblogger_example", searcher="random_search", diff --git a/neps_examples/convenience/neps_x_lightning.py b/neps_examples/convenience/neps_x_lightning.py index de1426e83..8e019957d 100644 --- a/neps_examples/convenience/neps_x_lightning.py +++ b/neps_examples/convenience/neps_x_lightning.py @@ -1,363 +1,373 @@ -""" -Exploring NePS Compatibility with PyTorch Lightning -======================================================= - -1. Introduction: ----------------- -Welcome to this tutorial on utilizing NePS-generated files and directories -in conjunction with PyTorch Lightning. - -2. Setup: ---------- -Ensure you have the necessary dependencies installed. You can install the 'NePS' -package by executing the following command: - -```bash -pip install neural-pipeline-search -``` - -Additionally, note that 'NePS' does not include 'torchvision' as a dependency. -You can install it with this command: - -```bash -pip install torchvision==0.14 -``` - -Make sure to download the torchvision version that is compatible with your -pytorch version. More info on this link: - -https://pypi.org/project/torchvision/ - -Additionally, you will need to install the PyTorch Lightning package. This can -be achieved with the following command: - -```bash -pip install lightning -``` - -These dependencies ensure you have everything you need for this tutorial. -""" -import argparse -import glob -import logging -import random -import time -from pathlib import Path -from typing import Tuple - -import lightning as L -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -from lightning.pytorch.callbacks import ModelCheckpoint -from lightning.pytorch.loggers import TensorBoardLogger -from torch.utils.data import SubsetRandomSampler -from torch.utils.data.dataloader import DataLoader -from torchmetrics import Accuracy -from torchvision.datasets import MNIST -from torchvision.transforms import transforms - -import neps -from neps.utils.common import get_initial_directory, load_lightning_checkpoint - -############################################################# -# Definig the seeds for reproducibility - - -def set_seed(seed=123): - torch.manual_seed(seed) - np.random.seed(seed) - random.seed(seed) - - -############################################################# -# Define the lightning model - - -class LitMNIST(L.LightningModule): - def __init__( - self, - configuration: dict, - n_train: int = 8192, - n_valid: int = 1024, - ): - super().__init__() - - # Initialize the model's hyperparameters with the configuration - self.save_hyperparameters(configuration) - - self.n_train = n_train - self.n_valid = n_valid - - # Define data transformation and loss function - self.transform = transforms.ToTensor() - self.criterion = nn.NLLLoss() - - # Define the model's architecture - self.linear1 = nn.Linear(in_features=784, out_features=392) - self.linear2 = nn.Linear(in_features=392, out_features=196) - self.linear3 = nn.Linear(in_features=196, out_features=10) - - # Define PyTorch Lightning metrics for training, validation, and testing - metric = Accuracy(task="multiclass", num_classes=10) - self.train_accuracy = 
metric.clone() - self.val_accuracy = metric.clone() - self.test_accuracy = metric.clone() - - def forward(self, x: torch.Tensor) -> torch.Tensor: - # Forward pass function - x = x.view(x.size(0), -1) - x = F.relu(self.linear1(x)) - x = F.relu(self.linear2(x)) - x = self.linear3(x) - - return F.log_softmax(x, dim=1) - - def common_step( - self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Perform a forward pass and compute loss, predictions, and get the ground - truth labels for a batch of data. - """ - x, y = batch - logits = self.forward(x) - loss = self.criterion(logits, y) - preds = torch.argmax(logits, dim=1) - - return loss, preds, y - - def training_step( - self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int - ) -> float: - loss, preds, y = self.common_step(batch, batch_idx) - self.train_accuracy.update(preds, y) - - self.log_dict( - {"train_loss": loss, "train_acc": self.val_accuracy.compute()}, - on_epoch=True, - on_step=False, - prog_bar=True, - ) - - return loss - - def validation_step( - self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int - ) -> None: - loss, preds, y = self.common_step(batch, batch_idx) - self.val_accuracy.update(preds, y) - - self.log_dict( - {"val_loss": loss, "val_acc": self.val_accuracy.compute()}, - on_epoch=True, - on_step=False, - prog_bar=True, - ) - - def test_step( - self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int - ) -> None: - _, preds, y = self.common_step(batch, batch_idx) - self.test_accuracy.update(preds, y) - - self.log(name="test_acc", value=self.test_accuracy.compute()) - - def configure_optimizers(self) -> torch.optim.Optimizer: - # Configure and return the optimizer based on the configuration - if self.hparams.optimizer == "Adam": - optimizer = torch.optim.Adam( - self.parameters(), - lr=self.hparams.lr, - weight_decay=self.hparams.weight_decay, - ) - elif self.hparams.optimizer == "SGD": - optimizer = torch.optim.SGD( - self.parameters(), - lr=self.hparams.lr, - weight_decay=self.hparams.weight_decay, - ) - else: - raise ValueError( - "The optimizer choices is not one of the available optimizers" - ) - return optimizer - - def on_train_end(self): - # Get the metric at the end of the training and log it with respect to - # it's hyperparameters - val_acc_metric = { - "val_accuracy": self.val_accuracy.compute(), - } - - # Log hyperparameters - self.logger.log_hyperparams(self.hparams, metrics=val_acc_metric) - - def prepare_data(self) -> None: - # Downloading the dataste if not already downloaded - MNIST(self.hparams.data_dir, train=True, download=True) - MNIST(self.hparams.data_dir, train=False, download=True) - - def setup(self, stage: str = None) -> None: - # Assign train/val datasets for use in dataloaders - if stage == "fit" or stage is None: - self.mnist_full = MNIST( - self.hparams.data_dir, train=True, transform=self.transform - ) - - # Create random subsets of the training dataset for validation. 
- self.train_sampler = SubsetRandomSampler(range(self.n_train)) - self.val_sampler = SubsetRandomSampler( - range(self.n_train, self.n_train + self.n_valid) - ) - - # Assign test dataset for use in dataloader - if stage == "test" or stage is None: - self.mnist_test = MNIST( - self.hparams.data_dir, train=False, transform=self.transform - ) - - def train_dataloader(self) -> DataLoader: - return DataLoader( - self.mnist_full, - batch_size=self.hparams.batch_size, - sampler=self.train_sampler, - num_workers=16, - ) - - def val_dataloader(self) -> DataLoader: - return DataLoader( - self.mnist_full, - batch_size=self.hparams.batch_size, - sampler=self.val_sampler, - num_workers=16, - ) - - def test_dataloader(self) -> DataLoader: - return DataLoader( - self.mnist_test, - batch_size=self.hparams.batch_size, - num_workers=16, - ) - - -############################################################# -# Define search space - - -def search_space() -> dict: - # Define a dictionary to represent the hyperparameter search space - space = dict( - data_dir=neps.ConstantParameter("./data"), - batch_size=neps.ConstantParameter(64), - lr=neps.FloatParameter(lower=1e-5, upper=1e-2, log=True, default=1e-3), - weight_decay=neps.FloatParameter( - lower=1e-5, upper=1e-3, log=True, default=5e-4 - ), - optimizer=neps.CategoricalParameter(choices=["Adam", "SGD"], default="Adam"), - epochs=neps.IntegerParameter(lower=1, upper=9, log=False, is_fidelity=True), - ) - return space - - -############################################################# -# Define the run pipeline function - - -def run_pipeline(pipeline_directory, previous_pipeline_directory, **config) -> dict: - # Initialize the first directory to store the event and checkpoints files - init_dir = get_initial_directory(pipeline_directory) - checkpoint_dir = init_dir / "checkpoints" - - # Initialize the model and checkpoint dir - model = LitMNIST(config) - - # Create the TensorBoard logger for logging - logger = TensorBoardLogger( - save_dir=init_dir, name="data", version="logs", default_hp_metric=False - ) - - # Add checkpoints at the end of training - checkpoint_callback = ModelCheckpoint( - dirpath=checkpoint_dir, - filename="{epoch}-{val_loss:.2f}", - ) - - # Use this function to load the previous checkpoint if it exists - checkpoint_path, checkpoint = load_lightning_checkpoint( - previous_pipeline_directory=previous_pipeline_directory, - checkpoint_dir=checkpoint_dir, - ) - - if checkpoint is None: - previously_spent_epochs = 0 - else: - previously_spent_epochs = checkpoint["epoch"] - - # Create a PyTorch Lightning Trainer - epochs = config["epochs"] - - trainer = L.Trainer( - logger=logger, - max_epochs=epochs, - callbacks=[checkpoint_callback], - ) - - # Train the model and retrieve training/validation metrics - if checkpoint_path: - trainer.fit(model, ckpt_path=checkpoint_path) - else: - trainer.fit(model) - - train_accuracy = trainer.logged_metrics.get("train_acc", None) - val_loss = trainer.logged_metrics.get("val_loss", None) - val_accuracy = trainer.logged_metrics.get("val_acc", None) - - # Test the model and retrieve test metrics - trainer.test(model) - - test_accuracy = trainer.logged_metrics.get("test_acc", None) - - return { - "loss": val_loss, - "cost": epochs - previously_spent_epochs, - "info_dict": { - "train_accuracy": train_accuracy, - "val_accuracy": val_accuracy, - "test_accuracy": test_accuracy, - }, - } - - -if __name__ == "__main__": - # Parse command line arguments - parser = argparse.ArgumentParser() - parser.add_argument( - 
"--max_evaluations_total", - type=int, - default=15, - help="Number of different configurations to train", - ) - args = parser.parse_args() - - # Initialize the logger and record start time - start_time = time.time() - set_seed(112) - logging.basicConfig(level=logging.INFO) - - # Run NePS with specified parameters - neps.run( - run_pipeline=run_pipeline, - pipeline_space=search_space(), - root_directory="results/hyperband", - max_evaluations_total=args.max_evaluations_total, - searcher="hyperband", - ) - - # Record the end time and calculate execution time - end_time = time.time() - execution_time = end_time - start_time - - # Log the execution time - logging.info(f"Execution time: {execution_time} seconds") +""" +Exploring NePS Compatibility with PyTorch Lightning +======================================================= + +1. Introduction: +---------------- +Welcome to this tutorial on utilizing NePS-generated files and directories +in conjunction with PyTorch Lightning. + +2. Setup: +--------- +Ensure you have the necessary dependencies installed. You can install the 'NePS' +package by executing the following command: + +```bash +pip install neural-pipeline-search +``` + +Additionally, note that 'NePS' does not include 'torchvision' as a dependency. +You can install it with this command: + +```bash +pip install torchvision==0.14 +``` + +Make sure to download the torchvision version that is compatible with your +pytorch version. More info on this link: + +https://pypi.org/project/torchvision/ + +Additionally, you will need to install the PyTorch Lightning package. This can +be achieved with the following command: + +```bash +pip install lightning +``` + +These dependencies ensure you have everything you need for this tutorial. +""" +import argparse +import glob +import logging +import random +import time +from pathlib import Path +from typing import Tuple +from warnings import warn + +import lightning as L +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from lightning.pytorch.callbacks import ModelCheckpoint +from lightning.pytorch.loggers import TensorBoardLogger +from torch.utils.data import SubsetRandomSampler +from torch.utils.data.dataloader import DataLoader +from torchmetrics import Accuracy +from torchvision.datasets import MNIST +from torchvision.transforms import transforms + +import neps +from neps.utils.common import get_initial_directory, load_lightning_checkpoint + +############################################################# +# Definig the seeds for reproducibility + + +def set_seed(seed=123): + torch.manual_seed(seed) + np.random.seed(seed) + random.seed(seed) + + +############################################################# +# Define the lightning model + + +class LitMNIST(L.LightningModule): + def __init__( + self, + configuration: dict, + n_train: int = 8192, + n_valid: int = 1024, + ): + super().__init__() + + # Initialize the model's hyperparameters with the configuration + self.save_hyperparameters(configuration) + + self.n_train = n_train + self.n_valid = n_valid + + # Define data transformation and objective_to_minimize function + self.transform = transforms.ToTensor() + self.criterion = nn.NLLLoss() + + # Define the model's architecture + self.linear1 = nn.Linear(in_features=784, out_features=392) + self.linear2 = nn.Linear(in_features=392, out_features=196) + self.linear3 = nn.Linear(in_features=196, out_features=10) + + # Define PyTorch Lightning metrics for training, validation, and testing + metric = 
Accuracy(task="multiclass", num_classes=10)
+        self.train_accuracy = metric.clone()
+        self.val_accuracy = metric.clone()
+        self.test_accuracy = metric.clone()
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # Forward pass function
+        x = x.view(x.size(0), -1)
+        x = F.relu(self.linear1(x))
+        x = F.relu(self.linear2(x))
+        x = self.linear3(x)
+
+        return F.log_softmax(x, dim=1)
+
+    def common_step(
+        self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Perform a forward pass and compute objective_to_minimize, predictions, and get the ground
+        truth labels for a batch of data.
+        """
+        x, y = batch
+        logits = self.forward(x)
+        objective_to_minimize = self.criterion(logits, y)
+        preds = torch.argmax(logits, dim=1)
+
+        return objective_to_minimize, preds, y
+
+    def training_step(
+        self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int
+    ) -> float:
+        objective_to_minimize, preds, y = self.common_step(batch, batch_idx)
+        self.train_accuracy.update(preds, y)
+
+        self.log_dict(
+            {"train_objective_to_minimize": objective_to_minimize, "train_acc": self.train_accuracy.compute()},
+            on_epoch=True,
+            on_step=False,
+            prog_bar=True,
+        )
+
+        return objective_to_minimize
+
+    def validation_step(
+        self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int
+    ) -> None:
+        objective_to_minimize, preds, y = self.common_step(batch, batch_idx)
+        self.val_accuracy.update(preds, y)
+
+        self.log_dict(
+            {"val_objective_to_minimize": objective_to_minimize, "val_acc": self.val_accuracy.compute()},
+            on_epoch=True,
+            on_step=False,
+            prog_bar=True,
+        )
+
+    def test_step(
+        self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int
+    ) -> None:
+        _, preds, y = self.common_step(batch, batch_idx)
+        self.test_accuracy.update(preds, y)
+
+        self.log(name="test_acc", value=self.test_accuracy.compute())
+
+    def configure_optimizers(self) -> torch.optim.Optimizer:
+        # Configure and return the optimizer based on the configuration
+        if self.hparams.optimizer == "Adam":
+            optimizer = torch.optim.Adam(
+                self.parameters(),
+                lr=self.hparams.lr,
+                weight_decay=self.hparams.weight_decay,
+            )
+        elif self.hparams.optimizer == "SGD":
+            optimizer = torch.optim.SGD(
+                self.parameters(),
+                lr=self.hparams.lr,
+                weight_decay=self.hparams.weight_decay,
+            )
+        else:
+            raise ValueError(
+                "The chosen optimizer is not one of the available optimizers"
+            )
+        return optimizer
+
+    def on_train_end(self):
+        # Get the metric at the end of the training and log it with respect to
+        # its hyperparameters
+        val_acc_metric = {
+            "val_accuracy": self.val_accuracy.compute(),
+        }
+
+        # Log hyperparameters
+        self.logger.log_hyperparams(self.hparams, metrics=val_acc_metric)
+
+    def prepare_data(self) -> None:
+        # Download the dataset if not already downloaded
+        MNIST(self.hparams.data_dir, train=True, download=True)
+        MNIST(self.hparams.data_dir, train=False, download=True)
+
+    def setup(self, stage: str = None) -> None:
+        # Assign train/val datasets for use in dataloaders
+        if stage == "fit" or stage is None:
+            self.mnist_full = MNIST(
+                self.hparams.data_dir, train=True, transform=self.transform
+            )
+
+            # Create random subsets of the training dataset for validation.
+ self.train_sampler = SubsetRandomSampler(range(self.n_train)) + self.val_sampler = SubsetRandomSampler( + range(self.n_train, self.n_train + self.n_valid) + ) + + # Assign test dataset for use in dataloader + if stage == "test" or stage is None: + self.mnist_test = MNIST( + self.hparams.data_dir, train=False, transform=self.transform + ) + + def train_dataloader(self) -> DataLoader: + return DataLoader( + self.mnist_full, + batch_size=self.hparams.batch_size, + sampler=self.train_sampler, + num_workers=16, + ) + + def val_dataloader(self) -> DataLoader: + return DataLoader( + self.mnist_full, + batch_size=self.hparams.batch_size, + sampler=self.val_sampler, + num_workers=16, + ) + + def test_dataloader(self) -> DataLoader: + return DataLoader( + self.mnist_test, + batch_size=self.hparams.batch_size, + num_workers=16, + ) + + +############################################################# +# Define search space + + +def search_space() -> dict: + # Define a dictionary to represent the hyperparameter search space + space = dict( + data_dir=neps.Constant("./data"), + batch_size=neps.Constant(64), + lr=neps.Float(lower=1e-5, upper=1e-2, log=True, prior=1e-3), + weight_decay=neps.Float( + lower=1e-5, upper=1e-3, log=True, prior=5e-4 + ), + optimizer=neps.Categorical(choices=["Adam", "SGD"], prior="Adam"), + epochs=neps.Integer(lower=1, upper=9, log=False, is_fidelity=True), + ) + return space + + +############################################################# +# Define the run pipeline function + +def run_pipeline(pipeline_directory, previous_pipeline_directory, **config): + # Deprecated function, use evaluate_pipeline instead + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning) + return evaluate_pipeline( + pipeline_directory, + previous_pipeline_directory, + **config, + ) + + +def evaluate_pipeline(pipeline_directory, previous_pipeline_directory, **config) -> dict: + # Initialize the first directory to store the event and checkpoints files + init_dir = get_initial_directory(pipeline_directory) + checkpoint_dir = init_dir / "checkpoints" + + # Initialize the model and checkpoint dir + model = LitMNIST(config) + + # Create the TensorBoard logger for logging + logger = TensorBoardLogger( + save_dir=init_dir, name="data", version="logs", default_hp_metric=False + ) + + # Add checkpoints at the end of training + checkpoint_callback = ModelCheckpoint( + dirpath=checkpoint_dir, + filename="{epoch}-{val_objective_to_minimize:.2f}", + ) + + # Use this function to load the previous checkpoint if it exists + checkpoint_path, checkpoint = load_lightning_checkpoint( + previous_pipeline_directory=previous_pipeline_directory, + checkpoint_dir=checkpoint_dir, + ) + + if checkpoint is None: + previously_spent_epochs = 0 + else: + previously_spent_epochs = checkpoint["epoch"] + + # Create a PyTorch Lightning Trainer + epochs = config["epochs"] + + trainer = L.Trainer( + logger=logger, + max_epochs=epochs, + callbacks=[checkpoint_callback], + ) + + # Train the model and retrieve training/validation metrics + if checkpoint_path: + trainer.fit(model, ckpt_path=checkpoint_path) + else: + trainer.fit(model) + + train_accuracy = trainer.logged_metrics.get("train_acc", None) + val_objective_to_minimize = trainer.logged_metrics.get("val_objective_to_minimize", None) + val_accuracy = trainer.logged_metrics.get("val_acc", None) + + # Test the model and retrieve test metrics + trainer.test(model) + + test_accuracy = trainer.logged_metrics.get("test_acc", None) + + return { + 
"objective_to_minimize": val_objective_to_minimize, + "cost": epochs - previously_spent_epochs, + "info_dict": { + "train_accuracy": train_accuracy, + "val_accuracy": val_accuracy, + "test_accuracy": test_accuracy, + }, + } + + +if __name__ == "__main__": + # Parse command line arguments + parser = argparse.ArgumentParser() + parser.add_argument( + "--max_evaluations_total", + type=int, + default=15, + help="Number of different configurations to train", + ) + args = parser.parse_args() + + # Initialize the logger and record start time + start_time = time.time() + set_seed(112) + logging.basicConfig(level=logging.INFO) + + # Run NePS with specified parameters + neps.run( + evaluate_pipeline=evaluate_pipeline, + pipeline_space=search_space(), + root_directory="results/hyperband", + max_evaluations_total=args.max_evaluations_total, + searcher="hyperband", + ) + + # Record the end time and calculate execution time + end_time = time.time() + execution_time = end_time - start_time + + # Log the execution time + logging.info(f"Execution time: {execution_time} seconds") diff --git a/neps_examples/convenience/running_on_slurm_scripts.py b/neps_examples/convenience/running_on_slurm_scripts.py index 7ccc5374f..7a52c21ba 100644 --- a/neps_examples/convenience/running_on_slurm_scripts.py +++ b/neps_examples/convenience/running_on_slurm_scripts.py @@ -5,6 +5,7 @@ import os import time from pathlib import Path +from warnings import warn import neps @@ -27,9 +28,14 @@ def _get_validation_error(pipeline_directory: Path): return float(validation_error_file.read_text()) return None - def run_pipeline_via_slurm( pipeline_directory: Path, optimizer: str, learning_rate: float +): + warn("run_pipeline_via_slurm is deprecated, use evaluate_pipeline_via_slurm instead", DeprecationWarning) + return evaluate_pipeline_via_slurm(pipeline_directory, optimizer, learning_rate) + +def evaluate_pipeline_via_slurm( + pipeline_directory: Path, optimizer: str, learning_rate: float ): script = f"""#!/bin/bash #SBATCH --time 0-00:05 @@ -52,13 +58,13 @@ def run_pipeline_via_slurm( pipeline_space = dict( - optimizer=neps.CategoricalParameter(choices=["sgd", "adam"]), - learning_rate=neps.FloatParameter(lower=10e-7, upper=10e-3, log=True), + optimizer=neps.Categorical(choices=["sgd", "adam"]), + learning_rate=neps.Float(lower=10e-7, upper=10e-3, log=True), ) logging.basicConfig(level=logging.INFO) neps.run( - run_pipeline=run_pipeline_via_slurm, + evaluate_pipeline=evaluate_pipeline_via_slurm, pipeline_space=pipeline_space, root_directory="results/slurm_script_example", max_evaluations_total=5, diff --git a/neps_examples/convenience/working_directory_per_pipeline.py b/neps_examples/convenience/working_directory_per_pipeline.py index c79977f16..ce36e9296 100644 --- a/neps_examples/convenience/working_directory_per_pipeline.py +++ b/neps_examples/convenience/working_directory_per_pipeline.py @@ -1,5 +1,6 @@ import logging from pathlib import Path +from warnings import warn import numpy as np @@ -7,25 +8,29 @@ def run_pipeline(pipeline_directory: Path, float1, categorical, integer1): + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning) + return evaluate_pipeline(pipeline_directory, float1, categorical, integer1) + +def evaluate_pipeline(pipeline_directory: Path, float1, categorical, integer1): # When adding pipeline_directory to run_pipeline, neps detects its presence and # passes a directory unique for each pipeline configuration. 
You can then use this # pipeline_directory to create / save files pertaining to a specific pipeline, e.g.: weight_file = pipeline_directory / "weight_file.txt" weight_file.write_text("0") - loss = -float(np.sum([float1, int(categorical), integer1])) - return loss + objective_to_minimize = -float(np.sum([float1, int(categorical), integer1])) + return objective_to_minimize pipeline_space = dict( - float1=neps.FloatParameter(lower=0, upper=1), - categorical=neps.CategoricalParameter(choices=[0, 1]), - integer1=neps.IntegerParameter(lower=0, upper=1), + float1=neps.Float(lower=0, upper=1), + categorical=neps.Categorical(choices=[0, 1]), + integer1=neps.Integer(lower=0, upper=1), ) logging.basicConfig(level=logging.INFO) neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space, root_directory="results/working_directory_per_pipeline", max_evaluations_total=5, diff --git a/neps_examples/efficiency/expert_priors_for_hyperparameters.py b/neps_examples/efficiency/expert_priors_for_hyperparameters.py index 11f50c9db..e85802ba0 100644 --- a/neps_examples/efficiency/expert_priors_for_hyperparameters.py +++ b/neps_examples/efficiency/expert_priors_for_hyperparameters.py @@ -1,10 +1,14 @@ import logging import time +from warnings import warn import neps - def run_pipeline(some_float, some_integer, some_cat): + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning) + return evaluate_pipeline(some_float, some_integer, some_cat) + +def evaluate_pipeline(some_float, some_integer, some_cat): start = time.time() if some_cat != "a": y = some_float + some_integer @@ -12,7 +16,7 @@ def run_pipeline(some_float, some_integer, some_cat): y = -some_float - some_integer end = time.time() return { - "loss": y, + "objective_to_minimize": y, "info_dict": { "test_score": y, "train_time": end - start, @@ -23,20 +27,20 @@ def run_pipeline(some_float, some_integer, some_cat): # neps uses the default values and a confidence in this default value to construct a prior # that speeds up the search pipeline_space = dict( - some_float=neps.FloatParameter( - lower=1, upper=1000, log=True, default=900, default_confidence="medium" + some_float=neps.Float( + lower=1, upper=1000, log=True, prior=900, prior_confidence="medium" ), - some_integer=neps.IntegerParameter( - lower=0, upper=50, default=35, default_confidence="low" + some_integer=neps.Integer( + lower=0, upper=50, prior=35, prior_confidence="low" ), - some_cat=neps.CategoricalParameter( - choices=["a", "b", "c"], default="a", default_confidence="high" + some_cat=neps.Categorical( + choices=["a", "b", "c"], prior="a", prior_confidence="high" ), ) logging.basicConfig(level=logging.INFO) neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space, root_directory="results/user_priors_example", max_evaluations_total=15, diff --git a/neps_examples/efficiency/multi_fidelity.py b/neps_examples/efficiency/multi_fidelity.py index bdbcc9650..8f0eedc66 100644 --- a/neps_examples/efficiency/multi_fidelity.py +++ b/neps_examples/efficiency/multi_fidelity.py @@ -1,4 +1,5 @@ import logging +from warnings import warn import numpy as np import torch @@ -43,6 +44,10 @@ def get_model_and_optimizer(learning_rate): def run_pipeline(pipeline_directory, previous_pipeline_directory, learning_rate, epoch): + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning) + return evaluate_pipeline(pipeline_directory, previous_pipeline_directory, 
learning_rate, epoch) + +def evaluate_pipeline(pipeline_directory, previous_pipeline_directory, learning_rate, epoch): model, optimizer = get_model_and_optimizer(learning_rate) checkpoint_name = "checkpoint.pth" @@ -67,19 +72,19 @@ def run_pipeline(pipeline_directory, previous_pipeline_directory, learning_rate, pipeline_directory / checkpoint_name, ) - loss = np.log(learning_rate / epoch) # Replace with actual error + objective_to_minimize = np.log(learning_rate / epoch) # Replace with actual error epochs_spent_in_this_call = epoch - epochs_previously_spent # Optional for stopping - return dict(loss=loss, cost=epochs_spent_in_this_call) + return dict(objective_to_minimize=objective_to_minimize, cost=epochs_spent_in_this_call) pipeline_space = dict( - learning_rate=neps.FloatParameter(lower=1e-4, upper=1e0, log=True), - epoch=neps.IntegerParameter(lower=1, upper=10, is_fidelity=True), + learning_rate=neps.Float(lower=1e-4, upper=1e0, log=True), + epoch=neps.Integer(lower=1, upper=10, is_fidelity=True), ) logging.basicConfig(level=logging.INFO) neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space, root_directory="results/multi_fidelity_example", # Optional: Do not start another evaluation after <=50 epochs, corresponds to cost diff --git a/neps_examples/efficiency/multi_fidelity_and_expert_priors.py b/neps_examples/efficiency/multi_fidelity_and_expert_priors.py index 032b83dfa..6a7655b0d 100644 --- a/neps_examples/efficiency/multi_fidelity_and_expert_priors.py +++ b/neps_examples/efficiency/multi_fidelity_and_expert_priors.py @@ -1,4 +1,5 @@ import logging +from warnings import warn import numpy as np @@ -6,26 +7,30 @@ def run_pipeline(float1, float2, integer1, fidelity): - loss = -float(np.sum([float1, float2, integer1])) / fidelity - return loss + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning) + return evaluate_pipeline(float1, float2, integer1, fidelity) + +def evaluate_pipeline(float1, float2, integer1, fidelity): + objective_to_minimize = -float(np.sum([float1, float2, integer1])) / fidelity + return objective_to_minimize pipeline_space = dict( - float1=neps.FloatParameter( - lower=1, upper=1000, log=False, default=600, default_confidence="medium" + float1=neps.Float( + lower=1, upper=1000, log=False, prior=600, prior_confidence="medium" ), - float2=neps.FloatParameter( - lower=-10, upper=10, default=0, default_confidence="medium" + float2=neps.Float( + lower=-10, upper=10, prior=0, prior_confidence="medium" ), - integer1=neps.IntegerParameter( - lower=0, upper=50, default=35, default_confidence="low" + integer1=neps.Integer( + lower=0, upper=50, prior=35, prior_confidence="low" ), - fidelity=neps.IntegerParameter(lower=1, upper=10, is_fidelity=True), + fidelity=neps.Integer(lower=1, upper=10, is_fidelity=True), ) logging.basicConfig(level=logging.INFO) neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space, root_directory="results/multifidelity_priors", max_evaluations_total=25, # For an alternate stopping method see multi_fidelity.py diff --git a/neps_examples/experimental/cost_aware.py b/neps_examples/experimental/cost_aware.py deleted file mode 100644 index e6cd5ebfd..000000000 --- a/neps_examples/experimental/cost_aware.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging -import time - -import numpy as np - -import neps - - -def run_pipeline( - pipeline_directory, float1, float2, categorical, integer1, integer2 -): - start = time.time() - 
y = -float(np.sum([float1, float2, int(categorical), integer1, integer2])) - end = time.time() - return { - "loss": y, - "cost": (end - start) + float1, - } - - -pipeline_space = dict( - float1=neps.FloatParameter(lower=0, upper=1, log=False), - float2=neps.FloatParameter( - lower=0, upper=10, log=False, default=10, default_confidence="medium" - ), - categorical=neps.CategoricalParameter(choices=[0, 1]), - integer1=neps.IntegerParameter(lower=0, upper=1, log=False), - integer2=neps.IntegerParameter(lower=0, upper=1, log=False), -) - -logging.basicConfig(level=logging.INFO) -neps.run( - run_pipeline=run_pipeline, - pipeline_space=pipeline_space, - root_directory="results/cost_aware_example", - searcher="cost_cooling", - max_evaluations_total=12, # TODO(Jan): remove - initial_design_size=5, - budget=100, -) -previous_results, pending_configs = neps.status("results/cost_aware_example") diff --git a/neps_examples/experimental/expert_priors_for_architecture_and_hyperparameters.py b/neps_examples/experimental/expert_priors_for_architecture_and_hyperparameters.py index fd01f7c14..073f69925 100644 --- a/neps_examples/experimental/expert_priors_for_architecture_and_hyperparameters.py +++ b/neps_examples/experimental/expert_priors_for_architecture_and_hyperparameters.py @@ -1,5 +1,6 @@ import logging import time +from warnings import warn from torch import nn @@ -64,12 +65,16 @@ def set_recursive_attribute(op_name, predecessor_values): - in_channels = 64 if predecessor_values is None else predecessor_values["C_out"] + in_channels = 64 if predecessor_values is None else predecessor_values["c_out"] out_channels = in_channels * 2 if op_name == "ResNetBasicblock" else in_channels - return dict(C_in=in_channels, C_out=out_channels) + return dict(c_in=in_channels, c_out=out_channels) def run_pipeline(some_architecture, some_float, some_integer, some_cat): + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning) + return evaluate_pipeline(some_architecture, some_float, some_integer, some_cat) + +def evaluate_pipeline(some_architecture, some_float, some_integer, some_cat): start = time.time() in_channels = 3 @@ -79,7 +84,7 @@ def run_pipeline(some_architecture, some_float, some_integer, some_cat): model = some_architecture.to_pytorch() model = nn.Sequential( - ops.Stem(base_channels, C_in=in_channels), + ops.Stem(base_channels, c_in=in_channels), model, nn.AdaptiveAvgPool2d(1), nn.Flatten(), @@ -97,7 +102,7 @@ def run_pipeline(some_architecture, some_float, some_integer, some_cat): end = time.time() return { - "loss": y, + "objective_to_minimize": y, "info_dict": { "test_score": y, "train_time": end - start, @@ -106,29 +111,29 @@ def run_pipeline(some_architecture, some_float, some_integer, some_cat): pipeline_space = dict( - some_architecture=neps.FunctionParameter( + some_architecture=neps.Function( set_recursive_attribute=set_recursive_attribute, structure=structure, primitives=primitives, name="pibo", prior=prior_distr, ), - some_float=neps.FloatParameter( - lower=1, upper=1000, log=True, default=900, default_confidence="medium" + some_float=neps.Float( + lower=1, upper=1000, log=True, prior=900, prior_confidence="medium" ), - some_integer=neps.IntegerParameter( - lower=0, upper=50, default=35, default_confidence="low" + some_integer=neps.Integer( + lower=0, upper=50, prior=35, prior_confidence="low" ), - some_cat=neps.CategoricalParameter( - choices=["a", "b", "c"], default="a", default_confidence="high" + some_cat=neps.Categorical( + choices=["a", "b", "c"], prior="a", 
prior_confidence="high" ), ) logging.basicConfig(level=logging.INFO) neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space, root_directory="results/user_priors_with_graphs", max_evaluations_total=15, - log_prior_weighted=True, + use_priors=True, ) diff --git a/neps_examples/experimental/fault_tolerance.py b/neps_examples/experimental/fault_tolerance.py deleted file mode 100644 index 9c4a9d2c5..000000000 --- a/neps_examples/experimental/fault_tolerance.py +++ /dev/null @@ -1,91 +0,0 @@ -""" To test the fault tolerance, run this script multiple times. -""" - -import logging - -import torch -import torch.nn.functional as F -from torch import nn, optim - -import neps - - -class TheModelClass(nn.Module): - """Taken from https://pytorch.org/tutorials/beginner/saving_loading_models.html""" - - def __init__(self): - super().__init__() - self.conv1 = nn.Conv2d(3, 6, 5) - self.pool = nn.MaxPool2d(2, 2) - self.conv2 = nn.Conv2d(6, 16, 5) - self.fc1 = nn.Linear(16 * 5 * 5, 120) - self.fc2 = nn.Linear(120, 84) - self.fc3 = nn.Linear(84, 10) - - def forward(self, x): - x = self.pool(F.relu(self.conv1(x))) - x = self.pool(F.relu(self.conv2(x))) - x = x.view(-1, 16 * 5 * 5) - x = F.relu(self.fc1(x)) - x = F.relu(self.fc2(x)) - x = self.fc3(x) - return x - - -def get_model_and_optimizer(learning_rate): - """Taken from https://pytorch.org/tutorials/beginner/saving_loading_models.html""" - model = TheModelClass() - optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9) - return model, optimizer - - -def run_pipeline(pipeline_directory, learning_rate): - model, optimizer = get_model_and_optimizer(learning_rate) - checkpoint_path = pipeline_directory / "checkpoint.pth" - - # Check if there is a previous state of the model training that crashed - if checkpoint_path.exists(): - checkpoint = torch.load(checkpoint_path) - model.load_state_dict(checkpoint["model_state_dict"]) - optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) - epoch_already_trained = checkpoint["epoch"] - print(f"Read in model trained for {epoch_already_trained} epochs") - else: - epoch_already_trained = 0 - - for epoch in range(epoch_already_trained, 101): - epoch += 1 - - # Train model here .... - - # Repeatedly save your progress - if epoch % 10 == 0: - torch.save( - { - "epoch": epoch, - "model_state_dict": model.state_dict(), - "optimizer_state_dict": optimizer.state_dict(), - }, - checkpoint_path, - ) - - # Here we simulate a crash! E.g., due to job runtime limits - if epoch == 50 and learning_rate < 0.2: - print("Oh no! 
A simulated crash!") - exit() - - return learning_rate # Replace with actual error - - -pipeline_space = dict( - learning_rate=neps.FloatParameter(lower=0, upper=1), -) - -logging.basicConfig(level=logging.INFO) -neps.run( - run_pipeline=run_pipeline, - pipeline_space=pipeline_space, - root_directory="results/fault_tolerance_example", - max_evaluations_total=15, -) -previous_results, pending_configs = neps.status("results/fault_tolerance_example") diff --git a/neps_examples/experimental/freeze_thaw.py b/neps_examples/experimental/freeze_thaw.py new file mode 100644 index 000000000..9c63d109e --- /dev/null +++ b/neps_examples/experimental/freeze_thaw.py @@ -0,0 +1,180 @@ +import logging +from pathlib import Path +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader +from torchvision import datasets, transforms + +import neps +from neps import tblogger +from neps.plot.plot3D import Plotter3D + + +class SimpleNN(nn.Module): + def __init__(self, input_size, num_layers, num_neurons): + super().__init__() + layers = [nn.Flatten()] + + for _ in range(num_layers): + layers.append(nn.Linear(input_size, num_neurons)) + layers.append(nn.ReLU()) + input_size = num_neurons # Set input size for the next layer + + layers.append(nn.Linear(num_neurons, 10)) # Output layer for 10 classes + self.model = nn.Sequential(*layers) + + def forward(self, x): + return self.model(x) + + +def training_pipeline( + pipeline_directory, + previous_pipeline_directory, + num_layers, + num_neurons, + epochs, + learning_rate, + weight_decay +): + """ + Trains and validates a simple neural network on the MNIST dataset. + + Args: + num_layers (int): Number of hidden layers in the network. + num_neurons (int): Number of neurons in each hidden layer. + epochs (int): Number of training epochs. + learning_rate (float): Learning rate for the optimizer. + optimizer (str): Name of the optimizer to use ('adam' or 'sgd'). + + Returns: + float: The average objective_to_minimize over the validation set after training. + + Raises: + KeyError: If the specified optimizer is not supported. 
+ """ + # Transformations applied on each image + transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize( + (0.1307,), (0.3081,) + ), # Mean and Std Deviation for MNIST + ] + ) + + # Loading MNIST dataset + dataset = datasets.MNIST( + root="./.data", train=True, download=True, transform=transform + ) + train_set, val_set = torch.utils.data.random_split(dataset, [50000, 10000]) + train_loader = DataLoader(train_set, batch_size=64, shuffle=True) + val_loader = DataLoader(val_set, batch_size=1000, shuffle=False) + + model = SimpleNN(28 * 28, num_layers, num_neurons) + criterion = nn.CrossEntropyLoss() + + # Select optimizer + optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay) + + # Loading potential checkpoint + start_epoch = 1 + if previous_pipeline_directory is not None: + if (Path(previous_pipeline_directory) / "checkpoint.pt").exists(): + states = torch.load(Path(previous_pipeline_directory) / "checkpoint.pt") + model = states["model"] + optimizer = states["optimizer"] + start_epoch = states["epochs"] + + # Training loop + for epoch in range(start_epoch, epochs + 1): + model.train() + for batch_idx, (data, target) in enumerate(train_loader): + optimizer.zero_grad() + output = model(data) + objective_to_minimize = criterion(output, target) + objective_to_minimize.backward() + optimizer.step() + + # Validation loop + model.eval() + val_objective_to_minimize = 0 + val_correct = 0 + val_total = 0 + with torch.no_grad(): + for data, target in val_loader: + output = model(data) + val_objective_to_minimize += criterion(output, target).item() + + # Get the predicted class + _, predicted = torch.max(output.data, 1) + + # Count correct predictions + val_total += target.size(0) + val_correct += (predicted == target).sum().item() + + val_objective_to_minimize /= len(val_loader.dataset) + val_err = 1 - val_correct / val_total + + # Saving checkpoint + states = { + "model": model, + "optimizer": optimizer, + "epochs": epochs, + } + torch.save(states, Path(pipeline_directory) / "checkpoint.pt") + + # Logging + tblogger.log( + objective_to_minimize=val_objective_to_minimize, + current_epoch=epochs, + # Set to `True` for a live incumbent trajectory. + write_summary_incumbent=True, + # Set to `True` for a live objective_to_minimize trajectory for each config. + writer_config_scalar=True, + # Set to `True` for live parallel coordinate, scatter plot matrix, and table view. 
+ writer_config_hparam=True, + # Appending extra data + extra_data={ + "train_objective_to_minimize": tblogger.scalar_logging(objective_to_minimize.item()), + "val_err": tblogger.scalar_logging(val_err), + }, + ) + + return val_err + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + pipeline_space = { + "learning_rate": neps.Float(1e-5, 1e-1, log=True), + "num_layers": neps.Integer(1, 5), + "num_neurons": neps.Integer(64, 128), + "weight_decay": neps.Float(1e-5, 0.1, log=True), + "epochs": neps.Integer(1, 10, is_fidelity=True), + } + + neps.run( + pipeline_space=pipeline_space, + evaluate_pipeline=training_pipeline, + searcher="ifbo", + max_evaluations_total=50, + root_directory="./debug/ifbo-mnist/", + overwrite_working_directory=False, # set to False for a multi-worker run + # (optional) ifbo hyperparameters + step_size=1, + # (optional) ifbo surrogate model hyperparameters (for FT-PFN) + surrogate_model_args=dict( + version="0.0.1", + target_path=None, + ), + ) + + # NOTE: this is `experimental` and may not work as expected + ## plotting a 3D plot for learning curves explored by ifbo + plotter = Plotter3D( + run_path="./debug/ifbo-mnist/", # same as `root_directory` above + fidelity_key="epochs", # same as `pipeline_space` + ) + plotter.plot3D(filename="ifbo") diff --git a/neps_examples/experimental/hierarchical_architecture.py b/neps_examples/experimental/hierarchical_architecture.py index db101be40..440b116aa 100644 --- a/neps_examples/experimental/hierarchical_architecture.py +++ b/neps_examples/experimental/hierarchical_architecture.py @@ -1,4 +1,7 @@ -from __future__ import annotations +raise NotImplementedError( + "Support for graphs was temporarily removed, if you'd like to use a version" + " of NePS that supports graphs, please use version v0.12.2" +) import logging @@ -56,12 +59,12 @@ def set_recursive_attribute(op_name, predecessor_values): - in_channels = 64 if predecessor_values is None else predecessor_values["C_out"] + in_channels = 64 if predecessor_values is None else predecessor_values["c_out"] out_channels = in_channels * 2 if op_name == "ResNetBasicblock" else in_channels - return dict(C_in=in_channels, C_out=out_channels) + return dict(c_in=in_channels, c_out=out_channels) -def run_pipeline(architecture): +def run_pipeline(architecture: neps.Function): in_channels = 3 n_classes = 20 base_channels = 64 @@ -69,7 +72,7 @@ def run_pipeline(architecture): model = architecture.to_pytorch() model = nn.Sequential( - ops.Stem(base_channels, C_in=in_channels), + ops.Stem(base_channels, c_in=in_channels), model, nn.AdaptiveAvgPool2d(1), nn.Flatten(), @@ -83,7 +86,7 @@ def run_pipeline(architecture): pipeline_space = dict( - architecture=neps.FunctionParameter( + architecture=neps.Function( set_recursive_attribute=set_recursive_attribute, structure=structure, primitives=primitives, diff --git a/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py b/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py deleted file mode 100644 index 3db93bde6..000000000 --- a/neps_examples/experimental/hierarchical_architecture_hierarchical_GP.py +++ /dev/null @@ -1,145 +0,0 @@ -from __future__ import annotations - -import logging -import time - -from torch import nn - -import neps -from neps.optimizers.bayesian_optimization.kernels import GraphKernelMapping -from neps.optimizers.bayesian_optimization.models.gp_hierarchy import ( - ComprehensiveGPHierarchy, -) -from neps.search_spaces.architecture import primitives as ops -from 
neps.search_spaces.architecture import topologies as topos - -primitives = { - "id": ops.Identity(), - "conv3x3": {"op": ops.ReLUConvBN, "kernel_size": 3, "stride": 1, "padding": 1}, - "conv1x1": {"op": ops.ReLUConvBN, "kernel_size": 1}, - "avg_pool": {"op": ops.AvgPool1x1, "kernel_size": 3, "stride": 1}, - "downsample": {"op": ops.ResNetBasicblock, "stride": 2}, - "residual": topos.Residual, - "diamond": topos.Diamond, - "linear": topos.get_sequential_n_edge(2), - "diamond_mid": topos.DiamondMid, -} - -structure = { - "S": [ - "diamond D2 D2 D1 D1", - "diamond D1 D2 D2 D1", - "diamond D1 D1 D2 D2", - "linear D2 D1", - "linear D1 D2", - "diamond_mid D1 D2 D1 D2 D1", - "diamond_mid D2 D2 Cell D1 D1", - ], - "D2": [ - "diamond D1 D1 D1 D1", - "linear D1 D1", - "diamond_mid D1 D1 Cell D1 D1", - ], - "D1": [ - "diamond D1Helper D1Helper Cell Cell", - "diamond Cell Cell D1Helper D1Helper", - "diamond D1Helper Cell Cell D1Helper", - "linear D1Helper Cell", - "linear Cell D1Helper", - "diamond_mid D1Helper D1Helper Cell Cell Cell", - "diamond_mid Cell D1Helper D1Helper D1Helper Cell", - ], - "D1Helper": ["linear Cell downsample"], - "Cell": [ - "residual OPS OPS OPS", - "diamond OPS OPS OPS OPS", - "linear OPS OPS", - "diamond_mid OPS OPS OPS OPS OPS", - ], - "OPS": ["conv3x3", "conv1x1", "avg_pool", "id"], -} - - -def set_recursive_attribute(op_name, predecessor_values): - in_channels = 64 if predecessor_values is None else predecessor_values["C_out"] - out_channels = in_channels * 2 if op_name == "ResNetBasicblock" else in_channels - return dict(C_in=in_channels, C_out=out_channels) - - -def run_pipeline(architecture): - start = time.time() - - in_channels = 3 - n_classes = 20 - base_channels = 64 - out_channels = 512 - - model = architecture.to_pytorch() - model = nn.Sequential( - ops.Stem(base_channels, C_in=in_channels), - model, - nn.AdaptiveAvgPool2d(1), - nn.Flatten(), - nn.Linear(out_channels, n_classes), - ) - - number_of_params = sum(p.numel() for p in model.parameters()) - y = abs(1.5e7 - number_of_params) / 1.5e7 - - end = time.time() - - return { - "loss": y, - "info_dict": { - "test_score": y, - "train_time": end - start, - }, - } - - -pipeline_space = dict( - architecture=neps.FunctionParameter( - set_recursive_attribute=set_recursive_attribute, - structure=structure, - primitives=primitives, - name="makrograph", - return_graph_per_hierarchy=True, - ) -) - -early_hierarchies_considered = "0_1_2_3" -hierarchy_considered = [int(hl) for hl in early_hierarchies_considered.split("_")] -graph_kernels = ["wl"] * (len(hierarchy_considered) + 1) -wl_h = [2, 1] + [2] * (len(hierarchy_considered) - 1) -graph_kernels = [ - GraphKernelMapping[kernel]( - h=wl_h[j], - oa=False, - se_kernel=None, - ) - for j, kernel in enumerate(graph_kernels) -] -surrogate_model = ComprehensiveGPHierarchy -surrogate_model_args = { - "graph_kernels": graph_kernels, - "hp_kernels": [], - "verbose": False, - "hierarchy_consider": hierarchy_considered, - "d_graph_features": 0, - "vectorial_features": None, -} - -logging.basicConfig(level=logging.INFO) -neps.run( - run_pipeline=run_pipeline, - pipeline_space=pipeline_space, - root_directory="results/hierarchical_architecture_example_new", - max_evaluations_total=15, - searcher="bayesian_optimization", - surrogate_model=surrogate_model, - surrogate_model_args=surrogate_model_args, -) - -previous_results, pending_configs = neps.status( - "results/hierarchical_architecture_example_new" -) diff --git a/neps_examples/experimental/user_priors_from_arbitrary_densities.py 
b/neps_examples/experimental/user_priors_from_arbitrary_densities.py deleted file mode 100644 index 4c734cd2b..000000000 --- a/neps_examples/experimental/user_priors_from_arbitrary_densities.py +++ /dev/null @@ -1,151 +0,0 @@ -import neps - -def run_pipeline(some_float, some_integer, some_cat): - if some_cat != "a": - y = some_float + some_integer - else: - y = -some_float - some_integer - return y - -# ======================================================================================== -# Current API -# User prior is given as a default value and a confidence level specified in the parameter itself -pipeline_space = dict( - some_float=neps.FloatParameter( - lower=1, upper=1000, log=True, default=900, default_confidence="medium" - ), - some_integer=neps.IntegerParameter( - lower=0, upper=50, default=35, default_confidence="low" - ), - some_cat=neps.CategoricalParameter( - choices=["a", "b", "c"], default="a", default_confidence="high" - ) -) -neps.run( - run_pipeline=run_pipeline, - pipeline_space=pipeline_space, - root_directory="results", - max_evaluations_total=15, -) - -# ======================================================================================== -# New API, variant 01 -# User prior is passed to neps.run and not specified in the pipeline_space -# The prior is given as one of the following: -# 1) A (non-factorized) density function that returns the likelihood of a given parameter configuration -# 2) A dicttionary of marginal densities for each parameter. Then the factorized density is used. -# 3) A dictionary of default values and confidence levels for each parameter. Then a gaussian prior is used. - -pipeline_space = dict( - some_float=neps.FloatParameter(lower=1, upper=1000, log=True), - some_integer=neps.IntegerParameter(lower=0, upper=50), - some_cat=neps.CategoricalParameter(choices=["a", "b", "c"]) -) - -# 1) A (non-factorized) density function that returns the likelihood of a given parameter configuration -def prior_01(some_float, some_integer, some_cat): - # some exponential distribution - if some_cat != "a": - return np.exp(-(some_float + some_integer - 1)) - else: - return np.exp(-(-some_float - some_integer + 1050)) - -# 2) A dictionary of marginal densities for each parameter. Then the factorized density is used. -prior_02 = dict( - some_float=lambda x: 1/400 if 800 < x < 1000 else 1/1600, # prior on interval [800, 1000] - some_integer=lambda k: 30**k/np.math.factorial(k) * np.exp(-k), # poisson prior on integers k=30 - some_cat=lambda x: 1/2*(x=="b") + 1/3*(x=="c") + 1/6*(x=="a") -) - -# 3) A dictionary of default values and confidence levels for each parameter. Then a gaussian prior is used. 
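The experimental density-based prior proposal deleted here is superseded in the rest of this patch by per-parameter `prior=` and `prior_confidence=` arguments, which replace the old `default=`/`default_confidence=` keywords. For quick reference, a sketch of the supported form, reusing the values from the updated `efficiency/expert_priors_for_hyperparameters.py` above:

```python
import neps

# Old form (removed):
#   neps.FloatParameter(lower=1, upper=1000, log=True, default=900, default_confidence="medium")
# New form, as used throughout the updated examples:
pipeline_space = dict(
    some_float=neps.Float(
        lower=1, upper=1000, log=True, prior=900, prior_confidence="medium"
    ),
    some_integer=neps.Integer(lower=0, upper=50, prior=35, prior_confidence="low"),
    some_cat=neps.Categorical(
        choices=["a", "b", "c"], prior="a", prior_confidence="high"
    ),
)
```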
-prior_03 = dict( - some_float=dict(default=900, default_confidence="medium"), - some_integer=dict(default=35, default_confidence="low"), - some_cat=dict(default="a", default_confidence="high") -) - -# Combination of 2) and 3) -prior_04 = dict( - some_float=dict(default=900, default_confidence="medium"), - some_integer=lambda k: 30**k/np.math.factorial(k) * np.exp(-30), # Poisson prior with mean 30 - some_cat=dict(default="a", default_confidence="high") -) - -# Pass the prior to neps.run - -neps.run( - prior=prior_01, # or prior_02 or prior_03 or prior_04 - run_pipeline=run_pipeline, - pipeline_space=pipeline_space, - root_directory="results", - max_evaluations_total=15, -) - -# ======================================================================================== -# New API, variant 02 -# User prior is specified in the pipeline_space and not directly passed to neps.run -# Same possibilities for priors as in variant 01 - -# 1) A (non-factorized) density function that returns the likelihood of a given parameter configuration -def prior_01(some_float, some_integer, some_cat): - # some exponential distribution - if some_cat != "a": - return np.exp(-(some_float + some_integer - 1)) - else: - return np.exp(-(-some_float - some_integer + 1050)) - -pipeline_space_01 = dict( - some_float=neps.FloatParameter(lower=1, upper=1000, log=True), - some_integer=neps.IntegerParameter(lower=0, upper=50), - some_cat=neps.CategoricalParameter(choices=["a", "b", "c"]), - _prior=prior_01 -) - -# 2) A dictionary of marginal densities for each parameter. Then the factorized density is used. -pipeline_space_02 = dict( - some_float=neps.FloatParameter( - lower=1, upper=1000, log=True, - prior_fun=lambda x: 1/400 if 800 < x < 1000 else 1/1600 - ), - some_integer=neps.IntegerParameter(lower=0, upper=50, - prior_fun=lambda k: 30**k/np.math.factorial(k) * np.exp(-30) -), - some_cat=neps.CategoricalParameter(choices=["a", "b", "c"], - prior_fun=lambda x: 1/2*(x=="b") + 1/3*(x=="c") + 1/6*(x=="a") - ) -) - -# 3) A dictionary of default values and confidence levels for each parameter. Then a Gaussian prior is used. -# Same as in the current API -pipeline_space_03 = dict( - some_float=neps.FloatParameter( - lower=1, upper=1000, log=True, default=900, default_confidence="medium" - ), - some_integer=neps.IntegerParameter( - lower=0, upper=50, default=35, default_confidence="low" - ), - some_cat=neps.CategoricalParameter( - choices=["a", "b", "c"], default="a", default_confidence="high" - ) -) - -# Combination of 2) and 3) -pipeline_space_04 = dict( - some_float=neps.FloatParameter( - lower=1, upper=1000, log=True, default=900, default_confidence="medium", - ), - some_integer=neps.IntegerParameter( - lower=0, upper=50, - prior_fun=lambda k: 30**k/np.math.factorial(k) * np.exp(-30) - ), - some_cat=neps.CategoricalParameter( - choices=["a", "b", "c"], default="a", default_confidence="high") -) - -# Pass the pipeline_space to neps.run -neps.run( - run_pipeline=run_pipeline, - pipeline_space=pipeline_space_01, # or pipeline_space_02 or pipeline_space_03 or pipeline_space_04 - root_directory="results", - max_evaluations_total=15, -) diff --git a/neps_examples/template/basic_template.py b/neps_examples/template/basic_template.py deleted file mode 100644 index 3717150ae..000000000 --- a/neps_examples/template/basic_template.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -NOTE!!! This code is not meant to be executed. -It is only to serve as a template to help interface NePS with an existing ML/DL pipeline.
- -The following script is designed as a template for using NePS. -It describes the crucial components that a user needs to provide in order to interface with -a NePS optimizer. - -The 2 crucial components are: -* The search space, called the `pipeline_space` in NePS - * This defines the set of hyperparameters that the optimizer will search over - * This declaration also allows injecting priors in the form of defaults per hyperparameter -* The `run_pipeline` function - * This function is called by the optimizer and is responsible for running the pipeline - * The function should at the minimum expect the hyperparameters as keyword arguments - * The function should return the loss of the pipeline as a float - * If the return value is a dictionary, it should have a key called "loss" with the loss as a float - - -Overall, running an optimizer from NePS involves 4 clear steps: -1. Importing necessary packages including neps. -2. Designing the search space as a dictionary. -3. Creating the run_pipeline and returning the loss and other desired metrics. -4. Using neps.run with the optimizer of choice. -""" - -import logging - -import neps - - -logger = logging.getLogger("neps_template.run") - - -def pipeline_space() -> dict: - # Create the search space based on NEPS parameters and return the dictionary. - # Example: - space = dict( - lr=neps.FloatParameter( - lower=1e-5, - upper=1e-2, - log=True, # If True, the search space is sampled in log space - default=1e-3, # a non-None value here acts as the mode of the prior distribution - ), - ) - return space - - -def run_pipeline(**config) -> dict | float: - # Run pipeline should include the following steps: - - # 1. Defining the model. - # 1.1 Load any checkpoint if necessary - # 2. Each optimization variable should get its values from the pipeline space. - # Example: - # learning_rate = config["lr"] - # 3. The training loop - # 3.1 Save any checkpoint if necessary - # 4. Returning the loss, which can be either as a single float or as part of - # an info dictionary containing other metrics. - - # Can use global logger to log any information - logger.info(f"Running pipeline with config: {config}") - - return dict or float - - -if __name__ == "__main__": - # 1. Creating the logger - - - # 2. Passing the correct arguments to the neps.run function - # For more information on the searcher, please take a look at this link: - # https://github.com/automl/neps/tree/master/neps/optimizers/README.md - - neps.run( - run_pipeline=run_pipeline, # User TODO (defined above) - pipeline_space=pipeline_space(), # User TODO (defined above) - root_directory="results", - max_evaluations_total=10, - ) diff --git a/neps_examples/template/lightning_template.py b/neps_examples/template/lightning_template.py deleted file mode 100644 index 9c674fc4a..000000000 --- a/neps_examples/template/lightning_template.py +++ /dev/null @@ -1,175 +0,0 @@ -""" Boilerplate code to optimize a simple PyTorch Lightning model. - -NOTE!!! This code is not meant to be executed. -It is only to serve as a template to help interface NePS with an existing ML/DL pipeline. - - -The following script describes the crucial components that a user needs to provide -in order to interface with Lightning.
- -The 3 crucial components are: -* The search space, called the `pipeline_space` in NePS - * This defines the set of hyperparameters that the optimizer will search over - * This declaration also allows injecting priors in the form of defaults per hyperparameter -* The `lightning module` - * This defines the training, validation, and testing of the model - * This distributes the hyperparameters - * This can be used to create the Dataloaders for training, validation, and testing -* The `run_pipeline` function - * This function is called by the optimizer and is responsible for running the pipeline - * The function should at the minimum expect the hyperparameters as keyword arguments - * The function should return the loss of the pipeline as a float - * If the return value is a dictionary, it should have a key called "loss" with the loss as a float - -Overall, running an optimizer from NePS with Lightning involves 5 clear steps: -1. Importing necessary packages including NePS and Lightning. -2. Designing the search space as a dictionary. -3. Creating the LightningModule with the required parameters. -4. Creating the run_pipeline and returning the loss and other desired metrics. -5. Using neps.run with the optimizer of choice. - -For a more detailed guide, please refer to: -https://github.com/automl/neps/blob/master/neps_examples/convenience/neps_x_lightning.py -""" -import logging - -import lightning as L -import torch -from lightning.pytorch.callbacks import ModelCheckpoint -from lightning.pytorch.loggers import TensorBoardLogger - -import neps -from neps.utils.common import get_initial_directory, load_lightning_checkpoint - -logger = logging.getLogger("neps_template.run") - - -def pipeline_space() -> dict: - # Create the search space based on NEPS parameters and return the dictionary.
- # IMPORTANT: - space = dict( - lr=neps.FloatParameter( - lower=1e-5, - upper=1e-2, - log=True, # If True, the search space is sampled in log space - default=1e-3, # a non-None value here acts as the mode of the prior distribution - ), - optimizer=neps.CategoricalParameter(choices=["Adam", "SGD"], default="Adam"), - epochs=neps.IntegerParameter( - lower=1, - upper=9, - is_fidelity=True, # IMPORTANT to set this to True for the fidelity parameter - ), - ) - return space - - -class LitModel(L.LightningModule): - def __init__(self, configuration: dict): - super().__init__() - - self.save_hyperparameters(configuration) - - # You can now define your criterion, data transforms, model layers, and - # metrics obtained during training - - def forward(self, x: torch.Tensor) -> torch.Tensor: - # Forward pass function - pass - - def training_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor: - # Training step function - # Training metric of choice - pass - - def validation_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor: - # Validation step function - # Validation metric of choice - pass - - def test_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor: - # Test step function - # Test metric of choice - pass - - def configure_optimizers(self) -> torch.optim.Optimizer: - # Define the optimizer based on the configuration - if self.hparams.optimizer == "Adam": - optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr) - elif self.hparams.optimizer == "SGD": - optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr) - else: - raise ValueError(f"{self.hparams.optimizer} is not a valid optimizer") - return optimizer - - # Here one can now configure the dataloaders for the model - # Further details can be found here: - # https://lightning.ai/docs/pytorch/stable/data/datamodule.html - # https://github.com/automl/neps/blob/master/neps_examples/convenience/neps_x_lightning.py - - -def run_pipeline( - pipeline_directory, # The directory where the config is saved - previous_pipeline_directory, # The directory of the config's immediate lower fidelity - **config, # The hyperparameters to be used in the pipeline -) -> dict | float: - # Start by getting the initial directory which will be used to store tensorboard - # event files and checkpoint files - init_dir = get_initial_directory(pipeline_directory) - checkpoint_dir = init_dir / "checkpoints" - tensorboard_dir = init_dir / "tensorboard" - - # Create the model - model = LitModel(config) - - # Create the TensorBoard logger and the checkpoint callback - logger = TensorBoardLogger( - save_dir=tensorboard_dir, name="data", version="logs", default_hp_metric=False - ) - checkpoint_callback = ModelCheckpoint(dirpath=checkpoint_dir) - - # Checking for any checkpoint files and checkpoint data, returns None if - # no checkpoint files exist.
- checkpoint_path, checkpoint_data = load_lightning_checkpoint( - previous_pipeline_directory=previous_pipeline_directory, - checkpoint_dir=checkpoint_dir, - ) - - # Create a PyTorch Lightning Trainer - epochs = config["epochs"] - - trainer = L.Trainer( - logger=logger, - max_epochs=epochs, - callbacks=[checkpoint_callback], - ) - - # Train, test, and get their corresponding metrics - if checkpoint_path: - trainer.fit(model, ckpt_path=checkpoint_path) - else: - trainer.fit(model) - val_loss = trainer.logged_metrics.get("val_loss", None) - - trainer.test(model) - test_loss = trainer.logged_metrics.get("test_loss", None) - - # Return a dictionary with the results, or a single float value (loss) - return { - "loss": val_loss, - "info_dict": { - "test_loss": test_loss, - }, - } - - -# end of run_pipeline - -if __name__ == "__main__": - neps.run( - run_pipeline=run_pipeline, # User TODO (defined above) - pipeline_space=pipeline_space(), # User TODO (defined above) - root_directory="results", - max_evaluations_total=25, # total number of times `run_pipeline` is called - searcher="priorband", # "priorband_bo" for longer budgets, and set `initial_design_size` - ) diff --git a/neps_examples/template/priorband_template.py b/neps_examples/template/priorband_template.py deleted file mode 100644 index a8bd8f3ca..000000000 --- a/neps_examples/template/priorband_template.py +++ /dev/null @@ -1,155 +0,0 @@ -""" Boilerplate code to optimize a simple PyTorch model using PriorBand. - -NOTE!!! This code is not meant to be executed. -It is only to serve as a template to help interface NePS with an existing ML/DL pipeline. - - -The following script is designed as a template for using `PriorBand` from NePS. -It describes the crucial components that a user needs to provide in order to interface with PriorBand. - -The 2 crucial components are: -* The search space, called the `pipeline_space` in NePS - * This defines the set of hyperparameters that the optimizer will search over - * This declaration also allows injecting priors in the form of defaults per hyperparameter -* The `run_pipeline` function - * This function is called by the optimizer and is responsible for running the pipeline - * The function should at the minimum expect the hyperparameters as keyword arguments - * The function should return the loss of the pipeline as a float - * If the return value is a dictionary, it should have a key called "loss" with the loss as a float - - -Overall, running an optimizer from NePS involves 4 clear steps: -1. Importing necessary packages including neps. -2. Designing the search space as a dictionary. -3. Creating the run_pipeline and returning the loss and other desired metrics. -4. Using neps.run with the optimizer of choice. -""" - - -import logging - -import torch -import torch.nn as nn -import torch.nn.functional as F - -import neps -from neps.utils.common import load_checkpoint, save_checkpoint - -logger = logging.getLogger("neps_template.run") - - -def pipeline_space() -> dict: - # Create the search space based on NEPS parameters and return the dictionary.
- # IMPORTANT: - space = dict( - lr=neps.FloatParameter( - lower=1e-5, - upper=1e-2, - log=True, # If True, the search space is sampled in log space - default=1e-3, # a non-None value here acts as the mode of the prior distribution - ), - wd=neps.FloatParameter( - lower=0, - upper=1e-1, - log=True, - default=1e-3, - ), - epoch=neps.IntegerParameter( - lower=1, - upper=10, - is_fidelity=True, # IMPORTANT to set this to True for the fidelity parameter - ), - ) - return space - - -def run_pipeline( - pipeline_directory, # The directory where the config is saved - previous_pipeline_directory, # The directory of the config's immediate lower fidelity - **config, # The hyperparameters to be used in the pipeline -) -> dict | float: - # Defining the model - # Can define outside the function or import from a file, package, etc. - class my_model(nn.Module): - def __init__(self) -> None: - super().__init__() - self.linear1 = nn.Linear(in_features=224, out_features=512) - self.linear2 = nn.Linear(in_features=512, out_features=10) - - def forward(self, x): - x = F.relu(self.linear1(x)) - x = self.linear2(x) - return x - - # Instantiates the model - model = my_model() - - # IMPORTANT: Extracting hyperparameters from passed config - learning_rate = config["lr"] - weight_decay = config["wd"] - - # Initializing the optimizer - optimizer = torch.optim.Adam( - model.parameters(), lr=learning_rate, weight_decay=weight_decay - ) - - ## Checkpointing - # loading the checkpoint if it exists - previous_state = load_checkpoint( # predefined function from neps - directory=previous_pipeline_directory, - model=model, # relies on pass-by-reference - optimizer=optimizer, # relies on pass-by-reference - ) - # adjusting run budget based on checkpoint - if previous_state is not None: - epoch_already_trained = previous_state["epochs"] - # + Anything else saved in the checkpoint. - else: - epoch_already_trained = 0 - # + Anything else with default value. - - # Extracting target epochs from config - max_epochs = config.fidelity.value if config.has_fidelity else None - if max_epochs is None: - raise ValueError("The fidelity parameter is not defined in the config.") - - # User TODO: - # Load relevant data for training and validation - - # Actual model training - for epoch in range(epoch_already_trained, max_epochs): - # Training loop - ... - # Validation loop - ... - logger.info(f"Epoch: {epoch}, Loss: {...}, Val. acc.: {...}") - - # Save the checkpoint data in the current directory - save_checkpoint( - directory=pipeline_directory, - values_to_save={"epochs": max_epochs}, - model=model, - optimizer=optimizer, - ) - - # Return a dictionary with the results, or a single float value (loss) - return { - "loss": ..., - "info_dict": { - "train_accuracy": ..., - "test_accuracy": ..., - }, - } - - -# end of run_pipeline - - -if __name__ == "__main__": - neps.run( - run_pipeline=run_pipeline, # User TODO (defined above) - pipeline_space=pipeline_space(), # User TODO (defined above) - root_directory="results", - max_evaluations_total=25, # total number of times `run_pipeline` is called - searcher="priorband", # "priorband_bo" for longer budgets, and set `initial_design_size` - ) diff --git a/pyproject.toml b/pyproject.toml index 06b4baa48..978c5da47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,32 +1,29 @@ -[tool.poetry] +[project] name = "neural-pipeline-search" version = "v0.12.2" description = "Neural Pipeline Search helps deep learning experts find the best neural pipeline."
authors = [ - "Danny Stoll ", - "Neeratyoy Mallik ", - "Simon Schrodi", - "Eddie Bergman", - "Maciej Janowski", - "Samir Garibov", - "Tarek Abou Chakra", - "Daniel Rogalla", - "Carl Hvarfner", - "Binxin Ru", - "Nils Kober", - "Théophane Vallaeys", - "Frank Hutter", + { name = "Danny Stoll", email = "stolld@cs.uni-freiburg.de" }, + { name = "Neeratyoy Mallik", email = "mallik@cs.uni-freiburg.de" }, + { name = "Simon Schrodi" }, + { name = "Eddie Bergman" }, + { name = "Maciej Janowski" }, + { name = "Samir Garibov" }, + { name = "Tarek Abou Chakra" }, + { name = "Daniel Rogalla" }, + { name = "Carl Hvarfner" }, + { name = "Binxin Ru" }, + { name = "Nils Kober" }, + { name = "Théophane Vallaeys" }, + { name = "Frank Hutter" }, ] readme = "README.md" -license = "Apache-2.0" -homepage = "https://github.com/automl/neps" -repository = "https://github.com/automl/neps" -documentation = "https://automl.github.io/neps/" +license = { file = "LICENSE" } keywords = [ - "Neural Pipeline Search", - "Neural Architecture Search", - "Hyperparameter Optimization", - "AutoML", + "Neural Pipeline Search", + "Neural Architecture Search", + "Hyperparameter Optimization", + "AutoML", ] classifiers = [ "Development Status :: 4 - Beta", @@ -37,103 +34,117 @@ classifiers = [ "Operating System :: POSIX :: Linux", "Operating System :: Microsoft :: Windows", "Operating System :: MacOS", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: System :: Distributed Computing", ] -packages = [{ include = "neps" }, { include = "neps_examples" }] - - -[tool.poetry.dependencies] -python = ">=3.8,<3.12" -ConfigSpace = "^0.7" -grakel = "^0.1" -numpy = "^1" -pandas = "^2" -networkx = "^2.6.3" -nltk = "^3.6.4" -scipy = "^1" -torch = ">1.7.0,!=2.0.1, !=2.1.0" -matplotlib = "^3" -more-itertools = "*" -portalocker = "^2" -seaborn = "^0.13" -pyyaml = "^6" -tensorboard = "^2" -typing-extensions = "*" -torchvision = ">=0.8.0" +requires-python = ">=3.10,<3.13" -[tool.poetry.group.dev.dependencies] -ruff = "^0.4" -pre-commit = "^3" -mypy = "^1" -pytest = "^7" -pytest-cases = "^3" -types-PyYAML = "^6" -mkdocs-material = "*" -mkdocs-autorefs = "*" -mkdocs-gen-files = "*" -mkdocstrings = { extras = ["python"], version = "*" } -mkdocs-literate-nav = "*" -mike = "*" -black = "*" # This allows mkdocstrings to format signatures in the docs +dependencies = [ + "ConfigSpace>=0.7,<1.0", + "grakel>=0.1,<0.2", + "numpy>=1.0,<2.0", + "pandas>=2.0,<3.0", + "networkx>=2.6.3,<3.0", + "nltk>=3.6.4,<4.0", + "scipy>=1.13.1", + "torch>=2.0.1", + "matplotlib>=3.0,<4.0", + "more-itertools", + "portalocker>=2.0,<3.0", + "seaborn>=0.13,<0.14", + "pyyaml>=6.0,<7.0", + "tensorboard>=2.0,<3.0", + "typing-extensions", + "torchvision>=0.8.0", + "ifbo>=0.3.10", + "botorch>=0.12", + "gpytorch==1.13.0", +] +[project.urls] +homepage = "https://github.com/automl/neps" +repository = "https://github.com/automl/neps" +documentation = "https://automl.github.io/neps/" -[tool.poetry.group.experimental] -optional = true +[project.optional-dependencies] +dev = [ + "ruff", + "pre-commit>=3,<4", + "mypy>=1,<2", + "pytest>=7,<8", + "pytest-cases>=3,<4", + "types-PyYAML>=6,<7", + "mkdocs-material", + "mkdocs-autorefs", + "mkdocs-gen-files", + "mkdocstrings[python]", + "mkdocs-literate-nav", + "mike", + "black", # This allows mkdocstrings to format 
signatures in the docs +] -[tool.poetry.group.experimental.dependencies] -gpytorch = "1.8.0" +[tool.setuptools.packages.find] +include = [ + "neps", + "neps.*", + "neps_examples", +] +exclude = [] [build-system] -requires = ["poetry-core>=1.1.0"] -build-backend = "poetry.core.masonry.api" +requires = [ + "setuptools>=68.2.2", + "wheel>=0.41.2", +] + +build-backend = "setuptools.build_meta" # TODO(eddiebergman): Include more of these as we go on in migration # "tests", # "neps_examples", [tool.ruff] -target-version = "py38" +target-version = "py310" output-format = "full" line-length = 90 src = ["neps"] # TODO(eddiebergman): Include more of these as we go on in migration exclude = [ - "neps/optimizers/**/*.py", - "neps/search_spaces/architecture/**/*.py", - "neps/search_spaces/yaml_search_space_utils.py", - "neps/utils/run_args_from_yaml.py", - "neps/api.py", - "tests", - "neps_examples", - ".bzr", - ".direnv", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".nox", - ".pants.d", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "venv", - "docs", + "neps/optimizers/multi_fidelity_prior/utils.py", + "neps/search_spaces/architecture/**/*.py", + "neps/search_spaces/yaml_search_space_utils.py", + "neps/search_spaces/architecture", + "neps/utils/run_args_from_yaml.py", + "neps/api.py", + "tests", + "neps_examples", + ".bzr", + ".direnv", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", + "docs", ] [tool.ruff.lint] -# Extend what ruff is allowed to fix, even it it may break +# Extend what ruff is allowed to fix, even if it may break # This is okay given we use it all the time and it ensures # better practices. Would be dangerous if using for first # time on established project. @@ -143,52 +154,52 @@ extend-safe-fixes = ["ALL"] dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" select = [ - "A", - # "ANN", # Handled by mypy - "ARG", - "B", - "BLE", - "COM", - "C4", - "D", - # "DTZ", # One day I should know how to utilize timezones and dates... - "E", - # "EXE", Meh - "ERA", - "F", - "FBT", - "I", - # "ISC", # Favours implicit string concatenation - "INP", - # "INT", # I don't understand this one - "N", - "NPY", - "PD", - "PLC", - "PLE", - "PLR", - "PLW", - "PIE", - "PT", - "PTH", - # "PYI", # Specific to .pyi files for type stubs - "Q", - "PGH004", - "RET", - "RUF", - "C90", - "S", - # "SLF", # Private member accessed (sure, it's python) - "SIM", - # "TRY", # Good in principle, would take a lot of work to statisfy - "T10", - "T20", - "TID", - "TCH", - "UP", - "N", - "W", - "YTT", + "A", + # "ANN", # Handled by mypy + "ARG", + "B", + "BLE", + "COM", + "C4", + "D", + # "DTZ", # One day I should know how to utilize timezones and dates... 
+ "E", + # "EXE", Meh + "ERA", + "F", + "FBT", + "I", + # "ISC", # Favours implicit string concatenation + "INP", + # "INT", # I don't understand this one + "N", + "NPY", + "PD", + "PLC", + "PLE", + "PLR", + "PLW", + "PIE", + "PT", + "PTH", + # "PYI", # Specific to .pyi files for type stubs + "Q", + "PGH004", + "RET", + "RUF", + "C90", + "S", + # "SLF", # Private member accessed (sure, it's python) + "SIM", + # "TRY", # Good in principle, would take a lot of work to satisfy + "T10", + "T20", + "TID", + "TCH", + "UP", + "N", + "W", + "YTT", ] ignore = [ @@ -203,54 +214,64 @@ ignore = [ "S101", # Use of assert detected. "W292", # No newline at end of file "PLC1901", # "" can be simplified to be falsey - "TCH003", # Move stdlib import into TYPE_CHECKING + "TC003", # Move stdlib import into TYPE_CHECKING "B010", # Do not use `setattr` "PD901", # Use a better name than 'df' "PD011", # Use .to_numpy() instead of .values (triggers on report.values) + "PD008", # Use `.loc` instead of `.at`. If speed is important, use NumPy. "COM812", # Require trailing commas, recommended to ignore due to ruff formatter "PLR2004", # No magic numbers inline "N817", # CamelCase import as (ignore for ConfigSpace) + "N999", # Invalid name for module "NPY002", # Replace legacy `np.random.choice` call with `np.random.Generator` + "N803", # Arguments should start with a lower case letter. + "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes ] # Exclude a variety of commonly ignored directories. [tool.ruff.lint.per-file-ignores] "tests/*.py" = [ - "S101", - "D101", - "D102", - "D103", - "ANN001", - "ANN201", - "FBT001", - "D100", - "PD901", # X is a bad variable name. (pandas) - "TCH", - "N803", - "C901", # Too complex + "S101", + "D101", + "D102", + "D103", + "ANN001", + "ANN201", + "FBT001", + "D100", + "PD901", # X is a bad variable name. (pandas) + "TCH", + "N803", + "C901", # Too complex ] "__init__.py" = ["I002"] "neps_examples/*" = [ - "INP001", - "I002", - "E741", - "D101", - "D103", - "T20", - "D415", - "ERA001", - "E402", - "E501", + "INP001", + "I002", + "E741", + "D101", + "D103", + "T20", + "D415", + "ERA001", + "E402", + "E501", ] "docs/*" = ["INP001"] +# TODO +"neps/optimizers/**.py" = [ + "D", # Documentation of everything + "ARG002", # Unused arguments, bunch of them in inits + "PLR0913", # Too many arguments...
+] [tool.ruff.lint.isort] known-first-party = ["neps"] known-third-party = [] -no-lines-before = ["future"] required-imports = ["from __future__ import annotations"] +no-lines-before = ["future"] combine-as-imports = true extra-standard-library = ["typing_extensions"] force-wrap-aliases = true @@ -264,17 +285,17 @@ max-args = 10 # Changed from default of 5 [tool.pytest.ini_options] addopts = "--basetemp ./tests_tmpdir -m 'not ci_examples'" markers = [ - "ci_examples", - "core_examples", - "regression_all", - "runtime", - "neps_api", - "summary_csv", + "ci_examples", + "core_examples", + "regression_all", + "runtime", + "neps_api", + "summary_csv", ] filterwarnings = "ignore::DeprecationWarning:torch.utils.tensorboard.*:" [tool.mypy] -python_version = "3.8" # Match minimum supported version +python_version = "3.10" # Match minimum supported version packages = ["neps"] show_error_codes = true @@ -300,12 +321,32 @@ check_untyped_defs = true # TODO(eddiebergman): Improve coverage on these modules [[tool.mypy.overrides]] module = [ - "neps.api", - "neps.optimizers.*", - "neps.search_spaces.architecture.*", - "neps.utils.run_args_from_yaml", + "neps.api", + "neps.search_spaces.architecture.*", + "neps.utils.run_args_from_yaml", + "neps.optimizers.multi_fidelity.successive_halving", + "neps.optimizers.multi_fidelity.sampling_policy", + "neps.optimizers.multi_fidelity.promotion_policy", + "neps.optimizers.bayesian_optimization.acquisition_functions.ei", + "neps.optimizers.bayesian_optimization.acquisition_functions.prior_weighted", + "neps.optimizers.bayesian_optimization.acquisition_functions.ucb", + "neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition", + "neps.optimizers.bayesian_optimization.acquisition_functions.weighted_acquisition", + "requests.*", ] ignore_errors = true -[tool.poetry.scripts] -neps = "neps.utils.cli:main" +[tool.bumpversion] +current_version = "0.12.2" +commit = true +tag = true + +[[tool.bumpversion.files]] +filename = "pyproject.toml" +search = "version = \"v{current_version}\"" +replace = "version = \"v{new_version}\"" + +[[tool.bumpversion.files]] +filename = "CITATION.cff" +search = "version: {current_version}" +replace = "version: {new_version}" diff --git a/tests/joint_config_space.py b/tests/joint_config_space.py index 999e58053..1e5b2b62e 100644 --- a/tests/joint_config_space.py +++ b/tests/joint_config_space.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import ConfigSpace as CS from jahs_bench.lib.core.constants import Activations @@ -16,37 +18,37 @@ "Op1", choices=list(range(5)), default_value=0, - meta=dict(help="The operation on the first edge of the cell."), + meta={"help": "The operation on the first edge of the cell."}, ), CS.CategoricalHyperparameter( "Op2", choices=list(range(5)), default_value=0, - meta=dict(help="The operation on the second edge of the cell."), + meta={"help": "The operation on the second edge of the cell."}, ), CS.CategoricalHyperparameter( "Op3", choices=list(range(5)), default_value=0, - meta=dict(help="The operation on the third edge of the cell."), + meta={"help": "The operation on the third edge of the cell."}, ), CS.CategoricalHyperparameter( "Op4", choices=list(range(5)), default_value=0, - meta=dict(help="The operation on the fourth edge of the cell."), + meta={"help": "The operation on the fourth edge of the cell."}, ), CS.CategoricalHyperparameter( "Op5", choices=list(range(5)), default_value=0, - meta=dict(help="The operation on the fifth edge of the cell."), + meta={"help": "The operation 
on the fifth edge of the cell."}, ), CS.CategoricalHyperparameter( "Op6", choices=list(range(5)), default_value=0, - meta=dict(help="The operation on the sixth edge of the cell."), + meta={"help": "The operation on the sixth edge of the cell."}, ), # CS.OrdinalHyperparameter("Resolution", sequence=[0.25, 0.5, 1.], default_value=1., # meta=dict(help="The sample resolution of the input images w.r.t. one side of the " @@ -57,21 +59,21 @@ "TrivialAugment", choices=[True, False], default_value=False, - meta=dict( - help="Controls whether or not TrivialAugment is used for pre-processing " + meta={ + "help": "Controls whether or not TrivialAugment is used for pre-processing " "data. If False (default), a set of manually chosen transforms is " "applied during pre-processing. If True, these are skipped in favor of " "applying random transforms selected by TrivialAugment." - ), + }, ), CS.CategoricalHyperparameter( "Activation", choices=list(Activations.__members__.keys()), default_value="ReLU", - meta=dict( - help="Which activation function is to be used for the network. " + meta={ + "help": "Which activation function is to be used for the network. " "Default is ReLU." - ), + }, ), ] ) @@ -81,11 +83,11 @@ "Optimizer", choices=["SGD"], default_value="SGD", - meta=dict( - help="Which optimizer to use for training this model. " + meta={ + "help": "Which optimizer to use for training this model. " "This is just a placeholder for now, to be used " "properly in future versions." - ), + }, ) lr = CS.UniformFloatHyperparameter( "LearningRate", @@ -93,11 +95,11 @@ upper=1e0, default_value=1e-1, log=True, - meta=dict( - help="The learning rate for the optimizer used during model training. In the " + meta={ + "help": "The learning rate for the optimizer used during model training. In the " "case of adaptive learning rate optimizers such as Adam, this is the " "initial learning rate." 
- ), + }, ) weight_decay = CS.UniformFloatHyperparameter( "WeightDecay", @@ -105,7 +107,7 @@ upper=1e-2, default_value=5e-4, log=True, - meta=dict(help="Weight decay to be used by the " "optimizer during model training."), + meta={"help": "Weight decay to be used by the " "optimizer during model training."}, ) joint_config_space.add_hyperparameters([optimizers, lr, weight_decay]) diff --git a/tests/losses.json b/tests/losses.json index ea2871ef8..9c1e161c6 100644 --- a/tests/losses.json +++ b/tests/losses.json @@ -1 +1 @@ -{"random_search": {"cifar10": [10.570075988769531, 11.034774780273438, 10.087379455566406, 10.419334411621094, 10.432853698730469, 8.5814208984375, 10.457664489746094, 11.290725708007812, 10.5799560546875, 9.727294921875, 9.501747131347656, 10.170425415039062, 9.806816101074219, 10.66925048828125, 10.491233825683594, 9.634124755859375, 9.191375732421875, 8.844978332519531, 10.2298583984375, 10.434898376464844, 9.487800598144531, 10.427787780761719, 10.502761840820312, 10.279136657714844, 10.1964111328125, 11.07977294921875, 9.425407409667969, 9.701408386230469, 10.23583984375, 10.708969116210938, 9.558792114257812, 9.986930847167969, 10.56585693359375, 10.63482666015625, 11.0650634765625, 10.207893371582031, 9.922348022460938, 11.085418701171875, 11.045547485351562, 10.198143005371094, 10.31964111328125, 10.305580139160156, 10.344978332519531, 9.250556945800781, 10.117431640625, 9.896835327148438, 9.52008056640625, 9.566474914550781, 10.341537475585938, 9.719619750976562, 9.478080749511719, 9.690483093261719, 10.247886657714844, 10.712760925292969, 10.926887512207031, 10.650840759277344, 10.041717529296875, 11.061965942382812, 10.502098083496094, 10.246772766113281, 10.305030822753906, 9.34393310546875, 9.596229553222656, 10.688194274902344, 10.123970031738281, 10.976806640625, 9.254158020019531, 10.040168762207031, 9.82464599609375, 10.861183166503906, 10.738700866699219, 9.88568115234375, 10.434127807617188, 9.848808288574219, 10.386619567871094, 10.578804016113281, 9.660018920898438, 10.445465087890625, 10.763755798339844, 10.146713256835938, 10.194534301757812, 9.678909301757812, 10.699859619140625, 9.986000061035156, 9.880363464355469, 10.01849365234375, 10.228996276855469, 10.0753173828125, 10.508392333984375, 10.601631164550781, 11.081802368164062, 10.485763549804688, 9.727066040039062, 10.158500671386719, 9.9129638671875, 11.635810852050781, 10.416252136230469, 11.731185913085938, 10.537567138671875, 11.287857055664062], "fashion_mnist": [5.09051513671875, 5.3495025634765625, 5.090606689453125, 5.306755065917969, 4.944160461425781, 5.066337585449219, 5.340538024902344, 5.605537414550781, 5.409492492675781, 4.850059509277344, 5.271759033203125, 4.999168395996094, 5.353126525878906, 5.377349853515625, 5.094085693359375, 4.8185882568359375, 5.007720947265625, 5.248565673828125, 4.989860534667969, 5.30499267578125, 4.9860076904296875, 5.429695129394531, 5.023948669433594, 5.251014709472656, 5.251373291015625, 5.018852233886719, 5.361701965332031, 5.115943908691406, 5.258811950683594, 5.1905975341796875, 5.2493438720703125, 4.862884521484375, 5.296844482421875, 5.2973480224609375, 5.001739501953125, 5.132057189941406, 5.379150390625, 5.0308380126953125, 5.3088226318359375, 5.2285919189453125, 4.874839782714844, 4.875190734863281, 4.905540466308594, 5.091346740722656, 5.354927062988281, 5.405769348144531, 4.9038238525390625, 5.291114807128906, 5.022491455078125, 5.3507843017578125, 4.900177001953125, 5.125740051269531, 4.790794372558594, 4.908744812011719, 
5.427764892578125, 4.928062438964844, 5.122749328613281, 5.211883544921875, 4.912879943847656, 5.304801940917969, 5.118843078613281, 5.316253662109375, 5.3155670166015625, 4.891822814941406, 5.075309753417969, 5.0142974853515625, 5.219169616699219, 4.976043701171875, 5.457160949707031, 5.560661315917969, 5.253791809082031, 5.599967956542969, 5.248786926269531, 4.922248840332031, 5.365226745605469, 4.915000915527344, 5.304725646972656, 5.5551300048828125, 5.2591552734375, 5.234474182128906, 5.337677001953125, 5.391693115234375, 4.871734619140625, 5.320770263671875, 4.729896545410156, 5.177436828613281, 5.51544189453125, 5.3778228759765625, 5.18963623046875, 5.084320068359375, 5.321952819824219, 5.127876281738281, 5.319007873535156, 5.161949157714844, 5.094940185546875, 5.2720794677734375, 5.098762512207031, 5.069129943847656, 5.495567321777344, 5.208320617675781], "hartmann3": [-3.7983644860179906, -3.4647375444431776, -3.724169378773501, -3.504482275855162, -3.5785228865447265, -3.2721948147194295, -3.424084036888103, -3.7072770292237176, -3.809994424648415, -3.6122056535539344, -3.5854465335983425, -3.844968774956946, -3.7347735478598283, -3.3691644158659364, -3.5412900367276676, -3.475734478264119, -3.8200439315391703, -3.83138442558726, -3.477400690334344, -3.4059398111190027, -3.7302972540667945, -3.1786964283460017, -3.5846499739643733, -3.256067420434629, -3.7187271090335243, -3.7826435340290523, -3.6342242353280025, -3.7605790715819465, -3.84909095948994, -3.5053069712058815, -3.7184922209021334, -3.800851244225936, -3.640804694679649, -3.3445513278128844, -3.8286547389644268, -3.573509742962088, -3.3190981974729334, -3.7609483078955943, -3.7744748548210194, -3.740621329285652, -3.6575432714187976, -3.686124319032891, -3.2467145074719324, -3.613123426231481, -3.8021768045842785, -3.621244873548109, -3.6192377014972266, -3.790856298972633, -3.548585974001048, -3.5592414866476254, -3.748377631924132, -3.7640467078382986, -3.7261908737840193, -3.581735983650286, -3.497987958071237, -3.720547971292352, -3.5412710119799535, -3.7391815825328405, -3.707270453492867, -3.666516126665965, -3.6723740963465534, -3.860704880219101, -3.3901487007606894, -3.6526042397434133, -3.821241682607301, -3.7313455393716906, -3.617482549039436, -3.8578496678333662, -3.6637739511791554, -3.7972669220230264, -3.5790734458422886, -3.7957396427176944, -3.4887409887061613, -3.778853898995849, -3.681495553302428, -3.692316299676422, -3.6735452557842283, -3.5961136884767786, -3.6657513195141593, -3.40861050776996, -3.8221486334983172, -3.767732293800785, -3.1725058921931346, -3.7536159697200975, -3.732562952763116, -3.3891581960764374, -3.4043726566370287, -3.569473096316691, -3.5687144990134247, -3.614801958410597, -3.799566283154234, -3.7199199442907918, -3.7185932588354116, -3.7187450018200634, -3.7366497076525995, -3.7475969099503734, -3.8002552452531257, -3.4060344407612617, -3.6688420010180787, -3.8308094701889], "hartmann6": [-2.942062167483819, -1.4270925701133137, -2.1670911981727583, -2.1391854030934256, -1.9703696121582686, -2.594122312444073, -2.765667751003329, -1.8110638451435552, -2.6196122976169867, -2.2412214116916607, -2.452084960741839, -1.8427121515062914, -2.608997350701581, -2.9014141339786677, -1.890320716157041, -2.545449071520854, -2.6583290500643884, -1.6054330860086385, -2.5408742738896675, -1.7074266404496088, -1.85969822199324, -2.65282030100074, -2.372449979122238, -1.4817042674371381, -2.533274868880178, -2.6426776279101176, -2.8844629338229093, -2.1509993233474045, 
-2.4576802851489497, -2.754241097009174, -2.8614515046033975, -3.1393263421404116, -2.5080712124365503, -1.9081559295607045, -2.144271742789371, -2.3086881255087097, -2.4972047804985724, -2.4153105932057017, -2.4135416612728533, -2.1755330721224277, -2.8801109890706234, -2.532838834617856, -3.0013330323245184, -2.845572945180195, -2.200193341199692, -1.7219543694068289, -2.6192546058487793, -2.2359822745696376, -2.1176613414017162, -1.8822616390525695, -2.4086827634039714, -2.2816243000743723, -2.165480067594273, -2.551451690430731, -2.638452146179405, -2.49884193760565, -2.357393575663534, -1.8583791294401601, -1.9636624434435468, -1.9951930274271241, -1.886440016674821, -2.432228785576797, -2.624508437250958, -2.7519888636176346, -2.9086333455217708, -2.0741067316762414, -2.446973157988633, -1.6462176720172292, -2.774427246629535, -2.201021955667192, -2.8070888873925735, -2.9702639208221466, -2.5146096807555276, -2.4388073485628983, -2.711317839707429, -2.4530651355252613, -2.734370008195378, -2.5801823248997273, -2.617856907872104, -2.0293432923318777, -1.93754359750606, -2.404251688411716, -2.0994030071591565, -1.9350609878168923, -2.263173223412282, -2.1345613646378223, -2.4387632069846914, -2.0504854619835515, -1.4384752897760327, -2.1621792846176895, -1.9256183341026185, -1.6618419098621113, -2.567175842548426, -1.8326320955843476, -2.0687530228373707, -1.8359872962404584, -2.444666881848311, -2.1568004943308154, -2.8749755249504685, -1.7131984047333892]}, "mf_bayesian_optimization": {"cifar10": [8.102218627929688, 9.120758056640625, 9.197235107421875, 9.314567565917969, 10.190299987792969, 9.6614990234375, 8.895378112792969, 9.573394775390625, 9.210296630859375, 8.729545593261719, 8.708778381347656, 10.21875, 10.008346557617188, 8.446937561035156, 9.194610595703125, 9.100334167480469, 10.321441650390625, 8.973121643066406, 9.705284118652344, 9.438888549804688, 9.307945251464844, 8.909683227539062, 9.4857177734375, 9.239250183105469, 8.85089111328125, 10.386962890625, 9.804962158203125, 10.164108276367188, 8.109199523925781, 8.016578674316406, 8.767509460449219, 8.346847534179688, 8.876449584960938, 9.234138488769531, 9.700332641601562, 8.373954772949219, 9.015975952148438, 8.562347412109375, 9.981231689453125, 8.512466430664062, 9.041938781738281, 9.176300048828125, 8.693832397460938, 9.337608337402344, 9.373138427734375, 8.25970458984375, 11.478584289550781, 9.039764404296875, 8.903793334960938, 7.851806640625, 9.292510986328125, 10.145538330078125, 8.894271850585938, 9.318794250488281, 9.97802734375, 10.720413208007812, 9.400299072265625, 9.318153381347656, 10.211288452148438, 8.429229736328125, 8.929550170898438, 9.603477478027344, 9.132301330566406, 8.73175048828125, 9.69183349609375, 9.170875549316406, 10.596893310546875, 8.982208251953125, 8.338356018066406, 10.031143188476562, 9.769561767578125, 9.627120971679688, 8.767288208007812, 9.834907531738281, 8.803001403808594, 8.625770568847656, 9.299758911132812, 9.298690795898438, 9.5645751953125, 9.03472900390625, 9.215667724609375, 9.008460998535156, 9.05419921875, 9.662025451660156, 8.660446166992188, 9.04095458984375, 10.133522033691406, 9.288856506347656, 10.933906555175781, 7.94622802734375, 8.634315490722656, 8.298187255859375, 9.50299072265625, 9.299980163574219, 9.237464904785156, 8.387069702148438, 9.435394287109375, 9.357254028320312, 9.201988220214844, 8.980323791503906], "fashion_mnist": [4.912925720214844, 4.808631896972656, 5.056343078613281, 5.1057891845703125, 4.89794921875, 4.8985443115234375, 4.901756286621094, 
4.851066589355469, 5.0069122314453125, 4.854034423828125, 4.8776397705078125, 5.075859069824219, 4.946296691894531, 5.324104309082031, 5.109672546386719, 5.163368225097656, 4.86181640625, 5.0963134765625, 4.693626403808594, 4.952384948730469, 5.1372528076171875, 4.749778747558594, 4.839080810546875, 4.9800872802734375, 4.949943542480469, 4.892311096191406, 4.92108154296875, 5.054450988769531, 4.80377197265625, 5.033592224121094, 4.9016876220703125, 4.927215576171875, 5.0565948486328125, 5.106781005859375, 4.75128173828125, 4.719673156738281, 4.8021240234375, 5.003562927246094, 4.785224914550781, 5.1726531982421875, 5.156364440917969, 5.115470886230469, 4.777351379394531, 4.98150634765625, 4.730522155761719, 5.6309814453125, 5.045310974121094, 4.917938232421875, 5.200675964355469, 4.863861083984375, 4.795806884765625, 5.223625183105469, 5.415069580078125, 5.372398376464844, 4.945793151855469, 5.2683258056640625, 5.101051330566406, 5.241966247558594, 4.903839111328125, 4.976020812988281, 4.9986724853515625, 5.24957275390625, 5.062644958496094, 5.031547546386719, 5.14990234375, 4.78924560546875, 5.056243896484375, 4.898658752441406, 5.126441955566406, 4.981636047363281, 5.2509307861328125, 4.970909118652344, 5.175140380859375, 5.048194885253906, 5.134803771972656, 5.205291748046875, 4.92401123046875, 5.131980895996094, 5.33258056640625, 5.019996643066406, 5.0273895263671875, 5.010467529296875, 5.144783020019531, 5.032691955566406, 4.8980255126953125, 5.2896270751953125, 5.2210845947265625, 5.11920166015625, 5.291419982910156, 5.0905609130859375, 5.156303405761719, 4.8618011474609375, 4.871925354003906, 4.887908935546875, 4.993476867675781, 5.145660400390625, 4.9629974365234375, 5.022300720214844, 4.946128845214844, 5.3415069580078125]}, "bayesian_optimization": {"cifar10": [11.834663391113281, 10.850135803222656, 13.106803894042969, 10.621971130371094, 12.089996337890625, 11.989456176757812, 9.944725036621094, 12.151741027832031, 9.771446228027344, 9.653892517089844, 14.155509948730469, 9.849746704101562, 11.660682678222656, 9.726371765136719, 8.373817443847656, 8.547760009765625, 8.521583557128906, 8.890823364257812, 9.660530090332031, 10.417236328125, 10.407752990722656, 10.489616394042969, 8.149627685546875, 9.338088989257812, 9.880485534667969, 9.053581237792969, 10.286293029785156, 9.050323486328125, 12.33587646484375, 9.067794799804688, 10.680534362792969, 10.400833129882812, 7.941017150878906, 8.674705505371094, 8.025360107421875, 8.910812377929688, 9.311843872070312, 9.874847412109375, 7.759437561035156, 11.285186767578125, 8.689544677734375, 10.093734741210938, 9.649566650390625, 12.401290893554688, 9.348533630371094, 8.556083679199219, 9.835929870605469, 11.356254577636719, 11.3424072265625, 9.509292602539062, 9.295768737792969, 11.967460632324219, 10.924751281738281, 13.746360778808594, 10.807502746582031, 12.209007263183594, 11.160293579101562, 8.994827270507812, 10.222831726074219, 10.316673278808594, 8.706809997558594, 8.860237121582031, 8.495071411132812, 10.071884155273438, 11.452072143554688, 11.503829956054688, 10.215606689453125, 8.048088073730469, 8.259941101074219, 11.198013305664062, 10.408485412597656, 11.677360534667969, 10.564231872558594, 8.570747375488281, 8.69677734375, 8.675621032714844, 11.110565185546875, 9.985069274902344, 12.029739379882812, 8.316459655761719, 9.269187927246094, 15.092262268066406, 9.952056884765625, 9.06463623046875, 9.109573364257812, 9.456016540527344, 8.761322021484375, 12.986724853515625, 8.250045776367188, 8.607147216796875, 
8.646743774414062, 8.980308532714844, 13.667716979980469, 11.633316040039062, 10.320816040039062, 12.442756652832031, 8.036582946777344, 8.479621887207031, 9.891937255859375, 11.2366943359375], "fashion_mnist": [4.9603424072265625, 5.071281433105469, 4.962150573730469, 4.827796936035156, 5.347526550292969, 4.6628875732421875, 4.999702453613281, 4.886085510253906, 5.3385467529296875, 4.770286560058594, 4.6191864013671875, 4.685035705566406, 4.7356719970703125, 4.6202850341796875, 5.357818603515625, 5.702720642089844, 5.026557922363281, 4.743721008300781, 4.7940521240234375, 4.773796081542969, 5.129631042480469, 4.84552001953125, 5.792655944824219, 5.2164764404296875, 4.673927307128906, 4.660346984863281, 4.624336242675781, 5.1123504638671875, 5.088066101074219, 5.051849365234375, 4.8744964599609375, 5.350379943847656, 4.981048583984375, 4.9394378662109375, 4.8551177978515625, 4.895240783691406, 5.573570251464844, 5.437744140625, 5.227935791015625, 4.942718505859375, 4.915061950683594, 4.697944641113281, 5.247138977050781, 4.7621612548828125, 4.958221435546875, 5.188468933105469, 5.064666748046875, 4.766532897949219, 5.066398620605469, 4.884849548339844, 4.673484802246094, 4.997337341308594, 4.749031066894531, 4.78302001953125, 4.9005889892578125, 4.967002868652344, 4.698448181152344, 4.86181640625, 5.0426483154296875, 4.780479431152344, 4.6356658935546875, 4.8165435791015625, 5.2922515869140625, 4.7222900390625, 5.3287200927734375, 4.797332763671875, 4.7588958740234375, 5.6631011962890625, 5.182411193847656, 5.007575988769531, 4.9674072265625, 4.693756103515625, 4.718101501464844, 5.490745544433594, 4.863746643066406, 5.2880859375, 5.3683624267578125, 4.564300537109375, 4.8800506591796875, 5.093727111816406, 5.62841796875, 4.820304870605469, 4.636566162109375, 5.5658416748046875, 5.124908447265625, 5.017478942871094, 5.150848388671875, 4.7201080322265625, 5.049140930175781, 4.790504455566406, 5.594146728515625, 5.1763458251953125, 5.105926513671875, 4.721961975097656, 5.21624755859375, 4.770225524902344, 5.327949523925781, 4.7087249755859375, 4.6690673828125, 5.120002746582031], "hartmann3": [-3.8178953072486967, -3.704775176517175, -3.8616219953873934, -3.5335807178711205, -3.7453646895666224, -3.724349458315775, -3.8082700218796206, -3.8112266622014936, -3.6381864416516088, -3.757543399794309, -3.717965871204767, -3.857327714892407, -3.755620258329924, -3.7764124673312653, -3.8166786066128644, -3.3266149308162176, -3.6318030080195567, -3.6783783725352532, -3.828153557778992, -3.7445079370049683, -3.7748854966315606, -3.768296234547888, -3.5998019566930024, -3.396255586934601, -3.8565237529701757, -3.6704283751493643, -3.8143244378302397, -1.9604315298529318, -3.613903912451665, -3.7866736749598773, -3.772166915524116, -3.7557386131680284, -3.75795099598279, -3.795670231190154, -3.8618487074128836, -3.8318902221548905, -3.6560392853114103, -3.7088067903799655, -3.6229461526417706, -3.7796465581026544, -3.789455489311304, -3.364116534127566, -3.7297619176629064, -3.4017365238028496, -3.6427163466230876, -3.3534894320962003, -3.6923051184426523, -3.7716634314504374, -3.7845428160866206, -3.753420518879679, -3.784187054931985, -3.5838034135058146, -3.8502436470074795, -2.597000906824191, -3.453444993373902, -3.785393240641991, -3.529974457228139, -3.740708317652978, -3.734122308582061, -3.67567657536361, -3.488047110341496, -3.735042140290241, -3.596701110723103, -3.730395457081181, -3.7041648761040484, -3.5465548528547504, -3.796429126486114, -3.6896101771297314, -3.66816253719168, 
-3.809565241525358, -3.6070273168821254, -3.743868657747066, -3.637445738600241, -3.726638981152361, -3.329452580660945, -3.769717580846608, -3.590972402696094, -3.7411588951193324, -3.78491914351238, -3.7754830039322345, -3.646186867377315, -3.6787080372572842, -3.5105613248742977, -3.801374009126365, -3.71401997938509, -3.7338092770206477, -3.7977390131650894, -3.530603761892602, -3.647723502655943, -3.7781449708359878, -3.6322055033004457, -3.1523083833378727, -3.412512781728038, -3.8303062762884994, -3.7226096486393296, -3.553696760208801, -3.6001168164084145, -3.581568826546383, -3.794874288307295, -3.581545967706646], "hartmann6": [-1.4549353278454038, -3.017174282345673, -2.9924333806676526, -2.721419510024227, -3.2973752131191634, -1.8341551991045113, -3.2668843561655243, -2.8347365300591334, -3.282790730047836, -3.1865756593491024, -2.982051709701689, -2.7485144603759237, -3.24173025958149, -3.134187749287507, -2.986062914159029, -2.932255875192143, -2.54217381789621, -3.1846937051481965, -2.902783188254062, -3.155614111164215, -2.899345934137389, -2.983779033637031, -3.2899323827033413, -3.1264045197377137, -3.142967961445229, -2.9324723031348694, -3.190017392761569, -3.292192486034387, -2.762574857954942, -3.2702396728108596, -2.6880860918265923, -2.989670689749928, -3.253750888535704, -2.8542984600236787, -3.252832804604744, -2.5442162213989556, -2.6052102152992065, -3.067795857145709, -2.8822447448882658, -3.140518761414347, -3.075813039891277, -3.03146184308219, -3.2979506275466615, -2.7197729302344933, -2.641564083074762, -2.702144998419366, -3.1670226043156444, -3.2932413787494665, -3.2923375595533435, -2.8727455141505818, -3.148190457843324, -2.820802211825644, -3.063369160676188, -3.0529188332750943, -3.0680238909470483, -3.2818403924328803, -3.2660725750584554, -3.131088071126102, -3.2890769050177533, -1.5829467114483784, -2.810513145953112, -3.312869701840981, -3.2124587900344044, -1.165456499117023, -1.6041283006949516, -3.0070669378242534, -2.7050223399135094, -2.708042778787764, -3.184524278644351, -3.1294539190549733, -2.9347834341589607, -2.7914377384651723, -3.2807067958203495, -1.6031238415481386, -2.9332137593080883, -2.897540573469963, -2.898552345504198, -2.63074616349976, -2.712932420759872, -2.4988020509933175, -3.259261535112579, -3.286437497756355, -3.092165340365627, -3.196218552395296, -2.916500796349388, -2.957113417613621, -3.303814006958171, -3.078648692296201, -1.4767638288161589, -3.1955443506729577, -3.17131783842418, -2.720688718208119, -3.254299537458789, -2.9759483354354503, -2.9408882599451656, -2.8628376552784673, -2.819730502978512, -2.92169177708724, -3.0304590769661273, -3.0737396309589915]}, "regularized_evolution": {"cifar10": [10.107582092285156, 9.520843505859375, 8.731643676757812, 8.231781005859375, 10.061798095703125, 10.9552001953125, 9.511985778808594, 8.839279174804688, 9.315437316894531, 9.627677917480469, 9.760971069335938, 10.734519958496094, 9.237754821777344, 9.849411010742188, 9.741424560546875, 9.282638549804688, 9.165473937988281, 8.618354797363281, 9.739921569824219, 8.523796081542969, 8.625900268554688, 9.806434631347656, 8.877159118652344, 9.420265197753906, 7.879829406738281, 8.453681945800781, 8.827445983886719, 10.401702880859375, 11.076934814453125, 10.405952453613281, 8.852424621582031, 9.059211730957031, 9.660049438476562, 9.769844055175781, 9.571403503417969, 8.866630554199219, 9.145759582519531, 8.661094665527344, 10.621932983398438, 8.402488708496094, 8.990676879882812, 10.960052490234375, 8.716392517089844, 
9.582839965820312, 10.187080383300781, 9.640823364257812, 9.538169860839844, 8.895378112792969, 8.897392272949219, 10.087417602539062, 9.055076599121094, 8.749122619628906, 9.713951110839844, 9.383880615234375, 9.275482177734375, 8.950386047363281, 8.554473876953125, 8.957572937011719, 8.468040466308594, 9.218788146972656, 9.147377014160156, 9.137710571289062, 9.396354675292969, 9.089248657226562, 9.910232543945312, 8.546852111816406, 9.587806701660156, 8.831207275390625, 9.317192077636719, 9.451950073242188, 9.916313171386719, 9.579933166503906, 8.438957214355469, 9.584976196289062, 10.351776123046875, 9.997940063476562, 9.426177978515625, 8.798568725585938, 9.774726867675781, 9.340301513671875, 8.953514099121094, 9.648643493652344, 9.332923889160156, 8.733863830566406, 9.07720947265625, 8.212104797363281, 8.998641967773438, 9.869590759277344, 9.327613830566406, 11.690711975097656, 10.925613403320312, 8.709243774414062, 8.951286315917969, 10.128707885742188, 10.042984008789062, 9.740119934082031, 8.671112060546875, 8.944656372070312, 9.891273498535156, 8.564323425292969], "fashion_mnist": [5.084075927734375, 4.671539306640625, 4.790641784667969, 4.85968017578125, 5.050285339355469, 4.665718078613281, 4.6994781494140625, 4.88525390625, 5.0764923095703125, 5.210502624511719, 4.816200256347656, 4.760398864746094, 4.981224060058594, 5.028961181640625, 5.3006744384765625, 5.3182220458984375, 4.718177795410156, 4.7538909912109375, 5.352783203125, 5.076850891113281, 4.581573486328125, 4.827545166015625, 5.046295166015625, 4.75640869140625, 4.946769714355469, 4.973381042480469, 4.941658020019531, 4.729461669921875, 5.070960998535156, 4.723106384277344, 5.069541931152344, 4.975563049316406, 4.885887145996094, 4.86627197265625, 4.8705596923828125, 5.018989562988281, 5.26788330078125, 4.9696197509765625, 4.665184020996094, 4.802696228027344, 4.666969299316406, 4.918922424316406, 4.85528564453125, 5.161415100097656, 5.086524963378906, 5.154914855957031, 4.926856994628906, 4.851318359375, 4.856849670410156, 4.625007629394531, 4.956878662109375, 4.7001953125, 4.871803283691406, 4.79998779296875, 4.6083831787109375, 5.109153747558594, 4.738014221191406, 4.8326416015625, 5.0099639892578125, 4.973625183105469, 4.866447448730469, 4.673736572265625, 4.7554168701171875, 4.90093994140625, 4.648658752441406, 4.666786193847656, 4.734596252441406, 4.9136962890625, 5.2140350341796875, 5.2700042724609375, 4.715911865234375, 4.6934051513671875, 5.029487609863281, 4.841804504394531, 4.770545959472656, 4.654754638671875, 5.36187744140625, 4.9387054443359375, 4.9837646484375, 4.8358154296875, 4.842079162597656, 4.647956848144531, 4.9645538330078125, 4.962257385253906, 4.886100769042969, 4.8009185791015625, 5.3010101318359375, 4.7446746826171875, 4.9282379150390625, 5.127586364746094, 5.088226318359375, 4.911018371582031, 4.9875946044921875, 5.1578216552734375, 4.6427154541015625, 4.7746124267578125, 4.795440673828125, 4.9670867919921875, 5.047050476074219, 4.951896667480469], "hartmann3": [-3.8562787224204964, -3.8388735922156725, -3.8179794607549646, -3.790100735404807, -3.8546041385365237, -3.7207374365476666, -3.6027631829124536, -3.8617355143359897, -3.856372625265474, -3.857722403448557, -3.8268693901565127, -3.824593165442213, -3.7554529620062227, -3.855391545455628, -3.8161909865332957, -3.8374526965485307, -3.791961410296789, -3.853389877304351, -3.8524385727075114, -3.852681724644147, -3.854164095243434, -3.832540861326269, -3.8359003132613094, -3.82130863968297, -3.824206433392289, -3.815069102985653, 
-3.8448062343174287, -3.7476654770832725, -3.819239794496651, -3.853219551093164, -3.805053890149402, -3.860497878690962, -3.77205578249802, -3.859263689226762, -3.8464101986138024, -3.8508208221476474, -3.8087084696676916, -3.8315752184442577, -3.8184271283316167, -3.8277820085400247, -3.859155909401613, -3.839821280613561, -3.8007580229054936, -3.82821295594473, -3.8616472042332273, -3.75625603600848, -3.835979319257598, -3.7443258555968364, -3.7442679847670206, -3.843629560947634, -3.8502378641824406, -3.7774695451445925, -3.6969482338585427, -3.7635584215418874, -3.755155232934306, -3.8455690747283326, -3.68627874454927, -3.8493624756638543, -3.8112834553307726, -3.782143495270823, -3.7505881992355876, -3.831263077689587, -3.860817786769835, -3.8381543848970585, -3.8561922161515647, -3.794851659935362, -3.8121407700894316, -3.858317701496111, -3.763869801645391, -3.0595421824286064, -3.7843970420789623, -3.843971416985147, -3.851186621917104, -3.828422621962983, -3.845568204893809, -3.734118431597455, -3.674826229065421, -3.836203480673958, -3.8468505270285736, -3.829252158980824, -3.7968843407324773, -3.0743453843543866, -3.8543788542590725, -3.810722488571046, -3.59406484410796, -3.838672873814559, -3.7946033690965866, -3.829614566332377, -3.8149590577512997, -3.8113855659582354, -3.859886925823134, -3.78891319578021, -3.858580676417837, -3.767750071789717, -3.808665821595132, -3.858082965700558, -3.838164546954905, -3.7348828844157183, -3.7964971335413003, -3.8347940725469334], "hartmann6": [-3.0175243478147995, -2.0009376991510632, -2.896225130450772, -2.8460289168312434, -2.791994164189921, -2.8145869344168983, -2.8473873119360684, -2.6525114198639654, -2.901503841382755, -2.895988089946355, -2.6369841347139666, -2.4266954041403586, -2.8206737634160066, -2.9669357255185056, -2.904756305990689, -2.7257091348059337, -3.15749125714347, -3.1468415420436804, -3.137599193001579, -2.8803578417304907, -2.9451866826888176, -2.6076409463244072, -3.0339937836967565, -3.195572055988055, -3.0584091108948526, -2.9498413563782084, -2.4003373047784424, -3.1112445504043533, -3.081151929140931, -2.9728655666390176, -2.6497737706814988, -2.8839463502842406, -3.099140237913345, -1.286800841167182, -1.5078317025140526, -1.8867960731011642, -3.036622650112657, -2.920702437379702, -2.9384053747463406, -2.842704638674794, -3.0547867776898823, -3.100610671626193, -3.0256562118860644, -3.248830671414974, -2.9703043094098085, -2.833596569009291, -2.9703343505283746, -2.764487552933483, -2.7730412288027826, -2.6603351370257307, -2.8455734732617657, -3.0244957750113515, -2.702190942337655, -2.9462863451887653, -2.8079542062414764, -1.7788443982098423, -2.9517567313346347, -2.739193444795246, -3.1766993980665372, -3.056941190810896, -2.814913652425015, -2.9581066639641644, -3.092847769058895, -3.186681662843155, -3.0733832314275413, -3.05509498850725, -3.082055424203452, -3.1568331956629483, -2.9121866914793606, -3.0957468827625982, -3.176596073464635, -3.085095113598279, -3.125591088303841, -3.0544961979949816, -3.1196936635829524, -3.038661545112117, -2.2598963494348334, -3.007665645679462, -3.109884999112568, -2.256290411894697, -2.4259505553091065, -2.479006801533956, -3.0394001221826246, -2.860837257792445, -3.0624012885560394, -2.8681126360385325, -2.8417950442426942, -2.9021782922951243, -3.204125709528632, -2.621863717141578, -2.770091392979379, -3.0904561826236927, -3.143603744084295, -2.9977860373080456, -2.121829749975224, -2.537276473447735, -2.91123437633307, -2.932783811821915, 
-3.0651512272243857, -3.166404824288317]}} +{"random_search": {"cifar10": [10.570075988769531, 11.034774780273438, 10.087379455566406, 10.419334411621094, 10.432853698730469, 8.5814208984375, 10.457664489746094, 11.290725708007812, 10.5799560546875, 9.727294921875, 9.501747131347656, 10.170425415039062, 9.806816101074219, 10.66925048828125, 10.491233825683594, 9.634124755859375, 9.191375732421875, 8.844978332519531, 10.2298583984375, 10.434898376464844, 9.487800598144531, 10.427787780761719, 10.502761840820312, 10.279136657714844, 10.1964111328125, 11.07977294921875, 9.425407409667969, 9.701408386230469, 10.23583984375, 10.708969116210938, 9.558792114257812, 9.986930847167969, 10.56585693359375, 10.63482666015625, 11.0650634765625, 10.207893371582031, 9.922348022460938, 11.085418701171875, 11.045547485351562, 10.198143005371094, 10.31964111328125, 10.305580139160156, 10.344978332519531, 9.250556945800781, 10.117431640625, 9.896835327148438, 9.52008056640625, 9.566474914550781, 10.341537475585938, 9.719619750976562, 9.478080749511719, 9.690483093261719, 10.247886657714844, 10.712760925292969, 10.926887512207031, 10.650840759277344, 10.041717529296875, 11.061965942382812, 10.502098083496094, 10.246772766113281, 10.305030822753906, 9.34393310546875, 9.596229553222656, 10.688194274902344, 10.123970031738281, 10.976806640625, 9.254158020019531, 10.040168762207031, 9.82464599609375, 10.861183166503906, 10.738700866699219, 9.88568115234375, 10.434127807617188, 9.848808288574219, 10.386619567871094, 10.578804016113281, 9.660018920898438, 10.445465087890625, 10.763755798339844, 10.146713256835938, 10.194534301757812, 9.678909301757812, 10.699859619140625, 9.986000061035156, 9.880363464355469, 10.01849365234375, 10.228996276855469, 10.0753173828125, 10.508392333984375, 10.601631164550781, 11.081802368164062, 10.485763549804688, 9.727066040039062, 10.158500671386719, 9.9129638671875, 11.635810852050781, 10.416252136230469, 11.731185913085938, 10.537567138671875, 11.287857055664062], "fashion_mnist": [5.09051513671875, 5.3495025634765625, 5.090606689453125, 5.306755065917969, 4.944160461425781, 5.066337585449219, 5.340538024902344, 5.605537414550781, 5.409492492675781, 4.850059509277344, 5.271759033203125, 4.999168395996094, 5.353126525878906, 5.377349853515625, 5.094085693359375, 4.8185882568359375, 5.007720947265625, 5.248565673828125, 4.989860534667969, 5.30499267578125, 4.9860076904296875, 5.429695129394531, 5.023948669433594, 5.251014709472656, 5.251373291015625, 5.018852233886719, 5.361701965332031, 5.115943908691406, 5.258811950683594, 5.1905975341796875, 5.2493438720703125, 4.862884521484375, 5.296844482421875, 5.2973480224609375, 5.001739501953125, 5.132057189941406, 5.379150390625, 5.0308380126953125, 5.3088226318359375, 5.2285919189453125, 4.874839782714844, 4.875190734863281, 4.905540466308594, 5.091346740722656, 5.354927062988281, 5.405769348144531, 4.9038238525390625, 5.291114807128906, 5.022491455078125, 5.3507843017578125, 4.900177001953125, 5.125740051269531, 4.790794372558594, 4.908744812011719, 5.427764892578125, 4.928062438964844, 5.122749328613281, 5.211883544921875, 4.912879943847656, 5.304801940917969, 5.118843078613281, 5.316253662109375, 5.3155670166015625, 4.891822814941406, 5.075309753417969, 5.0142974853515625, 5.219169616699219, 4.976043701171875, 5.457160949707031, 5.560661315917969, 5.253791809082031, 5.599967956542969, 5.248786926269531, 4.922248840332031, 5.365226745605469, 4.915000915527344, 5.304725646972656, 5.5551300048828125, 5.2591552734375, 5.234474182128906, 
5.337677001953125, 5.391693115234375, 4.871734619140625, 5.320770263671875, 4.729896545410156, 5.177436828613281, 5.51544189453125, 5.3778228759765625, 5.18963623046875, 5.084320068359375, 5.321952819824219, 5.127876281738281, 5.319007873535156, 5.161949157714844, 5.094940185546875, 5.2720794677734375, 5.098762512207031, 5.069129943847656, 5.495567321777344, 5.208320617675781], "hartmann3": [-3.7983644860179906, -3.4647375444431776, -3.724169378773501, -3.504482275855162, -3.5785228865447265, -3.2721948147194295, -3.424084036888103, -3.7072770292237176, -3.809994424648415, -3.6122056535539344, -3.5854465335983425, -3.844968774956946, -3.7347735478598283, -3.3691644158659364, -3.5412900367276676, -3.475734478264119, -3.8200439315391703, -3.83138442558726, -3.477400690334344, -3.4059398111190027, -3.7302972540667945, -3.1786964283460017, -3.5846499739643733, -3.256067420434629, -3.7187271090335243, -3.7826435340290523, -3.6342242353280025, -3.7605790715819465, -3.84909095948994, -3.5053069712058815, -3.7184922209021334, -3.800851244225936, -3.640804694679649, -3.3445513278128844, -3.8286547389644268, -3.573509742962088, -3.3190981974729334, -3.7609483078955943, -3.7744748548210194, -3.740621329285652, -3.6575432714187976, -3.686124319032891, -3.2467145074719324, -3.613123426231481, -3.8021768045842785, -3.621244873548109, -3.6192377014972266, -3.790856298972633, -3.548585974001048, -3.5592414866476254, -3.748377631924132, -3.7640467078382986, -3.7261908737840193, -3.581735983650286, -3.497987958071237, -3.720547971292352, -3.5412710119799535, -3.7391815825328405, -3.707270453492867, -3.666516126665965, -3.6723740963465534, -3.860704880219101, -3.3901487007606894, -3.6526042397434133, -3.821241682607301, -3.7313455393716906, -3.617482549039436, -3.8578496678333662, -3.6637739511791554, -3.7972669220230264, -3.5790734458422886, -3.7957396427176944, -3.4887409887061613, -3.778853898995849, -3.681495553302428, -3.692316299676422, -3.6735452557842283, -3.5961136884767786, -3.6657513195141593, -3.40861050776996, -3.8221486334983172, -3.767732293800785, -3.1725058921931346, -3.7536159697200975, -3.732562952763116, -3.3891581960764374, -3.4043726566370287, -3.569473096316691, -3.5687144990134247, -3.614801958410597, -3.799566283154234, -3.7199199442907918, -3.7185932588354116, -3.7187450018200634, -3.7366497076525995, -3.7475969099503734, -3.8002552452531257, -3.4060344407612617, -3.6688420010180787, -3.8308094701889], "hartmann6": [-2.942062167483819, -1.4270925701133137, -2.1670911981727583, -2.1391854030934256, -1.9703696121582686, -2.594122312444073, -2.765667751003329, -1.8110638451435552, -2.6196122976169867, -2.2412214116916607, -2.452084960741839, -1.8427121515062914, -2.608997350701581, -2.9014141339786677, -1.890320716157041, -2.545449071520854, -2.6583290500643884, -1.6054330860086385, -2.5408742738896675, -1.7074266404496088, -1.85969822199324, -2.65282030100074, -2.372449979122238, -1.4817042674371381, -2.533274868880178, -2.6426776279101176, -2.8844629338229093, -2.1509993233474045, -2.4576802851489497, -2.754241097009174, -2.8614515046033975, -3.1393263421404116, -2.5080712124365503, -1.9081559295607045, -2.144271742789371, -2.3086881255087097, -2.4972047804985724, -2.4153105932057017, -2.4135416612728533, -2.1755330721224277, -2.8801109890706234, -2.532838834617856, -3.0013330323245184, -2.845572945180195, -2.200193341199692, -1.7219543694068289, -2.6192546058487793, -2.2359822745696376, -2.1176613414017162, -1.8822616390525695, -2.4086827634039714, -2.2816243000743723, 
-2.165480067594273, -2.551451690430731, -2.638452146179405, -2.49884193760565, -2.357393575663534, -1.8583791294401601, -1.9636624434435468, -1.9951930274271241, -1.886440016674821, -2.432228785576797, -2.624508437250958, -2.7519888636176346, -2.9086333455217708, -2.0741067316762414, -2.446973157988633, -1.6462176720172292, -2.774427246629535, -2.201021955667192, -2.8070888873925735, -2.9702639208221466, -2.5146096807555276, -2.4388073485628983, -2.711317839707429, -2.4530651355252613, -2.734370008195378, -2.5801823248997273, -2.617856907872104, -2.0293432923318777, -1.93754359750606, -2.404251688411716, -2.0994030071591565, -1.9350609878168923, -2.263173223412282, -2.1345613646378223, -2.4387632069846914, -2.0504854619835515, -1.4384752897760327, -2.1621792846176895, -1.9256183341026185, -1.6618419098621113, -2.567175842548426, -1.8326320955843476, -2.0687530228373707, -1.8359872962404584, -2.444666881848311, -2.1568004943308154, -2.8749755249504685, -1.7131984047333892]}, "mf_bayesian_optimization": {"cifar10": [8.102218627929688, 9.120758056640625, 9.197235107421875, 9.314567565917969, 10.190299987792969, 9.6614990234375, 8.895378112792969, 9.573394775390625, 9.210296630859375, 8.729545593261719, 8.708778381347656, 10.21875, 10.008346557617188, 8.446937561035156, 9.194610595703125, 9.100334167480469, 10.321441650390625, 8.973121643066406, 9.705284118652344, 9.438888549804688, 9.307945251464844, 8.909683227539062, 9.4857177734375, 9.239250183105469, 8.85089111328125, 10.386962890625, 9.804962158203125, 10.164108276367188, 8.109199523925781, 8.016578674316406, 8.767509460449219, 8.346847534179688, 8.876449584960938, 9.234138488769531, 9.700332641601562, 8.373954772949219, 9.015975952148438, 8.562347412109375, 9.981231689453125, 8.512466430664062, 9.041938781738281, 9.176300048828125, 8.693832397460938, 9.337608337402344, 9.373138427734375, 8.25970458984375, 11.478584289550781, 9.039764404296875, 8.903793334960938, 7.851806640625, 9.292510986328125, 10.145538330078125, 8.894271850585938, 9.318794250488281, 9.97802734375, 10.720413208007812, 9.400299072265625, 9.318153381347656, 10.211288452148438, 8.429229736328125, 8.929550170898438, 9.603477478027344, 9.132301330566406, 8.73175048828125, 9.69183349609375, 9.170875549316406, 10.596893310546875, 8.982208251953125, 8.338356018066406, 10.031143188476562, 9.769561767578125, 9.627120971679688, 8.767288208007812, 9.834907531738281, 8.803001403808594, 8.625770568847656, 9.299758911132812, 9.298690795898438, 9.5645751953125, 9.03472900390625, 9.215667724609375, 9.008460998535156, 9.05419921875, 9.662025451660156, 8.660446166992188, 9.04095458984375, 10.133522033691406, 9.288856506347656, 10.933906555175781, 7.94622802734375, 8.634315490722656, 8.298187255859375, 9.50299072265625, 9.299980163574219, 9.237464904785156, 8.387069702148438, 9.435394287109375, 9.357254028320312, 9.201988220214844, 8.980323791503906], "fashion_mnist": [4.912925720214844, 4.808631896972656, 5.056343078613281, 5.1057891845703125, 4.89794921875, 4.8985443115234375, 4.901756286621094, 4.851066589355469, 5.0069122314453125, 4.854034423828125, 4.8776397705078125, 5.075859069824219, 4.946296691894531, 5.324104309082031, 5.109672546386719, 5.163368225097656, 4.86181640625, 5.0963134765625, 4.693626403808594, 4.952384948730469, 5.1372528076171875, 4.749778747558594, 4.839080810546875, 4.9800872802734375, 4.949943542480469, 4.892311096191406, 4.92108154296875, 5.054450988769531, 4.80377197265625, 5.033592224121094, 4.9016876220703125, 4.927215576171875, 5.0565948486328125, 
5.106781005859375, 4.75128173828125, 4.719673156738281, 4.8021240234375, 5.003562927246094, 4.785224914550781, 5.1726531982421875, 5.156364440917969, 5.115470886230469, 4.777351379394531, 4.98150634765625, 4.730522155761719, 5.6309814453125, 5.045310974121094, 4.917938232421875, 5.200675964355469, 4.863861083984375, 4.795806884765625, 5.223625183105469, 5.415069580078125, 5.372398376464844, 4.945793151855469, 5.2683258056640625, 5.101051330566406, 5.241966247558594, 4.903839111328125, 4.976020812988281, 4.9986724853515625, 5.24957275390625, 5.062644958496094, 5.031547546386719, 5.14990234375, 4.78924560546875, 5.056243896484375, 4.898658752441406, 5.126441955566406, 4.981636047363281, 5.2509307861328125, 4.970909118652344, 5.175140380859375, 5.048194885253906, 5.134803771972656, 5.205291748046875, 4.92401123046875, 5.131980895996094, 5.33258056640625, 5.019996643066406, 5.0273895263671875, 5.010467529296875, 5.144783020019531, 5.032691955566406, 4.8980255126953125, 5.2896270751953125, 5.2210845947265625, 5.11920166015625, 5.291419982910156, 5.0905609130859375, 5.156303405761719, 4.8618011474609375, 4.871925354003906, 4.887908935546875, 4.993476867675781, 5.145660400390625, 4.9629974365234375, 5.022300720214844, 4.946128845214844, 5.3415069580078125]}, "bayesian_optimization": {"cifar10": [11.834663391113281, 10.850135803222656, 13.106803894042969, 10.621971130371094, 12.089996337890625, 11.989456176757812, 9.944725036621094, 12.151741027832031, 9.771446228027344, 9.653892517089844, 14.155509948730469, 9.849746704101562, 11.660682678222656, 9.726371765136719, 8.373817443847656, 8.547760009765625, 8.521583557128906, 8.890823364257812, 9.660530090332031, 10.417236328125, 10.407752990722656, 10.489616394042969, 8.149627685546875, 9.338088989257812, 9.880485534667969, 9.053581237792969, 10.286293029785156, 9.050323486328125, 12.33587646484375, 9.067794799804688, 10.680534362792969, 10.400833129882812, 7.941017150878906, 8.674705505371094, 8.025360107421875, 8.910812377929688, 9.311843872070312, 9.874847412109375, 7.759437561035156, 11.285186767578125, 8.689544677734375, 10.093734741210938, 9.649566650390625, 12.401290893554688, 9.348533630371094, 8.556083679199219, 9.835929870605469, 11.356254577636719, 11.3424072265625, 9.509292602539062, 9.295768737792969, 11.967460632324219, 10.924751281738281, 13.746360778808594, 10.807502746582031, 12.209007263183594, 11.160293579101562, 8.994827270507812, 10.222831726074219, 10.316673278808594, 8.706809997558594, 8.860237121582031, 8.495071411132812, 10.071884155273438, 11.452072143554688, 11.503829956054688, 10.215606689453125, 8.048088073730469, 8.259941101074219, 11.198013305664062, 10.408485412597656, 11.677360534667969, 10.564231872558594, 8.570747375488281, 8.69677734375, 8.675621032714844, 11.110565185546875, 9.985069274902344, 12.029739379882812, 8.316459655761719, 9.269187927246094, 15.092262268066406, 9.952056884765625, 9.06463623046875, 9.109573364257812, 9.456016540527344, 8.761322021484375, 12.986724853515625, 8.250045776367188, 8.607147216796875, 8.646743774414062, 8.980308532714844, 13.667716979980469, 11.633316040039062, 10.320816040039062, 12.442756652832031, 8.036582946777344, 8.479621887207031, 9.891937255859375, 11.2366943359375], "fashion_mnist": [4.9603424072265625, 5.071281433105469, 4.962150573730469, 4.827796936035156, 5.347526550292969, 4.6628875732421875, 4.999702453613281, 4.886085510253906, 5.3385467529296875, 4.770286560058594, 4.6191864013671875, 4.685035705566406, 4.7356719970703125, 4.6202850341796875, 5.357818603515625, 
5.702720642089844, 5.026557922363281, 4.743721008300781, 4.7940521240234375, 4.773796081542969, 5.129631042480469, 4.84552001953125, 5.792655944824219, 5.2164764404296875, 4.673927307128906, 4.660346984863281, 4.624336242675781, 5.1123504638671875, 5.088066101074219, 5.051849365234375, 4.8744964599609375, 5.350379943847656, 4.981048583984375, 4.9394378662109375, 4.8551177978515625, 4.895240783691406, 5.573570251464844, 5.437744140625, 5.227935791015625, 4.942718505859375, 4.915061950683594, 4.697944641113281, 5.247138977050781, 4.7621612548828125, 4.958221435546875, 5.188468933105469, 5.064666748046875, 4.766532897949219, 5.066398620605469, 4.884849548339844, 4.673484802246094, 4.997337341308594, 4.749031066894531, 4.78302001953125, 4.9005889892578125, 4.967002868652344, 4.698448181152344, 4.86181640625, 5.0426483154296875, 4.780479431152344, 4.6356658935546875, 4.8165435791015625, 5.2922515869140625, 4.7222900390625, 5.3287200927734375, 4.797332763671875, 4.7588958740234375, 5.6631011962890625, 5.182411193847656, 5.007575988769531, 4.9674072265625, 4.693756103515625, 4.718101501464844, 5.490745544433594, 4.863746643066406, 5.2880859375, 5.3683624267578125, 4.564300537109375, 4.8800506591796875, 5.093727111816406, 5.62841796875, 4.820304870605469, 4.636566162109375, 5.5658416748046875, 5.124908447265625, 5.017478942871094, 5.150848388671875, 4.7201080322265625, 5.049140930175781, 4.790504455566406, 5.594146728515625, 5.1763458251953125, 5.105926513671875, 4.721961975097656, 5.21624755859375, 4.770225524902344, 5.327949523925781, 4.7087249755859375, 4.6690673828125, 5.120002746582031], "hartmann3": [-3.8178953072486967, -3.704775176517175, -3.8616219953873934, -3.5335807178711205, -3.7453646895666224, -3.724349458315775, -3.8082700218796206, -3.8112266622014936, -3.6381864416516088, -3.757543399794309, -3.717965871204767, -3.857327714892407, -3.755620258329924, -3.7764124673312653, -3.8166786066128644, -3.3266149308162176, -3.6318030080195567, -3.6783783725352532, -3.828153557778992, -3.7445079370049683, -3.7748854966315606, -3.768296234547888, -3.5998019566930024, -3.396255586934601, -3.8565237529701757, -3.6704283751493643, -3.8143244378302397, -1.9604315298529318, -3.613903912451665, -3.7866736749598773, -3.772166915524116, -3.7557386131680284, -3.75795099598279, -3.795670231190154, -3.8618487074128836, -3.8318902221548905, -3.6560392853114103, -3.7088067903799655, -3.6229461526417706, -3.7796465581026544, -3.789455489311304, -3.364116534127566, -3.7297619176629064, -3.4017365238028496, -3.6427163466230876, -3.3534894320962003, -3.6923051184426523, -3.7716634314504374, -3.7845428160866206, -3.753420518879679, -3.784187054931985, -3.5838034135058146, -3.8502436470074795, -2.597000906824191, -3.453444993373902, -3.785393240641991, -3.529974457228139, -3.740708317652978, -3.734122308582061, -3.67567657536361, -3.488047110341496, -3.735042140290241, -3.596701110723103, -3.730395457081181, -3.7041648761040484, -3.5465548528547504, -3.796429126486114, -3.6896101771297314, -3.66816253719168, -3.809565241525358, -3.6070273168821254, -3.743868657747066, -3.637445738600241, -3.726638981152361, -3.329452580660945, -3.769717580846608, -3.590972402696094, -3.7411588951193324, -3.78491914351238, -3.7754830039322345, -3.646186867377315, -3.6787080372572842, -3.5105613248742977, -3.801374009126365, -3.71401997938509, -3.7338092770206477, -3.7977390131650894, -3.530603761892602, -3.647723502655943, -3.7781449708359878, -3.6322055033004457, -3.1523083833378727, -3.412512781728038, -3.8303062762884994, 
-3.7226096486393296, -3.553696760208801, -3.6001168164084145, -3.581568826546383, -3.794874288307295, -3.581545967706646], "hartmann6": [-1.4549353278454038, -3.017174282345673, -2.9924333806676526, -2.721419510024227, -3.2973752131191634, -1.8341551991045113, -3.2668843561655243, -2.8347365300591334, -3.282790730047836, -3.1865756593491024, -2.982051709701689, -2.7485144603759237, -3.24173025958149, -3.134187749287507, -2.986062914159029, -2.932255875192143, -2.54217381789621, -3.1846937051481965, -2.902783188254062, -3.155614111164215, -2.899345934137389, -2.983779033637031, -3.2899323827033413, -3.1264045197377137, -3.142967961445229, -2.9324723031348694, -3.190017392761569, -3.292192486034387, -2.762574857954942, -3.2702396728108596, -2.6880860918265923, -2.989670689749928, -3.253750888535704, -2.8542984600236787, -3.252832804604744, -2.5442162213989556, -2.6052102152992065, -3.067795857145709, -2.8822447448882658, -3.140518761414347, -3.075813039891277, -3.03146184308219, -3.2979506275466615, -2.7197729302344933, -2.641564083074762, -2.702144998419366, -3.1670226043156444, -3.2932413787494665, -3.2923375595533435, -2.8727455141505818, -3.148190457843324, -2.820802211825644, -3.063369160676188, -3.0529188332750943, -3.0680238909470483, -3.2818403924328803, -3.2660725750584554, -3.131088071126102, -3.2890769050177533, -1.5829467114483784, -2.810513145953112, -3.312869701840981, -3.2124587900344044, -1.165456499117023, -1.6041283006949516, -3.0070669378242534, -2.7050223399135094, -2.708042778787764, -3.184524278644351, -3.1294539190549733, -2.9347834341589607, -2.7914377384651723, -3.2807067958203495, -1.6031238415481386, -2.9332137593080883, -2.897540573469963, -2.898552345504198, -2.63074616349976, -2.712932420759872, -2.4988020509933175, -3.259261535112579, -3.286437497756355, -3.092165340365627, -3.196218552395296, -2.916500796349388, -2.957113417613621, -3.303814006958171, -3.078648692296201, -1.4767638288161589, -3.1955443506729577, -3.17131783842418, -2.720688718208119, -3.254299537458789, -2.9759483354354503, -2.9408882599451656, -2.8628376552784673, -2.819730502978512, -2.92169177708724, -3.0304590769661273, -3.0737396309589915]}} diff --git a/tests/regression_objectives.py b/tests/regression_objectives.py index 6655fcbb7..97adb6874 100644 --- a/tests/regression_objectives.py +++ b/tests/regression_objectives.py @@ -1,29 +1,26 @@ from __future__ import annotations import warnings +from collections.abc import Callable from pathlib import Path -from typing import Any, Callable +from typing import Any, Literal import numpy as np -from typing_extensions import Literal import neps from neps.search_spaces.search_space import SearchSpace, pipeline_space_from_configspace class RegressionObjectiveBase: - """ - Base class for creating new synthetic or real objectives for the regression tests + """Base class for creating new synthetic or real objectives for the regression tests Regression runner uses properties defined here, - each property should be appropriately defined by the subclasses + each property should be appropriately defined by the subclasses. 
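+ In practice this means setting `pipeline_space` and assigning the evaluation callable via `evaluate_pipeline` (or the deprecated `run_pipeline`).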
""" def __init__(self, optimizer: str, task: str): self.optimizer = optimizer self.task = task - self.has_fidelity = (self.optimizer != "regularized_evolution") and ( - self.optimizer != "random_search" - ) + self.has_fidelity = self.optimizer != "random_search" self._run_pipeline: Callable | None = None self._pipeline_space: SearchSpace | dict[str, Any] = {} @@ -35,8 +32,7 @@ def pipeline_space(self) -> SearchSpace | dict[str, Any]: f" the subclass {type(self)} must implement " f"a pipeline_space attribute" ) - else: - return self._pipeline_space + return self._pipeline_space @pipeline_space.setter def pipeline_space(self, value): @@ -44,17 +40,32 @@ def pipeline_space(self, value): @property def run_pipeline(self) -> Callable: + warnings.warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning, stacklevel=2) if self._run_pipeline is None: raise NotImplementedError( f"run_pipeline can not be None, " f"the subclass {type(self)} must " f"implement a run_pipeline Callable" ) - else: - return self._run_pipeline + return self._run_pipeline + + @property + def evaluate_pipeline(self) -> Callable: + if self._run_pipeline is None: + raise NotImplementedError( + f"evaluate_pipeline can not be None, " + f"the subclass {type(self)} must " + f"implement a evaluate_pipeline Callable" + ) + return self._run_pipeline @run_pipeline.setter def run_pipeline(self, value): + warnings.warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning, stacklevel=2) + self._run_pipeline = value + + @evaluate_pipeline.setter + def evaluate_pipeline(self, value): self._run_pipeline = value def __call__(self, *args, **kwargs) -> dict[str, Any]: @@ -63,9 +74,7 @@ def __call__(self, *args, **kwargs) -> dict[str, Any]: class JAHSObjective(RegressionObjectiveBase): def evaluation_func(self): - """ - If the optimizer is cost aware, return the evaluation function with cost - """ + """If the optimizer is cost aware, return the evaluation function with cost.""" import jahs_bench self.benchmark = jahs_bench.Benchmark( @@ -78,11 +87,11 @@ def cost_evaluation(**joint_configuration): results = self.benchmark(joint_configuration, nepochs=epoch) return { - "loss": 100 - results[epoch]["valid-acc"], + "objective_to_minimize": 100 - results[epoch]["valid-acc"], "cost": results[epoch]["runtime"], } - def loss_evaluation(**joint_configuration): + def objective_to_minimize_evaluation(**joint_configuration): epoch = joint_configuration.pop("epoch") joint_configuration.update({"N": 5, "W": 16, "Resolution": 1.0}) @@ -91,8 +100,7 @@ def loss_evaluation(**joint_configuration): if "cost" in self.optimizer: return cost_evaluation - else: - return loss_evaluation + return objective_to_minimize_evaluation def __init__( self, @@ -103,8 +111,7 @@ def __init__( save_dir: str | Path = "jahs_bench_data", **kwargs, ): - """ - Download benchmark, initialize Pipeline space and evaluation function + """Download benchmark, initialize Pipeline space and evaluation function. 
Args: optimizer: The optimizer that will be run, this is used to determine the @@ -122,16 +129,13 @@ def __init__( self.pipeline_space = pipeline_space_from_configspace(joint_config_space) - # For Regularized evolution sampler ignores fidelity hyperparameters - # by sampling None for them - - self.pipeline_space["epoch"] = neps.IntegerParameter( + self.pipeline_space["epoch"] = neps.Integer( lower=1, upper=200, is_fidelity=self.has_fidelity ) self.run_pipeline = self.evaluation_func() self.surrogate_model = "gp" if self.optimizer != "random_search" else None - self.surrogate_model_args = kwargs.get("surrogate_model_args", None) + self.surrogate_model_args = kwargs.get("surrogate_model_args") class HartmannObjective(RegressionObjectiveBase): @@ -140,15 +144,12 @@ class HartmannObjective(RegressionObjectiveBase): def evaluation_fn(self) -> Callable: def hartmann3(**z_nX): - if self.has_fidelity: - z = z_nX.get("z") - else: - z = self.z_max + z = z_nX.get("z") if self.has_fidelity else self.z_max X_0 = z_nX.get("X_0") X_1 = z_nX.get("X_1") X_2 = z_nX.get("X_2") - Xs = tuple((X_0, X_1, X_2)) + Xs = (X_0, X_1, X_2) log_z = np.log(z) log_lb, log_ub = np.log(self.z_min), np.log(self.z_max) @@ -179,20 +180,17 @@ def hartmann3(**z_nX): noise = np.abs(rng.normal(size=H.size)) * self.noise * (1 - log_z_scaled) - loss = float((H + noise)[0]) + objective_to_minimize = float((H + noise)[0]) cost = 0.05 + (1 - 0.05) * (z / self.z_max) ** 2 - result = {"loss": loss} + result = {"objective_to_minimize": objective_to_minimize} if "cost" in self.optimizer: result.update({"cost": cost}) return result def hartmann6(**z_nX): - if self.has_fidelity: - z = z_nX.get("z") - else: - z = self.z_max + z = z_nX.get("z") if self.has_fidelity else self.z_max X_0 = z_nX.get("X_0") X_1 = z_nX.get("X_1") @@ -200,7 +198,7 @@ def hartmann6(**z_nX): X_3 = z_nX.get("X_3") X_4 = z_nX.get("X_4") X_5 = z_nX.get("X_5") - Xs = tuple((X_0, X_1, X_2, X_3, X_4, X_5)) + Xs = (X_0, X_1, X_2, X_3, X_4, X_5) # Change by Carl - z now comes in normalized log_z = np.log(z) @@ -238,21 +236,17 @@ def hartmann6(**z_nX): noise = np.abs(rng.normal(size=H.size)) * self.noise * (1 - log_z_scaled) - loss = float((H + noise)[0]) + objective_to_minimize = float((H + noise)[0]) cost = 0.05 + (1 - 0.05) * (z / self.z_max) ** 2 - result = {"loss": loss} + result = {"objective_to_minimize": objective_to_minimize} if "cost" in self.optimizer: result.update({"cost": cost}) return result - if self.dim == 3: - hartmann_fn = hartmann3 - else: - hartmann_fn = hartmann6 + return hartmann3 if self.dim == 3 else hartmann6 - return hartmann_fn def __init__( self, @@ -263,8 +257,7 @@ def __init__( seed: int = 1337, **kwargs, ): - """ - Initialize Pipeline space and evaluation function + """Initialize Pipeline space and evaluation function. 
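+ The fidelity `z` ranges from `z_min` to `z_max`; observation noise shrinks as `z` approaches `z_max`, while the reported cost grows quadratically with `z`.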
Args: optimizer: The optimizer that will be run, this is used to determine the @@ -283,11 +276,11 @@ def __init__( ) self.pipeline_space: dict[str, Any] = { - f"X_{i}": neps.FloatParameter(lower=0.0, upper=1.0) for i in range(self.dim) + f"X_{i}": neps.Float(lower=0.0, upper=1.0) for i in range(self.dim) } if self.has_fidelity: - self.pipeline_space["z"] = neps.IntegerParameter( + self.pipeline_space["z"] = neps.Integer( lower=self.z_min, upper=self.z_max, is_fidelity=self.has_fidelity ) @@ -297,6 +290,6 @@ def __init__( self.random_state = np.random.default_rng(seed) self.surrogate_model = "gp" if self.optimizer != "random_search" else None - self.surrogate_model_args = kwargs.get("surrogate_model_args", None) + self.surrogate_model_args = kwargs.get("surrogate_model_args") self.run_pipeline = self.evaluation_fn() diff --git a/tests/regression_runner.py b/tests/regression_runner.py index 0a5a8898a..7cba1c65c 100644 --- a/tests/regression_runner.py +++ b/tests/regression_runner.py @@ -3,8 +3,8 @@ import json import logging +from collections.abc import Callable from pathlib import Path -from typing import Callable import numpy as np from scipy.stats import kstest @@ -28,8 +28,7 @@ def incumbent_at(root_directory: str | Path, step: int): - """ - Return the incumbent of the run at step n + """Return the incumbent of the run at step n. Args: root_directory: root directory of the optimization run @@ -41,30 +40,28 @@ for line in log_file.read_text(encoding="utf-8").splitlines() if "Loss: " in line ] - incumbent_at_n = min(losses[:step]) - return incumbent_at_n + return min(losses[:step]) class RegressionRunner: - """This class runs the optimization algorithms and stores the results in separate files""" + """This class runs the optimization algorithms and stores the results in separate files.""" def __init__( self, objective: RegressionObjectiveBase | Callable, iterations: int = 100, max_evaluations: int = 150, - budget: int = 10000, + max_cost_total: int = 10000, experiment_name: str = "", **kwargs, ): - """ - Download benchmark, initialize Pipeline space, evaluation function and set paths, + """Download benchmark, initialize Pipeline space, evaluation function and set paths. 
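+ Each of the `iterations` repetitions performs a full neps.run and records the incumbent reached after `max_evaluations` evaluations.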
Args: objective: callable that takes a configuration as input and evaluates it iterations: number of times to record the whole optimization process max_evaluations: maximum number of total evaluations for each optimization process - budget: budget for cost aware optimizers + max_cost_total: budget for cost aware optimizers experiment_name: string to identify different experiments """ self.objective = objective @@ -73,21 +70,21 @@ def __init__( self.optimizer = self.objective.optimizer self.pipeline_space = self.objective.pipeline_space else: - self.task = kwargs.get("task", None) + self.task = kwargs.get("task") if self.task is None: raise AttributeError( f"self.task can not be {self.task}, " f"please provide a task argument" ) - self.optimizer = kwargs.get("optimizer", None) + self.optimizer = kwargs.get("optimizer") if self.optimizer is None: raise AttributeError( f"self.optimizer can not be {self.optimizer}, " f"please provide an optimizer argument" ) - self.pipeline_space = kwargs.get("pipeline_space", None) + self.pipeline_space = kwargs.get("pipeline_space") if self.pipeline_space is None: raise AttributeError( f"self.pipeline_space can not be {self.pipeline_space}, " @@ -100,7 +97,7 @@ def __init__( self.benchmark = None # Cost cooling optimizer expects budget but none of the others does - self.budget = budget if "cost" in self.optimizer else None + self.max_cost_total = max_cost_total if "cost" in self.optimizer else None self.max_evaluations = max_evaluations self.final_losses: list[float] = [] @@ -114,17 +111,17 @@ def root_directory(self): @property def final_losses_path(self): - return Path(self.root_directory, self.loss_file_name) + return Path(self.root_directory, self.objective_to_minimize_file_name) @property - def loss_file_name(self): + def objective_to_minimize_file_name(self): return f"final_losses_{self.max_evaluations}_.txt" def save_losses(self): if not self.final_losses_path.parent.exists(): Path(self.root_directory).mkdir() with self.final_losses_path.open(mode="w+", encoding="utf-8") as f: - f.writelines([str(loss) + "\n" for loss in self.final_losses]) + f.writelines([str(objective_to_minimize) + "\n" for objective_to_minimize in self.final_losses]) logging.info( f"Saved the results of {len(self.final_losses)} " f"runs of {self.max_evaluations} " @@ -133,22 +130,18 @@ def save_losses(self): def neps_run(self, working_directory: Path): neps.run( - run_pipeline=self.objective, + evaluate_pipeline=self.objective, pipeline_space=self.pipeline_space, searcher=self.optimizer, - max_cost_total=self.budget, + max_cost_total=self.max_cost_total, root_directory=working_directory, max_evaluations_total=self.max_evaluations, ) - best_error = incumbent_at(working_directory, self.max_evaluations) - return best_error + return incumbent_at(working_directory, self.max_evaluations) def run_regression(self, save=False): - """ - Run iterations number of neps runs - """ - + """Run `self.iterations` independent neps runs.""" for i in range(self.iterations): working_directory = Path(self.root_directory, "results/test_run_" + str(i)) @@ -162,19 +155,17 @@ return np.array(self.final_losses) def read_results(self): - """ - Read the results of the last run. + """Read the results of the last run. Either returns results of the most recent run, or - return the values from LOSS_FILE + returns the values from LOSS_FILE. 
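+ (LOSS_FILE holds the pre-recorded losses, keyed first by optimizer and then by task.)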
""" - if self.final_losses: return np.array(self.final_losses) - elif self.final_losses_path.exists(): + if self.final_losses_path.exists(): # Read from final_losses_path for each regression run self.final_losses = [ - float(loss) - for loss in self.final_losses_path.read_text( + float(objective_to_minimize) + for objective_to_minimize in self.final_losses_path.read_text( encoding="utf-8" ).splitlines()[: self.iterations] ] @@ -191,8 +182,8 @@ def read_results(self): # Try reading from the LOSS_FILE in the worst case if LOSS_FILE.exists(): with LOSS_FILE.open(mode="r", encoding="utf-8") as f: - loss_dict = json.load(f) - self.final_losses = loss_dict[self.optimizer][self.task] + objective_to_minimize_dict = json.load(f) + self.final_losses = objective_to_minimize_dict[self.optimizer][self.task] else: raise FileNotFoundError( f"Results from the previous runs are not " @@ -201,13 +192,11 @@ def read_results(self): return np.array(self.final_losses) def test(self): - """ - Target run for the regression test, keep all the parameters same. + """Target run for the regression test, keep all the parameters same. Args: max_evaluations: Number of evaluations after which to terminate optimization. """ - # Sample losses of self.sample_size runs samples = [] for i in range(self.sample_size): @@ -250,12 +239,11 @@ def median_threshold( with json_file.open(mode="r", encoding="utf-8") as f: losses_dict = json.load(f) else: - losses_dict = dict() + losses_dict = {} - print(f"Optimizers the results are already recorded for: {losses_dict.keys()}") for optimizer in OPTIMIZERS: if optimizer in losses_dict: - print(f"For {optimizer} recorded tasks are: {losses_dict[optimizer].keys()}") + pass for task in TASKS: if ( isinstance(losses_dict.get(optimizer, None), dict) @@ -272,10 +260,6 @@ def median_threshold( runner.run_regression(save=True) best_results = runner.read_results().tolist() minv, maxv = min(best_results), max(best_results) - print( - f"For optimizer {optimizer} on {task}:\n " - f"\tMin of best results: {minv}\n\tMax of best results: {maxv}" - ) if isinstance(losses_dict.get(optimizer, None), dict) and isinstance( losses_dict[optimizer].get(task, None), list ): diff --git a/tests/settings.py b/tests/settings.py index d33a5c41b..fc2be3fef 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from pathlib import Path ITERATIONS = 100 @@ -7,7 +9,6 @@ "random_search", # "mf_bayesian_optimization", "bayesian_optimization", - "regularized_evolution", ] TASKS = [ diff --git a/tests/test_config_encoder.py b/tests/test_config_encoder.py new file mode 100644 index 000000000..c07500bbe --- /dev/null +++ b/tests/test_config_encoder.py @@ -0,0 +1,215 @@ +from __future__ import annotations + +import pytest +import torch + +from neps.search_spaces.domain import Domain +from neps.search_spaces.encoding import ( + CategoricalToIntegerTransformer, + ConfigEncoder, + MinMaxNormalizer, +) +from neps.search_spaces.hyperparameters import Categorical, Float, Integer + + +def test_config_encoder_default() -> None: + parameters = { + "a": Categorical(["cat", "mouse", "dog"]), + "b": Integer(5, 6), + "c": Float(5, 6), + } + + encoder = ConfigEncoder.from_parameters(parameters) + + # Numericals first, alphabetic + # Categoricals last, alphabetic + assert encoder.transformers == { + "b": MinMaxNormalizer(parameters["b"].domain), + "c": MinMaxNormalizer(parameters["c"].domain), + "a": CategoricalToIntegerTransformer(parameters["a"].choices), + } + + # Domains, (of each 
column) match those of the transformers + assert encoder.domains == [ + Domain.unit_float(), + Domain.unit_float(), + Domain.indices(n=len(parameters["a"].choices), is_categorical=True), + ] + + assert encoder.ncols == len(parameters) + assert encoder.n_numerical == 2 + assert encoder.n_categorical == 1 + assert encoder.numerical_slice == slice(0, 2) + assert encoder.categorical_slice == slice(2, 3) + assert encoder.index_of == {"a": 2, "b": 0, "c": 1} + assert encoder.domain_of == { + "b": Domain.unit_float(), + "c": Domain.unit_float(), + "a": Domain.indices(n=len(parameters["a"].choices), is_categorical=True), + } + assert encoder.constants == {} + + configs = [ + {"c": 5.5, "b": 5, "a": "cat"}, + {"c": 5.5, "b": 5, "a": "dog"}, + {"c": 6, "b": 6, "a": "mouse"}, + ] + encoded = encoder.encode(configs) + expected_encoding = torch.tensor( + [ + # b, c, a + [0.0, 0.5, 0.0], # config 1 + [0.0, 0.5, 2.0], # config 2 + [1.0, 1.0, 1.0], # config 3 + ], + dtype=torch.float64, + ) + torch.testing.assert_close(encoded, expected_encoding, check_dtype=True) + + decoded = encoder.decode(encoded) + assert decoded == configs + + +def test_config_encoder_pdist_calculation() -> None: + parameters = { + "a": Categorical(["cat", "mouse", "dog"]), + "b": Integer(1, 10), + "c": Float(1, 10), + } + encoder = ConfigEncoder.from_parameters(parameters) + config1 = {"a": "cat", "b": 1, "c": 1.0} + config2 = {"a": "mouse", "b": 10, "c": 10.0} + + # Same config, no distance + x = encoder.encode([config1, config1]) + dist = encoder.pdist(x, square_form=False) + assert dist.item() == 0.0 + + # Opposite configs, max distance + x = encoder.encode([config1, config2]) + dist = encoder.pdist(x, square_form=False) + + # The first config should have its p2 Euclidean distance as the norm + # of the distances between these two configs, i.e. 
the distance along the + # diagonal of a unit-square they belong to + _first_config_numerical_encoding = torch.tensor([[0.0, 0.0]], dtype=torch.float64) + _second_config_numerical_encoding = torch.tensor([[1.0, 1.0]], dtype=torch.float64) + _expected_numerical_dist = torch.linalg.norm( + _first_config_numerical_encoding - _second_config_numerical_encoding, + ord=2, + ) + + # The categorical distance should just be one, as they are different + _expected_categorical_dist = 1.0 + + _expected_dist = _expected_numerical_dist + _expected_categorical_dist + assert torch.isclose(dist, _expected_dist) + + +def test_config_encoder_pdist_squareform() -> None: + parameters = { + "a": Categorical(["cat", "mouse", "dog"]), + "b": Integer(1, 10), + "c": Float(1, 10), + } + encoder = ConfigEncoder.from_parameters(parameters) + config1 = {"a": "cat", "b": 1, "c": 1.0} + config2 = {"a": "dog", "b": 5, "c": 5} + config3 = {"a": "mouse", "b": 10, "c": 10.0} + + # Three distinct configs + x = encoder.encode([config1, config2, config3]) + dist = encoder.pdist(x, square_form=False) + + # 3 possible distances + assert dist.shape == (3,) + torch.testing.assert_close( + dist, + torch.tensor([1.6285, 2.4142, 1.7857], dtype=torch.float64), + atol=1e-4, + rtol=1e-4, + ) + + dist_sq = encoder.pdist(x, square_form=True) + assert dist_sq.shape == (3, 3) + + # Distance to self along diagonal should be 0 + torch.testing.assert_close(dist_sq.diagonal(), torch.zeros(3, dtype=torch.float64)) + + # Should be symmetric + torch.testing.assert_close(dist_sq, dist_sq.T) + + +def test_config_encoder_accepts_custom_transformers() -> None: + parameters = { + "b": Integer(5, 6), + "a": Float(5, 6), + "c": Categorical(["cat", "mouse", "dog"]), + } + encoder = ConfigEncoder.from_parameters( + parameters, + custom_transformers={ + "c": CategoricalToIntegerTransformer(parameters["c"].choices) + }, + ) + assert encoder.transformers["c"] == CategoricalToIntegerTransformer( + parameters["c"].choices + ) + + +def test_config_encoder_removes_constants_in_encoding_and_includes_in_decoding() -> None: + parameters = { + "b": Integer(5, 6), + "a": Float(5, 6), + "c": Categorical(["cat", "mouse", "dog"]), + } + + x = "raspberry" + + encoder = ConfigEncoder.from_parameters(parameters, constants={"x": x}) + assert encoder.constants == {"x": x} + + enc_x = encoder.encode([{"a": 5.5, "b": 5, "c": "cat", "x": x}]) + + assert enc_x.shape == (1, 3) # No x, just a, b, c + + dec_x = encoder.decode(enc_x) + assert dec_x == [{"a": 5.5, "b": 5, "c": "cat", "x": x}] + + # This doesn't have to hold true, but it's our current behaviour, we could make + # weaker guarantees but then we'd have to clone the constants, even if it's very large + assert dec_x[0]["x"] is x + + +def test_config_encoder_complains_if_missing_entry_in_config() -> None: + parameters = { + "b": Integer(5, 6), + "a": Float(5, 6), + "c": Categorical(["cat", "mouse", "dog"]), + } + + encoder = ConfigEncoder.from_parameters(parameters) + + with pytest.raises(KeyError): + encoder.encode([{"a": 5.5, "b": 5}]) + + +def test_config_encoder_sorts_parameters_by_name_for_consistent_ordering() -> None: + parameters = { + "a": Categorical([0, 1]), + "b": Integer(0, 1), + "c": Float(0, 1), + } + p1 = dict(sorted(parameters.items())) + p2 = dict(sorted(parameters.items(), reverse=True)) + + encoder_1 = ConfigEncoder.from_parameters(p1) + encoder_2 = ConfigEncoder.from_parameters(p2) + + assert encoder_1.index_of["a"] == 2 + assert encoder_1.index_of["b"] == 0 + assert encoder_1.index_of["c"] == 1 + + 
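# The computed column layout must be identical regardless of insertion order: +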
assert encoder_2.index_of["a"] == 2 + assert encoder_2.index_of["b"] == 0 + assert encoder_2.index_of["c"] == 1 diff --git a/tests/test_domain.py b/tests/test_domain.py new file mode 100644 index 000000000..ab3d3894d --- /dev/null +++ b/tests/test_domain.py @@ -0,0 +1,228 @@ +from __future__ import annotations + +import pytest +import torch +from pytest_cases import parametrize + +from neps.search_spaces.domain import Domain + +T = torch.tensor + + +@parametrize( + "x, frm, expected", + [ + # Remains unchanged if from unit-float + (T([0, 0.5, 1.0]), Domain.unit_float(), T([0, 0.5, 1.0])), + # Converts integers to float + (T([0, 1]), Domain.unit_float(), T([0.0, 1.0])), + # Integer conversion + (T([0, 1, 2, 3, 4]), Domain.integer(0, 4), T([0.0, 0.25, 0.5, 0.75, 1.0])), + # Negatives + ( + T([-0.5, -0.25, 0.0, 0.25, 0.5]), + Domain.floating(-0.5, 0.5), + T([0.0, 0.25, 0.5, 0.75, 1.0]), + ), + # Log scale + ( + T([1e-4, 1e-3, 1e-2, 1e-1, 1]), + Domain.floating(1e-4, 1, log=True), + T([0.0, 0.25, 0.5, 0.75, 1.0]), + ), + # Binned + ( + torch.arange(10), + Domain.integer(0, 10, bins=5), + T([0.0, 0.0, 0.25, 0.25, 0.5, 0.5, 0.75, 0.75, 1.0, 1.0]), + ), + ], +) +def test_domain_to_unit(x: torch.Tensor, frm: Domain, expected: torch.Tensor) -> None: + y = frm.to_unit(x) + assert y.dtype == torch.float64 + torch.testing.assert_close(y, expected, check_dtype=False, msg=f"{y} != {expected}") + + +def test_domain_to_unit_dtype_with_floating() -> None: + domain = Domain.integer(0, 4) + x = T([0, 1, 2, 3, 4], dtype=torch.int32) + + expected_64 = T([0.0, 0.25, 0.5, 0.75, 1.0], dtype=torch.float64) + y_64 = domain.to_unit(x, dtype=torch.float64) + torch.testing.assert_close(y_64, expected_64, check_dtype=True) + + expected_32 = T([0.0, 0.25, 0.5, 0.75, 1.0], dtype=torch.float32) + y_32 = domain.to_unit(x, dtype=torch.float32) + torch.testing.assert_close(y_32, expected_32, check_dtype=True) + + +def test_domain_to_unit_dtype_with_integer_fails() -> None: + domain = Domain.integer(0, 4) + x = T([0, 1, 2, 3, 4], dtype=torch.int32) + + with pytest.raises(ValueError, match="only allows floating dtypes"): + domain.to_unit(x, dtype=torch.int32) + + +@parametrize( + "x, to, expected", + [ + # Remains unchanged if from unit-float + ( + T([0, 0.5, 1.0]), + Domain.unit_float(), + T([0, 0.5, 1.0], dtype=torch.float64), + ), + # Converts floats to integers + ( + T([0.0, 1.0]), + Domain.integer(0, 1), + T([0, 1], dtype=torch.int64), + ), + # Integer range + ( + T([0, 0.25, 0.5, 0.75, 1.0]), + Domain.integer(0, 4), + T([0, 1, 2, 3, 4], dtype=torch.int64), + ), + # Negatives + ( + T([0.0, 0.25, 0.5, 0.75, 1.0]), + Domain.floating(-0.5, 0.5), + T([-0.5, -0.25, 0.0, 0.25, 0.5], dtype=torch.float64), + ), + # Log scale + ( + T([0.0, 0.25, 0.5, 0.75, 1.0]), + Domain.floating(1e-4, 1, log=True), + T([1e-4, 1e-3, 1e-2, 1e-1, 1], dtype=torch.float64), + ), + # Binned + ( + T([0.0, 0.25, 0.5, 0.75, 1.0]), + Domain.integer(0, 20, bins=5), + T([0, 5, 10, 15, 20], dtype=torch.int64), + ), + ], +) +def test_domain_from_unit(x: torch.Tensor, to: Domain, expected: torch.Tensor) -> None: + x = x.to(dtype=torch.float64) + y = to.from_unit(x) + torch.testing.assert_close(y, expected, check_dtype=True, msg=f"{y} != {expected}") + + +def test_domain_from_unit_dtype() -> None: + x = T([0.0, 0.25, 0.5, 0.75, 1.0], dtype=torch.float64) + domain = Domain.integer(0, 4) + + expected_f64 = T([0.0, 1.0, 2.0, 3.0, 4.0], dtype=torch.float64) + y_f64 = domain.from_unit(x, dtype=torch.float64) + torch.testing.assert_close(y_f64, expected_f64, 
check_dtype=True) + + expected_f32 = T([0, 1, 2, 3, 4], dtype=torch.float32) + y_f32 = domain.from_unit(x, dtype=torch.float32) + torch.testing.assert_close(y_f32, expected_f32, check_dtype=True) + + expected_i32 = T([0, 1, 2, 3, 4], dtype=torch.int32) + y_i32 = domain.from_unit(x, dtype=torch.int32) + torch.testing.assert_close(y_i32, expected_i32, check_dtype=True) + + expected_i64 = T([0, 1, 2, 3, 4], dtype=torch.int64) + y_i64 = domain.from_unit(x, dtype=torch.int64) + torch.testing.assert_close(y_i64, expected_i64, check_dtype=True) + + +@parametrize( + "x, frm, to, expected", + [ + ( + T([1e-2, 1e-1, 1e0, 1e1, 1e2], dtype=torch.float64), + Domain.floating(1e-2, 1e2, log=True), + Domain.floating(-2, 2), + T([-2, -1, 0, 1, 2], dtype=torch.float64), + ), + ( + T([0, 2, 4, 6, 8], dtype=torch.int64), + Domain.integer(0, 8, bins=5), + Domain.integer(0, 4), + T([0, 1, 2, 3, 4], dtype=torch.int64), + ), + ( + T([10, 12.5, 15], dtype=torch.float64), + Domain.floating(10, 15), + Domain.floating(2, 3), + T([2, 2.5, 3.0], dtype=torch.float64), + ), + ], +) +def test_domain_casting( + x: torch.Tensor, frm: Domain, to: Domain, expected: torch.Tensor +) -> None: + y = to.cast(x, frm=frm) + torch.testing.assert_close(y, expected, check_dtype=True, msg=f"{y} != {expected}") + + x_back = frm.cast(y, frm=to) + torch.testing.assert_close(x_back, x, check_dtype=True, msg=f"{x_back} != {x}") + + +@parametrize( + "x, frm, to, expected", + [ + ( + # This test combines all the previous cast domains in one go as a single tensor + T( + [ + [1e-2, 1e-1, 1e0, 1e1, 1e2], + [0, 2, 4, 6, 8], + [10, 12.5, 15, 17.5, 20], + ] + ).transpose(0, 1), + [ + Domain.floating(1e-2, 1e2, log=True), + Domain.integer(0, 8, bins=5), + Domain.floating(10, 20), + ], # from + [Domain.floating(-2, 2), Domain.integer(0, 4), Domain.floating(2, 4)], # to + T( + [ + [-2, -1, 0, 1, 2], + [0, 1, 2, 3, 4], + [2, 2.5, 3, 3.5, 4], + ] + ).transpose(0, 1), + ), + ( + # This was a random case found while testing samplers which seemed to fail + # Uniform noise converted to integers + # 0-0.25 -> 12, + # 0.25-0.5 -> 13, + # 0.5-0.75 -> 14 + # 0.75-1 -> 15 + T( + [ + [0.2350, 0.6488, 0.6411], + [0.6457, 0.2897, 0.6879], + [0.7401, 0.4268, 0.7607], + ] + ), + Domain.unit_float(), + Domain.integer(12, 15), + T( + [ + [12, 14, 14], + [14, 13, 14], + [14, 13, 15], + ] + ), + ), + ], +) +def test_translate( + x: torch.Tensor, + frm: list[Domain], + to: list[Domain], + expected: torch.Tensor, +) -> None: + y = Domain.translate(x, frm=frm, to=to) + torch.testing.assert_close(y, expected, check_dtype=True, msg=f"{y} != {expected}") diff --git a/tests/test_examples.py b/tests/test_examples.py index abdd10c58..5510c084e 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,10 +1,11 @@ +from __future__ import annotations + import logging import os import runpy from pathlib import Path import pytest - from neps_examples import ci_examples, core_examples @@ -38,6 +39,14 @@ def test_core_examples(example): # Run hyperparameters example to have something to analyse runpy.run_path(str(core_examples_scripts[0]), run_name="__main__") + if example.name in ( + "architecture.py", + "architecture_and_hyperparameters.py", + "hierarchical_architecture.py", + "expert_priors_for_architecture_and_hyperparameters.py", + ): + pytest.xfail("Architecture examples were removed temporarily") + runpy.run_path(str(example), run_name="__main__") diff --git a/neps/optimizers/bayesian_optimization/kernels/grakel_replace/__init__.py b/tests/test_neps_api/__init__.py similarity 
index 100% rename from neps/optimizers/bayesian_optimization/kernels/grakel_replace/__init__.py rename to tests/test_neps_api/__init__.py diff --git a/tests/test_neps_api/solution_yamls/bo_neps_decided.yaml b/tests/test_neps_api/solution_yamls/bo_neps_decided.yaml index 76935d6c3..f2ca84725 100644 --- a/tests/test_neps_api/solution_yamls/bo_neps_decided.yaml +++ b/tests/test_neps_api/solution_yamls/bo_neps_decided.yaml @@ -3,11 +3,8 @@ searcher_alg: bayesian_optimization searcher_selection: neps-default neps_decision_tree: true searcher_args: - initial_design_size: 10 - surrogate_model: gp - acquisition: EI - log_prior_weighted: false - acquisition_sampler: mutation - random_interleave_prob: 0.0 - disable_priors: true - sample_default_first: false + initial_design_size: null + use_priors: false + use_cost: false + sample_prior_first: false + device: null diff --git a/tests/test_neps_api/solution_yamls/bo_user_decided.yaml b/tests/test_neps_api/solution_yamls/bo_user_decided.yaml deleted file mode 100644 index c87b923ad..000000000 --- a/tests/test_neps_api/solution_yamls/bo_user_decided.yaml +++ /dev/null @@ -1,29 +0,0 @@ -searcher_name: bayesian_optimization -searcher_alg: bayesian_optimization -searcher_selection: neps-default -neps_decision_tree: false -searcher_args: - initial_design_size: 10 - surrogate_model: ComprehensiveGPHierarchy - acquisition: EI - log_prior_weighted: false - acquisition_sampler: mutation - random_interleave_prob: 0.0 - disable_priors: true - sample_default_first: false - surrogate_model_args: - graph_kernels: - - WeisfeilerLehman - - WeisfeilerLehman - - WeisfeilerLehman - - WeisfeilerLehman - - WeisfeilerLehman - hp_kernels: [] - verbose: false - hierarchy_consider: - - 0 - - 1 - - 2 - - 3 - d_graph_features: 0 - vectorial_features: null diff --git a/tests/test_neps_api/solution_yamls/hyperband_neps_decided.yaml b/tests/test_neps_api/solution_yamls/hyperband_neps_decided.yaml index 29bf8dec9..dbd7723f5 100644 --- a/tests/test_neps_api/solution_yamls/hyperband_neps_decided.yaml +++ b/tests/test_neps_api/solution_yamls/hyperband_neps_decided.yaml @@ -7,5 +7,5 @@ searcher_args: initial_design_type: max_budget use_priors: false random_interleave_prob: 0.0 - sample_default_first: false - sample_default_at_target: false + sample_prior_first: false + sample_prior_at_target: false diff --git a/tests/test_neps_api/solution_yamls/pibo_neps_decided.yaml b/tests/test_neps_api/solution_yamls/pibo_neps_decided.yaml index 7d5f19da1..d94ea209f 100644 --- a/tests/test_neps_api/solution_yamls/pibo_neps_decided.yaml +++ b/tests/test_neps_api/solution_yamls/pibo_neps_decided.yaml @@ -3,12 +3,8 @@ searcher_alg: pibo searcher_selection: neps-default neps_decision_tree: true searcher_args: - initial_design_size: 10 - surrogate_model: gp - acquisition: EI - log_prior_weighted: false - acquisition_sampler: mutation - random_interleave_prob: 0.0 - disable_priors: false - prior_confidence: medium - sample_default_first: false + initial_design_size: null + use_priors: true + use_cost: false + sample_prior_first: true + device: null diff --git a/tests/test_neps_api/solution_yamls/priorband_bo_user_decided.yaml b/tests/test_neps_api/solution_yamls/priorband_bo_user_decided.yaml index cd7c82ec3..c0a98cb37 100644 --- a/tests/test_neps_api/solution_yamls/priorband_bo_user_decided.yaml +++ b/tests/test_neps_api/solution_yamls/priorband_bo_user_decided.yaml @@ -7,8 +7,8 @@ searcher_args: initial_design_type: max_budget prior_confidence: medium random_interleave_prob: 0.0 - sample_default_first: 
true - sample_default_at_target: false + sample_prior_first: true + sample_prior_at_target: false prior_weight_type: geometric inc_sample_type: mutation inc_mutation_rate: 0.5 @@ -20,4 +20,3 @@ searcher_args: surrogate_model: gp acquisition: EI log_prior_weighted: false - acquisition_sampler: mutation diff --git a/tests/test_neps_api/solution_yamls/priorband_neps_decided.yaml b/tests/test_neps_api/solution_yamls/priorband_neps_decided.yaml index eb3b01790..6899bd00a 100644 --- a/tests/test_neps_api/solution_yamls/priorband_neps_decided.yaml +++ b/tests/test_neps_api/solution_yamls/priorband_neps_decided.yaml @@ -7,8 +7,8 @@ searcher_args: initial_design_type: max_budget prior_confidence: medium random_interleave_prob: 0.0 - sample_default_first: true - sample_default_at_target: false + sample_prior_first: true + sample_prior_at_target: false prior_weight_type: geometric inc_sample_type: mutation inc_mutation_rate: 0.5 diff --git a/tests/test_neps_api/solution_yamls/user_yaml_bo.yaml b/tests/test_neps_api/solution_yamls/user_yaml_bo.yaml index 156d67e41..c6cbe0eec 100644 --- a/tests/test_neps_api/solution_yamls/user_yaml_bo.yaml +++ b/tests/test_neps_api/solution_yamls/user_yaml_bo.yaml @@ -4,11 +4,5 @@ searcher_selection: user-yaml neps_decision_tree: false searcher_args: initial_design_size: 5 - surrogate_model: gp - acquisition: EI - log_prior_weighted: false - acquisition_sampler: random - random_interleave_prob: 0.1 - disable_priors: false - prior_confidence: high - sample_default_first: false + use_priors: true + sample_prior_first: true diff --git a/tests/test_neps_api/test_api.py b/tests/test_neps_api/test_api.py index 324080072..ae63b253d 100644 --- a/tests/test_neps_api/test_api.py +++ b/tests/test_neps_api/test_api.py @@ -28,105 +28,32 @@ def no_logs_gte_error(caplog): assert not errors -testing_scripts = [ - "default_neps", - "baseoptimizer_neps", - "user_yaml_neps", -] +HERE = Path(__file__).resolve().parent -examples_folder = Path(__file__, "..", "testing_scripts").resolve() -solution_folder = Path(__file__, "..", "solution_yamls").resolve() +testing_scripts = ["default_neps", "baseoptimizer_neps", "user_yaml_neps"] +EXAMPLES_FOLDER = HERE / "testing_scripts" +SOLUTION_FOLDER = HERE / "solution_yamls" neps_api_example_script = [ - examples_folder / f"{example}.py" for example in testing_scripts + EXAMPLES_FOLDER / f"{example}.py" for example in testing_scripts ] @pytest.mark.neps_api -def test_default_examples(tmp_path): +@pytest.mark.parametrize("example_script", neps_api_example_script) +def test_default_examples(tmp_path: Path, example_script: Path) -> None: # Running the example files holding multiple neps.run commands. 
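+ # runpy with run_name="__main__" triggers each script's main guard, so all of its neps.run calls execute.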
- - runpy.run_path( - neps_api_example_script[0], - run_name="__main__", - ) - - # Testing each folder with its corresponding expected dictionary - for folder_name in os.listdir(tmp_path): - folder_path = os.path.join(tmp_path, folder_name) - - assert os.path.exists(folder_path), f"Directory does not exist: {folder_path}" - - info_yaml_path = os.path.join(folder_path, ".optimizer_info", "info.yaml") - - assert os.path.exists( - str(info_yaml_path) - ), f"File does not exist: {info_yaml_path}\n{os.listdir(folder_path)}" - - # Load the YAML file - with open(str(info_yaml_path)) as yaml_config: - loaded_data = yaml.safe_load(yaml_config) - - with open(str(solution_folder / (folder_name + ".yaml"))) as solution_yaml: - expected_data = yaml.safe_load(solution_yaml) - - assert loaded_data == expected_data - - -@pytest.mark.neps_api -def test_baseoptimizer_examples(tmp_path): - # Running the example files holding multiple neps.run commands. - - runpy.run_path( - neps_api_example_script[1], - run_name="__main__", - ) + runpy.run_path(str(example_script), run_name="__main__") # Testing each folder with its corresponding expected dictionary - for folder_name in os.listdir(tmp_path): - folder_path = os.path.join(tmp_path, folder_name) - - assert os.path.exists(folder_path), f"Directory does not exist: {folder_path}" - - info_yaml_path = os.path.join(folder_path, ".optimizer_info", "info.yaml") - - assert os.path.exists( - str(info_yaml_path) - ), f"File does not exist: {info_yaml_path}" - - # Load the YAML file - with open(str(info_yaml_path)) as yaml_config: - loaded_data = yaml.safe_load(yaml_config) - - with open(str(solution_folder / (folder_name + ".yaml"))) as solution_yaml: - expected_data = yaml.safe_load(solution_yaml) - - assert loaded_data == expected_data - - -@pytest.mark.neps_api -def test_user_created_yaml_examples(tmp_path): - runpy.run_path( - neps_api_example_script[2], - run_name="__main__", - ) - - # Testing each folder with its corresponding expected dictionary - for folder_name in os.listdir(tmp_path): - folder_path = os.path.join(tmp_path, folder_name) - - assert os.path.exists(folder_path), f"Directory does not exist: {folder_path}" - - info_yaml_path = os.path.join(folder_path, ".optimizer_info", "info.yaml") - - assert os.path.exists( - str(info_yaml_path) - ), f"File does not exist: {info_yaml_path}" + for folder in tmp_path.iterdir(): + info_yaml_path = folder / "optimizer_info.yaml" - # Load the YAML file - with open(str(info_yaml_path)) as yaml_config: - loaded_data = yaml.safe_load(yaml_config) + assert info_yaml_path.exists() + loaded_data = yaml.safe_load(info_yaml_path.read_text()) - with open(str(solution_folder / (folder_name + ".yaml"))) as solution_yaml: - expected_data = yaml.safe_load(solution_yaml) + solution_yaml_path = SOLUTION_FOLDER / (folder.name + ".yaml") + solution_data = yaml.safe_load(solution_yaml_path.read_text()) - assert loaded_data == expected_data + assert ( + loaded_data == solution_data + ), f"Solution Path: {solution_yaml_path}\nLoaded Path: {info_yaml_path}\n" diff --git a/tests/test_neps_api/testing_scripts/baseoptimizer_neps.py b/tests/test_neps_api/testing_scripts/baseoptimizer_neps.py index 1fe9a2199..9a4a4591d 100644 --- a/tests/test_neps_api/testing_scripts/baseoptimizer_neps.py +++ b/tests/test_neps_api/testing_scripts/baseoptimizer_neps.py @@ -1,29 +1,39 @@ +from __future__ import annotations + import logging +from warnings import warn import neps from neps.optimizers.bayesian_optimization.optimizer import BayesianOptimization from 
neps.optimizers.multi_fidelity.hyperband import Hyperband from neps.search_spaces.search_space import SearchSpace -pipeline_space_fidelity = dict( - val1=neps.FloatParameter(lower=-10, upper=10), - val2=neps.IntegerParameter(lower=1, upper=5, is_fidelity=True), -) +pipeline_space_fidelity = { + "val1": neps.Float(lower=-10, upper=10), + "val2": neps.Integer(lower=1, upper=5, is_fidelity=True), +} -pipeline_space = dict( - val1=neps.FloatParameter(lower=-10, upper=10), - val2=neps.IntegerParameter(lower=1, upper=5), -) +pipeline_space = { + "val1": neps.Float(lower=-10, upper=10), + "val2": neps.Integer(lower=1, upper=5), +} def run_pipeline(val1, val2): - loss = val1 * val2 - return loss + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning, stacklevel=2) + return evaluate_pipeline(val1, val2) + +def evaluate_pipeline(val1, val2): + return val1 * val2 def run_pipeline_fidelity(val1, val2): - loss = val1 * val2 - return {"loss": loss, "cost": 1} + warn("run_pipeline_fidelity is deprecated, use evaluate_pipeline_fidelity instead", DeprecationWarning, stacklevel=2) + return evaluate_pipeline_fidelity(val1, val2) + +def evaluate_pipeline_fidelity(val1, val2): + objective_to_minimize = val1 * val2 + return {"objective_to_minimize": objective_to_minimize, "cost": 1} logging.basicConfig(level=logging.INFO) @@ -34,7 +44,7 @@ def run_pipeline_fidelity(val1, val2): pipeline_space=search_space, initial_design_size=5 ) neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, root_directory="bo_custom_created", max_evaluations_total=1, searcher=my_custom_searcher_1, @@ -42,9 +52,9 @@ def run_pipeline_fidelity(val1, val2): # Case 2: Testing BaseOptimizer as searcher with Hyperband search_space_fidelity = SearchSpace(**pipeline_space_fidelity) -my_custom_searcher_2 = Hyperband(pipeline_space=search_space_fidelity, budget=1) +my_custom_searcher_2 = Hyperband(pipeline_space=search_space_fidelity, max_cost_total=1) neps.run( - run_pipeline=run_pipeline_fidelity, + evaluate_pipeline=evaluate_pipeline_fidelity, root_directory="hyperband_custom_created", max_cost_total=1, searcher=my_custom_searcher_2, diff --git a/tests/test_neps_api/testing_scripts/default_neps.py b/tests/test_neps_api/testing_scripts/default_neps.py index 5384042aa..815adcc9e 100644 --- a/tests/test_neps_api/testing_scripts/default_neps.py +++ b/tests/test_neps_api/testing_scripts/default_neps.py @@ -1,35 +1,37 @@ +from __future__ import annotations + import logging +from warnings import warn import neps -from neps.optimizers.bayesian_optimization.kernels import GraphKernelMapping -from neps.optimizers.bayesian_optimization.models.gp_hierarchy import ( - ComprehensiveGPHierarchy, -) -pipeline_space_fidelity_priors = dict( - val1=neps.FloatParameter(lower=-10, upper=10, default=1), - val2=neps.IntegerParameter(lower=1, upper=5, is_fidelity=True), -) +pipeline_space_fidelity_priors = { + "val1": neps.Float(lower=-10, upper=10, prior=1), + "val2": neps.Integer(lower=1, upper=5, is_fidelity=True), +} -pipeline_space_not_fidelity_priors = dict( - val1=neps.FloatParameter(lower=-10, upper=10, default=1), - val2=neps.IntegerParameter(lower=1, upper=5, default=1), -) +pipeline_space_not_fidelity_priors = { + "val1": neps.Float(lower=-10, upper=10, prior=1), + "val2": neps.Integer(lower=1, upper=5, prior=1), +} -pipeline_space_fidelity = dict( - val1=neps.FloatParameter(lower=-10, upper=10), - val2=neps.IntegerParameter(lower=1, upper=5, is_fidelity=True), -) +pipeline_space_fidelity = { + 
"val1": neps.Float(lower=-10, upper=10), + "val2": neps.Integer(lower=1, upper=5, is_fidelity=True), +} -pipeline_space_not_fidelity = dict( - val1=neps.FloatParameter(lower=-10, upper=10), - val2=neps.IntegerParameter(lower=1, upper=5), -) +pipeline_space_not_fidelity = { + "val1": neps.Float(lower=-10, upper=10), + "val2": neps.Integer(lower=1, upper=5), +} def run_pipeline(val1, val2): - loss = val1 * val2 - return loss + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning, stacklevel=2) + return evaluate_pipeline(val1, val2) + +def evaluate_pipeline(val1, val2): + return val1 * val2 logging.basicConfig(level=logging.INFO) @@ -40,7 +42,7 @@ def run_pipeline(val1, val2): # Case 1: Choosing priorband neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space_fidelity_priors, root_directory="priorband_bo_user_decided", max_evaluations_total=1, @@ -49,45 +51,12 @@ def run_pipeline(val1, val2): eta=3, ) -# Case 2: Choosing Bayesian optimization - -early_hierarchies_considered = "0_1_2_3" -hierarchy_considered = [int(hl) for hl in early_hierarchies_considered.split("_")] -graph_kernels = ["wl"] * (len(hierarchy_considered) + 1) -wl_h = [2, 1] + [2] * (len(hierarchy_considered) - 1) -graph_kernels = [ - GraphKernelMapping[kernel]( - h=wl_h[j], - oa=False, - se_kernel=None, - ) - for j, kernel in enumerate(graph_kernels) -] -surrogate_model = ComprehensiveGPHierarchy -surrogate_model_args = { - "graph_kernels": graph_kernels, - "hp_kernels": [], - "verbose": False, - "hierarchy_consider": hierarchy_considered, - "d_graph_features": 0, - "vectorial_features": None, -} -neps.run( - run_pipeline=run_pipeline, - pipeline_space=pipeline_space_not_fidelity, - root_directory="bo_user_decided", - max_evaluations_total=1, - searcher="bayesian_optimization", - surrogate_model=surrogate_model, - surrogate_model_args=surrogate_model_args, -) - # Testing neps decision tree on deciding the searcher and rejecting the # additional arguments. 
# Case 1: Choosing priorband neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space_fidelity_priors, root_directory="priorband_neps_decided", max_evaluations_total=1, @@ -97,7 +66,7 @@ def run_pipeline(val1, val2): # Case 2: Choosing bayesian_optimization neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space_not_fidelity, root_directory="bo_neps_decided", max_evaluations_total=1, @@ -105,7 +74,7 @@ def run_pipeline(val1, val2): # Case 3: Choosing pibo neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space_not_fidelity_priors, root_directory="pibo_neps_decided", max_evaluations_total=1, @@ -114,7 +83,7 @@ def run_pipeline(val1, val2): # Case 4: Choosing hyperband neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space_fidelity, root_directory="hyperband_neps_decided", max_evaluations_total=1, diff --git a/tests/test_neps_api/testing_scripts/user_yaml_neps.py b/tests/test_neps_api/testing_scripts/user_yaml_neps.py index 5d862d9db..2320a90df 100644 --- a/tests/test_neps_api/testing_scripts/user_yaml_neps.py +++ b/tests/test_neps_api/testing_scripts/user_yaml_neps.py @@ -1,27 +1,38 @@ +from __future__ import annotations + import logging -import os from pathlib import Path +from warnings import warn + import neps -pipeline_space = dict( - val1=neps.FloatParameter(lower=-10, upper=10), - val2=neps.IntegerParameter(lower=1, upper=5), -) +pipeline_space = { + "val1": neps.Float(lower=-10, upper=10), + "val2": neps.Integer(lower=1, upper=5), +} def run_pipeline(val1, val2): - loss = val1 * val2 - return loss + warn( + "run_pipeline is deprecated, use evaluate_pipeline instead", + DeprecationWarning, + stacklevel=2, + ) + return evaluate_pipeline(val1, val2) + + +def evaluate_pipeline(val1, val2): + return val1 * val2 logging.basicConfig(level=logging.INFO) # Testing using created yaml with api -script_directory = os.path.dirname(os.path.abspath(__file__)) -parent_directory = os.path.join(script_directory, os.pardir) +script_directory = Path(__file__).resolve().parent +parent_directory = script_directory.parent searcher_path = Path(parent_directory) / "testing_yaml" / "optimizer_test" neps.run( - run_pipeline=run_pipeline, + evaluate_pipeline=evaluate_pipeline, pipeline_space=pipeline_space, root_directory="user_yaml_bo", max_evaluations_total=1, diff --git a/tests/test_neps_api/testing_yaml/optimizer_test.yaml b/tests/test_neps_api/testing_yaml/optimizer_test.yaml index f65af7431..e6efcf0e7 100644 --- a/tests/test_neps_api/testing_yaml/optimizer_test.yaml +++ b/tests/test_neps_api/testing_yaml/optimizer_test.yaml @@ -1,11 +1,5 @@ strategy: bayesian_optimization # Specific arguments depending on the searcher initial_design_size: 7 -surrogate_model: gp -acquisition: EI -log_prior_weighted: false -acquisition_sampler: random -random_interleave_prob: 0.1 -disable_priors: false -prior_confidence: high -sample_default_first: false +use_priors: true +sample_prior_first: true diff --git a/tests/test_regression.py b/tests/test_regression.py index 6223d2fdd..4f5ea96fe 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import os diff --git a/tests/test_runtime/test_default_report_values.py b/tests/test_runtime/test_default_report_values.py index 652db9de6..74c56f466 100644 --- 
a/tests/test_runtime/test_default_report_values.py +++ b/tests/test_runtime/test_default_report_values.py @@ -1,36 +1,39 @@ from __future__ import annotations from pathlib import Path + from pytest_cases import fixture from neps.optimizers.random_search.optimizer import RandomSearch from neps.runtime import DefaultWorker +from neps.search_spaces import Float from neps.search_spaces.search_space import SearchSpace -from neps.state.filebased import create_or_load_filebased_neps_state from neps.state.neps_state import NePSState from neps.state.optimizer import OptimizationState, OptimizerInfo +from neps.state.seed_snapshot import SeedSnapshot from neps.state.settings import DefaultReportValues, OnErrorPossibilities, WorkerSettings -from neps.search_spaces import FloatParameter from neps.state.trial import Trial @fixture -def neps_state(tmp_path: Path) -> NePSState[Path]: - return create_or_load_filebased_neps_state( - directory=tmp_path / "neps_state", +def neps_state(tmp_path: Path) -> NePSState: + return NePSState.create_or_load( + path=tmp_path / "neps_state", optimizer_info=OptimizerInfo(info={"nothing": "here"}), - optimizer_state=OptimizationState(budget=None, shared_state={}), + optimizer_state=OptimizationState( + budget=None, seed_snapshot=SeedSnapshot.new_capture(), shared_state={} + ), ) def test_default_values_on_error( neps_state: NePSState, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, default_report_values=DefaultReportValues( - loss_value_on_error=2.4, # <- Highlight + objective_to_minimize_value_on_error=2.4, # <- Highlight cost_value_on_error=2.4, # <- Highlight learning_curve_on_error=[2.4, 2.5], # <- Highlight ), @@ -42,6 +45,7 @@ def test_default_values_on_error( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) def eval_function(*args, **kwargs) -> float: @@ -56,20 +60,21 @@ def eval_function(*args, **kwargs) -> float: ) worker.run() - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() n_crashed = sum( - trial.state == Trial.State.CRASHED is not None for trial in trials.values() + trial.metadata.state == Trial.State.CRASHED is not None + for trial in trials.values() ) assert len(trials) == 1 assert n_crashed == 1 - assert neps_state.get_next_pending_trial() is None - assert len(neps_state.get_errors()) == 1 + assert neps_state.lock_and_get_next_pending_trial() is None + assert len(neps_state.lock_and_get_errors()) == 1 trial = trials.popitem()[1] - assert trial.state == Trial.State.CRASHED + assert trial.metadata.state == Trial.State.CRASHED assert trial.report is not None - assert trial.report.loss == 2.4 + assert trial.report.objective_to_minimize == 2.4 assert trial.report.cost == 2.4 assert trial.report.learning_curve == [2.4, 2.5] @@ -77,7 +82,7 @@ def eval_function(*args, **kwargs) -> float: def test_default_values_on_not_specified( neps_state: NePSState, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, default_report_values=DefaultReportValues( @@ -92,6 +97,7 @@ def test_default_values_on_not_specified( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, 
max_cost_for_worker=None, + batch_size=None, ) def eval_function(*args, **kwargs) -> float: @@ -106,30 +112,33 @@ def eval_function(*args, **kwargs) -> float: ) worker.run() - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() n_sucess = sum( - trial.state == Trial.State.SUCCESS is not None for trial in trials.values() + trial.metadata.state == Trial.State.SUCCESS is not None + for trial in trials.values() ) assert len(trials) == 1 assert n_sucess == 1 - assert neps_state.get_next_pending_trial() is None - assert len(neps_state.get_errors()) == 0 + assert neps_state.lock_and_get_next_pending_trial() is None + assert len(neps_state.lock_and_get_errors()) == 0 trial = trials.popitem()[1] - assert trial.state == Trial.State.SUCCESS + assert trial.metadata.state == Trial.State.SUCCESS assert trial.report is not None assert trial.report.cost == 2.4 assert trial.report.learning_curve == [2.4, 2.5] -def test_default_value_loss_curve_take_loss_value( +def test_default_value_objective_to_minimize_curve_take_objective_to_minimize_value( neps_state: NePSState, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, - default_report_values=DefaultReportValues(learning_curve_if_not_provided="loss"), + default_report_values=DefaultReportValues( + learning_curve_if_not_provided="objective_to_minimize" + ), max_evaluations_total=None, include_in_progress_evaluations_towards_maximum=False, max_cost_total=None, @@ -138,6 +147,7 @@ def test_default_value_loss_curve_take_loss_value( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) LOSS = 1.0 @@ -154,17 +164,18 @@ def eval_function(*args, **kwargs) -> float: ) worker.run() - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() n_sucess = sum( - trial.state == Trial.State.SUCCESS is not None for trial in trials.values() + trial.metadata.state == Trial.State.SUCCESS is not None + for trial in trials.values() ) assert len(trials) == 1 assert n_sucess == 1 - assert neps_state.get_next_pending_trial() is None - assert len(neps_state.get_errors()) == 0 + assert neps_state.lock_and_get_next_pending_trial() is None + assert len(neps_state.lock_and_get_errors()) == 0 trial = trials.popitem()[1] - assert trial.state == Trial.State.SUCCESS + assert trial.metadata.state == Trial.State.SUCCESS assert trial.report is not None assert trial.report.learning_curve == [LOSS] diff --git a/tests/test_runtime/test_error_handling_strategies.py b/tests/test_runtime/test_error_handling_strategies.py index 5e8194484..7650cbc2a 100644 --- a/tests/test_runtime/test_error_handling_strategies.py +++ b/tests/test_runtime/test_error_handling_strategies.py @@ -1,29 +1,34 @@ from __future__ import annotations -import pytest from dataclasses import dataclass -from pandas.core.common import contextlib from pathlib import Path + +import pytest +from pandas.core.common import contextlib from pytest_cases import fixture, parametrize +from neps.exceptions import WorkerRaiseError from neps.optimizers.random_search.optimizer import RandomSearch from neps.runtime import DefaultWorker +from neps.search_spaces import Float from neps.search_spaces.search_space import SearchSpace -from neps.state.err_dump import SerializedError -from neps.state.filebased import create_or_load_filebased_neps_state 
from neps.state.neps_state import NePSState from neps.state.optimizer import OptimizationState, OptimizerInfo +from neps.state.seed_snapshot import SeedSnapshot from neps.state.settings import DefaultReportValues, OnErrorPossibilities, WorkerSettings -from neps.search_spaces import FloatParameter from neps.state.trial import Trial @fixture -def neps_state(tmp_path: Path) -> NePSState[Path]: - return create_or_load_filebased_neps_state( - directory=tmp_path / "neps_state", +def neps_state(tmp_path: Path) -> NePSState: + return NePSState.create_or_load( + path=tmp_path / "neps_state", optimizer_info=OptimizerInfo(info={"nothing": "here"}), - optimizer_state=OptimizationState(budget=None, shared_state={}), + optimizer_state=OptimizationState( + budget=None, + seed_snapshot=SeedSnapshot.new_capture(), + shared_state=None, + ), ) @@ -35,7 +40,7 @@ def test_worker_raises_when_error_in_self( neps_state: NePSState, on_error: OnErrorPossibilities, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=on_error, # <- Highlight default_report_values=DefaultReportValues(), @@ -47,6 +52,7 @@ def test_worker_raises_when_error_in_self( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) def eval_function(*args, **kwargs) -> float: @@ -59,22 +65,23 @@ def eval_function(*args, **kwargs) -> float: settings=settings, _pre_sample_hooks=None, ) - with pytest.raises(ValueError, match="This is an error"): + with pytest.raises(WorkerRaiseError): worker.run() - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() n_crashed = sum( - trial.state == Trial.State.CRASHED is not None for trial in trials.values() + trial.metadata.state == Trial.State.CRASHED is not None + for trial in trials.values() ) assert len(trials) == 1 assert n_crashed == 1 - assert neps_state.get_next_pending_trial() is None - assert len(neps_state.get_errors()) == 1 + assert neps_state.lock_and_get_next_pending_trial() is None + assert len(neps_state.lock_and_get_errors()) == 1 def test_worker_raises_when_error_in_other_worker(neps_state: NePSState) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.RAISE_ANY_ERROR, # <- Highlight default_report_values=DefaultReportValues(), @@ -86,6 +93,7 @@ def test_worker_raises_when_error_in_other_worker(neps_state: NePSState) -> None max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) def evaler(*args, **kwargs) -> float: @@ -107,23 +115,24 @@ def evaler(*args, **kwargs) -> float: ) # Worker1 should run 1 and error out - with contextlib.suppress(ValueError): + with contextlib.suppress(WorkerRaiseError): worker1.run() # Worker2 should not run and immeditaly error out, however # it will have loaded in a serialized error - with pytest.raises(SerializedError): + with pytest.raises(WorkerRaiseError): worker2.run() - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() n_crashed = sum( - trial.state == Trial.State.CRASHED is not None for trial in trials.values() + trial.metadata.state == Trial.State.CRASHED is not None + for trial in trials.values() ) assert len(trials) == 1 assert n_crashed 
== 1 - assert neps_state.get_next_pending_trial() is None - assert len(neps_state.get_errors()) == 1 + assert neps_state.lock_and_get_next_pending_trial() is None + assert len(neps_state.lock_and_get_errors()) == 1 @pytest.mark.parametrize( @@ -134,7 +143,7 @@ def test_worker_does_not_raise_when_error_in_other_worker( neps_state: NePSState, on_error: OnErrorPossibilities, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.RAISE_WORKER_ERROR, # <- Highlight default_report_values=DefaultReportValues(), @@ -146,6 +155,7 @@ def test_worker_does_not_raise_when_error_in_other_worker( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) @dataclass @@ -176,7 +186,7 @@ def __call__(self, *args, **kwargs) -> float: # Worker1 should run 1 and error out evaler.do_raise = True - with contextlib.suppress(ValueError): + with contextlib.suppress(WorkerRaiseError): worker1.run() assert worker1.worker_cumulative_eval_count == 1 @@ -185,16 +195,18 @@ def __call__(self, *args, **kwargs) -> float: worker2.run() assert worker2.worker_cumulative_eval_count == 1 - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() n_success = sum( - trial.state == Trial.State.SUCCESS is not None for trial in trials.values() + trial.metadata.state == Trial.State.SUCCESS is not None + for trial in trials.values() ) n_crashed = sum( - trial.state == Trial.State.CRASHED is not None for trial in trials.values() + trial.metadata.state == Trial.State.CRASHED is not None + for trial in trials.values() ) assert n_success == 1 assert n_crashed == 1 assert len(trials) == 2 - assert neps_state.get_next_pending_trial() is None - assert len(neps_state.get_errors()) == 1 + assert neps_state.lock_and_get_next_pending_trial() is None + assert len(neps_state.lock_and_get_errors()) == 1 diff --git a/tests/test_runtime/test_stopping_criterion.py b/tests/test_runtime/test_stopping_criterion.py index 28426a1fe..e380a9fc0 100644 --- a/tests/test_runtime/test_stopping_criterion.py +++ b/tests/test_runtime/test_stopping_criterion.py @@ -2,32 +2,37 @@ import time from pathlib import Path + from pytest_cases import fixture from neps.optimizers.random_search.optimizer import RandomSearch from neps.runtime import DefaultWorker +from neps.search_spaces import Float from neps.search_spaces.search_space import SearchSpace -from neps.state.filebased import create_or_load_filebased_neps_state from neps.state.neps_state import NePSState from neps.state.optimizer import OptimizationState, OptimizerInfo +from neps.state.seed_snapshot import SeedSnapshot from neps.state.settings import DefaultReportValues, OnErrorPossibilities, WorkerSettings -from neps.search_spaces import FloatParameter from neps.state.trial import Trial @fixture -def neps_state(tmp_path: Path) -> NePSState[Path]: - return create_or_load_filebased_neps_state( - directory=tmp_path / "neps_state", +def neps_state(tmp_path: Path) -> NePSState: + return NePSState.create_or_load( + path=tmp_path / "neps_state", optimizer_info=OptimizerInfo(info={"nothing": "here"}), - optimizer_state=OptimizationState(budget=None, shared_state={}), + optimizer_state=OptimizationState( + budget=None, + seed_snapshot=SeedSnapshot.new_capture(), + shared_state=None, + ), ) def test_max_evaluations_total_stopping_criterion( neps_state: NePSState, ) 
-> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, default_report_values=DefaultReportValues(), @@ -39,6 +44,7 @@ def test_max_evaluations_total_stopping_criterion( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) def eval_function(*args, **kwargs) -> float: @@ -54,14 +60,14 @@ def eval_function(*args, **kwargs) -> float: worker.run() assert worker.worker_cumulative_eval_count == 3 - assert neps_state.get_next_pending_trial() is None - assert len(neps_state.get_errors()) == 0 + assert neps_state.lock_and_get_next_pending_trial() is None + assert len(neps_state.lock_and_get_errors()) == 0 - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() for _, trial in trials.items(): - assert trial.state == Trial.State.SUCCESS + assert trial.metadata.state == Trial.State.SUCCESS assert trial.report is not None - assert trial.report.loss == 1.0 + assert trial.report.objective_to_minimize == 1.0 # New worker has the same total number of evaluations so it should not run anything. new_worker = DefaultWorker.new( @@ -73,14 +79,14 @@ def eval_function(*args, **kwargs) -> float: ) new_worker.run() assert new_worker.worker_cumulative_eval_count == 0 - assert neps_state.get_next_pending_trial() is None - assert len(neps_state.get_errors()) == 0 + assert neps_state.lock_and_get_next_pending_trial() is None + assert len(neps_state.lock_and_get_errors()) == 0 def test_worker_evaluations_total_stopping_criterion( neps_state: NePSState, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, default_report_values=DefaultReportValues(), @@ -92,6 +98,7 @@ def test_worker_evaluations_total_stopping_criterion( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) def eval_function(*args, **kwargs) -> float: @@ -107,15 +114,15 @@ def eval_function(*args, **kwargs) -> float: worker.run() assert worker.worker_cumulative_eval_count == 2 - assert neps_state.get_next_pending_trial() is None - assert len(neps_state.get_errors()) == 0 + assert neps_state.lock_and_get_next_pending_trial() is None + assert len(neps_state.lock_and_get_errors()) == 0 - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() assert len(trials) == 2 for _, trial in trials.items(): - assert trial.state == Trial.State.SUCCESS + assert trial.metadata.state == Trial.State.SUCCESS assert trial.report is not None - assert trial.report.loss == 1.0 + assert trial.report.objective_to_minimize == 1.0 # New worker should run 2 more evaluations new_worker = DefaultWorker.new( @@ -128,21 +135,21 @@ def eval_function(*args, **kwargs) -> float: new_worker.run() assert worker.worker_cumulative_eval_count == 2 - assert neps_state.get_next_pending_trial() is None - assert len(neps_state.get_errors()) == 0 + assert neps_state.lock_and_get_next_pending_trial() is None + assert len(neps_state.lock_and_get_errors()) == 0 - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() assert len(trials) == 4 # Now we should have 4 of them for _, trial in trials.items(): - assert trial.state == 
Trial.State.SUCCESS + assert trial.metadata.state == Trial.State.SUCCESS assert trial.report is not None - assert trial.report.loss == 1.0 + assert trial.report.objective_to_minimize == 1.0 def test_include_in_progress_evaluations_towards_maximum_with_work_eval_count( neps_state: NePSState, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, default_report_values=DefaultReportValues(), @@ -154,10 +161,11 @@ def test_include_in_progress_evaluations_towards_maximum_with_work_eval_count( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) # We put in one trial as being inprogress - pending_trial = neps_state.sample_trial(optimizer, worker_id="dummy") + pending_trial = neps_state.lock_and_sample_trial(optimizer, worker_id="dummy") pending_trial.set_evaluating(time_started=0.0, worker_id="dummy") neps_state.put_updated_trial(pending_trial) @@ -175,30 +183,30 @@ def eval_function(*args, **kwargs) -> float: assert worker.worker_cumulative_eval_count == 1 assert ( - neps_state.get_next_pending_trial() is None + neps_state.lock_and_get_next_pending_trial() is None ) # should have no pending trials to be picked up - assert len(neps_state.get_errors()) == 0 + assert len(neps_state.lock_and_get_errors()) == 0 - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() assert len(trials) == 2 the_pending_trial = trials[pending_trial.id] assert the_pending_trial == pending_trial - assert the_pending_trial.state == Trial.State.EVALUATING + assert the_pending_trial.metadata.state == Trial.State.EVALUATING assert the_pending_trial.report is None the_completed_trial_id = next(iter(trials.keys() - {pending_trial.id})) the_completed_trial = trials[the_completed_trial_id] - assert the_completed_trial.state == Trial.State.SUCCESS + assert the_completed_trial.metadata.state == Trial.State.SUCCESS assert the_completed_trial.report is not None - assert the_completed_trial.report.loss == 1.0 + assert the_completed_trial.report.objective_to_minimize == 1.0 def test_max_cost_total( neps_state: NePSState, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, default_report_values=DefaultReportValues(), @@ -210,10 +218,11 @@ def test_max_cost_total( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) def eval_function(*args, **kwargs) -> dict: - return {"loss": 1.0, "cost": 1.0} + return {"objective_to_minimize": 1.0, "cost": 1.0} worker = DefaultWorker.new( state=neps_state, @@ -227,11 +236,11 @@ def eval_function(*args, **kwargs) -> dict: assert worker.worker_cumulative_eval_count == 2 assert worker.worker_cumulative_eval_cost == 2.0 assert ( - neps_state.get_next_pending_trial() is None + neps_state.lock_and_get_next_pending_trial() is None ) # should have no pending trials to be picked up - assert len(neps_state.get_errors()) == 0 + assert len(neps_state.lock_and_get_errors()) == 0 - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() assert len(trials) == 2 # New worker should now not run anything as the total cost has been reached. 
@@ -249,7 +258,7 @@ def eval_function(*args, **kwargs) -> dict: def test_worker_cost_total( neps_state: NePSState, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, default_report_values=DefaultReportValues(), @@ -261,10 +270,11 @@ def test_worker_cost_total( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=2, # <- Highlight, only 2 maximum evaluations allowed + batch_size=None, ) def eval_function(*args, **kwargs) -> dict: - return {"loss": 1.0, "cost": 1.0} + return {"objective_to_minimize": 1.0, "cost": 1.0} worker = DefaultWorker.new( state=neps_state, @@ -278,11 +288,11 @@ def eval_function(*args, **kwargs) -> dict: assert worker.worker_cumulative_eval_count == 2 assert worker.worker_cumulative_eval_cost == 2.0 assert ( - neps_state.get_next_pending_trial() is None + neps_state.lock_and_get_next_pending_trial() is None ) # should have no pending trials to be picked up - assert len(neps_state.get_errors()) == 0 + assert len(neps_state.lock_and_get_errors()) == 0 - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() assert len(trials) == 2 # New worker should also run 2 more trials @@ -297,18 +307,18 @@ def eval_function(*args, **kwargs) -> dict: assert new_worker.worker_cumulative_eval_count == 2 assert new_worker.worker_cumulative_eval_cost == 2.0 assert ( - neps_state.get_next_pending_trial() is None + neps_state.lock_and_get_next_pending_trial() is None ) # should have no pending trials to be picked up - assert len(neps_state.get_errors()) == 0 + assert len(neps_state.lock_and_get_errors()) == 0 - trials = neps_state.get_all_trials() + trials = neps_state.lock_and_read_trials() assert len(trials) == 4 # 2 more trials were ran def test_worker_wallclock_time( neps_state: NePSState, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, default_report_values=DefaultReportValues(), @@ -320,6 +330,7 @@ def test_worker_wallclock_time( max_wallclock_time_for_worker_seconds=1, # <- highlight, 1 second max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) def eval_function(*args, **kwargs) -> float: @@ -338,10 +349,10 @@ def eval_function(*args, **kwargs) -> float: assert worker.worker_cumulative_eval_count > 0 assert worker.worker_cumulative_evaluation_time_seconds <= 2.0 assert ( - neps_state.get_next_pending_trial() is None + neps_state.lock_and_get_next_pending_trial() is None ) # should have no pending trials to be picked up - assert len(neps_state.get_errors()) == 0 - len_trials_on_first_worker = len(neps_state.get_all_trials()) + assert len(neps_state.lock_and_get_errors()) == 0 + len_trials_on_first_worker = len(neps_state.lock_and_read_trials()) # New worker should also run some trials more trials new_worker = DefaultWorker.new( @@ -356,17 +367,17 @@ def eval_function(*args, **kwargs) -> float: assert new_worker.worker_cumulative_eval_count > 0 assert new_worker.worker_cumulative_evaluation_time_seconds <= 2.0 assert ( - neps_state.get_next_pending_trial() is None + neps_state.lock_and_get_next_pending_trial() is None ) # should have no pending trials to be picked up - assert len(neps_state.get_errors()) == 0 - 
len_trials_on_second_worker = len(neps_state.get_all_trials()) + assert len(neps_state.lock_and_get_errors()) == 0 + len_trials_on_second_worker = len(neps_state.lock_and_read_trials()) assert len_trials_on_second_worker > len_trials_on_first_worker def test_max_worker_evaluation_time( neps_state: NePSState, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, default_report_values=DefaultReportValues(), @@ -378,6 +389,7 @@ def test_max_worker_evaluation_time( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=0.5, max_cost_for_worker=None, + batch_size=None, ) def eval_function(*args, **kwargs) -> float: @@ -397,10 +409,10 @@ def eval_function(*args, **kwargs) -> float: assert worker.worker_cumulative_eval_count > 0 assert worker.worker_cumulative_evaluation_time_seconds <= 1.0 assert ( - neps_state.get_next_pending_trial() is None + neps_state.lock_and_get_next_pending_trial() is None ) # should have no pending trials to be picked up - assert len(neps_state.get_errors()) == 0 - len_trials_on_first_worker = len(neps_state.get_all_trials()) + assert len(neps_state.lock_and_get_errors()) == 0 + len_trials_on_first_worker = len(neps_state.lock_and_read_trials()) # New worker should also run some trials more trials new_worker = DefaultWorker.new( @@ -415,17 +427,17 @@ def eval_function(*args, **kwargs) -> float: assert new_worker.worker_cumulative_eval_count > 0 assert new_worker.worker_cumulative_evaluation_time_seconds <= 1.0 assert ( - neps_state.get_next_pending_trial() is None + neps_state.lock_and_get_next_pending_trial() is None ) # should have no pending trials to be picked up - assert len(neps_state.get_errors()) == 0 - len_trials_on_second_worker = len(neps_state.get_all_trials()) + assert len(neps_state.lock_and_get_errors()) == 0 + len_trials_on_second_worker = len(neps_state.lock_and_read_trials()) assert len_trials_on_second_worker > len_trials_on_first_worker def test_max_evaluation_time_global( neps_state: NePSState, ) -> None: - optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + optimizer = RandomSearch(pipeline_space=SearchSpace(a=Float(0, 1))) settings = WorkerSettings( on_error=OnErrorPossibilities.IGNORE, default_report_values=DefaultReportValues(), @@ -437,6 +449,7 @@ def test_max_evaluation_time_global( max_wallclock_time_for_worker_seconds=None, max_evaluation_time_for_worker_seconds=None, max_cost_for_worker=None, + batch_size=None, ) def eval_function(*args, **kwargs) -> float: @@ -456,10 +469,10 @@ def eval_function(*args, **kwargs) -> float: assert worker.worker_cumulative_eval_count > 0 assert worker.worker_cumulative_evaluation_time_seconds <= 1.0 assert ( - neps_state.get_next_pending_trial() is None + neps_state.lock_and_get_next_pending_trial() is None ) # should have no pending trials to be picked up - assert len(neps_state.get_errors()) == 0 - len_trials_on_first_worker = len(neps_state.get_all_trials()) + assert len(neps_state.lock_and_get_errors()) == 0 + len_trials_on_first_worker = len(neps_state.lock_and_read_trials()) # New worker should also run some trials more trials new_worker = DefaultWorker.new( @@ -474,8 +487,8 @@ def eval_function(*args, **kwargs) -> float: assert new_worker.worker_cumulative_eval_count == 0 assert new_worker.worker_cumulative_evaluation_time_seconds == 0 assert ( - 
neps_state.get_next_pending_trial() is None + neps_state.lock_and_get_next_pending_trial() is None ) # should have no pending trials to be picked up - assert len(neps_state.get_errors()) == 0 - len_trials_on_second_worker = len(neps_state.get_all_trials()) + assert len(neps_state.lock_and_get_errors()) == 0 + len_trials_on_second_worker = len(neps_state.lock_and_read_trials()) assert len_trials_on_second_worker == len_trials_on_first_worker diff --git a/tests/test_samplers.py b/tests/test_samplers.py new file mode 100644 index 000000000..8581153e3 --- /dev/null +++ b/tests/test_samplers.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import torch +from pytest_cases import parametrize + +from neps.sampling.priors import Prior, UniformPrior +from neps.sampling.samplers import BorderSampler, Sampler, Sobol, WeightedSampler +from neps.search_spaces.domain import Domain + + +def _make_centered_prior(ndim: int) -> Prior: + return Prior.from_domains_and_centers( + domains=[Domain.unit_float() for _ in range(ndim)], + centers=[(0.5, 0.5) for _ in range(ndim)], + ) + + +@parametrize( + "sampler", + [ + Sobol(ndim=3), + BorderSampler(ndim=3), + UniformPrior(ndim=3), + # Convenience method for making a distribution around center points + _make_centered_prior(ndim=3), + WeightedSampler( + [UniformPrior(ndim=3), _make_centered_prior(3), Sobol(ndim=3)], + weights=torch.tensor([0.5, 0.25, 0.25]), + ), + ], +) +def test_sampler_samples_into_domain(sampler: Sampler) -> None: + assert sampler.ncols == 3 + + domain_to_sample_into = Domain.integer(12, 15) + for _ in range(10): + x = sampler.sample( + n=5, + to=domain_to_sample_into, + seed=None, + ) + + assert x.shape == (5, 3) + assert (x >= 12).all() + assert (x <= 15).all() + + x = sampler.sample( + n=torch.Size((2, 1)), + to=domain_to_sample_into, + seed=None, + ) + assert x.shape == (2, 1, 3) + assert (x >= 12).all() + assert (x <= 15).all() + + +@parametrize( + "prior", + [ + UniformPrior(ndim=3), + # Convenience method for making a distribution around center points + _make_centered_prior(ndim=3), + ], +) +def test_priors_give_positive_pdfs(prior: Prior) -> None: + # NOTE: The uniform prior does not check that + assert prior.ncols == 3 + domain = Domain.floating(10, 100) + + x = prior.sample(n=5, to=domain, seed=None) + assert x.shape == (5, 3) + assert (x >= 10).all() + assert (x <= 100).all() + + probs = prior.pdf(x, frm=domain) + assert (probs >= 0).all() + assert probs.shape == (5,) + + x = prior.sample(n=torch.Size((2, 1)), to=domain, seed=None) + assert x.shape == (2, 1, 3) + assert (x >= 10).all() + assert (x <= 100).all() + + probs = prior.pdf(x, frm=domain) + assert (probs >= 0).all() + assert probs.shape == (2, 1) diff --git a/tests/test_search_space_functions.py b/tests/test_search_space_functions.py new file mode 100644 index 000000000..f7c09f9f8 --- /dev/null +++ b/tests/test_search_space_functions.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import torch + +from neps.search_spaces.encoding import ConfigEncoder +from neps.search_spaces.functions import pairwise_dist +from neps.search_spaces.hyperparameters import Categorical, Float, Integer + + +def test_config_encoder_pdist_calculation() -> None: + parameters = { + "a": Categorical(["cat", "mouse", "dog"]), + "b": Integer(1, 10), + "c": Float(1, 10), + } + encoder = ConfigEncoder.from_parameters(parameters) + config1 = {"a": "cat", "b": 1, "c": 1.0} + config2 = {"a": "mouse", "b": 10, "c": 10.0} + + # Same config, no distance + x = encoder.encode([config1, config1])
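+ # Both rows encode the same config, so the condensed (square_form=False)
+ # result below is a single pairwise distance of exactly zero.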
+ dist = pairwise_dist(x, encoder=encoder, square_form=False) + assert dist.item() == 0.0 + + # Opposite configs, max distance + x = encoder.encode([config1, config2]) + dist = pairwise_dist(x, encoder=encoder, square_form=False) + + # The first config should have its p2 (Euclidean) distance as the norm + # of the distances between these two configs, i.e. the distance along the + # diagonal of a unit-square they belong to + _first_config_numerical_encoding = torch.tensor([[0.0, 0.0]], dtype=torch.float64) + _second_config_numerical_encoding = torch.tensor([[1.0, 1.0]], dtype=torch.float64) + _expected_numerical_dist = torch.linalg.norm( + _first_config_numerical_encoding - _second_config_numerical_encoding, + ord=2, + ) + + # The categorical distance should just be one, as they are different + _expected_categorical_dist = 1.0 + + _expected_dist = _expected_numerical_dist + _expected_categorical_dist + assert torch.isclose(dist, _expected_dist) + + +def test_config_encoder_pdist_squareform() -> None: + parameters = { + "a": Categorical(["cat", "mouse", "dog"]), + "b": Integer(1, 10), + "c": Float(1, 10), + } + encoder = ConfigEncoder.from_parameters(parameters) + config1 = {"a": "cat", "b": 1, "c": 1.0} + config2 = {"a": "dog", "b": 5, "c": 5} + config3 = {"a": "mouse", "b": 10, "c": 10.0} + + # Same config, no distance + x = encoder.encode([config1, config2, config3]) + dist = pairwise_dist(x, encoder=encoder, square_form=False) + + # 3 possible distances + assert dist.shape == (3,) + torch.testing.assert_close( + dist, + torch.tensor([1.6285, 2.4142, 1.7857], dtype=torch.float64), + atol=1e-4, + rtol=1e-4, + ) + + dist_sq = pairwise_dist(x, encoder=encoder, square_form=True) + assert dist_sq.shape == (3, 3) + + # Distance to self along diagonal should be 0 + torch.testing.assert_close(dist_sq.diagonal(), torch.zeros(3, dtype=torch.float64)) + + # Should be symmetric + torch.testing.assert_close(dist_sq, dist_sq.T) diff --git a/neps/optimizers/multiple_knowledge_sources/__init__.py b/tests/test_settings/__init__.py similarity index 100% rename from neps/optimizers/multiple_knowledge_sources/__init__.py rename to tests/test_settings/__init__.py diff --git a/tests/test_settings/overwrite_run_args.yaml b/tests/test_settings/overwrite_run_args.yaml index fb69b4016..e4dd91b4c 100644 --- a/tests/test_settings/overwrite_run_args.yaml +++ b/tests/test_settings/overwrite_run_args.yaml @@ -1,6 +1,6 @@ # Full Configuration Template for NePS -run_pipeline: - path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py +evaluate_pipeline: + path: tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py name: run_pipeline_constant pipeline_space: @@ -31,7 +31,7 @@ max_evaluations_per_run: 6 continue_until_max_evaluation_completed: False # Error Handling -loss_value_on_error: 1.0 +objective_to_minimize_value_on_error: 1.0 cost_value_on_error: 1.0 ignore_errors: True diff --git a/tests/test_settings/run_args_optimizer_outside.yaml b/tests/test_settings/run_args_optimizer_outside.yaml index 1dbfce01b..1d87c32db 100644 --- a/tests/test_settings/run_args_optimizer_outside.yaml +++ b/tests/test_settings/run_args_optimizer_outside.yaml @@ -1,5 +1,5 @@ -run_pipeline: - name: run_pipeline +evaluate_pipeline: + name: evaluate_pipeline path: "tests/test_settings/test_settings.py" pipeline_space: name: pipeline_space @@ -12,9 +12,5 @@ searcher: name: my_bayesian # Specific arguments depending on the searcher initial_design_size: 7 - surrogate_model: gp - acquisition: EI -
acquisition_sampler: random - random_interleave_prob: 0.1 overwrite_working_directory: True diff --git a/tests/test_settings/run_args_optimizer_settings.yaml b/tests/test_settings/run_args_optimizer_settings.yaml index 00e34d183..4c049813f 100644 --- a/tests/test_settings/run_args_optimizer_settings.yaml +++ b/tests/test_settings/run_args_optimizer_settings.yaml @@ -1,6 +1,6 @@ # Full Configuration Template for NePS -run_pipeline: - path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py +evaluate_pipeline: + path: tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py name: run_pipeline_constant pipeline_space: @@ -31,7 +31,7 @@ max_evaluations_per_run: 6 continue_until_max_evaluation_completed: False # Error Handling -loss_value_on_error: 1.0 +objective_to_minimize_value_on_error: 1.0 cost_value_on_error: 1.0 ignore_errors: True @@ -42,8 +42,8 @@ searcher: initial_design_type: max_budget use_priors: false random_interleave_prob: 0.0 - sample_default_first: false - sample_default_at_target: false + sample_prior_first: false + sample_prior_at_target: false # Hooks pre_load_hooks: diff --git a/tests/test_settings/run_args_optional.yaml b/tests/test_settings/run_args_optional.yaml index 5e46ff99d..db0ef575a 100644 --- a/tests/test_settings/run_args_optional.yaml +++ b/tests/test_settings/run_args_optional.yaml @@ -7,7 +7,7 @@ max_evaluations_per_run: continue_until_max_evaluation_completed: False max_evaluations_total: 11 # get ignored root_directory: "get/ignored" -loss_value_on_error: +objective_to_minimize_value_on_error: cost_value_on_error: ignore_errors: searcher: hyperband diff --git a/tests/test_settings/run_args_required.yaml b/tests/test_settings/run_args_required.yaml index d3c6afdde..47aa6a520 100644 --- a/tests/test_settings/run_args_required.yaml +++ b/tests/test_settings/run_args_required.yaml @@ -1,5 +1,5 @@ -run_pipeline: - name: run_pipeline +evaluate_pipeline: + name: evaluate_pipeline path: "tests/test_settings/test_settings.py" pipeline_space: name: pipeline_space diff --git a/tests/test_settings/test_settings.py b/tests/test_settings/test_settings.py index fcdac758c..ca76445cd 100644 --- a/tests/test_settings/test_settings.py +++ b/tests/test_settings/test_settings.py @@ -1,14 +1,20 @@ -from neps.utils.run_args import Settings, Default +from __future__ import annotations + +from pathlib import Path + import pytest -import neps -from neps.utils.run_args import get_run_args_from_yaml -from tests.test_yaml_run_args.test_yaml_run_args import (run_pipeline, hook1, hook2, - pipeline_space) + from neps.optimizers.bayesian_optimization.optimizer import BayesianOptimization -from typing import Union, Callable, Dict, List, Type +from neps.utils.run_args import Default, Settings +from tests.test_yaml_run_args.test_yaml_run_args import ( + evaluate_pipeline, + hook1, + hook2, + pipeline_space, +) -BASE_PATH = "tests/test_settings" -run_pipeline = run_pipeline +BASE_PATH = Path("tests") / "test_settings" +evaluate_pipeline = evaluate_pipeline hook1 = hook1 hook2 = hook2 pipeline_space = pipeline_space @@ -16,318 +22,339 @@ @pytest.mark.neps_api -@pytest.mark.parametrize("func_args, yaml_args, expected_output", [ - ( - { # only essential arguments provided by func_args, no yaml - "run_pipeline": run_pipeline, - "root_directory": "path/to/root_directory", - "pipeline_space": pipeline_space, - "run_args": Default(None), - "overwrite_working_directory": Default(False), - "post_run_summary": Default(True), - "development_stage_id": Default(None), - 
"task_id": Default(None), - "max_evaluations_total": 10, - "max_evaluations_per_run": Default(None), - "continue_until_max_evaluation_completed": Default(False), - "max_cost_total": Default(None), - "ignore_errors": Default(False), - "loss_value_on_error": Default(None), - "cost_value_on_error": Default(None), - "pre_load_hooks": Default(None), - "searcher": Default("default"), - "searcher_kwargs": {}, - } - , - Default(None), - { - "run_pipeline": run_pipeline, - "root_directory": "path/to/root_directory", - "pipeline_space": pipeline_space, - "overwrite_working_directory": False, - "post_run_summary": True, - "development_stage_id": None, - "task_id": None, - "max_evaluations_total": 10, - "max_evaluations_per_run": None, - "continue_until_max_evaluation_completed": False, - "max_cost_total": None, - "ignore_errors": False, - "loss_value_on_error": None, - "cost_value_on_error": None, - "pre_load_hooks": None, - "searcher": "default", - "searcher_kwargs": {} - } - ), - ({ # only required elements of run_args - "run_pipeline": Default(None), - "root_directory": Default(None), - "pipeline_space": Default(None), - "run_args": Default(None), - "overwrite_working_directory": Default(False), - "post_run_summary": Default(True), - "development_stage_id": Default(None), - "task_id": Default(None), - "max_evaluations_total": Default(None), - "max_evaluations_per_run": Default(None), - "continue_until_max_evaluation_completed": Default(False), - "max_cost_total": Default(None), - "ignore_errors": Default(False), - "loss_value_on_error": Default(None), - "cost_value_on_error": Default(None), - "pre_load_hooks": Default(None), - "searcher": Default("default"), - "searcher_kwargs": {}, - }, - "/run_args_required.yaml", - { - "run_pipeline": run_pipeline, - "root_directory": "path/to/root_directory", - "pipeline_space": pipeline_space, - "overwrite_working_directory": False, - "post_run_summary": True, - "development_stage_id": None, - "task_id": None, - "max_evaluations_total": 10, - "max_evaluations_per_run": None, - "continue_until_max_evaluation_completed": False, - "max_cost_total": None, - "ignore_errors": False, - "loss_value_on_error": None, - "cost_value_on_error": None, - "pre_load_hooks": None, - "searcher": "default", - "searcher_kwargs": {} - }), - ({ # required via func_args, optional via yaml - "run_pipeline": run_pipeline, - "root_directory": "path/to/root_directory", - "pipeline_space": pipeline_space, - "run_args": "tests/path/to/run_args", # will be ignored by Settings - "overwrite_working_directory": Default(False), - "post_run_summary": Default(True), - "development_stage_id": Default(None), - "task_id": Default(None), - "max_evaluations_total": 10, - "max_evaluations_per_run": Default(None), - "continue_until_max_evaluation_completed": Default(False), - "max_cost_total": Default(None), - "ignore_errors": Default(False), - "loss_value_on_error": Default(None), - "cost_value_on_error": Default(None), - "pre_load_hooks": Default(None), - "searcher": Default("default"), - "searcher_kwargs": {}, - }, - "/run_args_optional.yaml", - { - "run_pipeline": run_pipeline, - "root_directory": "path/to/root_directory", - "pipeline_space": pipeline_space, - "overwrite_working_directory": True, - "post_run_summary": False, - "development_stage_id": None, - "task_id": None, - "max_evaluations_total": 10, - "max_evaluations_per_run": None, - "continue_until_max_evaluation_completed": False, - "max_cost_total": None, - "ignore_errors": False, - "loss_value_on_error": None, - "cost_value_on_error": 
None, - "pre_load_hooks": None, - "searcher": "hyperband", - "searcher_kwargs": {} - }), - ({ # overwrite all yaml values - "run_pipeline": run_pipeline, - "root_directory": "path/to/root_directory", - "pipeline_space": pipeline_space, - "run_args": "test", - "overwrite_working_directory": False, - "post_run_summary": True, - "development_stage_id": 5, - "task_id": None, - "max_evaluations_total": 17, - "max_evaluations_per_run": None, - "continue_until_max_evaluation_completed": False, - "max_cost_total": None, - "ignore_errors": False, - "loss_value_on_error": None, - "cost_value_on_error": None, - "pre_load_hooks": None, - "searcher": "default", - "searcher_kwargs": {}, - } - , - "/overwrite_run_args.yaml", - { - "run_pipeline": run_pipeline, - "root_directory": "path/to/root_directory", - "pipeline_space": pipeline_space, - "overwrite_working_directory": False, - "post_run_summary": True, - "development_stage_id": 5, - "task_id": None, - "max_evaluations_total": 17, - "max_evaluations_per_run": None, - "continue_until_max_evaluation_completed": False, - "max_cost_total": None, - "ignore_errors": False, - "loss_value_on_error": None, - "cost_value_on_error": None, - "pre_load_hooks": None, - "searcher": "default", - "searcher_kwargs": {}, - } - ), - ({ # optimizer args special case - "run_pipeline": run_pipeline, - "root_directory": "path/to/root_directory", - "pipeline_space": pipeline_space, - "run_args": "test", - "overwrite_working_directory": False, - "post_run_summary": True, - "development_stage_id": 5, - "task_id": None, - "max_evaluations_total": 17, - "max_evaluations_per_run": None, - "continue_until_max_evaluation_completed": False, - "max_cost_total": None, - "ignore_errors": False, - "loss_value_on_error": None, - "cost_value_on_error": None, - "pre_load_hooks": None, - "searcher": Default("default"), - "searcher_kwargs": {"initial_design_type": "max_budget", - "use_priors": False, - "random_interleave_prob": 0.0, - "sample_default_first": False, - "sample_default_at_target": False}, - } - , - "/run_args_optimizer_settings.yaml", - { - "run_pipeline": run_pipeline, - "root_directory": "path/to/root_directory", - "pipeline_space": pipeline_space, - "overwrite_working_directory": False, - "post_run_summary": True, - "development_stage_id": 5, - "task_id": None, - "max_evaluations_total": 17, - "max_evaluations_per_run": None, - "continue_until_max_evaluation_completed": False, - "max_cost_total": None, - "ignore_errors": False, - "loss_value_on_error": None, - "cost_value_on_error": None, - "pre_load_hooks": None, - "searcher": { - "strategy": "hyperband", - "eta": 3, - "initial_design_type": "max_budget", - "use_priors": False, - "random_interleave_prob": 0.0, - "sample_default_first": False, - "sample_default_at_target": False}, - "searcher_kwargs": {"initial_design_type": "max_budget", - "use_priors": False, - "random_interleave_prob": 0.0, - "sample_default_first": False, - "sample_default_at_target": False}, - }), -({ # load optimizer with args - "run_pipeline": Default(None), - "root_directory": Default(None), - "pipeline_space": Default(None), - "run_args": Default(None), - "overwrite_working_directory": Default(False), - "post_run_summary": Default(True), - "development_stage_id": Default(None), - "task_id": Default(None), - "max_evaluations_total": Default(None), - "max_evaluations_per_run": Default(None), - "continue_until_max_evaluation_completed": Default(False), - "max_cost_total": Default(None), - "ignore_errors": Default(False), - "loss_value_on_error": 
Default(None), - "cost_value_on_error": Default(None), - "pre_load_hooks": Default(None), - "searcher": Default("default"), - "searcher_kwargs": {"random_interleave_prob": 0.2, - "initial_design_size": 9}, - } - , - "/run_args_optimizer_outside.yaml", - { - "run_pipeline": run_pipeline, - "root_directory": "path/to/root_directory", - "pipeline_space": pipeline_space, - "overwrite_working_directory": True, - "post_run_summary": True, - "development_stage_id": None, - "task_id": None, - "max_evaluations_total": 10, - "max_evaluations_per_run": None, - "continue_until_max_evaluation_completed": False, - "max_cost_total": None, - "ignore_errors": False, - "loss_value_on_error": None, - "cost_value_on_error": None, - "pre_load_hooks": None, - "searcher": my_bayesian, - "searcher_kwargs": {"acquisition": "EI", - "acquisition_sampler": "random", - "random_interleave_prob": 0.2, - "initial_design_size": 9, - "surrogate_model": "gp" - }, - }) -]) -def test_check_settings(func_args: Dict, yaml_args: str, expected_output: Dict) -> None: - """ - Check if expected settings are set - """ - if not isinstance(yaml_args, Default): - yaml_args = BASE_PATH + yaml_args - settings = Settings(func_args, yaml_args) - print(settings) +@pytest.mark.parametrize( + ("func_args", "yaml_args", "expected_output"), + [ + ( + { # only essential arguments provided by func_args, no yaml + "evaluate_pipeline": evaluate_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "run_args": Default(None), + "overwrite_working_directory": Default(False), + "post_run_summary": Default(True), + "development_stage_id": Default(None), + "task_id": Default(None), + "max_evaluations_total": 10, + "max_evaluations_per_run": Default(None), + "continue_until_max_evaluation_completed": Default(False), + "max_cost_total": Default(None), + "ignore_errors": Default(False), + "objective_to_minimize_value_on_error": Default(None), + "cost_value_on_error": Default(None), + "pre_load_hooks": Default(None), + "searcher": Default("default"), + "searcher_kwargs": {}, + "sample_batch_size": Default(None), + }, + Default(None), + { + "evaluate_pipeline": evaluate_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": None, + "task_id": None, + "max_evaluations_total": 10, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "objective_to_minimize_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": "default", + "searcher_kwargs": {}, + "sample_batch_size": None, + }, + ), + ( + { # only required elements of run_args + "evaluate_pipeline": Default(None), + "root_directory": Default(None), + "pipeline_space": Default(None), + "run_args": Default(None), + "overwrite_working_directory": Default(False), + "post_run_summary": Default(True), + "development_stage_id": Default(None), + "task_id": Default(None), + "max_evaluations_total": Default(None), + "max_evaluations_per_run": Default(None), + "continue_until_max_evaluation_completed": Default(False), + "max_cost_total": Default(None), + "ignore_errors": Default(False), + "objective_to_minimize_value_on_error": Default(None), + "cost_value_on_error": Default(None), + "pre_load_hooks": Default(None), + "searcher": Default("default"), + "searcher_kwargs": {}, + "sample_batch_size": Default(None), + }, + 
"run_args_required.yaml", + { + "evaluate_pipeline": evaluate_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": None, + "task_id": None, + "max_evaluations_total": 10, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "objective_to_minimize_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": "default", + "searcher_kwargs": {}, + "sample_batch_size": None, + }, + ), + ( + { # required via func_args, optional via yaml + "evaluate_pipeline": evaluate_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "run_args": "tests/path/to/run_args", # will be ignored by Settings + "overwrite_working_directory": Default(False), + "post_run_summary": Default(True), + "development_stage_id": Default(None), + "task_id": Default(None), + "max_evaluations_total": 10, + "max_evaluations_per_run": Default(None), + "continue_until_max_evaluation_completed": Default(False), + "max_cost_total": Default(None), + "ignore_errors": Default(False), + "objective_to_minimize_value_on_error": Default(None), + "cost_value_on_error": Default(None), + "pre_load_hooks": Default(None), + "searcher": Default("default"), + "searcher_kwargs": {}, + "sample_batch_size": Default(None), + }, + "run_args_optional.yaml", + { + "evaluate_pipeline": evaluate_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": True, + "post_run_summary": False, + "development_stage_id": None, + "task_id": None, + "max_evaluations_total": 10, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "objective_to_minimize_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": "hyperband", + "searcher_kwargs": {}, + "sample_batch_size": None, + }, + ), + ( + { # overwrite all yaml values + "evaluate_pipeline": evaluate_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "run_args": "test", + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": 5, + "task_id": None, + "max_evaluations_total": 17, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "objective_to_minimize_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": "default", + "searcher_kwargs": {}, + "sample_batch_size": Default(None), + }, + "overwrite_run_args.yaml", + { + "evaluate_pipeline": evaluate_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": 5, + "task_id": None, + "max_evaluations_total": 17, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "objective_to_minimize_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": "default", + "searcher_kwargs": {}, + "sample_batch_size": None, + }, + ), + ( + { # optimizer args special case + "evaluate_pipeline": evaluate_pipeline, + "root_directory": "path/to/root_directory", + 
"pipeline_space": pipeline_space, + "run_args": "test", + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": 5, + "task_id": None, + "max_evaluations_total": 17, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "objective_to_minimize_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": Default("default"), + "searcher_kwargs": { + "initial_design_type": "max_budget", + "use_priors": False, + "random_interleave_prob": 0.0, + "sample_prior_first": False, + "sample_prior_at_target": False, + }, + "sample_batch_size": Default(None), + }, + "run_args_optimizer_settings.yaml", + { + "evaluate_pipeline": evaluate_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": False, + "post_run_summary": True, + "development_stage_id": 5, + "task_id": None, + "max_evaluations_total": 17, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "objective_to_minimize_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": { + "strategy": "hyperband", + "eta": 3, + "initial_design_type": "max_budget", + "use_priors": False, + "random_interleave_prob": 0.0, + "sample_prior_first": False, + "sample_prior_at_target": False, + }, + "searcher_kwargs": { + "initial_design_type": "max_budget", + "use_priors": False, + "random_interleave_prob": 0.0, + "sample_prior_first": False, + "sample_prior_at_target": False, + }, + "sample_batch_size": None, + }, + ), + ( + { # load optimizer with args + "evaluate_pipeline": Default(None), + "root_directory": Default(None), + "pipeline_space": Default(None), + "run_args": Default(None), + "overwrite_working_directory": Default(False), + "post_run_summary": Default(True), + "development_stage_id": Default(None), + "task_id": Default(None), + "max_evaluations_total": Default(None), + "max_evaluations_per_run": Default(None), + "continue_until_max_evaluation_completed": Default(False), + "max_cost_total": Default(None), + "ignore_errors": Default(False), + "objective_to_minimize_value_on_error": Default(None), + "cost_value_on_error": Default(None), + "pre_load_hooks": Default(None), + "searcher": Default("default"), + "searcher_kwargs": { + "initial_design_size": 9, + }, + "sample_batch_size": Default(None), + }, + "run_args_optimizer_outside.yaml", + { + "evaluate_pipeline": evaluate_pipeline, + "root_directory": "path/to/root_directory", + "pipeline_space": pipeline_space, + "overwrite_working_directory": True, + "post_run_summary": True, + "development_stage_id": None, + "task_id": None, + "max_evaluations_total": 10, + "max_evaluations_per_run": None, + "continue_until_max_evaluation_completed": False, + "max_cost_total": None, + "ignore_errors": False, + "objective_to_minimize_value_on_error": None, + "cost_value_on_error": None, + "pre_load_hooks": None, + "searcher": my_bayesian, + "searcher_kwargs": {"initial_design_size": 9}, + "sample_batch_size": None, + }, + ), + ], +) +def test_check_settings(func_args: dict, yaml_args: str, expected_output: dict) -> None: + """Check if expected settings are set.""" + args = BASE_PATH / yaml_args if isinstance(yaml_args, str) else yaml_args + + settings = Settings(func_args, args) for key, value in expected_output.items(): assert getattr(settings, key) == value 

 @pytest.mark.neps_api
-@pytest.mark.parametrize("func_args, yaml_args, error", [
-    (
-        {
-            "root_directory": Default(None),
-            "pipeline_space": Default(None),
-            "run_args": Default(None),
-            "overwrite_working_directory": Default(False),
-            "post_run_summary": Default(True),
-            "development_stage_id": Default(None),
-            "task_id": Default(None),
-            "max_evaluations_total": Default(None),
-            "max_evaluations_per_run": Default(None),
-            "continue_until_max_evaluation_completed": Default(False),
-            "max_cost_total": Default(None),
-            "ignore_errors": Default(False),
-            "loss_value_on_error": Default(None),
-            "cost_value_on_error": Default(None),
-            "pre_load_hooks": Default(None),
-            "searcher": Default("default"),
-            "searcher_kwargs": {},
-        },
-        Default(None),
-        ValueError
-    )
-])
-def test_settings_initialization_error(func_args: Dict, yaml_args: Union[str, Default],
-                                       error: Exception) -> None:
-    """
-    Test if Settings raises Error when essential arguments are missing
-    """
+@pytest.mark.parametrize(
+    ("func_args", "yaml_args", "error"),
+    [
+        (
+            {
+                "root_directory": Default(None),
+                "pipeline_space": Default(None),
+                "run_args": Default(None),
+                "overwrite_working_directory": Default(False),
+                "post_run_summary": Default(True),
+                "development_stage_id": Default(None),
+                "task_id": Default(None),
+                "max_evaluations_total": Default(None),
+                "max_evaluations_per_run": Default(None),
+                "continue_until_max_evaluation_completed": Default(False),
+                "max_cost_total": Default(None),
+                "ignore_errors": Default(False),
+                "objective_to_minimize_value_on_error": Default(None),
+                "cost_value_on_error": Default(None),
+                "pre_load_hooks": Default(None),
+                "searcher": Default("default"),
+                "searcher_kwargs": {},
+                "sample_batch_size": Default(None),
+            },
+            Default(None),
+            ValueError,
+        )
+    ],
+)
+def test_settings_initialization_error(
+    func_args: dict, yaml_args: str | Default, error: type[Exception]
+) -> None:
+    """Test if Settings raises Error when essential arguments are missing."""
     with pytest.raises(error):
         Settings(func_args, yaml_args)
diff --git a/tests/test_state/test_filebased_neps_state.py b/tests/test_state/test_filebased_neps_state.py
index a33851410..ea2751520 100644
--- a/tests/test_state/test_filebased_neps_state.py
+++ b/tests/test_state/test_filebased_neps_state.py
@@ -1,31 +1,34 @@
 """NOTE: These tests are pretty specific to the filebased state implementation.
 This could be generalized if we end up with a server based implementation but
-for now we're just testing the filebased implementation."""
-
+for now we're just testing the filebased implementation.
+""" from __future__ import annotations from pathlib import Path from typing import Any -from neps.exceptions import NePSError, TrialNotFoundError -from neps.state.err_dump import ErrDump -from neps.state.filebased import ( - create_or_load_filebased_neps_state, - load_filebased_neps_state, -) import pytest from pytest_cases import fixture, parametrize + +from neps.exceptions import NePSError, TrialNotFoundError +from neps.state.err_dump import ErrDump +from neps.state.neps_state import NePSState from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo +from neps.state.seed_snapshot import SeedSnapshot @fixture -@parametrize("budget", [BudgetInfo(max_cost_budget=10, used_cost_budget=0), None]) +@parametrize("budget_info", [BudgetInfo(max_cost_total=10, used_cost_budget=0), None]) @parametrize("shared_state", [{"a": "b"}, {}]) def optimizer_state( - budget: BudgetInfo | None, + budget_info: BudgetInfo | None, shared_state: dict[str, Any], ) -> OptimizationState: - return OptimizationState(budget=budget, shared_state=shared_state) + return OptimizationState( + budget=budget_info, + seed_snapshot=SeedSnapshot.new_capture(), + shared_state=shared_state, + ) @fixture @@ -40,24 +43,21 @@ def test_create_with_new_filebased_neps_state( optimizer_state: OptimizationState, ) -> None: new_path = tmp_path / "neps_state" - neps_state = create_or_load_filebased_neps_state( - directory=new_path, + neps_state = NePSState.create_or_load( + path=new_path, optimizer_info=optimizer_info, optimizer_state=optimizer_state, ) - assert neps_state.optimizer_info() == optimizer_info - assert neps_state.optimizer_state() == optimizer_state - assert neps_state.all_trial_ids() == set() - assert neps_state.get_all_trials() == {} - assert neps_state.get_errors() == ErrDump(errs=[]) - assert neps_state.get_next_pending_trial() is None - assert neps_state.get_next_pending_trial(n=10) == [] - - with pytest.raises(TrialNotFoundError): - assert neps_state.get_trial_by_id("1") + assert neps_state.lock_and_get_optimizer_info() == optimizer_info + assert neps_state.lock_and_get_optimizer_state() == optimizer_state + assert neps_state.all_trial_ids() == [] + assert neps_state.lock_and_read_trials() == {} + assert neps_state.lock_and_get_errors() == ErrDump(errs=[]) + assert neps_state.lock_and_get_next_pending_trial() is None + assert neps_state.lock_and_get_next_pending_trial(n=10) == [] with pytest.raises(TrialNotFoundError): - assert neps_state.get_trials_by_ids(["1", "2"]) + assert neps_state.lock_and_get_trial_by_id("1") def test_create_or_load_with_load_filebased_neps_state( @@ -66,8 +66,8 @@ def test_create_or_load_with_load_filebased_neps_state( optimizer_state: OptimizationState, ) -> None: new_path = tmp_path / "neps_state" - neps_state = create_or_load_filebased_neps_state( - directory=new_path, + neps_state = NePSState.create_or_load( + path=new_path, optimizer_info=optimizer_info, optimizer_state=optimizer_state, ) @@ -76,11 +76,12 @@ def test_create_or_load_with_load_filebased_neps_state( # that we prioritize what's in the existing data over what # was passed in. 
     different_state = OptimizationState(
-        budget=BudgetInfo(max_cost_budget=20, used_cost_budget=10),
-        shared_state={"c": "d"},
+        budget=BudgetInfo(max_cost_total=20, used_cost_budget=10),
+        seed_snapshot=SeedSnapshot.new_capture(),
+        shared_state=None,
     )
-    neps_state2 = create_or_load_filebased_neps_state(
-        directory=new_path,
+    neps_state2 = NePSState.create_or_load(
+        path=new_path,
         optimizer_info=optimizer_info,
         optimizer_state=different_state,
     )
@@ -93,13 +94,13 @@ def test_load_on_existing_neps_state(
     optimizer_state: OptimizationState,
 ) -> None:
     new_path = tmp_path / "neps_state"
-    neps_state = create_or_load_filebased_neps_state(
-        directory=new_path,
+    neps_state = NePSState.create_or_load(
+        path=new_path,
         optimizer_info=optimizer_info,
         optimizer_state=optimizer_state,
     )

-    neps_state2 = load_filebased_neps_state(directory=new_path)
+    neps_state2 = NePSState.create_or_load(path=new_path, load_only=True)
     assert neps_state == neps_state2
@@ -109,15 +110,15 @@ def test_new_or_load_on_existing_neps_state_with_different_optimizer_info(
     optimizer_state: OptimizationState,
 ) -> None:
     new_path = tmp_path / "neps_state"
-    create_or_load_filebased_neps_state(
-        directory=new_path,
+    NePSState.create_or_load(
+        path=new_path,
         optimizer_info=optimizer_info,
         optimizer_state=optimizer_state,
     )

     with pytest.raises(NePSError):
-        create_or_load_filebased_neps_state(
-            directory=new_path,
+        NePSState.create_or_load(
+            path=new_path,
             optimizer_info=OptimizerInfo({"e": "f"}),
             optimizer_state=optimizer_state,
         )
diff --git a/tests/test_state/test_neps_state.py b/tests/test_state/test_neps_state.py
index 0d0891ce6..6d067d896 100644
--- a/tests/test_state/test_neps_state.py
+++ b/tests/test_state/test_neps_state.py
@@ -1,7 +1,7 @@
 """NOTE: These tests are pretty specific to the filebased state implementation.
 This could be generalized if we end up with a server based implementation but
-for now we're just testing the filebased implementation."""
-
+for now we're just testing the filebased implementation.
+"""
 from __future__ import annotations

 import time
@@ -9,78 +9,75 @@
 from typing import Any

 import pytest
+from pytest_cases import case, fixture, parametrize, parametrize_with_cases
+
+from neps.optimizers import SearcherMapping
 from neps.optimizers.base_optimizer import BaseOptimizer
 from neps.search_spaces.hyperparameters import (
-    FloatParameter,
-    IntegerParameter,
-    ConstantParameter,
-    CategoricalParameter,
+    Categorical,
+    Constant,
+    Float,
+    Integer,
 )
 from neps.search_spaces.search_space import SearchSpace
-from neps.state.filebased import (
-    create_or_load_filebased_neps_state,
-)
-
-from pytest_cases import fixture, parametrize, parametrize_with_cases, case
 from neps.state.neps_state import NePSState
 from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo
-from neps.optimizers import SearcherMapping
-from neps.utils.common import MissingDependencyError
+from neps.state.seed_snapshot import SeedSnapshot


 @case
 def case_search_space_no_fid() -> SearchSpace:
     return SearchSpace(
-        a=FloatParameter(0, 1),
-        b=CategoricalParameter(["a", "b", "c"]),
-        c=ConstantParameter("a"),
-        d=IntegerParameter(0, 10),
+        a=Float(0, 1),
+        b=Categorical(["a", "b", "c"]),
+        c=Constant("a"),
+        d=Integer(0, 10),
     )


 @case
 def case_search_space_with_fid() -> SearchSpace:
     return SearchSpace(
-        a=FloatParameter(0, 1),
-        b=CategoricalParameter(["a", "b", "c"]),
-        c=ConstantParameter("a"),
-        d=IntegerParameter(0, 10),
-        e=IntegerParameter(1, 10, is_fidelity=True),
+        a=Float(0, 1),
+        b=Categorical(["a", "b", "c"]),
+        c=Constant("a"),
+        d=Integer(0, 10),
+        e=Integer(1, 10, is_fidelity=True),
     )


 @case
 def case_search_space_no_fid_with_prior() -> SearchSpace:
     return SearchSpace(
-        a=FloatParameter(0, 1, default=0.5),
-        b=CategoricalParameter(["a", "b", "c"], default="a"),
-        c=ConstantParameter("a"),
-        d=IntegerParameter(0, 10, default=5),
+        a=Float(0, 1, prior=0.5),
+        b=Categorical(["a", "b", "c"], prior="a"),
+        c=Constant("a"),
+        d=Integer(0, 10, prior=5),
     )


 @case
 def case_search_space_fid_with_prior() -> SearchSpace:
     return SearchSpace(
-        a=FloatParameter(0, 1, default=0.5),
-        b=CategoricalParameter(["a", "b", "c"], default="a"),
-        c=ConstantParameter("a"),
-        d=IntegerParameter(0, 10, default=5),
-        e=IntegerParameter(1, 10, is_fidelity=True),
+        a=Float(0, 1, prior=0.5),
+        b=Categorical(["a", "b", "c"], prior="a"),
+        c=Constant("a"),
+        d=Integer(0, 10, prior=5),
+        e=Integer(1, 10, is_fidelity=True),
     )


-# See issue #118
-NON_INSTANTIABLE_SEARCH_SPACES_WITHOUT_SPECIFIC_KWARGS = "assisted_regularized_evolution"
-
 # See issue #121
 JUST_SKIP = [
     "multifidelity_tpe",
 ]

-#
 OPTIMIZER_FAILS_WITH_FIDELITY = [
     "random_search",
+    "bayesian_optimization",
+    "pibo",
+    "cost_cooling_bayesian_optimization",
+    "cost_cooling",
 ]

 # There's no programattic way to check if a class requires a fidelity.
@@ -93,74 +90,79 @@ def case_search_space_fid_with_prior() -> SearchSpace:
     "hyperband",
     "hyperband_custom_default",
     "priorband",
+    "priorband_bo",
     "mobster",
     "mf_ei_bo",
+    "priorband_asha",
+    "ifbo",
+    "priorband_asha_hyperband",
 ]

 OPTIMIZER_REQUIRES_BUDGET = [
     "successive_halving_prior",
     "hyperband_custom_default",
     "asha",
     "priorband",
+    "priorband_bo",
+    "priorband_asha",
+    "priorband_asha_hyperband",
     "hyperband",
     "asha_prior",
     "mobster",
 ]

 REQUIRES_PRIOR = {
     "priorband",
+    "priorband_bo",
+    "priorband_asha",
+    "priorband_asha_hyperband",
 }

 REQUIRES_COST = ["cost_cooling_bayesian_optimization", "cost_cooling"]


 @fixture
-@parametrize(
-    "key",
-    [
-        k
-        for k in SearcherMapping.keys()
-        if k not in NON_INSTANTIABLE_SEARCH_SPACES_WITHOUT_SPECIFIC_KWARGS
-    ],
-)
+@parametrize("key", list(SearcherMapping.keys()))
 @parametrize_with_cases("search_space", cases=".", prefix="case_search_space")
 def optimizer_and_key(key: str, search_space: SearchSpace) -> tuple[BaseOptimizer, str]:
     if key in JUST_SKIP:
         pytest.xfail(f"{key} is not instantiable")

-    if key in REQUIRES_PRIOR and search_space.hyperparameters["a"].default is None:
+    if key in REQUIRES_PRIOR and search_space.hyperparameters["a"].prior is None:
         pytest.xfail(f"{key} requires a prior")

-    if search_space.has_fidelity and key in OPTIMIZER_FAILS_WITH_FIDELITY:
+    if len(search_space.fidelities) > 0 and key in OPTIMIZER_FAILS_WITH_FIDELITY:
         pytest.xfail(f"{key} crashed with a fidelity")

-    if key in OPTIMIZER_REQUIRES_FIDELITY and not search_space.has_fidelity:
+    if key in OPTIMIZER_REQUIRES_FIDELITY and not len(search_space.fidelities) > 0:
         pytest.xfail(f"{key} requires a fidelity parameter")
+
     kwargs: dict[str, Any] = {
         "pipeline_space": search_space,
     }
     if key in OPTIMIZER_REQUIRES_BUDGET:
-        kwargs["budget"] = 10
+        kwargs["max_cost_total"] = 10

     optimizer_cls = SearcherMapping[key]
-    try:
-        return optimizer_cls(**kwargs), key
-    except MissingDependencyError as e:
-        pytest.xfail(f"{key} requires {e.dep} to run.")
+    return optimizer_cls(**kwargs), key


 @parametrize("optimizer_info", [OptimizerInfo({"a": "b"}), OptimizerInfo({})])
-@parametrize("budget", [BudgetInfo(max_cost_budget=10, used_cost_budget=0), None])
+@parametrize("max_cost_total", [BudgetInfo(max_cost_total=10, used_cost_budget=0), None])
 @parametrize("shared_state", [{"a": "b"}, {}])
 def case_neps_state_filebased(
     tmp_path: Path,
-    budget: BudgetInfo | None,
+    max_cost_total: BudgetInfo | None,
     optimizer_info: OptimizerInfo,
     shared_state: dict[str, Any],
 ) -> NePSState:
     new_path = tmp_path / "neps_state"
-    return create_or_load_filebased_neps_state(
-        directory=new_path,
+    return NePSState.create_or_load(
+        path=new_path,
         optimizer_info=optimizer_info,
-        optimizer_state=OptimizationState(budget=budget, shared_state=shared_state),
+        optimizer_state=OptimizationState(
+            budget=max_cost_total,
+            seed_snapshot=SeedSnapshot.new_capture(),
+            shared_state=shared_state,
+        ),
     )
@@ -170,15 +172,15 @@ def test_sample_trial(
     optimizer_and_key: tuple[BaseOptimizer, str],
 ) -> None:
     optimizer, key = optimizer_and_key
-    if key in REQUIRES_COST and neps_state.optimizer_state().budget is None:
+    if key in REQUIRES_COST and neps_state.lock_and_get_optimizer_state().budget is None:
         pytest.xfail(f"{key} requires a cost budget")

-    assert neps_state.get_all_trials() == {}
-    assert neps_state.get_next_pending_trial() is None
-    assert neps_state.get_next_pending_trial(n=10) == []
-    assert neps_state.all_trial_ids() == set()
+    assert neps_state.lock_and_read_trials() == {}
+    assert neps_state.lock_and_get_next_pending_trial() is None
+    assert neps_state.lock_and_get_next_pending_trial(n=10) == []
+    assert neps_state.all_trial_ids() == []

-    trial1 = neps_state.sample_trial(optimizer=optimizer, worker_id="1")
+    trial1 = neps_state.lock_and_sample_trial(optimizer=optimizer, worker_id="1")
     for k, v in trial1.config.items():
         assert k in optimizer.pipeline_space.hyperparameters
         assert v is not None, f"'{k}' is None in {trial1.config}"
@@ -187,19 +189,19 @@
     # precise, we need to introduce a sleep -_-
     time.sleep(0.1)

-    assert neps_state.get_all_trials() == {trial1.id: trial1}
-    assert neps_state.get_next_pending_trial() == trial1
-    assert neps_state.get_next_pending_trial(n=10) == [trial1]
-    assert neps_state.all_trial_ids() == {trial1.id}
+    assert neps_state.lock_and_read_trials() == {trial1.id: trial1}
+    assert neps_state.lock_and_get_next_pending_trial() == trial1
+    assert neps_state.lock_and_get_next_pending_trial(n=10) == [trial1]
+    assert neps_state.all_trial_ids() == [trial1.id]

-    trial2 = neps_state.sample_trial(optimizer=optimizer, worker_id="1")
+    trial2 = neps_state.lock_and_sample_trial(optimizer=optimizer, worker_id="1")
     for k, v in trial1.config.items():
         assert k in optimizer.pipeline_space.hyperparameters
         assert v is not None, f"'{k}' is None in {trial1.config}"

     assert trial1 != trial2

-    assert neps_state.get_all_trials() == {trial1.id: trial1, trial2.id: trial2}
-    assert neps_state.get_next_pending_trial() == trial1
-    assert neps_state.get_next_pending_trial(n=10) == [trial1, trial2]
-    assert neps_state.all_trial_ids() == {trial1.id, trial2.id}
+    assert neps_state.lock_and_read_trials() == {trial1.id: trial1, trial2.id: trial2}
+    assert neps_state.lock_and_get_next_pending_trial() == trial1
+    assert neps_state.lock_and_get_next_pending_trial(n=10) == [trial1, trial2]
+    assert sorted(neps_state.all_trial_ids()) == [trial1.id, trial2.id]
diff --git a/tests/test_state/test_rng.py b/tests/test_state/test_rng.py
index 1f1318d3a..2605433a4 100644
--- a/tests/test_state/test_rng.py
+++ b/tests/test_state/test_rng.py
@@ -1,23 +1,23 @@
 from __future__ import annotations

-from pathlib import Path
 import random
-from typing import Callable
+from collections.abc import Callable
+from pathlib import Path
+
 import numpy as np
-import torch
 import pytest
+import torch

 from neps.state.seed_snapshot import SeedSnapshot
-from neps.state.filebased import ReaderWriterSeedSnapshot


 @pytest.mark.parametrize(
     "make_ints",
-    (
+    [
         lambda: [random.randint(0, 100) for _ in range(10)],
         lambda: list(np.random.randint(0, 100, (10,))),
         lambda: list(torch.randint(0, 100, (10,))),
-    ),
+    ],
 )
 def test_randomstate_consistent(
     tmp_path: Path, make_ints: Callable[[], list[int]]
@@ -36,21 +36,3 @@
     integers_2 = make_ints()

     assert integers_1 == integers_2
-
-    ReaderWriterSeedSnapshot.write(SeedSnapshot.new_capture(), seed_dir)
-
-    integers_3 = make_ints()
-    assert integers_3 != integers_2, "Ensure we have actually changed random state"
-
-    ReaderWriterSeedSnapshot.read(seed_dir).set_as_global_seed_state()
-    integers_4 = make_ints()
-
-    assert integers_3 == integers_4
-
-    before = SeedSnapshot.new_capture()
-    after = SeedSnapshot.new_capture()
-
-    _ = make_ints()
-
-    after.recapture()
-    assert before != after
diff --git a/tests/test_state/test_synced.py b/tests/test_state/test_synced.py
deleted file mode 100644
index 3a28b7240..000000000
--- a/tests/test_state/test_synced.py
+++ /dev/null
@@ -1,432 +0,0 @@
-from __future__ import annotations
-
-from pytest_cases import parametrize, parametrize_with_cases, case
-import copy
-import numpy as np
-import random
-from neps.state.err_dump import ErrDump, SerializableTrialError
-from neps.state.filebased import (
-    ReaderWriterErrDump,
-    ReaderWriterOptimizationState,
-    ReaderWriterOptimizerInfo,
-    ReaderWriterSeedSnapshot,
-    ReaderWriterTrial,
-    FileVersioner,
-    FileLocker,
-)
-from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo
-from neps.state.protocols import Synced
-from neps.state.trial import Trial
-import pytest
-from typing import Any, Callable
-from pathlib import Path
-from neps.state import SeedSnapshot, Synced, Trial
-
-
-@case
-def case_trial_1(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]:
-    trial_id = "1"
-    trial = Trial.new(
-        trial_id=trial_id,
-        location="",
-        config={"a": "b"},
-        time_sampled=0,
-        previous_trial=None,
-        previous_trial_location=None,
-        worker_id=0,
-    )
-
-    def _mutate(trial: Trial) -> None:
-        trial.set_submitted(time_submitted=1)
-
-    x = Synced.new(
-        data=trial,
-        location=tmp_path / "1",
-        locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None),
-        versioner=FileVersioner(version_file=tmp_path / "1" / ".version"),
-        reader_writer=ReaderWriterTrial(),
-    )
-    return x, _mutate
-
-
-@case
-def case_trial_2(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]:
-    trial_id = "1"
-    trial = Trial.new(
-        trial_id=trial_id,
-        location="",
-        config={"a": "b"},
-        time_sampled=0,
-        previous_trial=None,
-        previous_trial_location=None,
-        worker_id=0,
-    )
-    trial.set_submitted(time_submitted=1)
-
-    def _mutate(trial: Trial) -> None:
-        trial.set_evaluating(time_started=2, worker_id="1")
-
-    x = Synced.new(
-        data=trial,
-        location=tmp_path / "1",
-        locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None),
-        versioner=FileVersioner(version_file=tmp_path / "1" / ".version"),
-        reader_writer=ReaderWriterTrial(),
-    )
-    return x, _mutate
-
-
-@case
-def case_trial_3(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]:
-    trial_id = "1"
-    trial = Trial.new(
-        trial_id=trial_id,
-        config={"a": "b"},
-        location="",
-        time_sampled=0,
-        previous_trial=None,
-        previous_trial_location=None,
-        worker_id=0,
-    )
-    trial.set_submitted(time_submitted=1)
-    trial.set_evaluating(time_started=2, worker_id="1")
-
-    def _mutate(trial: Trial) -> None:
-        trial.set_complete(
-            time_end=3,
-            loss=1,
-            cost=1,
-            extra={"hi": [1, 2, 3]},
-            learning_curve=[1],
-            report_as="success",
-            evaluation_duration=1,
-            err=None,
-            tb=None,
-        )
-
-    x = Synced.new(
-        data=trial,
-        location=tmp_path / "1",
-        locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None),
-        versioner=FileVersioner(version_file=tmp_path / "1" / ".version"),
-        reader_writer=ReaderWriterTrial(),
-    )
-    return x, _mutate
-
-
-@case
-def case_trial_4(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]:
-    trial_id = "1"
-    trial = Trial.new(
-        trial_id=trial_id,
-        config={"a": "b"},
-        location="",
-        time_sampled=0,
-        previous_trial=None,
-        previous_trial_location=None,
-        worker_id=0,
-    )
-    trial.set_submitted(time_submitted=1)
-    trial.set_evaluating(time_started=2, worker_id="1")
-
-    def _mutate(trial: Trial) -> None:
-        trial.set_complete(
-            time_end=3,
-            loss=np.nan,
-            cost=np.inf,
-            extra={"hi": [1, 2, 3]},
-            report_as="failed",
-            learning_curve=None,
-            evaluation_duration=2,
-            err=None,
-            tb=None,
-        )
-
-    x = Synced.new(
-        data=trial,
-        location=tmp_path / "1",
-        locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None),
-        versioner=FileVersioner(version_file=tmp_path / "1" / ".version"),
-        reader_writer=ReaderWriterTrial(),
-    )
-    return x, _mutate
-
-
-@case
-def case_trial_5(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]:
-    trial_id = "1"
-    trial = Trial.new(
-        trial_id=trial_id,
-        config={"a": "b"},
-        location="",
-        time_sampled=0,
-        previous_trial=None,
-        previous_trial_location=None,
-        worker_id=0,
-    )
-    trial.set_submitted(time_submitted=1)
-    trial.set_evaluating(time_started=2, worker_id=1)
-
-    def _mutate(trial: Trial) -> None:
-        trial.set_complete(
-            time_end=3,
-            loss=np.nan,
-            cost=np.inf,
-            extra={"hi": [1, 2, 3]},
-            learning_curve=None,
-            evaluation_duration=2,
-            report_as="failed",
-            err=ValueError("hi"),
-            tb="something something traceback",
-        )
-
-    x = Synced.new(
-        data=trial,
-        location=tmp_path / "1",
-        locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None),
-        versioner=FileVersioner(version_file=tmp_path / "1" / ".version"),
-        reader_writer=ReaderWriterTrial(),
-    )
-    return x, _mutate
-
-
-@case
-def case_trial_6(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]:
-    trial_id = "1"
-    trial = Trial.new(
-        trial_id=trial_id,
-        config={"a": "b"},
-        location="",
-        time_sampled=0,
-        previous_trial=None,
-        previous_trial_location=None,
-        worker_id=0,
-    )
-    trial.set_submitted(time_submitted=1)
-    trial.set_evaluating(time_started=2, worker_id=1)
-
-    def _mutate(trial: Trial) -> None:
-        trial.set_corrupted()
-
-    x = Synced.new(
-        data=trial,
-        location=tmp_path / "1",
-        locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None),
-        versioner=FileVersioner(version_file=tmp_path / "1" / ".version"),
-        reader_writer=ReaderWriterTrial(),
-    )
-    return x, _mutate
-
-
-@case
-def case_trial_7(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]:
-    trial_id = "1"
-    trial = Trial.new(
-        trial_id=trial_id,
-        config={"a": "b"},
-        location="",
-        time_sampled=0,
-        previous_trial=None,
-        previous_trial_location=None,
-        worker_id=0,
-    )
-    trial.set_submitted(time_submitted=1)
-    trial.set_evaluating(time_started=2, worker_id=1)
-    trial.set_complete(
-        time_end=3,
-        loss=np.nan,
-        cost=np.inf,
-        extra={"hi": [1, 2, 3]},
-        learning_curve=[1, 2, 3],
-        report_as="failed",
-        evaluation_duration=2,
-        err=ValueError("hi"),
-        tb="something something traceback",
-    )
-
-    def _mutate(trial: Trial) -> None:
-        trial.reset()
-
-    x = Synced.new(
-        data=trial,
-        location=tmp_path / "1",
-        locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None),
-        versioner=FileVersioner(version_file=tmp_path / "1" / ".version"),
-        reader_writer=ReaderWriterTrial(),
-    )
-    return x, _mutate
-
-
-@case
-def case_seed_snapshot(
-    tmp_path: Path,
-) -> tuple[Synced[SeedSnapshot, Path], Callable[[SeedSnapshot], None]]:
-    seed = SeedSnapshot.new_capture()
-
-    def _mutate(seed: SeedSnapshot) -> None:
-        random.randint(0, 100)
-        seed.recapture()
-
-    x = Synced.new(
-        data=seed,
-        location=tmp_path / "seeds",
-        locker=FileLocker(lock_path=tmp_path / "seeds" / ".lock", poll=0.1, timeout=None),
-        versioner=FileVersioner(version_file=tmp_path / "seeds" / ".version"),
-        reader_writer=ReaderWriterSeedSnapshot(),
-    )
-    return x, _mutate
-
-
-@case
-@parametrize(
-    "err",
-    [
-        None,
-        SerializableTrialError(
-            trial_id="1",
-            worker_id="2",
-            err_type="ValueError",
-            err="hi",
-            tb="traceback\nmore",
-        ),
-    ],
-)
-def case_err_dump(
-    tmp_path: Path,
-    err: None | SerializableTrialError,
-) -> tuple[Synced[ErrDump, Path], Callable[[ErrDump], None]]:
-    err_dump = ErrDump() if err is None else ErrDump(errs=[err])
-
-    def _mutate(err_dump: ErrDump) -> None:
-        new_err = SerializableTrialError(
-            trial_id="2",
-            worker_id="2",
-            err_type="RuntimeError",
-            err="hi",
-            tb="traceback\nless",
-        )
-        err_dump.append(new_err)
-
-    x = Synced.new(
-        data=err_dump,
-        location=tmp_path / "err_dump",
-        locker=FileLocker(
-            lock_path=tmp_path / "err_dump" / ".lock", poll=0.1, timeout=None
-        ),
-        versioner=FileVersioner(version_file=tmp_path / "err_dump" / ".version"),
-        reader_writer=ReaderWriterErrDump("all"),
-    )
-    return x, _mutate
-
-
-@case
-def case_optimizer_info(
-    tmp_path: Path,
-) -> tuple[Synced[OptimizerInfo, Path], Callable[[OptimizerInfo], None]]:
-    optimizer_info = OptimizerInfo(info={"a": "b"})
-
-    def _mutate(optimizer_info: OptimizerInfo) -> None:
-        optimizer_info.info["b"] = "c"  # type: ignore # NOTE: We shouldn't be mutating but anywho...
-
-    x = Synced.new(
-        data=optimizer_info,
-        location=tmp_path / "optimizer_info",
-        locker=FileLocker(
-            lock_path=tmp_path / "optimizer_info" / ".lock", poll=0.1, timeout=None
-        ),
-        versioner=FileVersioner(version_file=tmp_path / "optimizer_info" / ".version"),
-        reader_writer=ReaderWriterOptimizerInfo(),
-    )
-    return x, _mutate
-
-
-@case
-@pytest.mark.parametrize(
-    "budget", (None, BudgetInfo(max_cost_budget=10, used_cost_budget=0))
-)
-@pytest.mark.parametrize("shared_state", ({}, {"a": "b"}))
-def case_optimization_state(
-    tmp_path: Path,
-    budget: BudgetInfo | None,
-    shared_state: dict[str, Any],
-) -> tuple[Synced[OptimizationState, Path], Callable[[OptimizationState], None]]:
-    optimization_state = OptimizationState(budget=budget, shared_state=shared_state)
-
-    def _mutate(optimization_state: OptimizationState) -> None:
-        optimization_state.shared_state["a"] = "c"  # type: ignore # NOTE: We shouldn't be mutating but anywho...
-        optimization_state.budget = BudgetInfo(max_cost_budget=10, used_cost_budget=5)
-
-    x = Synced.new(
-        data=optimization_state,
-        location=tmp_path / "optimizer_info",
-        locker=FileLocker(
-            lock_path=tmp_path / "optimizer_info" / ".lock", poll=0.1, timeout=None
-        ),
-        versioner=FileVersioner(version_file=tmp_path / "optimizer_info" / ".version"),
-        reader_writer=ReaderWriterOptimizationState(),
-    )
-    return x, _mutate
-
-
-@parametrize_with_cases("shared, mutate", cases=".")
-def test_initial_state(shared: Synced, mutate: Callable) -> None:
-    assert shared._is_locked() == False
-    assert shared._is_stale() == False
-    assert shared._unsynced() == shared.synced()
-
-
-@parametrize_with_cases("shared, mutate", cases=".")
-def test_put_updates_current_data_and_is_not_stale(
-    shared: Synced, mutate: Callable
-) -> None:
-    current_data = shared._unsynced()
-
-    new_data = copy.deepcopy(current_data)
-    mutate(new_data)
-    assert new_data != current_data
-
-    shared.put(new_data)
-    assert shared._unsynced() == new_data
-    assert shared._is_stale() == False
-    assert shared._is_locked() == False
-
-
-@parametrize_with_cases("shared1, mutate", cases=".")
-def test_share_synced_mutate_and_put(shared1: Synced, mutate: Callable) -> None:
-    shared2 = shared1.deepcopy()
-    assert shared1 == shared2
-    assert not shared1._is_locked()
-    assert not shared2._is_locked()
-
-    with shared2.acquire() as (data2, put2):
-        assert shared1._is_locked()
-        assert shared2._is_locked()
-        mutate(data2)
-        put2(data2)
-
-    assert not shared1._is_locked()
-    assert not shared2._is_locked()
-
-    assert shared1 != shared2
-    assert shared1._unsynced() != shared2._unsynced()
-    assert shared1._is_stale()
-
-    shared1.synced()
-    assert not shared1._is_stale()
-    assert not shared2._is_stale()
-    assert shared1._unsynced() == shared2._unsynced()
-
-
-@parametrize_with_cases("shared, mutate", cases=".")
-def test_shared_new_fails_if_done_on_existing_resource(
-    shared: Synced, mutate: Callable
-) -> None:
-    data, location, versioner, rw, lock = shared._components()
-    with pytest.raises(Synced.VersionedResourceAlreadyExistsError):
-        Synced.new(
-            data=data,
-            location=location,
-            versioner=versioner,
-            reader_writer=rw,
-            locker=lock,
-        )
diff --git a/tests/test_state/test_trial.py b/tests/test_state/test_trial.py
index 0ddc9e341..12872bf3f 100644
--- a/tests/test_state/test_trial.py
+++ b/tests/test_state/test_trial.py
@@ -1,8 +1,11 @@
 from __future__ import annotations
-from neps.state import Trial
+
 import os
+
 import numpy as np

+from neps.state import Trial
+

 def test_trial_creation() -> None:
     trial_id = "1"
@@ -19,12 +22,13 @@
         previous_trial=previous_trial,
         worker_id=worker_id,
    )
-    assert trial.state == Trial.State.PENDING
+    assert trial.metadata.state == Trial.State.PENDING
     assert trial.id == trial_id
     assert trial.config == {"a": "b"}
     assert trial.metadata == Trial.MetaData(
         id="1",
         time_sampled=time_sampled,
+        state=Trial.State.PENDING,
         location="1",
         previous_trial_location=None,
         previous_trial_id=previous_trial,
@@ -53,11 +57,12 @@
     )
     trial.set_submitted(time_submitted=time_submitted)

-    assert trial.state == Trial.State.SUBMITTED
+    assert trial.metadata.state == Trial.State.SUBMITTED
     assert trial.id == trial_id
     assert trial.config == {"a": "b"}
     assert trial.metadata == Trial.MetaData(
         id=trial_id,
+        state=Trial.State.SUBMITTED,
         time_sampled=time_sampled,
         previous_trial_location="0",
         location="1",
@@ -90,11 +95,12 @@ def test_trial_as_in_progress_with_different_evaluating_worker() -> None:
     trial.set_submitted(time_submitted=time_submitted)
     trial.set_evaluating(time_started=time_started, worker_id=evaluating_worker_id)

-    assert trial.state == Trial.State.EVALUATING
+    assert trial.metadata.state == Trial.State.EVALUATING
     assert trial.id == trial_id
     assert trial.config == {"a": "b"}
     assert trial.metadata == Trial.MetaData(
         id=trial_id,
+        state=Trial.State.EVALUATING,
         time_sampled=time_sampled,
         previous_trial_id=previous_trial,
         previous_trial_location="0",
@@ -116,7 +122,7 @@ def test_trial_as_success_after_being_progress() -> None:
     previous_trial = "0"
     sampling_worker_id = "42"
     evaluating_worker_id = "43"
-    loss = 427
+    objective_to_minimize = 427
     cost = -123.6
     extra = {"picnic": "basket", "counts": [1, 2, 3]}
@@ -133,7 +139,7 @@
     trial.set_evaluating(time_started=time_started, worker_id=evaluating_worker_id)
     report = trial.set_complete(
         report_as="success",
-        loss=loss,
+        objective_to_minimize=objective_to_minimize,
         cost=cost,
         err=None,
         tb=None,
@@ -143,11 +149,12 @@
         time_end=time_end,
     )

-    assert trial.state == Trial.State.SUCCESS
+    assert trial.metadata.state == Trial.State.SUCCESS
     assert trial.id == trial_id
     assert trial.config == {"a": "b"}
     assert trial.metadata == Trial.MetaData(
         id=trial_id,
+        state=Trial.State.SUCCESS,
         time_sampled=time_sampled,
         previous_trial_location="0",
         location="1",
@@ -161,7 +168,7 @@
     )
     assert report == Trial.Report(
         trial_id=trial_id,
-        loss=loss,
+        objective_to_minimize=objective_to_minimize,
         cost=cost,
         learning_curve=None,
         evaluation_duration=1,
@@ -172,7 +179,7 @@
     )


-def test_trial_as_failed_with_nan_loss_and_in_cost() -> None:
+def test_trial_as_failed_with_nan_objective_to_minimize_and_in_cost() -> None:
     trial_id = "1"
     time_sampled = 0
     time_submitted = 1
@@ -181,7 +188,7 @@
     previous_trial = "0"
     sampling_worker_id = "42"
     evaluating_worker_id = "43"
-    loss = np.nan
+    objective_to_minimize = np.nan
     cost = np.inf
     extra = {"picnic": "basket", "counts": [1, 2, 3]}
@@ -198,7 +205,7 @@
     trial.set_evaluating(time_started=time_started, worker_id=evaluating_worker_id)
     report = trial.set_complete(
         report_as="failed",
-        loss=loss,
+        objective_to_minimize=objective_to_minimize,
         cost=cost,
         learning_curve=None,
         evaluation_duration=time_end - time_started,
@@ -207,11 +214,12 @@
         extra=extra,
         time_end=time_end,
     )
-    assert trial.state == Trial.State.FAILED
+    assert trial.metadata.state == Trial.State.FAILED
     assert trial.id == trial_id
     assert trial.config == {"a": "b"}
     assert trial.metadata == Trial.MetaData(
         id=trial_id,
+        state=Trial.State.FAILED,
         time_sampled=time_sampled,
         previous_trial_id=previous_trial,
         sampling_worker_id=sampling_worker_id,
@@ -225,7 +233,7 @@
     )
     assert report == Trial.Report(
         trial_id=trial_id,
-        loss=loss,
+        objective_to_minimize=objective_to_minimize,
         cost=cost,
         learning_curve=None,
         evaluation_duration=time_end - time_started,
@@ -262,7 +270,7 @@ def test_trial_as_crashed_with_err_and_tb() -> None:
     trial.set_evaluating(time_started=time_started, worker_id=evaluating_worker_id)
     report = trial.set_complete(
         report_as="crashed",
-        loss=None,
+        objective_to_minimize=None,
         cost=None,
         learning_curve=None,
         evaluation_duration=time_end - time_started,
@@ -272,11 +280,12 @@
         time_end=time_end,
     )

-    assert trial.state == Trial.State.CRASHED
+    assert trial.metadata.state == Trial.State.CRASHED
     assert trial.id == trial_id
     assert trial.config == {"a": "b"}
     assert trial.metadata == Trial.MetaData(
         id=trial_id,
+        state=Trial.State.CRASHED,
         time_sampled=time_sampled,
         previous_trial_id=previous_trial,
         sampling_worker_id=sampling_worker_id,
@@ -290,7 +299,7 @@
     )
     assert report == Trial.Report(
         trial_id=trial_id,
-        loss=None,
+        objective_to_minimize=None,
         cost=None,
         learning_curve=None,
         evaluation_duration=time_end - time_started,
diff --git a/neps/optimizers/regularized_evolution/__init__.py b/tests/test_yaml_run_args/__init__.py
similarity index 100%
rename from neps/optimizers/regularized_evolution/__init__.py
rename to tests/test_yaml_run_args/__init__.py
diff --git a/tests/test_yaml_run_args/run_args_full.yaml b/tests/test_yaml_run_args/run_args_full.yaml
index 1ec961404..2bf1671fe 100644
--- a/tests/test_yaml_run_args/run_args_full.yaml
+++ b/tests/test_yaml_run_args/run_args_full.yaml
@@ -1,6 +1,6 @@
-run_pipeline:
+evaluate_pipeline:
   path: "tests/test_yaml_run_args/test_yaml_run_args.py"
-  name: run_pipeline
+  name: evaluate_pipeline
 pipeline_space: "tests/test_yaml_run_args/pipeline_space.yaml"

 root_directory: "test_yaml"
@@ -19,7 +19,7 @@ parallelization_setup:
 continue_until_max_evaluation_completed: true

 error_handling:
-  loss_value_on_error: 4.2
+  objective_to_minimize_value_on_error: 4.2
   cost_value_on_error: 3.7
   ignore_errors: true
diff --git a/tests/test_yaml_run_args/run_args_full_same_level.yaml b/tests/test_yaml_run_args/run_args_full_same_level.yaml
index b920533e4..a7b9948f6 100644
--- a/tests/test_yaml_run_args/run_args_full_same_level.yaml
+++ b/tests/test_yaml_run_args/run_args_full_same_level.yaml
@@ -1,6 +1,6 @@
-run_pipeline:
+evaluate_pipeline:
   path: "tests/test_yaml_run_args/test_yaml_run_args"  # check if without .py also works
-  name: "run_pipeline"
+  name: "evaluate_pipeline"
 pipeline_space: "tests/test_yaml_run_args/pipeline_space.yaml"
 root_directory: "test_yaml"
 max_evaluations_total: 20
@@ -11,7 +11,7 @@ development_stage_id: 9
 task_id: 2.0
 max_evaluations_per_run: 5
 continue_until_max_evaluation_completed: true
-loss_value_on_error: 2.4
+objective_to_minimize_value_on_error: 2.4
 cost_value_on_error: 2.1
 ignore_errors: false
 searcher:
diff --git a/tests/test_yaml_run_args/run_args_invalid_key.yaml b/tests/test_yaml_run_args/run_args_invalid_key.yaml
index b9c0ff2db..8047f74a8 100644
--- a/tests/test_yaml_run_args/run_args_invalid_key.yaml
+++ b/tests/test_yaml_run_args/run_args_invalid_key.yaml
@@ -1,6 +1,6 @@
 run_pipelin:  # typo in key
   path: "tests/test_yaml_run_args/test_yaml_run_args.py"
-  name: run_pipeline
+  name: evaluate_pipeline
 pipeline_space: "tests/test_yaml_run_args/pipeline_space.yaml"

 root_directory: "test_yaml"
@@ -19,7 +19,7 @@ parallelization_setup:
 continue_until_max_evaluation_completed: true

 error_handling:
-  loss_value_on_error: 4.2
+  objective_to_minimize_value_on_error: 4.2
   cost_value_on_error: 3.7
   ignore_errors: true
diff --git a/tests/test_yaml_run_args/run_args_invalid_type.yaml b/tests/test_yaml_run_args/run_args_invalid_type.yaml
index 1c85b8cbe..8e1f1d2d1 100644
--- a/tests/test_yaml_run_args/run_args_invalid_type.yaml
+++ b/tests/test_yaml_run_args/run_args_invalid_type.yaml
@@ -16,7 +16,7 @@ parallelization_setup:
 continue_until_max_evaluation_completed: false

 error_handling:
-  loss_value_on_error: None
+  objective_to_minimize_value_on_error: None
   cost_value_on_error: None
   ignore_errors: None
diff --git a/tests/test_yaml_run_args/run_args_key_missing.yaml b/tests/test_yaml_run_args/run_args_key_missing.yaml
index ae27c608a..660349c99 100644
--- a/tests/test_yaml_run_args/run_args_key_missing.yaml
+++ b/tests/test_yaml_run_args/run_args_key_missing.yaml
@@ -1,4 +1,4 @@
-run_pipeline:
+evaluate_pipeline:
   path: "tests/test_yaml_run_args/test_yaml_run_args.py"
   # key name is missing
 pipeline_space: "tests/test_yaml_run_args/pipeline_space.yaml"
@@ -11,7 +11,7 @@ development_stage_id: 9
 task_id: 2.0
 max_evaluations_per_run: 5
 continue_until_max_evaluation_completed: true
-loss_value_on_error: 2.4
+objective_to_minimize_value_on_error: 2.4
 cost_value_on_error: 2.1
 ignore_errors: false
 searcher:
diff --git a/tests/test_yaml_run_args/run_args_optional_loading_format.yaml b/tests/test_yaml_run_args/run_args_optional_loading_format.yaml
index 26bdad832..640c7d9de 100644
--- a/tests/test_yaml_run_args/run_args_optional_loading_format.yaml
+++ b/tests/test_yaml_run_args/run_args_optional_loading_format.yaml
@@ -1,6 +1,6 @@
-run_pipeline:
+evaluate_pipeline:
   path: "tests/test_yaml_run_args/test_yaml_run_args.py"
-  name: "run_pipeline"
+  name: "evaluate_pipeline"
 pipeline_space:  # Optional loading
   path: "tests/test_yaml_run_args/test_yaml_run_args.py"
   name: "pipeline_space"
@@ -13,13 +13,12 @@ development_stage_id: 9
 task_id:
 max_evaluations_per_run: 5
 continue_until_max_evaluation_completed: true
-loss_value_on_error: 2.4
+objective_to_minimize_value_on_error: 2.4
 cost_value_on_error: 2.1
 ignore_errors: false
 searcher:  # Optional Loading
   path: "neps/optimizers/bayesian_optimization/optimizer.py"
   name: BayesianOptimization
   initial_design_size: 5
-  surrogate_model: gp
 pre_load_hooks:
   hook1: "tests/test_yaml_run_args/test_yaml_run_args.py"
diff --git a/tests/test_yaml_run_args/run_args_partial_same_level.yaml b/tests/test_yaml_run_args/run_args_partial_same_level.yaml
index 85cc1cbe7..931986513 100644
--- a/tests/test_yaml_run_args/run_args_partial_same_level.yaml
+++ b/tests/test_yaml_run_args/run_args_partial_same_level.yaml
@@ -8,7 +8,7 @@ development_stage_id: None
 task_id: 4
 max_evaluations_per_run: None
 continue_until_max_evaluation_completed: True
-loss_value_on_error: None
+objective_to_minimize_value_on_error: None
 ignore_errors: True
 searcher:
 pre_load_hooks: None
diff --git a/tests/test_yaml_run_args/run_args_wrong_name.yaml b/tests/test_yaml_run_args/run_args_wrong_name.yaml
index ebfe640fc..c2eb70020 100644
--- a/tests/test_yaml_run_args/run_args_wrong_name.yaml
+++ b/tests/test_yaml_run_args/run_args_wrong_name.yaml
@@ -1,4 +1,4 @@
-run_pipeline:
+evaluate_pipeline:
   path: "tests/test_yaml_run_args/test_yaml_run_args.py"
   name: run_pipelin  # typo in name
 pipeline_space: "tests/test_yaml_run_args/pipeline_space.yaml"
@@ -19,7 +19,7 @@ parallelization_setup:
 continue_until_max_evaluation_completed: True

 error_handling:
-  loss_value_on_error: 4.2
+  objective_to_minimize_value_on_error: 4.2
   cost_value_on_error: 3.7
   ignore_errors: True
diff --git a/tests/test_yaml_run_args/run_args_wrong_path.yaml b/tests/test_yaml_run_args/run_args_wrong_path.yaml
index bc8feed65..b09808cc1 100644
--- a/tests/test_yaml_run_args/run_args_wrong_path.yaml
+++ b/tests/test_yaml_run_args/run_args_wrong_path.yaml
@@ -1,6 +1,6 @@
-run_pipeline:
+evaluate_pipeline:
   path: "tests/test_yaml_run_args/test_yaml_ru_args.py"  # typo in path
-  name: run_pipeline
+  name: evaluate_pipeline
 pipeline_space: "tests/test_yaml_run_args/pipeline_space.yaml"

 root_directory: "test_yaml"
@@ -19,7 +19,7 @@ parallelization_setup:
 continue_until_max_evaluation_completed: True

 error_handling:
-  loss_value_on_error: 4.2
+  objective_to_minimize_value_on_error: 4.2
   cost_value_on_error: 3.7
   ignore_errors: True
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/__init__.py b/tests/test_yaml_run_args/test_declarative_usage_docs/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/customizing_neps_optimizer.yaml b/tests/test_yaml_run_args/test_declarative_usage_docs/customizing_neps_optimizer.yaml
index 5ddaf23ea..afae914e0 100644
--- a/tests/test_yaml_run_args/test_declarative_usage_docs/customizing_neps_optimizer.yaml
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/customizing_neps_optimizer.yaml
@@ -1,5 +1,5 @@
-run_pipeline:
-  path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py
+evaluate_pipeline:
+  path: tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py
   name: run_pipeline_constant

 pipeline_space:
@@ -7,10 +7,7 @@ pipeline_space:
     lower: 1e-5
     upper: 1e-1
     log: True  # Log scale for learning rate
-  epochs:
-    lower: 5
-    upper: 20
-    is_fidelity: True
+  epochs: 20
   optimizer:
     choices: [adam, sgd, adamw]
   batch_size: 64
@@ -22,9 +19,5 @@ searcher:
   name: "my_bayesian"
   # Specific arguments depending on the searcher
   initial_design_size: 7
-  surrogate_model: gp
-  acquisition: EI
-  acquisition_sampler: random
-  random_interleave_prob: 0.1

 overwrite_working_directory: True
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/defining_hooks.yaml b/tests/test_yaml_run_args/test_declarative_usage_docs/defining_hooks.yaml
index 83842f93c..f37485f42 100644
--- a/tests/test_yaml_run_args/test_declarative_usage_docs/defining_hooks.yaml
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/defining_hooks.yaml
@@ -1,6 +1,6 @@
 # Basic NEPS Configuration Example
-run_pipeline:
-  path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py
+evaluate_pipeline:
+  path: tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py
   name: run_pipeline_constant

 pipeline_space:
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py b/tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py
new file mode 100644
index 000000000..adb912682
--- /dev/null
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from warnings import warn
+
+import numpy as np
+
+
+def run_pipeline(learning_rate, optimizer, epochs):
+    """Func for test loading of run_pipeline."""
+    warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning, stacklevel=2)
+    return evaluate_pipeline(learning_rate, optimizer, epochs)
+
+def evaluate_pipeline(learning_rate, optimizer, epochs):
+    """Func for test loading of evaluate_pipeline."""
+    eval_score = np.random.choice([learning_rate, epochs], 1) if optimizer == "a" else 5.0
+    return {"objective_to_minimize": eval_score}
+
+
+def run_pipeline_constant(learning_rate, optimizer, epochs, batch_size):
+    """Func for test loading of run_pipeline."""
+    warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning, stacklevel=2)
+    return evaluate_pipeline_constant(learning_rate, optimizer, epochs, batch_size)
+
+def evaluate_pipeline_constant(learning_rate, optimizer, epochs, batch_size):
+    """Func for test loading of evaluate_pipeline."""
+    eval_score = np.random.choice([learning_rate, epochs], 1) if optimizer == "a" else 5.0
+    eval_score += batch_size
+    return {"objective_to_minimize": eval_score}
+
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/full_configuration_template.yaml b/tests/test_yaml_run_args/test_declarative_usage_docs/full_configuration_template.yaml
index 21851c881..e207a6dfa 100644
--- a/tests/test_yaml_run_args/test_declarative_usage_docs/full_configuration_template.yaml
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/full_configuration_template.yaml
@@ -1,6 +1,6 @@
 # Full Configuration Template for NePS
-run_pipeline:
-  path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py
+evaluate_pipeline:
+  path: tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py
   name: run_pipeline_constant

 pipeline_space:
@@ -31,7 +31,7 @@ max_evaluations_per_run:
 continue_until_max_evaluation_completed: False

 # Error Handling
-loss_value_on_error:
+objective_to_minimize_value_on_error:
 cost_value_on_error:
 ignore_errors:
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/hooks.py b/tests/test_yaml_run_args/test_declarative_usage_docs/hooks.py
index a26f28f32..029ac890c 100644
--- a/tests/test_yaml_run_args/test_declarative_usage_docs/hooks.py
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/hooks.py
@@ -1,8 +1,11 @@
+from __future__ import annotations
+
+
 def hook1(sampler):
-    """func to test loading of pre_load_hooks"""
+    """Func to test loading of pre_load_hooks."""
     return sampler


 def hook2(sampler):
-    """func to test loading of pre_load_hooks"""
+    """Func to test loading of pre_load_hooks."""
     return sampler
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/loading_own_optimizer.yaml b/tests/test_yaml_run_args/test_declarative_usage_docs/loading_own_optimizer.yaml
index fce520343..c178e8da6 100644
--- a/tests/test_yaml_run_args/test_declarative_usage_docs/loading_own_optimizer.yaml
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/loading_own_optimizer.yaml
@@ -1,6 +1,6 @@
-run_pipeline:
-  path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py
-  name: run_pipeline
+evaluate_pipeline:
+  path: tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py
+  name: evaluate_pipeline

 pipeline_space:
   learning_rate:
@@ -18,7 +18,5 @@ searcher:
   name: BayesianOptimization
   # Specific arguments depending on your searcher
   initial_design_size: 7
-  surrogate_model: gp
-  acquisition: EI

 overwrite_working_directory: True
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/loading_pipeline_space_dict.yaml b/tests/test_yaml_run_args/test_declarative_usage_docs/loading_pipeline_space_dict.yaml
index 909e9559c..0baad5c10 100644
--- a/tests/test_yaml_run_args/test_declarative_usage_docs/loading_pipeline_space_dict.yaml
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/loading_pipeline_space_dict.yaml
@@ -1,6 +1,6 @@
 # Loading pipeline space from a python dict
-run_pipeline:
-  path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py
+evaluate_pipeline:
+  path: tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py
   name: run_pipeline_constant

 pipeline_space:
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/neps_run.py b/tests/test_yaml_run_args/test_declarative_usage_docs/neps_run.py
index 16533af67..fbc2a1bd1 100644
--- a/tests/test_yaml_run_args/test_declarative_usage_docs/neps_run.py
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/neps_run.py
@@ -1,7 +1,22 @@
+from __future__ import annotations
+
 import argparse
+from warnings import warn
+
+import numpy as np
+
 import neps
-from tests.test_yaml_run_args.test_declarative_usage_docs.run_pipeline import \
-    run_pipeline_constant
+
+
+def run_pipeline_constant(learning_rate, optimizer, epochs, batch_size):
+    warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning, stacklevel=2)
+    return evaluate_pipeline_constant(learning_rate, optimizer, epochs, batch_size)
+
+def evaluate_pipeline_constant(learning_rate, optimizer, epochs, batch_size):
+    """Func for test loading of evaluate_pipeline."""
+    eval_score = np.random.choice([learning_rate, epochs], 1) if optimizer == "a" else 5.0
+    eval_score += batch_size
+    return {"objective_to_minimize": eval_score}


 if __name__ == "__main__":
@@ -9,11 +24,10 @@
         description="Run NEPS optimization with run_args.yml."
     )
     parser.add_argument("run_args", type=str, help="Path to the YAML configuration file.")
-    parser.add_argument("--run_pipeline", action="store_true")
+    parser.add_argument("--evaluate_pipeline", action="store_true")
     args = parser.parse_args()

-    if args.run_pipeline:
-        neps.run(run_args=args.run_args, run_pipeline=run_pipeline_constant)
+    if args.evaluate_pipeline:
+        neps.run(run_args=args.run_args, evaluate_pipeline=evaluate_pipeline_constant)
     else:
         neps.run(run_args=args.run_args)
-
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/outsourcing_optimizer.yaml b/tests/test_yaml_run_args/test_declarative_usage_docs/outsourcing_optimizer.yaml
index e8e00aa30..82882247b 100644
--- a/tests/test_yaml_run_args/test_declarative_usage_docs/outsourcing_optimizer.yaml
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/outsourcing_optimizer.yaml
@@ -1,7 +1,7 @@
 # Optimizer settings from YAML configuration
-run_pipeline:
-  path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py
-  name: run_pipeline
+evaluate_pipeline:
+  path: tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py
+  name: evaluate_pipeline

 pipeline_space:
   learning_rate:
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/outsourcing_pipeline_space.yaml b/tests/test_yaml_run_args/test_declarative_usage_docs/outsourcing_pipeline_space.yaml
index 0810b62e4..980b8b27b 100644
--- a/tests/test_yaml_run_args/test_declarative_usage_docs/outsourcing_pipeline_space.yaml
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/outsourcing_pipeline_space.yaml
@@ -1,6 +1,6 @@
 # Pipeline space settings from YAML
-run_pipeline:
-  path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py
+evaluate_pipeline:
+  path: tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py
   name: run_pipeline_constant

 pipeline_space: tests/test_yaml_run_args/test_declarative_usage_docs/pipeline_space.yaml
diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/pipeline_space.py b/tests/test_yaml_run_args/test_declarative_usage_docs/pipeline_space.py
index 79a034db5..c63c06d29 100644
--- a/tests/test_yaml_run_args/test_declarative_usage_docs/pipeline_space.py
+++ b/tests/test_yaml_run_args/test_declarative_usage_docs/pipeline_space.py
@@ -1,8 +1,10 @@
+from __future__
import annotations + import neps -pipeline_space = dict( - learning_rate=neps.FloatParameter(lower=1e-5, upper=1e-1, log=True), - epochs=neps.IntegerParameter(lower=5, upper=20, is_fidelity=True), - optimizer=neps.CategoricalParameter(choices=["adam", "sgd", "adamw"]), - batch_size=neps.ConstantParameter(value=64) -) +pipeline_space = { + "learning_rate": neps.Float(lower=1e-5, upper=1e-1, log=True), + "epochs": neps.Integer(lower=5, upper=20, is_fidelity=True), + "optimizer": neps.Categorical(choices=["adam", "sgd", "adamw"]), + "batch_size": neps.Constant(value=64) +} diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/pipeline_space.yaml b/tests/test_yaml_run_args/test_declarative_usage_docs/pipeline_space.yaml index f8c7a2aa3..274d53873 100644 --- a/tests/test_yaml_run_args/test_declarative_usage_docs/pipeline_space.yaml +++ b/tests/test_yaml_run_args/test_declarative_usage_docs/pipeline_space.yaml @@ -3,15 +3,15 @@ learning_rate: lower: 1e-5 upper: 1e-1 log: True # Log scale for learning rate - default: 1e-2 - default_confidence: "medium" + prior: 1e-2 + prior_confidence: "medium" epochs: lower: 5 upper: 20 - default: 10 + prior: 10 is_fidelity: True optimizer: choices: [adam, sgd, adamw] - default: adam - default_confidence: low + prior: adam + prior_confidence: low batch_size: 64 diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py b/tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py deleted file mode 100644 index f44e6e71b..000000000 --- a/tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py +++ /dev/null @@ -1,21 +0,0 @@ -import numpy as np - - -def run_pipeline(learning_rate, optimizer, epochs): - """func for test loading of run_pipeline""" - if optimizer == "a": - eval_score = np.random.choice([learning_rate, epochs], 1) - else: - eval_score = 5.0 - return {"loss": eval_score} - - -def run_pipeline_constant(learning_rate, optimizer, epochs, batch_size): - """func for test loading of run_pipeline""" - if optimizer == "a": - eval_score = np.random.choice([learning_rate, epochs], 1) - else: - eval_score = 5.0 - eval_score += batch_size - return {"loss": eval_score} - diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/set_up_optimizer.yaml b/tests/test_yaml_run_args/test_declarative_usage_docs/set_up_optimizer.yaml index f65af7431..94922d78a 100644 --- a/tests/test_yaml_run_args/test_declarative_usage_docs/set_up_optimizer.yaml +++ b/tests/test_yaml_run_args/test_declarative_usage_docs/set_up_optimizer.yaml @@ -1,11 +1,5 @@ strategy: bayesian_optimization # Specific arguments depending on the searcher initial_design_size: 7 -surrogate_model: gp -acquisition: EI -log_prior_weighted: false -acquisition_sampler: random -random_interleave_prob: 0.1 -disable_priors: false -prior_confidence: high -sample_default_first: false +use_priors: true +sample_prior_first: false diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/simple_example_including_run_pipeline.yaml b/tests/test_yaml_run_args/test_declarative_usage_docs/simple_example_including_run_pipeline.yaml index fbf36873a..a9abe6a75 100644 --- a/tests/test_yaml_run_args/test_declarative_usage_docs/simple_example_including_run_pipeline.yaml +++ b/tests/test_yaml_run_args/test_declarative_usage_docs/simple_example_including_run_pipeline.yaml @@ -1,6 +1,6 @@ -# Simple NePS configuration including run_pipeline -run_pipeline: - path: tests/test_yaml_run_args/test_declarative_usage_docs/run_pipeline.py +# Simple NePS configuration 
including evaluate_pipeline +evaluate_pipeline: + path: tests/test_yaml_run_args/test_declarative_usage_docs/evaluate_pipeline.py name: run_pipeline_constant pipeline_space: @@ -16,7 +16,7 @@ pipeline_space: choices: [adam, sgd, adamw] batch_size: 64 -root_directory: "tests_tmpdir/test_declarative_usage_docs/simple_example_including_run_pipeline" +root_directory: "tests_tmpdir/test_declarative_usage_docs/simple_example_including_evaluate_pipeline" max_evaluations_total: 20 # Budget overwrite_working_directory: True diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/test_declarative_usage_docs.py b/tests/test_yaml_run_args/test_declarative_usage_docs/test_declarative_usage_docs.py index ea2ca6ec1..2ec2d0dc0 100644 --- a/tests/test_yaml_run_args/test_declarative_usage_docs/test_declarative_usage_docs.py +++ b/tests/test_yaml_run_args/test_declarative_usage_docs/test_declarative_usage_docs.py @@ -1,9 +1,12 @@ -import pytest -import os +from __future__ import annotations + import subprocess import sys +from pathlib import Path + +import pytest -BASE_PATH = "tests/test_yaml_run_args/test_declarative_usage_docs/" +BASE_PATH = Path("tests") / "test_yaml_run_args" / "test_declarative_usage_docs" @pytest.mark.neps_api @@ -21,35 +24,31 @@ ], ) def test_run_with_yaml(yaml_file: str) -> None: - """ - Test 'neps.run' with various run_args.yaml settings to simulate loading options + """Test 'neps.run' with various run_args.yaml settings to simulate loading options for variables. """ - yaml_path = os.path.join(BASE_PATH, yaml_file) - assert os.path.exists(yaml_path), f"{yaml_file} does not exist." + yaml_path = BASE_PATH / yaml_file + assert yaml_path.exists(), f"{yaml_path} does not exist." try: - subprocess.check_call([sys.executable, BASE_PATH + "neps_run.py", yaml_path]) + subprocess.check_call([sys.executable, BASE_PATH / "neps_run.py", yaml_path]) except subprocess.CalledProcessError as e: - pytest.fail( - f"NePS run failed for configuration: {yaml_file} with error: {str(e)}" - ) + pytest.fail(f"NePS run failed for configuration: {yaml_file} with error: {e!s}") @pytest.mark.neps_api def test_run_with_yaml_and_run_pipeline() -> None: - """ - Test 'neps.run' with simple_example.yaml as run_args + a run_pipeline that is + """Test 'neps.run' with simple_example.yaml as run_args + a run_pipeline that is provided separately. """ - yaml_path = os.path.join(BASE_PATH, "simple_example.yaml") - assert os.path.exists(yaml_path), f"{yaml_path} does not exist." + yaml_path = BASE_PATH / "simple_example.yaml" + assert yaml_path.exists(), f"{yaml_path} does not exist." 
try: subprocess.check_call( - [sys.executable, BASE_PATH + "neps_run.py", yaml_path, "--run_pipeline"] + [sys.executable, BASE_PATH / "neps_run.py", yaml_path, "--evaluate_pipeline"] ) except subprocess.CalledProcessError as e: pytest.fail( - f"NePS run failed for configuration: simple_example.yaml with error: {str(e)}" + f"NePS run failed for configuration: simple_example.yaml with error: {e!s}" ) diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/__init__.py b/tests/test_yaml_run_args/test_run_args_by_neps_run/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/config.yaml b/tests/test_yaml_run_args/test_run_args_by_neps_run/config.yaml index 967e2c9cd..a87e31087 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/config.yaml +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/config.yaml @@ -1,6 +1,6 @@ -run_pipeline: +evaluate_pipeline: path: "tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py" - name: run_pipeline + name: evaluate_pipeline pipeline_space: "tests/test_yaml_run_args/test_run_args_by_neps_run/search_space.yaml" root_directory: "tests_tmpdir/test_run_args_by_neps_run/results2" diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/config_hyperband_mixed_args.yaml b/tests/test_yaml_run_args/test_run_args_by_neps_run/config_hyperband_mixed_args.yaml index 597cf20ad..3ebfcb5d0 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/config_hyperband_mixed_args.yaml +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/config_hyperband_mixed_args.yaml @@ -1,8 +1,8 @@ # args of optimizer from searcher kwargs (neps.run) and from run_args (yaml) -run_pipeline: +evaluate_pipeline: path: "tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py" - name: run_pipeline + name: evaluate_pipeline pipeline_space: "tests/test_yaml_run_args/test_run_args_by_neps_run/search_space_with_fidelity.yaml" root_directory: "tests_tmpdir/test_run_args_by_neps_run/optimizer_hyperband" diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/config_priorband_with_args.yaml b/tests/test_yaml_run_args/test_run_args_by_neps_run/config_priorband_with_args.yaml index fca3a6d61..2d94740b7 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/config_priorband_with_args.yaml +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/config_priorband_with_args.yaml @@ -1,6 +1,6 @@ -run_pipeline: +evaluate_pipeline: path: "tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py" - name: run_pipeline + name: evaluate_pipeline pipeline_space: "tests/test_yaml_run_args/test_run_args_by_neps_run/search_space_with_priors.yaml" root_directory: "tests_tmpdir/test_run_args_by_neps_run/optimizer_priorband" @@ -21,8 +21,8 @@ searcher: strategy: "priorband" initial_design_type: max_budget prior_confidence: medium - sample_default_first: true - sample_default_at_target: false + sample_prior_first: true + sample_prior_at_target: false prior_weight_type: geometric inc_sample_type: mutation inc_mutation_rate: 0.2 @@ -34,6 +34,5 @@ searcher: surrogate_model: gp acquisition: EI log_prior_weighted: false - acquisition_sampler: mutation pre_load_hooks: None diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/config_select_bo.yaml b/tests/test_yaml_run_args/test_run_args_by_neps_run/config_select_bo.yaml index 6d1ca26e0..46913efa0 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/config_select_bo.yaml +++ 
b/tests/test_yaml_run_args/test_run_args_by_neps_run/config_select_bo.yaml @@ -1,6 +1,6 @@ -run_pipeline: +evaluate_pipeline: path: "tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py" - name: run_pipeline + name: evaluate_pipeline pipeline_space: "tests/test_yaml_run_args/test_run_args_by_neps_run/search_space.yaml" root_directory: "tests_tmpdir/test_run_args_by_neps_run/optimizer_bo" diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/loading_optimizer.yaml b/tests/test_yaml_run_args/test_run_args_by_neps_run/loading_optimizer.yaml index b44fafe19..731dd20bc 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/loading_optimizer.yaml +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/loading_optimizer.yaml @@ -1,6 +1,6 @@ -run_pipeline: +evaluate_pipeline: path: "tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py" - name: "run_pipeline" + name: "evaluate_pipeline" pipeline_space: "tests/test_yaml_run_args/test_run_args_by_neps_run/search_space.yaml" root_directory: "tests_tmpdir/test_run_args_by_neps_run/results1" diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/loading_pipeline_space.yaml b/tests/test_yaml_run_args/test_run_args_by_neps_run/loading_pipeline_space.yaml index fab9349ab..eb298ad0c 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/loading_pipeline_space.yaml +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/loading_pipeline_space.yaml @@ -1,6 +1,6 @@ -run_pipeline: +evaluate_pipeline: path: "tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py" - name: run_pipeline + name: evaluate_pipeline # Test Case pipeline_space: path: "tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py" diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py b/tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py index f2ee0c9c6..2a2775d4f 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/neps_run.py @@ -1,25 +1,32 @@ +from __future__ import annotations + import argparse +from warnings import warn + import numpy as np + import neps def run_pipeline(learning_rate, epochs, optimizer, batch_size): - """func for test loading of run_pipeline""" - if optimizer == "a": - eval_score = np.random.choice([learning_rate, epochs], 1) - else: - eval_score = 5.0 + """Func for test loading of run_pipeline.""" + warn("run_pipeline is deprecated, use evaluate_pipeline instead", DeprecationWarning, stacklevel=2) + return evaluate_pipeline(learning_rate, epochs, optimizer, batch_size) + +def evaluate_pipeline(learning_rate, epochs, optimizer, batch_size): + """Func for test loading of evaluate_pipeline.""" + eval_score = np.random.choice([learning_rate, epochs], 1) if optimizer == "a" else 5.0 eval_score += batch_size - return {"loss": eval_score} + return {"objective_to_minimize": eval_score} # For testing the functionality of loading a dictionary from a YAML configuration. 
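# Note: the neps_run.py hunk below swaps the deprecated neps.FloatParameter /
# IntegerParameter / CategoricalParameter / ConstantParameter names for the new
# neps.Float / Integer / Categorical / Constant classes in a plain dict, and the
# renamed evaluate_pipeline entry point returns "objective_to_minimize" instead
# of "loss". A minimal end-to-end sketch of that renamed surface follows; the
# bounds, toy objective, and root directory are illustrative assumptions, not
# values taken from this patch.
import neps

space = {
    "learning_rate": neps.Float(lower=1e-6, upper=1e-1, log=True),
    "epochs": neps.Integer(lower=1, upper=3),
    "optimizer": neps.Categorical(choices=["a", "b", "c"]),
    "batch_size": neps.Constant(64),
}

def evaluate_pipeline(learning_rate, epochs, optimizer, batch_size):
    # Toy objective; a real pipeline would train and validate a model here.
    return {"objective_to_minimize": learning_rate * epochs + batch_size}

if __name__ == "__main__":
    neps.run(
        evaluate_pipeline=evaluate_pipeline,
        pipeline_space=space,
        root_directory="tests_tmpdir/renamed_api_sketch",  # assumed path
        max_evaluations_total=5,
    )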
-pipeline_space = dict( - learning_rate=neps.FloatParameter(lower=1e-6, upper=1e-1, log=False), - epochs=neps.IntegerParameter(lower=1, upper=3, is_fidelity=False), - optimizer=neps.CategoricalParameter(choices=["a", "b", "c"]), - batch_size=neps.ConstantParameter(64), -) +pipeline_space = { + "learning_rate": neps.Float(lower=1e-6, upper=1e-1, log=False), + "epochs": neps.Integer(lower=1, upper=3, is_fidelity=False), + "optimizer": neps.Categorical(choices=["a", "b", "c"]), + "batch_size": neps.Constant(64), +} if __name__ == "__main__": parser = argparse.ArgumentParser( @@ -31,8 +38,8 @@ def run_pipeline(learning_rate, epochs, optimizer, batch_size): args = parser.parse_args() hyperband_args_optimizer = {"random_interleave_prob": 0.9, - "sample_default_first": False, - "sample_default_at_target": False, + "sample_prior_first": False, + "sample_prior_at_target": False, "eta": 7} if args.kwargs_flag: diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/hyperband_searcher_kwargs_yaml_args.yaml b/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/hyperband_searcher_kwargs_yaml_args.yaml index 4300ca796..cbb031317 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/hyperband_searcher_kwargs_yaml_args.yaml +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/hyperband_searcher_kwargs_yaml_args.yaml @@ -7,5 +7,5 @@ searcher_args: initial_design_type: max_budget use_priors: false random_interleave_prob: 0.9 - sample_default_first: false - sample_default_at_target: false + sample_prior_first: false + sample_prior_at_target: false diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/priorband_args_run_args.yaml b/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/priorband_args_run_args.yaml index 0f3263302..66a4a3dba 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/priorband_args_run_args.yaml +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/priorband_args_run_args.yaml @@ -7,8 +7,8 @@ searcher_args: initial_design_type: max_budget prior_confidence: medium random_interleave_prob: 0.0 - sample_default_first: true - sample_default_at_target: false + sample_prior_first: true + sample_prior_at_target: false prior_weight_type: geometric inc_sample_type: mutation inc_mutation_rate: 0.2 @@ -20,4 +20,3 @@ searcher_args: surrogate_model: gp acquisition: EI log_prior_weighted: false - acquisition_sampler: mutation diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/select_bo_run_args.yaml b/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/select_bo_run_args.yaml index af5259d00..3bdb09431 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/select_bo_run_args.yaml +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/optimizer_yamls/select_bo_run_args.yaml @@ -3,11 +3,8 @@ searcher_alg: bayesian_optimization searcher_selection: user-run_args-yaml neps_decision_tree: false searcher_args: - initial_design_size: 10 - surrogate_model: gp - acquisition: EI - log_prior_weighted: false - acquisition_sampler: mutation - random_interleave_prob: 0.0 - disable_priors: true - sample_default_first: false + initial_design_size: null + use_priors: false + use_cost: false + sample_prior_first: false + device: null diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/search_space_with_priors.yaml 
b/tests/test_yaml_run_args/test_run_args_by_neps_run/search_space_with_priors.yaml index ede66cc78..5e8b1d383 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/search_space_with_priors.yaml +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/search_space_with_priors.yaml @@ -6,10 +6,10 @@ learning_rate: lower: 1e-6 upper: 1e-1 log: False - default: 1e-3 - default_confidence: "low" + prior: 1e-3 + prior_confidence: "low" optimizer: choices: ["a", "b", "c"] - default: "b" - default_confidence: "high" + prior: "b" + prior_confidence: "high" batch_size: 64 diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/test_neps_run.py b/tests/test_yaml_run_args/test_run_args_by_neps_run/test_neps_run.py index 4995a14c5..3ac9dc85e 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/test_neps_run.py +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/test_neps_run.py @@ -1,10 +1,13 @@ -import pytest +from __future__ import annotations + import subprocess -import os import sys +from pathlib import Path + +import pytest import yaml -BASE_PATH = "tests/test_yaml_run_args/test_run_args_by_neps_run/" +BASE_PATH = Path("tests") / "test_yaml_run_args" / "test_run_args_by_neps_run" @pytest.mark.neps_api @@ -18,63 +21,62 @@ "file_name": "config_select_bo.yaml", "check_optimizer": True, "optimizer_path": "select_bo_run_args.yaml", - "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_bo/.optimizer_info/info.yaml", + "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_bo/optimizer_info.yaml", # noqa: E501 }, { "file_name": "config_priorband_with_args.yaml", "check_optimizer": True, "optimizer_path": "priorband_args_run_args.yaml", - "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_priorband/.optimizer_info/info.yaml", + "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_priorband/optimizer_info.yaml", # noqa: E501 }, { "file_name": "config_hyperband_mixed_args.yaml", "check_optimizer": True, "optimizer_path": "hyperband_searcher_kwargs_yaml_args.yaml", - "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_hyperband/.optimizer_info/info.yaml", + "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_hyperband/optimizer_info.yaml", # noqa: E501 "args": True, }, ], ) def test_run_with_yaml(config: dict) -> None: """Test "neps.run" with various run_args.yaml settings to simulate loading options - for variables.""" + for variables. + """ file_name = config["file_name"] check_optimizer = config.pop("check_optimizer", False) - assert os.path.exists(os.path.join(BASE_PATH, file_name)), ( - f"{file_name} " f"does not exist." - ) + assert (BASE_PATH / file_name).exists(), f"{file_name} " f"does not exist." 
cmd = [ sys.executable, - os.path.join(BASE_PATH, "neps_run.py"), - os.path.join(BASE_PATH, file_name), + BASE_PATH / "neps_run.py", + BASE_PATH / file_name, ] if "args" in config: cmd.append("--kwargs_flag") try: - subprocess.check_call(cmd) + subprocess.check_call(cmd) # noqa: S603 except subprocess.CalledProcessError: pytest.fail(f"NePS run failed for configuration: {file_name}") if check_optimizer: - optimizer_path = config.pop("optimizer_path") - result_path = config.pop("result_path") + optimizer_path = Path(config.pop("optimizer_path")) + result_path = Path(config.pop("result_path")) compare_generated_yaml(result_path, optimizer_path) -def compare_generated_yaml(result_path, optimizer_path): - """compare generated optimizer settings and solution settings""" - assert os.path.exists(result_path), "Generated YAML file does not exist." +def compare_generated_yaml(result_path: Path, optimizer_path: Path) -> None: + """Compare generated optimizer settings and solution settings.""" + assert result_path.exists(), "Generated YAML file does not exist." - assert os.path.exists( - BASE_PATH + "optimizer_yamls/" + optimizer_path - ), "Solution YAML file does not exist." + assert ( + BASE_PATH / "optimizer_yamls" / optimizer_path + ).exists(), "Solution YAML file does not exist." - with open(result_path, "r") as gen_file: + with result_path.open("r") as gen_file: generated_content = yaml.safe_load(gen_file) - with open(BASE_PATH + "optimizer_yamls/" + optimizer_path, "r") as ref_file: + with (BASE_PATH / "optimizer_yamls" / optimizer_path).open("r") as ref_file: reference_content = yaml.safe_load(ref_file) assert ( diff --git a/tests/test_yaml_run_args/test_yaml_run_args.py b/tests/test_yaml_run_args/test_yaml_run_args.py index 5a0c5d228..b68a07fe9 100644 --- a/tests/test_yaml_run_args/test_yaml_run_args.py +++ b/tests/test_yaml_run_args/test_yaml_run_args.py @@ -1,35 +1,50 @@ +from __future__ import annotations + +from collections.abc import Callable +from warnings import warn + import pytest + import neps -from neps.utils.run_args import get_run_args_from_yaml from neps.optimizers.bayesian_optimization.optimizer import BayesianOptimization -from typing import Union, Callable, Dict, List, Type +from neps.utils.run_args import get_run_args_from_yaml BASE_PATH = "tests/test_yaml_run_args/" -pipeline_space = dict(lr=neps.FloatParameter(lower=1e-3, upper=0.1), - optimizer=neps.CategoricalParameter(choices=["adam", "sgd", - "adamw"]), - epochs=neps.IntegerParameter(lower=1, upper=10), - batch_size=neps.ConstantParameter(value=64)) +pipeline_space = { + "lr": neps.Float(lower=1e-3, upper=0.1), + "optimizer": neps.Categorical(choices=["adam", "sgd", "adamw"]), + "epochs": neps.Integer(lower=1, upper=10), + "batch_size": neps.Constant(value=64), +} def run_pipeline(): - """func to test loading of run_pipeline""" + """Func to test loading of run_pipeline.""" + warn( + "run_pipeline is deprecated, use evaluate_pipeline instead", + DeprecationWarning, + stacklevel=2, + ) + return evaluate_pipeline() + + +def evaluate_pipeline(): + """Func to test loading of evaluate_pipeline.""" return def hook1(sampler): - """func to test loading of pre_load_hooks""" + """Func to test loading of pre_load_hooks.""" return sampler def hook2(sampler): - """func to test loading of pre_load_hooks""" + """Func to test loading of pre_load_hooks.""" return sampler -def check_run_args(yaml_path_run_args: str, expected_output: Dict) -> None: - """ - Validates the loaded NEPS configuration against expected settings. 
+def check_run_args(yaml_path_run_args: str, expected_output: dict) -> None: + """Validates the loaded NEPS configuration against expected settings. Loads NEPS configuration settings from a specified YAML file and verifies against expected settings, including function objects, dict and classes. Special @@ -44,10 +59,10 @@ def check_run_args(yaml_path_run_args: str, expected_output: Dict) -> None: """ output = get_run_args_from_yaml(BASE_PATH + yaml_path_run_args) - def are_functions_equivalent(f1: Union[Callable, List[Callable]], - f2: Union[Callable, List[Callable]]) -> bool: - """ - Compares functions or lists of functions for equivalence by their bytecode, + def are_functions_equivalent( + f1: Callable | list[Callable], f2: Callable | list[Callable] + ) -> bool: + """Compares functions or lists of functions for equivalence by their bytecode, useful when identical functions have different memory addresses. This method identifies if functions, despite being distinct instances, perform identical operations. @@ -65,14 +80,14 @@ def are_functions_equivalent(f1: Union[Callable, List[Callable]], return False return all( f1_item.__code__.co_code == f2_item.__code__.co_code - for f1_item, f2_item in zip(f1, f2) + for f1_item, f2_item in zip(f1, f2, strict=False) ) return f1.__code__.co_code == f2.__code__.co_code # Compare keys with a function/list of functions as their values # Special because they include a module loading procedure by a path and the name of # the function - for special_key in ["run_pipeline", "pre_load_hooks"]: + for special_key in ["evaluate_pipeline", "pre_load_hooks"]: if special_key in expected_output: func_expected = expected_output.pop(special_key) func_output = output.pop(special_key) @@ -92,12 +107,12 @@ def are_functions_equivalent(f1: Union[Callable, List[Callable]], @pytest.mark.neps_api @pytest.mark.parametrize( - "yaml_path,expected_output", + ("yaml_path", "expected_output"), [ ( "run_args_full.yaml", { - "run_pipeline": run_pipeline, + "evaluate_pipeline": evaluate_pipeline, "pipeline_space": pipeline_space, "root_directory": "test_yaml", "max_evaluations_total": 20, @@ -108,18 +123,20 @@ def are_functions_equivalent(f1: Union[Callable, List[Callable]], "task_id": 4, "max_evaluations_per_run": 5, "continue_until_max_evaluation_completed": True, - "loss_value_on_error": 4.2, + "objective_to_minimize_value_on_error": 4.2, "cost_value_on_error": 3.7, "ignore_errors": True, - "searcher": {"strategy": "bayesian_optimization", - "initial_design_size": 5, "surrogate_model": "gp"}, + "searcher": { + "strategy": "bayesian_optimization", + "initial_design_size": 5, + }, "pre_load_hooks": [hook1, hook2], }, ), ( "run_args_full_same_level.yaml", { - "run_pipeline": run_pipeline, + "evaluate_pipeline": evaluate_pipeline, "pipeline_space": pipeline_space, "root_directory": "test_yaml", "max_evaluations_total": 20, @@ -130,11 +147,13 @@ def are_functions_equivalent(f1: Union[Callable, List[Callable]], "task_id": 2.0, "max_evaluations_per_run": 5, "continue_until_max_evaluation_completed": True, - "loss_value_on_error": 2.4, + "objective_to_minimize_value_on_error": 2.4, "cost_value_on_error": 2.1, "ignore_errors": False, - "searcher": {"strategy": "bayesian_optimization", - "initial_design_size": 5, "surrogate_model": "gp"}, + "searcher": { + "strategy": "bayesian_optimization", + "initial_design_size": 5, + }, "pre_load_hooks": [hook1], }, ), @@ -147,8 +166,10 @@ def are_functions_equivalent(f1: Union[Callable, List[Callable]], "overwrite_working_directory": True, 
"post_run_summary": False, "continue_until_max_evaluation_completed": False, - "searcher": {"strategy": "bayesian_optimization", - "initial_design_size": 5, "surrogate_model": "gp"}, + "searcher": { + "strategy": "bayesian_optimization", + "initial_design_size": 5, + }, }, ), ( @@ -164,31 +185,31 @@ def are_functions_equivalent(f1: Union[Callable, List[Callable]], }, ), ("run_args_empty.yaml", {}), - ("run_args_optional_loading_format.yaml", { - "run_pipeline": run_pipeline, - "pipeline_space": pipeline_space, - "root_directory": "test_yaml", - "max_evaluations_total": 20, - "max_cost_total": 4.2, - "overwrite_working_directory": True, - "post_run_summary": False, - "development_stage_id": 9, - "max_evaluations_per_run": 5, - "continue_until_max_evaluation_completed": True, - "loss_value_on_error": 2.4, - "cost_value_on_error": 2.1, - "ignore_errors": False, - "searcher": BayesianOptimization, - "searcher_kwargs": {'initial_design_size': 5, - 'surrogate_model': 'gp'}, - "pre_load_hooks": [hook1] - - }) + ( + "run_args_optional_loading_format.yaml", + { + "evaluate_pipeline": evaluate_pipeline, + "pipeline_space": pipeline_space, + "root_directory": "test_yaml", + "max_evaluations_total": 20, + "max_cost_total": 4.2, + "overwrite_working_directory": True, + "post_run_summary": False, + "development_stage_id": 9, + "max_evaluations_per_run": 5, + "continue_until_max_evaluation_completed": True, + "objective_to_minimize_value_on_error": 2.4, + "cost_value_on_error": 2.1, + "ignore_errors": False, + "searcher": BayesianOptimization, + "searcher_kwargs": {"initial_design_size": 5}, + "pre_load_hooks": [hook1], + }, + ), ], ) -def test_yaml_config(yaml_path: str, expected_output: Dict) -> None: - """ - Tests NePS configuration loading from run_args=YAML, comparing expected settings +def test_yaml_config(yaml_path: str, expected_output: dict) -> None: + """Tests NePS configuration loading from run_args=YAML, comparing expected settings against loaded ones. Covers hierarchical levels and partial/full of yaml dict definitions. @@ -201,7 +222,7 @@ def test_yaml_config(yaml_path: str, expected_output: Dict) -> None: @pytest.mark.neps_api @pytest.mark.parametrize( - "yaml_path, expected_exception", + ("yaml_path", "expected_exception"), [ ("run_args_invalid_type.yaml", TypeError), ("run_args_wrong_path.yaml", ImportError), @@ -210,9 +231,8 @@ def test_yaml_config(yaml_path: str, expected_output: Dict) -> None: ("run_args_key_missing.yaml", KeyError), ], ) -def test_yaml_failure_cases(yaml_path: str, expected_exception: Type[Exception]) -> None: - """ - Tests for expected exceptions when loading erroneous NePS configurations from YAML. +def test_yaml_failure_cases(yaml_path: str, expected_exception: type[Exception]) -> None: + """Tests for expected exceptions when loading erroneous NePS configurations from YAML. Each case checks if `get_run_args_from_yaml` raises the correct exception for errors like invalid types, missing keys, and incorrect paths in YAML configurations. 
diff --git a/tests/test_yaml_search_space/__init__.py b/tests/test_yaml_search_space/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_yaml_search_space/correct_config.yaml b/tests/test_yaml_search_space/correct_config.yaml index 1264127ae..b8c348663 100644 --- a/tests/test_yaml_search_space/correct_config.yaml +++ b/tests/test_yaml_search_space/correct_config.yaml @@ -5,7 +5,7 @@ param_float1: is_fidelity: off param_int1: - lower: -3 + lower: 3 upper: 30 log: false is_fidelity: on diff --git a/tests/test_yaml_search_space/correct_config_including_priors.yml b/tests/test_yaml_search_space/correct_config_including_priors.yml index 1c7711103..305cb3aac 100644 --- a/tests/test_yaml_search_space/correct_config_including_priors.yml +++ b/tests/test_yaml_search_space/correct_config_including_priors.yml @@ -2,8 +2,8 @@ learning_rate: lower: 0.00001 upper: 0.1 log: true - default: 3.3E-2 - default_confidence: high + prior: 3.3E-2 + prior_confidence: high num_epochs: lower: 3 @@ -12,7 +12,7 @@ num_epochs: optimizer: choices: [adam, 90E-3, rmsprop] - default: 90E-3 - default_confidence: "medium" + prior: 0.09 + prior_confidence: "medium" dropout_rate: 1E3 diff --git a/tests/test_yaml_search_space/correct_config_including_types.yaml b/tests/test_yaml_search_space/correct_config_including_types.yaml index 95a380746..4ae8cb9c7 100644 --- a/tests/test_yaml_search_space/correct_config_including_types.yaml +++ b/tests/test_yaml_search_space/correct_config_including_types.yaml @@ -6,7 +6,7 @@ param_float1: param_int1: type: integer - lower: -3 + lower: 3 upper: 30 is_fidelity: True diff --git a/tests/test_yaml_search_space/default_not_in_range_config.yaml b/tests/test_yaml_search_space/default_not_in_range_config.yaml index a78038218..e0ec7d584 100644 --- a/tests/test_yaml_search_space/default_not_in_range_config.yaml +++ b/tests/test_yaml_search_space/default_not_in_range_config.yaml @@ -1,6 +1,6 @@ param_float1: lower: 0.00001 upper: 0.1 - default: 0.0000000001 + prior: 0.0000000001 log: false is_fidelity: true diff --git a/tests/test_yaml_search_space/default_value_not_in_choices_config.yaml b/tests/test_yaml_search_space/default_value_not_in_choices_config.yaml index dbd6d31e5..bc2e76d56 100644 --- a/tests/test_yaml_search_space/default_value_not_in_choices_config.yaml +++ b/tests/test_yaml_search_space/default_value_not_in_choices_config.yaml @@ -1,3 +1,3 @@ cat1: choices: ["a", "b", "c"] - default: "d" + prior: "d" diff --git a/tests/test_yaml_search_space/incorrect_fidelity_bounds_config.yaml b/tests/test_yaml_search_space/incorrect_fidelity_bounds_config.yaml new file mode 100644 index 000000000..552c775d2 --- /dev/null +++ b/tests/test_yaml_search_space/incorrect_fidelity_bounds_config.yaml @@ -0,0 +1,22 @@ +param_float1: + lower: 0.00001 + upper: 0.1 + log: TRUE + is_fidelity: off + +param_int1: + lower: -3 # negative fidelity range + upper: 30 + log: false + is_fidelity: on + +param_int2: + type: int + lower: 1E2 + upper: 3e4 + log: ON + is_fidelity: FALSE + +param_float2: + lower: 3.3e-5 + upper: 1.5E-1 diff --git a/tests/test_yaml_search_space/not_boolean_type_is_fidelity_cat_config.yaml b/tests/test_yaml_search_space/not_boolean_type_is_fidelity_cat_config.yaml index 1705900da..344270858 100644 --- a/tests/test_yaml_search_space/not_boolean_type_is_fidelity_cat_config.yaml +++ b/tests/test_yaml_search_space/not_boolean_type_is_fidelity_cat_config.yaml @@ -1,4 +1,4 @@ cat1: choices: ["a", "b", "c"] is_fidelity: fals - default: "c" + prior: "c" diff --git 
a/tests/test_yaml_search_space/not_boolean_type_is_fidelity_float_config.yaml b/tests/test_yaml_search_space/not_boolean_type_is_fidelity_float_config.yaml index 91f80f69e..729f61919 100644 --- a/tests/test_yaml_search_space/not_boolean_type_is_fidelity_float_config.yaml +++ b/tests/test_yaml_search_space/not_boolean_type_is_fidelity_float_config.yaml @@ -1,6 +1,6 @@ param_float1: lower: 0.00001 upper: 0.1 - default: 0.001 + prior: 0.001 log: false is_fidelity: truee diff --git a/tests/test_yaml_search_space/not_boolean_type_log_config.yaml b/tests/test_yaml_search_space/not_boolean_type_log_config.yaml index 97848fad8..c8ad4c47e 100644 --- a/tests/test_yaml_search_space/not_boolean_type_log_config.yaml +++ b/tests/test_yaml_search_space/not_boolean_type_log_config.yaml @@ -1,6 +1,6 @@ param_float1: lower: 0.00001 upper: 0.1 - default: 0.001 + prior: 0.001 log: falsee is_fidelity: true diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index b92289f56..ac1a4ed36 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -1,13 +1,15 @@ +from __future__ import annotations + from pathlib import Path import pytest + +from neps import Categorical, Constant, Float, Integer from neps.search_spaces.search_space import ( SearchSpaceFromYamlFileError, pipeline_space_from_yaml, ) -from neps import CategoricalParameter, ConstantParameter, FloatParameter, IntegerParameter - BASE_PATH = "tests/test_yaml_search_space/" @@ -17,19 +19,19 @@ def test_correct_yaml_file(path): """Test the function with a correctly formatted YAML file.""" pipeline_space = pipeline_space_from_yaml(path) assert isinstance(pipeline_space, dict) - float1 = FloatParameter(0.00001, 0.1, log=True, is_fidelity=False) + float1 = Float(0.00001, 0.1, log=True, is_fidelity=False) assert float1.__eq__(pipeline_space["param_float1"]) is True - int1 = IntegerParameter(-3, 30, log=False, is_fidelity=True) + int1 = Integer(3, 30, log=False, is_fidelity=True) assert int1.__eq__(pipeline_space["param_int1"]) is True - int2 = IntegerParameter(100, 30000, log=True, is_fidelity=False) + int2 = Integer(100, 30000, log=True, is_fidelity=False) assert int2.__eq__(pipeline_space["param_int2"]) is True - float2 = FloatParameter(3.3e-5, 0.15, log=False) + float2 = Float(3.3e-5, 0.15, log=False) assert float2.__eq__(pipeline_space["param_float2"]) is True - cat1 = CategoricalParameter([2, "sgd", 10e-3]) + cat1 = Categorical([2, "sgd", 10e-3]) assert cat1.__eq__(pipeline_space["param_cat"]) is True - const1 = ConstantParameter(0.5) + const1 = Constant(0.5) assert const1.__eq__(pipeline_space["param_const1"]) is True - const2 = ConstantParameter(1e3) + const2 = Constant(1e3) assert const2.__eq__(pipeline_space["param_const2"]) is True test_correct_yaml_file(BASE_PATH + "correct_config.yaml") @@ -43,13 +45,15 @@ def test_correct_including_priors_yaml_file(): BASE_PATH + "correct_config_including_priors.yml" ) assert isinstance(pipeline_space, dict) - float1 = FloatParameter(0.00001, 0.1, log=True, is_fidelity=False, default=3.3e-2, default_confidence="high") + float1 = Float( + 0.00001, 0.1, log=True, is_fidelity=False, prior=3.3e-2, prior_confidence="high" + ) assert float1.__eq__(pipeline_space["learning_rate"]) is True - int1 = IntegerParameter(3, 30, log=False, is_fidelity=True) + int1 = Integer(3, 30, log=False, is_fidelity=True) assert int1.__eq__(pipeline_space["num_epochs"]) is True - cat1 = CategoricalParameter(["adam", 
90e-3, "rmsprop"], default=90e-3, default_confidence="medium") + cat1 = Categorical(["adam", 90e-3, "rmsprop"], prior=90e-3, prior_confidence="medium") assert cat1.__eq__(pipeline_space["optimizer"]) is True - const1 = ConstantParameter(1e3) + const1 = Constant(1e3) assert const1.__eq__(pipeline_space["dropout_rate"]) is True @@ -72,7 +76,8 @@ def test_yaml_file_with_missing_key(): @pytest.mark.neps_api def test_yaml_file_with_inconsistent_types(): """Test the function with a YAML file having inconsistent types for - 'lower' and 'upper'.""" + 'lower' and 'upper'. + """ with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml(BASE_PATH + "inconsistent_types_config.yml") assert str(excinfo.value.exception_type == "TypeError") @@ -84,16 +89,18 @@ def test_yaml_file_with_inconsistent_types(): @pytest.mark.neps_api def test_yaml_file_including_wrong_types(): """Test the function with a YAML file that defines the wrong but existing type - int to float as an optional argument""" + int to float as an optional argument. + """ with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml(Path(BASE_PATH + "inconsistent_types_config2.yml")) - assert excinfo.value.exception_type == "TypeError" + assert excinfo.value.exception_type == "TypeError" @pytest.mark.neps_api def test_yaml_file_including_unkown_types(): """Test the function with a YAML file that defines an unknown type as an optional - argument""" + argument. + """ with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml(BASE_PATH + "config_including_unknown_types.yaml") assert excinfo.value.exception_type == "TypeError" @@ -102,7 +109,8 @@ def test_yaml_file_including_unkown_types(): @pytest.mark.neps_api def test_yaml_file_including_not_allowed_parameter_keys(): """Test the function with a YAML file that defines an unknown type as an optional - argument""" + argument. + """ with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml(BASE_PATH + "not_allowed_key_config.yml") assert excinfo.value.exception_type == "TypeError" @@ -111,7 +119,8 @@ def test_yaml_file_including_not_allowed_parameter_keys(): @pytest.mark.neps_api def test_yaml_file_default_parameter_not_in_range(): """Test if the default value outside the specified range is - correctly identified and handled.""" + correctly identified and handled. + """ with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml(BASE_PATH + "default_not_in_range_config.yaml") assert excinfo.value.exception_type == "ValueError" @@ -127,8 +136,9 @@ def test_float_log_not_boolean(): @pytest.mark.neps_api def test_float_is_fidelity_not_boolean(): - """Test if an exception is raised when for FloatParameter the 'is_fidelity' - attribute is not a boolean.""" + """Test if an exception is raised when for Float the 'is_fidelity' + attribute is not a boolean. + """ with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml( BASE_PATH + "not_boolean_type_is_fidelity_float_config.yaml" @@ -139,7 +149,18 @@ def test_float_is_fidelity_not_boolean(): @pytest.mark.neps_api def test_categorical_default_value_not_in_choices(): """Test if a ValueError is raised when the default value is not in the choices - for a CategoricalParameter.""" + for a Categorical. 
+ """ with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: pipeline_space_from_yaml(BASE_PATH + "default_value_not_in_choices_config.yaml") assert excinfo.value.exception_type == "ValueError" + + +@pytest.mark.neps_api +def test_incorrect_fidelity_parameter_bounds(): + """Test if a ValueError is raised when the bounds of a fidelity parameter are + not correctly specified. + """ + with pytest.raises(SearchSpaceFromYamlFileError) as excinfo: + pipeline_space_from_yaml(BASE_PATH + "incorrect_fidelity_bounds_config.yaml") + assert excinfo.value.exception_type == "ValueError"
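# For reference, a small sketch of the renamed YAML search-space keys that the
# tests above cover: `prior` and `prior_confidence` replace the old `default`
# and `default_confidence`. Only the helpers imported in test_search_space.py
# are assumed; the file name and parameter values below are illustrative.
from pathlib import Path

from neps.search_spaces.search_space import (
    SearchSpaceFromYamlFileError,
    pipeline_space_from_yaml,
)

space_file = Path("example_space.yaml")  # hypothetical file
space_file.write_text(
    "learning_rate:\n"
    "  lower: 1e-5\n"
    "  upper: 1e-1\n"
    "  log: true\n"
    "  prior: 1e-2\n"
    "  prior_confidence: medium\n"
)

try:
    space = pipeline_space_from_yaml(space_file)
    print(type(space["learning_rate"]).__name__)  # -> "Float", with prior set
except SearchSpaceFromYamlFileError as err:
    # Malformed files are wrapped; the underlying error type is exposed as a
    # string, e.g. "ValueError" for a prior outside [lower, upper].
    print(err.exception_type)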