From e9cbecdf89a51538437a652c207b18c6accd52e9 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Tue, 31 Oct 2023 20:33:44 -0700 Subject: [PATCH 01/19] Add ArrayStore data structure (#395) ## Description The ArrayStore will be part of an upcoming archive refactor in pyribs. ## TODO ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 1 + ribs/archives/__init__.py | 3 + ribs/archives/_array_store.py | 505 +++++++++++++++++++++++++++ tests/archives/array_store_test.py | 539 +++++++++++++++++++++++++++++ 4 files changed, 1048 insertions(+) create mode 100644 ribs/archives/_array_store.py create mode 100644 tests/archives/array_store_test.py diff --git a/HISTORY.md b/HISTORY.md index b699eb90b..b0f5fc8a1 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -7,6 +7,7 @@ #### API - Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) +- Add ArrayStore data structure ({pr}`395`) #### Improvements diff --git a/ribs/archives/__init__.py b/ribs/archives/__init__.py index 7021333da..f9292aca2 100644 --- a/ribs/archives/__init__.py +++ b/ribs/archives/__init__.py @@ -17,6 +17,7 @@ ribs.archives.CVTArchive ribs.archives.SlidingBoundariesArchive ribs.archives.ArchiveBase + ribs.archives.ArrayStore ribs.archives.AddStatus ribs.archives.Elite ribs.archives.EliteBatch @@ -28,6 +29,7 @@ from ribs.archives._archive_base import ArchiveBase from ribs.archives._archive_data_frame import ArchiveDataFrame from ribs.archives._archive_stats import ArchiveStats +from ribs.archives._array_store import ArrayStore from ribs.archives._cqd_score_result import CQDScoreResult from ribs.archives._cvt_archive import CVTArchive from ribs.archives._elite import Elite, EliteBatch @@ -39,6 +41,7 @@ "CVTArchive", "SlidingBoundariesArchive", "ArchiveBase", + "ArrayStore", "AddStatus", "Elite", "ArchiveDataFrame", diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py new file mode 100644 index 000000000..26e9311c1 --- /dev/null +++ b/ribs/archives/_array_store.py @@ -0,0 +1,505 @@ +"""Provides ArrayStore.""" +import itertools +from collections import OrderedDict +from enum import IntEnum + +import numpy as np +from numpy_groupies import aggregate_nb as aggregate +from pandas import DataFrame + +from ribs._utils import readonly + + +class Update(IntEnum): + """Indices into the updates array in ArrayStore.""" + ADD = 0 + CLEAR = 1 + + +class ArrayStoreIterator: + """An iterator for an ArrayStore's entries.""" + + # pylint: disable = protected-access + + def __init__(self, store): + self.store = store + self.iter_idx = 0 + self.state = store._props["updates"].copy() + + def __iter__(self): + """This is the iterator, so it returns itself.""" + return self + + def __next__(self): + """Returns dicts with each entry's data. + + Raises RuntimeError if the store was modified. + """ + if not np.all(self.state == self.store._props["updates"]): + # This check should go before the StopIteration check because a call + # to clear() would cause the len(self.store) to be 0 and thus + # trigger StopIteration. 
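As a minimal standalone sketch of this fail-fast pattern (using a hypothetical `TinyStore`, not pyribs code; the real check compares a snapshot of the `updates` counters in exactly this way):

```python
import numpy as np

class TinyStore:
    """Hypothetical store illustrating the fail-fast iteration check."""

    def __init__(self):
        self._updates = np.array([0, 0])  # [num_adds, num_clears]
        self._items = [1, 2, 3]

    def add(self, item):
        self._updates[0] += 1
        self._items.append(item)

    def clear(self):
        self._updates[1] += 1
        self._items.clear()

    def __iter__(self):
        snapshot = self._updates.copy()  # State when iteration began.
        for item in list(self._items):
            # Compare before yielding so that a clear() cannot silently end
            # iteration early via StopIteration.
            if not np.all(snapshot == self._updates):
                raise RuntimeError("Store was modified during iteration.")
            yield item

store = TinyStore()
for x in store:
    store.add(4)  # Raises RuntimeError on the loop's next step.
```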
+ raise RuntimeError( + "ArrayStore was modified with add() or clear() during " + "iteration.") + + if self.iter_idx >= len(self.store): + raise StopIteration + + idx = self.store._props["occupied_list"][self.iter_idx] + self.iter_idx += 1 + + d = {"index": idx} + for name, arr in self.store._fields.items(): + d[name] = arr[idx] + + return d + + +class ArrayStore: + """Maintains a set of arrays that share a common dimension. + + The ArrayStore consists of several *fields* of data that are manipulated + simultaneously via batch operations. Each field is a NumPy array with a + dimension of ``(capacity, ...)`` and can be of any type. + + Since the arrays all share a common first dimension, they also share a + common index. For instance, if we :meth:`retrieve` the data at indices ``[0, + 2, 1]``, we would get a dict that contains the objective and measures at + indices 0, 2, and 1, e.g.:: + + { + "objective": [-1, 3, -5], + "measures": [[0, 0], [2, 1], [3, 5]], + } + + The ArrayStore supports several further operations, in particular a flexible + :meth:`add` method that inserts data into the ArrayStore. + + Args: + field_desc (dict): Description of fields in the array store. The + description is a dict mapping from a str to a tuple of ``(shape, + dtype)``. For instance, ``{"objective": ((), np.float32), + "measures": ((10,), np.float32)}`` will create an "objective" field + with shape ``(capacity,)`` and a "measures" field with shape + ``(capacity, 10)``. + capacity (int): Total possible entries in the store. + + Attributes: + _props (dict): Properties that are common to every ArrayStore. + + * "capacity": Maximum number of data entries in the store. + * "occupied": Boolean array of size ``(capacity,)`` indicating + whether each index has data associated with it. + * "n_occupied": Number of data entries currently in the store. + * "occupied_list": Array of size ``(capacity,)`` listing all + occupied indices in the store. Only the first ``n_occupied`` + elements will be valid. + * "updates": Int array recording number of calls to functions that + modified the store. + + _fields (dict): Holds all the arrays with their data. + + Raises: + ValueError: One of the fields in ``field_desc`` has an invalid name + (currently, "index" is the only invalid name). + """ + + def __init__(self, field_desc, capacity): + self._props = { + "capacity": capacity, + "occupied": np.zeros(capacity, dtype=bool), + "n_occupied": 0, + "occupied_list": np.empty(capacity, dtype=int), + "updates": np.array([0, 0]), + } + + self._fields = {} + for name, (field_shape, dtype) in field_desc.items(): + if name == "index": + raise ValueError(f"`{name}` is an invalid field name.") + + if isinstance(field_shape, (int, np.integer)): + field_shape = (field_shape,) + + array_shape = (capacity,) + tuple(field_shape) + self._fields[name] = np.empty(array_shape, dtype) + + def __len__(self): + """Number of occupied indices in the store, i.e., number of indices that + have a corresponding data entry.""" + return self._props["n_occupied"] + + def __iter__(self): + """Iterates over entries in the store. + + When iterated over, this iterator yields dicts mapping from the fields + to the individual entries. For instance, if we had an "objective" field, + one entry might look like ``{"index": 1, "objective": 6.0}`` (similar to + :meth:`retrieve`, the index is included in the output). + + Example: + + :: + + for entry in store: + entry["index"] + entry["objective"] + ... 
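A quick sketch of the constructor contract described above, grounded in the `field_desc` documentation and the tests later in this patch:

```python
import numpy as np
from ribs.archives import ArrayStore

# Two fields sharing capacity 100: "objective" stores a scalar per entry,
# while "measures" stores a length-2 array per entry.
store = ArrayStore(
    field_desc={
        "objective": ((), np.float32),
        "measures": ((2,), np.float32),
    },
    capacity=100,
)

assert len(store) == 0            # Counts *occupied* entries, not capacity.
assert store.capacity == 100
assert len(store.occupied_list) == 0
```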
+ """ + return ArrayStoreIterator(self) + + @property + def capacity(self): + """int: Maximum number of data entries in the store.""" + return self._props["capacity"] + + @property + def occupied(self): + """numpy.ndarray: Boolean array of size ``(capacity,)`` indicating + whether each index has a data entry.""" + return readonly(self._props["occupied"].view()) + + @property + def occupied_list(self): + """numpy.ndarray: Integer array listing all occupied indices in the + store.""" + return readonly( + self._props["occupied_list"][:self._props["n_occupied"]]) + + def retrieve(self, indices, fields=None): + """Collects the data at the given indices. + + Args: + indices (array-like): List of indices at which to collect data. + fields (array-like of str): List of fields to include. By default, + all fields will be included. In addition to fields in the store, + "index" is also a valid field. + + Returns: + tuple: 2-element tuple consisting of: + + - **occupied**: Array indicating which indices, among those passed + in, have an associated data entry. For instance, if ``indices`` is + ``[0, 1, 2]`` and only index 2 has data, then ``occupied`` will be + ``[False, False, True]``. + - **data**: Dict mapping from the field name to the field data at + the given indices. For instance, if we have an ``objective`` field + and request data at indices ``[4, 1, 0]``, we might get ``data`` + that looks like ``{"index": [4, 1, 0], "objective": [1.5, 6.0, + 2.3]}``. Observe that we also return the indices as an ``index'' + entry in the dict. The keys in this dict can be modified using the + ``fields`` arg. + + Note that if a given index is not marked as occupied, it can have + any data value associated with it. For instance, if index 1 was + not occupied, then the 6.0 returned above should be ignored. + + All data returned by this method will be a readonly copy, i.e., the + data will not update as the store changes. + + Raises: + ValueError: Invalid field name provided. + """ + indices = np.asarray(indices) + occupied = readonly(self._props["occupied"][indices]) + + data = {} + fields = (itertools.chain(["index"], self._fields) + if fields is None else fields) + for name in fields: + # Note that fancy indexing with indices already creates a copy, so + # only `indices` needs to be copied explicitly. + if name == "index": + data[name] = readonly(np.copy(indices)) + continue + if name not in self._fields: + raise ValueError(f"`{name}` is not a field in this ArrayStore.") + data[name] = readonly(self._fields[name][indices]) + + return occupied, data + + def add(self, indices, new_data, extra_args, transforms): + """Adds new data to the store at the given indices. + + The indices, new_data, and add_info are passed through transforms before + adding to the store. The general idea is that these transforms will + gradually modify the indices, new_data, and add_info. For instance, they + can add new fields to new_data (new_data may not initially have all the + same fields as the store). Alternatively, they can filter out duplicate + indices, eg if multiple entries are being inserted at the same index we + can choose one with the best objective. As another example, the + transforms can add stats to the add_info or delete fields from the + add_info. 
+ + The signature of a transform is as follows:: + + def transform(indices, new_data, add_info, extra_args, + occupied, cur_data) -> + (indices, new_data, add_info): + + Transform parameters: + + - **indices** (array-like): Array of indices at which new_data should be + inserted. + - **new_data** (dict): New data for the given indices. Maps from field + name to the array of new data for that field. + - **add_info** (dict): Information to return to the user about the + addition process. Example info includes whether each entry was + ultimately inserted into the store, as well as general statistics. + For the first transform, this will be an empty dict. + - **extra_args** (dict): Additional arguments for the transform. + - **occupied** (array-like): Whether the given indices are currently + occupied. Same as that given by :meth:`retrieve`. + - **cur_data** (dict): Data at the current indices in the store. Same as + that given by :meth:`retrieve`. + + Transform outputs: + + - **indices** (array-like): Modified indices. We do NOT assume that the + final indices will be unique. + - **new_data** (dict): Modified new_data. At the end of the transforms, + it should have the same keys as the store. If ``indices`` is empty, + ``new_data`` will be ignored. + - **add_info** (dict): Modified add_info. + + Args: + indices (array-like): Initial list of indices for addition. + new_data (dict): Initial data for addition. + extra_args (dict): Dict containing additional arguments to pass to + the transforms. The dict is passed directly (i.e., no unpacking + like with kwargs). + transforms (list): List of transforms on the data to be added. + + Returns: + dict: Final ``add_info`` from the transforms. ``new_data`` and + ``indices`` are not returned; rather, the ``new_data`` is added into + the store at ``indices``. + + Raise: + ValueError: The final version of ``new_data`` does not have the same + keys as the fields of this store. + ValueError: The final version of ``new_data`` has fields that have a + different length than ``indices``. + """ + self._props["updates"][Update.ADD] += 1 + + add_info = {} + for transform in transforms: + occupied, cur_data = self.retrieve(indices) + indices, new_data, add_info = transform(indices, new_data, add_info, + extra_args, occupied, + cur_data) + + # Shortcut when there is nothing to add to the store. + if len(indices) == 0: + return add_info + + # Verify that the array shapes match the indices. + for name, arr in new_data.items(): + if len(arr) != len(indices): + raise ValueError( + f"In `new_data`, the array for `{name}` has length " + f"{len(arr)} but should be the same length as indices " + f"({len(indices)})") + + # Verify that new_data ends up with the correct fields after the + # transforms. + if new_data.keys() != self._fields.keys(): + raise ValueError( + f"`new_data` had keys {new_data.keys()} but should have the " + f"same keys as this ArrayStore, i.e., {self._fields.keys()}") + + # Update occupancy data. + unique_indices = np.where(aggregate(indices, 1, func="len") != 0)[0] + cur_occupied = self._props["occupied"][unique_indices] + new_indices = unique_indices[~cur_occupied] + n_occupied = self._props["n_occupied"] + self._props["occupied"][new_indices] = True + self._props["occupied_list"][n_occupied:n_occupied + + len(new_indices)] = new_indices + self._props["n_occupied"] = n_occupied + len(new_indices) + + # Insert into the ArrayStore. Note that we do not assume indices are + # unique. 
Hence, when updating occupancy data above, we computed the + # unique indices. In contrast, here we let NumPy's default behavior + # handle duplicate indices. + for name, arr in self._fields.items(): + arr[indices] = new_data[name] + + return add_info + + def clear(self): + """Removes all entries from the store.""" + self._props["updates"][Update.CLEAR] += 1 + self._props["n_occupied"] = 0 # Effectively clears occupied_list too. + self._props["occupied"].fill(False) + + def resize(self, capacity): + """Resizes the store to the given capacity. + + Args: + capacity (int): New capacity. + Raises: + ValueError: The new capacity is less than or equal to the current + capacity. + """ + if capacity <= self._props["capacity"]: + raise ValueError( + f"New capacity ({capacity}) must be greater than current " + f"capacity ({self._props['capacity']}.") + + cur_capacity = self._props["capacity"] + self._props["capacity"] = capacity + + cur_occupied = self._props["occupied"] + self._props["occupied"] = np.zeros(capacity, dtype=bool) + self._props["occupied"][:cur_capacity] = cur_occupied + + cur_occupied_list = self._props["occupied_list"] + self._props["occupied_list"] = np.empty(capacity, dtype=int) + self._props["occupied_list"][:cur_capacity] = cur_occupied_list + + for name, cur_arr in self._fields.items(): + new_shape = (capacity,) + cur_arr.shape[1:] + self._fields[name] = np.empty(new_shape, cur_arr.dtype) + self._fields[name][:cur_capacity] = cur_arr + + def as_raw_dict(self): + """Returns the raw data in the ArrayStore as a one-level dictionary. + + To collapse the dict, we prefix each key with ``props.`` or ``fields.``, + so the result looks as follows:: + + { + "props.capacity": ..., + "props.occupied": ..., + ... + "fields.objective": ..., + ... + } + + Returns: + dict: See description above. + """ + d = {} + for prefix, attr in [("props", self._props), ("fields", self._fields)]: + for name, val in attr.items(): + if isinstance(val, np.ndarray): + val = readonly(val.view()) + d[f"{prefix}.{name}"] = val + return d + + @staticmethod + def from_raw_dict(d): + """Loads an ArrayStore from a dict of raw info. + + Args: + d (dict): Dict returned by :meth:`as_raw_dict`. + Returns: + ArrayStore: The new ArrayStore created from d. + Raises: + ValueError: The loaded props dict has the wrong keys. + """ + # pylint: disable = protected-access + + store = ArrayStore({}, 0) # Create an empty store. + + props = { + name[len("props."):]: arr + for name, arr in d.items() + if name.startswith("props.") + } + if props.keys() != store._props.keys(): + raise ValueError( + f"Expected props to have keys {store._props.keys()} but " + f"only found {props.keys()}") + + fields = { + name[len("fields."):]: arr + for name, arr in d.items() + if name.startswith("fields.") + } + + store._props = props + store._fields = fields + + return store + + def as_dict(self, fields=None): + """Creates a dict containing all data entries in the store. + + Equivalent to calling :meth:`retrieve` with :attr:`occupied_list`. + + Args: + fields (array-like of str): See :meth:`retrieve`. + Returns: + dict: See ``data`` in :meth:`retrieve`. ``occupied`` is not returned + since all indices are known to be occupied in this method. + """ + return self.retrieve(self.occupied_list, fields)[1] + + def as_pandas(self, fields=None): + """Creates a DataFrame containing all data entries in the store. + + The returned DataFrame has: + + - 1 column of integers (``np.int32``) for the index, named ``index``. 
+ - For fields that are scalars, a single column with the field name. For + example, ``objective'' would have a single column called + ``objective``. + - For fields that are 1D arrays, multiple columns with the name suffixed + by its index. For instance, if we have a ``measures'' field of length + 10, we create 10 columns with names ``measures_0``, ``measures_1``, + ..., ``measures_9``. + - We do not currently support fields with >1D data. + + In short, the dataframe might look like this: + + +-------+------------+------+-----------+ + | index | measures_0 | ... | objective | + +=======+============+======+===========+ + | | | ... | | + +-------+------------+------+-----------+ + + Args: + fields (array-like of str): List of fields to include. By default, + all fields will be included. In addition to fields in the store, + "index" is also a valid field. + Returns: + pandas.DataFrame: See above. + Raises: + ValueError: Invalid field name provided. + ValueError: There is a field with >1D data. + """ + data = OrderedDict() + indices = self._props["occupied_list"][:self._props["n_occupied"]] + + fields = (itertools.chain(["index"], self._fields) + if fields is None else fields) + + for name in fields: + if name == "index": + data[name] = np.copy(indices) + continue + + if name not in self._fields: + raise ValueError(f"`{name}` is not a field in this ArrayStore.") + + arr = self._fields[name] + if len(arr.shape) == 1: # Scalar entries. + data[name] = arr[indices] + elif len(arr.shape) == 2: # 1D array entries. + arr = arr[indices] + for i in range(arr.shape[1]): + data[f"{name}_{i}"] = arr[:, i] + else: + raise ValueError( + f"Field `{name}` has shape {arr.shape[1:]} -- " + "cannot convert fields with shape >1D to Pandas") + + return DataFrame( + data, + copy=False, # Fancy indexing above copies all fields, and + # indices is explicitly copied. + ) diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py new file mode 100644 index 000000000..ea4eb2f85 --- /dev/null +++ b/tests/archives/array_store_test.py @@ -0,0 +1,539 @@ +"""Tests for ArrayStore.""" +import numpy as np +import pytest + +from ribs.archives import ArrayStore + +# pylint: disable = redefined-outer-name + + +def test_init_invalid_field(): + with pytest.raises(ValueError): + ArrayStore( + { + "index": ((), np.float32), + }, + 10, + ) + + +@pytest.mark.parametrize("shape", [((), (2,), (10,)), ((), 2, 10)], + ids=["tuple", "int"]) +def test_init(shape): + capacity = 10 + store = ArrayStore( + { + "objective": (shape[0], np.float32), + "measures": (shape[1], np.float32), + "solution": (shape[2], np.float32), + }, + capacity, + ) + + assert len(store) == 0 + assert store.capacity == capacity + assert np.all(~store.occupied) + assert len(store.occupied_list) == 0 + + +@pytest.fixture +def store(): + """Simple ArrayStore for testing.""" + return ArrayStore( + field_desc={ + "objective": ((), np.float32), + "measures": ((2,), np.float32), + "solution": ((10,), np.float32), + }, + capacity=10, + ) + + +def test_add_wrong_keys(store): + with pytest.raises(ValueError): + store.add( + [0, 1], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + # Missing `solution` key. + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + +def test_add_mismatch_indices(store): + with pytest.raises(ValueError): + store.add( + [0, 1], + { + "objective": [1.0, 2.0, 3.0], # Length 3 instead of 2. 
+ "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + +def test_simple_add_retrieve_clear(store): + """Add without transforms, retrieve the data, and clear the archive.""" + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + assert len(store) == 2 + assert np.all(store.occupied == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) + assert np.all(np.sort(store.occupied_list) == [3, 5]) + + occupied, data = store.retrieve([5, 3]) + + assert np.all(occupied == [True, True]) + assert data.keys() == set(["index", "objective", "measures", "solution"]) + assert np.all(data["index"] == [5, 3]) + assert np.all(data["objective"] == [2.0, 1.0]) + assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) + assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) + + store.clear() + + assert len(store) == 0 + assert np.all(store.occupied == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + assert len(store.occupied_list) == 0 + + +def test_add_duplicate_indices(store): + store.add( + [3, 3], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + assert len(store) == 1 + assert np.all(store.occupied == [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]) + assert np.all(store.occupied_list == [3]) + + +def test_retrieve_duplicate_indices(store): + store.add( + [3], + { + "objective": [2.0], + "measures": [[3.0, 4.0]], + "solution": [np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + occupied, data = store.retrieve([3, 3]) + + assert np.all(occupied == [True, True]) + assert data.keys() == set(["index", "objective", "measures", "solution"]) + assert np.all(data["index"] == [3, 3]) + assert np.all(data["objective"] == [2.0, 2.0]) + assert np.all(data["measures"] == [[3.0, 4.0], [3.0, 4.0]]) + assert np.all(data["solution"] == [np.ones(10), np.ones(10)]) + + +def test_retrieve_invalid_fields(store): + with pytest.raises(ValueError): + store.retrieve([0, 1], fields=["objective", "foo"]) + + +def test_retrieve_custom_fields(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. 
+ ) + + occupied, data = store.retrieve([5, 3], fields=["index", "objective"]) + + assert np.all(occupied == [True, True]) + assert data.keys() == set(["index", "objective"]) + assert np.all(data["index"] == [5, 3]) + assert np.all(data["objective"] == [2.0, 1.0]) + + +def test_add_simple_transform(store): + + def obj_meas(indices, new_data, add_info, extra_args, occupied, cur_data): + # pylint: disable = unused-argument + new_data["objective"] = np.sum(new_data["solution"], axis=1) + new_data["measures"] = np.asarray(new_data["solution"])[:, :2] + add_info.update(extra_args) + add_info["bar"] = 5 + return indices, new_data, add_info + + add_info = store.add( + [3, 5], + { + "solution": [np.ones(10), 2 * np.ones(10)], + }, + {"foo": 4}, + [obj_meas], + ) + + assert add_info == {"foo": 4, "bar": 5} + + assert len(store) == 2 + assert np.all(store.occupied == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) + assert np.all(np.sort(store.occupied_list) == [3, 5]) + + occupied, data = store.retrieve([3, 5]) + + assert np.all(occupied == [True, True]) + assert data.keys() == set(["index", "objective", "measures", "solution"]) + assert np.all(data["index"] == [3, 5]) + assert np.all(data["objective"] == [10.0, 20.0]) + assert np.all(data["measures"] == [[1.0, 1.0], [2.0, 2.0]]) + assert np.all(data["solution"] == [np.ones(10), 2 * np.ones(10)]) + + +def test_add_empty_transform(store): + # new_data should be able to take on arbitrary values when no indices are + # returned, so we make it an empty dict here. + def empty(indices, new_data, add_info, extra_args, occupied, cur_data): + # pylint: disable = unused-argument + return [], {}, {} + + add_info = store.add( + [3, 5], + { + "solution": [np.ones(10), 2 * np.ones(10)], + }, + {"foo": 4}, + [empty], + ) + + assert add_info == {} + + assert len(store) == 0 + assert np.all(~store.occupied) + assert len(store.occupied_list) == 0 + + +def test_resize_bad_capacity(store): + with pytest.raises(ValueError): + store.resize(store.capacity) + + +def test_resize_to_double_capacity(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + store.resize(store.capacity * 2) + + assert len(store) == 2 + assert np.all(store.occupied == + [0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + assert np.all(np.sort(store.occupied_list) == [3, 5]) + + # Spot-check the fields. + assert np.all(store._fields["objective"][[3, 5]] == [1.0, 2.0]) + + +def test_as_raw_dict(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + d = store.as_raw_dict() + + assert d.keys() == set([ + "props.capacity", + "props.occupied", + "props.n_occupied", + "props.occupied_list", + "props.updates", + "fields.objective", + "fields.measures", + "fields.solution", + ]) + assert d["props.capacity"] == 10 + assert np.all(d["props.occupied"] == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) + assert d["props.n_occupied"] == 2 + assert np.all(np.sort(d["props.occupied_list"][:2]) == [3, 5]) + assert np.all(d["props.updates"] == [1, 0]) # 1 add, 0 clear. 
+ assert np.all(d["fields.objective"][[3, 5]] == [1.0, 2.0]) + assert np.all(d["fields.measures"][[3, 5]] == [[1.0, 2.0], [3.0, 4.0]]) + assert np.all(d["fields.solution"][[3, 5]] == [np.zeros(10), np.ones(10)]) + + +def test_from_raw_dict_invalid_props(store): + d = store.as_raw_dict() + del d["props.capacity"] + with pytest.raises(ValueError): + ArrayStore.from_raw_dict(d) + + +def test_from_raw_dict(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + new_store = ArrayStore.from_raw_dict(store.as_raw_dict()) + + assert len(new_store) == 2 + assert np.all(new_store.occupied == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) + assert np.all(np.sort(new_store.occupied_list) == [3, 5]) + + occupied, data = new_store.retrieve([5, 3]) + + assert np.all(occupied == [True, True]) + assert data.keys() == set(["index", "objective", "measures", "solution"]) + assert np.all(data["index"] == [5, 3]) + assert np.all(data["objective"] == [2.0, 1.0]) + assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) + assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) + + +def test_as_dict(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + d = store.as_dict() + + assert d.keys() == set(["index", "objective", "measures", "solution"]) + assert all(len(v) == 2 for v in d.values()) + + row0 = np.concatenate(([3, 1.0, 1.0, 2.0], np.zeros(10))) + row1 = np.concatenate(([5, 2.0, 3.0, 4.0], np.ones(10))) + + flat = [ + np.concatenate(([d["index"][i]], [d["objective"][i]], d["measures"][i], + d["solution"][i])) for i in range(2) + ] + + # Either permutation. + assert (((flat[0] == row0).all() and (flat[1] == row1).all()) or + ((flat[0] == row1).all() and (flat[1] == row0).all())) + + +def test_as_pandas(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + df = store.as_pandas() + + assert (df.columns == [ + "index", + "objective", + "measures_0", + "measures_1", + "solution_0", + "solution_1", + "solution_2", + "solution_3", + "solution_4", + "solution_5", + "solution_6", + "solution_7", + "solution_8", + "solution_9", + ]).all() + assert (df.dtypes == [int] + [np.float32] * 13).all() + assert len(df) == 2 + + row0 = np.concatenate(([3, 1.0, 1.0, 2.0], np.zeros(10))) + row1 = np.concatenate(([5, 2.0, 3.0, 4.0], np.ones(10))) + + # Either permutation. + assert (((df.loc[0] == row0).all() and (df.loc[1] == row1).all()) or + ((df.loc[0] == row1).all() and (df.loc[1] == row0).all())) + + +def test_as_pandas_invalid_fields(store): + with pytest.raises(ValueError): + store.as_pandas(fields=["objective", "foo"]) + + +def test_as_pandas_custom_fields(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + df = store.as_pandas(fields=["objective", "measures"]) + + assert (df.columns == [ + "objective", + "measures_0", + "measures_1", + ]).all() + assert (df.dtypes == [np.float32] * 3).all() + assert len(df) == 2 + + row0 = [1.0, 1.0, 2.0] + row1 = [2.0, 3.0, 4.0] + + # Either permutation. 
+ assert (((df.loc[0] == row0).all() and (df.loc[1] == row1).all()) or + ((df.loc[0] == row1).all() and (df.loc[1] == row0).all())) + + +def test_as_pandas_2d_fields(store): + store = ArrayStore( + { + "solution": ((10, 10), np.float32), + }, + 10, + ) + with pytest.raises(ValueError): + store.as_pandas() + + +def test_iteration(store): + store.add( + [3], + { + "objective": [1.0], + "measures": [[1.0, 2.0]], + "solution": [np.zeros(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + for entry in store: + assert entry.keys() == set( + ["index", "objective", "measures", "solution"]) + assert np.all(entry["index"] == [3]) + assert np.all(entry["objective"] == [1.0]) + assert np.all(entry["measures"] == [[1.0, 2.0]]) + assert np.all(entry["solution"] == [np.zeros(10)]) + + +def test_add_during_iteration(store): + store.add( + [3], + { + "objective": [1.0], + "measures": [[1.0, 2.0]], + "solution": [np.zeros(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + # Even with just one entry, adding during iteration should still raise an + # error, just like it does in a set. + with pytest.raises(RuntimeError): + for _ in store: + store.add( + [4], + { + "objective": [2.0], + "measures": [[3.0, 4.0]], + "solution": [np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + +def test_clear_during_iteration(store): + store.add( + [3], + { + "objective": [1.0], + "measures": [[1.0, 2.0]], + "solution": [np.zeros(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + with pytest.raises(RuntimeError): + for _ in store: + store.clear() + + +def test_clear_and_add_during_iteration(store): + store.add( + [3], + { + "objective": [1.0], + "measures": [[1.0, 2.0]], + "solution": [np.zeros(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + with pytest.raises(RuntimeError): + for _ in store: + store.clear() + store.add( + [4], + { + "objective": [2.0], + "measures": [[3.0, 4.0]], + "solution": [np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) From 9484717dd2fb3869eb4b39fbbbb28519cb12bcd9 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Tue, 31 Oct 2023 21:34:33 -0700 Subject: [PATCH 02/19] Rename `measure_*` columns to `measures_*` in `as_pandas` (#396) ## Description In other parts of the library, fields like `solution` and `objective` are consistently plural or singular. Since the rest of the library uses `measures` (e.g., EliteBatch has `measures_batch`), this PR changes the columns in `as_pandas` to start with `measures` instead of `measure`. 
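For downstream users, a hypothetical migration sketch (assuming a 2D measure space and an existing `archive`):

```python
df = archive.as_pandas()

# Before this PR, the measure columns were singular:
#   measures = df[["measure_0", "measure_1"]].to_numpy()

# After this PR, they are plural:
measures = df[["measures_0", "measures_1"]].to_numpy()

# Prefix-based selection, as ArchiveDataFrame.measures_batch now does,
# avoids hard-coding the measure dimension:
measure_cols = [c for c in df if c.startswith("measures_")]
measures = df[measure_cols].to_numpy(copy=True)
```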
## TODO ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 4 +++- ribs/archives/_archive_base.py | 16 +++++++-------- ribs/archives/_archive_data_frame.py | 2 +- tests/archives/archive_base_test.py | 4 ++-- tests/archives/archive_data_frame_test.py | 6 +++--- .../sliding_boundaries_archive_test.py | 2 +- tutorials/tom_cruise_dqd.ipynb | 20 +++++++++---------- 7 files changed, 28 insertions(+), 26 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index b0f5fc8a1..8a033c7dd 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,8 +6,10 @@ #### API -- Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) +- **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in + `as_pandas` ({pr}`396`) - Add ArrayStore data structure ({pr}`395`) +- Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) #### Improvements diff --git a/ribs/archives/_archive_base.py b/ribs/archives/_archive_base.py index 7b1705c09..4790d5eef 100644 --- a/ribs/archives/_archive_base.py +++ b/ribs/archives/_archive_base.py @@ -1017,8 +1017,8 @@ def as_pandas(self, include_solutions=True, include_metadata=False): - 1 column of integers (``np.int32``) for the index, named ``index``. See :meth:`index_of` for more info. - - :attr:`measure_dim` columns for the measures, named ``measure_0, - measure_1, ...`` + - :attr:`measure_dim` columns for the measures, named ``measures_0, + measures_1, ...`` - 1 column for the objectives, named ``objective`` - :attr:`solution_dim` columns for the solution parameters, named ``solution_0, solution_1, ...`` @@ -1026,11 +1026,11 @@ def as_pandas(self, include_solutions=True, include_metadata=False): In short, the dataframe looks like this: - +-------+------------+------+------------+-------------+-----+----------+ - | index | measure_0 | ... | objective | solution_0 | ... | metadata | - +=======+============+======+============+=============+=====+==========+ - | | | ... | | | ... | | - +-------+------------+------+------------+-------------+-----+----------+ + +-------+------------+------+-----------+------------+-----+----------+ + | index | measures_0 | ... | objective | solution_0 | ... | metadata | + +=======+============+======+===========+============+=====+==========+ + | | | ... | | | ... | | + +-------+------------+------+-----------+------------+-----+----------+ Compared to :class:`pandas.DataFrame`, the :class:`ArchiveDataFrame` adds methods and attributes which make it easier to manipulate archive @@ -1054,7 +1054,7 @@ def as_pandas(self, include_solutions=True, include_metadata=False): measures_batch = self._measures_arr[indices] for i in range(self._measure_dim): - data[f"measure_{i}"] = measures_batch[:, i] + data[f"measures_{i}"] = measures_batch[:, i] data["objective"] = self._objective_arr[indices] diff --git a/ribs/archives/_archive_data_frame.py b/ribs/archives/_archive_data_frame.py index ba6abf067..9fbff39cf 100644 --- a/ribs/archives/_archive_data_frame.py +++ b/ribs/archives/_archive_data_frame.py @@ -141,7 +141,7 @@ def measures_batch(self): Returns: (n, measure_dim) numpy.ndarray: See above. 
""" - cols = [c for c in self if c.startswith("measure_")] + cols = [c for c in self if c.startswith("measures_")] return self[cols].to_numpy(copy=True) if cols else None def index_batch(self): diff --git a/tests/archives/archive_base_test.py b/tests/archives/archive_base_test.py index 156de25d3..50b4f73ba 100644 --- a/tests/archives/archive_base_test.py +++ b/tests/archives/archive_base_test.py @@ -392,7 +392,7 @@ def test_as_pandas(name, with_elite, include_solutions, include_metadata, data = get_archive_data(name, dtype) # Set up expected columns and data types. - measure_cols = [f"measure_{i}" for i in range(len(data.measures))] + measure_cols = [f"measures_{i}" for i in range(len(data.measures))] expected_cols = ["index"] + measure_cols + ["objective"] expected_dtypes = [np.int32, *[dtype for _ in measure_cols], dtype] if include_solutions: @@ -432,4 +432,4 @@ def test_as_pandas(name, with_elite, include_solutions, include_metadata, expected_data += list(data.solution) if include_metadata: expected_data.append(data.metadata) - assert (df.loc[0, "measure_0":] == expected_data).all() + assert (df.loc[0, "measures_0":] == expected_data).all() diff --git a/tests/archives/archive_data_frame_test.py b/tests/archives/archive_data_frame_test.py index 70b983c90..291ab9fcc 100644 --- a/tests/archives/archive_data_frame_test.py +++ b/tests/archives/archive_data_frame_test.py @@ -27,7 +27,7 @@ def df(data): return ArchiveDataFrame({ "index": index_batch, "objective": objective_batch, - "measure_0": measures_batch[:, 0], + "measures_0": measures_batch[:, 0], "solution_0": solution_batch[:, 0], "metadata": metadata_batch, }) @@ -55,7 +55,7 @@ def test_batch_methods(data, df): @pytest.mark.parametrize( "remove", - ["index", "objective", "measure_0", "metadata", "solution_0"], + ["index", "objective", "measures_0", "metadata", "solution_0"], ids=["index", "objective", "measures", "metadata", "solutions"], ) def test_batch_methods_can_be_none(df, remove): @@ -65,7 +65,7 @@ def test_batch_methods_can_be_none(df, remove): method = { "solution_0": df.solution_batch, "objective": df.objective_batch, - "measure_0": df.measures_batch, + "measures_0": df.measures_batch, "index": df.index_batch, "metadata": df.metadata_batch, }[remove] diff --git a/tests/archives/sliding_boundaries_archive_test.py b/tests/archives/sliding_boundaries_archive_test.py index 8eb23da9a..11d0a9b1b 100644 --- a/tests/archives/sliding_boundaries_archive_test.py +++ b/tests/archives/sliding_boundaries_archive_test.py @@ -133,7 +133,7 @@ def test_initial_remap(): # Check that all the measures are as expected. 
pandas_measures = archive.as_pandas(include_solutions=False)[[ - "measure_0", "measure_1" + "measures_0", "measures_1" ]] measures = list(pandas_measures.itertuples(name=None, index=False)) assert np.isclose(sorted(measures), sorted(expected_measures)).all() diff --git a/tutorials/tom_cruise_dqd.ipynb b/tutorials/tom_cruise_dqd.ipynb index 36d37ceb4..a4b365b16 100644 --- a/tutorials/tom_cruise_dqd.ipynb +++ b/tutorials/tom_cruise_dqd.ipynb @@ -929,22 +929,22 @@ "\n", "# Compute the min and max measures for which solutions were found.\n", "measure_bounds = [\n", - " (round(df['measure_0'].min(), 2), round(df['measure_0'].max(), 2)),\n", - " (round(df['measure_1'].min(), 2), round(df['measure_1'].max(), 2)),\n", + " (round(df['measures_0'].min(), 2), round(df['measures_0'].max(), 2)),\n", + " (round(df['measures_1'].min(), 2), round(df['measures_1'].max(), 2)),\n", "]\n", - "delta_measure_0 = round((measure_bounds[0][1] - measure_bounds[0][0])/img_freq[0], 2)\n", - "delta_measure_1 = round((measure_bounds[1][1] - measure_bounds[1][0])/img_freq[1], 2)\n", + "delta_measures_0 = round((measure_bounds[0][1] - measure_bounds[0][0])/img_freq[0], 2)\n", + "delta_measures_1 = round((measure_bounds[1][1] - measure_bounds[1][0])/img_freq[1], 2)\n", "\n", "for col, row in itertools.product(range(img_freq[1]), range(img_freq[0])):\n", " # Compute bounds of a box in measure space.\n", - " measure_0_low = round(measure_bounds[0][0] + delta_measure_0*row, 2)\n", - " measure_0_high = round(measure_bounds[0][0] + delta_measure_0*(row+1), 2)\n", - " measure_1_low = round(measure_bounds[1][0] + delta_measure_1*col, 2)\n", - " measure_1_high = round(measure_bounds[1][0] + delta_measure_1*(col+1), 2)\n", + " measures_0_low = round(measure_bounds[0][0] + delta_measures_0*row, 2)\n", + " measures_0_high = round(measure_bounds[0][0] + delta_measures_0*(row+1), 2)\n", + " measures_1_low = round(measure_bounds[1][0] + delta_measures_1*col, 2)\n", + " measures_1_high = round(measure_bounds[1][0] + delta_measures_1*(col+1), 2)\n", "\n", " # Query for a solution with measures within this box.\n", - " query_string = (f\"{measure_0_low} <= measure_0 & measure_0 <= {measure_0_high} & \"\n", - " f\"{measure_1_low} <= measure_1 & measure_1 <= {measure_1_high}\")\n", + " query_string = (f\"{measures_0_low} <= measures_0 & measures_0 <= {measures_0_high} & \"\n", + " f\"{measures_1_low} <= measures_1 & measures_1 <= {measures_1_high}\")\n", " df_box = df.query(query_string)\n", "\n", " if not df_box.empty:\n", From 22d40cea7d97f11f7e40af879462bf3f53b5dd4c Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Thu, 2 Nov 2023 17:09:36 -0700 Subject: [PATCH 03/19] Replace Elite and EliteBatch with dicts (#397) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description This PR removes the Elite and EliteBatch namedtuples from the public API; instead, we create an Elite and EliteBatch namedtuple on the fly in each archive. This allows us to support custom field names in each namedtuple in the future. In creating this PR, I was considering whether to create a custom namedtuple for each archive (when the archive is constructed, similar to how pandas has itertuples), or to use a dict. These were the pros and cons I came up with of dicts over such namedtuples: Pros: - This is clearly backwards-incompatible, so users will know something has broken. 
The old tuple unpacking behavior will definitely not work here, and calling the attributes also will not work. - Dicts are less finicky than namedtuples, in that there are no attributes to manage. - Dicts are already a common data structure; people already know how to get the keys etc - It is easier to handle retrieving just a couple of fields. In such a case, we can just add the required keys to the dict. In contrast, we would have to set some fields to None in a namedtuple - We no longer will have a name conflict with the index method of namedtuples Cons: - The old unpacking logic will no longer work - Getting attributes will no longer work - Harder to tell which things are batch because it’s not in the name, although I think it’s usually clear from the context ## TODO - [x] Replace all usages - [x] Double check for usage of Elite and EliteBatch ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 2 + ribs/archives/__init__.py | 6 +- ribs/archives/_archive_base.py | 240 +++++++++--------- ribs/archives/_archive_data_frame.py | 20 +- ribs/archives/_elite.py | 50 ---- ribs/emitters/_evolution_strategy_emitter.py | 2 +- ribs/emitters/_gaussian_emitter.py | 3 +- .../_gradient_arborescence_emitter.py | 2 +- ribs/emitters/_gradient_operator_emitter.py | 4 +- ribs/emitters/_iso_line_emitter.py | 5 +- tests/archives/archive_base_test.py | 90 +++---- tests/archives/archive_data_frame_test.py | 12 +- tests/archives/cvt_archive_test.py | 10 +- tests/archives/grid_archive_test.py | 24 +- tests/schedulers/scheduler_test.py | 6 +- tutorials/arm_repertoire.ipynb | 12 +- tutorials/fooling_mnist.ipynb | 6 +- tutorials/lunar_lander.ipynb | 44 ++-- 18 files changed, 238 insertions(+), 300 deletions(-) delete mode 100644 ribs/archives/_elite.py diff --git a/HISTORY.md b/HISTORY.md index 8a033c7dd..d875de393 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,8 @@ #### API +- **Backwards-incompatible:** Replace Elite and EliteBatch with dicts + ({pr}`397`) - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in `as_pandas` ({pr}`396`) - Add ArrayStore data structure ({pr}`395`) diff --git a/ribs/archives/__init__.py b/ribs/archives/__init__.py index f9292aca2..8c6a7e757 100644 --- a/ribs/archives/__init__.py +++ b/ribs/archives/__init__.py @@ -8,7 +8,7 @@ The archives in this subpackage are arranged in a one-layer hierarchy, with all archives inheriting from :class:`~ribs.archives.ArchiveBase`. This subpackage also contains several utilities associated with the archives, such as -:class:`~ribs.archives.Elite` and :class:`~ribs.archives.ArchiveDataFrame`. +:class:`~ribs.archives.ArchiveDataFrame`. .. 
autosummary:: :toctree: @@ -19,8 +19,6 @@ ribs.archives.ArchiveBase ribs.archives.ArrayStore ribs.archives.AddStatus - ribs.archives.Elite - ribs.archives.EliteBatch ribs.archives.ArchiveDataFrame ribs.archives.ArchiveStats ribs.archives.CQDScoreResult @@ -32,7 +30,6 @@ from ribs.archives._array_store import ArrayStore from ribs.archives._cqd_score_result import CQDScoreResult from ribs.archives._cvt_archive import CVTArchive -from ribs.archives._elite import Elite, EliteBatch from ribs.archives._grid_archive import GridArchive from ribs.archives._sliding_boundaries_archive import SlidingBoundariesArchive @@ -43,7 +40,6 @@ "ArchiveBase", "ArrayStore", "AddStatus", - "Elite", "ArchiveDataFrame", "ArchiveStats", "CQDScoreResult", diff --git a/ribs/archives/_archive_base.py b/ribs/archives/_archive_base.py index 4790d5eef..fb5dcb506 100644 --- a/ribs/archives/_archive_base.py +++ b/ribs/archives/_archive_base.py @@ -11,7 +11,6 @@ from ribs.archives._archive_data_frame import ArchiveDataFrame from ribs.archives._archive_stats import ArchiveStats from ribs.archives._cqd_score_result import CQDScoreResult -from ribs.archives._elite import Elite, EliteBatch _ADD_WARNING = (" Note that starting in pyribs 0.5.0, add() takes in a " "batch of solutions unlike in pyribs 0.4.0, where add() " @@ -45,13 +44,13 @@ def __next__(self): idx = self.archive._occupied_indices[self.iter_idx] self.iter_idx += 1 - return Elite( - self.archive._solution_arr[idx], - self.archive._objective_arr[idx], - self.archive._measures_arr[idx], - idx, - self.archive._metadata_arr[idx], - ) + return { + "solution": self.archive._solution_arr[idx], + "objective": self.archive._objective_arr[idx], + "measures": self.archive._measures_arr[idx], + "index": idx, + "metadata": self.archive._metadata_arr[idx], + } class ArchiveBase(ABC): # pylint: disable = too-many-instance-attributes @@ -273,7 +272,7 @@ def stats(self): @property def best_elite(self): - """:class:`Elite`: The elite with the highest objective in the archive. + """dict: The elite with the highest objective in the archive. None if there are no elites in the archive. @@ -299,15 +298,15 @@ def __len__(self): return self._num_occupied def __iter__(self): - """Creates an iterator over the :class:`Elite`'s in the archive. + """Creates an iterator over the elites in the archive. Example: :: for elite in archive: - elite.sol - elite.obj + elite["solution"] + elite["objective"] ... 
""" return ArchiveIterator(self) @@ -687,13 +686,13 @@ def add(self, if self._stats.obj_max is None or max_obj_insert > self._stats.obj_max: new_obj_max = max_obj_insert - self._best_elite = Elite( - readonly(np.copy(solution_batch_insert[max_idx])), - objective_batch_insert[max_idx], - readonly(np.copy(measures_batch_insert[max_idx])), - index_batch_insert[max_idx], - metadata_batch_insert[max_idx], - ) + self._best_elite = { + "solution": readonly(np.copy(solution_batch_insert[max_idx])), + "objective": objective_batch_insert[max_idx], + "measures": readonly(np.copy(measures_batch_insert[max_idx])), + "index": index_batch_insert[max_idx], + "metadata": metadata_batch_insert[max_idx], + } else: new_obj_max = self._stats.obj_max @@ -811,13 +810,13 @@ def add_single(self, solution, objective, measures, metadata=None): if self._stats.obj_max is None or objective > self._stats.obj_max: new_obj_max = objective - self._best_elite = Elite( - readonly(np.copy(self._solution_arr[index])), - objective, - readonly(np.copy(self._measures_arr[index])), - index, - metadata, - ) + self._best_elite = { + "solution": readonly(np.copy(self._solution_arr[index])), + "objective": objective, + "measures": readonly(np.copy(self._measures_arr[index])), + "index": index, + "metadata": metadata, + } else: new_obj_max = self._stats.obj_max @@ -836,40 +835,33 @@ def retrieve(self, measures_batch): """Retrieves the elites with measures in the same cells as the measures specified. - This method operates in batch, i.e. it takes in a batch of measures and - outputs an :namedtuple:`EliteBatch`. Since :namedtuple:`EliteBatch` is a - namedtuple, it can be unpacked:: - - solution_batch, objective_batch, measures_batch, \\ - index_batch, metadata_batch = archive.retrieve(...) + This method operates in batch, i.e., it takes in a batch of measures and + outputs the batched data for the elites:: - Or the fields may be accessed by name:: + elites = archive.retrieve(...) + elites["solution"] # Shape: (batch_size, solution_dim) + elites["objective"] + elites["measures"] + elites["index"] + elites["metadata"] - elite_batch = archive.retrieve(...) - elite_batch.solution_batch - elite_batch.objective_batch - elite_batch.measures_batch - elite_batch.index_batch - elite_batch.metadata_batch - - If the cell associated with ``measures_batch[i]`` has an elite in it, - then ``elite_batch.solution_batch[i]``, - ``elite_batch.objective_batch[i]``, ``elite_batch.measures_batch[i]``, - ``elite_batch.index_batch[i]``, and ``elite_batch.metadata_batch[i]`` - will be set to the properties of the elite. Note that - ``elite_batch.measures_batch[i]`` may not be equal to - ``measures_batch[i]`` since the measures only need to be in the same - archive cell. + If the cell associated with ``elites["measures"][i]`` has an elite in + it, then ``elites["solution"][i]``, ``elites["objective"][i]``, + ``elites["measures"][i]``, ``elites["index"][i]``, and + ``elites["metadata"][i]`` will be set to the properties of the elite. + Note that ``elites["measures"][i]`` may not be equal to the + ``measures_batch[i]`` passed as an argument, since the measures only + need to be in the same archive cell. 
If the cell associated with ``measures_batch[i]`` *does not* have any elite in it, then the corresponding outputs are set to empty values -- namely: - * ``elite_batch.solution_batch[i]`` will be an array of NaN - * ``elite_batch.objective_batch[i]`` will be NaN - * ``elite_batch.measures_batch[i]`` will be an array of NaN - * ``elite_batch.index_batch[i]`` will be -1 - * ``elite_batch.metadata_batch[i]`` will be None + * ``elites["solution"][i]`` will be an array of NaN + * ``elites["objective"][i]`` will be NaN + * ``elites["measures"][i]`` will be an array of NaN + * ``elites["index"][i]`` will be -1 + * ``elites["metadata"][i]`` will be None If you need to retrieve a *single* elite associated with some measures, consider using :meth:`retrieve_single`. @@ -878,7 +870,7 @@ def retrieve(self, measures_batch): measures_batch (array-like): (batch_size, :attr:`measure_dim`) array of coordinates in measure space. Returns: - EliteBatch: See above. + dict: See above. Raises: ValueError: ``measures_batch`` is not of shape (batch_size, :attr:`measure_dim`). @@ -893,63 +885,67 @@ def retrieve(self, measures_batch): occupied_batch = self._occupied_arr[index_batch] expanded_occupied_batch = occupied_batch[:, None] - return EliteBatch( - solution_batch=readonly( - # For each occupied_batch[i], this np.where selects - # self._solution_arr[index_batch][i] if occupied_batch[i] is - # True. Otherwise, it uses the alternate value (a solution - # array consisting of np.nan). - np.where( - expanded_occupied_batch, - self._solution_arr[index_batch], - np.full(self._solution_dim, np.nan), - )), - objective_batch=readonly( - np.where( - occupied_batch, - self._objective_arr[index_batch], - # Here the alternative is just a scalar np.nan. - np.nan, - )), - measures_batch=readonly( - np.where( - expanded_occupied_batch, - self._measures_arr[index_batch], - # And here it is a measures array of np.nan. - np.full(self._measure_dim, np.nan), - )), - index_batch=readonly( - np.where( - occupied_batch, - index_batch, - # Indices must be integers, so np.nan would not work, hence - # we use -1. - -1, - )), - metadata_batch=readonly( - np.where( - occupied_batch, - self._metadata_arr[index_batch], - None, - )), - ) + return { + "solution": + readonly( + # For each occupied_batch[i], this np.where selects + # self._solution_arr[index_batch][i] if occupied_batch[i] is + # True. Otherwise, it uses the alternate value (a solution + # array consisting of np.nan). + np.where( + expanded_occupied_batch, + self._solution_arr[index_batch], + np.full(self._solution_dim, np.nan), + )), + "objective": + readonly( + np.where( + occupied_batch, + self._objective_arr[index_batch], + # Here the alternative is just a scalar np.nan. + np.nan, + )), + "measures": + readonly( + np.where( + expanded_occupied_batch, + self._measures_arr[index_batch], + # And here it is a measures array of np.nan. + np.full(self._measure_dim, np.nan), + )), + "index": + readonly( + np.where( + occupied_batch, + index_batch, + # Indices must be integers, so np.nan would not work, + # hence we use -1. + -1, + )), + "metadata": + readonly( + np.where( + occupied_batch, + self._metadata_arr[index_batch], + None, + )), + } def retrieve_single(self, measures): """Retrieves the elite with measures in the same cell as the measures specified. While :meth:`retrieve` takes in a *batch* of measures, this method takes - in the measures for only *one* solution and returns a single - :namedtuple:`Elite`. 
+ in the measures for only *one* solution and returns a dict with single + entries. Args: measures (array-like): (:attr:`measure_dim`,) array of measures. Returns: If there is an elite with measures in the same cell as the measures - specified, then this method returns an :namedtuple:`Elite` where all - the fields hold the info of that elite. Otherwise, this method - returns an :namedtuple:`Elite` filled with the same "empty" values - described in :meth:`retrieve`. + specified, then this method returns dict where all the fields hold + the info of the elite. Otherwise, this method returns a dict filled + with the same "empty" values described in :meth:`retrieve`. Raises: ValueError: ``measures`` is not of shape (:attr:`measure_dim`,). ValueError: ``measures`` has non-finite values (inf or NaN). @@ -958,14 +954,10 @@ def retrieve_single(self, measures): check_1d_shape(measures, "measures", self.measure_dim, "measure_dim") check_finite(measures, "measures") - elite_batch = self.retrieve(measures[None]) - return Elite( - elite_batch.solution_batch[0], - elite_batch.objective_batch[0], - elite_batch.measures_batch[0], - elite_batch.index_batch[0], - elite_batch.metadata_batch[0], - ) + return { + field: arr[0] + for field, arr in self.retrieve(measures[None]).items() + } def sample_elites(self, n): """Randomly samples elites from the archive. @@ -974,23 +966,19 @@ def sample_elites(self, n): sample is done independently, so elites may be repeated in the sample. Additional sampling methods may be supported in the future. - Since :namedtuple:`EliteBatch` is a namedtuple, the result can be - unpacked (here we show how to ignore some of the fields):: - - solution_batch, objective_batch, measures_batch, *_ = \\ - archive.sample_elites(32) + Example: - Or the fields may be accessed by name:: + :: - elite = archive.sample_elites(16) - elite.solution_batch - elite.objective_batch - ... + elites = archive.sample_elites(16) + elites["solution"] # Shape: (16, solution_dim) + elites["objective"] + ... Args: n (int): Number of elites to sample. Returns: - EliteBatch: A batch of elites randomly selected from the archive. + dict: Holds a batch of elites randomly selected from the archive. Raises: IndexError: The archive is empty. 
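The emitter hunks later in this patch rely on exactly this dict access; a condensed sketch (`batch_size` is illustrative):

```python
# (batch_size, solution_dim) array of parent solutions:
parents = archive.sample_elites(batch_size)["solution"]

# A single solution, e.g., for restarting an optimizer:
new_x0 = archive.sample_elites(1)["solution"][0]
```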
""" @@ -1000,13 +988,13 @@ def sample_elites(self, n): random_indices = self._rng.integers(self._num_occupied, size=n) selected_indices = self._occupied_indices[random_indices] - return EliteBatch( - readonly(self._solution_arr[selected_indices]), - readonly(self._objective_arr[selected_indices]), - readonly(self._measures_arr[selected_indices]), - readonly(selected_indices), - readonly(self._metadata_arr[selected_indices]), - ) + return { + "solution": readonly(self._solution_arr[selected_indices]), + "objective": readonly(self._objective_arr[selected_indices]), + "measures": readonly(self._measures_arr[selected_indices]), + "index": readonly(selected_indices), + "metadata": readonly(self._metadata_arr[selected_indices]), + } def as_pandas(self, include_solutions=True, include_metadata=False): """Converts the archive into an :class:`ArchiveDataFrame` (a child class diff --git a/ribs/archives/_archive_data_frame.py b/ribs/archives/_archive_data_frame.py index 9fbff39cf..c1b4608bd 100644 --- a/ribs/archives/_archive_data_frame.py +++ b/ribs/archives/_archive_data_frame.py @@ -2,8 +2,6 @@ import numpy as np import pandas as pd -from ribs.archives._elite import Elite - # Developer Notes: # - The documentation for this class is hacked -- to add new methods, manually # modify the template in docs/_templates/autosummary/class.rst @@ -27,11 +25,11 @@ class ArchiveDataFrame(pd.DataFrame): df = archive.as_pandas() - To iterate through every :class:`Elite`, use:: + To iterate through every elite as a dict, use:: for elite in df.iterelites(): - elite.solution - elite.objective + elite["solution"] # Shape: (solution_dim,) + elite["objective"] ... There are also methods to access the solutions, objectives, etc. of @@ -83,10 +81,10 @@ def _constructor(self): return ArchiveDataFrame def iterelites(self): - """Iterator which outputs every :class:`Elite` in the ArchiveDataFrame. + """Iterator that outputs every elite in the ArchiveDataFrame. Data which is unavailable will be turned into None. For example, if - there are no solution columns, then ``elite.solution`` will be None. + there are no solution columns, then ``elite["solution"]`` will be None. """ solution_batch = self.solution_batch() objective_batch = self.objective_batch() @@ -97,7 +95,13 @@ def iterelites(self): none_array = np.empty(len(self), dtype=object) return map( - lambda e: Elite(e[0], e[1], e[2], e[3], e[4]), + lambda e: { + "solution": e[0], + "objective": e[1], + "measures": e[2], + "index": e[3], + "metadata": e[4], + }, zip( none_array if solution_batch is None else solution_batch, none_array if objective_batch is None else objective_batch, diff --git a/ribs/archives/_elite.py b/ribs/archives/_elite.py deleted file mode 100644 index f57d829b3..000000000 --- a/ribs/archives/_elite.py +++ /dev/null @@ -1,50 +0,0 @@ -"""Provides data structures for storing one or more elites.""" -from typing import NamedTuple - -import numpy as np - - -class Elite(NamedTuple): - """Represents a single elite in an archive. - - Note that since this class is a namedtuple, its fields may be accessed - either by name or by integer indices. - """ - - #: Parameters of the elite's solution. - solution: np.ndarray - - #: Objective value evaluation. - objective: float - - #: 1D array of measure values. - measures: np.ndarray - - #: Index of the elite in the archive (see :meth:`ArchiveBase.index_of`). - index: int - - #: Metadata object for the elite. - metadata: object - - -class EliteBatch(NamedTuple): - """Represents a batch of elites. 
- - Each field is an array with dimensions ``(batch, ...)``. Refer to - :class:`Elite` for the non-batched version of this class. - """ - - #: Batch of solutions -- shape ``(batch, solution_dim)`` - solution_batch: np.ndarray - - #: Batch of objectives -- shape ``(batch,)`` - objective_batch: np.ndarray - - #: Batch of measures -- shape ``(batch, measure_dim)`` - measures_batch: np.ndarray - - #: Batch of indices -- shape ``(batch,)`` - index_batch: np.ndarray - - #: Batch of metadata -- shape ``(batch,)`` - metadata_batch: np.ndarray diff --git a/ribs/emitters/_evolution_strategy_emitter.py b/ribs/emitters/_evolution_strategy_emitter.py index 913179cfd..7a27604f6 100644 --- a/ribs/emitters/_evolution_strategy_emitter.py +++ b/ribs/emitters/_evolution_strategy_emitter.py @@ -253,7 +253,7 @@ def tell(self, # Check for reset. if (self._opt.check_stop(ranking_values[indices]) or self._check_restart(new_sols)): - new_x0 = self.archive.sample_elites(1).solution_batch[0] + new_x0 = self.archive.sample_elites(1)["solution"][0] self._opt.reset(new_x0) self._ranker.reset(self, self.archive, self._rng) self._restarts += 1 diff --git a/ribs/emitters/_gaussian_emitter.py b/ribs/emitters/_gaussian_emitter.py index 72eff852f..08d4916ca 100644 --- a/ribs/emitters/_gaussian_emitter.py +++ b/ribs/emitters/_gaussian_emitter.py @@ -132,8 +132,7 @@ def ask(self): self.upper_bounds) parents = np.expand_dims(self.x0, axis=0) else: - parents = self.archive.sample_elites( - self._batch_size).solution_batch + parents = self.archive.sample_elites(self._batch_size)["solution"] noise = self._rng.normal( scale=self._sigma, diff --git a/ribs/emitters/_gradient_arborescence_emitter.py b/ribs/emitters/_gradient_arborescence_emitter.py index 4baf3b82b..215274271 100644 --- a/ribs/emitters/_gradient_arborescence_emitter.py +++ b/ribs/emitters/_gradient_arborescence_emitter.py @@ -453,7 +453,7 @@ def tell(self, # Check for reset. 
if (self._opt.check_stop(ranking_values[indices]) or self._check_restart(new_sols)): - new_coeff = self.archive.sample_elites(1).solution_batch[0] + new_coeff = self.archive.sample_elites(1)["solution"][0] self._grad_opt.reset(new_coeff) self._opt.reset(np.zeros(self._num_coefficients)) self._ranker.reset(self, self.archive, self._rng) diff --git a/ribs/emitters/_gradient_operator_emitter.py b/ribs/emitters/_gradient_operator_emitter.py index e02d68ea4..baec1b592 100644 --- a/ribs/emitters/_gradient_operator_emitter.py +++ b/ribs/emitters/_gradient_operator_emitter.py @@ -208,7 +208,7 @@ def ask_dqd(self): if self.archive.empty: parents = np.expand_dims(self.x0, axis=0) else: - parents = self.archive.sample_elites(self.batch_size).solution_batch + parents = self.archive.sample_elites(self.batch_size)["solution"] if self._use_isolinedd: noise = self._rng.normal( @@ -218,7 +218,7 @@ def ask_dqd(self): ).astype(self.archive.dtype) directions = self.archive.sample_elites( - self._batch_size).solution_batch - parents + self._batch_size)["solution"] - parents line_gaussian = self._rng.normal( loc=0.0, diff --git a/ribs/emitters/_iso_line_emitter.py b/ribs/emitters/_iso_line_emitter.py index c9d59a43a..a9f57549f 100644 --- a/ribs/emitters/_iso_line_emitter.py +++ b/ribs/emitters/_iso_line_emitter.py @@ -152,10 +152,9 @@ def ask(self): if self.archive.empty: solution_batch = np.expand_dims(self._x0, axis=0) + iso_gaussian else: - parents = self.archive.sample_elites( - self._batch_size).solution_batch + parents = self.archive.sample_elites(self._batch_size)["solution"] directions = ( - self.archive.sample_elites(self._batch_size).solution_batch - + self.archive.sample_elites(self._batch_size)["solution"] - parents) line_gaussian = self._rng.normal( scale=self._line_sigma, diff --git a/tests/archives/archive_base_test.py b/tests/archives/archive_base_test.py index 50b4f73ba..40a880fc6 100644 --- a/tests/archives/archive_base_test.py +++ b/tests/archives/archive_base_test.py @@ -38,12 +38,12 @@ def test_invalid_dtype(): def test_iteration(): data = get_archive_data("GridArchive") for elite in data.archive_with_elite: - assert np.isclose(elite.solution, data.solution).all() - assert np.isclose(elite.objective, data.objective) - assert np.isclose(elite.measures, data.measures).all() - assert elite.index == data.archive_with_elite.grid_to_int_index( + assert np.isclose(elite["solution"], data.solution).all() + assert np.isclose(elite["objective"], data.objective) + assert np.isclose(elite["measures"], data.measures).all() + assert elite["index"] == data.archive_with_elite.grid_to_int_index( [data.grid_indices])[0] - assert elite.metadata == data.metadata + assert elite["metadata"] == data.metadata def test_add_during_iteration(add_mode): @@ -175,9 +175,9 @@ def test_best_elite(add_mode): else: archive.add([[1, 2, 3]], [1.0], [[0, 0]]) - assert np.isclose(archive.best_elite.solution, [1, 2, 3]).all() - assert np.isclose(archive.best_elite.objective, 1.0) - assert np.isclose(archive.best_elite.measures, [0, 0]).all() + assert np.isclose(archive.best_elite["solution"], [1, 2, 3]).all() + assert np.isclose(archive.best_elite["objective"], 1.0) + assert np.isclose(archive.best_elite["measures"], [0, 0]).all() assert np.isclose(archive.stats.obj_max, 1.0) # Add an elite into the same cell as the previous elite -- best_elite should @@ -187,9 +187,9 @@ def test_best_elite(add_mode): else: archive.add([[4, 5, 6]], [2.0], [[0, 0]]) - assert np.isclose(archive.best_elite.solution, [4, 5, 6]).all() - assert 
np.isclose(archive.best_elite.objective, 2.0).all() - assert np.isclose(archive.best_elite.measures, [0, 0]).all() + assert np.isclose(archive.best_elite["solution"], [4, 5, 6]).all() + assert np.isclose(archive.best_elite["objective"], 2.0).all() + assert np.isclose(archive.best_elite["measures"], [0, 0]).all() assert np.isclose(archive.stats.obj_max, 2.0) @@ -208,9 +208,9 @@ def test_best_elite_with_threshold(add_mode): # Threshold should now be 0.1 * 1 + (1 - 0.1) * 0. - assert np.isclose(archive.best_elite.solution, [1, 2, 3]).all() - assert np.isclose(archive.best_elite.objective, 1.0).all() - assert np.isclose(archive.best_elite.measures, [0, 0]).all() + assert np.isclose(archive.best_elite["solution"], [1, 2, 3]).all() + assert np.isclose(archive.best_elite["objective"], 1.0).all() + assert np.isclose(archive.best_elite["measures"], [0, 0]).all() assert np.isclose(archive.stats.obj_max, 1.0) # Add an elite with lower objective value than best elite but higher @@ -222,9 +222,9 @@ def test_best_elite_with_threshold(add_mode): # Best elite remains the same even though this is a non-elitist archive and # the best elite is no longer in the archive. - assert np.isclose(archive.best_elite.solution, [1, 2, 3]).all() - assert np.isclose(archive.best_elite.objective, 1.0) - assert np.isclose(archive.best_elite.measures, [0, 0]).all() + assert np.isclose(archive.best_elite["solution"], [1, 2, 3]).all() + assert np.isclose(archive.best_elite["objective"], 1.0) + assert np.isclose(archive.best_elite["measures"], [0, 0]).all() assert np.isclose(archive.stats.obj_max, 1.0) @@ -320,21 +320,21 @@ def test_basic_stats(data): def test_retrieve_gets_correct_elite(data): - elite_batch = data.archive_with_elite.retrieve([data.measures]) - assert np.all(elite_batch.solution_batch[0] == data.solution) - assert elite_batch.objective_batch[0] == data.objective - assert np.all(elite_batch.measures_batch[0] == data.measures) - # Avoid checking elite_batch.idx since the meaning varies by archive. - assert elite_batch.metadata_batch[0] == data.metadata + elites = data.archive_with_elite.retrieve([data.measures]) + assert np.all(elites["solution"][0] == data.solution) + assert elites["objective"][0] == data.objective + assert np.all(elites["measures"][0] == data.measures) + # Avoid checking elites["index"] since the meaning varies by archive. + assert elites["metadata"][0] == data.metadata def test_retrieve_empty_values(data): - elite_batch = data.archive.retrieve([data.measures]) - assert np.all(np.isnan(elite_batch.solution_batch[0])) - assert np.isnan(elite_batch.objective_batch) - assert np.all(np.isnan(elite_batch.measures_batch[0])) - assert elite_batch.index_batch[0] == -1 - assert elite_batch.metadata_batch[0] is None + elites = data.archive.retrieve([data.measures]) + assert np.all(np.isnan(elites["solution"][0])) + assert np.isnan(elites["objective"]) + assert np.all(np.isnan(elites["measures"][0])) + assert elites["index"][0] == -1 + assert elites["metadata"][0] is None def test_retrieve_wrong_shape(data): @@ -344,20 +344,20 @@ def test_retrieve_wrong_shape(data): def test_retrieve_single_gets_correct_elite(data): elite = data.archive_with_elite.retrieve_single(data.measures) - assert np.all(elite.solution == data.solution) - assert elite.objective == data.objective - assert np.all(elite.measures == data.measures) - # Avoid checking elite.idx since the meaning varies by archive. 
- assert elite.metadata == data.metadata + assert np.all(elite["solution"] == data.solution) + assert elite["objective"] == data.objective + assert np.all(elite["measures"] == data.measures) + # Avoid checking elite["index"] since the meaning varies by archive. + assert elite["metadata"] == data.metadata def test_retrieve_single_empty_values(data): elite = data.archive.retrieve_single(data.measures) - assert np.all(np.isnan(elite.solution)) - assert np.isnan(elite.objective) - assert np.all(np.isnan(elite.measures)) - assert elite.index == -1 - assert elite.metadata is None + assert np.all(np.isnan(elite["solution"])) + assert np.isnan(elite["objective"]) + assert np.all(np.isnan(elite["measures"])) + assert elite["index"] == -1 + assert elite["metadata"] is None def test_retrieve_single_wrong_shape(data): @@ -366,12 +366,12 @@ def test_retrieve_single_wrong_shape(data): def test_sample_elites_gets_single_elite(data): - elite_batch = data.archive_with_elite.sample_elites(2) - assert np.all(elite_batch.solution_batch == data.solution) - assert np.all(elite_batch.objective_batch == data.objective) - assert np.all(elite_batch.measures_batch == data.measures) - # Avoid checking elite.idx since the meaning varies by archive. - assert np.all(elite_batch.metadata_batch == data.metadata) + elites = data.archive_with_elite.sample_elites(2) + assert np.all(elites["solution"] == data.solution) + assert np.all(elites["objective"] == data.objective) + assert np.all(elites["measures"] == data.measures) + # Avoid checking elite["index"] since the meaning varies by archive. + assert np.all(elites["metadata"] == data.metadata) def test_sample_elites_fails_when_empty(data): diff --git a/tests/archives/archive_data_frame_test.py b/tests/archives/archive_data_frame_test.py index 291ab9fcc..d83f4be0a 100644 --- a/tests/archives/archive_data_frame_test.py +++ b/tests/archives/archive_data_frame_test.py @@ -21,7 +21,7 @@ def data(): @pytest.fixture def df(data): - """Mimics the ArchiveDataFrame which an as_pandas method would generate.""" + """Mimics the ArchiveDataFrame that an as_pandas method would generate.""" (solution_batch, objective_batch, measures_batch, index_batch, metadata_batch) = data return ArchiveDataFrame({ @@ -36,11 +36,11 @@ def df(data): def test_iterelites(data, df): for elite, (solution, objective, measures, index, metadata) in zip(df.iterelites(), zip(*data)): - assert np.isclose(elite.solution, solution).all() - assert np.isclose(elite.objective, objective) - assert np.isclose(elite.measures, measures).all() - assert elite.index == index - assert elite.metadata == metadata + assert np.isclose(elite["solution"], solution).all() + assert np.isclose(elite["objective"], objective) + assert np.isclose(elite["measures"], measures).all() + assert elite["index"] == index + assert elite["metadata"] == metadata def test_batch_methods(data, df): diff --git a/tests/archives/cvt_archive_test.py b/tests/archives/cvt_archive_test.py index 8972d6de0..0a5fcedae 100644 --- a/tests/archives/cvt_archive_test.py +++ b/tests/archives/cvt_archive_test.py @@ -23,11 +23,11 @@ def assert_archive_elite(archive, solution, objective, measures, centroid, """Asserts that the archive has one specific elite.""" assert len(archive) == 1 elite = list(archive)[0] - assert np.isclose(elite.solution, solution).all() - assert np.isclose(elite.objective, objective).all() - assert np.isclose(elite.measures, measures).all() - assert np.isclose(archive.centroids[elite.index], centroid).all() - assert elite.metadata == metadata + 
assert np.isclose(elite["solution"], solution).all() + assert np.isclose(elite["objective"], objective).all() + assert np.isclose(elite["measures"], measures).all() + assert np.isclose(archive.centroids[elite["index"]], centroid).all() + assert elite["metadata"] == metadata def test_samples_bad_shape(use_kd_tree): diff --git a/tests/archives/grid_archive_test.py b/tests/archives/grid_archive_test.py index df5d87876..3482c1ff7 100644 --- a/tests/archives/grid_archive_test.py +++ b/tests/archives/grid_archive_test.py @@ -20,14 +20,14 @@ def assert_archive_elite(archive, solution, objective, measures, grid_indices, """Asserts that the archive has one specific elite.""" assert len(archive) == 1 elite = list(archive)[0] - assert np.isclose(elite.solution, solution).all() - assert np.isclose(elite.objective, objective).all() - assert np.isclose(elite.measures, measures).all() - assert elite.index == archive.grid_to_int_index([grid_indices]) - assert elite.metadata == metadata + assert np.isclose(elite["solution"], solution).all() + assert np.isclose(elite["objective"], objective).all() + assert np.isclose(elite["measures"], measures).all() + assert elite["index"] == archive.grid_to_int_index([grid_indices]) + assert elite["metadata"] == metadata -def assert_archive_elite_batch( +def assert_archive_elites( archive, batch_size, solution_batch=None, @@ -316,7 +316,7 @@ def test_add_batch_all_new(data): assert (status_batch == 2).all() assert np.isclose(value_batch, [0, 0, 0, 1]).all() - assert_archive_elite_batch( + assert_archive_elites( archive=data.archive, batch_size=3, solution_batch=[[1, 2, 3]] * 3, @@ -339,7 +339,7 @@ def test_add_batch_none_inserted(data): assert (status_batch == 0).all() assert np.isclose(value_batch, -1.0).all() - assert_archive_elite_batch( + assert_archive_elites( archive=data.archive_with_elite, batch_size=1, solution_batch=[data.solution], @@ -362,7 +362,7 @@ def test_add_batch_with_improvement(data): assert (status_batch == 1).all() assert np.isclose(value_batch, 1.0).all() - assert_archive_elite_batch( + assert_archive_elites( archive=data.archive_with_elite, batch_size=1, solution_batch=[[1, 2, 3]], @@ -402,7 +402,7 @@ def test_add_batch_mixed_statuses(data): assert (status_batch == [0, 0, 1, 1, 2, 2]).all() assert np.isclose(value_batch, [-1, -2, 1, 2, 1, 2]).all() - assert_archive_elite_batch( + assert_archive_elites( archive=data.archive_with_elite, batch_size=2, solution_batch=[[1, 2, 3]] * 2, @@ -439,7 +439,7 @@ def test_add_batch_first_solution_wins_in_ties(data): assert (status_batch == [1, 1, 2, 2]).all() assert np.isclose(value_batch, [1, 1, 3, 3]).all() - assert_archive_elite_batch( + assert_archive_elites( archive=data.archive_with_elite, batch_size=2, # The first and third solution should be inserted since they come first. 
@@ -471,7 +471,7 @@ def test_add_batch_not_inserted_if_below_threshold_min(): assert (status_batch == [0, 0, 2, 2]).all() assert np.isclose(value_batch, [-10.0, -10.0, 20.0, 20.0]).all() - assert_archive_elite_batch( + assert_archive_elites( archive=archive, batch_size=1, solution_batch=[[1, 2, 3]], diff --git a/tests/schedulers/scheduler_test.py b/tests/schedulers/scheduler_test.py index adda05a86..539f0dc33 100644 --- a/tests/schedulers/scheduler_test.py +++ b/tests/schedulers/scheduler_test.py @@ -6,7 +6,7 @@ from ribs.emitters import GaussianEmitter from ribs.schedulers import BanditScheduler, Scheduler -from ..archives.grid_archive_test import assert_archive_elite_batch +from ..archives.grid_archive_test import assert_archive_elites # pylint: disable = redefined-outer-name @@ -162,7 +162,7 @@ def test_tell_inserts_solutions_into_archive(add_mode, tell_metadata): # the archive. scheduler.tell(np.ones(batch_size), measures_batch, metadata) - assert_archive_elite_batch( + assert_archive_elites( archive=scheduler.archive, batch_size=batch_size, objective_batch=np.ones(batch_size), @@ -195,7 +195,7 @@ def test_tell_inserts_solutions_with_multiple_emitters(add_mode, tell_metadata): _ = scheduler.ask() scheduler.tell(np.ones(batch_size), measures_batch, metadata) - assert_archive_elite_batch( + assert_archive_elites( archive=scheduler.archive, batch_size=batch_size, objective_batch=np.ones(batch_size), diff --git a/tutorials/arm_repertoire.ipynb b/tutorials/arm_repertoire.ipynb index bb4cfcd03..07f0de50b 100644 --- a/tutorials/arm_repertoire.ipynb +++ b/tutorials/arm_repertoire.ipynb @@ -417,10 +417,10 @@ "source": [ "fig, ax = plt.subplots(2, 2, figsize=(8, 8))\n", "ax = ax.ravel()\n", - "elite_batch = archive.sample_elites(len(ax))\n", + "elites = archive.sample_elites(len(ax))\n", "for i in range(len(ax)):\n", - " visualize(elite_batch.solution_batch[i], link_lengths,\n", - " elite_batch.objective_batch[i], ax[i])" + " visualize(elites[\"solution\"][i], link_lengths,\n", + " elites[\"objective\"][i], ax[i])" ] }, { @@ -451,8 +451,8 @@ "source": [ "elite = archive.retrieve_single([0, 0])\n", "_, ax = plt.subplots()\n", - "if elite.solution is not None: # This is None if there is no solution for [0,0].\n", - " visualize(elite.solution, link_lengths, elite.objective, ax)" + "if elite[\"solution\"] is not None: # This is None if there is no solution for [0,0].\n", + " visualize(elite[\"solution\"], link_lengths, elite[\"objective\"], ax)" ] }, { @@ -511,7 +511,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.12" + "version": "3.8.17" } }, "nbformat": 4, diff --git a/tutorials/fooling_mnist.ipynb b/tutorials/fooling_mnist.ipynb index dea826d7c..11900cb54 100644 --- a/tutorials/fooling_mnist.ipynb +++ b/tutorials/fooling_mnist.ipynb @@ -303,13 +303,13 @@ "\n", "# Display images.\n", "for elite in archive:\n", - " digit = elite.index\n", + " digit = elite[\"index\"]\n", " found.add(digit)\n", "\n", " # No need to normalize image because we want to see the original.\n", - " ax[digit].imshow(elite.solution.reshape(28, 28), cmap=\"Greys\")\n", + " ax[digit].imshow(elite[\"solution\"].reshape(28, 28), cmap=\"Greys\")\n", " ax[digit].set_axis_off()\n", - " ax[digit].set_title(f\"{digit} | Score: {elite.objective:.3f}\", pad=8)\n", + " ax[digit].set_title(f\"{digit} | Score: {elite['objective']:.3f}\", pad=8)\n", "\n", "# Mark digits that we did not generate images for.\n", "for digit in range(10):\n", diff --git a/tutorials/lunar_lander.ipynb 
b/tutorials/lunar_lander.ipynb index f816d2292..87ffaf4e3 100644 --- a/tutorials/lunar_lander.ipynb +++ b/tutorials/lunar_lander.ipynb @@ -792,10 +792,10 @@ "elite = archive.retrieve_single([-0.4, -0.10])\n", "# NaN objective indicates the solution could not be retrieved because there was\n", "# no elite in the corresponding cell.\n", - "if not np.isnan(elite.objective):\n", - " print(f\"Objective: {elite.objective}\")\n", - " print(f\"Measures: (x-pos: {elite.measures[0]}, y-vel: {elite.measures[1]})\")\n", - " display_video(elite.solution)" + "if not np.isnan(elite[\"objective\"]):\n", + " print(f\"Objective: {elite['objective']}\")\n", + " print(f\"Measures: (x-pos: {elite['measures'][0]}, y-vel: {elite['measures'][1]})\")\n", + " display_video(elite[\"solution\"])" ] }, { @@ -808,7 +808,7 @@ "\n", "**Note: Batch and Single Methods**\n", "\n", - "> `retrieve_single` returns an [Elite](https://docs.pyribs.org/en/latest/api/ribs.archives.Elite.html) object given a single `measures` array. Meanwhile, the [`retrieve`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.retrieve) method takes in a _batch_ of measures (named `measures_batch`) and returns an [EliteBatch](https://docs.pyribs.org/en/latest/api/ribs.archives.EliteBatch.html) object. Several archive methods in pyribs follow a similar pattern of having a batch and single version, e.g., [`add`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.add) and [`add_single`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.add_single)." + "> `retrieve_single` returns an elite represented as a dict, given a single `measures` array. Meanwhile, the [`retrieve`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.retrieve) method takes in a _batch_ of measures (named `measures_batch`) and returns a dict holding batches of data. Several archive methods in pyribs follow a similar pattern of having a batch and single version, e.g., [`add`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.add) and [`add_single`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.add_single)." ] }, { @@ -849,10 +849,10 @@ ], "source": [ "elite = archive.retrieve_single([0.6, -0.10])\n", - "if not np.isnan(elite.objective):\n", - " print(f\"Objective: {elite.objective}\")\n", - " print(f\"Measures: (x-pos: {elite.measures[0]}, y-vel: {elite.measures[1]})\")\n", - " display_video(elite.solution)" + "if not np.isnan(elite[\"objective\"]):\n", + " print(f\"Objective: {elite['objective']}\")\n", + " print(f\"Measures: (x-pos: {elite['measures'][0]}, y-vel: {elite['measures'][1]})\")\n", + " display_video(elite[\"solution\"])" ] }, { @@ -902,10 +902,10 @@ ], "source": [ "elite = archive.retrieve_single([0.0, -0.10])\n", - "if not np.isnan(elite.objective):\n", - " print(f\"Objective: {elite.objective}\")\n", - " print(f\"Measures: (x-pos: {elite.measures[0]}, y-vel: {elite.measures[1]})\")\n", - " display_video(elite.solution)" + "if not np.isnan(elite[\"objective\"]):\n", + " print(f\"Objective: {elite['objective']}\")\n", + " print(f\"Measures: (x-pos: {elite['measures'][0]}, y-vel: {elite['measures'][1]})\")\n", + " display_video(elite[\"solution\"])" ] }, { @@ -935,7 +935,7 @@ "id": "0_RYE1rTFKhu" }, "source": [ - "Below we visualize several of these high-performing solutions. 
The `iterelites` method is available because `as_pandas` returns an [`ArchiveDataFrame`](https://docs.pyribs.org/en/latest/api/ribs.archives.ArchiveDataFrame.html), a subclass of the Pandas DataFrame specialized for pyribs. `iterelites` iterates over the entries in the DataFrame and returns them as [`Elite`](https://docs.pyribs.org/en/latest/api/ribs.archives.Elite.html) objects." + "Below we visualize several of these high-performing solutions. The `iterelites` method is available because `as_pandas` returns an [`ArchiveDataFrame`](https://docs.pyribs.org/en/latest/api/ribs.archives.ArchiveDataFrame.html), a subclass of the Pandas DataFrame specialized for pyribs. `iterelites` iterates over the entries in the DataFrame and returns them as dicts." ] }, { @@ -1023,9 +1023,9 @@ "source": [ "if len(high_perf_sols) > 0:\n", " for elite in high_perf_sols.iloc[[0, len(high_perf_sols) // 2, -1]].iterelites():\n", - " print(f\"Objective: {elite.objective}\")\n", - " print(f\"Measures: (x-pos: {elite.measures[0]}, y-vel: {elite.measures[1]})\")\n", - " display_video(elite.solution)" + " print(f\"Objective: {elite['objective']}\")\n", + " print(f\"Measures: (x-pos: {elite['measures'][0]}, y-vel: {elite['measures'][1]})\")\n", + " display_video(elite['solution'])" ] }, { @@ -1034,7 +1034,7 @@ "id": "P3cQJ2ctOBG5" }, "source": [ - "And finally, the [`best_elite`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.best_elite) property is the [`Elite`](https://docs.pyribs.org/en/latest/api/ribs.archives.Elite.html) which has the highest performance in the archive." + "And finally, the [`best_elite`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.best_elite) property is the elite that has the highest performance in the archive." ] }, { @@ -1074,9 +1074,9 @@ } ], "source": [ - "print(f\"Objective: {archive.best_elite.objective}\")\n", - "print(f\"Measures: (x-pos: {archive.best_elite.measures[0]}, y-vel: {archive.best_elite.measures[1]})\")\n", - "display_video(archive.best_elite.solution)" + "print(f\"Objective: {archive.best_elite['objective']}\")\n", + "print(f\"Measures: (x-pos: {archive.best_elite['measures'][0]}, y-vel: {archive.best_elite['measures'][1]})\")\n", + "display_video(archive.best_elite['solution'])" ] }, { @@ -1155,7 +1155,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.8.17" } }, "nbformat": 4, From 1038582da456c6e9412926980808261779fe1f12 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Thu, 2 Nov 2023 17:32:18 -0700 Subject: [PATCH 04/19] Make ArrayStore fields be valid identifiers (#398) ## Description Although we mostly use the field names as dict keys, which can take on any value, it may be useful in the future to be able to use fields as identifiers. For example, if we are using kwargs, it makes sense to be able to pass in a field name as `f(field1=[...])`, which is not possible if `field1` is an invalid identifier like `field foo` (note the space). 
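As a concrete sketch of the motivating usage (hypothetical code; `some_function` stands in for any future kwargs-based API, while the identifier check itself is just `str.isidentifier()`, per the diff below):

```python
import numpy as np

from ribs.archives import ArrayStore

# Valid identifiers can double as keyword arguments later on, e.g.,
# some_function(objective=..., measures=...).
store = ArrayStore(
    {
        "objective": ((), np.float32),
        "measures": ((2,), np.float32),
    },
    10,
)

# A name containing a space fails str.isidentifier(), so the constructor
# now raises ValueError:
#     ArrayStore({"field foo": ((), np.float32)}, 10)
print("field foo".isidentifier())  # False
```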
## TODO - [x] Add check - [x] Add test ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 2 +- ribs/archives/_array_store.py | 14 ++++++++++---- tests/archives/array_store_test.py | 13 ++++++++++++- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index d875de393..ab38c64e7 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,7 +10,7 @@ ({pr}`397`) - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in `as_pandas` ({pr}`396`) -- Add ArrayStore data structure ({pr}`395`) +- Add ArrayStore data structure ({pr}`395`, {pr}`398`) - Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) #### Improvements diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 26e9311c1..adc877ac7 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -82,7 +82,8 @@ class ArrayStore: dtype)``. For instance, ``{"objective": ((), np.float32), "measures": ((10,), np.float32)}`` will create an "objective" field with shape ``(capacity,)`` and a "measures" field with shape - ``(capacity, 10)``. + ``(capacity, 10)``. Note that field names must be valid Python + identifiers. capacity (int): Total possible entries in the store. Attributes: @@ -101,8 +102,10 @@ class ArrayStore: _fields (dict): Holds all the arrays with their data. Raises: - ValueError: One of the fields in ``field_desc`` has an invalid name - (currently, "index" is the only invalid name). + ValueError: One of the fields in ``field_desc`` has a reserved name + (currently, "index" is the only reserved name). + ValueError: One of the fields in ``field_desc`` has a name that is not a + valid Python identifier. """ def __init__(self, field_desc, capacity): @@ -117,7 +120,10 @@ def __init__(self, field_desc, capacity): self._fields = {} for name, (field_shape, dtype) in field_desc.items(): if name == "index": - raise ValueError(f"`{name}` is an invalid field name.") + raise ValueError(f"`{name}` is a reserved field name.") + if not name.isidentifier(): + raise ValueError( + f"Field names must be valid identifiers: `{name}`") if isinstance(field_shape, (int, np.integer)): field_shape = (field_shape,) diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index ea4eb2f85..631fc6dd6 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -7,7 +7,7 @@ # pylint: disable = redefined-outer-name -def test_init_invalid_field(): +def test_init_reserved_field(): with pytest.raises(ValueError): ArrayStore( { @@ -17,6 +17,17 @@ def test_init_invalid_field(): ) +def test_init_invalid_field(): + with pytest.raises(ValueError): + ArrayStore( + { + # The space makes this an invalid identifier. 
+ "foo bar": ((), np.float32), + }, + 10, + ) + + @pytest.mark.parametrize("shape", [((), (2,), (10,)), ((), 2, 10)], ids=["tuple", "int"]) def test_init(shape): From f8847b1ca833148708701b40bf2fc47ece2a0d52 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Fri, 3 Nov 2023 03:28:10 -0700 Subject: [PATCH 05/19] Make ArrayStore use int32 indices (#400) ## Description The current ArchiveBase uses int32 indices (the assumption being that we will never have to deal with more than INT_MAX archive cells). This PR makes ArrayStore use int32 indices to be consistent with ArchiveBase. ## TODO - [x] Add tests - [x] Ensure consistency across all ArrayStore methods ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 2 +- ribs/archives/_array_store.py | 8 ++++---- tests/archives/array_store_test.py | 24 +++++++++++++++++++++++- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index ab38c64e7..1931755bb 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,7 +10,7 @@ ({pr}`397`) - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in `as_pandas` ({pr}`396`) -- Add ArrayStore data structure ({pr}`395`, {pr}`398`) +- Add ArrayStore data structure ({pr}`395`, {pr}`398`, {pr}`400`) - Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) #### Improvements diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index adc877ac7..7cbfbca33 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -113,7 +113,7 @@ def __init__(self, field_desc, capacity): "capacity": capacity, "occupied": np.zeros(capacity, dtype=bool), "n_occupied": 0, - "occupied_list": np.empty(capacity, dtype=int), + "occupied_list": np.empty(capacity, dtype=np.int32), "updates": np.array([0, 0]), } @@ -168,7 +168,7 @@ def occupied(self): @property def occupied_list(self): - """numpy.ndarray: Integer array listing all occupied indices in the + """numpy.ndarray: int32 array listing all occupied indices in the store.""" return readonly( self._props["occupied_list"][:self._props["n_occupied"]]) @@ -207,7 +207,7 @@ def retrieve(self, indices, fields=None): Raises: ValueError: Invalid field name provided. 
""" - indices = np.asarray(indices) + indices = np.asarray(indices, dtype=np.int32) occupied = readonly(self._props["occupied"][indices]) data = {} @@ -363,7 +363,7 @@ def resize(self, capacity): self._props["occupied"][:cur_capacity] = cur_occupied cur_occupied_list = self._props["occupied_list"] - self._props["occupied_list"] = np.empty(capacity, dtype=int) + self._props["occupied_list"] = np.empty(capacity, dtype=np.int32) self._props["occupied_list"][:cur_capacity] = cur_occupied_list for name, cur_arr in self._fields.items(): diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 631fc6dd6..2307ee160 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -138,6 +138,28 @@ def test_add_duplicate_indices(store): assert np.all(store.occupied_list == [3]) +def test_dtypes(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. + ) + + _, data = store.retrieve([5, 3]) + + # Index is always int32, and other fields were defined as float32 in the + # `store` fixture. + assert data["index"].dtype == np.int32 + assert data["objective"].dtype == np.float32 + assert data["measures"].dtype == np.float32 + assert data["solution"].dtype == np.float32 + + def test_retrieve_duplicate_indices(store): store.add( [3], @@ -400,7 +422,7 @@ def test_as_pandas(store): "solution_8", "solution_9", ]).all() - assert (df.dtypes == [int] + [np.float32] * 13).all() + assert (df.dtypes == [np.int32] + [np.float32] * 13).all() assert len(df) == 2 row0 = np.concatenate(([3, 1.0, 1.0, 2.0], np.zeros(10))) From ae207b43d146623367629294293428678eac6ef3 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Fri, 3 Nov 2023 12:58:20 -0700 Subject: [PATCH 06/19] Remove checkout from installation CI (#401) ## Description We do not need to checkout the pyribs repo when testing installation, since we are only accessing PyPI and Conda. 
## TODO ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [N/A] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- .github/workflows/install.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 5be6d4e83..018159836 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -28,7 +28,6 @@ jobs: ["pip install ribs[all]", "conda install -c conda-forge pyribs-all"] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v2 with: python-version: ${{ matrix.python-version }} From 80fbd3eeb838534dda12c85c4672fb693ff60c54 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Mon, 6 Nov 2023 00:37:22 -0800 Subject: [PATCH 07/19] Make ArrayStore data methods more flexible (#402) ## Description This PR modifies ArrayStore as follows: - Introduces a tuple return type to the retrieve and as_dict methods - Renames as_dict to data() - Removes `as_pandas()` in favor of `data(return_type="pandas")` and `retrieve(return_type="pandas")` ## TODO ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 4 +- ribs/archives/_array_store.py | 211 +++++++++++++++------------- tests/archives/array_store_test.py | 216 ++++++++++++++++++++--------- 3 files changed, 263 insertions(+), 168 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 1931755bb..6b3bf2727 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,7 +10,7 @@ ({pr}`397`) - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in `as_pandas` ({pr}`396`) -- Add ArrayStore data structure ({pr}`395`, {pr}`398`, {pr}`400`) +- Add ArrayStore data structure ({pr}`395`, {pr}`398`, {pr}`400`, {pr}`402`) - Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) #### Improvements @@ -18,7 +18,7 @@ - Use chunk computation in CVT brute force calculation to reduce memory usage ({pr}`394`) - Test pyribs installation in tutorials ({pr}`384`) -- Add cron job for testing installation ({pr}`389`) +- Add cron job for testing installation ({pr}`389`, {pr}`401`) - Fix broken cross-refs in docs ({pr}`393`) ## 0.6.3 diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 7cbfbca33..41e5a3555 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -1,6 +1,5 @@ """Provides ArrayStore.""" import itertools -from collections import OrderedDict from enum import IntEnum import numpy as np @@ -173,14 +172,16 @@ def occupied_list(self): return readonly( self._props["occupied_list"][:self._props["n_occupied"]]) - def retrieve(self, indices, fields=None): - """Collects the data at the given indices. + def retrieve(self, indices, fields=None, return_type="dict"): + """Collects data at the given indices. Args: indices (array-like): List of indices at which to collect data. 
fields (array-like of str): List of fields to include. By default, - all fields will be included. In addition to fields in the store, - "index" is also a valid field. + all fields will be included, with an additional "index" as the + last field ("index" can also be placed anywhere in this list). + return_type (str): Type of data to return. See the ``data`` returned + below. Returns: tuple: 2-element tuple consisting of: @@ -189,42 +190,136 @@ def retrieve(self, indices, fields=None): in, have an associated data entry. For instance, if ``indices`` is ``[0, 1, 2]`` and only index 2 has data, then ``occupied`` will be ``[False, False, True]``. - - **data**: Dict mapping from the field name to the field data at - the given indices. For instance, if we have an ``objective`` field - and request data at indices ``[4, 1, 0]``, we might get ``data`` - that looks like ``{"index": [4, 1, 0], "objective": [1.5, 6.0, - 2.3]}``. Observe that we also return the indices as an ``index'' - entry in the dict. The keys in this dict can be modified using the - ``fields`` arg. Note that if a given index is not marked as occupied, it can have any data value associated with it. For instance, if index 1 was - not occupied, then the 6.0 returned above should be ignored. + not occupied, then the 6.0 returned in the ``dict`` example below + should be ignored. + + - **data**: The data at the given indices. This can take the + following forms, depending on the ``return_type`` argument: + + - ``return_type="dict"``: Dict mapping from the field name to the + field data at the given indices. For instance, if we have an + ``objective`` field and request data at indices ``[4, 1, 0]``, + we would get ``data`` that looks like ``{"objective": [1.5, 6.0, + 2.3], "index": [4, 1, 0]}``. Observe that we also return the + indices as an ``index`` entry in the dict. The keys in this dict + can be modified using the ``fields`` arg; duplicate keys will be + ignored since the dict stores unique keys. + + - ``return_type="tuple"``: Tuple of arrays matching the order + given in ``fields``. For instance, if ``fields`` was + ``["objective", "measures"]``, we would receive a tuple of + ``(objective_arr, measures_arr)``. In this case, the results + from ``retrieve`` could be unpacked as:: + + occupied, (objective, measures) = store.retrieve(...) + + Unlike with the ``dict`` return type, duplicate fields will show + up as duplicate entries in the tuple, e.g., + ``fields=["objective", "objective"]`` will result in two + objective arrays being returned. + + By default, (i.e., when ``fields=None``), the fields in the + tuple will be ordered according to the ``field_desc`` argument + in the constructor, along with ``index`` as the last field. + + - ``return_type="pandas"``: A :class:`pandas.DataFrame` with the + following columns (by default): + + - For fields that are scalars, a single column with the field + name. For example, ``objective`` would have a single column + called ``objective``. + - For fields that are 1D arrays, multiple columns with the name + suffixed by its index. For instance, if we have a ``measures`` + field of length 10, we create 10 columns with names + ``measures_0``, ``measures_1``, ..., ``measures_9``. We do not + currently support fields with >1D data. + - 1 column of integers (``np.int32``) for the index, named + ``index``. + + In short, the dataframe might look like this: + + +-----------+------------+------+-------+ + | objective | measures_0 | ... | index | + +===========+============+======+=======+ + | | | ... 
| | + +-----------+------------+------+-------+ + + Like the other return types, the columns can be adjusted with + the ``fields`` parameter. All data returned by this method will be a readonly copy, i.e., the data will not update as the store changes. Raises: ValueError: Invalid field name provided. + ValueError: Invalid return_type provided. """ indices = np.asarray(indices, dtype=np.int32) occupied = readonly(self._props["occupied"][indices]) - data = {} - fields = (itertools.chain(["index"], self._fields) + if return_type in ("dict", "pandas"): + data = {} + elif return_type == "tuple": + data = [] + else: + raise ValueError(f"Invalid return_type {return_type}.") + + fields = (itertools.chain(self._fields, ["index"]) if fields is None else fields) for name in fields: + # Collect array data. + # # Note that fancy indexing with indices already creates a copy, so # only `indices` needs to be copied explicitly. if name == "index": - data[name] = readonly(np.copy(indices)) - continue - if name not in self._fields: + arr = readonly(np.copy(indices)) + elif name in self._fields: + arr = readonly(self._fields[name][indices]) + else: raise ValueError(f"`{name}` is not a field in this ArrayStore.") - data[name] = readonly(self._fields[name][indices]) + + # Accumulate data into the return type. + if return_type == "dict": + data[name] = arr + elif return_type == "tuple": + data.append(arr) + elif return_type == "pandas": + if len(arr.shape) == 1: # Scalar entries. + data[name] = arr + elif len(arr.shape) == 2: # 1D array entries. + for i in range(arr.shape[1]): + data[f"{name}_{i}"] = arr[:, i] + else: + raise ValueError( + f"Field `{name}` has shape {arr.shape[1:]} -- " + "cannot convert fields with shape >1D to Pandas") + + # Postprocess return data. + if return_type == "tuple": + data = tuple(data) + elif return_type == "pandas": + # Data above are already copied, so no need to copy again. + data = DataFrame(data, copy=False) return occupied, data + def data(self, fields=None, return_type="dict"): + """Retrieves data for all entries in the store. + + Equivalent to calling :meth:`retrieve` with :attr:`occupied_list`. + + Args: + fields (array-like of str): See :meth:`retrieve`. + Returns: + dict or tuple: See ``data`` in :meth:`retrieve`. ``occupied`` is not + returned since all indices are known to be occupied in this + method. + """ + return self.retrieve(self.occupied_list, fields, return_type)[1] + def add(self, indices, new_data, extra_args, transforms): """Adds new data to the store at the given indices. @@ -431,81 +526,3 @@ def from_raw_dict(d): store._fields = fields return store - - def as_dict(self, fields=None): - """Creates a dict containing all data entries in the store. - - Equivalent to calling :meth:`retrieve` with :attr:`occupied_list`. - - Args: - fields (array-like of str): See :meth:`retrieve`. - Returns: - dict: See ``data`` in :meth:`retrieve`. ``occupied`` is not returned - since all indices are known to be occupied in this method. - """ - return self.retrieve(self.occupied_list, fields)[1] - - def as_pandas(self, fields=None): - """Creates a DataFrame containing all data entries in the store. - - The returned DataFrame has: - - - 1 column of integers (``np.int32``) for the index, named ``index``. - - For fields that are scalars, a single column with the field name. For - example, ``objective'' would have a single column called - ``objective``. - - For fields that are 1D arrays, multiple columns with the name suffixed - by its index. 
For instance, if we have a ``measures'' field of length - 10, we create 10 columns with names ``measures_0``, ``measures_1``, - ..., ``measures_9``. - - We do not currently support fields with >1D data. - - In short, the dataframe might look like this: - - +-------+------------+------+-----------+ - | index | measures_0 | ... | objective | - +=======+============+======+===========+ - | | | ... | | - +-------+------------+------+-----------+ - - Args: - fields (array-like of str): List of fields to include. By default, - all fields will be included. In addition to fields in the store, - "index" is also a valid field. - Returns: - pandas.DataFrame: See above. - Raises: - ValueError: Invalid field name provided. - ValueError: There is a field with >1D data. - """ - data = OrderedDict() - indices = self._props["occupied_list"][:self._props["n_occupied"]] - - fields = (itertools.chain(["index"], self._fields) - if fields is None else fields) - - for name in fields: - if name == "index": - data[name] = np.copy(indices) - continue - - if name not in self._fields: - raise ValueError(f"`{name}` is not a field in this ArrayStore.") - - arr = self._fields[name] - if len(arr.shape) == 1: # Scalar entries. - data[name] = arr[indices] - elif len(arr.shape) == 2: # 1D array entries. - arr = arr[indices] - for i in range(arr.shape[1]): - data[f"{name}_{i}"] = arr[:, i] - else: - raise ValueError( - f"Field `{name}` has shape {arr.shape[1:]} -- " - "cannot convert fields with shape >1D to Pandas") - - return DataFrame( - data, - copy=False, # Fancy indexing above copies all fields, and - # indices is explicitly copied. - ) diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 2307ee160..25e2a1774 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -108,11 +108,11 @@ def test_simple_add_retrieve_clear(store): occupied, data = store.retrieve([5, 3]) assert np.all(occupied == [True, True]) - assert data.keys() == set(["index", "objective", "measures", "solution"]) - assert np.all(data["index"] == [5, 3]) + assert data.keys() == set(["objective", "measures", "solution", "index"]) assert np.all(data["objective"] == [2.0, 1.0]) assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) + assert np.all(data["index"] == [5, 3]) store.clear() @@ -154,10 +154,10 @@ def test_dtypes(store): # Index is always int32, and other fields were defined as float32 in the # `store` fixture. 
- assert data["index"].dtype == np.int32 assert data["objective"].dtype == np.float32 assert data["measures"].dtype == np.float32 assert data["solution"].dtype == np.float32 + assert data["index"].dtype == np.int32 def test_retrieve_duplicate_indices(store): @@ -175,11 +175,11 @@ def test_retrieve_duplicate_indices(store): occupied, data = store.retrieve([3, 3]) assert np.all(occupied == [True, True]) - assert data.keys() == set(["index", "objective", "measures", "solution"]) - assert np.all(data["index"] == [3, 3]) + assert data.keys() == set(["objective", "measures", "solution", "index"]) assert np.all(data["objective"] == [2.0, 2.0]) assert np.all(data["measures"] == [[3.0, 4.0], [3.0, 4.0]]) assert np.all(data["solution"] == [np.ones(10), np.ones(10)]) + assert np.all(data["index"] == [3, 3]) def test_retrieve_invalid_fields(store): @@ -187,7 +187,24 @@ def test_retrieve_invalid_fields(store): store.retrieve([0, 1], fields=["objective", "foo"]) -def test_retrieve_custom_fields(store): +def test_retrieve_invalid_return_type(store): + with pytest.raises(ValueError): + store.retrieve([0, 1], return_type="foo") + + +def test_retrieve_pandas_2d_fields(store): + store = ArrayStore( + { + "solution": ((10, 10), np.float32), + }, + 10, + ) + with pytest.raises(ValueError): + store.retrieve([], return_type="pandas") + + +@pytest.mark.parametrize("return_type", ["dict", "tuple", "pandas"]) +def test_retrieve(return_type, store): store.add( [3, 5], { @@ -199,12 +216,89 @@ def test_retrieve_custom_fields(store): [], # Empty transforms. ) - occupied, data = store.retrieve([5, 3], fields=["index", "objective"]) + occupied, data = store.retrieve([5, 3], return_type=return_type) + + if return_type == "dict": + assert np.all(occupied == [True, True]) + assert data.keys() == set( + ["objective", "measures", "solution", "index"]) + assert np.all(data["objective"] == [2.0, 1.0]) + assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) + assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) + assert np.all(data["index"] == [5, 3]) + elif return_type == "tuple": + objective, measures, solution, index = data + assert np.all(occupied == [True, True]) + assert np.all(objective == [2.0, 1.0]) + assert np.all(measures == [[3.0, 4.0], [1.0, 2.0]]) + assert np.all(solution == [np.ones(10), np.zeros(10)]) + assert np.all(index == [5, 3]) + elif return_type == "pandas": + df = data + assert (df.columns == [ + "objective", + "measures_0", + "measures_1", + "solution_0", + "solution_1", + "solution_2", + "solution_3", + "solution_4", + "solution_5", + "solution_6", + "solution_7", + "solution_8", + "solution_9", + "index", + ]).all() + assert (df.dtypes == [np.float32] * 13 + [np.int32]).all() + assert len(df) == 2 + assert np.all(occupied == [True, True]) + assert np.all(df["objective"] == [2.0, 1.0]) + assert np.all(df["measures_0"] == [3.0, 1.0]) + assert np.all(df["measures_1"] == [4.0, 2.0]) + for i in range(10): + assert np.all(df[f"solution_{i}"] == [1, 0]) + assert np.all(df["index"] == [5, 3]) + + +@pytest.mark.parametrize("return_type", ["dict", "tuple", "pandas"]) +def test_retrieve_custom_fields(store, return_type): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty extra_args. + [], # Empty transforms. 
+ ) - assert np.all(occupied == [True, True]) - assert data.keys() == set(["index", "objective"]) - assert np.all(data["index"] == [5, 3]) - assert np.all(data["objective"] == [2.0, 1.0]) + occupied, data = store.retrieve([5, 3], + fields=["index", "objective"], + return_type=return_type) + + if return_type == "dict": + assert np.all(occupied == [True, True]) + assert data.keys() == set(["index", "objective"]) + assert np.all(data["index"] == [5, 3]) + assert np.all(data["objective"] == [2.0, 1.0]) + elif return_type == "tuple": + assert np.all(occupied == [True, True]) + assert np.all(data[0] == [5, 3]) + assert np.all(data[1] == [2.0, 1.0]) + elif return_type == "pandas": + df = data + assert (df.columns == [ + "index", + "objective", + ]).all() + assert (df.dtypes == [np.int32, np.float32]).all() + assert len(df) == 2 + assert np.all(occupied == [True, True]) + assert np.all(df["index"] == [5, 3]) + assert np.all(df["objective"] == [2.0, 1.0]) def test_add_simple_transform(store): @@ -235,11 +329,11 @@ def obj_meas(indices, new_data, add_info, extra_args, occupied, cur_data): occupied, data = store.retrieve([3, 5]) assert np.all(occupied == [True, True]) - assert data.keys() == set(["index", "objective", "measures", "solution"]) - assert np.all(data["index"] == [3, 5]) + assert data.keys() == set(["objective", "measures", "solution", "index"]) assert np.all(data["objective"] == [10.0, 20.0]) assert np.all(data["measures"] == [[1.0, 1.0], [2.0, 2.0]]) assert np.all(data["solution"] == [np.ones(10), 2 * np.ones(10)]) + assert np.all(data["index"] == [3, 5]) def test_add_empty_transform(store): @@ -355,14 +449,14 @@ def test_from_raw_dict(store): occupied, data = new_store.retrieve([5, 3]) assert np.all(occupied == [True, True]) - assert data.keys() == set(["index", "objective", "measures", "solution"]) - assert np.all(data["index"] == [5, 3]) + assert data.keys() == set(["objective", "measures", "solution", "index"]) assert np.all(data["objective"] == [2.0, 1.0]) assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) + assert np.all(data["index"] == [5, 3]) -def test_as_dict(store): +def test_data(store): store.add( [3, 5], { @@ -374,17 +468,17 @@ def test_as_dict(store): [], # Empty transforms. ) - d = store.as_dict() + d = store.data() - assert d.keys() == set(["index", "objective", "measures", "solution"]) + assert d.keys() == set(["objective", "measures", "solution", "index"]) assert all(len(v) == 2 for v in d.values()) - row0 = np.concatenate(([3, 1.0, 1.0, 2.0], np.zeros(10))) - row1 = np.concatenate(([5, 2.0, 3.0, 4.0], np.ones(10))) + row0 = np.concatenate(([1.0, 1.0, 2.0], np.zeros(10), [3])) + row1 = np.concatenate(([2.0, 3.0, 4.0], np.ones(10), [5])) flat = [ - np.concatenate(([d["index"][i]], [d["objective"][i]], d["measures"][i], - d["solution"][i])) for i in range(2) + np.concatenate(([d["objective"][i]], d["measures"][i], d["solution"][i], + [d["index"][i]])) for i in range(2) ] # Either permutation. @@ -392,7 +486,7 @@ def test_as_dict(store): ((flat[0] == row1).all() and (flat[1] == row0).all())) -def test_as_pandas(store): +def test_data_with_tuple_return_type(store): store.add( [3, 5], { @@ -404,41 +498,25 @@ def test_as_pandas(store): [], # Empty transforms. 
) - df = store.as_pandas() - - assert (df.columns == [ - "index", - "objective", - "measures_0", - "measures_1", - "solution_0", - "solution_1", - "solution_2", - "solution_3", - "solution_4", - "solution_5", - "solution_6", - "solution_7", - "solution_8", - "solution_9", - ]).all() - assert (df.dtypes == [np.int32] + [np.float32] * 13).all() - assert len(df) == 2 + d = store.data(return_type="tuple") - row0 = np.concatenate(([3, 1.0, 1.0, 2.0], np.zeros(10))) - row1 = np.concatenate(([5, 2.0, 3.0, 4.0], np.ones(10))) + assert len(d) == 4 # 3 fields and 1 index. + assert all(len(v) == 2 for v in d) - # Either permutation. - assert (((df.loc[0] == row0).all() and (df.loc[1] == row1).all()) or - ((df.loc[0] == row1).all() and (df.loc[1] == row0).all())) + row0 = np.concatenate(([1.0, 1.0, 2.0], np.zeros(10), [3])) + row1 = np.concatenate(([2.0, 3.0, 4.0], np.ones(10), [5])) + flat = [ + np.concatenate(([d[0][i]], d[1][i], d[2][i], [d[3][i]])) + for i in range(2) + ] -def test_as_pandas_invalid_fields(store): - with pytest.raises(ValueError): - store.as_pandas(fields=["objective", "foo"]) + # Either permutation. + assert (((flat[0] == row0).all() and (flat[1] == row1).all()) or + ((flat[0] == row1).all() and (flat[1] == row0).all())) -def test_as_pandas_custom_fields(store): +def test_data_with_pandas_return_type(store): store.add( [3, 5], { @@ -450,35 +528,35 @@ def test_as_pandas_custom_fields(store): [], # Empty transforms. ) - df = store.as_pandas(fields=["objective", "measures"]) + df = store.data(return_type="pandas") assert (df.columns == [ "objective", "measures_0", "measures_1", + "solution_0", + "solution_1", + "solution_2", + "solution_3", + "solution_4", + "solution_5", + "solution_6", + "solution_7", + "solution_8", + "solution_9", + "index", ]).all() - assert (df.dtypes == [np.float32] * 3).all() + assert (df.dtypes == [np.float32] * 13 + [np.int32]).all() assert len(df) == 2 - row0 = [1.0, 1.0, 2.0] - row1 = [2.0, 3.0, 4.0] + row0 = np.concatenate(([1.0, 1.0, 2.0], np.zeros(10), [3])) + row1 = np.concatenate(([2.0, 3.0, 4.0], np.ones(10), [5])) # Either permutation. assert (((df.loc[0] == row0).all() and (df.loc[1] == row1).all()) or ((df.loc[0] == row1).all() and (df.loc[1] == row0).all())) -def test_as_pandas_2d_fields(store): - store = ArrayStore( - { - "solution": ((10, 10), np.float32), - }, - 10, - ) - with pytest.raises(ValueError): - store.as_pandas() - - def test_iteration(store): store.add( [3], @@ -493,11 +571,11 @@ def test_iteration(store): for entry in store: assert entry.keys() == set( - ["index", "objective", "measures", "solution"]) - assert np.all(entry["index"] == [3]) + ["objective", "measures", "solution", "index"]) assert np.all(entry["objective"] == [1.0]) assert np.all(entry["measures"] == [[1.0, 2.0]]) assert np.all(entry["solution"] == [np.zeros(10)]) + assert np.all(entry["index"] == [3]) def test_add_during_iteration(store): From e0aef64e611bbc74a31fe3bca7c327703377dc0b Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Mon, 6 Nov 2023 01:10:08 -0800 Subject: [PATCH 08/19] Add field_desc property to ArrayStore (#403) ## Description This property is a cached property. It takes some computation, but we only want to do it once since it never changes. Furthermore, we do not want to store it in the constructor since we would then need to worry about it when reconstructing in `from_raw_dict`. 
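For illustration, a small sketch of the resulting behavior; the int-to-tuple shape normalization follows the `field_desc` docstring and test added in this PR:

```python
import numpy as np

from ribs.archives import ArrayStore

# 1D shapes may be given as plain ints in the constructor...
store = ArrayStore(
    {
        "objective": ((), np.float32),
        "measures": (2, np.float32),
    },
    10,
)

# ...but field_desc reports them normalized to 1-tuples. As a
# functools.cached_property, the dict is built on first access and reused.
assert store.field_desc == {
    "objective": ((), np.float32),
    "measures": ((2,), np.float32),
}
```

Caching also means `from_raw_dict` needs no special handling: the property derives everything from the reconstructed `_fields`.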
## TODO ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 3 ++- ribs/archives/_array_store.py | 24 ++++++++++++++++++++++++ tests/archives/array_store_test.py | 5 +++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 6b3bf2727..e11ec3490 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,7 +10,8 @@ ({pr}`397`) - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in `as_pandas` ({pr}`396`) -- Add ArrayStore data structure ({pr}`395`, {pr}`398`, {pr}`400`, {pr}`402`) +- Add ArrayStore data structure ({pr}`395`, {pr}`398`, {pr}`400`, {pr}`402`, + {pr}`403`) - Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) #### Improvements diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 41e5a3555..ba93785e0 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -1,6 +1,7 @@ """Provides ArrayStore.""" import itertools from enum import IntEnum +from functools import cached_property import numpy as np from numpy_groupies import aggregate_nb as aggregate @@ -172,6 +173,29 @@ def occupied_list(self): return readonly( self._props["occupied_list"][:self._props["n_occupied"]]) + @cached_property + def field_desc(self): + """dict: Description of fields in the array store. + + Example: + + :: + + store.field_desc == { + "objective": ((), np.float32), + "measures": ((10,), np.float32) + } + + See the constructor ``field_desc`` parameter for more info. Unlike in + the field_desc in the constructor, which accepts ints for 1D field + shapes (e.g., ``5``), this field_desc shows 1D field shapes as tuples of + 1 entry (e.g., ``(5,)``). + """ + return { + name: (arr.shape[1:], arr.dtype) + for name, arr in self._fields.items() + } + def retrieve(self, indices, fields=None, return_type="dict"): """Collects data at the given indices. 
diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 25e2a1774..28af94bb0 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -45,6 +45,11 @@ def test_init(shape): assert store.capacity == capacity assert np.all(~store.occupied) assert len(store.occupied_list) == 0 + assert store.field_desc == { + "objective": (shape[0], np.float32), + "measures": ((shape[1],), np.float32), + "solution": ((shape[2],), np.float32), + } @pytest.fixture From a99a78f11723ff4ce7f5b553da802cfcc9a45ce4 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Mon, 6 Nov 2023 01:27:10 -0800 Subject: [PATCH 09/19] Fix ArrayStore field_desc test (#404) ## Description Oops ## TODO ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 2 +- tests/archives/array_store_test.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index e11ec3490..550211bac 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -11,7 +11,7 @@ - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in `as_pandas` ({pr}`396`) - Add ArrayStore data structure ({pr}`395`, {pr}`398`, {pr}`400`, {pr}`402`, - {pr}`403`) + {pr}`403`, {pr}`404`) - Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) #### Improvements diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 28af94bb0..400023bbc 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -47,8 +47,10 @@ def test_init(shape): assert len(store.occupied_list) == 0 assert store.field_desc == { "objective": (shape[0], np.float32), - "measures": ((shape[1],), np.float32), - "solution": ((shape[2],), np.float32), + "measures": ( + (shape[1],) if isinstance(shape[1], int) else shape[1], np.float32), + "solution": ( + (shape[2],) if isinstance(shape[2], int) else shape[2], np.float32), } From 57a54bf95c7930d58a8c3b3acd601774bc5f5897 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Tue, 7 Nov 2023 11:08:06 -0800 Subject: [PATCH 10/19] Remove readonly restriction on ArrayStore data (#406) ## Description Since methods like retrieve() already return copies, there is no need to make the copies be readonly. 
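A small sketch of the resulting behavior, following the `add()` call pattern used in the tests above (the store contents are illustrative assumptions):

```python
import numpy as np

from ribs.archives import ArrayStore

store = ArrayStore({"objective": ((), np.float32)}, capacity=4)
store.add([0], {"objective": [1.0]}, {}, [])  # Empty extra_args/transforms.

occupied, data = store.retrieve([0])

# Retrieved arrays have always been copies, so mutating them never touched
# the store...
data["objective"][0] = 99.0
assert store.retrieve([0])[1]["objective"][0] == 1.0

# ...but before this PR, the copies were also flagged readonly, so the
# assignment above would have raised ValueError instead of succeeding.
```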
## TODO ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 2 +- ribs/archives/_array_store.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 550211bac..2710832d0 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -11,7 +11,7 @@ - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in `as_pandas` ({pr}`396`) - Add ArrayStore data structure ({pr}`395`, {pr}`398`, {pr}`400`, {pr}`402`, - {pr}`403`, {pr}`404`) + {pr}`403`, {pr}`404`, {pr}`406`) - Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) #### Improvements diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index ba93785e0..c2d295588 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -274,15 +274,15 @@ def retrieve(self, indices, fields=None, return_type="dict"): Like the other return types, the columns can be adjusted with the ``fields`` parameter. - All data returned by this method will be a readonly copy, i.e., the - data will not update as the store changes. + All data returned by this method will be a copy, i.e., the data will + not update as the store changes. Raises: ValueError: Invalid field name provided. ValueError: Invalid return_type provided. """ indices = np.asarray(indices, dtype=np.int32) - occupied = readonly(self._props["occupied"][indices]) + occupied = self._props["occupied"][indices] # Induces copy. if return_type in ("dict", "pandas"): data = {} @@ -299,9 +299,9 @@ def retrieve(self, indices, fields=None, return_type="dict"): # Note that fancy indexing with indices already creates a copy, so # only `indices` needs to be copied explicitly. if name == "index": - arr = readonly(np.copy(indices)) + arr = np.copy(indices) elif name in self._fields: - arr = readonly(self._fields[name][indices]) + arr = self._fields[name][indices] # Induces copy. else: raise ValueError(f"`{name}` is not a field in this ArrayStore.") From cc7051c4eff5943660dca5b8025f1aee66e2eb9b Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Tue, 7 Nov 2023 15:22:58 -0800 Subject: [PATCH 11/19] Reimplement ArchiveBase using ArrayStore (#399) ## Description This PR refactors the ArchiveBase to place data in ArrayStore rather than in separate NumPy arrays. This should make the archive more flexible and easier to extend in the future. While flexibility is the end goal, note that this PR is only a refactor; thus, no API changes are being made. As a result, tests will mostly remain the same. The exception is the creation of a private `ribs.archives._transforms` module, for which we do introduce new tests. This module should be considered unstable for now, but once it seems more stable, we can make it public. 
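For context, every function in the new transforms module follows a single calling convention: it receives the candidate `indices` and `new_data` along with the `occupied` flags and `cur_data` at those indices, and returns a possibly filtered `(indices, new_data, add_info)` triple. Here is a sketch of a hypothetical custom transform in that style (`drop_nonfinite` is not part of this PR):

```python
import numpy as np


def drop_nonfinite(indices, new_data, add_info, extra_args, occupied,
                   cur_data):
    """Hypothetical transform: discard entries with non-finite objectives.

    Mirrors the signature used by the transforms in
    ribs/archives/_transforms.py.
    """
    keep = np.isfinite(new_data["objective"])
    return (
        indices[keep],
        {name: arr[keep] for name, arr in new_data.items()},
        add_info,
    )
```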
## TODO - [x] Modify ArchiveBase - [x] Update SlidingBoundariesArchive - [x] Create private transforms module - [x] Test transforms - [x] Fix ArchiveBase tests where appropriate ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 1 + ribs/_utils.py | 21 + ribs/archives/_archive_base.py | 685 ++++-------------- ribs/archives/_sliding_boundaries_archive.py | 20 +- ribs/archives/_transforms.py | 283 ++++++++ tests/archives/archive_base_test.py | 9 +- .../archives/archive_threshold_update_test.py | 84 +-- tests/archives/transforms_test.py | 61 ++ 8 files changed, 569 insertions(+), 595 deletions(-) create mode 100644 ribs/archives/_transforms.py create mode 100644 tests/archives/transforms_test.py diff --git a/HISTORY.md b/HISTORY.md index 2710832d0..d5d72c3fe 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -16,6 +16,7 @@ #### Improvements +- Reimplement ArchiveBase using ArrayStore ({pr}`399`) - Use chunk computation in CVT brute force calculation to reduce memory usage ({pr}`394`) - Test pyribs installation in tutorials ({pr}`384`) diff --git a/ribs/_utils.py b/ribs/_utils.py index 9c046209d..47abf40ea 100644 --- a/ribs/_utils.py +++ b/ribs/_utils.py @@ -2,6 +2,27 @@ import numpy as np +def parse_float_dtype(dtype): + """Parses a floating point dtype. + + Returns: + np.float32 or np.float64 + Raises: + ValueError: ``dtype`` is not one of the supported dtypes. + """ + # First convert str dtypes to np.dtype. + if isinstance(dtype, str): + dtype = np.dtype(dtype) + + # np.dtype is not np.float32 or np.float64, but it compares equal. + if dtype == np.float32: + return np.float32 + if dtype == np.float64: + return np.float64 + + raise ValueError("Unsupported dtype. Must be np.float32 or np.float64") + + def check_finite(x, name): """Checks that x is finite (i.e. not infinity or NaN).
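As a quick check of the new helper's contract (the behavior follows directly from the code above; importing from the private `ribs._utils` module is for illustration only):

```python
import numpy as np

from ribs._utils import parse_float_dtype

assert parse_float_dtype("float32") is np.float32      # str dtypes accepted.
assert parse_float_dtype(np.dtype("d")) is np.float64  # np.dtype accepted.
assert parse_float_dtype(np.float64) is np.float64     # NumPy types accepted.

try:
    parse_float_dtype(np.int32)  # Anything else raises.
except ValueError as e:
    print(e)  # Unsupported dtype. Must be np.float32 or np.float64
```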
diff --git a/ribs/archives/_archive_base.py b/ribs/archives/_archive_base.py index fb5dcb506..342c83f00 100644 --- a/ribs/archives/_archive_base.py +++ b/ribs/archives/_archive_base.py @@ -1,93 +1,28 @@ """Provides ArchiveBase.""" from abc import ABC, abstractmethod -from collections import OrderedDict import numpy as np -from numpy_groupies import aggregate_nb as aggregate from ribs._utils import (check_1d_shape, check_batch_shape, check_finite, - check_is_1d, readonly, validate_batch_args, + check_is_1d, parse_float_dtype, validate_batch_args, validate_single_args) from ribs.archives._archive_data_frame import ArchiveDataFrame from ribs.archives._archive_stats import ArchiveStats +from ribs.archives._array_store import ArrayStore from ribs.archives._cqd_score_result import CQDScoreResult - -_ADD_WARNING = (" Note that starting in pyribs 0.5.0, add() takes in a " - "batch of solutions unlike in pyribs 0.4.0, where add() " - "only took in a single solution.") - - -class ArchiveIterator: - """An iterator for an archive's elites.""" - - # pylint: disable = protected-access - - def __init__(self, archive): - self.archive = archive - self.iter_idx = 0 - self.state = archive._state.copy() - - def __iter__(self): - """This is the iterator, so it returns itself.""" - return self - - def __next__(self): - """Raises RuntimeError if the archive was modified with add() or - clear().""" - if self.state != self.archive._state: - # This check should go first because a call to clear() would clear - # _occupied_indices and cause StopIteration to happen early. - raise RuntimeError( - "Archive was modified with add() or clear() during iteration.") - if self.iter_idx >= len(self.archive): - raise StopIteration - - idx = self.archive._occupied_indices[self.iter_idx] - self.iter_idx += 1 - return { - "solution": self.archive._solution_arr[idx], - "objective": self.archive._objective_arr[idx], - "measures": self.archive._measures_arr[idx], - "index": idx, - "metadata": self.archive._metadata_arr[idx], - } +from ribs.archives._transforms import (batch_entries_with_threshold, + compute_best_index, + compute_objective_sum, + single_entry_with_threshold) class ArchiveBase(ABC): # pylint: disable = too-many-instance-attributes """Base class for archives. - This class assumes all archives use a fixed-size container with cells that - hold (1) information about whether the cell is occupied (bool), (2) a - solution (1D array), (3) objective function evaluation of the solution - (float), (4) measure space coordinates of the solution (1D array), (5) - any additional metadata associated with the solution (object), and (6) a - threshold which determines how high an objective value must be for a - solution to be inserted into a cell (float). In this class, the container is - implemented with separate numpy arrays that share common dimensions. 
Using - the ``solution_dim``, ``cells`, and ``measure_dim`` arguments in - ``__init__``, these arrays are as follows: - - +------------------------+----------------------------+ - | Name | Shape | - +========================+============================+ - | ``_occupied_arr`` | ``(cells,)`` | - +------------------------+----------------------------+ - | ``_solution_arr`` | ``(cells, solution_dim)`` | - +------------------------+----------------------------+ - | ``_objective_arr`` | ``(cells,)`` | - +------------------------+----------------------------+ - | ``_measures_arr`` | ``(cells, measure_dim)`` | - +------------------------+----------------------------+ - | ``_metadata_arr`` | ``(cells,)`` | - +------------------------+----------------------------+ - | ``_threshold_arr`` | ``(cells,)`` | - +------------------------+----------------------------+ - - All of these arrays are accessed via a common integer index. If we have - index ``i``, we access its solution at ``_solution_arr[i]``, its measure - values at ``_measures_arr[i]``, etc. - - Thus, child classes typically override the following methods: + This class composes archives using an :class:`ArrayStore` that has + "solution", "objective", "measures", "metadata", and "threshold" fields. + + Child classes typically override the following methods: - ``__init__``: Child classes must invoke this class's ``__init__`` with the appropriate arguments. @@ -126,29 +61,10 @@ class ArchiveBase(ABC): # pylint: disable = too-many-instance-attributes and measures. We only support ``"f"`` / ``np.float32`` and ``"d"`` / ``np.float64``. Attributes: - _solution_dim (int): See ``solution_dim`` arg. _rng (numpy.random.Generator): Random number generator, used in particular for generating random elites. - _cells (int): See ``cells`` arg. - _measure_dim (int): See ``measure_dim`` arg. - _occupied_arr (numpy.ndarray): Bool array storing whether each cell in - the archive is occupied. - _solution_arr (numpy.ndarray): Float array storing the solutions - themselves. - _objective_arr (numpy.ndarray): Float array storing the objective value - of each solution. - _measures_arr (numpy.ndarray): Float array storing the measure space - coordinates of each solution. - _metadata_arr (numpy.ndarray): Object array storing the metadata - associated with each solution. - _threshold_arr (numpy.ndarray): Float array storing the threshold for - insertion into each cell. - _occupied_indices (numpy.ndarray): A ``(cells,)`` array of integer - (``np.int32``) indices that are occupied in the archive. This could - be a list, but for efficiency, we make it a fixed-size array, where - only the first ``_num_occupied`` entries are valid. - _num_occupied (int): Number of elites currently in the archive. This is - used to index into ``_occupied_indices``. + _store (ribs.archives.ArrayStore): The underlying ArrayStore containing + data for the archive. """ def __init__(self, @@ -162,80 +78,43 @@ def __init__(self, seed=None, dtype=np.float64): - ## Intended to be accessed by child classes. 
## - self._solution_dim = solution_dim + self._dtype = parse_float_dtype(dtype) + self._seed = seed self._rng = np.random.default_rng(seed) self._cells = cells + self._solution_dim = solution_dim self._measure_dim = measure_dim - self._dtype = self._parse_dtype(dtype) - - self._num_occupied = 0 - self._occupied_arr = np.zeros(self._cells, dtype=bool) - self._occupied_indices = np.empty(self._cells, dtype=np.int32) - - self._solution_arr = np.empty((self._cells, solution_dim), - dtype=self.dtype) - self._objective_arr = np.empty(self._cells, dtype=self.dtype) - self._measures_arr = np.empty((self._cells, self._measure_dim), - dtype=self.dtype) - self._metadata_arr = np.empty(self._cells, dtype=object) + self._qd_score_offset = self._dtype(qd_score_offset) if threshold_min == -np.inf and learning_rate != 1.0: raise ValueError("threshold_min can only be -np.inf if " "learning_rate is 1.0") self._learning_rate = self._dtype(learning_rate) self._threshold_min = self._dtype(threshold_min) - self._threshold_arr = np.full(self._cells, - threshold_min, - dtype=self.dtype) - - self._qd_score_offset = self._dtype(qd_score_offset) self._stats = None + self._best_elite = None # Sum of all objective values in the archive; useful for computing # qd_score and obj_mean. self._objective_sum = None self._stats_reset() - self._best_elite = None - - # Tracks archive modifications by counting calls to clear() and add(). - self._state = {"clear": 0, "add": 0} - - ## Not intended to be accessed by children. ## - self._seed = seed - - @staticmethod - def _parse_dtype(dtype): - """Parses the dtype passed into the constructor. - - Returns: - np.float32 or np.float64 - Raises: - ValueError: There is an error in the bounds configuration. - """ - # First convert str dtype's to np.dtype. - if isinstance(dtype, str): - dtype = np.dtype(dtype) - - # np.dtype is not np.float32 or np.float64, but it compares equal. - if dtype == np.float32: - return np.float32 - if dtype == np.float64: - return np.float64 - - raise ValueError("Unsupported dtype. Must be np.float32 or np.float64") + self._store = ArrayStore( + field_desc={ + "solution": ((solution_dim,), self.dtype), + "objective": ((), self.dtype), + "measures": ((self._measure_dim,), self.dtype), + "metadata": ((), object), + "threshold": ((), self.dtype), + }, + capacity=self._cells, + ) @property def cells(self): """int: Total number of cells in the archive.""" return self._cells - @property - def empty(self): - """bool: Whether the archive is empty.""" - return self._num_occupied == 0 - @property def measure_dim(self): """int: Dimensionality of the measure space.""" @@ -293,9 +172,14 @@ def dtype(self): """data-type: The dtype of the solutions, objective, and measures.""" return self._dtype + @property + def empty(self): + """bool: Whether the archive is empty.""" + return len(self._store) == 0 + def __len__(self): """Number of elites in the archive.""" - return self._num_occupied + return len(self._store) def __iter__(self): """Creates an iterator over the elites in the archive. @@ -309,100 +193,15 @@ def __iter__(self): elite["objective"] ... """ - return ArchiveIterator(self) - - def _stats_reset(self): - """Resets the archive stats.""" - self._stats = ArchiveStats( - num_elites=0, - coverage=self.dtype(0.0), - qd_score=self.dtype(0.0), - norm_qd_score=self.dtype(0.0), - obj_max=None, - obj_mean=None, - ) - self._objective_sum = self.dtype(0.0) - - def _compute_new_thresholds(self, threshold_arr, objective_batch, - index_batch, learning_rate): - """Update thresholds. 
- - Args: - threshold_arr (np.ndarray): The threshold of the cells before - updating. 1D array. - objective_batch (np.ndarray): The objective values of the solution - that is inserted into the archive for each cell. 1D array. We - assume that the objective values are all higher than the - thresholds of their respective cells. - index_batch (np.ndarray): The archive index of the elements in - objective batch. - Returns: - `new_threshold_batch` (A self.dtype array of new - thresholds) and `threshold_update_indices` (A boolean - array indicating which entries in `threshold_arr` should - be updated. - """ - # Even though we do this check, it should not be possible to have - # empty objective_batch or index_batch in the add() method since - # we check that at least one cell is being updated by seeing if - # can_insert has any True values. - if objective_batch.size == 0 or index_batch.size == 0: - return np.array([], dtype=self.dtype), np.array([], dtype=bool) - - # Compute the number of objectives inserted into each cell. - objective_sizes = aggregate(index_batch, - objective_batch, - func="len", - fill_value=0, - size=threshold_arr.size) - - # These indices are with respect to the archive, so we can directly pass - # them to threshold_arr. - threshold_update_indices = objective_sizes > 0 - - # Compute the sum of the objectives inserted into each cell. - objective_sums = aggregate(index_batch, - objective_batch, - func="sum", - fill_value=np.nan, - size=threshold_arr.size) - - # Throw away indices that we do not care about. - objective_sizes = objective_sizes[threshold_update_indices] - objective_sums = objective_sums[threshold_update_indices] - - # Unlike in add_single, we do not need to worry about - # old_threshold having -np.inf here as a result of threshold_min - # being -np.inf. This is because the case with threshold_min = - # -np.inf is handled separately since we compute the new - # threshold based on the max objective in each cell in that case. - old_threshold = np.copy(threshold_arr[threshold_update_indices]) - - ratio = self.dtype(1.0 - learning_rate)**objective_sizes - new_threshold_batch = (ratio * old_threshold + - (objective_sums / objective_sizes) * (1 - ratio)) - - return new_threshold_batch, threshold_update_indices + return iter(self._store) def clear(self): """Removes all elites from the archive. After this method is called, the archive will be :attr:`empty`. """ - # Clear ``self._occupied_indices`` and ``self._occupied_arr`` since a - # cell can have arbitrary values when its index is marked as unoccupied. - self._num_occupied = 0 # Corresponds to clearing _occupied_indices. - self._occupied_arr.fill(False) - - # We also need to reset thresholds since archive addition is based on - # thresholds. 
- self._threshold_arr.fill(self._threshold_min) - - self._state["clear"] += 1 - self._state["add"] = 0 - + self._store.clear() self._stats_reset() - self._best_elite = None @abstractmethod def index_of(self, measures_batch): @@ -442,6 +241,49 @@ def index_of_single(self, measures): check_finite(measures, "measures") return self.index_of(measures[None])[0] + def _stats_reset(self): + """Resets the archive stats.""" + self._stats = ArchiveStats( + num_elites=0, + coverage=self.dtype(0.0), + qd_score=self.dtype(0.0), + norm_qd_score=self.dtype(0.0), + obj_max=None, + obj_mean=None, + ) + self._best_elite = None + self._objective_sum = self.dtype(0.0) + + def _stats_update(self, new_objective_sum, new_best_index): + """Updates statistics based on a new sum of objective values + (new_objective_sum) and the index of a potential new best elite + (new_best_index).""" + self._objective_sum = new_objective_sum + new_qd_score = (self._objective_sum - + self.dtype(len(self)) * self._qd_score_offset) + + _, new_best_elite = self._store.retrieve([new_best_index]) + + if (self._stats.obj_max is None or + new_best_elite["objective"] > self._stats.obj_max): + # Convert batched values to single values. + new_best_elite = {k: v[0] for k, v in new_best_elite.items()} + new_best_elite.pop("threshold") + + new_obj_max = new_best_elite["objective"] + self._best_elite = new_best_elite + else: + new_obj_max = self._stats.obj_max + + self._stats = ArchiveStats( + num_elites=len(self), + coverage=self.dtype(len(self) / self.cells), + qd_score=new_qd_score, + norm_qd_score=self.dtype(new_qd_score / self.cells), + obj_max=new_obj_max, + obj_mean=self._objective_sum / self.dtype(len(self)), + ) + def add(self, solution_batch, objective_batch, @@ -544,9 +386,6 @@ def add(self, ValueError: ``objective_batch`` or ``measures_batch`` has non-finite values (inf or NaN). """ - self._state["add"] += 1 - - ## Step 1: Validate input. ## ( solution_batch, objective_batch, @@ -559,153 +398,33 @@ def add(self, measures_batch=measures_batch, metadata_batch=metadata_batch, ) - batch_size = solution_batch.shape[0] - - ## Step 2: Compute status_batch and value_batch ## - - # Retrieve indices. - index_batch = self.index_of(measures_batch) - - # Copy old objectives since we will be modifying the objectives storage. - old_objective_batch = np.copy(self._objective_arr[index_batch]) - old_threshold_batch = np.copy(self._threshold_arr[index_batch]) - - # Compute the statuses -- these are all boolean arrays of length - # batch_size. - already_occupied = self._occupied_arr[index_batch] - # In the case where we want CMA-ME behavior, threshold_arr[index] - # is -inf for new cells, which satisfies the condition for can_be_added. - can_be_added = objective_batch > old_threshold_batch - is_new = can_be_added & ~already_occupied - improve_existing = can_be_added & already_occupied - status_batch = np.zeros(batch_size, dtype=np.int32) - status_batch[is_new] = 2 - status_batch[improve_existing] = 1 - - # New solutions require special settings for old_objective and - # old_threshold. - old_objective_batch[is_new] = self.dtype(0) - - # If threshold_min is -inf, then we want CMA-ME behavior, which - # will compute the improvement value of new solutions w.r.t zero. - # Otherwise, we will compute w.r.t. threshold_min. - old_threshold_batch[is_new] = (self.dtype(0) if self._threshold_min - == -np.inf else self._threshold_min) - value_batch = objective_batch - old_threshold_batch - - ## Step 3: Insert solutions into archive. 
## - - # Return early if we cannot insert anything -- continuing would actually - # throw a ValueError in aggregate() since index_batch[can_insert] would - # be empty. - can_insert = is_new | improve_existing - if not np.any(can_insert): - return status_batch, value_batch - - # Select only solutions that can be inserted into the archive. - solution_batch_can = solution_batch[can_insert] - objective_batch_can = objective_batch[can_insert] - measures_batch_can = measures_batch[can_insert] - index_batch_can = index_batch[can_insert] - metadata_batch_can = metadata_batch[can_insert] - old_objective_batch_can = old_objective_batch[can_insert] - - # Retrieve indices of solutions that should be inserted into the - # archive. Currently, multiple solutions may be inserted at each - # archive index, but we only want to insert the maximum among these - # solutions. Thus, we obtain the argmax for each archive index. - # - # We use a fill_value of -1 to indicate archive indices which were not - # covered in the batch. Note that the length of archive_argmax is only - # max(index_batch[can_insert]), rather than the total number of grid - # cells. However, this is okay because we only need the indices of the - # solutions, which we store in should_insert. - # - # aggregate() always chooses the first item if there are ties, so the - # first elite will be inserted if there is a tie. See their default - # numpy implementation for more info: - # https://github.com/ml31415/numpy-groupies/blob/master/numpy_groupies/aggregate_numpy.py#L107 - archive_argmax = aggregate(index_batch_can, - objective_batch_can, - func="argmax", - fill_value=-1) - should_insert = archive_argmax[archive_argmax != -1] - - # Select only solutions that will be inserted into the archive. - solution_batch_insert = solution_batch_can[should_insert] - objective_batch_insert = objective_batch_can[should_insert] - measures_batch_insert = measures_batch_can[should_insert] - index_batch_insert = index_batch_can[should_insert] - metadata_batch_insert = metadata_batch_can[should_insert] - old_objective_batch_insert = old_objective_batch_can[should_insert] - - # Set archive storage. - self._objective_arr[index_batch_insert] = objective_batch_insert - self._measures_arr[index_batch_insert] = measures_batch_insert - self._solution_arr[index_batch_insert] = solution_batch_insert - self._metadata_arr[index_batch_insert] = metadata_batch_insert - self._occupied_arr[index_batch_insert] = True - - # Mark new indices as occupied. - is_new_and_inserted = is_new[can_insert][should_insert] - n_new = np.sum(is_new_and_inserted) - self._occupied_indices[self._num_occupied:self._num_occupied + - n_new] = ( - index_batch_insert[is_new_and_inserted]) - self._num_occupied += n_new - - # Update the thresholds. - if self._threshold_min == -np.inf: - # Here we want regular archive behavior, so the thresholds - # should just be the maximum objective. - self._threshold_arr[index_batch_insert] = objective_batch_insert - else: - # Here we compute the batch threshold update described in the - # appendix of Fontaine 2022 https://arxiv.org/abs/2205.10752 - # This computation is based on the mean objective of all - # solutions in the batch that could have been inserted into - # each cell. This method is separated out to facilitate - # testing. 
- (new_thresholds, - update_thresholds_indices) = self._compute_new_thresholds( - self._threshold_arr, objective_batch_can, index_batch_can, - self._learning_rate) - self._threshold_arr[update_thresholds_indices] = new_thresholds - - ## Step 4: Update archive stats. ## - - # Since we set the new solutions in the old objective batch to have - # value 0.0, the objectives for new solutions are added in properly - # here. - self._objective_sum += np.sum(objective_batch_insert - - old_objective_batch_insert) - new_qd_score = (self._objective_sum - - self.dtype(len(self)) * self._qd_score_offset) - max_idx = np.argmax(objective_batch_insert) - max_obj_insert = objective_batch_insert[max_idx] - - if self._stats.obj_max is None or max_obj_insert > self._stats.obj_max: - new_obj_max = max_obj_insert - self._best_elite = { - "solution": readonly(np.copy(solution_batch_insert[max_idx])), - "objective": objective_batch_insert[max_idx], - "measures": readonly(np.copy(measures_batch_insert[max_idx])), - "index": index_batch_insert[max_idx], - "metadata": metadata_batch_insert[max_idx], - } - else: - new_obj_max = self._stats.obj_max - self._stats = ArchiveStats( - num_elites=len(self), - coverage=self.dtype(len(self) / self.cells), - qd_score=new_qd_score, - norm_qd_score=self.dtype(new_qd_score / self.cells), - obj_max=new_obj_max, - obj_mean=self._objective_sum / self.dtype(len(self)), + add_info = self._store.add( + self.index_of(measures_batch), + { + "solution": solution_batch, + "objective": objective_batch, + "measures": measures_batch, + "metadata": metadata_batch, + }, + { + "dtype": self._dtype, + "learning_rate": self._learning_rate, + "threshold_min": self._threshold_min, + "objective_sum": self._objective_sum, + }, + [ + batch_entries_with_threshold, + compute_objective_sum, + compute_best_index, + ], ) - return status_batch, value_batch + if not np.all(add_info["status"] == 0): + self._stats_update(add_info.pop("objective_sum"), + add_info.pop("best_index")) + + return add_info["status"], add_info["value"] def add_single(self, solution, objective, measures, metadata=None): """Inserts a single solution into the archive. @@ -744,8 +463,6 @@ def add_single(self, solution, objective, measures, metadata=None): the add operation. Refer to :meth:`add` for the meaning of the status and value. """ - self._state["add"] += 1 - ( solution, objective, @@ -759,77 +476,32 @@ def add_single(self, solution, objective, measures, metadata=None): index = self.index_of_single(measures) - # Only used for computing QD score. - old_objective = self._objective_arr[index] - - # Used for computing improvement value. - old_threshold = self._threshold_arr[index] - - # New solutions require special settings for old_objective and - # old_threshold. - was_occupied = self._occupied_arr[index] - if not was_occupied: - old_objective = self.dtype(0) - # If threshold_min is -inf, then we want CMA-ME behavior, which will - # compute the improvement value w.r.t. zero for new solutions. - # Otherwise, we will compute w.r.t. threshold_min. - old_threshold = (self.dtype(0) if self._threshold_min == -np.inf - else self._threshold_min) - - status = 0 # NOT_ADDED - # In the case where we want CMA-ME behavior, threshold_arr[index] - # is -inf for new cells, which satisfies this if condition. - if self._threshold_arr[index] < objective: - if was_occupied: - status = 1 # IMPROVE_EXISTING - else: - # Set this index to be occupied. 
- self._occupied_arr[index] = True - self._occupied_indices[self._num_occupied] = index - self._num_occupied += 1 - - status = 2 # NEW - - # This calculation works in the case where threshold_min is -inf - # because old_threshold will be set to 0.0 instead. - self._threshold_arr[index] = (old_threshold * - (1.0 - self._learning_rate) + - objective * self._learning_rate) - - # Insert into the archive. - self._objective_arr[index] = objective - self._measures_arr[index] = measures - self._solution_arr[index] = solution - self._metadata_arr[index] = metadata - - if status: - # Update archive stats. - self._objective_sum += objective - old_objective - new_qd_score = (self._objective_sum - - self.dtype(len(self)) * self._qd_score_offset) - - if self._stats.obj_max is None or objective > self._stats.obj_max: - new_obj_max = objective - self._best_elite = { - "solution": readonly(np.copy(self._solution_arr[index])), - "objective": objective, - "measures": readonly(np.copy(self._measures_arr[index])), - "index": index, - "metadata": metadata, - } - else: - new_obj_max = self._stats.obj_max - - self._stats = ArchiveStats( - num_elites=len(self), - coverage=self.dtype(len(self) / self.cells), - qd_score=new_qd_score, - norm_qd_score=self.dtype(new_qd_score / self.cells), - obj_max=new_obj_max, - obj_mean=self._objective_sum / self.dtype(len(self)), - ) + add_info = self._store.add( + np.array([index]), + { + "solution": np.expand_dims(solution, axis=0), + "objective": np.expand_dims(objective, axis=0), + "measures": np.expand_dims(measures, axis=0), + "metadata": np.expand_dims(metadata, axis=0), + }, + { + "dtype": self._dtype, + "learning_rate": self._learning_rate, + "threshold_min": self._threshold_min, + "objective_sum": self._objective_sum, + }, + [ + single_entry_with_threshold, + compute_objective_sum, + compute_best_index, + ], + ) + + if add_info["status"]: + self._stats_update(add_info.pop("objective_sum"), + add_info.pop("best_index")) - return status, objective - old_threshold + return add_info["status"][0], add_info["value"][0] def retrieve(self, measures_batch): """Retrieves the elites with measures in the same cells as the measures @@ -881,54 +553,28 @@ "measure_dim") check_finite(measures_batch, "measures_batch") - index_batch = self.index_of(measures_batch) - occupied_batch = self._occupied_arr[index_batch] - expanded_occupied_batch = occupied_batch[:, None] + occupied, data = self._store.retrieve(self.index_of(measures_batch)) return { + # For each occupied[i], this np.where selects + # data["solution"][i] if occupied[i] is True. + # Otherwise, it uses the alternate value (a solution array + # consisting of np.nan). "solution": - readonly( - # For each occupied_batch[i], this np.where selects - # self._solution_arr[index_batch][i] if occupied_batch[i] is - # True. Otherwise, it uses the alternate value (a solution - # array consisting of np.nan). - np.where( - expanded_occupied_batch, - self._solution_arr[index_batch], - np.full(self._solution_dim, np.nan), - )), + np.where(occupied[:, None], data["solution"], + np.full(self._solution_dim, np.nan)), + # Here the alternative is just a scalar np.nan. "objective": - readonly( - np.where( - occupied_batch, - self._objective_arr[index_batch], - # Here the alternative is just a scalar np.nan. - np.nan, - )), + np.where(occupied, data["objective"], np.nan), + # And here it is a measures array of np.nan.
"measures": - readonly( - np.where( - expanded_occupied_batch, - self._measures_arr[index_batch], - # And here it is a measures array of np.nan. - np.full(self._measure_dim, np.nan), - )), + np.where(occupied[:, None], data["measures"], + np.full(self._measure_dim, np.nan)), + # Indices must be integers, so np.nan would not work, so we use -1. "index": - readonly( - np.where( - occupied_batch, - index_batch, - # Indices must be integers, so np.nan would not work, - # hence we use -1. - -1, - )), + np.where(occupied, data["index"], -1), "metadata": - readonly( - np.where( - occupied_batch, - self._metadata_arr[index_batch], - None, - )), + np.where(occupied, data["metadata"], None), } def retrieve_single(self, measures): @@ -985,16 +631,10 @@ def sample_elites(self, n): if self.empty: raise IndexError("No elements in archive.") - random_indices = self._rng.integers(self._num_occupied, size=n) - selected_indices = self._occupied_indices[random_indices] - - return { - "solution": readonly(self._solution_arr[selected_indices]), - "objective": readonly(self._objective_arr[selected_indices]), - "measures": readonly(self._measures_arr[selected_indices]), - "index": readonly(selected_indices), - "metadata": readonly(self._metadata_arr[selected_indices]), - } + random_indices = self._rng.integers(len(self._store), size=n) + selected_indices = self._store.occupied_list[random_indices] + _, elites = self._store.retrieve(selected_indices) + return elites def as_pandas(self, include_solutions=True, include_metadata=False): """Converts the archive into an :class:`ArchiveDataFrame` (a child class @@ -1034,30 +674,12 @@ def as_pandas(self, include_solutions=True, include_metadata=False): Returns: ArchiveDataFrame: See above. """ # pylint: disable = line-too-long - data = OrderedDict() - indices = self._occupied_indices[:self._num_occupied] - - # Copy indices so we do not overwrite. - data["index"] = np.copy(indices) - - measures_batch = self._measures_arr[indices] - for i in range(self._measure_dim): - data[f"measures_{i}"] = measures_batch[:, i] - - data["objective"] = self._objective_arr[indices] - + fields = ["index", "measures", "objective"] if include_solutions: - solutions = self._solution_arr[indices] - for i in range(self._solution_dim): - data[f"solution_{i}"] = solutions[:, i] - + fields.append("solution") if include_metadata: - data["metadata"] = self._metadata_arr[indices] - - return ArchiveDataFrame( - data, - copy=False, # Fancy indexing above already results in copying. - ) + fields.append("metadata") + return ArchiveDataFrame(self._store.data(fields, return_type="pandas")) def cqd_score(self, iterations, @@ -1155,9 +777,10 @@ def cqd_score(self, penalties = np.copy(penalties) # Copy since we return this. check_is_1d(penalties, "penalties") - index_batch = self._occupied_indices[:self._num_occupied] - measures_batch = self._measures_arr[index_batch] - objective_batch = self._objective_arr[index_batch] + objective_batch, measures_batch = self._store.data( + ["objective", "measures"], + return_type="tuple", + ) norm_objectives = objective_batch / (obj_max - obj_min) diff --git a/ribs/archives/_sliding_boundaries_archive.py b/ribs/archives/_sliding_boundaries_archive.py index 347d671e5..b6d07d933 100644 --- a/ribs/archives/_sliding_boundaries_archive.py +++ b/ribs/archives/_sliding_boundaries_archive.py @@ -180,7 +180,7 @@ def __init__(self, # Allocate an extra entry in each row so we can put the upper bound at # the end. 
- self._boundaries = np.full((self._measure_dim, np.max(self._dims) + 1), + self._boundaries = np.full((self.measure_dim, np.max(self._dims) + 1), np.nan, dtype=self.dtype) @@ -191,7 +191,7 @@ def __init__(self, upper_bound, dim + 1) # Create buffer. - self._buffer = SolutionBuffer(buffer_capacity, self._measure_dim) + self._buffer = SolutionBuffer(buffer_capacity, self.measure_dim) # Total number of solutions encountered. self._total_num_sol = 0 @@ -343,18 +343,14 @@ def _remap(self): sorted_measures = self._buffer.sorted_measures # Calculate new boundaries. - for i in range(self._measure_dim): + for i in range(self.measure_dim): for j in range(self.dims[i]): sample_idx = int(j * self._buffer.size / self.dims[i]) self._boundaries[i][j] = sorted_measures[i][sample_idx] # Set the upper bound to be the greatest BC. self._boundaries[i][self.dims[i]] = sorted_measures[i][-1] - indices = self._occupied_indices[:self._num_occupied] - old_solution_batch = self._solution_arr[indices].copy() - old_objective_batch = self._objective_arr[indices].copy() - old_measures_batch = self._measures_arr[indices].copy() - old_metadata_batch = self._metadata_arr[indices].copy() + cur_data = self._store.data() ( new_solution_batch, @@ -382,10 +378,10 @@ def _remap(self): ArchiveBase.add( self, - np.concatenate((old_solution_batch, new_solution_batch)), - np.concatenate((old_objective_batch, new_objective_batch)), - np.concatenate((old_measures_batch, new_measures_batch)), - np.concatenate((old_metadata_batch, new_metadata_batch)), + np.concatenate((cur_data["solution"], new_solution_batch)), + np.concatenate((cur_data["objective"], new_objective_batch)), + np.concatenate((cur_data["measures"], new_measures_batch)), + np.concatenate((cur_data["metadata"], new_metadata_batch)), ) status, value = ArchiveBase.add_single(self, last_solution, diff --git a/ribs/archives/_transforms.py b/ribs/archives/_transforms.py new file mode 100644 index 000000000..f8b4f59bf --- /dev/null +++ b/ribs/archives/_transforms.py @@ -0,0 +1,283 @@ +"""Transform functions for :meth:`ribs.archives.ArrayStore.add`. + +This module is still fairly unstable, hence why it is private. We may make it +public in the future once it becomes more stable. +""" +import numpy as np +from numpy_groupies import aggregate_nb as aggregate + + +def single_entry_with_threshold(indices, new_data, add_info, extra_args, + occupied, cur_data): + """Transform function for adding a single entry. + + Assumptions: + + - ``indices`` and ``new_data`` have data for only one entry, e.g., + ``indices`` is length 1. + - ``new_data`` has an ``"objective"`` field and needs a ``"threshold"`` + field. + - ``extra_args`` contains ``"dtype"``, ``"threshold_min"``, and + ``"learning_rate"`` entries. + + In short, this transform checks if the objective exceeds the current + threshold, and if it does, it updates the threshold accordingly. There are + also some special cases to handle CMA-ME (as opposed to CMA-MAE) -- this + case corresponds to when ``threshold_min=-np.inf`` and ``learning_rate=1``. + + Since this transform operates on solutions one at a time, we do not + recommend it when performance is critical. Instead, it is included as a + relatively easy-to-modify example for users creating new archives. 
+ """ + if len(indices) != 1: + raise ValueError("This transform only supports single solutions, but " + f"indices had a length of {len(indices)}.") + + dtype = extra_args["dtype"] # e.g., np.float32 or np.float64 + threshold_min = extra_args["threshold_min"] # scalar value + learning_rate = extra_args["learning_rate"] # scalar value + + cur_occupied = occupied[0] + + # Used for computing improvement value. + cur_threshold = cur_data["threshold"][0] + + # New solutions require special settings for the threshold. + if not cur_occupied: + # If threshold_min is -inf, then we want CMA-ME behavior, which will + # compute the improvement value w.r.t. zero for new solutions. + # Otherwise, we will compute w.r.t. threshold_min. + cur_threshold = (dtype(0) + if threshold_min == -np.inf else threshold_min) + + # Retrieve candidate objective. + objective = new_data["objective"][0] + + # Compute status and threshold. + add_info["status"] = np.array([0]) # NOT_ADDED + # In the case where we want CMA-ME behavior, threshold_arr[index] is -inf + # for new cells, which satisfies this if condition. + if ((not cur_occupied and threshold_min < objective) or + (cur_occupied and cur_threshold < objective)): + if cur_occupied: + add_info["status"] = np.array([1]) # IMPROVE_EXISTING + else: + add_info["status"] = np.array([2]) # NEW + + # This calculation works in the case where threshold_min is -inf because + # cur_threshold will be set to 0.0 instead. + new_data["threshold"] = [ + (cur_threshold * (1.0 - learning_rate) + objective * learning_rate) + ] + + # Value is the improvement over the current threshold (can be negative). + add_info["value"] = np.array([objective - cur_threshold]) + + if add_info["status"]: + return indices, new_data, add_info + else: + # new_data is ignored, so make it an empty dict. + return np.array([], dtype=np.int32), {}, add_info + + +def _compute_thresholds(indices, objective, cur_threshold, learning_rate, + dtype): + """Computes new thresholds. + + The indices, objective, and cur_threshold should all align. Based on these + values, we will compute an array that holds the new threshold. The new array + will have duplicate thresholds that correspond to duplicates in indices. + """ + if len(indices) == 0: + return np.array([], dtype=dtype) + + # Compute the number of objectives inserted into each cell. Note that we + # index with `indices` to place the counts at all relevant indices. For + # instance, if we had an array [1,2,3,1,5], we would end up with [2,1,1,2,1] + # (there are 2 1's, 1 2, 1 3, 2 1's, and 1 5). + # + # All objective_sizes should be > 0 since we only retrieve counts for + # indices in `indices`. + objective_sizes = aggregate(indices, 1, func="len", fill_value=0)[indices] + + # Compute the sum of the objectives inserted into each cell -- again, we + # index with `indices`. + objective_sums = aggregate(indices, + objective, + func="sum", + fill_value=np.nan)[indices] + + # Update the threshold with the batch update rule from Fontaine 2022: + # https://arxiv.org/abs/2205.10752 + # + # Unlike in single_entry_with_threshold, we do not need to worry about + # cur_threshold having -np.inf here as a result of threshold_min being + # -np.inf. This is because the case with threshold_min = -np.inf is handled + # separately since we compute the new threshold based on the max objective + # in each cell in that case. 
+ ratio = dtype(1.0 - learning_rate)**objective_sizes + new_threshold = (ratio * cur_threshold + + (objective_sums / objective_sizes) * (1 - ratio)) + + return new_threshold + + +def batch_entries_with_threshold(indices, new_data, add_info, extra_args, + occupied, cur_data): + """Transform function for adding a batch of entries. + + Assumptions: + + - ``new_data`` has an ``"objective"`` field and needs a ``"threshold"`` + field. + - ``extra_args`` contains ``"dtype"``, ``"threshold_min"``, and + ``"learning_rate"`` entries. + + In short, this transform checks if the batch of solutions exceeds the + current thresholds of their cells. Among those that exceed the threshold, we + select the solution with the highest objective value. We also update the + threshold based on the batch update rule for CMA-MAE: + https://arxiv.org/abs/2205.10752 + + We also handle some special cases for CMA-ME -- this case corresponds to + when ``threshold_min=-np.inf`` and ``learning_rate=1``. + + Unlike ``single_entry_with_threshold``, this transform is fully vectorized + over the batch, so it is the transform that the archives use when adding + batches of solutions. + """ + dtype = extra_args["dtype"] + threshold_min = extra_args["threshold_min"] + learning_rate = extra_args["learning_rate"] + + batch_size = len(indices) + + cur_threshold = cur_data["threshold"] + cur_threshold[~occupied] = threshold_min # Default to threshold_min. + + # Compute status -- arrays below are all boolean arrays of length + # batch_size. + # + # In the case where we want CMA-ME behavior, the threshold defaults to -inf + # for new cells, which satisfies the condition for can_be_added. + can_be_added = new_data["objective"] > cur_threshold + is_new = can_be_added & ~occupied + improve_existing = can_be_added & occupied + add_info["status"] = np.zeros(batch_size, dtype=np.int32) + add_info["status"][is_new] = 2 + add_info["status"][improve_existing] = 1 + + # If threshold_min is -inf, then we want CMA-ME behavior, which will compute + # the improvement value of new solutions w.r.t. zero. Otherwise, we will + # compute improvement with respect to threshold_min. + cur_threshold[is_new] = (dtype(0) + if threshold_min == -np.inf else threshold_min) + add_info["value"] = new_data["objective"] - cur_threshold + + # Return early if we cannot insert anything -- continuing would actually + # throw a ValueError in aggregate() since index[can_insert] would be empty. + can_insert = is_new | improve_existing + if not np.any(can_insert): + return np.array([], dtype=np.int32), {}, add_info + + # Select all solutions that can be inserted -- at this point, there are + # still conflicts in the insertions, e.g., multiple solutions can map to + # index 0. + indices = indices[can_insert] + new_data = {name: arr[can_insert] for name, arr in new_data.items()} + cur_threshold = cur_threshold[can_insert] + + # Compute the new threshold associated with each entry. + if threshold_min == -np.inf: + # Regular archive behavior, so the thresholds are just the objective. + new_threshold = new_data["objective"] + else: + # Batch threshold update described in Fontaine 2022 + # (https://arxiv.org/abs/2205.10752). This computation is based on the mean + # objective of all solutions in the batch that could have been inserted + # into each cell. + new_threshold = _compute_thresholds(indices, new_data["objective"], + cur_threshold, learning_rate, dtype) + + # Retrieve indices of solutions that should be inserted into the archive.
+ # Currently, multiple solutions may be inserted at each archive index, but + # we only want to insert the maximum among these solutions. Thus, we obtain + # the argmax for each archive index. + # + # We use a fill_value of -1 to indicate archive indices that were not + # covered in the batch. Note that the length of archive_argmax is only + # max(indices), rather than the total number of grid cells. However, this is + # okay because we only need the indices of the solutions, which we store in + # should_insert. + # + # aggregate() always chooses the first item if there are ties, so the first + # elite will be inserted if there is a tie. See their default numpy + # implementation for more info: + # https://github.com/ml31415/numpy-groupies/blob/master/numpy_groupies/aggregate_numpy.py#L107 + archive_argmax = aggregate(indices, + new_data["objective"], + func="argmax", + fill_value=-1) + should_insert = archive_argmax[archive_argmax != -1] + + # Select only solutions that will be inserted into the archive. + indices = indices[should_insert] + new_data = {name: arr[should_insert] for name, arr in new_data.items()} + new_data["threshold"] = new_threshold[should_insert] + + return indices, new_data, add_info + + +def compute_objective_sum(indices, new_data, add_info, extra_args, occupied, + cur_data): + """Computes the new sum of objectives after inserting ``new_data``. + + Assumptions: + - ``new_data`` and ``cur_data`` have an ``"objective"`` field. + - ``extra_args`` contains ``"objective_sum"``, the current sum of + objectives. + + The new sum of objectives will be added to ``add_info`` with the key + ``"objective_sum"``. + + This transform should be placed near the end of a chain of transforms so + that it only considers solutions that are going to be inserted into the + store. + """ + cur_objective_sum = extra_args["objective_sum"] + if len(indices) == 0: + add_info["objective_sum"] = cur_objective_sum + else: + cur_objective = cur_data["objective"] + cur_objective[~occupied] = 0.0 # Unoccupied objectives should be 0. + add_info["objective_sum"] = ( + cur_objective_sum + np.sum(new_data["objective"] - cur_objective)) + return indices, new_data, add_info + + +def compute_best_index(indices, new_data, add_info, extra_args, occupied, + cur_data): + """Identifies the index of the best solution among those in new_data. + + Assumptions: + + - ``new_data`` has an ``"objective"`` field. + - The best solution will be the one with the highest objective value. + + The best index will be added to the ``add_info`` dict with the key + ``"best_index"``. If there is no best index, then ``"best_index"`` will be + None. + + This transform should be placed near the end of a chain of transforms so + that it only considers solutions that are going to be inserted into the + store. 
+ """ + # pylint: disable = unused-argument + + if len(indices) == 0: + add_info["best_index"] = None + else: + item_idx = np.argmax(new_data["objective"]) + add_info["best_index"] = indices[item_idx] + + return indices, new_data, add_info diff --git a/tests/archives/archive_base_test.py b/tests/archives/archive_base_test.py index 40a880fc6..b1e1211f2 100644 --- a/tests/archives/archive_base_test.py +++ b/tests/archives/archive_base_test.py @@ -175,6 +175,13 @@ def test_best_elite(add_mode): else: archive.add([[1, 2, 3]], [1.0], [[0, 0]]) + assert archive.best_elite["solution"].shape == (3,) + assert archive.best_elite["objective"].shape == () + assert archive.best_elite["measures"].shape == (2,) + # Seem to be spurious pylint warnings. + # pylint: disable-next=use-implicit-booleaness-not-comparison,comparison-with-callable + assert archive.stats.obj_max.shape == () + assert np.isclose(archive.best_elite["solution"], [1, 2, 3]).all() assert np.isclose(archive.best_elite["objective"], 1.0) assert np.isclose(archive.best_elite["measures"], [0, 0]).all() @@ -246,7 +253,7 @@ def test_index_of_single(): def test_index_of_single_wrong_shape(data): with pytest.raises(ValueError): - data.archive.retrieve_single(data.measures[:-1]) + data.archive.index_of_single(data.measures[:-1]) # diff --git a/tests/archives/archive_threshold_update_test.py b/tests/archives/archive_threshold_update_test.py index 1a243eb92..6cdd70f46 100644 --- a/tests/archives/archive_threshold_update_test.py +++ b/tests/archives/archive_threshold_update_test.py @@ -3,16 +3,9 @@ import pytest from ribs.archives import GridArchive +from ribs.archives._transforms import _compute_thresholds -from .conftest import get_archive_data - -# pylint: disable = redefined-outer-name - - -@pytest.fixture -def data(): - """Data for grid archive tests.""" - return get_archive_data("GridArchive") +# pylint: disable = redefined-outer-name, missing-function-docstring def update_threshold(threshold, f_val, learning_rate): @@ -37,62 +30,51 @@ def calc_expected_threshold(additions, cell_value, learning_rate): @pytest.mark.parametrize("learning_rate", [0, 0.001, 0.01, 0.1, 1]) -def test_threshold_update_for_one_cell(data, learning_rate): - archive = data.archive - - threshold_arr = np.array([-3.1]) - objective_batch = np.array([0.1, 0.3, 0.9, 400.0, 42.0]) - index_batch = np.array([0, 0, 0, 0, 0]) - - # pylint: disable = protected-access - result_test, _ = archive._compute_new_thresholds(threshold_arr, - objective_batch, - index_batch, learning_rate) - result_true = calc_expected_threshold(objective_batch, threshold_arr[0], +def test_threshold_update_for_one_cell(learning_rate): + cur_threshold = np.full(5, -3.1) + objective = np.array([0.1, 0.3, 0.9, 400.0, 42.0]) + indices = np.zeros(5, dtype=np.int32) + + result_test = _compute_thresholds(indices, objective, cur_threshold, + learning_rate, np.float64) + result_true = calc_expected_threshold(objective, cur_threshold[0], learning_rate) - assert pytest.approx(result_test[0]) == result_true + # The result should have 5 duplicate entries with the new threshold. 
+ assert result_test.shape == (5,) + assert np.all(np.isclose(result_test, result_true)) @pytest.mark.parametrize("learning_rate", [0, 0.001, 0.01, 0.1, 1]) -def test_threshold_update_for_multiple_cells(data, learning_rate): - archive = data.archive - - threshold_arr = np.array([-3.1, 0.4, 2.9]) - objective_batch = np.array([ +def test_threshold_update_for_multiple_cells(learning_rate): + cur_threshold = np.repeat([-3.1, 0.4, 2.9], 5) + objective = np.array([ 0.1, 0.3, 0.9, 400.0, 42.0, 0.44, 0.53, 0.51, 0.80, 0.71, 33.6, 61.78, 81.71, 83.48, 41.18 - ]) - index_batch = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2]) - - # pylint: disable = protected-access - result_test, _ = archive._compute_new_thresholds(threshold_arr, - objective_batch, - index_batch, learning_rate) - - result_true = [ - calc_expected_threshold(objective_batch[5 * i:5 * (i + 1)], - threshold_arr[i], learning_rate) + ]) # 15 values. + indices = np.repeat([0, 1, 2], 5) + + result_test = _compute_thresholds(indices, objective, cur_threshold, + learning_rate, np.float64) + result_true = np.repeat([ + calc_expected_threshold(objective[5 * i:5 * (i + 1)], + cur_threshold[5 * i], learning_rate) for i in range(3) - ] + ], 5) + assert result_test.shape == (15,) assert np.all(np.isclose(result_test, result_true)) -def test_threshold_update_for_empty_objective_and_index(data): - archive = data.archive - - threshold_arr = np.array([-3.1, 0.4, 2.9]) - objective_batch = np.array([]) # Empty objective. - index_batch = np.array([]) # Empty index. +def test_threshold_update_for_empty_objective_and_index(): + cur_threshold = np.array([]) + objective = np.array([]) # Empty objective. + indices = np.array([], dtype=np.int32) # Empty index. - # pylint: disable = protected-access - new_threshold_batch, threshold_update_indices = ( - archive._compute_new_thresholds(threshold_arr, objective_batch, - index_batch, 0.1)) + new_threshold = _compute_thresholds(indices, objective, cur_threshold, 0.1, + np.float64) - assert new_threshold_batch.size == 0 - assert threshold_update_indices.size == 0 + assert new_threshold.shape == (0,) def test_init_learning_rate_and_threshold_min(): diff --git a/tests/archives/transforms_test.py b/tests/archives/transforms_test.py new file mode 100644 index 000000000..0ca1fda06 --- /dev/null +++ b/tests/archives/transforms_test.py @@ -0,0 +1,61 @@ +"""Tests for transforms.""" +import numpy as np + +from ribs.archives._transforms import compute_best_index, compute_objective_sum + + +def test_objective_sum(): + _, _, add_info = compute_objective_sum( + indices=np.array([0, 10, 6]), + new_data={"objective": np.array([1.0, 5.0, 3.0])}, + add_info={}, + extra_args={"objective_sum": 10.0}, + occupied=np.array([True, False, True]), + cur_data={"objective": np.array([0.0, 2.0, 4.0])}, + ) + + assert "objective_sum" in add_info + assert np.isclose(add_info["objective_sum"], + (10.0 + (1.0 - 0.0) + (5.0 - 0.0) + (3.0 - 4.0))) + + +def test_objective_sum_no_indices(): + _, _, add_info = compute_objective_sum( + indices=np.array([]), + new_data={"objective": np.array([])}, + add_info={}, + extra_args={"objective_sum": 10.0}, + occupied=np.array([]), + cur_data={"objective": np.array([])}, + ) + + assert "objective_sum" in add_info + assert add_info["objective_sum"] == 10.0 + + +def test_best_index(): + _, _, add_info = compute_best_index( + indices=np.array([0, 10, 6]), + new_data={"objective": np.array([1.0, 5.0, 3.0])}, + add_info={}, + extra_args={}, + occupied=np.array([True, False, True]), + cur_data={"objective": 
np.array([0.0, 2.0, 4.0])}, + ) + + assert "best_index" in add_info + assert add_info["best_index"] == 10 + + +def test_best_index_no_indices(): + _, _, add_info = compute_best_index( + indices=np.array([]), + new_data={"objective": np.array([])}, + add_info={}, + extra_args={}, + occupied=np.array([]), + cur_data={"objective": np.array([])}, + ) + + assert "best_index" in add_info + assert add_info["best_index"] is None From 299e56c74dd13300b49e3ad009816189066ff084 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Wed, 8 Nov 2023 00:46:50 -0800 Subject: [PATCH 12/19] Add field_list property to ArrayStore (#407) ## Description The field_desc property can provide a bit too much info on the ArrayStore; we may only want to see the list of fields. This PR provides such a property. ## TODO - [x] Implement - [x] Document - [x] Test ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 2 +- ribs/archives/_array_store.py | 25 ++++++++++++++++++++----- tests/archives/array_store_test.py | 1 + 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index d5d72c3fe..ee7b357f4 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -11,7 +11,7 @@ - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in `as_pandas` ({pr}`396`) - Add ArrayStore data structure ({pr}`395`, {pr}`398`, {pr}`400`, {pr}`402`, - {pr}`403`, {pr}`404`, {pr}`406`) + {pr}`403`, {pr}`404`, {pr}`406`, {pr}`407`) - Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) #### Improvements diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index c2d295588..8667491c9 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -175,7 +175,7 @@ def occupied_list(self): @cached_property def field_desc(self): - """dict: Description of fields in the array store. + """dict: Description of fields in the store. Example: @@ -189,13 +189,28 @@ def field_desc(self): See the constructor ``field_desc`` parameter for more info. Unlike in the field_desc in the constructor, which accepts ints for 1D field shapes (e.g., ``5``), this field_desc shows 1D field shapes as tuples of - 1 entry (e.g., ``(5,)``). + 1 entry (e.g., ``(5,)``). Since dicts in Python are ordered, note that + this dict will have the same order as in the constructor. """ return { name: (arr.shape[1:], arr.dtype) for name, arr in self._fields.items() } + @cached_property + def field_list(self): + """list: List of fields in the store. + + Example: + + :: + + store.field_list == ["objective", "measures"] + """ + # Python dicts are ordered, so this will follow the same order as in the + # constructor. + return list(self._fields) + def retrieve(self, indices, fields=None, return_type="dict"): """Collects data at the given indices. @@ -337,10 +352,10 @@ def data(self, fields=None, return_type="dict"): Args: fields (array-like of str): See :meth:`retrieve`. + return_type (str): See :meth:`retrieve`. Returns: - dict or tuple: See ``data`` in :meth:`retrieve`. ``occupied`` is not - returned since all indices are known to be occupied in this - method. + See ``data`` in :meth:`retrieve`. 
``occupied`` is not returned since + all indices are known to be occupied in this method. """ return self.retrieve(self.occupied_list, fields, return_type)[1] diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 400023bbc..ac74e4084 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -52,6 +52,7 @@ def test_init(shape): "solution": ( (shape[2],) if isinstance(shape[2], int) else shape[2], np.float32), } + assert store.field_list == ["objective", "measures", "solution"] @pytest.fixture From e08c1cc02c980f6ce278fa2534b04dbb5c1cbada Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Wed, 8 Nov 2023 01:33:37 -0800 Subject: [PATCH 13/19] Include threshold in `archive.best_elite` (#409) ## Description Previously, we popped the threshold from best_elite to maintain backwards compatibility. This PR adds the threshold back so that best_elite aligns with the other archive methods. ## TODO - [x] Make tighter tests for best_elite - [x] Add threshold back to best_elite (i.e., no longer pop it) ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 1 + ribs/archives/_archive_base.py | 6 +++++- tests/archives/archive_base_test.py | 7 +++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index ee7b357f4..5cb0d7ea2 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,7 @@ #### API +- Include threshold in `archive.best_elite` ({pr}`409`) - **Backwards-incompatible:** Replace Elite and EliteBatch with dicts ({pr}`397`) - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in diff --git a/ribs/archives/_archive_base.py b/ribs/archives/_archive_base.py index 342c83f00..1936941ee 100644 --- a/ribs/archives/_archive_base.py +++ b/ribs/archives/_archive_base.py @@ -164,6 +164,11 @@ def best_elite(self): the *threshold* of the cell they are being inserted into, not the *objective* of the elite currently in the cell. See :pr:`314` for more info. + + .. note:: + The best elite will contain a "threshold" key. This threshold is the + threshold of the best elite's cell after the best elite was inserted + into the archive. """ return self._best_elite @@ -268,7 +273,6 @@ def _stats_update(self, new_objective_sum, new_best_index): new_best_elite["objective"] > self._stats.obj_max): # Convert batched values to single values. 
             new_best_elite = {k: v[0] for k, v in new_best_elite.items()}
-            new_best_elite.pop("threshold")
             new_obj_max = new_best_elite["objective"]
             self._best_elite = new_best_elite
diff --git a/tests/archives/archive_base_test.py b/tests/archives/archive_base_test.py
index b1e1211f2..51fd2ef90 100644
--- a/tests/archives/archive_base_test.py
+++ b/tests/archives/archive_base_test.py
@@ -175,9 +175,14 @@ def test_best_elite(add_mode):
     else:
         archive.add([[1, 2, 3]], [1.0], [[0, 0]])
 
+    assert archive.best_elite.keys() == {
+        "solution", "objective", "measures", "metadata", "threshold", "index"
+    }
+
     assert archive.best_elite["solution"].shape == (3,)
     assert archive.best_elite["objective"].shape == ()
     assert archive.best_elite["measures"].shape == (2,)
+    assert archive.best_elite["threshold"].shape == ()
     # Seem to be spurious pylint warnings.
     # pylint: disable-next=use-implicit-booleaness-not-comparison,comparison-with-callable
     assert archive.stats.obj_max.shape == ()
@@ -185,6 +190,7 @@
     assert np.isclose(archive.best_elite["solution"], [1, 2, 3]).all()
     assert np.isclose(archive.best_elite["objective"], 1.0)
     assert np.isclose(archive.best_elite["measures"], [0, 0]).all()
+    assert np.isclose(archive.best_elite["threshold"], 1.0).all()
     assert np.isclose(archive.stats.obj_max, 1.0)
 
     # Add an elite into the same cell as the previous elite -- best_elite should
@@ -197,6 +203,7 @@
     assert np.isclose(archive.best_elite["solution"], [4, 5, 6]).all()
     assert np.isclose(archive.best_elite["objective"], 2.0).all()
     assert np.isclose(archive.best_elite["measures"], [0, 0]).all()
+    assert np.isclose(archive.best_elite["threshold"], 2.0).all()
     assert np.isclose(archive.stats.obj_max, 2.0)

From 8e13081a4063ac274b48f86c9863873417699ad2 Mon Sep 17 00:00:00 2001
From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com>
Date: Thu, 9 Nov 2023 11:58:19 -0800
Subject: [PATCH 14/19] Support single fields in ArrayStore.retrieve (#411)

## Description

This adds a shortcut whereby a single array can be retrieved from an
ArrayStore. For instance, `occupied, objective = store.retrieve("objective")`.
The shortcut also extends to the `data` method, e.g.,
`objective = store.data("objective")`.
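For a quick illustration, here is a minimal sketch of the two call styles. The
store, field description, and values below are made up for this example:

```python
import numpy as np

from ribs.archives import ArrayStore

# Hypothetical store with a single scalar "objective" field.
store = ArrayStore(field_desc={"objective": ((), np.float64)}, capacity=10)
store.add([3, 5], {"objective": [1.0, 2.0]}, {}, [])  # Empty extra_args/transforms.

# Without the shortcut, fields are wrapped in a list, and the result is a dict
# keyed by field name.
occupied, data = store.retrieve([5, 3], fields=["objective"])
objective = data["objective"]  # array([2., 1.])

# With the shortcut, a single str returns just that field's array.
occupied, objective = store.retrieve([5, 3], fields="objective")

# data() supports the same shortcut; it omits the `occupied` array.
objective = store.data("objective")
```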
## TODO ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 2 +- ribs/archives/_array_store.py | 33 ++++++++++++++++++++---------- tests/archives/array_store_test.py | 18 ++++++++++++++++ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 5cb0d7ea2..673a85786 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -12,7 +12,7 @@ - **Backwards-incompatible:** Rename `measure_*` columns to `measures_*` in `as_pandas` ({pr}`396`) - Add ArrayStore data structure ({pr}`395`, {pr}`398`, {pr}`400`, {pr}`402`, - {pr}`403`, {pr}`404`, {pr}`406`, {pr}`407`) + {pr}`403`, {pr}`404`, {pr}`406`, {pr}`407`, {pr}`411`) - Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) #### Improvements diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 8667491c9..469c414c0 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -216,11 +216,12 @@ def retrieve(self, indices, fields=None, return_type="dict"): Args: indices (array-like): List of indices at which to collect data. - fields (array-like of str): List of fields to include. By default, - all fields will be included, with an additional "index" as the - last field ("index" can also be placed anywhere in this list). + fields (str or array-like of str): List of fields to include. By + default, all fields will be included, with an additional "index" + as the last field ("index" can also be placed anywhere in this + list). This can also be a single str indicating a field name. return_type (str): Type of data to return. See the ``data`` returned - below. + below. Ignored if ``fields`` is a str. Returns: tuple: 2-element tuple consisting of: @@ -235,8 +236,10 @@ def retrieve(self, indices, fields=None, return_type="dict"): not occupied, then the 6.0 returned in the ``dict`` example below should be ignored. - - **data**: The data at the given indices. This can take the - following forms, depending on the ``return_type`` argument: + - **data**: The data at the given indices. If ``fields`` was a + single str, this will just be an array holding data for the given + field. Otherwise, this data can take the following forms, + depending on the ``return_type`` argument: - ``return_type="dict"``: Dict mapping from the field name to the field data at the given indices. For instance, if we have an @@ -296,18 +299,24 @@ def retrieve(self, indices, fields=None, return_type="dict"): ValueError: Invalid field name provided. ValueError: Invalid return_type provided. """ + single_field = isinstance(fields, str) indices = np.asarray(indices, dtype=np.int32) occupied = self._props["occupied"][indices] # Induces copy. - if return_type in ("dict", "pandas"): + if single_field: + data = None + elif return_type in ("dict", "pandas"): data = {} elif return_type == "tuple": data = [] else: raise ValueError(f"Invalid return_type {return_type}.") - fields = (itertools.chain(self._fields, ["index"]) - if fields is None else fields) + if single_field: + fields = [fields] + elif fields is None: + fields = itertools.chain(self._fields, ["index"]) + for name in fields: # Collect array data. 
             #
@@ -321,7 +330,9 @@ def retrieve(self, indices, fields=None, return_type="dict"):
                 raise ValueError(f"`{name}` is not a field in this ArrayStore.")
 
             # Accumulate data into the return type.
-            if return_type == "dict":
+            if single_field:
+                data = arr
+            elif return_type == "dict":
                 data[name] = arr
             elif return_type == "tuple":
                 data.append(arr)
@@ -351,7 +362,7 @@ def data(self, fields=None, return_type="dict"):
         Equivalent to calling :meth:`retrieve` with :attr:`occupied_list`.
 
         Args:
-            fields (array-like of str): See :meth:`retrieve`.
+            fields (str or array-like of str): See :meth:`retrieve`.
             return_type (str): See :meth:`retrieve`.
         Returns:
             See ``data`` in :meth:`retrieve`. ``occupied`` is not returned since
diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py
index ac74e4084..3bb00d7d3 100644
--- a/tests/archives/array_store_test.py
+++ b/tests/archives/array_store_test.py
@@ -309,6 +309,24 @@ def test_retrieve_custom_fields(store, return_type):
     assert np.all(df["objective"] == [2.0, 1.0])
 
 
+def test_retrieve_single_field(store):
+    store.add(
+        [3, 5],
+        {
+            "objective": [1.0, 2.0],
+            "measures": [[1.0, 2.0], [3.0, 4.0]],
+            "solution": [np.zeros(10), np.ones(10)],
+        },
+        {},  # Empty extra_args.
+        [],  # Empty transforms.
+    )
+
+    occupied, data = store.retrieve([5, 3], fields="objective")
+
+    assert np.all(occupied == [True, True])
+    assert np.all(data == [2.0, 1.0])
+
+
 def test_add_simple_transform(store):
 
     def obj_meas(indices, new_data, add_info, extra_args, occupied, cur_data):

From 0bb298f62eb7aaee50970216fb4e67f635429be7 Mon Sep 17 00:00:00 2001
From: Henry Chen <71111859+HenryChen4@users.noreply.github.com>
Date: Thu, 9 Nov 2023 16:55:34 -0800
Subject: [PATCH 15/19] Add centroid benchmarking (#405)

## Description

## TODO

- [ ] TODO 1

## Questions

- [ ] Question 1

## Status

- [ ] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md)
- [ ] I have formatted my code using `yapf`
- [ ] I have tested my code by running `pytest`
- [ ] I have linted my code with `pylint`
- [ ] I have added a one-line description of my change to the changelog in `HISTORY.md`
- [ ] This PR is ready to go

---
 benchmarks/benchmark.py | 92 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 benchmarks/benchmark.py

diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
new file mode 100644
index 000000000..b244cdd2f
--- /dev/null
+++ b/benchmarks/benchmark.py
@@ -0,0 +1,92 @@
+"""Quantifies the performance of different centroid generation techniques.
+
+To measure how well a generation technique (e.g., random centroids, CVT)
+performs, we measure the probability that a randomly generated point lands
+within the region defined by each centroid.
+
+The equations for this benchmark can be found in Mouret 2023:
+https://dl.acm.org/doi/pdf/10.1145/3583133.3590726.
+
+Usage:
+    python benchmark.py
+
+This script will generate centroids using two techniques, CVT and random
+generation. These centroids will then be evaluated by the get_score()
+function, which outputs a score where lower values indicate better centroids.
+"""
+
+import numpy as np
+from scipy.spatial import distance
+
+from ribs.archives import CVTArchive
+
+
+def get_score(centroids, num_samples, seed):
+    """Returns the performance of the generated centroids.
+
+    Args:
+        centroids (numpy.ndarray): Centroids being evaluated.
+        num_samples (int): Number of random points to generate.
+        seed (int): RNG seed.
+
+    Returns:
+        float: Deviation of the sampled region volumes from a uniform
+            distribution; lower scores indicate better centroids.
+    """
+
+    num_centroids = centroids.shape[0]
+    centroid_dim = centroids.shape[1]
+
+    rng = np.random.default_rng(seed=seed)
+    random_samples = rng.random(size=(num_samples, centroid_dim))
+
+    num_closest_pts = np.zeros(num_centroids)
+
+    closest_idx = distance.cdist(random_samples, centroids).argmin(axis=1)
+
+    for idx in closest_idx:
+        num_closest_pts[idx] += 1
+    # Note: The method in the paper detailed an additional division of
+    # centroid_vol by num_samples. We did not include that division here;
+    # however, the results remain similar to the paper's.
+
+    centroid_vol = num_closest_pts / num_samples
+
+    score = np.sum(np.abs(centroid_vol - 1 / num_centroids))
+
+    return score
+
+
+def main():
+    """Benchmarks two of the centroid generation techniques studied in the
+    aforementioned paper: CVT generation and random generation.
+    """
+
+    score_seed = 1
+    num_samples = 10000
+    archive = CVTArchive(
+        solution_dim=20,
+        cells=512,
+        ranges=[(0., 1.), (0., 1.)],
+    )
+    cvt_centroids = archive.centroids
+    print(
+        "Score for CVT generation: ",
+        get_score(centroids=cvt_centroids,
+                  num_samples=num_samples,
+                  seed=score_seed))
+
+    centroid_gen_seed = 100
+    num_centroids = 1024
+    dim = 2
+    rng = np.random.default_rng(seed=centroid_gen_seed)
+    random_centroids = rng.random((num_centroids, dim))
+    print(
+        "Score for random generation: ",
+        get_score(centroids=random_centroids,
+                  num_samples=num_samples,
+                  seed=score_seed))
+
+
+if __name__ == "__main__":
+    main()

From c26b63a64f88f2028970503e030dd92c83dd8747 Mon Sep 17 00:00:00 2001
From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com>
Date: Thu, 9 Nov 2023 19:50:27 -0800
Subject: [PATCH 16/19] Add field_list and data methods to archives (#412)

## Description

The overall goal of this PR is to make it easier to access the data contained
in each archive.
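For reference, here is a rough sketch of the intended usage. The archive
construction below is illustrative only (any archive exposes the same
interface, since the methods live on `ArchiveBase`):

```python
from ribs.archives import GridArchive

# Illustrative archive: 3D solutions, 2D measure space.
archive = GridArchive(solution_dim=3, dims=[20, 20], ranges=[(-1, 1), (-1, 1)])
archive.add([[1, 2, 3]], [1.0], [[0, 0]])

# List of fields stored by the archive.
print(archive.field_list)

# All data as a dict mapping from field name to a numpy array.
data = archive.data()

# Selected fields, returned as a tuple of arrays in the given order.
objective, measures = archive.data(["objective", "measures"],
                                   return_type="tuple")

# Everything as an ArchiveDataFrame.
df = archive.data(return_type="pandas")
```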
## TODO

- [x] Introduce a `data()` method that returns the archive data in many forms
      -> this method primarily passes calls to `ArrayStore.data`
- [x] Test data() by modifying old as_pandas tests (we do not place too much
      emphasis on testing since ArrayStore.data is already tested fairly
      thoroughly)
- [x] Add a `field_list` method that shows the list of all fields in the
      archive

## Questions

## Status

- [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md)
- [x] I have formatted my code using `yapf`
- [x] I have tested my code by running `pytest`
- [x] I have linted my code with `pylint`
- [x] I have added a one-line description of my change to the changelog in `HISTORY.md`
- [x] This PR is ready to go

---
 HISTORY.md                          |  1 +
 ribs/archives/_archive_base.py      | 89 ++++++++++++++++++++++++++++-
 tests/archives/archive_base_test.py | 50 ++++++++--------
 3 files changed, 112 insertions(+), 28 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index 673a85786..df9070f2d 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -6,6 +6,7 @@
 
 #### API
 
+- Add field_list and data methods to archives ({pr}`412`)
 - Include threshold in `archive.best_elite` ({pr}`409`)
 - **Backwards-incompatible:** Replace Elite and EliteBatch with dicts
   ({pr}`397`)
diff --git a/ribs/archives/_archive_base.py b/ribs/archives/_archive_base.py
index 1936941ee..843c5232a 100644
--- a/ribs/archives/_archive_base.py
+++ b/ribs/archives/_archive_base.py
@@ -16,7 +16,8 @@
                                        single_entry_with_threshold)
 
 
-class ArchiveBase(ABC):  # pylint: disable = too-many-instance-attributes
+class ArchiveBase(ABC):
+    # pylint: disable = too-many-instance-attributes, too-many-public-methods
     """Base class for archives.
 
     This class composes archives using an :class:`ArrayStore` that has
@@ -110,6 +111,11 @@ def __init__(self,
             capacity=self._cells,
         )
 
+    @property
+    def field_list(self):
+        """list: List of data fields in the archive."""
+        return self._store.field_list
+
     @property
     def cells(self):
         """int: Total number of cells in the archive."""
@@ -640,6 +646,87 @@ def sample_elites(self, n):
         _, elites = self._store.retrieve(selected_indices)
         return elites
 
+    def data(self, fields=None, return_type="dict"):
+        """Retrieves data for all elites in the archive.
+
+        Args:
+            fields (array-like of str): List of fields to include. By default,
+                all fields will be included (see :attr:`field_list`), with an
+                additional "index" as the last field ("index" can also be placed
+                anywhere in this list).
+            return_type (str): Type of data to return. See below.
+
+        Returns:
+            The data for all elites in the archive. This can take the following
+            forms, depending on the ``return_type`` argument:
+
+            - ``return_type="dict"``: Dict mapping from the field name to the
+              field data for all elites. An example is::
+
+                  {
+                    "solution": [[1.0, 1.0, ...], ...],
+                    "objective": [1.5, ...],
+                    "measures": [[1.0, 2.0], ...],
+                    "threshold": [0.8, ...],
+                    "index": [4, ...],
+                  }
+
+              Observe that we also return the indices as an ``index`` entry in
+              the dict. The keys in this dict can be modified with the
+              ``fields`` arg; duplicate fields will be ignored since the dict
+              stores unique keys.
+
+            - ``return_type="tuple"``: Tuple of arrays matching the field order
+              given in ``fields``. For instance, if ``fields`` was
+              ``["objective", "measures"]``, we would receive a tuple of
+              ``(objective_arr, measures_arr)``. In this case, the results
+              from ``data`` could be unpacked as::
+
+                  objective, measures = archive.data(["objective", "measures"])
+
+              Unlike with the ``dict`` return type, duplicate fields will show
+              up as duplicate entries in the tuple, e.g.,
+              ``fields=["objective", "objective"]`` will result in two
+              objective arrays being returned.
+
+              By default (i.e., when ``fields=None``), the fields in the tuple
+              will be ordered according to the :attr:`field_list` along with
+              ``index`` as the last field.
+
+            - ``return_type="pandas"``: A
+              :class:`~ribs.archives.ArchiveDataFrame` with the following
+              columns:
+
+              - For fields that are scalars, a single column with the field
+                name. For example, ``objective`` would have a single column
+                called ``objective``.
+              - For fields that are 1D arrays, multiple columns with the name
+                suffixed by its index. For instance, if we have a ``measures``
+                field of length 10, we create 10 columns with names
+                ``measures_0``, ``measures_1``, ..., ``measures_9``. We do not
+                currently support fields with >1D data.
+              - 1 column of integers (``np.int32``) for the index, named
+                ``index``.
+
+              In short, the dataframe might look like this by default:
+
+              +------------+------+-----------+------------+------+-----------+-------+
+              | solution_0 | ...  | objective | measures_0 | ...  | threshold | index |
+              +============+======+===========+============+======+===========+=======+
+              |            | ...  |           |            | ...  |           |       |
+              +------------+------+-----------+------------+------+-----------+-------+
+
+              Like the other return types, the columns can be adjusted with
+              the ``fields`` parameter.
+
+        All data returned by this method will be a copy, i.e., the data will
+        not update as the archive changes.
+        """  # pylint: disable = line-too-long
+        data = self._store.data(fields, return_type)
+        if return_type == "pandas":
+            data = ArchiveDataFrame(data)
+        return data
+
     def as_pandas(self, include_solutions=True, include_metadata=False):
         """Converts the archive into an :class:`ArchiveDataFrame` (a child
         class of :class:`pandas.DataFrame`).
diff --git a/tests/archives/archive_base_test.py b/tests/archives/archive_base_test.py
index 51fd2ef90..84a50ef13 100644
--- a/tests/archives/archive_base_test.py
+++ b/tests/archives/archive_base_test.py
@@ -316,6 +316,12 @@ def test_qd_score_offset_correct(data):
     assert data.archive.qd_score_offset == 0.0  # Default value.
 
 
+def test_field_list_correct(data):
+    assert data.archive.field_list == [
+        "solution", "objective", "measures", "metadata", "threshold"
+    ]
+
+
 def test_basic_stats(data):
     assert data.archive.stats.num_elites == 0
     assert data.archive.stats.coverage == 0.0
@@ -395,36 +401,27 @@ def test_sample_elites_fails_when_empty(data):
 
 @pytest.mark.parametrize("name", ARCHIVE_NAMES)
 @pytest.mark.parametrize("with_elite", [True, False], ids=["nonempty", "empty"])
-@pytest.mark.parametrize("include_solutions", [True, False],
-                         ids=["solutions", "no_solutions"])
-@pytest.mark.parametrize("include_metadata", [True, False],
-                         ids=["metadata", "no_metadata"])
 @pytest.mark.parametrize("dtype", [np.float64, np.float32],
                          ids=["float64", "float32"])
-def test_as_pandas(name, with_elite, include_solutions, include_metadata,
-                   dtype):
+def test_pandas_data(name, with_elite, dtype):
     data = get_archive_data(name, dtype)
 
     # Set up expected columns and data types.
- measure_cols = [f"measures_{i}" for i in range(len(data.measures))] - expected_cols = ["index"] + measure_cols + ["objective"] - expected_dtypes = [np.int32, *[dtype for _ in measure_cols], dtype] - if include_solutions: - solution_cols = [f"solution_{i}" for i in range(len(data.solution))] - expected_cols += solution_cols - expected_dtypes += [dtype for _ in solution_cols] - if include_metadata: - expected_cols.append("metadata") - expected_dtypes.append(object) + solution_dim = len(data.solution) + measure_dim = len(data.measures) + expected_cols = ([f"solution_{i}" for i in range(solution_dim)] + + ["objective"] + + [f"measures_{i}" for i in range(measure_dim)] + + ["metadata", "threshold", "index"]) + expected_dtypes = ([dtype for _ in range(solution_dim)] + [dtype] + + [dtype for _ in range(measure_dim)] + + [object, dtype, np.int32]) # Retrieve the dataframe. if with_elite: - df = data.archive_with_elite.as_pandas( - include_solutions=include_solutions, - include_metadata=include_metadata) + df = data.archive_with_elite.data(return_type="pandas") else: - df = data.archive.as_pandas(include_solutions=include_solutions, - include_metadata=include_metadata) + df = data.archive.data(return_type="pandas") # Check columns and data types. assert (df.columns == expected_cols).all() @@ -441,9 +438,8 @@ def test_as_pandas(name, with_elite, include_solutions, include_metadata, assert df.loc[0, "index"] == data.archive.grid_to_int_index( [data.grid_indices])[0] - expected_data = [*data.measures, data.objective] - if include_solutions: - expected_data += list(data.solution) - if include_metadata: - expected_data.append(data.metadata) - assert (df.loc[0, "measures_0":] == expected_data).all() + expected_data = [ + *data.solution, data.objective, *data.measures, data.metadata, + data.objective + ] + assert (df.loc[0, :"threshold"] == expected_data).all() From 122378c95c679ae7de97083c903a573baad54f25 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Thu, 9 Nov 2023 21:32:37 -0800 Subject: [PATCH 17/19] Replace ArchiveDataFrame batch methods with `get_field` (#413) --- HISTORY.md | 2 + docs/_templates/autosummary/class.rst | 6 +- ribs/archives/_archive_data_frame.py | 160 +++++++----------- ribs/visualize/_cvt_archive_3d_plot.py | 6 +- ribs/visualize/_cvt_archive_heatmap.py | 6 +- ribs/visualize/_grid_archive_heatmap.py | 8 +- ribs/visualize/_parallel_axes_plot.py | 4 +- .../_sliding_boundaries_archive_heatmap.py | 4 +- tests/archives/archive_data_frame_test.py | 61 ++++--- tests/archives/grid_archive_test.py | 23 +-- tutorials/lsi_mnist.ipynb | 13 +- 11 files changed, 126 insertions(+), 167 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index df9070f2d..8f9d2c490 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,8 @@ #### API +- **Backwards-incompatible:** Replace ArchiveDataFrame batch methods with + `get_field` ({pr}`413`) - Add field_list and data methods to archives ({pr}`412`) - Include threshold in `archive.best_elite` ({pr}`409`) - **Backwards-incompatible:** Replace Elite and EliteBatch with dicts diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst index a39703926..a702a7708 100644 --- a/docs/_templates/autosummary/class.rst +++ b/docs/_templates/autosummary/class.rst @@ -21,11 +21,7 @@ .. 
autosummary:: {% if name == "ArchiveDataFrame" %} - ~{{ name }}.solution_batch - ~{{ name }}.objective_batch - ~{{ name }}.measures_batch - ~{{ name }}.index_batch - ~{{ name }}.metadata_batch + ~{{ name }}.get_field ~{{ name }}.iterelites {% else %} {% for item in all_methods %} diff --git a/ribs/archives/_archive_data_frame.py b/ribs/archives/_archive_data_frame.py index c1b4608bd..ade04fb35 100644 --- a/ribs/archives/_archive_data_frame.py +++ b/ribs/archives/_archive_data_frame.py @@ -1,5 +1,6 @@ """Provides ArchiveDataFrame.""" -import numpy as np +import re + import pandas as pd # Developer Notes: @@ -20,10 +21,10 @@ class ArchiveDataFrame(pd.DataFrame): Example: - This object is created by :meth:`~ArchiveBase.as_pandas` (i.e. users + This object is created by :meth:`~ArchiveBase.data` (i.e. users typically do not create it on their own):: - df = archive.as_pandas() + df = archive.data(..., return_type="pandas") To iterate through every elite as a dict, use:: @@ -32,26 +33,25 @@ class ArchiveDataFrame(pd.DataFrame): elite["objective"] ... - There are also methods to access the solutions, objectives, etc. of - all elites in the archive. For instance, the following is an array - where entry ``i`` contains the measures of the ``i``'th elite in the - DataFrame:: + Arrays corresponding to individual fields can be accessed with + :meth:`get_field`. For instance, the following is an array where entry + ``i`` contains the measures of the ``i``'th elite in the DataFrame:: - df.measures_batch() + df.get_field("measures") .. warning:: - Accessing ``batch`` methods (e.g. :meth:`measures_batch`) always - creates a copy, so the following will copy the measures 3 times:: + Calling :meth:`get_field` always creates a copy, so the following will + copy the measures 3 times:: - df.measures_batch()[0] - df.measures_batch().mean() - df.measures_batch().median() + df.get_field("measures")[0] + df.get_field("measures").mean() + df.get_field("measures").median() **Thus, if you need to use the method several times, we recommend storing it first, like so**:: - measures_batch = df.measures_batch() + measures_batch = df.get_field("measures") measures_batch[0] measures_batch.mean() measures_batch.median() @@ -67,10 +67,10 @@ class ArchiveDataFrame(pd.DataFrame): .. note:: - All the ``batch`` methods "align" with each other -- i.e. - ``measures_batch()[i]`` corresponds to ``index_batch()[i]``, - ``metadata_batch()[i]``, ``objective_batch()[i]``, and - ``solution_batch()[i]``. + Results of :meth:`get_field` "align" with each other -- e.g. + ``get_field("measures")[i]`` corresponds to ``get_field("index")[i]``, + ``get_field("metadata")[i]``, ``get_field("objective")[i]``, and + ``get_field("solution")[i]``. """ def __init__(self, *args, **kwargs): @@ -81,91 +81,55 @@ def _constructor(self): return ArchiveDataFrame def iterelites(self): - """Iterator that outputs every elite in the ArchiveDataFrame. - - Data which is unavailable will be turned into None. For example, if - there are no solution columns, then ``elite["solution"]`` will be None. + """Iterator that outputs every elite in the ArchiveDataFrame as a dict. """ - solution_batch = self.solution_batch() - objective_batch = self.objective_batch() - measures_batch = self.measures_batch() - index_batch = self.index_batch() - metadata_batch = self.metadata_batch() - - none_array = np.empty(len(self), dtype=object) + # Identify fields in the data frame. 
There are some edge cases here, + # such as if someone purposely names their field with an underscore and + # a number at the end like "foobar_0", but it covers most cases. + fields = {} + for col in self: + split = col.split("_") + if len(split) == 1: + # Single column. + fields[col] = None + elif split[-1].isdigit(): + # If the last item in the split is numerical, this should match + # vector fields like "measures_0". + + # Exclude last val and underscore - note negative sign. + field_name = col[:-(len(split[-1]) + 1)] + + fields[field_name] = None + else: + fields[col] = None + + # Retrieve field data. + for name in fields: + fields[name] = self.get_field(name) + + n_elites = len(self) return map( - lambda e: { - "solution": e[0], - "objective": e[1], - "measures": e[2], - "index": e[3], - "metadata": e[4], + lambda i: { + name: arr[i] for name, arr in fields.items() }, - zip( - none_array if solution_batch is None else solution_batch, - none_array if objective_batch is None else objective_batch, - none_array if measures_batch is None else measures_batch, - none_array if index_batch is None else index_batch, - none_array if metadata_batch is None else metadata_batch, - ), + range(n_elites), ) - # Note: The slices for batch methods cannot be pre-computed because the - # DataFrame columns might change in-place, e.g. when a column is deleted. - - def solution_batch(self): - """Array with solutions of all elites. - - None if there are no solutions (e.g. if ``include_solutions=False`` in - :meth:`~ArchiveBase.as_pandas`). - - Returns: - (n, solution_dim) numpy.ndarray: See above. - """ - cols = [c for c in self if c.startswith("solution_")] - return self[cols].to_numpy(copy=True) if cols else None - - def objective_batch(self): - """Array with objective values of all elites. - - None if there are no objectives in the ``ArchiveDataFrame``. - - Returns: - (n,) numpy.ndarray: See above. - """ - return self["objective"].to_numpy( - copy=True) if "objective" in self else None - - def measures_batch(self): - """Array with measures of all elites. - - None if there are no measures in the ``ArchiveDataFrame``. - - Returns: - (n, measure_dim) numpy.ndarray: See above. - """ - cols = [c for c in self if c.startswith("measures_")] - return self[cols].to_numpy(copy=True) if cols else None - - def index_batch(self): - """Array with indices of all elites. - - None if there are no indices in the ``ArchiveDataFrame``. - - Returns: - (n,) numpy.ndarray: See above. - """ - return self["index"].to_numpy(copy=True) if "index" in self else None - - def metadata_batch(self): - """Array with metadata of all elites. - - None if there is no metadata (e.g. if ``include_metadata=False`` in - :meth:`~ArchiveBase.as_pandas`). + def get_field(self, field): + """Array holding the data for the given field. - Returns: - (n,) numpy.ndarray: See above. + None if there is no data for the field. """ - return self["metadata"].to_numpy( - copy=True) if "metadata" in self else None + # Note: The column names cannot be pre-computed because the DataFrame + # columns might change in-place, e.g., when a column is deleted. 
+ + if field in self: + # Scalar field -- e.g., "objective" + return self[field].to_numpy(copy=True) + else: + # Vector field -- e.g., field="measures" and we want columns like + # "measures_0" and "measures_1" + field_re = f"{field}_\\d+" + cols = [c for c in self if re.fullmatch(field_re, c)] + return self[cols].to_numpy(copy=True) if cols else None diff --git a/ribs/visualize/_cvt_archive_3d_plot.py b/ribs/visualize/_cvt_archive_3d_plot.py index 31e2dddfa..69f565c2f 100644 --- a/ribs/visualize/_cvt_archive_3d_plot.py +++ b/ribs/visualize/_cvt_archive_3d_plot.py @@ -218,8 +218,8 @@ def cvt_archive_3d_plot( # Retrieve archive data. df = archive.as_pandas() if df is None else validate_df(df) - objective_batch = df.objective_batch() - measures_batch = df.measures_batch() + objective_batch = df.get_field("objective") + measures_batch = df.get_field("measures") lower_bounds = archive.lower_bounds upper_bounds = archive.upper_bounds centroids = archive.centroids @@ -297,7 +297,7 @@ def cvt_archive_3d_plot( objs = [] # Also record objective for each ridge so we can color it. # Map from centroid index to objective. - pt_to_obj = dict(zip(df.index_batch(), objective_batch)) + pt_to_obj = dict(zip(df.get_field("index"), objective_batch)) # The points in the Voronoi diagram are indexed by their placement in the # input list. Above, when we called Voronoi, `centroids` were placed first, diff --git a/ribs/visualize/_cvt_archive_heatmap.py b/ribs/visualize/_cvt_archive_heatmap.py index 0648dbb90..50b2b8b41 100644 --- a/ribs/visualize/_cvt_archive_heatmap.py +++ b/ribs/visualize/_cvt_archive_heatmap.py @@ -220,10 +220,10 @@ def cvt_archive_heatmap(archive, inv_idx[x] = i # We only want inverse indexes that are actually used in the archive. - selected_inv_idx = inv_idx[df.index_batch()] + selected_inv_idx = inv_idx[df.get_field("index")] cell_objectives = np.full(archive.cells, np.nan) - cell_objectives[selected_inv_idx] = df.objective_batch() + cell_objectives[selected_inv_idx] = df.get_field("objective") ax = archive_heatmap_1d(archive, cell_boundaries, cell_objectives, ax, cmap, aspect, vmin, vmax, cbar, cbar_kwargs, @@ -288,7 +288,7 @@ def cvt_archive_heatmap(archive, # the region index of each point. region_obj = [None] * len(vor.regions) min_obj, max_obj = np.inf, -np.inf - pt_to_obj = dict(zip(df.index_batch(), df.objective_batch())) + pt_to_obj = dict(zip(df.get_field("index"), df.get_field("objective"))) for pt_idx, region_idx in enumerate( vor.point_region[:-4]): # Exclude faraway_pts. if region_idx != -1 and pt_idx in pt_to_obj: diff --git a/ribs/visualize/_grid_archive_heatmap.py b/ribs/visualize/_grid_archive_heatmap.py index 703f713bd..d7bdbb7c6 100644 --- a/ribs/visualize/_grid_archive_heatmap.py +++ b/ribs/visualize/_grid_archive_heatmap.py @@ -151,8 +151,8 @@ def grid_archive_heatmap(archive, if archive.measure_dim == 1: cell_objectives = np.full(archive.cells, np.nan) - cell_idx = archive.int_to_grid_index(df.index_batch()).squeeze() - cell_objectives[cell_idx] = df.objective_batch() + cell_idx = archive.int_to_grid_index(df.get_field("index")).squeeze() + cell_objectives[cell_idx] = df.get_field("objective") archive_heatmap_1d( archive, @@ -171,7 +171,7 @@ def grid_archive_heatmap(archive, elif archive.measure_dim == 2: # Retrieve data from archive. 
- objective_batch = df.objective_batch() + objective_batch = df.get_field("objective") lower_bounds = archive.lower_bounds upper_bounds = archive.upper_bounds x_dim, y_dim = archive.dims @@ -180,7 +180,7 @@ def grid_archive_heatmap(archive, # Color for each cell in the heatmap. colors = np.full((y_dim, x_dim), np.nan) - grid_index_batch = archive.int_to_grid_index(df.index_batch()) + grid_index_batch = archive.int_to_grid_index(df.get_field("index")) colors[grid_index_batch[:, 1], grid_index_batch[:, 0]] = objective_batch if transpose_measures: diff --git a/ribs/visualize/_parallel_axes_plot.py b/ribs/visualize/_parallel_axes_plot.py index 28b57ca45..3b4627964 100644 --- a/ribs/visualize/_parallel_axes_plot.py +++ b/ribs/visualize/_parallel_axes_plot.py @@ -169,8 +169,8 @@ def parallel_axes_plot(archive, norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax, clip=True) if sort_archive: df.sort_values("objective", inplace=True) - objectives = df.objective_batch() - ys = df.measures_batch()[:, cols] + objectives = df.get_field("objective") + ys = df.get_field("measures")[:, cols] y_ranges = upper_bounds - lower_bounds # Transform all data to be in the first axis coordinates. diff --git a/ribs/visualize/_sliding_boundaries_archive_heatmap.py b/ribs/visualize/_sliding_boundaries_archive_heatmap.py index 207150cc1..25c575f46 100644 --- a/ribs/visualize/_sliding_boundaries_archive_heatmap.py +++ b/ribs/visualize/_sliding_boundaries_archive_heatmap.py @@ -120,7 +120,7 @@ def sliding_boundaries_archive_heatmap(archive, # Retrieve archive data. df = archive.as_pandas() if df is None else validate_df(df) - measures_batch = df.measures_batch() + measures_batch = df.get_field("measures") x = measures_batch[:, 0] y = measures_batch[:, 1] x_boundary = archive.boundaries[0] @@ -144,7 +144,7 @@ def sliding_boundaries_archive_heatmap(archive, ax.set_aspect(aspect) # Create the plot. - objective_batch = df.objective_batch() + objective_batch = df.get_field("objective") vmin = np.min(objective_batch) if vmin is None else vmin vmax = np.max(objective_batch) if vmax is None else vmax t = ax.scatter(x, diff --git a/tests/archives/archive_data_frame_test.py b/tests/archives/archive_data_frame_test.py index d83f4be0a..be671d858 100644 --- a/tests/archives/archive_data_frame_test.py +++ b/tests/archives/archive_data_frame_test.py @@ -21,15 +21,16 @@ def data(): @pytest.fixture def df(data): - """Mimics the ArchiveDataFrame that an as_pandas method would generate.""" + """Mimics an ArchiveDataFrame that a data method would generate.""" (solution_batch, objective_batch, measures_batch, index_batch, metadata_batch) = data return ArchiveDataFrame({ - "index": index_batch, - "objective": objective_batch, - "measures_0": measures_batch[:, 0], "solution_0": solution_batch[:, 0], + "objective": objective_batch, + # Fancy name to test field handling. 
+ "foo__bar_measures_3_0": measures_batch[:, 0], "metadata": metadata_batch, + "index": index_batch, }) @@ -38,39 +39,43 @@ def test_iterelites(data, df): metadata) in zip(df.iterelites(), zip(*data)): assert np.isclose(elite["solution"], solution).all() assert np.isclose(elite["objective"], objective) - assert np.isclose(elite["measures"], measures).all() - assert elite["index"] == index + assert np.isclose(elite["foo__bar_measures_3"], measures).all() assert elite["metadata"] == metadata + assert elite["index"] == index -def test_batch_methods(data, df): +def test_get_field(data, df): (solution_batch, objective_batch, measures_batch, index_batch, metadata_batch) = data - assert np.isclose(df.solution_batch(), solution_batch).all() - assert np.isclose(df.objective_batch(), objective_batch).all() - assert np.isclose(df.measures_batch(), measures_batch).all() - assert (df.index_batch() == index_batch).all() - assert (df.metadata_batch() == metadata_batch).all() + assert np.isclose(df.get_field("solution"), solution_batch).all() + assert np.isclose(df.get_field("objective"), objective_batch).all() + assert np.isclose(df.get_field("foo__bar_measures_3"), measures_batch).all() + assert (df.get_field("metadata") == metadata_batch).all() + assert (df.get_field("index") == index_batch).all() @pytest.mark.parametrize( - "remove", - ["index", "objective", "measures_0", "metadata", "solution_0"], - ids=["index", "objective", "measures", "metadata", "solutions"], + "field_col", + [ + ["solution", "solution_0"], + ["objective", "objective"], + ["measures", "foo__bar_measures_3_0"], + ["metadata", "metadata"], + ["index", "index"], + ], + ids=[ + "solutions", + "objective", + "measures", + "metadata", + "index", + ], ) -def test_batch_methods_can_be_none(df, remove): - """Removes a column so that the corresponding batch method returns None.""" - del df[remove] - - method = { - "solution_0": df.solution_batch, - "objective": df.objective_batch, - "measures_0": df.measures_batch, - "index": df.index_batch, - "metadata": df.metadata_batch, - }[remove] - - assert method() is None +def test_field_can_be_none(df, field_col): + """Removes a column so that get_field returns None.""" + field, col = field_col + del df[col] + assert df.get_field(field) is None def test_correct_constructor(df): diff --git a/tests/archives/grid_archive_test.py b/tests/archives/grid_archive_test.py index 3482c1ff7..e684a6e38 100644 --- a/tests/archives/grid_archive_test.py +++ b/tests/archives/grid_archive_test.py @@ -40,40 +40,33 @@ def assert_archive_elites( Any of the batch items may be excluded by setting to None. """ - archive_df = archive.as_pandas(include_solutions=True, - include_metadata=True) + data = archive.data() # Check the number of solutions. 
- assert len(archive_df) == batch_size + assert len(data["index"]) == batch_size if grid_indices_batch is not None: index_batch = archive.grid_to_int_index(grid_indices_batch) - archive_solution_batch = archive_df.solution_batch() - archive_objective_batch = archive_df.objective_batch() - archive_measures_batch = archive_df.measures_batch() - archive_index_batch = archive_df.index_batch() - archive_metadata_batch = archive_df.metadata_batch() - # Enforce a one-to-one correspondence between entries in the archive and in # the provided input -- see # https://www.geeksforgeeks.org/check-two-unsorted-array-duplicates-allowed-elements/ archive_covered = [False for _ in range(batch_size)] for i in range(batch_size): - for j in range(len(archive_df)): + for j in range(len(data["index"])): if archive_covered[j]: continue solution_match = (solution_batch is None or np.isclose( - archive_solution_batch[j], solution_batch[i]).all()) + data["solution"][j], solution_batch[i]).all()) objective_match = (objective_batch is None or np.isclose( - archive_objective_batch[j], objective_batch[i])) + data["objective"][j], objective_batch[i])) measures_match = (measures_batch is None or np.isclose( - archive_measures_batch[j], measures_batch[i]).all()) + data["measures"][j], measures_batch[i]).all()) index_match = (grid_indices_batch is None or - archive_index_batch[j] == index_batch[i]) + data["index"][j] == index_batch[i]) metadata_match = (metadata_batch is None or - archive_metadata_batch[j] == metadata_batch[i]) + data["metadata"][j] == metadata_batch[i]) if (solution_match and objective_match and measures_match and index_match and metadata_match): diff --git a/tutorials/lsi_mnist.ipynb b/tutorials/lsi_mnist.ipynb index a645e404e..8e369e65a 100644 --- a/tutorials/lsi_mnist.ipynb +++ b/tutorials/lsi_mnist.ipynb @@ -392,7 +392,7 @@ " scheduler.tell(objs, meas)\n", "\n", " if itr % 1000 == 0:\n", - " tqdm.write(f\"Iteration {itr} archive size: {len(archive.as_pandas(include_solutions=False))}\")" + " tqdm.write(f\"Iteration {itr} archive size: {len(archive)}\")" ] }, { @@ -431,8 +431,8 @@ " grid_index_batch = [(x, y) for y in np.flip(y_range) for x in x_range]\n", " imgs = []\n", " img_size = (28, 28)\n", - " df = archive.as_pandas()\n", - " solution_batch, index_batch = df.solution_batch(), df.index_batch()\n", + " solution_batch = archive.data(\"solution\")\n", + " index_batch = archive.data(\"index\")\n", " int_index_batch = archive.grid_to_int_index(grid_index_batch)\n", " \n", " for int_index in int_index_batch:\n", @@ -602,15 +602,14 @@ " ranges=[(0, 784), (0.5, 1)], # Boldness range, lightness range.\n", ")\n", "\n", - "archive_df = archive.as_pandas(include_solutions=True)\n", "imgs = generator(\n", - " torch.tensor(archive_df.solution_batch(),\n", + " torch.tensor(archive.data(\"solution\"),\n", " dtype=torch.float32,\n", " device=device))\n", "discriminator_archive.add(\n", - " archive_df.solution_batch(),\n", + " archive.data(\"solution\"),\n", " discriminator(imgs).squeeze().cpu().detach().numpy(),\n", - " archive_df.measures_batch(),\n", + " archive.data(\"measures\"),\n", ")" ] }, From 05e4910c7b9fb565902f35d8901d302d88964dd8 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Thu, 9 Nov 2023 23:25:52 -0800 Subject: [PATCH 18/19] Deprecate `as_pandas` in favor of `data(return_type="pandas")` (#408) ## Description Due to the new `data` method (#412), `as_pandas` is no longer necessary, as `data` provides more flexible options for accessing archive data, 
and it is also able to return dataframes by passing `return_type="pandas"`. This PR thus deprecates `as_pandas`. Because this is a fairly popular method, I have kept the method and raised a RuntimeError whenever it is called; however, I anticipate removing the method entirely in the future. I also considered keeping `as_pandas` as an alias to `data(return_type="pandas")`, but this would require changing the parameters of `as_pandas` since `data` takes in `fields` rather than `include_solutions` and `include_metadata`. Removing `as_pandas` entirely makes it clear that it has been deprecated. ## TODO - [x] Introduce a `data()` method that returns the archive data in many forms - [x] Remove `as_pandas()` since `data(return_type="pandas")` now fulfills this role - [x] Replace as_pandas test with data test - [x] Fix usage of as_pandas in tests - [x] Remove as_pandas in tutorials ## Questions ## Status - [x] I have read the guidelines in [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md) - [x] I have formatted my code using `yapf` - [x] I have tested my code by running `pytest` - [x] I have linted my code with `pylint` - [x] I have added a one-line description of my change to the changelog in `HISTORY.md` - [x] This PR is ready to go --- HISTORY.md | 2 + examples/lunar_lander.py | 6 +-- examples/sphere.py | 2 +- ribs/archives/_archive_base.py | 52 ++++--------------- ribs/archives/_cvt_archive.py | 6 +-- ribs/archives/_grid_archive.py | 6 +-- ribs/visualize/_cvt_archive_3d_plot.py | 23 +++++--- ribs/visualize/_cvt_archive_heatmap.py | 27 ++++++---- ribs/visualize/_grid_archive_heatmap.py | 24 +++++---- ribs/visualize/_parallel_axes_plot.py | 11 ++-- .../_sliding_boundaries_archive_heatmap.py | 19 ++++--- tests/archives/cvt_archive_benchmark.py | 23 -------- tests/archives/grid_archive_benchmark.py | 25 --------- .../sliding_boundaries_archive_benchmark.py | 25 +-------- .../sliding_boundaries_archive_test.py | 7 +-- tests/tutorials.sh | 4 -- tests/visualize/cvt_archive_3d_plot_test.py | 2 +- tests/visualize/cvt_archive_heatmap_test.py | 2 +- tests/visualize/grid_archive_heatmap_test.py | 2 +- tests/visualize/parallel_axes_plot_test.py | 2 +- ...sliding_boundaries_archive_heatmap_test.py | 2 +- tutorials/lunar_lander.ipynb | 6 +-- tutorials/tom_cruise_dqd.ipynb | 2 +- 23 files changed, 98 insertions(+), 182 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 8f9d2c490..2918b7dd5 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,8 @@ #### API +- **Backwards-incompatible:** Deprecate `as_pandas` in favor of + `data(return_type="pandas")` ({pr}`408`) - **Backwards-incompatible:** Replace ArchiveDataFrame batch methods with `get_field` ({pr}`413`) - Add field_list and data methods to archives ({pr}`412`) diff --git a/examples/lunar_lander.py b/examples/lunar_lander.py index 3aff7334e..a610c705c 100644 --- a/examples/lunar_lander.py +++ b/examples/lunar_lander.py @@ -18,7 +18,7 @@ the --outdir flag) with the following files: - archive.csv: The CSV representation of the final archive, obtained with - as_pandas(). + data(). - archive_ccdf.png: A plot showing the (unnormalized) complementary cumulative distribution function of objectives in the archive. For each objective p on the x-axis, this plot shows the number of @@ -297,7 +297,7 @@ def save_ccdf(archive, filename): """ fig, ax = plt.subplots() ax.hist( - archive.as_pandas(include_solutions=False)["objective"], + archive.data("objective"), 50, # Number of cells. 
histtype="step", density=False, @@ -395,7 +395,7 @@ def lunar_lander_main(workers=4, metrics = run_search(client, scheduler, env_seed, iterations, log_freq) # Outputs. - scheduler.archive.as_pandas().to_csv(outdir / "archive.csv") + scheduler.archive.data(return_type="pandas").to_csv(outdir / "archive.csv") save_ccdf(scheduler.archive, str(outdir / "archive_ccdf.png")) save_heatmap(scheduler.archive, str(outdir / "heatmap.png")) save_metrics(outdir, metrics) diff --git a/examples/sphere.py b/examples/sphere.py index 7f174697f..143a6f4ee 100644 --- a/examples/sphere.py +++ b/examples/sphere.py @@ -835,7 +835,7 @@ def sphere_main(algorithm, final_itr = itr == itrs if itr % log_freq == 0 or final_itr: if final_itr: - result_archive.as_pandas(include_solutions=final_itr).to_csv( + result_archive.data(return_type="pandas").to_csv( outdir / f"{name}_archive.csv") # Record and display metrics. diff --git a/ribs/archives/_archive_base.py b/ribs/archives/_archive_base.py index 843c5232a..a5a131c7e 100644 --- a/ribs/archives/_archive_base.py +++ b/ribs/archives/_archive_base.py @@ -728,49 +728,15 @@ def data(self, fields=None, return_type="dict"): return data def as_pandas(self, include_solutions=True, include_metadata=False): - """Converts the archive into an :class:`ArchiveDataFrame` (a child class - of :class:`pandas.DataFrame`). - - The implementation of this method in :class:`ArchiveBase` creates a - dataframe consisting of: - - - 1 column of integers (``np.int32``) for the index, named ``index``. - See :meth:`index_of` for more info. - - :attr:`measure_dim` columns for the measures, named ``measures_0, - measures_1, ...`` - - 1 column for the objectives, named ``objective`` - - :attr:`solution_dim` columns for the solution parameters, named - ``solution_0, solution_1, ...`` - - 1 column for the metadata objects, named ``metadata`` - - In short, the dataframe looks like this: - - +-------+------------+------+-----------+------------+-----+----------+ - | index | measures_0 | ... | objective | solution_0 | ... | metadata | - +=======+============+======+===========+============+=====+==========+ - | | | ... | | | ... | | - +-------+------------+------+-----------+------------+-----+----------+ - - Compared to :class:`pandas.DataFrame`, the :class:`ArchiveDataFrame` - adds methods and attributes which make it easier to manipulate archive - data. For more information, refer to the :class:`ArchiveDataFrame` - documentation. - - Args: - include_solutions (bool): Whether to include solution columns. - include_metadata (bool): Whether to include the metadata column. - Note that methods like :meth:`~pandas.DataFrame.to_csv` may not - properly save the dataframe since the metadata objects may not - be representable in a CSV. - Returns: - ArchiveDataFrame: See above. - """ # pylint: disable = line-too-long - fields = ["index", "measures", "objective"] - if include_solutions: - fields.append("solution") - if include_metadata: - fields.append("metadata") - return ArchiveDataFrame(self._store.data(fields, return_type="pandas")) + """DEPRECATED.""" + # pylint: disable = unused-argument + raise RuntimeError( + "as_pandas has been deprecated. Please use " + "archive.data(..., return_type='pandas') instead. 
For more " + "info, please see the archive data tutorial: " + # pylint: disable = line-too-long + "https://docs.pyribs.org/en/stable/tutorials/features/archive_data.html" + ) def cqd_score(self, iterations, diff --git a/ribs/archives/_cvt_archive.py b/ribs/archives/_cvt_archive.py index f8c1c3d66..381b01d9f 100644 --- a/ribs/archives/_cvt_archive.py +++ b/ribs/archives/_cvt_archive.py @@ -49,9 +49,9 @@ class CVTArchive(ArchiveBase): subsequent experiments. .. note:: The idea of archive thresholds was introduced in `Fontaine 2022 - `_. Refer to our `CMA-MAE tutorial - <../../tutorials/cma_mae.html>`_ for more info on thresholds, including - the ``learning_rate`` and ``threshold_min`` parameters. + `_. For more info on thresholds, + including the ``learning_rate`` and ``threshold_min`` parameters, refer + to our tutorial :doc:`/tutorials/cma_mae`. .. note:: For more information on our choice of k-D tree implementation, see :pr:`38`. diff --git a/ribs/archives/_grid_archive.py b/ribs/archives/_grid_archive.py index 7d05f3f1a..74e3aafd2 100644 --- a/ribs/archives/_grid_archive.py +++ b/ribs/archives/_grid_archive.py @@ -16,9 +16,9 @@ class GridArchive(ArchiveBase): cell. .. note:: The idea of archive thresholds was introduced in `Fontaine 2022 - `_. Refer to our `CMA-MAE tutorial - <../../tutorials/cma_mae.html>`_ for more info on thresholds, including - the ``learning_rate`` and ``threshold_min`` parameters. + `_. For more info on thresholds, + including the ``learning_rate`` and ``threshold_min`` parameters, refer + to our tutorial :doc:`/tutorials/cma_mae`. Args: solution_dim (int): Dimension of the solution space. diff --git a/ribs/visualize/_cvt_archive_3d_plot.py b/ribs/visualize/_cvt_archive_3d_plot.py index 69f565c2f..9b158a465 100644 --- a/ribs/visualize/_cvt_archive_3d_plot.py +++ b/ribs/visualize/_cvt_archive_3d_plot.py @@ -154,10 +154,11 @@ def cvt_archive_3d_plot( df (ribs.archives.ArchiveDataFrame): If provided, we will plot data from this argument instead of the data currently in the archive. This data can be obtained by, for instance, calling - :meth:`ribs.archives.ArchiveBase.as_pandas()` and modifying the - resulting :class:`ArchiveDataFrame`. Note that, at a minimum, the - data must contain columns for index, objective, and measures. To - display a custom metric, replace the "objective" column. + :meth:`ribs.archives.ArchiveBase.data` with ``return_type="pandas"`` + and modifying the resulting :class:`ArchiveDataFrame`. Note that, at + a minimum, the data must contain columns for index, objective, and + measures. To display a custom metric, replace the "objective" + column. measure_order (array-like of int): Specifies the axes order for plotting the measures. By default, the first measure (measure 0) in the archive appears on the x-axis, the second (measure 1) on y-axis, and @@ -217,9 +218,15 @@ def cvt_archive_3d_plot( cmap = retrieve_cmap(cmap) # Retrieve archive data. 
- df = archive.as_pandas() if df is None else validate_df(df) - objective_batch = df.get_field("objective") - measures_batch = df.get_field("measures") + if df is None: + objective_batch = archive.data("objective") + measures_batch = archive.data("measures") + index_batch = archive.data("index") + else: + df = validate_df(df) + objective_batch = df.get_field("objective") + measures_batch = df.get_field("measures") + index_batch = df.get_field("index") lower_bounds = archive.lower_bounds upper_bounds = archive.upper_bounds centroids = archive.centroids @@ -297,7 +304,7 @@ def cvt_archive_3d_plot( objs = [] # Also record objective for each ridge so we can color it. # Map from centroid index to objective. - pt_to_obj = dict(zip(df.get_field("index"), objective_batch)) + pt_to_obj = dict(zip(index_batch, objective_batch)) # The points in the Voronoi diagram are indexed by their placement in the # input list. Above, when we called Voronoi, `centroids` were placed first, diff --git a/ribs/visualize/_cvt_archive_heatmap.py b/ribs/visualize/_cvt_archive_heatmap.py index 50b2b8b41..1b622646b 100644 --- a/ribs/visualize/_cvt_archive_heatmap.py +++ b/ribs/visualize/_cvt_archive_heatmap.py @@ -9,8 +9,8 @@ from ribs.visualize._utils import (archive_heatmap_1d, retrieve_cmap, set_cbar, validate_df, validate_heatmap_visual_args) -# Matplotlib functions tend to have a ton of args. -# pylint: disable = too-many-arguments +# Matplotlib functions tend to have a ton of args and statements. +# pylint: disable = too-many-arguments, too-many-statements def cvt_archive_heatmap(archive, @@ -102,10 +102,11 @@ def cvt_archive_heatmap(archive, df (ribs.archives.ArchiveDataFrame): If provided, we will plot data from this argument instead of the data currently in the archive. This data can be obtained by, for instance, calling - :meth:`ribs.archives.ArchiveBase.as_pandas()` and modifying the - resulting :class:`ArchiveDataFrame`. Note that, at a minimum, the - data must contain columns for index, objective, and measures. To - display a custom metric, replace the "objective" column. + :meth:`ribs.archives.ArchiveBase.data` with ``return_type="pandas"`` + and modifying the resulting :class:`ArchiveDataFrame`. Note that, at + a minimum, the data must contain columns for index, objective, and + measures. To display a custom metric, replace the "objective" + column. transpose_measures (bool): By default, the first measure in the archive will appear along the x-axis, and the second will be along the y-axis. To switch this behavior (i.e. to transpose the axes), set @@ -182,7 +183,13 @@ def cvt_archive_heatmap(archive, cmap = retrieve_cmap(cmap) # Retrieve archive data. - df = archive.as_pandas() if df is None else validate_df(df) + if df is None: + index_batch = archive.data("index") + objective_batch = archive.data("objective") + else: + df = validate_df(df) + index_batch = df["index"] + objective_batch = df["objective"] if archive.measure_dim == 1: # Read in pcm kwargs -- the linewidth and edgecolor are overwritten by @@ -220,10 +227,10 @@ def cvt_archive_heatmap(archive, inv_idx[x] = i # We only want inverse indexes that are actually used in the archive. 
- selected_inv_idx = inv_idx[df.get_field("index")] + selected_inv_idx = inv_idx[index_batch] cell_objectives = np.full(archive.cells, np.nan) - cell_objectives[selected_inv_idx] = df.get_field("objective") + cell_objectives[selected_inv_idx] = objective_batch ax = archive_heatmap_1d(archive, cell_boundaries, cell_objectives, ax, cmap, aspect, vmin, vmax, cbar, cbar_kwargs, @@ -288,7 +295,7 @@ def cvt_archive_heatmap(archive, # the region index of each point. region_obj = [None] * len(vor.regions) min_obj, max_obj = np.inf, -np.inf - pt_to_obj = dict(zip(df.get_field("index"), df.get_field("objective"))) + pt_to_obj = dict(zip(index_batch, objective_batch)) for pt_idx, region_idx in enumerate( vor.point_region[:-4]): # Exclude faraway_pts. if region_idx != -1 and pt_idx in pt_to_obj: diff --git a/ribs/visualize/_grid_archive_heatmap.py b/ribs/visualize/_grid_archive_heatmap.py index d7bdbb7c6..6462f9fa7 100644 --- a/ribs/visualize/_grid_archive_heatmap.py +++ b/ribs/visualize/_grid_archive_heatmap.py @@ -90,10 +90,11 @@ def grid_archive_heatmap(archive, df (ribs.archives.ArchiveDataFrame): If provided, we will plot data from this argument instead of the data currently in the archive. This data can be obtained by, for instance, calling - :meth:`ribs.archives.ArchiveBase.as_pandas()` and modifying the - resulting :class:`ArchiveDataFrame`. Note that, at a minimum, the - data must contain columns for index, objective, and measures. To - display a custom metric, replace the "objective" column. + :meth:`ribs.archives.ArchiveBase.data` with ``return_type="pandas"`` + and modifying the resulting :class:`ArchiveDataFrame`. Note that, at + a minimum, the data must contain columns for index, objective, and + measures. To display a custom metric, replace the "objective" + column. transpose_measures (bool): By default, the first measure in the archive will appear along the x-axis, and the second will be along the y-axis. To switch this behavior (i.e. to transpose the axes), set @@ -147,12 +148,18 @@ def grid_archive_heatmap(archive, cmap = retrieve_cmap(cmap) # Retrieve archive data. - df = archive.as_pandas() if df is None else validate_df(df) + if df is None: + index_batch = archive.data("index") + objective_batch = archive.data("objective") + else: + df = validate_df(df) + index_batch = df["index"] + objective_batch = df["objective"] if archive.measure_dim == 1: cell_objectives = np.full(archive.cells, np.nan) - cell_idx = archive.int_to_grid_index(df.get_field("index")).squeeze() - cell_objectives[cell_idx] = df.get_field("objective") + cell_idx = archive.int_to_grid_index(index_batch).squeeze() + cell_objectives[cell_idx] = objective_batch archive_heatmap_1d( archive, @@ -171,7 +178,6 @@ def grid_archive_heatmap(archive, elif archive.measure_dim == 2: # Retrieve data from archive. - objective_batch = df.get_field("objective") lower_bounds = archive.lower_bounds upper_bounds = archive.upper_bounds x_dim, y_dim = archive.dims @@ -180,7 +186,7 @@ def grid_archive_heatmap(archive, # Color for each cell in the heatmap. 
colors = np.full((y_dim, x_dim), np.nan) - grid_index_batch = archive.int_to_grid_index(df.get_field("index")) + grid_index_batch = archive.int_to_grid_index(index_batch) colors[grid_index_batch[:, 1], grid_index_batch[:, 0]] = objective_batch if transpose_measures: diff --git a/ribs/visualize/_parallel_axes_plot.py b/ribs/visualize/_parallel_axes_plot.py index 3b4627964..678ae04f0 100644 --- a/ribs/visualize/_parallel_axes_plot.py +++ b/ribs/visualize/_parallel_axes_plot.py @@ -82,10 +82,11 @@ def parallel_axes_plot(archive, df (ribs.archives.ArchiveDataFrame): If provided, we will plot data from this argument instead of the data currently in the archive. This data can be obtained by, for instance, calling - :meth:`ribs.archives.ArchiveBase.as_pandas()` and modifying the - resulting :class:`ArchiveDataFrame`. Note that, at a minimum, the - data must contain columns for index, objective, and measures. To - display a custom metric, replace the "objective" column. + :meth:`ribs.archives.ArchiveBase.data` with ``return_type="pandas"`` + and modifying the resulting :class:`ArchiveDataFrame`. Note that, at + a minimum, the data must contain columns for index, objective, and + measures. To display a custom metric, replace the "objective" + column. measure_order (list of int or list of (int, str)): If this is a list of ints, it specifies the axes order for measures (e.g. ``[2, 0, 1]``). If this is a list of tuples, each tuple takes the form @@ -163,7 +164,7 @@ def parallel_axes_plot(archive, upper_bounds = archive.upper_bounds[cols] host_ax = plt.gca() if ax is None else ax # Try to get current axis. - df = archive.as_pandas() if df is None else validate_df(df) + df = archive.data(return_type="pandas") if df is None else validate_df(df) vmin = df["objective"].min() if vmin is None else vmin vmax = df["objective"].max() if vmax is None else vmax norm = matplotlib.colors.Normalize(vmin=vmin, vmax=vmax, clip=True) diff --git a/ribs/visualize/_sliding_boundaries_archive_heatmap.py b/ribs/visualize/_sliding_boundaries_archive_heatmap.py index 25c575f46..e1e37a740 100644 --- a/ribs/visualize/_sliding_boundaries_archive_heatmap.py +++ b/ribs/visualize/_sliding_boundaries_archive_heatmap.py @@ -68,10 +68,11 @@ def sliding_boundaries_archive_heatmap(archive, df (ribs.archives.ArchiveDataFrame): If provided, we will plot data from this argument instead of the data currently in the archive. This data can be obtained by, for instance, calling - :meth:`ribs.archives.ArchiveBase.as_pandas()` and modifying the - resulting :class:`ArchiveDataFrame`. Note that, at a minimum, the - data must contain columns for index, objective, and measures. To - display a custom metric, replace the "objective" column. + :meth:`ribs.archives.ArchiveBase.data` with ``return_type="pandas"`` + and modifying the resulting :class:`ArchiveDataFrame`. Note that, at + a minimum, the data must contain columns for index, objective, and + measures. To display a custom metric, replace the "objective" + column. transpose_measures (bool): By default, the first measure in the archive will appear along the x-axis, and the second will be along the y-axis. To switch this behavior (i.e. to transpose the axes), set @@ -119,8 +120,13 @@ def sliding_boundaries_archive_heatmap(archive, cmap = retrieve_cmap(cmap) # Retrieve archive data. 
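Note that `parallel_axes_plot` keeps the pandas return type, presumably because it reads column-wise extrema straight from the DataFrame; the equivalent field-based computation would look roughly like:

    objective_batch = archive.data("objective")
    vmin = objective_batch.min() if vmin is None else vmin
    vmax = objective_batch.max() if vmax is None else vmax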
- df = archive.as_pandas() if df is None else validate_df(df) - measures_batch = df.get_field("measures") + if df is None: + measures_batch = archive.data("measures") + objective_batch = archive.data("objective") + else: + df = validate_df(df) + measures_batch = df.get_field("measures") + objective_batch = df.get_field("objective") x = measures_batch[:, 0] y = measures_batch[:, 1] x_boundary = archive.boundaries[0] @@ -144,7 +150,6 @@ def sliding_boundaries_archive_heatmap(archive, ax.set_aspect(aspect) # Create the plot. - objective_batch = df.get_field("objective") vmin = np.min(objective_batch) if vmin is None else vmin vmax = np.max(objective_batch) if vmax is None else vmax t = ax.scatter(x, diff --git a/tests/archives/cvt_archive_benchmark.py b/tests/archives/cvt_archive_benchmark.py index 6726cf0eb..8b97537aa 100644 --- a/tests/archives/cvt_archive_benchmark.py +++ b/tests/archives/cvt_archive_benchmark.py @@ -38,26 +38,3 @@ def add_10k(archive): archive.add(solution_batch, objective_batch, measures_batch) benchmark.pedantic(add_10k, setup=setup, rounds=5, iterations=1) - - -def benchmark_as_pandas_2000_items(benchmark): - cells = 2000 - archive = CVTArchive(solution_dim=10, - cells=cells, - ranges=[(-1, 1), (-1, 1)], - use_kd_tree=True, - samples=50_000) - - archive.add( - solution_batch=np.concatenate( - (archive.centroids, np.random.random((cells, 8))), - axis=1, - ), - objective_batch=np.ones(cells), - measures_batch=archive.centroids, - ) - - # Archive should be full. - assert len(archive) == cells - - benchmark(archive.as_pandas) diff --git a/tests/archives/grid_archive_benchmark.py b/tests/archives/grid_archive_benchmark.py index c17774dfe..2a517573c 100644 --- a/tests/archives/grid_archive_benchmark.py +++ b/tests/archives/grid_archive_benchmark.py @@ -22,28 +22,3 @@ def add_10k(archive): archive.add(solution_batch, objective_batch, measures_batch) benchmark.pedantic(add_10k, setup=setup, rounds=5, iterations=1) - - -def benchmark_as_pandas_2025_items(benchmark): - dim = 45 - archive = GridArchive(solution_dim=10, - dims=(dim, dim), - ranges=[(-1, 1), (-1, 1)]) - xxs, yys = np.meshgrid( - np.linspace(-1, 1, dim), - np.linspace(-1, 1, dim), - ) - xxs, yys = xxs.ravel(), yys.ravel() - archive.add( - solution_batch=np.stack( - (xxs, yys, *np.random.random((8, dim * dim))), - axis=1, - ), - objective_batch=np.ones(dim * dim), - measures_batch=np.stack((xxs, yys), axis=1), - ) - - # Archive should be full. - assert len(archive) == dim * dim - - benchmark(archive.as_pandas) diff --git a/tests/archives/sliding_boundaries_archive_benchmark.py b/tests/archives/sliding_boundaries_archive_benchmark.py index a5c418efc..2d22d6338 100644 --- a/tests/archives/sliding_boundaries_archive_benchmark.py +++ b/tests/archives/sliding_boundaries_archive_benchmark.py @@ -1,11 +1,9 @@ """Benchmarks for the SlidingBoundariesArchive.""" -import numpy as np - from ribs.archives import SlidingBoundariesArchive def benchmark_add_10k(benchmark, benchmark_data_10k): - n, solution_batch, objective_batch, measures_batch = benchmark_data_10k + _, solution_batch, objective_batch, measures_batch = benchmark_data_10k def setup(): archive = SlidingBoundariesArchive(solution_dim=solution_batch.shape[1], @@ -24,24 +22,3 @@ def add_10k(archive): archive.add(solution_batch, objective_batch, measures_batch) benchmark.pedantic(add_10k, setup=setup, rounds=5, iterations=1) - - -def benchmark_as_pandas_2048_elements(benchmark): - # TODO (btjanaka): Make this size smaller so that we do a remap. 
- archive = SlidingBoundariesArchive(solution_dim=10, - dims=[32, 64], - ranges=[(-1, 1), (-2, 2)], - remap_frequency=20000, - buffer_capacity=20000) - - for x in np.linspace(-1, 1, 100): - for y in np.linspace(-2, 2, 100): - sol = np.random.random(10) - sol[0] = x - sol[1] = y - archive.add_single(sol, -(x**2 + y**2), np.array([x, y])) - - # Archive should be full. - assert len(archive) == 32 * 64 - - benchmark(archive.as_pandas) diff --git a/tests/archives/sliding_boundaries_archive_test.py b/tests/archives/sliding_boundaries_archive_test.py index 11d0a9b1b..96e6142bf 100644 --- a/tests/archives/sliding_boundaries_archive_test.py +++ b/tests/archives/sliding_boundaries_archive_test.py @@ -132,10 +132,7 @@ def test_initial_remap(): assert np.isclose(archive.boundaries[1], np.linspace(-2, 2, 21)).all() # Check that all the measures are as expected. - pandas_measures = archive.as_pandas(include_solutions=False)[[ - "measures_0", "measures_1" - ]] - measures = list(pandas_measures.itertuples(name=None, index=False)) + measures = map(tuple, archive.data("measures")) assert np.isclose(sorted(measures), sorted(expected_measures)).all() @@ -184,4 +181,4 @@ def test_adds_solutions_from_old_archive(): # The objective values from the previous archive should remain because they # are higher. - assert (archive.as_pandas(include_solutions=False)["objective"] == 2).all() + assert (archive.data(["objective"], "tuple")[0] == 2).all() diff --git a/tests/tutorials.sh b/tests/tutorials.sh index a3e83ca1f..d248d250f 100644 --- a/tests/tutorials.sh +++ b/tests/tutorials.sh @@ -44,10 +44,6 @@ function test_notebook { # Reduce samples so that CVTArchive runs quickly. sed -i 's/use_kd_tree=True,/use_kd_tree=True, samples=10000,/g' "${TMP_FILE}" ;; - tutorials/lsi_mnist.ipynb) - # Reduce data for the discriminator archive. - sed -i 's/original_data = archive.as_pandas()/original_data = archive.as_pandas().loc[:5]/g' "${TMP_FILE}" - ;; esac # Run the notebook. 
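The updated tests above also exercise the multi-field form of `data`: passing a list of field names with `return_type="tuple"` yields one array per requested field, in order; for instance, roughly:

    objective_batch, measures_batch = archive.data(["objective", "measures"],
                                                   return_type="tuple")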
Timeout is long since some notebook cells take a while, diff --git a/tests/visualize/cvt_archive_3d_plot_test.py b/tests/visualize/cvt_archive_3d_plot_test.py index e91f5a200..303f07297 100644 --- a/tests/visualize/cvt_archive_3d_plot_test.py +++ b/tests/visualize/cvt_archive_3d_plot_test.py @@ -208,6 +208,6 @@ def test_plot_elites(cvt_archive_3d): tol=CVT_IMAGE_TOLERANCE) def test_plot_metadata_with_df(cvt_archive_3d): plt.figure(figsize=(8, 6)) - df = cvt_archive_3d.as_pandas(include_metadata=True) + df = cvt_archive_3d.data(return_type="pandas") df["objective"] = df["metadata"] cvt_archive_3d_plot(cvt_archive_3d, df=df) diff --git a/tests/visualize/cvt_archive_heatmap_test.py b/tests/visualize/cvt_archive_heatmap_test.py index d5a6827d6..554a34a57 100644 --- a/tests/visualize/cvt_archive_heatmap_test.py +++ b/tests/visualize/cvt_archive_heatmap_test.py @@ -225,7 +225,7 @@ def test_rasterized(cvt_archive_2d): tol=CVT_IMAGE_TOLERANCE) def test_plot_metadata_with_df(cvt_archive_2d): plt.figure(figsize=(8, 6)) - df = cvt_archive_2d.as_pandas(include_metadata=True) + df = cvt_archive_2d.data(return_type="pandas") df["objective"] = df["metadata"] cvt_archive_heatmap(cvt_archive_2d, df=df) diff --git a/tests/visualize/grid_archive_heatmap_test.py b/tests/visualize/grid_archive_heatmap_test.py index 35e939aa6..5dfc29661 100644 --- a/tests/visualize/grid_archive_heatmap_test.py +++ b/tests/visualize/grid_archive_heatmap_test.py @@ -199,7 +199,7 @@ def test_rasterized(grid_archive_2d): extensions=["png"]) def test_plot_metadata_with_df(grid_archive_2d): plt.figure(figsize=(8, 6)) - df = grid_archive_2d.as_pandas(include_metadata=True) + df = grid_archive_2d.data(return_type="pandas") df["objective"] = df["metadata"] grid_archive_heatmap(grid_archive_2d, df=df) diff --git a/tests/visualize/parallel_axes_plot_test.py b/tests/visualize/parallel_axes_plot_test.py index faaf4033d..ea628f8aa 100644 --- a/tests/visualize/parallel_axes_plot_test.py +++ b/tests/visualize/parallel_axes_plot_test.py @@ -93,6 +93,6 @@ def test_3d_vertical_cbar(grid_archive_3d): extensions=["png"]) def test_plot_metadata_with_df(grid_archive_3d): plt.figure(figsize=(8, 6)) - df = grid_archive_3d.as_pandas(include_metadata=True) + df = grid_archive_3d.data(return_type="pandas") df["objective"] = df["metadata"] parallel_axes_plot(grid_archive_3d, df=df) diff --git a/tests/visualize/sliding_boundaries_archive_heatmap_test.py b/tests/visualize/sliding_boundaries_archive_heatmap_test.py index d92e3042c..e7b6849da 100644 --- a/tests/visualize/sliding_boundaries_archive_heatmap_test.py +++ b/tests/visualize/sliding_boundaries_archive_heatmap_test.py @@ -167,6 +167,6 @@ def test_rasterized(sliding_archive_2d): extensions=["png"]) def test_plot_metadata_with_df(sliding_archive_2d): plt.figure(figsize=(8, 6)) - df = sliding_archive_2d.as_pandas(include_metadata=True) + df = sliding_archive_2d.data(return_type="pandas") df["objective"] = df["metadata"] sliding_boundaries_archive_heatmap(sliding_archive_2d, df=df) diff --git a/tutorials/lunar_lander.ipynb b/tutorials/lunar_lander.ipynb index 87ffaf4e3..55cc081cb 100644 --- a/tutorials/lunar_lander.ipynb +++ b/tutorials/lunar_lander.ipynb @@ -914,7 +914,7 @@ "id": "MZLDoutPFKhu" }, "source": [ - "As the archive has ~2500 solutions, we cannot view them all, but we can filter for high-performing solutions. We first retrieve the archive's elites with the [`as_pandas`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.as_pandas) method. 
Then, we choose solutions that scored above 200 because 200 is the [threshold for the problem to be considered solved](https://gymnasium.farama.org/environments/box2d/lunar_lander/). Note that many high-performing solutions do not land on the landing pad." + "As the archive has ~2500 solutions, we cannot view them all, but we can filter for high-performing solutions. We first retrieve the archive's elites with the [`data`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.data) method with `return_type=\"pandas\"`. Then, we choose solutions that scored above 200 because 200 is the [threshold for the problem to be considered solved](https://gymnasium.farama.org/environments/box2d/lunar_lander/). Note that many high-performing solutions do not land on the landing pad." ] }, { @@ -925,7 +925,7 @@ }, "outputs": [], "source": [ - "df = archive.as_pandas()\n", + "df = archive.data(return_type=\"pandas\")\n", "high_perf_sols = df.query(\"objective > 200\").sort_values(\"objective\", ascending=False)" ] }, @@ -935,7 +935,7 @@ "id": "0_RYE1rTFKhu" }, "source": [ - "Below we visualize several of these high-performing solutions. The `iterelites` method is available because `as_pandas` returns an [`ArchiveDataFrame`](https://docs.pyribs.org/en/latest/api/ribs.archives.ArchiveDataFrame.html), a subclass of the Pandas DataFrame specialized for pyribs. `iterelites` iterates over the entries in the DataFrame and returns them as dicts." + "Below we visualize several of these high-performing solutions. The `iterelites` method is available because `data` returns an [`ArchiveDataFrame`](https://docs.pyribs.org/en/latest/api/ribs.archives.ArchiveDataFrame.html), a subclass of the Pandas DataFrame specialized for pyribs. `iterelites` iterates over the entries in the DataFrame and returns them as dicts." ] }, { diff --git a/tutorials/tom_cruise_dqd.ipynb b/tutorials/tom_cruise_dqd.ipynb index a4b365b16..0a3c77a8a 100644 --- a/tutorials/tom_cruise_dqd.ipynb +++ b/tutorials/tom_cruise_dqd.ipynb @@ -925,7 +925,7 @@ "imgs = []\n", "\n", "# Convert archive to a df with solutions available.\n", - "df = result_archive.as_pandas(include_solutions=True)\n", + "df = result_archive.data(return_type=\"pandas\")\n", "\n", "# Compute the min and max measures for which solutions were found.\n", "measure_bounds = [\n", From f1da3187603aaa8dd98ef0da17b0e65ec19887e7 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka <38124174+btjanaka@users.noreply.github.com> Date: Fri, 10 Nov 2023 18:41:58 -0800 Subject: [PATCH 19/19] Return occupied booleans in retrieve (#414) ## Description Previously, we relied on sentinel values to indicate whether a given cell was occupied. Since it is entirely possible that users want to use these sentinel values in their fields, we now return a separate `occupied` array that indicates which cells are occupied. Considerations: - Chose not to support additional return types like tuple and pandas for now, as such flexibility is less essential in `retrieve`, and this feature can be added fairly easily later on - We still set the sentinel values depending on the field type since it may be confusing to see arbitrary values for a given field without seeing the occupied array. 
- The `threshold` field is now included in the outputs of `retrieve()`

## TODO

- [x] Implement new retrieve and retrieve_single methods
- [x] Fix tests
- [x] Fix usage in tutorials

## Questions

## Status

- [x] I have read the guidelines in
      [CONTRIBUTING.md](https://github.com/icaros-usc/pyribs/blob/master/CONTRIBUTING.md)
- [x] I have formatted my code using `yapf`
- [x] I have tested my code by running `pytest`
- [x] I have linted my code with `pylint`
- [x] I have added a one-line description of my change to the changelog in
      `HISTORY.md`
- [x] This PR is ready to go
---
 HISTORY.md                          |  1 +
 ribs/archives/_archive_base.py      | 85 ++++++++++++++---------------
 tests/archives/archive_base_test.py | 16 ++++--
 tutorials/arm_repertoire.ipynb      |  5 +-
 tutorials/lunar_lander.ipynb        | 16 +++---
 5 files changed, 63 insertions(+), 60 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index 2918b7dd5..778366617 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -6,6 +6,7 @@
 #### API
+- **Backwards-incompatible:** Return occupied booleans in retrieve ({pr}`414`)
 - **Backwards-incompatible:** Deprecate `as_pandas` in favor of
   `data(return_type="pandas")` ({pr}`408`)
 - **Backwards-incompatible:** Replace ArchiveDataFrame batch methods with
diff --git a/ribs/archives/_archive_base.py b/ribs/archives/_archive_base.py
index a5a131c7e..2eab4906b 100644
--- a/ribs/archives/_archive_base.py
+++ b/ribs/archives/_archive_base.py
@@ -520,7 +520,7 @@ def retrieve(self, measures_batch):
         This method operates in batch, i.e., it takes in a batch of measures
         and outputs the batched data for the elites::

-            elites = archive.retrieve(...)
+            occupied, elites = archive.retrieve(...)
             elites["solution"]  # Shape: (batch_size, solution_dim)
             elites["objective"]
             elites["measures"]
             elites["index"]
             elites["metadata"]

         If the cell associated with ``elites["measures"][i]`` has an elite in
-        it, then ``elites["solution"][i]``, ``elites["objective"][i]``,
+        it, then ``occupied[i]`` will be True. Furthermore,
+        ``elites["solution"][i]``, ``elites["objective"][i]``,
         ``elites["measures"][i]``, ``elites["index"][i]``, and
         ``elites["metadata"][i]`` will be set to the properties of the elite.
         Note that ``elites["measures"][i]`` may not be equal to the
         need to be in the same archive cell.

         If the cell associated with ``measures_batch[i]`` *does not* have any
-        elite in it, then the corresponding outputs are set to empty values --
-        namely:
+        elite in it, then ``occupied[i]`` will be set to False. Furthermore, the
+        corresponding outputs will be set to empty values -- namely:

-        * ``elites["solution"][i]`` will be an array of NaN
-        * ``elites["objective"][i]`` will be NaN
-        * ``elites["measures"][i]`` will be an array of NaN
-        * ``elites["index"][i]`` will be -1
-        * ``elites["metadata"][i]`` will be None
+        * NaN for floating-point fields
+        * -1 for the "index" field
+        * 0 for integer fields
+        * None for object fields

         If you need to retrieve a *single* elite associated with some measures,
         consider using :meth:`retrieve_single`.

         Args:
             measures_batch (array-like): (batch_size, :attr:`measure_dim`) array
                 of coordinates in measure space.
         Returns:
-            dict: See above.
+            tuple: 2-element tuple of (occupied array, dict). The occupied array
+                indicates whether each of the cells indicated by the measures in
+                measures_batch has an elite, while the dict contains the data of
+                those elites.
The dict maps from field name to the corresponding + array. Raises: ValueError: ``measures_batch`` is not of shape (batch_size, :attr:`measure_dim`). @@ -564,44 +568,38 @@ def retrieve(self, measures_batch): check_finite(measures_batch, "measures_batch") occupied, data = self._store.retrieve(self.index_of(measures_batch)) + unoccupied = ~occupied - return { - # For each occupied_batch[i], this np.where selects - # self._solution_arr[index_batch][i] if occupied_batch[i] is True. - # Otherwise, it uses the alternate value (a solution array - # consisting of np.nan). - "solution": - np.where(occupied[:, None], data["solution"], - np.full(self._solution_dim, np.nan)), - # Here the alternative is just a scalar np.nan. - "objective": - np.where(occupied, data["objective"], np.nan), - # And here it is a measures array of np.nan. - "measures": - np.where(occupied[:, None], data["measures"], - np.full(self._measure_dim, np.nan)), - # Indices must be integers, so np.nan would not work, so we use -1. - "index": - np.where(occupied, data["index"], -1), - "metadata": - np.where(occupied, data["metadata"], None), - } + for name, arr in data.items(): + if arr.dtype == object: + fill_val = None + elif name == "index": + fill_val = -1 + elif np.issubdtype(arr.dtype, np.integer): + fill_val = 0 + else: # Floating-point and other fields. + fill_val = np.nan + + arr[unoccupied] = fill_val + + return occupied, data def retrieve_single(self, measures): """Retrieves the elite with measures in the same cell as the measures specified. While :meth:`retrieve` takes in a *batch* of measures, this method takes - in the measures for only *one* solution and returns a dict with single - entries. + in the measures for only *one* solution and returns a single bool and a + dict with single entries. Args: measures (array-like): (:attr:`measure_dim`,) array of measures. Returns: - If there is an elite with measures in the same cell as the measures - specified, then this method returns dict where all the fields hold - the info of the elite. Otherwise, this method returns a dict filled - with the same "empty" values described in :meth:`retrieve`. + tuple: If there is an elite with measures in the same cell as the + measures specified, then this method returns a True value and a dict + where all the fields hold the info of the elite. Otherwise, this + method returns a False value and a dict filled with the same "empty" + values described in :meth:`retrieve`. Raises: ValueError: ``measures`` is not of shape (:attr:`measure_dim`,). ValueError: ``measures`` has non-finite values (inf or NaN). @@ -610,10 +608,9 @@ def retrieve_single(self, measures): check_1d_shape(measures, "measures", self.measure_dim, "measure_dim") check_finite(measures, "measures") - return { - field: arr[0] - for field, arr in self.retrieve(measures[None]).items() - } + occupied, data = self.retrieve(measures[None]) + + return occupied[0], {field: arr[0] for field, arr in data.items()} def sample_elites(self, n): """Randomly samples elites from the archive. @@ -834,10 +831,8 @@ def cqd_score(self, penalties = np.copy(penalties) # Copy since we return this. 
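Under the new contract, callers branch on the boolean mask rather than testing fields for NaN sentinels; a minimal sketch of batch retrieval against a 2D-measure archive (the measure values are illustrative):

    occupied, data = archive.retrieve([[0.0, 0.0], [0.5, 0.5]])
    objectives = data["objective"][occupied]  # Keep only the occupied cells.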
check_is_1d(penalties, "penalties") - objective_batch, measures_batch = self._store.data( - ["objective", "measures"], - return_type="tuple", - ) + objective_batch = self._store.data("objective") + measures_batch = self._store.data("measures") norm_objectives = objective_batch / (obj_max - obj_min) diff --git a/tests/archives/archive_base_test.py b/tests/archives/archive_base_test.py index 84a50ef13..58b236a8c 100644 --- a/tests/archives/archive_base_test.py +++ b/tests/archives/archive_base_test.py @@ -340,19 +340,23 @@ def test_basic_stats(data): def test_retrieve_gets_correct_elite(data): - elites = data.archive_with_elite.retrieve([data.measures]) + occupied, elites = data.archive_with_elite.retrieve([data.measures]) + assert occupied[0] assert np.all(elites["solution"][0] == data.solution) assert elites["objective"][0] == data.objective assert np.all(elites["measures"][0] == data.measures) + assert elites["threshold"][0] == data.objective # Avoid checking elites["index"] since the meaning varies by archive. assert elites["metadata"][0] == data.metadata def test_retrieve_empty_values(data): - elites = data.archive.retrieve([data.measures]) + occupied, elites = data.archive.retrieve([data.measures]) + assert not occupied[0] assert np.all(np.isnan(elites["solution"][0])) assert np.isnan(elites["objective"]) assert np.all(np.isnan(elites["measures"][0])) + assert np.isnan(elites["threshold"]) assert elites["index"][0] == -1 assert elites["metadata"][0] is None @@ -363,19 +367,23 @@ def test_retrieve_wrong_shape(data): def test_retrieve_single_gets_correct_elite(data): - elite = data.archive_with_elite.retrieve_single(data.measures) + occupied, elite = data.archive_with_elite.retrieve_single(data.measures) + assert occupied assert np.all(elite["solution"] == data.solution) assert elite["objective"] == data.objective assert np.all(elite["measures"] == data.measures) + assert elite["threshold"] == data.objective # Avoid checking elite["index"] since the meaning varies by archive. assert elite["metadata"] == data.metadata def test_retrieve_single_empty_values(data): - elite = data.archive.retrieve_single(data.measures) + occupied, elite = data.archive.retrieve_single(data.measures) + assert not occupied assert np.all(np.isnan(elite["solution"])) assert np.isnan(elite["objective"]) assert np.all(np.isnan(elite["measures"])) + assert np.isnan(elite["threshold"]) assert elite["index"] == -1 assert elite["metadata"] is None diff --git a/tutorials/arm_repertoire.ipynb b/tutorials/arm_repertoire.ipynb index 07f0de50b..ffa80dc39 100644 --- a/tutorials/arm_repertoire.ipynb +++ b/tutorials/arm_repertoire.ipynb @@ -449,9 +449,10 @@ } ], "source": [ - "elite = archive.retrieve_single([0, 0])\n", + "occupied, elite = archive.retrieve_single([0, 0])\n", "_, ax = plt.subplots()\n", - "if elite[\"solution\"] is not None: # This is None if there is no solution for [0,0].\n", + "# `occupied` indicates if there was an elite in the corresponding cell.\n", + "if occupied:\n", " visualize(elite[\"solution\"], link_lengths, elite[\"objective\"], ax)" ] }, diff --git a/tutorials/lunar_lander.ipynb b/tutorials/lunar_lander.ipynb index 55cc081cb..f58597440 100644 --- a/tutorials/lunar_lander.ipynb +++ b/tutorials/lunar_lander.ipynb @@ -746,7 +746,6 @@ "id": "t2QPnuqgFKhr" }, "source": [ - "\n", "We can retrieve policies with measures that are close to a query with the [`retrieve_single`](https://docs.pyribs.org/en/latest/api/ribs.archives.GridArchive.html#ribs.archives.GridArchive.retrieve_single) method. 
This method will look up the cell corresponding to the queried measures. Then, the method will check if there is an elite in that cell, and return the elite if it exists (the method does not check neighboring cells for elites). The returned elite may not have the exact measures requested because the elite only has to be in the same cell as the queried measures.\n", "\n", "Below, we first retrieve a policy that impacted the ground on the left (approximately -0.4) with low velocity (approximately -0.10) by querying for `[-0.4, -0.10]`." @@ -789,10 +788,9 @@ } ], "source": [ - "elite = archive.retrieve_single([-0.4, -0.10])\n", - "# NaN objective indicates the solution could not be retrieved because there was\n", - "# no elite in the corresponding cell.\n", - "if not np.isnan(elite[\"objective\"]):\n", + "occupied, elite = archive.retrieve_single([-0.4, -0.10])\n", + "# `occupied` indicates if there was an elite in the corresponding cell.\n", + "if occupied:\n", " print(f\"Objective: {elite['objective']}\")\n", " print(f\"Measures: (x-pos: {elite['measures'][0]}, y-vel: {elite['measures'][1]})\")\n", " display_video(elite[\"solution\"])" @@ -848,8 +846,8 @@ } ], "source": [ - "elite = archive.retrieve_single([0.6, -0.10])\n", - "if not np.isnan(elite[\"objective\"]):\n", + "occupied, elite = archive.retrieve_single([0.6, -0.10])\n", + "if occupied:\n", " print(f\"Objective: {elite['objective']}\")\n", " print(f\"Measures: (x-pos: {elite['measures'][0]}, y-vel: {elite['measures'][1]})\")\n", " display_video(elite[\"solution\"])" @@ -901,8 +899,8 @@ } ], "source": [ - "elite = archive.retrieve_single([0.0, -0.10])\n", - "if not np.isnan(elite[\"objective\"]):\n", + "occupied, elite = archive.retrieve_single([0.0, -0.10])\n", + "if occupied:\n", " print(f\"Objective: {elite['objective']}\")\n", " print(f\"Measures: (x-pos: {elite['measures'][0]}, y-vel: {elite['measures'][1]})\")\n", " display_video(elite[\"solution\"])"
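Mirroring the updated tests above, a miss from `retrieve_single` can also be confirmed against the dtype-dependent fill values, although the returned boolean is the supported signal; a compact sketch (the measures are illustrative):

    import numpy as np

    occupied, elite = archive.retrieve_single([0.0, -0.10])
    if not occupied:
        # Fill values: NaN for floats, -1 for "index", None for object fields.
        assert np.isnan(elite["objective"]) and elite["index"] == -1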