From d97a04c2da975568e03ae55ce5ad135aee259136 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Sat, 28 Oct 2023 03:39:15 -0700 Subject: [PATCH 01/26] Start ArrayStore --- ribs/archives/__init__.py | 3 +++ ribs/archives/_array_store.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 ribs/archives/_array_store.py diff --git a/ribs/archives/__init__.py b/ribs/archives/__init__.py index 7021333da..f9292aca2 100644 --- a/ribs/archives/__init__.py +++ b/ribs/archives/__init__.py @@ -17,6 +17,7 @@ ribs.archives.CVTArchive ribs.archives.SlidingBoundariesArchive ribs.archives.ArchiveBase + ribs.archives.ArrayStore ribs.archives.AddStatus ribs.archives.Elite ribs.archives.EliteBatch @@ -28,6 +29,7 @@ from ribs.archives._archive_base import ArchiveBase from ribs.archives._archive_data_frame import ArchiveDataFrame from ribs.archives._archive_stats import ArchiveStats +from ribs.archives._array_store import ArrayStore from ribs.archives._cqd_score_result import CQDScoreResult from ribs.archives._cvt_archive import CVTArchive from ribs.archives._elite import Elite, EliteBatch @@ -39,6 +41,7 @@ "CVTArchive", "SlidingBoundariesArchive", "ArchiveBase", + "ArrayStore", "AddStatus", "Elite", "ArchiveDataFrame", diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py new file mode 100644 index 000000000..99e060e56 --- /dev/null +++ b/ribs/archives/_array_store.py @@ -0,0 +1,34 @@ +"""Provides ArrayStore.""" + + +class ArrayStore: + """Maintains a set of arrays that share a common dimension. + + The ArrayStore consists of several *fields* of data that are manipulated + simultaneously via batch operations. Each field is a NumPy array with a + dimension of ``(capacity, ...)`` and can be of any type. + + Since the arrays all share a common first dimension, they also share a + common index. For instance, if we :meth:`retrieve` the entries at indices + ``[0, 2, 1]``, we would get a dict that contains the objective and measures + at indices 0, 2, and 1:: + + { + "objective": [-1, 3, -5], + "measures": [[0, 0], [2, 1], [3, 5]], + } + + The ArrayStore supports several further operations, in particular a flexible + :meth:`add` method that inserts entries into the ArrayStore. + + Args: + field_desc: TODO + capacity: TODO + + Attributes: + _props: TODO + _fields: TODO + """ + + def __init__(self): + pass From a93902d759cdb8e6cdb10d157213dbf5426f449a Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Sun, 29 Oct 2023 02:00:10 -0700 Subject: [PATCH 02/26] ArrayStore init --- ribs/archives/_array_store.py | 37 +++++++++++++++++++++++++----- tests/archives/array_store_test.py | 15 ++++++++++++ 2 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 tests/archives/array_store_test.py diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 99e060e56..e0067a78e 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -1,4 +1,5 @@ """Provides ArrayStore.""" +import numpy as np class ArrayStore: @@ -22,13 +23,37 @@ class ArrayStore: :meth:`add` method that inserts entries into the ArrayStore. Args: - field_desc: TODO - capacity: TODO + field_desc (dict): Description of fields in the array store. The + description is a dict mapping from str to tuple of ``(shape, + dtype)``. For instance, ``{"objective": ((), np.float32), + "measures": ((10,), np.float32)}`` will create an "objective" field + with shape ``(capacity,)`` and a "measures" field with shape + ``(capacity, 10)``. 
+ capacity (int): Total possible cells in the store. Attributes: - _props: TODO - _fields: TODO + _props: Dict with properties that are common to every ArrayStore. + + * "capacity": Maximum number of cells in the store. + * "occupied": Boolean array of size ``(capacity,)`` indicating + whether each index has an entry. + * "n_occupied": Number of entries currently in the store. + * "occupied_list": Array of size ``(capacity,)`` storing the indices + of all occupied cells in the store. Only the first ``n_occupied`` + entries will be valid. + + _fields: Dict holding all the arrays with their data. """ - def __init__(self): - pass + def __init__(self, field_desc, capacity): + self._props = { + "capacity": capacity, + "occupied": np.zeros(capacity, dtype=bool), + "n_occupied": 0, + "occupied_list": np.empty(capacity, dtype=int), + } + + self._fields = {} + for name, (field_shape, dtype) in field_desc.items(): + array_shape = (capacity,) + tuple(field_shape) + self._fields[name] = np.empty(array_shape, dtype) diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py new file mode 100644 index 000000000..dfdbdae7d --- /dev/null +++ b/tests/archives/array_store_test.py @@ -0,0 +1,15 @@ +"""Tests for ArrayStore.""" +import numpy as np + +from ribs.archives import ArrayStore + + +def test_init(): + ArrayStore( + { + "objective": ((), np.float32), + "measures": ((2,), np.float32), + "solution": ((10,), np.float32), + }, + 10, + ) From a99db4ef8b7672c127521345cb0605ab1fba3e67 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Sun, 29 Oct 2023 12:25:54 -0700 Subject: [PATCH 03/26] Add properties --- ribs/archives/_array_store.py | 33 ++++++++++++++++++++++++++---- tests/archives/array_store_test.py | 10 +++++++-- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index e0067a78e..b87eddee7 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -1,6 +1,8 @@ """Provides ArrayStore.""" import numpy as np +from ribs._utils import readonly + class ArrayStore: """Maintains a set of arrays that share a common dimension. @@ -12,7 +14,7 @@ class ArrayStore: Since the arrays all share a common first dimension, they also share a common index. For instance, if we :meth:`retrieve` the entries at indices ``[0, 2, 1]``, we would get a dict that contains the objective and measures - at indices 0, 2, and 1:: + at indices 0, 2, and 1, e.g.:: { "objective": [-1, 3, -5], @@ -34,12 +36,12 @@ class ArrayStore: Attributes: _props: Dict with properties that are common to every ArrayStore. - * "capacity": Maximum number of cells in the store. + * "capacity": Maximum number of entries in the store. * "occupied": Boolean array of size ``(capacity,)`` indicating whether each index has an entry. * "n_occupied": Number of entries currently in the store. - * "occupied_list": Array of size ``(capacity,)`` storing the indices - of all occupied cells in the store. Only the first ``n_occupied`` + * "occupied_list": Array of size ``(capacity,)`` listing all + occupied indices in the store. Only the first ``n_occupied`` entries will be valid. _fields: Dict holding all the arrays with their data. 
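A rough usage sketch of the constructor and the occupancy properties added in this patch (the field layout and assertions mirror ``test_init``; treat this as an illustration rather than the library's documented API)::

    import numpy as np

    from ribs.archives import ArrayStore

    # Every field shares the leading dimension of size ``capacity``.
    store = ArrayStore(
        field_desc={
            "objective": ((), np.float32),   # stored with shape (capacity,)
            "measures": ((2,), np.float32),  # stored with shape (capacity, 2)
        },
        capacity=10,
    )

    # A freshly created store has no occupied indices.
    assert len(store) == 0
    assert store.capacity == 10
    assert not store.occupied.any()
    assert len(store.occupied_list) == 0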
@@ -57,3 +59,26 @@ def __init__(self, field_desc, capacity): for name, (field_shape, dtype) in field_desc.items(): array_shape = (capacity,) + tuple(field_shape) self._fields[name] = np.empty(array_shape, dtype) + + def __len__(self): + """Number of occupied indices in the store, i.e.g, number of indices + that have a corresponding entry.""" + return self._props["n_occupied"] + + @property + def capacity(self): + """int: Maximum number of entries in the store.""" + return self._props["capacity"] + + @property + def occupied(self): + """numpy.ndarray: Boolean array of size ``(capacity,)`` indicating + whether each index has an entry.""" + return self._props["occupied"] + + @property + def occupied_list(self): + """numpy.ndarray: Integer array listing all occupied indices in the + store.""" + return readonly( + self._props["occupied_list"][:self._props["n_occupied"]]) diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index dfdbdae7d..26f38edcb 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -5,11 +5,17 @@ def test_init(): - ArrayStore( + capacity = 10 + store = ArrayStore( { "objective": ((), np.float32), "measures": ((2,), np.float32), "solution": ((10,), np.float32), }, - 10, + capacity, ) + + assert len(store) == 0 + assert store.capacity == capacity + assert np.all(~store.occupied) + assert len(store.occupied_list) == 0 From 6b71503639893b822ae029c0cca0b45ce0fa5856 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Sun, 29 Oct 2023 13:03:03 -0700 Subject: [PATCH 04/26] Add retrieve and basic add --- ribs/archives/_array_store.py | 73 +++++++++++++++++++++++++----- tests/archives/array_store_test.py | 31 +++++++++++++ 2 files changed, 93 insertions(+), 11 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index b87eddee7..d84bd5ce5 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -12,9 +12,9 @@ class ArrayStore: dimension of ``(capacity, ...)`` and can be of any type. Since the arrays all share a common first dimension, they also share a - common index. For instance, if we :meth:`retrieve` the entries at indices - ``[0, 2, 1]``, we would get a dict that contains the objective and measures - at indices 0, 2, and 1, e.g.:: + common index. For instance, if we :meth:`retrieve` the data at indices ``[0, + 2, 1]``, we would get a dict that contains the objective and measures at + indices 0, 2, and 1, e.g.:: { "objective": [-1, 3, -5], @@ -22,7 +22,7 @@ class ArrayStore: } The ArrayStore supports several further operations, in particular a flexible - :meth:`add` method that inserts entries into the ArrayStore. + :meth:`add` method that inserts data into the ArrayStore. Args: field_desc (dict): Description of fields in the array store. The @@ -36,13 +36,13 @@ class ArrayStore: Attributes: _props: Dict with properties that are common to every ArrayStore. - * "capacity": Maximum number of entries in the store. + * "capacity": Maximum number of data entries in the store. * "occupied": Boolean array of size ``(capacity,)`` indicating - whether each index has an entry. - * "n_occupied": Number of entries currently in the store. + whether each index has data associated with it. + * "n_occupied": Number of data entries currently in the store. * "occupied_list": Array of size ``(capacity,)`` listing all occupied indices in the store. Only the first ``n_occupied`` - entries will be valid. + elements will be valid. 
_fields: Dict holding all the arrays with their data. """ @@ -62,18 +62,18 @@ def __init__(self, field_desc, capacity): def __len__(self): """Number of occupied indices in the store, i.e.g, number of indices - that have a corresponding entry.""" + that have a corresponding data entry.""" return self._props["n_occupied"] @property def capacity(self): - """int: Maximum number of entries in the store.""" + """int: Maximum number of data entries in the store.""" return self._props["capacity"] @property def occupied(self): """numpy.ndarray: Boolean array of size ``(capacity,)`` indicating - whether each index has an entry.""" + whether each index has an data entry.""" return self._props["occupied"] @property @@ -82,3 +82,54 @@ def occupied_list(self): store.""" return readonly( self._props["occupied_list"][:self._props["n_occupied"]]) + + def retrieve(self, indices): + """Collects the data at the given indices. + + Args: + indices (array-like): List of indices at which to collect data. + Returns: + - **occupied**: Array indicating which indices, among those passed, + in have an associated data entry. For instance, if ``indices`` is + ``[0, 1, 2]`` and only index 2 has data, then ``occupied`` will be + ``[False, False, True]``. + - **data**: Dict mapping from the field name to the field data at + the given indices. For instance, if we have an ``objective`` field + and request data at indices ``[4, 1, 0]``, we might get ``data`` + that looks like ``{"objective": [1.5, 6.0, 2.3]}``. Note that if a + given index is not marked as occupied, it can have any data value + associated with it. For instance, if index 1 was not occupied, + then the 6.0 returned above should be ignored. + """ + occupied = readonly(self._props["occupied"][indices]) + data = { + name: readonly(arr[indices]) for name, arr in self._fields.items() + } + return occupied, data + + def add(self, indices, new_data, transforms): + """Adds new data to the archive at the given indices. + + Raise: + ValueError: The final version of ``new_data`` does not have the same + keys as the fields of this store. + """ + + # TODO: Use transforms + # pylint: disable = unused-argument + + # TODO + add_info = {} + + # Verify that new_data ends up with the correct fields after the + # transforms. + if new_data.keys() != self._fields.keys(): + raise ValueError( + f"`new_data` had keys {new_data.keys()} but should have the " + f"same keys as this ArrayStore, i.e., {self._fields.keys()}") + + # Insert into the ArrayStore. + for name, arr in self._fields.items(): + arr[indices] = new_data[name][indices] + + return add_info diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 26f38edcb..6d5ed7de9 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -1,8 +1,11 @@ """Tests for ArrayStore.""" import numpy as np +import pytest from ribs.archives import ArrayStore +# pylint: disable = redefined-outer-name + def test_init(): capacity = 10 @@ -19,3 +22,31 @@ def test_init(): assert store.capacity == capacity assert np.all(~store.occupied) assert len(store.occupied_list) == 0 + + +@pytest.fixture +def store(): + return ArrayStore( + field_desc={ + "objective": ((), np.float32), + "measures": ((2,), np.float32), + "solution": ((10,), np.float32), + }, + capacity=10, + ) + + +# TODO test retrieve + + +def test_add_wrong_keys(store): + with pytest.raises(ValueError): + store.add( + [0, 1], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]] + # Missing `solution` key. 
+ }, + [], # Empty transforms. + ) From be7db5b11ebdc35d9febc8083415000c394534d0 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Sun, 29 Oct 2023 16:05:02 -0700 Subject: [PATCH 05/26] More add functionality --- ribs/archives/_array_store.py | 22 +++++++- tests/archives/array_store_test.py | 80 ++++++++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 5 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index d84bd5ce5..e656bf3bb 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -1,5 +1,6 @@ """Provides ArrayStore.""" import numpy as np +from numpy_groupies import aggregate_nb as aggregate from ribs._utils import readonly @@ -113,6 +114,8 @@ def add(self, indices, new_data, transforms): Raise: ValueError: The final version of ``new_data`` does not have the same keys as the fields of this store. + ValueError: The final version of ``new_data`` has fields that have a + different length than ``indices``. """ # TODO: Use transforms @@ -128,8 +131,25 @@ def add(self, indices, new_data, transforms): f"`new_data` had keys {new_data.keys()} but should have the " f"same keys as this ArrayStore, i.e., {self._fields.keys()}") + for name, arr in new_data.items(): + if len(arr) != len(indices): + raise ValueError( + f"In `new_data`, the array for `{name}` has length " + f"{len(arr)} but should be the same length as indices " + f"({len(indices)})") + + # Update occupancy data. + unique_indices = np.where(aggregate(indices, 1, func="len") != 0)[0] + cur_occupied = self._props["occupied"][unique_indices] + new_indices = unique_indices[~cur_occupied] + n_occupied = self._props["n_occupied"] + self._props["occupied"][new_indices] = True + self._props["occupied_list"][n_occupied:n_occupied + + len(new_indices)] = new_indices + self._props["n_occupied"] = n_occupied + len(new_indices) + # Insert into the ArrayStore. for name, arr in self._fields.items(): - arr[indices] = new_data[name][indices] + arr[indices] = new_data[name] return add_info diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 6d5ed7de9..f7e59a272 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -26,6 +26,7 @@ def test_init(): @pytest.fixture def store(): + """Simple ArrayStore for testing.""" return ArrayStore( field_desc={ "objective": ((), np.float32), @@ -36,17 +37,88 @@ def store(): ) -# TODO test retrieve - - def test_add_wrong_keys(store): with pytest.raises(ValueError): store.add( [0, 1], { "objective": [1.0, 2.0], - "measures": [[1.0, 2.0], [3.0, 4.0]] + "measures": [[1.0, 2.0], [3.0, 4.0]], # Missing `solution` key. }, [], # Empty transforms. ) + + +def test_add_mismatch_indices(store): + with pytest.raises(ValueError): + store.add( + [0, 1], + { + "objective": [1.0, 2.0, 3.0], # Length 3 instead of 2. + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + [], # Empty transforms. + ) + + +def test_simple_add_and_retrieve(store): + """Add without transforms and then retrieve the data.""" + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + [], # Empty transforms. 
+ ) + + assert len(store) == 2 + assert np.all(store.occupied == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) + assert np.all(np.sort(store.occupied_list) == [3, 5]) + + occupied, data = store.retrieve([5, 3]) + + assert np.all(occupied == [True, True]) + assert data.keys() == set(["objective", "measures", "solution"]) + assert np.all(data["objective"] == [2.0, 1.0]) + assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) + assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) + + +def test_add_duplicate_indices(store): + store.add( + [3, 3], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + [], # Empty transforms. + ) + + assert len(store) == 1 + assert np.all(store.occupied == [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]) + assert np.all(store.occupied_list == [3]) + + +def test_retrieve_duplicate_indices(store): + store.add( + [3], + { + "objective": [2.0], + "measures": [[3.0, 4.0]], + "solution": [np.ones(10)], + }, + [], # Empty transforms. + ) + + occupied, data = store.retrieve([3, 3]) + + assert np.all(occupied == [True, True]) + assert data.keys() == set(["objective", "measures", "solution"]) + assert np.all(data["objective"] == [2.0, 2.0]) + assert np.all(data["measures"] == [[3.0, 4.0], [3.0, 4.0]]) + assert np.all(data["solution"] == [np.ones(10), np.ones(10)]) From 7832725efc851638a72dc36beb9680638e74d0d1 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Sun, 29 Oct 2023 16:45:36 -0700 Subject: [PATCH 06/26] Add clear --- ribs/archives/_array_store.py | 22 +++++++++++++++------- tests/archives/array_store_test.py | 10 ++++++++-- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index e656bf3bb..e133d19fd 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -32,7 +32,7 @@ class ArrayStore: "measures": ((10,), np.float32)}`` will create an "objective" field with shape ``(capacity,)`` and a "measures" field with shape ``(capacity, 10)``. - capacity (int): Total possible cells in the store. + capacity (int): Total possible entries in the store. Attributes: _props: Dict with properties that are common to every ArrayStore. @@ -117,12 +117,11 @@ def add(self, indices, new_data, transforms): ValueError: The final version of ``new_data`` has fields that have a different length than ``indices``. """ - - # TODO: Use transforms - # pylint: disable = unused-argument - - # TODO add_info = {} + for transform in transforms: + occupied, cur_data = self.retrieve(indices) + indices, new_data, add_info = transform(indices, occupied, cur_data, + new_data, add_info) # Verify that new_data ends up with the correct fields after the # transforms. @@ -131,6 +130,7 @@ def add(self, indices, new_data, transforms): f"`new_data` had keys {new_data.keys()} but should have the " f"same keys as this ArrayStore, i.e., {self._fields.keys()}") + # Verify that the array shapes match the indices. for name, arr in new_data.items(): if len(arr) != len(indices): raise ValueError( @@ -148,8 +148,16 @@ def add(self, indices, new_data, transforms): len(new_indices)] = new_indices self._props["n_occupied"] = n_occupied + len(new_indices) - # Insert into the ArrayStore. + # Insert into the ArrayStore. Note that we do not assume indices are + # unique. Hence, when updating occupancy data above, we computed the + # unique indices. In contrast, here we let NumPy's default behavior + # handle duplicate indices. 
for name, arr in self._fields.items(): arr[indices] = new_data[name] return add_info + + def clear(self): + """Removes all entries from the store.""" + self._props["n_occupied"] = 0 # Effectively clears occupied_list too. + self._props["occupied"].fill(False) diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index f7e59a272..3f8897617 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -63,8 +63,8 @@ def test_add_mismatch_indices(store): ) -def test_simple_add_and_retrieve(store): - """Add without transforms and then retrieve the data.""" +def test_simple_add_retrieve_clear(store): + """Add without transforms, retrieve the data, and clear the archive.""" store.add( [3, 5], { @@ -87,6 +87,12 @@ def test_simple_add_and_retrieve(store): assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) + store.clear() + + assert len(store) == 0 + assert np.all(store.occupied == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + assert len(store.occupied_list) == 0 + def test_add_duplicate_indices(store): store.add( From 3123375cec53b227c1287501f67a38dd575d4782 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 00:33:30 -0700 Subject: [PATCH 07/26] More add and tests --- ribs/archives/_array_store.py | 67 +++++++++++++++++++++++++++++- tests/archives/array_store_test.py | 62 +++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 2 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index e133d19fd..d22ed8cdd 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -111,6 +111,46 @@ def retrieve(self, indices): def add(self, indices, new_data, transforms): """Adds new data to the archive at the given indices. + The indices and new_data are passed through transforms before adding to + the archive. The general idea is that these transforms will gradually + modify the indices and new_data. For instance, they can add new fields + to new_data (new_data may not initially have all the same fields as the + archive). Alternatively, they can filter out duplicate indices, eg if + multiple entries are being inserted at the same index we can choose one + with the best objective. + + The signature of a transform is as follows: + + def transform(indices, new_data, add_info, occupied, cur_data) -> + (indices, new_data, add_info): + + Transform parameters: + + * **indices** (array-like): Array of indices at which new_data + should be inserted. + * **new_data** (dict): New data for the given indices. Maps from + field name to the array of new data for that field. + * **add_info** (dict): Information to return to the user about the + addition process. Example info includes whether each entry was + ultimately inserted into the archive, as well as general + statistics like update QD score. For the first transform, this + will be an empty dict. + * **occupied** (array-like): Whether the given indices are currently + occupied. Same as that given by :meth:`retrieve`. + * **cur_data** (dict): Data at the current indices in the archive. + Same as that given by :meth:`retrieve`. + + Transform outputs: + + * **indices** (array-like): Modified indices. + * **new_data** (dict): Modified new_data. At the end of the + transforms, it should have the same keys as the store. + * **add_info** (dict): Modified add_info. + + Args: + indices (array-like): Initial list of indices for addition. 
+ new_data (dict): Initial data for addition. + transforms (list): List of transforms on the data to be added. Raise: ValueError: The final version of ``new_data`` does not have the same keys as the fields of this store. @@ -120,8 +160,8 @@ def add(self, indices, new_data, transforms): add_info = {} for transform in transforms: occupied, cur_data = self.retrieve(indices) - indices, new_data, add_info = transform(indices, occupied, cur_data, - new_data, add_info) + indices, new_data, add_info = transform(indices, new_data, add_info, + occupied, cur_data) # Verify that new_data ends up with the correct fields after the # transforms. @@ -161,3 +201,26 @@ def clear(self): """Removes all entries from the store.""" self._props["n_occupied"] = 0 # Effectively clears occupied_list too. self._props["occupied"].fill(False) + + def as_dict(self): + """Returns the data in the ArrayStore as a one-level dictionary. + + To collapse the dict, we prefix each key with ``props.`` or ``fields.``, + so the result looks as follows:: + + { + "props.capacity": ..., + "props.occupied": ..., + ... + "fields.objective": ..., + } + + Returns: + dict: See description above. + """ + d = {} + for name, prop in self._props.items(): + d[f"props.{name}"] = prop + for name, arr in self._fields.items(): + d[f"fields.{name}"] = arr + return d diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 3f8897617..48b2cfeca 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -128,3 +128,65 @@ def test_retrieve_duplicate_indices(store): assert np.all(data["objective"] == [2.0, 2.0]) assert np.all(data["measures"] == [[3.0, 4.0], [3.0, 4.0]]) assert np.all(data["solution"] == [np.ones(10), np.ones(10)]) + + +def test_add_simple_transform(store): + + def obj_meas(indices, new_data, add_info, occupied, cur_data): + # pylint: disable = unused-argument + new_data["objective"] = np.sum(new_data["solution"], axis=1) + new_data["measures"] = np.asarray(new_data["solution"])[:, :2] + return indices, new_data, {"foo": 5} + + add_info = store.add( + [3, 5], + { + "solution": [np.ones(10), 2 * np.ones(10)], + }, + [obj_meas], + ) + + assert add_info == {"foo": 5} + + assert len(store) == 2 + assert np.all(store.occupied == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) + assert np.all(np.sort(store.occupied_list) == [3, 5]) + + occupied, data = store.retrieve([3, 5]) + + assert np.all(occupied == [True, True]) + assert data.keys() == set(["objective", "measures", "solution"]) + assert np.all(data["objective"] == [10.0, 20.0]) + assert np.all(data["measures"] == [[1.0, 1.0], [2.0, 2.0]]) + assert np.all(data["solution"] == [np.ones(10), 2 * np.ones(10)]) + + +def test_as_dict(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + [], # Empty transforms. 
+ ) + + d = store.as_dict() + + assert d.keys() == set([ + "props.capacity", + "props.occupied", + "props.n_occupied", + "props.occupied_list", + "fields.objective", + "fields.measures", + "fields.solution", + ]) + assert d["props.capacity"] == 10 + assert np.all(d["props.occupied"] == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) + assert d["props.n_occupied"] == 2 + assert np.all(np.sort(d["props.occupied_list"][:2]) == [3, 5]) + assert np.all(d["fields.objective"][[3, 5]] == [1.0, 2.0]) + assert np.all(d["fields.measures"][[3, 5]] == [[1.0, 2.0], [3.0, 4.0]]) + assert np.all(d["fields.solution"][[3, 5]] == [np.zeros(10), np.ones(10)]) From cc7235f97ecec6504f856c1c43ebc6939a076f9a Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 12:10:26 -0700 Subject: [PATCH 08/26] Return readonly arrays --- ribs/archives/_array_store.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index d22ed8cdd..2d75c7ba4 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -75,7 +75,7 @@ def capacity(self): def occupied(self): """numpy.ndarray: Boolean array of size ``(capacity,)`` indicating whether each index has an data entry.""" - return self._props["occupied"] + return readonly(self._props["occupied"]) @property def occupied_list(self): @@ -220,7 +220,11 @@ def as_dict(self): """ d = {} for name, prop in self._props.items(): + if isinstance(prop, np.ndarray): + prop = readonly(prop.view()) d[f"props.{name}"] = prop for name, arr in self._fields.items(): + if isinstance(arr, np.ndarray): + arr = readonly(arr.view()) d[f"fields.{name}"] = arr return d From 4b3883e9a21f2fc40881c287f3c0b818343ba5e9 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 12:52:31 -0700 Subject: [PATCH 09/26] Resize --- ribs/archives/_array_store.py | 33 +++++++++++++++++++++++++++++- tests/archives/array_store_test.py | 27 ++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 2d75c7ba4..e5b676181 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -75,7 +75,7 @@ def capacity(self): def occupied(self): """numpy.ndarray: Boolean array of size ``(capacity,)`` indicating whether each index has an data entry.""" - return readonly(self._props["occupied"]) + return readonly(self._props["occupied"].view()) @property def occupied_list(self): @@ -108,6 +108,7 @@ def retrieve(self, indices): } return occupied, data + # TODO: Add cur_add_info def add(self, indices, new_data, transforms): """Adds new data to the archive at the given indices. @@ -202,6 +203,36 @@ def clear(self): self._props["n_occupied"] = 0 # Effectively clears occupied_list too. self._props["occupied"].fill(False) + def resize(self, capacity): + """Resizes the store to the given capacity. + + Args: + capacity (int): New capacity. + Raises: + ValueError: The new capacity is less than or equal to the current + capacity. 
+ """ + if capacity <= self._props["capacity"]: + raise ValueError( + f"New capacity ({capacity}) must be greater than current " + f"capacity ({self._props['capacity']}.") + + old_capacity = self._props["capacity"] + self._props["capacity"] = capacity + + old_occupied = self._props["occupied"] + self._props["occupied"] = np.zeros(capacity, dtype=bool) + self._props["occupied"][:old_capacity] = old_occupied + + old_occupied_list = self._props["occupied_list"] + self._props["occupied_list"] = np.empty(capacity, dtype=int) + self._props["occupied_list"][:old_capacity] = old_occupied_list + + for name, old_arr in self._fields.items(): + new_shape = (capacity,) + old_arr.shape[1:] + self._fields[name] = np.empty(new_shape, old_arr.dtype) + self._fields[name][:old_capacity] = old_arr + def as_dict(self): """Returns the data in the ArrayStore as a one-level dictionary. diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 48b2cfeca..150ae21f2 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -161,6 +161,33 @@ def obj_meas(indices, new_data, add_info, occupied, cur_data): assert np.all(data["solution"] == [np.ones(10), 2 * np.ones(10)]) +def test_resize_bad_capacity(store): + with pytest.raises(ValueError): + store.resize(store.capacity) + + +def test_resize_to_double_capacity(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + [], # Empty transforms. + ) + + store.resize(store.capacity * 2) + + assert len(store) == 2 + assert np.all(store.occupied == + [0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + assert np.all(np.sort(store.occupied_list) == [3, 5]) + + # Spot-check the fields. + assert np.all(store._fields["objective"][[3, 5]] == [1.0, 2.0]) + + def test_as_dict(store): store.add( [3, 5], From a09bf211949a85176c806d2eb1eee4d3f920f1ee Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 15:01:16 -0700 Subject: [PATCH 10/26] Add save and load prototypes --- ribs/archives/_array_store.py | 115 ++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index e5b676181..ba86d8264 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -1,9 +1,15 @@ """Provides ArrayStore.""" +import pickle as pkl +from contextlib import nullcontext +from pathlib import Path + import numpy as np from numpy_groupies import aggregate_nb as aggregate from ribs._utils import readonly +_FORMATS = ["npz", "npz_compressed", "pkl"] + class ArrayStore: """Maintains a set of arrays that share a common dimension. @@ -259,3 +265,112 @@ def as_dict(self): arr = readonly(arr.view()) d[f"fields.{name}"] = arr return d + + def save(self, file, fmt=None): + """Saves the store to a given file. + + Supported formats are: + + * `"npz"`: Saves to the `.npz` file format with :func:`numpy.savez` + * `"npz_compressed"`: Saves to a compressed `.npz` file format with + :func:`numpy.savez_compressed` + * `"pkl"`: Saves to a pickle file with :func:`pickle.dump` + + .. note:: + + Internally, this method calls :meth:`as_dict` and saves the + resulting dictionary to a file. If you need a format that is not + supported here, you can save the dict from :meth:`as_dict`. To + reload ArrayStore, load the dict from your format and then pass the + dict into :meth:`load`. 
+ + Args: + file (str, pathlib.Path, file): Filename or file object for saving + the data. We do not modify the filename to include the + extension. + fmt (str): File format for saving the data. + Raises: + ValueError: Unsupported format. + """ + d = self.as_dict() + + if fmt == "npz": + np.savez(file, **d) + elif fmt == "npz_compressed": + np.savez_compressed(file, **d) + elif fmt == "pkl": + with (open(file, "wb") if isinstance(file, (str, Path)) else + nullcontext(file)) as file_obj: + pkl.dump(d, file_obj) + else: + raise ValueError(f"Unsupported value `{fmt}` for fmt. Must be " + f"one of {_FORMATS}") + + @staticmethod + def load(file, fmt=None, allow_pickle=False): + """Loads the ArrayStore from a dict or file. + + Args: + file (dict, str, pathlib.Path, file): Data to load. Either a dict + like that output by :meth:`as_dict`; a path to a file; or a file + object. In the case of a file object, ``fmt`` must be passed + (see :meth:`save` for supported formats). + fmt (str): Format for the file. If not passed in, we will infer the + format from the extension of ``file``. + allow_pickle (bool): Only applicable if using ``npz`` or + ``npz_compressed`` format and the store contains object arrays. + In this case, pickle is necessary since the object arrays are + saved with pickle (see :meth:`numpy.load` for more info). + Raises: + ValueError: Could not infer ``fmt`` from ``file`` as there is no + extension. + ValueError: The loaded props dict has the wrong keys. + """ + + if isinstance(file, dict): + data = file + else: + # Load dict from file. + + if isinstance(file, (str, Path)): + file = Path(file) + if fmt is None: + fmt = file.suffix[1:] + if fmt == "": + raise ValueError( + f"Could not infer fmt from file `{file}`. Please " + "pass the fmt arg.") + + # Now file is either a Path or a file-like object. + + if fmt in ["npz", "npz_compressed"]: + data = dict(np.load(file, allow_pickle=allow_pickle)) + elif fmt == "pkl": + with (open(file, "rb") if isinstance(file, (str, Path)) else + nullcontext(file)) as file_obj: + data = pkl.load(file_obj) + + # Load the store. Here, we create a store with no data in it. + # pylint: disable = protected-access + store = ArrayStore({}, 0) + + props = { + name[len("props."):]: arr + for name, arr in data.items() + if name.startswith("props.") + } + if props.keys() != store._props.keys(): + raise ValueError( + f"Expected props to have keys {store._props.keys()} but " + f"only found {props.keys()}") + + fields = { + name[len("fields."):]: arr + for name, arr in data.items() + if name.startswith("fields.") + } + + store._props = props + store._fields = fields + + return store From fae7c657a21cf89d737359525ff533b181964402 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 15:06:57 -0700 Subject: [PATCH 11/26] Add from_dict --- ribs/archives/_array_store.py | 96 +++--------------------------- tests/archives/array_store_test.py | 33 ++++++++++ 2 files changed, 40 insertions(+), 89 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index ba86d8264..7fb133ed1 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -1,15 +1,9 @@ """Provides ArrayStore.""" -import pickle as pkl -from contextlib import nullcontext -from pathlib import Path - import numpy as np from numpy_groupies import aggregate_nb as aggregate from ribs._utils import readonly -_FORMATS = ["npz", "npz_compressed", "pkl"] - class ArrayStore: """Maintains a set of arrays that share a common dimension. 
@@ -114,7 +108,6 @@ def retrieve(self, indices): } return occupied, data - # TODO: Add cur_add_info def add(self, indices, new_data, transforms): """Adds new data to the archive at the given indices. @@ -266,97 +259,22 @@ def as_dict(self): d[f"fields.{name}"] = arr return d - def save(self, file, fmt=None): - """Saves the store to a given file. - - Supported formats are: - - * `"npz"`: Saves to the `.npz` file format with :func:`numpy.savez` - * `"npz_compressed"`: Saves to a compressed `.npz` file format with - :func:`numpy.savez_compressed` - * `"pkl"`: Saves to a pickle file with :func:`pickle.dump` - - .. note:: - - Internally, this method calls :meth:`as_dict` and saves the - resulting dictionary to a file. If you need a format that is not - supported here, you can save the dict from :meth:`as_dict`. To - reload ArrayStore, load the dict from your format and then pass the - dict into :meth:`load`. - - Args: - file (str, pathlib.Path, file): Filename or file object for saving - the data. We do not modify the filename to include the - extension. - fmt (str): File format for saving the data. - Raises: - ValueError: Unsupported format. - """ - d = self.as_dict() - - if fmt == "npz": - np.savez(file, **d) - elif fmt == "npz_compressed": - np.savez_compressed(file, **d) - elif fmt == "pkl": - with (open(file, "wb") if isinstance(file, (str, Path)) else - nullcontext(file)) as file_obj: - pkl.dump(d, file_obj) - else: - raise ValueError(f"Unsupported value `{fmt}` for fmt. Must be " - f"one of {_FORMATS}") - @staticmethod - def load(file, fmt=None, allow_pickle=False): - """Loads the ArrayStore from a dict or file. + def from_dict(d): + """Loads an ArrayStore from a dict. Args: - file (dict, str, pathlib.Path, file): Data to load. Either a dict - like that output by :meth:`as_dict`; a path to a file; or a file - object. In the case of a file object, ``fmt`` must be passed - (see :meth:`save` for supported formats). - fmt (str): Format for the file. If not passed in, we will infer the - format from the extension of ``file``. - allow_pickle (bool): Only applicable if using ``npz`` or - ``npz_compressed`` format and the store contains object arrays. - In this case, pickle is necessary since the object arrays are - saved with pickle (see :meth:`numpy.load` for more info). + d (dict): Dict returned by :meth:`as_dict`. Raises: - ValueError: Could not infer ``fmt`` from ``file`` as there is no - extension. ValueError: The loaded props dict has the wrong keys. """ - - if isinstance(file, dict): - data = file - else: - # Load dict from file. - - if isinstance(file, (str, Path)): - file = Path(file) - if fmt is None: - fmt = file.suffix[1:] - if fmt == "": - raise ValueError( - f"Could not infer fmt from file `{file}`. Please " - "pass the fmt arg.") - - # Now file is either a Path or a file-like object. - - if fmt in ["npz", "npz_compressed"]: - data = dict(np.load(file, allow_pickle=allow_pickle)) - elif fmt == "pkl": - with (open(file, "rb") if isinstance(file, (str, Path)) else - nullcontext(file)) as file_obj: - data = pkl.load(file_obj) - - # Load the store. Here, we create a store with no data in it. # pylint: disable = protected-access - store = ArrayStore({}, 0) + + store = ArrayStore({}, 0) # Create an empty store. 
props = { name[len("props."):]: arr - for name, arr in data.items() + for name, arr in d.items() if name.startswith("props.") } if props.keys() != store._props.keys(): @@ -366,7 +284,7 @@ def load(file, fmt=None, allow_pickle=False): fields = { name[len("fields."):]: arr - for name, arr in data.items() + for name, arr in d.items() if name.startswith("fields.") } diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 150ae21f2..e0e74a057 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -217,3 +217,36 @@ def test_as_dict(store): assert np.all(d["fields.objective"][[3, 5]] == [1.0, 2.0]) assert np.all(d["fields.measures"][[3, 5]] == [[1.0, 2.0], [3.0, 4.0]]) assert np.all(d["fields.solution"][[3, 5]] == [np.zeros(10), np.ones(10)]) + + +def test_from_dict_invalid_props(store): + d = store.as_dict() + del d["props.capacity"] + with pytest.raises(ValueError): + ArrayStore.from_dict(d) + + +def test_from_dict(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + [], # Empty transforms. + ) + + new_store = ArrayStore.from_dict(store.as_dict()) + + assert len(new_store) == 2 + assert np.all(new_store.occupied == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) + assert np.all(np.sort(new_store.occupied_list) == [3, 5]) + + occupied, data = new_store.retrieve([5, 3]) + + assert np.all(occupied == [True, True]) + assert data.keys() == set(["objective", "measures", "solution"]) + assert np.all(data["objective"] == [2.0, 1.0]) + assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) + assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) From 3dbb129e647a2be123637ecf8a3436f9b45b2751 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 15:16:24 -0700 Subject: [PATCH 12/26] Add add_info to add() --- ribs/archives/_array_store.py | 24 +++++++++++++++--------- tests/archives/array_store_test.py | 14 ++++++++++++-- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 7fb133ed1..9bc15d0c5 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -108,16 +108,18 @@ def retrieve(self, indices): } return occupied, data - def add(self, indices, new_data, transforms): + def add(self, indices, new_data, add_info, transforms): """Adds new data to the archive at the given indices. - The indices and new_data are passed through transforms before adding to - the archive. The general idea is that these transforms will gradually - modify the indices and new_data. For instance, they can add new fields - to new_data (new_data may not initially have all the same fields as the - archive). Alternatively, they can filter out duplicate indices, eg if - multiple entries are being inserted at the same index we can choose one - with the best objective. + The indices, new_data, and add_info are passed through transforms before + adding to the archive. The general idea is that these transforms will + gradually modify the indices, new_data, and add_info. For instance, they + can add new fields to new_data (new_data may not initially have all the + same fields as the archive). Alternatively, they can filter out + duplicate indices, eg if multiple entries are being inserted at the same + index we can choose one with the best objective. As another example, the + transforms can add stats to the add_info or delete fields from the + add_info. 
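For illustration, a transform matching the signature documented below might look like the following sketch (modeled on the ``obj_meas`` transform in the tests; the ``compute_obj_meas`` name and the store's fields are assumptions, not part of the library)::

    import numpy as np

    def compute_obj_meas(indices, new_data, add_info, occupied, cur_data):
        # Derive the missing fields so that ``new_data`` ends up with the
        # same fields as the store (objective, measures, solution).
        solutions = np.asarray(new_data["solution"])
        new_data["objective"] = solutions.sum(axis=1)
        new_data["measures"] = solutions[:, :2]
        return indices, new_data, add_info

    # Assuming ``store`` has "objective", "measures", and "solution" fields:
    add_info = store.add(
        [3, 5],                          # indices
        {"solution": np.ones((2, 10))},  # new_data with only some fields
        {},                              # initial add_info
        [compute_obj_meas],              # transforms applied in order
    )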
The signature of a transform is as follows: @@ -150,14 +152,18 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> Args: indices (array-like): Initial list of indices for addition. new_data (dict): Initial data for addition. + add_info (dict): Initial add_info. transforms (list): List of transforms on the data to be added. + Returns: + Final ``add_info`` from the transforms. ``new_data`` and ``indices`` + are not returned; rather, the ``new_data`` is added into the store + at ``indices``. Raise: ValueError: The final version of ``new_data`` does not have the same keys as the fields of this store. ValueError: The final version of ``new_data`` has fields that have a different length than ``indices``. """ - add_info = {} for transform in transforms: occupied, cur_data = self.retrieve(indices) indices, new_data, add_info = transform(indices, new_data, add_info, diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index e0e74a057..34e30eccd 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -46,6 +46,7 @@ def test_add_wrong_keys(store): "measures": [[1.0, 2.0], [3.0, 4.0]], # Missing `solution` key. }, + {}, # Empty add_info. [], # Empty transforms. ) @@ -59,6 +60,7 @@ def test_add_mismatch_indices(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, + {}, # Empty add_info. [], # Empty transforms. ) @@ -72,6 +74,7 @@ def test_simple_add_retrieve_clear(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, + {}, # Empty add_info. [], # Empty transforms. ) @@ -102,6 +105,7 @@ def test_add_duplicate_indices(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, + {}, # Empty add_info. [], # Empty transforms. ) @@ -118,6 +122,7 @@ def test_retrieve_duplicate_indices(store): "measures": [[3.0, 4.0]], "solution": [np.ones(10)], }, + {}, # Empty add_info. [], # Empty transforms. ) @@ -136,17 +141,19 @@ def obj_meas(indices, new_data, add_info, occupied, cur_data): # pylint: disable = unused-argument new_data["objective"] = np.sum(new_data["solution"], axis=1) new_data["measures"] = np.asarray(new_data["solution"])[:, :2] - return indices, new_data, {"foo": 5} + add_info["bar"] = 5 + return indices, new_data, add_info add_info = store.add( [3, 5], { "solution": [np.ones(10), 2 * np.ones(10)], }, + {"foo": 4}, [obj_meas], ) - assert add_info == {"foo": 5} + assert add_info == {"foo": 4, "bar": 5} assert len(store) == 2 assert np.all(store.occupied == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) @@ -174,6 +181,7 @@ def test_resize_to_double_capacity(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, + {}, # Empty add_info. [], # Empty transforms. ) @@ -196,6 +204,7 @@ def test_as_dict(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, + {}, # Empty add_info. [], # Empty transforms. ) @@ -234,6 +243,7 @@ def test_from_dict(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, + {}, # Empty add_info. [], # Empty transforms. 
) From 8edea275f16298d7e9b37a69a378604f98e4f680 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 16:42:20 -0700 Subject: [PATCH 13/26] as_pandas and other data methods --- ribs/archives/_array_store.py | 88 +++++++++++++++++++++++++++--- tests/archives/array_store_test.py | 65 +++++++++++++++++++--- 2 files changed, 138 insertions(+), 15 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 9bc15d0c5..894a30109 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -1,6 +1,9 @@ """Provides ArrayStore.""" +from collections import OrderedDict + import numpy as np from numpy_groupies import aggregate_nb as aggregate +from pandas import DataFrame from ribs._utils import readonly @@ -46,6 +49,10 @@ class ArrayStore: elements will be valid. _fields: Dict holding all the arrays with their data. + + Raises: + ValueError: One of the fields in ``field_desc`` has an invalid name + (currently, "index" is the only invalid name). """ def __init__(self, field_desc, capacity): @@ -58,6 +65,9 @@ def __init__(self, field_desc, capacity): self._fields = {} for name, (field_shape, dtype) in field_desc.items(): + if name == "index": + raise ValueError(f"`{name}` is an invalid field name.") + array_shape = (capacity,) + tuple(field_shape) self._fields[name] = np.empty(array_shape, dtype) @@ -90,6 +100,8 @@ def retrieve(self, indices): Args: indices (array-like): List of indices at which to collect data. Returns: + tuple: 2-element tuple consisting of: + - **occupied**: Array indicating which indices, among those passed, in have an associated data entry. For instance, if ``indices`` is ``[0, 1, 2]`` and only index 2 has data, then ``occupied`` will be @@ -155,9 +167,9 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> add_info (dict): Initial add_info. transforms (list): List of transforms on the data to be added. Returns: - Final ``add_info`` from the transforms. ``new_data`` and ``indices`` - are not returned; rather, the ``new_data`` is added into the store - at ``indices``. + dict: Final ``add_info`` from the transforms. ``new_data`` and + ``indices`` are not returned; rather, the ``new_data`` is added into + the store at ``indices``. Raise: ValueError: The final version of ``new_data`` does not have the same keys as the fields of this store. @@ -238,8 +250,8 @@ def resize(self, capacity): self._fields[name] = np.empty(new_shape, old_arr.dtype) self._fields[name][:old_capacity] = old_arr - def as_dict(self): - """Returns the data in the ArrayStore as a one-level dictionary. + def as_raw_dict(self): + """Returns the raw data in the ArrayStore as a one-level dictionary. To collapse the dict, we prefix each key with ``props.`` or ``fields.``, so the result looks as follows:: @@ -266,11 +278,13 @@ def as_dict(self): return d @staticmethod - def from_dict(d): - """Loads an ArrayStore from a dict. + def from_raw_dict(d): + """Loads an ArrayStore from a dict of raw info. Args: - d (dict): Dict returned by :meth:`as_dict`. + d (dict): Dict returned by :meth:`as_raw_dict`. + Returns: + ArrayStore: The new ArrayStore created from d. Raises: ValueError: The loaded props dict has the wrong keys. """ @@ -298,3 +312,61 @@ def from_dict(d): store._fields = fields return store + + def as_pandas(self, fields=None): + """Creates a DataFrame containing all data entries in the store. + + The returned DataFrame has: + + - 1 column of integers (``np.int32``) for the index, named ``index``. 
+ - For fields that are scalars, a single column with the field name. For + example, ``objective'' would have a single column called + ``"objective"``. + - For fields that are 1D arrays, multiple columns with the name suffixed + by its index. For instance, if we have a ``measures'' field of length + 10, we create 10 columns with names ``measures_0``, ``measures_1``, + ..., ``measures_9``. + - >1D fields are currently not supported. + + In short, the dataframe might look like this: + + +-------+------------+------+------------+ + | index | measure_0 | ... | objective | + +=======+============+======+============+ + | | | ... | | + +-------+------------+------+------------+ + + Args: + fields (list): List of fields to include. By default, all fields + will be included. + Returns: + pandas.DataFrame: See above. + Raises: + ValueError: There is a field with >1D data. + """ + if fields is None: + fields = self._fields.keys() + + data = OrderedDict() + indices = self._props["occupied_list"][:self._props["n_occupied"]] + + # Copy indices so we do not overwrite. + data["index"] = np.copy(indices) + + for name in fields: + arr = self._fields[name] + if len(arr.shape) == 1: # Scalar entries. + data[name] = arr[indices] + elif len(arr.shape) == 2: # 1D array entries. + arr = arr[indices] + for i in range(arr.shape[1]): + data[f"{name}_{i}"] = arr[:, i] + else: + raise ValueError( + f"Field `{name}` has shape {arr.shape[1:]} -- " + "cannot convert fields with shape >1D to Pandas") + + return DataFrame( + data, + copy=False, # Fancy indexing above already results in copying. + ) diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 34e30eccd..9300e90c0 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -7,6 +7,16 @@ # pylint: disable = redefined-outer-name +def test_init_invalid_field(): + with pytest.raises(ValueError): + ArrayStore( + { + "index": ((), np.float32), + }, + 10, + ) + + def test_init(): capacity = 10 store = ArrayStore( @@ -196,7 +206,7 @@ def test_resize_to_double_capacity(store): assert np.all(store._fields["objective"][[3, 5]] == [1.0, 2.0]) -def test_as_dict(store): +def test_as_raw_dict(store): store.add( [3, 5], { @@ -208,7 +218,7 @@ def test_as_dict(store): [], # Empty transforms. ) - d = store.as_dict() + d = store.as_raw_dict() assert d.keys() == set([ "props.capacity", @@ -228,14 +238,14 @@ def test_as_dict(store): assert np.all(d["fields.solution"][[3, 5]] == [np.zeros(10), np.ones(10)]) -def test_from_dict_invalid_props(store): - d = store.as_dict() +def test_from_raw_dict_invalid_props(store): + d = store.as_raw_dict() del d["props.capacity"] with pytest.raises(ValueError): - ArrayStore.from_dict(d) + ArrayStore.from_raw_dict(d) -def test_from_dict(store): +def test_from_raw_dict(store): store.add( [3, 5], { @@ -247,7 +257,7 @@ def test_from_dict(store): [], # Empty transforms. ) - new_store = ArrayStore.from_dict(store.as_dict()) + new_store = ArrayStore.from_raw_dict(store.as_raw_dict()) assert len(new_store) == 2 assert np.all(new_store.occupied == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) @@ -260,3 +270,44 @@ def test_from_dict(store): assert np.all(data["objective"] == [2.0, 1.0]) assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) + + +def test_as_pandas(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty add_info. 
+ [], # Empty transforms. + ) + + df = store.as_pandas() + + assert (df.columns == [ + "index", + "objective", + "measures_0", + "measures_1", + "solution_0", + "solution_1", + "solution_2", + "solution_3", + "solution_4", + "solution_5", + "solution_6", + "solution_7", + "solution_8", + "solution_9", + ]).all() + assert (df.dtypes == [int] + [np.float32] * 13).all() + assert len(df) == 2 + + row0 = np.concatenate(([3, 1.0, 1.0, 2.0], np.zeros(10))) + row1 = np.concatenate(([5, 2.0, 3.0, 4.0], np.ones(10))) + + # Either permutation. + assert (((df.loc[0] == row0).all() and (df.loc[1] == row1).all()) or + ((df.loc[0] == row1).all() and (df.loc[1] == row0).all())) From 2993589ccd77d740c53a126f98e0fe7b79d967ba Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 20:48:30 -0700 Subject: [PATCH 14/26] Return index in retrieve method --- ribs/archives/_array_store.py | 63 +++++++++++++++++------------- tests/archives/array_store_test.py | 12 ++++-- 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 894a30109..1cee72cf0 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -102,22 +102,31 @@ def retrieve(self, indices): Returns: tuple: 2-element tuple consisting of: - - **occupied**: Array indicating which indices, among those passed, - in have an associated data entry. For instance, if ``indices`` is + - **occupied**: Array indicating which indices, among those passed + in, have an associated data entry. For instance, if ``indices`` is ``[0, 1, 2]`` and only index 2 has data, then ``occupied`` will be ``[False, False, True]``. - **data**: Dict mapping from the field name to the field data at the given indices. For instance, if we have an ``objective`` field and request data at indices ``[4, 1, 0]``, we might get ``data`` - that looks like ``{"objective": [1.5, 6.0, 2.3]}``. Note that if a - given index is not marked as occupied, it can have any data value - associated with it. For instance, if index 1 was not occupied, - then the 6.0 returned above should be ignored. + that looks like ``{"index": [4, 1, 0], "objective": [1.5, 6.0, + 2.3]}``. Observe that we also return the indices as an ``index'' + entry in the dict. + + Note that if a given index is not marked as occupied, it can have + any data value associated with it. For instance, if index 1 was + not occupied, then the 6.0 returned above should be ignored. + + All data returned by this method will be a readonly copy, i.e., the + data will not update as the store changes. """ + # Note that fancy indexing with indices already creates a copy, so only + # indices need to be copied explicitly. + indices = np.asarray(indices) occupied = readonly(self._props["occupied"][indices]) - data = { - name: readonly(arr[indices]) for name, arr in self._fields.items() - } + data = {"index": readonly(indices.copy())} + for name, arr in self._fields.items(): + data[name] = readonly(arr[indices]) return occupied, data def add(self, indices, new_data, add_info, transforms): @@ -133,33 +142,33 @@ def add(self, indices, new_data, add_info, transforms): transforms can add stats to the add_info or delete fields from the add_info. - The signature of a transform is as follows: + The signature of a transform is as follows:: def transform(indices, new_data, add_info, occupied, cur_data) -> (indices, new_data, add_info): Transform parameters: - * **indices** (array-like): Array of indices at which new_data - should be inserted. 
- * **new_data** (dict): New data for the given indices. Maps from - field name to the array of new data for that field. - * **add_info** (dict): Information to return to the user about the - addition process. Example info includes whether each entry was - ultimately inserted into the archive, as well as general - statistics like update QD score. For the first transform, this - will be an empty dict. - * **occupied** (array-like): Whether the given indices are currently - occupied. Same as that given by :meth:`retrieve`. - * **cur_data** (dict): Data at the current indices in the archive. - Same as that given by :meth:`retrieve`. + - **indices** (array-like): Array of indices at which new_data should be + inserted. + - **new_data** (dict): New data for the given indices. Maps from field + name to the array of new data for that field. + - **add_info** (dict): Information to return to the user about the + addition process. Example info includes whether each entry was + ultimately inserted into the archive, as well as general statistics + like update QD score. For the first transform, this will be an empty + dict. + - **occupied** (array-like): Whether the given indices are currently + occupied. Same as that given by :meth:`retrieve`. + - **cur_data** (dict): Data at the current indices in the archive. Same + as that given by :meth:`retrieve`. Transform outputs: - * **indices** (array-like): Modified indices. - * **new_data** (dict): Modified new_data. At the end of the - transforms, it should have the same keys as the store. - * **add_info** (dict): Modified add_info. + - **indices** (array-like): Modified indices. + - **new_data** (dict): Modified new_data. At the end of the transforms, + it should have the same keys as the store. + - **add_info** (dict): Modified add_info. Args: indices (array-like): Initial list of indices for addition. 
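Before turning to the test changes, a minimal sketch of the transform contract documented above may help. The field names, the ``compute_objective`` helper, and the ``num_added`` key are illustrative only (they are not part of this patch), the call assumes the ``ArrayStore`` export this series adds to ``ribs.archives``, and the signature shown is the one as of this patch::

    import numpy as np
    from ribs.archives import ArrayStore

    store = ArrayStore(
        {
            "objective": ((), np.float32),
            "solution": ((10,), np.float32),
        },
        capacity=100,
    )

    def compute_objective(indices, new_data, add_info, occupied, cur_data):
        """Fills in the missing objective field and records a stat."""
        # pylint: disable = unused-argument
        new_data["objective"] = np.sum(new_data["solution"], axis=1)
        add_info["num_added"] = len(indices)
        return indices, new_data, add_info

    add_info = store.add(
        [0, 1],
        {"solution": [np.zeros(10), np.ones(10)]},  # No "objective" yet.
        {},  # Initial add_info.
        [compute_objective],
    )
    # add_info -> {"num_added": 2}

    # As of this patch, retrieve() also reports the indices back as "index".
    occupied, data = store.retrieve([0, 1])
    # occupied -> [True, True]
    # data["index"] -> [0, 1]; data["objective"] -> [0.0, 10.0]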
diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 9300e90c0..17bb1d94d 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -95,7 +95,8 @@ def test_simple_add_retrieve_clear(store): occupied, data = store.retrieve([5, 3]) assert np.all(occupied == [True, True]) - assert data.keys() == set(["objective", "measures", "solution"]) + assert data.keys() == set(["index", "objective", "measures", "solution"]) + assert np.all(data["index"] == [5, 3]) assert np.all(data["objective"] == [2.0, 1.0]) assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) @@ -139,7 +140,8 @@ def test_retrieve_duplicate_indices(store): occupied, data = store.retrieve([3, 3]) assert np.all(occupied == [True, True]) - assert data.keys() == set(["objective", "measures", "solution"]) + assert data.keys() == set(["index", "objective", "measures", "solution"]) + assert np.all(data["index"] == [3, 3]) assert np.all(data["objective"] == [2.0, 2.0]) assert np.all(data["measures"] == [[3.0, 4.0], [3.0, 4.0]]) assert np.all(data["solution"] == [np.ones(10), np.ones(10)]) @@ -172,7 +174,8 @@ def obj_meas(indices, new_data, add_info, occupied, cur_data): occupied, data = store.retrieve([3, 5]) assert np.all(occupied == [True, True]) - assert data.keys() == set(["objective", "measures", "solution"]) + assert data.keys() == set(["index", "objective", "measures", "solution"]) + assert np.all(data["index"] == [3, 5]) assert np.all(data["objective"] == [10.0, 20.0]) assert np.all(data["measures"] == [[1.0, 1.0], [2.0, 2.0]]) assert np.all(data["solution"] == [np.ones(10), 2 * np.ones(10)]) @@ -266,7 +269,8 @@ def test_from_raw_dict(store): occupied, data = new_store.retrieve([5, 3]) assert np.all(occupied == [True, True]) - assert data.keys() == set(["objective", "measures", "solution"]) + assert data.keys() == set(["index", "objective", "measures", "solution"]) + assert np.all(data["index"] == [5, 3]) assert np.all(data["objective"] == [2.0, 1.0]) assert np.all(data["measures"] == [[3.0, 4.0], [1.0, 2.0]]) assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) From 95fd5aaeb3577d18810022ae7fd21118eca5e79a Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 21:18:19 -0700 Subject: [PATCH 15/26] Update retrieve method --- ribs/archives/_array_store.py | 81 ++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 1cee72cf0..9b94d25b4 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -1,4 +1,5 @@ """Provides ArrayStore.""" +import itertools from collections import OrderedDict import numpy as np @@ -94,11 +95,14 @@ def occupied_list(self): return readonly( self._props["occupied_list"][:self._props["n_occupied"]]) - def retrieve(self, indices): + def retrieve(self, indices, fields=None): """Collects the data at the given indices. Args: indices (array-like): List of indices at which to collect data. + fields (array-like of str): List of fields to include. By default, + all fields will be included. In addition to fields in the store, + "index" is also a valid field. Returns: tuple: 2-element tuple consisting of: @@ -111,7 +115,8 @@ def retrieve(self, indices): and request data at indices ``[4, 1, 0]``, we might get ``data`` that looks like ``{"index": [4, 1, 0], "objective": [1.5, 6.0, 2.3]}``. 
Observe that we also return the indices as an ``index'' - entry in the dict. + entry in the dict. The keys in this dict can be modified using the + ``fields`` arg. Note that if a given index is not marked as occupied, it can have any data value associated with it. For instance, if index 1 was @@ -119,26 +124,38 @@ def retrieve(self, indices): All data returned by this method will be a readonly copy, i.e., the data will not update as the store changes. + + Raises: + ValueError: Invalid field name provided. """ # Note that fancy indexing with indices already creates a copy, so only # indices need to be copied explicitly. indices = np.asarray(indices) occupied = readonly(self._props["occupied"][indices]) - data = {"index": readonly(indices.copy())} - for name, arr in self._fields.items(): - data[name] = readonly(arr[indices]) + + data = {} + fields = (itertools.chain(["index"], self._fields) + if fields is None else fields) + for name in fields: + if name == "index": + data[name] = readonly(np.copy(indices)) + continue + if name not in self._fields: + raise ValueError(f"`{name}` is not a field in this ArrayStore.") + data[name] = readonly(self._fields[name][indices]) + return occupied, data def add(self, indices, new_data, add_info, transforms): - """Adds new data to the archive at the given indices. + """Adds new data to the store at the given indices. The indices, new_data, and add_info are passed through transforms before - adding to the archive. The general idea is that these transforms will + adding to the store. The general idea is that these transforms will gradually modify the indices, new_data, and add_info. For instance, they can add new fields to new_data (new_data may not initially have all the - same fields as the archive). Alternatively, they can filter out - duplicate indices, eg if multiple entries are being inserted at the same - index we can choose one with the best objective. As another example, the + same fields as the store). Alternatively, they can filter out duplicate + indices, eg if multiple entries are being inserted at the same index we + can choose one with the best objective. As another example, the transforms can add stats to the add_info or delete fields from the add_info. @@ -155,13 +172,12 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> name to the array of new data for that field. - **add_info** (dict): Information to return to the user about the addition process. Example info includes whether each entry was - ultimately inserted into the archive, as well as general statistics - like update QD score. For the first transform, this will be an empty - dict. + ultimately inserted into the store, as well as general statistics like + update QD score. For the first transform, this will be an empty dict. - **occupied** (array-like): Whether the given indices are currently occupied. Same as that given by :meth:`retrieve`. - - **cur_data** (dict): Data at the current indices in the archive. Same - as that given by :meth:`retrieve`. + - **cur_data** (dict): Data at the current indices in the store. Same as + that given by :meth:`retrieve`. Transform outputs: @@ -322,6 +338,16 @@ def from_raw_dict(d): return store + def as_dict(self, fields=None): + """Creates a dict containing all data entries in the store. + + Equivalent to calling :meth:`retrieve` with :attr:`occupied_list`. + + Args: + fields (array-like of str): See :meth:`retrieve`. 
+ """ + return self.retrieve(self.occupied_list, fields) + def as_pandas(self, fields=None): """Creates a DataFrame containing all data entries in the store. @@ -335,7 +361,7 @@ def as_pandas(self, fields=None): by its index. For instance, if we have a ``measures'' field of length 10, we create 10 columns with names ``measures_0``, ``measures_1``, ..., ``measures_9``. - - >1D fields are currently not supported. + - We do not currently support fields with >1D data. In short, the dataframe might look like this: @@ -346,23 +372,29 @@ def as_pandas(self, fields=None): +-------+------------+------+------------+ Args: - fields (list): List of fields to include. By default, all fields - will be included. + fields (array-like of str): List of fields to include. By default, + all fields will be included. In addition to fields in the store, + "index" is also a valid field. Returns: pandas.DataFrame: See above. Raises: + ValueError: Invalid field name provided. ValueError: There is a field with >1D data. """ - if fields is None: - fields = self._fields.keys() - data = OrderedDict() indices = self._props["occupied_list"][:self._props["n_occupied"]] - # Copy indices so we do not overwrite. - data["index"] = np.copy(indices) + fields = (itertools.chain(["index"], self._fields) + if fields is None else fields) for name in fields: + if name == "index": + data[name] = np.copy(indices) + continue + + if name not in self._fields: + raise ValueError(f"`{name}` is not a field in this ArrayStore.") + arr = self._fields[name] if len(arr.shape) == 1: # Scalar entries. data[name] = arr[indices] @@ -377,5 +409,6 @@ def as_pandas(self, fields=None): return DataFrame( data, - copy=False, # Fancy indexing above already results in copying. + copy=False, # Fancy indexing above copies all fields, and + # indices is explicitly copied. ) From c99793bcbba42806dd2059628e1532718a12f4a4 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 21:24:38 -0700 Subject: [PATCH 16/26] Test retrieve and as_pandas --- tests/archives/array_store_test.py | 60 ++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 17bb1d94d..f2ff289a0 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -147,6 +147,31 @@ def test_retrieve_duplicate_indices(store): assert np.all(data["solution"] == [np.ones(10), np.ones(10)]) +def test_retrieve_invalid_fields(store): + with pytest.raises(ValueError): + store.retrieve([0, 1], fields=["objective", "foo"]) + + +def test_retrieve_custom_fields(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty add_info. + [], # Empty transforms. + ) + + occupied, data = store.retrieve([5, 3], fields=["index", "objective"]) + + assert np.all(occupied == [True, True]) + assert data.keys() == set(["index", "objective"]) + assert np.all(data["index"] == [5, 3]) + assert np.all(data["objective"] == [2.0, 1.0]) + + def test_add_simple_transform(store): def obj_meas(indices, new_data, add_info, occupied, cur_data): @@ -315,3 +340,38 @@ def test_as_pandas(store): # Either permutation. 
assert (((df.loc[0] == row0).all() and (df.loc[1] == row1).all()) or ((df.loc[0] == row1).all() and (df.loc[1] == row0).all())) + + +def test_as_pandas_invalid_fields(store): + with pytest.raises(ValueError): + store.as_pandas(fields=["objective", "foo"]) + + +def test_as_pandas_custom_fields(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty add_info. + [], # Empty transforms. + ) + + df = store.as_pandas(fields=["objective", "measures"]) + + assert (df.columns == [ + "objective", + "measures_0", + "measures_1", + ]).all() + assert (df.dtypes == [np.float32] * 3).all() + assert len(df) == 2 + + row0 = [1.0, 1.0, 2.0] + row1 = [2.0, 3.0, 4.0] + + # Either permutation. + assert (((df.loc[0] == row0).all() and (df.loc[1] == row1).all()) or + ((df.loc[0] == row1).all() and (df.loc[1] == row0).all())) From 37588f9fa704a98088da1f20d39b8be8a7619945 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Mon, 30 Oct 2023 21:32:28 -0700 Subject: [PATCH 17/26] Implement and fix as_dict --- ribs/archives/_array_store.py | 5 ++++- tests/archives/array_store_test.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 9b94d25b4..64bb98b94 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -345,8 +345,11 @@ def as_dict(self, fields=None): Args: fields (array-like of str): See :meth:`retrieve`. + Returns: + dict: See ``data`` in :meth:`retrieve`. ``occupied`` is not returned + since all indices are known to be occupied in this method. """ - return self.retrieve(self.occupied_list, fields) + return self.retrieve(self.occupied_list, fields)[1] def as_pandas(self, fields=None): """Creates a DataFrame containing all data entries in the store. diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index f2ff289a0..721d3b65a 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -301,6 +301,36 @@ def test_from_raw_dict(store): assert np.all(data["solution"] == [np.ones(10), np.zeros(10)]) +def test_as_dict(store): + store.add( + [3, 5], + { + "objective": [1.0, 2.0], + "measures": [[1.0, 2.0], [3.0, 4.0]], + "solution": [np.zeros(10), np.ones(10)], + }, + {}, # Empty add_info. + [], # Empty transforms. + ) + + d = store.as_dict() + + assert d.keys() == set(["index", "objective", "measures", "solution"]) + assert all(len(v) == 2 for v in d.values()) + + row0 = np.concatenate(([3, 1.0, 1.0, 2.0], np.zeros(10))) + row1 = np.concatenate(([5, 2.0, 3.0, 4.0], np.ones(10))) + + flat = [ + np.concatenate(([d["index"][i]], [d["objective"][i]], d["measures"][i], + d["solution"][i])) for i in range(2) + ] + + # Either permutation. 
+ assert (((flat[0] == row0).all() and (flat[1] == row1).all()) or + ((flat[0] == row1).all() and (flat[1] == row0).all())) + + def test_as_pandas(store): store.add( [3, 5], From c17ac12a95babbc0d3329f9145c5ef87acff675f Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Tue, 31 Oct 2023 01:36:31 -0700 Subject: [PATCH 18/26] Add iter_entries --- ribs/archives/_array_store.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 64bb98b94..a35b9aeb4 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -351,6 +351,25 @@ def as_dict(self, fields=None): """ return self.retrieve(self.occupied_list, fields)[1] + def iter_entries(self, fields=None): + """Creates an iterator over entries in the store. + + Note that this method induces a copy of the data in the store, as it + calls :meth:`as_dict`. + + Args: + fields (array-like of str): See :meth:`retrieve`. + Returns: + generator: When iterated over, this generator yields dicts mapping + from the fields to the individual entries. For instance, if we + had an "objective" field, one entry might look like ``{"index": + 1, "objective": 6.0}``. + """ + d = self.as_dict(fields) + return ({ + name: arr[i] for name, arr in d.items() + } for i in range(self._props["n_occupied"])) + def as_pandas(self, fields=None): """Creates a DataFrame containing all data entries in the store. From 637e7b3902b3bdd192b9e8b7fba6f12385d1bf8c Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Tue, 31 Oct 2023 01:36:44 -0700 Subject: [PATCH 19/26] Revert "Add iter_entries" This reverts commit c17ac12a95babbc0d3329f9145c5ef87acff675f. --- ribs/archives/_array_store.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index a35b9aeb4..64bb98b94 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -351,25 +351,6 @@ def as_dict(self, fields=None): """ return self.retrieve(self.occupied_list, fields)[1] - def iter_entries(self, fields=None): - """Creates an iterator over entries in the store. - - Note that this method induces a copy of the data in the store, as it - calls :meth:`as_dict`. - - Args: - fields (array-like of str): See :meth:`retrieve`. - Returns: - generator: When iterated over, this generator yields dicts mapping - from the fields to the individual entries. For instance, if we - had an "objective" field, one entry might look like ``{"index": - 1, "objective": 6.0}``. - """ - d = self.as_dict(fields) - return ({ - name: arr[i] for name, arr in d.items() - } for i in range(self._props["n_occupied"])) - def as_pandas(self, fields=None): """Creates a DataFrame containing all data entries in the store. 
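With the retrieval methods from patches 15 through 17 in place, the store exposes three read paths that build on one another: ``retrieve`` for explicit indices (with an optional ``fields`` filter), ``as_dict`` for every occupied entry, and ``as_pandas`` for a flattened DataFrame. A short sketch of how they fit together at this point in the series; the concrete field names and values are illustrative, and ``add`` still takes an initial ``add_info`` dict here (a later patch in the series swaps that argument for ``extra_args``)::

    import numpy as np
    from ribs.archives import ArrayStore

    store = ArrayStore(
        {
            "objective": ((), np.float32),
            "measures": ((2,), np.float32),
        },
        10,
    )
    store.add(
        [3, 5],
        {
            "objective": [1.0, 2.0],
            "measures": [[1.0, 2.0], [3.0, 4.0]],
        },
        {},  # Empty add_info.
        [],  # Empty transforms.
    )

    # retrieve() reports occupancy and returns readonly copies of the
    # requested fields; "index" counts as a field.
    occupied, data = store.retrieve([5, 3], fields=["index", "objective"])
    # occupied -> [True, True]
    # data -> {"index": [5, 3], "objective": [2.0, 1.0]}

    # as_dict() is retrieve() over all occupied indices, without the
    # occupied flag.
    everything = store.as_dict()

    # as_pandas() flattens 1D fields into one column per component.
    df = store.as_pandas(fields=["objective", "measures"])
    # df.columns -> ["objective", "measures_0", "measures_1"]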
From 567edbedf6f29643dcdc29dd65fde83ec99e8505 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Tue, 31 Oct 2023 02:08:28 -0700 Subject: [PATCH 20/26] Add iteration --- ribs/archives/_array_store.py | 85 ++++++++++++++++++++++---- tests/archives/array_store_test.py | 95 ++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+), 10 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 64bb98b94..c09ea0ee8 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -1,6 +1,7 @@ """Provides ArrayStore.""" import itertools from collections import OrderedDict +from enum import IntEnum import numpy as np from numpy_groupies import aggregate_nb as aggregate @@ -9,6 +10,48 @@ from ribs._utils import readonly +class Update(IntEnum): + """Indices into the updates array in ArrayStore.""" + ADD = 0 + CLEAR = 1 + + +class ArrayStoreIterator: + """An iterator for an ArrayStore's entries.""" + + # pylint: disable = protected-access + + def __init__(self, store): + self.store = store + self.iter_idx = 0 + self.state = store._props["updates"].copy() + + def __iter__(self): + """This is the iterator, so it returns itself.""" + return self + + def __next__(self): + """Raises RuntimeError if the store was modified.""" + if not np.all(self.state == self.store._props["updates"]): + # This check should go first because a call to clear() would clear + # _occupied_indices and cause StopIteration to happen early. + raise RuntimeError( + "ArrayStore was modified with add() or clear() during " + "iteration.") + + if self.iter_idx >= len(self.store): + raise StopIteration + + idx = self.store._props["occupied_list"][self.iter_idx] + self.iter_idx += 1 + + d = {"index": idx} + for name, arr in self.store._fields.items(): + d[name] = arr[idx] + + return d + + class ArrayStore: """Maintains a set of arrays that share a common dimension. @@ -39,7 +82,7 @@ class ArrayStore: capacity (int): Total possible entries in the store. Attributes: - _props: Dict with properties that are common to every ArrayStore. + _props (dict): Properties that are common to every ArrayStore. * "capacity": Maximum number of data entries in the store. * "occupied": Boolean array of size ``(capacity,)`` indicating @@ -48,8 +91,10 @@ class ArrayStore: * "occupied_list": Array of size ``(capacity,)`` listing all occupied indices in the store. Only the first ``n_occupied`` elements will be valid. + * "updates": Int array recording number of calls to functions that + modified the store. - _fields: Dict holding all the arrays with their data. + _fields (dict): Holds all the arrays with their data. Raises: ValueError: One of the fields in ``field_desc`` has an invalid name @@ -62,6 +107,7 @@ def __init__(self, field_desc, capacity): "occupied": np.zeros(capacity, dtype=bool), "n_occupied": 0, "occupied_list": np.empty(capacity, dtype=int), + "updates": np.array([0, 0]), } self._fields = {} @@ -77,6 +123,24 @@ def __len__(self): that have a corresponding data entry.""" return self._props["n_occupied"] + def __iter__(self): + """Iterates over entries in the store. + + When iterated over, this iterator yields dicts mapping from the fields + to the individual entries. For instance, if we had an "objective" field, + one entry might look like ``{"index": 1, "objective": 6.0}``. + + Example: + + :: + + for entry in store: + entry["index"] + entry["objective"] + ... 
+ """ + return ArrayStoreIterator(self) + @property def capacity(self): """int: Maximum number of data entries in the store.""" @@ -201,6 +265,8 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> ValueError: The final version of ``new_data`` has fields that have a different length than ``indices``. """ + self._props["updates"][Update.ADD] += 1 + for transform in transforms: occupied, cur_data = self.retrieve(indices) indices, new_data, add_info = transform(indices, new_data, add_info, @@ -242,6 +308,7 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> def clear(self): """Removes all entries from the store.""" + self._props["updates"][Update.CLEAR] += 1 self._props["n_occupied"] = 0 # Effectively clears occupied_list too. self._props["occupied"].fill(False) @@ -286,20 +353,18 @@ def as_raw_dict(self): "props.occupied": ..., ... "fields.objective": ..., + ... } Returns: dict: See description above. """ d = {} - for name, prop in self._props.items(): - if isinstance(prop, np.ndarray): - prop = readonly(prop.view()) - d[f"props.{name}"] = prop - for name, arr in self._fields.items(): - if isinstance(arr, np.ndarray): - arr = readonly(arr.view()) - d[f"fields.{name}"] = arr + for prefix, attr in [("props", self._props), ("fields", self._fields)]: + for name, val in attr.items(): + if isinstance(val, np.ndarray): + val = readonly(val.view()) + d[f"{prefix}.{name}"] = val return d @staticmethod diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 721d3b65a..3ce782654 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -253,6 +253,7 @@ def test_as_raw_dict(store): "props.occupied", "props.n_occupied", "props.occupied_list", + "props.updates", "fields.objective", "fields.measures", "fields.solution", @@ -261,6 +262,7 @@ def test_as_raw_dict(store): assert np.all(d["props.occupied"] == [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]) assert d["props.n_occupied"] == 2 assert np.all(np.sort(d["props.occupied_list"][:2]) == [3, 5]) + assert np.all(d["props.updates"] == [1, 0]) # 1 add, 0 clear. assert np.all(d["fields.objective"][[3, 5]] == [1.0, 2.0]) assert np.all(d["fields.measures"][[3, 5]] == [[1.0, 2.0], [3.0, 4.0]]) assert np.all(d["fields.solution"][[3, 5]] == [np.zeros(10), np.ones(10)]) @@ -405,3 +407,96 @@ def test_as_pandas_custom_fields(store): # Either permutation. assert (((df.loc[0] == row0).all() and (df.loc[1] == row1).all()) or ((df.loc[0] == row1).all() and (df.loc[1] == row0).all())) + + +def test_iteration(store): + store.add( + [3], + { + "objective": [1.0], + "measures": [[1.0, 2.0]], + "solution": [np.zeros(10)], + }, + {}, # Empty add_info. + [], # Empty transforms. + ) + + for entry in store: + assert entry.keys() == set( + ["index", "objective", "measures", "solution"]) + assert np.all(entry["index"] == [3]) + assert np.all(entry["objective"] == [1.0]) + assert np.all(entry["measures"] == [[1.0, 2.0]]) + assert np.all(entry["solution"] == [np.zeros(10)]) + + +def test_add_during_iteration(store): + store.add( + [3], + { + "objective": [1.0], + "measures": [[1.0, 2.0]], + "solution": [np.zeros(10)], + }, + {}, # Empty add_info. + [], # Empty transforms. + ) + + # Even with just one entry, adding during iteration should still raise an + # error, just like it does in a set. + with pytest.raises(RuntimeError): + for _ in store: + store.add( + [4], + { + "objective": [2.0], + "measures": [[3.0, 4.0]], + "solution": [np.ones(10)], + }, + {}, # Empty add_info. + [], # Empty transforms. 
+ ) + + +def test_clear_during_iteration(store): + store.add( + [3], + { + "objective": [1.0], + "measures": [[1.0, 2.0]], + "solution": [np.zeros(10)], + }, + {}, # Empty add_info. + [], # Empty transforms. + ) + + with pytest.raises(RuntimeError): + for _ in store: + store.clear() + + +def test_clear_and_add_during_iteration(store): + store.add( + [3], + { + "objective": [1.0], + "measures": [[1.0, 2.0]], + "solution": [np.zeros(10)], + }, + {}, # Empty add_info. + [], # Empty transforms. + ) + + with pytest.raises(RuntimeError): + for _ in store: + store.clear() + store.add( + [4], + { + "objective": [2.0], + "measures": [[3.0, 4.0]], + "solution": [np.ones(10)], + }, + {}, # Empty add_info. + [], # Empty transforms. + ) From 98d7537480ff3403f4ea06281d4fd185dbc7ec77 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Tue, 31 Oct 2023 14:51:44 -0700 Subject: [PATCH 21/26] Return immediately when transform does not add anything to store --- ribs/archives/_array_store.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index c09ea0ee8..0984437c8 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -287,6 +287,10 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> f"{len(arr)} but should be the same length as indices " f"({len(indices)})") + # Shortcut when there is nothing to add to the store. + if len(indices) == 0: + return add_info + # Update occupancy data. unique_indices = np.where(aggregate(indices, 1, func="len") != 0)[0] cur_occupied = self._props["occupied"][unique_indices] From 75947310f92433dfdff79a326039a3d8e5eabeff Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Tue, 31 Oct 2023 17:38:53 -0700 Subject: [PATCH 22/26] Allow arbitrary new_data when indices are not returned --- ribs/archives/_array_store.py | 17 +++++++++-------- tests/archives/array_store_test.py | 23 +++++++++++++++++++++++ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 0984437c8..d815ba823 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -247,7 +247,8 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> - **indices** (array-like): Modified indices. - **new_data** (dict): Modified new_data. At the end of the transforms, - it should have the same keys as the store. + it should have the same keys as the store. If ``indices`` is empty, + ``new_data`` will be ignored. - **add_info** (dict): Modified add_info. Args: @@ -272,13 +273,6 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> indices, new_data, add_info = transform(indices, new_data, add_info, occupied, cur_data) - # Verify that new_data ends up with the correct fields after the - # transforms. - if new_data.keys() != self._fields.keys(): - raise ValueError( - f"`new_data` had keys {new_data.keys()} but should have the " - f"same keys as this ArrayStore, i.e., {self._fields.keys()}") - # Verify that the array shapes match the indices. for name, arr in new_data.items(): if len(arr) != len(indices): @@ -291,6 +285,13 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> if len(indices) == 0: return add_info + # Verify that new_data ends up with the correct fields after the + # transforms. 
+ if new_data.keys() != self._fields.keys(): + raise ValueError( + f"`new_data` had keys {new_data.keys()} but should have the " + f"same keys as this ArrayStore, i.e., {self._fields.keys()}") + # Update occupancy data. unique_indices = np.where(aggregate(indices, 1, func="len") != 0)[0] cur_occupied = self._props["occupied"][unique_indices] diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 3ce782654..426227fc1 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -206,6 +206,29 @@ def obj_meas(indices, new_data, add_info, occupied, cur_data): assert np.all(data["solution"] == [np.ones(10), 2 * np.ones(10)]) +def test_add_empty_transform(store): + # new_data should be able to take on arbitrary values when no indices are + # returned, so we make it an empty dict here. + def empty(indices, new_data, add_info, occupied, cur_data): + # pylint: disable = unused-argument + return [], {}, {} + + add_info = store.add( + [3, 5], + { + "solution": [np.ones(10), 2 * np.ones(10)], + }, + {"foo": 4}, + [empty], + ) + + assert add_info == {} + + assert len(store) == 0 + assert np.all(~store.occupied) + assert len(store.occupied_list) == 0 + + def test_resize_bad_capacity(store): with pytest.raises(ValueError): store.resize(store.capacity) From 97b62d5bc823f9b2ec2e3cd2c5a32e8cdf5178a0 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Tue, 31 Oct 2023 17:40:43 -0700 Subject: [PATCH 23/26] history --- HISTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.md b/HISTORY.md index b699eb90b..b0f5fc8a1 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -7,6 +7,7 @@ #### API - Add GradientOperatorEmitter to support OMG-MEGA and OG-MAP-Elites ({pr}`348`) +- Add ArrayStore data structure ({pr}`395`) #### Improvements From 11e0041cbc4c0ed2ba8a955eebde5976928035a8 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Tue, 31 Oct 2023 17:49:59 -0700 Subject: [PATCH 24/26] Docs fixes --- ribs/archives/_array_store.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index d815ba823..3162e2b7b 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -31,10 +31,14 @@ def __iter__(self): return self def __next__(self): - """Raises RuntimeError if the store was modified.""" + """Returns dicts with each entry's data. + + Raises RuntimeError if the store was modified. + """ if not np.all(self.state == self.store._props["updates"]): - # This check should go first because a call to clear() would clear - # _occupied_indices and cause StopIteration to happen early. + # This check should go before the StopIteration check because a call + # to clear() would cause the len(self.store) to be 0 and thus + # trigger StopIteration. raise RuntimeError( "ArrayStore was modified with add() or clear() during " "iteration.") @@ -74,7 +78,7 @@ class ArrayStore: Args: field_desc (dict): Description of fields in the array store. The - description is a dict mapping from str to tuple of ``(shape, + description is a dict mapping from a str to a tuple of ``(shape, dtype)``. 
For instance, ``{"objective": ((), np.float32), "measures": ((10,), np.float32)}`` will create an "objective" field with shape ``(capacity,)`` and a "measures" field with shape From d5b61692463886258b5cea46625ca0a47f54ab16 Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Tue, 31 Oct 2023 20:26:10 -0700 Subject: [PATCH 25/26] Misc fixes --- ribs/archives/_array_store.py | 78 ++++++++++++++++++------------ tests/archives/array_store_test.py | 52 ++++++++++++-------- 2 files changed, 78 insertions(+), 52 deletions(-) diff --git a/ribs/archives/_array_store.py b/ribs/archives/_array_store.py index 3162e2b7b..26e9311c1 100644 --- a/ribs/archives/_array_store.py +++ b/ribs/archives/_array_store.py @@ -119,12 +119,15 @@ def __init__(self, field_desc, capacity): if name == "index": raise ValueError(f"`{name}` is an invalid field name.") + if isinstance(field_shape, (int, np.integer)): + field_shape = (field_shape,) + array_shape = (capacity,) + tuple(field_shape) self._fields[name] = np.empty(array_shape, dtype) def __len__(self): - """Number of occupied indices in the store, i.e.g, number of indices - that have a corresponding data entry.""" + """Number of occupied indices in the store, i.e., number of indices that + have a corresponding data entry.""" return self._props["n_occupied"] def __iter__(self): @@ -132,7 +135,8 @@ def __iter__(self): When iterated over, this iterator yields dicts mapping from the fields to the individual entries. For instance, if we had an "objective" field, - one entry might look like ``{"index": 1, "objective": 6.0}``. + one entry might look like ``{"index": 1, "objective": 6.0}`` (similar to + :meth:`retrieve`, the index is included in the output). Example: @@ -153,7 +157,7 @@ def capacity(self): @property def occupied(self): """numpy.ndarray: Boolean array of size ``(capacity,)`` indicating - whether each index has an data entry.""" + whether each index has a data entry.""" return readonly(self._props["occupied"].view()) @property @@ -171,6 +175,7 @@ def retrieve(self, indices, fields=None): fields (array-like of str): List of fields to include. By default, all fields will be included. In addition to fields in the store, "index" is also a valid field. + Returns: tuple: 2-element tuple consisting of: @@ -196,8 +201,6 @@ def retrieve(self, indices, fields=None): Raises: ValueError: Invalid field name provided. """ - # Note that fancy indexing with indices already creates a copy, so only - # indices need to be copied explicitly. indices = np.asarray(indices) occupied = readonly(self._props["occupied"][indices]) @@ -205,6 +208,8 @@ def retrieve(self, indices, fields=None): fields = (itertools.chain(["index"], self._fields) if fields is None else fields) for name in fields: + # Note that fancy indexing with indices already creates a copy, so + # only `indices` needs to be copied explicitly. if name == "index": data[name] = readonly(np.copy(indices)) continue @@ -214,7 +219,7 @@ def retrieve(self, indices, fields=None): return occupied, data - def add(self, indices, new_data, add_info, transforms): + def add(self, indices, new_data, extra_args, transforms): """Adds new data to the store at the given indices. 
The indices, new_data, and add_info are passed through transforms before @@ -229,7 +234,8 @@ def add(self, indices, new_data, add_info, transforms): The signature of a transform is as follows:: - def transform(indices, new_data, add_info, occupied, cur_data) -> + def transform(indices, new_data, add_info, extra_args, + occupied, cur_data) -> (indices, new_data, add_info): Transform parameters: @@ -240,8 +246,9 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> name to the array of new data for that field. - **add_info** (dict): Information to return to the user about the addition process. Example info includes whether each entry was - ultimately inserted into the store, as well as general statistics like - update QD score. For the first transform, this will be an empty dict. + ultimately inserted into the store, as well as general statistics. + For the first transform, this will be an empty dict. + - **extra_args** (dict): Additional arguments for the transform. - **occupied** (array-like): Whether the given indices are currently occupied. Same as that given by :meth:`retrieve`. - **cur_data** (dict): Data at the current indices in the store. Same as @@ -249,7 +256,8 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> Transform outputs: - - **indices** (array-like): Modified indices. + - **indices** (array-like): Modified indices. We do NOT assume that the + final indices will be unique. - **new_data** (dict): Modified new_data. At the end of the transforms, it should have the same keys as the store. If ``indices`` is empty, ``new_data`` will be ignored. @@ -258,12 +266,16 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> Args: indices (array-like): Initial list of indices for addition. new_data (dict): Initial data for addition. - add_info (dict): Initial add_info. + extra_args (dict): Dict containing additional arguments to pass to + the transforms. The dict is passed directly (i.e., no unpacking + like with kwargs). transforms (list): List of transforms on the data to be added. + Returns: dict: Final ``add_info`` from the transforms. ``new_data`` and ``indices`` are not returned; rather, the ``new_data`` is added into the store at ``indices``. + Raise: ValueError: The final version of ``new_data`` does not have the same keys as the fields of this store. @@ -272,10 +284,16 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> """ self._props["updates"][Update.ADD] += 1 + add_info = {} for transform in transforms: occupied, cur_data = self.retrieve(indices) indices, new_data, add_info = transform(indices, new_data, add_info, - occupied, cur_data) + extra_args, occupied, + cur_data) + + # Shortcut when there is nothing to add to the store. + if len(indices) == 0: + return add_info # Verify that the array shapes match the indices. for name, arr in new_data.items(): @@ -285,10 +303,6 @@ def transform(indices, new_data, add_info, occupied, cur_data) -> f"{len(arr)} but should be the same length as indices " f"({len(indices)})") - # Shortcut when there is nothing to add to the store. - if len(indices) == 0: - return add_info - # Verify that new_data ends up with the correct fields after the # transforms. 
if new_data.keys() != self._fields.keys(): @@ -335,21 +349,21 @@ def resize(self, capacity): f"New capacity ({capacity}) must be greater than current " f"capacity ({self._props['capacity']}.") - old_capacity = self._props["capacity"] + cur_capacity = self._props["capacity"] self._props["capacity"] = capacity - old_occupied = self._props["occupied"] + cur_occupied = self._props["occupied"] self._props["occupied"] = np.zeros(capacity, dtype=bool) - self._props["occupied"][:old_capacity] = old_occupied + self._props["occupied"][:cur_capacity] = cur_occupied - old_occupied_list = self._props["occupied_list"] + cur_occupied_list = self._props["occupied_list"] self._props["occupied_list"] = np.empty(capacity, dtype=int) - self._props["occupied_list"][:old_capacity] = old_occupied_list + self._props["occupied_list"][:cur_capacity] = cur_occupied_list - for name, old_arr in self._fields.items(): - new_shape = (capacity,) + old_arr.shape[1:] - self._fields[name] = np.empty(new_shape, old_arr.dtype) - self._fields[name][:old_capacity] = old_arr + for name, cur_arr in self._fields.items(): + new_shape = (capacity,) + cur_arr.shape[1:] + self._fields[name] = np.empty(new_shape, cur_arr.dtype) + self._fields[name][:cur_capacity] = cur_arr def as_raw_dict(self): """Returns the raw data in the ArrayStore as a one-level dictionary. @@ -433,7 +447,7 @@ def as_pandas(self, fields=None): - 1 column of integers (``np.int32``) for the index, named ``index``. - For fields that are scalars, a single column with the field name. For example, ``objective'' would have a single column called - ``"objective"``. + ``objective``. - For fields that are 1D arrays, multiple columns with the name suffixed by its index. For instance, if we have a ``measures'' field of length 10, we create 10 columns with names ``measures_0``, ``measures_1``, @@ -442,11 +456,11 @@ def as_pandas(self, fields=None): In short, the dataframe might look like this: - +-------+------------+------+------------+ - | index | measure_0 | ... | objective | - +=======+============+======+============+ - | | | ... | | - +-------+------------+------+------------+ + +-------+------------+------+-----------+ + | index | measures_0 | ... | objective | + +=======+============+======+===========+ + | | | ... | | + +-------+------------+------+-----------+ Args: fields (array-like of str): List of fields to include. By default, diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 426227fc1..7d8c540c4 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -56,7 +56,7 @@ def test_add_wrong_keys(store): "measures": [[1.0, 2.0], [3.0, 4.0]], # Missing `solution` key. }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -70,7 +70,7 @@ def test_add_mismatch_indices(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -84,7 +84,7 @@ def test_simple_add_retrieve_clear(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -116,7 +116,7 @@ def test_add_duplicate_indices(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. 
) @@ -133,7 +133,7 @@ def test_retrieve_duplicate_indices(store): "measures": [[3.0, 4.0]], "solution": [np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -160,7 +160,7 @@ def test_retrieve_custom_fields(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -174,10 +174,11 @@ def test_retrieve_custom_fields(store): def test_add_simple_transform(store): - def obj_meas(indices, new_data, add_info, occupied, cur_data): + def obj_meas(indices, new_data, add_info, extra_args, occupied, cur_data): # pylint: disable = unused-argument new_data["objective"] = np.sum(new_data["solution"], axis=1) new_data["measures"] = np.asarray(new_data["solution"])[:, :2] + add_info.update(extra_args) add_info["bar"] = 5 return indices, new_data, add_info @@ -209,7 +210,7 @@ def obj_meas(indices, new_data, add_info, occupied, cur_data): def test_add_empty_transform(store): # new_data should be able to take on arbitrary values when no indices are # returned, so we make it an empty dict here. - def empty(indices, new_data, add_info, occupied, cur_data): + def empty(indices, new_data, add_info, extra_args, occupied, cur_data): # pylint: disable = unused-argument return [], {}, {} @@ -242,7 +243,7 @@ def test_resize_to_double_capacity(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -265,7 +266,7 @@ def test_as_raw_dict(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -306,7 +307,7 @@ def test_from_raw_dict(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -334,7 +335,7 @@ def test_as_dict(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -364,7 +365,7 @@ def test_as_pandas(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -410,7 +411,7 @@ def test_as_pandas_custom_fields(store): "measures": [[1.0, 2.0], [3.0, 4.0]], "solution": [np.zeros(10), np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -432,6 +433,17 @@ def test_as_pandas_custom_fields(store): ((df.loc[0] == row1).all() and (df.loc[1] == row0).all())) +def test_as_pandas_2d_fields(store): + store = ArrayStore( + { + "solution": ((10, 10), np.float32), + }, + 10, + ) + with pytest.raises(ValueError): + store.as_pandas() + + def test_iteration(store): store.add( [3], @@ -440,7 +452,7 @@ def test_iteration(store): "measures": [[1.0, 2.0]], "solution": [np.zeros(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -461,7 +473,7 @@ def test_add_during_iteration(store): "measures": [[1.0, 2.0]], "solution": [np.zeros(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -476,7 +488,7 @@ def test_add_during_iteration(store): "measures": [[3.0, 4.0]], "solution": [np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. 
) @@ -489,7 +501,7 @@ def test_clear_during_iteration(store): "measures": [[1.0, 2.0]], "solution": [np.zeros(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -506,7 +518,7 @@ def test_clear_and_add_during_iteration(store): "measures": [[1.0, 2.0]], "solution": [np.zeros(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) @@ -520,6 +532,6 @@ def test_clear_and_add_during_iteration(store): "measures": [[3.0, 4.0]], "solution": [np.ones(10)], }, - {}, # Empty add_info. + {}, # Empty extra_args. [], # Empty transforms. ) From ca64acec20658c6aa3fdd0050b7b5437a0811c9a Mon Sep 17 00:00:00 2001 From: Bryon Tjanaka Date: Tue, 31 Oct 2023 20:28:56 -0700 Subject: [PATCH 26/26] test shapes in init --- tests/archives/array_store_test.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/archives/array_store_test.py b/tests/archives/array_store_test.py index 7d8c540c4..ea4eb2f85 100644 --- a/tests/archives/array_store_test.py +++ b/tests/archives/array_store_test.py @@ -17,13 +17,15 @@ def test_init_invalid_field(): ) -def test_init(): +@pytest.mark.parametrize("shape", [((), (2,), (10,)), ((), 2, 10)], + ids=["tuple", "int"]) +def test_init(shape): capacity = 10 store = ArrayStore( { - "objective": ((), np.float32), - "measures": ((2,), np.float32), - "solution": ((10,), np.float32), + "objective": (shape[0], np.float32), + "measures": (shape[1], np.float32), + "solution": (shape[2], np.float32), }, capacity, )
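Taken together, the series leaves ArrayStore with the interface sketched below. This is a usage sketch rather than part of any patch; the ``threshold`` transform, the ``min_obj`` and ``num_kept`` keys, and the concrete values are invented for illustration::

    import numpy as np
    from ribs.archives import ArrayStore

    # After patches 25 and 26, scalar field shapes are accepted, e.g., 2 == (2,).
    store = ArrayStore(
        {
            "objective": ((), np.float32),
            "measures": (2, np.float32),
            "solution": (10, np.float32),
        },
        capacity=100,
    )

    def threshold(indices, new_data, add_info, extra_args, occupied, cur_data):
        """Keeps only entries whose objective clears the given threshold."""
        # pylint: disable = unused-argument
        keep = np.asarray(new_data["objective"]) >= extra_args["min_obj"]
        indices = np.asarray(indices)[keep]
        new_data = {
            name: np.asarray(arr)[keep] for name, arr in new_data.items()
        }
        add_info["num_kept"] = int(np.sum(keep))
        # Returning empty indices is fine: add() exits early (PATCH 21) and
        # ignores new_data's keys in that case (PATCH 22).
        return indices, new_data, add_info

    add_info = store.add(
        [0, 1, 2],
        {
            "objective": [0.5, 2.0, 3.0],
            "measures": [[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]],
            "solution": np.zeros((3, 10)),
        },
        {"min_obj": 1.0},  # extra_args, passed to every transform as-is.
        [threshold],
    )
    # add_info -> {"num_kept": 2}; only indices 1 and 2 were inserted.

    # Iteration (PATCH 20) yields one dict per occupied entry and raises
    # RuntimeError if add() or clear() is called mid-loop.
    for entry in store:
        print(entry["index"], entry["objective"])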