diff --git a/docs/guide/serialization.rst b/docs/guide/serialization.rst index 6557a3e4..3e81c9e9 100644 --- a/docs/guide/serialization.rst +++ b/docs/guide/serialization.rst @@ -193,9 +193,9 @@ Similarly, you can reload the object with: .. code:: import json - from gufe.tokenization import JSON_HANDLER + from gufe.tokenization import JSON_HANDLER, GufeTokenizable with open(filename, mode='r') as f: - obj = json.load(f, cls=JSON_HANDLER.decoder) + obj = GufeTokenizable.from_dict(json.load(f, cls=JSON_HANDLER.decoder)) Note that these objects are not space-efficient: that is, if you have the same object in memory referenced by multiple objects (e.g., an identical @@ -203,7 +203,44 @@ the same object in memory referenced by multiple objects (e.g., an identical save multiple copies of its JSON representation. On reloading, tools that use the recommended ``from_dict`` method will undo -do this duplication; see :ref:`gufe-memory-deduplication` for details. +this duplication; see :ref:`gufe-memory-deduplication` for details. + +As a more space-efficient alternative to ``to_dict``/``from_dict``, consider +using ``to_keyed_chain``/``from_keyed_chain`` instead. +This deals in a representation using the :class:`.KeyedChain` approach, which +avoids duplication of dependent :class:`.GufeTokenizable` objects in the serialized +JSON representation. + +Convenient serialization +~~~~~~~~~~~~~~~~~~~~~~~~ + +We also provide convenience methods to convert any :class:`.GufeTokenizable` to +and from JSON using a space-efficient serialization strategy based on our +:class:`.KeyedChain` representation. This is intended for developers that want +to serialise these objects using the current best practice and are not +concerned with the details of the process. The :func:`to_json +<gufe.tokenization.GufeTokenizable.to_json>` API offers the flexibility to +convert to JSON directly or to write to a filelike object: + +.. 
code:: + + # get a json representation in-memory + json = obj.to_json() + + # save to a file directly + obj.to_json(file=filename) + +Similarly, you can recreate the object using the :func:`from_json <gufe.tokenization.GufeTokenizable.from_json>` +classmethod: + +.. code:: + + # load the object from a json file produced with `to_json` + obj = cls.from_json(file=filename) + + # load from a string produced with `to_json` + obj = cls.from_json(content=json) + .. Using JSON codecs outside of JSON .. --------------------------------- diff --git a/gufe/tests/test_tokenization.py b/gufe/tests/test_tokenization.py index 8ccf95bb..e6d035ea 100644 --- a/gufe/tests/test_tokenization.py +++ b/gufe/tests/test_tokenization.py @@ -201,6 +201,21 @@ def leaf_dict(a): ':version:': 1, } + self.expected_keyed_chain = [ + (str(leaf.key), + leaf_dict("foo")), + (str(bar.key), + leaf_dict({':gufe-key:': str(leaf.key)})), + (str(self.cont.key), + {':version:': 1, + '__module__': __name__, + '__qualname__': 'Container', + 'dct': {'a': 'b', + 'leaf': {':gufe-key:': str(leaf.key)}}, + 'lst': [{':gufe-key:': str(leaf.key)}, 0], + 'obj': {':gufe-key:': str(bar.key)}}) + ] + def test_set_key(self): leaf = Leaf("test-set-key") key = leaf.key @@ -232,6 +247,43 @@ def test_from_keyed_dict(self): assert recreated == self.cont assert recreated is self.cont + def test_to_keyed_chain(self): + assert self.cont.to_keyed_chain() == self.expected_keyed_chain + + def test_from_keyed_chain(self): + recreated = self.cls.from_keyed_chain(self.expected_keyed_chain) + assert recreated == self.cont + assert recreated is self.cont + + def test_to_json_string(self): + raw_json = self.cont.to_json() + + # tuples are converted to lists in JSON so fix the expected result to use lists + expected_key_chain = [list(tok) for tok in self.expected_keyed_chain] + assert json.loads(raw_json, cls=JSON_HANDLER.decoder) == expected_key_chain + + def test_from_json_string(self): + recreated = self.cls.from_json(content=json.dumps(self.expected_keyed_chain, 
cls=JSON_HANDLER.encoder)) + + assert recreated == self.cont + assert recreated is self.cont + + def test_to_json_file(self, tmpdir): + file_path = tmpdir / "container.json" + self.cont.to_json(file=file_path) + + # tuples are converted to lists in JSON so fix the expected result to use lists + expected_key_chain = [list(tok) for tok in self.expected_keyed_chain] + assert json.load(file_path.open(mode="r"), cls=JSON_HANDLER.decoder) == expected_key_chain + + def test_from_json_file(self, tmpdir): + file_path = tmpdir / "container.json" + json.dump(self.expected_keyed_chain, file_path.open(mode="w"), cls=JSON_HANDLER.encoder) + recreated = self.cls.from_json(file=file_path) + + assert recreated == self.cont + assert recreated is self.cont + def test_to_shallow_dict(self): assert self.cont.to_shallow_dict() == self.expected_shallow diff --git a/gufe/tokenization.py b/gufe/tokenization.py index 06ed7f4c..e7c959db 100644 --- a/gufe/tokenization.py +++ b/gufe/tokenization.py @@ -14,7 +14,8 @@ import warnings import weakref from itertools import chain -from typing import Any, Union, List, Tuple, Dict, Generator +from os import PathLike +from typing import Any, Union, List, Tuple, Dict, Generator, TextIO, Optional from typing_extensions import Self from gufe.custom_codecs import ( @@ -622,6 +623,96 @@ def copy_with_replacements(self, **replacements): dct.update(replacements) return self._from_dict(dct) + def to_keyed_chain(self) -> List[Tuple[str, Dict]]: + """ + Generate a keyed chain representation of the object. + + See Also + -------- + KeyedChain + """ + return KeyedChain.gufe_to_keyed_chain_rep(self) + + @classmethod + def from_keyed_chain(cls, keyed_chain: List[Tuple[str, Dict]]): + """ + Generate an instance from keyed chain representation. + + Parameters + ---------- + keyed_chain : List[Tuple[str, Dict]] + The keyed_chain representation of the GufeTokenizable. 
+ + See Also + -------- + KeyedChain + """ + return KeyedChain(keyed_chain=keyed_chain).to_gufe() + + def to_json(self, file: Optional[PathLike | TextIO] = None) -> None | str: + """ + Generate a JSON keyed chain representation. + + This will be written to the filepath or filelike object if passed. + + Parameters + ---------- + file + A filepath or filelike object to write the JSON to. + + Returns + ------- + str + A minimal JSON representation of the object if `file` is `None`; else None. + + See Also + -------- + from_json + """ + + if file is None: + return json.dumps(self.to_keyed_chain(), cls=JSON_HANDLER.encoder) + + from gufe.utils import ensure_filelike + with ensure_filelike(file, mode="w") as out: + json.dump(self.to_keyed_chain(), out, cls=JSON_HANDLER.encoder) + + return None + + @classmethod + def from_json(cls, file: Optional[PathLike | TextIO] = None, content: Optional[str] = None): + """ + Generate an instance from JSON keyed chain representation. + + Can provide either a filepath/filelike as `file`, or JSON content via `content`. + + Parameters + ---------- + file + A filepath or filelike object to read JSON data from. + content + A string to read JSON data from. + + See Also + -------- + to_json + """ + + if content is not None and file is not None: + raise ValueError("Cannot specify both `content` and `file`; only one input allowed") + elif content is None and file is None: + raise ValueError("Must specify either `content` and `file` for JSON input") + + if content is not None: + keyed_chain = json.loads(content, cls=JSON_HANDLER.decoder) + return cls.from_keyed_chain(keyed_chain=keyed_chain) + + from gufe.utils import ensure_filelike + with ensure_filelike(file, mode="r") as f: + keyed_chain = json.load(f, cls=JSON_HANDLER.decoder) + + return cls.from_keyed_chain(keyed_chain=keyed_chain) + class GufeKey(str): def __repr__(self): # pragma: no cover