From f5b04b79e7fd93d091ea8a5f6ab910b4528ff051 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikkel=20E=20Lepper=C3=B8d?= Date: Fri, 3 May 2019 12:19:02 +0200 Subject: [PATCH 1/2] docstrings for group, dataset, raw, attributes and a getting started file with more elaborate tutorial, index is reduced to a bare example --- docs/dataset.rst | 2 - docs/getting_started.rst | 279 +++++++++++++++++++++++++++++++++++++++ docs/index.rst | 157 ++-------------------- docs/installation.rst | 16 ++- docs/raw.rst | 1 + exdir/core/attribute.py | 23 ++++ exdir/core/dataset.py | 30 ++++- exdir/core/exdir_file.py | 16 ++- exdir/core/group.py | 35 ++++- exdir/core/raw.py | 18 ++- 10 files changed, 421 insertions(+), 156 deletions(-) create mode 100644 docs/getting_started.rst diff --git a/docs/dataset.rst b/docs/dataset.rst index c48add7..2896f91 100644 --- a/docs/dataset.rst +++ b/docs/dataset.rst @@ -3,8 +3,6 @@ Datasets ======== -This is data set class. It has class :py:class:`exdir.core.Dataset`: - .. autoclass:: exdir.core.Dataset :members: :undoc-members: diff --git a/docs/getting_started.rst b/docs/getting_started.rst new file mode 100644 index 0000000..3cc46af --- /dev/null +++ b/docs/getting_started.rst @@ -0,0 +1,279 @@ +:orphan: + +.. _getting_started: + +Getting Started +=============== + +Quick usage example +------------------- + +.. testsetup:: + + import os + import shutil + if(os.path.exists("mytestfile.exdir")): + shutil.rmtree("mytestfile.exdir") + + +.. doctest:: + + >>> import exdir + >>> import numpy as np + >>> f = exdir.File("mytestfile.exdir") + +The :ref:`File object ` points to the root folder in the exdir file +structure. +You can add groups and datasets to it. + +.. doctest:: + + >>> my_group = f.require_group("my_group") + >>> a = np.arange(100) + >>> dset = f.require_dataset("my_data", data=a) + +These can later be accessed with square brackets: + +.. doctest:: + + >>> f["my_data"][10] + 10 + +Groups can hold other groups or datasets: + +.. 
doctest:: + + >>> subgroup = my_group.require_group("subgroup") + >>> subdata = subgroup.require_dataset("subdata", data=a) + +Datasets support array-style slicing: + +.. doctest:: + + >>> dset[0:100:10] + memmap([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]) + +Datasets are updated with: + +.. doctest:: + + >>> dset[0:100:10] = a[0:100:10][::-1] + >>> dset[0:100:10] + memmap([ 90, 80, 70, 60, 50, 40, 30, 20, 10, 0]) + +Attributes can be added to files, groups and datasets: + +.. doctest:: + + >>> f.attrs["description"] = "My first exdir file" + >>> my_group.attrs["meaning_of_life"] = 42 + >>> dset.attrs["trial_number"] = 12 + >>> f.attrs["description"] + 'My first exdir file' + +Core concepts +------------- +An exdir object contains two types of objects: `datasets`, which are +array-like collections of data, and `groups`, which are directories containing +datasets and other groups. + +An exdir directory is created by: + +.. testsetup:: + + import os + import shutil + if(os.path.exists("myfile.exdir")): + shutil.rmtree("myfile.exdir") + + +.. doctest:: + + >>> import exdir + >>> import numpy as np + >>> f = exdir.File("myfile.exdir", "w") + +The :ref:`File object <file>` contains many useful methods including :py:meth:`exdir.core.Group.require_dataset`: + + >>> data = np.arange(100) + >>> dset = f.require_dataset("mydataset", data=data) + +The created object is not an array but :ref:`an exdir dataset <dataset>`. +Like NumPy arrays, datasets have a shape: + + >>> dset.shape + (100,) + +Also array-style slicing is supported: + + >>> dset[0] + 0 + >>> dset[10] + 10 + >>> dset[0:100:10] + memmap([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]) + +Datasets are updated **on file** with: + +.. doctest:: + + >>> dset[0:100:10] = a[0:100:10][::-1] + >>> dset[0:100:10] + memmap([ 90, 80, 70, 60, 50, 40, 30, 20, 10, 0]) + +For more, see :ref:`file` and :ref:`dataset`. + +A Group is a container of other groups, datasets and raw objects. 
+ +To create a Group it is necessary to have a File object available:: + + >>> f = exdir.File('my_file') + >>> group = f.create_group('my_group') + +Groups can contain other groups, datasets and raw objects:: + + >>> group.create_group('other_group') + >>> group.create_dataset('my_dataset', data=[0,1,2]) + >>> group.create_raw('raw_container') + +Children of groups can be accessed by indexing:: + + >>> group['my_dataset'] + memmap([ 0, 1, 2]) + +One may iterate groups similar to maps:: + + >>> for key, value in group.items(): + ... print(group[key] == value) + True + True + True + >>> for key in group: + ... print(key) + +Attributes +---------- + +With exdir you can store metadata right next to the data it describes. +All groups and datasets can have attributes which are described by :py:meth:`exdir.core.attributes`. + +Attributes are accessed through the ``attrs`` proxy object, which again +implements the dictionary interface: + + >>> dset.attrs['temperature'] = 99.5 + >>> dset.attrs['temperature'] + 99.5 + >>> 'temperature' in dset.attrs + True + +Groups and Files may also have attributes:: + + >>> group.attrs = {'description': 'this is a group'} + >>> group.attrs['number'] = 1 + >>> print(group.attrs) + {'description': 'this is a group', 'number': 1} + >>> f.attrs = {'description': 'this is a file'} + >>> f.attrs['number'] = 2 + >>> print(f.attrs) + {'description': 'this is a file', 'number': 2} + +For more, see :ref:`attributes`. + +Groups and hierarchical organization +------------------------------------ + +Every object in an exdir directory has a name, and they're arranged in a POSIX-style hierarchy with ``/``-separators: + + >>> dset.name + '/mydataset' + +The "directories" in this system are called :ref:`groups <group>`. 
+The :ref:`File object <file>` we created is itself a group, in this case the `root group`, named ``/`` + + >>> f.name + '/' + +Creating a subgroup is done by using the :py:meth:`exdir.core.Group.require_group` method: + + >>> grp = f.require_group("subgroup") + +All :py:class:`exdir.core.Group` objects also have the ``require_*`` methods like File: + + >>> dset2 = grp.require_dataset("another_dataset", data=data) + >>> dset2.name + '/subgroup/another_dataset' + +.. By the way, you don't have to create all the intermediate groups manually. +.. Specifying a full path works just fine: +.. +.. +.. >>> dset3 = f.create_dataset('subgroup2/dataset_three', (10,)) +.. >>> dset3.name +.. '/subgroup2/dataset_three' + +You retrieve objects in the file using the item-retrieval syntax: + + >>> dataset_three = f['subgroup/another_dataset'] + +Iterating over a group provides the names of its members: + + >>> for name in f: + ... print(name) + mydataset + subgroup + + +Containership testing also uses names: + + + >>> "mydataset" in f + True + >>> "somethingelse" in f + False + +You can even use full path names: + + >>> "subgroup/another_dataset" in f + True + >>> "subgroup/somethingelse" in f + False + +There are also the familiar :py:meth:`exdir.core.Group.keys`, :py:meth:`exdir.core.Group.values`, :py:meth:`exdir.core.Group.items` and +:py:meth:`exdir.core.Group.iter` methods, as well as :py:meth:`exdir.core.Group.get`. + +For more, see :ref:`group`. + +Raw +--- + +With exdir you can store raw data, that is any datatype you want to, in a `Raw` object. +The typical use case is raw data produced in a format that you want to keep +alongside data that is converted or processed +and stored in exdir datasets. + +You can create `Raw` objects with: + +.. doctest:: + + >>> raw = f.create_raw('raw_filename') + +Note that you may also use `require_raw`. +The `Raw` directory is available through: + +.. doctest:: + + >>> directory = raw.directory + +For more, see :ref:`raw`. 
+ +Acknowledgements +---------------- + +The development of Exdir owes a great deal to other standardization efforts in science in general and neuroscience in particular, +among them the contributors to HDF5, NumPy, YAML, PyYAML, ruamel-yaml, SciPy, Klusta Kwik, NeuralEnsemble, and Neurodata Without Borders. + +References +---------- + +* :ref:`genindex` +* :ref:`search` diff --git a/docs/index.rst b/docs/index.rst index 0fbc13a..e264259 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,6 +22,7 @@ Exdir is described in detail in our reasearch paper: :hidden: installation + getting_started file group dataset @@ -74,6 +75,9 @@ It is however explicitly stored in the file system. Install ------- +With `PyPi `_:: + + pip install exdir With `Anaconda `_ or `Miniconda `_:: @@ -130,6 +134,14 @@ Datasets support array-style slicing: >>> dset[0:100:10] memmap([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]) +Datasets are updated **on file** with: + +.. doctest:: + + >>> dset[0:100:10] = a[0:100:10][::-1] + >>> dset[0:100:10] + memmap([ 90, 80, 70, 60, 50, 40, 30, 20, 10, 0]) + Attributes can be added to files, groups and datasets: .. doctest:: @@ -140,151 +152,6 @@ Attributes can be added to files, groups and datasets: >>> f.attrs["description"] 'My first exdir file' - -Core concepts -------------- -An exdir object contains two types of objects: `datasets`, which are -array-like collections of data, and `groups`, which are directories containing -datasets and other groups. - -An exdir directory is created by: - -.. testsetup:: - - import os - import shutil - if(os.path.exists("myfile.exdir")): - shutil.rmtree("myfile.exdir") - - -.. 
doctest:: - - >>> import exdir - >>> import numpy as np - >>> f = exdir.File("myfile.exdir", "w") - -The :ref:`File object ` containes many useful methods including :py:meth:`exdir.core.Group.require_dataset`: - - >>> data = np.arange(100) - >>> dset = f.require_dataset("mydataset", data=data) - -The created object is not an array but :ref:`an exdir dataset`. -Like NumPy arrays, datasets have a shape: - - >>> dset.shape - (100,) - -Also array-style slicing is supported: - - >>> dset[0] - 0 - >>> dset[10] - 10 - >>> dset[0:100:10] - memmap([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]) - -For more, see :ref:`file` and :ref:`dataset`. - - -Groups and hierarchical organization ------------------------------------- - -Every object in an exdir directory has a name, and they're arranged in a POSIX-style hierarchy with ``/``-separators: - - >>> dset.name - '/mydataset' - -The "directory" in this system are called :ref:`groups `. -The :ref:`File object ` we created is itself a group, in this case the `root group`, named ``/`` - - >>> f.name - '/' - -Creating a subgroup is done by using :py:meth:`exdir.core.Group.require_group` method: - - >>> grp = f.require_group("subgroup") - -All :py:class:`exdir.core.Group` objects also have the ``require_*`` methods like File: - - >>> dset2 = grp.require_dataset("another_dataset", data=data) - >>> dset2.name - '/subgroup/another_dataset' - -.. By the way, you don't have to create all the intermediate groups manually. -.. Specifying a full path works just fine: -.. -.. -.. >>> dset3 = f.create_dataset('subgroup2/dataset_three', (10,)) -.. >>> dset3.name -.. '/subgroup2/dataset_three' - -You retrieve objects in the file using the item-retrieval syntax: - - >>> dataset_three = f['subgroup/another_dataset'] - -Iterating over a group provides the names of its members: - - >>> for name in f: - ... 
print(name) - mydataset - subgroup - - -Containership testing also uses names: - - - >>> "mydataset" in f - True - >>> "somethingelse" in f - False - -You can even use full path names: - - >>> "subgroup/another_dataset" in f - True - >>> "subgroup/somethingelse" in f - False - -There are also the familiar :py:meth:`exdir.core.Group.keys`, :py:meth:`exdir.core.Group.values`, :py:meth:`exdir.core.Group.items` and -:py:meth:`exdir.core.Group.iter` methods, as well as :py:meth:`exdir.core.Group.get`. - - -.. Since iterating over a group only yields its directly-attached members, -.. iterating over an entire file is accomplished with the ``Group`` methods -.. ``visit()`` and ``visititems()``, which take a callable: -.. -.. -.. -.. >>> def printname(name): -.. ... print(name) -.. >>> f.visit(printname) -.. mydataset -.. subgroup -.. subgroup/another_dataset -.. subgroup2 -.. subgroup2/dataset_three - -For more, see :ref:`group`. - - - -Attributes ----------- - -With exdir you can store metadata right next to the data it describes. -All groups and datasets can have attributes which are descibed by :py:meth:`exdir.core.attributes`. - -Attributes are accessed through the ``attrs`` proxy object, which again -implements the dictionary interface: - - >>> dset.attrs['temperature'] = 99.5 - >>> dset.attrs['temperature'] - 99.5 - >>> 'temperature' in dset.attrs - True - -For more, see :ref:`attributes`. - Acknowledgements ---------------- diff --git a/docs/installation.rst b/docs/installation.rst index 3e94faa..f3d28c0 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -7,9 +7,21 @@ Installation Pre-configured installation (recommended) ----------------------------------------- -It’s strongly recommended that you use Anaconda to install exdir along with its compiled dependencies. 
+With `PyPi `_:: + + pip install exdir + -With `Anaconda `_ or + pip install exdir + +With `Anaconda `_ or `Miniconda `_:: conda install -c cinpla exdir + +Installing from source +---------------------- + +It is also possible to install exdir from the source code found on +`GitHub `_:: + + git clone https://github.com/CINPLA/exdir.git + cd exdir + python setup.py install diff --git a/docs/raw.rst b/docs/raw.rst index e1f118d..b735895 100644 --- a/docs/raw.rst +++ b/docs/raw.rst @@ -2,6 +2,7 @@ Raw ====== + .. autoclass:: exdir.core.Raw :members: :undoc-members: diff --git a/exdir/core/attribute.py b/exdir/core/attribute.py index 64b20c4..297f7a5 100644 --- a/exdir/core/attribute.py +++ b/exdir/core/attribute.py @@ -27,6 +27,29 @@ class Attribute(object): the attributes stored in the :code:`attributes.yaml` file for a given Exdir Object. + With exdir you can store metadata right next to the data it describes. + All groups and datasets can have attributes which are described by :py:meth:`exdir.core.attributes`. + + Attributes are accessed through the ``attrs`` proxy object, which again + implements the dictionary interface: + + >>> dset.attrs['temperature'] = 99.5 + >>> dset.attrs['temperature'] + 99.5 + >>> 'temperature' in dset.attrs + True + + Groups and Files may also have attributes:: + + >>> group.attrs = {'description': 'this is a group'} + >>> group.attrs['number'] = 1 + >>> print(group.attrs) + {'description': 'this is a group', 'number': 1} + >>> f.attrs = {'description': 'this is a file'} + >>> f.attrs['number'] = 2 + >>> print(f.attrs) + {'description': 'this is a file', 'number': 2} + The Attribute object should not be created, but retrieved by accessing the :code:`.attrs` property of any Exdir Object, such as a Dataset, Group or File. 
diff --git a/exdir/core/dataset.py b/exdir/core/dataset.py index 633448e..8159a49 100644 --- a/exdir/core/dataset.py +++ b/exdir/core/dataset.py @@ -30,7 +30,35 @@ def _dataset_filename(dataset_directory): class Dataset(exob.Object): """ - Dataset class + A Dataset can be created in a File object or a Group object:: + + >>> import exdir + >>> import numpy as np + >>> f = exdir.File("myfile.exdir", "w") + >>> grp = f.create_group('my_group') + >>> dset_in_file = f.require_dataset("my_dataset", data=np.arange(100)) + >>> dset_in_group = grp.require_dataset("my_dataset", data=np.arange(100)) + + The created object is not an array but :ref:`an exdir dataset <dataset>`. + Like NumPy arrays, datasets have a shape:: + + >>> dset_in_file.shape + (100,) + + Also array-style slicing is supported:: + + >>> dset_in_file[0] + 0 + >>> dset_in_file[10] + 10 + >>> dset_in_file[0:100:10] + memmap([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]) + + Datasets are updated **on file** with:: + + >>> dset_in_file[0:100:10] = np.arange(0, 100, 10)[::-1] + >>> dset_in_file[0:100:10] + memmap([90, 80, 70, 60, 50, 40, 30, 20, 10, 0]) Warnings -------- diff --git a/exdir/core/exdir_file.py b/exdir/core/exdir_file.py index af657a7..4bf87e2 100644 --- a/exdir/core/exdir_file.py +++ b/exdir/core/exdir_file.py @@ -23,11 +23,19 @@ class File(Group): The :code:`File` object :code:`f` now points to the root folder in the exdir file structure. - You can add groups and datasets to it as follows: + You can add groups, datasets and raw to it as follows: - >>> my_group = f.require_group("my_group") - >>> a = np.arange(100) - >>> dset = f.require_dataset("my_data", data=a) + >>> grp = f.require_group('my_group') + >>> dset = f.require_dataset("my_dataset", data=np.arange(100)) + >>> raw = f.require_raw('my_raw') + + To loop through all children of a file simply iterate through: + + >>> for key in f: + print(key) + my_group + my_raw + my_dataset The data is immediately written to disk. 
diff --git a/exdir/core/group.py b/exdir/core/group.py index 57e5804..4ed3576 100644 --- a/exdir/core/group.py +++ b/exdir/core/group.py @@ -48,7 +48,40 @@ def _assert_data_shape_dtype_match(data, shape, dtype): class Group(Object): """ - Container of other groups and datasets. + A Group is a container of other groups, datasets and raw objects. + + To create a Group it is necessary to have a File object available:: + + >>> f = exdir.File('my_file') + >>> group = f.create_group('my_group') + + Groups can contain other groups, datasets and raw objects:: + + >>> group.create_group('other_group') + >>> group.create_dataset('my_dataset', data=[0,1,2]) + >>> group.create_raw('raw_container') + + Children of groups can be accessed by indexing:: + + >>> group['my_dataset'] + memmap([ 0, 1, 2]) + + One may iterate groups similar to maps:: + + >>> for key, value in group.items(): + ... print(group[key] == value) + True + True + True + >>> for key in group: + ... print(key) + + Groups may have attributes:: + + >>> group.attrs = {'description': 'this is a group'} + >>> group.attrs['number'] = 1 + >>> print(group.attrs) + {'description': 'this is a group', 'number': 1} """ def __init__(self, root_directory, parent_path, object_name, io_mode=None, diff --git a/exdir/core/raw.py b/exdir/core/raw.py index 75a2969..763a17f 100644 --- a/exdir/core/raw.py +++ b/exdir/core/raw.py @@ -5,7 +5,23 @@ class Raw(exob.Object): """ Raw objects are simple folders with any content. - Raw objects currently have no features apart from showing their path. + With exdir you can store raw data, containing any datatype you want in a `Raw` object. + The typical use case is raw data produced in a format that you want to keep + alongside data that is converted or processed + and stored in exdir datasets. 
+ + You can create `Raw` objects with:: + + >>> import exdir + >>> import numpy as np + >>> f = exdir.File("myfile.exdir", "w") + >>> raw = f.create_raw('raw_filename') + + Note that you may also use `require_raw`. + Raw objects currently have no features apart from showing their path:: + + >>> directory = raw.directory + """ def __init__(self, root_directory, parent_path, object_name, io_mode=None, plugin_manager=None): super(Raw, self).__init__( From db60f5019d0aa625679b9518e82fb9a51319256d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikkel=20E=20Lepper=C3=B8d?= Date: Tue, 7 May 2019 12:55:02 +0200 Subject: [PATCH 2/2] fix doctests and add repr on datasets --- docs/getting_started.rst | 9 ++++++--- docs/index.rst | 2 +- exdir/core/attribute.py | 5 +++++ exdir/core/dataset.py | 3 +++ exdir/core/exdir_file.py | 8 ++++---- 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 3cc46af..ceb18f7 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -59,7 +59,7 @@ Datasets are updated with: >>> dset[0:100:10] = a[0:100:10][::-1] >>> dset[0:100:10] - memmap([ 90, 80, 70, 60, 50, 40, 30, 20, 10, 0]) + memmap([90, 80, 70, 60, 50, 40, 30, 20, 10, 0]) Attributes can be added to files, groups and datasets: @@ -113,13 +113,16 @@ Also array-style slicing is supported: >>> dset[0:100:10] memmap([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]) -Datasets are updated **on file** with: +Datasets are updated **on file**: .. doctest:: >>> dset[0:100:10] = a[0:100:10][::-1] >>> dset[0:100:10] - memmap([ 90, 80, 70, 60, 50, 40, 30, 20, 10, 0]) + memmap([90, 80, 70, 60, 50, 40, 30, 20, 10, 0]) + >>> dset.data = np.arange(10) + >>> dset + memmap([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) For more, see :ref:`file` and :ref:`dataset`. 
diff --git a/docs/index.rst b/docs/index.rst index e264259..a75f317 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -140,7 +140,7 @@ Datasets are updated **on file** with: >>> dset[0:100:10] = a[0:100:10][::-1] >>> dset[0:100:10] - memmap([ 90, 80, 70, 60, 50, 40, 30, 20, 10, 0]) + memmap([90, 80, 70, 60, 50, 40, 30, 20, 10, 0]) Attributes can be added to files, groups and datasets: diff --git a/exdir/core/attribute.py b/exdir/core/attribute.py index 297f7a5..2918f65 100644 --- a/exdir/core/attribute.py +++ b/exdir/core/attribute.py @@ -33,6 +33,11 @@ class Attribute(object): Attributes are accessed through the ``attrs`` proxy object, which again implements the dictionary interface: + >>> import exdir + >>> import numpy as np + >>> f = exdir.File("mytestfile.exdir") + >>> grp = f.require_group('my_group') + >>> dset = f.require_dataset("my_dataset", data=np.arange(100)) >>> dset.attrs['temperature'] = 99.5 >>> dset.attrs['temperature'] 99.5 diff --git a/exdir/core/dataset.py b/exdir/core/dataset.py index 8159a49..d0345fb 100644 --- a/exdir/core/dataset.py +++ b/exdir/core/dataset.py @@ -290,6 +290,9 @@ def __iter__(self): for i in range(self.shape[0]): yield self[i] + def __repr__(self): + return self.data.__repr__() + @property def _data(self): if self._data_memmap is None: diff --git a/exdir/core/exdir_file.py b/exdir/core/exdir_file.py index 4bf87e2..8385363 100644 --- a/exdir/core/exdir_file.py +++ b/exdir/core/exdir_file.py @@ -15,7 +15,7 @@ class File(Group): A File is a special type of :class:`.Group`. See :class:`.Group` for documentation of inherited functions. - To create a File, call the File constructor with the name of the File you wish to create: + To create a File, call the File constructor with the name of the File you wish to create:: >>> import exdir >>> import numpy as np @@ -23,16 +23,16 @@ class File(Group): The :code:`File` object :code:`f` now points to the root folder in the exdir file structure. 
- You can add groups, datasets and raw to it as follows: + You can add groups, datasets and raw to it as follows:: >>> grp = f.require_group('my_group') >>> dset = f.require_dataset("my_dataset", data=np.arange(100)) >>> raw = f.require_raw('my_raw') - To loop through all children of a file simply iterate through: + To loop through all children of a file simply iterate through:: >>> for key in f: - print(key) + ... print(key) my_group my_raw my_dataset