Merge branch 'main' into dtype-meta-vlen

NeurodataWithoutBorders · Apr 16, 2024 · df759d0 · df759d0
2 parents 8a1dd86 + bc71a9d
commit df759d0
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -6,6 +6,8 @@
 
 :warning: Please note, LINDI is currently under development and should not yet be used in practice.
 
+For a more up-to-date introduction to LINDI, see the [README on the dev branch](https://github.com/NeurodataWithoutBorders/lindi/tree/dev).
+
 LINDI is a Python library that facilitates handling NWB (Neurodata Without Borders) files in an efficient, flexible manner, especially when dealing with large datasets on remote servers. The goal is to enable composition of NWB files by integrating data from multiple sources without the need to copy or move large datasets.
 
 LINDI features include:

diff --git a/lindi/LindiH5pyFile/LindiH5pyDataset.py b/lindi/LindiH5pyFile/LindiH5pyDataset.py
@@ -171,6 +171,15 @@ def fletcher32(self):
         else:
             raise Exception(f'Unexpected dataset object type: {type(self._dataset_object)}')
 
+    @property  # exposed as a read-only attribute: accessed as `obj.chunks`, no call
+    def chunks(self):  # returns the `chunks` attribute of the wrapped dataset object
+        if isinstance(self._dataset_object, h5py.Dataset):  # wrapped object is an h5py dataset
+            return self._dataset_object.chunks
+        elif isinstance(self._dataset_object, zarr.Array):  # wrapped object is a zarr array; same attribute is forwarded
+            return self._dataset_object.chunks
+        else:  # any other wrapped type is rejected explicitly
+            raise Exception(f'Unexpected dataset object type: {type(self._dataset_object)}')
+
     def __repr__(self):  # type: ignore
         return f"<{self.__class__.__name__}: {self.name}>"
 

diff --git a/lindi/LindiH5pyFile/LindiH5pyFile.py b/lindi/LindiH5pyFile/LindiH5pyFile.py
@@ -385,7 +385,7 @@ def _recursive_copy(src_item: Union[h5py.Group, h5py.Dataset], dest: h5py.File,
                             dst_rfs['refs'][dst_ref_key] = _deep_copy(src_rfs['refs'][src_ref_key])
                     return
 
-        dst_item = dest.create_dataset(name, data=src_item[()])
+        dst_item = dest.create_dataset(name, data=src_item[()], chunks=src_item.chunks)
         for k, v in src_item.attrs.items():
             dst_item.attrs[k] = v
     else:

diff --git a/lindi/conversion/create_zarr_dataset_from_h5_data.py b/lindi/conversion/create_zarr_dataset_from_h5_data.py
@@ -80,7 +80,7 @@ def create_zarr_dataset_from_h5_data(
                 return ds
             else:
                 raise Exception(f'Unsupported scalar value type: {type(scalar_value)}')
-        elif h5_dtype.kind == 'S':
+        elif h5_dtype.kind == 'S' or h5_dtype.kind == 'U':
             # byte string (kind 'S') or unicode string (kind 'U')
             if h5_data is None:
                 raise Exception(f'Data must be provided for scalar dataset {label}')