zarr-developers · LDeakin · Oct 6, 2024 · Oct 6, 2024 · Oct 6, 2024 · Oct 6, 2024
diff --git a/virtualizarr/tests/test_kerchunk.py b/virtualizarr/tests/test_kerchunk.py
@@ -23,7 +23,7 @@ def test_kerchunk_roundtrip_in_memory_no_concat():
             chunks=(2, 2),
             compressor=None,
             filters=None,
-            fill_value=np.nan,
+            fill_value=0,
             order="C",
         ),
         chunkmanifest=manifest,

diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py
@@ -37,7 +37,7 @@ def test_create_manifestarray(self):
 
     def test_create_manifestarray_from_kerchunk_refs(self):
         arr_refs = {
-            ".zarray": '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
+            ".zarray": '{"chunks":[2,3],"compressor":null,"dtype":"<i8","fill_value":0,"filters":null,"order":"C","shape":[2,3],"zarr_format":2}',
             "0.0": ["test1.nc", 6144, 48],
         }
         marr = ManifestArray._from_kerchunk_refs(arr_refs)
@@ -46,13 +46,13 @@ def test_create_manifestarray_from_kerchunk_refs(self):
         assert marr.chunks == (2, 3)
         assert marr.dtype == np.dtype("int64")
         assert marr.zarray.compressor is None
-        assert marr.zarray.fill_value is np.nan
+        assert marr.zarray.fill_value == 0
         assert marr.zarray.filters is None
         assert marr.zarray.order == "C"
 
     def test_create_scalar_manifestarray_from_kerchunk_refs(self):
         arr_refs = {
-            ".zarray": '{"chunks":[],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[],"zarr_format":2}',
+            ".zarray": '{"chunks":[],"compressor":null,"dtype":"<i8","fill_value":0,"filters":null,"order":"C","shape":[],"zarr_format":2}',
             "0": ["test1.nc", 6144, 48],
         }
         marr = ManifestArray._from_kerchunk_refs(arr_refs)

diff --git a/virtualizarr/tests/test_readers/test_kerchunk.py b/virtualizarr/tests/test_readers/test_kerchunk.py
@@ -37,7 +37,7 @@ def test_dataset_from_df_refs():
 
     assert da.data.zarray.compressor is None
     assert da.data.zarray.filters is None
-    assert da.data.zarray.fill_value is np.nan
+    assert da.data.zarray.fill_value == 0
     assert da.data.zarray.order == "C"
 
     assert da.data.manifest.dict() == {

diff --git a/virtualizarr/tests/test_writers/test_kerchunk.py b/virtualizarr/tests/test_writers/test_kerchunk.py
@@ -19,7 +19,7 @@ def test_accessor_to_kerchunk_dict(self):
                 chunks=(2, 3),
                 compressor=None,
                 filters=None,
-                fill_value=np.nan,
+                fill_value=0,
                 order="C",
             ),
         )
@@ -30,7 +30,7 @@ def test_accessor_to_kerchunk_dict(self):
             "refs": {
                 ".zgroup": '{"zarr_format":2}',
                 ".zattrs": "{}",
-                "a/.zarray": '{"shape":[2,3],"chunks":[2,3],"dtype":"<i8","fill_value":null,"order":"C","compressor":null,"filters":null,"zarr_format":2}',
+                "a/.zarray": '{"shape":[2,3],"chunks":[2,3],"dtype":"<i8","fill_value":0,"order":"C","compressor":null,"filters":null,"zarr_format":2}',
                 "a/.zattrs": '{"_ARRAY_DIMENSIONS":["x","y"]}',
                 "a/0.0": ["test.nc", 6144, 48],
             },
@@ -51,7 +51,7 @@ def test_accessor_to_kerchunk_json(self, tmp_path):
                 chunks=(2, 3),
                 compressor=None,
                 filters=None,
-                fill_value=np.nan,
+                fill_value=0,
                 order="C",
             ),
         )
@@ -69,7 +69,7 @@ def test_accessor_to_kerchunk_json(self, tmp_path):
             "refs": {
                 ".zgroup": '{"zarr_format":2}',
                 ".zattrs": "{}",
-                "a/.zarray": '{"shape":[2,3],"chunks":[2,3],"dtype":"<i8","fill_value":null,"order":"C","compressor":null,"filters":null,"zarr_format":2}',
+                "a/.zarray": '{"shape":[2,3],"chunks":[2,3],"dtype":"<i8","fill_value":0,"order":"C","compressor":null,"filters":null,"zarr_format":2}',
                 "a/.zattrs": '{"_ARRAY_DIMENSIONS":["x","y"]}',
                 "a/0.0": ["test.nc", 6144, 48],
             },

diff --git a/virtualizarr/writers/zarr.py b/virtualizarr/writers/zarr.py
@@ -92,8 +92,8 @@ def zarr_v3_array_metadata(zarray: ZArray, dim_names: list[str], attrs: dict) ->
         "configuration": {"chunk_shape": metadata.pop("chunks")},
     }
     metadata["chunk_key_encoding"] = {
-        "name": "default",
-        "configuration": {"separator": "/"},
+        "name": "v2",
+        "configuration": {"separator": "."},
     }
     metadata["codecs"] = zarray._v3_codec_pipeline()
     metadata.pop("filters")

diff --git a/virtualizarr/zarr.py b/virtualizarr/zarr.py
@@ -66,17 +66,39 @@ def __post_init__(self) -> None:
         if self.fill_value is None:
             self.fill_value = ZARR_DEFAULT_FILL_VALUE.get(self.dtype.kind, 0.0)
 
+        # Handle non-finite fill values
+        if not isinstance(self.fill_value, list):
+            if self.fill_value is np.nan:
+                self.fill_value = "NaN"
+            elif self.fill_value is np.inf:
+                self.fill_value = "Infinity"
+            elif self.fill_value is -np.inf:  # FIXME: identity test against the freshly built -np.inf float does not match on CPython; compare by value instead
+                self.fill_value = "-Infinity"
+        # TODO: Handle other data types (complex, etc.)
+
     @property
     def codec(self) -> Codec:
         """For comparison against other arrays."""
         return Codec(compressor=self.compressor, filters=self.filters)
 
     @classmethod
     def from_kerchunk_refs(cls, decoded_arr_refs_zarray) -> "ZArray":
+        dtype = np.dtype(decoded_arr_refs_zarray["dtype"])
+
         # coerce type of fill_value as kerchunk can be inconsistent with this
         fill_value = decoded_arr_refs_zarray["fill_value"]
         if fill_value is None or fill_value == "NaN" or fill_value == "nan":
-            fill_value = np.nan
+            if dtype.kind == "f":
+                fill_value = np.nan
+            elif dtype.kind == "c":
+                fill_value = [np.nan, np.nan]
+            elif dtype.kind == "i":
+                fill_value = 0
+            else:
+                # TODO: Handle other data types
+                raise ValueError(
+                    f"Fill value {fill_value} is not valid for dtype {dtype}"
+                )
 
         compressor = decoded_arr_refs_zarray["compressor"]
         zarr_format = int(decoded_arr_refs_zarray["zarr_format"])
@@ -185,8 +207,11 @@ def _v3_codec_pipeline(self) -> list:
         # https://github.com/zarr-developers/zarr-python/pull/1944#issuecomment-2151994097
         # "If no ArrayBytesCodec is supplied, we can auto-add a BytesCodec"
         bytes = dict(
-            name="bytes", configuration={}
-        )  # TODO need to handle endianess configuration
+            name="bytes",
+            configuration={
+                "endian": "little"  # TODO: handle endianness configuration; little-endian is a sensible default for now
+            },
+        )
 
         # The order here is significant!
         # [ArrayArray] -> ArrayBytes -> [BytesBytes]