Skip to content

Commit

Permalink
use zstd for compression
Browse files Browse the repository at this point in the history
  • Loading branch information
kaizhang committed Dec 14, 2024
1 parent 8202ea6 commit 8db0474
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 7 deletions.
5 changes: 3 additions & 2 deletions anndata-hdf5/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "anndata-hdf5"
-version = "0.5.0"
+version = "0.5.1"
edition = "2021"
rust-version = "1.65"
authors = ["Kai Zhang <[email protected]>"]
Expand All @@ -13,7 +13,8 @@ homepage = "https://github.com/kaizhang/anndata-rs"
[dependencies]
anndata = { workspace = true }
anyhow = "1.0"
-hdf5 = { package = "hdf5-metno", version = "0.9" }
+hdf5 = { package = "hdf5-metno", version = "0.9", features = ["blosc"] }
+blosc-src = { version = "0.3.0", features = ["zstd"] }
hdf5-sys = { package = "hdf5-metno-sys", version = "0.9", features = ["static", "zlib", "threadsafe"] }
libz-sys = { version = "1", features = ["libc"], default-features = false }
ndarray = { version = "0.16" }
Expand Down
9 changes: 6 additions & 3 deletions anndata-hdf5/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ fn new_dataset<T: BackendData>(
shape: &Shape,
config: WriteConfig,
) -> Result<H5Dataset> {
-let mut builder = match T::DTYPE {
+let dtype = T::DTYPE;
+let mut builder = match dtype {
ScalarType::U8 => group.new_dataset::<u8>(),
ScalarType::U16 => group.new_dataset::<u16>(),
ScalarType::U32 => group.new_dataset::<u32>(),
Expand All @@ -123,8 +124,10 @@ fn new_dataset<T: BackendData>(
};

builder = if let Some(compression) = config.compression {
-builder.deflate(compression)
-//builder.blosc_blosclz(compression, hdf5::filters::BloscShuffle::Byte)
+match dtype {
+ScalarType::String => builder.deflate(compression),
+_ => builder.blosc_zstd(compression, hdf5::filters::BloscShuffle::Byte),
+}
} else {
builder
};
Expand Down
6 changes: 4 additions & 2 deletions python/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ def test_nullable(tmp_path):
adata.write(file)

adata = read(file)
-assert math.isnan(adata.X[0, 0])
-assert math.isinf(adata.X[0, 1])
+assert math.isnan(adata.X[:][0, 0])
+assert math.isinf(adata.X[:][0, 1])

adata.uns['df'] = pd.DataFrame({"test": pd.Series(["a", "b", np.nan, "a"], dtype="category")})

Expand Down Expand Up @@ -177,6 +177,8 @@ def test_create_anndataset(x1, x2, x3, tmp_path, backend):
)
assert dataset.n_obs == 0
assert dataset.n_vars == 0

dataset.to_adata()

# dense array
adata1 = AnnData(X=x1, filename=h5ad(tmp_path), backend=backend)
Expand Down

0 comments on commit 8db0474

Please sign in to comment.