Skip to content

Commit

Permalink
add zarr backend
Browse files Browse the repository at this point in the history
  • Loading branch information
kaizhang committed Nov 9, 2024
1 parent a3ba6ff commit e3d06d8
Show file tree
Hide file tree
Showing 19 changed files with 158 additions and 123 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
cd ${GITHUB_WORKSPACE}/anndata && cargo test --no-fail-fast
cd ${GITHUB_WORKSPACE}/anndata-test-utils && cargo test --no-fail-fast
cd ${GITHUB_WORKSPACE}/python && pip install --user .[test]
pytest ${GITHUB_WORKSPACE}/python/tests
pytest -v --durations=0 ${GITHUB_WORKSPACE}/python/tests
- name: benchmark
run: |
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ Here are the key features of this implementation:

Limitations:

- Only a subset of the h5ad specifications are implemented. For example, the `.layer`
and `.raw` is not supported. To request a missing feature, please open a new issue.
- Only a subset of the h5ad specifications are implemented. For example, the
`.raw` is not supported. To request a missing feature, please open a new issue.
- No views. Subsetting the AnnData will modify the data inplace or make a copy.

Installation
Expand Down
30 changes: 18 additions & 12 deletions anndata/src/anndata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ impl<B: Backend> std::fmt::Display for AnnData<B> {
}
}

/// Creates a child group `name` under `store` and stamps it with the
/// dict encoding metadata (`encoding-type`/`encoding-version` attributes),
/// so the group is recognized as a mapping by AnnData readers.
pub(crate) fn new_mapping<G: GroupOp<B>, B: Backend>(store: &G, name: &str) -> Result<B::Group> {
    let mut group = store.new_group(name)?;
    MAPPING_ENCODING.save(&mut group)?;
    Ok(group)
}

// Helper function to create a new observation matrix (obsm)
pub(crate) fn new_obsm<B: Backend>(group: B::Group, n_obs: &Dim) -> Result<AxisArrays<B>> {
AxisArrays::new(group, Axis::Row, n_obs, None)
Expand Down Expand Up @@ -175,32 +181,32 @@ impl<B: Backend> AnnData<B> {
Slot::none()
};

let obsm = match file.open_group("obsm").or(file.new_group("obsm")) {
let obsm = match file.open_group("obsm").or(new_mapping(&file, "obsm")) {
Ok(group) => new_obsm(group, &n_obs)?,
_ => AxisArrays::empty(),
};

let obsp = match file.open_group("obsp").or(file.new_group("obsp")) {
let obsp = match file.open_group("obsp").or(new_mapping(&file, "obsp")) {
Ok(group) => new_obsp(group, &n_obs)?,
_ => AxisArrays::empty(),
};

let varm = match file.open_group("varm").or(file.new_group("varm")) {
let varm = match file.open_group("varm").or(new_mapping(&file, "varm")) {
Ok(group) => new_varm(group, &n_vars)?,
_ => AxisArrays::empty(),
};

let varp = match file.open_group("varp").or(file.new_group("varp")) {
let varp = match file.open_group("varp").or(new_mapping(&file, "varp")) {
Ok(group) => new_varp(group, &n_vars)?,
_ => AxisArrays::empty(),
};

let uns = match file.open_group("uns").or(file.new_group("uns")) {
let uns = match file.open_group("uns").or(new_mapping(&file, "uns")) {
Ok(group) => ElemCollection::new(group)?,
_ => ElemCollection::empty(),
};

let layers = match file.open_group("layers").or(file.new_group("layers")) {
let layers = match file.open_group("layers").or(new_mapping(&file, "layers")) {
Ok(group) => new_layers(group, &n_obs, &n_vars)?,
_ => AxisArrays::empty(),
};
Expand Down Expand Up @@ -230,12 +236,12 @@ impl<B: Backend> AnnData<B> {
x: Slot::none(),
obs: Slot::none(),
var: Slot::none(),
obsm: new_obsm(file.new_group("obsm")?, &n_obs)?,
obsp: new_obsp(file.new_group("obsp")?, &n_obs)?,
varm: new_varm(file.new_group("varm")?, &n_vars)?,
varp: new_varp(file.new_group("varp")?, &n_vars)?,
uns: ElemCollection::new(file.new_group("uns")?)?,
layers: new_layers(file.new_group("layers")?, &n_obs, &n_vars)?,
obsm: new_obsm(new_mapping(&file, "obsm")?, &n_obs)?,
obsp: new_obsp(new_mapping(&file, "obsp")?, &n_obs)?,
varm: new_varm(new_mapping(&file, "varm")?, &n_vars)?,
varp: new_varp(new_mapping(&file, "varp")?, &n_vars)?,
uns: ElemCollection::new(new_mapping(&file, "uns")?)?,
layers: new_layers(new_mapping(&file, "layers")?, &n_obs, &n_vars)?,
file,
n_obs,
n_vars,
Expand Down
11 changes: 4 additions & 7 deletions anndata/src/container/collection.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
use crate::{
backend::{iter_containers, AttributeOp, Backend, GroupOp},
container::base::*,
data::*,
ElemCollectionOp,
anndata::new_mapping, backend::{iter_containers, AttributeOp, Backend, GroupOp}, container::base::*, data::*, ElemCollectionOp
};

use anyhow::{bail, ensure, Result};
Expand Down Expand Up @@ -71,7 +68,7 @@ impl<B: Backend> InnerElemCollection<B> {
}

pub fn export<O: Backend, G: GroupOp<O>>(&self, location: &G, name: &str) -> Result<()> {
let group = location.new_group(name)?;
let group = new_mapping(location, name)?;
for (key, val) in self.iter() {
val.inner().export::<O, _>(&group, key)?;
}
Expand Down Expand Up @@ -389,7 +386,7 @@ impl<B: Backend> InnerAxisArrays<B> {
}

pub fn export<O: Backend, G: GroupOp<O>>(&self, location: &G, name: &str) -> Result<()> {
let group = location.new_group(name)?;
let group = new_mapping(location, name)?;
for (key, val) in self.iter() {
val.inner().export::<O, _>(&group, key)?;
}
Expand All @@ -409,7 +406,7 @@ impl<B: Backend> InnerAxisArrays<B> {
if selection.into_iter().all(|x| x.as_ref().is_full()) {
self.export::<O, _>(location, name)
} else {
let group = location.new_group(name)?;
let group = new_mapping(location, name)?;
match self.axis {
Axis::Row => {
if selection.len() != 1 {
Expand Down
4 changes: 2 additions & 2 deletions anndata/src/data/array/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ impl Writable for DataFrame {
} else {
location.new_group(name)?
};
self.metadata().save_metadata(&mut group)?;
self.metadata().save(&mut group)?;

self.iter().try_for_each(|x| {
write_series(x, &group, x.name())?;
Expand Down Expand Up @@ -85,7 +85,7 @@ impl Writable for DataFrame {
write_series(x, container.as_group()?, x.name())?;
anyhow::Ok(())
})?;
self.metadata().save_metadata(&mut container)?;
self.metadata().save(&mut container)?;

Ok(container)
}
Expand Down
4 changes: 2 additions & 2 deletions anndata/src/data/array/dense.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ impl<'a, T: BackendData, D: Dimension> Writable for ArrayView<'a, T, D> {
) -> Result<DataContainer<B>> {
let dataset = location.new_array_dataset(name, self.into(), Default::default())?;
let mut container = DataContainer::<B>::Dataset(dataset);
self.metadata().save_metadata(&mut container)?;
self.metadata().save(&mut container)?;
Ok(container)
}
}
Expand Down Expand Up @@ -238,7 +238,7 @@ impl Writable for CategoricalArray {
name: &str,
) -> Result<DataContainer<B>> {
let mut group = location.new_group(name)?;
self.metadata().save_metadata(&mut group)?;
self.metadata().save(&mut group)?;

group.new_array_dataset(
"codes",
Expand Down
2 changes: 1 addition & 1 deletion anndata/src/data/array/dense/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ macro_rules! impl_from_dynscalar {
fn write<B: Backend, G: GroupOp<B>>(&self, location: &G, name: &str) -> Result<DataContainer<B>> {
let dataset = location.new_scalar_dataset(name, self)?;
let mut container = DataContainer::Dataset(dataset);
self.metadata().save_metadata(&mut container)?;
self.metadata().save(&mut container)?;
Ok(container)
}
}
Expand Down
2 changes: 1 addition & 1 deletion anndata/src/data/array/sparse/csc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ impl<T: BackendData> Writable for CscMatrix<T> {
let mut group = location.new_group(name)?;
let shape = self.shape();

self.metadata().save_metadata(&mut group)?;
self.metadata().save(&mut group)?;
group.new_array_dataset("data", self.values().into(), Default::default())?;

let num_rows = shape[0];
Expand Down
2 changes: 1 addition & 1 deletion anndata/src/data/array/sparse/csr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ impl<T: BackendData> Writable for CsrMatrix<T> {
let mut group = location.new_group(name)?;
let shape = self.shape();

self.metadata().save_metadata(&mut group)?;
self.metadata().save(&mut group)?;
group.new_array_dataset("data", self.values().into(), Default::default())?;

let num_cols = shape[1];
Expand Down
2 changes: 1 addition & 1 deletion anndata/src/data/array/sparse/noncanonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ impl<T: BackendData> Writable for CsrNonCanonical<T> {
let mut group = location.new_group(name)?;
let shape = self.shape();

self.metadata().save_metadata(&mut group)?;
self.metadata().save(&mut group)?;

group.new_array_dataset("data", self.values().into(), Default::default())?;

Expand Down
8 changes: 7 additions & 1 deletion anndata/src/data/data_traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ use crate::data::{
use anyhow::Result;
use serde_json::Value;

/// Shared encoding metadata for mapping-like groups (obsm, varm, uns, layers, …):
/// written as `encoding-type = "dict"` / `encoding-version = "0.1.0"` attributes
/// on the group so on-disk readers recognize it as a mapping.
pub(crate) const MAPPING_ENCODING: MetaData = MetaData {
    encoding_type: "dict",
    version: "0.1.0",
    metadata: None, // no extra attributes beyond type/version
};

pub struct MetaData {
encoding_type: &'static str,
version: &'static str,
Expand All @@ -28,7 +34,7 @@ impl MetaData {
}
}

pub(crate) fn save_metadata<B: Backend, A: AttributeOp<B>>(self, loc: &mut A) -> Result<()> {
pub(crate) fn save<B: Backend, A: AttributeOp<B>>(self, loc: &mut A) -> Result<()> {
loc.new_attr("encoding-type", self.encoding_type)?;
loc.new_attr("encoding-version", self.version)?;
if let Some(metadata) = self.metadata {
Expand Down
4 changes: 2 additions & 2 deletions anndata/src/data/mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ impl Element for Mapping {
}

fn metadata(&self) -> MetaData {
MetaData::new("dict", "0.1.0", None)
crate::data::MAPPING_ENCODING
}
}

Expand All @@ -52,7 +52,7 @@ impl Readable for Mapping {
impl Writable for Mapping {
fn write<B: Backend, G: GroupOp<B>>(&self, location: &G, name: &str) -> Result<DataContainer<B>> {
let mut group = location.new_group(name)?;
self.metadata().save_metadata(&mut group)?;
self.metadata().save(&mut group)?;
self.0
.iter()
.try_for_each(|(k, v)| v.write(&group, k).map(|_| ()))?;
Expand Down
44 changes: 18 additions & 26 deletions anndata/src/traits.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
anndata::{new_layers, new_obsm, new_obsp, new_varm, new_varp},
backend::{GroupOp, DataType},
anndata::{new_layers, new_mapping, new_obsm, new_obsp, new_varm, new_varp},
backend::DataType,
container::{ChunkedArrayElem, InnerDataFrameElem, StackedChunkedArrayElem},
data::*,
AnnData, AnnDataSet, ArrayElem, AxisArrays, Backend, ElemCollection, StackedArrayElem,
Expand Down Expand Up @@ -346,10 +346,7 @@ impl<B: Backend> AnnDataOp for AnnData<B> {

fn uns(&self) -> Self::ElemCollectionRef<'_> {
if self.uns.is_none() {
let elems = self
.file
.new_group("uns")
.and_then(|g| ElemCollection::new(g));
let elems = new_mapping(&self.file, "uns").and_then(ElemCollection::new);
if let Ok(uns) = elems {
self.uns.swap(&uns);
}
Expand All @@ -358,10 +355,9 @@ impl<B: Backend> AnnDataOp for AnnData<B> {
}
fn obsm(&self) -> Self::AxisArraysRef<'_> {
if self.obsm.is_none() {
let arrays = self
.file
.new_group("obsm")
.and_then(|g| new_obsm(g, &self.n_obs));
let arrays = new_mapping(&self.file, "obsm").and_then(|g|
new_obsm(g, &self.n_obs)
);
if let Ok(obsm) = arrays {
self.obsm.swap(&obsm);
}
Expand All @@ -370,10 +366,9 @@ impl<B: Backend> AnnDataOp for AnnData<B> {
}
fn obsp(&self) -> Self::AxisArraysRef<'_> {
if self.obsp.is_none() {
let arrays = self
.file
.new_group("obsp")
.and_then(|g| new_obsp(g, &self.n_obs));
let arrays = new_mapping(&self.file, "obsp").and_then(|g|
new_obsp(g, &self.n_obs)
);
if let Ok(obsp) = arrays {
self.obsp.swap(&obsp);
}
Expand All @@ -382,10 +377,9 @@ impl<B: Backend> AnnDataOp for AnnData<B> {
}
fn varm(&self) -> Self::AxisArraysRef<'_> {
if self.varm.is_none() {
let arrays = self
.file
.new_group("varm")
.and_then(|g| new_varm(g, &self.n_vars));
let arrays = new_mapping(&self.file, "varm").and_then(|g|
new_varm(g, &self.n_vars)
);
if let Ok(varm) = arrays {
self.varm.swap(&varm);
}
Expand All @@ -394,10 +388,9 @@ impl<B: Backend> AnnDataOp for AnnData<B> {
}
fn varp(&self) -> Self::AxisArraysRef<'_> {
if self.varp.is_none() {
let arrays = self
.file
.new_group("varp")
.and_then(|g| new_varp(g, &self.n_vars));
let arrays = new_mapping(&self.file, "varp").and_then(|g|
new_varp(g, &self.n_vars)
);
if let Ok(varp) = arrays {
self.varp.swap(&varp);
}
Expand All @@ -406,10 +399,9 @@ impl<B: Backend> AnnDataOp for AnnData<B> {
}
fn layers(&self) -> Self::AxisArraysRef<'_> {
if self.layers.is_none() {
let arrays = self
.file
.new_group("layers")
.and_then(|g| new_layers(g, &self.n_obs, &self.n_vars));
let arrays = new_mapping(&self.file, "layers").and_then(|g|
new_layers(g, &self.n_obs, &self.n_vars)
);
if let Ok(layers) = arrays {
self.layers.swap(&layers);
}
Expand Down
6 changes: 3 additions & 3 deletions pyanndata/src/anndata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,16 @@ pub fn read<'py>(py: Python<'py>, filename: PathBuf, backed: Option<&str>, backe
/// backend: Literal['hdf5'] | None
#[pyfunction]
#[pyo3(
signature = (adatas, *, join="inner", filename, backed="r+", backend=None),
text_signature = "(adatas, *, join='inner', filename, backed='r+', backend=None)",
signature = (adatas, *, join="inner", filename, backed="r+", backend=H5::NAME),
text_signature = "(adatas, *, join='inner', filename, backed='r+', backend='hdf5')",
)]
pub fn concat<'py>(
py: Python<'py>,
adatas: Vec<AnnData>,
join: &str,
filename: PathBuf,
backed: Option<&str>,
backend: Option<&str>
backend: &str,
) -> Result<PyObject> {
//let adatas = adatas.into_iter().map(|x| x.take_inner()).collect::<Vec<_>>();
todo!()
Expand Down
2 changes: 1 addition & 1 deletion python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "anndata_rs"
version = "0.4.0"
version = "0.5.0"
edition = "2021"
authors = ["Kai Zhang <[email protected]>"]
description = "Rust APIs"
Expand Down
Loading

0 comments on commit e3d06d8

Please sign in to comment.