Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new trait to encode values to bytes #274

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
44 changes: 42 additions & 2 deletions heed-traits/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@

#![warn(missing_docs)]

use std::borrow::Cow;
use std::cmp::{Ord, Ordering};
use std::error::Error as StdError;
use std::io;

/// A boxed `Send + Sync + 'static` error.
pub type BoxedError = Box<dyn StdError + Send + Sync + 'static>;
Expand All @@ -21,8 +21,48 @@ pub trait BytesEncode<'a> {
/// The type to encode.
type EItem: ?Sized + 'a;

/// The type containing the encoded bytes.
type ReturnBytes: Into<Vec<u8>> + AsRef<[u8]> + 'a;

/// The error type to return when encoding goes wrong.
type Error: StdError + Send + Sync + 'static;

/// Hints which encoding entry point callers should prefer for `item`.
///
/// Implementations can return `false` to hint callers of the
/// [`bytes_encode`][BytesEncode::bytes_encode] function to use
/// [`bytes_encode_into_writer`][BytesEncode::bytes_encode_into_writer] instead, if the latter
/// runs faster (for example if it needs fewer heap allocations).
///
/// The default implementation returns `true` because the default implementation of
/// [`bytes_encode_into_writer`][BytesEncode::bytes_encode_into_writer] is to forward to
/// [`bytes_encode`][BytesEncode::bytes_encode], so nothing is gained by going through the writer.
fn zero_copy(item: &Self::EItem) -> bool {
// Explicitly discarding `item` is preferred to renaming the function parameter (to `_item`)
// because IDEs can autofill trait implementations, which would then default the parameter
// name to `_item`; renaming could probably also interfere with clippy's
// `renamed_function_params` lint.
let _ = item;

true
}

/// Encode the given item as bytes.
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError>;
fn bytes_encode(item: &'a Self::EItem) -> Result<Self::ReturnBytes, Self::Error>;

/// Encodes `item` and writes the resulting bytes into `writer`.
///
/// The provided implementation simply calls [`bytes_encode`][BytesEncode::bytes_encode] and
/// passes the returned bytes to `write_all`.
///
/// Implementors overriding this method should also read the documentation of
/// [`zero_copy`][BytesEncode::zero_copy].
///
/// # Errors
///
/// Returns the encoding error or the I/O error of the writer, boxed as a [`BoxedError`].
fn bytes_encode_into_writer<W: io::Write>(
    item: &'a Self::EItem,
    mut writer: W,
) -> Result<(), BoxedError> {
    let encoded = Self::bytes_encode(item)?;
    writer.write_all(encoded.as_ref()).map_err(Into::into)
}
}

/// A trait that represents a decoding structure.
Expand Down
33 changes: 30 additions & 3 deletions heed-types/src/bytes.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::convert::Infallible;

use heed_traits::{BoxedError, BytesDecode, BytesEncode};

Expand All @@ -11,8 +11,12 @@ pub enum Bytes {}
impl<'a> BytesEncode<'a> for Bytes {
type EItem = [u8];

fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(item))
type ReturnBytes = &'a [u8];

type Error = Infallible;

fn bytes_encode(item: &'a Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
Ok(item)
}
}

Expand All @@ -23,3 +27,26 @@ impl<'a> BytesDecode<'a> for Bytes {
Ok(bytes)
}
}

/// A codec for byte arrays of a fixed length.
///
/// Like [`Bytes`], but always contains exactly `N` (the generic parameter) bytes.
/// The enum has no variants, so it can never be instantiated.
pub enum FixedSizeBytes<const N: usize> {}

impl<'a, const N: usize> BytesEncode<'a> for FixedSizeBytes<N> {
    /// Items are arrays of exactly `N` bytes.
    type EItem = [u8; N];

    // TODO &'a [u8; N] or [u8; N]
    /// The encoded form is an owned array of `N` bytes.
    type ReturnBytes = [u8; N];

    /// Encoding a fixed-size array cannot fail.
    type Error = Infallible;

    /// Returns a copy of the input array; the bytes are not transformed.
    fn bytes_encode(item: &'a Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
        let copied: [u8; N] = *item;
        Ok(copied)
    }
}

impl<'a, const N: usize> BytesDecode<'a> for FixedSizeBytes<N> {
    // TODO &'a [u8; N] or [u8; N]
    /// Decoded items are owned arrays of exactly `N` bytes.
    type DItem = [u8; N];

    /// Converts `bytes` into an `N`-byte array.
    ///
    /// # Errors
    ///
    /// Returns the boxed slice-to-array conversion error when `bytes` is not exactly `N`
    /// bytes long.
    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let array: [u8; N] = bytes.try_into()?;
        Ok(array)
    }
}
28 changes: 20 additions & 8 deletions heed-types/src/integer.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::convert::Infallible;
use std::marker::PhantomData;
use std::mem::size_of;

Expand All @@ -11,8 +11,12 @@ pub struct U8;
impl BytesEncode<'_> for U8 {
type EItem = u8;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::from([*item].to_vec()))
type ReturnBytes = [u8; 1];

type Error = Infallible;

fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
Ok([*item])
}
}

Expand All @@ -30,8 +34,12 @@ pub struct I8;
impl BytesEncode<'_> for I8 {
type EItem = i8;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::from([*item as u8].to_vec()))
type ReturnBytes = [u8; 1];

type Error = Infallible;

fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
Ok([*item as u8])
}
}

Expand All @@ -54,10 +62,14 @@ macro_rules! define_type {
impl<O: ByteOrder> BytesEncode<'_> for $name<O> {
type EItem = $native;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut buf = vec![0; size_of::<Self::EItem>()];
type ReturnBytes = [u8; size_of::<$native>()];

type Error = Infallible;

fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
let mut buf = [0; size_of::<$native>()];
O::$write_method(&mut buf, *item);
Ok(Cow::from(buf))
Ok(buf)
}
}

Expand Down
22 changes: 18 additions & 4 deletions heed-types/src/serde_bincode.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::borrow::Cow;

use heed_traits::{BoxedError, BytesDecode, BytesEncode};
use serde::{Deserialize, Serialize};

Expand All @@ -14,8 +12,24 @@ where
{
type EItem = T;

fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
bincode::serialize(item).map(Cow::Owned).map_err(Into::into)
type ReturnBytes = Vec<u8>;

type Error = bincode::Error;

/// Always returns `false`, hinting callers to use
/// `bytes_encode_into_writer`, which this codec overrides to serialize directly into the
/// writer.
fn zero_copy(_item: &Self::EItem) -> bool {
false
}

/// Serializes `item` with `bincode::serialize` and returns the resulting `Vec<u8>`.
fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
bincode::serialize(item)
}

/// Serializes `item` with bincode, writing the output into `writer` instead of returning
/// a `Vec<u8>`.
///
/// # Errors
///
/// Returns the bincode error, boxed as a `BoxedError`.
fn bytes_encode_into_writer<W: std::io::Write>(
    item: &'a Self::EItem,
    writer: W,
) -> Result<(), BoxedError> {
    bincode::serialize_into(writer, item).map_err(Into::into)
}
}

Expand Down
22 changes: 18 additions & 4 deletions heed-types/src/serde_json.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::borrow::Cow;

use heed_traits::{BoxedError, BytesDecode, BytesEncode};
use serde::{Deserialize, Serialize};

Expand All @@ -14,8 +12,24 @@ where
{
type EItem = T;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
serde_json::to_vec(item).map(Cow::Owned).map_err(Into::into)
type ReturnBytes = Vec<u8>;

type Error = serde_json::Error;

/// Always returns `false`, hinting callers to use
/// `bytes_encode_into_writer`, which this codec overrides to serialize directly into the
/// writer.
fn zero_copy(_item: &Self::EItem) -> bool {
false
}

/// Serializes `item` with `serde_json::to_vec` and returns the resulting `Vec<u8>`.
fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
serde_json::to_vec(item)
}

/// Serializes `item` as JSON, writing the output into `writer` instead of returning a
/// `Vec<u8>`.
///
/// # Errors
///
/// Returns the `serde_json` error, boxed as a `BoxedError`.
fn bytes_encode_into_writer<W: std::io::Write>(
    item: &'a Self::EItem,
    writer: W,
) -> Result<(), BoxedError> {
    serde_json::to_writer(writer, item).map_err(Into::into)
}
}

Expand Down
22 changes: 18 additions & 4 deletions heed-types/src/serde_rmp.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::borrow::Cow;

use heed_traits::{BoxedError, BytesDecode, BytesEncode};
use serde::{Deserialize, Serialize};

Expand All @@ -14,8 +12,24 @@ where
{
type EItem = T;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
rmp_serde::to_vec(item).map(Cow::Owned).map_err(Into::into)
type ReturnBytes = Vec<u8>;

type Error = rmp_serde::encode::Error;

/// Always returns `false`, hinting callers to use
/// `bytes_encode_into_writer`, which this codec overrides to serialize directly into the
/// writer.
fn zero_copy(_item: &Self::EItem) -> bool {
false
}

/// Serializes `item` with `rmp_serde::to_vec` and returns the resulting `Vec<u8>`.
fn bytes_encode(item: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
rmp_serde::to_vec(item)
}

/// Serializes `item` with `rmp_serde`, writing the output into `writer` instead of
/// returning a `Vec<u8>`.
///
/// # Errors
///
/// Returns the `rmp_serde` encoding error, boxed as a `BoxedError`.
fn bytes_encode_into_writer<W: std::io::Write>(
    item: &'a Self::EItem,
    mut writer: W,
) -> Result<(), BoxedError> {
    rmp_serde::encode::write(&mut writer, item).map_err(Into::into)
}
}

Expand Down
17 changes: 10 additions & 7 deletions heed-types/src/str.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
use std::borrow::Cow;
use std::str;
use std::convert::Infallible;

use heed_traits::{BoxedError, BytesDecode, BytesEncode};

/// Describes a [`prim@str`].
/// Describes a [`str`].
pub enum Str {}

impl BytesEncode<'_> for Str {
impl<'a> BytesEncode<'a> for Str {
type EItem = str;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
type ReturnBytes = &'a [u8];

type Error = Infallible;

fn bytes_encode(item: &'a Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
Ok(item.as_bytes())
}
}

impl<'a> BytesDecode<'a> for Str {
type DItem = &'a str;

fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
str::from_utf8(bytes).map_err(Into::into)
std::str::from_utf8(bytes).map_err(Into::into)
}
}
10 changes: 7 additions & 3 deletions heed-types/src/unit.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Cow;
use std::convert::Infallible;
use std::{error, fmt};

use heed_traits::{BoxedError, BytesDecode, BytesEncode};
Expand All @@ -9,8 +9,12 @@ pub enum Unit {}
impl BytesEncode<'_> for Unit {
type EItem = ();

fn bytes_encode(_item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(&[]))
type ReturnBytes = [u8; 0];

type Error = Infallible;

fn bytes_encode(&(): &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
Ok([])
}
}

Expand Down
35 changes: 21 additions & 14 deletions heed/src/cookbook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,15 @@
//! to create codecs to encode prefixes when possible instead of using a slice of bytes.
//!
//! ```
//! use std::borrow::Cow;
//! use std::convert::Infallible;
//! use std::error::Error;
//! use std::fs;
//! use std::path::Path;
//!
//! use heed::types::*;
//! use heed::{BoxedError, BytesDecode, BytesEncode, Database, EnvOpenOptions};
//!
//! #[derive(Debug, PartialEq, Eq)]
//! #[derive(Debug, Clone, Copy, PartialEq, Eq)]
//! pub enum Level {
//! Debug,
//! Warn,
Expand All @@ -172,18 +172,20 @@
//! impl<'a> BytesEncode<'a> for LogKeyCodec {
//! type EItem = LogKey;
//!
//! type ReturnBytes = [u8; 5];
//!
//! type Error = Infallible;
//!
//! /// Encodes the u32 timestamp in big endian followed by the log level with a single byte.
//! fn bytes_encode(log: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
//! let (timestamp_bytes, level_byte) = match log {
//! LogKey { timestamp, level: Level::Debug } => (timestamp.to_be_bytes(), 0),
//! LogKey { timestamp, level: Level::Warn } => (timestamp.to_be_bytes(), 1),
//! LogKey { timestamp, level: Level::Error } => (timestamp.to_be_bytes(), 2),
//! };
//! fn bytes_encode(log: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
//! let mut output = [0; 5];
//!
//! let mut output = Vec::new();
//! output.extend_from_slice(&timestamp_bytes);
//! output.push(level_byte);
//! Ok(Cow::Owned(output))
//! let [timestamp @ .., level] = &mut output;
//!
//! *timestamp = log.timestamp.to_be_bytes();
//! *level = log.level as u8;
//!
//! Ok(output)
//! }
//! }
//!
Expand Down Expand Up @@ -218,9 +220,14 @@
//! impl<'a> BytesEncode<'a> for LogAtHalfTimestampCodec {
//! type EItem = u32;
//!
//! type ReturnBytes = [u8; 2];
//!
//! type Error = Infallible;
//!
//! /// This method encodes only the prefix of the keys in this particular case, the timestamp.
//! fn bytes_encode(half_timestamp: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
//! Ok(Cow::Owned(half_timestamp.to_be_bytes()[..2].to_vec()))
//! fn bytes_encode(half_timestamp: &Self::EItem) -> Result<Self::ReturnBytes, Self::Error> {
//! let [bytes @ .., _, _] = half_timestamp.to_be_bytes();
//! Ok(bytes)
//! }
//! }
//!
Expand Down
Loading
Loading