From 67b9c8674662182d75fdb694c30728f37da99406 Mon Sep 17 00:00:00 2001 From: oXtxNt9U <120286271+oXtxNt9U@users.noreply.github.com> Date: Wed, 25 Sep 2024 09:30:14 +0900 Subject: [PATCH 1/3] allow setting custom dupsort comparator --- heed/src/database.rs | 30 ++++++++++++++++++++++-------- heed/src/env.rs | 25 ++++++++++++++++++------- heed/src/mdb/lmdb_ffi.rs | 6 +++--- 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/heed/src/database.rs b/heed/src/database.rs index eecc7f39..ed0b0c21 100644 --- a/heed/src/database.rs +++ b/heed/src/database.rs @@ -55,9 +55,9 @@ use crate::*; /// # Ok(()) } /// ``` #[derive(Debug)] -pub struct DatabaseOpenOptions<'e, 'n, KC, DC, C = DefaultComparator> { +pub struct DatabaseOpenOptions<'e, 'n, KC, DC, C = DefaultComparator, CDUP = DefaultComparator> { env: &'e Env, - types: marker::PhantomData<(KC, DC, C)>, + types: marker::PhantomData<(KC, DC, C, CDUP)>, name: Option<&'n str>, flags: AllDatabaseFlags, } @@ -74,7 +74,7 @@ impl<'e> DatabaseOpenOptions<'e, 'static, Unspecified, Unspecified> { } } -impl<'e, 'n, KC, DC, C> DatabaseOpenOptions<'e, 'n, KC, DC, C> { +impl<'e, 'n, KC, DC, C, CDUP> DatabaseOpenOptions<'e, 'n, KC, DC, C, CDUP> { /// Change the type of the database. /// /// The default types are [`Unspecified`] and require a call to [`Database::remap_types`] @@ -90,7 +90,19 @@ impl<'e, 'n, KC, DC, C> DatabaseOpenOptions<'e, 'n, KC, DC, C> { /// Change the customized key compare function of the database. /// /// By default no customized compare function will be set when opening a database. - pub fn key_comparator(self) -> DatabaseOpenOptions<'e, 'n, KC, DC, NC> { + pub fn key_comparator(self) -> DatabaseOpenOptions<'e, 'n, KC, DC, NC, CDUP> { + DatabaseOpenOptions { + env: self.env, + types: Default::default(), + name: self.name, + flags: self.flags, + } + } + + /// Change the customized dup sort compare function of the database. 
+ /// + /// By default no customized compare function will be set when opening a database. + pub fn dup_sort_comparator(self) -> DatabaseOpenOptions<'e, 'n, KC, DC, C, NCDUP> { DatabaseOpenOptions { env: self.env, types: Default::default(), @@ -136,10 +148,11 @@ impl<'e, 'n, KC, DC, C> DatabaseOpenOptions<'e, 'n, KC, DC, C> { KC: 'static, DC: 'static, C: Comparator + 'static, + CDUP: Comparator + 'static, { assert_eq_env_txn!(self.env, rtxn); - match self.env.raw_init_database::(rtxn.txn.unwrap(), self.name, self.flags) { + match self.env.raw_init_database::(rtxn.txn.unwrap(), self.name, self.flags) { Ok(dbi) => Ok(Some(Database::new(self.env.env_mut_ptr() as _, dbi))), Err(Error::Mdb(e)) if e.not_found() => Ok(None), Err(e) => Err(e), @@ -160,24 +173,25 @@ impl<'e, 'n, KC, DC, C> DatabaseOpenOptions<'e, 'n, KC, DC, C> { KC: 'static, DC: 'static, C: Comparator + 'static, + CDUP: Comparator + 'static, { assert_eq_env_txn!(self.env, wtxn); let flags = self.flags | AllDatabaseFlags::CREATE; - match self.env.raw_init_database::(wtxn.txn.txn.unwrap(), self.name, flags) { + match self.env.raw_init_database::(wtxn.txn.txn.unwrap(), self.name, flags) { Ok(dbi) => Ok(Database::new(self.env.env_mut_ptr() as _, dbi)), Err(e) => Err(e), } } } -impl Clone for DatabaseOpenOptions<'_, '_, KC, DC, C> { +impl Clone for DatabaseOpenOptions<'_, '_, KC, DC, C, CDUP> { fn clone(&self) -> Self { *self } } -impl Copy for DatabaseOpenOptions<'_, '_, KC, DC, C> {} +impl Copy for DatabaseOpenOptions<'_, '_, KC, DC, C, CDUP> {} /// A typed database that accepts only the types it was created with. 
/// diff --git a/heed/src/env.rs b/heed/src/env.rs index 9d432160..010e1fc5 100644 --- a/heed/src/env.rs +++ b/heed/src/env.rs @@ -606,7 +606,8 @@ impl Env { let rtxn = self.read_txn()?; // Open the main database - let dbi = self.raw_open_dbi::(rtxn.txn.unwrap(), None, 0)?; + let dbi = + self.raw_open_dbi::(rtxn.txn.unwrap(), None, 0)?; // We're going to iterate on the unnamed database let mut cursor = RoCursor::new(&rtxn, dbi)?; @@ -619,9 +620,11 @@ impl Env { let key = String::from_utf8(key.to_vec()).unwrap(); // Calling `ffi::db_stat` on a database instance does not involve key comparison // in LMDB, so it's safe to specify a noop key compare function for it. - if let Ok(dbi) = - self.raw_open_dbi::(rtxn.txn.unwrap(), Some(&key), 0) - { + if let Ok(dbi) = self.raw_open_dbi::( + rtxn.txn.unwrap(), + Some(&key), + 0, + ) { let mut stat = mem::MaybeUninit::uninit(); let mut txn = rtxn.txn.unwrap(); unsafe { mdb_result(ffi::mdb_stat(txn.as_mut(), dbi, stat.as_mut_ptr()))? }; @@ -697,19 +700,19 @@ impl Env { options.create(wtxn) } - pub(crate) fn raw_init_database( + pub(crate) fn raw_init_database( &self, raw_txn: NonNull, name: Option<&str>, flags: AllDatabaseFlags, ) -> Result { - match self.raw_open_dbi::(raw_txn, name, flags.bits()) { + match self.raw_open_dbi::(raw_txn, name, flags.bits()) { Ok(dbi) => Ok(dbi), Err(e) => Err(e.into()), } } - fn raw_open_dbi( + fn raw_open_dbi( &self, mut raw_txn: NonNull, name: Option<&str>, @@ -733,6 +736,14 @@ impl Env { Some(custom_key_cmp_wrapper::), ))?; } + + if TypeId::of::() != TypeId::of::() { + mdb_result(ffi::mdb_set_dupsort( + raw_txn, + dbi, + Some(custom_key_cmp_wrapper::), + ))?; + } }; Ok(dbi) diff --git a/heed/src/mdb/lmdb_ffi.rs b/heed/src/mdb/lmdb_ffi.rs index 966a53e0..ce99b460 100644 --- a/heed/src/mdb/lmdb_ffi.rs +++ b/heed/src/mdb/lmdb_ffi.rs @@ -6,9 +6,9 @@ pub use ffi::{ mdb_env_get_fd, mdb_env_get_flags, mdb_env_get_maxkeysize, mdb_env_info, mdb_env_open, mdb_env_set_flags, mdb_env_set_mapsize, 
mdb_env_set_maxdbs, mdb_env_set_maxreaders, mdb_env_stat, mdb_env_sync, mdb_filehandle_t, mdb_get, mdb_put, mdb_reader_check, - mdb_set_compare, mdb_stat, mdb_txn_abort, mdb_txn_begin, mdb_txn_commit, mdb_version, - MDB_cursor, MDB_dbi, MDB_env, MDB_stat, MDB_txn, MDB_val, MDB_CP_COMPACT, MDB_CURRENT, - MDB_RDONLY, MDB_RESERVE, + mdb_set_compare, mdb_set_dupsort, mdb_stat, mdb_txn_abort, mdb_txn_begin, mdb_txn_commit, + mdb_version, MDB_cursor, MDB_dbi, MDB_env, MDB_stat, MDB_txn, MDB_val, MDB_CP_COMPACT, + MDB_CURRENT, MDB_RDONLY, MDB_RESERVE, }; use lmdb_master_sys as ffi; From fbf6db25575729e03fc397190d1489be0604d665 Mon Sep 17 00:00:00 2001 From: oXtxNt9U <120286271+oXtxNt9U@users.noreply.github.com> Date: Wed, 25 Sep 2024 09:59:40 +0900 Subject: [PATCH 2/3] custom dup sort example --- heed/examples/custom-dupsort-comparator.rs | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 heed/examples/custom-dupsort-comparator.rs diff --git a/heed/examples/custom-dupsort-comparator.rs b/heed/examples/custom-dupsort-comparator.rs new file mode 100644 index 00000000..7a380acb --- /dev/null +++ b/heed/examples/custom-dupsort-comparator.rs @@ -0,0 +1,62 @@ +use std::cmp::Ordering; +use std::error::Error; +use std::fs; +use std::path::Path; + +use byteorder::BigEndian; +use heed::{DatabaseFlags, EnvOpenOptions}; +use heed_traits::Comparator; +use heed_types::{Str, U128}; + +enum DescendingIntCmp {} + +impl Comparator for DescendingIntCmp { + fn compare(a: &[u8], b: &[u8]) -> Ordering { + b.cmp(&a) + } +} + +fn main() -> Result<(), Box> { + let env_path = Path::new("target").join("custom-dupsort-cmp.mdb"); + + let _ = fs::remove_dir_all(&env_path); + + fs::create_dir_all(&env_path)?; + let env = unsafe { + EnvOpenOptions::new() + .map_size(10 * 1024 * 1024) // 10MB + .max_dbs(3) + .open(env_path)? 
+ }; + + let mut wtxn = env.write_txn()?; + let db = env + .database_options() + .types::>() + .flags(DatabaseFlags::DUP_SORT) + .dup_sort_comparator::() + .create(&mut wtxn)?; + wtxn.commit()?; + + let mut wtxn = env.write_txn()?; + + // We fill our database with entries. + db.put(&mut wtxn, "1", &1)?; + db.put(&mut wtxn, "1", &2)?; + db.put(&mut wtxn, "1", &3)?; + db.put(&mut wtxn, "2", &4)?; + db.put(&mut wtxn, "1", &5)?; + db.put(&mut wtxn, "0", &0)?; + + // We check that the keys are in lexicographic and values in descending order. + let mut iter = db.iter(&wtxn)?; + assert_eq!(iter.next().transpose()?, Some(("0", 0))); + assert_eq!(iter.next().transpose()?, Some(("1", 5))); + assert_eq!(iter.next().transpose()?, Some(("1", 3))); + assert_eq!(iter.next().transpose()?, Some(("1", 2))); + assert_eq!(iter.next().transpose()?, Some(("1", 1))); + assert_eq!(iter.next().transpose()?, Some(("2", 4))); + drop(iter); + + Ok(()) +} From fd3304247a685a33ea002e2e67f3c7a77a7414dd Mon Sep 17 00:00:00 2001 From: oXtxNt9U <120286271+oXtxNt9U@users.noreply.github.com> Date: Wed, 25 Sep 2024 10:00:34 +0900 Subject: [PATCH 3/3] update cookbook examples --- heed/src/cookbook.rs | 152 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 1 deletion(-) diff --git a/heed/src/cookbook.rs b/heed/src/cookbook.rs index f2cfd905..30affc8e 100644 --- a/heed/src/cookbook.rs +++ b/heed/src/cookbook.rs @@ -5,6 +5,8 @@ //! - [Create Custom and Prefix Codecs](#create-custom-and-prefix-codecs) //! - [Change the Environment Size Dynamically](#change-the-environment-size-dynamically) //! - [Advanced Multithreaded Access of Entries](#advanced-multithreaded-access-of-entries) +//! - [Custom Key Comparator](#custom-key-comparator) +//! - [Custom Dupsort Comparator](#custom-dupsort-comparator) //! //! # Decode Values on Demand //! @@ -445,8 +447,156 @@ //! unsafe impl Sync for ImmutableMap<'_> {} //! ``` //! +//! # Custom Key Comparator +//! +//! 
LMDB keys are sorted in lexicographic order by default. To change this behavior you can implement a custom [`Comparator`] +//! and provide it when creating the database. +//! +//! Under the hood this translates into a [`mdb_set_compare`] call. +//! +//! ``` +//! use std::cmp::Ordering; +//! use std::error::Error; +//! use std::path::Path; +//! use std::{fs, str}; +//! +//! use heed::EnvOpenOptions; +//! use heed_traits::Comparator; +//! use heed_types::{Str, Unit}; +//! +//! enum StringAsIntCmp {} +//! +//! // This function takes two strings which represent signed integers, +//! // parses them into i32s and compares the parsed values. +//! // Therefore "-1000" < "-100" must be true even without '0' padding. +//! impl Comparator for StringAsIntCmp { +//! fn compare(a: &[u8], b: &[u8]) -> Ordering { +//! let a: i32 = str::from_utf8(a).unwrap().parse().unwrap(); +//! let b: i32 = str::from_utf8(b).unwrap().parse().unwrap(); +//! a.cmp(&b) +//! } +//! } +//! +//! fn main() -> Result<(), Box> { +//! let env_path = Path::new("target").join("custom-key-cmp.mdb"); +//! +//! let _ = fs::remove_dir_all(&env_path); +//! +//! fs::create_dir_all(&env_path)?; +//! let env = unsafe { +//! EnvOpenOptions::new() +//! .map_size(10 * 1024 * 1024) // 10MB +//! .max_dbs(3) +//! .open(env_path)? +//! }; +//! +//! let mut wtxn = env.write_txn()?; +//! let db = env +//! .database_options() +//! .types::() +//! .key_comparator::() +//! .create(&mut wtxn)?; +//! wtxn.commit()?; +//! +//! let mut wtxn = env.write_txn()?; +//! +//! // We fill our database with entries. +//! db.put(&mut wtxn, "-100000", &())?; +//! db.put(&mut wtxn, "-10000", &())?; +//! db.put(&mut wtxn, "-1000", &())?; +//! db.put(&mut wtxn, "-100", &())?; +//! db.put(&mut wtxn, "100", &())?; +//! +//! // We check that the keys are in numeric order ("-100000" < "-10000" < "-1000"...) +//! let mut iter = db.iter(&wtxn)?; +//! assert_eq!(iter.next().transpose()?, Some(("-100000", ()))); +//! 
assert_eq!(iter.next().transpose()?, Some(("-10000", ()))); +//! assert_eq!(iter.next().transpose()?, Some(("-1000", ()))); +//! assert_eq!(iter.next().transpose()?, Some(("-100", ()))); +//! assert_eq!(iter.next().transpose()?, Some(("100", ()))); +//! drop(iter); +//! +//! Ok(()) +//! } +//! ``` +//! +//! # Custom Dupsort Comparator +//! +//! When using DUPSORT LMDB sorts values of the same key in lexicographic order by default. To change this behavior you can implement a custom [`Comparator`] +//! and provide it when creating the database. +//! +//! Under the hood this translates into a [`mdb_set_dupsort`] call. +//! +//! ``` +//! use std::cmp::Ordering; +//! use std::error::Error; +//! use std::fs; +//! use std::path::Path; +//! +//! use byteorder::BigEndian; +//! use heed::{DatabaseFlags, EnvOpenOptions}; +//! use heed_traits::Comparator; +//! use heed_types::{Str, U128}; +//! +//! enum DescendingIntCmp {} +//! +//! impl Comparator for DescendingIntCmp { +//! fn compare(a: &[u8], b: &[u8]) -> Ordering { +//! b.cmp(&a) +//! } +//! } +//! +//! fn main() -> Result<(), Box> { +//! let env_path = Path::new("target").join("custom-dupsort-cmp.mdb"); +//! +//! let _ = fs::remove_dir_all(&env_path); +//! +//! fs::create_dir_all(&env_path)?; +//! let env = unsafe { +//! EnvOpenOptions::new() +//! .map_size(10 * 1024 * 1024) // 10MB +//! .max_dbs(3) +//! .open(env_path)? +//! }; +//! +//! let mut wtxn = env.write_txn()?; +//! let db = env +//! .database_options() +//! .types::>() +//! .flags(DatabaseFlags::DUP_SORT) +//! .dup_sort_comparator::() +//! .create(&mut wtxn)?; +//! wtxn.commit()?; +//! +//! let mut wtxn = env.write_txn()?; +//! +//! // We fill our database with entries. +//! db.put(&mut wtxn, "1", &1)?; +//! db.put(&mut wtxn, "1", &2)?; +//! db.put(&mut wtxn, "1", &3)?; +//! db.put(&mut wtxn, "2", &4)?; +//! db.put(&mut wtxn, "1", &5)?; +//! db.put(&mut wtxn, "0", &0)?; +//! +//! // We check that the keys are in lexicographic and values in descending order. 
+//! let mut iter = db.iter(&wtxn)?; +//! assert_eq!(iter.next().transpose()?, Some(("0", 0))); +//! assert_eq!(iter.next().transpose()?, Some(("1", 5))); +//! assert_eq!(iter.next().transpose()?, Some(("1", 3))); +//! assert_eq!(iter.next().transpose()?, Some(("1", 2))); +//! assert_eq!(iter.next().transpose()?, Some(("1", 1))); +//! assert_eq!(iter.next().transpose()?, Some(("2", 4))); +//! drop(iter); +//! +//! Ok(()) +//! } +//! ``` +//! // To let cargo generate doc links #![allow(unused_imports)] -use crate::{BytesDecode, BytesEncode, Database, EnvOpenOptions}; +use crate::{ + mdb::ffi::mdb_set_compare, mdb::ffi::mdb_set_dupsort, BytesDecode, BytesEncode, Comparator, + Database, EnvOpenOptions, +};