From 5ef16e0eba3e68dcd181cfedf9e8c367f3081763 Mon Sep 17 00:00:00 2001 From: Piotr Beling Date: Fri, 4 Oct 2024 13:06:40 +0200 Subject: [PATCH] work on GOMap --- csf/src/fp/gomap/mod.rs | 32 ++++++++++++++++++++++++++++++-- csf/src/fp/map/mod.rs | 10 +++++----- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/csf/src/fp/gomap/mod.rs b/csf/src/fp/gomap/mod.rs index cc6a6c9..84f45fe 100644 --- a/csf/src/fp/gomap/mod.rs +++ b/csf/src/fp/gomap/mod.rs @@ -1,7 +1,9 @@ +use bitm::{BitAccess, Rank}; use ph::{BuildDefaultSeededHasher, BuildSeededHasher, stats, utils::ArrayWithRank}; use ph::fmph::{goindexing::group_nr, GroupSize, SeedSize, TwoToPowerBitsStatic}; pub use ph::fmph::GOConf; use dyn_size_of::GetSize; +use std::hash::Hash; mod conf; pub use conf::GOMapConf; @@ -17,7 +19,7 @@ pub struct GOMap, SS: SeedSize = TwoTo values: Box<[u64]>, // BitVec bits_per_value: u8, group_seeds: Box<[SS::VecElement]>, // Box<[u8]>, - level_size: Box<[u64]>, // number of groups + level_sizes: Box<[u64]>, // number of groups goconf: GOConf, } @@ -26,8 +28,34 @@ impl GetSize for GOMap { self.array.size_bytes_dyn() + self.values.size_bytes_dyn() + self.group_seeds.size_bytes_dyn() - + self.level_size.size_bytes_dyn() + + self.level_sizes.size_bytes_dyn() } const USES_DYN_MEM: bool = true; } +impl GOMap { + /// Gets the value associated with the given `key` and reports statistics to `access_stats`. + pub fn get_stats(&self, key: &K, access_stats: &mut A) -> Option { + let mut groups_before = 0u64; + let mut level_nr = 0u32; + loop { + let level_size_groups = *self.level_sizes.get(level_nr as usize)?; + let hash = self.goconf.hash_builder.hash_one(key, level_nr); + let group = groups_before + group_nr(hash, level_size_groups); + let seed = self.goconf.bits_per_seed.get_seed(&self.group_seeds, group as usize); + let bit_index = self.goconf.bits_per_group.bit_index_for_seed(hash, seed, group); + if self.array.content.get_bit(bit_index) { + access_stats.found_on_level(level_nr); + //return Some(unsafe{self.array.rank_unchecked(bit_index)} as u64); + return Some(self.values.get_fragment(self.array.rank(bit_index), self.bits_per_value) as u8); + } + groups_before += level_size_groups; + level_nr += 1; + } + } + + /// Gets the value associated with the given key k. + pub fn get(&self, k: &K) -> Option { + self.get_stats(k, &mut ()) + } +} diff --git a/csf/src/fp/map/mod.rs b/csf/src/fp/map/mod.rs index cb2bed3..652556f 100644 --- a/csf/src/fp/map/mod.rs +++ b/csf/src/fp/map/mod.rs @@ -23,7 +23,7 @@ pub struct Map { array: ArrayWithRank, values: Box<[u64]>, // BitVec bits_per_value: u8, - level_sizes: Box<[u64]>, + level_sizes: Box<[usize]>, // in 64-bit segments hash: S } @@ -44,7 +44,7 @@ fn index(hash: &H, k: &K, level_nr: u32, level_si #[derive(Default)] struct Arrays { - level_sizes: Vec::, + level_sizes: Vec::, arrays: Vec::>, values_lens: Vec::, values: Vec::> @@ -77,7 +77,7 @@ impl Map { let mut array_begin_index = 0usize; let mut level = 0u32; loop { - let level_size = (*self.level_sizes.get(level as usize)? as usize) << 6usize; + let level_size = *self.level_sizes.get(level as usize)? << 6usize; let i = array_begin_index + index(&self.hash, k, level, level_size); if self.array.content.get_bit(i) { access_stats.found_on_level(level); @@ -140,8 +140,8 @@ impl Map { levels_without_reduction = 0; } - res.arrays.push(current_array); - res.level_sizes.push(level_size_segments as u64); + res.arrays.push(current_array); + res.level_sizes.push(level_size_segments); res.values.push(current_values); res.values_lens.push(current_values_len); level_nr += 1;