From 346c2f7ce9970e1ae7880cb64b0e4bfb287b5f19 Mon Sep 17 00:00:00 2001 From: Piotr Beling Date: Tue, 8 Oct 2024 23:21:59 +0200 Subject: [PATCH] work on GOMap --- csf/src/fp/gocmap/mod.rs | 4 ++-- csf/src/fp/gomap/conf.rs | 18 +++++++++--------- csf/src/fp/gomap/mod.rs | 39 +++++++++++++++++++++++++++++++++++++++ csf/src/fp/kvset.rs | 8 ++++++++ csf/src/fp/map/mod.rs | 6 +----- 5 files changed, 59 insertions(+), 16 deletions(-) diff --git a/csf/src/fp/gocmap/mod.rs b/csf/src/fp/gocmap/mod.rs index 2f6b412..872b7b0 100644 --- a/csf/src/fp/gocmap/mod.rs +++ b/csf/src/fp/gocmap/mod.rs @@ -61,8 +61,8 @@ impl GOCMap, /// Choose the size of each level. - pub level_size_chooser: LSC, + pub level_sizer: LSC, /// Constructs collision solver that decides which collisions are positive, and which are negative. pub collision_solver: CSB, } @@ -23,7 +23,7 @@ pub struct GOMapConf< impl Default for GOMapConf { fn default() -> Self { Self { goconf: Default::default(), - level_size_chooser: Default::default(), + level_sizer: Default::default(), collision_solver: Default::default(), } } } @@ -32,7 +32,7 @@ impl GOMapConf Self { Self { goconf: Default::default(), - level_size_chooser: Default::default(), + level_sizer: Default::default(), collision_solver, } } @@ -42,7 +42,7 @@ impl GOMapConf) -> Self { Self { goconf, - level_size_chooser: Default::default(), + level_sizer: Default::default(), collision_solver: Default::default(), } } @@ -52,7 +52,7 @@ impl GOMapConf, collision_solver: CSB) -> Self { Self { goconf, - level_size_chooser: Default::default(), + level_sizer: Default::default(), collision_solver, } } @@ -68,7 +68,7 @@ impl GOMapConf, TwoToPowe pub fn lsize(level_size_chooser: LSC) -> Self { Self { goconf: Default::default(), - level_size_chooser, + level_sizer: level_size_chooser, collision_solver: Default::default(), } } @@ -78,7 +78,7 @@ impl GOMapConf Self { Self { goconf: Default::default(), - level_size_chooser, + level_sizer: level_size_chooser, collision_solver, } } @@ -86,12 +86,12 @@ impl GOMapConf GOMapConf { pub fn groups_lsize(goconf: GOConf, level_size_chooser: LSC) -> Self { - Self { goconf, level_size_chooser, collision_solver: Default::default() } + Self { goconf, level_sizer: level_size_chooser, collision_solver: Default::default() } } } impl GOMapConf { pub fn groups_lsize_cs(goconf: GOConf, level_size_chooser: LSC, collision_solver: CSB) -> Self { - Self { goconf, level_size_chooser, collision_solver } + Self { goconf, level_sizer: level_size_chooser, collision_solver } } } diff --git a/csf/src/fp/gomap/mod.rs b/csf/src/fp/gomap/mod.rs index ec42fc6..7dab922 100644 --- a/csf/src/fp/gomap/mod.rs +++ b/csf/src/fp/gomap/mod.rs @@ -8,6 +8,11 @@ use std::hash::Hash; mod conf; pub use conf::GOMapConf; +use crate::fp::CollisionSolver; + +use super::kvset::KVSet; +use super::{CollisionSolverBuilder, LevelSizer}; + /// Finger-printing based compressed static function (immutable map) /// that uses group optimization and maps hashable keys to unsigned integer values of given bit-size. /// @@ -82,4 +87,38 @@ impl GOMap { self.get_stats_or_panic(key, &mut ()) } + + + pub fn with_conf_stats(kv: KV, conf: GOMapConf, stats: &mut BS) -> Self + where K: Hash, KV: KVSet, LSC: LevelSizer, CSB: CollisionSolverBuilder, BS: stats::BuildStatsCollector + { + let bits_per_value = kv.bits_per_value(); + let level_sizes = Vec::::new(); + let arrays = Vec::>::new(); + let values_lens = Vec::::new(); + let values = Vec::>::new(); + let groups = Vec::>::new(); + let mut input_size = kv.kv_len(); + let mut level_nr = 0; + while input_size != 0 { + let (level_size_groups, level_size_segments) = conf.goconf.bits_per_group + .level_size_groups_segments(conf.level_sizer.size_segments(&kv) * 64); + stats.level(input_size, level_size_segments * 64); + + let mut collision_solver: ::CollisionSolver = conf.collision_solver.new(level_size_segments, bits_per_value); + kv.process_all_values(|key| conf.goconf.key_index(key, level_nr, level_size_groups as u64, + |_| 0), &mut collision_solver); + let collisions = collision_solver.to_collision_array(); + let mut best_counts = vec![0u32; level_size_groups].into_boxed_slice(); + kv.for_each_key(|key| { + let hash = conf.goconf.hash_builder.hash_one(key, level_nr); + let group = group_nr(hash, level_size_groups as u64); + let bit_nr = conf.goconf.bits_per_group.bit_index_for_seed(hash, 0, group); + if collisions.get_bit(bit_nr) { best_counts[group as usize] += 1; } + }); + let mut best_seeds = conf.goconf.bits_per_seed.new_zeroed_seed_vec(level_size_groups); + + } + todo!() + } } diff --git a/csf/src/fp/kvset.rs b/csf/src/fp/kvset.rs index 7166f59..6cb38a5 100644 --- a/csf/src/fp/kvset.rs +++ b/csf/src/fp/kvset.rs @@ -25,6 +25,14 @@ pub trait KVSet { /// If `self` doesn't remember which keys are retained it uses `retained_hint` to check this. fn for_each_key_value(&self, f: F/*, retained_hint: P*/) where F: FnMut(&K, u8)/*, P: FnMut(&K) -> bool*/; + /// Call `f` for each key in the set, using single thread. + /// + /// If `self` doesn't remember which keys are retained it uses `retained_hint` to check this. + #[inline(always)] + fn for_each_key(&self, mut f: F/*, retained_hint: P*/) where F: FnMut(&K)/*, P: FnMut(&K) -> bool*/ { + self.for_each_key_value(|k, _| f(k)); + } + /// Call `collision_solver.process_value(key_to_index(key), value, self.bits_per_value())` for each `key`-`value` pair. #[inline] fn process_all_values(&self, mut key_to_index: I, collision_solver: &mut CS) diff --git a/csf/src/fp/map/mod.rs b/csf/src/fp/map/mod.rs index 9843fd1..5926092 100644 --- a/csf/src/fp/map/mod.rs +++ b/csf/src/fp/map/mod.rs @@ -131,11 +131,7 @@ impl Map { bits_per_value: u8, construct_partial: bool ) -> Arrays - where K: Hash, - KV: KVSet, - LSC: LevelSizer, - CSB: CollisionSolverBuilder, - BS: stats::BuildStatsCollector + where K: Hash, KV: KVSet, LSC: LevelSizer, CSB: CollisionSolverBuilder, BS: stats::BuildStatsCollector { let mut res = Arrays::default(); let mut input_size = kv.kv_len();