diff --git a/cseq/src/elias_fano.rs b/cseq/src/elias_fano.rs index b9ed2c6..65c6717 100644 --- a/cseq/src/elias_fano.rs +++ b/cseq/src/elias_fano.rs @@ -34,10 +34,11 @@ impl Builder { } impl Builder { - /// Constructs [`Builder`] to build [`Sequence`] with `final_len` values in range [`0`, `universe`). + /// Constructs [`Builder`] to build [`Sequence`] with custom bit vector type and + /// `final_len` values in range [`0`, `universe`). /// After adding values in non-decreasing order by [`Self::push`] method, /// [`Self::finish`] can be called to construct [`Sequence`]. - pub fn new(final_len: usize, universe: u64) -> Self { + pub fn new_b(final_len: usize, universe: u64) -> Self { if final_len == 0 || universe == 0 { return Self { hi: BV::with_64bit_segments(0, 0), lo: Default::default(), bits_per_lo: 0, current_len: 0, target_len: 0, last_added: 0, universe }; } @@ -55,6 +56,15 @@ impl Builder { } } +impl Builder { + /// Constructs [`Builder`] to build [`Sequence`] with `final_len` values in range [`0`, `universe`). + /// After adding values in non-decreasing order by [`Self::push`] method, + /// [`Self::finish`] can be called to construct [`Sequence`]. + #[inline] pub fn new(final_len: usize, universe: u64) -> Self { + Self::new_b(final_len, universe) + } +} + impl> Builder { /// A version of [`Self::push`] without any checks and panic. pub unsafe fn push_unchecked(&mut self, value: u64) { @@ -165,6 +175,8 @@ impl Sequence { /// Returns whether the sequence is empty. #[inline] pub fn is_empty(&self) -> bool { self.len == 0 } + + #[inline] pub fn bits_per_lo(&self) -> u8 { self.bits_per_lo } } impl> Sequence { @@ -307,7 +319,7 @@ impl + Clone>(items: &[I]) -> Self { - let mut b = Builder::::new(items.len(), items.last().map_or(0, |v| v.clone().into()+1)); + let mut b = Builder::::new_b(items.len(), items.last().map_or(0, |v| v.clone().into()+1)); b.push_all(items.iter().map(|v| v.clone().into())); b.finish_unchecked_s() } @@ -426,13 +438,22 @@ impl> Sequence } } -impl Select for Sequence { - #[inline(always)] fn try_select(&self, rank: usize) -> Option { +impl> Select for Sequence { + #[inline] unsafe fn select_unchecked(&self, rank: usize) -> usize { + self.get_unchecked(rank) as usize + } + + #[inline] fn try_select(&self, rank: usize) -> Option { self.get(rank).map(|v| v as usize) } } -impl Rank for Sequence { +impl> Rank for Sequence { + /// Returns the number of `self` items with values less than given `value`. + #[inline] unsafe fn rank_unchecked(&self, value: usize) -> usize { + self.geq_index(value as u64) + } + /// Returns the number of `self` items with values less than given `value`. #[inline] fn try_rank(&self, value: usize) -> Option { Some(self.geq_index(value as u64)) diff --git a/cseq_benchmark/src/bitm.rs b/cseq_benchmark/src/bitm.rs index 66a3fdb..8d96804 100644 --- a/cseq_benchmark/src/bitm.rs +++ b/cseq_benchmark/src/bitm.rs @@ -6,7 +6,7 @@ use crate::{percent_of, percent_of_diff, Conf, Tester}; pub fn build_bit_vec(conf: &Conf) -> (ABox<[u64]>, Tester) { let mut content = ABox::with_zeroed_bits(conf.universe); //let mut content = AVec::from_iter(64, (0..ceiling_div(conf.universe, 64)).map(|_| 0)).into_boxed_slice(); - let tester = conf.rand_data(|bit_nr, value| if value {content.init_bit(bit_nr, value)}); + let tester = conf.fill_data(|bit_nr, value| if value {content.init_bit(bit_nr, value)}); (content, tester) } diff --git a/cseq_benchmark/src/elias_fano.rs b/cseq_benchmark/src/elias_fano.rs index 09a12bc..03d2b78 100644 --- a/cseq_benchmark/src/elias_fano.rs +++ b/cseq_benchmark/src/elias_fano.rs @@ -1,19 +1,27 @@ use cseq::elias_fano; use dyn_size_of::GetSize; -use std::{hint::black_box, time::Instant}; use aligned_vec::ABox; -use elias_fano::{Sequence, DefaultSelectStrategy as S}; +use elias_fano::Builder; +use bitm::{Rank, Select}; pub fn benchmark(conf: &super::Conf) { - let data: Vec = (1..=conf.num).map(|i| (i*conf.universe/conf.num) as u64).collect(); - println!("data contains {} items in the range [0, {})", data.len(), data.last().unwrap()+1); + println!("cseq Elias-Fano"); - let start_moment = Instant::now(); - let ef = Sequence::>::with_items_from_slice_s(&data); - let build_time_seconds = start_moment.elapsed().as_secs_f64(); - println!("size [bits/item]: {:.2}, construction time [μs]: {:.0}", 8.0*ef.size_bytes_dyn() as f64/data.len() as f64, build_time_seconds*1_000_000.0); + let mut builder = Builder::>::new_b(conf.num(), conf.universe as u64); + let tester = conf.add_data(|v| builder.push(v as u64)); - let start_moment = Instant::now(); + //let start_moment = Instant::now(); + //let ef = Sequence::>::with_items_from_slice_s(&data); + //let build_time_seconds = start_moment.elapsed().as_secs_f64(); + let ef = builder.finish(); + println!(" size: {:.2} bits/item {} bits/lo entry", 8.0*ef.size_bytes() as f64/tester.number_of_ones as f64, ef.bits_per_lo()); + + let space_overhead = 100.0 * (8.0*ef.size_bytes() as f64 - conf.universe as f64) / conf.universe as f64; + + tester.raport_rank("cseq Elias-Fano", space_overhead, |i| ef.rank(i)); + tester.raport_select1("cseq Elias-Fano", space_overhead, |i| ef.select(i)); + + /*let start_moment = Instant::now(); for index in 0..data.len() { black_box(ef.get(index)); } @@ -34,5 +42,5 @@ pub fn benchmark(conf: &super::Conf) { assert_eq!(ef.index_of(v), Some(index), "wrong index for value {v}"); } println!("DONE"); - } + }*/ } \ No newline at end of file diff --git a/cseq_benchmark/src/main.rs b/cseq_benchmark/src/main.rs index 7385119..0e5df68 100644 --- a/cseq_benchmark/src/main.rs +++ b/cseq_benchmark/src/main.rs @@ -278,13 +278,17 @@ impl Conf { /// - index in universe in range [0..`self.universe`), /// - whether there is an item (one) at the current position. /// Print statistics about data. Returns tester. - fn rand_data(&self, mut add: F) -> Tester { + fn fill_data(&self, mut add: F) -> Tester { let number_of_ones = self.data_foreach(|index, _, v| add(index, v)); println!(" input: number of bit ones is {} / {} ({:.2}%), {} distribution", number_of_ones, self.universe, percent_of(number_of_ones, self.universe), self.distribution); Tester { conf: self, number_of_ones, rank_includes_current: false } } + #[inline] fn add_data(&self, mut add: F) -> Tester { + self.fill_data(|i, v| if v { add(i) }) + } + /// Either opens or crates (and than put headers inside) and returns the file with given `file_name` (+`csv` extension). fn file(&self, file_name: &str, extra_header: &str) -> Option { if !self.save_details { return None; } diff --git a/cseq_benchmark/src/succinct.rs b/cseq_benchmark/src/succinct.rs index 755f702..5558417 100644 --- a/cseq_benchmark/src/succinct.rs +++ b/cseq_benchmark/src/succinct.rs @@ -3,7 +3,7 @@ use crate::{percent_of_diff, Conf, Tester}; pub fn build_bit_vec(conf: &Conf) -> (BitVector::, Tester) { let mut content = BitVector::with_fill(conf.universe as u64, false); - let tester = conf.rand_data(|bit_nr, value| {content.set_bit(bit_nr as u64, value)}); + let tester = conf.fill_data(|bit_nr, value| {content.set_bit(bit_nr as u64, value)}); (content, tester) } diff --git a/cseq_benchmark/src/sucds.rs b/cseq_benchmark/src/sucds.rs index d01b39d..8fb13c6 100644 --- a/cseq_benchmark/src/sucds.rs +++ b/cseq_benchmark/src/sucds.rs @@ -5,7 +5,7 @@ pub fn benchmark_rank9_select(conf: &Conf) { println!("sucds Rank9Sel:"); let mut content = BitVector::from_bit(false, conf.universe); - let tester = conf.rand_data(|pos, value| + let tester = conf.fill_data(|pos, value| if value { content.set_bit(pos, value).unwrap(); } ); diff --git a/cseq_benchmark/src/sux.rs b/cseq_benchmark/src/sux.rs index 51139f4..c300032 100644 --- a/cseq_benchmark/src/sux.rs +++ b/cseq_benchmark/src/sux.rs @@ -4,7 +4,7 @@ use crate::{percent_of_diff, Conf, Tester}; pub fn build_bit_vec(conf: &Conf) -> (BitVec, Tester) { let mut content = BitVec::new(conf.universe); - let tester = conf.rand_data(|bit_nr, value| content.set(bit_nr, value)); + let tester = conf.fill_data(|bit_nr, value| content.set(bit_nr, value)); (content, tester) } diff --git a/cseq_benchmark/src/vers.rs b/cseq_benchmark/src/vers.rs index dd9cb29..918c8ba 100644 --- a/cseq_benchmark/src/vers.rs +++ b/cseq_benchmark/src/vers.rs @@ -4,7 +4,7 @@ use crate::percent_of_diff; pub fn benchmark_rank_select(conf: &super::Conf) { println!("vers:"); let mut content = BitVec::from_zeros(conf.universe); - let tester = conf.rand_data(|pos, value| if value { content.flip_bit(pos); }); + let tester = conf.fill_data(|pos, value| if value { content.flip_bit(pos); }); let raw_size = content.heap_size(); let rs = RsVec::from_bit_vec(content);