Skip to content

Commit

Permalink
work on better EF support
Browse files Browse the repository at this point in the history
  • Loading branch information
beling committed Mar 4, 2024
1 parent aad5bd8 commit 2484cc2
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 22 deletions.
33 changes: 27 additions & 6 deletions cseq/src/elias_fano.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ impl<BV> Builder<BV> {
}

impl<BV: BitVec> Builder<BV> {
/// Constructs [`Builder`] to build [`Sequence`] with `final_len` values in range [`0`, `universe`).
/// Constructs [`Builder`] to build [`Sequence`] with custom bit vector type and
/// `final_len` values in range [`0`, `universe`).
/// After adding values in non-decreasing order by [`Self::push`] method,
/// [`Self::finish`] can be called to construct [`Sequence`].
pub fn new(final_len: usize, universe: u64) -> Self {
pub fn new_b(final_len: usize, universe: u64) -> Self {
if final_len == 0 || universe == 0 {
return Self { hi: BV::with_64bit_segments(0, 0), lo: Default::default(), bits_per_lo: 0, current_len: 0, target_len: 0, last_added: 0, universe };
}
Expand All @@ -55,6 +56,15 @@ impl<BV: BitVec> Builder<BV> {
}
}

// Convenience constructor for `Builder` with its type parameter left unspecified
// (NOTE(review): presumably this selects the default bit vector type — confirm against the struct definition).
impl Builder {
/// Constructs [`Builder`] to build [`Sequence`] with `final_len` values in range [`0`, `universe`).
/// After adding values in non-decreasing order by [`Self::push`] method,
/// [`Self::finish`] can be called to construct [`Sequence`].
///
/// This is a thin wrapper that forwards both arguments unchanged to [`Self::new_b`].
#[inline] pub fn new(final_len: usize, universe: u64) -> Self {
Self::new_b(final_len, universe)
}
}

impl<BV: DerefMut<Target = [u64]>> Builder<BV> {
/// A version of [`Self::push`] without any checks and panic.
pub unsafe fn push_unchecked(&mut self, value: u64) {
Expand Down Expand Up @@ -165,6 +175,8 @@ impl<S, S0, BV> Sequence<S, S0, BV> {

/// Returns whether the sequence is empty.
#[inline] pub fn is_empty(&self) -> bool { self.len == 0 }

/// Returns the value of the `bits_per_lo` field.
/// NOTE(review): presumably the number of bits stored per item in the low-bits part of the sequence — confirm against the struct definition.
#[inline] pub fn bits_per_lo(&self) -> u8 { self.bits_per_lo }
}

impl<S, S0, BV: Deref<Target = [u64]>> Sequence<S, S0, BV> {
Expand Down Expand Up @@ -307,7 +319,7 @@ impl<S: SelectForRank101111, S0: Select0ForRank101111, BV: BitVec+DerefMut<Targe
/// Constructs [`Sequence`] with custom select strategy and
/// filled with elements from the `items` slice, which must be in non-decreasing order.
pub fn with_items_from_slice_s<I: Into<u64> + Clone>(items: &[I]) -> Self {
let mut b = Builder::<BV>::new(items.len(), items.last().map_or(0, |v| v.clone().into()+1));
let mut b = Builder::<BV>::new_b(items.len(), items.last().map_or(0, |v| v.clone().into()+1));
b.push_all(items.iter().map(|v| v.clone().into()));
b.finish_unchecked_s()
}
Expand Down Expand Up @@ -426,13 +438,22 @@ impl<S, S0: Select0ForRank101111, BV: Deref<Target = [u64]>> Sequence<S, S0, BV>
}
}

impl<S: SelectForRank101111, S0> Select for Sequence<S, S0> {
#[inline(always)] fn try_select(&self, rank: usize) -> Option<usize> {
// `Select` implementation for `Sequence` backed by any bit vector type that dereferences to `[u64]`.
// Both methods delegate to the item accessors (`get_unchecked` / `get`) and cast the result to `usize`.
impl<S: SelectForRank101111, S0, BV: Deref<Target = [u64]>> Select for Sequence<S, S0, BV> {
/// Returns the result of `get_unchecked(rank)` cast to `usize`.
///
/// # Safety
/// NOTE(review): presumably `rank` must be a valid item index, as required by `get_unchecked` — confirm.
#[inline] unsafe fn select_unchecked(&self, rank: usize) -> usize {
self.get_unchecked(rank) as usize
}

/// Returns the result of `get(rank)` cast to `usize`, or [`None`] if `get` returns [`None`].
#[inline] fn try_select(&self, rank: usize) -> Option<usize> {
self.get(rank).map(|v| v as usize)
}
}

impl<S, S0: Select0ForRank101111> Rank for Sequence<S, S0> {
impl<S, S0: Select0ForRank101111, BV: Deref<Target = [u64]>> Rank for Sequence<S, S0, BV> {
/// Returns the number of `self` items with values less than given `value`.
#[inline] unsafe fn rank_unchecked(&self, value: usize) -> usize {
self.geq_index(value as u64)
}

/// Returns the number of `self` items with values less than given `value`.
#[inline] fn try_rank(&self, value: usize) -> Option<usize> {
Some(self.geq_index(value as u64))
Expand Down
2 changes: 1 addition & 1 deletion cseq_benchmark/src/bitm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::{percent_of, percent_of_diff, Conf, Tester};
pub fn build_bit_vec(conf: &Conf) -> (ABox<[u64]>, Tester) {
let mut content = ABox::with_zeroed_bits(conf.universe);
//let mut content = AVec::from_iter(64, (0..ceiling_div(conf.universe, 64)).map(|_| 0)).into_boxed_slice();
let tester = conf.rand_data(|bit_nr, value| if value {content.init_bit(bit_nr, value)});
let tester = conf.fill_data(|bit_nr, value| if value {content.init_bit(bit_nr, value)});
(content, tester)
}

Expand Down
28 changes: 18 additions & 10 deletions cseq_benchmark/src/elias_fano.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
use cseq::elias_fano;
use dyn_size_of::GetSize;
use std::{hint::black_box, time::Instant};
use aligned_vec::ABox;
use elias_fano::{Sequence, DefaultSelectStrategy as S};
use elias_fano::Builder;
use bitm::{Rank, Select};

pub fn benchmark(conf: &super::Conf) {
let data: Vec<u64> = (1..=conf.num).map(|i| (i*conf.universe/conf.num) as u64).collect();
println!("data contains {} items in the range [0, {})", data.len(), data.last().unwrap()+1);
println!("cseq Elias-Fano");

let start_moment = Instant::now();
let ef = Sequence::<S, S, ABox<[u64], _>>::with_items_from_slice_s(&data);
let build_time_seconds = start_moment.elapsed().as_secs_f64();
println!("size [bits/item]: {:.2}, construction time [μs]: {:.0}", 8.0*ef.size_bytes_dyn() as f64/data.len() as f64, build_time_seconds*1_000_000.0);
let mut builder = Builder::<ABox<[u64], _>>::new_b(conf.num(), conf.universe as u64);
let tester = conf.add_data(|v| builder.push(v as u64));

let start_moment = Instant::now();
//let start_moment = Instant::now();
//let ef = Sequence::<S, S, ABox<[u64], _>>::with_items_from_slice_s(&data);
//let build_time_seconds = start_moment.elapsed().as_secs_f64();
let ef = builder.finish();
println!(" size: {:.2} bits/item {} bits/lo entry", 8.0*ef.size_bytes() as f64/tester.number_of_ones as f64, ef.bits_per_lo());

let space_overhead = 100.0 * (8.0*ef.size_bytes() as f64 - conf.universe as f64) / conf.universe as f64;

tester.raport_rank("cseq Elias-Fano", space_overhead, |i| ef.rank(i));
tester.raport_select1("cseq Elias-Fano", space_overhead, |i| ef.select(i));

/*let start_moment = Instant::now();
for index in 0..data.len() {
black_box(ef.get(index));
}
Expand All @@ -34,5 +42,5 @@ pub fn benchmark(conf: &super::Conf) {
assert_eq!(ef.index_of(v), Some(index), "wrong index for value {v}");
}
println!("DONE");
}
}*/
}
6 changes: 5 additions & 1 deletion cseq_benchmark/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,13 +278,17 @@ impl Conf {
/// - index in universe in range [0..`self.universe`),
/// - whether there is an item (one) at the current position.
/// Print statistics about data. Returns tester.
fn rand_data<F: FnMut(usize, bool)>(&self, mut add: F) -> Tester {
fn fill_data<F: FnMut(usize, bool)>(&self, mut add: F) -> Tester {
let number_of_ones = self.data_foreach(|index, _, v| add(index, v));
println!(" input: number of bit ones is {} / {} ({:.2}%), {} distribution",
number_of_ones, self.universe, percent_of(number_of_ones, self.universe), self.distribution);
Tester { conf: self, number_of_ones, rank_includes_current: false }
}

/// Calls `add` with the index of every position for which [`Self::fill_data`] reports `true`
/// (positions reported with `false` are skipped). Returns the [`Tester`] produced by `fill_data`.
#[inline] fn add_data<F: FnMut(usize)>(&self, mut add: F) -> Tester {
self.fill_data(|i, v| if v { add(i) })
}

/// Either opens or creates (and then puts headers inside) and returns the file with the given `file_name` (+`csv` extension).
fn file(&self, file_name: &str, extra_header: &str) -> Option<File> {
if !self.save_details { return None; }
Expand Down
2 changes: 1 addition & 1 deletion cseq_benchmark/src/succinct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{percent_of_diff, Conf, Tester};

pub fn build_bit_vec(conf: &Conf) -> (BitVector::<u64>, Tester) {
let mut content = BitVector::with_fill(conf.universe as u64, false);
let tester = conf.rand_data(|bit_nr, value| {content.set_bit(bit_nr as u64, value)});
let tester = conf.fill_data(|bit_nr, value| {content.set_bit(bit_nr as u64, value)});
(content, tester)
}

Expand Down
2 changes: 1 addition & 1 deletion cseq_benchmark/src/sucds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub fn benchmark_rank9_select(conf: &Conf) {
println!("sucds Rank9Sel:");

let mut content = BitVector::from_bit(false, conf.universe);
let tester = conf.rand_data(|pos, value|
let tester = conf.fill_data(|pos, value|
if value { content.set_bit(pos, value).unwrap(); }
);

Expand Down
2 changes: 1 addition & 1 deletion cseq_benchmark/src/sux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{percent_of_diff, Conf, Tester};

pub fn build_bit_vec(conf: &Conf) -> (BitVec, Tester) {
let mut content = BitVec::new(conf.universe);
let tester = conf.rand_data(|bit_nr, value| content.set(bit_nr, value));
let tester = conf.fill_data(|bit_nr, value| content.set(bit_nr, value));
(content, tester)
}

Expand Down
2 changes: 1 addition & 1 deletion cseq_benchmark/src/vers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::percent_of_diff;
pub fn benchmark_rank_select(conf: &super::Conf) {
println!("vers:");
let mut content = BitVec::from_zeros(conf.universe);
let tester = conf.rand_data(|pos, value| if value { content.flip_bit(pos); });
let tester = conf.fill_data(|pos, value| if value { content.flip_bit(pos); });

let raw_size = content.heap_size();
let rs = RsVec::from_bit_vec(content);
Expand Down

0 comments on commit 2484cc2

Please sign in to comment.