From 4de01fe893d21a8c148b7aea69393585f2a909c3 Mon Sep 17 00:00:00 2001 From: ltdk Date: Thu, 3 Oct 2024 15:47:50 -0400 Subject: [PATCH] Add Tag(u8) newtype in an attempt to stop using *const u8 for everything --- Cargo.toml | 2 +- src/raw/generic.rs | 68 ++++++------ src/raw/mod.rs | 261 ++++++++++++++++++++++++--------------------- src/raw/neon.rs | 54 +++++----- src/raw/sse2.rs | 60 +++++------ 5 files changed, 231 insertions(+), 214 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 34937f5c1..d37c74aec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ keywords = ["hash", "no_std", "hashmap", "swisstable"] categories = ["data-structures", "no-std"] exclude = [".github", "/ci/*"] edition = "2021" -rust-version = "1.63.0" +rust-version = "1.65.0" [dependencies] # For the default hasher diff --git a/src/raw/generic.rs b/src/raw/generic.rs index c668b0642..435164479 100644 --- a/src/raw/generic.rs +++ b/src/raw/generic.rs @@ -1,5 +1,5 @@ use super::bitmask::BitMask; -use super::EMPTY; +use super::Tag; use core::{mem, ptr}; // Use the native word size as the group size. Using a 64-bit group size on @@ -24,18 +24,18 @@ cfg_if! { pub(crate) type BitMaskWord = GroupWord; pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord; pub(crate) const BITMASK_STRIDE: usize = 8; -// We only care about the highest bit of each byte for the mask. +// We only care about the highest bit of each tag for the mask. #[allow(clippy::cast_possible_truncation, clippy::unnecessary_cast)] -pub(crate) const BITMASK_MASK: BitMaskWord = 0x8080_8080_8080_8080_u64 as GroupWord; +pub(crate) const BITMASK_MASK: BitMaskWord = u64::from_ne_bytes([Tag::DELETED.0; 8]) as GroupWord; pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0; -/// Helper function to replicate a byte across a `GroupWord`. +/// Helper function to replicate a tag across a `GroupWord`. #[inline] -fn repeat(byte: u8) -> GroupWord { - GroupWord::from_ne_bytes([byte; Group::WIDTH]) +fn repeat(tag: Tag) -> GroupWord { + GroupWord::from_ne_bytes([tag.0; Group::WIDTH]) } -/// Abstraction over a group of control bytes which can be scanned in +/// Abstraction over a group of control tags which can be scanned in /// parallel. /// /// This implementation uses a word-sized integer. @@ -51,94 +51,94 @@ impl Group { /// Number of bytes in the group. pub(crate) const WIDTH: usize = mem::size_of::(); - /// Returns a full group of empty bytes, suitable for use as the initial + /// Returns a full group of empty tags, suitable for use as the initial /// value for an empty hash table. /// /// This is guaranteed to be aligned to the group size. #[inline] - pub(crate) const fn static_empty() -> &'static [u8; Group::WIDTH] { + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { #[repr(C)] - struct AlignedBytes { + struct AlignedTags { _align: [Group; 0], - bytes: [u8; Group::WIDTH], + tags: [Tag; Group::WIDTH], } - const ALIGNED_BYTES: AlignedBytes = AlignedBytes { + const ALIGNED_TAGS: AlignedTags = AlignedTags { _align: [], - bytes: [EMPTY; Group::WIDTH], + tags: [Tag::EMPTY; Group::WIDTH], }; - &ALIGNED_BYTES.bytes + &ALIGNED_TAGS.tags } - /// Loads a group of bytes starting at the given address. + /// Loads a group of tags starting at the given address. 
#[inline] #[allow(clippy::cast_ptr_alignment)] // unaligned load - pub(crate) unsafe fn load(ptr: *const u8) -> Self { + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { Group(ptr::read_unaligned(ptr.cast())) } - /// Loads a group of bytes starting at the given address, which must be + /// Loads a group of tags starting at the given address, which must be /// aligned to `mem::align_of::()`. #[inline] #[allow(clippy::cast_ptr_alignment)] - pub(crate) unsafe fn load_aligned(ptr: *const u8) -> Self { + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); Group(ptr::read(ptr.cast())) } - /// Stores the group of bytes to the given address, which must be + /// Stores the group of tags to the given address, which must be /// aligned to `mem::align_of::()`. #[inline] #[allow(clippy::cast_ptr_alignment)] - pub(crate) unsafe fn store_aligned(self, ptr: *mut u8) { + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); ptr::write(ptr.cast(), self.0); } - /// Returns a `BitMask` indicating all bytes in the group which *may* + /// Returns a `BitMask` indicating all tags in the group which *may* /// have the given value. /// /// This function may return a false positive in certain cases where - /// the byte in the group differs from the searched value only in its + /// the tag in the group differs from the searched value only in its /// lowest bit. This is fine because: /// - This never happens for `EMPTY` and `DELETED`, only full entries. /// - The check for key equality will catch these. /// - This only happens if there is at least 1 true match. /// - The chance of this happening is very low (< 1% chance per byte). #[inline] - pub(crate) fn match_byte(self, byte: u8) -> BitMask { + pub(crate) fn match_tag(self, tag: Tag) -> BitMask { // This algorithm is derived from // https://graphics.stanford.edu/~seander/bithacks.html##ValueInWord - let cmp = self.0 ^ repeat(byte); - BitMask((cmp.wrapping_sub(repeat(0x01)) & !cmp & repeat(0x80)).to_le()) + let cmp = self.0 ^ repeat(tag); + BitMask((cmp.wrapping_sub(repeat(Tag(0x01))) & !cmp & repeat(Tag::DELETED)).to_le()) } - /// Returns a `BitMask` indicating all bytes in the group which are + /// Returns a `BitMask` indicating all tags in the group which are /// `EMPTY`. #[inline] pub(crate) fn match_empty(self) -> BitMask { - // If the high bit is set, then the byte must be either: + // If the high bit is set, then the tag must be either: // 1111_1111 (EMPTY) or 1000_0000 (DELETED). // So we can just check if the top two bits are 1 by ANDing them. - BitMask((self.0 & (self.0 << 1) & repeat(0x80)).to_le()) + BitMask((self.0 & (self.0 << 1) & repeat(Tag::DELETED)).to_le()) } - /// Returns a `BitMask` indicating all bytes in the group which are + /// Returns a `BitMask` indicating all tags in the group which are /// `EMPTY` or `DELETED`. #[inline] pub(crate) fn match_empty_or_deleted(self) -> BitMask { - // A byte is EMPTY or DELETED iff the high bit is set - BitMask((self.0 & repeat(0x80)).to_le()) + // A tag is EMPTY or DELETED iff the high bit is set + BitMask((self.0 & repeat(Tag::DELETED)).to_le()) } - /// Returns a `BitMask` indicating all bytes in the group which are full. + /// Returns a `BitMask` indicating all tags in the group which are full. 
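// A minimal standalone sketch of the word-wide match trick used by the generic
// `Group::match_tag` above (not part of this patch): XOR-ing with a repeated tag
// turns matching lanes into 0x00, and `(x - 0x01...) & !x & 0x80...` sets the
// high bit of exactly those zero lanes, modulo the false positives documented
// above.
fn repeat(byte: u8) -> u64 {
    u64::from_ne_bytes([byte; 8])
}

fn main() {
    // Eight control tags; only lane 2 holds the value we search for (0x2a).
    let group = u64::from_ne_bytes([0xff, 0x80, 0x2a, 0x11, 0xff, 0xff, 0xff, 0xff]);
    let cmp = group ^ repeat(0x2a);
    let mask = cmp.wrapping_sub(repeat(0x01)) & !cmp & repeat(0x80);
    // Only the lane that held 0x2a ends up with its high bit set.
    assert_eq!(mask.to_ne_bytes(), [0, 0, 0x80, 0, 0, 0, 0, 0]);
}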
#[inline] pub(crate) fn match_full(self) -> BitMask { self.match_empty_or_deleted().invert() } - /// Performs the following transformation on all bytes in the group: + /// Performs the following transformation on all tags in the group: /// - `EMPTY => EMPTY` /// - `DELETED => EMPTY` /// - `FULL => DELETED` @@ -151,7 +151,7 @@ impl Group { // let full = 1000_0000 (true) or 0000_0000 (false) // !1000_0000 + 1 = 0111_1111 + 1 = 1000_0000 (no carry) // !0000_0000 + 0 = 1111_1111 + 0 = 1111_1111 (no carry) - let full = !self.0 & repeat(0x80); + let full = !self.0 & repeat(Tag::DELETED); Group(!full + (full >> 7)) } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 495557ad4..58369571d 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -102,29 +102,54 @@ trait SizedTypeProperties: Sized { impl SizedTypeProperties for T {} -/// Control byte value for an empty bucket. -const EMPTY: u8 = 0b1111_1111; +/// Single tag in a control group. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[repr(transparent)] +struct Tag(u8); +impl Tag { + /// Control tag value for an empty bucket. + const EMPTY: Tag = Tag(0b1111_1111); + + /// Control tag value for a deleted bucket. + const DELETED: Tag = Tag(0b1000_0000); + + /// Checks whether a control tag represents a full bucket (top bit is clear). + #[inline] + const fn is_full(self) -> bool { + self.0 & 0x80 == 0 + } -/// Control byte value for a deleted bucket. -const DELETED: u8 = 0b1000_0000; + /// Checks whether a control tag represents a special value (top bit is set). + #[inline] + const fn is_special(self) -> bool { + self.0 & 0x80 != 0 + } -/// Checks whether a control byte represents a full bucket (top bit is clear). -#[inline] -fn is_full(ctrl: u8) -> bool { - ctrl & 0x80 == 0 -} + /// Checks whether a special control value is EMPTY (just check 1 bit). + #[inline] + const fn special_is_empty(self) -> bool { + debug_assert!(self.is_special()); + self.0 & 0x01 != 0 + } -/// Checks whether a control byte represents a special value (top bit is set). -#[inline] -fn is_special(ctrl: u8) -> bool { - ctrl & 0x80 != 0 -} + /// Creates a control tag representing a full bucket with the given hash. + #[inline] + #[allow(clippy::cast_possible_truncation)] + const fn full(hash: u64) -> Tag { + // Constant for function that grabs the top 7 bits of the hash. + const MIN_HASH_LEN: usize = if mem::size_of::() < mem::size_of::() { + mem::size_of::() + } else { + mem::size_of::() + }; -/// Checks whether a special control value is EMPTY (just check 1 bit). -#[inline] -fn special_is_empty(ctrl: u8) -> bool { - debug_assert!(is_special(ctrl)); - ctrl & 0x01 != 0 + // Grab the top 7 bits of the hash. While the hash is normally a full 64-bit + // value, some hash functions (such as FxHash) produce a usize result + // instead, which means that the top 32 bits are 0 on 32-bit platforms. + // So we use MIN_HASH_LEN constant to handle this. + let top7 = hash >> (MIN_HASH_LEN * 8 - 7); + Tag((top7 & 0x7f) as u8) // truncation + } } /// Primary hash function, used to select the initial bucket to probe from. @@ -135,25 +160,6 @@ fn h1(hash: u64) -> usize { hash as usize } -// Constant for h2 function that grabs the top 7 bits of the hash. -const MIN_HASH_LEN: usize = if mem::size_of::() < mem::size_of::() { - mem::size_of::() -} else { - mem::size_of::() -}; - -/// Secondary hash function, saved in the low 7 bits of the control byte. -#[inline] -#[allow(clippy::cast_possible_truncation)] -fn h2(hash: u64) -> u8 { - // Grab the top 7 bits of the hash. 
While the hash is normally a full 64-bit - // value, some hash functions (such as FxHash) produce a usize result - // instead, which means that the top 32 bits are 0 on 32-bit platforms. - // So we use MIN_HASH_LEN constant to handle this. - let top7 = hash >> (MIN_HASH_LEN * 8 - 7); - (top7 & 0x7f) as u8 // truncation -} - /// Probe sequence based on triangular numbers, which is guaranteed (since our /// table size is a power of two) to visit every group of elements exactly once. /// @@ -1124,7 +1130,7 @@ impl RawTable { // SAFETY: The function is guaranteed to return [`InsertSlot`] that contains an index // in the range `0..=self.buckets()`. let old_ctrl = *self.table.ctrl(slot.index); - if unlikely(self.table.growth_left == 0 && special_is_empty(old_ctrl)) { + if unlikely(self.table.growth_left == 0 && old_ctrl.special_is_empty()) { self.reserve(1, hasher); // SAFETY: We know for sure that `RawTableInner` has control bytes // initialized and that there is extra space in the table. @@ -1156,7 +1162,7 @@ impl RawTable { // If we are replacing a DELETED entry then we don't need to update // the load counter. - self.table.growth_left -= special_is_empty(old_ctrl) as usize; + self.table.growth_left -= old_ctrl.special_is_empty() as usize; bucket.write(value); self.table.items += 1; @@ -1457,7 +1463,7 @@ impl RawTable { None => unsafe { hint::unreachable_unchecked() }, }; Some(( - unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset)) }, + unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset).cast()) }, layout, unsafe { ptr::read(&self.alloc) }, )) @@ -1492,7 +1498,9 @@ impl RawTableInner { const fn new() -> Self { Self { // Be careful to cast the entire slice to a raw pointer. - ctrl: unsafe { NonNull::new_unchecked(Group::static_empty() as *const _ as *mut u8) }, + ctrl: unsafe { + NonNull::new_unchecked(Group::static_empty().as_ptr().cast_mut().cast()) + }, bucket_mask: 0, items: 0, growth_left: 0, @@ -1508,7 +1516,7 @@ impl RawTableInner { /// /// The caller of this function must ensure that the `buckets` is power of two /// and also initialize all control bytes of the length `self.bucket_mask + 1 + - /// Group::WIDTH` with the [`EMPTY`] bytes. + /// Group::WIDTH` with the [`Tag::EMPTY`] bytes. /// /// See also [`Allocator`] API for other safety concerns. /// @@ -1549,7 +1557,7 @@ impl RawTableInner { /// Attempts to allocate a new [`RawTableInner`] with at least enough /// capacity for inserting the given number of elements without reallocating. /// - /// All the control bytes are initialized with the [`EMPTY`] bytes. + /// All the control bytes are initialized with the [`Tag::EMPTY`] bytes. #[inline] fn fallible_with_capacity( alloc: &A, @@ -1564,7 +1572,7 @@ impl RawTableInner { Ok(Self::NEW) } else { // SAFETY: We checked that we could successfully allocate the new table, and then - // initialized all control bytes with the constant `EMPTY` byte. + // initialized all control bytes with the constant `Tag::EMPTY` byte. unsafe { let buckets = capacity_to_buckets(capacity).ok_or_else(|| fallibility.capacity_overflow())?; @@ -1573,7 +1581,9 @@ impl RawTableInner { // SAFETY: We checked that the table is allocated and therefore the table already has // `self.bucket_mask + 1 + Group::WIDTH` number of control bytes (see TableLayout::calculate_layout_for) // so writing `self.num_ctrl_bytes() == bucket_mask + 1 + Group::WIDTH` bytes is safe. 
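// A standalone sketch of the property claimed for `ProbeSeq` above (not part of
// this patch): stepping by triangular numbers visits every group of a
// power-of-two-sized table exactly once before repeating. The update rule below
// is the same shape hashbrown uses (the stride grows by `Group::WIDTH` on each
// probe), with the group width factored out so one step is one group and the
// starting position (normally derived from the hash) fixed at 0.
fn main() {
    const GROUPS: usize = 16; // must be a power of two
    let mut seen = [false; GROUPS];
    let (mut pos, mut stride) = (0usize, 0usize);
    for _ in 0..GROUPS {
        seen[pos] = true;
        stride += 1;
        pos = (pos + stride) & (GROUPS - 1);
    }
    // Every group was visited exactly once in GROUPS steps.
    assert!(seen.iter().all(|&visited| visited));
}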
- result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); + result + .ctrl(0) + .write_bytes(Tag::EMPTY.0, result.num_ctrl_bytes()); Ok(result) } @@ -1587,7 +1597,7 @@ impl RawTableInner { /// in case of allocation error. Use [`fallible_with_capacity`] instead if you want to /// handle memory allocation failure. /// - /// All the control bytes are initialized with the [`EMPTY`] bytes. + /// All the control bytes are initialized with the [`Tag::EMPTY`] bytes. /// /// [`fallible_with_capacity`]: RawTableInner::fallible_with_capacity /// [`abort`]: https://doc.rust-lang.org/alloc/alloc/fn.handle_alloc_error.html @@ -1606,13 +1616,13 @@ impl RawTableInner { /// Fixes up an insertion slot returned by the [`RawTableInner::find_insert_slot_in_group`] method. /// /// In tables smaller than the group width (`self.buckets() < Group::WIDTH`), trailing control - /// bytes outside the range of the table are filled with [`EMPTY`] entries. These will unfortunately + /// bytes outside the range of the table are filled with [`Tag::EMPTY`] entries. These will unfortunately /// trigger a match of [`RawTableInner::find_insert_slot_in_group`] function. This is because /// the `Some(bit)` returned by `group.match_empty_or_deleted().lowest_set_bit()` after masking /// (`(probe_seq.pos + bit) & self.bucket_mask`) may point to a full bucket that is already occupied. /// We detect this situation here and perform a second scan starting at the beginning of the table. /// This second scan is guaranteed to find an empty slot (due to the load factor) before hitting the - /// trailing control bytes (containing [`EMPTY`] bytes). + /// trailing control bytes (containing [`Tag::EMPTY`] bytes). /// /// If this function is called correctly, it is guaranteed to return [`InsertSlot`] with an /// index of an empty or deleted bucket in the range `0..self.buckets()` (see `Warning` and @@ -1728,7 +1738,7 @@ impl RawTableInner { /// Attempt to write data at the [`InsertSlot`] returned by this function when the table is /// less than the group width and if there was not at least one empty or deleted bucket in /// the table will cause immediate [`undefined behavior`]. This is because in this case the - /// function will return `self.bucket_mask + 1` as an index due to the trailing [`EMPTY`] + /// function will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`] /// control bytes outside the table range. /// /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html @@ -1740,7 +1750,7 @@ impl RawTableInner { ) -> Result { let mut insert_slot = None; - let h2_hash = h2(hash); + let tag_hash = Tag::full(hash); let mut probe_seq = self.probe_seq(hash); loop { @@ -1761,7 +1771,7 @@ impl RawTableInner { // bytes, which is safe (see RawTableInner::new). let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; - for bit in group.match_byte(h2_hash) { + for bit in group.match_tag(tag_hash) { let index = (probe_seq.pos + bit) & self.bucket_mask; if likely(eq(index)) { @@ -1816,7 +1826,7 @@ impl RawTableInner { /// # Safety /// /// The safety rules are directly derived from the safety rules for the - /// [`RawTableInner::set_ctrl_h2`] and [`RawTableInner::find_insert_slot`] methods. + /// [`RawTableInner::set_ctrl_hash`] and [`RawTableInner::find_insert_slot`] methods. 
/// Thus, in order to uphold the safety contracts for that methods, as well as for /// the correct logic of the work of this crate, you must observe the following rules /// when calling this function: @@ -1831,12 +1841,12 @@ impl RawTableInner { /// Attempt to write data at the `index` returned by this function when the table is /// less than the group width and if there was not at least one empty or deleted bucket in /// the table will cause immediate [`undefined behavior`]. This is because in this case the - /// function will return `self.bucket_mask + 1` as an index due to the trailing [`EMPTY`] + /// function will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`] /// control bytes outside the table range. /// /// The caller must independently increase the `items` field of the table, and also, - /// if the old control byte was [`EMPTY`], then decrease the table's `growth_left` - /// field, and do not change it if the old control byte was [`DELETED`]. + /// if the old control byte was [`Tag::EMPTY`], then decrease the table's `growth_left` + /// field, and do not change it if the old control byte was [`Tag::DELETED`]. /// /// See also [`Bucket::as_ptr`] method, for more information about of properly removing /// or saving `element` from / into the [`RawTable`] / [`RawTableInner`]. @@ -1844,10 +1854,10 @@ impl RawTableInner { /// [`Bucket::as_ptr`]: Bucket::as_ptr /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html /// [`RawTableInner::ctrl`]: RawTableInner::ctrl - /// [`RawTableInner::set_ctrl_h2`]: RawTableInner::set_ctrl_h2 + /// [`RawTableInner::set_ctrl_hash`]: RawTableInner::set_ctrl_hash /// [`RawTableInner::find_insert_slot`]: RawTableInner::find_insert_slot #[inline] - unsafe fn prepare_insert_slot(&mut self, hash: u64) -> (usize, u8) { + unsafe fn prepare_insert_slot(&mut self, hash: u64) -> (usize, Tag) { // SAFETY: Caller of this function ensures that the control bytes are properly initialized. let index: usize = self.find_insert_slot(hash).index; // SAFETY: @@ -1857,7 +1867,7 @@ impl RawTableInner { // 2. The caller of this function guarantees that the table has already been // allocated let old_ctrl = *self.ctrl(index); - self.set_ctrl_h2(index, hash); + self.set_ctrl_hash(index, hash); (index, old_ctrl) } @@ -1885,7 +1895,7 @@ impl RawTableInner { /// Attempt to write data at the [`InsertSlot`] returned by this function when the table is /// less than the group width and if there was not at least one empty or deleted bucket in /// the table will cause immediate [`undefined behavior`]. This is because in this case the - /// function will return `self.bucket_mask + 1` as an index due to the trailing [`EMPTY`] + /// function will return `self.bucket_mask + 1` as an index due to the trailing [`Tag::EMPTY`] /// control bytes outside the table range. /// /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html @@ -1948,7 +1958,7 @@ impl RawTableInner { /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline(always)] unsafe fn find_inner(&self, hash: u64, eq: &mut dyn FnMut(usize) -> bool) -> Option { - let h2_hash = h2(hash); + let tag_hash = Tag::full(hash); let mut probe_seq = self.probe_seq(hash); loop { @@ -1968,7 +1978,7 @@ impl RawTableInner { // bytes, which is safe (see RawTableInner::new_in). 
let group = unsafe { Group::load(self.ctrl(probe_seq.pos)) }; - for bit in group.match_byte(h2_hash) { + for bit in group.match_tag(tag_hash) { // This is the same as `(probe_seq.pos + bit) % self.buckets()` because the number // of buckets is a power of two, and `self.bucket_mask = self.buckets() - 1`. let index = (probe_seq.pos + bit) & self.bucket_mask; @@ -1987,12 +1997,12 @@ impl RawTableInner { } /// Prepares for rehashing data in place (that is, without allocating new memory). - /// Converts all full index `control bytes` to `DELETED` and all `DELETED` control - /// bytes to `EMPTY`, i.e. performs the following conversion: + /// Converts all full index `control bytes` to `Tag::DELETED` and all `Tag::DELETED` control + /// bytes to `Tag::EMPTY`, i.e. performs the following conversion: /// - /// - `EMPTY` control bytes -> `EMPTY`; - /// - `DELETED` control bytes -> `EMPTY`; - /// - `FULL` control bytes -> `DELETED`. + /// - `Tag::EMPTY` control bytes -> `Tag::EMPTY`; + /// - `Tag::DELETED` control bytes -> `Tag::EMPTY`; + /// - `FULL` control bytes -> `Tag::DELETED`. /// /// This function does not make any changes to the `data` parts of the table, /// or any changes to the `items` or `growth_left` field of the table. @@ -2003,7 +2013,7 @@ impl RawTableInner { /// /// * The [`RawTableInner`] has already been allocated; /// - /// * The caller of this function must convert the `DELETED` bytes back to `FULL` + /// * The caller of this function must convert the `Tag::DELETED` bytes back to `FULL` /// bytes when re-inserting them into their ideal position (which was impossible /// to do during the first insert due to tombstones). If the caller does not do /// this, then calling this function may result in a memory leak. @@ -2396,9 +2406,9 @@ impl RawTableInner { } #[inline] - unsafe fn record_item_insert_at(&mut self, index: usize, old_ctrl: u8, hash: u64) { - self.growth_left -= usize::from(special_is_empty(old_ctrl)); - self.set_ctrl_h2(index, hash); + unsafe fn record_item_insert_at(&mut self, index: usize, old_ctrl: Tag, hash: u64) { + self.growth_left -= usize::from(old_ctrl.special_is_empty()); + self.set_ctrl_hash(index, hash); self.items += 1; } @@ -2438,9 +2448,9 @@ impl RawTableInner { /// [`Bucket::as_ptr`]: Bucket::as_ptr /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] - unsafe fn set_ctrl_h2(&mut self, index: usize, hash: u64) { - // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl_h2`] - self.set_ctrl(index, h2(hash)); + unsafe fn set_ctrl_hash(&mut self, index: usize, hash: u64) { + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::set_ctrl_hash`] + self.set_ctrl(index, Tag::full(hash)); } /// Replaces the hash in the control byte at the given index with the provided one, @@ -2452,7 +2462,7 @@ impl RawTableInner { /// /// # Safety /// - /// The safety rules are directly derived from the safety rules for [`RawTableInner::set_ctrl_h2`] + /// The safety rules are directly derived from the safety rules for [`RawTableInner::set_ctrl_hash`] /// and [`RawTableInner::ctrl`] methods. Thus, in order to uphold the safety contracts for both /// methods, you must observe the following rules when calling this function: /// @@ -2467,15 +2477,15 @@ impl RawTableInner { /// See also [`Bucket::as_ptr`] method, for more information about of properly removing /// or saving `data element` from / into the [`RawTable`] / [`RawTableInner`]. 
/// - /// [`RawTableInner::set_ctrl_h2`]: RawTableInner::set_ctrl_h2 + /// [`RawTableInner::set_ctrl_hash`]: RawTableInner::set_ctrl_hash /// [`RawTableInner::buckets`]: RawTableInner::buckets /// [`Bucket::as_ptr`]: Bucket::as_ptr /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] - unsafe fn replace_ctrl_h2(&mut self, index: usize, hash: u64) -> u8 { - // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::replace_ctrl_h2`] + unsafe fn replace_ctrl_hash(&mut self, index: usize, hash: u64) -> Tag { + // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::replace_ctrl_hash`] let prev_ctrl = *self.ctrl(index); - self.set_ctrl_h2(index, hash); + self.set_ctrl_hash(index, hash); prev_ctrl } @@ -2504,7 +2514,7 @@ impl RawTableInner { /// [`Bucket::as_ptr`]: Bucket::as_ptr /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] - unsafe fn set_ctrl(&mut self, index: usize, ctrl: u8) { + unsafe fn set_ctrl(&mut self, index: usize, ctrl: Tag) { // Replicate the first Group::WIDTH control bytes at the end of // the array without using a branch. If the tables smaller than // the group width (self.buckets() < Group::WIDTH), @@ -2524,7 +2534,7 @@ impl RawTableInner { // // Real | Replicated // --------------------------------------------- - // | [A] | [B] | [EMPTY] | [EMPTY] | [A] | [B] | + // | [A] | [B] | [Tag::EMPTY] | [EMPTY] | [A] | [B] | // --------------------------------------------- // This is the same as `(index.wrapping_sub(Group::WIDTH)) % self.buckets() + Group::WIDTH` @@ -2560,10 +2570,10 @@ impl RawTableInner { /// [`Bucket::as_ptr()`]: Bucket::as_ptr() /// [`Undefined Behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html #[inline] - unsafe fn ctrl(&self, index: usize) -> *mut u8 { + unsafe fn ctrl(&self, index: usize) -> *mut Tag { debug_assert!(index < self.num_ctrl_bytes()); // SAFETY: The caller must uphold the safety rules for the [`RawTableInner::ctrl`] - self.ctrl.as_ptr().add(index) + self.ctrl.as_ptr().add(index).cast() } #[inline] @@ -2579,7 +2589,7 @@ impl RawTableInner { #[inline] unsafe fn is_bucket_full(&self, index: usize) -> bool { debug_assert!(index < self.buckets()); - is_full(*self.ctrl(index)) + (*self.ctrl(index)).is_full() } #[inline] @@ -2759,12 +2769,14 @@ impl RawTableInner { // // where: T0...T_n - our stored data; // CT0...CT_n - control bytes or metadata for `data`. - let ctrl = NonNull::new_unchecked(self.ctrl(0)); + let ctrl = NonNull::new_unchecked(self.ctrl(0).cast::()); FullBucketsIndices { // Load the first group // SAFETY: See explanation above. - current_group: Group::load_aligned(ctrl.as_ptr()).match_full().into_iter(), + current_group: Group::load_aligned(ctrl.as_ptr().cast()) + .match_full() + .into_iter(), group_first_index: 0, ctrl, items: self.items, @@ -2925,8 +2937,8 @@ impl RawTableInner { let mut guard = guard(self, move |self_| { if let Some(drop) = drop { for i in 0..self_.buckets() { - if *self_.ctrl(i) == DELETED { - self_.set_ctrl(i, EMPTY); + if *self_.ctrl(i) == Tag::DELETED { + self_.set_ctrl(i, Tag::EMPTY); drop(self_.bucket_ptr(i, size_of)); self_.items -= 1; } @@ -2939,7 +2951,7 @@ impl RawTableInner { // rehashed yet. Find them and re-insert them at their ideal // position. 
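// A small sketch of the branch-free mirroring performed by `set_ctrl` above
// (not part of this patch): tags written into the first `Group::WIDTH` buckets
// are replicated into the trailing control area, while every other index maps
// to itself, so unaligned group loads near the end of the table see the same
// values as the wrapped-around buckets at the start.
fn mirror_index(index: usize, buckets: usize, group_width: usize) -> usize {
    // Same expression as in `set_ctrl`:
    // ((index.wrapping_sub(Group::WIDTH)) & bucket_mask) + Group::WIDTH
    (index.wrapping_sub(group_width) & (buckets - 1)) + group_width
}

fn main() {
    let (buckets, width) = (16, 8);
    // A tag in the first group is also stored after the last real bucket...
    assert_eq!(mirror_index(3, buckets, width), buckets + 3);
    // ...while a tag past the first group is only stored at its own index.
    assert_eq!(mirror_index(10, buckets, width), 10);
}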
'outer: for i in 0..guard.buckets() { - if *guard.ctrl(i) != DELETED { + if *guard.ctrl(i) != Tag::DELETED { continue; } @@ -2961,7 +2973,7 @@ impl RawTableInner { // same unaligned group, then there is no benefit in moving // it and we can just continue to the next item. if likely(guard.is_in_same_group(i, new_i, hash)) { - guard.set_ctrl_h2(i, hash); + guard.set_ctrl_hash(i, hash); continue 'outer; } @@ -2969,9 +2981,9 @@ impl RawTableInner { // We are moving the current item to a new position. Write // our H2 to the control byte of the new position. - let prev_ctrl = guard.replace_ctrl_h2(new_i, hash); - if prev_ctrl == EMPTY { - guard.set_ctrl(i, EMPTY); + let prev_ctrl = guard.replace_ctrl_hash(new_i, hash); + if prev_ctrl == Tag::EMPTY { + guard.set_ctrl(i, Tag::EMPTY); // If the target slot is empty, simply move the current // element into the new slot and clear the old control // byte. @@ -2981,7 +2993,7 @@ impl RawTableInner { // If the target slot is occupied, swap the two elements // and then continue processing the element that we just // swapped into the old slot. - debug_assert_eq!(prev_ctrl, DELETED); + debug_assert_eq!(prev_ctrl, Tag::DELETED); ptr::swap_nonoverlapping(i_p, new_i_p, size_of); continue 'inner; } @@ -3100,7 +3112,8 @@ impl RawTableInner { fn clear_no_drop(&mut self) { if !self.is_empty_singleton() { unsafe { - self.ctrl(0).write_bytes(EMPTY, self.num_ctrl_bytes()); + self.ctrl(0) + .write_bytes(Tag::EMPTY.0, self.num_ctrl_bytes()); } } self.items = 0; @@ -3155,45 +3168,45 @@ impl RawTableInner { // Inserting and searching in the map is performed by two key functions: // - // - The `find_insert_slot` function that looks up the index of any `EMPTY` or `DELETED` - // slot in a group to be able to insert. If it doesn't find an `EMPTY` or `DELETED` + // - The `find_insert_slot` function that looks up the index of any `Tag::EMPTY` or `Tag::DELETED` + // slot in a group to be able to insert. If it doesn't find an `Tag::EMPTY` or `Tag::DELETED` // slot immediately in the first group, it jumps to the next `Group` looking for it, // and so on until it has gone through all the groups in the control bytes. // // - The `find_inner` function that looks for the index of the desired element by looking // at all the `FULL` bytes in the group. If it did not find the element right away, and - // there is no `EMPTY` byte in the group, then this means that the `find_insert_slot` + // there is no `Tag::EMPTY` byte in the group, then this means that the `find_insert_slot` // function may have found a suitable slot in the next group. Therefore, `find_inner` - // jumps further, and if it does not find the desired element and again there is no `EMPTY` + // jumps further, and if it does not find the desired element and again there is no `Tag::EMPTY` // byte, then it jumps further, and so on. The search stops only if `find_inner` function - // finds the desired element or hits an `EMPTY` slot/byte. + // finds the desired element or hits an `Tag::EMPTY` slot/byte. 
// // Accordingly, this leads to two consequences: // - // - The map must have `EMPTY` slots (bytes); + // - The map must have `Tag::EMPTY` slots (bytes); // - // - You can't just mark the byte to be erased as `EMPTY`, because otherwise the `find_inner` - // function may stumble upon an `EMPTY` byte before finding the desired element and stop + // - You can't just mark the byte to be erased as `Tag::EMPTY`, because otherwise the `find_inner` + // function may stumble upon an `Tag::EMPTY` byte before finding the desired element and stop // searching. // // Thus it is necessary to check all bytes after and before the erased element. If we are in - // a contiguous `Group` of `FULL` or `DELETED` bytes (the number of `FULL` or `DELETED` bytes + // a contiguous `Group` of `FULL` or `Tag::DELETED` bytes (the number of `FULL` or `Tag::DELETED` bytes // before and after is greater than or equal to `Group::WIDTH`), then we must mark our byte as - // `DELETED` in order for the `find_inner` function to go further. On the other hand, if there - // is at least one `EMPTY` slot in the `Group`, then the `find_inner` function will still stumble - // upon an `EMPTY` byte, so we can safely mark our erased byte as `EMPTY` as well. + // `Tag::DELETED` in order for the `find_inner` function to go further. On the other hand, if there + // is at least one `Tag::EMPTY` slot in the `Group`, then the `find_inner` function will still stumble + // upon an `Tag::EMPTY` byte, so we can safely mark our erased byte as `Tag::EMPTY` as well. // // Finally, since `index_before == (index.wrapping_sub(Group::WIDTH) & self.bucket_mask) == index` // and given all of the above, tables smaller than the group width (self.buckets() < Group::WIDTH) - // cannot have `DELETED` bytes. + // cannot have `Tag::DELETED` bytes. // // Note that in this context `leading_zeros` refers to the bytes at the end of a group, while // `trailing_zeros` refers to the bytes at the beginning of a group. let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { - DELETED + Tag::DELETED } else { self.growth_left += 1; - EMPTY + Tag::EMPTY }; // SAFETY: the caller must uphold the safety contract for `erase` method. self.set_ctrl(index, ctrl); @@ -3484,7 +3497,7 @@ impl RawIterRange { // Load the first group and advance ctrl to point to the next group // SAFETY: The caller must uphold the safety rules for the [`RawIterRange::new`] - let current_group = Group::load_aligned(ctrl).match_full(); + let current_group = Group::load_aligned(ctrl.cast()).match_full(); let next_ctrl = ctrl.add(Group::WIDTH); Self { @@ -3558,7 +3571,9 @@ impl RawIterRange { // than the group size where the trailing control bytes are all // EMPTY. On larger tables self.end is guaranteed to be aligned // to the group size (since tables are power-of-two sized). - self.current_group = Group::load_aligned(self.next_ctrl).match_full().into_iter(); + self.current_group = Group::load_aligned(self.next_ctrl.cast()) + .match_full() + .into_iter(); self.data = self.data.next_n(Group::WIDTH); self.next_ctrl = self.next_ctrl.add(Group::WIDTH); } @@ -3637,7 +3652,9 @@ impl RawIterRange { // The last `self.next_ctrl`, whose index would be `self.buckets()`, will never // actually be read, since we should have already yielded all the elements of // the table. 
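// A simplified standalone sketch of the erase rule explained above (not part of
// this patch). Each side of the erased slot is modelled as a plain `u8` bitmask
// (bit i set == "slot i is EMPTY") instead of hashbrown's `BitMask` type, with a
// group width of 8. If the nearest EMPTY tags on both sides are far enough away
// that some full group window could cover the erased slot without seeing any
// EMPTY, the slot must become DELETED so probes keep going; otherwise EMPTY is
// safe and `growth_left` can be given back.
const WIDTH: u32 = 8;

fn erased_tag(empty_before: u8, empty_after: u8) -> &'static str {
    if empty_before.leading_zeros() + empty_after.trailing_zeros() >= WIDTH {
        "DELETED"
    } else {
        "EMPTY"
    }
}

fn main() {
    // No EMPTY tag anywhere in the neighbouring groups: leave a tombstone.
    assert_eq!(erased_tag(0b0000_0000, 0b0000_0000), "DELETED");
    // An EMPTY three slots before and another four slots after: every group
    // window covering the erased slot also sees an EMPTY, so EMPTY is fine.
    assert_eq!(erased_tag(0b0010_0000, 0b0001_0000), "EMPTY");
}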
- self.current_group = Group::load_aligned(self.next_ctrl).match_full().into_iter(); + self.current_group = Group::load_aligned(self.next_ctrl.cast()) + .match_full() + .into_iter(); self.data = self.data.next_n(Group::WIDTH); self.next_ctrl = self.next_ctrl.add(Group::WIDTH); } @@ -3848,7 +3865,7 @@ impl FullBucketsIndices { self.ctrl = NonNull::new_unchecked(self.ctrl.as_ptr().add(Group::WIDTH)); // SAFETY: See explanation above. - self.current_group = Group::load_aligned(self.ctrl.as_ptr()) + self.current_group = Group::load_aligned(self.ctrl.as_ptr().cast()) .match_full() .into_iter(); self.group_first_index += Group::WIDTH; @@ -4079,14 +4096,14 @@ struct RawIterHashInner { ctrl: NonNull, // The top 7 bits of the hash. - h2_hash: u8, + tag_hash: Tag, // The sequence of groups to probe in the search. probe_seq: ProbeSeq, group: Group, - // The elements within the group with a matching h2-hash. + // The elements within the group with a matching tag-hash. bitmask: BitMaskIter, } @@ -4124,15 +4141,15 @@ impl Default for RawIterHash { impl RawIterHashInner { #[cfg_attr(feature = "inline-more", inline)] unsafe fn new(table: &RawTableInner, hash: u64) -> Self { - let h2_hash = h2(hash); + let tag_hash = Tag::full(hash); let probe_seq = table.probe_seq(hash); let group = Group::load(table.ctrl(probe_seq.pos)); - let bitmask = group.match_byte(h2_hash).into_iter(); + let bitmask = group.match_tag(tag_hash).into_iter(); RawIterHashInner { bucket_mask: table.bucket_mask, ctrl: table.ctrl, - h2_hash, + tag_hash, probe_seq, group, bitmask, @@ -4178,10 +4195,10 @@ impl Iterator for RawIterHashInner { // an actual `RawTableInner` reference to use. let index = self.probe_seq.pos; debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH); - let group_ctrl = self.ctrl.as_ptr().add(index); + let group_ctrl = self.ctrl.as_ptr().add(index).cast(); self.group = Group::load(group_ctrl); - self.bitmask = self.group.match_byte(self.h2_hash).into_iter(); + self.bitmask = self.group.match_tag(self.tag_hash).into_iter(); } } } @@ -4287,7 +4304,7 @@ mod test_map { table .table .ctrl(0) - .write_bytes(EMPTY, table.table.num_ctrl_bytes()); + .write_bytes(Tag::EMPTY.0, table.table.num_ctrl_bytes()); // SAFETY: table.capacity() is guaranteed to be smaller than table.buckets() table.table.ctrl(0).write_bytes(0, table.capacity()); diff --git a/src/raw/neon.rs b/src/raw/neon.rs index 44e82d57d..b79f139e8 100644 --- a/src/raw/neon.rs +++ b/src/raw/neon.rs @@ -1,5 +1,5 @@ use super::bitmask::BitMask; -use super::EMPTY; +use super::Tag; use core::arch::aarch64 as neon; use core::mem; use core::num::NonZeroU64; @@ -10,7 +10,7 @@ pub(crate) const BITMASK_STRIDE: usize = 8; pub(crate) const BITMASK_MASK: BitMaskWord = !0; pub(crate) const BITMASK_ITER_MASK: BitMaskWord = 0x8080_8080_8080_8080; -/// Abstraction over a group of control bytes which can be scanned in +/// Abstraction over a group of control tags which can be scanned in /// parallel. /// /// This implementation uses a 64-bit NEON value. @@ -22,69 +22,69 @@ impl Group { /// Number of bytes in the group. pub(crate) const WIDTH: usize = mem::size_of::(); - /// Returns a full group of empty bytes, suitable for use as the initial + /// Returns a full group of empty tags, suitable for use as the initial /// value for an empty hash table. /// /// This is guaranteed to be aligned to the group size. 
#[inline] - pub(crate) const fn static_empty() -> &'static [u8; Group::WIDTH] { + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { #[repr(C)] - struct AlignedBytes { + struct AlignedTags { _align: [Group; 0], - bytes: [u8; Group::WIDTH], + tags: [Tag; Group::WIDTH], } - const ALIGNED_BYTES: AlignedBytes = AlignedBytes { + const ALIGNED_TAGS: AlignedTags = AlignedTags { _align: [], - bytes: [EMPTY; Group::WIDTH], + tags: [Tag::EMPTY; Group::WIDTH], }; - &ALIGNED_BYTES.bytes + &ALIGNED_TAGS.tags } - /// Loads a group of bytes starting at the given address. + /// Loads a group of tags starting at the given address. #[inline] #[allow(clippy::cast_ptr_alignment)] // unaligned load - pub(crate) unsafe fn load(ptr: *const u8) -> Self { - Group(neon::vld1_u8(ptr)) + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { + Group(neon::vld1_u8(ptr.cast())) } - /// Loads a group of bytes starting at the given address, which must be + /// Loads a group of tags starting at the given address, which must be /// aligned to `mem::align_of::()`. #[inline] #[allow(clippy::cast_ptr_alignment)] - pub(crate) unsafe fn load_aligned(ptr: *const u8) -> Self { + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); - Group(neon::vld1_u8(ptr)) + Group(neon::vld1_u8(ptr.cast())) } - /// Stores the group of bytes to the given address, which must be + /// Stores the group of tags to the given address, which must be /// aligned to `mem::align_of::()`. #[inline] #[allow(clippy::cast_ptr_alignment)] - pub(crate) unsafe fn store_aligned(self, ptr: *mut u8) { + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); - neon::vst1_u8(ptr, self.0); + neon::vst1_u8(ptr.cast(), self.0); } - /// Returns a `BitMask` indicating all bytes in the group which *may* + /// Returns a `BitMask` indicating all tags in the group which *may* /// have the given value. #[inline] - pub(crate) fn match_byte(self, byte: u8) -> BitMask { + pub(crate) fn match_tag(self, tag: Tag) -> BitMask { unsafe { - let cmp = neon::vceq_u8(self.0, neon::vdup_n_u8(byte)); + let cmp = neon::vceq_u8(self.0, neon::vdup_n_u8(tag.0)); BitMask(neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0)) } } - /// Returns a `BitMask` indicating all bytes in the group which are + /// Returns a `BitMask` indicating all tags in the group which are /// `EMPTY`. #[inline] pub(crate) fn match_empty(self) -> BitMask { - self.match_byte(EMPTY) + self.match_tag(Tag::EMPTY) } - /// Returns a `BitMask` indicating all bytes in the group which are + /// Returns a `BitMask` indicating all tags in the group which are /// `EMPTY` or `DELETED`. #[inline] pub(crate) fn match_empty_or_deleted(self) -> BitMask { @@ -94,7 +94,7 @@ impl Group { } } - /// Returns a `BitMask` indicating all bytes in the group which are full. + /// Returns a `BitMask` indicating all tags in the group which are full. 
#[inline] pub(crate) fn match_full(self) -> BitMask { unsafe { @@ -103,7 +103,7 @@ impl Group { } } - /// Performs the following transformation on all bytes in the group: + /// Performs the following transformation on all tags in the group: /// - `EMPTY => EMPTY` /// - `DELETED => EMPTY` /// - `FULL => DELETED` @@ -113,7 +113,7 @@ impl Group { // and high_bit = 0 (FULL) to 1000_0000 // // Here's this logic expanded to concrete values: - // let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false) + // let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false) // 1111_1111 | 1000_0000 = 1111_1111 // 0000_0000 | 1000_0000 = 1000_0000 unsafe { diff --git a/src/raw/sse2.rs b/src/raw/sse2.rs index 956ba5d26..87af2727b 100644 --- a/src/raw/sse2.rs +++ b/src/raw/sse2.rs @@ -1,5 +1,5 @@ use super::bitmask::BitMask; -use super::EMPTY; +use super::Tag; use core::mem; use core::num::NonZeroU16; @@ -14,7 +14,7 @@ pub(crate) const BITMASK_STRIDE: usize = 1; pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff; pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0; -/// Abstraction over a group of control bytes which can be scanned in +/// Abstraction over a group of control tags which can be scanned in /// parallel. /// /// This implementation uses a 128-bit SSE value. @@ -27,101 +27,101 @@ impl Group { /// Number of bytes in the group. pub(crate) const WIDTH: usize = mem::size_of::(); - /// Returns a full group of empty bytes, suitable for use as the initial + /// Returns a full group of empty tags, suitable for use as the initial /// value for an empty hash table. /// /// This is guaranteed to be aligned to the group size. #[inline] #[allow(clippy::items_after_statements)] - pub(crate) const fn static_empty() -> &'static [u8; Group::WIDTH] { + pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] { #[repr(C)] - struct AlignedBytes { + struct AlignedTags { _align: [Group; 0], - bytes: [u8; Group::WIDTH], + tags: [Tag; Group::WIDTH], } - const ALIGNED_BYTES: AlignedBytes = AlignedBytes { + const ALIGNED_TAGS: AlignedTags = AlignedTags { _align: [], - bytes: [EMPTY; Group::WIDTH], + tags: [Tag::EMPTY; Group::WIDTH], }; - &ALIGNED_BYTES.bytes + &ALIGNED_TAGS.tags } - /// Loads a group of bytes starting at the given address. + /// Loads a group of tags starting at the given address. #[inline] #[allow(clippy::cast_ptr_alignment)] // unaligned load - pub(crate) unsafe fn load(ptr: *const u8) -> Self { + pub(crate) unsafe fn load(ptr: *const Tag) -> Self { Group(x86::_mm_loadu_si128(ptr.cast())) } - /// Loads a group of bytes starting at the given address, which must be + /// Loads a group of tags starting at the given address, which must be /// aligned to `mem::align_of::()`. #[inline] #[allow(clippy::cast_ptr_alignment)] - pub(crate) unsafe fn load_aligned(ptr: *const u8) -> Self { + pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); Group(x86::_mm_load_si128(ptr.cast())) } - /// Stores the group of bytes to the given address, which must be + /// Stores the group of tags to the given address, which must be /// aligned to `mem::align_of::()`. 
#[inline] #[allow(clippy::cast_ptr_alignment)] - pub(crate) unsafe fn store_aligned(self, ptr: *mut u8) { + pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); x86::_mm_store_si128(ptr.cast(), self.0); } - /// Returns a `BitMask` indicating all bytes in the group which have + /// Returns a `BitMask` indicating all tags in the group which have /// the given value. #[inline] - pub(crate) fn match_byte(self, byte: u8) -> BitMask { + pub(crate) fn match_tag(self, tag: Tag) -> BitMask { #[allow( - clippy::cast_possible_wrap, // byte: u8 as i8 - // byte: i32 as u16 + clippy::cast_possible_wrap, // tag.0: Tag as i8 + // tag: i32 as u16 // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the // upper 16-bits of the i32 are zeroed: clippy::cast_sign_loss, clippy::cast_possible_truncation )] unsafe { - let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(byte as i8)); + let cmp = x86::_mm_cmpeq_epi8(self.0, x86::_mm_set1_epi8(tag.0 as i8)); BitMask(x86::_mm_movemask_epi8(cmp) as u16) } } - /// Returns a `BitMask` indicating all bytes in the group which are + /// Returns a `BitMask` indicating all tags in the group which are /// `EMPTY`. #[inline] pub(crate) fn match_empty(self) -> BitMask { - self.match_byte(EMPTY) + self.match_tag(Tag::EMPTY) } - /// Returns a `BitMask` indicating all bytes in the group which are + /// Returns a `BitMask` indicating all tags in the group which are /// `EMPTY` or `DELETED`. #[inline] pub(crate) fn match_empty_or_deleted(self) -> BitMask { #[allow( - // byte: i32 as u16 + // tag: i32 as u16 // note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the // upper 16-bits of the i32 are zeroed: clippy::cast_sign_loss, clippy::cast_possible_truncation )] unsafe { - // A byte is EMPTY or DELETED iff the high bit is set + // A tag is EMPTY or DELETED iff the high bit is set BitMask(x86::_mm_movemask_epi8(self.0) as u16) } } - /// Returns a `BitMask` indicating all bytes in the group which are full. + /// Returns a `BitMask` indicating all tags in the group which are full. #[inline] pub(crate) fn match_full(&self) -> BitMask { self.match_empty_or_deleted().invert() } - /// Performs the following transformation on all bytes in the group: + /// Performs the following transformation on all tags in the group: /// - `EMPTY => EMPTY` /// - `DELETED => EMPTY` /// - `FULL => DELETED` @@ -131,18 +131,18 @@ impl Group { // and high_bit = 0 (FULL) to 1000_0000 // // Here's this logic expanded to concrete values: - // let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false) + // let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false) // 1111_1111 | 1000_0000 = 1111_1111 // 0000_0000 | 1000_0000 = 1000_0000 #[allow( - clippy::cast_possible_wrap, // byte: 0x80_u8 as i8 + clippy::cast_possible_wrap, // tag: Tag::DELETED.0 as i8 )] unsafe { let zero = x86::_mm_setzero_si128(); let special = x86::_mm_cmpgt_epi8(zero, self.0); Group(x86::_mm_or_si128( special, - x86::_mm_set1_epi8(0x80_u8 as i8), + x86::_mm_set1_epi8(Tag::DELETED.0 as i8), )) } }
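// A standalone sketch (not part of this patch) of the invariants the new `Tag`
// newtype encodes, mirroring the definitions added in src/raw/mod.rs: both
// special values have the high bit set, only EMPTY also has the low bit set,
// and `Tag::full` keeps the high bit clear so a full tag can never collide with
// EMPTY or DELETED. The `full` below assumes a full 64-bit hash rather than
// going through MIN_HASH_LEN.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
struct Tag(u8);

impl Tag {
    const EMPTY: Tag = Tag(0b1111_1111);
    const DELETED: Tag = Tag(0b1000_0000);

    fn is_full(self) -> bool {
        self.0 & 0x80 == 0
    }

    fn is_special(self) -> bool {
        self.0 & 0x80 != 0
    }

    fn special_is_empty(self) -> bool {
        debug_assert!(self.is_special());
        self.0 & 0x01 != 0
    }

    fn full(hash: u64) -> Tag {
        // Grab the top 7 bits of a 64-bit hash; the shift keeps the high bit clear.
        Tag(((hash >> 57) & 0x7f) as u8)
    }
}

fn main() {
    assert!(Tag::EMPTY.is_special() && Tag::DELETED.is_special());
    assert!(Tag::EMPTY.special_is_empty() && !Tag::DELETED.special_is_empty());

    let tag = Tag::full(0x9e37_79b9_7f4a_7c15);
    assert!(tag.is_full());
    assert_eq!(tag, Tag(0x4f));
}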