Skip to content

Commit

Permalink
Auto merge of #565 - clarfonthey:tag-type, r=Amanieu
Browse files Browse the repository at this point in the history
Add Tag(u8) newtype in an attempt to stop using byte-pointers for everything

The longer-term goal is to make it so that all the pointers passed around internally are either `Tag` pointers or `T` pointers, so that we know whether we're using them for indexing into the control bytes or the buckets. Then, `u8` pointers mean that we're referring to a raw allocation, rather than the control bytes.

However, the current code isn't really built for this, and the result is a lot of pointer casts everywhere. I didn't want to just replace `u8` with `Tag` everywhere, since there are some cases where we use `u8` to really mean bytes, and that would be counter to the original purpose.

One short-term gain, however, is that the constant tags and the various methods on them can now be real associated constants and methods, instead of just standalone functions and constants that have to be imported separately.

----

This change also bumps the MSRV to 1.65.0. I could bump it higher, but only 1.65 was needed, so I decided to go with the smallest bump.
  • Loading branch information
bors committed Oct 5, 2024
2 parents e057e87 + 4de01fe commit ee00971
Show file tree
Hide file tree
Showing 5 changed files with 231 additions and 214 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ keywords = ["hash", "no_std", "hashmap", "swisstable"]
categories = ["data-structures", "no-std"]
exclude = [".github", "/ci/*"]
edition = "2021"
rust-version = "1.63.0"
rust-version = "1.65.0"

[dependencies]
# For the default hasher
Expand Down
68 changes: 34 additions & 34 deletions src/raw/generic.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::bitmask::BitMask;
use super::EMPTY;
use super::Tag;
use core::{mem, ptr};

// Use the native word size as the group size. Using a 64-bit group size on
Expand All @@ -24,18 +24,18 @@ cfg_if! {
pub(crate) type BitMaskWord = GroupWord;
pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord;
pub(crate) const BITMASK_STRIDE: usize = 8;
// We only care about the highest bit of each byte for the mask.
// We only care about the highest bit of each tag for the mask.
#[allow(clippy::cast_possible_truncation, clippy::unnecessary_cast)]
pub(crate) const BITMASK_MASK: BitMaskWord = 0x8080_8080_8080_8080_u64 as GroupWord;
pub(crate) const BITMASK_MASK: BitMaskWord = u64::from_ne_bytes([Tag::DELETED.0; 8]) as GroupWord;
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;

/// Helper function to replicate a byte across a `GroupWord`.
/// Helper function to replicate a tag across a `GroupWord`.
#[inline]
fn repeat(byte: u8) -> GroupWord {
GroupWord::from_ne_bytes([byte; Group::WIDTH])
fn repeat(tag: Tag) -> GroupWord {
GroupWord::from_ne_bytes([tag.0; Group::WIDTH])
}

/// Abstraction over a group of control bytes which can be scanned in
/// Abstraction over a group of control tags which can be scanned in
/// parallel.
///
/// This implementation uses a word-sized integer.
Expand All @@ -51,94 +51,94 @@ impl Group {
/// Number of bytes in the group.
pub(crate) const WIDTH: usize = mem::size_of::<Self>();

/// Returns a full group of empty bytes, suitable for use as the initial
/// Returns a full group of empty tags, suitable for use as the initial
/// value for an empty hash table.
///
/// This is guaranteed to be aligned to the group size.
#[inline]
pub(crate) const fn static_empty() -> &'static [u8; Group::WIDTH] {
pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] {
#[repr(C)]
struct AlignedBytes {
struct AlignedTags {
_align: [Group; 0],
bytes: [u8; Group::WIDTH],
tags: [Tag; Group::WIDTH],
}
const ALIGNED_BYTES: AlignedBytes = AlignedBytes {
const ALIGNED_TAGS: AlignedTags = AlignedTags {
_align: [],
bytes: [EMPTY; Group::WIDTH],
tags: [Tag::EMPTY; Group::WIDTH],
};
&ALIGNED_BYTES.bytes
&ALIGNED_TAGS.tags
}

/// Loads a group of bytes starting at the given address.
/// Loads a group of tags starting at the given address.
#[inline]
#[allow(clippy::cast_ptr_alignment)] // unaligned load
pub(crate) unsafe fn load(ptr: *const u8) -> Self {
pub(crate) unsafe fn load(ptr: *const Tag) -> Self {
Group(ptr::read_unaligned(ptr.cast()))
}

/// Loads a group of bytes starting at the given address, which must be
/// Loads a group of tags starting at the given address, which must be
/// aligned to `mem::align_of::<Group>()`.
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn load_aligned(ptr: *const u8) -> Self {
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
// FIXME: use align_offset once it stabilizes
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
Group(ptr::read(ptr.cast()))
}

/// Stores the group of bytes to the given address, which must be
/// Stores the group of tags to the given address, which must be
/// aligned to `mem::align_of::<Group>()`.
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn store_aligned(self, ptr: *mut u8) {
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
// FIXME: use align_offset once it stabilizes
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
ptr::write(ptr.cast(), self.0);
}

/// Returns a `BitMask` indicating all bytes in the group which *may*
/// Returns a `BitMask` indicating all tags in the group which *may*
/// have the given value.
///
/// This function may return a false positive in certain cases where
/// the byte in the group differs from the searched value only in its
/// the tag in the group differs from the searched value only in its
/// lowest bit. This is fine because:
/// - This never happens for `EMPTY` and `DELETED`, only full entries.
/// - The check for key equality will catch these.
/// - This only happens if there is at least 1 true match.
/// - The chance of this happening is very low (< 1% chance per byte).
#[inline]
pub(crate) fn match_byte(self, byte: u8) -> BitMask {
pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
// This algorithm is derived from
// https://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
let cmp = self.0 ^ repeat(byte);
BitMask((cmp.wrapping_sub(repeat(0x01)) & !cmp & repeat(0x80)).to_le())
let cmp = self.0 ^ repeat(tag);
BitMask((cmp.wrapping_sub(repeat(Tag(0x01))) & !cmp & repeat(Tag::DELETED)).to_le())
}

/// Returns a `BitMask` indicating all bytes in the group which are
/// Returns a `BitMask` indicating all tags in the group which are
/// `EMPTY`.
#[inline]
pub(crate) fn match_empty(self) -> BitMask {
// If the high bit is set, then the byte must be either:
// If the high bit is set, then the tag must be either:
// 1111_1111 (EMPTY) or 1000_0000 (DELETED).
// So we can just check if the top two bits are 1 by ANDing them.
BitMask((self.0 & (self.0 << 1) & repeat(0x80)).to_le())
BitMask((self.0 & (self.0 << 1) & repeat(Tag::DELETED)).to_le())
}

/// Returns a `BitMask` indicating all bytes in the group which are
/// Returns a `BitMask` indicating all tags in the group which are
/// `EMPTY` or `DELETED`.
#[inline]
pub(crate) fn match_empty_or_deleted(self) -> BitMask {
// A byte is EMPTY or DELETED iff the high bit is set
BitMask((self.0 & repeat(0x80)).to_le())
// A tag is EMPTY or DELETED iff the high bit is set
BitMask((self.0 & repeat(Tag::DELETED)).to_le())
}

/// Returns a `BitMask` indicating all bytes in the group which are full.
/// Returns a `BitMask` indicating all tags in the group which are full.
#[inline]
pub(crate) fn match_full(self) -> BitMask {
self.match_empty_or_deleted().invert()
}

/// Performs the following transformation on all bytes in the group:
/// Performs the following transformation on all tags in the group:
/// - `EMPTY => EMPTY`
/// - `DELETED => EMPTY`
/// - `FULL => DELETED`
Expand All @@ -151,7 +151,7 @@ impl Group {
// let full = 1000_0000 (true) or 0000_0000 (false)
// !1000_0000 + 1 = 0111_1111 + 1 = 1000_0000 (no carry)
// !0000_0000 + 0 = 1111_1111 + 0 = 1111_1111 (no carry)
let full = !self.0 & repeat(0x80);
let full = !self.0 & repeat(Tag::DELETED);
Group(!full + (full >> 7))
}
}
Loading

0 comments on commit ee00971

Please sign in to comment.