Skip to content

Commit

Permalink
Merge pull request dlang-community#129 from Hackerpilot/hash-nonsense
Browse files Browse the repository at this point in the history
Improve hash code to array index translation
  • Loading branch information
jcrapuchettes authored Nov 9, 2018
2 parents c692722 + f105d54 commit 760a040
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 88 deletions.
24 changes: 5 additions & 19 deletions src/containers/hashmap.d
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

module containers.hashmap;

private import containers.internal.hash : generateHash;
private import containers.internal.hash;
private import containers.internal.node : shouldAddGCRange;
private import stdx.allocator.mallocator : Mallocator;
private import std.traits : isBasicType, Unqual;
Expand Down Expand Up @@ -370,7 +370,7 @@ private:

static struct MapRange(MapType, IterType Type)
{
static if (Type == IterType.both)
static if (Type == IterType.both)
{
struct FrontType
{
Expand Down Expand Up @@ -452,7 +452,7 @@ private:
bool _empty;
}

void initialize(size_t bucketCount = 4)
void initialize(size_t bucketCount = DEFAULT_BUCKET_COUNT)
{
import std.conv : emplace;
assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two");
Expand All @@ -478,7 +478,7 @@ private:
{
if (buckets.length == 0)
initialize();
immutable size_t index = hashToIndex(hash);
immutable size_t index = hashToIndex(hash, buckets.length);
foreach (ref item; buckets[index])
{
if (item.hash == hash && item.key == key)
Expand Down Expand Up @@ -543,20 +543,6 @@ private:
allocator.deallocate(cast(void[]) oldBuckets);
}

/**
 * Maps a hash code to an index into `buckets` by keeping only the low-order
 * bits of the hash (bitwise AND with `buckets.length - 1`).
 *
 * Params:
 *     hash = the hash code to be mapped
 * Returns: an index that the `out` contract checks to be less than
 *     `buckets.length`
 */
size_t hashToIndex(Hash hash) const pure nothrow @safe @nogc
in
{
// With zero buckets `buckets.length - 1` would wrap around.
assert (buckets.length > 0);
}
out (result)
{
assert (result < buckets.length);
}
body
{
// NOTE(review): the mask only spreads keys over every bucket when
// buckets.length is a power of two -- confirm against initialize().
return cast(size_t)hash & (buckets.length - 1);
}

inout(Node)* find(const K key, ref size_t index) inout
{
return find(key, index, hashFunction(key));
Expand All @@ -568,7 +554,7 @@ private:

if (buckets.empty)
return null;
index = hashToIndex(hash);
index = hashToIndex(hash, buckets.length);
foreach (ref r; buckets[index])
{
if (r.hash == hash && r == key)
Expand Down
39 changes: 12 additions & 27 deletions src/containers/hashset.d
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

module containers.hashset;

private import containers.internal.hash : generateHash;
private import containers.internal.hash : generateHash, hashToIndex;
private import containers.internal.node : shouldAddGCRange;
private import stdx.allocator.mallocator : Mallocator;
private import std.traits : isBasicType;
Expand Down Expand Up @@ -108,8 +108,8 @@ struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,
*/
bool remove(T value)
{
Hash hash = hashFunction(value);
size_t index = hashToIndex(hash);
immutable Hash hash = hashFunction(value);
immutable size_t index = hashToIndex(hash, buckets.length);
static if (storeHash)
immutable bool removed = buckets[index].remove(ItemNode(hash, value));
else
Expand All @@ -134,8 +134,8 @@ struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,
{
if (buckets.length == 0 || _length == 0)
return null;
Hash hash = hashFunction(value);
immutable size_t index = hashToIndex(hash);
immutable Hash hash = hashFunction(value);
immutable index = hashToIndex(hash, buckets.length);
return buckets[index].get(value, hash);
}

Expand All @@ -150,7 +150,7 @@ struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,
if (buckets.length == 0)
initialize(4);
Hash hash = hashFunction(value);
immutable size_t index = hashToIndex(hash);
immutable size_t index = hashToIndex(hash, buckets.length);
static if (storeHash)
auto r = buckets[index].insert(ItemNode(hash, value));
else
Expand Down Expand Up @@ -200,12 +200,12 @@ struct HashSet(T, Allocator = Mallocator, alias hashFunction = generateHash!T,

private:

import containers.internal.node : shouldAddGCRange, FatNodeInfo;
import containers.internal.storage_type : ContainerStorageType;
import containers.internal.element_type : ContainerElementType;
import containers.internal.mixins : AllocatorState;
import containers.unrolledlist : UnrolledList;
import containers.internal.node : shouldAddGCRange, FatNodeInfo;
import containers.internal.storage_type : ContainerStorageType;
import std.traits : isPointer;
import core.bitop : bsf;

alias LengthType = ubyte;
alias N = FatNodeInfo!(ItemNode.sizeof, 1, 64, LengthType.sizeof);
Expand All @@ -216,8 +216,8 @@ private:

void initialize(size_t bucketCount)
{
import stdx.allocator : makeArray;
import core.memory : GC;
import stdx.allocator : makeArray;

makeBuckets(bucketCount);
static if (useGC)
Expand Down Expand Up @@ -329,13 +329,13 @@ private:
static if (storeHash)
{
immutable Hash hash = node.items[i].hash;
immutable size_t index = hashToIndex(hash);
size_t index = hashToIndex(hash, buckets.length);
buckets[index].insert(ItemNode(hash, node.items[i].value));
}
else
{
immutable Hash hash = hashFunction(node.items[i].value);
immutable size_t index = hashToIndex(hash);
size_t index = hashToIndex(hash, buckets.length);
buckets[index].insert(ItemNode(node.items[i].value));
}
}
Expand All @@ -346,21 +346,6 @@ private:
allocator.dispose(oldBuckets);
}

/**
 * Maps a hash code to an index into `buckets` by keeping only the low-order
 * bits of the hash (bitwise AND with `buckets.length - 1`).
 *
 * Params:
 *     hash = the hash code to be mapped
 * Returns: an index that the `out` contract checks to be less than
 *     `buckets.length`
 */
size_t hashToIndex(Hash hash) const pure nothrow @safe
in
{
// With zero buckets `buckets.length - 1` would wrap around.
assert (buckets.length > 0);
}
out (result)
{
import std.string : format;
// The failure message reports the computed index and the bucket count.
assert (result < buckets.length, "%d, %d".format(result, buckets.length));
}
body
{
// NOTE(review): the mask only spreads keys over every bucket when
// buckets.length is a power of two -- confirm against the callers.
return hash & (buckets.length - 1);
}

static struct Bucket
{
this(this) @disable;
Expand Down
24 changes: 24 additions & 0 deletions src/containers/internal/hash.d
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,27 @@ else
return h;
}
}

/**
 * Convert a hash code into a valid array index.
 *
 * The hash is multiplied by a Fibonacci-hashing constant and the topmost
 * `bsr(len)` bits of the product are used as the index, so that poorly
 * distributed hash codes are still spread across the whole array.
 *
 * Params:
 *     hash = the hash code to be mapped
 *     len = the length of the array that backs the hash container. It is
 *         expected to be a power of two.
 * Returns: an index less than `len` for any `len >= 1`; `0` when `len` is
 *     `0` or `1`.
 */
size_t hashToIndex(const size_t hash, const size_t len) pure nothrow @nogc @safe
{
	import core.bitop : bsr;

	// This magic number taken from
	// https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
	//
	// It's amazing how much faster this makes the hash data structures
	// when faced with low quality hash functions.
	static if (size_t.sizeof == 8)
		enum size_t multiplier = 11_400_714_819_323_198_485UL;
	else
		enum size_t multiplier = 2_654_435_769U;
	enum uint wordBits = size_t.sizeof * 8;

	// bsr(1) == 0 would make the shift count equal to the word width, which
	// is not a defined shift (hardware typically masks the count, yielding
	// the unshifted product and an out-of-bounds index). bsr(0) is undefined
	// as well. A single-slot array can only ever be indexed at 0.
	if (len <= 1)
		return 0;

	return (hash * multiplier) >>> (wordBits - bsr(len));
}

/// Number of buckets allocated when a container is initialized without an
/// explicit bucket count. Kept a power of two, which HashMap.initialize
/// asserts for its `bucketCount` argument.
enum size_t DEFAULT_BUCKET_COUNT = 8;
17 changes: 8 additions & 9 deletions src/containers/openhashset.d
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@
*/
module containers.openhashset;

private import containers.internal.hash : generateHash;
private import containers.internal.hash;
private import containers.internal.node : shouldAddGCRange;
private import stdx.allocator.mallocator : Mallocator;
private import stdx.allocator.common : stateSize;
private import stdx.allocator.mallocator : Mallocator;

/**
* Simple open-addressed hash set. Use this instead of HashSet when the size and
* quantity of the data to be inserted is small.
* Simple open-addressed hash set that uses linear probing to resolve collisions.
*
* Params:
* T = the element type of the hash set
Expand Down Expand Up @@ -149,7 +148,7 @@ struct OpenHashSet(T, Allocator = Mallocator,
bool insert(T item)
{
if (nodes.length == 0)
initialize(DEFAULT_INITIAL_CAPACITY);
initialize(DEFAULT_BUCKET_COUNT);
immutable size_t hash = hashFunction(item);
size_t index = toIndex(nodes, item, hash);
if (index == size_t.max)
Expand Down Expand Up @@ -211,12 +210,11 @@ struct OpenHashSet(T, Allocator = Mallocator,

private:

import containers.internal.storage_type : ContainerStorageType;
import containers.internal.element_type : ContainerElementType;
import containers.internal.mixins : AllocatorState;
import containers.internal.storage_type : ContainerStorageType;
import core.memory : GC;

enum DEFAULT_INITIAL_CAPACITY = 8;
enum bool useGC = supportGC && shouldAddGCRange!T;

static struct Range(ThisT)
Expand Down Expand Up @@ -286,9 +284,10 @@ private:
// Returns: size_t.max if the item was not found
static size_t toIndex(const Node[] n, T item, size_t hash)
{
immutable size_t bucketMask = (n.length - 1);
immutable size_t index = hash & bucketMask;
assert (n.length > 0);
immutable size_t index = hashToIndex(hash, n.length);
size_t i = index;
immutable bucketMask = n.length - 1;
while (n[i].used && n[i].data != item)
{
i = (i + 1) & bucketMask;
Expand Down
64 changes: 31 additions & 33 deletions test/hashmap_gc_test.d
Original file line number Diff line number Diff line change
@@ -1,48 +1,46 @@

import containers : HashMap;
import std.stdio : writefln;
import core.memory : GC;


/**
* Generate a random alphanumeric string.
*/
@trusted
string randomString (uint len)
@trusted string randomString(uint len)
{
import std.ascii : letters, digits;
import std.conv : to;
import std.random : randomSample;
import std.range : chain;
import std.ascii : letters, digits;
import std.conv : to;
import std.random : randomSample;
import std.range : chain;

auto asciiLetters = to! (dchar[]) (letters);
auto asciiDigits = to! (dchar[]) (digits);
auto asciiLetters = to!(dchar[])(letters);
auto asciiDigits = to!(dchar[])(digits);

if (len == 0)
len = 1;
if (len == 0)
len = 1;

auto res = to!string (randomSample (chain (asciiLetters, asciiDigits), len));
return res;
auto res = to!string(randomSample(chain(asciiLetters, asciiDigits), len));
return res;
}

void main ()
void main()
{
immutable iterationCount = 4;
HashMap!(string, string) hmap;

for (uint n = 1; n <= iterationCount; n++) {
foreach (i; 0 .. 1_000_000)
hmap[randomString (4)] = randomString (16);
GC.collect ();
hmap = HashMap!(string, string) (16);
GC.collect ();

foreach (i; 0 .. 1_000_000)
hmap[randomString (4)] = randomString (16);
GC.collect ();
hmap.clear ();
GC.collect ();

writefln ("iteration %s/%s finished", n, iterationCount);
}
immutable iterationCount = 4;
HashMap!(string, string) hmap;

for (uint n = 1; n <= iterationCount; n++)
{
foreach (i; 0 .. 1_000_000)
hmap[randomString(4)] = randomString(16);
GC.collect();
hmap = HashMap!(string, string)(16);
GC.collect();

foreach (i; 0 .. 1_000_000)
hmap[randomString(4)] = randomString(16);
GC.collect();
hmap.clear();
GC.collect();

writefln("iteration %s/%s finished", n, iterationCount);
}
}

0 comments on commit 760a040

Please sign in to comment.