From d951214c1fafc7f25742b3a0098b3e7a54661e35 Mon Sep 17 00:00:00 2001 From: PeterYang12 Date: Tue, 20 Feb 2024 00:49:35 -0800 Subject: [PATCH] gh-115704: Improve DJBX33A hash algorithm Accelerate Python's hash algorithm by "unoptimizing" it when DJBX33A is used as the hash algorithm. See Daniel Lemire's blog post: https://lemire.me/blog/2016/07/21/accelerating-php-hashing-by-unoptimizing-it/ Signed-off-by: PeterYang12 --- Python/pyhash.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/Python/pyhash.c b/Python/pyhash.c index 141407c265677a..e12b3c9a3e23ee 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -162,17 +162,28 @@ _Py_HashBytes(const void *src, Py_ssize_t len) const unsigned char *p = src; hash = 5381; /* DJBX33A starts with 5381 */ - switch(len) { - /* ((hash << 5) + hash) + *p == hash * 33 + *p */ - case 7: hash = ((hash << 5) + hash) + *p++; /* fallthrough */ - case 6: hash = ((hash << 5) + hash) + *p++; /* fallthrough */ - case 5: hash = ((hash << 5) + hash) + *p++; /* fallthrough */ - case 4: hash = ((hash << 5) + hash) + *p++; /* fallthrough */ - case 3: hash = ((hash << 5) + hash) + *p++; /* fallthrough */ - case 2: hash = ((hash << 5) + hash) + *p++; /* fallthrough */ - case 1: hash = ((hash << 5) + hash) + *p++; break; - default: - Py_UNREACHABLE(); + if (len >= 4) { + hash = hash * 33 * 33 * 33 * 33 + + p[0] * 33 * 33 * 33 + + p[1] * 33 * 33 + + p[2] * 33 + + p[3]; + len -= 4; + p += 4; + } + if (len >= 2) { + if (len > 2) { + hash = hash * 33 * 33 * 33 + + p[0] * 33 * 33 + + p[1] * 33 + + p[2]; + } + else { + hash = hash * 33 * 33 + p[0] * 33 + p[1]; + } + } + else if (len != 0 ) { + hash = hash * 33UL + *p; } hash ^= len; hash ^= (Py_uhash_t) _Py_HashSecret.djbx33a.suffix;