diff --git a/README.md b/README.md index 4f7d01a..43123b2 100644 --- a/README.md +++ b/README.md @@ -43,20 +43,20 @@ julia> using BenchmarkTools julia> @benchmark xxh3_64(x) setup=(x=rand(UInt8, 2^20)) BenchmarkTools.Trial: 10000 samples with 1 evaluation. - Range (min … max): 159.330 μs … 260.553 μs ┊ GC (min … max): 0.00% … 0.00% - Time (median): 165.581 μs ┊ GC (median): 0.00% - Time (mean ± σ): 167.285 μs ± 6.255 μs ┊ GC (mean ± σ): 0.00% ± 0.00% + Range (min … max): 126.968 μs … 371.351 μs ┊ GC (min … max): 0.00% … 0.00% + Time (median): 132.413 μs ┊ GC (median): 0.00% + Time (mean ± σ): 138.741 μs ± 21.616 μs ┊ GC (mean ± σ): 0.00% ± 0.00% - ▆█▇█▇▇▃▁▂▁ - ▁▁▄███████████████▇▇▇▇▆▅▄▄▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▃ - 159 μs Histogram: frequency by time 191 μs < + ▅█▇▆▆▅▅▄▃▃▃▂▁▁▁▁▁ ▁ ▂ + █████████████████████▇▇▇▇▆▆▆▆▆▆▆▅▅▆▄▆▄▄▅▄▄▅▃▄▃▄▁▃▃▅▁▆▅▆▇███▇▇ █ + 127 μs Histogram: log(frequency) by time 247 μs < Memory estimate: 80 bytes, allocs estimate: 1. -julia> 1/0.159*1000 -6289.30#MB/s +julia> 1/126*10^6 +7936.50#MB/s -# for comparison, the wrapper XXhash.jl is 4 times faster +# for comparison, the wrapper XXhash.jl is 3 times faster julia> 1/40.725*10^6 24554.94#MB/s ``` diff --git a/src/XXHashNative.jl b/src/XXHashNative.jl index 5f5e17d..5a35007 100644 --- a/src/XXHashNative.jl +++ b/src/XXHashNative.jl @@ -166,7 +166,7 @@ end function accumulate!(acc, stripe, secret, secretOffset) - secretWords = reinterpret(UInt64, @view secret[secretOffset+1:secretOffset+64]) + secretWords = @inbounds reinterpret(UInt64, @view secret[secretOffset+1:secretOffset+64]) @inbounds for i = 0:7 value = stripe[i+1] ⊻ secretWords[i+1] acc[i⊻1+1] = acc[i⊻1+1] + stripe[i+1] @@ -178,7 +178,7 @@ end function round_scramble!(acc, secret) secretWords = reinterpret(UInt64, @view secret[end-63:end]) - for i = 1:8 + @inbounds for i = 1:8 acc[i] = acc[i] ⊻ (acc[i] >> 47) acc[i] = acc[i] ⊻ secretWords[i] acc[i] = acc[i] * PRIME32_1 @@ -187,7 +187,7 @@ function round_scramble!(acc, secret) end function round_accumulate!(acc, block, secret, N) - for n = 0:N-1 + @inbounds for n = 0:N-1 _stripe = @view block[n*64+1:n*64+64] stripe = reinterpret(UInt64, _stripe) accumulate!(acc, stripe, secret, n * 8)