Skip to content

Commit

Permalink
Pure HashAndNibblize function (#13453)
Browse files Browse the repository at this point in the history
This PR extracts `HashAndNibblize` as a standalone utility function that does not depend
on `HexPatriciaHashed`.

In benchmark comparisons, this change does not degrade performance
(in fact, it appears to improve it).

These are the comparisons:

- For 25 million keys:
  - (before) `hph.HashAndNibblizeKey`: 136.664s
  - (after) pure `HashAndNibblizeKey` : 108.215s

- For 35 million keys:
  - (before) `hph.HashAndNibblizeKey`: 351.598s
  - (after) pure `HashAndNibblizeKey` :   326.218s

- For 50 million keys:
  - (before) `hph.HashAndNibblizeKey`: 1055.217s
  - (after) pure `HashAndNibblizeKey` :   568.075s

---------

Co-authored-by: antonis19 <[email protected]>
Co-authored-by: awskii <[email protected]>
  • Loading branch information
3 people authored Jan 16, 2025
1 parent 19cb3b0 commit 5fcefc6
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 219 deletions.
2 changes: 1 addition & 1 deletion erigon-lib/commitment/commitment.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ func InitializeTrieAndUpdates(tv TrieVariant, mode Mode, tmpdir string) (Trie, *
default:

trie := NewHexPatriciaHashed(length.Addr, nil, tmpdir)
tree := NewUpdates(mode, tmpdir, trie.HashAndNibblizeKey)
tree := NewUpdates(mode, tmpdir, KeyToHexNibbleHash)
return trie, tree
}
}
Expand Down
201 changes: 13 additions & 188 deletions erigon-lib/commitment/hex_patricia_hashed.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,31 +342,6 @@ func (cell *cell) fillFromLowerCell(lowCell *cell, lowDepth int, preExtension []
cell.loaded = lowCell.loaded
}

// hashKey hashes plainKey with keccak and writes the digest into dest as nibbles
// (one nibble per byte, high nibble first), skipping the first hashedKeyOffset
// nibbles of the digest. hashBuf is scratch space that receives the raw digest.
//
// hashBuf must hold at least length.Hash bytes and dest at least length.Hash*2
// bytes; shorter slices panic on the first statement. Any error returned by the
// keccak Write or Read call is returned unchanged.
func hashKey(keccak keccakState, plainKey []byte, dest []byte, hashedKeyOffset int, hashBuf []byte) error {
	// Touch the highest indices once so later bounds checks can be eliminated.
	_, _ = hashBuf[length.Hash-1], dest[length.Hash*2-1]

	keccak.Reset()
	_, err := keccak.Write(plainKey)
	if err != nil {
		return err
	}
	if _, err = keccak.Read(hashBuf); err != nil {
		return err
	}

	// Drop the whole bytes covered by the offset.
	tail := hashBuf[hashedKeyOffset/2:]
	pos := 0
	if hashedKeyOffset%2 == 1 {
		// Odd offset: only the low nibble of the first remaining byte is kept.
		dest[0] = tail[0] & 0xf
		pos = 1
		tail = tail[1:]
	}
	for i := range tail {
		dest[pos] = (tail[i] >> 4) & 0xf
		dest[pos+1] = tail[i] & 0xf
		pos += 2
	}
	return nil
}

func (cell *cell) deriveHashedKeys(depth int, keccak keccakState, accountKeyLen int) error {
extraLen := 0
if cell.accountAddrLen > 0 {
Expand Down Expand Up @@ -630,7 +605,7 @@ func (hph *HexPatriciaHashed) accountLeafHashWithKey(buf, key []byte, val rlp.Rl
} else {
compactLen = len(key)/2 + 1
if len(key)&1 == 1 {
compact0 = 16 + key[0] // Odd (1<<4) + first nibble
compact0 = terminatorHexByte + key[0] // Odd (1<<4) + first nibble
ni = 1
}
}
Expand Down Expand Up @@ -749,7 +724,7 @@ func (hph *HexPatriciaHashed) computeCellHashWithStorage(cell *cell, depth int,
if err = hashKey(hph.keccak, cell.storageAddr[koffset:cell.storageAddrLen], cell.hashedExtension[:], hashedKeyOffset, cell.hashBuf[:]); err != nil {
return nil, storageRootHashIsSet, nil, err
}
cell.hashedExtension[64-hashedKeyOffset] = 16 // Add terminator
cell.hashedExtension[64-hashedKeyOffset] = terminatorHexByte // Add terminator

if cell.stateHashLen > 0 {
res := append([]byte{160}, cell.stateHash[:cell.stateHashLen]...)
Expand Down Expand Up @@ -814,7 +789,7 @@ func (hph *HexPatriciaHashed) computeCellHashWithStorage(cell *cell, depth int,
if err := hashKey(hph.keccak, cell.accountAddr[:cell.accountAddrLen], cell.hashedExtension[:], depth, cell.hashBuf[:]); err != nil {
return nil, storageRootHashIsSet, nil, err
}
cell.hashedExtension[64-depth] = 16 // Add terminator
cell.hashedExtension[64-depth] = terminatorHexByte // Add terminator
if !storageRootHashIsSet {
if cell.extLen > 0 { // Extension
if cell.hashLen == 0 {
Expand Down Expand Up @@ -919,7 +894,7 @@ func (hph *HexPatriciaHashed) computeCellHash(cell *cell, depth int, buf []byte)
if err = cell.hashStorageKey(hph.keccak, koffset, 0, hashedKeyOffset); err != nil {
return nil, err
}
cell.hashedExtension[64-hashedKeyOffset] = 16 // Add terminator
cell.hashedExtension[64-hashedKeyOffset] = terminatorHexByte // Add terminator

if cell.stateHashLen > 0 {
hph.keccak.Reset()
Expand Down Expand Up @@ -966,7 +941,7 @@ func (hph *HexPatriciaHashed) computeCellHash(cell *cell, depth int, buf []byte)
if err := cell.hashAccKey(hph.keccak, depth); err != nil {
return nil, err
}
cell.hashedExtension[64-depth] = 16 // Add terminator
cell.hashedExtension[64-depth] = terminatorHexByte // Add terminator
if !storageRootHashIsSet {
if cell.extLen > 0 { // Extension
if cell.hashLen == 0 {
Expand Down Expand Up @@ -1231,7 +1206,7 @@ func (hph *HexPatriciaHashed) ToTrie(hashedKey []byte, codeReads map[libcommon.H
extensionKey := make([]byte, extKeyLength)
copy(extensionKey, hashedExtKey)
if keyPos+1 == len(hashedKey) || keyPos+1 == 64 {
extensionKey[len(extensionKey)-1] = 16 // append terminator byte
extensionKey[len(extensionKey)-1] = terminatorHexByte // append terminator byte
}
nextNode = &trie.ShortNode{Key: extensionKey} // Value will be in the next iteration
if keyPos+1 == len(hashedKey) {
Expand Down Expand Up @@ -1329,7 +1304,7 @@ func (hph *HexPatriciaHashed) ToTrie(hashedKey []byte, codeReads map[libcommon.H

// unfoldBranchNode returns true if unfolding has been done
func (hph *HexPatriciaHashed) unfoldBranchNode(row, depth int, deleted bool) (bool, error) {
key := hexToCompact(hph.currentKey[:hph.currentKeyLen])
key := hexNibblesToCompactBytes(hph.currentKey[:hph.currentKeyLen])
branchData, fileEndTxNum, err := hph.ctx.Branch(key)
if err != nil {
return false, err
Expand All @@ -1339,7 +1314,8 @@ func (hph *HexPatriciaHashed) unfoldBranchNode(row, depth int, deleted bool) (bo
branchData = branchData[2:] // skip touch map and keep the rest
}
if hph.trace {
fmt.Printf("unfoldBranchNode prefix '%x', nibbles [%x] depth %d row %d '%x'\n", key, hph.currentKey[:hph.currentKeyLen], depth, row, branchData)
fmt.Printf("unfoldBranchNode prefix '%x', nibbles [%x] depth %d row %d '%x'\n",
key, hph.currentKey[:hph.currentKeyLen], depth, row, branchData)
}
if !hph.rootChecked && hph.currentKeyLen == 0 && len(branchData) == 0 {
// Special case - empty or deleted root
Expand All @@ -1348,7 +1324,7 @@ func (hph *HexPatriciaHashed) unfoldBranchNode(row, depth int, deleted bool) (bo
}
if len(branchData) == 0 {
log.Warn("got empty branch data during unfold", "key", hex.EncodeToString(key), "row", row, "depth", depth, "deleted", deleted)
return false, fmt.Errorf("empty branch data read during unfold, prefix %x", hexToCompact(hph.currentKey[:hph.currentKeyLen]))
return false, fmt.Errorf("empty branch data read during unfold, prefix %x", key)
}
hph.branchBefore[row] = true
bitmap := binary.BigEndian.Uint16(branchData[0:])
Expand Down Expand Up @@ -1487,16 +1463,6 @@ type skipStat struct {
accLoaded, accSkipped, accReset, storReset, storLoaded, storSkipped uint64
}

// updatedNibs renders the set bits of num as a comma-separated list of
// upper-case hex digits, lowest bit first (bit 0 -> "0", bit 15 -> "F").
// It returns an empty string when no bit is set.
func updatedNibs(num uint16) string {
	var sb strings.Builder
	for bit := 0; bit < 16; bit++ {
		if num&(1<<bit) == 0 {
			continue
		}
		if sb.Len() > 0 {
			sb.WriteByte(',')
		}
		fmt.Fprintf(&sb, "%X", bit)
	}
	return sb.String()
}

const DepthWithoutNodeHashes = 35 //nolint

func (hph *HexPatriciaHashed) createCellGetter(b []byte, updateKey []byte, row, depth int) func(nibble int, skip bool) (*cell, error) {
Expand Down Expand Up @@ -1566,6 +1532,8 @@ func (hph *HexPatriciaHashed) createCellGetter(b []byte, updateKey []byte, row,
}
}

// terminatorHexByte is the maximum nibble value plus one. It marks the end of a
// nibble line in the trie, or splits the address space from the storage space.
const terminatorHexByte = 16

// updateKind is a type of update that is being applied to the trie structure.
type updateKind uint8

Expand Down Expand Up @@ -1632,7 +1600,7 @@ func (hph *HexPatriciaHashed) fold() (err error) {
}

depth := hph.depths[row]
updateKey := hexToCompact(hph.currentKey[:updateKeyLen])
updateKey := hexNibblesToCompactBytes(hph.currentKey[:updateKeyLen])
defer func() { hph.depthsToTxNum[depth] = 0 }()

if hph.trace {
Expand Down Expand Up @@ -2567,149 +2535,6 @@ func HexTrieStateToString(enc []byte) (string, error) {
return sb.String(), nil
}

// hexToCompact converts a nibble-per-byte key into its compact encoding:
// a leading flag byte produced by makeCompactZeroByte followed by the
// remaining nibbles packed two per byte by decodeKey.
func hexToCompact(key []byte) []byte {
	flagByte, start, nibbleCount := makeCompactZeroByte(key)
	// One flag byte plus one byte per nibble pair, so the buffer is never empty.
	out := make([]byte, nibbleCount/2+1)
	out[0] = flagByte
	return decodeKey(key[start:], out)
}

// makeCompactZeroByte computes the leading byte of the compact encoding of key.
//
// It returns:
//   - compactZeroByte: 0x20 is set when key ends with the terminator; when the
//     number of nibbles (terminator excluded) is odd, 0x10 and the first nibble
//     are OR-ed in as well;
//   - keyPos: 1 when the first nibble was folded into compactZeroByte, else 0;
//   - keyLen: len(key) without the terminator.
func makeCompactZeroByte(key []byte) (compactZeroByte byte, keyPos, keyLen int) {
	keyLen = len(key)
	if hasTerm(key) {
		keyLen--
		compactZeroByte = 0x20
	}
	if keyLen&1 == 0 {
		return compactZeroByte, keyPos, keyLen
	}
	// An odd nibble count implies key is non-empty, so key[0] is safe to read.
	compactZeroByte |= 0x10 | key[0] // Odd: (1<<4) + first nibble
	keyPos = 1
	return compactZeroByte, keyPos, keyLen
}

// decodeKey packs the nibbles of key two per byte into buf, starting at buf[1]
// (buf[0] is left untouched), and returns buf. A trailing terminator in key is
// ignored. If an unpaired nibble remains at the end, it is stored in the high
// half of the byte and the low half already present in buf is preserved.
func decodeKey(key, buf []byte) []byte {
	n := len(key)
	if hasTerm(key) {
		n--
	}
	out := 1
	for in := 0; in < n; in += 2 {
		hi := key[in] << 4
		if in == n-1 {
			// Last, unpaired nibble: keep whatever low nibble buf already holds.
			buf[out] = (buf[out] & 0x0f) | hi
		} else {
			buf[out] = key[in+1] | hi
		}
		out++
	}
	return buf
}

// CompactedKeyToHex expands a compact-encoded key back into nibbles.
// An empty input is returned as is. The leading flag nibble decides whether the
// terminator appended by keybytesToHexNibbles is dropped (flag < 2) and whether
// one (odd flag set) or two leading nibbles are stripped.
func CompactedKeyToHex(compact []byte) []byte {
	if len(compact) == 0 {
		return compact
	}
	nibbles := keybytesToHexNibbles(compact)
	flags := nibbles[0]
	if flags < 2 {
		// Terminator flag is not set: remove the trailing terminator nibble.
		nibbles = nibbles[:len(nibbles)-1]
	}
	if flags&1 == 1 {
		// Odd flag: only the flag nibble itself is removed.
		return nibbles[1:]
	}
	// Even: the flag nibble and its padding nibble are removed.
	return nibbles[2:]
}

// keybytesToHexNibbles splits every byte of str into two nibbles (high nibble
// first) and appends the terminator value 16. The result always has
// len(str)*2+1 elements.
func keybytesToHexNibbles(str []byte) []byte {
	out := make([]byte, 0, len(str)*2+1)
	for _, b := range str {
		out = append(out, b>>4, b&0x0f)
	}
	return append(out, 16)
}

// hasTerm reports whether the hex key s is non-empty and ends with the
// terminator value 16.
func hasTerm(s []byte) bool {
	if len(s) == 0 {
		return false
	}
	return s[len(s)-1] == 16
}

// commonPrefixLen returns the number of leading bytes that b1 and b2 share.
func commonPrefixLen(b1, b2 []byte) int {
	limit := len(b1)
	if len(b2) < limit {
		limit = len(b2)
	}
	for i := 0; i < limit; i++ {
		if b1[i] != b2[i] {
			return i
		}
	}
	return limit
}

// nolint
// HashAndNibblizeKey hashes the provided key and expands the resulting hash into
// nibbles (each byte is split into two 4-bit values, high nibble first).
//
// The first length.Addr bytes of key (or all of key when it is shorter) are
// hashed on their own. If any bytes remain, they are hashed separately and that
// second digest is appended before nibblizing. Errors from the keccak Write and
// Read calls are not checked.
func (hph *HexPatriciaHashed) HashAndNibblizeKey(key []byte) []byte {
	split := len(key)
	if split > length.Addr {
		split = length.Addr
	}
	head, tail := key[:split], key[split:]

	digest := make([]byte, length.Hash)
	hph.keccak.Reset()
	hph.keccak.Write(head)
	hph.keccak.Read(digest[:length.Hash])

	if len(tail) > 0 {
		// Make room for the second digest right after the first one.
		digest = append(digest, make([]byte, length.Hash)...)
		hph.keccak.Reset()
		hph.keccak.Write(tail)
		hph.keccak.Read(digest[length.Hash:])
	}

	out := make([]byte, len(digest)*2)
	for i, b := range digest {
		out[2*i] = (b >> 4) & 0xf
		out[2*i+1] = b & 0xf
	}
	return out
}

// nibblize splits every byte of key into two nibbles, high nibble first, and
// returns them as a new slice of length len(key)*2.
func nibblize(key []byte) []byte { // nolint:unused
	out := make([]byte, 0, len(key)*2)
	for _, b := range key {
		out = append(out, b>>4, b&0xf)
	}
	return out
}

// compactKey packs a slice of nibbles back into bytes, two nibbles per byte
// with the high nibble first.
// It returns an error if the slice has an odd length or if any element is not a
// valid nibble (value > 0xF).
func compactKey(nibbles []byte) ([]byte, error) {
	// An odd number of nibbles cannot be paired up, so it is rejected outright.
	if len(nibbles)&1 == 1 {
		return nil, errors.New("nibbles slice has an odd length")
	}

	packed := make([]byte, 0, len(nibbles)/2)
	for pos := 0; pos < len(nibbles); pos += 2 {
		hi, lo := nibbles[pos], nibbles[pos+1]
		// Both halves must fit into four bits.
		if hi > 0xF || lo > 0xF {
			return nil, fmt.Errorf("invalid nibble at position %d or %d: 0x%X, 0x%X", pos, pos+1, hi, lo)
		}
		packed = append(packed, hi<<4|lo)
	}
	return packed, nil
}

// Grid returns the trie's cell grid. The result is an array value, so the
// caller receives a copy rather than a reference to hph's own grid.
func (hph *HexPatriciaHashed) Grid() [128][16]cell {
return hph.grid
}
Expand Down
2 changes: 1 addition & 1 deletion erigon-lib/commitment/hex_patricia_hashed_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func Benchmark_HexPatriciaHashed_Process(b *testing.B) {
require.NoError(b, err)

hph := NewHexPatriciaHashed(length.Addr, ms, ms.TempDir())
upds := WrapKeyUpdates(b, ModeDirect, hph.HashAndNibblizeKey, nil, nil)
upds := WrapKeyUpdates(b, ModeDirect, KeyToHexNibbleHash, nil, nil)
defer upds.Close()

b.ResetTimer()
Expand Down
10 changes: 5 additions & 5 deletions erigon-lib/commitment/hex_patricia_hashed_fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,13 @@ func Fuzz_ProcessUpdate(f *testing.F) {
err = ms2.applyPlainUpdates(plainKeys, updates)
require.NoError(t, err)

upds := WrapKeyUpdates(t, ModeDirect, hph.HashAndNibblizeKey, nil, nil)
upds := WrapKeyUpdates(t, ModeDirect, KeyToHexNibbleHash, nil, nil)
rootHashDirect, err := hph.Process(ctx, upds, "")
require.NoError(t, err)
require.Len(t, rootHashDirect, length.Hash, "invalid root hash length")
upds.Close()

anotherUpds := WrapKeyUpdates(t, ModeUpdate, hphAnother.HashAndNibblizeKey, nil, nil)
anotherUpds := WrapKeyUpdates(t, ModeUpdate, KeyToHexNibbleHash, nil, nil)
rootHashUpdate, err := hphAnother.Process(ctx, anotherUpds, "")
require.NoError(t, err)
require.Len(t, rootHashUpdate, length.Hash, "invalid root hash length")
Expand Down Expand Up @@ -149,7 +149,7 @@ func Fuzz_ProcessUpdates_ArbitraryUpdateCount2(f *testing.F) {
err := ms.applyPlainUpdates(plainKeys[i:i+1], updates[i:i+1])
require.NoError(t, err)

updsDirect := WrapKeyUpdates(t, ModeDirect, hph.HashAndNibblizeKey, plainKeys[i:i+1], updates[i:i+1])
updsDirect := WrapKeyUpdates(t, ModeDirect, KeyToHexNibbleHash, plainKeys[i:i+1], updates[i:i+1])
rootHashDirect, err := hph.Process(ctx, updsDirect, "")
updsDirect.Close()
require.NoError(t, err)
Expand All @@ -158,7 +158,7 @@ func Fuzz_ProcessUpdates_ArbitraryUpdateCount2(f *testing.F) {
err = ms2.applyPlainUpdates(plainKeys[i:i+1], updates[i:i+1])
require.NoError(t, err)

upds := WrapKeyUpdates(t, ModeUpdate, hphAnother.HashAndNibblizeKey, plainKeys[i:i+1], updates[i:i+1])
upds := WrapKeyUpdates(t, ModeUpdate, KeyToHexNibbleHash, plainKeys[i:i+1], updates[i:i+1])
rootHashAnother, err := hphAnother.Process(ctx, upds, "")
upds.Close()
require.NoError(t, err)
Expand Down Expand Up @@ -213,7 +213,7 @@ func Fuzz_HexPatriciaHashed_ReviewKeys(f *testing.F) {
t.Fatal(err)
}

upds := WrapKeyUpdates(t, ModeDirect, hph.HashAndNibblizeKey, plainKeys, updates)
upds := WrapKeyUpdates(t, ModeDirect, KeyToHexNibbleHash, plainKeys, updates)
defer upds.Close()

rootHash, err := hph.Process(ctx, upds, "")
Expand Down
Loading

0 comments on commit 5fcefc6

Please sign in to comment.