-
Notifications
You must be signed in to change notification settings - Fork 452
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a filter type that lets us efficiently remove items as well as add them. It would be better to use the `bloom-filters` module at this point but it adds 50KB+ to browser bundles for very simple use cases so it's not suitable. We can revisit if Callidon/bloom-filters#70 is ever resolved.
- Loading branch information
1 parent
4afd7a9
commit 3f33627
Showing
15 changed files
with
626 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,9 @@ | |
}, | ||
{ | ||
"path": "../peer-id-factory" | ||
}, | ||
{ | ||
"path": "../utils" | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import { Fingerprint } from './fingerprint.js' | ||
import { getRandomInt } from './utils.js' | ||
|
||
/**
 * A fixed-size group of fingerprint slots. Unused slots hold `null`.
 */
export class Bucket {
  private readonly contents: Array<Fingerprint | null>

  /**
   * @param size - how many fingerprint slots this bucket has
   */
  constructor (size: number) {
    this.contents = new Array(size).fill(null)
  }

  /**
   * Report whether any slot holds a fingerprint equal to the passed one.
   *
   * @throws {TypeError} if `fingerprint` is not a `Fingerprint` instance
   */
  has (fingerprint: Fingerprint): boolean {
    if (!(fingerprint instanceof Fingerprint)) {
      throw new TypeError('Invalid Fingerprint')
    }

    return this.contents.some((fp) => {
      return fingerprint.equals(fp)
    })
  }

  /**
   * Store the fingerprint in the first empty slot.
   *
   * @returns `true` if the fingerprint was stored, `false` if every slot is
   * already occupied (the bucket is left unchanged in that case)
   * @throws {TypeError} if `fingerprint` is not a `Fingerprint` instance
   */
  add (fingerprint: Fingerprint): boolean {
    if (!(fingerprint instanceof Fingerprint)) {
      throw new TypeError('Invalid Fingerprint')
    }

    for (let i = 0; i < this.contents.length; i++) {
      if (this.contents[i] == null) {
        this.contents[i] = fingerprint
        return true
      }
    }

    // no free slot - the caller has to place the fingerprint elsewhere
    return false
  }

  /**
   * Overwrite a randomly chosen slot with the passed fingerprint.
   *
   * @returns the previous occupant of the slot, or `null` if it was empty
   * @throws {TypeError} if `fingerprint` is not a `Fingerprint` instance
   */
  swap (fingerprint: Fingerprint): Fingerprint | null {
    if (!(fingerprint instanceof Fingerprint)) {
      throw new TypeError('Invalid Fingerprint')
    }

    const i = getRandomInt(0, this.contents.length - 1)
    const current = this.contents[i]
    this.contents[i] = fingerprint

    return current
  }

  /**
   * Clear the first slot holding a fingerprint equal to the passed one.
   *
   * @returns `true` if a slot was cleared, `false` if no match was found
   * @throws {TypeError} if `fingerprint` is not a `Fingerprint` instance
   */
  remove (fingerprint: Fingerprint): boolean {
    if (!(fingerprint instanceof Fingerprint)) {
      throw new TypeError('Invalid Fingerprint')
    }

    const found = this.contents.findIndex((fp) => {
      return fingerprint.equals(fp)
    })

    if (found === -1) {
      return false
    }

    this.contents[found] = null
    return true
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' | ||
import { Bucket } from './bucket.js' | ||
import { Fingerprint, MAX_FINGERPRINT_SIZE } from './fingerprint.js' | ||
import { fnv1a, type Hash } from './hashes.js' | ||
import { getRandomInt } from './utils.js' | ||
import type { Filter } from './index.js' | ||
|
||
/**
 * Upper bound on the number of iterations of the relocation loop in
 * `CuckooFilter.add` before the insert is reported as failed
 */
const maxCuckooCount = 500
|
||
/**
 * Construction options for a `CuckooFilter`
 */
export interface CuckooFilterInit {
  /**
   * The number of items the filter is expected to hold. The filter also uses
   * this value as the modulus when deriving bucket indexes from hashes.
   */
  filterSize: number

  /**
   * The number of fingerprint slots in each bucket
   *
   * @default 4
   */
  bucketSize?: number

  /**
   * The expected size of each fingerprint, in bytes
   *
   * @default 2
   */
  fingerprintSize?: number

  /**
   * The non-cryptographic hash implementation to use, `fnv1a` is used when
   * this is omitted
   */
  hash?: Hash

  /**
   * Seed value passed to the hash, a random integer is chosen when this is
   * omitted
   */
  seed?: number
}
|
||
/**
 * A filter that supports removing items as well as adding them.
 *
 * Every item is reduced to a fingerprint that may live in one of two buckets:
 * one derived from the item's hash, the other derived by XOR-ing that index
 * with the fingerprint's hash. Buckets are created lazily on first use.
 */
export class CuckooFilter implements Filter {
  private readonly bucketSize: number
  private readonly filterSize: number
  private readonly fingerprintSize: number
  private readonly buckets: Bucket[]
  /**
   * Number of successful `add` calls minus successful `remove` calls
   */
  public count: number
  private readonly hash: Hash
  private readonly seed: number

  /**
   * @param init - filter options; `bucketSize` defaults to 4, `fingerprintSize`
   * to 2, `hash` to `fnv1a` and `seed` to a random integer
   */
  constructor (init: CuckooFilterInit) {
    this.filterSize = init.filterSize
    this.bucketSize = init.bucketSize ?? 4
    this.fingerprintSize = init.fingerprintSize ?? 2
    this.count = 0
    this.buckets = []
    this.hash = init.hash ?? fnv1a
    this.seed = init.seed ?? getRandomInt(0, Math.pow(2, 10))
  }

  /**
   * Return the bucket stored at `index`, creating it first if necessary
   */
  private bucketAt (index: number): Bucket {
    if (this.buckets[index] == null) {
      this.buckets[index] = new Bucket(this.bucketSize)
    }

    return this.buckets[index]
  }

  /**
   * Add an item to the filter. Strings are converted to bytes before hashing.
   *
   * @returns `true` if a slot was found for the item's fingerprint. `false` if
   * no slot could be freed within `maxCuckooCount` relocation attempts - in
   * that case the new fingerprint has been stored but one previously stored
   * fingerprint has been displaced from the filter and `count` is unchanged.
   */
  add (item: Uint8Array | string): boolean {
    if (typeof item === 'string') {
      item = uint8ArrayFromString(item)
    }

    const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize)
    // NOTE(review): `^` operates on 32-bit signed integers so `k` could be
    // negative if hash values exceed 2^31 - confirm the range of the hashes
    const j = this.hash.hash(item, this.seed) % this.filterSize
    const k = (j ^ fingerprint.hash()) % this.filterSize

    const first = this.bucketAt(j)
    const second = this.bucketAt(k)

    if (first.add(fingerprint) || second.add(fingerprint)) {
      this.count++
      return true
    }

    // both candidate buckets are full - start from one of them at random and
    // relocate existing fingerprints to their alternate buckets
    const rand = [j, k]
    let i = rand[getRandomInt(0, rand.length - 1)]
    let homeless = fingerprint

    for (let n = 0; n < maxCuckooCount; n++) {
      const evicted = this.bucketAt(i).swap(homeless)

      if (evicted == null) {
        // the slot was empty so nothing was displaced and we are done
        this.count++
        return true
      }

      // the alternate bucket of the evicted fingerprint
      i = (i ^ evicted.hash()) % this.filterSize

      if (this.bucketAt(i).add(evicted)) {
        this.count++
        return true
      }

      // the alternate bucket is full as well - the evicted fingerprint is the
      // one that needs a home on the next iteration
      homeless = evicted
    }

    return false
  }

  /**
   * Test whether the fingerprint of an item is present in either of its two
   * candidate buckets. Strings are converted to bytes before hashing.
   */
  has (item: Uint8Array | string): boolean {
    if (typeof item === 'string') {
      item = uint8ArrayFromString(item)
    }

    const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize)
    const j = this.hash.hash(item, this.seed) % this.filterSize

    // a bucket that was never created cannot contain the fingerprint
    if (this.buckets[j]?.has(fingerprint) === true) {
      return true
    }

    const k = (j ^ fingerprint.hash()) % this.filterSize

    return this.buckets[k]?.has(fingerprint) ?? false
  }

  /**
   * Remove one occurrence of an item's fingerprint from its candidate buckets,
   * decrementing `count` on success. Strings are converted to bytes before
   * hashing.
   *
   * @returns `true` if a fingerprint was removed, otherwise `false`
   */
  remove (item: Uint8Array | string): boolean {
    if (typeof item === 'string') {
      item = uint8ArrayFromString(item)
    }

    const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize)
    const j = this.hash.hash(item, this.seed) % this.filterSize

    if (this.buckets[j]?.remove(fingerprint) === true) {
      this.count--
      return true
    }

    const k = (j ^ fingerprint.hash()) % this.filterSize
    const inK = this.buckets[k]?.remove(fingerprint) ?? false

    if (inK) {
      this.count--
    }

    return inK
  }

  /**
   * `true` while `count`, expressed as a whole-number percentage of
   * `filterSize` (rounded down), is no greater than 95
   */
  get reliable (): boolean {
    return Math.floor(100 * (this.count / this.filterSize)) <= 95
  }
}
|
||
/**
 * Max load constants, defined in the cuckoo paper. `optimize` looks these up
 * by bucket size.
 */
const MAX_LOAD: Record<1 | 2 | 4 | 8, number> = {
  1: 0.5,
  2: 0.84,
  4: 0.95,
  8: 0.98
}
|
||
/**
 * Choose a bucket size for the requested error rate.
 *
 * @param errorRate - the target error rate, 0.001 when omitted
 * @returns 2 for rates above 0.002, 4 for rates above 0.00001, otherwise 8
 */
function calculateBucketSize (errorRate: number = 0.001): 2 | 4 | 8 {
  // [exclusive lower bound for the error rate, bucket size], checked in order
  const tiers: Array<[number, 2 | 4]> = [
    [0.002, 2],
    [0.00001, 4]
  ]

  for (const [threshold, size] of tiers) {
    if (errorRate > threshold) {
      return size
    }
  }

  return 8
}
|
||
/**
 * Derive cuckoo filter settings from an item count and a target error rate.
 *
 * @param maxItems - how many items the filter should be able to hold
 * @param errorRate - the target error rate, 0.001 when omitted
 * @returns `filterSize`, `bucketSize` and `fingerprintSize` values that can
 * be passed to the `CuckooFilter` constructor
 */
export function optimize (maxItems: number, errorRate: number = 0.001): CuckooFilterInit {
  // https://www.eecs.harvard.edu/~michaelm/postscripts/cuckoo-conext2014.pdf
  // Section 5.1 Optimal Bucket Size
  const bucketSize = calculateBucketSize(errorRate)

  // https://stackoverflow.com/questions/57555236/how-to-size-a-cuckoo-filter/57617208#57617208
  // over-provision so the expected item count sits at the max load
  const filterSize = Math.round(maxItems / MAX_LOAD[bucketSize])

  // never exceed the largest fingerprint that is supported
  const wantedFingerprintSize = Math.ceil(Math.log(filterSize / bucketSize)) + 2
  const fingerprintSize = Math.min(wantedFingerprintSize, MAX_FINGERPRINT_SIZE)

  return { filterSize, bucketSize, fingerprintSize }
}
|
||
/**
 * Create a `CuckooFilter` configured by `optimize` for the passed item count
 * and error rate.
 *
 * @param maxItems - how many items the filter should be able to hold
 * @param errorRate - the target error rate, 0.005 when omitted
 */
export function createCuckooFilter (maxItems: number, errorRate: number = 0.005): Filter {
  return new CuckooFilter(optimize(maxItems, errorRate))
}
Oops, something went wrong.