diff --git a/README.md b/README.md index b0d099b..84adba5 100644 --- a/README.md +++ b/README.md @@ -7,100 +7,114 @@ They're 'Faster and Smaller Than Bloom and Cuckoo Filters'. Be wary of memory usage when using this module. -This library uses dirty nifs for initializing filters over 10K elements! Make sure your environment is setup correctly. Filters of 10M elements can be initialized within 3 seconds. +This library uses dirty nifs for initializing filters over 10K elements! Make sure your environment is setup correctly. Filters of 10M elements can be initialized within 4 seconds. Within 2.5 seconds if the library is used unsafely. ## Example Usage Basic usage with default hashing is as follows: ```erlang -Filter = exor_filter:xor8(["cat", "dog", "mouse"]), -true = exor_filter:xor8_contain(Filter, "cat"), -false = exor_filter:xor8_contain(Filter, "goose"), -ok = exor_filter:xor8_free(Filter). +Filter = xor8:new(["cat", "dog", "mouse"]), +true = xor8:contain(Filter, "cat"), +false = xor8:contain(Filter, "goose"), +ok = xor8:free(Filter). ``` Filters are initialized independently: ```erlang -Filter1 = exor_filter:xor8([1, 2, 3]), -Filter2 = exor_filter:xor8([4, 5, 6]), +Filter1 = xor8:new([1, 2, 3]), +Filter2 = xor8:new([4, 5, 6]), -false = exor_filter:xor8_contain(Filter1, 6), -true = exor_filter:xor8_contain(Filter1, 2), -false = exor_filter:xor8_contain(Filter2, 2), -true = exor_filter:xor8_contain(Filter2, 5), +false = xor8:contain(Filter1, 6), +true = xor8:contain(Filter1, 2), -ok = exor_filter:xor8_free(Filter1), -ok = exor_filter:xor8_free(Filter2). +false = xor8:contain(Filter2, 2), +true = xor8:contain(Filter2, 5), + +ok = xor8:free(Filter1), +ok = xor8:free(Filter2). ``` -Do not modify the return value of the `exor_filter:xor8/1` or `/2` functions. The other APIs will not function correctly. +Do not modify the return value of the `xor8:new/1` or `/2` functions. The other APIs will not function correctly. 
## Hashing -The function `exor_filter:xor8/1` uses the default hash algorithm. To specify the hashing algorithm to use, use the `exor_filter:xor8/2` function. The filter initialization functions return values contain the context of hashing, so there is no need to specify it in the `exor_filter:xor8_contain/2` function. **Do not pre-hash the value** being passed to `exor_filter:xor8_contain/2` or `/3`. **Pass the raw value!** -* (Unless you're using pre-hashed data. See below). +* The function `xor8:new/1` uses the default hash algorithm. + * See [`erlang:phash2/1`](http://erlang.org/doc/man/erlang.html#phash2-1). +* To specify the hashing algorithm to use, use the `xor8:new/2` function. +* The filter initialization functions return values contain the context of hashing, so there is no need to specify it in the `xor8:contain/2` function. + * **Do not pre-hash the value** being passed to `xor8:contain/2` or `/3`. **Pass the raw value!** + * (Unless you're using pre-hashed data. See below). +* The default hashing mechanisms remove duplicate keys. Pre-hashed data will need to be checked by the user. An error will be returned if duplicate keys are detected. ### Example ```erlang -Filter = exor_filter:xor8(["test1", "test2", "test3"], :fast_hash), -true = exor_filter:xor8_contain(Filter, "test1"), -false = exor_filter:xor8_contain(Filter, "test6"), -ok = exor_filter:xor8_free(Filter). +Filter = xor8:new(["test1", "test2", "test3"], fast_hash), +true = xor8:contain(Filter, "test1"), +false = xor8:contain(Filter, "test6"), +ok = xor8:free(Filter). ``` ### Hashing API -* The default hash function used is [`erlang:phash/1`](http://erlang.org/doc/man/erlang.html#phash2-1) - * It can be specified with the `:default_hash` as the second argument to `exor_filter:xor8/2`. +* The default hash function used is [`erlang:phash2/1`](http://erlang.org/doc/man/erlang.html#phash2-1) + * It can be specified with the `default_hash` as the second argument to `xor8:new/2`. 
* It uses 60 bits on a 64-bit system and is consistent across nodes. - * Do not use the default hash if your list exceeds 20K strings, there will be duplicates and the initialization will spin forever. - * This is a known bug that is being worked on. - * The default hashing function should be fine for most use cases, but if the filter has over 20K elements, create your own hashing function. -* An option for a faster hashing function is available, using the option `:fash_hash`. - * This uses 64 bits, and is not consistent across nodes. + * The default hashing function should be fine for most use cases, but if the filter has over 20K elements, create your own hashing function, as hashing collisions will become more frequent. + * Errors won't happen. +* An option for a faster hashing function is available, using the option `fast_hash`. + * This uses 64 bits, and is not consistent across nodes. * The consequence is that false positives may be inconsistent across nodes. * It isn't recommended to use this method if there are more than 30K items in the filter. #### Pre-Hashing and Custom Hashing -* There is an option to pass a hash function during intialization. +* There is an option to pass a hash function during initialization. * It must return a unsigned 64 bit number and have an airty of `/1`. * Due to the Erlang nif api lacking the functionality to pass and call a function in a nif, this method creates a second list of equal length. Be weary of that. -* The has function **must** return unique keys, or else initialization will never return. - * This is a known implementation bug and will be addressed in the future. - * If your set is known to have a large amount of elements, consider pre-hashing and checking for dups before initing. - * Or consider wrapping initialization in a timed gen-server call. +* The custom hashing function **must** return unique keys. + * An error will be returned otherwise. * Make your unit testing reflect reality, if possible.
This will catch the issue early. ```erlang Fun = fun(X) -> X + 1 end, -Filter = exor_filter:xor8([1, 2, 3], Fun), -true = exor_filter:xor8_contain(Filter, 4, Fun), -false = exor_filter:xor8_contain(Filter, 1, Fun), -ok = exor_filter:xor8_free(Filter). +Filter = xor8:new([1, 2, 3], Fun), +true = xor8:contain(Filter, 4), +false = xor8:contain(Filter, 1), +ok = xor8:free(Filter). ``` -* To pass pre-hashed data, use the hash option `:none`. The `exor_filter:contain/2` and `/3` functions must be passed pre-hashed data in this case. +* To pass pre-hashed data, use the hash option `none`. The `xor8:contain/2` and `/3` functions must be passed pre-hashed data in this case. + * This too will check for duplicate hashed values, and will return an error if any are detected. ## Elixir Example ```elixir # ... -alias :exor_filter, as: XorFilter +alias :xor8, as: Xor8 # ... true = [1, 2, 3, 4] - |> XorFilter.xor8() - |> XorFilter.xor8_contain(1) + |> Xor8.new() + |> Xor8.contain(1) ``` ## Custom Return Values `contain/3` can return a custom value instead of `false` if the value isn't present in the filter: ```erlang -Filter1 = exor_filter:xor8([1, 2, 3]), -true = exor_filter:xor8_contain(Filter1, 2, {error, not_found}), -{error, not_found} = exor_filter:xor8_contain(Filter1, 6, {error, not_found}), -ok = exor_filter:xor8_free(Filter1). +Filter1 = xor8:new(["Ricky Bobby", "Cal Naughton Jr."]), +true = xor8:contain(Filter1, "Ricky Bobby", {error, not_found}), +{error, not_found} = xor8:contain(Filter1, "Reese Bobby", {error, not_found}), +ok = xor8:free(Filter1). ``` -## xor16 and Other Information +## xor16 The usage of the xor16 is the same. That structure is larger, but has a smaller false positive rate. Just sub `xor8` for `xor16` in all of the examples. -The buffered versions of initialize are provided for larger data sets. This can be faster. See `xor8_buffered/2` for more information.
+## Buffered Initialization +The buffered versions of initialize are provided for larger data sets. This can be faster. See `xor8:new_buffered/2` for more information. + +## Unsafe Usage +The underlying C library used has an issue where duplicate keys cause an infinite loop on initialization. The convenience modules `xor8` and `xor16` check for duplicates in the passed list for pre-hashed data. HOWEVER, they're just wrappers for the raw `exor_filter` module. If you're confident that the values in the list are unique, and wish to skip the checking step in initialization for greater speed, you can use the following example as a template: +``` +Filter = exor_filter:nif_wrapper([1, 2, 3], none, xor8), +true = xor8:contain(Filter, 1), +ok = xor8:free(Filter). +``` +You didn't hear it from me, though ;) Build ----- @@ -118,8 +132,6 @@ Docs $ rebar3 edoc -Coverage is low due to suggested nif error handling code, but basic functionality is covered. - ## Implements of xor filters in other programming languages * [Go](https://github.com/FastFilter/xor_filter) * Rust: [1](https://github.com/bnclabs/xorfilter) and [2](https://github.com/codri/xorfilter-rs) diff --git a/c_src/xor_filter_nif.c b/c_src/xor_filter_nif.c index d50888c..238e611 100644 --- a/c_src/xor_filter_nif.c +++ b/c_src/xor_filter_nif.c @@ -6,10 +6,6 @@ static ErlNifResourceType* xor8_resource_type; static ErlNifResourceType* xor16_resource_type; -static char* DEFAULT_HASH = "default_hash\0"; -static char* PASSED_HASH = "passed\0"; -static char* NONE_HASH = "none\0"; - typedef struct { int is_buffer_allocated; @@ -26,7 +22,8 @@ typedef struct xor16_t* filter; } xor16_filter_resource; -void destroy_xor8_filter_resource(ErlNifEnv* env, void* obj) +void +destroy_xor8_filter_resource(ErlNifEnv* env, void* obj) { xor8_filter_resource* resource = (xor8_filter_resource*) obj; @@ -41,7 +38,8 @@ void destroy_xor8_filter_resource(ErlNifEnv* env, void* obj) } } -void destroy_xor16_filter_resource(ErlNifEnv* env, void*
obj) +void +destroy_xor16_filter_resource(ErlNifEnv* env, void* obj) { xor16_filter_resource* resource = (xor16_filter_resource*) obj; @@ -56,7 +54,8 @@ void destroy_xor16_filter_resource(ErlNifEnv* env, void* obj) } } -ErlNifResourceType* xor8_filter_resource_type(ErlNifEnv* env) +ErlNifResourceType* +xor8_filter_resource_type(ErlNifEnv* env) { return enif_open_resource_type( env, @@ -68,7 +67,8 @@ ErlNifResourceType* xor8_filter_resource_type(ErlNifEnv* env) ); } -ErlNifResourceType* xor16_filter_resource_type(ErlNifEnv* env) +ErlNifResourceType* +xor16_filter_resource_type(ErlNifEnv* env) { return enif_open_resource_type( env, @@ -99,14 +99,8 @@ mk_error(ErlNifEnv* env, const char* mesg) return enif_make_tuple2(env, mk_atom(env, "error"), mk_atom(env, mesg)); } -/** - * Fills a buffer through fetching raw uint64 values from the passed list. - * This is for the pre-hashed or custom hashed values. - * - * Only method that can atually return an error. - */ static int -fill_buffer_raw(uint64_t* buffer, ErlNifEnv* env, ERL_NIF_TERM list) +fill_buffer(uint64_t* buffer, ErlNifEnv* env, ERL_NIF_TERM list) { ERL_NIF_TERM head; uint64_t current = 0; @@ -121,76 +115,6 @@ fill_buffer_raw(uint64_t* buffer, ErlNifEnv* env, ERL_NIF_TERM list) return true; } -/** - * Default hashing method. Uses built in erlang phash2. - */ -static int fill_buffer_default(uint64_t* buffer, ErlNifEnv* env, ERL_NIF_TERM list) -{ - ERL_NIF_TERM head; - for(int i = 0; enif_get_list_cell(env, list, &head, (ERL_NIF_TERM*) &list); i++) - { - buffer[i] = (uint64_t) enif_hash(ERL_NIF_PHASH2, head, 0); - } - return true; -} - -/** - * Fash hash. - * - * I wanna go FAST! 
-- Ricky Bobby - */ -static int fill_buffer_fast(uint64_t* buffer, ErlNifEnv* env, ERL_NIF_TERM list) -{ - ERL_NIF_TERM head; - for(int i = 0; enif_get_list_cell(env, list, &head, (ERL_NIF_TERM*) &list); i++) - { - buffer[i] = (uint64_t) enif_hash(ERL_NIF_INTERNAL_HASH, head, 0); - } - return true; -} - -static int -xor8_fill_buffer(xor8_filter_resource* filter, char* hash_function, - ErlNifEnv* env, ERL_NIF_TERM list) -{ - - if(strncmp(hash_function, PASSED_HASH, 7) == 0 || - strncmp(hash_function, NONE_HASH, 5) == 0) { - return fill_buffer_raw(filter->buffer, env, list); - } - else if(strncmp(hash_function, DEFAULT_HASH, 12) == 0) - { - return fill_buffer_default(filter->buffer, env, list); - } - else - { - return fill_buffer_fast(filter->buffer, env, list); - } - - return true; -} - -static int -xor16_fill_buffer(xor16_filter_resource* filter, char* hash_function, - ErlNifEnv* env, ERL_NIF_TERM list) -{ - - if(strncmp(hash_function, PASSED_HASH, 7) == 0 || - strncmp(hash_function, NONE_HASH, 5) == 0) { - return fill_buffer_raw(filter->buffer, env, list); - } - else if(strncmp(hash_function, DEFAULT_HASH, 12) == 0) - { - return fill_buffer_default(filter->buffer, env, list); - } - else - { - return fill_buffer_fast(filter->buffer, env, list); - } - - return true; -} - /* Begin xor8 nif code */ static ERL_NIF_TERM xor8_initialize(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[], int buffered) @@ -199,7 +123,7 @@ xor8_initialize(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[], int buffere unsigned list_length; uint64_t* value_list; - if(argc != 2) + if(argc != 1) { return enif_make_badarg(env); } @@ -214,10 +138,6 @@ xor8_initialize(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[], int buffere return mk_error(env, "get_list_length_error"); } - // Get hash_function - char hash_function[24]; - enif_get_atom(env, argv[1], hash_function, 24, ERL_NIF_LATIN1); - xor8_filter_resource* filter_resource = enif_alloc_resource(xor8_resource_type, 
sizeof(xor8_filter_resource)); filter_resource->is_buffer_allocated = false; @@ -233,7 +153,8 @@ xor8_initialize(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[], int buffere filter_resource->buffer = value_list; filter_resource->is_buffer_allocated = true; - if(!(xor8_fill_buffer(filter_resource, hash_function, env, argv[0]))) { + if(!(fill_buffer(filter_resource->buffer, env, argv[0]))) + { enif_release_resource(filter_resource); return mk_error(env, "convert_to_uint64_t_error"); } @@ -292,7 +213,7 @@ static ERL_NIF_TERM xor8_contain_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { - if(argc != 3) + if(argc != 2) { return enif_make_badarg(env); } @@ -304,26 +225,10 @@ xor8_contain_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) } xor8_t* filter = filter_resource->filter; - char hash_function[24]; - enif_get_atom(env, argv[1], hash_function, 24, ERL_NIF_LATIN1); - ErlNifUInt64 key; - // Hash the values or not. - if(strncmp(hash_function, PASSED_HASH, 7) == 0 || - strncmp(hash_function, NONE_HASH, 5) == 0) { - - if(!enif_get_uint64(env, argv[1], &key)) - { - return mk_error(env, "get_key_for_contains_error"); - } - } - else if(strncmp(hash_function, DEFAULT_HASH, 12) == 0) - { - key = enif_hash(ERL_NIF_PHASH2, argv[1], 0); - } - else + if(!enif_get_uint64(env, argv[1], &key)) { - key = enif_hash(ERL_NIF_INTERNAL_HASH, argv[1], 0); + return mk_error(env, "get_key_for_contains_error"); } if(xor8_contain(key, filter)) @@ -366,7 +271,7 @@ xor16_initialize(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[], int buffer unsigned list_length; uint64_t* value_list; - if(argc != 2) + if(argc != 1) { return enif_make_badarg(env); } @@ -381,10 +286,6 @@ xor16_initialize(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[], int buffer return mk_error(env, "get_list_length_error"); } - // Get hash_function - char hash_function[24]; - enif_get_atom(env, argv[1], hash_function, 24, ERL_NIF_LATIN1); - xor16_filter_resource* filter_resource = 
enif_alloc_resource(xor16_resource_type, sizeof(xor8_filter_resource)); filter_resource->is_buffer_allocated = false; @@ -400,7 +301,7 @@ xor16_initialize(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[], int buffer filter_resource->buffer = value_list; filter_resource->is_buffer_allocated = true; - if(!(xor16_fill_buffer(filter_resource, hash_function, env, argv[0]))) { + if(!(fill_buffer(filter_resource->buffer, env, argv[0]))) { enif_release_resource(filter_resource); return mk_error(env, "convert_to_uint64_t_error"); } @@ -459,7 +360,7 @@ static ERL_NIF_TERM xor16_contain_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { - if(argc != 3) + if(argc != 2) { return enif_make_badarg(env); } @@ -471,26 +372,11 @@ xor16_contain_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) } xor16_t* filter = filter_resource->filter; - char hash_function[24]; - enif_get_atom(env, argv[1], hash_function, 24, ERL_NIF_LATIN1); - ErlNifUInt64 key; // Hash the values or not. - if(strncmp(hash_function, PASSED_HASH, 7) == 0 || - strncmp(hash_function, NONE_HASH, 5) == 0) { - - if(!enif_get_uint64(env, argv[1], &key)) - { - return mk_error(env, "get_key_for_contains_error"); - } - } - else if(strncmp(hash_function, DEFAULT_HASH, 12) == 0) - { - key = enif_hash(ERL_NIF_PHASH2, argv[1], 0); - } - else + if(!enif_get_uint64(env, argv[1], &key)) { - key = enif_hash(ERL_NIF_INTERNAL_HASH, argv[1], 0); + return mk_error(env, "get_key_for_contains_error"); } if(xor16_contain(key, filter)) @@ -534,23 +420,23 @@ nif_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info) } static ErlNifFunc nif_funcs[] = { - {"xor8_initialize_nif", 2, xor8_initialize_nif}, - {"xor8_initialize_nif_dirty", 2, xor8_initialize_nif, + {"xor8_initialize_nif", 1, xor8_initialize_nif}, + {"xor8_initialize_nif_dirty", 1, xor8_initialize_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"xor8_buffered_initialize_nif", 2, xor8_buffered_initialize_nif}, - {"xor8_buffered_initialize_nif_dirty", 2, + 
{"xor8_buffered_initialize_nif", 1, xor8_buffered_initialize_nif}, + {"xor8_buffered_initialize_nif_dirty", 1, xor8_buffered_initialize_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"xor8_contain_nif", 3, xor8_contain_nif}, + {"xor8_contain_nif", 2, xor8_contain_nif}, {"xor8_free_nif", 1, xor8_free_nif}, - {"xor16_initialize_nif", 2, xor16_initialize_nif}, - {"xor16_initialize_nif_dirty", 2, xor16_initialize_nif, + {"xor16_initialize_nif", 1, xor16_initialize_nif}, + {"xor16_initialize_nif_dirty", 1, xor16_initialize_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"xor16_buffered_initialize_nif", 2, + {"xor16_buffered_initialize_nif", 1, xor16_buffered_initialize_nif}, - {"xor16_buffered_initialize_nif_dirty", 2, + {"xor16_buffered_initialize_nif_dirty", 1, xor16_buffered_initialize_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"xor16_contain_nif", 3, xor16_contain_nif}, + {"xor16_contain_nif", 2, xor16_contain_nif}, {"xor16_free_nif", 1, xor16_free_nif} }; diff --git a/doc/overview.edoc b/doc/overview.edoc new file mode 100644 index 0000000..869d351 --- /dev/null +++ b/doc/overview.edoc @@ -0,0 +1,24 @@ +-*- html -*- +exor_filter + +@copyright (C) 2019, Matthew Pope +@author Matthew Pope +@title exor_filter nif, They're 'Faster and Smaller Than Bloom and Cuckoo Filters'. + +@doc Nif wrapper for the xor_filter: +https://github.com/FastFilter/xor_singleheader + +They're 'Faster and Smaller Than Bloom and Cuckoo Filters'. + +Be wary of memory usage when using this module. + +Example Usage: + +``` +Filter = exor_filter:xor8(["test1", "test2", "test3"]), +true = exor_filter:xor8_contain(Filter, "test1"), +false = exor_filter:xor8_contain(Filter, "test6"), +ok = exor_filter:xor8_free(Filter). 
+''' + + diff --git a/priv/exor_filter.so b/priv/exor_filter.so deleted file mode 100755 index db1a814..0000000 Binary files a/priv/exor_filter.so and /dev/null differ diff --git a/src/exor_filter.app.src b/src/exor_filter.app.src index 41d1d69..efef7ff 100644 --- a/src/exor_filter.app.src +++ b/src/exor_filter.app.src @@ -1,6 +1,6 @@ {application, exor_filter, [{description, "Nif wrapper for xor_filters."}, - {vsn, "0.1.0"}, + {vsn, "0.3.0"}, {registered, []}, {applications, [kernel, diff --git a/src/exor_filter.erl b/src/exor_filter.erl index a6ac9c6..1b791d5 100644 --- a/src/exor_filter.erl +++ b/src/exor_filter.erl @@ -1,5 +1,6 @@ %%----------------------------------------------------------------------------- %% @copyright (C) 2019, Matthew Pope +%% @author Matthew Pope %% @doc Nif wrapper for the xor_filter: %% https://github.com/FastFilter/xor_singleheader %% @@ -48,7 +49,7 @@ %% The buffered versions of initialize are provided for larger data sets. %% This can be faster. See xor8_buffered/1 for more information. %% -%% @author Matthew Pope +%% Convenience modules `xor8' and `xor16' are provided. %% @end %%----------------------------------------------------------------------------- -module(exor_filter). @@ -68,7 +69,10 @@ xor16_buffered/2, xor16_contain/2, xor16_contain/3, - xor16_free/1 + xor16_free/1, + + %% Unsafe API. Use with caution. + nif_wrapper/3 ]). -on_load(init/0). @@ -92,9 +96,8 @@ xor8(List) -> %% @doc Initializes the xor filter, and runs the specified pre-defined %% hash function on each of the elements. %% -%% There are predefined hashing function provided, can can be specified by -%% using `default_hash' or `fast_hash'. To pass pre-hashed data, use -%% `none'. +%% There is a predefined hashing function provided, which can be specified by +%% using `default_hash'. To pass pre-hashed data, use `none'. %% %% OR %% @@ -121,165 +124,225 @@ xor8(List) -> %% Otherwise, an `{error, reason}' be returned.
%% @end %%----------------------------------------------------------------------------- --spec xor8(list(), atom() | fun()) -> {reference(), atom() | fun()} | {error, atom()}. - -xor8(List, HashFunction) when - HashFunction == default_hash; - HashFunction == fast_hash; - HashFunction == none -> - - case xor8_initialize(List, HashFunction) of - - {error, Reason} -> - {error, Reason}; - - Reference -> - {Reference, HashFunction} - end; - -xor8(List, HashFunction) when is_function(HashFunction) -> - - case erlang:fun_info(HashFunction, arity) of - - {arity, 1} -> - - HashedList = lists:map( - fun(Element) -> - HashFunction(Element) - end, - List), - - case xor8_initialize(HashedList, passed) of - - {error, Reason} -> - {error, Reason}; +-spec xor8(list(), atom() | fun()) -> + {reference(), atom() | fun()} | {error, atom()}. - Reference -> - {Reference, HashFunction} - end; - - _ -> - {error, wrong_arity_hash_function_error} - end; - -xor8(_, _) -> - {error, invalid_hash_method}. +xor8(List, HashFunction) -> + initialize_filter(List, HashFunction, xor8). %%----------------------------------------------------------------------------- -%% @doc Internal function that determines if the nif should be dirty scheduled -%% or not, if above 10K -%% records. +%% @doc Similar to the initialize function, but is a buffered version for lists +%% that are large. This version uses the default hash. %% @end %%----------------------------------------------------------------------------- -xor8_initialize(List, HashFunction) when length(List) >= 10000 -> - xor8_initialize_nif_dirty(List, HashFunction); - -xor8_initialize(List, HashFunction) -> - xor8_initialize_nif(List, HashFunction). +-spec xor8_buffered(list()) -> {reference(), atom()} | {error, atom()}. +xor8_buffered(List) -> + xor8_buffered(List, default_hash). + %%----------------------------------------------------------------------------- -%% @doc Nif api. Initializes the xor filter on a passed list. 
-%% If the list isn't a list of 64 unsigned numbers, an error will be thrown. +%% @doc Similar to the initialize function, but is a buffered version for lists +%% that are over 100,000,000 keys. Use for greater speed. +%% +%% See xor8/1 for example usage. %% %% Returns a `Ref<>' to a filter to be used in `contain' and `free'. %% @end %%----------------------------------------------------------------------------- --spec xor8_initialize_nif(list(), atom()) -> reference() | {error, atom()}. +-spec xor8_buffered(list(), atom() | fun()) + -> {reference(), atom() | fun()} | {error, atom()}. -xor8_initialize_nif(_, _) -> - not_loaded(?LINE). +xor8_buffered(List, HashFunction) -> + initialize_filter(List, HashFunction, xor8_buffered). %%----------------------------------------------------------------------------- -%% @doc Nif api. Initializes the xor filter on a passed list, with a dirty -%% scheduler. -%% If the list isn't a list of 64 unsigned numbers, an error will be thrown. -%% -%% Returns a `Ref<>' to a filter to be used in `contain' and `free'. +%% @doc See the xor8/2 documentation. %% @end %%----------------------------------------------------------------------------- --spec xor8_initialize_nif_dirty(list(), atom()) - -> reference() | {error, atom()}. +-spec xor16(list()) -> {reference(), default_hash} | {error, atom()}. -xor8_initialize_nif_dirty(_, _) -> - not_loaded(?LINE). +xor16(List) -> + xor16(List, default_hash). +%%----------------------------------------------------------------------------- +%% @doc Initializes the xor filter, and runs the specified pre-defined +%% hash function on each of the elements. +%% +%% See the xor8/2 documentation. +%% @end +%%----------------------------------------------------------------------------- +-spec xor16(list(), atom() | fun()) + -> {reference(), atom() | fun()} | {error, atom()}. + +xor16(List, HashFunction) -> + initialize_filter(List, HashFunction, xor16). 
+ + %%----------------------------------------------------------------------------- %% @doc Similar to the initialize function, but is a buffered version for lists %% that are large. This version uses the default hash. %% @end %%----------------------------------------------------------------------------- --spec xor8_buffered(list()) -> {reference(), atom()} | {error, atom()}. +-spec xor16_buffered(list()) -> {reference(), atom()} | {error, atom()}. -xor8_buffered(List) -> - xor8_buffered(List, default_hash). +xor16_buffered(List) -> + xor16_buffered(List, default_hash). %%----------------------------------------------------------------------------- %% @doc Similar to the initialize function, but is a buffered version for lists %% that are over 100,000,000 keys. Use for greater speed. %% -%% See xor8/1 for example usage. +%% See xor16/1 for example usage. %% %% Returns a `Ref<>' to a filter to be used in `contain' and `free'. %% @end %%----------------------------------------------------------------------------- --spec xor8_buffered(list(), atom() | fun()) +-spec xor16_buffered(list(), atom() | fun()) -> {reference(), atom() | fun()} | {error, atom()}. -xor8_buffered(List, HashFunction) when - HashFunction == default_hash; - HashFunction == fast_hash; - HashFunction == none -> +xor16_buffered(List, HashFunction) -> + initialize_filter(List, HashFunction, xor16_buffered). + +%%----------------------------------------------------------------------------- +%% @doc Function that does the actual work. Does some error checking, +%% as well as checks the passed lists for dups, and does hashing. +%% @end +%%----------------------------------------------------------------------------- +-spec initialize_filter(list(), atom(), atom()) + -> {reference(), atom()} | {error, atom()}. 
+ +initialize_filter(PassedList, default_hash, FilterType) -> + + DupedList = + lists:foldr( + fun(X, Acc) -> + [erlang:phash2(X)] ++ Acc + end, [], PassedList), + + List = lists:usort(DupedList), + nif_wrapper(List, default_hash, FilterType); + +initialize_filter(List, none, FilterType) -> + + DeDupedList = lists:usort(List), + case length(List) == length(DeDupedList) of + + true -> + nif_wrapper(List, none, FilterType); + + _ -> + {error, duplicates_in_hash_error} + end; + +initialize_filter(PassedList, HashFunction, FilterType) + when is_function(HashFunction) -> + %% Hash with the caller's fun so the stored keys match what xor8_contain computes. + DupedList = + case is_function(HashFunction, 1) of + true -> lists:map(HashFunction, PassedList); + false -> PassedList + end, + %% A wrong-arity fun is never called; the arity check below reports it. + List = lists:usort(DupedList), + + EqualLength = length(List) == length(PassedList), + case {erlang:fun_info(HashFunction, arity), EqualLength} of + + {{arity, 1}, true} -> + nif_wrapper(List, HashFunction, FilterType); + + {_, false} -> + {error, duplicates_in_hash_error}; + + _ -> + {error, wrong_arity_hash_function_error} + end; + +initialize_filter(_, _, _) -> + {error, invalid_hash_method}. + + +%%----------------------------------------------------------------------------- +%% @doc Unsafe API that is a wrapper for nif errors and bypassing duplication +%% checking. Use with caution. +%% @end +%%----------------------------------------------------------------------------- +-spec nif_wrapper(list(), atom() | fun(), atom()) + -> {error, atom()} | {reference(), atom()} | {reference(), fun()}. +nif_wrapper(List, HashFunction, FilterType) -> - case xor8_buffered_initialize(List, HashFunction) of + case filter_selector(List, FilterType) of {error, Reason} -> {error, Reason}; Reference -> {Reference, HashFunction} - end; + end.
-xor8_buffered(List, HashFunction) when is_function(HashFunction) -> - case erlang:fun_info(HashFunction, arity) of +%%----------------------------------------------------------------------------- +%% @doc Internal function that determines if the nif should be dirty scheduled +%% or not, if above 10K records. Also chooses what filter should be init'd. +%% @end +%%----------------------------------------------------------------------------- +-spec filter_selector(list(), atom()) -> reference() | {error, atom()}. - {arity, 1} -> +filter_selector(List, xor8) when length(List) >= 10000 -> + xor8_initialize_nif_dirty(List); - HashedList = lists:map( - fun(Element) -> - HashFunction(Element) - end, - List), - case xor8_buffered_initialize(HashedList, passed) of +filter_selector(List, xor8) -> + xor8_initialize_nif(List); - {error, Reason} -> - {error, Reason}; +filter_selector(List, xor8_buffered) when length(List) >= 10000 -> + xor8_buffered_initialize_nif_dirty(List); - Reference -> - {Reference, HashFunction} - end; +filter_selector(List, xor8_buffered) -> + xor8_buffered_initialize_nif(List); - _ -> - {error, wrong_arity_hash_function_error} - end. +filter_selector(List, xor16) when length(List) >= 10000 -> + xor16_initialize_nif_dirty(List); + +filter_selector(List, xor16) -> + xor16_initialize_nif(List); + +filter_selector(List, xor16_buffered) when length(List) >= 10000 -> + xor16_buffered_initialize_nif_dirty(List); + +filter_selector(List, xor16_buffered) -> + xor16_buffered_initialize_nif(List). + + +%%----------------------------------------------------------------------------- +%% @doc Nif api. Initializes the xor filter on a passed list. +%% If the list isn't a list of 64 unsigned numbers, an error will be thrown. +%% +%% Returns a `Ref<>' to a filter to be used in `contain' and `free'. +%% @end +%%----------------------------------------------------------------------------- +-spec xor8_initialize_nif(list()) -> reference() | {error, atom()}. 
+ +xor8_initialize_nif(_) -> + not_loaded(?LINE). %%----------------------------------------------------------------------------- -%% @doc Internal function that determines if the nif should be dirty scheduled -%% or not, if above 10K. -%% records. +%% @doc Nif api. Initializes the xor filter on a passed list, with a dirty +%% scheduler. +%% If the list isn't a list of 64 unsigned numbers, an error will be thrown. +%% +%% Returns a `Ref<>' to a filter to be used in `contain' and `free'. %% @end %%----------------------------------------------------------------------------- -xor8_buffered_initialize(List, HashFunction) when length(List) >= 10000 -> - xor8_buffered_initialize_nif_dirty(List, HashFunction); +-spec xor8_initialize_nif_dirty(list()) -> reference() | {error, atom()}. -xor8_buffered_initialize(List, HashFunction) -> - xor8_buffered_initialize_nif(List, HashFunction). +xor8_initialize_nif_dirty(_) -> + not_loaded(?LINE). %%----------------------------------------------------------------------------- @@ -289,10 +352,9 @@ xor8_buffered_initialize(List, HashFunction) -> %% Returns a `Ref<>' to a filter to be used in `contain' and `free'. %% @end %%----------------------------------------------------------------------------- --spec xor8_buffered_initialize_nif(list(), atom()) - -> reference() | {error, atom()}. +-spec xor8_buffered_initialize_nif(list()) -> reference() | {error, atom()}. -xor8_buffered_initialize_nif(_, _) -> +xor8_buffered_initialize_nif(_) -> not_loaded(?LINE). @@ -303,10 +365,9 @@ xor8_buffered_initialize_nif(_, _) -> %% Returns a `Ref<>' to a filter to be used in `contain' and `free'. %% @end %%----------------------------------------------------------------------------- --spec xor8_buffered_initialize_nif_dirty(list(), atom()) - -> reference() | {error, atom()}. +-spec xor8_buffered_initialize_nif_dirty(list()) -> reference() | {error, atom()}. 
-xor8_buffered_initialize_nif_dirty(_, _) -> +xor8_buffered_initialize_nif_dirty(_) -> not_loaded(?LINE). @@ -314,19 +375,24 @@ xor8_buffered_initialize_nif_dirty(_, _) -> %% @doc Tests to see if the passed argument is in the filter. The first %% argument must be the pre-initialized filter. %% -%% DO NOT PASS PRE-HASHED VALUES. The method / fun passed to the -%% initialization function is saved, and is used to compute the hash. +%% DO NOT PASS PRE-HASHED VALUES unless you've specified a pre-hashed filter. +%% The method / fun passed to the initialization function is saved, and +%% is used to compute the hash. %% %% Returns true if the element exists (or if there is a false positive). %% False if not. %% @end +%%----------------------------------------------------------------------------- -spec xor8_contain({reference(), atom() | fun()}, term()) -> true | false. +xor8_contain({Filter, default_hash}, Key) -> + xor8_contain_nif(Filter, erlang:phash2(Key)); + xor8_contain({Filter, HashFunction}, Key) when is_function(HashFunction) -> - xor8_contain_nif(Filter, HashFunction(Key), passed); + xor8_contain_nif(Filter, HashFunction(Key)); -xor8_contain({Filter, HashFunction}, Key) -> - xor8_contain_nif(Filter, Key, HashFunction). +xor8_contain({Filter, _HashFunction}, Key) -> + xor8_contain_nif(Filter, Key). %%----------------------------------------------------------------------------- @@ -337,25 +403,22 @@ xor8_contain({Filter, HashFunction}, Key) -> %% The third argument will be returned instead of `false' if the element is %% not in the filter. %% @end +%%----------------------------------------------------------------------------- -spec xor8_contain({reference(), atom() | fun()}, term(), any()) -> true | any(). 
+xor8_contain({Filter, default_hash}, Key, ReturnValue) -> + xor8_contain({Filter, default_hash}, erlang:phash2(Key), ReturnValue); + xor8_contain({Filter, HashFunction}, Key, ReturnValue) when is_function(HashFunction) -> HashedKey = HashFunction(Key), - case xor8_contain_nif(Filter, HashedKey, passed) of - - false -> - ReturnValue; - - Value -> - Value - end; + xor8_contain({Filter, HashFunction}, HashedKey, ReturnValue); -xor8_contain({Filter, HashFunction}, Key, ReturnValue) -> +xor8_contain({Filter, _HashFunction}, Key, ReturnValue) -> - case xor8_contain_nif(Filter, Key, HashFunction) of + case xor8_contain_nif(Filter, Key) of false -> ReturnValue; @@ -372,9 +435,9 @@ xor8_contain({Filter, HashFunction}, Key, ReturnValue) -> %% Returns `false' if otherwise. %% @end %%----------------------------------------------------------------------------- --spec xor8_contain_nif(reference(), term(), atom()) -> true | false. +-spec xor8_contain_nif(reference(), term()) -> true | false. -xor8_contain_nif(_, _, _) -> +xor8_contain_nif(_, _) -> not_loaded(?LINE). @@ -403,81 +466,6 @@ xor8_free_nif(_) -> not_loaded(?LINE). -%%----------------------------------------------------------------------------- -%% @doc See the xor8/2 documentation. -%% @end -%%----------------------------------------------------------------------------- --spec xor16(list()) -> {reference(), default_hash} | {error, atom()}. - -xor16(List) -> - xor16(List, default_hash). - - -%%----------------------------------------------------------------------------- -%% @doc Initializes the xor filter, and runs the specified pre-defined -%% hash function on each of the elements. -%% -%% See the xor8/2 documentation. -%% @end -%%----------------------------------------------------------------------------- --spec xor16(list(), atom() | fun()) -> {reference(), atom() | fun()} | {error, atom()}. 
- -xor16(List, HashFunction) when - HashFunction == default_hash; - HashFunction == fast_hash; - HashFunction == none -> - - case xor16_initialize(List, HashFunction) of - - {error, Reason} -> - {error, Reason}; - - Reference -> - {Reference, HashFunction} - end; - -xor16(List, HashFunction) when is_function(HashFunction) -> - - case erlang:fun_info(HashFunction, arity) of - - {arity, 1} -> - - HashedList = lists:map( - fun(Element) -> - HashFunction(Element) - end, - List), - - case xor16_initialize(HashedList, passed) of - - {error, Reason} -> - {error, Reason}; - - Reference -> - {Reference, HashFunction} - end; - - _ -> - {error, wrong_arity_hash_function_error} - end; - -xor16(_, _) -> - {error, invalid_hash_method}. - - -%%----------------------------------------------------------------------------- -%% @doc Internal function that determines if the nif should be dirty scheduled -%% or not, if above 10K -%% records. -%% @end -%%----------------------------------------------------------------------------- -xor16_initialize(List, HashFunction) when length(List) >= 10000 -> - xor16_initialize_nif_dirty(List, HashFunction); - -xor16_initialize(List, HashFunction) -> - xor16_initialize_nif(List, HashFunction). - - %%----------------------------------------------------------------------------- %% @doc Nif api. Initializes the xor filter on a passed list. %% If the list isn't a list of 64 unsigned numbers, an error will be thrown. @@ -485,9 +473,9 @@ xor16_initialize(List, HashFunction) -> %% Returns a `Ref<>' to a filter to be used in `contain' and `free'. %% @end %%----------------------------------------------------------------------------- --spec xor16_initialize_nif(list(), atom()) -> reference() | {error, atom()}. +-spec xor16_initialize_nif(list()) -> reference() | {error, atom()}. -xor16_initialize_nif(_, _) -> +xor16_initialize_nif(_) -> not_loaded(?LINE). 
@@ -498,88 +486,12 @@ xor16_initialize_nif(_, _) -> %% Returns a `Ref<>' to a filter to be used in `contain' and `free'. %% @end %%----------------------------------------------------------------------------- --spec xor16_initialize_nif_dirty(list(), atom()) - -> reference() | {error, atom()}. +-spec xor16_initialize_nif_dirty(list()) -> reference() | {error, atom()}. -xor16_initialize_nif_dirty(_, _) -> +xor16_initialize_nif_dirty(_) -> not_loaded(?LINE). -%%----------------------------------------------------------------------------- -%% @doc Similar to the initialize function, but is a buffered version for lists -%% that are large. This version uses the default hash. -%% @end -%%----------------------------------------------------------------------------- --spec xor16_buffered(list()) -> {reference(), atom()} | {error, atom()}. - -xor16_buffered(List) -> - xor16_buffered(List, default_hash). - - -%%----------------------------------------------------------------------------- -%% @doc Similar to the initialize function, but is a buffered version for lists -%% that are over 100,000,000 keys. Use for greater speed. -%% -%% See xor16/1 for example usage. -%% -%% Returns a `Ref<>' to a filter to be used in `contain' and `free'. -%% @end -%%----------------------------------------------------------------------------- --spec xor16_buffered(list(), atom() | fun()) - -> {reference(), atom() | fun()} | {error, atom()}. 
- -xor16_buffered(List, HashFunction) when - HashFunction == default_hash; - HashFunction == fast_hash; - HashFunction == none -> - - case xor16_buffered_initialize(List, HashFunction) of - - {error, Reason} -> - {error, Reason}; - - Reference -> - {Reference, HashFunction} - end; - -xor16_buffered(List, HashFunction) when is_function(HashFunction) -> - - case erlang:fun_info(HashFunction, arity) of - - {arity, 1} -> - - HashedList = lists:map( - fun(Element) -> - HashFunction(Element) - end, - List), - case xor16_buffered_initialize(HashedList, passed) of - - {error, Reason} -> - {error, Reason}; - - Reference -> - {Reference, HashFunction} - end; - - _ -> - {error, wrong_arity_hash_function_error} - end. - - -%%----------------------------------------------------------------------------- -%% @doc Internal function that determines if the nif should be dirty scheduled -%% or not, if above 10K -%% records. -%% @end -%%----------------------------------------------------------------------------- -xor16_buffered_initialize(List, HashFunction) when length(List) >= 10000 -> - xor16_buffered_initialize_nif_dirty(List, HashFunction); - -xor16_buffered_initialize(List, HashFunction) -> - xor16_buffered_initialize_nif(List, HashFunction). - - %%----------------------------------------------------------------------------- %% @doc Nif api. Similar to the initialize function, but is a buffered %% version for lists @@ -587,10 +499,9 @@ xor16_buffered_initialize(List, HashFunction) -> %% Returns a `Ref<>' to a filter to be used in `contain' and `free'. %% @end %%----------------------------------------------------------------------------- --spec xor16_buffered_initialize_nif(list(), atom()) - -> reference() | {error, atom()}. +-spec xor16_buffered_initialize_nif(list()) -> reference() | {error, atom()}. -xor16_buffered_initialize_nif(_, _) -> +xor16_buffered_initialize_nif(_) -> not_loaded(?LINE). 
@@ -601,10 +512,9 @@ xor16_buffered_initialize_nif(_, _) -> %% Returns a `Ref<>' to a filter to be used in `contain' and `free'. %% @end %%----------------------------------------------------------------------------- --spec xor16_buffered_initialize_nif_dirty(list(), atom()) - -> reference() | {error, atom()}. +-spec xor16_buffered_initialize_nif_dirty(list()) -> reference() | {error, atom()}. -xor16_buffered_initialize_nif_dirty(_, _) -> +xor16_buffered_initialize_nif_dirty(_) -> not_loaded(?LINE). @@ -620,11 +530,14 @@ xor16_buffered_initialize_nif_dirty(_, _) -> %% @end -spec xor16_contain({reference(), atom() | fun()}, term()) -> true | false. +xor16_contain({Filter, default_hash}, Key) -> + xor16_contain_nif(Filter, erlang:phash2(Key)); + xor16_contain({Filter, HashFunction}, Key) when is_function(HashFunction) -> - xor16_contain_nif(Filter, HashFunction(Key), passed); + xor16_contain_nif(Filter, HashFunction(Key)); -xor16_contain({Filter, HashFunction}, Key) -> - xor16_contain_nif(Filter, Key, HashFunction). +xor16_contain({Filter, _HashFunction}, Key) -> + xor16_contain_nif(Filter, Key). %%----------------------------------------------------------------------------- @@ -642,18 +555,11 @@ xor16_contain({Filter, HashFunction}, Key, ReturnValue) when is_function(HashFunction) -> HashedKey = HashFunction(Key), - case xor16_contain_nif(Filter, HashedKey, passed) of - - false -> - ReturnValue; - - Value -> - Value - end; + xor16_contain({Filter, HashFunction}, HashedKey, ReturnValue); -xor16_contain({Filter, HashFunction}, Key, ReturnValue) -> +xor16_contain({Filter, _HashFunction}, Key, ReturnValue) -> - case xor16_contain_nif(Filter, Key, HashFunction) of + case xor16_contain_nif(Filter, Key) of false -> ReturnValue; @@ -670,9 +576,9 @@ xor16_contain({Filter, HashFunction}, Key, ReturnValue) -> %% Returns `false' if otherwise. 
%% @end %%----------------------------------------------------------------------------- --spec xor16_contain_nif(reference(), term(), atom()) -> true | false. +-spec xor16_contain_nif(reference(), term()) -> true | false. -xor16_contain_nif(_, _, _) -> +xor16_contain_nif(_, _) -> not_loaded(?LINE). @@ -695,7 +601,7 @@ xor16_free({Filter, _}) -> %% Returns `ok'. %% @end %%----------------------------------------------------------------------------- --spec xor16_free_nif(reference()) -> ok. +-spec xor16_free_nif(any()) -> no_return(). xor16_free_nif(_) -> not_loaded(?LINE). @@ -716,4 +622,4 @@ init() -> erlang:load_nif(SoName, 0). not_loaded(Line) -> - exit({not_loaded, [{module, ?MODULE}, {line, Line}]}). + erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}). diff --git a/src/xor16.erl b/src/xor16.erl new file mode 100644 index 0000000..bbf3c96 --- /dev/null +++ b/src/xor16.erl @@ -0,0 +1,116 @@ +%%----------------------------------------------------------------------------- +%% @copyright (C) 2019, Matthew Pope +%% @author Matthew Pope +%% @doc Interface for the xor16 filter. +%% +%% Shorthand for the `exor_filter' module. For indepth documentation, see +%% that module. +%% +%% Example usage: +%% ``` +%% Filter = xor16:new(["cat", "dog", "mouse"]), +%% true = xor16:contain(Filter, "cat"), +%% false = xor16:contain(Filter, "goose"), +%% ok = xor16:free(Filter). +%% ''' +%% @end +%%----------------------------------------------------------------------------- +-module(xor16). + +-export([ + new/1, + new/2, + new_buffered/1, + new_buffered/2, + contain/2, + contain/3, + free/1 +]). + +%%----------------------------------------------------------------------------- +%% @doc Initializes the xor filter, and runs the default hash function on +%% each of the elements in the list. This should be fine for the general case. 
+%% @end +%%----------------------------------------------------------------------------- +-spec new(list()) -> {reference(), atom()} | {error, atom()}. + +new(List) -> + exor_filter:xor16(List). + + +%%----------------------------------------------------------------------------- +%% @doc Initializes the xor filter, and runs the specified hash on each of +%% the elements in the list. +%% +%% The option `default_hash' uses the `erlang:phash2/1' function. +%% The option `none' is for prehashed data. +%% A fun can be passed that will be applied to each element. +%% @end +%%----------------------------------------------------------------------------- +-spec new(list(), atom() | fun()) -> + {reference(), atom() | fun()} | {error, atom()}. + +new(List, HashFunction) -> + exor_filter:xor16(List, HashFunction). + + +%%----------------------------------------------------------------------------- +%% @doc Initializes the xor filter, and runs the default hash function on +%% each of the elements in the list. This is the buffered version, meant for +%% large filters. +%% @end +%%----------------------------------------------------------------------------- +-spec new_buffered(list()) -> {reference(), atom()} | {error, atom()}. + +new_buffered(List) -> + exor_filter:xor16_buffered(List). + + +%%----------------------------------------------------------------------------- +%% @doc Initializes the xor filter, and runs the specified hash function on +%% each of the elements in the list. This is the buffered version, meant for +%% large filters. See the `xor8:new/2' or `exor_filter:xor8/2' functions +%% for more in-depth documentation. +%% @end +%%----------------------------------------------------------------------------- +-spec new_buffered(list(), atom() | fun()) + -> {reference(), atom() | fun()} | {error, atom()}. + +new_buffered(List, HashFunction) -> + exor_filter:xor16_buffered(List, HashFunction).
+ + +%%----------------------------------------------------------------------------- +%% @doc Tests to see if the passed argument is in the filter. The first +%% argument must be the pre-initialized filter. +%% @end +%%----------------------------------------------------------------------------- +-spec contain({reference(), atom() | fun()}, term()) -> true | false. + +contain(Filter, Key) -> + exor_filter:xor16_contain(Filter, Key). + + +%%----------------------------------------------------------------------------- +%% @doc Tests to see if the passed argument is in the filter. The first +%% argument must be the pre-initialized filter. +%% +%% Will return the third argument if the element doesn't exist in the filter. +%% @end +%%----------------------------------------------------------------------------- +-spec contain({reference(), atom() | fun()}, term(), any()) -> true | any(). + +contain(Filter, Key, ReturnValue) -> + exor_filter:xor16_contain(Filter, Key, ReturnValue). + +%%----------------------------------------------------------------------------- +%% @doc Frees the memory of the filter. These can be large structures, so it +%% is recommended that this is called for cleanup. +%% +%% Returns `ok'. +%% @end +%%----------------------------------------------------------------------------- +-spec free({reference(), any()}) -> ok. + +free(Filter) -> + exor_filter:xor16_free(Filter). diff --git a/src/xor8.erl b/src/xor8.erl new file mode 100644 index 0000000..6efad7f --- /dev/null +++ b/src/xor8.erl @@ -0,0 +1,117 @@ +%%----------------------------------------------------------------------------- +%% @copyright (C) 2019, Matthew Pope +%% @author Matthew Pope +%% @doc Interface for the xor8 filter. +%% +%% Shorthand for the `exor_filter' module. For indepth documentation, see +%% that module. 
+%% +%% Example usage: +%% ``` +%% Filter = xor8:new(["cat", "dog", "mouse"]), +%% true = xor8:contain(Filter, "cat"), +%% false = xor8:contain(Filter, "goose"), +%% ok = xor8:free(Filter). +%% ''' +%% @end +%%----------------------------------------------------------------------------- +-module(xor8). + +-export([ + new/1, + new/2, + new_buffered/1, + new_buffered/2, + contain/2, + contain/3, + free/1 +]). + +%%----------------------------------------------------------------------------- +%% @doc Initializes the xor filter, and runs the default hash function on +%% each of the elements in the list. This should be fine for the general case. +%% @end +%%----------------------------------------------------------------------------- +-spec new(list()) -> {reference(), atom()} | {error, atom()}. + +new(List) -> + exor_filter:xor8(List). + + +%%----------------------------------------------------------------------------- +%% @doc Initializes the xor filter, and runs the specified hash on each of +%% the elements in the list. +%% +%% The option `default_hash' uses the `erlang:phash2/1' function. +%% The option `none' is for prehashed data. +%% A fun can be passed that will be applied to each element. +%% @end +%%----------------------------------------------------------------------------- +-spec new(list(), atom() | fun()) -> + {reference(), atom() | fun()} | {error, atom()}. + +new(List, HashFunction) -> + exor_filter:xor8(List, HashFunction). + + +%%----------------------------------------------------------------------------- +%% @doc Initializes the xor filter, and runs the default hash function on +%% each of the elements in the list. This is the buffered version, meant for +%% large filters. +%% @end +%%----------------------------------------------------------------------------- +-spec new_buffered(list()) -> {reference(), atom()} | {error, atom()}. + +new_buffered(List) -> + exor_filter:xor8_buffered(List). 
+ + +%%----------------------------------------------------------------------------- +%% @doc Initializes the xor filter, and runs the specified hash function on +%% each of the elements in the list. This is the buffered version, meant for +%% large filters. See the `xor8:new/2' or `exor_filter:xor8/2' functions +%% for more in-depth documentation. +%% @end +%%----------------------------------------------------------------------------- +-spec new_buffered(list(), atom() | fun()) + -> {reference(), atom() | fun()} | {error, atom()}. + +new_buffered(List, HashFunction) -> + exor_filter:xor8_buffered(List, HashFunction). + + +%%----------------------------------------------------------------------------- +%% @doc Tests to see if the passed argument is in the filter. The first +%% argument must be the pre-initialized filter. +%% @end +%%----------------------------------------------------------------------------- +-spec contain({reference(), atom() | fun()}, term()) -> true | false. + +contain(Filter, Key) -> + exor_filter:xor8_contain(Filter, Key). + + +%%----------------------------------------------------------------------------- +%% @doc Tests to see if the passed argument is in the filter. The first +%% argument must be the pre-initialized filter. +%% +%% Will return the third argument if the element doesn't exist in the filter. +%% @end +%%----------------------------------------------------------------------------- +-spec contain({reference(), atom() | fun()}, term(), any()) -> true | any(). + +contain(Filter, Key, ReturnValue) -> + exor_filter:xor8_contain(Filter, Key, ReturnValue). + + +%%----------------------------------------------------------------------------- +%% @doc Frees the memory of the filter. These can be large structures, so it +%% is recommended that this is called for cleanup. +%% +%% Returns `ok'. +%% @end +%%----------------------------------------------------------------------------- +-spec free({reference(), any()}) -> ok.
+ +free(Filter) -> + exor_filter:xor8_free(Filter). diff --git a/test/exor_filter_test.erl b/test/exor_filter_test.erl index 5ae8cdb..f84b5c9 100644 --- a/test/exor_filter_test.erl +++ b/test/exor_filter_test.erl @@ -5,7 +5,7 @@ basic_test_() -> %% Expensive pre-computation. - Ints = lists:seq(1, 20000), + Ints = lists:seq(1, 80000), MappingFun = fun(X) -> "test" ++ integer_to_list(X) end, Strings = lists:map(MappingFun, Ints), @@ -16,10 +16,10 @@ basic_test_() -> ?_test(xor8_buffered_filter()), ?_test(xor8_non_uint64()), ?_test(xor8_non_uint64_buffered()), - ?_test(xor8_fast_hash()), - ?_test(xor8_fast_buffered_hash()), ?_test(xor8_invalid_pre_defined_hash()), ?_test(xor8_invalid_pre_defined_hash_buffered()), + ?_test(xor8_dup_in_hash()), + ?_test(xor8_dup_in_pre_hash()), ?_test(xor8_valid_hash()), ?_test(xor8_valid_hash_buffered()), ?_test(xor8_wrong_hash_arity()), @@ -35,8 +35,6 @@ basic_test_() -> ?_test(xor8_cannot_free_twice()), ?_test(xor8_large()), ?_test(xor8_large_buffered()), - ?_test(xor8_medium_fast(Strings)), - ?_test(xor8_medium_fast_buffered(Strings)), ?_test(xor8_medium_default(Strings)), ?_test(xor8_medium_default_buffered(Strings))] }, @@ -46,10 +44,10 @@ basic_test_() -> ?_test(xor16_buffered_filter()), ?_test(xor16_non_uint64()), ?_test(xor16_non_uint64_buffered()), - ?_test(xor16_fast_hash()), - ?_test(xor16_fast_buffered_hash()), ?_test(xor16_invalid_pre_defined_hash()), ?_test(xor16_invalid_pre_defined_hash_buffered()), + ?_test(xor16_dup_in_hash()), + ?_test(xor16_dup_in_pre_hash()), ?_test(xor16_valid_hash()), ?_test(xor16_valid_hash_buffered()), ?_test(xor16_wrong_hash_arity()), @@ -65,8 +63,6 @@ basic_test_() -> ?_test(xor16_cannot_free_twice()), ?_test(xor16_large()), ?_test(xor16_large_buffered()), - ?_test(xor16_medium_fast(Strings)), - ?_test(xor16_medium_fast_buffered(Strings)), ?_test(xor16_medium_default(Strings)), ?_test(xor16_medium_default_buffered(Strings))] } @@ -75,293 +71,266 @@ basic_test_() -> %% Begin xor8 tests. 
xor8_filter() -> - Filter = exor_filter:xor8(["test1", "test2", "test3"]), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, "test1")), - ?_assertMatch(false, exor_filter:xor8_contain(Filter, "test4")), - exor_filter:xor8_free(Filter). + Filter = xor8:new(["test1", "test2", "test3"]), + ?_assertEqual(true, xor8:contain(Filter, "test1")), + ?_assertEqual(false, xor8:contain(Filter, "test4")), + xor8:free(Filter). xor8_buffered_filter() -> - Filter = exor_filter:xor8_buffered(["test1", "test2", "test3"]), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, "test2")), - ?_assertMatch(false, exor_filter:xor8_contain(Filter, "test6")), - exor_filter:xor8_free(Filter). + Filter = xor8:new_buffered(["test1", "test2", "test3"]), + ?_assertEqual(true, xor8:contain(Filter, "test2")), + ?_assertEqual(false, xor8:contain(Filter, "test6")), + xor8:free(Filter). xor8_non_uint64() -> - ?assertMatch({error, convert_to_uint64_t_error}, - exor_filter:xor8(["test"], none)). + ?assertEqual({error, convert_to_uint64_t_error}, + xor8:new(["test"], none)). xor8_non_uint64_buffered() -> - ?assertMatch({error, convert_to_uint64_t_error}, - exor_filter:xor8_buffered(["test"], none)). - -xor8_fast_hash() -> - Filter = exor_filter:xor8(["test1", "test2", "test3"], fast_hash), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, "test1")), - ?_assertMatch(false, exor_filter:xor8_contain(Filter, "test4")), - exor_filter:xor8_free(Filter). - -xor8_fast_buffered_hash() -> - Filter = exor_filter:xor8(["test1", "test2", "test3"], fast_hash), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, "test1")), - ?_assertMatch(false, exor_filter:xor8_contain(Filter, "test4")), - exor_filter:xor8_free(Filter). + ?assertEqual({error, convert_to_uint64_t_error}, + xor8:new_buffered(["test"], none)). xor8_invalid_pre_defined_hash() -> - ?_assertMatch({error, invalid_hash_method}, - exor_filter:xor8(["test"], fake_hash)). 
+ ?_assertEqual({error, invalid_hash_method}, + xor8:new(["test"], fake_hash)). xor8_invalid_pre_defined_hash_buffered() -> - ?_assertMatch({error, invalid_hash_method}, - exor_filter:xor8(["test"], fake_hash)). + ?_assertEqual({error, invalid_hash_method}, + xor8:new(["test"], fake_hash)). + +xor8_dup_in_hash() -> + Fun = fun(_X) -> 1 end, + ?_assertEqual({error, duplicates_in_hash_error}, + xor8:new([1, 2], Fun)). + +xor8_dup_in_pre_hash() -> + ?_assertEqual({error, duplicates_in_hash_error}, + xor8:new([1, 1], none)). xor8_valid_hash() -> Fun = fun(X) -> X + 1 end, - Filter = exor_filter:xor8([1, 2, 3], Fun), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, 4)), - ?_assertMatch(false, exor_filter:xor8_contain(Filter, 1)), - exor_filter:xor8_free(Filter). + Filter = xor8:new([1, 2, 3], Fun), + ?_assertEqual(true, xor8:contain(Filter, 4)), + ?_assertEqual(false, xor8:contain(Filter, 1)), + xor8:free(Filter). xor8_valid_hash_buffered() -> Fun = fun(X) -> X + 1 end, - Filter = exor_filter:xor8_buffered([1, 2, 3], Fun), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, 4)), - ?_assertMatch(false, exor_filter:xor8_contain(Filter, 1)), - exor_filter:xor8_free(Filter). + Filter = xor8:new_buffered([1, 2, 3], Fun), + ?_assertEqual(true, xor8:contain(Filter, 4)), + ?_assertEqual(false, xor8:contain(Filter, 1)), + xor8:free(Filter). xor8_wrong_hash_arity() -> Fun = fun(X, Y) -> X + Y end, - ?_assertMatch({error, wrong_arity_hash_function_error}, - exor_filter:xor8([1, 2, 3], Fun)). + ?_assertEqual({error, wrong_arity_hash_function_error}, + xor8:new([1, 2, 3], Fun)). xor8_wrong_hash_arity_buffered() -> Fun = fun(X, Y) -> X + Y end, - ?_assertMatch({error, wrong_arity_hash_function_error}, - exor_filter:xor8_buffered([1, 2, 3], Fun)). + ?_assertEqual({error, wrong_arity_hash_function_error}, + xor8:new_buffered([1, 2, 3], Fun)). 
xor8_hash_does_not_return_uint64() -> Fun = fun(_X) -> "test" end, - ?_assertMatch({error, convert_to_uint64_t_error}, - exor_filter:xor8([1, 2, 3], Fun)). + ?_assertEqual({error, convert_to_uint64_t_error}, + xor8:new([1, 2, 3], Fun)). xor8_hash_does_not_return_uint64_buffered() -> Fun = fun(_X) -> "test" end, - ?_assertMatch({error, convert_to_uint64_t_error}, - exor_filter:xor8_buffered([1, 2, 3], Fun)). + ?_assertEqual({error, convert_to_uint64_t_error}, + xor8:new_buffered([1, 2, 3], Fun)). xor8_custom_contain_return() -> - Filter = exor_filter:xor8([1, 2, 3]), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, 2, asdf)), - ?_assertMatch(asdf, exor_filter:xor8_contain(Filter, 6, asdf)), - exor_filter:xor8_free(Filter). + Filter = xor8:new([1, 2, 3]), + ?_assertEqual(true, xor8:contain(Filter, 2, asdf)), + ?_assertEqual(asdf, xor8:contain(Filter, 6, asdf)), + xor8:free(Filter). xor8_contain_hash_function_custom_return() -> Fun = fun(X) -> X + 1 end, - Filter = exor_filter:xor8([1, 2, 3], Fun), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, 2)), - ?_assertMatch({error, reason}, - exor_filter:xor8_contain(Filter, 1, {error, reason})), - exor_filter:xor8_free(Filter). + Filter = xor8:new([1, 2, 3], Fun), + ?_assertEqual(true, xor8:contain(Filter, 2)), + ?_assertEqual({error, reason}, + xor8:contain(Filter, 1, {error, reason})), + xor8:free(Filter). xor8_contain_key_not_uint64() -> - Filter = exor_filter:xor8([1, 2, 3], none), - ?_assertMatch({error, get_key_for_contains_error}, - exor_filter:xor8_contain(Filter, "test")), - exor_filter:xor8_free(Filter). + Filter = xor8:new([1, 2, 3], none), + ?_assertEqual({error, get_key_for_contains_error}, + xor8:contain(Filter, "test")), + xor8:free(Filter). xor8_contain_custom_key_not_uint64() -> - Filter = exor_filter:xor8([1, 2, 3]), - ?_assertMatch({error, get_key_for_contains_error}, - exor_filter:xor8_contain(Filter, "test", asdf)), - exor_filter:xor8_free(Filter). 
+ Filter = xor8:new([1, 2, 3]), + ?_assertEqual({error, get_key_for_contains_error}, + xor8:contain(Filter, "test", asdf)), + xor8:free(Filter). xor8_valid_filter_in_contain() -> - ?_assertMatch({error, get_key_for_contains_error}, - exor_filter:xor8_contain(asdf, 1)). + ?_assertEqual({error, get_key_for_contains_error}, + xor8:contain(asdf, 1)). xor8_valid_filter_in_free() -> - ?_assertMatch(ok, exor_filter:xor8_free(asdf)). + ?_assertEqual(ok, xor8:free(asdf)). xor8_cannot_free_twice() -> - Filter = exor_filter:xor8([1, 2, 3]), - ?_assertMatch(ok, exor_filter:xor8_free(Filter)), - ?_assertMatch(ok, exor_filter:xor8_free(Filter)). + Filter = xor8:new([1, 2, 3]), + ?_assertEqual(ok, xor8:free(Filter)), + ?_assertEqual(ok, xor8:free(Filter)). xor8_large() -> X = lists:seq(1, 10000000), - Filter = exor_filter:xor8(X, none), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, 100)), - exor_filter:xor8_free(Filter). + Filter = xor8:new(X, none), + ?_assertEqual(true, xor8:contain(Filter, 100)), + xor8:free(Filter). xor8_large_buffered() -> X = lists:seq(1, 10000000), - Filter = exor_filter:xor8_buffered(X, none), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, 100)), - exor_filter:xor8_free(Filter). - -xor8_medium_fast(Strings) -> - Filter = exor_filter:xor8(Strings, fast_hash), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, "test100")), - exor_filter:xor8_free(Filter). - -xor8_medium_fast_buffered(Strings) -> - Filter = exor_filter:xor8_buffered(Strings, fast_hash), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, "test100")), - exor_filter:xor8_free(Filter). + Filter = xor8:new_buffered(X, none), + ?_assertEqual(true, xor8:contain(Filter, 100)), + xor8:free(Filter). xor8_medium_default(Strings) -> - Filter = exor_filter:xor8(Strings), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, "test100")), - exor_filter:xor8_free(Filter). + Filter = xor8:new(Strings), + ?_assertEqual(true, xor8:contain(Filter, "test100")), + xor8:free(Filter). 
xor8_medium_default_buffered(Strings) -> - Filter = exor_filter:xor8_buffered(Strings), - ?_assertMatch(true, exor_filter:xor8_contain(Filter, "test100")), - exor_filter:xor8_free(Filter). + Filter = xor8:new_buffered(Strings), + ?_assertEqual(true, xor8:contain(Filter, "test100")), + xor8:free(Filter). %% Begin xor16 tests. xor16_filter() -> - Filter = exor_filter:xor16(["test1", "test2", "test3"]), - ?_assertMatch(true, exor_filter:xor16_contain(Filter, "test1")), - ?_assertMatch(false, exor_filter:xor16_contain(Filter, "test4")), - exor_filter:xor16_free(Filter). + Filter = xor16:new(["test1", "test2", "test3"]), + ?_assertEqual(true, xor16:contain(Filter, "test1")), + ?_assertEqual(false, xor16:contain(Filter, "test4")), + xor16:free(Filter). xor16_buffered_filter() -> - Filter = exor_filter:xor16_buffered(["test1", "test2", "test3"]), - ?_assertMatch(true, exor_filter:xor16_contain(Filter, "test2")), - ?_assertMatch(false, exor_filter:xor16_contain(Filter, "test6")), - exor_filter:xor16_free(Filter). + Filter = xor16:new_buffered(["test1", "test2", "test3"]), + ?_assertEqual(true, xor16:contain(Filter, "test2")), + ?_assertEqual(false, xor16:contain(Filter, "test6")), + xor16:free(Filter). xor16_non_uint64() -> - ?assertMatch({error, convert_to_uint64_t_error}, - exor_filter:xor16(["test"], none)). + ?assertEqual({error, convert_to_uint64_t_error}, + xor16:new(["test"], none)). xor16_non_uint64_buffered() -> - ?assertMatch({error, convert_to_uint64_t_error}, - exor_filter:xor16_buffered(["test"], none)). - -xor16_fast_hash() -> - Filter = exor_filter:xor16(["test1", "test2", "test3"], fast_hash), - ?_assertMatch(true, exor_filter:xor16_contain(Filter, "test1")), - ?_assertMatch(false, exor_filter:xor16_contain(Filter, "test4")), - exor_filter:xor16_free(Filter). 
- -xor16_fast_buffered_hash() -> - Filter = exor_filter:xor16(["test1", "test2", "test3"], fast_hash), - ?_assertMatch(true, exor_filter:xor16_contain(Filter, "test1")), - ?_assertMatch(false, exor_filter:xor16_contain(Filter, "test4")), - exor_filter:xor16_free(Filter). + ?assertEqual({error, convert_to_uint64_t_error}, + xor16:new_buffered(["test"], none)). xor16_invalid_pre_defined_hash() -> - ?_assertMatch({error, invalid_hash_method}, - exor_filter:xor16(["test"], fake_hash)). + ?_assertEqual({error, invalid_hash_method}, + xor16:new(["test"], fake_hash)). xor16_invalid_pre_defined_hash_buffered() -> - ?_assertMatch({error, invalid_hash_method}, - exor_filter:xor16(["test"], fake_hash)). + ?_assertEqual({error, invalid_hash_method}, + xor16:new(["test"], fake_hash)). + +xor16_dup_in_hash() -> + Fun = fun(X) -> 1 end, + ?_assertEqual({error, duplicates_in_hash_error}, + xor16:new([1, 2], Fun)). + +xor16_dup_in_pre_hash() -> + ?_assertEqual({error, duplicates_in_hash_error}, + xor16:new([1, 1], none)). xor16_valid_hash() -> Fun = fun(X) -> X + 1 end, - Filter = exor_filter:xor16([1, 2, 3], Fun), - ?_assertMatch(true, exor_filter:xor16_contain(Filter, 4)), - ?_assertMatch(false, exor_filter:xor16_contain(Filter, 1)), - exor_filter:xor16_free(Filter). + Filter = xor16:new([1, 2, 3], Fun), + ?_assertEqual(true, xor16:contain(Filter, 4)), + ?_assertEqual(false, xor16:contain(Filter, 1)), + xor16:free(Filter). xor16_valid_hash_buffered() -> Fun = fun(X) -> X + 1 end, - Filter = exor_filter:xor16_buffered([1, 2, 3], Fun), - ?_assertMatch(true, exor_filter:xor16_contain(Filter, 4)), - ?_assertMatch(false, exor_filter:xor16_contain(Filter, 1)), - exor_filter:xor16_free(Filter). + Filter = xor16:new_buffered([1, 2, 3], Fun), + ?_assertEqual(true, xor16:contain(Filter, 4)), + ?_assertEqual(false, xor16:contain(Filter, 1)), + xor16:free(Filter). 
+%% A two-argument fun is passed as the hash function: expects
+%% {error, wrong_arity_hash_function_error} from new/2.
xor16_wrong_hash_arity() ->
    Fun = fun(X, Y) -> X + Y end,
-    ?_assertMatch({error, wrong_arity_hash_function_error},
-        exor_filter:xor16([1, 2, 3], Fun)).
+    ?_assertEqual(
+        {error, wrong_arity_hash_function_error},
+        xor16:new([1, 2, 3], Fun)
+    ).

+%% Buffered counterpart of xor16_wrong_hash_arity/0.
xor16_wrong_hash_arity_buffered() ->
    Fun = fun(X, Y) -> X + Y end,
-    ?_assertMatch({error, wrong_arity_hash_function_error},
-        exor_filter:xor16_buffered([1, 2, 3], Fun)).
+    ?_assertEqual(
+        {error, wrong_arity_hash_function_error},
+        xor16:new_buffered([1, 2, 3], Fun)
+    ).

+%% The hash fun returns a string instead of an integer: expects
+%% {error, convert_to_uint64_t_error} from new/2.
xor16_hash_does_not_return_uint64() ->
    Fun = fun(_X) -> "test" end,
-    ?_assertMatch({error, convert_to_uint64_t_error},
-        exor_filter:xor16([1, 2, 3], Fun)).
+    ?_assertEqual(
+        {error, convert_to_uint64_t_error},
+        xor16:new([1, 2, 3], Fun)
+    ).

+%% Buffered counterpart of xor16_hash_does_not_return_uint64/0.
xor16_hash_does_not_return_uint64_buffered() ->
    Fun = fun(_X) -> "test" end,
-    ?_assertMatch({error, convert_to_uint64_t_error},
-        exor_filter:xor16_buffered([1, 2, 3], Fun)).
+    ?_assertEqual(
+        {error, convert_to_uint64_t_error},
+        xor16:new_buffered([1, 2, 3], Fun)
+    ).

+%% contain/3 with a custom "not found" value: expects true for a member (2)
+%% and the supplied atom `asdf` for a non-member (6); frees the filter after.
xor16_custom_contain_return() ->
-    Filter = exor_filter:xor16([1, 2, 3]),
-    ?_assertMatch(true, exor_filter:xor16_contain(Filter, 2, asdf)),
-    ?_assertMatch(asdf, exor_filter:xor16_contain(Filter, 6, asdf)),
-    exor_filter:xor16_free(Filter).
+    Xor16 = xor16:new([1, 2, 3]),
+    ?_assertEqual(true, xor16:contain(Xor16, 2, asdf)),
+    ?_assertEqual(asdf, xor16:contain(Xor16, 6, asdf)),
+    xor16:free(Xor16).

+%% Filter built with a custom hash fun (X + 1) over [1, 2, 3]: expects a
+%% lookup of 2 to succeed and a lookup of 1 to return the caller-supplied
+%% {error, reason} term; frees the filter after.
xor16_contain_hash_function_custom_return() ->
    Fun = fun(X) -> X + 1 end,
-    Filter = exor_filter:xor16([1, 2, 3], Fun),
-    ?_assertMatch(true, exor_filter:xor16_contain(Filter, 2)),
-    ?_assertMatch({error, reason},
-        exor_filter:xor16_contain(Filter, 1, {error, reason})),
-    exor_filter:xor16_free(Filter).
+    Xor16 = xor16:new([1, 2, 3], Fun),
+    ?_assertEqual(true, xor16:contain(Xor16, 2)),
+    ?_assertEqual(
+        {error, reason},
+        xor16:contain(Xor16, 1, {error, reason})
+    ),
+    xor16:free(Xor16).
+%% NOTE(review): ?_assertEqual builds an EUnit test object instead of asserting
+%% in place. Where one is followed by further expressions its value is
+%% dropped -- confirm that those checks are actually executed.
+
+%% Filter built with the `none` (pre-hashed) method: a string key passed to
+%% contain/2 is expected to yield {error, get_key_for_contains_error}.
xor16_contain_key_not_uint64() ->
-    Filter = exor_filter:xor16([1, 2, 3], none),
-    ?_assertMatch({error, get_key_for_contains_error},
-        exor_filter:xor16_contain(Filter, "test")),
-    exor_filter:xor16_free(Filter).
+    Filter = xor16:new([1, 2, 3], none),
+    ?_assertEqual({error, get_key_for_contains_error},
+        xor16:contain(Filter, "test")),
+    xor16:free(Filter).

+%% contain/3 variant of xor16_contain_key_not_uint64/0. The filter is built
+%% with `none` like its sibling: the default method hashes the key first, so
+%% a string key would presumably not hit the conversion error -- TODO confirm
+%% against the NIF.
xor16_contain_custom_key_not_uint64() ->
-    Filter = exor_filter:xor16([1, 2, 3]),
-    ?_assertMatch({error, get_key_for_contains_error},
-        exor_filter:xor16_contain(Filter, "test", asdf)),
-    exor_filter:xor16_free(Filter).
+    Filter = xor16:new([1, 2, 3], none),
+    ?_assertEqual({error, get_key_for_contains_error},
+        xor16:contain(Filter, "test", asdf)),
+    xor16:free(Filter).

+%% A term that is not a filter (the atom `asdf`) is passed to contain/2:
+%% expects {error, get_key_for_contains_error}.
xor16_valid_filter_in_contain() ->
-    ?_assertMatch({error, get_key_for_contains_error},
-        exor_filter:xor16_contain(asdf, 1)).
+    ?_assertEqual({error, get_key_for_contains_error},
+        xor16:contain(asdf, 1)).

+%% A term that is not a filter is passed to free/1: expects `ok`.
xor16_valid_filter_in_free() ->
-    ?_assertMatch(ok, exor_filter:xor16_free(asdf)).
+    ?_assertEqual(ok, xor16:free(asdf)).

+%% Frees the same filter twice: expects `ok` from both calls.
xor16_cannot_free_twice() ->
-    Filter = exor_filter:xor16([1, 2, 3]),
-    ?_assertMatch(ok, exor_filter:xor16_free(Filter)),
-    ?_assertMatch(ok, exor_filter:xor16_free(Filter)).
+    Filter = xor16:new([1, 2, 3]),
+    ?_assertEqual(ok, xor16:free(Filter)),
+    ?_assertEqual(ok, xor16:free(Filter)).

+%% Ten million pre-hashed integer keys (`none` method): expects 100 to be a
+%% member, then frees the filter.
xor16_large() ->
    X = lists:seq(1, 10000000),
-    Filter = exor_filter:xor16(X, none),
-    ?_assertMatch(true, exor_filter:xor16_contain(Filter, 100)),
-    exor_filter:xor16_free(Filter).
+    Filter = xor16:new(X, none),
+    ?_assertEqual(true, xor16:contain(Filter, 100)),
+    xor16:free(Filter).

+%% Buffered counterpart of xor16_large/0.
xor16_large_buffered() ->
    X = lists:seq(1, 10000000),
-    Filter = exor_filter:xor16_buffered(X, none),
-    ?_assertMatch(true, exor_filter:xor16_contain(Filter, 100)),
-    exor_filter:xor16_free(Filter).
-
-xor16_medium_fast(Strings) ->
-    Filter = exor_filter:xor16(Strings, fast_hash),
-    ?_assertMatch(true, exor_filter:xor16_contain(Filter, "test100")),
-    exor_filter:xor16_free(Filter).
-
-xor16_medium_fast_buffered(Strings) ->
-    Filter = exor_filter:xor16_buffered(Strings, fast_hash),
-    ?_assertMatch(true, exor_filter:xor16_contain(Filter, "test100")),
-    exor_filter:xor16_free(Filter).
+    Filter = xor16:new_buffered(X, none),
+    ?_assertEqual(true, xor16:contain(Filter, 100)),
+    xor16:free(Filter).

+%% Default-hash xor16 filter over Strings: expects "test100" to be a member,
+%% then frees the filter.
xor16_medium_default(Strings) ->
-    Filter = exor_filter:xor16(Strings),
-    ?_assertMatch(true, exor_filter:xor16_contain(Filter, "test100")),
-    exor_filter:xor16_free(Filter).
+    Filter = xor16:new(Strings),
+    ?_assertEqual(true, xor16:contain(Filter, "test100")),
+    xor16:free(Filter).

+%% Buffered counterpart of xor16_medium_default/1.
xor16_medium_default_buffered(Strings) ->
-    Filter = exor_filter:xor16_buffered(Strings),
-    ?_assertMatch(true, exor_filter:xor16_contain(Filter, "test100")),
-    exor_filter:xor16_free(Filter).
-
+    Filter = xor16:new_buffered(Strings),
+    ?_assertEqual(true, xor16:contain(Filter, "test100")),
+    xor16:free(Filter).

%% EOF