Skip to content

Commit

Permalink
Merge pull request #11 from Vagabond/adt/binary_contains
Browse files Browse the repository at this point in the history
Allow checking a binary filter without deserializing to a resource
  • Loading branch information
mpope9 authored Apr 11, 2020
2 parents 74f36f5 + b44086d commit 7b7493b
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 32 deletions.
80 changes: 64 additions & 16 deletions c_src/xor_filter_nif.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,18 +205,45 @@ xor8_contain_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
return enif_make_badarg(env);
}

xor8_t* filter;
if(!enif_get_resource(env, argv[0], xor8_resource_type, (void**) &filter))
{
return mk_error(env, "get_filter_for_contains_error");
}

ErlNifUInt64 key;
if(!enif_get_uint64(env, argv[1], &key))
{
return mk_error(env, "get_key_for_contains_error");
}



xor8_t* filter;
if(!enif_get_resource(env, argv[0], xor8_resource_type, (void**) &filter))
{
ErlNifBinary bin;
if (!enif_inspect_binary(env, argv[0], &bin)) {
return mk_error(env, "get_filter_for_contains_error");
}

if (bin.size < sizeof(uint64_t) * 2) {
return mk_error(env, "get_filter_for_contains_bin_wrong_size");
}

xor8_t stack_filter;

unpack_le_u64(&stack_filter.seed, bin.data);
unpack_le_u64(&stack_filter.blockLength, bin.data+sizeof(uint64_t));

if (bin.size != (sizeof(uint64_t)*2) + (stack_filter.blockLength * 3)) {
return mk_error(env, "get_filter_for_contains_bin_wrong_size");
}
stack_filter.fingerprints = bin.data + (sizeof(uint64_t) * 2);
if(xor8_contain(key, &stack_filter))
{
return mk_atom(env, "true");
}
else
{
return mk_atom(env, "false");
}
}

if(xor8_contain(key, filter))
{
return mk_atom(env, "true");
Expand All @@ -225,8 +252,6 @@ xor8_contain_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
return mk_atom(env, "false");
}

return mk_atom(env, "false");
}

static ERL_NIF_TERM
Expand Down Expand Up @@ -395,19 +420,44 @@ xor16_contain_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
return enif_make_badarg(env);
}

xor16_t* filter;
if(!enif_get_resource(env, argv[0], xor16_resource_type, (void**) &filter))
{
return mk_error(env, "get_filter_for_contains_error");
}

ErlNifUInt64 key;
// Hash the values or not.
if(!enif_get_uint64(env, argv[1], &key))
{
return mk_error(env, "get_key_for_contains_error");
}

xor16_t* filter;
if(!enif_get_resource(env, argv[0], xor16_resource_type, (void**) &filter))
{
ErlNifBinary bin;
if (!enif_inspect_binary(env, argv[0], &bin)) {
return mk_error(env, "get_filter_for_contains_error");
}

if (bin.size < sizeof(uint64_t) * 2) {
return mk_error(env, "get_filter_for_contains_bin_wrong_size");
}

xor16_t stack_filter;

unpack_le_u64(&stack_filter.seed, bin.data);
unpack_le_u64(&stack_filter.blockLength, bin.data+sizeof(uint64_t));

if (bin.size != (sizeof(uint64_t)*2) + (stack_filter.blockLength * sizeof(uint16_t) * 3)) {
return mk_error(env, "get_filter_for_contains_bin_wrong_size");
}
stack_filter.fingerprints = (uint16_t *) (bin.data + (sizeof(uint64_t) * 2));
if(xor16_contain(key, &stack_filter))
{
return mk_atom(env, "true");
}
else
{
return mk_atom(env, "false");
}
}

if(xor16_contain(key, filter))
{
return mk_atom(env, "true");
Expand All @@ -416,8 +466,6 @@ xor16_contain_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
return mk_atom(env, "false");
}

return mk_atom(env, "false");
}

static ERL_NIF_TERM
Expand Down
16 changes: 12 additions & 4 deletions doc/exor_filter.html
Original file line number Diff line number Diff line change
Expand Up @@ -119,22 +119,26 @@ <h3 class="function"><a name="xor16_buffered-2">xor16_buffered/2</a></h3>

<h3 class="function"><a name="xor16_contain-2">xor16_contain/2</a></h3>
<div class="spec">
<p><tt>xor16_contain(X1::{reference(), <a href="#type-hash_function">hash_function()</a>}, Key::term()) -&gt; true | false</tt><br></p>
<p><tt>xor16_contain(X1::{reference() | binary(), <a href="#type-hash_function">hash_function()</a>}, Key::term()) -&gt; true | false</tt><br></p>
</div><p><p>Tests to see if the passed argument is in the filter. The first
argument must be the pre-initialized filter.</p>

<p>DO NOT PASS PRE-HASHED VALUES. The method / fun passed to the
initialization function is saved, and is used to compute the hash.</p>

<p>Filters previously serialized with <code>xor16_to_bin</code> are allowed.</p>

Returns true if the element exists (or if there is a false positive).
False if not.</p>

<h3 class="function"><a name="xor16_contain-3">xor16_contain/3</a></h3>
<div class="spec">
<p><tt>xor16_contain(X1::{reference(), <a href="#type-hash_function">hash_function()</a>}, Key::term(), ReturnValue::any()) -&gt; true | any()</tt><br></p>
<p><tt>xor16_contain(X1::{reference() | binary(), <a href="#type-hash_function">hash_function()</a>}, Key::term(), ReturnValue::any()) -&gt; true | any()</tt><br></p>
</div><p><p>Tests to see if the passed argument is in the filter. The first
argument must be the pre-initialized filter.</p>

<p>Filters previously serialized with <code>xor16_to_bin</code> are allowed.</p>

Returns <code>true</code> if the element exists (or there is a false positive).
The third argument will be returned instead of <code>false</code> if the element is
not in the filter.</p>
Expand Down Expand Up @@ -210,23 +214,27 @@ <h3 class="function"><a name="xor8_buffered-2">xor8_buffered/2</a></h3>

<h3 class="function"><a name="xor8_contain-2">xor8_contain/2</a></h3>
<div class="spec">
<p><tt>xor8_contain(X1::{reference(), <a href="#type-hash_function">hash_function()</a>}, Key::term()) -&gt; true | false</tt><br></p>
<p><tt>xor8_contain(X1::{reference() | binary(), <a href="#type-hash_function">hash_function()</a>}, Key::term()) -&gt; true | false</tt><br></p>
</div><p><p>Tests to see if the passed argument is in the filter. The first
argument must be the pre-initialized filter.</p>

<p>DO NOT PASS PRE-HASHED VALUES unless you've specified a pre-hashed filter.
The method / fun passed to the initialization function is saved, and
is used to compute the hash.</p>

<p>Filters previously serialized with <code>xor8_to_bin</code> are allowed.</p>

Returns true if the element exists (or if there is a false positive).
False if not.</p>

<h3 class="function"><a name="xor8_contain-3">xor8_contain/3</a></h3>
<div class="spec">
<p><tt>xor8_contain(X1::{reference(), <a href="#type-hash_function">hash_function()</a>}, Key::term(), ReturnValue::any()) -&gt; true | any()</tt><br></p>
<p><tt>xor8_contain(X1::{reference() | binary(), <a href="#type-hash_function">hash_function()</a>}, Key::term(), ReturnValue::any()) -&gt; true | any()</tt><br></p>
</div><p><p>Tests to see if the passed argument is in the filter. The first
argument must be the pre-initialized filter.</p>

<p>Filters previously serialized with <code>xor8_to_bin</code> are allowed.</p>

Returns <code>true</code> if the element exists (or there is a false positive).
The third argument will be returned instead of <code>false</code> if the element is
not in the filter.</p>
Expand Down
20 changes: 14 additions & 6 deletions src/exor_filter.erl
Original file line number Diff line number Diff line change
Expand Up @@ -365,11 +365,13 @@ xor8_buffered_initialize_nif_dirty(_) ->
%% The method / fun passed to the initialization function is saved, and
%% is used to compute the hash.
%%
%% Filters previously serialized with `xor8_to_bin' are allowed.
%%
%% Returns true if the element exists (or if there is a false positive).
%% False if not.
%% @end
%%-----------------------------------------------------------------------------
-spec xor8_contain({reference(), hash_function()}, term()) -> true | false.
-spec xor8_contain({reference() | binary(), hash_function()}, term()) -> true | false.

xor8_contain({Filter, default_hash}, Key) ->
xor8_contain_nif(Filter, erlang:phash2(Key));
Expand All @@ -385,12 +387,14 @@ xor8_contain({Filter, _HashFunction}, Key) ->
%% @doc Tests to see if the passed argument is in the filter. The first
%% argument must be the pre-initialized filter.
%%
%% Filters previously serialized with `xor8_to_bin' are allowed.
%%
%% Returns `true' if the element exists (or there is a false positive).
%% The third argument will be returned instead of `false' if the element is
%% not in the filter.
%% @end
%%-----------------------------------------------------------------------------
-spec xor8_contain({reference(), hash_function()}, term(), any())
-spec xor8_contain({reference() | binary(), hash_function()}, term(), any())
-> true | any().

xor8_contain({Filter, default_hash}, Key, ReturnValue) ->
Expand Down Expand Up @@ -421,7 +425,7 @@ xor8_contain({Filter, _HashFunction}, Key, ReturnValue) ->
%% Returns `false' if otherwise.
%% @end
%%-----------------------------------------------------------------------------
-spec xor8_contain_nif(reference(), term()) -> true | false.
-spec xor8_contain_nif(reference() | binary(), term()) -> true | false.

xor8_contain_nif(_, _) ->
not_loaded(?LINE).
Expand Down Expand Up @@ -517,10 +521,12 @@ xor16_buffered_initialize_nif_dirty(_) ->
%% DO NOT PASS PRE-HASHED VALUES. The method / fun passed to the
%% initialization function is saved, and is used to compute the hash.
%%
%% Filters previously serialized with `xor16_to_bin' are allowed.
%%
%% Returns true if the element exists (or if there is a false positive).
%% False if not.
%% @end
-spec xor16_contain({reference(), hash_function()}, term()) -> true | false.
-spec xor16_contain({reference() | binary(), hash_function()}, term()) -> true | false.

xor16_contain({Filter, default_hash}, Key) ->
xor16_contain_nif(Filter, erlang:phash2(Key));
Expand All @@ -536,11 +542,13 @@ xor16_contain({Filter, _HashFunction}, Key) ->
%% @doc Tests to see if the passed argument is in the filter. The first
%% argument must be the pre-initialized filter.
%%
%% Filters previously serialized with `xor16_to_bin' are allowed.
%%
%% Returns `true' if the element exists (or there is a false positive).
%% The third argument will be returned instead of `false' if the element is
%% not in the filter.
%% @end
-spec xor16_contain({reference(), hash_function()}, term(), any())
-spec xor16_contain({reference() | binary(), hash_function()}, term(), any())
-> true | any().

xor16_contain({Filter, default_hash}, Key, ReturnValue) ->
Expand Down Expand Up @@ -571,7 +579,7 @@ xor16_contain({Filter, _HashFunction}, Key, ReturnValue) ->
%% Returns `false' if otherwise.
%% @end
%%-----------------------------------------------------------------------------
-spec xor16_contain_nif(reference(), term()) -> true | false.
-spec xor16_contain_nif(reference() | binary(), term()) -> true | false.

xor16_contain_nif(_, _) ->
not_loaded(?LINE).
Expand Down
8 changes: 6 additions & 2 deletions src/xor16.erl
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,11 @@ new_buffered(List, HashFunction) ->
%%-----------------------------------------------------------------------------
%% @doc Tests to see if the passed argument is in the filter. The first
%% argument must be the pre-initialized filter.
%%
%% A filter previously serialized by `to_bin' is allowed.
%% @end
%%-----------------------------------------------------------------------------
-spec contain({reference(), exor_filter:hash_function()}, term()) -> true | false.
-spec contain({reference() | binary(), exor_filter:hash_function()}, term()) -> true | false.

contain(Filter, Key) ->
exor_filter:xor16_contain(Filter, Key).
Expand All @@ -95,10 +97,12 @@ contain(Filter, Key) ->
%% @doc Tests to see if the passed argument is in the filter. The first
%% argument must be the pre-initialized filter.
%%
%% A filter previously serialized by `to_bin' is allowed.
%%
%% Will return the third argument if the element doesn't exist in the filter.
%% @end
%%-----------------------------------------------------------------------------
-spec contain({reference(), exor_filter:hash_function()}, term(), any()) -> true | any().
-spec contain({reference() | binary(), exor_filter:hash_function()}, term(), any()) -> true | any().

contain(Filter, Key, ReturnValue) ->
exor_filter:xor16_contain(Filter, Key, ReturnValue).
Expand Down
8 changes: 6 additions & 2 deletions src/xor8.erl
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,11 @@ new_buffered(List, HashFunction) ->
%%-----------------------------------------------------------------------------
%% @doc Tests to see if the passed argument is in the filter. The first
%% argument must be the pre-initialized filter.
%%
%% A filter previously serialized by `to_bin' is allowed.
%% @end
%%-----------------------------------------------------------------------------
-spec contain({reference(), exor_filter:hash_function()}, term()) -> true | false.
-spec contain({reference() | binary(), exor_filter:hash_function()}, term()) -> true | false.

contain(Filter, Key) ->
exor_filter:xor8_contain(Filter, Key).
Expand All @@ -96,10 +98,12 @@ contain(Filter, Key) ->
%% @doc Tests to see if the passed argument is in the filter. The first
%% argument must be the pre-initialized filter.
%%
%% A filter previously serialized by `to_bin' is allowed.
%%
%% Will return the third argument if the element doesn't exist in the filter.
%% @end
%%-----------------------------------------------------------------------------
-spec contain({reference(), exor_filter:hash_function()}, term(), any()) -> true | any().
-spec contain({reference() | binary(), exor_filter:hash_function()}, term(), any()) -> true | any().

contain(Filter, Key, ReturnValue) ->
exor_filter:xor8_contain(Filter, Key, ReturnValue).
Expand Down
10 changes: 8 additions & 2 deletions test/exor_filter_test.erl
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,10 @@ xor8_serialization() ->
Filter = xor8:new(["test1", "test2", "test3"]),
?_assertEqual(true, xor8:contain(Filter, "test1")),
?_assertEqual(false, xor8:contain(Filter, "test4")),
Filter2 = xor8:from_bin(xor8:to_bin(Filter)),
BinFilter = xor8:to_bin(Filter),
?_assertEqual(true, xor8:contain(BinFilter, "test1")),
?_assertEqual(false, xor8:contain(BinFilter, "test4")),
Filter2 = xor8:from_bin(BinFilter),
?_assertEqual(true, xor8:contain(Filter2, "test1")),
?_assertEqual(false, xor8:contain(Filter2, "test4")).

Expand Down Expand Up @@ -303,7 +306,10 @@ xor16_serialization() ->
Filter = xor16:new(["test1", "test2", "test3"]),
?_assertEqual(true, xor16:contain(Filter, "test1")),
?_assertEqual(false, xor16:contain(Filter, "test4")),
Filter2 = xor16:from_bin(xor16:to_bin(Filter)),
BinFilter = xor16:to_bin(Filter),
?_assertEqual(true, xor16:contain(BinFilter, "test1")),
?_assertEqual(false, xor16:contain(BinFilter, "test4")),
Filter2 = xor16:from_bin(BinFilter),
?_assertEqual(true, xor16:contain(Filter2, "test1")),
?_assertEqual(false, xor16:contain(Filter2, "test4")).

Expand Down

0 comments on commit 7b7493b

Please sign in to comment.