Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Improve the performance of roaring bitmap under sparse cases #364

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion include/roaring/containers/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <stdio.h>

#include <roaring/containers/array.h>
#include <roaring/containers/single.h>
#include <roaring/containers/bitset.h>
#include <roaring/containers/convert.h>
#include <roaring/containers/mixed_andnot.h>
Expand All @@ -32,9 +33,10 @@
#define ARRAY_CONTAINER_TYPE_CODE 2
#define RUN_CONTAINER_TYPE_CODE 3
#define SHARED_CONTAINER_TYPE_CODE 4
#define SINGLE_CONTAINER_TYPE_CODE 5

// macro for pairing container type codes
#define CONTAINER_PAIR(c1, c2) (4 * (c1) + (c2))
#define CONTAINER_PAIR(c1, c2) (8 * (c1) + (c2))

/**
* A shared container is a wrapper around a container
Expand Down Expand Up @@ -224,6 +226,8 @@ static inline int container_get_cardinality(const void *container,
case RUN_CONTAINER_TYPE_CODE:
return run_container_cardinality(
(const run_container_t *)container);
case SINGLE_CONTAINER_TYPE_CODE:
return single_container_cardinality(container_to_single(container));
}
assert(false);
__builtin_unreachable();
Expand All @@ -246,6 +250,8 @@ static inline bool container_is_full(const void *container, uint8_t typecode) {
(const array_container_t *)container) == (1 << 16);
case RUN_CONTAINER_TYPE_CODE:
return run_container_is_full((const run_container_t *)container);
case SINGLE_CONTAINER_TYPE_CODE:
return false;
}
assert(false);
__builtin_unreachable();
Expand All @@ -262,6 +268,8 @@ static inline int container_shrink_to_fit(void *container, uint8_t typecode) {
(array_container_t *)container);
case RUN_CONTAINER_TYPE_CODE:
return run_container_shrink_to_fit((run_container_t *)container);
case SINGLE_CONTAINER_TYPE_CODE:
return 0;
}
assert(false);
__builtin_unreachable();
Expand Down Expand Up @@ -292,6 +300,7 @@ static inline void *container_range_of_ones(uint32_t range_start,

/* Create a container with all the values between in [min,max) at a
distance k*step from min. */
// TODO:
static inline void *container_from_range(uint8_t *type, uint32_t min,
uint32_t max, uint16_t step) {
if (step == 0) return NULL; // being paranoid
Expand Down Expand Up @@ -320,6 +329,7 @@ static inline void *container_from_range(uint8_t *type, uint32_t min,
/**
* "repair" the container after lazy operations.
*/
// TODO:
static inline void *container_repair_after_lazy(void *container,
uint8_t *typecode) {
container = get_writable_copy_if_shared(
Expand Down Expand Up @@ -370,6 +380,8 @@ static inline int32_t container_write(const void *container, uint8_t typecode,
return array_container_write((const array_container_t *)container, buf);
case RUN_CONTAINER_TYPE_CODE:
return run_container_write((const run_container_t *)container, buf);
case SINGLE_CONTAINER_TYPE_CODE:
return single_container_write(container_to_single(container), buf);
}
assert(false);
__builtin_unreachable();
Expand All @@ -393,6 +405,8 @@ static inline int32_t container_size_in_bytes(const void *container,
(const array_container_t *)container);
case RUN_CONTAINER_TYPE_CODE:
return run_container_size_in_bytes((const run_container_t *)container);
case SINGLE_CONTAINER_TYPE_CODE:
return single_container_size_in_bytes(container_to_single(container));
}
assert(false);
__builtin_unreachable();
Expand Down Expand Up @@ -427,6 +441,8 @@ static inline bool container_nonzero_cardinality(const void *container,
case RUN_CONTAINER_TYPE_CODE:
return run_container_nonzero_cardinality(
(const run_container_t *)container);
case SINGLE_CONTAINER_TYPE_CODE:
return single_container_nozero_cardinality(container_to_single(container));
}
assert(false);
__builtin_unreachable();
Expand Down Expand Up @@ -457,6 +473,8 @@ static inline int container_to_uint32_array(uint32_t *output,
case RUN_CONTAINER_TYPE_CODE:
return run_container_to_uint32_array(
output, (const run_container_t *)container, base);
case SINGLE_CONTAINER_TYPE_CODE:
return single_container_to_uint32_array(output , container_to_single(container), base);
}
assert(false);
__builtin_unreachable();
Expand Down Expand Up @@ -494,6 +512,18 @@ static inline void *container_add(void *container, uint16_t val,
run_container_add((run_container_t *)container, val);
*new_typecode = RUN_CONTAINER_TYPE_CODE;
return container;
case SINGLE_CONTAINER_TYPE_CODE: {
// single_container_t single = container_to_single(container);
// if (single_container_try_add(&single, val) != -1) {
// *new_typecode = SINGLE_CONTAINER_TYPE_CODE;
// return single_to_container(single);
// } else {
// *new_typecode = ARRAY_CONTAINER_TYPE_CODE;
// return single_to_array(single, val);
// }
// return NULL;
}

default:
assert(false);
__builtin_unreachable();
Expand All @@ -508,6 +538,7 @@ static inline void *container_add(void *container, uint16_t val,
* This function may allocate a new container, and caller is responsible for
* memory deallocation
*/
// TODO:
static inline void *container_remove(void *container, uint16_t val,
uint8_t typecode, uint8_t *new_typecode) {
container = get_writable_copy_if_shared(container, &typecode);
Expand Down Expand Up @@ -1241,6 +1272,13 @@ static inline void *container_ior(void *c1, uint8_t type1, const void *c2,
(const run_container_t *)c2);
return convert_run_to_efficient_container((run_container_t *)c1,
result_type);
case CONTAINER_PAIR(SINGLE_CONTAINER_TYPE_CODE, SINGLE_CONTAINER_TYPE_CODE): {
void* res = single_single_container_inplace_union(c1, c2, result_type);
int car1 = container_get_cardinality(c1, SINGLE_CONTAINER_TYPE_CODE);
int car2 = container_get_cardinality(c2, SINGLE_CONTAINER_TYPE_CODE);
int car3 = container_get_cardinality(res, *result_type);
assert(car1 + car2 == car3);
return res;}
case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
ARRAY_CONTAINER_TYPE_CODE):
array_bitset_container_union((const array_container_t *)c2,
Expand Down Expand Up @@ -1298,6 +1336,14 @@ static inline void *container_ior(void *c1, uint8_t type1, const void *c2,
c1 = convert_run_to_efficient_container((run_container_t *)c1,
result_type);
return c1;
case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, SINGLE_CONTAINER_TYPE_CODE):
return array_single_container_inplace_union((array_container_t*)c1, c2, result_type);
case CONTAINER_PAIR(SINGLE_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
return single_array_container_inplace_union(c1, (const array_container_t*)c2, result_type);
case CONTAINER_PAIR(SINGLE_CONTAINER_TYPE_CODE, BITSET_CONTAINER_TYPE_CODE):
case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, SINGLE_CONTAINER_TYPE_CODE):
case CONTAINER_PAIR(SINGLE_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, SINGLE_CONTAINER_TYPE_CODE):
default:
assert(false);
__builtin_unreachable();
Expand Down Expand Up @@ -2161,6 +2207,8 @@ static inline uint16_t container_maximum(const void *container,
return array_container_maximum((const array_container_t *)container);
case RUN_CONTAINER_TYPE_CODE:
return run_container_maximum((const run_container_t *)container);
case SINGLE_CONTAINER_TYPE_CODE:
return single_container_maximum(container_to_single(container));
default:
assert(false);
__builtin_unreachable();
Expand Down
160 changes: 160 additions & 0 deletions include/roaring/containers/single.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#ifndef INCLUDE_CONTAINERS_SINGLE_H_
#define INCLUDE_CONTAINERS_SINGLE_H_
#include <roaring/containers/array.h>
#include <stdint.h>
#include <string.h>

/* Number of 16-bit values that can be stored inline: the number of uint16_t
 * slots that fit in one pointer, minus the one slot taken by `len`.
 * NOTE(review): "CAPTAIN" is presumably a typo for "CAPACITY"; the name is
 * used throughout this header, so renaming must be done everywhere at once. */
#define SINGLE_CONTAINER_MAX_CAPTAIN (sizeof(void*) / sizeof(uint16_t) - 1)

/* A very small container made only of uint16_t fields: a length followed by
 * SINGLE_CONTAINER_MAX_CAPTAIN value slots. It is converted to and from a
 * `void*` by value through single_container_converter_t. */
typedef struct single_container_s {
/* Number of entries of `vals` currently in use. */
uint16_t len;
/* The stored values. single_container_maximum() reads vals[len - 1] as the
 * largest one, so they are presumably kept in ascending order — confirm. */
uint16_t vals[SINGLE_CONTAINER_MAX_CAPTAIN];
} single_container_t;

/* Overlay used to reinterpret the bits of a `void*` as a single_container_t
 * and back (see container_to_single / single_to_container). The pointer
 * member is only a carrier for the bits; those helpers never dereference it. */
typedef union single_container_converter_u {
/* View of the storage as an inline container. */
single_container_t single_container;
/* View of the same storage as a generic container pointer. */
void* container;
} single_container_converter_t;

/**
 * Decode an inline single container from the bits of a container pointer.
 *
 * The pointer itself is never dereferenced: its object representation is
 * copied into a single_container_t and returned by value.
 *
 * @param container pointer value whose bits encode the single container
 * @return the decoded single container
 */
static inline single_container_t container_to_single(const void* container) {
    single_container_t decoded;
    /* Copy the leading bytes of the pointer representation, which is what
     * reading the struct member of the converter union yields. */
    memcpy(&decoded, &container, sizeof(decoded));
    return decoded;
}

/**
 * Encode an inline single container into the bits of a container pointer.
 *
 * The returned pointer is only a carrier for the struct's bytes and must not
 * be dereferenced; decode it again with container_to_single().
 *
 * @param single the container to encode
 * @return a pointer value whose representation holds `single`
 */
static inline void* single_to_container(single_container_t single) {
    void* encoded;
    /* Store the struct bytes at the start of the pointer representation, which
     * is what writing the struct member of the converter union does. */
    memcpy(&encoded, &single, sizeof(single));
    return encoded;
}

/**
 * Size in bytes of the values currently stored in the container
 * (two bytes per stored value; the length field is not counted).
 */
static inline int32_t single_container_size_in_bytes(single_container_t container) {
    const size_t stored_bytes = sizeof(uint16_t) * container.len;
    return (int32_t)stored_bytes;
}

/**
 * Tell whether the container holds at least one value.
 *
 * @return true when `len` is non-zero, false for an empty container
 */
static inline bool single_container_nozero_cardinality(single_container_t container) {
    return container.len != 0;
}

/**
 * Number of values stored in the container.
 */
static inline int32_t single_container_cardinality(single_container_t container) {
    const int32_t count = container.len;
    return count;
}

/**
 * Copy the stored values (and only the values, no length header) to `buf`.
 *
 * @param container the container to write
 * @param buf       destination; must have room for 2 * len bytes
 * @return the number of bytes written
 */
static inline int32_t single_container_write(single_container_t container, char* buf) {
    /* Same quantity single_container_size_in_bytes() reports. */
    const int32_t payload_bytes = (int32_t)(sizeof(uint16_t) * container.len);
    memcpy(buf, container.vals, (size_t)payload_bytes);
    return payload_bytes;
}

/**
 * Number of bytes of the serialized form: a 16-bit count followed by one
 * 16-bit word per stored value.
 */
static inline int32_t single_container_serialization_len(
    single_container_t container) {
    const size_t header_bytes = sizeof(uint16_t);
    const size_t payload_bytes = sizeof(uint16_t) * container.len;
    return (int32_t)(header_bytes + payload_bytes);
}

/**
 * Expand the stored 16-bit values to 32 bits, add `base` to each, and write
 * them consecutively to `vout`.
 *
 * @param vout destination for `len` 32-bit values; written with memcpy, so it
 *             does not need to be aligned for uint32_t
 * @param cont the container to expand
 * @param base value added to every stored element
 * @return the number of values written
 */
static inline int single_container_to_uint32_array(void* vout, single_container_t cont,
                                                   uint32_t base) {
    unsigned char* dst = (unsigned char*)vout;
    const int count = cont.len;
    for (int k = 0; k < count; ++k) {
        const uint32_t widened = base + cont.vals[k];
        memcpy(dst + (size_t)k * sizeof(uint32_t), &widened, sizeof(widened));
    }
    return count;
}

/**
 * Try to insert `val` into the inline, ascending-sorted value array.
 *
 * The container is left untouched unless the value is actually inserted.
 *
 * @param container container to modify in place
 * @param val       value to insert
 * @return  1 if `val` was inserted,
 *          0 if `val` was already present,
 *         -1 if `val` is absent and the container is already full, in which
 *            case the caller has to move to a larger container type
 */
static inline int single_container_try_add(single_container_t* container,
                                           uint16_t val) {
    uint16_t* array = container->vals;
    const uint16_t len = container->len;

    /* Find the first slot whose value is >= val. */
    uint16_t pos = 0;
    while (pos < len && array[pos] < val) {
        ++pos;
    }
    if (pos < len && array[pos] == val) {
        return 0;
    }
    if (len >= SINGLE_CONTAINER_MAX_CAPTAIN) {
        return -1;
    }

    /* Shift the tail one slot to the right to open a gap at `pos`. */
    for (uint16_t j = len; j > pos; --j) {
        array[j] = array[j - 1];
    }
    array[pos] = val;
    container->len = (uint16_t)(len + 1);
    return 1;
}

/* Defined outside this header. NOTE(review): judging by the name and by the
 * commented-out call in container_add, this presumably builds an array
 * container holding the values of `container` plus `extra_val` — confirm
 * against the definition, including who owns the returned pointer. */
void* single_to_array(single_container_t container, uint16_t extra_val);

/**
 * Reset the container to the empty state by zeroing every byte of it
 * (length and all value slots).
 *
 * NOTE(review): unlike the other helpers in this header this one is plain
 * `inline`, not `static inline`, so some translation unit has to provide the
 * external definition — confirm that one exists.
 */
inline void single_container_clear(single_container_t* container) {
    memset(container, 0, sizeof(*container));
}

/**
 * Fill `container` with `card` 16-bit values read from `buf`.
 *
 * @param card      number of values to read; must not exceed
 *                  SINGLE_CONTAINER_MAX_CAPTAIN, the inline capacity
 * @param container destination container; its length and values are overwritten
 * @param buf       source holding at least 2 * card bytes
 * @return the number of bytes consumed from `buf`
 */
static inline int32_t single_container_read(uint32_t card,
                                            single_container_t* container,
                                            const char* buf) {
    /* A larger count would make the memcpy below run past `vals`. */
    assert(card <= SINGLE_CONTAINER_MAX_CAPTAIN);
    const size_t byte_sz = (size_t)card * sizeof(uint16_t);
    container->len = (uint16_t)card;
    memcpy(container->vals, buf, byte_sz);
    return (int32_t)byte_sz;
}

/**
 * Serialize the container as a 16-bit count followed by the stored values.
 *
 * The layout matches what single_container_serialization_len() reports and
 * what single_container_deserialize() reads back: a 2-byte header, then
 * 2 bytes per value.
 *
 * @param container the container to serialize
 * @param buf       destination with room for
 *                  single_container_serialization_len(container) bytes
 * @return the number of bytes written
 */
static inline int32_t single_container_serialize(single_container_t container,
                                                 char* buf) {
    const uint16_t cardinality = container.len;
    const int32_t offset = (int32_t)sizeof(cardinality);
    const int32_t payload = (int32_t)(sizeof(uint16_t) * container.len);

    memcpy(buf, &cardinality, sizeof(cardinality));
    if (payload) memcpy(&buf[offset], container.vals, (size_t)payload);
    return offset + payload;
}

static inline void* single_container_deserialize(const char* buf, size_t buf_len) {
__builtin_unreachable();
single_container_t single;
assert(buf_len >= 2);
buf_len -= 2;
size_t len;
int32_t off;
uint16_t cardinality;

memcpy(&cardinality, buf, off = sizeof(cardinality));
single.len = cardinality;
len = sizeof(uint16_t) * cardinality;
if (len) memcpy(single.vals, &buf[off], len);

return single_to_container(single);
}

/**
 * Largest stored value, taken from the last used slot.
 *
 * @return vals[len - 1], or 0 when the container is empty
 */
static inline uint16_t single_container_maximum(const single_container_t single) {
    return single.len ? single.vals[single.len - 1] : 0;
}

/* In-place union helpers, defined outside this header. At their call sites in
 * container_ior the returned pointer is used as the resulting container and
 * `*typecode` is read back as its type code. NOTE(review): whether the inputs
 * may be freed or reused by these functions is not visible here — confirm
 * against the definitions. */

/* Union of two single containers; `left` and `right` are both passed as
 * container pointers of type SINGLE_CONTAINER_TYPE_CODE. */
void* single_single_container_inplace_union(void* left, const void* right,
uint8_t* typecode);
/* Union of a single container (`left`) with an array container (`right`). */
void* single_array_container_inplace_union(void* left,
const array_container_t* right,
uint8_t* typecode);
/* Union of an array container (`arr`) with a single container. NOTE(review):
 * the second parameter is named `left` although container_ior passes the
 * right-hand operand there. */
void* array_single_container_inplace_union(array_container_t* arr,
const void* left, uint8_t* typecode);

#endif
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set(ROARING_SRC
containers/mixed_xor.c
containers/mixed_andnot.c
containers/run.c
containers/single.c
roaring.c
roaring_priority_queue.c
roaring_array.c)
Expand Down
10 changes: 10 additions & 0 deletions src/containers/containers.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ void container_free(void *container, uint8_t typecode) {
case SHARED_CONTAINER_TYPE_CODE:
shared_container_free((shared_container_t *)container);
break;
case SINGLE_CONTAINER_TYPE_CODE:
break;
default:
assert(false);
__builtin_unreachable();
Expand Down Expand Up @@ -93,6 +95,8 @@ int32_t container_serialize(const void *container, uint8_t typecode,
array_container_serialize((const array_container_t *)container, buf));
case RUN_CONTAINER_TYPE_CODE:
return (run_container_serialize((const run_container_t *)container, buf));
case SINGLE_CONTAINER_TYPE_CODE:
// return single_container_serialize(container_to_single(container), buf);
default:
assert(0);
__builtin_unreachable();
Expand All @@ -111,6 +115,8 @@ uint32_t container_serialization_len(const void *container, uint8_t typecode) {
case RUN_CONTAINER_TYPE_CODE:
return run_container_serialization_len(
(const run_container_t *)container);
case SINGLE_CONTAINER_TYPE_CODE:
// return single_container_serialization_len(container_to_single(container));
default:
assert(0);
__builtin_unreachable();
Expand All @@ -126,6 +132,8 @@ void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len) {
return (array_container_deserialize(buf, buf_len));
case RUN_CONTAINER_TYPE_CODE:
return (run_container_deserialize(buf, buf_len));
case SINGLE_CONTAINER_TYPE_CODE:
// return single_container_deserialize(buf, buf_len);
case SHARED_CONTAINER_TYPE_CODE:
printf("this should never happen.\n");
assert(0);
Expand Down Expand Up @@ -209,6 +217,8 @@ void *container_clone(const void *container, uint8_t typecode) {
printf("shared containers are not cloneable\n");
assert(false);
return NULL;
case SINGLE_CONTAINER_TYPE_CODE:
return single_to_container(container_to_single(container));
default:
assert(false);
__builtin_unreachable();
Expand Down
Loading