diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h index 2e54e07e2..235a4990f 100644 --- a/include/roaring/containers/containers.h +++ b/include/roaring/containers/containers.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -32,9 +33,10 @@ #define ARRAY_CONTAINER_TYPE_CODE 2 #define RUN_CONTAINER_TYPE_CODE 3 #define SHARED_CONTAINER_TYPE_CODE 4 +#define SINGLE_CONTAINER_TYPE_CODE 5 // macro for pairing container type codes -#define CONTAINER_PAIR(c1, c2) (4 * (c1) + (c2)) +#define CONTAINER_PAIR(c1, c2) (8 * (c1) + (c2)) /** * A shared container is a wrapper around a container @@ -224,6 +226,8 @@ static inline int container_get_cardinality(const void *container, case RUN_CONTAINER_TYPE_CODE: return run_container_cardinality( (const run_container_t *)container); + case SINGLE_CONTAINER_TYPE_CODE: + return single_container_cardinality(container_to_single(container)); } assert(false); __builtin_unreachable(); @@ -246,6 +250,8 @@ static inline bool container_is_full(const void *container, uint8_t typecode) { (const array_container_t *)container) == (1 << 16); case RUN_CONTAINER_TYPE_CODE: return run_container_is_full((const run_container_t *)container); + case SINGLE_CONTAINER_TYPE_CODE: + return false; } assert(false); __builtin_unreachable(); @@ -262,6 +268,8 @@ static inline int container_shrink_to_fit(void *container, uint8_t typecode) { (array_container_t *)container); case RUN_CONTAINER_TYPE_CODE: return run_container_shrink_to_fit((run_container_t *)container); + case SINGLE_CONTAINER_TYPE_CODE: + return 0; } assert(false); __builtin_unreachable(); @@ -292,6 +300,7 @@ static inline void *container_range_of_ones(uint32_t range_start, /* Create a container with all the values between in [min,max) at a distance k*step from min. 
*/ +// TODO: static inline void *container_from_range(uint8_t *type, uint32_t min, uint32_t max, uint16_t step) { if (step == 0) return NULL; // being paranoid @@ -320,6 +329,7 @@ static inline void *container_from_range(uint8_t *type, uint32_t min, /** * "repair" the container after lazy operations. */ +// TODO: static inline void *container_repair_after_lazy(void *container, uint8_t *typecode) { container = get_writable_copy_if_shared( @@ -370,6 +380,8 @@ static inline int32_t container_write(const void *container, uint8_t typecode, return array_container_write((const array_container_t *)container, buf); case RUN_CONTAINER_TYPE_CODE: return run_container_write((const run_container_t *)container, buf); + case SINGLE_CONTAINER_TYPE_CODE: + return single_container_write(container_to_single(container), buf); } assert(false); __builtin_unreachable(); @@ -393,6 +405,8 @@ static inline int32_t container_size_in_bytes(const void *container, (const array_container_t *)container); case RUN_CONTAINER_TYPE_CODE: return run_container_size_in_bytes((const run_container_t *)container); + case SINGLE_CONTAINER_TYPE_CODE: + return single_container_size_in_bytes(container_to_single(container)); } assert(false); __builtin_unreachable(); @@ -427,6 +441,8 @@ static inline bool container_nonzero_cardinality(const void *container, case RUN_CONTAINER_TYPE_CODE: return run_container_nonzero_cardinality( (const run_container_t *)container); + case SINGLE_CONTAINER_TYPE_CODE: + return single_container_nozero_cardinality(container_to_single(container)); } assert(false); __builtin_unreachable(); @@ -457,6 +473,8 @@ static inline int container_to_uint32_array(uint32_t *output, case RUN_CONTAINER_TYPE_CODE: return run_container_to_uint32_array( output, (const run_container_t *)container, base); + case SINGLE_CONTAINER_TYPE_CODE: + return single_container_to_uint32_array(output , container_to_single(container), base); } assert(false); __builtin_unreachable(); @@ -494,6 +512,18 @@ static inline 
void *container_add(void *container, uint16_t val, run_container_add((run_container_t *)container, val); *new_typecode = RUN_CONTAINER_TYPE_CODE; return container; + case SINGLE_CONTAINER_TYPE_CODE: { + // single_container_t single = container_to_single(container); + // if (single_container_try_add(&single, val) != -1) { + // *new_typecode = SINGLE_CONTAINER_TYPE_CODE; + // return single_to_container(single); + // } else { + // *new_typecode = ARRAY_CONTAINER_TYPE_CODE; + // return single_to_array(single, val); + // } + // return NULL; + } + default: assert(false); __builtin_unreachable(); @@ -508,6 +538,7 @@ static inline void *container_add(void *container, uint16_t val, * This function may allocate a new container, and caller is responsible for * memory deallocation */ +// TODO: static inline void *container_remove(void *container, uint16_t val, uint8_t typecode, uint8_t *new_typecode) { container = get_writable_copy_if_shared(container, &typecode); @@ -1241,6 +1272,13 @@ static inline void *container_ior(void *c1, uint8_t type1, const void *c2, (const run_container_t *)c2); return convert_run_to_efficient_container((run_container_t *)c1, result_type); + case CONTAINER_PAIR(SINGLE_CONTAINER_TYPE_CODE, SINGLE_CONTAINER_TYPE_CODE): { + void* res = single_single_container_inplace_union(c1, c2, result_type); + int car1 = container_get_cardinality(c1, SINGLE_CONTAINER_TYPE_CODE); + int car2 = container_get_cardinality(c2, SINGLE_CONTAINER_TYPE_CODE); + int car3 = container_get_cardinality(res, *result_type); + assert(car1 + car2 == car3); + return res;} case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): array_bitset_container_union((const array_container_t *)c2, @@ -1298,6 +1336,14 @@ static inline void *container_ior(void *c1, uint8_t type1, const void *c2, c1 = convert_run_to_efficient_container((run_container_t *)c1, result_type); return c1; + case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, SINGLE_CONTAINER_TYPE_CODE): + return 
/*
 * single.h
 *
 * A "single" container keeps a few sorted, distinct 16-bit values directly
 * inside the bits of the `void *` that normally points at a container, so it
 * owns no heap memory and needs no deallocation.
 */
#ifndef INCLUDE_CONTAINERS_SINGLE_H_
#define INCLUDE_CONTAINERS_SINGLE_H_

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Only pointers to array containers are used in this header, so a forward
 * declaration is enough.
 * NOTE(review): assumes the array container header declares
 * `typedef struct array_container_s array_container_t;` -- confirm. Repeating
 * an identical typedef is valid C11.
 */
struct array_container_s;
typedef struct array_container_s array_container_t;

/* Number of values that fit next to the 16-bit length inside one pointer. */
#define SINGLE_CONTAINER_MAX_CAPTAIN (sizeof(void*) / sizeof(uint16_t) - 1)
/* Correctly spelled alias; the historical name above is kept for callers. */
#define SINGLE_CONTAINER_MAX_CAPACITY SINGLE_CONTAINER_MAX_CAPTAIN

/* `len` values are stored in vals[0..len), in strictly increasing order. */
typedef struct single_container_s {
    uint16_t len;
    uint16_t vals[SINGLE_CONTAINER_MAX_CAPTAIN];
} single_container_t;

/* Reinterprets the bits of a container pointer as a single container. */
typedef union single_container_converter_u {
    single_container_t single_container;
    void* container;
} single_container_converter_t;

/* Compile-time check: the packed representation must fit in a pointer. */
typedef char single_container_must_fit_in_pointer
    [(sizeof(single_container_t) <= sizeof(void*)) ? 1 : -1];

/* Unpacks the single container stored in the bits of `container`. */
static inline single_container_t container_to_single(const void* container) {
    single_container_converter_t convert;
    convert.container = (void*)container;
    return convert.single_container;
}

/* Packs `single` into a pointer-sized value; the result must never be
 * dereferenced or freed. */
static inline void* single_to_container(single_container_t single) {
    single_container_converter_t convert;
    convert.single_container = single;
    return convert.container;
}

/* Size in bytes of the stored values (the length field is not counted). */
static inline int32_t single_container_size_in_bytes(
    single_container_t container) {
    return container.len * sizeof(uint16_t);
}

/* True when at least one value is stored. */
static inline bool single_container_nozero_cardinality(
    single_container_t container) {
    return container.len > 0;
}

/* Number of stored values. */
static inline int32_t single_container_cardinality(
    single_container_t container) {
    return container.len;
}

/* Copies the raw values (no length header) to `buf`; returns bytes written. */
static inline int32_t single_container_write(single_container_t container,
                                             char* buf) {
    const int32_t copy_length = single_container_size_in_bytes(container);
    memcpy(buf, container.vals, copy_length);
    return copy_length;
}

/* Bytes produced by single_container_serialize: a 16-bit count followed by
 * the values. */
static inline int32_t single_container_serialization_len(
    single_container_t container) {
    return sizeof(uint16_t) + (sizeof(uint16_t) * container.len);
}

/*
 * Writes `base + value` for every stored value to `vout`, which is treated as
 * an array of uint32_t but may be unaligned (hence memcpy). Returns the
 * number of values written.
 */
static inline int single_container_to_uint32_array(void* vout,
                                                   single_container_t cont,
                                                   uint32_t base) {
    uint32_t* out = (uint32_t*)vout;
    const int len = cont.len;
    for (int i = 0; i < len; ++i) {
        const uint32_t val = base + cont.vals[i];
        memcpy(out + i, &val, sizeof(uint32_t));
    }
    return len;
}

/*
 * Inserts `val`, keeping the values sorted and distinct.
 *
 * Returns  1 when the value was inserted,
 *          0 when it was already present (container unchanged),
 *         -1 when the container is full and the value is not present
 *            (container unchanged; the caller must switch to a larger
 *            container type).
 */
static inline int single_container_try_add(single_container_t* container,
                                           uint16_t val) {
    uint16_t* array = container->vals;
    const size_t len = container->len;
    size_t pos = 0;

    /* At most a handful of entries, so a linear scan is the simplest search. */
    while (pos < len && array[pos] < val) {
        pos++;
    }
    if (pos < len && array[pos] == val) {
        return 0;
    }
    if (len >= SINGLE_CONTAINER_MAX_CAPTAIN) {
        return -1;
    }
    /* Open a gap at `pos`; the regions overlap, so memmove rather than memcpy. */
    memmove(array + pos + 1, array + pos, (len - pos) * sizeof(uint16_t));
    array[pos] = val;
    container->len = (uint16_t)(len + 1);
    return 1;
}

/* Builds a heap-allocated array container holding the values of `container`
 * plus `extra_val` (defined in single.c). */
void* single_to_array(single_container_t container, uint16_t extra_val);

/* Resets the container to empty with every value slot zeroed.
 * `static inline` so each translation unit including this header gets a
 * usable definition. */
static inline void single_container_clear(single_container_t* container) {
    memset(container, 0, sizeof(single_container_t));
}

/*
 * Loads `card` raw values (no length header) from `buf` into `container`.
 * `card` must not exceed SINGLE_CONTAINER_MAX_CAPTAIN. Returns the number of
 * bytes consumed.
 */
static inline int32_t single_container_read(uint32_t card,
                                            single_container_t* container,
                                            const char* buf) {
    assert(card <= SINGLE_CONTAINER_MAX_CAPTAIN);
    const int32_t byte_sz = (int32_t)(card * sizeof(uint16_t));
    container->len = (uint16_t)card;
    memcpy(container->vals, buf, (size_t)byte_sz);
    return byte_sz;
}

/*
 * Writes a 16-bit count followed by the values to `buf`. The layout matches
 * single_container_serialization_len and single_container_deserialize.
 * Returns the number of bytes written.
 */
static inline int32_t single_container_serialize(single_container_t container,
                                                 char* buf) {
    const uint16_t cardinality = container.len;
    const int32_t header = (int32_t)sizeof(cardinality);
    const int32_t payload = single_container_size_in_bytes(container);

    memcpy(buf, &cardinality, sizeof(cardinality));
    if (payload > 0) {
        memcpy(buf + header, container.vals, (size_t)payload);
    }
    return header + payload;
}

/*
 * Decodes a buffer produced by single_container_serialize.
 *
 * Returns the packed container, or NULL when the buffer is too short or the
 * stored count exceeds the capacity.
 * NOTE(review): on common platforms an empty container packs to the same bit
 * pattern as NULL, so callers cannot tell the two apart by value alone.
 */
static inline void* single_container_deserialize(const char* buf,
                                                 size_t buf_len) {
    uint16_t cardinality;
    single_container_t single;

    if (buf_len < sizeof(cardinality)) {
        return NULL;
    }
    memcpy(&cardinality, buf, sizeof(cardinality));
    if (cardinality > SINGLE_CONTAINER_MAX_CAPTAIN) {
        return NULL; /* would overflow single.vals */
    }
    const size_t payload = sizeof(uint16_t) * cardinality;
    if (buf_len - sizeof(cardinality) < payload) {
        return NULL; /* truncated input */
    }

    /* Zero the unused slots so equal contents always pack to equal bits. */
    memset(&single, 0, sizeof(single));
    single.len = cardinality;
    if (payload > 0) {
        memcpy(single.vals, buf + sizeof(cardinality), payload);
    }
    return single_to_container(single);
}

/* Largest stored value, or 0 when the container is empty. */
static inline uint16_t single_container_maximum(
    const single_container_t single) {
    if (single.len == 0) return 0;
    return single.vals[single.len - 1];
}

/* Unions implemented in single.c; each stores the result's type code in
 * `*typecode` and returns the resulting container. */
void* single_single_container_inplace_union(void* left, const void* right,
                                            uint8_t* typecode);
void* single_array_container_inplace_union(void* left,
                                           const array_container_t* right,
                                           uint8_t* typecode);
void* array_single_container_inplace_union(array_container_t* arr,
                                           const void* left,
                                           uint8_t* typecode);

#endif
containers/mixed_andnot.c containers/run.c + containers/single.c roaring.c roaring_priority_queue.c roaring_array.c) diff --git a/src/containers/containers.c b/src/containers/containers.c index d48b84f49..5d45d5830 100644 --- a/src/containers/containers.c +++ b/src/containers/containers.c @@ -36,6 +36,8 @@ void container_free(void *container, uint8_t typecode) { case SHARED_CONTAINER_TYPE_CODE: shared_container_free((shared_container_t *)container); break; + case SINGLE_CONTAINER_TYPE_CODE: + break; default: assert(false); __builtin_unreachable(); @@ -93,6 +95,8 @@ int32_t container_serialize(const void *container, uint8_t typecode, array_container_serialize((const array_container_t *)container, buf)); case RUN_CONTAINER_TYPE_CODE: return (run_container_serialize((const run_container_t *)container, buf)); + case SINGLE_CONTAINER_TYPE_CODE: + // return single_container_serialize(container_to_single(container), buf); default: assert(0); __builtin_unreachable(); @@ -111,6 +115,8 @@ uint32_t container_serialization_len(const void *container, uint8_t typecode) { case RUN_CONTAINER_TYPE_CODE: return run_container_serialization_len( (const run_container_t *)container); + case SINGLE_CONTAINER_TYPE_CODE: + // return single_container_serialization_len(container_to_single(container)); default: assert(0); __builtin_unreachable(); @@ -126,6 +132,8 @@ void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len) { return (array_container_deserialize(buf, buf_len)); case RUN_CONTAINER_TYPE_CODE: return (run_container_deserialize(buf, buf_len)); + case SINGLE_CONTAINER_TYPE_CODE: + // return single_container_deserialize(buf, buf_len); case SHARED_CONTAINER_TYPE_CODE: printf("this should never happen.\n"); assert(0); @@ -209,6 +217,8 @@ void *container_clone(const void *container, uint8_t typecode) { printf("shared containers are not cloneable\n"); assert(false); return NULL; + case SINGLE_CONTAINER_TYPE_CODE: + return 
single_to_container(container_to_single(container)); default: assert(false); __builtin_unreachable(); diff --git a/src/containers/single.c b/src/containers/single.c new file mode 100644 index 000000000..4fe3c927a --- /dev/null +++ b/src/containers/single.c @@ -0,0 +1,110 @@ +#include +#include +#include + +void* single_to_array(single_container_t container, uint16_t extra_val) { + __builtin_unreachable(); + // assert SINGLE_CONTAINER_MAX_CAPTAIN == + array_container_t* arr = + array_container_create_given_capacity(2 * SINGLE_CONTAINER_MAX_CAPTAIN); + memcpy(arr->array, container.vals, container.len * sizeof(uint16_t)); + arr->cardinality = container.len; + array_container_add(arr, extra_val); + return arr; +} + +void* single_single_container_inplace_union(void* left, const void* right, + uint8_t* typecode) { + single_container_t l = container_to_single(left); + single_container_t r = container_to_single(right); + + if (l.len + r.len <= SINGLE_CONTAINER_MAX_CAPTAIN) { + // if (false) { + *typecode = SINGLE_CONTAINER_TYPE_CODE; + single_container_t res; + memset(&res, 0, sizeof(single_container_t)); + int lcur = 0; + int rcur = 0; + int pos = 0; + for (; lcur < l.len && rcur < r.len;) { + if(l.vals[lcur] < r.vals[rcur]) { + res.vals[pos++] = l.vals[lcur++]; + } else if(l.vals[lcur] > r.vals[rcur]) { + res.vals[pos++] = r.vals[rcur++]; + } else { + res.vals[pos++] = l.vals[lcur]; + lcur++; + rcur++; + } + } + for (; lcur < l.len;) { + res.vals[pos++] = l.vals[lcur++]; + } + for (; rcur < r.len;) { + res.vals[pos++] = r.vals[rcur++]; + } + res.len = pos; + // check assert + assert(res.len <= SINGLE_CONTAINER_MAX_CAPTAIN); + for(int i = 0; i + 1 < res.len; ++i) { + assert(res.vals[i] < res.vals[i + 1]); + } + + return single_to_container(res); + } + + array_container_t* arr = array_container_create(); + *typecode = ARRAY_CONTAINER_TYPE_CODE; + int lcur = 0; + int rcur = 0; + + for (; lcur < l.len && rcur < r.len;) { + if (l.vals[lcur] < r.vals[rcur]) { + 
array_container_add(arr, l.vals[lcur++]); + } else if (l.vals[lcur] > r.vals[rcur]) { + array_container_add(arr, r.vals[rcur++]); + } else { + array_container_add(arr, r.vals[rcur]); + lcur++; + rcur++; + } + } + for (; lcur < l.len;) { + array_container_add(arr, l.vals[lcur++]); + } + for (; rcur < r.len;) { + array_container_add(arr, r.vals[rcur++]); + } + return arr; +} + +void* single_array_container_inplace_union(void* left, + const array_container_t* right, + uint8_t* typecode) { + *typecode = ARRAY_CONTAINER_TYPE_CODE; + single_container_t l = container_to_single(left); + array_container_t* arr = array_container_create_given_capacity(l.len + right->cardinality); + array_container_copy(right, arr); + for(uint16_t i = 0;i < l.len; ++ i) { + array_container_add(arr, l.vals[i]); + } + return arr; +} + +void* array_single_container_inplace_union(array_container_t* arr, const void* right, + uint8_t* typecode) { + single_container_t r = container_to_single(right); + int car1 = arr->cardinality; + int car2 = r.len; + *typecode = ARRAY_CONTAINER_TYPE_CODE; + int32_t capacity = r.len + arr->cardinality; + if (arr->capacity < capacity) { + array_container_grow(arr, capacity, true); + } + for (uint16_t i = 0;i < r.len; ++ i) { + array_container_add(arr, r.vals[i]); + } + int car3 = arr->cardinality; + // assert(car1 + car2 == car3); + return arr; +} \ No newline at end of file diff --git a/src/roaring.c b/src/roaring.c index 78ac8e392..48fee8b00 100644 --- a/src/roaring.c +++ b/src/roaring.c @@ -415,15 +415,17 @@ void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { uint8_t newtypecode = typecode; void *container2 = container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { + if (container2 != container || typecode != newtypecode) { container_free(container, typecode); ra_set_container_at_index(&r->high_low_container, i, container2, newtypecode); } } else { - array_container_t *newac = array_container_create(); - void 
*container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE_CODE, &typecode); + // array container + single_container_t newsingle; + newsingle.len = 0; + void *container = container_add(single_to_container(newsingle), val & 0xFFFF, + SINGLE_CONTAINER_TYPE_CODE, &typecode); // we could just assume that it stays an array container ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, container, typecode); @@ -813,8 +815,7 @@ void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, void *c = container_ior(c1, container_type_1, c2, container_type_2, &container_result_type); - if (c != - c1) { // in this instance a new container was created, and + if (c != c1 || container_type_1 != container_result_type) { // in this instance a new container was created, and // we need to free the old one container_free(c1, container_type_1); } @@ -843,7 +844,6 @@ void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, ra_set_container_at_index(&x2->high_low_container, pos2, c2, container_type_2); } - // void *c2_clone = container_clone(c2, container_type_2); ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, container_type_2); diff --git a/src/roaring_array.c b/src/roaring_array.c index 631b1ecc3..f744a2c56 100644 --- a/src/roaring_array.c +++ b/src/roaring_array.c @@ -607,6 +607,7 @@ size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { uint32_t startOffset = 0; bool hasrun = ra_has_run_container(ra); if (hasrun) { + assert(false); uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); memcpy(buf, &cookie, sizeof(cookie)); buf += sizeof(cookie); @@ -901,18 +902,33 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz ra_clear(answer);// we need to clear the containers already allocated, and the roaring array return false; } - // it is now safe to read - array_container_t *c = - array_container_create_given_capacity(thiscard); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate 
memory for an array container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; + void* container = NULL; + uint16_t typecodes = 0; + if (thiscard > SINGLE_CONTAINER_MAX_CAPTAIN) { + // if (true) { + // it is now safe to read + array_container_t *c = + array_container_create_given_capacity(thiscard); + if(c == NULL) {// memory allocation failure + fprintf(stderr, "Failed to allocate memory for an array container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + buf += array_container_read(thiscard, c, buf); + + container = c; + typecodes = ARRAY_CONTAINER_TYPE_CODE; + } else { + single_container_t single; + memset(&single,0, sizeof(single_container_t)); + buf += single_container_read(thiscard, &single, buf); + container = single_to_container(single); + typecodes = SINGLE_CONTAINER_TYPE_CODE; } + answer->size++; - buf += array_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE; + answer->containers[k] = container; + answer->typecodes[k] = typecodes; } } return true;