From 93255e1ba4a0927f14d7ac04e87b935243a91a59 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 25 Oct 2024 10:32:27 +0200 Subject: [PATCH] Add modifiers methods in primitive array (#232) --- CMakeLists.txt | 10 +- include/sparrow/arrow_array_schema_proxy.hpp | 106 ++- .../arrow_array_schema_proxy_factory.hpp | 40 ++ .../sparrow/arrow_interface/arrow_array.hpp | 2 +- .../arrow_array/private_data.hpp | 13 +- .../arrow_array_schema_factory.hpp | 12 +- .../arrow_array_schema_info_utils.hpp | 51 +- .../arrow_schema/private_data.hpp | 12 + include/sparrow/layout/array_base.hpp | 201 ++---- include/sparrow/layout/array_bitmap_base.hpp | 206 ++++++ include/sparrow/layout/array_wrapper.hpp | 17 +- .../layout/dictionary_encoded_array.hpp | 9 +- include/sparrow/layout/layout_iterator.hpp | 68 +- .../sparrow/layout/list_layout/list_array.hpp | 22 +- include/sparrow/layout/mutable_array_base.hpp | 278 ++++++++ include/sparrow/layout/null_array.hpp | 13 +- include/sparrow/layout/primitive_array.hpp | 103 ++- .../run_end_encoded_array.hpp | 12 +- .../layout/struct_layout/struct_array.hpp | 11 +- include/sparrow/layout/union_array.hpp | 9 +- .../layout/variable_size_binary_array.hpp | 185 ++--- include/sparrow/utils/algorithm.hpp | 2 - include/sparrow/utils/mp_utils.hpp | 19 +- src/arrow_array_schema_proxy.cpp | 149 +++- src/arrow_interface/arrow_array.cpp | 2 +- test/test_arrow_array_schema_proxy.cpp | 279 +++++++- test/test_dictionary_encoded_array.cpp | 2 +- test/test_list_array.cpp | 108 +-- test/test_primitive_array.cpp | 671 +++++++++++++++--- test/test_variable_size_binary_array.cpp | 4 +- 30 files changed, 2073 insertions(+), 543 deletions(-) create mode 100644 include/sparrow/arrow_array_schema_proxy_factory.hpp create mode 100644 include/sparrow/layout/array_bitmap_base.hpp create mode 100644 include/sparrow/layout/mutable_array_base.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b32bb060..430c0a99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ 
-122,10 +122,15 @@ set(SPARROW_HEADERS ${SPARROW_INCLUDE_DIR}/sparrow/arrow_interface/arrow_schema/smart_pointers.hpp # buffer ${SPARROW_INCLUDE_DIR}/sparrow/buffer/allocator.hpp - ${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer_adaptor.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer_view.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/bitset_iterator.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/bitset_reference.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp # config ${SPARROW_INCLUDE_DIR}/sparrow/config/config.hpp ${SPARROW_INCLUDE_DIR}/sparrow/config/sparrow_version.hpp @@ -179,9 +184,10 @@ if (SPARROW_TARGET_32BIT) else() set(SPARROW_SRC - ${SPARROW_SOURCE_DIR}/array.cpp ${SPARROW_SOURCE_DIR}/array_factory.cpp ${SPARROW_SOURCE_DIR}/array_helper.cpp + ${SPARROW_SOURCE_DIR}/array.cpp + ${SPARROW_SOURCE_DIR}/arrow_array_schema_proxy.cpp ${SPARROW_SOURCE_DIR}/arrow_array_schema_proxy.cpp ${SPARROW_SOURCE_DIR}/arrow_interface/arrow_array.cpp ${SPARROW_SOURCE_DIR}/arrow_interface/arrow_schema.cpp diff --git a/include/sparrow/arrow_array_schema_proxy.hpp b/include/sparrow/arrow_array_schema_proxy.hpp index fb1f04fe..9d420037 100644 --- a/include/sparrow/arrow_array_schema_proxy.hpp +++ b/include/sparrow/arrow_array_schema_proxy.hpp @@ -18,8 +18,10 @@ #include #include "sparrow/arrow_interface/arrow_array/private_data.hpp" +#include "sparrow/arrow_interface/arrow_array_schema_info_utils.hpp" #include "sparrow/arrow_interface/arrow_schema/private_data.hpp" #include "sparrow/buffer/buffer_view.hpp" +#include "sparrow/buffer/dynamic_bitset/non_owning_dynamic_bitset.hpp" #include "sparrow/c_interface.hpp" 
#include "sparrow/config/config.hpp" #include "sparrow/types/data_type.hpp" @@ -128,7 +130,9 @@ namespace sparrow [[nodiscard]] SPARROW_API size_t length() const; /** - * Set the length of the `ArrowArray`. + * Set the length of the `ArrowArray`. This method does not resize the buffers of the `ArrowArray`. + * You have to change the length before replacing/resizing the buffers to have the right sizes when + * calling `buffers()`. * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. * @param length The length to set. */ @@ -136,12 +140,12 @@ namespace sparrow [[nodiscard]] SPARROW_API int64_t null_count() const; /** - * Set the null count of the `ArrowArray`. + * Set the null count of the `ArrowArray`. This method does not change the bitmap. * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. * @param null_count The null count to set. */ SPARROW_API void set_null_count(int64_t null_count); - [[nodiscard]] SPARROW_API size_t offset() const; + [[nodiscard]] SPARROW_API size_t offset() const; /** * Set the offset of the `ArrowArray`. @@ -152,7 +156,8 @@ namespace sparrow [[nodiscard]] SPARROW_API size_t n_buffers() const; /** - * Set the number of buffers of the `ArrowArray`. + * Set the number of buffers of the `ArrowArray`. Resize the buffers vector of the `ArrowArray` + * private data. * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. * @param n_buffers The number of buffers to set. */ @@ -162,7 +167,8 @@ namespace sparrow [[nodiscard]] SPARROW_API std::vector>& buffers(); /** - * Set the buffer at the given index. + * Set the buffer at the given index. You have to call the `set_length` method before calling this + * method to have the right sizes when calling `buffers()`. * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. * @param index The index of the buffer to set. * @param buffer The buffer to set. 
@@ -170,13 +176,73 @@ namespace sparrow SPARROW_API void set_buffer(size_t index, const buffer_view& buffer); /** - * Set the buffer at the given index. + * Set the buffer at the given index. You have to call the `set_length` method before calling this + * method to have the right sizes when calling `buffers()`. * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. * @param index The index of the buffer to set. * @param buffer The buffer to set. */ SPARROW_API void set_buffer(size_t index, buffer&& buffer); + /** + * Resize the bitmap buffer of the `ArrowArray`. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @param new_size The new size of the bitmap buffer. + * @param value The value to set in the new elements. True by default. + */ + SPARROW_API void resize_bitmap(size_t new_size, bool value = true); + + /** + * Insert elements of the same value in the bitmap buffer at the given index. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @exception `std::out_of_range` If the index is greater than the length of the bitmap. + * @param index The index where to insert the value. Must be less than the length of the bitmap. + * @param value The value to insert. + * @param count The number of times to insert the value. 1 by default + * @return The index of the first inserted value. + */ + SPARROW_API size_t insert_bitmap(size_t index, bool value, size_t count = 1); + + /** + * Insert several elements in the bitmap buffer at the given index. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. 
+ * @exception `std::out_of_range` If the index is greater than the length of the bitmap. + * @param index The index where to insert the values. Must be less than the length of the bitmap. + * @param range The range of values to insert. + * @return The index of the first inserted value. + */ + template + size_t insert_bitmap(size_t index, const R& range); + + /** + * Erase several elements in the bitmap buffer at the given index. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @exception `std::out_of_range` If the index is greater than the length of the bitmap. + * @param index The index of the first value to erase. Must be less than the length of the bitmap. + * @param count The number of elements to erase. 1 by default. + * @return The index of the first erased value. + */ + SPARROW_API size_t erase_bitmap(size_t index, size_t count = 1); + + /** + * Push a value at the end of the bitmap buffer. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @param value The value to push. + */ + SPARROW_API void push_back_bitmap(bool value); + + /** + * Pop a value at the end of the bitmap buffer. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + */ + SPARROW_API void pop_back_bitmap(); + /** * Add children without taking their ownership. * @exception `arrow_proxy_exception` If the `ArrowArray` or the `ArrowSchema` wrapped @@ -268,7 +334,7 @@ namespace sparrow /** * get a non-owning view of the arrow_proxy. 
*/ - [[nodiscard]] SPARROW_API arrow_proxy view(); + [[nodiscard]] SPARROW_API arrow_proxy view() const; [[nodiscard]] SPARROW_API bool owns_array() const; [[nodiscard]] SPARROW_API ArrowArray extract_array(); @@ -280,6 +346,9 @@ namespace sparrow [[nodiscard]] SPARROW_API ArrowSchema& schema(); [[nodiscard]] SPARROW_API const ArrowSchema& schema() const; + [[nodiscard]] SPARROW_API arrow_schema_private_data* get_schema_private_data(); + [[nodiscard]] SPARROW_API arrow_array_private_data* get_array_private_data(); + private: std::variant m_array; @@ -303,10 +372,12 @@ namespace sparrow [[nodiscard]] bool empty() const; SPARROW_API void resize_children(size_t children_count); - void update_buffers(); + [[nodiscard]] SPARROW_API non_owning_dynamic_bitset get_non_owning_dynamic_bitset(); + void update_children(); void update_dictionary(); void update_null_count(); + void update_buffers(); void reset(); [[nodiscard]] bool array_created_with_sparrow() const; @@ -314,13 +385,12 @@ namespace sparrow void validate_array_and_schema() const; - arrow_schema_private_data* get_schema_private_data(); - arrow_array_private_data* get_array_private_data(); - [[nodiscard]] bool is_arrow_array_valid() const; [[nodiscard]] bool is_arrow_schema_valid() const; [[nodiscard]] bool is_proxy_valid() const; + [[nodiscard]] size_t get_null_count() const; + void swap(arrow_proxy& other) noexcept; }; @@ -371,4 +441,18 @@ namespace sparrow ); } } + + template + inline size_t arrow_proxy::insert_bitmap(size_t index, const R& range) + { + if (!is_created_with_sparrow()) + { + throw arrow_proxy_exception("Cannot modify the bitmap on non-sparrow created ArrowArray"); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + auto bitmap = get_non_owning_dynamic_bitset(); + const auto it = bitmap.insert(sparrow::next(bitmap.cbegin(), index), range.begin(), range.end()); + return static_cast(std::distance(bitmap.begin(), it)); + } + } diff --git a/include/sparrow/arrow_array_schema_proxy_factory.hpp 
b/include/sparrow/arrow_array_schema_proxy_factory.hpp new file mode 100644 index 00000000..30c86fdd --- /dev/null +++ b/include/sparrow/arrow_array_schema_proxy_factory.hpp @@ -0,0 +1,40 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "sparrow/arrow_array_schema_proxy.hpp" +#include "sparrow/arrow_interface/arrow_array_schema_factory.hpp" +#include "sparrow/types/data_traits.hpp" + +namespace sparrow +{ + template + requires std::is_arithmetic_v> + && std::integral> + arrow_proxy make_primitive_arrow_proxy( + Values&& values, + Nulls&& nulls, + int64_t offset, + std::string_view name, + std::optional metadata + ) + { + using ValueType = std::ranges::range_value_t; + return arrow_proxy{ + make_primitive_arrow_array(std::forward(values), std::forward(nulls), offset), + make_primitive_arrow_schema(arrow_traits::type_id, name, metadata, std::nullopt) + }; + } +} diff --git a/include/sparrow/arrow_interface/arrow_array.hpp b/include/sparrow/arrow_interface/arrow_array.hpp index 2739a4bb..819d1a6b 100644 --- a/include/sparrow/arrow_interface/arrow_array.hpp +++ b/include/sparrow/arrow_interface/arrow_array.hpp @@ -121,7 +121,7 @@ namespace sparrow * * @return The created `ArrowArray`. */ - SPARROW_API arrow_array_unique_ptr default_arrow_array_unique_ptr(); + arrow_array_unique_ptr default_arrow_array_unique_ptr(); /** * Release function to use for the `ArrowArray.release` member. 
diff --git a/include/sparrow/arrow_interface/arrow_array/private_data.hpp b/include/sparrow/arrow_interface/arrow_array/private_data.hpp index 699bd446..277dcf33 100644 --- a/include/sparrow/arrow_interface/arrow_array/private_data.hpp +++ b/include/sparrow/arrow_interface/arrow_array/private_data.hpp @@ -41,11 +41,12 @@ namespace sparrow [[nodiscard]] constexpr BufferType& buffers() noexcept; [[nodiscard]] constexpr const BufferType& buffers() const noexcept; - + constexpr void resize_buffers(std::size_t size); void set_buffer(std::size_t index, buffer&& buffer); void set_buffer(std::size_t index, const buffer_view& buffer); constexpr void resize_buffer(std::size_t index, std::size_t size, std::uint8_t value); + constexpr void update_buffers_ptrs(); template [[nodiscard]] constexpr const T** buffers_ptrs() noexcept; @@ -64,8 +65,7 @@ namespace sparrow { } - [[nodiscard]] constexpr std::vector>& - arrow_array_private_data::buffers() noexcept + [[nodiscard]] constexpr std::vector>& arrow_array_private_data::buffers() noexcept { return m_buffers; } @@ -79,7 +79,7 @@ namespace sparrow constexpr void arrow_array_private_data::resize_buffers(std::size_t size) { m_buffers.resize(size); - m_buffers_pointers = to_raw_ptr_vec(m_buffers); + update_buffers_ptrs(); } inline void arrow_array_private_data::set_buffer(std::size_t index, buffer&& buffer) @@ -109,4 +109,9 @@ namespace sparrow { return const_cast(reinterpret_cast(m_buffers_pointers.data())); } + + constexpr void arrow_array_private_data::update_buffers_ptrs() + { + m_buffers_pointers = to_raw_ptr_vec(m_buffers); + } } diff --git a/include/sparrow/arrow_interface/arrow_array_schema_factory.hpp b/include/sparrow/arrow_interface/arrow_array_schema_factory.hpp index 93492280..b4822ec8 100644 --- a/include/sparrow/arrow_interface/arrow_array_schema_factory.hpp +++ b/include/sparrow/arrow_interface/arrow_array_schema_factory.hpp @@ -55,7 +55,7 @@ namespace sparrow } template - requires(std::integral>) + 
requires(std::integral> && !std::same_as, bool>) buffer make_bitmap_buffer(size_t count, R&& nulls) { if (!std::ranges::empty(nulls)) @@ -98,6 +98,16 @@ namespace sparrow return make_arrow_array(length, null_count, offset, std::move(value_buffers), 0, nullptr, nullptr); } + inline ArrowSchema make_primitive_arrow_schema( + data_type data_type, + std::string_view name, + std::optional metadata, + std::optional arrow_flag + ) + { + return make_arrow_schema(data_type_to_format(data_type), name, metadata, arrow_flag, 0, nullptr, nullptr); + } + template < std::ranges::sized_range Keys, std::ranges::sized_range KeyNulls, diff --git a/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp b/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp index e6a247e4..ccb7c77f 100644 --- a/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp +++ b/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp @@ -26,7 +26,7 @@ namespace sparrow { /// @returns `true` if the number of buffers in an `ArrowArray` for a given data type is valid, `false` /// otherwise. - constexpr bool validate_buffers_count(data_type data_type, int64_t n_buffers) + constexpr bool validate_buffers_count(data_type data_type, int64_t n_buffers) { const std::size_t expected_buffer_count = get_expected_buffer_count(data_type); return static_cast(n_buffers) == expected_buffer_count; @@ -74,16 +74,17 @@ namespace sparrow } /// @returns `true` if the format of an `ArrowArray` for a given data type is valid, `false` otherwise. 
- inline bool validate_format_with_arrow_array(data_type , const ArrowArray& ) + inline bool validate_format_with_arrow_array(data_type, const ArrowArray&) { - return true; + return true; /* THE CODE BELOW MAKES WRONG ASSUMPTIONS AND NEEDS TO BE REFACTORED IN A SEPERATE PR*/ // const bool buffers_count_valid = validate_buffers_count(data_type, array.n_buffers); // // const bool children_count_valid = static_cast(array.n_children) // // == get_expected_children_count(data_type); - // //std::cout<<"child cound: "< get_buffer_types_from_data_type(data_type data_type) @@ -232,4 +232,43 @@ namespace sparrow mpl::unreachable(); } + constexpr bool has_bitmap(data_type dt) + { + switch (dt) + { + // List all data types. We use the default warning to catch missing cases. + case data_type::BOOL: + case data_type::INT8: + case data_type::INT16: + case data_type::INT32: + case data_type::INT64: + case data_type::UINT8: + case data_type::UINT16: + case data_type::UINT32: + case data_type::UINT64: + case data_type::HALF_FLOAT: + case data_type::FLOAT: + case data_type::DOUBLE: + case data_type::TIMESTAMP: + case data_type::DECIMAL: + case data_type::LIST: + case data_type::STRUCT: + case data_type::MAP: + case data_type::STRING: + case data_type::BINARY: + case data_type::FIXED_SIZE_BINARY: + case data_type::FIXED_WIDTH_BINARY: + case data_type::LARGE_LIST: + case data_type::LIST_VIEW: + case data_type::LARGE_LIST_VIEW: + case data_type::FIXED_SIZED_LIST: + return true; + case data_type::NA: + case data_type::SPARSE_UNION: + case data_type::DENSE_UNION: + case data_type::RUN_ENCODED: + return false; + } + mpl::unreachable(); + } } diff --git a/include/sparrow/arrow_interface/arrow_schema/private_data.hpp b/include/sparrow/arrow_interface/arrow_schema/private_data.hpp index 61f7c17f..2feca9d7 100644 --- a/include/sparrow/arrow_interface/arrow_schema/private_data.hpp +++ b/include/sparrow/arrow_interface/arrow_schema/private_data.hpp @@ -93,8 +93,20 @@ namespace sparrow { return 
std::string(t.cbegin(), t.cend()); } + else if constexpr (mpl::is_type_instance_of_v) + { + if (t.has_value()) + { + return to_optional_string(*t); + } + else + { + return std::nullopt; + } + } else { + static_assert(mpl::dependent_false::value, "to_optional_string: unsupported type."); mpl::unreachable(); } } diff --git a/include/sparrow/layout/array_base.hpp b/include/sparrow/layout/array_base.hpp index 81f117d9..1f44a853 100644 --- a/include/sparrow/layout/array_base.hpp +++ b/include/sparrow/layout/array_base.hpp @@ -14,15 +14,17 @@ #pragma once +#include #include #include +#include #include "sparrow/arrow_array_schema_proxy.hpp" -#include "sparrow/buffer/dynamic_bitset.hpp" +#include "sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp" #include "sparrow/layout/layout_iterator.hpp" -#include "sparrow/utils/nullable.hpp" -#include "sparrow/utils/iterator.hpp" #include "sparrow/utils/crtp_base.hpp" +#include "sparrow/utils/iterator.hpp" +#include "sparrow/utils/nullable.hpp" namespace sparrow { @@ -34,7 +36,7 @@ namespace sparrow * */ struct array_inner_types_base { - using bitmap_type = dynamic_bitset_view; + using bitmap_type = dynamic_bitset_view; }; /** @@ -54,44 +56,47 @@ namespace sparrow class array_crtp_base : public crtp_base { public: + using self_type = array_crtp_base; using derived_type = D; + using inner_types = array_inner_types; + using size_type = std::size_t; using difference_type = std::ptrdiff_t; + using bitmap_type = typename inner_types::bitmap_type; - using bitmap_reference = bitmap_type::reference; using bitmap_const_reference = bitmap_type::const_reference; using bitmap_iterator = bitmap_type::iterator; - using bitmap_range = std::ranges::subrange; using const_bitmap_iterator = bitmap_type::const_iterator; using const_bitmap_range = std::ranges::subrange; using inner_value_type = typename inner_types::inner_value_type; - using inner_reference = typename inner_types::inner_reference; + using value_type = nullable; + using 
inner_const_reference = typename inner_types::inner_const_reference; - using reference = nullable; using const_reference = nullable; - using value_type = nullable; + using const_value_iterator = typename inner_types::const_value_iterator; + using const_value_range = std::ranges::subrange; + using iterator_tag = typename inner_types::iterator_tag; - using iterator = layout_iterator; - using const_iterator = layout_iterator; - - using value_iterator = typename inner_types::value_iterator; - using const_value_iterator = typename inner_types::const_value_iterator; + struct iterator_types + { + using value_type = self_type::value_type; + using reference = self_type::const_reference; + using value_iterator = self_type::const_value_iterator; + using bitmap_iterator = self_type::const_bitmap_iterator; + using iterator_tag = self_type::iterator_tag; + }; - using const_value_range = std::ranges::subrange; + using const_iterator = layout_iterator; - size_type size() const; + [[nodiscard]] size_type size() const; - reference operator[](size_type i); const_reference operator[](size_type i) const; - iterator begin(); - iterator end(); - const_iterator begin() const; const_iterator end() const; @@ -111,27 +116,22 @@ namespace sparrow array_crtp_base(array_crtp_base&&) = default; array_crtp_base& operator=(array_crtp_base&&) = default; - const arrow_proxy& storage() const; - arrow_proxy& storage(); + [[nodiscard]] arrow_proxy& get_arrow_proxy(); + [[nodiscard]] const arrow_proxy& get_arrow_proxy() const; - bitmap_reference has_value(size_type i); bitmap_const_reference has_value(size_type i) const; - bitmap_iterator bitmap_begin(); - bitmap_iterator bitmap_end(); - const_bitmap_iterator bitmap_begin() const; const_bitmap_iterator bitmap_end() const; - private: - - arrow_proxy& get_arrow_proxy(); + const_bitmap_iterator bitmap_cbegin() const; + const_bitmap_iterator bitmap_cend() const; + private: arrow_proxy m_proxy; // friend classes - friend class layout_iterator; - friend class 
layout_iterator; + friend class layout_iterator; template friend class array_wrapper_impl; }; @@ -139,39 +139,6 @@ namespace sparrow template bool operator==(const array_crtp_base& lhs, const array_crtp_base& rhs); - /* - * Base class for arrays using a validity buffer for - * defining their bitmap. - */ - template - class array_bitmap_base : public array_crtp_base - { - public: - - using base_type = array_crtp_base; - using bitmap_type = typename base_type::bitmap_type; - - protected: - - array_bitmap_base(arrow_proxy); - - array_bitmap_base(const array_bitmap_base&); - array_bitmap_base& operator=(const array_bitmap_base&); - - array_bitmap_base(array_bitmap_base&&) = default; - array_bitmap_base& operator=(array_bitmap_base&&) = default; - - bitmap_type& get_bitmap(); - const bitmap_type& get_bitmap() const; - - private: - - static constexpr std::size_t m_bitmap_buffer_index = 0; - - bitmap_type make_bitmap(); - bitmap_type m_bitmap; - }; - /********************************** * array_crtp_base implementation * **********************************/ @@ -179,17 +146,7 @@ namespace sparrow template auto array_crtp_base::size() const -> size_type { - return static_cast(storage().length()); - } - - template - auto array_crtp_base::operator[](size_type i) -> reference - { - SPARROW_ASSERT_TRUE(i < this->derived_cast().size()); - return reference( - inner_reference(this->derived_cast().value(i)), - this->derived_cast().has_value(i) - ); + return static_cast(get_arrow_proxy().length()); } template @@ -202,18 +159,6 @@ namespace sparrow ); } - template - auto array_crtp_base::begin() -> iterator - { - return iterator(this->derived_cast().value_begin(), this->derived_cast().bitmap_begin()); - } - - template - auto array_crtp_base::end() -> iterator - { - return iterator(this->derived_cast().value_end(), this->derived_cast().bitmap_end()); - } - template auto array_crtp_base::begin() const -> const_iterator { @@ -229,13 +174,13 @@ namespace sparrow template auto 
array_crtp_base::cbegin() const -> const_iterator { - return const_iterator(this->derived_cast().value_cbegin(), this->derived_cast().bitmap_begin()); + return const_iterator(this->derived_cast().value_cbegin(), bitmap_begin()); } template auto array_crtp_base::cend() const -> const_iterator { - return const_iterator(this->derived_cast().value_cend(), this->derived_cast().bitmap_end()); + return const_iterator(this->derived_cast().value_cend(), bitmap_end()); } template @@ -255,24 +200,17 @@ namespace sparrow : m_proxy(std::move(proxy)) { } - - template - auto array_crtp_base::storage() -> arrow_proxy& - { - return m_proxy; - } template - auto array_crtp_base::storage() const -> const arrow_proxy& + auto array_crtp_base::get_arrow_proxy() -> arrow_proxy& { return m_proxy; } template - auto array_crtp_base::has_value(size_type i) -> bitmap_reference + auto array_crtp_base::get_arrow_proxy() const -> const arrow_proxy& { - SPARROW_ASSERT_TRUE(i < size()); - return *sparrow::next(bitmap_begin(), i); + return m_proxy; } template @@ -282,22 +220,10 @@ namespace sparrow return *sparrow::next(bitmap_begin(), i); } - template - auto array_crtp_base::bitmap_begin() -> bitmap_iterator - { - return sparrow::next(this->derived_cast().get_bitmap().begin(), storage().offset()); - } - - template - auto array_crtp_base::bitmap_end() -> bitmap_iterator - { - return sparrow::next(bitmap_begin(), size()); - } - template auto array_crtp_base::bitmap_begin() const -> const_bitmap_iterator { - return sparrow::next(this->derived_cast().get_bitmap().cbegin(), storage().offset()); + return sparrow::next(this->derived_cast().get_bitmap().cbegin(), get_arrow_proxy().offset()); } template @@ -307,59 +233,20 @@ namespace sparrow } template - auto array_crtp_base::get_arrow_proxy() -> arrow_proxy& - { - return m_proxy; - } - - template - bool operator==(const array_crtp_base& lhs, const array_crtp_base& rhs) - { - return std::ranges::equal(lhs, rhs); - } - - 
/************************************ - * array_bitmap_base implementation * - ************************************/ - - template - array_bitmap_base::array_bitmap_base(arrow_proxy proxy) - : base_type(std::move(proxy)) - , m_bitmap(make_bitmap()) - { - } - - template - array_bitmap_base::array_bitmap_base(const array_bitmap_base& rhs) - : base_type(rhs) - , m_bitmap(make_bitmap()) - { - } - template - array_bitmap_base& array_bitmap_base::operator=(const array_bitmap_base& rhs) - { - base_type::operator=(rhs); - m_bitmap = make_bitmap(); - return *this; - } - - template - auto array_bitmap_base::get_bitmap() -> bitmap_type& + auto array_crtp_base::bitmap_cbegin() const -> const_bitmap_iterator { - return m_bitmap; + return bitmap_begin(); } template - auto array_bitmap_base::get_bitmap() const -> const bitmap_type& + auto array_crtp_base::bitmap_cend() const -> const_bitmap_iterator { - return m_bitmap; + return bitmap_end(); } template - auto array_bitmap_base::make_bitmap() -> bitmap_type + bool operator==(const array_crtp_base& lhs, const array_crtp_base& rhs) { - SPARROW_ASSERT_TRUE(this->storage().buffers().size() > m_bitmap_buffer_index); - const auto bitmap_size = static_cast(this->storage().length() + this->storage().offset()); - return bitmap_type(this->storage().buffers()[m_bitmap_buffer_index].data(), bitmap_size); + return std::ranges::equal(lhs, rhs); } } diff --git a/include/sparrow/layout/array_bitmap_base.hpp b/include/sparrow/layout/array_bitmap_base.hpp new file mode 100644 index 00000000..c0af4c4c --- /dev/null +++ b/include/sparrow/layout/array_bitmap_base.hpp @@ -0,0 +1,206 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "sparrow/arrow_array_schema_proxy.hpp" +#include "sparrow/layout/mutable_array_base.hpp" + +namespace sparrow +{ + /* + * Base class for arrays using a validity buffer for + * defining their bitmap. + */ + template + class array_bitmap_base_impl + : public std::conditional_t, array_crtp_base> + { + public: + + using base_type = std::conditional_t, array_crtp_base>; + + using size_type = std::size_t; // typename base_type::size_type; + + using bitmap_type = typename base_type::bitmap_type; + using bitmap_iterator = typename base_type::bitmap_iterator; + using const_bitmap_iterator = typename base_type::const_bitmap_iterator; + + using bitmap_const_reference = typename base_type::bitmap_const_reference; + using difference_type = typename base_type::difference_type; + + using const_bitmap_range = typename base_type::const_bitmap_range; + + using iterator_tag = typename base_type::iterator_tag; + + using base_type::operator[]; + + protected: + + array_bitmap_base_impl(arrow_proxy); + + array_bitmap_base_impl(const array_bitmap_base_impl&); + array_bitmap_base_impl& operator=(const array_bitmap_base_impl&); + + array_bitmap_base_impl(array_bitmap_base_impl&&) noexcept = default; + array_bitmap_base_impl& operator=(array_bitmap_base_impl&&) noexcept = default; + + bitmap_type& get_bitmap() + requires is_mutable; + const bitmap_type& get_bitmap() const; + + void resize_bitmap(size_type new_length) + requires is_mutable; + + bitmap_iterator insert_bitmap(const_bitmap_iterator pos, bool value, size_type count) + requires 
is_mutable; + + template + requires std::same_as::value_type, bool> + bitmap_iterator insert_bitmap(const_bitmap_iterator pos, InputIt first, InputIt last) + requires is_mutable; + + bitmap_iterator erase_bitmap(const_bitmap_iterator pos, size_type count) + requires is_mutable; + + void update() + requires is_mutable; + + non_owning_dynamic_bitset get_non_owning_dynamic_bitset(); + + bitmap_type make_bitmap(); + + private: + + bitmap_type m_bitmap; + + friend array_crtp_base; + friend mutable_array_base; + }; + + template + using array_bitmap_base = array_bitmap_base_impl; + + template + using mutable_array_bitmap_base = array_bitmap_base_impl; + + /************************************ + * array_bitmap_base implementation * + ************************************/ + + template + array_bitmap_base_impl::array_bitmap_base_impl(arrow_proxy proxy_param) + : base_type(std::move(proxy_param)) + , m_bitmap(make_bitmap()) + { + } + + template + array_bitmap_base_impl::array_bitmap_base_impl(const array_bitmap_base_impl& rhs) + : base_type(rhs) + , m_bitmap(make_bitmap()) + { + } + + template + array_bitmap_base_impl& + array_bitmap_base_impl::operator=(const array_bitmap_base_impl& rhs) + { + base_type::operator=(rhs); + m_bitmap = make_bitmap(); + return *this; + } + + template + auto array_bitmap_base_impl::get_bitmap() -> bitmap_type& + requires is_mutable + { + return m_bitmap; + } + + template + auto array_bitmap_base_impl::get_bitmap() const -> const bitmap_type& + { + return m_bitmap; + } + + template + auto array_bitmap_base_impl::make_bitmap() -> bitmap_type + { + static constexpr size_t bitmap_buffer_index = 0; + arrow_proxy& arrow_proxy = this->get_arrow_proxy(); + SPARROW_ASSERT_TRUE(arrow_proxy.buffers().size() > bitmap_buffer_index); + const auto bitmap_size = arrow_proxy.length() + arrow_proxy.offset(); + return bitmap_type(arrow_proxy.buffers()[bitmap_buffer_index].data(), bitmap_size); + } + + template + void array_bitmap_base_impl::resize_bitmap(size_type 
new_length) + requires is_mutable + { + arrow_proxy& arrow_proxy = this->get_arrow_proxy(); + const size_t new_size = new_length + arrow_proxy.offset(); + arrow_proxy.resize_bitmap(new_size); + } + + template + auto + array_bitmap_base_impl::insert_bitmap(const_bitmap_iterator pos, bool value, size_type count) + -> bitmap_iterator + requires is_mutable + { + SPARROW_ASSERT_TRUE(this->bitmap_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos <= this->bitmap_cend()) + const auto pos_index = static_cast(std::distance(this->bitmap_cbegin(), pos)); + const auto idx = this->get_arrow_proxy().insert_bitmap(pos_index, value, count); + return sparrow::next(this->bitmap_begin(), idx); + } + + template + template + requires std::same_as::value_type, bool> + auto array_bitmap_base_impl::insert_bitmap( + const_bitmap_iterator pos, + InputIt first, + InputIt last + ) -> bitmap_iterator + requires is_mutable + { + SPARROW_ASSERT_TRUE(this->bitmap_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos <= this->bitmap_cend()); + SPARROW_ASSERT_TRUE(first <= last); + const auto distance = static_cast(std::distance(this->bitmap_cbegin(), pos)); + const auto idx = this->get_arrow_proxy().insert_bitmap(distance, std::ranges::subrange(first, last)); + return sparrow::next(this->bitmap_begin(), idx); + } + + template + auto + array_bitmap_base_impl::erase_bitmap(const_bitmap_iterator pos, size_type count) -> bitmap_iterator + requires is_mutable + { + SPARROW_ASSERT_TRUE(this->bitmap_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos < this->bitmap_cend()) + const auto pos_idx = static_cast(std::distance(this->bitmap_cbegin(), pos)); + const auto idx = this->get_arrow_proxy().erase_bitmap(pos_idx, count); + return sparrow::next(this->bitmap_begin(), idx); + } + + template + void array_bitmap_base_impl::update() + requires is_mutable + { + m_bitmap = make_bitmap(); + } +} diff --git a/include/sparrow/layout/array_wrapper.hpp b/include/sparrow/layout/array_wrapper.hpp index 0a7584b2..e1be1d72 100644 --- 
a/include/sparrow/layout/array_wrapper.hpp +++ b/include/sparrow/layout/array_wrapper.hpp @@ -69,7 +69,8 @@ namespace sparrow enum data_type data_type() const; bool is_dictionary() const; - arrow_proxy& get_arrow_proxy(); + [[nodiscard]] arrow_proxy& get_arrow_proxy(); + [[nodiscard]] const arrow_proxy& get_arrow_proxy() const; protected: @@ -81,6 +82,7 @@ namespace sparrow enum data_type m_data_type; virtual bool is_dictionary_impl() const = 0; virtual arrow_proxy& get_arrow_proxy_impl() = 0; + virtual const arrow_proxy& get_arrow_proxy_impl() const = 0; virtual wrapper_ptr clone_impl() const = 0; }; @@ -107,6 +109,7 @@ namespace sparrow array_wrapper_impl(const array_wrapper_impl&); bool is_dictionary_impl() const override; arrow_proxy& get_arrow_proxy_impl() override; + const arrow_proxy& get_arrow_proxy_impl() const override; wrapper_ptr clone_impl() const override; using storage_type = std::variant, std::shared_ptr, T*>; @@ -144,6 +147,11 @@ namespace sparrow return get_arrow_proxy_impl(); } + inline const arrow_proxy& array_wrapper::get_arrow_proxy() const + { + return get_arrow_proxy_impl(); + } + inline array_wrapper::array_wrapper(enum data_type dt) : m_data_type(dt) { @@ -222,6 +230,13 @@ namespace sparrow return p_array->get_arrow_proxy(); } + template + const arrow_proxy& array_wrapper_impl::get_arrow_proxy_impl() const + { + return p_array->get_arrow_proxy(); + } + + template auto array_wrapper_impl::clone_impl() const -> wrapper_ptr { diff --git a/include/sparrow/layout/dictionary_encoded_array.hpp b/include/sparrow/layout/dictionary_encoded_array.hpp index 0028cdea..2e8a208c 100644 --- a/include/sparrow/layout/dictionary_encoded_array.hpp +++ b/include/sparrow/layout/dictionary_encoded_array.hpp @@ -135,7 +135,8 @@ namespace sparrow static keys_layout create_keys_layout(arrow_proxy& proxy); static values_layout create_values_layout(arrow_proxy& proxy); - arrow_proxy& get_arrow_proxy(); + [[nodiscard]] arrow_proxy& get_arrow_proxy(); + [[nodiscard]] 
const arrow_proxy& get_arrow_proxy() const; arrow_proxy m_proxy; keys_layout m_keys_layout; @@ -308,6 +309,12 @@ namespace sparrow return m_proxy; } + template + auto dictionary_encoded_array::get_arrow_proxy() const -> const arrow_proxy& + { + return m_proxy; + } + template bool operator==(const dictionary_encoded_array& lhs, const dictionary_encoded_array& rhs) { diff --git a/include/sparrow/layout/layout_iterator.hpp b/include/sparrow/layout/layout_iterator.hpp index 5a30df98..f57db1bc 100644 --- a/include/sparrow/layout/layout_iterator.hpp +++ b/include/sparrow/layout/layout_iterator.hpp @@ -19,33 +19,46 @@ namespace sparrow { + /** + * Concept for iterator types + */ + template + concept iterator_types = requires { + typename T::value_type; + typename T::reference; + typename T::value_iterator; + typename T::bitmap_iterator; + typename T::iterator_tag; + }; + /** * Layout iterator class * * Relies on a layout's couple of value iterator and bitmap iterator to * return reference proxies when it is dereferenced. 
*/ - template + template class layout_iterator : public iterator_base< - layout_iterator, - mpl::constify_t, - typename Layout::iterator_tag, - std::conditional_t> + layout_iterator, + typename Iterator_types::value_type, + typename Iterator_types::iterator_tag, + typename Iterator_types::reference> { public: - using self_type = layout_iterator; + using self_type = layout_iterator; using base_type = iterator_base< self_type, - mpl::constify_t, - typename Layout::iterator_tag, - std::conditional_t>; + typename Iterator_types::value_type, + typename Iterator_types::iterator_tag, + typename Iterator_types::reference>; + using reference = typename base_type::reference; using difference_type = typename base_type::difference_type; - using value_iterator = std::conditional_t; + using value_iterator = Iterator_types::value_iterator; - using bitmap_iterator = std::conditional_t; + using bitmap_iterator = Iterator_types::bitmap_iterator; layout_iterator() noexcept = default; layout_iterator(value_iterator value_iter, bitmap_iterator bitmap_iter); @@ -70,56 +83,55 @@ namespace sparrow * layout_iterator implementation * **********************************/ - template - layout_iterator::layout_iterator(value_iterator value_iter, bitmap_iterator bitmap_iter) + template + layout_iterator::layout_iterator(value_iterator value_iter, bitmap_iterator bitmap_iter) : m_value_iter(value_iter) , m_bitmap_iter(bitmap_iter) { } - template - auto layout_iterator::dereference() const -> reference + template + auto layout_iterator::dereference() const -> reference { return reference(*m_value_iter, *m_bitmap_iter); } - template - void layout_iterator::increment() + template + void layout_iterator::increment() { ++m_value_iter; ++m_bitmap_iter; } - template - void layout_iterator::decrement() + template + void layout_iterator::decrement() { --m_value_iter; --m_bitmap_iter; } - template - void layout_iterator::advance(difference_type n) + template + void layout_iterator::advance(difference_type 
n) { m_value_iter += n; m_bitmap_iter += n; } - template - auto layout_iterator::distance_to(const self_type& rhs) const -> difference_type + template + auto layout_iterator::distance_to(const self_type& rhs) const -> difference_type { return rhs.m_value_iter - m_value_iter; } - template - bool layout_iterator::equal(const self_type& rhs) const + template + bool layout_iterator::equal(const self_type& rhs) const { return m_value_iter == rhs.m_value_iter && m_bitmap_iter == rhs.m_bitmap_iter; } - template - bool layout_iterator::less_than(const self_type& rhs) const + template + bool layout_iterator::less_than(const self_type& rhs) const { return m_value_iter < rhs.m_value_iter && m_bitmap_iter < rhs.m_bitmap_iter; } } - diff --git a/include/sparrow/layout/list_layout/list_array.hpp b/include/sparrow/layout/list_layout/list_array.hpp index b09ef75a..f83441ad 100644 --- a/include/sparrow/layout/list_layout/list_array.hpp +++ b/include/sparrow/layout/list_layout/list_array.hpp @@ -17,7 +17,7 @@ #include // for std::stoull #include "sparrow/array_factory.hpp" -#include "sparrow/layout/array_base.hpp" +#include "sparrow/layout/array_bitmap_base.hpp" #include "sparrow/layout/array_wrapper.hpp" #include "sparrow/layout/layout_utils.hpp" #include "sparrow/layout/nested_value_types.hpp" @@ -112,10 +112,10 @@ namespace sparrow using size_type = typename base_type::size_type; using bitmap_type = typename base_type::bitmap_type; - using bitmap_reference = typename base_type::bitmap_reference; + // using bitmap_reference = typename base_type::bitmap_reference; using bitmap_const_reference = typename base_type::bitmap_const_reference; - using bitmap_range = typename base_type::bitmap_range; + // using bitmap_range = typename base_type::bitmap_range; using const_bitmap_range = typename base_type::const_bitmap_range; using inner_value_type = list_value; @@ -123,7 +123,7 @@ namespace sparrow using inner_const_reference = list_value; using value_type = nullable; - using reference = 
nullable; + // using reference = nullable; using const_reference = nullable; using iterator_tag = typename base_type::iterator_tag; @@ -175,7 +175,7 @@ namespace sparrow using base_type = list_array_crtp_base>; using list_size_type = inner_types::list_size_type; using size_type = typename base_type::size_type; - using offset_type = std::conditional_t; + using offset_type = std::conditional_t; explicit list_array_impl(arrow_proxy proxy); @@ -209,7 +209,7 @@ namespace sparrow using base_type = list_array_crtp_base>; using list_size_type = inner_types::list_size_type; using size_type = typename base_type::size_type; - using offset_type = std::conditional_t; + using offset_type = std::conditional_t; explicit list_view_array_impl(arrow_proxy proxy); @@ -357,7 +357,7 @@ namespace sparrow template cloning_ptr list_array_crtp_base::make_flat_array() { - return array_factory(this->storage().children()[0].view()); + return array_factory(this->get_arrow_proxy().children()[0].view()); } /********************************** @@ -404,7 +404,7 @@ namespace sparrow auto list_array_impl::make_list_offsets() -> offset_type* { return reinterpret_cast( - this->storage().buffers()[OFFSET_BUFFER_INDEX].data() + this->storage().offset() + this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset() ); } @@ -452,7 +452,7 @@ namespace sparrow auto list_view_array_impl::make_list_offsets() -> offset_type* { return reinterpret_cast( - this->storage().buffers()[OFFSET_BUFFER_INDEX].data() + this->storage().offset() + this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset() ); } @@ -460,7 +460,7 @@ namespace sparrow auto list_view_array_impl::make_list_sizes() -> offset_type* { return reinterpret_cast( - this->storage().buffers()[SIZES_BUFFER_INDEX].data() + this->storage().offset() + this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].data() + this->get_arrow_proxy().offset() ); } @@ -482,7 +482,7 @@ namespace sparrow 
inline fixed_sized_list_array::fixed_sized_list_array(arrow_proxy proxy) : base_type(std::move(proxy)) - , m_list_size(fixed_sized_list_array::list_size_from_format(this->storage().format())) + , m_list_size(fixed_sized_list_array::list_size_from_format(this->get_arrow_proxy().format())) { } diff --git a/include/sparrow/layout/mutable_array_base.hpp b/include/sparrow/layout/mutable_array_base.hpp new file mode 100644 index 00000000..bbc4cb81 --- /dev/null +++ b/include/sparrow/layout/mutable_array_base.hpp @@ -0,0 +1,278 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#pragma once + +#include "sparrow/layout/array_base.hpp" + +namespace sparrow +{ + template + class mutable_array_base : public array_crtp_base + { + public: + + using self_type = mutable_array_base; + using base_type = array_crtp_base; + using derived_type = D; + using inner_types = array_inner_types; + + using size_type = base_type::size_type; + using difference_type = base_type::difference_type; + + using bitmap_type = typename inner_types::bitmap_type; + using bitmap_reference = bitmap_type::reference; + using bitmap_const_reference = bitmap_type::const_reference; + using bitmap_iterator = bitmap_type::iterator; + using bitmap_range = std::ranges::subrange; + using const_bitmap_range = base_type::const_bitmap_range; + + using inner_value_type = typename base_type::inner_value_type; + using value_type = typename base_type::value_type; + + using inner_reference = typename inner_types::inner_reference; + using inner_const_reference = typename base_type::inner_const_reference; + + using reference = nullable; + using const_reference = base_type::const_reference; + + using value_iterator = typename inner_types::value_iterator; + + using iterator_tag = base_type::iterator_tag; + + struct iterator_types + { + using value_type = self_type::value_type; + using reference = self_type::reference; + using value_iterator = self_type::value_iterator; + using bitmap_iterator = self_type::bitmap_iterator; + using iterator_tag = self_type::iterator_tag; + }; + + using iterator = layout_iterator; + using const_iterator = base_type::const_iterator; + + reference operator[](size_type i); + using base_type::operator[]; + + bitmap_reference has_value(size_type i); + using base_type::has_value; + + iterator begin(); + iterator end(); + + using base_type::begin; + using base_type::end; + + void resize(size_type new_size, const value_type& value); + + iterator insert(const_iterator pos, const value_type& value); + iterator insert(const_iterator pos, const value_type& value, size_type 
count); + iterator insert(const_iterator pos, std::initializer_list values); + template + iterator insert(const_iterator pos, InputIt first, InputIt last); + template + iterator insert(const_iterator pos, const R& range); + + iterator erase(const_iterator pos); + iterator erase(const_iterator first, const_iterator last); + + void push_back(const value_type& value); + void pop_back(); + + protected: + + mutable_array_base(arrow_proxy); + mutable_array_base(const mutable_array_base&) = default; + mutable_array_base& operator=(const mutable_array_base&) = default; + + mutable_array_base(mutable_array_base&&) = default; + mutable_array_base& operator=(mutable_array_base&&) = default; + + using base_type::get_arrow_proxy; + + bitmap_iterator bitmap_begin(); + bitmap_iterator bitmap_end(); + + friend class layout_iterator; + }; + + template + mutable_array_base::mutable_array_base(arrow_proxy proxy) + : array_crtp_base(std::forward(proxy)) + { + } + + template + auto mutable_array_base::begin() -> iterator + { + return iterator(this->derived_cast().value_begin(), this->derived_cast().bitmap_begin()); + } + + template + auto mutable_array_base::end() -> iterator + { + return iterator(this->derived_cast().value_end(), this->derived_cast().bitmap_end()); + } + + template + auto mutable_array_base::operator[](size_type i) -> reference + { + SPARROW_ASSERT_TRUE(i < this->size()); + return reference(inner_reference(this->derived_cast().value(i)), this->derived_cast().has_value(i)); + } + + template + auto mutable_array_base::has_value(size_type i) -> bitmap_reference + { + SPARROW_ASSERT_TRUE(i < this->size()); + return *sparrow::next(bitmap_begin(), i); + } + + template + auto mutable_array_base::bitmap_begin() -> bitmap_iterator + { + return sparrow::next(this->derived_cast().get_bitmap().begin(), get_arrow_proxy().offset()); + } + + template + auto mutable_array_base::bitmap_end() -> bitmap_iterator + { + return sparrow::next(bitmap_begin(), this->size()); + } + + template 
+ void mutable_array_base::resize(size_type new_length, const value_type& value) + { + auto& derived = this->derived_cast(); + derived.resize_bitmap(new_length); + derived.resize_values(new_length, value.get()); + get_arrow_proxy().set_length(new_length); // Must be done after resizing the bitmap and values + derived.update(); + } + + template + auto mutable_array_base::insert(const_iterator pos, const value_type& value) -> iterator + { + return insert(pos, value, 1); + } + + template + auto mutable_array_base::insert(const_iterator pos, const value_type& value, size_type count) -> iterator + { + SPARROW_ASSERT_TRUE(pos >= this->cbegin()); + SPARROW_ASSERT_TRUE(pos <= this->cend()); + const size_t distance = static_cast(std::distance(this->cbegin(), pos)); + auto& derived = this->derived_cast(); + derived.insert_bitmap(sparrow::next(this->bitmap_cbegin(), distance), value.has_value(), count); + derived.insert_value(sparrow::next(derived.value_cbegin(), distance), value.get(), count); + get_arrow_proxy().set_length(this->size() + count); // Must be done after resizing the bitmap and values + derived.update(); + return sparrow::next(begin(), distance); + } + + template + auto mutable_array_base::insert(const_iterator pos, std::initializer_list values) -> iterator + { + return insert(pos, values.begin(), values.end()); + } + + template + template + auto mutable_array_base::insert(const_iterator pos, InputIt first, InputIt last) -> iterator + { + SPARROW_ASSERT_TRUE(pos >= this->cbegin()) + SPARROW_ASSERT_TRUE(pos <= this->cend()); + SPARROW_ASSERT_TRUE(first <= last); + const difference_type distance = std::distance(this->cbegin(), pos); + const auto validity_range = std::ranges::subrange(first, last) + | std::views::transform( + [](const value_type& obj) + { + return obj.has_value(); + } + ); + auto& derived = this->derived_cast(); + derived.insert_bitmap( + sparrow::next(this->bitmap_cbegin(), distance), + validity_range.begin(), + validity_range.end() + ); + + 
const auto value_range = std::ranges::subrange(first, last) + | std::views::transform( + [](const value_type& obj) + { + return obj.get(); + } + ); + derived.insert_values( + sparrow::next(derived.value_cbegin(), distance), + value_range.begin(), + value_range.end() + ); + const difference_type count = std::distance(first, last); + get_arrow_proxy().set_length(this->size() + static_cast(count)); // Must be done after modifying + // the bitmap and values + derived.update(); + return sparrow::next(begin(), distance); + } + + template + template + auto mutable_array_base::insert(const_iterator pos, const R& range) -> iterator + { + return insert(pos, std::ranges::begin(range), std::ranges::end(range)); + } + + template + auto mutable_array_base::erase(const_iterator pos) -> iterator + { + SPARROW_ASSERT_TRUE(this->cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos < this->cend()); + return erase(pos, pos + 1); + } + + template + auto mutable_array_base::erase(const_iterator first, const_iterator last) -> iterator + { + SPARROW_ASSERT_TRUE(first < last); + SPARROW_ASSERT_TRUE(this->cbegin() <= first) + SPARROW_ASSERT_TRUE(last <= this->cend()); + const difference_type first_index = std::distance(this->cbegin(), first); + if (first == last) + { + return sparrow::next(begin(), first_index); + } + const auto count = static_cast(std::distance(first, last)); + auto& derived = this->derived_cast(); + derived.erase_bitmap(sparrow::next(this->bitmap_cbegin(), first_index), count); + derived.erase_values(sparrow::next(derived.value_cbegin(), first_index), count); + get_arrow_proxy().set_length(this->size() - count); // Must be done after modifying the bitmap and values + derived.update(); + return sparrow::next(begin(), first_index); + } + + template + void mutable_array_base::push_back(const value_type& value) + { + insert(this->cend(), value); + } + + template + void mutable_array_base::pop_back() + { + erase(std::prev(this->cend())); + } +} diff --git 
a/include/sparrow/layout/null_array.hpp b/include/sparrow/layout/null_array.hpp index 23dea0fc..6f42e47c 100644 --- a/include/sparrow/layout/null_array.hpp +++ b/include/sparrow/layout/null_array.hpp @@ -96,12 +96,13 @@ namespace sparrow const_value_range values() const; const_bitmap_range bitmap() const; - + private: difference_type ssize() const; - arrow_proxy& get_arrow_proxy(); + [[nodiscard]] arrow_proxy& get_arrow_proxy(); + [[nodiscard]] const arrow_proxy& get_arrow_proxy() const; arrow_proxy m_proxy; @@ -239,10 +240,14 @@ namespace sparrow { return m_proxy; } - + + inline const arrow_proxy& null_array::get_arrow_proxy() const + { + return m_proxy; + } + inline bool operator==(const null_array& lhs, const null_array& rhs) { return lhs.size() == rhs.size(); } } - diff --git a/include/sparrow/layout/primitive_array.hpp b/include/sparrow/layout/primitive_array.hpp index 92cb7b8d..1de03737 100644 --- a/include/sparrow/layout/primitive_array.hpp +++ b/include/sparrow/layout/primitive_array.hpp @@ -14,13 +14,16 @@ #pragma once +#include + #include "sparrow/arrow_array_schema_proxy.hpp" -#include "sparrow/layout/array_base.hpp" +#include "sparrow/buffer/buffer_adaptor.hpp" +#include "sparrow/layout/array_bitmap_base.hpp" #include "sparrow/utils/iterator.hpp" #include "sparrow/utils/nullable.hpp" namespace sparrow -{ +{ template class primitive_array; @@ -37,17 +40,20 @@ namespace sparrow using value_iterator = pointer_iterator; using const_value_iterator = pointer_iterator; + using bitmap_const_reference = bitmap_type::const_reference; + + using const_reference = nullable; using iterator_tag = std::random_access_iterator_tag; }; template - class primitive_array final : public array_bitmap_base> + class primitive_array final : public mutable_array_bitmap_base> { public: using self_type = primitive_array; - using base_type = array_bitmap_base; + using base_type = mutable_array_bitmap_base; using inner_types = array_inner_types; using inner_value_type = typename 
inner_types::inner_value_type; using inner_reference = typename inner_types::inner_reference; @@ -55,6 +61,8 @@ namespace sparrow using bitmap_type = typename base_type::bitmap_type; using bitmap_reference = typename base_type::bitmap_reference; using bitmap_const_reference = typename base_type::bitmap_const_reference; + using bitmap_iterator = typename base_type::bitmap_iterator; + using const_bitmap_iterator = typename base_type::const_bitmap_iterator; using value_type = nullable; using reference = nullable; using const_reference = nullable; @@ -69,13 +77,14 @@ namespace sparrow using const_value_iterator = typename base_type::const_value_iterator; using const_bitmap_range = typename base_type::const_bitmap_range; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + explicit primitive_array(arrow_proxy); using base_type::size; - private: - - using base_type::storage; + using base_type::get_arrow_proxy; pointer data(); const_pointer data() const; @@ -89,10 +98,26 @@ namespace sparrow const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; + private: + + // Modifiers + + void resize_values(size_type new_length, inner_value_type value); + + value_iterator insert_value(const_value_iterator pos, inner_value_type value, size_type count); + + template InputIt> + value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last); + + value_iterator erase_values(const_value_iterator pos, size_type count); + + buffer_adaptor&> get_data_buffer(); + static constexpr size_type DATA_BUFFER_INDEX = 1; - friend class array_crtp_base; friend class run_end_encoded_array; + friend base_type; + friend base_type::base_type; }; /********************************** @@ -127,21 +152,21 @@ namespace sparrow primitive_array::primitive_array(arrow_proxy proxy) : base_type(std::move(proxy)) { - SPARROW_ASSERT_TRUE(detail::check_primitive_data_type(storage().data_type())); + 
SPARROW_ASSERT_TRUE(get_arrow_proxy().data_type() == arrow_traits::type_id); } template auto primitive_array::data() -> pointer { - return storage().buffers()[DATA_BUFFER_INDEX].template data() - + static_cast(storage().offset()); + return get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data() + + static_cast(get_arrow_proxy().offset()); } template auto primitive_array::data() const -> const_pointer { - return storage().buffers()[DATA_BUFFER_INDEX].template data() - + static_cast(storage().offset()); + return get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data() + + static_cast(get_arrow_proxy().offset()); } template @@ -181,4 +206,56 @@ namespace sparrow { return sparrow::next(value_cbegin(), size()); } + + template + buffer_adaptor&> primitive_array::get_data_buffer() + { + auto& buffers = get_arrow_proxy().get_array_private_data()->buffers(); + return make_buffer_adaptor(buffers[DATA_BUFFER_INDEX]); + } + + template + void primitive_array::resize_values(size_type new_length, inner_value_type value) + { + const size_t new_size = new_length + static_cast(get_arrow_proxy().offset()); + get_data_buffer().resize(new_size, value); + } + + template + auto primitive_array::insert_value(const_value_iterator pos, inner_value_type value, size_type count) + -> value_iterator + { + SPARROW_ASSERT_TRUE(value_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos <= value_cend()); + const auto distance = std::distance(value_cbegin(), sparrow::next(pos, get_arrow_proxy().offset())); + get_data_buffer().insert(pos, count, value); + return sparrow::next(this->value_begin(), distance); + } + + template + template InputIt> + auto + primitive_array::insert_values(const_value_iterator pos, InputIt first, InputIt last) -> value_iterator + { + SPARROW_ASSERT_TRUE(value_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos <= value_cend()); + const auto distance = std::distance(value_cbegin(), sparrow::next(pos, get_arrow_proxy().offset())); + get_data_buffer().insert(pos, first, last); + 
return sparrow::next(this->value_begin(), distance); + } + + template + auto primitive_array::erase_values(const_value_iterator pos, size_type count) -> value_iterator + { + SPARROW_ASSERT_TRUE(this->value_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos < this->value_cend()); + const size_type distance = static_cast( + std::distance(this->value_cbegin(), sparrow::next(pos, get_arrow_proxy().offset())) + ); + auto data_buffer = get_data_buffer(); + const auto first = sparrow::next(data_buffer.cbegin(), distance); + const auto last = sparrow::next(first, count); + data_buffer.erase(first, last); + return sparrow::next(this->value_begin(), distance); + } } diff --git a/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp b/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp index fd0d6d4d..2c1b1d59 100644 --- a/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp +++ b/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp @@ -17,11 +17,7 @@ #include "sparrow/config/config.hpp" #include "sparrow/layout/array_wrapper.hpp" #include "sparrow/array_factory.hpp" -#include "sparrow/layout/layout_utils.hpp" -#include "sparrow/layout/nested_value_types.hpp" -#include "sparrow/utils/iterator.hpp" #include "sparrow/utils/memory.hpp" -#include "sparrow/utils/nullable.hpp" #include "sparrow/layout/run_end_encoded_layout/run_end_encoded_iterator.hpp" namespace sparrow @@ -82,7 +78,8 @@ namespace sparrow SPARROW_API static acc_length_ptr_variant_type get_acc_lengths_ptr(const array_wrapper& ar); SPARROW_API std::uint64_t get_run_length(std::uint64_t run_index) const; - arrow_proxy& get_arrow_proxy(); + [[nodiscard]] arrow_proxy& get_arrow_proxy(); + [[nodiscard]] const arrow_proxy& get_arrow_proxy() const; arrow_proxy m_proxy; std::uint64_t m_encoded_length; @@ -162,6 +159,11 @@ namespace sparrow return m_proxy; } + inline const arrow_proxy& run_end_encoded_array::get_arrow_proxy() const + { + return m_proxy; + } 
+ inline auto run_end_encoded_array::operator[](std::uint64_t i) -> array_traits::const_reference { return static_cast(this)->operator[](i); diff --git a/include/sparrow/layout/struct_layout/struct_array.hpp b/include/sparrow/layout/struct_layout/struct_array.hpp index 459a4071..676bb4a8 100644 --- a/include/sparrow/layout/struct_layout/struct_array.hpp +++ b/include/sparrow/layout/struct_layout/struct_array.hpp @@ -15,7 +15,8 @@ #pragma once #include "sparrow/array_factory.hpp" -#include "sparrow/layout/array_base.hpp" +#include "sparrow/arrow_array_schema_proxy.hpp" +#include "sparrow/layout/array_bitmap_base.hpp" #include "sparrow/layout/array_wrapper.hpp" #include "sparrow/layout/layout_utils.hpp" #include "sparrow/layout/nested_value_types.hpp" @@ -53,10 +54,8 @@ namespace sparrow using size_type = typename base_type::size_type; using bitmap_type = typename base_type::bitmap_type; - using bitmap_reference = typename base_type::bitmap_reference; using bitmap_const_reference = typename base_type::bitmap_const_reference; - using bitmap_range = base_type::bitmap_range; using const_bitmap_range = base_type::const_bitmap_range; using inner_value_type = struct_value; @@ -64,7 +63,6 @@ namespace sparrow using inner_const_reference = struct_value; using value_type = nullable; - using reference = nullable; using const_reference = nullable; using iterator_tag = base_type::iterator_tag; @@ -170,10 +168,11 @@ namespace sparrow inline auto struct_array::make_children() -> children_type { - children_type children(this->storage().children().size(), nullptr); + arrow_proxy& proxy = this->get_arrow_proxy(); + children_type children(proxy.children().size(), nullptr); for (std::size_t i = 0; i < children.size(); ++i) { - children[i] = array_factory(this->storage().children()[i].view()); + children[i] = array_factory(proxy.children()[i].view()); } return children; } diff --git a/include/sparrow/layout/union_array.hpp b/include/sparrow/layout/union_array.hpp index 
7753aa07..6da9fd0f 100644 --- a/include/sparrow/layout/union_array.hpp +++ b/include/sparrow/layout/union_array.hpp @@ -96,7 +96,8 @@ namespace sparrow union_array_crtp_base(self_type&& rhs) = default; self_type& operator=(self_type&& rhs) = default; - arrow_proxy& get_arrow_proxy(); + [[nodiscard]] arrow_proxy& get_arrow_proxy(); + [[nodiscard]] const arrow_proxy& get_arrow_proxy() const; arrow_proxy m_proxy; const std::uint8_t * p_type_ids; @@ -172,6 +173,12 @@ namespace sparrow return m_proxy; } + template + const arrow_proxy& union_array_crtp_base::get_arrow_proxy() const + { + return m_proxy; + } + template union_array_crtp_base::union_array_crtp_base(arrow_proxy proxy) : m_proxy(std::move(proxy)) diff --git a/include/sparrow/layout/variable_size_binary_array.hpp b/include/sparrow/layout/variable_size_binary_array.hpp index 7dd79aa7..044046ee 100644 --- a/include/sparrow/layout/variable_size_binary_array.hpp +++ b/include/sparrow/layout/variable_size_binary_array.hpp @@ -17,7 +17,8 @@ #include #include -#include "sparrow/layout/array_base.hpp" +#include "sparrow/arrow_array_schema_proxy.hpp" +#include "sparrow/layout/array_bitmap_base.hpp" #include "sparrow/layout/layout_iterator.hpp" #include "sparrow/types/data_type.hpp" #include "sparrow/utils/contracts.hpp" @@ -32,34 +33,43 @@ namespace sparrow template class variable_size_binary_reference; - template + template class variable_size_binary_value_iterator; template struct array_inner_types> : array_inner_types_base { using array_type = variable_size_binary_array; - using base_type = array_crtp_base; using inner_value_type = T; - using inner_reference = variable_size_binary_reference; + // using inner_reference = variable_size_binary_reference; using inner_const_reference = CR; using offset_type = OT; using data_value_type = typename T::value_type; - using offset_iterator = OT*; + // using offset_iterator = OT*; using const_offset_iterator = const OT*; - using data_iterator = data_value_type*; + // using 
data_iterator = data_value_type*; using const_data_iterator = const data_value_type*; - using value_iterator = variable_size_binary_value_iterator; - using const_value_iterator = variable_size_binary_value_iterator; + using iterator_tag = std::random_access_iterator_tag; - using iterator = layout_iterator; - using const_iterator = layout_iterator; + using const_bitmap_iterator = bitmap_type::const_iterator; - using size_type = typename base_type::size_type; - using iterator_tag = std::random_access_iterator_tag; + struct iterator_types + { + using value_type = inner_value_type; + using reference = inner_const_reference; + using value_iterator = const_data_iterator; + using bitmap_iterator = const_bitmap_iterator; + using iterator_tag = array_inner_types>::iterator_tag; + }; + + // using value_iterator = variable_size_binary_value_iterator; + using const_value_iterator = variable_size_binary_value_iterator; + + // using iterator = layout_iterator; + // using const_iterator = layout_iterator; }; /** @@ -68,26 +78,27 @@ namespace sparrow * @tparam L the layout type * @tparam is_const a boolean flag specifying whether this iterator is const. 
*/ - template + template class variable_size_binary_value_iterator : public iterator_base< - variable_size_binary_value_iterator, - mpl::constify_t::inner_value_type, is_const>, - std::contiguous_iterator_tag, - impl::get_inner_reference_t, is_const>> + variable_size_binary_value_iterator, + typename Iterator_types::value_type, + typename Iterator_types::iterator_tag, + typename Iterator_types::reference> { public: - using self_type = variable_size_binary_value_iterator; + using self_type = variable_size_binary_value_iterator; using base_type = iterator_base< self_type, - mpl::constify_t::inner_value_type, is_const>, - std::contiguous_iterator_tag, - impl::get_inner_reference_t, is_const>>; + typename Iterator_types::value_type, + typename Iterator_types::iterator_tag, + typename Iterator_types::reference>; using reference = typename base_type::reference; using difference_type = typename base_type::difference_type; - using size_type = typename array_inner_types::size_type; - using layout_type = mpl::constify_t; + using layout_type = mpl::constify_t; + using size_type = size_t; + using value_type = base_type::value_type; variable_size_binary_value_iterator() noexcept = default; variable_size_binary_value_iterator(layout_type* layout, size_type index); @@ -187,44 +198,43 @@ namespace sparrow using base_type = array_bitmap_base; using inner_types = array_inner_types; using inner_value_type = typename inner_types::inner_value_type; - using inner_reference = typename inner_types::inner_reference; + // using inner_reference = typename inner_types::inner_reference; using inner_const_reference = typename inner_types::inner_const_reference; using offset_type = typename inner_types::offset_type; - using bitmap_type = typename base_type::bitmap_type; - using bitmap_reference = typename base_type::bitmap_reference; + using bitmap_type = typename inner_types::bitmap_type; + // using bitmap_reference = typename base_type::bitmap_reference; using bitmap_const_reference = typename 
base_type::bitmap_const_reference; using value_type = nullable; - using reference = nullable; + // using reference = nullable; using const_reference = nullable; - using offset_iterator = typename inner_types::offset_iterator; + // using offset_iterator = typename inner_types::offset_iterator; using const_offset_iterator = typename inner_types::const_offset_iterator; using size_type = typename base_type::size_type; using difference_type = typename base_type::difference_type; using iterator_tag = typename base_type::iterator_tag; - using data_iterator = typename inner_types::data_iterator; + // using data_iterator = typename inner_types::data_iterator; using const_data_iterator = typename inner_types::const_data_iterator; using data_value_type = typename inner_types::data_value_type; - using bitmap_range = typename base_type::bitmap_range; + // using bitmap_range = typename base_type::bitmap_range; using const_bitmap_range = typename base_type::const_bitmap_range; - using value_iterator = typename inner_types::value_iterator; + // using value_iterator = typename inner_types::value_iterator; using const_value_iterator = typename inner_types::const_value_iterator; explicit variable_size_binary_array(arrow_proxy); using base_type::size; + using base_type::get_arrow_proxy; private: static constexpr size_t OFFSET_BUFFER_INDEX = 1; static constexpr size_t DATA_BUFFER_INDEX = 2; - using base_type::storage; - - offset_iterator offset(size_type i); - offset_iterator offset_end(); - data_iterator data(size_type i); + // offset_iterator offset(size_type i); + // offset_iterator offset_end(); + // data_iterator data(size_type i); const_offset_iterator offset(size_type i) const; const_offset_iterator offset_end() const; @@ -240,20 +250,21 @@ namespace sparrow // value_iterator value_begin(); // value_iterator value_end(); + const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; friend class array_crtp_base; friend class variable_size_binary_reference; 
- friend class variable_size_binary_value_iterator; + friend const_value_iterator; }; /****************************************************** * variable_size_binary_value_iterator implementation * ******************************************************/ - template - variable_size_binary_value_iterator::variable_size_binary_value_iterator( + template + variable_size_binary_value_iterator::variable_size_binary_value_iterator( layout_type* layout, size_type index ) @@ -262,52 +273,52 @@ namespace sparrow { } - template - auto variable_size_binary_value_iterator::dereference() const -> reference - { - if constexpr (is_const) - { - return p_layout->value(static_cast(m_index)); - } - else - { - return reference(p_layout, static_cast(m_index)); - } + template + auto variable_size_binary_value_iterator::dereference() const -> reference + { + // if constexpr (is_const) + // { + return p_layout->value(static_cast(m_index)); + // } + // else + // { + // return reference(p_layout, static_cast(m_index)); + // } } - template - void variable_size_binary_value_iterator::increment() + template + void variable_size_binary_value_iterator::increment() { ++m_index; } - template - void variable_size_binary_value_iterator::decrement() + template + void variable_size_binary_value_iterator::decrement() { --m_index; } - template - void variable_size_binary_value_iterator::advance(difference_type n) + template + void variable_size_binary_value_iterator::advance(difference_type n) { m_index += n; } - template - auto - variable_size_binary_value_iterator::distance_to(const self_type& rhs) const -> difference_type + template + auto variable_size_binary_value_iterator::distance_to(const self_type& rhs + ) const -> difference_type { return rhs.m_index - m_index; } - template - bool variable_size_binary_value_iterator::equal(const self_type& rhs) const + template + bool variable_size_binary_value_iterator::equal(const self_type& rhs) const { return (p_layout == rhs.p_layout) && (m_index == 
rhs.m_index); } - template - bool variable_size_binary_value_iterator::less_than(const self_type& rhs) const + template + bool variable_size_binary_value_iterator::less_than(const self_type& rhs) const { return (p_layout == rhs.p_layout) && (m_index < rhs.m_index); } @@ -434,7 +445,7 @@ namespace sparrow variable_size_binary_array::variable_size_binary_array(arrow_proxy proxy) : base_type(std::move(proxy)) { - const auto type = storage().data_type(); + const auto type = get_arrow_proxy().data_type(); SPARROW_ASSERT_TRUE(type == data_type::STRING || type == data_type::BINARY); // TODO: Add // data_type::LARGE_STRING // and @@ -447,22 +458,22 @@ namespace sparrow // template // auto variable_size_binary_array::data() -> pointer // { - // return storage().buffers()[DATA_BUFFER_INDEX].template data() - // + static_cast(storage().offset()); + // return get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data() + // + static_cast(get_arrow_proxy().offset()); // } - template - auto variable_size_binary_array::data(size_type i) -> data_iterator - { - SPARROW_ASSERT_FALSE(storage().buffers()[DATA_BUFFER_INDEX].size() == 0u); - return storage().buffers()[DATA_BUFFER_INDEX].template data() + i; - } + // template + // auto variable_size_binary_array::data(size_type i) -> data_iterator + // { + // SPARROW_ASSERT_FALSE(get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].size() == 0u); + // return get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data() + i; + // } template auto variable_size_binary_array::data(size_type i) const -> const_data_iterator { - SPARROW_ASSERT_FALSE(storage().buffers()[DATA_BUFFER_INDEX].size() == 0u); - return storage().buffers()[DATA_BUFFER_INDEX].template data() + i; + SPARROW_ASSERT_FALSE(get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].size() == 0u); + return get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data() + i; } // template @@ -470,7 +481,7 @@ namespace sparrow // requires mpl::convertible_ranges // void 
variable_size_binary_array::assign(U&& rhs, size_type index) // { - // auto& data_buffer = storage().buffers()[1]; + // auto& data_buffer = get_arrow_proxy().buffers()[1]; // const auto offset_beg = *offset(index); // const auto offset_end = *offset(index + 1); // const auto initial_value_length = offset_end - offset_beg; @@ -502,28 +513,28 @@ namespace sparrow // std::copy(std::ranges::begin(rhs), std::ranges::end(rhs), data_buffer.begin() + offset_beg); // } - template - auto variable_size_binary_array::offset(size_type i) -> offset_iterator - { - SPARROW_ASSERT_TRUE(i < size() + storage().offset()); - return storage().buffers()[OFFSET_BUFFER_INDEX].template data() - + static_cast(storage().offset()) + i; - } + // template + // auto variable_size_binary_array::offset(size_type i) -> offset_iterator + // { + // SPARROW_ASSERT_TRUE(i < size() + get_arrow_proxy().offset()); + // return get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data() + // + static_cast(get_arrow_proxy().offset()) + i; + // } template auto variable_size_binary_array::offset(size_type i) const -> const_offset_iterator { - SPARROW_ASSERT_TRUE(i < size() + storage().offset()); - return storage().buffers()[OFFSET_BUFFER_INDEX].template data() - + static_cast(storage().offset()) + i; + SPARROW_ASSERT_TRUE(i < size() + get_arrow_proxy().offset()); + return get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data() + + static_cast(get_arrow_proxy().offset()) + i; } // template // auto variable_size_binary_array::value(size_type i) -> inner_reference // { // SPARROW_ASSERT_TRUE(i < size()); - // return storage().buffers()[OFFSET_BUFFER_INDEX].template data() - // + static_cast(storage().offset()) + i; + // return get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data() + // + static_cast(get_arrow_proxy().offset()) + i; // } template diff --git a/include/sparrow/utils/algorithm.hpp b/include/sparrow/utils/algorithm.hpp index 59873f1a..9f72d474 100644 --- 
a/include/sparrow/utils/algorithm.hpp +++ b/include/sparrow/utils/algorithm.hpp @@ -16,7 +16,6 @@ #include #include -#include #include "sparrow/config/config.hpp" @@ -123,5 +122,4 @@ namespace sparrow { return lexicographical_compare_three_way(r1, r2) == std::strong_ordering::less; } - } // namespace sparrow diff --git a/include/sparrow/utils/mp_utils.hpp b/include/sparrow/utils/mp_utils.hpp index cf83d96c..4774bda6 100644 --- a/include/sparrow/utils/mp_utils.hpp +++ b/include/sparrow/utils/mp_utils.hpp @@ -22,7 +22,7 @@ namespace sparrow::mpl { - + /// Workaround to replace static_assert(false) in template code. /// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p2593r1.html template @@ -472,9 +472,24 @@ namespace sparrow::mpl // Matches any type that is testable template - concept testable = requires(T t) { t ? true : false; }; + concept testable = requires(T t) { t ? true : false; }; // Fails if the Qualifier is true for Y but not for T. template typename Qualifier> concept T_matches_qualifier_if_Y_is = Qualifier::value || !Qualifier::value; + + /** + * Concept to check if an iterator is of a specific type. + * + * This concept ensures that the given iterator type `I` satisfies the + * `std::input_iterator` concept and that the value type of the iterator + * matches the specified type `T`. + * + * @tparam I The iterator type to be checked. + * @tparam T The type that the iterator's value type should match. 
+ */ + template + concept iterator_of_type = std::input_iterator + && std::same_as::value_type, T>; + } diff --git a/src/arrow_array_schema_proxy.cpp b/src/arrow_array_schema_proxy.cpp index 4a32b7fc..02a45f12 100644 --- a/src/arrow_array_schema_proxy.cpp +++ b/src/arrow_array_schema_proxy.cpp @@ -14,6 +14,8 @@ #include "sparrow/arrow_array_schema_proxy.hpp" +#include + #include "sparrow/arrow_interface/arrow_array.hpp" #include "sparrow/arrow_interface/arrow_array_schema_info_utils.hpp" #include "sparrow/arrow_interface/arrow_flag_utils.hpp" @@ -22,16 +24,25 @@ #include "sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp" #include "sparrow/utils/contracts.hpp" - namespace sparrow { - arrow_proxy arrow_proxy::view() + static constexpr size_t bitmap_buffer_index = 0; + + arrow_proxy arrow_proxy::view() const { - return arrow_proxy(&array(), &schema()); + ArrowArray* array_ptr = const_cast(&array()); + ArrowSchema* schema_ptr = const_cast(&schema()); + return arrow_proxy(array_ptr, schema_ptr); } void arrow_proxy::update_buffers() { + if (is_created_with_sparrow()) + { + get_array_private_data()->update_buffers_ptrs(); + array().buffers = get_array_private_data()->buffers_ptrs(); + array().n_buffers = static_cast(n_buffers()); + } m_buffers = get_arrow_array_buffers(array(), schema()); } @@ -291,8 +302,9 @@ namespace sparrow { throw arrow_proxy_exception("Cannot set name on non-sparrow created ArrowArray"); } - get_schema_private_data()->name() = name; - schema().name = get_schema_private_data()->name_ptr(); + auto private_data = get_schema_private_data(); + private_data->name() = name; + schema().name = private_data->name_ptr(); } [[nodiscard]] std::optional arrow_proxy::metadata() const @@ -310,8 +322,9 @@ namespace sparrow { throw arrow_proxy_exception("Cannot set metadata on non-sparrow created ArrowArray"); } - get_schema_private_data()->metadata() = metadata; - schema().metadata = get_schema_private_data()->metadata_ptr(); + auto private_data = 
get_schema_private_data(); + private_data->metadata() = metadata; + schema().metadata = private_data->metadata_ptr(); } [[nodiscard]] std::vector arrow_proxy::flags() const @@ -343,6 +356,8 @@ namespace sparrow throw arrow_proxy_exception("Cannot set length on non-sparrow created ArrowArray"); } array().length = static_cast(length); + update_buffers(); + update_null_count(); } [[nodiscard]] int64_t arrow_proxy::null_count() const @@ -389,8 +404,7 @@ namespace sparrow array().n_buffers = static_cast(n_buffers); arrow_array_private_data* private_data = get_array_private_data(); private_data->resize_buffers(n_buffers); - array().buffers = private_data->buffers_ptrs(); - array().n_buffers = static_cast(n_buffers); + update_buffers(); } [[nodiscard]] size_t arrow_proxy::n_children() const @@ -458,13 +472,19 @@ namespace sparrow arrow_schema_private_data* arrow_proxy::get_schema_private_data() { - SPARROW_ASSERT_TRUE(schema_created_with_sparrow()); + if (!schema_created_with_sparrow()) + { + throw arrow_proxy_exception("Cannot get schema private data on non-sparrow created ArrowArray"); + } return static_cast(schema().private_data); } arrow_array_private_data* arrow_proxy::get_array_private_data() { - SPARROW_ASSERT_TRUE(array_created_with_sparrow()); + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception("Cannot get array private data on non-sparrow created ArrowArray"); + } return static_cast(array().private_data); } @@ -485,9 +505,7 @@ namespace sparrow { throw arrow_proxy_exception("Cannot set buffer on non-sparrow created ArrowArray"); } - auto array_private_data = get_array_private_data(); - array_private_data->set_buffer(index, buffer); - array().buffers = array_private_data->buffers_ptrs(); + get_array_private_data()->set_buffer(index, buffer); update_null_count(); update_buffers(); } @@ -499,9 +517,7 @@ namespace sparrow { throw arrow_proxy_exception("Cannot set buffer on non-sparrow created ArrowArray"); } - auto array_private_data = 
get_array_private_data(); - array_private_data->set_buffer(index, std::move(buffer)); - array().buffers = array_private_data->buffers_ptrs(); + get_array_private_data()->set_buffer(index, std::move(buffer)); update_null_count(); update_buffers(); } @@ -561,7 +577,7 @@ namespace sparrow using value_type = arrow_array_and_schema; add_children(std::ranges::single_view(value_type{std::move(array), std::move(schema)})); } - + [[nodiscard]] const std::unique_ptr& arrow_proxy::dictionary() const { return m_dictionary; @@ -676,7 +692,7 @@ namespace sparrow } const auto validity_index = std::distance(buffer_types.begin(), validity_it); auto& validity_buffer = buffers()[static_cast(validity_index)]; - const dynamic_bitset_view bitmap(validity_buffer.data(), validity_buffer.size()); + const dynamic_bitset_view bitmap(validity_buffer.data(), length() + offset()); const auto null_count = bitmap.null_count(); set_null_count(static_cast(null_count)); } @@ -708,4 +724,99 @@ namespace sparrow std::swap(m_children, other.m_children); std::swap(m_dictionary, other.m_dictionary); } + + [[nodiscard]] non_owning_dynamic_bitset arrow_proxy::get_non_owning_dynamic_bitset() + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot get non owning dynamic bitset from a non-sparrow created ArrowArray or ArrowSchema" + ); + } + + SPARROW_ASSERT_TRUE(is_created_with_sparrow()) + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + auto private_data = static_cast(array().private_data); + auto& bitmap_buffer = private_data->buffers()[bitmap_buffer_index]; + const size_t current_size = length() + offset(); + non_owning_dynamic_bitset bitmap{&bitmap_buffer, current_size}; + return bitmap; + } + + void arrow_proxy::resize_bitmap(size_t new_size, bool value) + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception("Cannot resize bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + auto bitmap = 
get_non_owning_dynamic_bitset(); + bitmap.resize(new_size, value); + update_buffers(); + } + + size_t arrow_proxy::insert_bitmap(size_t index, bool value, size_t count) + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot insert values in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + SPARROW_ASSERT_TRUE(std::cmp_less_equal(index, length())) + if (count == 0) + { + return index; + } + auto bitmap = get_non_owning_dynamic_bitset(); + auto it = bitmap.insert(sparrow::next(bitmap.cbegin(), index), count, value); + update_buffers(); + return std::distance(bitmap.begin(), it); + } + + size_t arrow_proxy::erase_bitmap(size_t index, size_t count) + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot erase values in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + SPARROW_ASSERT_TRUE(std::cmp_less(index, length())) + auto bitmap = get_non_owning_dynamic_bitset(); + const auto it_first = sparrow::next(bitmap.cbegin(), index + offset()); + const auto it_last = sparrow::next(it_first, count); + const auto it = bitmap.erase(it_first, it_last); + update_buffers(); + return std::distance(bitmap.begin(), it); + } + + void arrow_proxy::push_back_bitmap(bool value) + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot push_back value in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + insert_bitmap(length(), value); + update_buffers(); + } + + void arrow_proxy::pop_back_bitmap() + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot pop_back value in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + erase_bitmap(length() - 1); + update_buffers(); + } } diff --git 
a/src/arrow_interface/arrow_array.cpp b/src/arrow_interface/arrow_array.cpp index 4b7699ce..c1ce44a4 100644 --- a/src/arrow_interface/arrow_array.cpp +++ b/src/arrow_interface/arrow_array.cpp @@ -17,7 +17,6 @@ #include "sparrow/arrow_interface/arrow_array_schema_info_utils.hpp" #include "sparrow/types/data_type.hpp" - namespace sparrow { void release_arrow_array(ArrowArray* array) @@ -43,6 +42,7 @@ namespace sparrow buffers.reserve(buffer_count); const enum data_type data_type = format_to_data_type(schema.format); const auto buffers_type = get_buffer_types_from_data_type(data_type); + SPARROW_ASSERT_TRUE(buffers_type.size() == buffer_count); for (std::size_t i = 0; i < buffer_count; ++i) { const auto buffer_type = buffers_type[i]; diff --git a/test/test_arrow_array_schema_proxy.cpp b/test/test_arrow_array_schema_proxy.cpp index b0b1e538..32d7185b 100644 --- a/test/test_arrow_array_schema_proxy.cpp +++ b/test/test_arrow_array_schema_proxy.cpp @@ -22,7 +22,6 @@ #include "arrow_array_schema_creation.hpp" #include "doctest/doctest.h" - TEST_SUITE("ArrowArrowSchemaProxy") { TEST_CASE("constructors") @@ -149,7 +148,7 @@ TEST_SUITE("ArrowArrowSchemaProxy") { auto [array, schema] = make_external_arrow_schema_and_array(); sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); - CHECK_THROWS_AS(proxy.set_format("U"), std::runtime_error); + CHECK_THROWS(proxy.set_format("U")); } } @@ -252,8 +251,8 @@ TEST_SUITE("ArrowArrowSchemaProxy") { auto [array, schema] = make_sparrow_arrow_schema_and_array(); sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); - proxy.set_length(20); - CHECK_EQ(proxy.length(), 20); + proxy.set_length(2); + CHECK_EQ(proxy.length(), 2); } SUBCASE("on external c structure") @@ -323,14 +322,15 @@ TEST_SUITE("ArrowArrowSchemaProxy") TEST_CASE("set_n_buffers") { - SUBCASE("on sparrow c structure") - { - auto [array, schema] = make_sparrow_arrow_schema_and_array(); - sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); - 
CHECK_EQ(proxy.n_children(), 0); - proxy.set_n_buffers(3); - CHECK_EQ(proxy.n_buffers(), 3); - } + // TODO: Deactivate because it can only be tested on Variable Binary View + // SUBCASE("on sparrow c structure") + // { + // auto [array, schema] = make_sparrow_arrow_schema_and_array(); + // sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + // CHECK_EQ(proxy.n_children(), 0); + // proxy.set_n_buffers(3); + // CHECK_EQ(proxy.n_buffers(), 3); + // } SUBCASE("on external c structure") { @@ -417,7 +417,9 @@ TEST_SUITE("ArrowArrowSchemaProxy") SUBCASE("on sparrow c structure") { auto array_schema_pair = make_sparrow_arrow_schema_and_array(); - std::array array_child_ptr{{{&array_schema_pair.array ,&array_schema_pair.schema}}}; + std::array array_child_ptr{ + {{&array_schema_pair.array, &array_schema_pair.schema}} + }; auto [array, schema] = make_sparrow_arrow_schema_and_array(); sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); @@ -427,6 +429,18 @@ TEST_SUITE("ArrowArrowSchemaProxy") CHECK_EQ(children.size(), 1); CHECK_EQ(children[0].format(), "C"); } + + SUBCASE("on external c structure") + { + auto array_schema_pair = make_external_arrow_schema_and_array(); + std::array array_child_ptr{ + {{&array_schema_pair.first, &array_schema_pair.second}} + }; + + auto [array, schema] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.add_children(array_child_ptr), std::runtime_error); + } } TEST_CASE("pop_children") @@ -436,14 +450,23 @@ TEST_SUITE("ArrowArrowSchemaProxy") auto [array, schema] = make_sparrow_arrow_schema_and_array(); sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); - auto array_schema_pair = make_sparrow_arrow_schema_and_array(); - std::array array_child_ptr{{{&array_schema_pair.array ,&array_schema_pair.schema}}}; + auto array_schema_pair = make_sparrow_arrow_schema_and_array(); + std::array array_child_ptr{ + 
{{&array_schema_pair.array, &array_schema_pair.schema}} + }; proxy.add_children(array_child_ptr); proxy.pop_children(1); const auto& children = proxy.children(); CHECK_EQ(children.size(), 0); CHECK_EQ(proxy.n_children(), 0); } + + SUBCASE("on external c structure") + { + auto [array, schema] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.pop_children(1), std::runtime_error); + } } TEST_CASE("dictionary") @@ -457,7 +480,7 @@ TEST_SUITE("ArrowArrowSchemaProxy") { SUBCASE("on sparrow c structure") { - auto array_schema_pair = make_sparrow_arrow_schema_and_array(); + auto array_schema_pair = make_sparrow_arrow_schema_and_array(); auto [array, schema] = make_sparrow_arrow_schema_and_array(); sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); @@ -498,4 +521,228 @@ TEST_SUITE("ArrowArrowSchemaProxy") const sparrow::arrow_proxy proxy_ext(std::move(array_ext), std::move(schema_ext)); CHECK_EQ(proxy_ext.private_data(), nullptr); } + + TEST_CASE("resize_bitmap") + { + SUBCASE("on sparrow c structure") + { + auto [array, schema] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.resize_bitmap(5); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 5); + CHECK(bitmap.test(0)); + CHECK(bitmap.test(1)); + CHECK_FALSE(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK(bitmap.test(4)); + } + + SUBCASE("on external c structure") + { + auto [array, schema] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.resize_bitmap(5), std::runtime_error); + } + } + + TEST_CASE("insert_bitmap") + { + SUBCASE("with index and value") + { + SUBCASE("on sparrow c structure") + { + auto [array, schema] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy 
proxy(std::move(array), std::move(schema)); + proxy.insert_bitmap(1, false); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 7); + CHECK(bitmap.test(0)); + CHECK_FALSE(bitmap.test(1)); + CHECK(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK_FALSE(bitmap.test(4)); + CHECK(bitmap.test(5)); + CHECK(bitmap.test(6)); + } + + SUBCASE("on external c structure") + { + auto [array, schema] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.insert_bitmap(1, true), std::runtime_error); + } + } + + SUBCASE("with index, value and count") + { + SUBCASE("on sparrow c structure") + { + auto [array, schema] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.insert_bitmap(1, false, 2); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 12); + CHECK(bitmap.test(0)); + CHECK_FALSE(bitmap.test(1)); + CHECK_FALSE(bitmap.test(2)); + CHECK(bitmap.test(3)); + CHECK_FALSE(bitmap.test(4)); + CHECK_FALSE(bitmap.test(5)); + CHECK(bitmap.test(6)); + CHECK(bitmap.test(7)); + CHECK(bitmap.test(8)); + CHECK(bitmap.test(9)); + CHECK(bitmap.test(10)); + CHECK(bitmap.test(11)); + } + + SUBCASE("on external c structure") + { + auto [array, schema] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.insert_bitmap(1, true, 2), std::runtime_error); + } + } + + SUBCASE("with index and range") + { + SUBCASE("on sparrow c structure") + { + auto [array, schema] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + std::vector values{false, true, false, true}; + proxy.insert_bitmap(1, values); + const auto buffers = proxy.buffers(); + 
REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 14); + CHECK(bitmap.test(0)); + CHECK_FALSE(bitmap.test(1)); + CHECK(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK(bitmap.test(4)); + CHECK(bitmap.test(5)); + CHECK_FALSE(bitmap.test(6)); + CHECK_FALSE(bitmap.test(7)); + CHECK(bitmap.test(8)); + CHECK(bitmap.test(9)); + CHECK(bitmap.test(10)); + CHECK(bitmap.test(11)); + CHECK(bitmap.test(12)); + CHECK(bitmap.test(13)); + } + + SUBCASE("on external c structure") + { + auto [array, schema] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + std::vector values{0, 1, 0, 1}; + CHECK_THROWS_AS(proxy.insert_bitmap(1, values), std::runtime_error); + } + } + } + + TEST_CASE("erase_bitmap") + { + SUBCASE("with index") + { + SUBCASE("on sparrow c structure") + { + auto [array, schema] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.erase_bitmap(1); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + } + + SUBCASE("on external c structure") + { + auto [array, schema] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.erase_bitmap(1), std::runtime_error); + } + } + + SUBCASE("with index and count") + { + SUBCASE("on sparrow c structure") + { + auto [array, schema] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.erase_bitmap(1, 2); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + } + + SUBCASE("on external c structure") + { + auto [array, schema] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.erase_bitmap(1, 2), std::runtime_error); + } + } + } + + TEST_CASE("push_back_bitmap") + { + SUBCASE("on sparrow c structure") + { 
+ auto [array, schema] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.push_back_bitmap(1); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 11); + CHECK(bitmap.test(0)); + CHECK(bitmap.test(1)); + CHECK_FALSE(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK(bitmap.test(4)); + CHECK(bitmap.test(5)); + CHECK(bitmap.test(6)); + CHECK(bitmap.test(7)); + CHECK(bitmap.test(8)); + CHECK(bitmap.test(9)); + CHECK(bitmap.test(10)); + } + + SUBCASE("on external c structure") + { + auto [array, schema] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.push_back_bitmap(1), std::runtime_error); + } + } + + TEST_CASE("pop_back_bitmap") + { + SUBCASE("on sparrow c structure") + { + auto [array, schema] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.pop_back_bitmap(); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 9); + CHECK(bitmap.test(0)); + CHECK(bitmap.test(1)); + CHECK_FALSE(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK(bitmap.test(4)); + CHECK(bitmap.test(5)); + CHECK(bitmap.test(6)); + CHECK(bitmap.test(7)); + CHECK(bitmap.test(8)); + } + + SUBCASE("on external c structure") + { + auto [array, schema] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.pop_back_bitmap(), std::runtime_error); + } + } } diff --git a/test/test_dictionary_encoded_array.cpp b/test/test_dictionary_encoded_array.cpp index ae9c3697..d929b7bf 100644 --- a/test/test_dictionary_encoded_array.cpp +++ b/test/test_dictionary_encoded_array.cpp @@ -33,7 +33,7 @@ namespace sparrow static const std::array words{{"hello", 
"you", "are", "not", "prepared", "!", "?"}}; - arrow_proxy make_arrow_proxy() + inline arrow_proxy make_arrow_proxy() { constexpr std::array keys_nulls{1ULL, 5ULL}; const std::vector keys{0, 0, 1, 2, 3, 4, 2, 5, 0, 1, 2}; diff --git a/test/test_list_array.cpp b/test/test_list_array.cpp index c9497ca7..4c2cb890 100644 --- a/test/test_list_array.cpp +++ b/test/test_list_array.cpp @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "sparrow/layout/primitive_array.hpp" #include "sparrow/layout/list_layout/list_array.hpp" +#include "sparrow/layout/primitive_array.hpp" #include "doctest/doctest.h" - -#include "test_utils.hpp" #include "external_array_data_creation.hpp" +#include "test_utils.hpp" + namespace sparrow { @@ -41,14 +41,14 @@ namespace sparrow } TEST_SUITE("list_array") - { - TEST_CASE_TEMPLATE("list[T]",T, std::uint8_t, std::int32_t, float, double) + { + TEST_CASE_TEMPLATE("list[T]", T, std::uint8_t, std::int32_t, float, double) { using inner_scalar_type = T; using inner_nullable_type = nullable; // number of elements in the flatted array - const std::size_t n_flat = 10; //1+2+3+4 + const std::size_t n_flat = 10; // 1+2+3+4 // number of elements in the list array const std::size_t n = 4; // vector of sizes @@ -88,25 +88,32 @@ namespace sparrow SUBCASE("element-sizes") { - for(std::size_t i = 0; i < n; ++i){ + for (std::size_t i = 0; i < n; ++i) + { REQUIRE(list_arr[i].has_value()); CHECK(list_arr[i].value().size() == sizes[i]); } - } + } SUBCASE("element-values") { std::size_t flat_index = 0; - for(std::size_t i = 0; i < n; ++i){ + for (std::size_t i = 0; i < n; ++i) + { auto list = list_arr[i].value(); - for(std::size_t j = 0; j < sizes[i]; ++j){ - + for (std::size_t j = 0; j < sizes[i]; ++j) + { auto value_variant = list[j]; // visit the variant - std::visit([&](auto && value){ - if constexpr(std::is_same_v, inner_nullable_type>){ - CHECK(value == flat_index); - } - }, 
value_variant); + std::visit( + [&](auto&& value) + { + if constexpr (std::is_same_v, inner_nullable_type>) + { + CHECK(value == flat_index); + } + }, + value_variant + ); ++flat_index; } } @@ -130,7 +137,7 @@ namespace sparrow REQUIRE(flat_values_casted.size() == n_flat); // check that flat values are "iota" - if constexpr(std::is_integral_v) + if constexpr (std::is_integral_v) { for(inner_scalar_type i = 0; i < static_cast(n_flat); ++i){ CHECK(flat_values_casted[static_cast(i)].value() == i); @@ -165,14 +172,14 @@ namespace sparrow } TEST_SUITE("list_view_array") - { - TEST_CASE_TEMPLATE("list_view_array[T]",T, std::uint8_t, std::int32_t, float, double) + { + TEST_CASE_TEMPLATE("list_view_array[T]", T, std::uint8_t, std::int32_t, float, double) { using inner_scalar_type = T; using inner_nullable_type = nullable; // number of elements in the flatted array - const std::size_t n_flat = 10; //1+2+3+4 + const std::size_t n_flat = 10; // 1+2+3+4 // number of elements in the list array const std::size_t n = 4; // vector of sizes @@ -212,7 +219,8 @@ namespace sparrow SUBCASE("element-sizes") { - for(std::size_t i = 0; i < n; ++i){ + for (std::size_t i = 0; i < n; ++i) + { REQUIRE(list_arr[i].has_value()); CHECK(list_arr[i].value().size() == sizes[i]); } @@ -221,17 +229,23 @@ namespace sparrow SUBCASE("element-values") { std::size_t flat_index = 0; - for(std::size_t i = 0; i < n; ++i){ + for (std::size_t i = 0; i < n; ++i) + { auto list = list_arr[i].value(); - for(std::size_t j = 0; j < sizes[i]; ++j){ - + for (std::size_t j = 0; j < sizes[i]; ++j) + { auto value_variant = list[j]; // visit the variant - std::visit([&](auto && value){ - if constexpr(std::is_same_v, inner_nullable_type>){ - CHECK(value == flat_index); - } - }, value_variant); + std::visit( + [&](auto&& value) + { + if constexpr (std::is_same_v, inner_nullable_type>) + { + CHECK(value == flat_index); + } + }, + value_variant + ); ++flat_index; } } @@ -255,7 +269,7 @@ namespace sparrow 
REQUIRE(flat_values_casted.size() == n_flat); // check that flat values are "iota" - if constexpr(std::is_integral_v) + if constexpr (std::is_integral_v) { for(inner_scalar_type i = 0; i < static_cast(n_flat); ++i){ CHECK(flat_values_casted[static_cast(i)].value() == i); @@ -290,15 +304,14 @@ namespace sparrow } TEST_SUITE("fixed_sized_list_array") - { - TEST_CASE_TEMPLATE("fixed_sized_array_list[T]",T, std::uint8_t, std::int32_t, float, double) + { + TEST_CASE_TEMPLATE("fixed_sized_array_list[T]", T, std::uint8_t, std::int32_t, float, double) { - using inner_scalar_type = T; using inner_nullable_type = nullable; // number of elements in the flatted array - const std::size_t n_flat = 20; + const std::size_t n_flat = 20; // the size of each list = const std::size_t list_size = 5; @@ -343,7 +356,8 @@ namespace sparrow SUBCASE("element-sizes") { - for(std::size_t i = 0; i < list_arr.size(); ++i){ + for (std::size_t i = 0; i < list_arr.size(); ++i) + { REQUIRE(list_arr[i].has_value()); REQUIRE(list_arr[i].value().size() == list_size); } @@ -352,23 +366,27 @@ namespace sparrow SUBCASE("element-values") { std::size_t flat_index = 0; - for(std::size_t i = 0; i < n; ++i){ + for (std::size_t i = 0; i < n; ++i) + { auto list = list_arr[i].value(); - for(std::size_t j = 0; j < list.size(); ++j){ - + for (std::size_t j = 0; j < list.size(); ++j) + { auto value_variant = list[j]; // visit the variant - std::visit([&](auto && value){ - if constexpr(std::is_same_v, inner_nullable_type>){ - CHECK(value == flat_index); - } - }, value_variant); + std::visit( + [&](auto&& value) + { + if constexpr (std::is_same_v, inner_nullable_type>) + { + CHECK(value == flat_index); + } + }, + value_variant + ); ++flat_index; } } } } } - } - diff --git a/test/test_primitive_array.cpp b/test/test_primitive_array.cpp index 63393552..4fab4439 100644 --- a/test/test_primitive_array.cpp +++ b/test/test_primitive_array.cpp @@ -12,166 +12,605 @@ // See the License for the specific language governing 
permissions and // limitations under the License. -#include "../test/external_array_data_creation.hpp" -#include "doctest/doctest.h" +#include +#include + +#include "sparrow/arrow_array_schema_proxy_factory.hpp" #include "sparrow/layout/primitive_array.hpp" +#include "doctest/doctest.h" namespace sparrow { - using scalar_value_type = std::int32_t; - using array_test_type = primitive_array; - using test::make_arrow_proxy; + + using testing_types = std::tuple< + std::int8_t, + std::uint8_t, + std::int16_t, + std::uint16_t, + std::int32_t, + std::uint32_t, + std::int64_t, + std::uint64_t, + float16_t, + float32_t, + float64_t>; TEST_SUITE("primitive_array") { - constexpr std::size_t size = 10u; - constexpr std::size_t offset = 1u; - - TEST_CASE("constructor") + TEST_CASE_TEMPLATE_DEFINE("", T, primitive_array_id) { - auto pr = make_arrow_proxy(size, offset); - array_test_type ar(std::move(pr)); - CHECK_EQ(ar.size(), size - offset); - } + const std::array values{1, 2, 3, 4, 5}; + constexpr std::array nulls{2}; + constexpr int64_t offset = 1; - TEST_CASE("copy") - { - array_test_type ar(make_arrow_proxy(size, offset)); - array_test_type ar2(ar); + auto make_array = [&nulls](R values_range) + { + return make_primitive_arrow_proxy(values_range, nulls, offset, "test", std::nullopt); + }; - CHECK_EQ(ar, ar2); + // Elements: 2, null, 4, 5 - array_test_type ar3(make_arrow_proxy(size + 3u, offset)); - CHECK_NE(ar, ar3); - ar3 = ar; - CHECK_EQ(ar, ar3); - } + using array_test_type = primitive_array; + array_test_type ar{make_array(values)}; - TEST_CASE("move") - { - array_test_type ar(make_arrow_proxy(size, offset)); - array_test_type ar2(ar); + SUBCASE("constructor") + { + CHECK_EQ(ar.size(), 4); + } - array_test_type ar3(std::move(ar)); - CHECK_EQ(ar2, ar3); + SUBCASE("const operator[]") + { + REQUIRE_EQ(ar.size(), 4); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + 
CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + } - array_test_type ar4(make_arrow_proxy(size + 3u, offset)); - CHECK_NE(ar2, ar4); - ar4 = std::move(ar2); - CHECK_EQ(ar3, ar4); - } + SUBCASE("copy") + { + array_test_type ar2(ar); - TEST_CASE("const operator[]") - { - auto pr = make_arrow_proxy(size, offset); - std::vector ref(size - offset); - std::copy( - pr.buffers()[1].data() + offset, - pr.buffers()[1].data() + size, - ref.begin() - ); - array_test_type ar(std::move(pr)); - const array_test_type& car = ar; - for (std::size_t i = 0; i < ref.size(); ++i) - { - CHECK_EQ(ar[i], ref[i]); - CHECK_EQ(car[i], ref[i]); + CHECK_EQ(ar, ar2); + + array_test_type ar3(make_array(std::vector{1, 2, 3, 4, 5, 6, 7})); + CHECK_NE(ar, ar3); + ar3 = ar; + CHECK_EQ(ar, ar3); } - } - TEST_CASE("value_iterator_ordering") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_values = ar.values(); - array_test_type::const_value_iterator citer = ar_values.begin(); - CHECK(citer < ar_values.end()); - } + SUBCASE("move") + { + array_test_type ar2(ar); - TEST_CASE("value_iterator_equality") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_values = ar.values(); - array_test_type::const_value_iterator citer = ar_values.begin(); - for (std::size_t i = 0; i < ar.size(); ++i) + array_test_type ar3(std::move(ar)); + CHECK_EQ(ar2, ar3); + + array_test_type ar4(make_array(std::vector{1, 2, 3, 4, 5, 6, 7})); + CHECK_NE(ar2, ar4); + ar4 = std::move(ar2); + CHECK_EQ(ar3, ar4); + } + + SUBCASE("value_iterator_ordering") { - CHECK_EQ(*citer++, ar[i]); + auto ar_values = ar.values(); + auto citer = ar_values.begin(); + CHECK(citer < ar_values.end()); } - CHECK_EQ(citer, ar_values.end()); - } - TEST_CASE("const_value_iterator_ordering") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_values = ar.values(); - array_test_type::const_value_iterator citer = ar_values.begin(); - 
CHECK(citer < ar_values.end()); - } + SUBCASE("value_iterator_equality") + { + const auto ar_values = ar.values(); + auto citer = ar_values.begin(); + CHECK_EQ(*citer, values[1]); + ++citer; + CHECK_EQ(*citer, values[2]); + ++citer; + CHECK_EQ(*citer, values[3]); + ++citer; + CHECK_EQ(*citer, values[4]); + ++citer; + CHECK_EQ(citer, ar_values.end()); + } - TEST_CASE("const_value_iterator_equality") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_values = ar.values(); - for (std::size_t i = 0; i < ar.size(); ++i) + SUBCASE("const_value_iterator_ordering") { - ar[i] = static_cast(i); + auto ar_values = ar.values(); + auto citer = ar_values.begin(); + CHECK(citer < ar_values.end()); } - array_test_type::const_value_iterator citer = ar_values.begin(); - for (std::size_t i = 0; i < ar.size(); ++i, ++citer) + SUBCASE("const_value_iterator_equality") { - CHECK_EQ(*citer, i); + auto ar_values = ar.values(); + auto citer = ar_values.begin(); + CHECK_EQ(*citer, values[1]); + ++citer; + CHECK_EQ(*citer, values[2]); + ++citer; + CHECK_EQ(*citer, values[3]); + ++citer; + CHECK_EQ(*citer, values[4]); + ++citer; + CHECK_EQ(citer, ar_values.end()); } - } - TEST_CASE("const_bitmap_iterator_ordering") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_bitmap = ar.bitmap(); - array_test_type::const_bitmap_iterator citer = ar_bitmap.begin(); - CHECK(citer < ar_bitmap.end()); - } + SUBCASE("const_bitmap_iterator_ordering") + { + const auto ar_bitmap = ar.bitmap(); + const auto citer = ar_bitmap.begin(); + CHECK(citer < ar_bitmap.end()); + } - TEST_CASE("const_bitmap_iterator_equality") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_bitmap = ar.bitmap(); - for (std::size_t i = 0; i < ar.size(); ++i) + SUBCASE("const_bitmap_iterator_equality") { - if (i % 2 != 0) - { - ar[i] = nullval; - } + const auto ar_bitmap = ar.bitmap(); + auto citer = ar_bitmap.begin(); + CHECK(*citer); + ++citer; + CHECK_FALSE(*citer); + ++citer; + 
CHECK(*citer); + ++citer; + CHECK(*citer); + ++citer; } - array_test_type::const_bitmap_iterator citer = ar_bitmap.begin(); - for (std::size_t i = 0; i < ar.size(); ++i, ++citer) + SUBCASE("iterator") { - CHECK_EQ(*citer, i % 2 == 0); + auto it = ar.begin(); + const auto end = ar.end(); + CHECK(it->has_value()); + CHECK_EQ(*it, values[1]); + ++it; + CHECK_FALSE(it->has_value()); + CHECK_EQ(*it, make_nullable(values[2], false)); + ++it; + CHECK(it->has_value()); + CHECK_EQ(*it, make_nullable(values[3])); + ++it; + CHECK(it->has_value()); + CHECK_EQ(*it, make_nullable(values[4])); + ++it; + + CHECK_EQ(it, end); + + const array_test_type ar_empty( + make_primitive_arrow_proxy(std::array{}, std::array{}, 0, "test", std::nullopt) + ); + CHECK_EQ(ar_empty.begin(), ar_empty.end()); } - } - TEST_CASE("iterator") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto it = ar.begin(); - auto end = ar.end(); + SUBCASE("resize") + { + const T new_value{99}; + ar.resize(7, make_nullable(99)); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), new_value); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), new_value); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), new_value); + } - for (std::size_t i = 0; i != ar.size(); ++it, ++i) + SUBCASE("insert") { - CHECK_EQ(*it, make_nullable(ar[i].value())); - CHECK(it->has_value()); + SUBCASE("with pos and value") + { + SUBCASE("at the beginning") + { + const auto pos = ar.cbegin(); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(99)); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 5); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), new_value); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), 
values[1]); + CHECK_FALSE(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[2]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[3]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[4]); + } + + SUBCASE("in the middle") + { + const auto pos = sparrow::next(ar.cbegin(), 1); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(99)); + CHECK_EQ(iter, sparrow::next(ar.begin(), 1)); + REQUIRE_EQ(ar.size(), 5); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_value); + CHECK_FALSE(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[2]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[3]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[4]); + } + + SUBCASE("at the end") + { + const auto pos = ar.cend(); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(99)); + CHECK_EQ(iter, ar.begin() + 4); + REQUIRE_EQ(ar.size(), 5); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), new_value); + } + } + + SUBCASE("with pos, count and value") + { + SUBCASE("at the beginning") + { + const auto pos = ar.cbegin(); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(new_value), 3); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), new_value); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_value); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), new_value); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[1]); + CHECK_FALSE(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[2]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + 
CHECK_EQ(ar[6].get(), values[4]); + } + + SUBCASE("in the middle") + { + const auto pos = sparrow::next(ar.cbegin(), 1); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(new_value), 3); + CHECK_EQ(iter, sparrow::next(ar.begin(), 1)); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_value); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), new_value); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), new_value); + CHECK_FALSE(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[2]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), values[4]); + } + + SUBCASE("at the end") + { + const auto pos = ar.cend(); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(new_value), 3); + CHECK_EQ(iter, ar.begin() + 4); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), new_value); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), new_value); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), new_value); + } + } + + SUBCASE("with pos, first and last iterators") + { + SUBCASE("at the beginning") + { + const auto pos = ar.cbegin(); + const std::array, 3> new_values{ + make_nullable(99), + make_nullable(100), + make_nullable(101) + }; + const auto iter = ar.insert(pos, new_values); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), new_values[0]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_values[1]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), new_values[2]); + CHECK(ar[3].has_value()); + 
CHECK_EQ(ar[3].get(), values[1]); + CHECK_FALSE(ar[4].has_value()); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), values[4]); + } + + SUBCASE("in the middle") + { + const auto pos = sparrow::next(ar.cbegin(), 1); + const std::array, 3> new_values{ + make_nullable(99), + make_nullable(100), + make_nullable(101) + }; + const auto iter = ar.insert(pos, new_values); + CHECK_EQ(iter, sparrow::next(ar.begin(), 1)); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_values[0]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), new_values[1]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), new_values[2]); + CHECK_FALSE(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[2]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), values[4]); + } + + SUBCASE("at the end") + { + const auto pos = ar.cend(); + const std::array, 3> new_values{ + make_nullable(99), + make_nullable(100), + make_nullable(101) + }; + const auto iter = ar.insert(pos, new_values); + CHECK_EQ(iter, ar.begin() + 4); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), new_values[0]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), new_values[1]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), new_values[2]); + } + } + + SUBCASE("with pos and initializer list") + { + SUBCASE("at the beginning") + { + const auto pos = ar.cbegin(); + auto new_val_99 = make_nullable(99); + auto new_val_100 = make_nullable(100); + auto new_val_101 = make_nullable(101); + const auto iter = ar.insert(pos, 
{new_val_99, new_val_100, new_val_101}); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 7); + CHECK_EQ(ar[0], new_val_99); + CHECK_EQ(ar[1], new_val_100); + CHECK_EQ(ar[2], new_val_101); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[1]); + CHECK_FALSE(ar[4].has_value()); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + } + + SUBCASE("in the middle") + { + const auto pos = sparrow::next(ar.cbegin(), 1); + auto new_val_99 = make_nullable(99); + auto new_val_100 = make_nullable(100); + auto new_val_101 = make_nullable(101); + const auto iter = ar.insert(pos, {new_val_99, new_val_100, new_val_101}); + CHECK_EQ(iter, sparrow::next(ar.begin(), 1)); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_EQ(ar[1], new_val_99); + CHECK_EQ(ar[2], new_val_100); + CHECK_EQ(ar[3], new_val_101); + CHECK_FALSE(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[2]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + } + + SUBCASE("at the end") + { + const auto pos = ar.cend(); + auto new_val_99 = make_nullable(99); + auto new_val_100 = make_nullable(100); + auto new_val_101 = make_nullable(101); + const auto iter = ar.insert(pos, {new_val_99, new_val_100, new_val_101}); + CHECK_EQ(iter, ar.begin() + 4); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK_EQ(ar[4], new_val_99); + CHECK_EQ(ar[5], new_val_100); + CHECK_EQ(ar[6], new_val_101); + } + } + + SUBCASE("with pos and range") + { + SUBCASE("at the beginning") + { + const auto pos = ar.cbegin(); + const std::array, 3> new_values{ + make_nullable(99), + make_nullable(100), + make_nullable(101) + }; + const auto iter = ar.insert(pos, 
new_values); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), new_values[0]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_values[1]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), new_values[2]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[1]); + CHECK_FALSE(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[2]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), values[4]); + } + } } - CHECK_EQ(it, end); + SUBCASE("erase") + { + SUBCASE("with pos") + { + SUBCASE("at the beginning") + { + const auto pos = ar.cbegin(); + const auto iter = ar.erase(pos); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 3); + CHECK_FALSE(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[2]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[3]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[4]); + } + + SUBCASE("in the middle") + { + const auto pos = sparrow::next(ar.cbegin(), 1); + const auto iter = ar.erase(pos); + CHECK_EQ(iter, sparrow::next(ar.begin(), 1)); + REQUIRE_EQ(ar.size(), 3); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[3]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[4]); + } + + SUBCASE("at the end") + { + const auto pos = std::prev(ar.cend()); + const auto iter = ar.erase(pos); + CHECK_EQ(iter, ar.begin() + 3); + REQUIRE_EQ(ar.size(), 3); + REQUIRE(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + } + } + + SUBCASE("with iterators") + { + const auto pos = ar.cbegin() + 1; + const auto iter = ar.erase(pos, pos + 2); + CHECK_EQ(iter, ar.begin() + 1); + REQUIRE_EQ(ar.size(), 2); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + 
CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[4]); + } + } - for (auto v : ar) + SUBCASE("push_back") { - CHECK(v.has_value()); + const T new_value{99}; + ar.push_back(make_nullable(99)); + REQUIRE_EQ(ar.size(), 5); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].value(), new_value); } - array_test_type ar_empty(make_arrow_proxy(0, 0)); - CHECK_EQ(ar_empty.begin(), ar_empty.end()); + SUBCASE("pop_back") + { + ar.pop_back(); + REQUIRE_EQ(ar.size(), 3); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + } } + TEST_CASE_TEMPLATE_APPLY(primitive_array_id, testing_types); } } diff --git a/test/test_variable_size_binary_array.cpp b/test/test_variable_size_binary_array.cpp index 0f677823..25eb9fb9 100644 --- a/test/test_variable_size_binary_array.cpp +++ b/test/test_variable_size_binary_array.cpp @@ -39,12 +39,12 @@ namespace sparrow private: static_assert(std::same_as); - static_assert(std::same_as>); + // static_assert(std::same_as>); static_assert(std::same_as); using const_value_iterator = layout_type::const_value_iterator; static_assert(std::same_as); - // static_assert(std::same_as); + static_assert(std::same_as); arrow_proxy create_arrow_proxy() {