Skip to content

Commit

Permalink
Add modifiers methods in primitive array (#232)
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex-PLACET authored Oct 25, 2024
1 parent f5d7a95 commit 93255e1
Show file tree
Hide file tree
Showing 30 changed files with 2,073 additions and 543 deletions.
10 changes: 8 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,15 @@ set(SPARROW_HEADERS
${SPARROW_INCLUDE_DIR}/sparrow/arrow_interface/arrow_schema/smart_pointers.hpp
# buffer
${SPARROW_INCLUDE_DIR}/sparrow/buffer/allocator.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer_adaptor.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer_view.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/bitset_iterator.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/bitset_reference.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp
# config
${SPARROW_INCLUDE_DIR}/sparrow/config/config.hpp
${SPARROW_INCLUDE_DIR}/sparrow/config/sparrow_version.hpp
Expand Down Expand Up @@ -179,9 +184,10 @@ if (SPARROW_TARGET_32BIT)
else()

set(SPARROW_SRC
${SPARROW_SOURCE_DIR}/array.cpp
${SPARROW_SOURCE_DIR}/array_factory.cpp
${SPARROW_SOURCE_DIR}/array_helper.cpp
${SPARROW_SOURCE_DIR}/array.cpp
${SPARROW_SOURCE_DIR}/arrow_array_schema_proxy.cpp
${SPARROW_SOURCE_DIR}/arrow_array_schema_proxy.cpp
${SPARROW_SOURCE_DIR}/arrow_interface/arrow_array.cpp
${SPARROW_SOURCE_DIR}/arrow_interface/arrow_schema.cpp
Expand Down
106 changes: 95 additions & 11 deletions include/sparrow/arrow_array_schema_proxy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
#include <string_view>

#include "sparrow/arrow_interface/arrow_array/private_data.hpp"
#include "sparrow/arrow_interface/arrow_array_schema_info_utils.hpp"
#include "sparrow/arrow_interface/arrow_schema/private_data.hpp"
#include "sparrow/buffer/buffer_view.hpp"
#include "sparrow/buffer/dynamic_bitset/non_owning_dynamic_bitset.hpp"
#include "sparrow/c_interface.hpp"
#include "sparrow/config/config.hpp"
#include "sparrow/types/data_type.hpp"
Expand Down Expand Up @@ -128,20 +130,22 @@ namespace sparrow
[[nodiscard]] SPARROW_API size_t length() const;

/**
* Set the length of the `ArrowArray`.
* Set the length of the `ArrowArray`. This method does not resize the buffers of the `ArrowArray`.
* You have to change the length before replacing/resizing the buffers to have the right sizes when
* calling `buffers()`.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @param length The length to set.
*/
SPARROW_API void set_length(size_t length);
[[nodiscard]] SPARROW_API int64_t null_count() const;

/**
* Set the null count of the `ArrowArray`.
* Set the null count of the `ArrowArray`. This method does not change the bitmap.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @param null_count The null count to set.
*/
SPARROW_API void set_null_count(int64_t null_count);
[[nodiscard]] SPARROW_API size_t offset() const;
[[nodiscard]] SPARROW_API size_t offset() const;

/**
* Set the offset of the `ArrowArray`.
Expand All @@ -152,7 +156,8 @@ namespace sparrow
[[nodiscard]] SPARROW_API size_t n_buffers() const;

/**
* Set the number of buffers of the `ArrowArray`.
* Set the number of buffers of the `ArrowArray`. Resize the buffers vector of the `ArrowArray`
* private data.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @param n_buffers The number of buffers to set.
*/
Expand All @@ -162,21 +167,82 @@ namespace sparrow
[[nodiscard]] SPARROW_API std::vector<sparrow::buffer_view<uint8_t>>& buffers();

/**
* Set the buffer at the given index.
* Set the buffer at the given index. You have to call the `set_length` method before calling this
* method to have the right sizes when calling `buffers()`.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @param index The index of the buffer to set.
* @param buffer The buffer to set.
*/
SPARROW_API void set_buffer(size_t index, const buffer_view<uint8_t>& buffer);

/**
* Set the buffer at the given index.
* Set the buffer at the given index. You have to call the `set_length` method before calling this
* method to have the right sizes when calling `buffers()`.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @param index The index of the buffer to set.
* @param buffer The buffer to set.
*/
SPARROW_API void set_buffer(size_t index, buffer<uint8_t>&& buffer);

/**
* Resize the bitmap buffer of the `ArrowArray`.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @param new_size The new size of the bitmap buffer.
* @param value The value to set in the new elements. True by default.
*/
SPARROW_API void resize_bitmap(size_t new_size, bool value = true);

/**
* Insert elements of the same value in the bitmap buffer at the given index.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @exception `std::out_of_range` If the index is greater than the length of the bitmap.
* @param index The index where to insert the value. Must not be greater than the length of the bitmap.
* @param value The value to insert.
* @param count The number of times to insert the value. 1 by default
* @return The index of the first inserted value.
*/
SPARROW_API size_t insert_bitmap(size_t index, bool value, size_t count = 1);

/**
* Insert several elements in the bitmap buffer at the given index.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @exception `std::out_of_range` If the index is greater than the length of the bitmap.
* @param index The index where to insert the values. Must not be greater than the length of the bitmap.
* @param range The range of values to insert.
* @return The index of the first inserted value.
*/
template <std::ranges::input_range R>
size_t insert_bitmap(size_t index, const R& range);

/**
* Erase several elements in the bitmap buffer at the given index.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @exception `std::out_of_range` If the index is greater than the length of the bitmap.
* @param index The index of the first value to erase. Must be less than the length of the bitmap.
* @param count The number of elements to erase. 1 by default.
* @return The index of the first erased value.
*/
SPARROW_API size_t erase_bitmap(size_t index, size_t count = 1);

/**
* Push a value at the end of the bitmap buffer.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @param value The value to push.
*/
SPARROW_API void push_back_bitmap(bool value);

/**
* Pop a value at the end of the bitmap buffer.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
*/
SPARROW_API void pop_back_bitmap();

/**
* Add children without taking their ownership.
* @exception `arrow_proxy_exception` If the `ArrowArray` or the `ArrowSchema` wrapped
Expand Down Expand Up @@ -268,7 +334,7 @@ namespace sparrow
/**
* Get a non-owning view of the arrow_proxy.
*/
[[nodiscard]] SPARROW_API arrow_proxy view();
[[nodiscard]] SPARROW_API arrow_proxy view() const;

[[nodiscard]] SPARROW_API bool owns_array() const;
[[nodiscard]] SPARROW_API ArrowArray extract_array();
Expand All @@ -280,6 +346,9 @@ namespace sparrow
[[nodiscard]] SPARROW_API ArrowSchema& schema();
[[nodiscard]] SPARROW_API const ArrowSchema& schema() const;

[[nodiscard]] SPARROW_API arrow_schema_private_data* get_schema_private_data();
[[nodiscard]] SPARROW_API arrow_array_private_data* get_array_private_data();

private:

std::variant<ArrowArray*, ArrowArray> m_array;
Expand All @@ -303,24 +372,25 @@ namespace sparrow
[[nodiscard]] bool empty() const;
SPARROW_API void resize_children(size_t children_count);

void update_buffers();
[[nodiscard]] SPARROW_API non_owning_dynamic_bitset<uint8_t> get_non_owning_dynamic_bitset();

void update_children();
void update_dictionary();
void update_null_count();
void update_buffers();
void reset();

[[nodiscard]] bool array_created_with_sparrow() const;
[[nodiscard]] bool schema_created_with_sparrow() const;

void validate_array_and_schema() const;

arrow_schema_private_data* get_schema_private_data();
arrow_array_private_data* get_array_private_data();

[[nodiscard]] bool is_arrow_array_valid() const;
[[nodiscard]] bool is_arrow_schema_valid() const;
[[nodiscard]] bool is_proxy_valid() const;

[[nodiscard]] size_t get_null_count() const;

void swap(arrow_proxy& other) noexcept;
};

Expand Down Expand Up @@ -371,4 +441,18 @@ namespace sparrow
);
}
}

/**
 * Insert the elements of `range` into the bitmap, before the position `index`.
 *
 * The iterators are obtained with `std::ranges::begin` / `std::ranges::end` so that every type
 * accepted by the `std::ranges::input_range` constraint can be passed, including built-in arrays,
 * which have no `begin()` / `end()` member functions.
 *
 * @exception `arrow_proxy_exception` If `is_created_with_sparrow()` returns false.
 * @param index The position in the bitmap before which the values are inserted.
 * @param range The values to insert.
 * @return The index of the first inserted value, computed as the distance between the beginning
 *         of the bitmap and the iterator returned by the insertion.
 */
template <std::ranges::input_range R>
inline size_t arrow_proxy::insert_bitmap(size_t index, const R& range)
{
    if (!is_created_with_sparrow())
    {
        throw arrow_proxy_exception("Cannot modify the bitmap on non-sparrow created ArrowArray");
    }
    // NOTE(review): the declaration documents an `arrow_proxy_exception` when the data type has no
    // validity bitmap, but here the condition is only checked through the assertion macro - confirm
    // which behaviour is intended.
    SPARROW_ASSERT_TRUE(has_bitmap(data_type()))
    auto bitmap = get_non_owning_dynamic_bitset();
    const auto it = bitmap.insert(
        sparrow::next(bitmap.cbegin(), index),
        std::ranges::begin(range),
        std::ranges::end(range)
    );
    return static_cast<size_t>(std::distance(bitmap.begin(), it));
}

}
40 changes: 40 additions & 0 deletions include/sparrow/arrow_array_schema_proxy_factory.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright 2024 Man Group Operations Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "sparrow/arrow_array_schema_proxy.hpp"
#include "sparrow/arrow_interface/arrow_array_schema_factory.hpp"
#include "sparrow/types/data_traits.hpp"

namespace sparrow
{
/**
 * Build an `arrow_proxy` from a range of values and a range named `nulls`.
 *
 * The `ArrowArray` is produced by `make_primitive_arrow_array` and the `ArrowSchema` by
 * `make_primitive_arrow_schema`, using the `type_id` that `arrow_traits` associates with the
 * value type of `Values`. No arrow flag is given to the schema (`std::nullopt`).
 *
 * @tparam Values A sized range whose value type is arithmetic.
 * @tparam Nulls A sized range whose value type is integral.
 * @param values The values, forwarded to `make_primitive_arrow_array`.
 * @param nulls The nulls range, forwarded to `make_primitive_arrow_array`.
 * @param offset The offset passed to `make_primitive_arrow_array`.
 * @param name The name passed to `make_primitive_arrow_schema`.
 * @param metadata The metadata passed to `make_primitive_arrow_schema`.
 * @return The `arrow_proxy` constructed from the created array and schema.
 */
template <std::ranges::sized_range Values, std::ranges::sized_range Nulls>
    requires std::is_arithmetic_v<std::ranges::range_value_t<Values>>
             && std::integral<std::ranges::range_value_t<Nulls>>
arrow_proxy make_primitive_arrow_proxy(
    Values&& values,
    Nulls&& nulls,
    int64_t offset,
    std::string_view name,
    std::optional<std::string_view> metadata
)
{
    using element_type = std::ranges::range_value_t<Values>;

    // The array is created first and the schema second, in the same order as the arguments of the
    // `arrow_proxy` constructor.
    auto array = make_primitive_arrow_array(std::forward<Values>(values), std::forward<Nulls>(nulls), offset);
    auto schema = make_primitive_arrow_schema(arrow_traits<element_type>::type_id, name, metadata, std::nullopt);
    return arrow_proxy{std::move(array), std::move(schema)};
}
}
2 changes: 1 addition & 1 deletion include/sparrow/arrow_interface/arrow_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ namespace sparrow
*
* @return The created `ArrowArray`.
*/
SPARROW_API arrow_array_unique_ptr default_arrow_array_unique_ptr();
arrow_array_unique_ptr default_arrow_array_unique_ptr();

/**
* Release function to use for the `ArrowArray.release` member.
Expand Down
13 changes: 9 additions & 4 deletions include/sparrow/arrow_interface/arrow_array/private_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ namespace sparrow

[[nodiscard]] constexpr BufferType& buffers() noexcept;
[[nodiscard]] constexpr const BufferType& buffers() const noexcept;

constexpr void resize_buffers(std::size_t size);
void set_buffer(std::size_t index, buffer<std::uint8_t>&& buffer);
void set_buffer(std::size_t index, const buffer_view<std::uint8_t>& buffer);
constexpr void resize_buffer(std::size_t index, std::size_t size, std::uint8_t value);
constexpr void update_buffers_ptrs();

template <class T>
[[nodiscard]] constexpr const T** buffers_ptrs() noexcept;
Expand All @@ -64,8 +65,7 @@ namespace sparrow
{
}

[[nodiscard]] constexpr std::vector<buffer<std::uint8_t>>&
arrow_array_private_data::buffers() noexcept
[[nodiscard]] constexpr std::vector<buffer<std::uint8_t>>& arrow_array_private_data::buffers() noexcept
{
return m_buffers;
}
Expand All @@ -79,7 +79,7 @@ namespace sparrow
/**
 * Resize the vector of buffers, then refresh the raw buffer pointers.
 *
 * @param size The new number of buffers.
 */
constexpr void arrow_array_private_data::resize_buffers(std::size_t size)
{
    m_buffers.resize(size);
    // update_buffers_ptrs() reassigns m_buffers_pointers from m_buffers, so the pointers are
    // rebuilt exactly once here instead of also being assigned inline.
    update_buffers_ptrs();
}

inline void arrow_array_private_data::set_buffer(std::size_t index, buffer<std::uint8_t>&& buffer)
Expand Down Expand Up @@ -109,4 +109,9 @@ namespace sparrow
{
return const_cast<const T**>(reinterpret_cast<T**>(m_buffers_pointers.data()));
}

constexpr void arrow_array_private_data::update_buffers_ptrs()
{
m_buffers_pointers = to_raw_ptr_vec<std::uint8_t>(m_buffers);
}
}
12 changes: 11 additions & 1 deletion include/sparrow/arrow_interface/arrow_array_schema_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ namespace sparrow
}

template <std::ranges::range R>
requires(std::integral<std::ranges::range_value_t<R>>)
requires(std::integral<std::ranges::range_value_t<R>> && !std::same_as<std::ranges::range_value_t<R>, bool>)
buffer<uint8_t> make_bitmap_buffer(size_t count, R&& nulls)
{
if (!std::ranges::empty(nulls))
Expand Down Expand Up @@ -98,6 +98,16 @@ namespace sparrow
return make_arrow_array(length, null_count, offset, std::move(value_buffers), 0, nullptr, nullptr);
}

/**
 * Create an `ArrowSchema` whose format is derived from the given data type.
 *
 * The format is obtained with `data_type_to_format` and handed to `make_arrow_schema` together
 * with the name, metadata and flag. The trailing arguments are `0`, `nullptr` and `nullptr`
 * (presumably the children count, the children and the dictionary - confirm against
 * `make_arrow_schema`).
 *
 * @param data_type The data type converted to a format with `data_type_to_format`.
 * @param name The name passed to `make_arrow_schema`.
 * @param metadata The metadata passed to `make_arrow_schema`.
 * @param arrow_flag The flag passed to `make_arrow_schema`.
 * @return The `ArrowSchema` returned by `make_arrow_schema`.
 */
inline ArrowSchema make_primitive_arrow_schema(
    data_type data_type,
    std::string_view name,
    std::optional<std::string_view> metadata,
    std::optional<ArrowFlag> arrow_flag
)
{
    auto format = data_type_to_format(data_type);
    return make_arrow_schema(std::move(format), name, metadata, arrow_flag, 0, nullptr, nullptr);
}

template <
std::ranges::sized_range Keys,
std::ranges::sized_range KeyNulls,
Expand Down
Loading

0 comments on commit 93255e1

Please sign in to comment.