From e192d0289ba8539074dd1683bb17429c093cf1e5 Mon Sep 17 00:00:00 2001 From: Rong Ma Date: Fri, 6 Sep 2024 06:51:28 +0000 Subject: [PATCH] remove single-row API --- velox/row/CompactRow.cpp | 4 - velox/row/CompactRow.h | 17 +-- .../UnsafeRowSerializeBenchmark.cpp | 119 ++++++------------ velox/row/tests/CompactRowTest.cpp | 41 +++--- 4 files changed, 59 insertions(+), 122 deletions(-) diff --git a/velox/row/CompactRow.cpp b/velox/row/CompactRow.cpp index 9821f6e1d1a5..06f91d492c2e 100644 --- a/velox/row/CompactRow.cpp +++ b/velox/row/CompactRow.cpp @@ -634,10 +634,6 @@ int32_t CompactRow::serializeMap(vector_size_t index, char* buffer) { return keysSerializedBytes + valuesSerializedBytes; } -int32_t CompactRow::serialize(vector_size_t index, char* buffer) { - return serializeRow(index, buffer); -} - void CompactRow::serialize( vector_size_t offset, vector_size_t size, diff --git a/velox/row/CompactRow.h b/velox/row/CompactRow.h index c8b9ef86980a..29244be4a9f6 100644 --- a/velox/row/CompactRow.h +++ b/velox/row/CompactRow.h @@ -33,17 +33,12 @@ class CompactRow { /// 'fixedRowSize' returned std::nullopt. int32_t rowSize(vector_size_t index); - /// Serializes row at specified index into 'buffer'. - /// 'buffer' must have sufficient capacity and set to all zeros. - int32_t serialize(vector_size_t index, char* buffer); - - /// Serializes rows in range [offset, offset + size) into 'buffer' at given - /// 'bufferOffsets'. 'buffer' must have sufficient capacity and set to all - /// zeros for null-bits handling. 'bufferOffsets' must be pre-filled with - /// the write offsets for each row and must have the same number of elements - /// as the 'size' parameter. The caller must ensure that the space between - /// each offset in 'bufferOffsets' is no less than the 'fixedRowSize' or - /// 'rowSize'. + /// Serializes rows in the range [offset, offset + size) into 'buffer' at + /// given 'bufferOffsets'. 'buffer' must have sufficient capacity and set to + /// all zeros for null-bits handling. 'bufferOffsets' must be pre-filled with + /// the write offsets for each row and must be accessible for 'size' elements. + /// The caller must ensure that the space between each offset in + /// 'bufferOffsets' is no less than the 'fixedRowSize' or 'rowSize'. void serialize( vector_size_t offset, vector_size_t size, diff --git a/velox/row/benchmarks/UnsafeRowSerializeBenchmark.cpp b/velox/row/benchmarks/UnsafeRowSerializeBenchmark.cpp index 0c895fb50a42..a335954eab80 100644 --- a/velox/row/benchmarks/UnsafeRowSerializeBenchmark.cpp +++ b/velox/row/benchmarks/UnsafeRowSerializeBenchmark.cpp @@ -58,18 +58,6 @@ class SerializeBenchmark { auto data = makeData(rowType); suspender.dismiss(); - CompactRow compact(data); - auto totalSize = computeTotalSize(compact, rowType, data->size()); - auto buffer = AlignedBuffer::allocate(totalSize, pool()); - auto serialized = serialize(compact, data->size(), buffer); - VELOX_CHECK_EQ(serialized.size(), data->size()); - } - - void serializeCompactRange(const RowTypePtr& rowType) { - folly::BenchmarkSuspender suspender; - auto data = makeData(rowType); - suspender.dismiss(); - const auto numRows = data->size(); std::vector rowSize(numRows); std::vector offsets(numRows); @@ -85,10 +73,16 @@ class SerializeBenchmark { void deserializeCompact(const RowTypePtr& rowType) { folly::BenchmarkSuspender suspender; auto data = makeData(rowType); + + const auto numRows = data->size(); + std::vector rowSize(numRows); + std::vector offsets(numRows); + CompactRow compact(data); - auto totalSize = computeTotalSize(compact, rowType, data->size()); - auto buffer = AlignedBuffer::allocate(totalSize, pool()); - auto serialized = serialize(compact, data->size(), buffer); + auto totalSize = + computeTotalSize(compact, rowType, numRows, rowSize, offsets); + auto buffer = AlignedBuffer::allocate(totalSize, pool(), 0); + auto serialized = serialize(compact, numRows, buffer, rowSize, offsets); suspender.dismiss(); auto copy = CompactRow::deserialize(serialized, rowType, pool()); @@ -169,38 +163,6 @@ class SerializeBenchmark { return serialized; } - size_t computeTotalSize( - CompactRow& compactRow, - const RowTypePtr& rowType, - vector_size_t numRows) { - size_t totalSize = 0; - if (auto fixedRowSize = CompactRow::fixedRowSize(rowType)) { - totalSize += fixedRowSize.value() * numRows; - } else { - for (auto i = 0; i < numRows; ++i) { - auto rowSize = compactRow.rowSize(i); - totalSize += rowSize; - } - } - return totalSize; - } - - std::vector - serialize(CompactRow& compactRow, vector_size_t numRows, BufferPtr& buffer) { - std::vector serialized; - auto rawBuffer = buffer->asMutable(); - - size_t offset = 0; - for (auto i = 0; i < numRows; ++i) { - auto rowSize = compactRow.serialize(i, rawBuffer + offset); - serialized.push_back(std::string_view(rawBuffer + offset, rowSize)); - offset += rowSize; - } - - VELOX_CHECK_EQ(buffer->size(), offset); - return serialized; - } - size_t computeTotalSize( CompactRow& compactRow, const RowTypePtr& rowType, @@ -262,40 +224,35 @@ class SerializeBenchmark { memory::memoryManager()->addLeafPool()}; }; -#define SERDE_BENCHMARKS(name, rowType) \ - BENCHMARK(unsafe_serialize_##name) { \ - SerializeBenchmark benchmark; \ - benchmark.serializeUnsafe(rowType); \ - } \ - \ - BENCHMARK(compact_serialize_##name) { \ - SerializeBenchmark benchmark; \ - benchmark.serializeCompact(rowType); \ - } \ - \ - BENCHMARK(compact_serialize_range_##name) { \ - SerializeBenchmark benchmark; \ - benchmark.serializeCompactRange(rowType); \ - } \ - \ - BENCHMARK(container_serialize_##name) { \ - SerializeBenchmark benchmark; \ - benchmark.serializeContainer(rowType); \ - } \ - \ - BENCHMARK(unsafe_deserialize_##name) { \ - SerializeBenchmark benchmark; \ - benchmark.deserializeUnsafe(rowType); \ - } \ - \ - BENCHMARK(compact_deserialize_##name) { \ - SerializeBenchmark benchmark; \ - benchmark.deserializeCompact(rowType); \ - } \ - \ - BENCHMARK(container_deserialize_##name) { \ - SerializeBenchmark benchmark; \ - benchmark.deserializeContainer(rowType); \ +#define SERDE_BENCHMARKS(name, rowType) \ + BENCHMARK(unsafe_serialize_##name) { \ + SerializeBenchmark benchmark; \ + benchmark.serializeUnsafe(rowType); \ + } \ + \ + BENCHMARK(compact_serialize_##name) { \ + SerializeBenchmark benchmark; \ + benchmark.serializeCompact(rowType); \ + } \ + \ + BENCHMARK(container_serialize_##name) { \ + SerializeBenchmark benchmark; \ + benchmark.serializeContainer(rowType); \ + } \ + \ + BENCHMARK(unsafe_deserialize_##name) { \ + SerializeBenchmark benchmark; \ + benchmark.deserializeUnsafe(rowType); \ + } \ + \ + BENCHMARK(compact_deserialize_##name) { \ + SerializeBenchmark benchmark; \ + benchmark.deserializeCompact(rowType); \ + } \ + \ + BENCHMARK(container_deserialize_##name) { \ + SerializeBenchmark benchmark; \ + benchmark.deserializeContainer(rowType); \ } SERDE_BENCHMARKS( diff --git a/velox/row/tests/CompactRowTest.cpp b/velox/row/tests/CompactRowTest.cpp index c56fb9bb3c79..2f58a92a2e1b 100644 --- a/velox/row/tests/CompactRowTest.cpp +++ b/velox/row/tests/CompactRowTest.cpp @@ -71,35 +71,24 @@ class CompactRowTest : public ::testing::Test, public VectorTestBase { BufferPtr buffer = AlignedBuffer::allocate(totalSize, pool(), 0); auto* rawBuffer = buffer->asMutable(); - { - size_t offset = 0; - std::vector serialized; - for (auto i = 0; i < numRows; ++i) { - auto size = row.serialize(i, rawBuffer + offset); - serialized.push_back(std::string_view(rawBuffer + offset, size)); - offset += size; - - VELOX_CHECK_EQ( - size, row.rowSize(i), "Row {}: {}", i, data->toString(i)); - } - - VELOX_CHECK_EQ(offset, totalSize); - - auto copy = CompactRow::deserialize(serialized, rowType, pool()); - assertEqualVectors(data, copy); + std::vector serialized; + + vector_size_t offset = 0; + vector_size_t rangeSize = 1; + // Serialize with different range size. + while (offset < numRows) { + auto size = std::min(rangeSize, numRows - offset); + row.serialize(offset, size, rawBuffer, offsets.data() + offset); + offset += size; + rangeSize = checkedMultiply(rangeSize, 2); } - memset(rawBuffer, 0, totalSize); - { - std::vector serialized; - row.serialize(0, numRows, rawBuffer, offsets.data()); - for (auto i = 0; i < numRows; ++i) { - serialized.push_back( - std::string_view(rawBuffer + offsets[i], rowSize[i])); - } - auto copy = CompactRow::deserialize(serialized, rowType, pool()); - assertEqualVectors(data, copy); + for (auto i = 0; i < numRows; ++i) { + serialized.push_back( + std::string_view(rawBuffer + offsets[i], rowSize[i])); } + auto copy = CompactRow::deserialize(serialized, rowType, pool()); + assertEqualVectors(data, copy); } };