Skip to content

Commit

Permalink
remove single-row API
Browse files Browse the repository at this point in the history
  • Loading branch information
marin-ma committed Oct 31, 2024
1 parent e363c51 commit e192d02
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 122 deletions.
4 changes: 0 additions & 4 deletions velox/row/CompactRow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -634,10 +634,6 @@ int32_t CompactRow::serializeMap(vector_size_t index, char* buffer) {
return keysSerializedBytes + valuesSerializedBytes;
}

/// Single-row entry point: forwards 'index' and 'buffer' unchanged to
/// serializeRow() and hands back that call's result.
int32_t CompactRow::serialize(vector_size_t index, char* buffer) {
  const auto result = serializeRow(index, buffer);
  return result;
}

void CompactRow::serialize(
vector_size_t offset,
vector_size_t size,
Expand Down
17 changes: 6 additions & 11 deletions velox/row/CompactRow.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,12 @@ class CompactRow {
/// 'fixedRowSize' returned std::nullopt.
int32_t rowSize(vector_size_t index);

/// Serializes the row at the specified index into 'buffer'.
/// 'buffer' must have sufficient capacity and be set to all zeros.
int32_t serialize(vector_size_t index, char* buffer);

/// Serializes rows in range [offset, offset + size) into 'buffer' at given
/// 'bufferOffsets'. 'buffer' must have sufficient capacity and be set to all
/// zeros for null-bits handling. 'bufferOffsets' must be pre-filled with
/// the write offsets for each row and must have the same number of elements
/// as the 'size' parameter. The caller must ensure that the space between
/// each offset in 'bufferOffsets' is no less than the 'fixedRowSize' or
/// 'rowSize'.
/// Serializes rows in the range [offset, offset + size) into 'buffer' at the
/// given 'bufferOffsets'. 'buffer' must have sufficient capacity and be set
/// to all zeros for null-bits handling. 'bufferOffsets' must be pre-filled
/// with the write offsets for each row and must be accessible for 'size'
/// elements. The caller must ensure that the space between each offset in
/// 'bufferOffsets' is no less than the 'fixedRowSize' or 'rowSize'.
void serialize(
vector_size_t offset,
vector_size_t size,
Expand Down
119 changes: 38 additions & 81 deletions velox/row/benchmarks/UnsafeRowSerializeBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,6 @@ class SerializeBenchmark {
auto data = makeData(rowType);
suspender.dismiss();

CompactRow compact(data);
auto totalSize = computeTotalSize(compact, rowType, data->size());
auto buffer = AlignedBuffer::allocate<char>(totalSize, pool());
auto serialized = serialize(compact, data->size(), buffer);
VELOX_CHECK_EQ(serialized.size(), data->size());
}

void serializeCompactRange(const RowTypePtr& rowType) {
folly::BenchmarkSuspender suspender;
auto data = makeData(rowType);
suspender.dismiss();

const auto numRows = data->size();
std::vector<size_t> rowSize(numRows);
std::vector<size_t> offsets(numRows);
Expand All @@ -85,10 +73,16 @@ class SerializeBenchmark {
void deserializeCompact(const RowTypePtr& rowType) {
folly::BenchmarkSuspender suspender;
auto data = makeData(rowType);

const auto numRows = data->size();
std::vector<size_t> rowSize(numRows);
std::vector<size_t> offsets(numRows);

CompactRow compact(data);
auto totalSize = computeTotalSize(compact, rowType, data->size());
auto buffer = AlignedBuffer::allocate<char>(totalSize, pool());
auto serialized = serialize(compact, data->size(), buffer);
auto totalSize =
computeTotalSize(compact, rowType, numRows, rowSize, offsets);
auto buffer = AlignedBuffer::allocate<char>(totalSize, pool(), 0);
auto serialized = serialize(compact, numRows, buffer, rowSize, offsets);
suspender.dismiss();

auto copy = CompactRow::deserialize(serialized, rowType, pool());
Expand Down Expand Up @@ -169,38 +163,6 @@ class SerializeBenchmark {
return serialized;
}

/// Returns the number of bytes needed to hold 'numRows' serialized rows.
///
/// If CompactRow::fixedRowSize(rowType) yields a value, the result is that
/// per-row size multiplied by 'numRows'. Otherwise it is the sum of
/// compactRow.rowSize(i) for every i in [0, numRows).
size_t computeTotalSize(
    CompactRow& compactRow,
    const RowTypePtr& rowType,
    vector_size_t numRows) {
  if (const auto fixedRowSize = CompactRow::fixedRowSize(rowType)) {
    // Widen both operands before multiplying so the product is computed in
    // size_t instead of the narrower operand type, where it could overflow.
    return static_cast<size_t>(fixedRowSize.value()) *
        static_cast<size_t>(numRows);
  }

  size_t totalSize = 0;
  for (vector_size_t i = 0; i < numRows; ++i) {
    totalSize += compactRow.rowSize(i);
  }
  return totalSize;
}

/// Serializes rows [0, numRows) of 'compactRow' back to back into 'buffer'
/// and returns one string_view per row pointing into that buffer.
///
/// Each row is written at the running offset via compactRow.serialize(i, ...),
/// and the value that call returns is used as the row's length. After the
/// loop, VELOX_CHECK_EQ verifies that the accumulated offset equals
/// buffer->size(). The returned views alias 'buffer' and do not own the bytes.
std::vector<std::string_view>
serialize(CompactRow& compactRow, vector_size_t numRows, BufferPtr& buffer) {
  auto rawBuffer = buffer->asMutable<char>();

  std::vector<std::string_view> serialized;
  // The final element count is known, so allocate once instead of growing
  // the vector repeatedly inside the loop.
  if (numRows > 0) {
    serialized.reserve(numRows);
  }

  size_t offset = 0;
  for (vector_size_t i = 0; i < numRows; ++i) {
    const auto rowSize = compactRow.serialize(i, rawBuffer + offset);
    serialized.emplace_back(rawBuffer + offset, rowSize);
    offset += rowSize;
  }

  VELOX_CHECK_EQ(buffer->size(), offset);
  return serialized;
}

size_t computeTotalSize(
CompactRow& compactRow,
const RowTypePtr& rowType,
Expand Down Expand Up @@ -262,40 +224,35 @@ class SerializeBenchmark {
memory::memoryManager()->addLeafPool()};
};

#define SERDE_BENCHMARKS(name, rowType) \
BENCHMARK(unsafe_serialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.serializeUnsafe(rowType); \
} \
\
BENCHMARK(compact_serialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.serializeCompact(rowType); \
} \
\
BENCHMARK(compact_serialize_range_##name) { \
SerializeBenchmark benchmark; \
benchmark.serializeCompactRange(rowType); \
} \
\
BENCHMARK(container_serialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.serializeContainer(rowType); \
} \
\
BENCHMARK(unsafe_deserialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.deserializeUnsafe(rowType); \
} \
\
BENCHMARK(compact_deserialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.deserializeCompact(rowType); \
} \
\
BENCHMARK(container_deserialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.deserializeContainer(rowType); \
#define SERDE_BENCHMARKS(name, rowType) \
BENCHMARK(unsafe_serialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.serializeUnsafe(rowType); \
} \
\
BENCHMARK(compact_serialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.serializeCompact(rowType); \
} \
\
BENCHMARK(container_serialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.serializeContainer(rowType); \
} \
\
BENCHMARK(unsafe_deserialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.deserializeUnsafe(rowType); \
} \
\
BENCHMARK(compact_deserialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.deserializeCompact(rowType); \
} \
\
BENCHMARK(container_deserialize_##name) { \
SerializeBenchmark benchmark; \
benchmark.deserializeContainer(rowType); \
}

SERDE_BENCHMARKS(
Expand Down
41 changes: 15 additions & 26 deletions velox/row/tests/CompactRowTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,35 +71,24 @@ class CompactRowTest : public ::testing::Test, public VectorTestBase {

BufferPtr buffer = AlignedBuffer::allocate<char>(totalSize, pool(), 0);
auto* rawBuffer = buffer->asMutable<char>();
{
size_t offset = 0;
std::vector<std::string_view> serialized;
for (auto i = 0; i < numRows; ++i) {
auto size = row.serialize(i, rawBuffer + offset);
serialized.push_back(std::string_view(rawBuffer + offset, size));
offset += size;

VELOX_CHECK_EQ(
size, row.rowSize(i), "Row {}: {}", i, data->toString(i));
}

VELOX_CHECK_EQ(offset, totalSize);

auto copy = CompactRow::deserialize(serialized, rowType, pool());
assertEqualVectors(data, copy);
std::vector<std::string_view> serialized;

vector_size_t offset = 0;
vector_size_t rangeSize = 1;
// Serialize with different range size.
while (offset < numRows) {
auto size = std::min<vector_size_t>(rangeSize, numRows - offset);
row.serialize(offset, size, rawBuffer, offsets.data() + offset);
offset += size;
rangeSize = checkedMultiply<vector_size_t>(rangeSize, 2);
}

memset(rawBuffer, 0, totalSize);
{
std::vector<std::string_view> serialized;
row.serialize(0, numRows, rawBuffer, offsets.data());
for (auto i = 0; i < numRows; ++i) {
serialized.push_back(
std::string_view(rawBuffer + offsets[i], rowSize[i]));
}
auto copy = CompactRow::deserialize(serialized, rowType, pool());
assertEqualVectors(data, copy);
for (auto i = 0; i < numRows; ++i) {
serialized.push_back(
std::string_view(rawBuffer + offsets[i], rowSize[i]));
}
auto copy = CompactRow::deserialize(serialized, rowType, pool());
assertEqualVectors(data, copy);
}
};

Expand Down

0 comments on commit e192d02

Please sign in to comment.