From 4919cbe502787ba65986c2ac2fd3be40bb9101c5 Mon Sep 17 00:00:00 2001 From: chenxu14 Date: Thu, 23 Nov 2023 13:56:00 +0800 Subject: [PATCH] Fix ORC related failed UT (#417) --- velox/connectors/hive/HiveConnector.cpp | 1 + velox/dwio/dwrf/common/FileMetadata.h | 4 ++-- velox/dwio/dwrf/reader/DwrfReader.cpp | 8 ++++++++ velox/dwio/dwrf/reader/DwrfReader.h | 15 +++++++++++++++ .../dwrf/reader/SelectiveDecimalColumnReader.h | 4 ++++ .../reader/SelectiveIntegerDirectColumnReader.h | 6 +++++- .../SelectiveStringDictionaryColumnReader.h | 8 ++++++++ 7 files changed, 43 insertions(+), 3 deletions(-) diff --git a/velox/connectors/hive/HiveConnector.cpp b/velox/connectors/hive/HiveConnector.cpp index 7af4e030f89f..fc6ad707980d 100644 --- a/velox/connectors/hive/HiveConnector.cpp +++ b/velox/connectors/hive/HiveConnector.cpp @@ -143,6 +143,7 @@ std::unique_ptr HivePartitionFunctionSpec::create( void HiveConnectorFactory::initialize() { static bool once = []() { dwio::common::registerFileSinks(); + dwrf::registerOrcReaderFactory(); dwrf::registerDwrfReaderFactory(); dwrf::registerDwrfWriterFactory(); // Meta's buck build system needs this check. diff --git a/velox/dwio/dwrf/common/FileMetadata.h b/velox/dwio/dwrf/common/FileMetadata.h index 2ea21628a595..973ec312ecd6 100644 --- a/velox/dwio/dwrf/common/FileMetadata.h +++ b/velox/dwio/dwrf/common/FileMetadata.h @@ -426,7 +426,8 @@ class FooterWrapper : public ProtoWrapperBase { // TODO: ORC has not supported column statistics yet int statisticsSize() const { - return format_ == DwrfFormat::kDwrf ? dwrfPtr()->statistics_size() : 0; + return format_ == DwrfFormat::kDwrf ? 
dwrfPtr()->statistics_size() + : orcPtr()->statistics_size(); } const ::google::protobuf::RepeatedPtrField< @@ -438,7 +439,6 @@ class FooterWrapper : public ProtoWrapperBase { const ::facebook::velox::dwrf::proto::ColumnStatistics& statistics( int index) const { - VELOX_CHECK_EQ(format_, DwrfFormat::kDwrf); return dwrfPtr()->statistics(index); } diff --git a/velox/dwio/dwrf/reader/DwrfReader.cpp b/velox/dwio/dwrf/reader/DwrfReader.cpp index 153cd0d467a3..567f6bfd878a 100644 --- a/velox/dwio/dwrf/reader/DwrfReader.cpp +++ b/velox/dwio/dwrf/reader/DwrfReader.cpp @@ -1113,4 +1113,12 @@ void unregisterDwrfReaderFactory() { dwio::common::unregisterReaderFactory(dwio::common::FileFormat::DWRF); } +void registerOrcReaderFactory() { + dwio::common::registerReaderFactory(std::make_shared()); +} + +void unregisterOrcReaderFactory() { + dwio::common::unregisterReaderFactory(dwio::common::FileFormat::ORC); +} + } // namespace facebook::velox::dwrf diff --git a/velox/dwio/dwrf/reader/DwrfReader.h b/velox/dwio/dwrf/reader/DwrfReader.h index 79742d447b76..d88325af8d47 100644 --- a/velox/dwio/dwrf/reader/DwrfReader.h +++ b/velox/dwio/dwrf/reader/DwrfReader.h @@ -367,8 +367,23 @@ class DwrfReaderFactory : public dwio::common::ReaderFactory { } }; +class OrcReaderFactory : public dwio::common::ReaderFactory { + public: + OrcReaderFactory() : ReaderFactory(dwio::common::FileFormat::ORC) {} + + std::unique_ptr createReader( + std::unique_ptr input, + const dwio::common::ReaderOptions& options) override { + return DwrfReader::create(std::move(input), options); + } +}; + void registerDwrfReaderFactory(); void unregisterDwrfReaderFactory(); +void registerOrcReaderFactory(); + +void unregisterOrcReaderFactory(); + } // namespace facebook::velox::dwrf diff --git a/velox/dwio/dwrf/reader/SelectiveDecimalColumnReader.h b/velox/dwio/dwrf/reader/SelectiveDecimalColumnReader.h index cf0d328d4721..95ed9054a023 100644 --- a/velox/dwio/dwrf/reader/SelectiveDecimalColumnReader.h +++ 
b/velox/dwio/dwrf/reader/SelectiveDecimalColumnReader.h @@ -40,6 +40,10 @@ class SelectiveDecimalColumnReader : public SelectiveColumnReader { void getValues(RowSet rows, VectorPtr* result) override; + bool hasBulkPath() const override { + return false; + } + private: template void readHelper(RowSet rows); diff --git a/velox/dwio/dwrf/reader/SelectiveIntegerDirectColumnReader.h b/velox/dwio/dwrf/reader/SelectiveIntegerDirectColumnReader.h index 92b3aa750386..8c14cc963d82 100644 --- a/velox/dwio/dwrf/reader/SelectiveIntegerDirectColumnReader.h +++ b/velox/dwio/dwrf/reader/SelectiveIntegerDirectColumnReader.h @@ -63,7 +63,11 @@ class SelectiveIntegerDirectColumnReader } bool hasBulkPath() const override { - return true; + if (format == velox::dwrf::DwrfFormat::kOrc) { + return false; // RLEv2 doesn't support FastPath yet + } else { + return true; + } } void seekToRowGroup(uint32_t index) override { diff --git a/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h b/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h index 88ff95ed7aed..937d31c9e0ab 100644 --- a/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h +++ b/velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.h @@ -53,6 +53,14 @@ class SelectiveStringDictionaryColumnReader uint64_t skip(uint64_t numValues) override; + bool hasBulkPath() const override { + if (version_ == velox::dwrf::RleVersion_1) { + return true; + } else { + return false; // RLEv2 doesn't support FastPath yet + } + } + void read(vector_size_t offset, RowSet rows, const uint64_t* incomingNulls) override;