Skip to content

Commit

Permalink
support hdfs3
Browse files Browse the repository at this point in the history
  • Loading branch information
JkSelf committed Oct 25, 2024
1 parent d018bd1 commit c14a5d7
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 9 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ option(VELOX_ENABLE_S3 "Build S3 Connector" OFF)
option(VELOX_ENABLE_GCS "Build GCS Connector" OFF)
option(VELOX_ENABLE_ABFS "Build Abfs Connector" OFF)
option(VELOX_ENABLE_HDFS "Build Hdfs Connector" OFF)
option(VELOX_ENABLE_HDFS3 "Build Hdfs Connector using libhdfs3" OFF)
option(VELOX_ENABLE_PARQUET "Enable Parquet support" OFF)
option(VELOX_ENABLE_ARROW "Enable Arrow support" OFF)
option(VELOX_ENABLE_REMOTE_FUNCTIONS "Enable remote function support" OFF)
Expand Down Expand Up @@ -264,6 +265,12 @@ if(VELOX_ENABLE_HDFS)
set(VELOX_ENABLE_ARROW ON)
endif()

if(VELOX_ENABLE_HDFS3)
# Expose the switch to C++ sources as the VELOX_ENABLE_HDFS3 preprocessor
# macro.
add_definitions(-DVELOX_ENABLE_HDFS3)
# The libhdfs3 loader in velox/external/hdfs is written against Arrow
# (::arrow::Result / ::arrow::Status), so Arrow is forced on.
set(VELOX_ENABLE_ARROW ON)
endif()

if(VELOX_ENABLE_PARQUET)
add_definitions(-DVELOX_ENABLE_PARQUET)
# Native Parquet reader requires Apache Thrift and Arrow Parquet writer, which
Expand Down
2 changes: 1 addition & 1 deletion velox/connectors/hive/storage_adapters/hdfs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

velox_add_library(velox_hdfs RegisterHdfsFileSystem.cpp)

if(VELOX_ENABLE_HDFS)
# Test the option values rather than DEFINED: option() always defines both
# variables, so a DEFINED check is true even when both connectors are OFF.
if(VELOX_ENABLE_HDFS OR VELOX_ENABLE_HDFS3)
velox_sources(
velox_hdfs
PRIVATE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#ifdef VELOX_ENABLE_HDFS
#if defined(VELOX_ENABLE_HDFS3) || defined(VELOX_ENABLE_HDFS)
#include "folly/concurrency/ConcurrentHashMap.h"

#include "velox/common/config/Config.h"
Expand All @@ -25,7 +25,7 @@

namespace facebook::velox::filesystems {

#ifdef VELOX_ENABLE_HDFS
#if defined(VELOX_ENABLE_HDFS3) || defined(VELOX_ENABLE_HDFS)
std::mutex mtx;

std::function<std::shared_ptr<
Expand Down Expand Up @@ -96,7 +96,7 @@ hdfsWriteFileSinkGenerator() {
#endif

void registerHdfsFileSystem() {
#ifdef VELOX_ENABLE_HDFS
#if defined(VELOX_ENABLE_HDFS3) || defined(VELOX_ENABLE_HDFS)
registerFileSystem(HdfsFileSystem::isHdfsFile, hdfsFileSystemGenerator());
dwio::common::FileSink::registerFactory(hdfsWriteFileSinkGenerator());
#endif
Expand Down
25 changes: 21 additions & 4 deletions velox/external/hdfs/ArrowHdfsInternal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ ::arrow::Result<std::vector<PlatformFilename>> get_potential_libhdfs_paths() {
std::vector<PlatformFilename> potential_paths;
std::string file_name;

// Common paths
ARROW_ASSIGN_OR_RAISE(auto search_paths, MakeFilenameVector({"", "."}));

#ifdef VELOX_ENABLE_HDFS
// OS-specific file name
#ifdef _WIN32
file_name = "hdfs.dll";
Expand All @@ -161,12 +165,21 @@ ::arrow::Result<std::vector<PlatformFilename>> get_potential_libhdfs_paths() {
file_name = "libhdfs.so";
#endif

// Common paths
ARROW_ASSIGN_OR_RAISE(auto search_paths, MakeFilenameVector({"", "."}));

// Path from environment variable
AppendEnvVarFilename("HADOOP_HOME", "lib/native", &search_paths);
AppendEnvVarFilename("ARROW_LIBHDFS_DIR", &search_paths);
#endif

#ifdef VELOX_ENABLE_HDFS3
// OS-specific file name
#ifdef __APPLE__
file_name = "libhdfs3.dylib";
#else
file_name = "libhdfs3.so";
#endif
// Path from environment variable
AppendEnvVarFilename("HDFS3_HOME", &search_paths);
#endif

// All paths with file name
for (const auto& path : search_paths) {
Expand Down Expand Up @@ -362,8 +375,12 @@ ::arrow::Status ConnectLibHdfs(LibHdfsShim** driver) {

ARROW_ASSIGN_OR_RAISE(
auto libhdfs_potential_paths, get_potential_libhdfs_paths());
// Library name handed to try_dlopen. Keep it in step with the file name that
// get_potential_libhdfs_paths() searches for: libhdfs3 when
// VELOX_ENABLE_HDFS3 is defined, libhdfs otherwise.
#ifdef VELOX_ENABLE_HDFS3
const char* fileName = "libhdfs3";
#else
const char* fileName = "libhdfs";
#endif
ARROW_ASSIGN_OR_RAISE(
shim->handle, try_dlopen(libhdfs_potential_paths, "libhdfs"));
shim->handle, try_dlopen(libhdfs_potential_paths, fileName));
} else if (shim->handle == nullptr) {
return ::arrow::Status::IOError("Prior attempt to load libhdfs failed");
}
Expand Down
2 changes: 1 addition & 1 deletion velox/external/hdfs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

if(${VELOX_ENABLE_HDFS})
if(${VELOX_ENABLE_HDFS} OR ${VELOX_ENABLE_HDFS3})
velox_add_library(velox_external_hdfs ArrowHdfsInternal.cpp)
velox_link_libraries(
velox_external_hdfs
Expand Down

0 comments on commit c14a5d7

Please sign in to comment.