From 88e8140ad7902435b5d1ac29205dda7517f2cc79 Mon Sep 17 00:00:00 2001
From: Oliver Layer
Date: Wed, 14 Aug 2024 02:16:54 +0200
Subject: [PATCH] GH-43097: [C++] Implement `PathFromUri` support for Azure file system (#43098)

### Rationale for this change

See #43097.

### What changes are included in this PR?

Implements `AzureFS::PathFromUri` using existing URI parsing and path extraction inside the `AzureOptions`.

### Are these changes tested?

Yes, added a unit test.

### Are there any user-facing changes?

No, but calling `PathFromUri` will now work instead of throwing due to no implementation provided.
* GitHub Issue: #43097

Authored-by: Oliver Layer
Signed-off-by: Sutou Kouhei
---
 cpp/src/arrow/filesystem/azurefs.cc      | 27 ++++++++++++++++++++++++
 cpp/src/arrow/filesystem/azurefs.h       |  2 ++
 cpp/src/arrow/filesystem/azurefs_test.cc |  9 ++++++++
 3 files changed, 38 insertions(+)

diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc
index a3aa2c8e837d9..9b3c0c0c1d703 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -3199,4 +3199,31 @@ Result<std::shared_ptr<io::OutputStream>> AzureFileSystem::OpenAppendStream(
   return impl_->OpenAppendStream(location, metadata, false, this);
 }
 
+Result<std::string> AzureFileSystem::PathFromUri(const std::string& uri_string) const {
+  /// We cannot use `internal::PathFromUriHelper` here because for Azure we have to
+  /// support different URI schemes where the authority is handled differently.
+  /// Example (both should yield the same path `container/some/path`):
+  ///   - (1) abfss://storageacc.blob.core.windows.net/container/some/path
+  ///   - (2) abfss://acc:pw@container/some/path
+  /// The authority handling is different with these two URIs. (1) requires no prepending
+  /// of the authority to the path, while (2) requires prepending the authority to the
+  /// path.
+  std::string path;
+  Uri uri;
+  RETURN_NOT_OK(uri.Parse(uri_string));
+  RETURN_NOT_OK(AzureOptions::FromUri(uri, &path));
+
+  std::vector<std::string> supported_schemes = {"abfs", "abfss"};
+  const auto scheme = uri.scheme();
+  if (std::find(supported_schemes.begin(), supported_schemes.end(), scheme) ==
+      supported_schemes.end()) {
+    std::string expected_schemes =
+        ::arrow::internal::JoinStrings(supported_schemes, ", ");
+    return Status::Invalid("The filesystem expected a URI with one of the schemes (",
+                           expected_schemes, ") but received ", uri_string);
+  }
+
+  return path;
+}
+
 }  // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h
index 93d6ec2f945b4..072b061eeb2a9 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -367,6 +367,8 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem {
   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
       const std::string& path,
       const std::shared_ptr<const KeyValueMetadata>& metadata) override;
+
+  Result<std::string> PathFromUri(const std::string& uri_string) const override;
 };
 
 }  // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc
index 9a11a6f24995a..36646f417cbe1 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -2958,5 +2958,14 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) {
   ASSERT_RAISES(Invalid, stream->ReadAt(1, 1));
   ASSERT_RAISES(Invalid, stream->Seek(2));
 }
+
+TEST_F(TestAzuriteFileSystem, PathFromUri) {
+  ASSERT_EQ(
+      "container/some/path",
+      fs()->PathFromUri("abfss://storageacc.blob.core.windows.net/container/some/path"));
+  ASSERT_EQ("container/some/path",
+            fs()->PathFromUri("abfss://acc:pw@container/some/path"));
+  ASSERT_RAISES(Invalid, fs()->PathFromUri("http://acc:pw@container/some/path"));
+}
 }  // namespace fs
 }  // namespace arrow