Skip to content

Commit

Permalink
Add support for AWS S3 through s3a:// as a filesystem to store Iceber…
Browse files Browse the repository at this point in the history
…g data and metadata.
  • Loading branch information
Ralnoc committed May 26, 2023
1 parent d3930a2 commit 6c19940
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 2 deletions.
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ RUN \
useradd iceberg --uid 1000 --gid 1000 --create-home

COPY --from=builder --chown=iceberg:iceberg /app/build/libs/iceberg-rest-image-all.jar /usr/lib/iceberg-rest/iceberg-rest-image-all.jar
RUN mkdir -p /etc/hadoop/conf
COPY static/core-site.xml /etc/hadoop/conf/.

ENV CATALOG_CATALOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog
ENV CATALOG_URI=jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory
Expand Down
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ dependencies {

implementation 'org.apache.hadoop:hadoop-common:3.3.4'
implementation 'org.apache.hadoop:hadoop-hdfs-client:3.3.4'
implementation 'org.apache.hadoop:hadoop-aws:3.3.4'

runtimeOnly "software.amazon.awssdk:url-connection-client:${awsSdkVersion}"
runtimeOnly "software.amazon.awssdk:apache-client:${awsSdkVersion}"
Expand Down
3 changes: 1 addition & 2 deletions src/main/java/org/apache/iceberg/rest/RESTCatalogServer.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.catalog.Catalog;
Expand Down Expand Up @@ -80,7 +79,7 @@ private static Catalog backendCatalog() throws IOException {
}

LOG.info("Creating catalog with properties: {}", catalogProperties);
return CatalogUtil.buildIcebergCatalog("rest_backend", catalogProperties, new Configuration());
return CatalogUtil.buildIcebergCatalog("rest_backend", catalogProperties, HadoopUtils.getCoreSiteConfiguration());
}

public static void main(String[] args) throws Exception {
Expand Down
70 changes: 70 additions & 0 deletions static/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>fs.s3a.aws.credentials.provider</name>
<value>
org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider,
org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider,
com.amazonaws.auth.EnvironmentVariableCredentialsProvider,
com.amazonaws.auth.InstanceProfileCredentialsProvider,
com.amazonaws.auth.WebIdentityTokenCredentialsProvider
</value>
<description>
Comma-separated class names of credential provider classes which implement
com.amazonaws.auth.AWSCredentialsProvider.

When S3A delegation tokens are not enabled, this list will be used
to directly authenticate with S3 and DynamoDB services.
When S3A Delegation tokens are enabled, depending upon the delegation
token binding it may be used to communicate with the STS endpoint to
request session/role credentials.

These are loaded and queried in sequence for a valid set of credentials.
Each listed class must implement one of the following means of
construction, which are attempted in order:
* a public constructor accepting java.net.URI and
org.apache.hadoop.conf.Configuration,
* a public constructor accepting org.apache.hadoop.conf.Configuration,
* a public static method named getInstance that accepts no
arguments and returns an instance of
com.amazonaws.auth.AWSCredentialsProvider, or
* a public default constructor.

(DISABLED) Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider
allows anonymous access to a publicly accessible S3 bucket without any
credentials. Please note that allowing anonymous access to an S3 bucket
compromises security and therefore is unsuitable for most use cases. It can
be useful for accessing public data sets without requiring AWS credentials.

If unspecified, then the default list of credential provider classes,
queried in sequence, is:
* org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider: looks
for session login secrets in the Hadoop configuration.
* org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider:
Uses the values of fs.s3a.access.key and fs.s3a.secret.key.
* com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports
configuration of AWS access key ID and secret access key in
environment variables named AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
and AWS_SESSION_TOKEN as documented in the AWS SDK.
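* com.amazonaws.auth.InstanceProfileCredentialsProvider: picks up
IAM credentials of any EC2 VM or AWS container in which the process is running.

Note: the value configured in this file additionally appends
com.amazonaws.auth.WebIdentityTokenCredentialsProvider after the providers
listed above. It is not part of the default list; per the AWS SDK it resolves
credentials from a web identity token (typically configured through the
AWS_WEB_IDENTITY_TOKEN_FILE and AWS_ROLE_ARN environment variables), and it is
only consulted if none of the preceding providers yields credentials.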
</description>
</property>
</configuration>

0 comments on commit 6c19940

Please sign in to comment.