diff --git a/Dockerfile b/Dockerfile
index c7d73ea..b76cf35 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,6 +26,8 @@ RUN \
     useradd iceberg --uid 1000 --gid 1000 --create-home
 
 COPY --from=builder --chown=iceberg:iceberg /app/build/libs/iceberg-rest-image-all.jar /usr/lib/iceberg-rest/iceberg-rest-image-all.jar
+COPY static/core-site.xml /etc/hadoop/conf/core-site.xml
+ENV HADOOP_CONF_DIR=/etc/hadoop/conf
 
 ENV CATALOG_CATALOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog
 ENV CATALOG_URI=jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory
diff --git a/build.gradle b/build.gradle
index 8740bae..25cb255 100644
--- a/build.gradle
+++ b/build.gradle
@@ -21,6 +21,7 @@ dependencies {
 
   implementation 'org.apache.hadoop:hadoop-common:3.3.4'
   implementation 'org.apache.hadoop:hadoop-hdfs-client:3.3.4'
+  implementation 'org.apache.hadoop:hadoop-aws:3.3.4'
 
   runtimeOnly "software.amazon.awssdk:url-connection-client:${awsSdkVersion}"
   runtimeOnly "software.amazon.awssdk:apache-client:${awsSdkVersion}"
diff --git a/src/main/java/org/apache/iceberg/rest/HadoopUtils.java b/src/main/java/org/apache/iceberg/rest/HadoopUtils.java
new file mode 100644
index 0000000..f8998d7
--- /dev/null
+++ b/src/main/java/org/apache/iceberg/rest/HadoopUtils.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.iceberg.rest;
+
+import java.io.File;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class for building a Hadoop {@link Configuration} from on-disk config files.
+ */
+public final class HadoopUtils {
+  // Logger is bound to HadoopUtils (not RESTCatalogServer) so log events are attributed correctly.
+  private static final Logger LOG = LoggerFactory.getLogger(HadoopUtils.class);
+
+  /**
+   * Returns a new Hadoop {@link Configuration} with any discoverable core-site.xml added.
+   *
+   * <p>Candidate directories are taken from the standard Hadoop environment variables:
+   * HADOOP_CONF_DIR first, then HADOOP_HOME/conf (hadoop 1.x layout) and
+   * HADOOP_HOME/etc/hadoop (hadoop 2.x+ layout). Missing directories are skipped.
+   * Public because it is used by RESTCatalogServer.
+   */
+  public static Configuration getCoreSiteConfiguration() {
+    Configuration retConf = new Configuration();
+
+    LOG.debug("Building possible paths to core-site.xml for hadoop configuration");
+
+    String[] possibleHadoopConfPaths = new String[3];
+    possibleHadoopConfPaths[0] = System.getenv("HADOOP_CONF_DIR");
+
+    if (System.getenv("HADOOP_HOME") != null) {
+      possibleHadoopConfPaths[1] = System.getenv("HADOOP_HOME") + "/conf"; // hadoop 1.x
+      possibleHadoopConfPaths[2] = System.getenv("HADOOP_HOME") + "/etc/hadoop"; // hadoop 2.x+
+    }
+
+    for (String possibleHadoopConfPath : possibleHadoopConfPaths) {
+      if (possibleHadoopConfPath == null) {
+        continue;
+      }
+      LOG.debug("Found possibleHadoopConfPath entry: {}", possibleHadoopConfPath);
+      File coreSite = new File(possibleHadoopConfPath, "core-site.xml");
+      if (coreSite.exists()) {
+        LOG.debug("Adding {} to hadoop configuration", coreSite.getPath());
+        retConf.addResource(new Path(coreSite.getPath()));
+      }
+    }
+
+    return retConf;
+  }
+
+  /** Private constructor to prevent instantiation. */
+  private HadoopUtils() {
+    throw new UnsupportedOperationException("utility class, do not instantiate");
+  }
+}
diff --git a/src/main/java/org/apache/iceberg/rest/RESTCatalogServer.java b/src/main/java/org/apache/iceberg/rest/RESTCatalogServer.java
index 30ec1a7..00173a1 100644
--- a/src/main/java/org/apache/iceberg/rest/RESTCatalogServer.java
+++ b/src/main/java/org/apache/iceberg/rest/RESTCatalogServer.java
@@ -24,7 +24,6 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.stream.Collectors;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.iceberg.CatalogProperties;
 import org.apache.iceberg.CatalogUtil;
 import org.apache.iceberg.catalog.Catalog;
@@ -80,7 +79,8 @@ private static Catalog backendCatalog() throws IOException {
     }
 
     LOG.info("Creating catalog with properties: {}", catalogProperties);
-    return CatalogUtil.buildIcebergCatalog("rest_backend", catalogProperties, new Configuration());
+    return CatalogUtil.buildIcebergCatalog(
+        "rest_backend", catalogProperties, HadoopUtils.getCoreSiteConfiguration());
   }
 
   public static void main(String[] args) throws Exception {
diff --git a/static/core-site.xml b/static/core-site.xml
new file mode 100644
index 0000000..e7f1f4d
--- /dev/null
+++ b/static/core-site.xml
@@ -0,0 +1,71 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration>
+  <property>
+    <name>fs.s3a.aws.credentials.provider</name>
+    <value>
+      org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider,
+      org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider,
+      com.amazonaws.auth.EnvironmentVariableCredentialsProvider,
+      com.amazonaws.auth.InstanceProfileCredentialsProvider,
+      com.amazonaws.auth.WebIdentityTokenCredentialsProvider
+    </value>
+    <description>
+      Comma-separated class names of credential provider classes which implement
+      com.amazonaws.auth.AWSCredentialsProvider.
+
+      When S3A delegation tokens are not enabled, this list will be used
+      to directly authenticate with S3 and DynamoDB services.
+      When S3A Delegation tokens are enabled, depending upon the delegation
+      token binding it may be used to communicate with the STS endpoint to
+      request session/role credentials.
+
+      These are loaded and queried in sequence for a valid set of credentials.
+      Each listed class must implement one of the following means of
+      construction, which are attempted in order:
+      * a public constructor accepting java.net.URI and
+        org.apache.hadoop.conf.Configuration,
+      * a public constructor accepting org.apache.hadoop.conf.Configuration,
+      * a public static method named getInstance that accepts no
+        arguments and returns an instance of
+        com.amazonaws.auth.AWSCredentialsProvider, or
+      * a public default constructor.
+
+      (DISABLED) Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider
+      allows anonymous access to a publicly accessible S3 bucket without any
+      credentials. Please note that allowing anonymous access to an S3 bucket
+      compromises security and therefore is unsuitable for most use cases. It can
+      be useful for accessing public data sets without requiring AWS credentials.
+
+      If unspecified, then the default list of credential provider classes,
+      queried in sequence, is:
+      * org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider: looks
+        for session login secrets in the Hadoop configuration.
+      * org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider:
+        Uses the values of fs.s3a.access.key and fs.s3a.secret.key.
+      * com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports
+        configuration of AWS access key ID and secret access key in
+        environment variables named AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
+        and AWS_SESSION_TOKEN as documented in the AWS SDK.
+      * com.amazonaws.auth.InstanceProfileCredentialsProvider: picks up
+        IAM credentials of any EC2 VM or AWS container in which the process is running.
+    </description>
+  </property>
+</configuration>