From ab380160eaa1f70b8b9f44b4954a04b7d985b5f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20T=C3=A1skai?= Date: Thu, 11 Jan 2024 10:43:21 +0100 Subject: [PATCH 1/3] Add missing configuration options and table allowlisting to template. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Dominik Táskai --- athena-dynamodb/athena-dynamodb.yaml | 30 +++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/athena-dynamodb/athena-dynamodb.yaml b/athena-dynamodb/athena-dynamodb.yaml index ae3e023f58..d03c988e07 100644 --- a/athena-dynamodb/athena-dynamodb.yaml +++ b/athena-dynamodb/athena-dynamodb.yaml @@ -10,7 +10,7 @@ Metadata: Labels: - athena-federation HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' - SemanticVersion: 2022.47.1 + SemanticVersion: 1.0.0 SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' Parameters: AthenaCatalogName: @@ -24,6 +24,28 @@ Parameters: Description: 'The prefix within SpillBucket where this function can spill data.' Type: String Default: athena-spill + SpillPutRequestHeaders: + Description: '(Optional) A JSON encoded map of request headers and values for the Amazon S3 putObject request that is used for spilling. (for example, {"x-amz-server-side-encryption" : "aws:kms"}).' + Type: String + DisableGlue: + Description: "(Optional) If present and set to 'true', the connector does not attempt to retrieve supplemental metadata from AWS Glue." + Default: 'false' + Type: String + AllowedValues: + - true + - false + DisableProjectionAndCasing: + Description: "(Optional) Disables projection and casing. Use if you want to query DynamoDB tables that have casing in their column names and you do not want to specify a columnMapping property on your AWS Glue table. Can be set to 'auto' or 'always', set to 'auto' by default. See documentation for more information." + Default: 'auto' + Type: String + AllowedValues: + - auto + - always + AllowedTables: + Description: "(Optional) List of ';' delimited table names that should be fetched from DynamoDB, no other tables will be scanned if supplied." + Type: String + AllowedPattern: ^(?!.*;$)[^;]*(;[^;]*)*$ + Default: "" LambdaTimeout: Description: 'Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)' Default: 900 @@ -51,6 +73,8 @@ Parameters: Conditions: HasKMSKeyId: !Not [!Equals [!Ref KMSKeyId, ""]] + HasAllowedTables: !Not [!Equals [!Ref AllowedTables, ""]] + HasSpillPutRequestHeaders: !Not [!Equals [!Ref SpillPutRequestHeaders, ""]] NotHasLambdaRole: !Equals [!Ref LambdaRole, ""] HasPermissionsBoundary: !Not [ !Equals [ !Ref PermissionsBoundaryARN, "" ] ] CreateKMSPolicy: !And [!Condition HasKMSKeyId, !Condition NotHasLambdaRole] @@ -62,6 +86,10 @@ Resources: Environment: Variables: disable_spill_encryption: !Ref DisableSpillEncryption + disable_projection_and_casing: !Ref DisableProjectionAndCasing + disable_glue: !Ref DisableGlue + spill_put_request_headers: !If [HasSpillPutRequestHeaders, !Ref SpillPutRequestHeaders, !Ref "AWS::NoValue"] + allowed_tables: !If [HasAllowedTables, !Ref AllowedTables, !Ref "AWS::NoValue"] spill_bucket: !Ref SpillBucket spill_prefix: !Ref SpillPrefix kms_key_id: !If [HasKMSKeyId, !Ref KMSKeyId, !Ref "AWS::NoValue"] From dff70b3aee0c0d36aef0f75101d7f33c27f729a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20T=C3=A1skai?= Date: Thu, 11 Jan 2024 10:43:33 +0100 Subject: [PATCH 2/3] Add functionality to allowlist specific DynamoDB tables, instead of showing all. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Dominik Táskai --- .../dynamodb/DynamoDBMetadataHandler.java | 18 +++++++++++---- .../resolver/DynamoDBTableResolver.java | 23 ++++++++++++++++--- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBMetadataHandler.java b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBMetadataHandler.java index 4036d4176c..5177d5d757 100644 --- a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBMetadataHandler.java +++ b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBMetadataHandler.java @@ -77,6 +77,7 @@ import software.amazon.awssdk.services.dynamodb.model.ExecuteStatementResponse; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -126,6 +127,7 @@ public class DynamoDBMetadataHandler @VisibleForTesting static final int MAX_SPLITS_PER_REQUEST = 1000; private static final Logger logger = LoggerFactory.getLogger(DynamoDBMetadataHandler.class); + private static final String ALLOWED_TABLES_ENV = "allowed_tables"; static final String DYNAMODB = "dynamodb"; private static final String SOURCE_TYPE = "ddb"; // defines the value that should be present in the Glue Database URI to enable the DB for DynamoDB. @@ -147,12 +149,19 @@ public class DynamoDBMetadataHandler public DynamoDBMetadataHandler(java.util.Map configOptions) { super(SOURCE_TYPE, configOptions); - this.ddbClient = DynamoDbClient.builder() + this.ddbClient = DynamoDbClient.builder() .credentialsProvider(CrossAccountCredentialsProviderV2.getCrossAccountCredentialsIfPresent(configOptions, "DynamoDBMetadataHandler_CrossAccountRoleSession")) .build(); this.glueClient = getAwsGlue(); this.invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER, configOptions).build(); - this.tableResolver = new DynamoDBTableResolver(invoker, ddbClient); + + String allowedTablesEnvStr = configOptions.getOrDefault(ALLOWED_TABLES_ENV, ""); + List allowedTables = new ArrayList<>(); + if (!allowedTablesEnvStr.isEmpty()) { + allowedTables = Arrays.asList(allowedTablesEnvStr.split(";", -1)); + } + + this.tableResolver = new DynamoDBTableResolver(invoker, ddbClient, allowedTables); this.queryPassthrough = new DDBQueryPassthrough(); } @@ -171,7 +180,7 @@ public DynamoDBMetadataHandler(java.util.Map configOptions) this.glueClient = glueClient; this.ddbClient = ddbClient; this.invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER, configOptions).build(); - this.tableResolver = new DynamoDBTableResolver(invoker, ddbClient); + this.tableResolver = new DynamoDBTableResolver(invoker, ddbClient, new ArrayList<>()); this.queryPassthrough = new DDBQueryPassthrough(); } @@ -217,6 +226,7 @@ public ListSchemasResponse doListSchemaNames(BlockAllocator allocator, ListSchem *

* If the specified schema is "default", this also returns an intersection with actual tables in DynamoDB. * Pagination only implemented for DynamoDBTableResolver.listTables() + * * @see GlueMetadataHandler */ @Override @@ -230,7 +240,7 @@ public ListTablesResponse doListTables(BlockAllocator allocator, ListTablesReque try { // does not validate that the tables are actually DDB tables combinedTables.addAll(super.doListTables(allocator, new ListTablesRequest(request.getIdentity(), request.getQueryId(), request.getCatalogName(), - request.getSchemaName(), null, UNLIMITED_PAGE_SIZE_VALUE), TABLE_FILTER).getTables()); + request.getSchemaName(), null, UNLIMITED_PAGE_SIZE_VALUE), TABLE_FILTER).getTables()); } catch (RuntimeException e) { logger.warn("doListTables: Unable to retrieve tables from AWSGlue in database/schema {}", request.getSchemaName(), e); diff --git a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/resolver/DynamoDBTableResolver.java b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/resolver/DynamoDBTableResolver.java index a3d0b73e3e..6d8dcc9edf 100644 --- a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/resolver/DynamoDBTableResolver.java +++ b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/resolver/DynamoDBTableResolver.java @@ -48,7 +48,7 @@ * which may have captial letters in them without issue. It does so by fetching all table names and doing * a case insensitive search over them. It will first try to do a targeted get to reduce the penalty for * tables which don't have capitalization. - * + *

* TODO add caching */ public class DynamoDBTableResolver @@ -58,11 +58,13 @@ public class DynamoDBTableResolver private DynamoDbClient ddbClient; // used to handle Throttling events using an AIMD strategy for congestion control. private ThrottlingInvoker invoker; + private List allowedTables; - public DynamoDBTableResolver(ThrottlingInvoker invoker, DynamoDbClient ddbClient) + public DynamoDBTableResolver(ThrottlingInvoker invoker, DynamoDbClient ddbClient, List allowedTables) { this.invoker = invoker; this.ddbClient = ddbClient; + this.allowedTables = allowedTables; } /** @@ -90,7 +92,22 @@ private DynamoDBPaginatedTables listPaginatedTables(String token, int pageSize) .limit(limit) .build(); ListTablesResponse response = invoker.invoke(() -> ddbClient.listTables(ddbRequest)); - tables.addAll(response.tableNames()); + List responseTableNames = response.tableNames(); + + if (!allowedTables.isEmpty()) { + responseTableNames.forEach(tableName -> { + if (allowedTables.contains(tableName)) { + tables.add(tableName); + } + else { + logger.warn("{} excluded from tables-to-scan", tableName); + } + }); + } + else { + tables.addAll(responseTableNames); + } + nextToken = response.lastEvaluatedTableName(); } while (nextToken != null && pageSize == UNLIMITED_PAGE_SIZE_VALUE); From 02b71b744f0fa71db6ce480ec7e1ddee6175a329 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20T=C3=A1skai?= Date: Wed, 17 Jan 2024 17:48:12 +0100 Subject: [PATCH 3/3] Undo formatting changes, update template metadata. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Dominik Táskai --- athena-dynamodb/athena-dynamodb.yaml | 10 +++++----- .../connectors/dynamodb/DynamoDBMetadataHandler.java | 3 +-- .../dynamodb/resolver/DynamoDBTableResolver.java | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/athena-dynamodb/athena-dynamodb.yaml b/athena-dynamodb/athena-dynamodb.yaml index d03c988e07..cf4dd1b078 100644 --- a/athena-dynamodb/athena-dynamodb.yaml +++ b/athena-dynamodb/athena-dynamodb.yaml @@ -1,17 +1,17 @@ Transform: 'AWS::Serverless-2016-10-31' Metadata: 'AWS::ServerlessRepo::Application': - Name: AthenaDynamoDBConnector + Name: AthenaDynamoDBConnector-Porsche Description: 'This connector enables Amazon Athena to communicate with DynamoDB, making your tables accessible via SQL.' - Author: 'default author' + Author: 'Porsche - Insights BI' SpdxLicenseId: Apache-2.0 LicenseUrl: LICENSE.txt ReadmeUrl: README.md Labels: - athena-federation - HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' - SemanticVersion: 1.0.0 - SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' + HomePageUrl: 'https://github.com/porscheofficial/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/porscheofficial/aws-athena-query-federation' Parameters: AthenaCatalogName: Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' diff --git a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBMetadataHandler.java b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBMetadataHandler.java index 5177d5d757..fd0df1252a 100644 --- a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBMetadataHandler.java +++ b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBMetadataHandler.java @@ -226,7 +226,6 @@ public ListSchemasResponse doListSchemaNames(BlockAllocator allocator, ListSchem *

* If the specified schema is "default", this also returns an intersection with actual tables in DynamoDB. * Pagination only implemented for DynamoDBTableResolver.listTables() - * * @see GlueMetadataHandler */ @Override @@ -240,7 +239,7 @@ public ListTablesResponse doListTables(BlockAllocator allocator, ListTablesReque try { // does not validate that the tables are actually DDB tables combinedTables.addAll(super.doListTables(allocator, new ListTablesRequest(request.getIdentity(), request.getQueryId(), request.getCatalogName(), - request.getSchemaName(), null, UNLIMITED_PAGE_SIZE_VALUE), TABLE_FILTER).getTables()); + request.getSchemaName(), null, UNLIMITED_PAGE_SIZE_VALUE), TABLE_FILTER).getTables()); } catch (RuntimeException e) { logger.warn("doListTables: Unable to retrieve tables from AWSGlue in database/schema {}", request.getSchemaName(), e); diff --git a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/resolver/DynamoDBTableResolver.java b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/resolver/DynamoDBTableResolver.java index 6d8dcc9edf..9fa3220983 100644 --- a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/resolver/DynamoDBTableResolver.java +++ b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/resolver/DynamoDBTableResolver.java @@ -48,7 +48,7 @@ * which may have captial letters in them without issue. It does so by fetching all table names and doing * a case insensitive search over them. It will first try to do a targeted get to reduce the penalty for * tables which don't have capitalization. - *

+ * * TODO add caching */ public class DynamoDBTableResolver