diff --git a/.github/workflows/auto-approve.yml b/.github/workflows/auto-approve.yml index 7851164e26..d86d0abfb7 100644 --- a/.github/workflows/auto-approve.yml +++ b/.github/workflows/auto-approve.yml @@ -19,12 +19,6 @@ jobs: env: PR_URL: ${{github.event.pull_request.html_url}} GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} - - name: Approve patch and minor updates - if: ${{steps.dependabot-metadata.outputs.update-type == 'version-update:semver-patch' || steps.dependabot-metadata.outputs.update-type == 'version-update:semver-minor'}} - run: gh pr review $PR_URL --approve -b "I'm **approving** this pull request because **it includes a patch or minor update**" - env: - PR_URL: ${{github.event.pull_request.html_url}} - GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} - name: Comment on major updates of non-development dependencies if: ${{steps.dependabot-metadata.outputs.update-type == 'version-update:semver-major'}} run: | diff --git a/athena-aws-cmdb/athena-aws-cmdb-connection.yaml b/athena-aws-cmdb/athena-aws-cmdb-connection.yaml new file mode 100644 index 0000000000..bea006e174 --- /dev/null +++ b/athena-aws-cmdb/athena-aws-cmdb-connection.yaml @@ -0,0 +1,142 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaAwsCmdbConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with various AWS Services, making your resource inventories accessible via SQL.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. 
This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-aws-cmdb:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with various AWS Services, making your resource inventories accessible via SQL." 
+ Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - autoscaling:Describe* + - elasticloadbalancing:Describe* + - ec2:Describe* + - elasticmapreduce:Describe* + - elasticmapreduce:List* + - rds:Describe* + - rds:ListTagsForResource + - athena:GetQueryExecution + - s3:ListBucket + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + Roles: + - !Ref FunctionRole + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub
"arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbCompositeHandler.java b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbCompositeHandler.java index 7b4653cb6b..02549fff2a 100644 --- a/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbCompositeHandler.java +++ b/athena-aws-cmdb/src/main/java/com/amazonaws/athena/connectors/aws/cmdb/AwsCmdbCompositeHandler.java @@ -19,6 +19,7 @@ */ package com.amazonaws.athena.connectors.aws.cmdb; +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler; /** @@ -30,6 +31,6 @@ public class AwsCmdbCompositeHandler { public AwsCmdbCompositeHandler() { - super(new AwsCmdbMetadataHandler(System.getenv()), new AwsCmdbRecordHandler(System.getenv())); + super(new AwsCmdbMetadataHandler(new EnvironmentProperties().createEnvironment()), new AwsCmdbRecordHandler(new EnvironmentProperties().createEnvironment())); } } diff --git a/athena-cloudera-hive/Dockerfile b/athena-cloudera-hive/Dockerfile index a56019f693..266ff4b120 100644 --- a/athena-cloudera-hive/Dockerfile +++ b/athena-cloudera-hive/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-cloudera-hive-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-cloudera-hive-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.cloudera.HiveMuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly.
+# No need to specify here (already defined in .yaml file because legacy and connections use different) diff --git a/athena-cloudera-hive/athena-cloudera-hive-connection.yaml b/athena-cloudera-hive/athena-cloudera-hive-connection.yaml new file mode 100644 index 0000000000..29fa4f5d2e --- /dev/null +++ b/athena-cloudera-hive/athena-cloudera-hive-connection.yaml @@ -0,0 +1,165 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaClouderaHiveConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your Cloudera Hive instance(s) using JDBC driver.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." 
+ Type: String + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: 'List' + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: 'List' +Conditions: + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + CreateKmsPolicy: !And [!Condition NotHasLambdaRole, !Condition HasKmsKeyId] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-cloudera-hive:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.cloudera.HiveCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with Cloudera Hive using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + 
ManagedPolicyArns: + - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: +
PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-cloudera-hive/athena-cloudera-hive.yaml b/athena-cloudera-hive/athena-cloudera-hive.yaml index bc9da142a9..2ec64cad7b 100644 --- a/athena-cloudera-hive/athena-cloudera-hive.yaml +++ b/athena-cloudera-hive/athena-cloudera-hive.yaml @@ -21,8 +21,8 @@ Parameters: Description: 'The default connection string is used when catalog is "lambda:${LambdaFunctionName}". Catalog specific Connection Strings can be added later. Format: ${DatabaseType}://${NativeJdbcConnectionString}.' Type: String SecretNamePrefix: - Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' - Type: String + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String SpillBucket: Description: 'The name of the bucket where this function can spill data.'
Type: String @@ -71,6 +71,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-cloudera-hive:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.cloudera.HiveMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with Coludera Hive using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory @@ -89,17 +91,17 @@ Resources: Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' Version: '2012-10-17' - Statement: - - Action: - - logs:CreateLogStream - - logs:PutLogEvents - Effect: Allow - Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' Version: '2012-10-17' - Statement: - - Action: - - athena:GetQueryExecution - Effect: Allow - Resource: '*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' Version: '2012-10-17' #S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy #with one that is more restrictive and can only 'put' but not read,delete, or overwrite files. 
diff --git a/athena-cloudera-hive/src/main/java/com/amazonaws/athena/connectors/cloudera/ClouderaHiveEnvironmentProperties.java b/athena-cloudera-hive/src/main/java/com/amazonaws/athena/connectors/cloudera/ClouderaHiveEnvironmentProperties.java new file mode 100644 index 0000000000..f53c6001bb --- /dev/null +++ b/athena-cloudera-hive/src/main/java/com/amazonaws/athena/connectors/cloudera/ClouderaHiveEnvironmentProperties.java @@ -0,0 +1,90 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.cloudera; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HIVE_CONFS; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HIVE_VARS; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SECRET_NAME; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SESSION_CONFS; + +/** + * Cloudera Hive overrides of JdbcEnvironmentProperties: the connection string prefix and the JDBC parameter section. + */ +public class ClouderaHiveEnvironmentProperties extends JdbcEnvironmentProperties +{ + /** + * Returns the constant prefix "hive://jdbc:hive2://"; connectionProperties is not consulted. + */ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "hive://jdbc:hive2://"; + } + + /** + * Builds the parameter section of the connection string, starting with "?". + * + * The SESSION_CONFS, HIVE_CONFS and HIVE_VARS values that are present are appended in that order, followed by a + * "${secretName}" placeholder when SECRET_NAME is present. A ";" separates every two appended segments, also when + * a segment between them is absent; empty values are skipped so they leave no stray delimiter. + * + * @param connectionProperties properties the optional segments are read from + * @return the parameter string, always beginning with "?" + */ + @Override + protected String getJdbcParameters(Map connectionProperties) + { + StringBuilder params = new StringBuilder("?"); + + if (connectionProperties.containsKey(SESSION_CONFS)) { + appendSegment(params, connectionProperties.get(SESSION_CONFS)); + } + if (connectionProperties.containsKey(HIVE_CONFS)) { + appendSegment(params, connectionProperties.get(HIVE_CONFS)); + } + if (connectionProperties.containsKey(HIVE_VARS)) { + appendSegment(params, connectionProperties.get(HIVE_VARS)); + } + if (connectionProperties.containsKey(SECRET_NAME)) { + appendSegment(params, "${" + connectionProperties.get(SECRET_NAME) + "}"); + } + + return params.toString(); + } + + /** + * Appends one segment to params, writing ";" first when something already follows the leading "?". + */ + private static void appendSegment(StringBuilder params, Object segment) + { + String text = String.valueOf(segment); + if (text.isEmpty()) { + return; + } + if (params.length() > 1) { + params.append(';'); + } + params.append(text); + } +}
diff --git a/athena-cloudera-hive/src/main/java/com/amazonaws/athena/connectors/cloudera/HiveCompositeHandler.java b/athena-cloudera-hive/src/main/java/com/amazonaws/athena/connectors/cloudera/HiveCompositeHandler.java index fc97001c78..505bcc0e67 100644 --- a/athena-cloudera-hive/src/main/java/com/amazonaws/athena/connectors/cloudera/HiveCompositeHandler.java +++ b/athena-cloudera-hive/src/main/java/com/amazonaws/athena/connectors/cloudera/HiveCompositeHandler.java @@ -32,6 +32,6 @@ public class HiveCompositeHandler { public HiveCompositeHandler() { - super(new HiveMetadataHandler(System.getenv()), new HiveRecordHandler(System.getenv())); + super(new HiveMetadataHandler(new ClouderaHiveEnvironmentProperties().createEnvironment()), new HiveRecordHandler(new ClouderaHiveEnvironmentProperties().createEnvironment())); } } diff --git a/athena-cloudera-impala/Dockerfile b/athena-cloudera-impala/Dockerfile index 2ed43aeaa9..67742679e0 100644 --- a/athena-cloudera-impala/Dockerfile +++ b/athena-cloudera-impala/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-cloudera-impala-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-cloudera-impala-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the
Dockerfile) -CMD [ "com.amazonaws.athena.connectors.cloudera.ImpalaMuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly. +# No need to specify here (already defined in .yaml file because legacy and connections use different) diff --git a/athena-cloudera-impala/athena-cloudera-impala-connection.yaml b/athena-cloudera-impala/athena-cloudera-impala-connection.yaml new file mode 100644 index 0000000000..6c40c72087 --- /dev/null +++ b/athena-cloudera-impala/athena-cloudera-impala-connection.yaml @@ -0,0 +1,165 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaClouderaImpalaConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your Cloudera Impala instance(s) using JDBC driver.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' 
+ Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: 'List' + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: 'List' + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [!Condition NotHasLambdaRole, !Condition HasKmsKeyId] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-cloudera-impala:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.cloudera.ImpalaCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with Cloudera Impala using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If 
[NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub
'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole diff --git a/athena-cloudera-impala/athena-cloudera-impala.yaml b/athena-cloudera-impala/athena-cloudera-impala.yaml index adf96ece5b..55e1d742d7 100644 --- a/athena-cloudera-impala/athena-cloudera-impala.yaml +++ b/athena-cloudera-impala/athena-cloudera-impala.yaml @@ -21,8 +21,8 @@ Parameters: Description: 'The default connection string is used when catalog is "lambda:${LambdaFunctionName}". Catalog specific Connection Strings can be added later. Format: ${DatabaseType}://${NativeJdbcConnectionString}.' Type: String SecretNamePrefix: - Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' - Type: String + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*".
Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String SpillBucket: Description: 'The name of the bucket where this function can spill data.' Type: String @@ -76,6 +76,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-cloudera-impala:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.cloudera.ImpalaMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with Cloudera Impala using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory @@ -94,17 +96,17 @@ Resources: Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' Version: '2012-10-17' - Statement: - - Action: - - logs:CreateLogStream - - logs:PutLogEvents - Effect: Allow - Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' Version: '2012-10-17' - Statement: - - Action: - - athena:GetQueryExecution - Effect: Allow - Resource: '*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' Version: '2012-10-17' #S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy #with one that is more restrictive and can only 'put' but not read,delete, or overwrite files. 
@@ -115,4 +117,4 @@ Resources: VpcConfig: SecurityGroupIds: !Ref SecurityGroupIds SubnetIds: !Ref SubnetIds - KmsKeyArn: !If [ HasLambdaEncryptionKmsKeyARN, !Ref LambdaEncryptionKmsKeyARN, !Ref "AWS::NoValue" ] + KmsKeyArn: !If [ HasLambdaEncryptionKmsKeyARN, !Ref LambdaEncryptionKmsKeyARN, !Ref "AWS::NoValue" ] \ No newline at end of file diff --git a/athena-cloudera-impala/pom.xml b/athena-cloudera-impala/pom.xml index cfdb74e7b3..396904c0ca 100644 --- a/athena-cloudera-impala/pom.xml +++ b/athena-cloudera-impala/pom.xml @@ -35,7 +35,6 @@ test-jar test - org.mockito mockito-core diff --git a/athena-cloudera-impala/src/main/java/com/amazonaws/athena/connectors/cloudera/ImpalaCompositeHandler.java b/athena-cloudera-impala/src/main/java/com/amazonaws/athena/connectors/cloudera/ImpalaCompositeHandler.java index ecb9576da5..45664b1be3 100644 --- a/athena-cloudera-impala/src/main/java/com/amazonaws/athena/connectors/cloudera/ImpalaCompositeHandler.java +++ b/athena-cloudera-impala/src/main/java/com/amazonaws/athena/connectors/cloudera/ImpalaCompositeHandler.java @@ -33,6 +33,7 @@ public class ImpalaCompositeHandler { public ImpalaCompositeHandler() { - super(new ImpalaMetadataHandler(System.getenv()), new ImpalaRecordHandler(System.getenv())); + super(new ImpalaMetadataHandler(new ImpalaEnvironmentProperties().createEnvironment()), + new ImpalaRecordHandler(new ImpalaEnvironmentProperties().createEnvironment())); } } diff --git a/athena-cloudera-impala/src/main/java/com/amazonaws/athena/connectors/cloudera/ImpalaEnvironmentProperties.java b/athena-cloudera-impala/src/main/java/com/amazonaws/athena/connectors/cloudera/ImpalaEnvironmentProperties.java new file mode 100644 index 0000000000..a450056740 --- /dev/null +++ b/athena-cloudera-impala/src/main/java/com/amazonaws/athena/connectors/cloudera/ImpalaEnvironmentProperties.java @@ -0,0 +1,39 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.cloudera; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +public class ImpalaEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "impala://jdbc:impala://"; + } + + @Override + protected String getDatabase(Map connectionProperties) + { + return "/"; + } +} diff --git a/athena-cloudwatch-metrics/athena-cloudwatch-metrics-connection.yaml b/athena-cloudwatch-metrics/athena-cloudwatch-metrics-connection.yaml new file mode 100644 index 0000000000..1cf557a0fa --- /dev/null +++ b/athena-cloudwatch-metrics/athena-cloudwatch-metrics-connection.yaml @@ -0,0 +1,136 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaCloudwatchMetricsConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with Cloudwatch Metrics, making your metrics data accessible via SQL.' 
+ Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." 
+ Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-cloudwatch-metrics:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with Cloudwatch Metrics, making your metrics data accessible via SQL" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - cloudwatch:Describe* + - cloudwatch:Get* + - cloudwatch:List* + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - 
s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsCompositeHandler.java b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsCompositeHandler.java index 7f1e429660..ebee6b84e6 100644 --- a/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsCompositeHandler.java +++ b/athena-cloudwatch-metrics/src/main/java/com/amazonaws/athena/connectors/cloudwatch/metrics/MetricsCompositeHandler.java @@ -19,6 +19,7 @@ */ package com.amazonaws.athena.connectors.cloudwatch.metrics; +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler; /** @@ -30,6 +31,6 @@ public class MetricsCompositeHandler { public MetricsCompositeHandler() { - super(new MetricsMetadataHandler(System.getenv()), new MetricsRecordHandler(System.getenv())); + super(new MetricsMetadataHandler(new 
EnvironmentProperties().createEnvironment()), new MetricsRecordHandler(new EnvironmentProperties().createEnvironment())); } } diff --git a/athena-cloudwatch/athena-cloudwatch-connection.yaml b/athena-cloudwatch/athena-cloudwatch-connection.yaml new file mode 100644 index 0000000000..b2bab240ef --- /dev/null +++ b/athena-cloudwatch/athena-cloudwatch-connection.yaml @@ -0,0 +1,140 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaCloudwatchConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with Cloudwatch, making your logs accessible via SQL.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + LambdaRole: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." 
+ Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [!Equals [!Ref KmsKeyId, ""]] + NotHasLambdaRole: !Equals [!Ref LambdaRole, ""] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-cloudwatch:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with Cloudwatch, making your log accessible via SQL" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRole] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - logs:Describe* + - logs:Get* + - logs:List* + - logs:StartQuery + - logs:StopQuery + - logs:TestMetricFilter + - logs:FilterLogEvents + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - 
arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + Roles: + - !Ref FunctionRole + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - kms:GenerateRandom + Effect: Allow + Resource: '*' + - Action: + - kms:GenerateDataKey + Effect: Allow + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole diff --git a/athena-cloudwatch/athena-cloudwatch.yaml b/athena-cloudwatch/athena-cloudwatch.yaml index 67e286740f..860dbf9c5d 100644 --- a/athena-cloudwatch/athena-cloudwatch.yaml +++ b/athena-cloudwatch/athena-cloudwatch.yaml @@ -113,25 +113,25 @@ Resources: Effect: Allow Resource: '*' - Action: - - s3:GetObject - - s3:ListBucket - - s3:GetBucketLocation - - s3:GetObjectVersion - - s3:PutObject - - s3:PutObjectAcl - - s3:GetLifecycleConfiguration - - s3:PutLifecycleConfiguration - - s3:DeleteObject + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject Effect: Allow Resource: - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName} - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName}/* - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket Roles: - !Ref FunctionRole @@ -144,7 +144,7 @@ Resources: 
Version: 2012-10-17 Statement: - Action: - - kms:GenerateRandom + - kms:GenerateRandom Effect: Allow Resource: '*' - Action: @@ -152,4 +152,4 @@ Resources: Effect: Allow Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KMSKeyId}" Roles: - - !Ref FunctionRole + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-cloudwatch/pom.xml b/athena-cloudwatch/pom.xml index d49af55793..bdf2dcda3d 100644 --- a/athena-cloudwatch/pom.xml +++ b/athena-cloudwatch/pom.xml @@ -31,7 +31,7 @@ software.amazon.awssdk cloudwatchlogs - 2.29.9 + 2.29.29 diff --git a/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchCompositeHandler.java b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchCompositeHandler.java index bf8b9ee1e5..99719f1098 100644 --- a/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchCompositeHandler.java +++ b/athena-cloudwatch/src/main/java/com/amazonaws/athena/connectors/cloudwatch/CloudwatchCompositeHandler.java @@ -19,6 +19,7 @@ */ package com.amazonaws.athena.connectors.cloudwatch; +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler; /** @@ -30,6 +31,6 @@ public class CloudwatchCompositeHandler { public CloudwatchCompositeHandler() { - super(new CloudwatchMetadataHandler(System.getenv()), new CloudwatchRecordHandler(System.getenv())); + super(new CloudwatchMetadataHandler(new EnvironmentProperties().createEnvironment()), new CloudwatchRecordHandler(new EnvironmentProperties().createEnvironment())); } } diff --git a/athena-datalakegen2/Dockerfile b/athena-datalakegen2/Dockerfile index 4e1929f607..d6667524ad 100644 --- a/athena-datalakegen2/Dockerfile +++ b/athena-datalakegen2/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-datalakegen2-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-datalakegen2-2022.47.1.jar -# Set 
the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.datalakegen2.DataLakeGen2MuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly. +# No need to specify here (already defined in .yaml file because legacy and connections use different) diff --git a/athena-datalakegen2/athena-datalakegen2-connection.yaml b/athena-datalakegen2/athena-datalakegen2-connection.yaml new file mode 100644 index 0000000000..875093bae6 --- /dev/null +++ b/athena-datalakegen2/athena-datalakegen2-connection.yaml @@ -0,0 +1,173 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaDataLakeGen2ConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with DataLake Gen2 using JDBC driver.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - datalake-gen2 + - athena-federation + - jdbc + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' 
+ Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ] + HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-datalakegen2:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + 
ImageConfig: + Command: [ "com.amazonaws.athena.connectors.datalakegen2.DataLakeGen2CompositeHandler" ] + Description: "Enables Amazon Athena to communicate with DataLake Gen2 using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - 
s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-datalakegen2/athena-datalakegen2.yaml b/athena-datalakegen2/athena-datalakegen2.yaml index 96750e019c..1318145981 100644 --- a/athena-datalakegen2/athena-datalakegen2.yaml +++ b/athena-datalakegen2/athena-datalakegen2.yaml @@ -77,6 +77,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-datalakegen2:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.datalakegen2.DataLakeGen2MuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with DataLake Gen2 using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory diff --git a/athena-datalakegen2/src/main/java/com/amazonaws/athena/connectors/datalakegen2/DataLakeGen2CompositeHandler.java b/athena-datalakegen2/src/main/java/com/amazonaws/athena/connectors/datalakegen2/DataLakeGen2CompositeHandler.java index 62cfce965b..9eb6d61bc9 100644 --- 
a/athena-datalakegen2/src/main/java/com/amazonaws/athena/connectors/datalakegen2/DataLakeGen2CompositeHandler.java +++ b/athena-datalakegen2/src/main/java/com/amazonaws/athena/connectors/datalakegen2/DataLakeGen2CompositeHandler.java @@ -31,6 +31,6 @@ public class DataLakeGen2CompositeHandler extends CompositeHandler { public DataLakeGen2CompositeHandler() { - super(new DataLakeGen2MetadataHandler(System.getenv()), new DataLakeGen2RecordHandler(System.getenv())); + super(new DataLakeGen2MetadataHandler(new DataLakeGen2EnvironmentProperties().createEnvironment()), new DataLakeGen2RecordHandler(new DataLakeGen2EnvironmentProperties().createEnvironment())); } } diff --git a/athena-datalakegen2/src/main/java/com/amazonaws/athena/connectors/datalakegen2/DataLakeGen2EnvironmentProperties.java b/athena-datalakegen2/src/main/java/com/amazonaws/athena/connectors/datalakegen2/DataLakeGen2EnvironmentProperties.java new file mode 100644 index 0000000000..dfecee2fff --- /dev/null +++ b/athena-datalakegen2/src/main/java/com/amazonaws/athena/connectors/datalakegen2/DataLakeGen2EnvironmentProperties.java @@ -0,0 +1,53 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.datalakegen2; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DATABASE; + +public class DataLakeGen2EnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "datalakegentwo://jdbc:sqlserver://"; + } + + @Override + protected String getDatabase(Map connectionProperties) + { + return ";databaseName=" + connectionProperties.get(DATABASE); + } + + @Override + protected String getJdbcParametersSeparator() + { + return ";"; + } + + @Override + protected String getDelimiter() + { + return ";"; + } +} diff --git a/athena-db2-as400/Dockerfile b/athena-db2-as400/Dockerfile index affd37e7bb..cac4944c1a 100644 --- a/athena-db2-as400/Dockerfile +++ b/athena-db2-as400/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-db2-as400-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-db2-as400-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.db2as400.Db2As400MuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly. 
+# No need to specify here (already defined in .yaml file because legacy and connections use different) diff --git a/athena-db2-as400/athena-db2-as400-connection.yaml b/athena-db2-as400/athena-db2-as400-connection.yaml new file mode 100644 index 0000000000..1f245c8491 --- /dev/null +++ b/athena-db2-as400/athena-db2-as400-connection.yaml @@ -0,0 +1,174 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaDb2AS400ConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with DB2 on iSeries (AS400) using JDBC driver.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - ibm + - db2as400 + - athena-federation + - jdbc + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." 
+ Type: String + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ] + HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ] + HasKmsKeyId: !Not [!Equals [!Ref KmsKeyId, ""]] + NotHasLambdaRole: !Equals [!Ref LambdaRoleArn, ""] + CreateKmsPolicy: !And [!Condition HasKmsKeyId, !Condition NotHasLambdaRole] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-db2-as400:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.db2as400.Db2As400CompositeHandler" ] + Description: "Enables Amazon Athena to communicate with DB2 on iSeries (AS400) using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [ 
NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + 
- glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-db2-as400/athena-db2-as400.yaml b/athena-db2-as400/athena-db2-as400.yaml index 1b6cf39bf1..ac6dca58a5 100644 --- a/athena-db2-as400/athena-db2-as400.yaml +++ b/athena-db2-as400/athena-db2-as400.yaml @@ -78,6 +78,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-db2-as400:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.db2as400.Db2As400MuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with DB2 on iSeries (AS400) using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory diff --git a/athena-db2-as400/src/main/java/com/amazonaws/athena/connectors/db2as400/Db2As400CompositeHandler.java b/athena-db2-as400/src/main/java/com/amazonaws/athena/connectors/db2as400/Db2As400CompositeHandler.java index e0066d295e..f0e0a8ea04 100644 --- a/athena-db2-as400/src/main/java/com/amazonaws/athena/connectors/db2as400/Db2As400CompositeHandler.java +++ b/athena-db2-as400/src/main/java/com/amazonaws/athena/connectors/db2as400/Db2As400CompositeHandler.java @@ -31,6 +31,6 @@ public class Db2As400CompositeHandler extends CompositeHandler { public Db2As400CompositeHandler() { - super(new 
Db2As400MetadataHandler(System.getenv()), new Db2As400RecordHandler(System.getenv())); + super(new Db2As400MetadataHandler(new Db2As400EnvironmentProperties().createEnvironment()), new Db2As400RecordHandler(new Db2As400EnvironmentProperties().createEnvironment())); } } diff --git a/athena-db2-as400/src/main/java/com/amazonaws/athena/connectors/db2as400/Db2As400EnvironmentProperties.java b/athena-db2-as400/src/main/java/com/amazonaws/athena/connectors/db2as400/Db2As400EnvironmentProperties.java new file mode 100644 index 0000000000..46dcde3b14 --- /dev/null +++ b/athena-db2-as400/src/main/java/com/amazonaws/athena/connectors/db2as400/Db2As400EnvironmentProperties.java @@ -0,0 +1,54 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.db2as400; + +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; + +import java.util.HashMap; +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DEFAULT; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HOST; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.JDBC_PARAMS; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SECRET_NAME; + +public class Db2As400EnvironmentProperties extends EnvironmentProperties +{ + @Override + public Map<String, String> connectionPropertiesToEnvironment(Map<String, String> connectionProperties) + { + HashMap<String, String> environment = new HashMap<>(); + + // now construct jdbc string + String connectionString = "db2as400://jdbc:as400://" + connectionProperties.get(HOST) + + ";" + connectionProperties.getOrDefault(JDBC_PARAMS, ""); + + if (connectionProperties.containsKey(SECRET_NAME)) { + if (connectionProperties.containsKey(JDBC_PARAMS)) { // need to add delimiter + connectionString = connectionString + ";"; + } + connectionString = connectionString + ":${" + connectionProperties.get(SECRET_NAME) + "}"; + } + + logger.debug("Constructed connection string: {}", connectionString); + environment.put(DEFAULT, connectionString); + return environment; + } +} diff --git a/athena-db2/Dockerfile b/athena-db2/Dockerfile index 0d8231fa29..26e4d3746a 100644 --- a/athena-db2/Dockerfile +++ b/athena-db2/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-db2-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-db2-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.db2.Db2MuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly.
+# No need to specify here (already defined in .yaml file because legacy and connections use different) diff --git a/athena-db2/athena-db2-connection.yaml b/athena-db2/athena-db2-connection.yaml new file mode 100644 index 0000000000..34ad745ff1 --- /dev/null +++ b/athena-db2/athena-db2-connection.yaml @@ -0,0 +1,174 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaDb2ConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with DB2 using JDBC driver.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - ibm + - db2 + - athena-federation + - jdbc + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." 
+ Type: String + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ] + HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-db2:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.db2.Db2CompositeHandler" ] + Description: "Enables Amazon Athena to communicate with DB2 using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt 
FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - "arn:${AWS::Partition}:s3:::${bucketName}" + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - "arn:${AWS::Partition}:s3:::${bucketName}/*" + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection 
+ Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-db2/athena-db2.yaml b/athena-db2/athena-db2.yaml index d82d9585f4..711652357a 100644 --- a/athena-db2/athena-db2.yaml +++ b/athena-db2/athena-db2.yaml @@ -78,6 +78,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-db2:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.db2.Db2MuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with DB2 using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory diff --git a/athena-db2/pom.xml b/athena-db2/pom.xml index e018349754..8601caf25d 100644 --- a/athena-db2/pom.xml +++ b/athena-db2/pom.xml @@ -65,7 +65,6 @@ ${mockito.version} test - diff --git a/athena-db2/src/main/java/com/amazonaws/athena/connectors/db2/Db2CompositeHandler.java b/athena-db2/src/main/java/com/amazonaws/athena/connectors/db2/Db2CompositeHandler.java index 4affa252af..0bc8c1a332 100644 --- a/athena-db2/src/main/java/com/amazonaws/athena/connectors/db2/Db2CompositeHandler.java +++ b/athena-db2/src/main/java/com/amazonaws/athena/connectors/db2/Db2CompositeHandler.java @@ -31,6 +31,6 @@ public class Db2CompositeHandler extends CompositeHandler { public Db2CompositeHandler() { - super(new 
Db2MetadataHandler(System.getenv()), new Db2RecordHandler(System.getenv())); + super(new Db2MetadataHandler(new Db2EnvironmentProperties().createEnvironment()), new Db2RecordHandler(new Db2EnvironmentProperties().createEnvironment())); } } diff --git a/athena-db2/src/main/java/com/amazonaws/athena/connectors/db2/Db2EnvironmentProperties.java b/athena-db2/src/main/java/com/amazonaws/athena/connectors/db2/Db2EnvironmentProperties.java new file mode 100644 index 0000000000..7d410d149e --- /dev/null +++ b/athena-db2/src/main/java/com/amazonaws/athena/connectors/db2/Db2EnvironmentProperties.java @@ -0,0 +1,45 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.db2; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +public class Db2EnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map<String, String> connectionProperties) + { + return "dbtwo://jdbc:db2://"; + } + + @Override + protected String getJdbcParametersSeparator() + { + return ":"; + } + + @Override + protected String getDelimiter() + { + return ";"; + } +} diff --git a/athena-docdb/Dockerfile b/athena-docdb/Dockerfile index 06e8a5c907..0c3cb8a82c 100644 --- a/athena-docdb/Dockerfile +++ b/athena-docdb/Dockerfile @@ -1,9 +1,39 @@ FROM public.ecr.aws/lambda/java:11 +# Install necessary tools +RUN yum update -y && yum install -y curl perl openssl + # Copy function code and runtime dependencies from Maven layout COPY target/athena-docdb-2022.47.1.jar ${LAMBDA_TASK_ROOT} + # Unpack the jar RUN jar xf athena-docdb-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +# Set up environment variables +ENV truststore=${LAMBDA_TASK_ROOT}/rds-truststore.jks +ENV storepassword=federationStorePass + +# Download and process the RDS certificate +RUN curl -sS "https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem" > ${LAMBDA_TASK_ROOT}/global-bundle.pem && \ + awk 'split_after == 1 {n++;split_after=0} /-----END CERTIFICATE-----/ {split_after=1}{print > "rds-ca-" n ".pem"}' < ${LAMBDA_TASK_ROOT}/global-bundle.pem + +# Import certificates into the truststore +RUN for CERT in rds-ca-*; do \ + alias=$(openssl x509 -noout -text -in $CERT | perl -ne 'next unless /Subject:/; s/.*(CN=|CN = )//; print') && \ + echo "Importing $alias" && \ + keytool -import -file ${CERT} -alias "${alias}" -storepass ${storepassword} -keystore ${truststore} -noprompt && \ + rm $CERT; \ + done + +# Clean up +RUN rm ${LAMBDA_TASK_ROOT}/global-bundle.pem + +# Optional: List the
content of the trust store (for verification) +RUN echo "Trust store content is: " && \ + keytool -list -v -keystore "$truststore" -storepass ${storepassword} | grep Alias | cut -d " " -f3- | while read alias; do \ + expiry=$(keytool -list -v -keystore "$truststore" -storepass ${storepassword} -alias "${alias}" | grep Valid | perl -ne 'if(/until: (.*?)\n/) { print "$1\n"; }'); \ + echo " Certificate ${alias} expires in '$expiry'"; \ + done + +# Set the CMD to your handler CMD [ "com.amazonaws.athena.connectors.docdb.DocDBCompositeHandler" ] \ No newline at end of file diff --git a/athena-docdb/athena-docdb-connection.yaml b/athena-docdb/athena-docdb-connection.yaml new file mode 100644 index 0000000000..1f1d6e0841 --- /dev/null +++ b/athena-docdb/athena-docdb-connection.yaml @@ -0,0 +1,164 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaDocumentDBConnectorWithGlueConnection + Description: This connector enables Amazon Athena to communicate with your DocumentDB instance(s), making your DocumentDB data accessible via SQL. + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." 
+ Type: String + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: 'List<AWS::EC2::SecurityGroup::Id>' + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: 'List<AWS::EC2::Subnet::Id>' + SecretName: + Description: 'The name or prefix of a set of names within Secrets Manager that this function should have access to. (e.g. hbase-*).' + Type: String + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [!Equals [!Ref KmsKeyId, ""]] + NotHasLambdaRole: !Equals [!Ref LambdaRoleArn, ""] + CreateKmsPolicy: !And [!Condition HasKmsKeyId, !Condition NotHasLambdaRole] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-docdb:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with DocumentDB, making your DocumentDB data accessible via SQL."
+ Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:*:*:secret:${SecretName}*' + - Action: + - glue:GetTableVersions + - glue:GetPartitions + - glue:GetTables + - glue:GetTableVersion + - glue:GetDatabases + - glue:GetTable + - glue:GetPartition + - glue:GetDatabase + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 
'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBCompositeHandler.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBCompositeHandler.java index 2810491031..e88dfdd57e 100644 --- a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBCompositeHandler.java +++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBCompositeHandler.java @@ -30,6 +30,6 @@ public class DocDBCompositeHandler { public DocDBCompositeHandler() { - super(new DocDBMetadataHandler(System.getenv()), new DocDBRecordHandler(System.getenv())); + super(new DocDBMetadataHandler(new DocDBEnvironmentProperties().createEnvironment()), new DocDBRecordHandler(new DocDBEnvironmentProperties().createEnvironment())); } } diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBConnectionFactory.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBConnectionFactory.java index 715f2a4104..3e94cd49a0 100644 --- a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBConnectionFactory.java +++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBConnectionFactory.java @@ -53,6 +53,12 @@ public synchronized MongoClient getOrCreateConn(String connStr) MongoClient result = clientCache.get(connStr); if (result == null || !connectionTest(result)) { + //Setup SSL Trust Store: + if (connStr.toLowerCase().contains("ssl=true")) { + 
logger.info("MongoClient is using SSL; thus setting up System properties for trust store"); + System.setProperty("javax.net.ssl.trustStore", "rds-truststore.jks"); + System.setProperty("javax.net.ssl.trustStorePassword", "federationStorePass"); + } result = MongoClients.create(connStr); clientCache.put(connStr, result); } diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBEnvironmentProperties.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBEnvironmentProperties.java new file mode 100644 index 0000000000..60e79d5792 --- /dev/null +++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBEnvironmentProperties.java @@ -0,0 +1,49 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.docdb; + +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; + +import java.util.HashMap; +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DEFAULT_DOCDB; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HOST; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.JDBC_PARAMS; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.PORT; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SECRET_NAME; + +public class DocDBEnvironmentProperties extends EnvironmentProperties +{ + @Override + public Map<String, String> connectionPropertiesToEnvironment(Map<String, String> connectionProperties) + { + Map<String, String> environment = new HashMap<>(); + + StringBuilder connectionString = new StringBuilder("mongodb://${"); + connectionString.append(connectionProperties.get(SECRET_NAME)).append("}@"); + connectionString.append(connectionProperties.get(HOST)).append(":").append(connectionProperties.get(PORT)); + if (connectionProperties.containsKey(JDBC_PARAMS)) { + connectionString.append("/?").append(connectionProperties.get(JDBC_PARAMS)); + } + environment.put(DEFAULT_DOCDB, connectionString.toString()); + return environment; + } +} diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBMetadataHandler.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBMetadataHandler.java index 191269fbd6..5b21336bab 100644 --- a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBMetadataHandler.java +++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBMetadataHandler.java @@ -91,6 +91,8 @@ public class DocDBMetadataHandler //The Env variable name used to store the default DocDB connection string if no catalog specific //env variable is set.
private static final String DEFAULT_DOCDB = "default_docdb"; + //The env secret_name to use if defined + private static final String SECRET_NAME = "secret_name"; //The Glue table property that indicates that a table matching the name of an DocDB table //is indeed enabled for use by this connector. private static final String DOCDB_METADATA_FLAG = "docdb-metadata-flag"; @@ -130,10 +132,19 @@ protected DocDBMetadataHandler( private MongoClient getOrCreateConn(MetadataRequest request) { - String endpoint = resolveSecrets(getConnStr(request)); + String connStr = getConnStr(request); + if (configOptions.containsKey(SECRET_NAME) && !hasEmbeddedSecret(connStr)) { + connStr = connStr.substring(0, 10) + "${" + configOptions.get(SECRET_NAME) + "}@" + connStr.substring(10); + } + String endpoint = resolveSecrets(connStr); return connectionFactory.getOrCreateConn(endpoint); } + private boolean hasEmbeddedSecret(String connStr) + { + return connStr.contains("${"); + } + /** * Retrieves the DocDB connection details from an env variable matching the catalog name, if no such * env variable exists we fall back to the default env variable defined by DEFAULT_DOCDB. diff --git a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBRecordHandler.java b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBRecordHandler.java index 4b0459f57e..d1d536ee73 100644 --- a/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBRecordHandler.java +++ b/athena-docdb/src/main/java/com/amazonaws/athena/connectors/docdb/DocDBRecordHandler.java @@ -65,6 +65,8 @@ public class DocDBRecordHandler //Used to denote the 'type' of this connector for diagnostic purposes. private static final String SOURCE_TYPE = "documentdb"; + //The env secret_name to use if defined + private static final String SECRET_NAME = "secret_name"; //Controls the page size for fetching batches of documents from the MongoDB client. 
private static final int MONGO_QUERY_BATCH_SIZE = 100; @@ -103,14 +105,22 @@ protected DocDBRecordHandler(S3Client amazonS3, SecretsManagerClient secretsMana */ private MongoClient getOrCreateConn(Split split) { - String conStr = split.getProperty(DOCDB_CONN_STR); - if (conStr == null) { + String connStr = split.getProperty(DOCDB_CONN_STR); + if (connStr == null) { throw new RuntimeException(DOCDB_CONN_STR + " Split property is null! Unable to create connection."); } - String endpoint = resolveSecrets(conStr); + if (configOptions.containsKey(SECRET_NAME) && !hasEmbeddedSecret(connStr)) { + connStr = connStr.substring(0, 10) + "${" + configOptions.get(SECRET_NAME) + "}@" + connStr.substring(10); + } + String endpoint = resolveSecrets(connStr); return connectionFactory.getOrCreateConn(endpoint); } + private boolean hasEmbeddedSecret(String connStr) + { + return connStr.contains("${"); + } + private static Map documentAsMap(Document document, boolean caseInsensitive) { logger.info("documentAsMap: caseInsensitive: {}", caseInsensitive); diff --git a/athena-docdb/src/test/java/com/amazonaws/athena/connectors/docdb/DocDBEnvironmentPropertiesTest.java b/athena-docdb/src/test/java/com/amazonaws/athena/connectors/docdb/DocDBEnvironmentPropertiesTest.java new file mode 100644 index 0000000000..5b5362bfad --- /dev/null +++ b/athena-docdb/src/test/java/com/amazonaws/athena/connectors/docdb/DocDBEnvironmentPropertiesTest.java @@ -0,0 +1,67 @@ +/*- + * #%L + * athena-mongodb + * %% + * Copyright (C) 2019 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.docdb; + +import org.junit.Test; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DEFAULT_DOCDB; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HOST; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.JDBC_PARAMS; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.PORT; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SECRET_NAME; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class DocDBEnvironmentPropertiesTest +{ + @Test + public void connectionPropertiesToEnvironmentTest() + throws IOException + { + Map connectionProperties = new HashMap<>(); + connectionProperties.put(HOST, "localhost"); + connectionProperties.put(PORT, "1234"); + connectionProperties.put(JDBC_PARAMS, "key=value&key2=value2"); + connectionProperties.put(SECRET_NAME, "secret"); + String connectionString = "mongodb://${secret}@localhost:1234/?key=value&key2=value2"; + + Map docdbConnectionProperties = new DocDBEnvironmentProperties().connectionPropertiesToEnvironment(connectionProperties); + assertTrue(docdbConnectionProperties.containsKey(DEFAULT_DOCDB)); + assertEquals(connectionString, docdbConnectionProperties.get(DEFAULT_DOCDB)); + } + + @Test + public void noJdbcParamsConnectionProperties() + { + Map connectionProperties = new HashMap<>(); + connectionProperties.put(HOST, "localhost"); + connectionProperties.put(PORT, "1234"); + connectionProperties.put(SECRET_NAME, "secret"); + String connectionString = "mongodb://${secret}@localhost:1234"; + + Map docdbConnectionProperties = new 
DocDBEnvironmentProperties().connectionPropertiesToEnvironment(connectionProperties); + assertTrue(docdbConnectionProperties.containsKey(DEFAULT_DOCDB)); + assertEquals(connectionString, docdbConnectionProperties.get(DEFAULT_DOCDB)); + } +} diff --git a/athena-dynamodb/athena-dynamodb-connection.yaml b/athena-dynamodb/athena-dynamodb-connection.yaml new file mode 100644 index 0000000000..600bb49697 --- /dev/null +++ b/athena-dynamodb/athena-dynamodb-connection.yaml @@ -0,0 +1,147 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaDynamoDBConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with DynamoDB, making your tables accessible via SQL.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." 
+ Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [!Equals [!Ref KmsKeyId, ""]] + NotHasLambdaRole: !Equals [!Ref LambdaRoleArn, ""] + CreateKmsPolicy: !And [!Condition HasKmsKeyId, !Condition NotHasLambdaRole] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-dynamodb:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with DynamoDB, making your tables accessible via SQL" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - dynamodb:DescribeTable + - dynamodb:ListSchemas + - dynamodb:ListTables + - dynamodb:Query + - dynamodb:Scan + - dynamodb:PartiQLSelect + - glue:GetTableVersions + - glue:GetPartitions + - glue:GetTables + - glue:GetTableVersion + - glue:GetDatabases + - glue:GetTable + - glue:GetPartition + - glue:GetDatabase + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - 
s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + Roles: + - !Ref FunctionRole + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - kms:GenerateRandom + Effect: Allow + Resource: '*' + - Action: + - kms:GenerateDataKey + Effect: Allow + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole diff --git a/athena-dynamodb/athena-dynamodb.yaml b/athena-dynamodb/athena-dynamodb.yaml index fa766fed1f..c0b46b7dc6 100644 --- a/athena-dynamodb/athena-dynamodb.yaml +++ b/athena-dynamodb/athena-dynamodb.yaml @@ -130,42 +130,42 @@ Resources: Version: 2012-10-17 Statement: - Action: - - dynamodb:DescribeTable - - dynamodb:ListTables - - dynamodb:Query - - dynamodb:Scan - - dynamodb:PartiQLSelect - - glue:GetTableVersions - - glue:GetPartitions - - glue:GetTables - - glue:GetTableVersion - - glue:GetDatabases - - glue:GetTable - - glue:GetPartition - - glue:GetDatabase - - athena:GetQueryExecution + - dynamodb:DescribeTable + - dynamodb:ListTables + - dynamodb:Query + - dynamodb:Scan + - dynamodb:PartiQLSelect + - glue:GetTableVersions + - glue:GetPartitions + - glue:GetTables + - glue:GetTableVersion + - glue:GetDatabases + - glue:GetTable + - glue:GetPartition + - glue:GetDatabase + - athena:GetQueryExecution Effect: Allow Resource: '*' - Action: - - s3:GetObject - - 
s3:ListBucket - - s3:GetBucketLocation - - s3:GetObjectVersion - - s3:PutObject - - s3:PutObjectAcl - - s3:GetLifecycleConfiguration - - s3:PutLifecycleConfiguration - - s3:DeleteObject + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject Effect: Allow Resource: - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName} - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName}/* - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket Roles: - !Ref FunctionRole @@ -178,7 +178,7 @@ Resources: Version: 2012-10-17 Statement: - Action: - - kms:GenerateRandom + - kms:GenerateRandom Effect: Allow Resource: '*' - Action: @@ -186,4 +186,4 @@ Resources: Effect: Allow Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KMSKeyId}" Roles: - - !Ref FunctionRole + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBCompositeHandler.java b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBCompositeHandler.java index 5a17e74600..23a5887535 100644 --- a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBCompositeHandler.java +++ b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/DynamoDBCompositeHandler.java @@ -19,6 +19,7 @@ */ package com.amazonaws.athena.connectors.dynamodb; +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler; /** @@ -30,6 +31,6 @@ public class DynamoDBCompositeHandler { public DynamoDBCompositeHandler() { - super(new DynamoDBMetadataHandler(System.getenv()), new 
DynamoDBRecordHandler(System.getenv())); + super(new DynamoDBMetadataHandler(new EnvironmentProperties().createEnvironment()), new DynamoDBRecordHandler(new EnvironmentProperties().createEnvironment())); } } diff --git a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/util/DDBTableUtils.java b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/util/DDBTableUtils.java index 98332f78c1..c1a98945f2 100644 --- a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/util/DDBTableUtils.java +++ b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/util/DDBTableUtils.java @@ -21,7 +21,6 @@ import com.amazonaws.athena.connector.lambda.ThrottlingInvoker; import com.amazonaws.athena.connector.lambda.data.SchemaBuilder; -import com.amazonaws.athena.connector.lambda.exceptions.AthenaConnectorException; import com.amazonaws.athena.connectors.dynamodb.model.DynamoDBIndex; import com.amazonaws.athena.connectors.dynamodb.model.DynamoDBTable; import com.google.common.collect.ImmutableList; @@ -42,8 +41,6 @@ import software.amazon.awssdk.services.dynamodb.model.ScanRequest; import software.amazon.awssdk.services.dynamodb.model.ScanResponse; import software.amazon.awssdk.services.dynamodb.model.TableDescription; -import software.amazon.awssdk.services.glue.model.ErrorDetails; -import software.amazon.awssdk.services.glue.model.FederationSourceErrorCode; import java.util.List; import java.util.Map; @@ -170,7 +167,7 @@ public static Schema peekTableForSchema(String tableName, ThrottlingInvoker invo logger.warn("Failed to retrieve table schema due to KMS issue, empty schema for table: {}. 
Error Message: {}", tableName, runtimeException.getMessage()); } else { - throw new AthenaConnectorException(runtimeException.getMessage(), ErrorDetails.builder().errorCode(FederationSourceErrorCode.OPERATION_TIMEOUT_EXCEPTION.toString()).build()); + throw runtimeException; } } return schemaBuilder.build(); diff --git a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/util/DDBTypeUtils.java b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/util/DDBTypeUtils.java index d1abcdefaa..8412b584f5 100644 --- a/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/util/DDBTypeUtils.java +++ b/athena-dynamodb/src/main/java/com/amazonaws/athena/connectors/dynamodb/util/DDBTypeUtils.java @@ -126,7 +126,7 @@ else if (enhancedAttributeValue.isSetOfBytes()) { return new Field(key, FieldType.nullable(Types.MinorType.LIST.getType()), Collections.singletonList(child)); } else if (enhancedAttributeValue.isSetOfNumbers()) { - Field child = new Field(key, FieldType.nullable(Types.MinorType.DECIMAL.getType()), null); + Field child = new Field(key, FieldType.nullable(new ArrowType.Decimal(38, 9)), null); return new Field(key, FieldType.nullable(Types.MinorType.LIST.getType()), Collections.singletonList(child)); } else if (enhancedAttributeValue.isSetOfStrings()) { diff --git a/athena-elasticsearch/athena-elasticsearch-connection.yaml b/athena-elasticsearch/athena-elasticsearch-connection.yaml new file mode 100644 index 0000000000..e2c051882f --- /dev/null +++ b/athena-elasticsearch/athena-elasticsearch-connection.yaml @@ -0,0 +1,175 @@ +Transform: 'AWS::Serverless-2016-10-31' + +Metadata: + AWS::ServerlessRepo::Application: + Name: AthenaElasticsearchConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your Elasticsearch instance(s).' 
+ Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: ['athena-federation'] + HomePageUrl: https://github.com/awslabs/aws-athena-query-federation + SemanticVersion: 2022.47.1 + SourceCodeUrl: https://github.com/awslabs/aws-athena-query-federation + +# Parameters are CloudFormation features to pass input +# to your template when you create a stack +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena Elasticsearch Federation secret names can be prefixed with "AthenaESFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaESFederation*". Parameter value in this case should be "AthenaESFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + Default: "" + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: '(Optional) Provide one or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) Provide one or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. 
Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join [ "", !Ref SecurityGroupIds ], "" ] ] + HasSubnets: !Not [ !Equals [ !Join [ "", !Ref SubnetIds ], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-elasticsearch:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "The Elasticsearch Lambda Connector provides Athena users the ability to query data stored on Elasticsearch clusters." 
+ Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - es:List* + - es:Describe* + - es:ESHttp* + - glue:GetTableVersions + - glue:GetPartitions + - glue:GetTables + - glue:GetTableVersion + - glue:GetDatabases + - glue:GetTable + - glue:GetPartition + - glue:GetDatabase + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 
'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-elasticsearch/athena-elasticsearch.yaml b/athena-elasticsearch/athena-elasticsearch.yaml index 7a57683d0c..f99f6800c1 100644 --- a/athena-elasticsearch/athena-elasticsearch.yaml +++ b/athena-elasticsearch/athena-elasticsearch.yaml @@ -65,8 +65,8 @@ Parameters: Type: Number IsVPCAccess: AllowedValues: - - true - - false + - true + - false Default: false Description: "If ElasticSearch cluster is in VPC select true, [true, false] (default is false)" Type: String @@ -140,15 +140,15 @@ Resources: - S3CrudPolicy: BucketName: !Ref SpillBucket VpcConfig: - SecurityGroupIds: - !If - - IsVPCAccessSelected - - - !Ref SecurityGroupIds - - !Ref "AWS::NoValue" - SubnetIds: - !If - - IsVPCAccessSelected - - - !Ref SubnetIds - - !Ref "AWS::NoValue" \ No newline at end of file + SecurityGroupIds: + !If + - IsVPCAccessSelected + - + !Ref SecurityGroupIds + - !Ref "AWS::NoValue" + SubnetIds: + !If + - IsVPCAccessSelected + - + !Ref SubnetIds + - !Ref "AWS::NoValue" \ No newline at end of file diff --git a/athena-elasticsearch/pom.xml b/athena-elasticsearch/pom.xml index 0d040efdbc..e25948dacf 100644 --- a/athena-elasticsearch/pom.xml +++ b/athena-elasticsearch/pom.xml @@ -73,7 +73,7 @@ org.elasticsearch.client elasticsearch-rest-client - 8.15.3 + 8.16.1 diff --git 
a/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/AwsRestHighLevelClientFactory.java b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/AwsRestHighLevelClientFactory.java index 6286d64eda..3b1bf30f0c 100644 --- a/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/AwsRestHighLevelClientFactory.java +++ b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/AwsRestHighLevelClientFactory.java @@ -84,6 +84,18 @@ public synchronized AwsRestHighLevelClient getOrCreateClient(String endpoint) return client; } + public synchronized AwsRestHighLevelClient getOrCreateClient(String endpoint, String username, String password) + { + AwsRestHighLevelClient client = clientCache.get(endpoint); + + if (client == null) { + client = createClient(endpoint, username, password); + clientCache.put(endpoint, client); + } + + return client; + } + /** * Creates a new Elasticsearch REST client. If useAwsCredentials = true, the client is injected with AWS * credentials. 
If useAwsCredentials = false and username/password are extracted using the credentialsPattern, @@ -99,12 +111,14 @@ public synchronized AwsRestHighLevelClient getOrCreateClient(String endpoint) private AwsRestHighLevelClient createClient(String endpoint) { if (useAwsCredentials) { + logger.debug("Creating Client using Aws Credentials."); return new AwsRestHighLevelClient.Builder(endpoint) .withCredentials(DefaultCredentialsProvider.create()).build(); } else { Matcher credentials = credentialsPattern.matcher(endpoint); if (credentials.find()) { + logger.debug("Creating Client using embedded Secret in Connection String."); String usernameAndPassword = credentials.group(); String username = usernameAndPassword.substring(0, usernameAndPassword.indexOf("@")); String password = usernameAndPassword.substring(usernameAndPassword.indexOf("@") + 1, @@ -115,9 +129,14 @@ private AwsRestHighLevelClient createClient(String endpoint) } } - logger.debug("Default client w/o credentials"); - + logger.debug("Creating default client w/o credentials"); // Default client w/o credentials. 
return new AwsRestHighLevelClient.Builder(endpoint).build(); } + + private AwsRestHighLevelClient createClient(String endpoint, String username, String password) + { + logger.debug("Creating Client using credentials provided by Glue Connectionn secret_name property"); + return new AwsRestHighLevelClient.Builder(endpoint).withCredentials(username, password).build(); + } } diff --git a/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchCompositeHandler.java b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchCompositeHandler.java index 4cc082596a..3e20875455 100644 --- a/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchCompositeHandler.java +++ b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchCompositeHandler.java @@ -19,6 +19,7 @@ */ package com.amazonaws.athena.connectors.elasticsearch; +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler; /** @@ -30,6 +31,6 @@ public class ElasticsearchCompositeHandler { public ElasticsearchCompositeHandler() { - super(new ElasticsearchMetadataHandler(System.getenv()), new ElasticsearchRecordHandler(System.getenv())); + super(new ElasticsearchMetadataHandler(new EnvironmentProperties().createEnvironment()), new ElasticsearchRecordHandler(new EnvironmentProperties().createEnvironment())); } } diff --git a/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchCredential.java b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchCredential.java new file mode 100644 index 0000000000..92354c5c6e --- /dev/null +++ b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchCredential.java @@ -0,0 +1,73 @@ +/*- + * #%L + * athena-elasticsearch + * %% 
+ * Copyright (C) 2019 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.elasticsearch; + +import org.apache.commons.lang3.Validate; + +import java.util.Objects; + +/** + * Encapsulates database connection user name and password information. + */ +public class ElasticsearchCredential +{ + private final String user; + private final String password; + + /** + * @param user Database user name. + * @param password Database password. 
+ */ + public ElasticsearchCredential(String user, String password) + { + this.user = Validate.notBlank(user, "User must not be blank"); + this.password = Validate.notBlank(password, "Password must not be blank"); + } + + public String getUser() + { + return user; + } + + public String getPassword() + { + return password; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ElasticsearchCredential that = (ElasticsearchCredential) o; + return Objects.equals(getUser(), that.getUser()) && + Objects.equals(getPassword(), that.getPassword()); + } + + @Override + public int hashCode() + { + return Objects.hash(getUser(), getPassword()); + } +} diff --git a/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchCredentialProvider.java b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchCredentialProvider.java new file mode 100644 index 0000000000..9e8f9768da --- /dev/null +++ b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchCredentialProvider.java @@ -0,0 +1,68 @@ +/*- + * #%L + * athena-elasticsearch + * %% + * Copyright (C) 2019 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.elasticsearch; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Encapsulates Elasticsearch secrets deserialization, stored in following JSON format (showing minimal required for extraction): + * + * { + * "username": "${user}", + * "password": "${password}" + * } + * + */ +public class ElasticsearchCredentialProvider +{ + private static final Logger LOGGER = LoggerFactory.getLogger(ElasticsearchCredentialProvider.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private final ElasticsearchCredential elasticsearchCredential; + + public ElasticsearchCredentialProvider(final String secretString) + { + Map elasticsearchSecrets; + try { + Map originalMap = OBJECT_MAPPER.readValue(secretString, HashMap.class); + + elasticsearchSecrets = new HashMap<>(); + for (Map.Entry entry : originalMap.entrySet()) { + elasticsearchSecrets.put(entry.getKey().toLowerCase(), entry.getValue()); + } + } + catch (IOException ioException) { + throw new RuntimeException("Could not deserialize Elasticsearch credentials into HashMap", ioException); + } + + this.elasticsearchCredential = new ElasticsearchCredential(elasticsearchSecrets.get("username"), elasticsearchSecrets.get("password")); + } + + public ElasticsearchCredential getCredential() + { + return this.elasticsearchCredential; + } +} diff --git a/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchMetadataHandler.java b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchMetadataHandler.java index 836f0635b4..d4f2a0e531 100644 --- a/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchMetadataHandler.java +++ 
b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchMetadataHandler.java @@ -45,6 +45,7 @@ import org.apache.arrow.util.VisibleForTesting; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.commons.lang3.StringUtils; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.indices.GetDataStreamRequest; import org.elasticsearch.client.indices.GetIndexRequest; @@ -58,6 +59,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -89,8 +91,19 @@ public class ElasticsearchMetadataHandler // this environment variable is fed into the domainSplitter to populate the domainMap where the key = domain-name, // and the value = endpoint. private static final String DOMAIN_MAPPING = "domain_mapping"; + + // Individual domain endpoint which is associated with a Glue Connection + private static final String DOMAIN_ENDPOINT = "domain_endpoint"; + // Secret Name that provides credentials + private static final String SECRET_NAME = "secret_name"; + + // credential keys of secret + protected static final String SECRET_USERNAME = "username"; + protected static final String SECRET_PASSWORD = "password"; + // A Map of the domain-names and their respective endpoints. private Map domainMap; + private Map secretMap; // Env. variable that holds the query timeout period for the Cluster-Health queries. 
private static final String QUERY_TIMEOUT_CLUSTER = "query_timeout_cluster"; @@ -120,12 +133,13 @@ public ElasticsearchMetadataHandler(Map configOptions) { super(SOURCE_TYPE, configOptions); this.awsGlue = getAwsGlue(); + this.secretMap = new HashMap<>(); this.autoDiscoverEndpoint = configOptions.getOrDefault(AUTO_DISCOVER_ENDPOINT, "").equalsIgnoreCase("true"); this.domainMapProvider = new ElasticsearchDomainMapProvider(this.autoDiscoverEndpoint); - this.domainMap = domainMapProvider.getDomainMap(resolveSecrets(configOptions.getOrDefault(DOMAIN_MAPPING, ""))); + this.domainMap = resolveDomainMap(configOptions); this.clientFactory = new AwsRestHighLevelClientFactory(this.autoDiscoverEndpoint); this.glueTypeMapper = new ElasticsearchGlueTypeMapper(); - this.queryTimeout = Long.parseLong(configOptions.getOrDefault(QUERY_TIMEOUT_CLUSTER, "")); + this.queryTimeout = Long.parseLong(configOptions.getOrDefault(QUERY_TIMEOUT_CLUSTER, "10")); } @VisibleForTesting @@ -143,6 +157,7 @@ protected ElasticsearchMetadataHandler( { super(awsGlue, keyFactory, awsSecretsManager, athena, SOURCE_TYPE, spillBucket, spillPrefix, configOptions); this.awsGlue = awsGlue; + this.secretMap = new HashMap<>(); this.domainMapProvider = domainMapProvider; this.domainMap = this.domainMapProvider.getDomainMap(null); this.clientFactory = clientFactory; @@ -150,6 +165,25 @@ protected ElasticsearchMetadataHandler( this.queryTimeout = queryTimeout; } + protected Map resolveDomainMap(Map config) + { + String secretName = config.getOrDefault(SECRET_NAME, ""); + String domainEndpoint = config.getOrDefault(DOMAIN_ENDPOINT, ""); + if (StringUtils.isNotBlank(secretName) && StringUtils.isNotBlank(domainEndpoint)) { + logger.info("Using Secrets Manager provided by Glue Connection secret_name."); + this.secretMap.put(domainEndpoint.split("=")[0], new ElasticsearchCredentialProvider(getSecret(secretName))); + } + else { + logger.info("No secret_name provided as Config property."); + if 
(StringUtils.isBlank(domainEndpoint)) { + domainEndpoint = config.getOrDefault(DOMAIN_MAPPING, ""); + } + domainEndpoint = resolveSecrets(domainEndpoint); + } + + return domainMapProvider.getDomainMap(domainEndpoint); + } + /** * Used to get the list of domains (aka databases) for the Elasticsearch service. * @param allocator Tool for creating and managing Apache Arrow Blocks. @@ -186,7 +220,11 @@ public ListTablesResponse doListTables(BlockAllocator allocator, ListTablesReque logger.debug("doListTables: enter - " + request); String endpoint = getDomainEndpoint(request.getSchemaName()); - AwsRestHighLevelClient client = clientFactory.getOrCreateClient(endpoint); + String domain = request.getSchemaName(); + ElasticsearchCredentialProvider creds = secretMap.get(domain); + String username = creds != null ? creds.getCredential().getUser() : ""; + String password = creds != null ? creds.getCredential().getPassword() : ""; + AwsRestHighLevelClient client = creds != null ? clientFactory.getOrCreateClient(endpoint, username, password) : clientFactory.getOrCreateClient(endpoint); // get regular indices from ES, ignore all system indices starting with period `.` (e.g. .kibana, .tasks, etc...) Stream indicesStream = client.getAliases() .stream() @@ -245,8 +283,9 @@ public GetTableResponse doGetTable(BlockAllocator allocator, GetTableRequest req // Supplement GLUE catalog if not present. 
if (schema == null) { String index = request.getTableName().getTableName(); - String endpoint = getDomainEndpoint(request.getTableName().getSchemaName()); - schema = getSchema(index, endpoint); + String domain = request.getTableName().getSchemaName(); + String endpoint = getDomainEndpoint(domain); + schema = getSchema(index, endpoint, domain); } return new GetTableResponse(request.getCatalogName(), request.getTableName(), @@ -293,9 +332,12 @@ public GetSplitsResponse doGetSplits(BlockAllocator allocator, GetSplitsRequest domain = request.getTableName().getSchemaName(); indx = request.getTableName().getTableName(); } - String endpoint = getDomainEndpoint(domain); - AwsRestHighLevelClient client = clientFactory.getOrCreateClient(endpoint); + + ElasticsearchCredentialProvider creds = secretMap.get(domain); + String username = creds != null ? creds.getCredential().getUser() : ""; + String password = creds != null ? creds.getCredential().getPassword() : ""; + AwsRestHighLevelClient client = creds != null ? clientFactory.getOrCreateClient(endpoint, username, password) : clientFactory.getOrCreateClient(endpoint); // We send index request in case the table name is a data stream, a data stream can contains multiple indices which are created by ES // For non data stream, index name is same as table name GetIndexResponse indexResponse = client.indices().get(new GetIndexRequest(indx), RequestOptions.DEFAULT); @@ -303,7 +345,7 @@ public GetSplitsResponse doGetSplits(BlockAllocator allocator, GetSplitsRequest Set splits = Arrays.stream(indexResponse.getIndices()) .flatMap(index -> getShardsIDsFromES(client, index) // get all shards for an index. 
.stream() - .map(shardId -> new Split(makeSpillLocation(request), makeEncryptionKey(), ImmutableMap.of(domain, endpoint, SHARD_KEY, SHARD_VALUE + shardId.toString(), INDEX_KEY, index))) // make split for each (index + shardId) combination + .map(shardId -> new Split(makeSpillLocation(request), makeEncryptionKey(), ImmutableMap.of(SECRET_USERNAME, username, SECRET_PASSWORD, password, domain, endpoint, SHARD_KEY, SHARD_VALUE + shardId.toString(), INDEX_KEY, index))) // make split for each (index + shardId) combination ) .collect(Collectors.toSet()); @@ -328,17 +370,21 @@ public GetTableResponse doGetQueryPassthroughSchema(BlockAllocator allocator, Ge } queryPassthrough.verify(request.getQueryPassthroughArguments()); String index = request.getQueryPassthroughArguments().get(ElasticsearchQueryPassthrough.INDEX); - String endpoint = getDomainEndpoint(request.getQueryPassthroughArguments().get(ElasticsearchQueryPassthrough.SCHEMA)); - Schema schema = getSchema(index, endpoint); + String domain = request.getQueryPassthroughArguments().get(ElasticsearchQueryPassthrough.SCHEMA); + String endpoint = getDomainEndpoint(domain); + Schema schema = getSchema(index, endpoint, domain); return new GetTableResponse(request.getCatalogName(), request.getTableName(), (schema == null) ? SchemaBuilder.newBuilder().build() : schema, Collections.emptySet()); } - private Schema getSchema(String index, String endpoint) + private Schema getSchema(String index, String endpoint, String domain) { Schema schema; - AwsRestHighLevelClient client = clientFactory.getOrCreateClient(endpoint); + ElasticsearchCredentialProvider creds = secretMap.get(domain); + String username = creds != null ? creds.getCredential().getUser() : ""; + String password = creds != null ? creds.getCredential().getPassword() : ""; + AwsRestHighLevelClient client = creds != null ? 
clientFactory.getOrCreateClient(endpoint, username, password) : clientFactory.getOrCreateClient(endpoint); try { Map mappings = client.getMapping(index); schema = ElasticsearchSchemaUtils.parseMapping(mappings); diff --git a/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchRecordHandler.java b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchRecordHandler.java index 1d90956ad1..2bc3ad6873 100644 --- a/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchRecordHandler.java +++ b/athena-elasticsearch/src/main/java/com/amazonaws/athena/connectors/elasticsearch/ElasticsearchRecordHandler.java @@ -29,6 +29,7 @@ import com.amazonaws.athena.connectors.elasticsearch.qpt.ElasticsearchQueryPassthrough; import org.apache.arrow.util.VisibleForTesting; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.commons.lang3.StringUtils; import org.elasticsearch.action.search.ClearScrollRequest; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; @@ -93,7 +94,7 @@ public ElasticsearchRecordHandler(Map configOptions) this.typeUtils = new ElasticsearchTypeUtils(); this.clientFactory = new AwsRestHighLevelClientFactory(configOptions.getOrDefault(AUTO_DISCOVER_ENDPOINT, "").equalsIgnoreCase("true")); - this.queryTimeout = Long.parseLong(configOptions.getOrDefault(QUERY_TIMEOUT_SEARCH, "")); + this.queryTimeout = Long.parseLong(configOptions.getOrDefault(QUERY_TIMEOUT_SEARCH, "720")); this.scrollTimeout = Long.parseLong(configOptions.getOrDefault(SCROLL_TIMEOUT, "60")); } @@ -153,13 +154,16 @@ protected void readWithConstraint(BlockSpiller spiller, ReadRecordsRequest recor String endpoint = recordsRequest.getSplit().getProperty(domain); String shard = recordsRequest.getSplit().getProperty(ElasticsearchMetadataHandler.SHARD_KEY); + String username = 
recordsRequest.getSplit().getProperty(ElasticsearchMetadataHandler.SECRET_USERNAME); + String password = recordsRequest.getSplit().getProperty(ElasticsearchMetadataHandler.SECRET_PASSWORD); + boolean useSecret = StringUtils.isNotBlank(username) && StringUtils.isNotBlank(password); logger.info("readWithConstraint - enter - Domain: {}, Index: {}, Mapping: {}, Query: {}", domain, index, recordsRequest.getSchema(), query); long numRows = 0; if (queryStatusChecker.isQueryRunning()) { - AwsRestHighLevelClient client = clientFactory.getOrCreateClient(endpoint); + AwsRestHighLevelClient client = useSecret ? clientFactory.getOrCreateClient(endpoint, username, password) : clientFactory.getOrCreateClient(endpoint); try { // Create field extractors for all data types in the schema. GeneratedRowWriter rowWriter = createFieldExtractors(recordsRequest); diff --git a/athena-federation-sdk/pom.xml b/athena-federation-sdk/pom.xml index 75269ede6b..9d1d5af124 100644 --- a/athena-federation-sdk/pom.xml +++ b/athena-federation-sdk/pom.xml @@ -397,7 +397,7 @@ org.codehaus.mojo license-maven-plugin - 2.4.0 + 2.5.0 false false diff --git a/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/connection/EnvironmentConstants.java b/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/connection/EnvironmentConstants.java new file mode 100644 index 0000000000..2b16ad7e61 --- /dev/null +++ b/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/connection/EnvironmentConstants.java @@ -0,0 +1,55 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connector.lambda.connection;
+
+/**
+ * Constant names shared by the connection/environment handling code: the keys of Lambda
+ * environment variables and the names of Glue connection properties.
+ */
+public final class EnvironmentConstants
+{
+    // Constants holder only; the private constructor prevents instantiation.
+    private EnvironmentConstants() {}
+
+    // EnvironmentProperties passes this value to Duration.ofMillis(...) as the connection timeout
+    // of the Glue HTTP client, i.e. it is expressed in milliseconds.
+    public static final int CONNECT_TIMEOUT = 2000;
+
+    // Lambda environment variable keys
+    public static final String DEFAULT_GLUE_CONNECTION = "glue_connection";
+    public static final String SECRET_NAME = "secret_name";
+    public static final String SPILL_KMS_KEY_ID = "spill_kms_key_id";
+    public static final String KMS_KEY_ID = "kms_key_id";
+    public static final String DEFAULT = "default";
+    public static final String DEFAULT_DOCDB = "default_docdb";
+    public static final String DEFAULT_HBASE = "default_hbase";
+
+    // glue connection property names
+    public static final String HOST = "HOST";
+    public static final String PORT = "PORT";
+    public static final String JDBC_PARAMS = "JDBC_PARAMS";
+    public static final String DATABASE = "DATABASE";
+    public static final String SESSION_CONFS = "SESSION_CONFS";
+    public static final String HIVE_CONFS = "HIVE_CONFS";
+    public static final String HIVE_VARS = "HIVE_VARS";
+    public static final String WAREHOUSE = "WAREHOUSE";
+    public static final String SCHEMA = "SCHEMA";
+    public static final String PROJECT_ID = "PROJECT_ID";
+    public static final String CLUSTER_RES_ID = "CLUSTER_RESOURCE_ID";
+    public static final String GRAPH_TYPE = "GRAPH_TYPE";
+    public static final String HBASE_PORT = "HBASE_PORT";
+    public static final String ZOOKEEPER_PORT = "ZOOKEEPER_PORT";
+    public static final String CUSTOM_AUTH_TYPE = "CUSTOM_AUTH_TYPE";
+    public static final String GLUE_CERTIFICATES_S3_REFERENCE = "CERTIFICATE_S3_REFERENCE";
+    public static final String ENFORCE_SSL = "ENFORCE_SSL";
+}
diff --git a/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/connection/EnvironmentProperties.java b/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/connection/EnvironmentProperties.java
new file mode 100644
index 0000000000..a94bde43bb
--- /dev/null
+++ b/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/connection/EnvironmentProperties.java
@@ -0,0 +1,135 @@
+/*-
+ * #%L
+ * Amazon Athena Query Federation SDK
+ * %%
+ * Copyright (C) 2019 - 2024 Amazon Web Services
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connector.lambda.connection;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.http.apache.ApacheHttpClient;
+import software.amazon.awssdk.services.glue.GlueClient;
+import software.amazon.awssdk.services.glue.GlueClientBuilder;
+import software.amazon.awssdk.services.glue.model.AuthenticationConfiguration;
+import software.amazon.awssdk.services.glue.model.Connection;
+import software.amazon.awssdk.services.glue.model.GetConnectionRequest;
+import software.amazon.awssdk.services.glue.model.GetConnectionResponse;
+
+import java.net.URI;
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.CONNECT_TIMEOUT;
+import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DEFAULT_GLUE_CONNECTION;
+import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.KMS_KEY_ID;
+import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SECRET_NAME;
+import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SPILL_KMS_KEY_ID;
+
+/**
+ * Builds the configuration map handed to a connector by merging the properties of an optional
+ * Glue connection (named by the 'glue_connection' environment variable) with the Lambda
+ * environment variables. Lambda environment variables win on key collisions.
+ */
+public class EnvironmentProperties
+{
+    protected static final Logger logger = LoggerFactory.getLogger(EnvironmentProperties.class);
+
+    /**
+     * Creates the effective environment for the connector.
+     *
+     * When the 'glue_connection' environment variable is set, the Glue connection is fetched and its
+     * connection properties, the secret name derived from its authentication configuration, and its
+     * Athena properties are added first; the Lambda environment variables are applied last.
+     *
+     * @return a new mutable map with the merged configuration
+     * @throws RuntimeException if the Glue connection cannot be retrieved
+     */
+    public Map<String, String> createEnvironment() throws RuntimeException
+    {
+        HashMap<String, String> lambdaEnvironment = new HashMap<>(System.getenv());
+        String glueConnectionName = lambdaEnvironment.get(DEFAULT_GLUE_CONNECTION);
+
+        HashMap<String, String> connectionEnvironment = new HashMap<>();
+        if (StringUtils.isNotBlank(glueConnectionName)) {
+            connectionEnvironment.put(DEFAULT_GLUE_CONNECTION, glueConnectionName);
+            Connection connection = getGlueConnection(glueConnectionName);
+            Map<String, String> connectionPropertiesWithSecret = new HashMap<>(connection.connectionPropertiesAsStrings());
+            connectionPropertiesWithSecret.putAll(authenticationConfigurationToMap(connection.authenticationConfiguration()));
+
+            connectionEnvironment.putAll(connectionPropertiesToEnvironment(connectionPropertiesWithSecret));
+            connectionEnvironment.putAll(athenaPropertiesToEnvironment(connection.athenaProperties()));
+        }
+
+        connectionEnvironment.putAll(lambdaEnvironment); // Overwrite connection environment variables with lambda environment variables
+        return connectionEnvironment;
+    }
+
+    /**
+     * Fetches a Glue connection by name.
+     *
+     * Exactly one Glue client is built (pointing at the gamma endpoint when the USE_GAMMA_GLUE
+     * environment variable equals "true") and it is closed once the connection has been read.
+     *
+     * @param glueConnectionName name of the Glue connection to retrieve
+     * @return the connection returned by Glue
+     * @throws RuntimeException wrapping any failure; the original exception is kept as the cause
+     */
+    public Connection getGlueConnection(String glueConnectionName) throws RuntimeException
+    {
+        try {
+            Map<String, String> lambdaEnvironment = System.getenv();
+            GlueClientBuilder glueClientBuilder = GlueClient.builder()
+                    .httpClientBuilder(ApacheHttpClient
+                            .builder()
+                            .connectionTimeout(Duration.ofMillis(CONNECT_TIMEOUT)));
+            if (lambdaEnvironment.getOrDefault("USE_GAMMA_GLUE", "false").equals("true")) {
+                // Only the endpoint differs in gamma mode, so configure the same builder instead of
+                // building (and abandoning) a second client.
+                glueClientBuilder.endpointOverride(new URI(String.format("https://glue-gamma.%s.amazonaws.com", lambdaEnvironment.get("AWS_REGION"))));
+            }
+            try (GlueClient awsGlue = glueClientBuilder.build()) {
+                GetConnectionResponse glueConnection = awsGlue.getConnection(GetConnectionRequest.builder().name(glueConnectionName).build());
+                logger.debug("Successfully retrieved connection {}", glueConnectionName);
+                return glueConnection.connection();
+            }
+        }
+        catch (Exception err) {
+            logger.error("Failed to retrieve connection: {}, and parse the connection properties!", glueConnectionName, err);
+            // Same message as before, but keep the cause so the stack trace is not lost.
+            throw new RuntimeException(err.toString(), err);
+        }
+    }
+
+    /**
+     * Derives the 'secret_name' entry from the secret ARN of the authentication configuration.
+     *
+     * @param auth authentication configuration of the Glue connection, may be null
+     * @return a map holding 'secret_name' when a secret ARN is present, otherwise an empty map
+     */
+    private Map<String, String> authenticationConfigurationToMap(AuthenticationConfiguration auth)
+    {
+        Map<String, String> authMap = new HashMap<>();
+
+        if (auth != null && StringUtils.isNotBlank(auth.secretArn())) {
+            String[] splitArn = auth.secretArn().split(":");
+            String secretNameSegment = splitArn[splitArn.length - 1];
+            String[] secretNameWithRandom = secretNameSegment.split("-"); // 6 random characters at end. at least length of 2
+            String secretName;
+            if (secretNameWithRandom.length > 1) {
+                String[] secretNameArray = Arrays.copyOfRange(secretNameWithRandom, 0, secretNameWithRandom.length - 1);
+                secretName = String.join("-", secretNameArray); // add back the dashes
+            }
+            else {
+                // No dash-separated suffix to strip: keep the segment rather than producing an empty name.
+                secretName = secretNameSegment;
+            }
+            authMap.put(SECRET_NAME, secretName);
+        }
+        return authMap;
+    }
+
+    /**
+     * Maps glue athena properties to environment properties like 'kms_key_id'
+     *
+     * @param athenaProperties contains athena specific properties; null is treated as empty
+     * @return a copy of the properties in which 'spill_kms_key_id' has been renamed to 'kms_key_id'
+     * */
+    public Map<String, String> athenaPropertiesToEnvironment(Map<String, String> athenaProperties)
+    {
+        Map<String, String> athenaPropertiesModified = athenaProperties == null ? new HashMap<>() : new HashMap<>(athenaProperties);
+        if (athenaPropertiesModified.containsKey(SPILL_KMS_KEY_ID)) {
+            String kmsKeyId = athenaPropertiesModified.remove(SPILL_KMS_KEY_ID);
+            athenaPropertiesModified.put(KMS_KEY_ID, kmsKeyId);
+        }
+        return athenaPropertiesModified;
+    }
+
+    /**
+     * Maps glue connection properties and authentication configuration
+     * to Athena federation environment properties like 'default' and 'secret_manager_gcp_creds_name'
+     * Default behavior is to not map to Athena federation environment variables
+     *
+     * @param connectionProperties contains secret_name and connection properties
+     * @return the environment entries to add; this default implementation returns the input unchanged
+     */
+    public Map<String, String> connectionPropertiesToEnvironment(Map<String, String> connectionProperties)
+    {
+        return connectionProperties;
+    }
+}
diff --git a/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/handlers/CompositeHandler.java b/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/handlers/CompositeHandler.java
index a3af2e5b90..188b407d23 100644
--- a/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/handlers/CompositeHandler.java
+++ b/athena-federation-sdk/src/main/java/com/amazonaws/athena/connector/lambda/handlers/CompositeHandler.java
@@ -106,7 +106,7 @@ public final void handleRequest(InputStream inputStream, OutputStream outputStre
                 break;
             }
             catch (IllegalStateException e) {
                 // if client has not upgraded to our latest, fallback to lower version
-                logger.warn("Client's SerDe mis-matched
with connector version:, attempt with lower version: '{}'", --resolvedSerDeVersion, e); + logger.debug("Client's SerDe mis-matched with connector version:, attempt with lower version: '{}'", --resolvedSerDeVersion); } } diff --git a/athena-gcs/athena-gcs-connection.yaml b/athena-gcs/athena-gcs-connection.yaml new file mode 100644 index 0000000000..304e998c88 --- /dev/null +++ b/athena-gcs/athena-gcs-connection.yaml @@ -0,0 +1,154 @@ +Transform: 'AWS::Serverless-2016-10-31' + +Metadata: + AWS::ServerlessRepo::Application: + Name: AthenaGCSConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with Google Cloud Storage (GCS) and fetch data from Parquet file format' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: ['athena-federation', 'GCS', 'Google-Cloud-Storage', 'parquet', 'csv'] + HomePageUrl: https://github.com/awslabs/aws-athena-query-federation + SemanticVersion: 2022.47.1 + SourceCodeUrl: https://github.com/awslabs/aws-athena-query-federation + +Parameters: + LambdaFunctionName: + Description: 'The name you will give to this catalog is a Lambda name for Athena. Athena will use this name as the function name. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecretName: + Description: 'Secret key name in the AWS Secrets Manager.' + Type: String + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." 
+ Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [!Equals [!Ref KmsKeyId, ""]] + NotHasLambdaRole: !Equals [!Ref LambdaRoleArn, ""] + CreateKmsPolicy: !And [!Condition HasKmsKeyId, !Condition NotHasLambdaRole] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + AthenaGCSConnector: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-gcs:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Amazon Athena GCS Connector" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - athena:GetQueryExecution + - glue:GetTableVersions + - glue:GetPartitions + - glue:GetTables + - glue:GetTableVersion + - glue:GetDatabases + - glue:GetTable + - glue:GetPartition + - glue:GetDatabase + Effect: Allow + Resource: '*' + - Action: + - s3:ListBucket + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:s3:::${SpillBucket}' + - Action: + - 
secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:*:*:secret:${SecretName}*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-gcs/athena-gcs.yaml b/athena-gcs/athena-gcs.yaml index 80ce5a6cbe..aabe3fa4e2 100644 --- a/athena-gcs/athena-gcs.yaml +++ b/athena-gcs/athena-gcs.yaml @@ -100,4 +100,4 @@ Resources: Ref: SpillBucket - S3WritePolicy: BucketName: - Ref: SpillBucket + Ref: SpillBucket \ No newline at end of file diff --git a/athena-gcs/pom.xml b/athena-gcs/pom.xml index 5a5804a3c2..214e45b1fe 100644 --- a/athena-gcs/pom.xml +++ b/athena-gcs/pom.xml @@ -75,7 +75,7 @@ com.google.cloud google-cloud-storage - 2.44.1 + 2.45.0 @@ -90,7 +90,6 @@ test - org.testng testng @@ -115,7 +114,6 @@ ${mockito.version} test - software.amazon.awscdk logs diff --git a/athena-gcs/src/main/java/com/amazonaws/athena/connectors/gcs/GcsCompositeHandler.java 
b/athena-gcs/src/main/java/com/amazonaws/athena/connectors/gcs/GcsCompositeHandler.java index 1d10ab5ce8..668f8e3a2d 100644 --- a/athena-gcs/src/main/java/com/amazonaws/athena/connectors/gcs/GcsCompositeHandler.java +++ b/athena-gcs/src/main/java/com/amazonaws/athena/connectors/gcs/GcsCompositeHandler.java @@ -46,9 +46,9 @@ public class GcsCompositeHandler */ public GcsCompositeHandler() throws IOException, CertificateEncodingException, NoSuchAlgorithmException, KeyStoreException { - super(new GcsMetadataHandler(allocator, System.getenv()), new GcsRecordHandler(allocator, System.getenv())); + super(new GcsMetadataHandler(allocator, new GcsEnvironmentProperties().createEnvironment()), new GcsRecordHandler(allocator, new GcsEnvironmentProperties().createEnvironment())); installCaCertificate(); - installGoogleCredentialsJsonFile(System.getenv()); + installGoogleCredentialsJsonFile(new GcsEnvironmentProperties().createEnvironment()); setupNativeEnvironmentVariables(); } } diff --git a/athena-gcs/src/main/java/com/amazonaws/athena/connectors/gcs/GcsEnvironmentProperties.java b/athena-gcs/src/main/java/com/amazonaws/athena/connectors/gcs/GcsEnvironmentProperties.java new file mode 100644 index 0000000000..0308591318 --- /dev/null +++ b/athena-gcs/src/main/java/com/amazonaws/athena/connectors/gcs/GcsEnvironmentProperties.java @@ -0,0 +1,40 @@ +/*- + * #%L + * athena-gcs + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+package com.amazonaws.athena.connectors.gcs;
+
+import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SECRET_NAME;
+import static com.amazonaws.athena.connectors.gcs.GcsConstants.GCS_SECRET_KEY_ENV_VAR;
+
+/**
+ * GCS-specific mapping of Glue connection properties to connector configuration: only the
+ * secret name is carried over, under the GCS secret key.
+ */
+public class GcsEnvironmentProperties extends EnvironmentProperties
+{
+    /**
+     * Exposes the connection's 'secret_name' under the GCS secret key.
+     *
+     * @param connectionProperties contains secret_name and connection properties
+     * @return a new map with the GCS secret key when a secret name is present, otherwise an empty map
+     */
+    @Override
+    public Map<String, String> connectionPropertiesToEnvironment(Map<String, String> connectionProperties)
+    {
+        Map<String, String> environment = new HashMap<>();
+
+        String secretName = connectionProperties.get(SECRET_NAME);
+        // Skip the entry when there is no secret: a key mapped to null would make
+        // getOrDefault(...) on the resulting configuration return null instead of the default.
+        if (secretName != null) {
+            environment.put(GCS_SECRET_KEY_ENV_VAR, secretName);
+        }
+        return environment;
+    }
+}
diff --git a/athena-google-bigquery/athena-google-bigquery-connection.yaml b/athena-google-bigquery/athena-google-bigquery-connection.yaml
new file mode 100644
index 0000000000..3105bed256
--- /dev/null
+++ b/athena-google-bigquery/athena-google-bigquery-connection.yaml
@@ -0,0 +1,172 @@
+Transform: 'AWS::Serverless-2016-10-31'
+Metadata:
+  'AWS::ServerlessRepo::Application':
+    Name: AthenaGoogleBigQueryConnectorWithGlueConnection
+    Description: 'This connector enables Amazon Athena to communicate with Big Query using Google SDK'
+    Author: 'default author'
+    SpdxLicenseId: Apache-2.0
+    LicenseUrl: LICENSE.txt
+    ReadmeUrl: README.md
+    Labels:
+      - Big-Query
+      - Athena-Federation
+      - Google-SDK
+    HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation'
+    SemanticVersion: 2022.47.1
+    SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation'
+Parameters:
+  LambdaFunctionName:
+    Description: 'This is the name of the lambda function that will be created.
This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: "The secret name within AWS Secrets Manager that contains your Google Cloud Platform Credentials." + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access your data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys."
+ Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ] + HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + AthenaBigQueryConnector: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + GOOGLE_APPLICATION_CREDENTIALS: '/tmp/service-account.json' + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-google-bigquery:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with BigQuery using Google SDK" + Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" # partition-aware ARN (aws / aws-cn / aws-us-gov) + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy
+ PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" # partition-aware ARN (aws / aws-cn / aws-us-gov) + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-google-bigquery/athena-google-bigquery.yaml
b/athena-google-bigquery/athena-google-bigquery.yaml index 42acf445f5..b92e9d01ea 100644 --- a/athena-google-bigquery/athena-google-bigquery.yaml +++ b/athena-google-bigquery/athena-google-bigquery.yaml @@ -8,7 +8,6 @@ Metadata: LicenseUrl: LICENSE.txt ReadmeUrl: README.md Labels: - - Trianz - Big-Query - Athena-Federation - Google-SDK @@ -123,4 +122,4 @@ Resources: - VPCAccessPolicy: { } VpcConfig: SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] - SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] \ No newline at end of file diff --git a/athena-google-bigquery/pom.xml b/athena-google-bigquery/pom.xml index 24ae94785c..ac5ae171a8 100644 --- a/athena-google-bigquery/pom.xml +++ b/athena-google-bigquery/pom.xml @@ -66,7 +66,7 @@ nl.jqno.equalsverifier equalsverifier - 3.17.3 + 3.17.5 test diff --git a/athena-google-bigquery/src/main/java/com/amazonaws/athena/connectors/google/bigquery/BigQueryCompositeHandler.java b/athena-google-bigquery/src/main/java/com/amazonaws/athena/connectors/google/bigquery/BigQueryCompositeHandler.java index 51c8418ae0..0694c48aa2 100644 --- a/athena-google-bigquery/src/main/java/com/amazonaws/athena/connectors/google/bigquery/BigQueryCompositeHandler.java +++ b/athena-google-bigquery/src/main/java/com/amazonaws/athena/connectors/google/bigquery/BigQueryCompositeHandler.java @@ -41,8 +41,8 @@ public class BigQueryCompositeHandler public BigQueryCompositeHandler() throws IOException { - super(new BigQueryMetadataHandler(System.getenv()), new BigQueryRecordHandler(System.getenv(), allocator)); - installGoogleCredentialsJsonFile(System.getenv()); + super(new BigQueryMetadataHandler(new BigQueryEnvironmentProperties().createEnvironment()), new BigQueryRecordHandler(new BigQueryEnvironmentProperties().createEnvironment(), allocator)); + installGoogleCredentialsJsonFile(new 
BigQueryEnvironmentProperties().createEnvironment()); setupNativeEnvironmentVariables(); logger.info("Inside BigQueryCompositeHandler()"); } diff --git a/athena-google-bigquery/src/main/java/com/amazonaws/athena/connectors/google/bigquery/BigQueryEnvironmentProperties.java b/athena-google-bigquery/src/main/java/com/amazonaws/athena/connectors/google/bigquery/BigQueryEnvironmentProperties.java new file mode 100644 index 0000000000..0efc606a25 --- /dev/null +++ b/athena-google-bigquery/src/main/java/com/amazonaws/athena/connectors/google/bigquery/BigQueryEnvironmentProperties.java @@ -0,0 +1,45 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.google.bigquery; + +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; + +import java.util.HashMap; +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.PROJECT_ID; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SECRET_NAME; +import static com.amazonaws.athena.connectors.google.bigquery.BigQueryConstants.ENV_BIG_QUERY_CREDS_SM_ID; +import static com.amazonaws.athena.connectors.google.bigquery.BigQueryConstants.GCP_PROJECT_ID; + +public class BigQueryEnvironmentProperties extends EnvironmentProperties +{ + @Override + public Map connectionPropertiesToEnvironment(Map connectionProperties) + { + Map environment = new HashMap<>(); + + if (connectionProperties.containsKey(PROJECT_ID)) { + environment.put(GCP_PROJECT_ID, connectionProperties.get(PROJECT_ID)); + } + environment.put(ENV_BIG_QUERY_CREDS_SM_ID, connectionProperties.get(SECRET_NAME)); + return environment; + } +} diff --git a/athena-hbase/athena-hbase-connection.yaml b/athena-hbase/athena-hbase-connection.yaml new file mode 100644 index 0000000000..e950f7e16d --- /dev/null +++ b/athena-hbase/athena-hbase-connection.yaml @@ -0,0 +1,164 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaHBaseConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your HBase instance(s), making your HBase data accessible via SQL.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. 
This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: 'List<AWS::EC2::SecurityGroup::Id>' + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: 'List<AWS::EC2::Subnet::Id>' + SecretName: + Description: 'The name or prefix of a set of names within Secrets Manager that this function should have access to. (e.g. hbase-*).' + Type: String + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys."
+ Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-hbase:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with HBase, making your HBase data accessible via SQL" + Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" # partition-aware ARN (aws / aws-cn / aws-us-gov) + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:*:*:secret:${SecretName}*' + - Action: + - glue:GetTableVersions + - glue:GetPartitions + - glue:GetTables + -
glue:GetTableVersion + - glue:GetDatabases + - glue:GetTable + - glue:GetPartition + - glue:GetDatabase + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" # partition-aware ARN (aws / aws-cn / aws-us-gov) + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseCompositeHandler.java b/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseCompositeHandler.java index b2ea994987..e02af8ecb4 100644 --- a/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseCompositeHandler.java +++ b/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseCompositeHandler.java @@ -30,6 +30,6 @@ public class HbaseCompositeHandler { public HbaseCompositeHandler() { - super(new
HbaseMetadataHandler(System.getenv()), new HbaseRecordHandler(System.getenv())); + super(new HbaseMetadataHandler(new HbaseEnvironmentProperties().createEnvironment()), new HbaseRecordHandler(new HbaseEnvironmentProperties().createEnvironment())); } } diff --git a/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseConnectionFactory.java b/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseConnectionFactory.java index a61f6f078d..e96a8cef38 100644 --- a/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseConnectionFactory.java +++ b/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseConnectionFactory.java @@ -125,7 +125,7 @@ private Connection createConnection(String host, String masterPort, String zooke config.set(nextConfig.getKey(), nextConfig.getValue()); } - Map configOptions = System.getenv(); + Map configOptions = new HbaseEnvironmentProperties().createEnvironment(); boolean kerberosAuthEnabled = configOptions.get(KERBEROS_AUTH_ENABLED) != null && "true".equalsIgnoreCase(configOptions.get(KERBEROS_AUTH_ENABLED)); logger.info("Kerberos Authentication Enabled: " + kerberosAuthEnabled); if (kerberosAuthEnabled) { diff --git a/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseEnvironmentProperties.java b/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseEnvironmentProperties.java new file mode 100644 index 0000000000..cfd435ea1c --- /dev/null +++ b/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/HbaseEnvironmentProperties.java @@ -0,0 +1,44 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.hbase; + +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; + +import java.util.HashMap; +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DEFAULT_HBASE; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HBASE_PORT; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HOST; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.ZOOKEEPER_PORT; + +public class HbaseEnvironmentProperties extends EnvironmentProperties +{ + @Override + public Map connectionPropertiesToEnvironment(Map connectionProperties) + { + Map environment = new HashMap<>(); + + environment.put(DEFAULT_HBASE, connectionProperties.get(HOST) + + ":" + connectionProperties.get(HBASE_PORT) + + ":" + connectionProperties.get(ZOOKEEPER_PORT)); + return environment; + } +} diff --git a/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/connection/HbaseConnectionFactory.java b/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/connection/HbaseConnectionFactory.java index 2d8c0a4d1c..5708e4266b 100644 --- a/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/connection/HbaseConnectionFactory.java +++ b/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/connection/HbaseConnectionFactory.java @@ -19,6 +19,7 @@ */ package com.amazonaws.athena.connectors.hbase.connection; +import 
com.amazonaws.athena.connectors.hbase.HbaseEnvironmentProperties; import org.apache.arrow.util.VisibleForTesting; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; @@ -127,7 +128,7 @@ private HBaseConnection createConnection(String host, String masterPort, String config.set(nextConfig.getKey(), nextConfig.getValue()); } - Map configOptions = System.getenv(); + Map configOptions = new HbaseEnvironmentProperties().createEnvironment(); boolean kerberosAuthEnabled = configOptions.get(KERBEROS_AUTH_ENABLED) != null && "true".equalsIgnoreCase(configOptions.get(KERBEROS_AUTH_ENABLED)); logger.info("Kerberos Authentication Enabled: " + kerberosAuthEnabled); if (kerberosAuthEnabled) { diff --git a/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/integ/HbaseTableUtils.java b/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/integ/HbaseTableUtils.java index 625827de29..2745037afb 100644 --- a/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/integ/HbaseTableUtils.java +++ b/athena-hbase/src/main/java/com/amazonaws/athena/connectors/hbase/integ/HbaseTableUtils.java @@ -19,6 +19,7 @@ */ package com.amazonaws.athena.connectors.hbase.integ; +import com.amazonaws.athena.connectors.hbase.HbaseEnvironmentProperties; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; @@ -99,7 +100,7 @@ private Configuration getHbaseConfiguration(String connectionStr) configuration.set("hbase.client.pause", "500"); configuration.set("zookeeper.recovery.retry", "2"); - java.util.Map configOptions = System.getenv(); + java.util.Map configOptions = new HbaseEnvironmentProperties().createEnvironment(); boolean kerberosAuthEnabled = configOptions.get(KERBEROS_AUTH_ENABLED) != null && "true".equalsIgnoreCase(configOptions.get(KERBEROS_AUTH_ENABLED)); logger.info("Kerberos Authentication Enabled: " + kerberosAuthEnabled); if 
(kerberosAuthEnabled) { diff --git a/athena-hortonworks-hive/Dockerfile b/athena-hortonworks-hive/Dockerfile index 3a68e6d997..d0c666fb70 100644 --- a/athena-hortonworks-hive/Dockerfile +++ b/athena-hortonworks-hive/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-hortonworks-hive-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-hortonworks-hive-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.hortonworks.HiveMuxCompositeHandler" ] \ No newline at end of file +# The command can be overridden by providing a different command directly in the template. +# No need to specify it here: it is already defined in the .yaml templates, because the legacy and connection deployments use different handlers. diff --git a/athena-hortonworks-hive/athena-hortonworks-hive-connection.yaml b/athena-hortonworks-hive/athena-hortonworks-hive-connection.yaml new file mode 100644 index 0000000000..9c92230e5a --- /dev/null +++ b/athena-hortonworks-hive/athena-hortonworks-hive-connection.yaml @@ -0,0 +1,171 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaHortonworksHiveConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your Hortonworks Hive instance(s) using JDBC driver.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created.
This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." 
+ Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ] + HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-hortonworks-hive:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.hortonworks.HiveCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with Hortonworks Hive using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" # partition-aware ARN (aws / aws-cn / aws-us-gov) + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole +
PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" # partition-aware ARN (aws / aws-cn / aws-us-gov) + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git
a/athena-hortonworks-hive/athena-hortonworks-hive.yaml b/athena-hortonworks-hive/athena-hortonworks-hive.yaml index 8c1ac3a176..85f8dfc206 100644 --- a/athena-hortonworks-hive/athena-hortonworks-hive.yaml +++ b/athena-hortonworks-hive/athena-hortonworks-hive.yaml @@ -21,8 +21,8 @@ Parameters: Description: 'The default connection string is used when catalog is "lambda:${LambdaFunctionName}". Catalog specific Connection Strings can be added later. Format: ${DatabaseType}://${NativeJdbcConnectionString}.' Type: String SecretNamePrefix: - Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' - Type: String + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String SpillBucket: Description: 'The name of the bucket where this function can spill data.' 
Type: String @@ -75,6 +75,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-hortonworks-hive:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.hortonworks.HiveMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with Hortonworks Hive using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory @@ -93,17 +95,17 @@ Resources: Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' Version: '2012-10-17' - Statement: - - Action: - - logs:CreateLogStream - - logs:PutLogEvents - Effect: Allow - Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' Version: '2012-10-17' - Statement: - - Action: - - athena:GetQueryExecution - Effect: Allow - Resource: '*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' Version: '2012-10-17' #S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy #with one that is more restrictive and can only 'put' but not read,delete, or overwrite files. 
diff --git a/athena-hortonworks-hive/pom.xml b/athena-hortonworks-hive/pom.xml index dc11525b4c..cbb6440964 100644 --- a/athena-hortonworks-hive/pom.xml +++ b/athena-hortonworks-hive/pom.xml @@ -47,7 +47,6 @@ ${mockito.version} test - software.amazon.awssdk diff --git a/athena-hortonworks-hive/src/main/java/com/amazonaws/athena/connectors/hortonworks/HiveCompositeHandler.java b/athena-hortonworks-hive/src/main/java/com/amazonaws/athena/connectors/hortonworks/HiveCompositeHandler.java index 70ac1f47aa..cc03c1dad0 100644 --- a/athena-hortonworks-hive/src/main/java/com/amazonaws/athena/connectors/hortonworks/HiveCompositeHandler.java +++ b/athena-hortonworks-hive/src/main/java/com/amazonaws/athena/connectors/hortonworks/HiveCompositeHandler.java @@ -32,6 +32,6 @@ public class HiveCompositeHandler { public HiveCompositeHandler() { - super(new HiveMetadataHandler(System.getenv()), new HiveRecordHandler(System.getenv())); + super(new HiveMetadataHandler(new HortonworksEnvironmentProperties().createEnvironment()), new HiveRecordHandler(new HortonworksEnvironmentProperties().createEnvironment())); } } diff --git a/athena-hortonworks-hive/src/main/java/com/amazonaws/athena/connectors/hortonworks/HortonworksEnvironmentProperties.java b/athena-hortonworks-hive/src/main/java/com/amazonaws/athena/connectors/hortonworks/HortonworksEnvironmentProperties.java new file mode 100644 index 0000000000..522073b2f0 --- /dev/null +++ b/athena-hortonworks-hive/src/main/java/com/amazonaws/athena/connectors/hortonworks/HortonworksEnvironmentProperties.java @@ -0,0 +1,33 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.hortonworks; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +public class HortonworksEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "hive://jdbc:hive2://"; + } +} diff --git a/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/JdbcEnvironmentProperties.java b/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/JdbcEnvironmentProperties.java new file mode 100644 index 0000000000..db4a349155 --- /dev/null +++ b/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/JdbcEnvironmentProperties.java @@ -0,0 +1,84 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.jdbc; + +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; + +import java.util.HashMap; +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DATABASE; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DEFAULT; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HOST; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.JDBC_PARAMS; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.PORT; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SECRET_NAME; + +public abstract class JdbcEnvironmentProperties extends EnvironmentProperties +{ + @Override + public Map connectionPropertiesToEnvironment(Map connectionProperties) + { + HashMap environment = new HashMap<>(); + + // now construct jdbc string + String connectionString = getConnectionStringPrefix(connectionProperties) + connectionProperties.get(HOST) + + ":" + connectionProperties.get(PORT) + getDatabase(connectionProperties) + getJdbcParameters(connectionProperties); + + environment.put(DEFAULT, connectionString); + return environment; + } + + protected abstract String getConnectionStringPrefix(Map connectionProperties); + + protected String getDatabase(Map connectionProperties) + { + return getDatabaseSeparator() + connectionProperties.get(DATABASE); + } + + protected String getJdbcParameters(Map connectionProperties) + { + String params = getJdbcParametersSeparator() + connectionProperties.getOrDefault(JDBC_PARAMS, ""); + + if (connectionProperties.containsKey(SECRET_NAME)) { + if (connectionProperties.containsKey(JDBC_PARAMS)) { // need to add delimiter + params = params + getDelimiter(); + } + params = params + "${" + connectionProperties.get(SECRET_NAME) + "}"; + } + + return params; + } + + protected String 
getDatabaseSeparator() + { + return "/"; + } + + protected String getJdbcParametersSeparator() + { + return "?"; + } + + protected String getDelimiter() + { + return "&"; + } +} diff --git a/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/DatabaseConnectionConfigBuilder.java b/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/DatabaseConnectionConfigBuilder.java index 25418ac93c..323472c25f 100644 --- a/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/DatabaseConnectionConfigBuilder.java +++ b/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/DatabaseConnectionConfigBuilder.java @@ -43,6 +43,8 @@ public class DatabaseConnectionConfigBuilder private static final String SECRET_PATTERN_STRING = "\\$\\{(([a-z-]+!)?[a-zA-Z0-9:/_+=.@-]+)}"; public static final Pattern SECRET_PATTERN = Pattern.compile(SECRET_PATTERN_STRING); + public static final String DEFAULT_GLUE_CONNECTION = "glue_connection"; + private Map properties; private String engine; @@ -87,7 +89,7 @@ public DatabaseConnectionConfigBuilder properties(final Map prop public List build() { Validate.notEmpty(this.properties, "properties must not be empty"); - Validate.notBlank(this.properties.get(DEFAULT_CONNECTION_STRING_PROPERTY), "Default connection string must be present"); + Validate.isTrue(properties.containsKey(DEFAULT_CONNECTION_STRING_PROPERTY), "Default connection string must be present"); List databaseConnectionConfigs = new ArrayList<>(); @@ -95,7 +97,7 @@ public List build() for (Map.Entry property : this.properties.entrySet()) { final String key = property.getKey(); final String value = property.getValue(); - + String catalogName; if (DEFAULT_CONNECTION_STRING_PROPERTY.equals(key.toLowerCase())) { catalogName = key.toLowerCase(); @@ -109,7 +111,9 @@ else if (key.endsWith(CONNECTION_STRING_PROPERTY_SUFFIX)) { } databaseConnectionConfigs.add(extractDatabaseConnectionConfig(catalogName, value)); - 
numberOfCatalogs++; + if (StringUtils.isBlank(properties.get(DEFAULT_GLUE_CONNECTION))) { + numberOfCatalogs++; // Mux is not supported with glue. Do not count + } if (numberOfCatalogs > MUX_CATALOG_LIMIT) { throw new RuntimeException("Too many database instances in mux. Max supported is " + MUX_CATALOG_LIMIT); } diff --git a/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/GenericJdbcConnectionFactory.java b/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/GenericJdbcConnectionFactory.java index 40478a00cf..ab061c52d2 100644 --- a/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/GenericJdbcConnectionFactory.java +++ b/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/GenericJdbcConnectionFactory.java @@ -45,7 +45,7 @@ public class GenericJdbcConnectionFactory { private static final Logger LOGGER = LoggerFactory.getLogger(GenericJdbcConnectionFactory.class); - private static final String SECRET_NAME_PATTERN_STRING = "(\\$\\{[a-zA-Z0-9:/_+=.@-]+})"; + private static final String SECRET_NAME_PATTERN_STRING = "(\\$\\{[a-zA-Z0-9:/_+=.@!-]+})"; public static final Pattern SECRET_NAME_PATTERN = Pattern.compile(SECRET_NAME_PATTERN_STRING); private final DatabaseConnectionInfo databaseConnectionInfo; diff --git a/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/RdsSecretsCredentialProvider.java b/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/RdsSecretsCredentialProvider.java index 71fdfda3bf..a98958e981 100644 --- a/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/RdsSecretsCredentialProvider.java +++ b/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/connection/RdsSecretsCredentialProvider.java @@ -48,7 +48,14 @@ public RdsSecretsCredentialProvider(final String secretString) { Map rdsSecrets; try { - rdsSecrets = OBJECT_MAPPER.readValue(secretString, HashMap.class); + Map originalMap 
= OBJECT_MAPPER.readValue(secretString, HashMap.class); + + rdsSecrets = new HashMap<>(); + for (Map.Entry entry : originalMap.entrySet()) { + if (entry.getKey().equalsIgnoreCase("username") || entry.getKey().equalsIgnoreCase("password")) { + rdsSecrets.put(entry.getKey().toLowerCase(), entry.getValue()); + } + } } catch (IOException ioException) { throw new RuntimeException("Could not deserialize RDS credentials into HashMap", ioException); diff --git a/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/manager/JDBCUtil.java b/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/manager/JDBCUtil.java index dbd9f4d421..71de4ce6f8 100644 --- a/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/manager/JDBCUtil.java +++ b/athena-jdbc/src/main/java/com/amazonaws/athena/connectors/jdbc/manager/JDBCUtil.java @@ -212,7 +212,7 @@ public static List getTableMetadata(PreparedStatement preparedStateme } } catch (SQLException ex) { - LOGGER.info("Unable to return list of {} from data source!", tableType); + LOGGER.warn("Unable to return list of {} from data source!. 
Returning Empty list of table", tableType, ex); } return list.build(); } diff --git a/athena-jdbc/src/test/java/com/amazonaws/athena/connectors/jdbc/connection/DatabaseConnectionConfigBuilderTest.java b/athena-jdbc/src/test/java/com/amazonaws/athena/connectors/jdbc/connection/DatabaseConnectionConfigBuilderTest.java index 1f54475206..5104571b56 100644 --- a/athena-jdbc/src/test/java/com/amazonaws/athena/connectors/jdbc/connection/DatabaseConnectionConfigBuilderTest.java +++ b/athena-jdbc/src/test/java/com/amazonaws/athena/connectors/jdbc/connection/DatabaseConnectionConfigBuilderTest.java @@ -33,6 +33,9 @@ public class DatabaseConnectionConfigBuilderTest private static final String CONNECTION_STRING2 = "postgres://jdbc:postgresql://hostname/user=testUser&password=testPassword"; private static final String CONNECTION_STRING3 = "redshift://jdbc:redshift://hostname:5439/dev?${arn:aws:secretsmanager:us-east-1:1234567890:secret:redshift/user/secret}"; private static final String CONNECTION_STRING4 = "postgres://jdbc:postgresql://hostname:5439/dev?${arn:aws:secretsmanager:us-east-1:1234567890:secret:postgresql/user/secret}"; + private static final String CONNECTION_STRING5 = "jdbc:postgresql://hostname/test"; + private static final String CONNECTION_STRING5_SECRET = "testSecret"; + private static final String MOCK_GLUE_CONNECTION_NAME = "postgresql-connection"; @Test public void build() @@ -123,3 +126,4 @@ public void validSecretsSyntaxTest() } } } + diff --git a/athena-jdbc/src/test/java/com/amazonaws/athena/connectors/jdbc/connection/GenericJdbcConnectionFactoryTest.java b/athena-jdbc/src/test/java/com/amazonaws/athena/connectors/jdbc/connection/GenericJdbcConnectionFactoryTest.java new file mode 100644 index 0000000000..2ba8cb254d --- /dev/null +++ b/athena-jdbc/src/test/java/com/amazonaws/athena/connectors/jdbc/connection/GenericJdbcConnectionFactoryTest.java @@ -0,0 +1,48 @@ +/*- + * #%L + * athena-jdbc + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.jdbc.connection; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.regex.Matcher; + +import static com.amazonaws.athena.connectors.jdbc.connection.GenericJdbcConnectionFactory.SECRET_NAME_PATTERN; + +public class GenericJdbcConnectionFactoryTest +{ + @Test + public void matchSecretNamePattern() + { + String jdbcConnectionString = "mysql://jdbc:mysql://mysql.host:3333/default?${secret!@+=_}"; + Matcher secretMatcher = SECRET_NAME_PATTERN.matcher(jdbcConnectionString); + + Assert.assertTrue(secretMatcher.find()); + } + + @Test + public void matchIncorrectSecretNamePattern() + { + String jdbcConnectionString = "mysql://jdbc:mysql://mysql.host:3333/default?${secret!@+=*_}"; + Matcher secretMatcher = SECRET_NAME_PATTERN.matcher(jdbcConnectionString); + + Assert.assertFalse(secretMatcher.find()); + } +} diff --git a/athena-kafka/athena-kafka.yaml b/athena-kafka/athena-kafka.yaml index 9606527e64..2479b64225 100644 --- a/athena-kafka/athena-kafka.yaml +++ b/athena-kafka/athena-kafka.yaml @@ -176,4 +176,4 @@ Resources: Effect: Allow Resource: '*' Roles: - - !Ref FunctionRole + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-kafka/pom.xml b/athena-kafka/pom.xml index 7618263568..a5a46def16 100644 --- a/athena-kafka/pom.xml +++ b/athena-kafka/pom.xml @@ -24,7 +24,7 @@ org.apache.kafka kafka-clients - 7.7.1-ce + 7.8.0-ce 
org.apache.avro @@ -34,12 +34,12 @@ io.confluent kafka-avro-serializer - 7.7.1 + 7.8.0 io.confluent kafka-protobuf-serializer - 7.7.1 + 7.8.0 com.fasterxml.jackson.core @@ -118,7 +118,6 @@ withdep - org.testng testng diff --git a/athena-msk/athena-msk-connection.yaml b/athena-msk/athena-msk-connection.yaml new file mode 100644 index 0000000000..d8649830a0 --- /dev/null +++ b/athena-msk/athena-msk-connection.yaml @@ -0,0 +1,163 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaMSKConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with MSK clusters and fetch data.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - msk + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: "The secret name within AWS Secrets Manager that contains your aws key and secret Credentials(Not Required for IAM AUTH)" + Default: "" + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. 
subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + LambdaRoleArn: + Description: "(Must for auth type IAM) A custom role to be used by the Connector lambda" + Default: "" + Type: String + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ] + HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + AthenaMSKConnector: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-msk:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with MSK clusters" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue"] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + 
Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + - s3:ListBucket + - s3:GetObject + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + - kms:Decrypt + - glue:* + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + - autoscaling:CompleteLifecycleAction + Effect: Allow + Resource: '*' + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + Roles: + - !Ref FunctionRole + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of 
file diff --git a/athena-msk/athena-msk.yaml b/athena-msk/athena-msk.yaml index ab06956c09..f04269c75f 100644 --- a/athena-msk/athena-msk.yaml +++ b/athena-msk/athena-msk.yaml @@ -171,4 +171,4 @@ Resources: Effect: Allow Resource: '*' Roles: - - !Ref FunctionRole + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-msk/pom.xml b/athena-msk/pom.xml index e12aad9167..b73f469a0c 100644 --- a/athena-msk/pom.xml +++ b/athena-msk/pom.xml @@ -25,7 +25,7 @@ org.jetbrains.kotlin kotlin-stdlib - 2.0.21 + 2.1.0 org.jetbrains.kotlin @@ -35,12 +35,12 @@ org.jetbrains.kotlin kotlin-stdlib-jdk8 - 2.0.21 + 2.1.0 org.jetbrains.kotlin kotlin-reflect - 2.0.21 + 2.1.0 runtime @@ -83,7 +83,7 @@ org.apache.kafka kafka-clients - 7.7.1-ce + 7.8.0-ce org.apache.avro @@ -98,12 +98,12 @@ software.amazon.glue schema-registry-serde - 1.1.21 + 1.1.22 io.confluent kafka-protobuf-provider - 7.7.1 + 7.7.2 com.fasterxml.jackson.core diff --git a/athena-msk/src/main/java/com/amazonaws/athena/connectors/msk/AmazonMskCompositeHandler.java b/athena-msk/src/main/java/com/amazonaws/athena/connectors/msk/AmazonMskCompositeHandler.java index 14cb474d87..bdf3f21eab 100644 --- a/athena-msk/src/main/java/com/amazonaws/athena/connectors/msk/AmazonMskCompositeHandler.java +++ b/athena-msk/src/main/java/com/amazonaws/athena/connectors/msk/AmazonMskCompositeHandler.java @@ -26,6 +26,6 @@ public class AmazonMskCompositeHandler { public AmazonMskCompositeHandler() throws Exception { - super(new AmazonMskMetadataHandler(System.getenv()), new AmazonMskRecordHandler(System.getenv())); + super(new AmazonMskMetadataHandler(new AmazonMskEnvironmentProperties().createEnvironment()), new AmazonMskRecordHandler(new AmazonMskEnvironmentProperties().createEnvironment())); } } diff --git a/athena-msk/src/main/java/com/amazonaws/athena/connectors/msk/AmazonMskEnvironmentProperties.java b/athena-msk/src/main/java/com/amazonaws/athena/connectors/msk/AmazonMskEnvironmentProperties.java new file mode 100644 index 
0000000000..c281bf0479 --- /dev/null +++ b/athena-msk/src/main/java/com/amazonaws/athena/connectors/msk/AmazonMskEnvironmentProperties.java @@ -0,0 +1,50 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.msk; + +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; + +import java.util.HashMap; +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.CUSTOM_AUTH_TYPE; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.GLUE_CERTIFICATES_S3_REFERENCE; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HOST; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.PORT; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SECRET_NAME; +import static com.amazonaws.athena.connectors.msk.AmazonMskConstants.AUTH_TYPE; +import static com.amazonaws.athena.connectors.msk.AmazonMskConstants.CERTIFICATES_S3_REFERENCE; +import static com.amazonaws.athena.connectors.msk.AmazonMskConstants.ENV_KAFKA_ENDPOINT; +import static com.amazonaws.athena.connectors.msk.AmazonMskConstants.SECRET_MANAGER_MSK_CREDS_NAME; + +public class AmazonMskEnvironmentProperties extends EnvironmentProperties +{ + @Override + public 
Map connectionPropertiesToEnvironment(Map connectionProperties) + { + Map environment = new HashMap<>(); + + environment.put(AUTH_TYPE, connectionProperties.get(CUSTOM_AUTH_TYPE)); + environment.put(CERTIFICATES_S3_REFERENCE, connectionProperties.getOrDefault(GLUE_CERTIFICATES_S3_REFERENCE, "")); + environment.put(SECRET_MANAGER_MSK_CREDS_NAME, connectionProperties.getOrDefault(SECRET_NAME, "")); + environment.put(ENV_KAFKA_ENDPOINT, connectionProperties.get(HOST) + ":" + connectionProperties.get(PORT)); + return environment; + } +} diff --git a/athena-mysql/Dockerfile b/athena-mysql/Dockerfile index 08f27b704d..64e7a2115a 100644 --- a/athena-mysql/Dockerfile +++ b/athena-mysql/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-mysql-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-mysql-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.mysql.MySqlMuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly. +# No need to specify here (already defined in .yaml file because legacy and connections use different) diff --git a/athena-mysql/athena-mysql-connection.yaml b/athena-mysql/athena-mysql-connection.yaml new file mode 100644 index 0000000000..68cb803e91 --- /dev/null +++ b/athena-mysql/athena-mysql-connection.yaml @@ -0,0 +1,167 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaMySQLConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your MySQL instance(s) using JDBC driver.' 
+ Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena MySQL Federation secret names can be prefixed with "AthenaMySQLFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaMySQLFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: 'List' + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: 'List' + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. 
Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-mysql:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.mysql.MySqlCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with MySQL using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - 
Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole diff --git a/athena-mysql/athena-mysql.yaml b/athena-mysql/athena-mysql.yaml index 2b0d305c3e..c657e41e64 100644 --- a/athena-mysql/athena-mysql.yaml +++ b/athena-mysql/athena-mysql.yaml @@ -76,6 +76,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-mysql:2022.47.1' - Account: 
!If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.mysql.MySqlMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with MySQL using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory @@ -98,7 +100,7 @@ Resources: Service: - lambda.amazonaws.com Action: - - "sts:AssumeRole" + - "sts:AssumeRole" FunctionExecutionPolicy: Condition: NotHasLambdaRole Type: "AWS::IAM::Policy" @@ -118,38 +120,38 @@ Resources: Effect: Allow Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' - Action: - - logs:CreateLogStream - - logs:PutLogEvents + - logs:CreateLogStream + - logs:PutLogEvents Effect: Allow Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' - Action: - - athena:GetQueryExecution + - athena:GetQueryExecution Effect: Allow Resource: '*' - Action: - - ec2:CreateNetworkInterface - - ec2:DeleteNetworkInterface - - ec2:DescribeNetworkInterfaces - - ec2:DetachNetworkInterface + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface Effect: Allow Resource: '*' - Action: - - s3:GetObject - - s3:ListBucket - - s3:GetBucketLocation - - s3:GetObjectVersion - - s3:PutObject - - s3:PutObjectAcl - - s3:GetLifecycleConfiguration - - s3:PutLifecycleConfiguration - - s3:DeleteObject + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject Effect: Allow Resource: - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName} - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName}/* - - bucketName: - Ref: SpillBucket + - 
arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket \ No newline at end of file diff --git a/athena-mysql/src/main/java/com/amazonaws/athena/connectors/mysql/MySqlCompositeHandler.java b/athena-mysql/src/main/java/com/amazonaws/athena/connectors/mysql/MySqlCompositeHandler.java index 35265e60db..01519cd9c1 100644 --- a/athena-mysql/src/main/java/com/amazonaws/athena/connectors/mysql/MySqlCompositeHandler.java +++ b/athena-mysql/src/main/java/com/amazonaws/athena/connectors/mysql/MySqlCompositeHandler.java @@ -32,6 +32,6 @@ public class MySqlCompositeHandler { public MySqlCompositeHandler() { - super(new MySqlMetadataHandler(System.getenv()), new MySqlRecordHandler(System.getenv())); + super(new MySqlMetadataHandler(new MySqlEnvironmentProperties().createEnvironment()), new MySqlRecordHandler(new MySqlEnvironmentProperties().createEnvironment())); } } diff --git a/athena-mysql/src/main/java/com/amazonaws/athena/connectors/mysql/MySqlEnvironmentProperties.java b/athena-mysql/src/main/java/com/amazonaws/athena/connectors/mysql/MySqlEnvironmentProperties.java new file mode 100644 index 0000000000..d43758930c --- /dev/null +++ b/athena-mysql/src/main/java/com/amazonaws/athena/connectors/mysql/MySqlEnvironmentProperties.java @@ -0,0 +1,33 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.mysql; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +public class MySqlEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "mysql://jdbc:mysql://"; + } +} diff --git a/athena-neptune/athena-neptune-connection.yaml b/athena-neptune/athena-neptune-connection.yaml new file mode 100644 index 0000000000..f0d7e098e8 --- /dev/null +++ b/athena-neptune/athena-neptune-connection.yaml @@ -0,0 +1,168 @@ +Transform: 'AWS::Serverless-2016-10-31' + +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaNeptuneConnectorWithGlueConnection + Description: This connector enables Amazon Athena to communicate with your Neptune instance, making your Neptune graph data accessible via SQL. + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: ['athena-federation','athena-neptune','neptune'] + HomePageUrl: https://github.com/awslabs/aws-athena-query-federation + SemanticVersion: 2022.47.1 + SourceCodeUrl: https://github.com/awslabs/aws-athena-query-federation + +Parameters: + NeptuneClusterResId: + Description: 'To find the Neptune cluster resource ID in the Amazon Neptune AWS Management Console, choose the DB cluster that you want. The Resource ID is shown in the Configuration section.' + Type: String + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + Default: 'athena-catalog' + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." 
+ Type: String + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup(s) that should be applied to the Lambda function to allow connectivity to Neptune cluster. (e.g. sg1,sg2,sg3)' + Type: 'List<AWS::EC2::SecurityGroup::Id>' + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet(s) that the Lambda function can use to access the Neptune cluster. (e.g. subnet1,subnet2)' + Type: 'List<AWS::EC2::Subnet::Id>' + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + SERVICE_REGION: !Ref AWS::Region + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-neptune:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with Neptune, making your Neptune graph data accessible via SQL."
+ Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - glue:GetTableVersions + - glue:GetPartitions + - glue:GetTables + - glue:GetTableVersion + - glue:GetDatabases + - glue:GetTable + - glue:GetPartition + - glue:GetDatabase + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - neptune-db:* + Effect: Allow + #Dynamically construct Neptune Cluster Resource ARN to limit permissions to the specific cluster provided + Resource: !Sub 'arn:${AWS::Partition}:neptune-db:${AWS::Region}:${AWS::AccountId}:${NeptuneClusterResId}/*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 
'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-neptune/athena-neptune.yaml b/athena-neptune/athena-neptune.yaml index a468eeffb3..ebf995f804 100644 --- a/athena-neptune/athena-neptune.yaml +++ b/athena-neptune/athena-neptune.yaml @@ -115,7 +115,7 @@ Resources: - glue:GetDatabases - glue:GetTable - glue:GetPartition - - glue:GetDatabase + - glue:GetDatabase - athena:GetQueryExecution Effect: Allow Resource: '*' @@ -133,4 +133,4 @@ Resources: - VPCAccessPolicy: {} VpcConfig: SecurityGroupIds: !Ref SecurityGroupIds - SubnetIds: !Ref SubnetIds + SubnetIds: !Ref SubnetIds \ No newline at end of file diff --git a/athena-neptune/docs/neptune-connector-setup/README.md b/athena-neptune/docs/neptune-connector-setup/README.md index 7b0ac44908..44ef93597e 100644 --- a/athena-neptune/docs/neptune-connector-setup/README.md +++ b/athena-neptune/docs/neptune-connector-setup/README.md @@ -6,7 +6,7 @@ To deploy the Amazon Athena Neptune connector, we will need the following pre-re 2) NeptuneClusterEndpoint – You can get this information from the Neptune console and copying the cluster “Writer” endpoint information. ![](./assets/connector-clusterendpoint.png) -3) NeptuneClusterResourceID - To find the Neptune cluster resource ID in the Amazon Neptune AWS Management Console, choose the DB cluster that you want. The Resource ID is shown in the Configuration section.
+3) NeptuneClusterResId/NeptuneClusterResourceID - To find the Neptune cluster resource ID in the Amazon Neptune AWS Management Console, choose the DB cluster that you want. The Resource ID is shown in the Configuration section. ![](./assets/connector-clusterId.png) 4) SecurityGroupIds – These are the Security Group ID(s) that the connector Lambda function uses to communicate with Neptune. There are two steps: @@ -55,7 +55,7 @@ Scroll down to “Application Settings” and specify the following field values * NeptuneClusterEndpoint: Provide the Neptune Cluster endpoint that you have captured in one of the previous steps. - * NeptuneClusterResourceID: Provide the Neptune Cluster resourceid that you have captured in one of the previous steps. + * NeptuneClusterResId/NeptuneClusterResourceID: Provide the Neptune Cluster resourceid that you have captured in one of the previous steps. * NeptunePort: The listener port for your Neptune Cluster. Default is 8182. diff --git a/athena-neptune/pom.xml b/athena-neptune/pom.xml index 909f68c5aa..a004ca3795 100644 --- a/athena-neptune/pom.xml +++ b/athena-neptune/pom.xml @@ -124,7 +124,7 @@ org.eclipse.rdf4j rdf4j-repository-sparql - 5.0.3 + 5.1.0 org.slf4j diff --git a/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/Constants.java b/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/Constants.java index 41215afa55..542aac9f89 100644 --- a/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/Constants.java +++ b/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/Constants.java @@ -32,6 +32,7 @@ protected Constants() public static final String CFG_PORT = "neptune_port"; public static final String CFG_IAM = "iam_enabled"; public static final String CFG_REGION = "AWS_REGION"; + public static final String CFG_ClUSTER_RES_ID = "neptune_cluster_res_id"; public static final String SCHEMA_QUERY = "query"; public static final String SCHEMA_CASE_INSEN = 
"enable_caseinsensitivematch"; diff --git a/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/NeptuneCompositeHandler.java b/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/NeptuneCompositeHandler.java index 5659602885..1890b9460d 100644 --- a/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/NeptuneCompositeHandler.java +++ b/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/NeptuneCompositeHandler.java @@ -30,6 +30,6 @@ public class NeptuneCompositeHandler { public NeptuneCompositeHandler() { - super(new NeptuneMetadataHandler(System.getenv()), new NeptuneRecordHandler(System.getenv())); + super(new NeptuneMetadataHandler(new NeptuneEnvironmentProperties().createEnvironment()), new NeptuneRecordHandler(new NeptuneEnvironmentProperties().createEnvironment())); } } diff --git a/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/NeptuneEnvironmentProperties.java b/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/NeptuneEnvironmentProperties.java new file mode 100644 index 0000000000..7ddff52a4e --- /dev/null +++ b/athena-neptune/src/main/java/com/amazonaws/athena/connectors/neptune/NeptuneEnvironmentProperties.java @@ -0,0 +1,49 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.neptune; + +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; + +import java.util.HashMap; +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.CLUSTER_RES_ID; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.GRAPH_TYPE; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HOST; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.PORT; +import static com.amazonaws.athena.connectors.neptune.Constants.CFG_ClUSTER_RES_ID; +import static com.amazonaws.athena.connectors.neptune.Constants.CFG_ENDPOINT; +import static com.amazonaws.athena.connectors.neptune.Constants.CFG_GRAPH_TYPE; +import static com.amazonaws.athena.connectors.neptune.Constants.CFG_PORT; + +public class NeptuneEnvironmentProperties extends EnvironmentProperties /* Maps generic connection properties (HOST, PORT, CLUSTER_RES_ID, GRAPH_TYPE) onto the Neptune connector's CFG_* configuration keys. */ +{ + @Override + public Map connectionPropertiesToEnvironment(Map connectionProperties) + { + Map environment = new HashMap<>(); /* starts empty, so every value must come from connectionProperties */ + + environment.put(CFG_ENDPOINT, connectionProperties.get(HOST)); + environment.put(CFG_PORT, connectionProperties.get(PORT)); + environment.put(CFG_ClUSTER_RES_ID, connectionProperties.get(CLUSTER_RES_ID)); + environment.put(CFG_GRAPH_TYPE, connectionProperties.get(GRAPH_TYPE)); + return environment; + } +} diff --git a/athena-oracle/Dockerfile b/athena-oracle/Dockerfile index e85f8c566e..cb5216c202 100644 --- a/athena-oracle/Dockerfile +++ b/athena-oracle/Dockerfile @@ -1,9 +1,42 @@ FROM public.ecr.aws/lambda/java:11 +# Install necessary tools +RUN yum update -y && yum install -y curl perl openssl + +ENV truststore=${LAMBDA_TASK_ROOT}/rds-truststore.jks +ENV storepassword=federationStorePass + +# Download and process the RDS certificate +RUN curl -sS "https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem" > ${LAMBDA_TASK_ROOT}/global-bundle.pem && \ + awk 'split_after == 1
{n++;split_after=0} /-----END CERTIFICATE-----/ {split_after=1}{print > "rds-ca-" n ".pem"}' < ${LAMBDA_TASK_ROOT}/global-bundle.pem + +# Import certificates into the truststore +RUN for CERT in rds-ca-*; do \ + alias=$(openssl x509 -noout -text -in $CERT | perl -ne 'next unless /Subject:/; s/.*(CN=|CN = )//; print') && \ + echo "Importing $alias" && \ + keytool -import -file ${CERT} -alias "${alias}" -storepass ${storepassword} -keystore ${truststore} -noprompt && \ + rm $CERT; \ + done + +# Clean up +RUN rm ${LAMBDA_TASK_ROOT}/global-bundle.pem + +# Optional: List the content of the trust store (for verification) +RUN echo "Trust store content is: " && \ + keytool -list -v -keystore "$truststore" -storepass ${storepassword} | grep Alias | cut -d " " -f3- | while read alias; do \ + expiry=$(keytool -list -v -keystore "$truststore" -storepass ${storepassword} -alias "${alias}" | grep Valid | perl -ne 'if(/until: (.*?)\n/) { print "$1\n"; }'); \ + echo " Certificate ${alias} expires in '$expiry'"; \ + done + # Copy function code and runtime dependencies from Maven layout COPY target/athena-oracle-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-oracle-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.oracle.OracleMuxCompositeHandler" ] \ No newline at end of file +# Clean up JAR +RUN rm ${LAMBDA_TASK_ROOT}/athena-oracle-2022.47.1.jar + +# Command can be overwritten by providing a different command in the template directly. 
+# No need to specify here (already defined in .yaml file because legacy and connections use different) +# Set the CMD to your handler by removing the following comment for manual testing +# CMD [ "com.amazonaws.athena.connectors.oracle.OracleCompositeHandler" ] diff --git a/athena-oracle/athena-oracle-connection.yaml b/athena-oracle/athena-oracle-connection.yaml new file mode 100644 index 0000000000..871f1624b0 --- /dev/null +++ b/athena-oracle/athena-oracle-connection.yaml @@ -0,0 +1,169 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaOracleConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your ORACLE instance(s) using JDBC driver.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' 
+ Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join [ "", !Ref SecurityGroupIds ], "" ] ] + HasSubnets: !Not [ !Equals [ !Join [ "", !Ref SubnetIds ], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-oracle:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.oracle.OracleCompositeHandler" ] + Description: "Enables 
Amazon Athena to communicate with ORACLE using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - 
Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-oracle/athena-oracle.yaml b/athena-oracle/athena-oracle.yaml index 58d8329dc6..6badf23d43 100644 --- a/athena-oracle/athena-oracle.yaml +++ b/athena-oracle/athena-oracle.yaml @@ -88,6 +88,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-oracle:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.oracle.OracleMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with ORACLE using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory diff --git a/athena-oracle/pom.xml b/athena-oracle/pom.xml index 6eb10809f4..993af6f21e 100644 --- a/athena-oracle/pom.xml +++ b/athena-oracle/pom.xml @@ -64,9 +64,6 @@ ${mockito.version} test - - - diff --git a/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleCompositeHandler.java b/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleCompositeHandler.java index 0d8662cffc..78a779d000 100644 --- a/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleCompositeHandler.java +++
b/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleCompositeHandler.java @@ -33,6 +33,6 @@ public class OracleCompositeHandler { public OracleCompositeHandler() { - super(new OracleMetadataHandler(System.getenv()), new OracleRecordHandler(System.getenv())); + super(new OracleMetadataHandler(new OracleEnvironmentProperties().createEnvironment()), new OracleRecordHandler(new OracleEnvironmentProperties().createEnvironment())); } } diff --git a/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleEnvironmentProperties.java b/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleEnvironmentProperties.java new file mode 100644 index 0000000000..fbbe7195bc --- /dev/null +++ b/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleEnvironmentProperties.java @@ -0,0 +1,60 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.oracle; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DATABASE; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.ENFORCE_SSL; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SECRET_NAME; + +public class OracleEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + String prefix = "oracle://jdbc:oracle:thin:"; + if (connectionProperties.containsKey(SECRET_NAME)) { + prefix = prefix + "${" + connectionProperties.get(SECRET_NAME) + "}"; + } + if (connectionProperties.containsKey(ENFORCE_SSL)) { + prefix = prefix + "@tcps://"; + } + else { + prefix = prefix + "@//"; + } + + return prefix; + } + + @Override + protected String getDatabase(Map connectionProperties) + { + return "/" + connectionProperties.get(DATABASE); + } + + @Override + protected String getJdbcParameters(Map connectionProperties) + { + return ""; + } +} diff --git a/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleJdbcConnectionFactory.java b/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleJdbcConnectionFactory.java index 433f3d28ff..22efdd5ebb 100644 --- a/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleJdbcConnectionFactory.java +++ b/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleJdbcConnectionFactory.java @@ -33,24 +33,19 @@ import java.sql.SQLException; import java.util.Properties; import java.util.regex.Matcher; -import java.util.regex.Pattern; public class OracleJdbcConnectionFactory extends GenericJdbcConnectionFactory { - public static final String IS_FIPS_ENABLED = "is_FIPS_Enabled"; + public static final String IS_FIPS_ENABLED = 
"is_fips_enabled"; + public static final String IS_FIPS_ENABLED_LEGACY = "is_FIPS_Enabled"; private final DatabaseConnectionInfo databaseConnectionInfo; private final DatabaseConnectionConfig databaseConnectionConfig; private static final Logger LOGGER = LoggerFactory.getLogger(OracleJdbcConnectionFactory.class); - private static final String SSL_CONNECTION_STRING_REGEX = "jdbc:oracle:thin:\\$\\{([a-zA-Z0-9:_/+=.@-]+)\\}@" + - "\\((?i)description=\\(address=\\(protocol=tcps\\)\\(host=[a-zA-Z0-9-.]+\\)" + - "\\(port=([1-9][0-9]{0,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])\\)\\)" + - "\\(connect_data=\\(sid=[a-zA-Z_]+\\)\\)\\(security=\\(ssl_server_cert_dn=\"[=a-zA-Z,0-9-.,]+\"\\)\\)\\)"; - private static final Pattern SSL_CONNECTION_STRING_PATTERN = Pattern.compile(SSL_CONNECTION_STRING_REGEX); /** * @param databaseConnectionConfig database connection configuration {@link DatabaseConnectionConfig} - * @param databaseConnectionInfo - */ + * @param databaseConnectionInfo + */ public OracleJdbcConnectionFactory(DatabaseConnectionConfig databaseConnectionConfig, DatabaseConnectionInfo databaseConnectionInfo) { super(databaseConnectionConfig, null, databaseConnectionInfo); @@ -66,12 +61,14 @@ public Connection getConnection(final JdbcCredentialProvider jdbcCredentialProvi Properties properties = new Properties(); if (null != jdbcCredentialProvider) { - if (SSL_CONNECTION_STRING_PATTERN.matcher(databaseConnectionConfig.getJdbcConnectionString()).matches()) { + //checking for tcps (Secure Communication) protocol as part of the connection string. 
+ if (databaseConnectionConfig.getJdbcConnectionString().toLowerCase().contains("@tcps://")) { LOGGER.info("Establishing connection over SSL.."); properties.put("javax.net.ssl.trustStoreType", "JKS"); - properties.put("javax.net.ssl.trustStorePassword", "changeit"); + properties.put("javax.net.ssl.trustStore", "rds-truststore.jks"); + properties.put("javax.net.ssl.trustStorePassword", "federationStorePass"); properties.put("oracle.net.ssl_server_dn_match", "true"); - if (System.getenv().getOrDefault(IS_FIPS_ENABLED, "false").equalsIgnoreCase("true")) { + if (System.getenv().getOrDefault(IS_FIPS_ENABLED, "false").equalsIgnoreCase("true") || System.getenv().getOrDefault(IS_FIPS_ENABLED_LEGACY, "false").equalsIgnoreCase("true")) { properties.put("oracle.net.ssl_cipher_suites", "(TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA)"); } } @@ -79,10 +76,13 @@ public Connection getConnection(final JdbcCredentialProvider jdbcCredentialProvi LOGGER.info("Establishing normal connection.."); } Matcher secretMatcher = SECRET_NAME_PATTERN.matcher(databaseConnectionConfig.getJdbcConnectionString()); + String password = jdbcCredentialProvider.getCredential().getPassword(); + if (!password.contains("\"")) { + password = String.format("\"%s\"", password); + } final String secretReplacement = String.format("%s/%s", jdbcCredentialProvider.getCredential().getUser(), - jdbcCredentialProvider.getCredential().getPassword()); + password); derivedJdbcString = secretMatcher.replaceAll(Matcher.quoteReplacement(secretReplacement)); - LOGGER.info("derivedJdbcString: " + derivedJdbcString); return DriverManager.getConnection(derivedJdbcString, properties); } else { diff --git a/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleMetadataHandler.java 
b/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleMetadataHandler.java index 3932645fe7..0b4c42eeee 100644 --- a/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleMetadataHandler.java +++ b/athena-oracle/src/main/java/com/amazonaws/athena/connectors/oracle/OracleMetadataHandler.java @@ -56,11 +56,13 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import oracle.jdbc.OracleTypes; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.services.athena.AthenaClient; @@ -72,12 +74,12 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.Arrays; -import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.stream.Collectors; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DEFAULT_GLUE_CONNECTION; import static com.amazonaws.athena.connector.lambda.domain.predicate.functions.StandardFunctions.IS_DISTINCT_FROM_OPERATOR_FUNCTION_NAME; import static com.amazonaws.athena.connector.lambda.domain.predicate.functions.StandardFunctions.MODULUS_FUNCTION_NAME; import static com.amazonaws.athena.connector.lambda.domain.predicate.functions.StandardFunctions.NULLIF_FUNCTION_NAME; @@ -89,13 +91,15 @@ public class OracleMetadataHandler extends JdbcMetadataHandler { - static final String GET_PARTITIONS_QUERY = "Select DISTINCT PARTITION_NAME FROM USER_TAB_PARTITIONS where table_name= ?"; - static final String BLOCK_PARTITION_COLUMN_NAME = "PARTITION_NAME"; + static final String GET_PARTITIONS_QUERY = "Select 
DISTINCT PARTITION_NAME as \"partition_name\" FROM USER_TAB_PARTITIONS where table_name= ?"; + static final String BLOCK_PARTITION_COLUMN_NAME = "PARTITION_NAME".toLowerCase(); static final String ALL_PARTITIONS = "0"; - static final String PARTITION_COLUMN_NAME = "PARTITION_NAME"; + static final String PARTITION_COLUMN_NAME = "PARTITION_NAME".toLowerCase(); + static final String CASING_MODE = "casing_mode"; private static final Logger LOGGER = LoggerFactory.getLogger(OracleMetadataHandler.class); private static final int MAX_SPLITS_PER_REQUEST = 1000_000; private static final String COLUMN_NAME = "COLUMN_NAME"; + private static final String ORACLE_QUOTE_CHARACTER = "\""; static final String LIST_PAGINATED_TABLES_QUERY = "SELECT TABLE_NAME as \"TABLE_NAME\", OWNER as \"TABLE_SCHEM\" FROM all_tables WHERE owner = ? ORDER BY TABLE_NAME OFFSET ? ROWS FETCH NEXT ? ROWS ONLY"; @@ -154,15 +158,16 @@ public Schema getPartitionSchema(final String catalogName) public void getPartitions(final BlockWriter blockWriter, final GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker) throws Exception { - LOGGER.debug("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), - getTableLayoutRequest.getTableName().getTableName()); + LOGGER.debug("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), transformString(getTableLayoutRequest.getTableName().getSchemaName(), true), + transformString(getTableLayoutRequest.getTableName().getTableName(), true)); try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) { - List parameters = Arrays.asList(getTableLayoutRequest.getTableName().getTableName().toUpperCase()); + List parameters = Arrays.asList(transformString(getTableLayoutRequest.getTableName().getTableName(), true)); try (PreparedStatement preparedStatement = new 
PreparedStatementBuilder().withConnection(connection).withQuery(GET_PARTITIONS_QUERY).withParameters(parameters).build(); - ResultSet resultSet = preparedStatement.executeQuery()) { + ResultSet resultSet = preparedStatement.executeQuery()) { // Return a single partition if no partitions defined if (!resultSet.next()) { blockWriter.writeRows((Block block, int rowNum) -> { + LOGGER.debug("Parameters: " + BLOCK_PARTITION_COLUMN_NAME + " " + rowNum + " " + ALL_PARTITIONS); block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS); LOGGER.info("Adding partition {}", ALL_PARTITIONS); //we wrote 1 row so we return 1 @@ -305,7 +310,7 @@ public GetTableResponse doGetTable(final BlockAllocator blockAllocator, final Ge { try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) { Schema partitionSchema = getPartitionSchema(getTableRequest.getCatalogName()); - TableName tableName = new TableName(getTableRequest.getTableName().getSchemaName().toUpperCase(), getTableRequest.getTableName().getTableName().toUpperCase()); + TableName tableName = new TableName(transformString(getTableRequest.getTableName().getSchemaName(), false), transformString(getTableRequest.getTableName().getTableName(), false)); return new GetTableResponse(getTableRequest.getCatalogName(), tableName, getSchema(connection, tableName, partitionSchema), partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet())); } @@ -348,89 +353,85 @@ private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema { SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder(); - try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData()); - Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) { - boolean found = false; - HashMap hashMap = new HashMap(); - /** - * Getting original data type from oracle table for conversion - */ - try - (PreparedStatement stmt 
= connection.prepareStatement("select COLUMN_NAME ,DATA_TYPE from USER_TAB_COLS where table_name =?")) { - stmt.setString(1, tableName.getTableName().toUpperCase()); - ResultSet dataTypeResultSet = stmt.executeQuery(); - while (dataTypeResultSet.next()) { - hashMap.put(dataTypeResultSet.getString(COLUMN_NAME).trim(), dataTypeResultSet.getString("DATA_TYPE").trim()); - } - while (resultSet.next()) { - ArrowType columnType = JdbcArrowTypeConverter.toArrowType( - resultSet.getInt("DATA_TYPE"), - resultSet.getInt("COLUMN_SIZE"), - resultSet.getInt("DECIMAL_DIGITS"), - configOptions); - String columnName = resultSet.getString(COLUMN_NAME); - /** Handling TIMESTAMP,DATE, 0 Precesion**/ - if (columnType != null && columnType.getTypeID().equals(ArrowType.ArrowTypeID.Decimal)) { - String[] data = columnType.toString().split(","); - if (data[0].contains("0") || data[1].contains("0")) { - columnType = Types.MinorType.BIGINT.getType(); - } - - /** Handling negative scale issue */ - if (Integer.parseInt(data[1].trim().replace(")", "")) < 0.0) { - columnType = Types.MinorType.VARCHAR.getType(); - } + try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData())) { + while (resultSet.next()) { + ArrowType arrowColumnType = JdbcArrowTypeConverter.toArrowType( + resultSet.getInt("DATA_TYPE"), + resultSet.getInt("COLUMN_SIZE"), + resultSet.getInt("DECIMAL_DIGITS"), + configOptions); + + String columnName = resultSet.getString(COLUMN_NAME); + int jdbcColumnType = resultSet.getInt("DATA_TYPE"); + int scale = resultSet.getInt("COLUMN_SIZE"); + + LOGGER.debug("columnName: {}", columnName); + LOGGER.debug("arrowColumnType: {}", arrowColumnType); + LOGGER.debug("jdbcColumnType: {}", jdbcColumnType); + + /** + * The data type conversions below are needed because the framework does not provide appropriate + * data types for Oracle data types. 
+ */ + + /** Handling TIMESTAMP, DATE, 0 Precision **/ + if (arrowColumnType != null && arrowColumnType.getTypeID().equals(ArrowType.ArrowTypeID.Decimal)) { + String[] data = arrowColumnType.toString().split(","); + if (scale == 0 || Integer.parseInt(data[1].trim()) < 0) { + arrowColumnType = Types.MinorType.BIGINT.getType(); } + } - String dataType = hashMap.get(columnName); - LOGGER.debug("columnName: " + columnName); - LOGGER.debug("dataType: " + dataType); - /** - * below data type conversion doing since framework not giving appropriate - * data types for oracle data types.. - */ - /** - * Converting oracle date data type into DATEDAY MinorType - */ - if (dataType != null && (dataType.contains("date") || dataType.contains("DATE"))) { - columnType = Types.MinorType.DATEDAY.getType(); - } - /** - * Converting oracle NUMBER data type into BIGINT MinorType - */ - if (dataType != null && (dataType.contains("NUMBER")) && columnType.getTypeID().toString().equalsIgnoreCase("Utf8")) { - columnType = Types.MinorType.BIGINT.getType(); - } + /** + * Converting an Oracle date data type into DATEDAY MinorType + */ + if (jdbcColumnType == java.sql.Types.TIMESTAMP && scale == 7) { + arrowColumnType = Types.MinorType.DATEDAY.getType(); + } - /** - * Converting oracle TIMESTAMP data type into DATEMILLI MinorType - */ - if (dataType != null && (dataType.contains("TIMESTAMP")) - ) { - columnType = Types.MinorType.DATEMILLI.getType(); - } - if (columnType == null) { - columnType = Types.MinorType.VARCHAR.getType(); - } - if (columnType != null && !SupportedTypes.isSupported(columnType)) { - columnType = Types.MinorType.VARCHAR.getType(); - } + /** + * Converting an Oracle TIMESTAMP_WITH_TZ & TIMESTAMP_WITH_LOCAL_TZ data type into DATEMILLI MinorType + */ + if (jdbcColumnType == OracleTypes.TIMESTAMPLTZ || jdbcColumnType == OracleTypes.TIMESTAMPTZ) { + arrowColumnType = Types.MinorType.DATEMILLI.getType(); + } - if (columnType != null && SupportedTypes.isSupported(columnType)) { - 
schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build()); - found = true; - } - else { - LOGGER.error("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType); - } + if (arrowColumnType != null && !SupportedTypes.isSupported(arrowColumnType)) { + LOGGER.warn("getSchema: Unable to map type JDBC type [{}] for column[{}] to a supported type, attempted {}", jdbcColumnType, columnName, arrowColumnType); + arrowColumnType = Types.MinorType.VARCHAR.getType(); } + + if (arrowColumnType == null) { + LOGGER.warn("getSchema: column[{}] type is null setting it to varchar | JDBC Type is [{}]", columnName, jdbcColumnType); + arrowColumnType = Types.MinorType.VARCHAR.getType(); + } + schemaBuilder.addField(FieldBuilder.newBuilder(columnName, arrowColumnType).build()); } - if (!found) { - throw new RuntimeException("Could not find table in " + tableName.getSchemaName()); - } + partitionSchema.getFields().forEach(schemaBuilder::addField); LOGGER.debug("Oracle Table Schema" + schemaBuilder.toString()); return schemaBuilder.build(); } } + + /** + * Normalizes the casing of the string and wraps it in double quotes only when {@code quote} is true and it contains no double quote yet + * Casing follows the casing_mode option; when that is unset and the lambda uses a glue connection, the string is returned as is (lowercased by the trino engine) + * Otherwise it is uppercased (the default of oracle) + * @param str the identifier to transform + * @param quote whether to wrap the result in double quotes + * @return the transformed identifier + */ + private String transformString(String str, boolean quote) + { + boolean isGlueConnection = StringUtils.isNotBlank(configOptions.get(DEFAULT_GLUE_CONNECTION)); + boolean uppercase = configOptions.getOrDefault(CASING_MODE, isGlueConnection ? 
"lower" : "upper").toLowerCase().equals("upper"); + if (uppercase) { + str = str.toUpperCase(); + } + if (quote && !str.contains(ORACLE_QUOTE_CHARACTER)) { + str = ORACLE_QUOTE_CHARACTER + str + ORACLE_QUOTE_CHARACTER; + } + return str; + } } diff --git a/athena-oracle/src/test/java/com/amazonaws/athena/connectors/oracle/OracleMetadataHandlerTest.java b/athena-oracle/src/test/java/com/amazonaws/athena/connectors/oracle/OracleMetadataHandlerTest.java index c84dc15538..9a4cd4b376 100644 --- a/athena-oracle/src/test/java/com/amazonaws/athena/connectors/oracle/OracleMetadataHandlerTest.java +++ b/athena-oracle/src/test/java/com/amazonaws/athena/connectors/oracle/OracleMetadataHandlerTest.java @@ -33,6 +33,7 @@ import com.amazonaws.athena.connectors.jdbc.connection.DatabaseConnectionConfig; import com.amazonaws.athena.connectors.jdbc.connection.JdbcConnectionFactory; import com.amazonaws.athena.connectors.jdbc.connection.JdbcCredentialProvider; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Assert; @@ -65,7 +66,7 @@ public class OracleMetadataHandlerTest extends TestBase { - private static final Schema PARTITION_SCHEMA = SchemaBuilder.newBuilder().addField("PARTITION_NAME", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build(); + private static final Schema PARTITION_SCHEMA = SchemaBuilder.newBuilder().addField("partition_name", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build(); private DatabaseConnectionConfig databaseConnectionConfig = new DatabaseConnectionConfig("testCatalog", ORACLE_NAME, "oracle://jdbc:oracle:thin:username/password@//127.0.0.1:1521/orcl"); private OracleMetadataHandler oracleMetadataHandler; @@ -103,7 +104,7 @@ public void doGetTableLayout() { BlockAllocator blockAllocator = new BlockAllocatorImpl(); Constraints constraints = Mockito.mock(Constraints.class); - TableName tableName = new 
TableName("testSchema", "TESTTABLE"); + TableName tableName = new TableName("testSchema", "\"TESTTABLE\""); Schema partitionSchema = this.oracleMetadataHandler.getPartitionSchema("testCatalogName"); Set partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet()); GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols); @@ -111,7 +112,7 @@ public void doGetTableLayout() PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class); Mockito.when(this.connection.prepareStatement(OracleMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement); - String[] columns = {"PARTITION_NAME"}; + String[] columns = {"PARTITION_NAME".toLowerCase()}; int[] types = {Types.VARCHAR}; Object[][] values = {{"p0"}, {"p1"}}; ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1)); @@ -127,7 +128,7 @@ public void doGetTableLayout() for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) { expectedValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i)); } - Assert.assertEquals(expectedValues, Arrays.asList("[PARTITION_NAME : p0]", "[PARTITION_NAME : p1]")); + Assert.assertEquals(expectedValues, Arrays.asList("[partition_name : p0]", "[partition_name : p1]")); SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder(); expectedSchemaBuilder.addField(FieldBuilder.newBuilder(OracleMetadataHandler.BLOCK_PARTITION_COLUMN_NAME, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build()); @@ -144,7 +145,7 @@ public void doGetTableLayoutWithNoPartitions() { BlockAllocator blockAllocator = new BlockAllocatorImpl(); Constraints constraints = Mockito.mock(Constraints.class); - TableName tableName = new TableName("testSchema", "TESTTABLE"); + TableName tableName = new TableName("testSchema", "\"TESTTABLE\""); Schema 
partitionSchema = this.oracleMetadataHandler.getPartitionSchema("testCatalogName"); Set partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet()); GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols); @@ -152,7 +153,7 @@ public void doGetTableLayoutWithNoPartitions() PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class); Mockito.when(this.connection.prepareStatement(OracleMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement); - String[] columns = {"PARTITION_NAME"}; + String[] columns = {"PARTITION_NAME".toLowerCase()}; int[] types = {Types.VARCHAR}; Object[][] values = {{}}; ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1)); @@ -168,7 +169,7 @@ public void doGetTableLayoutWithNoPartitions() for (int i = 0; i < getTableLayoutResponse.getPartitions().getRowCount(); i++) { expectedValues.add(BlockUtils.rowToString(getTableLayoutResponse.getPartitions(), i)); } - Assert.assertEquals(expectedValues, Collections.singletonList("[PARTITION_NAME : 0]")); + Assert.assertEquals(expectedValues, Collections.singletonList("[partition_name : 0]")); SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder(); expectedSchemaBuilder.addField(FieldBuilder.newBuilder(OracleMetadataHandler.BLOCK_PARTITION_COLUMN_NAME, org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build()); @@ -249,7 +250,7 @@ public void doGetSplitsContinuation() PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class); Mockito.when(this.connection.prepareStatement(OracleMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement); - String[] columns = {"PARTITION_NAME"}; + String[] columns = {"PARTITION_NAME".toLowerCase()}; int[] types = {Types.VARCHAR}; Object[][] values = {{"p0"}, {"p1"}}; ResultSet resultSet = 
mockResultSet(columns, types, values, new AtomicInteger(-1)); @@ -265,7 +266,7 @@ public void doGetSplitsContinuation() GetSplitsResponse getSplitsResponse = this.oracleMetadataHandler.doGetSplits(splitBlockAllocator, getSplitsRequest); Set> expectedSplits = new HashSet<>(); - expectedSplits.add(Collections.singletonMap("PARTITION_NAME", "p1")); + expectedSplits.add(Collections.singletonMap("PARTITION_NAME".toLowerCase(), "p1")); Assert.assertEquals(expectedSplits.size(), getSplitsResponse.getSplits().size()); Set> actualSplits = getSplitsResponse.getSplits().stream().map(Split::getProperties).collect(Collectors.toSet()); Assert.assertEquals(expectedSplits, actualSplits); @@ -312,7 +313,7 @@ public void doGetTable() BlockAllocator blockAllocator = new BlockAllocatorImpl(); String[] schema = {"DATA_TYPE", "COLUMN_SIZE", "COLUMN_NAME", "DECIMAL_DIGITS", "NUM_PREC_RADIX"}; Object[][] values = {{Types.INTEGER, 12, "testCol1", 0, 0}, {Types.VARCHAR, 25, "testCol2", 0, 0}, - {Types.TIMESTAMP, 93, "testCol3", 0, 0}, {Types.TIMESTAMP_WITH_TIMEZONE, 93, "testCol4", 0, 0}}; + {Types.TIMESTAMP, 93, "testCol3", 0, 0}, {Types.TIMESTAMP_WITH_TIMEZONE, 93, "testCol4", 0, 0}, {Types.NUMERIC, 10, "testCol5", 2, 0}}; AtomicInteger rowNumber = new AtomicInteger(-1); ResultSet resultSet = mockResultSet(schema, values, rowNumber); @@ -321,6 +322,8 @@ public void doGetTable() expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol2", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build()); expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol3", org.apache.arrow.vector.types.Types.MinorType.DATEMILLI.getType()).build()); expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol4", org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType()).build()); + ArrowType.Decimal testCol5ArrowType = ArrowType.Decimal.createDecimal(10, 2, 128); + expectedSchemaBuilder.addField(FieldBuilder.newBuilder("testCol5", testCol5ArrowType).build()); 
PARTITION_SCHEMA.getFields().forEach(expectedSchemaBuilder::addField); Schema expected = expectedSchemaBuilder.build(); diff --git a/athena-postgresql/athena-postgresql-connection.yaml b/athena-postgresql/athena-postgresql-connection.yaml new file mode 100644 index 0000000000..79b3cb38c8 --- /dev/null +++ b/athena-postgresql/athena-postgresql-connection.yaml @@ -0,0 +1,176 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaPostgreSQLConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your PostgreSQL instance(s) using JDBC driver.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena PostgreSQL Federation secret names can be prefixed with "AthenaPostgreSQLFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaPostgreSQLFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." 
+ Type: String + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: 'List' + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: 'List' + CompositeHandler: + Description: 'Use "PostGreSqlMuxCompositeHandler" to access multiple postgres instances and "PostGreSqlCompositeHandler" to access single instance using DefaultConnectionString' + Type: String + Default: "PostGreSqlMuxCompositeHandler" + AllowedValues : ["PostGreSqlMuxCompositeHandler", "PostGreSqlCompositeHandler"] + DefaultScale: + Description: "(Optional) Default value for scale of type Numeric, representing the decimal digits in the fractional part, to the right of the decimal point." + Default: 0 + Type: Number + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." 
+ Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-postgresql:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ !Sub "com.amazonaws.athena.connectors.postgresql.PostGreSqlCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with PostgreSQL using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 
'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-postgresql/athena-postgresql.yaml b/athena-postgresql/athena-postgresql.yaml index 1752d485e9..79d2013130 100644 --- a/athena-postgresql/athena-postgresql.yaml +++ b/athena-postgresql/athena-postgresql.yaml @@ -21,8 +21,8 @@ Parameters: Description: 'The default connection string is used when catalog is "lambda:${LambdaFunctionName}". 
Catalog specific Connection Strings can be added later. Format: ${DatabaseType}://${NativeJdbcConnectionString}.' Type: String SecretNamePrefix: - Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena PostgreSQL Federation secret names can be prefixed with "AthenaPostgreSQLFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaPostgreSQLFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' - Type: String + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena PostgreSQL Federation secret names can be prefixed with "AthenaPostgreSQLFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaPostgreSQLFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String SpillBucket: Description: 'The name of the bucket where this function can spill data.' 
Type: String @@ -97,7 +97,7 @@ Resources: VpcConfig: SecurityGroupIds: !Ref SecurityGroupIds SubnetIds: !Ref SubnetIds - + FunctionRole: Condition: NotHasLambdaRole Type: AWS::IAM::Role @@ -112,8 +112,8 @@ Resources: Service: - lambda.amazonaws.com Action: - - "sts:AssumeRole" - + - "sts:AssumeRole" + FunctionExecutionPolicy: Condition: NotHasLambdaRole Type: "AWS::IAM::Policy" @@ -133,38 +133,38 @@ Resources: Effect: Allow Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' - Action: - - logs:CreateLogStream - - logs:PutLogEvents + - logs:CreateLogStream + - logs:PutLogEvents Effect: Allow Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' - Action: - - athena:GetQueryExecution + - athena:GetQueryExecution Effect: Allow Resource: '*' - Action: - - ec2:CreateNetworkInterface - - ec2:DeleteNetworkInterface - - ec2:DescribeNetworkInterfaces - - ec2:DetachNetworkInterface + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface Effect: Allow Resource: '*' - Action: - - s3:GetObject - - s3:ListBucket - - s3:GetBucketLocation - - s3:GetObjectVersion - - s3:PutObject - - s3:PutObjectAcl - - s3:GetLifecycleConfiguration - - s3:PutLifecycleConfiguration - - s3:DeleteObject + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject Effect: Allow Resource: - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName} - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName}/* - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket \ No newline at end of file diff --git 
a/athena-postgresql/src/main/java/com/amazonaws/athena/connectors/postgresql/PostGreSqlCompositeHandler.java b/athena-postgresql/src/main/java/com/amazonaws/athena/connectors/postgresql/PostGreSqlCompositeHandler.java index 5ad466fcb3..c5829cbb0a 100644 --- a/athena-postgresql/src/main/java/com/amazonaws/athena/connectors/postgresql/PostGreSqlCompositeHandler.java +++ b/athena-postgresql/src/main/java/com/amazonaws/athena/connectors/postgresql/PostGreSqlCompositeHandler.java @@ -32,6 +32,6 @@ public class PostGreSqlCompositeHandler { public PostGreSqlCompositeHandler() { - super(new PostGreSqlMetadataHandler(System.getenv()), new PostGreSqlRecordHandler(System.getenv())); + super(new PostGreSqlMetadataHandler(new PostGreSqlEnvironmentProperties().createEnvironment()), new PostGreSqlRecordHandler(new PostGreSqlEnvironmentProperties().createEnvironment())); } } diff --git a/athena-postgresql/src/main/java/com/amazonaws/athena/connectors/postgresql/PostGreSqlEnvironmentProperties.java b/athena-postgresql/src/main/java/com/amazonaws/athena/connectors/postgresql/PostGreSqlEnvironmentProperties.java new file mode 100644 index 0000000000..d2e49cd53d --- /dev/null +++ b/athena-postgresql/src/main/java/com/amazonaws/athena/connectors/postgresql/PostGreSqlEnvironmentProperties.java @@ -0,0 +1,33 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.postgresql; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +public class PostGreSqlEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "postgres://jdbc:postgresql://"; + } +} diff --git a/athena-redis/athena-redis-connection.yaml b/athena-redis/athena-redis-connection.yaml new file mode 100644 index 0000000000..adf5552f00 --- /dev/null +++ b/athena-redis/athena-redis-connection.yaml @@ -0,0 +1,162 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaRedisConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your Redis instance(s), making your Redis data accessible via SQL.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: 'List' + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. 
subnet1,subnet2)' + Type: 'List' + SecretName: + Description: 'The name or prefix of a set of names within Secrets Manager that this function should have access to. (e.g. redis-*).' + Type: String + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-redis:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with Redis, making your Redis data accessible via SQL" + Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + 
FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:*:*:secret:${SecretName}*' + - Action: + - glue:GetTableVersions + - glue:GetPartitions + - glue:GetTables + - glue:GetTableVersion + - glue:GetDatabases + - glue:GetTable + - glue:GetPartition + - glue:GetDatabase + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-redis/pom.xml b/athena-redis/pom.xml index 72f9c2ced1..114474ff50 100644 --- 
a/athena-redis/pom.xml +++ b/athena-redis/pom.xml @@ -25,7 +25,7 @@ io.lettuce lettuce-core - 6.5.0.RELEASE + 6.5.1.RELEASE org.slf4j diff --git a/athena-redis/src/main/java/com/amazonaws/athena/connectors/redis/RedisCompositeHandler.java b/athena-redis/src/main/java/com/amazonaws/athena/connectors/redis/RedisCompositeHandler.java index 08d9982471..42e12a0615 100644 --- a/athena-redis/src/main/java/com/amazonaws/athena/connectors/redis/RedisCompositeHandler.java +++ b/athena-redis/src/main/java/com/amazonaws/athena/connectors/redis/RedisCompositeHandler.java @@ -19,6 +19,7 @@ */ package com.amazonaws.athena.connectors.redis; +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler; /** @@ -30,6 +31,6 @@ public class RedisCompositeHandler { public RedisCompositeHandler() { - super(new RedisMetadataHandler(System.getenv()), new RedisRecordHandler(System.getenv())); + super(new RedisMetadataHandler(new EnvironmentProperties().createEnvironment()), new RedisRecordHandler(new EnvironmentProperties().createEnvironment())); } } diff --git a/athena-redshift/Dockerfile b/athena-redshift/Dockerfile index 0e7d808823..20a2afdef3 100644 --- a/athena-redshift/Dockerfile +++ b/athena-redshift/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-redshift-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-redshift-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.redshift.RedshiftMuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly. 
+# No need to specify it here (it is already defined in the .yaml templates, because the legacy and connection templates use different handlers) diff --git a/athena-redshift/athena-redshift-connection.yaml b/athena-redshift/athena-redshift-connection.yaml new file mode 100644 index 0000000000..9c63a6ffde --- /dev/null +++ b/athena-redshift/athena-redshift-connection.yaml @@ -0,0 +1,156 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaRedshiftConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your Redshift instance(s) using JDBC driver.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena Redshift Federation secret names can be prefixed with "AthenaRedshiftFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaRedshiftFederation*". Parameter value in this case should be "AthenaRedshiftFederation". If you do not have a prefix, you can manually update the IAM policy to allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." 
+ Type: String + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: 'List<AWS::EC2::SecurityGroup::Id>' + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access your data source. (e.g. subnet1,subnet2)' + Type: 'List<AWS::EC2::Subnet::Id>' + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom IAM role ARN to be used by the Connector lambda" + Type: String + Default: "" +Conditions: + HasKmsKeyId: !Not [!Equals [!Ref KmsKeyId, ""]] + NotHasLambdaRole: !Equals [!Ref LambdaRoleArn, ""] + CreateKmsPolicy: !And [!Condition NotHasLambdaRole, !Condition HasKmsKeyId] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-redshift:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.redshift.RedshiftCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with Redshift using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + 
ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - secretsmanager:GetSecretValue + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Effect: Allow + Action: + - athena:GetQueryExecution + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Effect: Allow + Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + Roles: + - !Ref FunctionRole + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole diff --git 
a/athena-redshift/athena-redshift.yaml b/athena-redshift/athena-redshift.yaml index 58686808d4..b395ce646f 100644 --- a/athena-redshift/athena-redshift.yaml +++ b/athena-redshift/athena-redshift.yaml @@ -21,8 +21,8 @@ Parameters: Description: 'The default connection string is used when catalog is "lambda:${LambdaFunctionName}". Catalog specific Connection Strings can be added later. Format: ${DatabaseType}://${NativeJdbcConnectionString}.' Type: String SecretNamePrefix: - Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena Redshift Federation secret names can be prefixed with "AthenaRedshiftFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaRedshiftFederation*". Parameter value in this case should be "AthenaRedshiftFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' - Type: String + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena Redshift Federation secret names can be prefixed with "AthenaRedshiftFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaRedshiftFederation*". Parameter value in this case should be "AthenaRedshiftFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String SpillBucket: Description: 'The name of the bucket where this function can spill data.' 
Type: String @@ -85,6 +85,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-redshift:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.redshift.RedshiftMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with Redshift using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory @@ -140,13 +142,13 @@ Resources: - s3:DeleteObject Resource: - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName} - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName}/* - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket Roles: - !Ref FunctionRole @@ -160,11 +162,11 @@ Resources: Statement: - Effect: Allow Action: - - kms:GenerateRandom + - kms:GenerateRandom Resource: '*' - Effect: Allow Action: - kms:GenerateDataKey Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KMSKeyId}" Roles: - - !Ref FunctionRole + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-redshift/pom.xml b/athena-redshift/pom.xml index 5ec2b541d0..688f70378a 100644 --- a/athena-redshift/pom.xml +++ b/athena-redshift/pom.xml @@ -23,6 +23,7 @@ com.amazon.redshift redshift-jdbc42 + 2.1.0.30 diff --git a/athena-redshift/src/main/java/com/amazonaws/athena/connectors/redshift/RedshiftCompositeHandler.java b/athena-redshift/src/main/java/com/amazonaws/athena/connectors/redshift/RedshiftCompositeHandler.java new file mode 100644 index 0000000000..765919c6c7 --- /dev/null +++ b/athena-redshift/src/main/java/com/amazonaws/athena/connectors/redshift/RedshiftCompositeHandler.java @@ -0,0 +1,30 @@ +/*- + * #%L + * athena-redshift + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.redshift; + +import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler; + +public class RedshiftCompositeHandler extends CompositeHandler +{ + public RedshiftCompositeHandler() + { + super(new RedshiftMetadataHandler(new RedshiftEnvironmentProperties().createEnvironment()), new RedshiftRecordHandler(new RedshiftEnvironmentProperties().createEnvironment())); + } +} diff --git a/athena-redshift/src/main/java/com/amazonaws/athena/connectors/redshift/RedshiftEnvironmentProperties.java b/athena-redshift/src/main/java/com/amazonaws/athena/connectors/redshift/RedshiftEnvironmentProperties.java new file mode 100644 index 0000000000..c9bc6c29a3 --- /dev/null +++ b/athena-redshift/src/main/java/com/amazonaws/athena/connectors/redshift/RedshiftEnvironmentProperties.java @@ -0,0 +1,33 @@ +/*- + * #%L + * athena-redshift + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.redshift; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +public class RedshiftEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "redshift://jdbc:redshift://"; + } +} diff --git a/athena-saphana/Dockerfile b/athena-saphana/Dockerfile index 5e55d28a12..827a67e65c 100644 --- a/athena-saphana/Dockerfile +++ b/athena-saphana/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-saphana.zip ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-saphana.zip -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.saphana.SaphanaMuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly. +# No need to specify it here (it is already defined in the .yaml templates, because the legacy and connection templates use different handlers) diff --git a/athena-saphana/athena-saphana-connection.yaml b/athena-saphana/athena-saphana-connection.yaml new file mode 100644 index 0000000000..7a58a1a271 --- /dev/null +++ b/athena-saphana/athena-saphana-connection.yaml @@ -0,0 +1,171 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaSaphanaConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your SAP HANA instance(s) using JDBC driver.' 
+ Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access your data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." 
+ Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ] + HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-saphana:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.saphana.SaphanaCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with SAP HANA using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: 
FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole diff --git a/athena-saphana/athena-saphana.yaml b/athena-saphana/athena-saphana.yaml 
index 201816368e..68ecafe0f2 100644 --- a/athena-saphana/athena-saphana.yaml +++ b/athena-saphana/athena-saphana.yaml @@ -75,6 +75,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-saphana:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.saphana.SaphanaMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with Teradata using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory @@ -113,4 +115,4 @@ Resources: - VPCAccessPolicy: {} VpcConfig: SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] - SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] \ No newline at end of file diff --git a/athena-saphana/src/main/java/com/amazonaws/athena/connectors/saphana/SaphanaCompositeHandler.java b/athena-saphana/src/main/java/com/amazonaws/athena/connectors/saphana/SaphanaCompositeHandler.java index 911d762c41..304e20fb19 100644 --- a/athena-saphana/src/main/java/com/amazonaws/athena/connectors/saphana/SaphanaCompositeHandler.java +++ b/athena-saphana/src/main/java/com/amazonaws/athena/connectors/saphana/SaphanaCompositeHandler.java @@ -35,6 +35,6 @@ public class SaphanaCompositeHandler { public SaphanaCompositeHandler() { - super(new SaphanaMetadataHandler(System.getenv()), new SaphanaRecordHandler(System.getenv())); + super(new SaphanaMetadataHandler(new SaphanaEnvironmentProperties().createEnvironment()), new SaphanaRecordHandler(new SaphanaEnvironmentProperties().createEnvironment())); } } diff --git a/athena-saphana/src/main/java/com/amazonaws/athena/connectors/saphana/SaphanaEnvironmentProperties.java b/athena-saphana/src/main/java/com/amazonaws/athena/connectors/saphana/SaphanaEnvironmentProperties.java new file mode 100644 index 0000000000..98951d18e7 
--- /dev/null +++ b/athena-saphana/src/main/java/com/amazonaws/athena/connectors/saphana/SaphanaEnvironmentProperties.java @@ -0,0 +1,39 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.saphana; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +public class SaphanaEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "saphana://jdbc:sap://"; + } + + @Override + protected String getDatabase(Map connectionProperties) + { + return "/"; + } +} diff --git a/athena-snowflake/Dockerfile b/athena-snowflake/Dockerfile index 8d4d9081a6..c14408ec23 100644 --- a/athena-snowflake/Dockerfile +++ b/athena-snowflake/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-snowflake.zip ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-snowflake.zip -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.snowflake.SnowflakeMuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly. 
+# No need to specify it here (it is already defined in the .yaml templates, because the legacy and connection templates use different handlers) diff --git a/athena-snowflake/athena-snowflake-connection.yaml b/athena-snowflake/athena-snowflake-connection.yaml new file mode 100644 index 0000000000..af2d35782e --- /dev/null +++ b/athena-snowflake/athena-snowflake-connection.yaml @@ -0,0 +1,171 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaSnowflakeConnectorWithGlueConnection + Description: 'This Amazon Athena connector for Snowflake enables Amazon Athena to run SQL queries on data stored in Snowflake.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." 
+ Type: String + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access your data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ] + HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-snowflake:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.snowflake.SnowflakeCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with Snowflake using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [ 
NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + 
- glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-snowflake/athena-snowflake.yaml b/athena-snowflake/athena-snowflake.yaml index 0646c8a5e6..7b86121621 100644 --- a/athena-snowflake/athena-snowflake.yaml +++ b/athena-snowflake/athena-snowflake.yaml @@ -75,6 +75,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-snowflake:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.snowflake.SnowflakeMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with Snowflake using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory diff --git a/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeCaseInsensitiveResolver.java b/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeCaseInsensitiveResolver.java new file mode 100644 index 0000000000..5c2f542709 --- /dev/null +++ b/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeCaseInsensitiveResolver.java @@ -0,0 +1,223 @@ +/*- + * #%L + * athena-snowflake + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.snowflake; + +import com.amazonaws.athena.connector.lambda.domain.TableName; +import com.amazonaws.athena.connectors.jdbc.manager.PreparedStatementBuilder; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DEFAULT_GLUE_CONNECTION; + +public class SnowflakeCaseInsensitiveResolver +{ + private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeCaseInsensitiveResolver.class); + private static final String SCHEMA_NAME_QUERY_TEMPLATE = "select * from INFORMATION_SCHEMA.SCHEMATA where lower(SCHEMA_NAME) = ?"; + private static final String TABLE_NAME_QUERY_TEMPLATE = "select * from INFORMATION_SCHEMA.TABLES where TABLE_SCHEMA = ? 
and lower(TABLE_NAME) = ?"; + private static final String SCHEMA_NAME_COLUMN_KEY = "SCHEMA_NAME"; + private static final String TABLE_NAME_COLUMN_KEY = "TABLE_NAME"; + private static final String CASING_MODE = "casing_mode"; + private static final String ANNOTATION_CASE_UPPER = "upper"; + private static final String ANNOTATION_CASE_LOWER = "lower"; + + private SnowflakeCaseInsensitiveResolver() + { + } + + private enum SnowflakeCasingMode + { + NONE, + CASE_INSENSITIVE_SEARCH, + ANNOTATION + } + + public static TableName getAdjustedTableObjectNameBasedOnConfig(final Connection connection, TableName tableName, Map configOptions) + throws SQLException + { + SnowflakeCasingMode casingMode = getCasingMode(configOptions); + switch (casingMode) { + case CASE_INSENSITIVE_SEARCH: + String schemaNameCaseInsensitively = getSchemaNameCaseInsensitively(connection, tableName.getSchemaName(), configOptions); + String tableNameCaseInsensitively = getTableNameCaseInsensitively(connection, schemaNameCaseInsensitively, tableName.getTableName(), configOptions); + TableName tableNameResult = new TableName(schemaNameCaseInsensitively, tableNameCaseInsensitively); + LOGGER.info("casing mode is `CASE_INSENSITIVE_SEARCH`: adjusting casing from Slowflake case insensitive search for TableName object. TableName:{}", tableNameResult); + return tableNameResult; + case ANNOTATION: + TableName tableNameFromQueryHint = getTableNameFromQueryHint(tableName); + LOGGER.info("casing mode is `ANNOTATION`: adjusting casing from input if annotation found for TableName object. TableName:{}", tableNameFromQueryHint); + return tableNameFromQueryHint; + case NONE: + LOGGER.info("casing mode is `NONE`: not adjust casing from input for TableName object. TableName:{}", tableName); + return tableName; + } + LOGGER.warn("casing mode is empty: not adjust casing from input for TableName object. 
TableName:{}", tableName); + return tableName; + } + + public static String getAdjustedSchemaNameBasedOnConfig(final Connection connection, String schemaNameInput, Map configOptions) + throws SQLException + { + SnowflakeCasingMode casingMode = getCasingMode(configOptions); + switch (casingMode) { + case CASE_INSENSITIVE_SEARCH: + LOGGER.info("casing mode is `CASE_INSENSITIVE_SEARCH`: adjusting casing from Slowflake case insensitive search for Schema..."); + return getSchemaNameCaseInsensitively(connection, schemaNameInput, configOptions); + case NONE: + LOGGER.info("casing mode is `NONE`: not adjust casing from input for Schema"); + return schemaNameInput; + case ANNOTATION: + LOGGER.info("casing mode is `ANNOTATION`: adjust casing for SCHEMA is NOT SUPPORTED. Skip casing adjustment"); + } + + return schemaNameInput; + } + + public static String getSchemaNameCaseInsensitively(final Connection connection, String schemaNameInput, Map configOptions) + throws SQLException + { + String nameFromSnowFlake = null; + int i = 0; + try (PreparedStatement preparedStatement = new PreparedStatementBuilder() + .withConnection(connection) + .withQuery(SCHEMA_NAME_QUERY_TEMPLATE) + .withParameters(Arrays.asList(schemaNameInput.toLowerCase())).build(); + ResultSet resultSet = preparedStatement.executeQuery()) { + while (resultSet.next()) { + i++; + String schemaNameCandidate = resultSet.getString(SCHEMA_NAME_COLUMN_KEY); + LOGGER.debug("Case insensitive search on columLabel: {}, schema name: {}", SCHEMA_NAME_COLUMN_KEY, schemaNameCandidate); + nameFromSnowFlake = schemaNameCandidate; + } + } + catch (SQLException e) { + throw new RuntimeException(e); + } + + if (i == 0 || i > 1) { + throw new RuntimeException(String.format("Schema name case insensitive match failed, number of match : %d", i)); + } + + return nameFromSnowFlake; + } + + public static String getTableNameCaseInsensitively(final Connection connection, String schemaName, String tableNameInput, Map configOptions) + throws 
SQLException + { + // schema name input should be correct case before searching tableName already + String nameFromSnowFlake = null; + int i = 0; + try (PreparedStatement preparedStatement = new PreparedStatementBuilder() + .withConnection(connection) + .withQuery(TABLE_NAME_QUERY_TEMPLATE) + .withParameters(Arrays.asList(schemaName, tableNameInput.toLowerCase())).build(); + ResultSet resultSet = preparedStatement.executeQuery()) { + while (resultSet.next()) { + i++; + String schemaNameCandidate = resultSet.getString(TABLE_NAME_COLUMN_KEY); + LOGGER.debug("Case insensitive search on columLabel: {}, schema name: {}", TABLE_NAME_COLUMN_KEY, schemaNameCandidate); + nameFromSnowFlake = schemaNameCandidate; + } + } + catch (SQLException e) { + throw new RuntimeException(e); + } + + if (i == 0 || i > 1) { + throw new RuntimeException(String.format("Schema name case insensitive match failed, number of match : %d", i)); + } + + return nameFromSnowFlake; + } + + /* + Keep previous implementation of table name casing adjustment from query hint. This is to keep backward compatibility. 
+ */ + public static TableName getTableNameFromQueryHint(TableName table) + { + LOGGER.info("getTableNameFromQueryHint: " + table); + //if no query hints has been passed then return input table name + if (!table.getTableName().contains("@")) { + return new TableName(table.getSchemaName().toUpperCase(), table.getTableName().toUpperCase()); + } + //analyze the hint to find table and schema case + String[] tbNameWithQueryHint = table.getTableName().split("@"); + String[] hintDetails = tbNameWithQueryHint[1].split("&"); + String schemaCase = ANNOTATION_CASE_UPPER; + String tableCase = ANNOTATION_CASE_UPPER; + String tableName = tbNameWithQueryHint[0]; + for (String str : hintDetails) { + String[] hintDetail = str.split("="); + if (hintDetail[0].contains("schema")) { + schemaCase = hintDetail[1]; + } + else if (hintDetail[0].contains("table")) { + tableCase = hintDetail[1]; + } + } + if (schemaCase.equalsIgnoreCase(ANNOTATION_CASE_UPPER) && tableCase.equalsIgnoreCase(ANNOTATION_CASE_UPPER)) { + return new TableName(table.getSchemaName().toUpperCase(), tableName.toUpperCase()); + } + else if (schemaCase.equalsIgnoreCase(ANNOTATION_CASE_LOWER) && tableCase.equalsIgnoreCase(ANNOTATION_CASE_LOWER)) { + return new TableName(table.getSchemaName().toLowerCase(), tableName.toLowerCase()); + } + else if (schemaCase.equalsIgnoreCase(ANNOTATION_CASE_LOWER) && tableCase.equalsIgnoreCase(ANNOTATION_CASE_UPPER)) { + return new TableName(table.getSchemaName().toLowerCase(), tableName.toUpperCase()); + } + else if (schemaCase.equalsIgnoreCase(ANNOTATION_CASE_UPPER) && tableCase.equalsIgnoreCase(ANNOTATION_CASE_LOWER)) { + return new TableName(table.getSchemaName().toUpperCase(), tableName.toLowerCase()); + } + else { + return new TableName(table.getSchemaName().toUpperCase(), tableName.toUpperCase()); + } + } + + /* + Default behavior with and without glue connection is different. As we want to make it backward compatible for customer who is not using glue connection. 
+ With Glue connection, default behavior is `NONE` which we will not adjust any casing in the connector. + Without Glue connection, default behavior is `ANNOTATION` which customer can perform MY_TABLE@schemaCase=upper&tableCase=upper + */ + private static SnowflakeCasingMode getCasingMode(Map configOptions) + { + boolean isGlueConnection = StringUtils.isNotBlank(configOptions.get(DEFAULT_GLUE_CONNECTION)); + if (!configOptions.containsKey(CASING_MODE)) { + LOGGER.info("CASING MODE disable"); + return isGlueConnection ? SnowflakeCasingMode.NONE : SnowflakeCasingMode.ANNOTATION; + } + + try { + SnowflakeCasingMode snowflakeCasingMode = SnowflakeCasingMode.valueOf(configOptions.get(CASING_MODE).toUpperCase()); + LOGGER.info("CASING MODE enable: {}", snowflakeCasingMode.toString()); + return snowflakeCasingMode; + } + catch (Exception ex) { + // print error log for customer along with list of input + LOGGER.error("Invalid input for:{}, input value:{}, valid values:{}", CASING_MODE, configOptions.get(CASING_MODE), Arrays.asList(SnowflakeCasingMode.values()), ex); + throw ex; + } + } +} diff --git a/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeCompositeHandler.java b/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeCompositeHandler.java index 9e6b70ef31..c7e9c4b64f 100644 --- a/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeCompositeHandler.java +++ b/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeCompositeHandler.java @@ -35,6 +35,6 @@ public class SnowflakeCompositeHandler { public SnowflakeCompositeHandler() { - super(new SnowflakeMetadataHandler(System.getenv()), new SnowflakeRecordHandler(System.getenv())); + super(new SnowflakeMetadataHandler(new SnowflakeEnvironmentProperties().createEnvironment()), new SnowflakeRecordHandler(new SnowflakeEnvironmentProperties().createEnvironment())); } } diff --git 
a/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeEnvironmentProperties.java b/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeEnvironmentProperties.java new file mode 100644 index 0000000000..3e74355467 --- /dev/null +++ b/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeEnvironmentProperties.java @@ -0,0 +1,137 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.snowflake; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; +import com.google.common.base.Strings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DATABASE; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DEFAULT; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DEFAULT_GLUE_CONNECTION; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.HOST; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.PORT; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.SCHEMA; +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.WAREHOUSE; + +public class SnowflakeEnvironmentProperties extends JdbcEnvironmentProperties +{ + private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeEnvironmentProperties.class); + private static final String WAREHOUSE_PROPERTY_KEY = "warehouse"; + private static final String DB_PROPERTY_KEY = "db"; + private static final String SCHEMA_PROPERTY_KEY = "schema"; + private static final String SNOWFLAKE_ESCAPE_CHARACTER = "\""; + + @Override + public Map connectionPropertiesToEnvironment(Map connectionProperties) + { + HashMap environment = new HashMap<>(); + + // put it as environment variable so we can put it as JDBC parameters later when creation connection (not with JDBC) + Optional.ofNullable(connectionProperties.get(WAREHOUSE)).ifPresent(x -> environment.put(WAREHOUSE, x)); + Optional.ofNullable(connectionProperties.get(DATABASE)).ifPresent(x -> environment.put(DATABASE, x)); + Optional.ofNullable(connectionProperties.get(SCHEMA)).ifPresent(x -> environment.put(SCHEMA, x)); + + // now 
construct jdbc string, Snowflake JDBC should just be plain JDBC String. Parameter in JDBC string will get upper case. + StringBuilder connectionStringBuilder = new StringBuilder(getConnectionStringPrefix(connectionProperties)); + connectionStringBuilder.append(connectionProperties.get(HOST)); + if (connectionProperties.containsKey(PORT)) { + connectionStringBuilder + .append(":") + .append(connectionProperties.get(PORT)); + } + + String jdbcParametersString = getJdbcParameters(connectionProperties); + if (!Strings.isNullOrEmpty(jdbcParametersString)) { + LOGGER.info("JDBC parameters found, adding to JDBC String"); + connectionStringBuilder.append(getSnowflakeJDBCParameterPrefix()).append(getJdbcParameters(connectionProperties)); + } + + environment.put(DEFAULT, connectionStringBuilder.toString()); + return environment; + } + + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "snowflake://jdbc:snowflake://"; + } + + /** + * For Snowflake, we don't put warehouse, database or schema information to the JDBC String to avoid casing issues. 
+ * @param connectionProperties + * @return + */ + @Override + protected String getDatabase(Map connectionProperties) + { + return ""; + } + + @Override + protected String getJdbcParametersSeparator() + { + return "&"; + } + + private String getSnowflakeJDBCParameterPrefix() + { + return "/?"; + } + + private static String getValueWrapperWithEscapedCharacter(String input) + { + return SNOWFLAKE_ESCAPE_CHARACTER + input + SNOWFLAKE_ESCAPE_CHARACTER; + } + + private static boolean isGlueConnection(Map properties) + { + return properties.containsKey(DEFAULT_GLUE_CONNECTION); + } + + public static Map getSnowFlakeParameter(Map baseProperty, Map connectionProperties) + { + logger.debug("getSnowFlakeParameter, Loading connection properties"); + Map parameters = new HashMap<>(baseProperty); + + if (!isGlueConnection(connectionProperties)) { + return parameters; + } + + if (!connectionProperties.containsKey(SCHEMA)) { + logger.debug("No schema specified in connection string"); + } + + parameters.put(WAREHOUSE_PROPERTY_KEY, getValueWrapperWithEscapedCharacter(connectionProperties.get(WAREHOUSE))); + parameters.put(DB_PROPERTY_KEY, getValueWrapperWithEscapedCharacter(connectionProperties.get(DATABASE))); + + if (connectionProperties.containsKey(SCHEMA)) { + logger.debug("Found schema specified"); + parameters.put(SCHEMA_PROPERTY_KEY, getValueWrapperWithEscapedCharacter(connectionProperties.get(SCHEMA))); + } + + return parameters; + } +} diff --git a/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeMetadataHandler.java b/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeMetadataHandler.java index d9c9b9d3f1..fd05d4d88a 100644 --- a/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeMetadataHandler.java +++ b/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeMetadataHandler.java @@ -41,6 +41,8 @@ import 
com.amazonaws.athena.connector.lambda.metadata.GetTableResponse; import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest; import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse; +import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest; +import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse; import com.amazonaws.athena.connector.lambda.metadata.optimizations.DataSourceOptimizations; import com.amazonaws.athena.connector.lambda.metadata.optimizations.OptimizationSubType; import com.amazonaws.athena.connector.lambda.metadata.optimizations.pushdown.ComplexExpressionPushdownSubType; @@ -57,6 +59,7 @@ import com.amazonaws.athena.connectors.jdbc.manager.PreparedStatementBuilder; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -84,6 +87,7 @@ import java.util.Set; import java.util.stream.Collectors; +import static com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest.UNLIMITED_PAGE_SIZE_VALUE; import static com.amazonaws.athena.connectors.snowflake.SnowflakeConstants.MAX_PARTITION_COUNT; import static com.amazonaws.athena.connectors.snowflake.SnowflakeConstants.SINGLE_SPLIT_LIMIT_COUNT; @@ -93,7 +97,7 @@ */ public class SnowflakeMetadataHandler extends JdbcMetadataHandler { - static final Map JDBC_PROPERTIES = ImmutableMap.of("databaseTerm", "SCHEMA"); + static final Map JDBC_PROPERTIES = ImmutableMap.of("databaseTerm", "SCHEMA", "CLIENT_RESULT_COLUMN_CASE_INSENSITIVE", "true"); static final String BLOCK_PARTITION_COLUMN_NAME = "partition"; private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeMetadataHandler.class); private static final int MAX_SPLITS_PER_REQUEST = 1000_000; @@ -109,13 +113,23 @@ public class 
SnowflakeMetadataHandler extends JdbcMetadataHandler static final String SHOW_PRIMARY_KEYS_QUERY = "SHOW PRIMARY KEYS IN "; static final String PRIMARY_KEY_COLUMN_NAME = "column_name"; static final String COUNTS_COLUMN_NAME = "COUNTS"; - private static final String CASE_UPPER = "upper"; - private static final String CASE_LOWER = "lower"; /** * Query to check view */ static final String VIEW_CHECK_QUERY = "SELECT * FROM information_schema.views WHERE table_schema = ? AND table_name = ?"; static final String ALL_PARTITIONS = "*"; + + static final Map STRING_ARROW_TYPE_MAP = com.google.common.collect.ImmutableMap.of( + "INTEGER", (ArrowType) Types.MinorType.INT.getType(), + "DATE", (ArrowType) Types.MinorType.DATEDAY.getType(), + "TIMESTAMP", (ArrowType) Types.MinorType.DATEMILLI.getType(), + "TIMESTAMP_LTZ", (ArrowType) Types.MinorType.DATEMILLI.getType(), + "TIMESTAMP_NTZ", (ArrowType) Types.MinorType.DATEMILLI.getType(), + "TIMESTAMP_TZ", (ArrowType) Types.MinorType.DATEMILLI.getType(), + "TIMESTAMPLTZ", (ArrowType) Types.MinorType.DATEMILLI.getType(), + "TIMESTAMPNTZ", (ArrowType) Types.MinorType.DATEMILLI.getType(), + "TIMESTAMPTZ", (ArrowType) Types.MinorType.DATEMILLI.getType() + ); /** * Instantiates handler to be used by Lambda function directly. 
* @@ -132,8 +146,8 @@ public SnowflakeMetadataHandler(java.util.Map configOptions) public SnowflakeMetadataHandler(DatabaseConnectionConfig databaseConnectionConfig, java.util.Map configOptions) { this(databaseConnectionConfig, new GenericJdbcConnectionFactory(databaseConnectionConfig, - JDBC_PROPERTIES, new DatabaseConnectionInfo(SnowflakeConstants.SNOWFLAKE_DRIVER_CLASS, - SnowflakeConstants.SNOWFLAKE_DEFAULT_PORT)), configOptions); + SnowflakeEnvironmentProperties.getSnowFlakeParameter(JDBC_PROPERTIES, configOptions), + new DatabaseConnectionInfo(SnowflakeConstants.SNOWFLAKE_DRIVER_CLASS, SnowflakeConstants.SNOWFLAKE_DEFAULT_PORT)), configOptions); } @VisibleForTesting @@ -150,8 +164,8 @@ protected SnowflakeMetadataHandler( @Override public GetDataSourceCapabilitiesResponse doGetDataSourceCapabilities(BlockAllocator allocator, GetDataSourceCapabilitiesRequest request) { + LOGGER.debug("doGetDataSourceCapabilities: " + request); ImmutableMap.Builder> capabilities = ImmutableMap.builder(); - capabilities.put(DataSourceOptimizations.SUPPORTS_FILTER_PUSHDOWN.withSupportedSubTypes( FilterPushdownSubType.SORTED_RANGE_SET, FilterPushdownSubType.NULLABLE_COMPARISON )); @@ -180,26 +194,29 @@ public SnowflakeMetadataHandler(DatabaseConnectionConfig databaseConnectionConfi @Override public Schema getPartitionSchema(final String catalogName) { + LOGGER.debug("getPartitionSchema: " + catalogName); SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder() .addField(BLOCK_PARTITION_COLUMN_NAME, Types.MinorType.VARCHAR.getType()); return schemaBuilder.build(); } - private Optional getPrimaryKey(TableName tableName) throws Exception + private Optional getPrimaryKey(TableName tableName) throws Exception { + LOGGER.debug("getPrimaryKey tableName: " + tableName); List primaryKeys = new ArrayList(); try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) { - try (PreparedStatement preparedStatement = 
connection.prepareStatement(SHOW_PRIMARY_KEYS_QUERY + tableName.getTableName()); + try (PreparedStatement preparedStatement = connection.prepareStatement(SHOW_PRIMARY_KEYS_QUERY + "\"" + tableName.getSchemaName() + "\".\"" + tableName.getTableName() + "\""); ResultSet rs = preparedStatement.executeQuery()) { while (rs.next()) { // Concatenate multiple primary keys if they exist primaryKeys.add(rs.getString(PRIMARY_KEY_COLUMN_NAME)); } } - } - String primaryKey = String.join(", ", primaryKeys); - if (!Strings.isNullOrEmpty(primaryKey) && hasUniquePrimaryKey(tableName, primaryKey)) { - return Optional.of(primaryKey); + + String primaryKeyString = primaryKeys.stream().map(s -> "\"" + s + "\"").collect(Collectors.joining(",")); + if (!Strings.isNullOrEmpty(primaryKeyString) && hasUniquePrimaryKey(tableName, primaryKeyString)) { + return Optional.of(primaryKeyString); + } } return Optional.empty(); } @@ -211,7 +228,7 @@ private Optional getPrimaryKey(TableName tableName) throws Exception private boolean hasUniquePrimaryKey(TableName tableName, String primaryKey) throws Exception { try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) { - try (PreparedStatement preparedStatement = connection.prepareStatement("SELECT " + primaryKey + ", count(*) as COUNTS FROM " + tableName.getTableName() + " GROUP BY " + primaryKey + " ORDER BY COUNTS DESC"); + try (PreparedStatement preparedStatement = connection.prepareStatement("SELECT " + primaryKey + ", count(*) as COUNTS FROM " + "\"" + tableName.getSchemaName() + "\".\"" + tableName.getTableName() + "\"" + " GROUP BY " + primaryKey + " ORDER BY COUNTS DESC"); ResultSet rs = preparedStatement.executeQuery()) { if (rs.next()) { if (rs.getInt(COUNTS_COLUMN_NAME) == 1) { @@ -237,38 +254,42 @@ private boolean hasUniquePrimaryKey(TableName tableName, String primaryKey) thro public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker 
queryStatusChecker) throws Exception { - LOGGER.info("{}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), + LOGGER.debug("getPartitions: {}: Schema {}, table {}", getTableLayoutRequest.getQueryId(), getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName()); - /** - * "MAX_PARTITION_COUNT" is currently set to 50 to limit the number of partitions. - * this is to handle timeout issues because of huge partitions - */ - LOGGER.info(" Total Partition Limit" + MAX_PARTITION_COUNT); - boolean viewFlag = checkForView(getTableLayoutRequest); - //if the input table is a view , there will be single split - if (viewFlag) { - blockWriter.writeRows((Block block, int rowNum) -> { - block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS); - return 1; - }); - } - else { + + try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) { + TableName tableName = getTableLayoutRequest.getTableName(); + /** + * "MAX_PARTITION_COUNT" is currently set to 50 to limit the number of partitions. 
+ * this is to handle timeout issues because of huge partitions + */ + LOGGER.info(" Total Partition Limit" + MAX_PARTITION_COUNT); + boolean viewFlag = checkForView(tableName); + //if the input table is a view , there will be single split + if (viewFlag) { + blockWriter.writeRows((Block block, int rowNum) -> { + block.setValue(BLOCK_PARTITION_COLUMN_NAME, rowNum, ALL_PARTITIONS); + return 1; + }); + return; + } + double totalRecordCount = 0; LOGGER.info(COUNT_RECORDS_QUERY); - List parameters = Arrays.asList(getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName()); - try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider()); - PreparedStatement preparedStatement = new PreparedStatementBuilder().withConnection(connection) - .withQuery(COUNT_RECORDS_QUERY).withParameters(parameters).build(); - ResultSet rs = preparedStatement.executeQuery()) { + try (PreparedStatement preparedStatement = new PreparedStatementBuilder() + .withConnection(connection) + .withQuery(COUNT_RECORDS_QUERY) + .withParameters(Arrays.asList(tableName.getSchemaName(), tableName.getTableName())).build(); + ResultSet rs = preparedStatement.executeQuery()) { while (rs.next()) { totalRecordCount = rs.getLong(1); } if (totalRecordCount > 0) { - Optional primaryKey = getPrimaryKey(getTableLayoutRequest.getTableName()); + Optional primaryKey = getPrimaryKey(tableName); long recordsInPartition = (long) (Math.ceil(totalRecordCount / MAX_PARTITION_COUNT)); long partitionRecordCount = (totalRecordCount <= SINGLE_SPLIT_LIMIT_COUNT || !primaryKey.isPresent()) ? 
(long) totalRecordCount : recordsInPartition; - LOGGER.info(" Total Page Count: " + partitionRecordCount); + LOGGER.info(" Total Page Count: " + partitionRecordCount); double numberOfPartitions = (int) Math.ceil(totalRecordCount / partitionRecordCount); long offset = 0; /** @@ -276,7 +297,7 @@ public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTabl * It will have maximum 50 partitions and number of records in each partition is decided by dividing total number of records by 50 * the partition values we are setting the limit and offset values like p-limit-3000-offset-0 */ - for (int i = 1; i <= numberOfPartitions; i++) { + for (int i = 1; i <= numberOfPartitions; i++) { final String partitionVal = BLOCK_PARTITION_COLUMN_NAME + "-primary-" + primaryKey.orElse("") + "-limit-" + partitionRecordCount + "-offset-" + offset; LOGGER.info("partitionVal {} ", partitionVal); blockWriter.writeRows((Block block, int rowNum) -> @@ -288,22 +309,19 @@ public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest getTabl } } else { - LOGGER.info("No Records Found for table {}", getTableLayoutRequest.getTableName().getTableName()); + LOGGER.info("No Records Found for table {}", tableName); } } } } - /** + /* * Check if the input table is a view and returns viewflag accordingly - * @param getTableLayoutRequest - * @return - * @throws Exception */ - private boolean checkForView(GetTableLayoutRequest getTableLayoutRequest) throws Exception + private boolean checkForView(TableName tableName) throws Exception { boolean viewFlag = false; - List viewparameters = Arrays.asList(getTableLayoutRequest.getTableName().getSchemaName(), getTableLayoutRequest.getTableName().getTableName()); + List viewparameters = Arrays.asList(tableName.getSchemaName(), tableName.getTableName()); try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) { try (PreparedStatement preparedStatement = new 
PreparedStatementBuilder().withConnection(connection).withQuery(VIEW_CHECK_QUERY).withParameters(viewparameters).build(); ResultSet resultSet = preparedStatement.executeQuery()) { @@ -319,7 +337,7 @@ private boolean checkForView(GetTableLayoutRequest getTableLayoutRequest) throws @Override public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) { - LOGGER.info("{}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName()); + LOGGER.info("doGetSplits: {}: Catalog {}, table {}", getSplitsRequest.getQueryId(), getSplitsRequest.getTableName().getSchemaName(), getSplitsRequest.getTableName().getTableName()); if (getSplitsRequest.getConstraints().isQueryPassThrough()) { LOGGER.info("QPT Split Requested"); return setupQueryPassthroughSplit(getSplitsRequest); @@ -362,15 +380,55 @@ private String encodeContinuationToken(int partition) public GetTableResponse doGetTable(final BlockAllocator blockAllocator, final GetTableRequest getTableRequest) throws Exception { + LOGGER.debug("doGetTable getTableName:{}", getTableRequest.getTableName()); try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) { Schema partitionSchema = getPartitionSchema(getTableRequest.getCatalogName()); - TableName tableName = getTableFromMetadata(connection.getCatalog(), getTableRequest.getTableName(), connection.getMetaData()); + TableName tableName = SnowflakeCaseInsensitiveResolver.getAdjustedTableObjectNameBasedOnConfig(connection, getTableRequest.getTableName(), configOptions); GetTableResponse getTableResponse = new GetTableResponse(getTableRequest.getCatalogName(), tableName, getSchema(connection, tableName, partitionSchema), partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet())); return getTableResponse; } } + @Override + public ListTablesResponse doListTables(final BlockAllocator 
blockAllocator, final ListTablesRequest listTablesRequest) + throws Exception + { + try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider())) { + LOGGER.info("{}: List table names for Catalog {}, Schema {}", listTablesRequest.getQueryId(), + listTablesRequest.getCatalogName(), listTablesRequest.getSchemaName()); + String schemaName = SnowflakeCaseInsensitiveResolver.getAdjustedSchemaNameBasedOnConfig(connection, listTablesRequest.getSchemaName(), configOptions); + + String token = listTablesRequest.getNextToken(); + int pageSize = listTablesRequest.getPageSize(); + + if (pageSize == UNLIMITED_PAGE_SIZE_VALUE && token == null) { // perform no pagination + LOGGER.info("doListTables - NO pagination"); + return new ListTablesResponse(listTablesRequest.getCatalogName(), listTablesNoPagination(connection, schemaName), null); + } + + LOGGER.info("doListTables - pagination - NOT SUPPORTED - return all tables"); + return new ListTablesResponse(listTablesRequest.getCatalogName(), listTablesNoPagination(connection, schemaName), null); + } + } + + private List listTablesNoPagination(final Connection jdbcConnection, final String databaseName) + throws SQLException + { + LOGGER.debug("listTables, databaseName:" + databaseName); + try (ResultSet resultSet = jdbcConnection.getMetaData().getTables( + jdbcConnection.getCatalog(), + databaseName, + null, + new String[] {"TABLE", "VIEW", "EXTERNAL TABLE", "MATERIALIZED VIEW"})) { + ImmutableList.Builder list = ImmutableList.builder(); + while (resultSet.next()) { + list.add(JDBCUtil.getSchemaTableName(resultSet)); + } + return list.build(); + } + } + /** * * @param jdbcConnection @@ -379,9 +437,10 @@ public GetTableResponse doGetTable(final BlockAllocator blockAllocator, final Ge * @return * @throws Exception */ - public Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema) + private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema 
partitionSchema) throws Exception { + LOGGER.debug("getSchema start, tableName:" + tableName); /** * query to fetch column data type to handle appropriate datatype to arrowtype conversions. */ @@ -391,8 +450,8 @@ public Schema getSchema(Connection jdbcConnection, TableName tableName, Schema p try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData()); Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider()); PreparedStatement stmt = connection.prepareStatement(dataTypeQuery)) { - stmt.setString(1, tableName.getSchemaName().toUpperCase()); - stmt.setString(2, tableName.getTableName().toUpperCase()); + stmt.setString(1, tableName.getSchemaName()); + stmt.setString(2, tableName.getTableName()); HashMap hashMap = new HashMap(); ResultSet dataTypeResultSet = stmt.executeQuery(); @@ -419,16 +478,8 @@ public Schema getSchema(Connection jdbcConnection, TableName tableName, Schema p String dataType = hashMap.get(columnName); LOGGER.debug("columnName: " + columnName); LOGGER.debug("dataType: " + dataType); - final Map stringArrowTypeMap = com.google.common.collect.ImmutableMap.of( - "INTEGER", (ArrowType) Types.MinorType.INT.getType(), - "DATE", (ArrowType) Types.MinorType.DATEDAY.getType(), - "TIMESTAMP", (ArrowType) Types.MinorType.DATEMILLI.getType(), - "TIMESTAMP_LTZ", (ArrowType) Types.MinorType.DATEMILLI.getType(), - "TIMESTAMP_NTZ", (ArrowType) Types.MinorType.DATEMILLI.getType(), - "TIMESTAMP_TZ", (ArrowType) Types.MinorType.DATEMILLI.getType() - ); - if (dataType != null && stringArrowTypeMap.containsKey(dataType.toUpperCase())) { - columnType = stringArrowTypeMap.get(dataType.toUpperCase()); + if (dataType != null && STRING_ARROW_TYPE_MAP.containsKey(dataType.toUpperCase())) { + columnType = STRING_ARROW_TYPE_MAP.get(dataType.toUpperCase()); } /** * converting into VARCHAR for not supported data types. 
@@ -469,90 +520,18 @@ public Schema getSchema(Connection jdbcConnection, TableName tableName, Schema p private ResultSet getColumns(final String catalogName, final TableName tableHandle, final DatabaseMetaData metadata) throws SQLException { + LOGGER.debug("getColumns, catalogName:" + catalogName + ", tableHandle: " + tableHandle); String escape = metadata.getSearchStringEscape(); - return metadata.getColumns( + + ResultSet columns = metadata.getColumns( catalogName, escapeNamePattern(tableHandle.getSchemaName(), escape), escapeNamePattern(tableHandle.getTableName(), escape), null); - } - /** - * Finding table name from query hint - * In sap hana schemas and tables can be case sensitive, but executed query from athena sends table and schema names - * in lower case, this has been handled by appending query hint to the table name as below - * "lambda:lambdaname".SCHEMA_NAME."TABLE_NAME@schemacase=upper&tablecase=upper" - * @param table - * @return - */ - protected TableName findTableNameFromQueryHint(TableName table) - { - //if no query hints has been passed then return input table name - if (!table.getTableName().contains("@")) { - return new TableName(table.getSchemaName().toUpperCase(), table.getTableName().toUpperCase()); - } - //analyze the hint to find table and schema case - String[] tbNameWithQueryHint = table.getTableName().split("@"); - String[] hintDetails = tbNameWithQueryHint[1].split("&"); - String schemaCase = CASE_UPPER; - String tableCase = CASE_UPPER; - String tableName = tbNameWithQueryHint[0]; - for (String str : hintDetails) { - String[] hintDetail = str.split("="); - if (hintDetail[0].contains("schema")) { - schemaCase = hintDetail[1]; - } - else if (hintDetail[0].contains("table")) { - tableCase = hintDetail[1]; - } - } - if (schemaCase.equalsIgnoreCase(CASE_UPPER) && tableCase.equalsIgnoreCase(CASE_UPPER)) { - return new TableName(table.getSchemaName().toUpperCase(), tableName.toUpperCase()); - } - else if 
(schemaCase.equalsIgnoreCase(CASE_LOWER) && tableCase.equalsIgnoreCase(CASE_LOWER)) { - return new TableName(table.getSchemaName().toLowerCase(), tableName.toLowerCase()); - } - else if (schemaCase.equalsIgnoreCase(CASE_LOWER) && tableCase.equalsIgnoreCase(CASE_UPPER)) { - return new TableName(table.getSchemaName().toLowerCase(), tableName.toUpperCase()); - } - else if (schemaCase.equalsIgnoreCase(CASE_UPPER) && tableCase.equalsIgnoreCase(CASE_LOWER)) { - return new TableName(table.getSchemaName().toUpperCase(), tableName.toLowerCase()); - } - else { - return new TableName(table.getSchemaName().toUpperCase(), tableName.toUpperCase()); - } + return columns; } - /** - * Logic to handle case sensitivity of table name and schema name - * @param catalogName - * @param tableHandle - * @param metadata - * @return - * @throws SQLException - */ - protected TableName getTableFromMetadata(final String catalogName, final TableName tableHandle, final DatabaseMetaData metadata) - throws SQLException - { - TableName tableName = findTableNameFromQueryHint(tableHandle); - //check for presence exact table and schema name returned by findTableNameFromQueryHint method by invoking metadata.getTables method - ResultSet resultSet = metadata.getTables(catalogName, tableName.getSchemaName(), tableName.getTableName(), null); - while (resultSet.next()) { - if (tableName.getTableName().equals(resultSet.getString(3))) { - tableName = new TableName(tableName.getSchemaName(), resultSet.getString(3)); - return tableName; - } - } - // if table not found in above step, check for presence of input table by doing pattern search - ResultSet rs = metadata.getTables(catalogName, tableName.getSchemaName().toUpperCase(), "%", null); - while (rs.next()) { - if (tableName.getTableName().equalsIgnoreCase(rs.getString(3))) { - tableName = new TableName(tableName.getSchemaName().toUpperCase(), rs.getString(3)); - return tableName; - } - } - return tableName; - } @Override public ListSchemasResponse 
doListSchemaNames(final BlockAllocator blockAllocator, final ListSchemasRequest listSchemasRequest) throws Exception @@ -562,10 +541,13 @@ public ListSchemasResponse doListSchemaNames(final BlockAllocator blockAllocator return new ListSchemasResponse(listSchemasRequest.getCatalogName(), listDatabaseNames(connection)); } } - protected static Set listDatabaseNames(final Connection jdbcConnection) + + private static Set listDatabaseNames(final Connection jdbcConnection) throws Exception { - try (ResultSet resultSet = jdbcConnection.getMetaData().getSchemas()) { + try (ResultSet resultSet = jdbcConnection + .getMetaData() + .getSchemas(jdbcConnection.getCatalog(), null)) { ImmutableSet.Builder schemaNames = ImmutableSet.builder(); String inputCatalogName = jdbcConnection.getCatalog(); String inputSchemaName = jdbcConnection.getSchema(); diff --git a/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeRecordHandler.java b/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeRecordHandler.java index 28ac13ff21..f1776b3985 100644 --- a/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeRecordHandler.java +++ b/athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeRecordHandler.java @@ -42,6 +42,7 @@ import java.sql.SQLException; import static com.amazonaws.athena.connectors.snowflake.SnowflakeConstants.SNOWFLAKE_QUOTE_CHARACTER; +import static com.amazonaws.athena.connectors.snowflake.SnowflakeMetadataHandler.JDBC_PROPERTIES; public class SnowflakeRecordHandler extends JdbcRecordHandler { @@ -59,7 +60,7 @@ public SnowflakeRecordHandler(java.util.Map configOptions) public SnowflakeRecordHandler(DatabaseConnectionConfig databaseConnectionConfig, java.util.Map configOptions) { this(databaseConnectionConfig, new GenericJdbcConnectionFactory(databaseConnectionConfig, - SnowflakeMetadataHandler.JDBC_PROPERTIES, + 
SnowflakeEnvironmentProperties.getSnowFlakeParameter(JDBC_PROPERTIES, configOptions), new DatabaseConnectionInfo(SnowflakeConstants.SNOWFLAKE_DRIVER_CLASS, SnowflakeConstants.SNOWFLAKE_DEFAULT_PORT)), configOptions); } @@ -77,7 +78,7 @@ public SnowflakeRecordHandler(DatabaseConnectionConfig databaseConnectionConfig, } @Override - public PreparedStatement buildSplitSql(Connection jdbcConnection, String catalogName, TableName tableName, Schema schema, Constraints constraints, Split split) throws SQLException + public PreparedStatement buildSplitSql(Connection jdbcConnection, String catalogName, TableName tableNameInput, Schema schema, Constraints constraints, Split split) throws SQLException { PreparedStatement preparedStatement; @@ -85,7 +86,7 @@ public PreparedStatement buildSplitSql(Connection jdbcConnection, String catalog preparedStatement = buildQueryPassthroughSql(jdbcConnection, constraints); } else { - preparedStatement = jdbcSplitQueryBuilder.buildSql(jdbcConnection, null, tableName.getSchemaName(), tableName.getTableName(), schema, constraints, split); + preparedStatement = jdbcSplitQueryBuilder.buildSql(jdbcConnection, null, tableNameInput.getSchemaName(), tableNameInput.getTableName(), schema, constraints, split); } // Disable fetching all rows. 
diff --git a/athena-snowflake/src/test/java/com/amazonaws/athena/connectors/snowflake/SnowflakeMetadataHandlerTest.java b/athena-snowflake/src/test/java/com/amazonaws/athena/connectors/snowflake/SnowflakeMetadataHandlerTest.java index 6a219a3b1f..37421c2a02 100644 --- a/athena-snowflake/src/test/java/com/amazonaws/athena/connectors/snowflake/SnowflakeMetadataHandlerTest.java +++ b/athena-snowflake/src/test/java/com/amazonaws/athena/connectors/snowflake/SnowflakeMetadataHandlerTest.java @@ -111,11 +111,11 @@ public void doGetTableLayout() String[] primaryKeyColumns = new String[] {SnowflakeMetadataHandler.PRIMARY_KEY_COLUMN_NAME}; String[][] primaryKeyValues = new String[][]{new String[] {"pkey"}}; ResultSet primaryKeyResultSet = mockResultSet(primaryKeyColumns, primaryKeyValues, new AtomicInteger(-1)); - Mockito.when(this.connection.prepareStatement(SnowflakeMetadataHandler.SHOW_PRIMARY_KEYS_QUERY + "testTable")).thenReturn(primaryKeyPreparedStatement); + Mockito.when(this.connection.prepareStatement(SnowflakeMetadataHandler.SHOW_PRIMARY_KEYS_QUERY + "\"testSchema\"" + "." 
+ "\"testTable\"")).thenReturn(primaryKeyPreparedStatement); Mockito.when(primaryKeyPreparedStatement.executeQuery()).thenReturn(primaryKeyResultSet); PreparedStatement countsPreparedStatement = Mockito.mock(PreparedStatement.class); - String GET_PKEY_COUNTS_QUERY = "SELECT pkey, count(*) as COUNTS FROM testTable GROUP BY pkey ORDER BY COUNTS DESC"; + String GET_PKEY_COUNTS_QUERY = "SELECT \"pkey\", count(*) as COUNTS FROM \"testSchema\".\"testTable\" GROUP BY \"pkey\" ORDER BY COUNTS DESC"; String[] countsColumns = new String[] {"pkey", SnowflakeMetadataHandler.COUNTS_COLUMN_NAME}; Object[][] countsValues = {{"a", 1}}; ResultSet countsResultSet = mockResultSet(countsColumns, countsValues, new AtomicInteger(-1)); @@ -136,7 +136,7 @@ public void doGetTableLayout() if (i > 1) { offset = offset + partitionActualRecordCount; } - actualValues.add("[partition : partition-primary-pkey-limit-" +partitionActualRecordCount + "-offset-" + offset + "]"); + actualValues.add("[partition : partition-primary-\"pkey\"-limit-" + + partitionActualRecordCount + "-offset-" + offset + "]"); } Assert.assertEquals((int)limit, getTableLayoutResponse.getPartitions().getRowCount()); Assert.assertEquals(expectedValues, actualValues); @@ -179,7 +179,7 @@ public void doGetTableLayoutSinglePartition() Mockito.when(primaryKeyPreparedStatement.executeQuery()).thenReturn(primaryKeyResultSet); PreparedStatement countsPreparedStatement = Mockito.mock(PreparedStatement.class); - String GET_PKEY_COUNTS_QUERY = "SELECT pkey, count(*) as COUNTS FROM testTable GROUP BY pkey ORDER BY COUNTS DESC"; + String GET_PKEY_COUNTS_QUERY = "SELECT \"pkey\", count(*) as COUNTS FROM \"testSchema\".\"testTable\" GROUP BY \"pkey\" ORDER BY COUNTS DESC"; String[] countsColumns = new String[] {"pkey", SnowflakeMetadataHandler.COUNTS_COLUMN_NAME}; Object[][] countsValues = {{"a", 1}}; ResultSet countsResultSet = mockResultSet(countsColumns, countsValues, new AtomicInteger(-1)); @@ -236,12 +236,12 @@ public void 
doGetTableLayoutMaxPartition() String[] primaryKeyColumns = new String[] {SnowflakeMetadataHandler.PRIMARY_KEY_COLUMN_NAME}; String[][] primaryKeyValues = new String[][]{new String[] {"pkey"}}; ResultSet primaryKeyResultSet = mockResultSet(primaryKeyColumns, primaryKeyValues, new AtomicInteger(-1)); - Mockito.when(this.connection.prepareStatement(SnowflakeMetadataHandler.SHOW_PRIMARY_KEYS_QUERY + "testTable")).thenReturn(primaryKeyPreparedStatement); + Mockito.when(this.connection.prepareStatement(SnowflakeMetadataHandler.SHOW_PRIMARY_KEYS_QUERY + "\"testSchema\"" + "." + "\"testTable\"")).thenReturn(primaryKeyPreparedStatement); Mockito.when(primaryKeyPreparedStatement.executeQuery()).thenReturn(primaryKeyResultSet); PreparedStatement countsPreparedStatement = Mockito.mock(PreparedStatement.class); - String GET_PKEY_COUNTS_QUERY = "SELECT pkey, count(*) as COUNTS FROM testTable GROUP BY pkey ORDER BY COUNTS DESC"; - String[] countsColumns = new String[] {"pkey", SnowflakeMetadataHandler.COUNTS_COLUMN_NAME}; + String GET_PKEY_COUNTS_QUERY = "SELECT \"pkey\", count(*) as COUNTS FROM \"testSchema\".\"testTable\" GROUP BY \"pkey\" ORDER BY COUNTS DESC"; + String[] countsColumns = new String[] {"\"pkey\"", SnowflakeMetadataHandler.COUNTS_COLUMN_NAME}; Object[][] countsValues = {{"a", 1}}; ResultSet countsResultSet = mockResultSet(countsColumns, countsValues, new AtomicInteger(-1)); Mockito.when(this.connection.prepareStatement(GET_PKEY_COUNTS_QUERY)).thenReturn(countsPreparedStatement); @@ -257,7 +257,7 @@ public void doGetTableLayoutMaxPartition() if (i > 1) { offset = offset + partitionActualRecordCount; } - actualValues.add("[partition : partition-primary-pkey-limit-" +partitionActualRecordCount + "-offset-" + offset + "]"); + actualValues.add("[partition : partition-primary-\"pkey\"-limit-" +partitionActualRecordCount + "-offset-" + offset + "]"); } Assert.assertEquals(expectedValues,actualValues); SchemaBuilder expectedSchemaBuilder = SchemaBuilder.newBuilder(); 
@@ -426,28 +426,6 @@ public void doGetTable() Assert.assertEquals("testCatalog", getTableResponse.getCatalogName()); } - @Test - public void testFindTableNameFromQueryHint() - throws Exception - { - TableName inputTableName = new TableName("testSchema", "testTable@schemacase=upper&tablecase=upper"); - TableName tableName = snowflakeMetadataHandler.findTableNameFromQueryHint(inputTableName); - Assert.assertEquals(new TableName("TESTSCHEMA", "TESTTABLE"), tableName); - - TableName inputTableName1 = new TableName("testSchema", "testTable@schemacase=upper&tablecase=lower"); - TableName tableName1 = snowflakeMetadataHandler.findTableNameFromQueryHint(inputTableName1); - Assert.assertEquals(new TableName("TESTSCHEMA", "testtable"), tableName1); - - TableName inputTableName2 = new TableName("testSchema", "testTable@schemacase=lower&tablecase=lower"); - TableName tableName2 = snowflakeMetadataHandler.findTableNameFromQueryHint(inputTableName2); - Assert.assertEquals(new TableName("testschema", "testtable"), tableName2); - - TableName inputTableName3 = new TableName("testSchema", "testTable@schemacase=lower&tablecase=upper"); - TableName tableName3 = snowflakeMetadataHandler.findTableNameFromQueryHint(inputTableName3); - Assert.assertEquals(new TableName("testschema", "TESTTABLE"), tableName3); - - } - @Test(expected = RuntimeException.class) public void doListSchemaNames() throws Exception { BlockAllocator blockAllocator = new BlockAllocatorImpl(); @@ -457,7 +435,7 @@ public void doListSchemaNames() throws Exception { Mockito.when(this.connection.createStatement()).thenReturn(statement); String[][] SchemaandCatalogNames = {{"TESTSCHEMA"},{"TESTCATALOG"}}; ResultSet schemaResultSet = mockResultSet(new String[]{"TABLE_SCHEM","TABLE_CATALOG"}, new int[]{Types.VARCHAR,Types.VARCHAR}, SchemaandCatalogNames, new AtomicInteger(-1)); - Mockito.when(this.connection.getMetaData().getSchemas()).thenReturn(schemaResultSet); + 
Mockito.when(this.connection.getMetaData().getSchemas(any(), any())).thenReturn(schemaResultSet); ListSchemasResponse listSchemasResponse = this.snowflakeMetadataHandler.doListSchemaNames(blockAllocator, listSchemasRequest); String[] expectedResult = {"TESTSCHEMA","TESTCATALOG"}; Assert.assertEquals(Arrays.toString(expectedResult), listSchemasResponse.getSchemas().toString()); diff --git a/athena-sqlserver/Dockerfile b/athena-sqlserver/Dockerfile index e602b9fc50..2456d9e94b 100644 --- a/athena-sqlserver/Dockerfile +++ b/athena-sqlserver/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-sqlserver-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-sqlserver-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.sqlserver.SqlServerMuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly. +# No need to specify here (already defined in .yaml file because legacy and connections use different) diff --git a/athena-sqlserver/athena-sqlserver-connection.yaml b/athena-sqlserver/athena-sqlserver-connection.yaml new file mode 100644 index 0000000000..54ba72b3a8 --- /dev/null +++ b/athena-sqlserver/athena-sqlserver-connection.yaml @@ -0,0 +1,169 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaSqlServerConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with SQL Server using JDBC driver.' 
+ Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - sqlserver + - athena-federation + - jdbc + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. 
subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join [ "", !Ref SecurityGroupIds ], "" ] ] + HasSubnets: !Not [ !Equals [ !Join [ "", !Ref SubnetIds ], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-sqlserver:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.sqlserver.SqlServerCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with SQLSERVER using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - 
lambda.amazonaws.com + Action: + - "sts:AssumeRole" + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + 
Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole diff --git a/athena-sqlserver/athena-sqlserver.yaml b/athena-sqlserver/athena-sqlserver.yaml index 8edbf4e082..4aa9804b16 100644 --- a/athena-sqlserver/athena-sqlserver.yaml +++ b/athena-sqlserver/athena-sqlserver.yaml @@ -82,6 +82,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-sqlserver:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.sqlserver.SqlServerMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with SQLSERVER using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory @@ -104,7 +106,7 @@ Resources: Service: - lambda.amazonaws.com Action: - - "sts:AssumeRole" + - "sts:AssumeRole" FunctionExecutionPolicy: Condition: NotHasLambdaRole Type: "AWS::IAM::Policy" @@ -124,38 +126,38 @@ Resources: Effect: Allow Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' - Action: - - logs:CreateLogStream - - logs:PutLogEvents + - logs:CreateLogStream + - logs:PutLogEvents Effect: Allow Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' - Action: - - athena:GetQueryExecution + - athena:GetQueryExecution Effect: Allow Resource: '*' - Action: - - ec2:CreateNetworkInterface - - ec2:DeleteNetworkInterface - - ec2:DescribeNetworkInterfaces - - ec2:DetachNetworkInterface + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface Effect: Allow Resource: '*' - Action: - - s3:GetObject - - s3:ListBucket - - s3:GetBucketLocation - - s3:GetObjectVersion - - s3:PutObject - - s3:PutObjectAcl - - s3:GetLifecycleConfiguration - - s3:PutLifecycleConfiguration - - s3:DeleteObject + - s3:GetObject + - s3:ListBucket 
+ - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject Effect: Allow Resource: - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName} - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket - Fn::Sub: - - arn:${AWS::Partition}:s3:::${bucketName}/* - - bucketName: - Ref: SpillBucket + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket \ No newline at end of file diff --git a/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerCompositeHandler.java b/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerCompositeHandler.java index bc4b1b9077..b56a72f774 100644 --- a/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerCompositeHandler.java +++ b/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerCompositeHandler.java @@ -25,6 +25,6 @@ public class SqlServerCompositeHandler extends CompositeHandler { public SqlServerCompositeHandler() { - super(new SqlServerMetadataHandler(System.getenv()), new SqlServerRecordHandler(System.getenv())); + super(new SqlServerMetadataHandler(new SqlServerEnvironmentProperties().createEnvironment()), new SqlServerRecordHandler(new SqlServerEnvironmentProperties().createEnvironment())); } } diff --git a/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerEnvironmentProperties.java b/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerEnvironmentProperties.java new file mode 100644 index 0000000000..d7672db050 --- /dev/null +++ b/athena-sqlserver/src/main/java/com/amazonaws/athena/connectors/sqlserver/SqlServerEnvironmentProperties.java @@ -0,0 +1,53 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * 
%% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.sqlserver; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DATABASE; + +public class SqlServerEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "sqlserver://jdbc:sqlserver://"; + } + + @Override + protected String getDatabase(Map connectionProperties) + { + return ";databaseName=" + connectionProperties.get(DATABASE); + } + + @Override + protected String getJdbcParametersSeparator() + { + return ";"; + } + + @Override + protected String getDelimiter() + { + return ";"; + } +} diff --git a/athena-synapse/Dockerfile b/athena-synapse/Dockerfile index 2a7a05ec98..ce59270ed0 100644 --- a/athena-synapse/Dockerfile +++ b/athena-synapse/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-synapse-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-synapse-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.synapse.SynapseMuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly. 
+# No need to specify here (already defined in .yaml file because legacy and connections use different) diff --git a/athena-synapse/athena-synapse-connection.yaml b/athena-synapse/athena-synapse-connection.yaml new file mode 100644 index 0000000000..56645fd176 --- /dev/null +++ b/athena-synapse/athena-synapse-connection.yaml @@ -0,0 +1,174 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaSynapseConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with Synapse using JDBC driver.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - synapse + - athena-federation + - jdbc + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." 
+ Type: String + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Default: "" + Type: String + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ] + HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-synapse:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.synapse.SynapseCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with SYNPASE using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, 
!GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + - autoscaling:CompleteLifecycleAction + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - 
glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + Roles: + - !Ref FunctionRole + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole diff --git a/athena-synapse/athena-synapse.yaml b/athena-synapse/athena-synapse.yaml index 50f1092c7c..1c524545f8 100644 --- a/athena-synapse/athena-synapse.yaml +++ b/athena-synapse/athena-synapse.yaml @@ -83,6 +83,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-synapse:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.synapse.SynapseMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with SYNPASE using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory @@ -165,4 +167,4 @@ Resources: - bucketName: Ref: SpillBucket Roles: - - !Ref FunctionRole + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-synapse/pom.xml b/athena-synapse/pom.xml index bb40a5cad7..5dd871603b 100644 --- a/athena-synapse/pom.xml +++ b/athena-synapse/pom.xml @@ -35,7 +35,7 @@ com.microsoft.azure msal4j - 1.17.2 + 1.17.3 com.fasterxml.jackson.datatype diff --git a/athena-synapse/src/main/java/com/amazonaws/athena/connectors/synapse/SynapseCompositeHandler.java b/athena-synapse/src/main/java/com/amazonaws/athena/connectors/synapse/SynapseCompositeHandler.java index e4499930f3..ccf952b241 100644 ---
a/athena-synapse/src/main/java/com/amazonaws/athena/connectors/synapse/SynapseCompositeHandler.java +++ b/athena-synapse/src/main/java/com/amazonaws/athena/connectors/synapse/SynapseCompositeHandler.java @@ -25,6 +25,6 @@ public class SynapseCompositeHandler extends CompositeHandler { public SynapseCompositeHandler() { - super(new SynapseMetadataHandler(System.getenv()), new SynapseRecordHandler(System.getenv())); + super(new SynapseMetadataHandler(new SynapseEnvironmentProperties().createEnvironment()), new SynapseRecordHandler(new SynapseEnvironmentProperties().createEnvironment())); } } diff --git a/athena-synapse/src/main/java/com/amazonaws/athena/connectors/synapse/SynapseEnvironmentProperties.java b/athena-synapse/src/main/java/com/amazonaws/athena/connectors/synapse/SynapseEnvironmentProperties.java new file mode 100644 index 0000000000..a9d146e311 --- /dev/null +++ b/athena-synapse/src/main/java/com/amazonaws/athena/connectors/synapse/SynapseEnvironmentProperties.java @@ -0,0 +1,53 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.synapse; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DATABASE; + +public class SynapseEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map<String, String> connectionProperties) + { + return "synapse://jdbc:sqlserver://"; + } + + @Override + protected String getDatabase(Map<String, String> connectionProperties) + { + return ";databaseName=" + connectionProperties.get(DATABASE); + } + + @Override + protected String getJdbcParametersSeparator() + { + return ";"; + } + + @Override + protected String getDelimiter() + { + return ";"; + } +} diff --git a/athena-teradata/Dockerfile b/athena-teradata/Dockerfile index 8f58411065..996da2013f 100644 --- a/athena-teradata/Dockerfile +++ b/athena-teradata/Dockerfile @@ -5,5 +5,5 @@ COPY target/athena-teradata-2022.47.1.jar ${LAMBDA_TASK_ROOT} # Unpack the jar RUN jar xf athena-teradata-2022.47.1.jar -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "com.amazonaws.athena.connectors.teradata.TeradataMuxCompositeHandler" ] \ No newline at end of file +# Command can be overwritten by providing a different command in the template directly. +# No need to specify here (already defined in .yaml file because legacy and connections use different) diff --git a/athena-teradata/athena-teradata-connection.yaml b/athena-teradata/athena-teradata-connection.yaml new file mode 100644 index 0000000000..dcdc786e9c --- /dev/null +++ b/athena-teradata/athena-teradata-connection.yaml @@ -0,0 +1,169 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaTeradataConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with your Teradata instance(s) using JDBC driver.'
+ Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SecretName: + Description: 'Used to create resource-based authorization policy for "secretsmanager:GetSecretValue" action. E.g. All Athena JDBC Federation secret names can be prefixed with "AthenaJdbcFederation" and authorization policy will allow "arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:AthenaJdbcFederation*". Parameter value in this case should be "AthenaJdbcFederation". If you do not have a prefix, you can manually update the IAM policy to add allow any secret names.' + Type: String + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SecurityGroupIds: + Description: '(Optional) One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: CommaDelimitedList + Default: "" + SubnetIds: + Description: '(Optional) One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. subnet1,subnet2)' + Type: CommaDelimitedList + Default: "" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." 
+ Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" +Conditions: + HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ] + HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ] + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] +Resources: + JdbcConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-teradata:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.teradata.TeradataCompositeHandler" ] + Description: "Enables Amazon Athena to communicate with Teradata using JDBC" + Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ] + SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: 
FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretName}*' + - Action: + - logs:CreateLogGroup + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*' + - Action: + - logs:CreateLogStream + - logs:PutLogEvents + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*' + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-teradata/athena-teradata.yaml
b/athena-teradata/athena-teradata.yaml index 14177fa635..987201837b 100644 --- a/athena-teradata/athena-teradata.yaml +++ b/athena-teradata/athena-teradata.yaml @@ -80,6 +80,8 @@ Resources: ImageUri: !Sub - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-teradata:2022.47.1' - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + ImageConfig: + Command: [ "com.amazonaws.athena.connectors.teradata.TeradataMuxCompositeHandler" ] Description: "Enables Amazon Athena to communicate with Teradata using JDBC" Timeout: !Ref LambdaTimeout MemorySize: !Ref LambdaMemory diff --git a/athena-teradata/pom.xml b/athena-teradata/pom.xml index c44fd9da86..05ccc6025a 100644 --- a/athena-teradata/pom.xml +++ b/athena-teradata/pom.xml @@ -62,7 +62,7 @@ com.teradata.jdbc terajdbc - 20.00.00.34 + 20.00.00.38 @@ -86,7 +86,7 @@ - + diff --git a/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataCompositeHandler.java b/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataCompositeHandler.java index 8aec45fc78..0990873960 100644 --- a/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataCompositeHandler.java +++ b/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataCompositeHandler.java @@ -20,6 +20,7 @@ */ package com.amazonaws.athena.connectors.teradata; + import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler; /** @@ -33,6 +34,6 @@ public class TeradataCompositeHandler { public TeradataCompositeHandler() { - super(new TeradataMetadataHandler(System.getenv()), new TeradataRecordHandler(System.getenv())); + super(new TeradataMetadataHandler(new TeradataEnvironmentProperties().createEnvironment()), new TeradataRecordHandler(new TeradataEnvironmentProperties().createEnvironment())); } } diff --git a/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataEnvironmentProperties.java 
b/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataEnvironmentProperties.java new file mode 100644 index 0000000000..0b41bbcb0f --- /dev/null +++ b/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataEnvironmentProperties.java @@ -0,0 +1,47 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +package com.amazonaws.athena.connectors.teradata; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +import static com.amazonaws.athena.connector.lambda.connection.EnvironmentConstants.DATABASE; + +public class TeradataEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map<String, String> connectionProperties) + { + return "teradata://jdbc:teradata://"; + } + + @Override + protected String getDatabase(Map<String, String> connectionProperties) + { + return "/TMODE=ANSI,CHARSET=UTF8,DATABASE=" + connectionProperties.get(DATABASE); + } + + @Override + protected String getJdbcParametersSeparator() + { + return ","; + } +} diff --git a/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataMetadataHandler.java b/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataMetadataHandler.java index d230e8dcbf..dbbf79c1a5 100644 ---
a/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataMetadataHandler.java +++ b/athena-teradata/src/main/java/com/amazonaws/athena/connectors/teradata/TeradataMetadataHandler.java @@ -244,7 +244,7 @@ private boolean useNonPartitionApproach(GetTableLayoutRequest getTableLayoutRequ { final String getPartitionsCountQuery = "Select count(distinct partition ) as partition_count FROM " + getTableLayoutRequest.getTableName().getSchemaName() + "." + getTableLayoutRequest.getTableName().getTableName() + " where 1= ?"; - String partitioncount = configOptions.get("partitioncount"); + String partitioncount = configOptions.containsKey("partition_count") ? configOptions.get("partition_count") : configOptions.getOrDefault("partitioncount", "500"); int totalPartitionCount = Integer.parseInt(partitioncount); int partitionCount = 0; boolean nonPartitionApproach = false; diff --git a/athena-timestream/athena-timestream-connection.yaml b/athena-timestream/athena-timestream-connection.yaml new file mode 100644 index 0000000000..288cbfea47 --- /dev/null +++ b/athena-timestream/athena-timestream-connection.yaml @@ -0,0 +1,142 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaTimestreamConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with Amazon Timestream, making your time series data accessible from Athena.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. 
This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-timestream:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Enables Amazon Athena to communicate with Amazon Timestream, making your time series data accessible from Athena." 
+ Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - glue:GetTableVersions + - glue:GetPartitions + - glue:GetTables + - glue:GetTableVersion + - glue:GetDatabases + - glue:GetTable + - glue:GetPartition + - glue:GetDatabase + - athena:GetQueryExecution + - timestream:Describe* + - timestream:List* + - timestream:Select* + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub 
"arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-timestream/athena-timestream.yaml b/athena-timestream/athena-timestream.yaml index 2e0387ce57..3062d1f435 100644 --- a/athena-timestream/athena-timestream.yaml +++ b/athena-timestream/athena-timestream.yaml @@ -83,4 +83,4 @@ Resources: #S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy #with one that is more restrictive and can only 'put' but not read,delete, or overwrite files. - S3CrudPolicy: - BucketName: !Ref SpillBucket + BucketName: !Ref SpillBucket \ No newline at end of file diff --git a/athena-timestream/src/main/java/com/amazonaws/athena/connectors/timestream/TimestreamCompositeHandler.java b/athena-timestream/src/main/java/com/amazonaws/athena/connectors/timestream/TimestreamCompositeHandler.java index e85fc84532..3101f0197d 100644 --- a/athena-timestream/src/main/java/com/amazonaws/athena/connectors/timestream/TimestreamCompositeHandler.java +++ b/athena-timestream/src/main/java/com/amazonaws/athena/connectors/timestream/TimestreamCompositeHandler.java @@ -19,6 +19,7 @@ */ package com.amazonaws.athena.connectors.timestream; +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler; public class TimestreamCompositeHandler @@ -26,6 +27,6 @@ public class TimestreamCompositeHandler { public TimestreamCompositeHandler() { - super(new TimestreamMetadataHandler(System.getenv()), new TimestreamRecordHandler(System.getenv())); + super(new TimestreamMetadataHandler(new EnvironmentProperties().createEnvironment()), new TimestreamRecordHandler(new EnvironmentProperties().createEnvironment())); } } diff --git a/athena-tpcds/athena-tpcds-connection.yaml b/athena-tpcds/athena-tpcds-connection.yaml new file mode 100644 index 0000000000..6bc7d694b9 --- /dev/null +++
b/athena-tpcds/athena-tpcds-connection.yaml @@ -0,0 +1,133 @@ +Transform: 'AWS::Serverless-2016-10-31' +Metadata: + 'AWS::ServerlessRepo::Application': + Name: AthenaTPCDSConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with a randomly generated TPC-DS data source.' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: + - athena-federation + HomePageUrl: 'https://github.com/awslabs/aws-athena-query-federation' + SemanticVersion: 2022.47.1 + SourceCodeUrl: 'https://github.com/awslabs/aws-athena-query-federation' +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." 
+ Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-tpcds:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "This connector enables Amazon Athena to communicate with a randomly generated TPC-DS data source." 
+ Timeout: 900 + MemorySize: 3008 + Role: !If [NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn] + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName} + - bucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${bucketName}/* + - bucketName: + Ref: SpillBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:${AWS::Partition}:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-tpcds/src/main/java/com/amazonaws/athena/connectors/tpcds/TPCDSCompositeHandler.java
b/athena-tpcds/src/main/java/com/amazonaws/athena/connectors/tpcds/TPCDSCompositeHandler.java index baec56ea4c..ce8fe93952 100644 --- a/athena-tpcds/src/main/java/com/amazonaws/athena/connectors/tpcds/TPCDSCompositeHandler.java +++ b/athena-tpcds/src/main/java/com/amazonaws/athena/connectors/tpcds/TPCDSCompositeHandler.java @@ -19,6 +19,7 @@ */ package com.amazonaws.athena.connectors.tpcds; +import com.amazonaws.athena.connector.lambda.connection.EnvironmentProperties; import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler; public class TPCDSCompositeHandler @@ -26,6 +27,6 @@ public class TPCDSCompositeHandler { public TPCDSCompositeHandler() { - super(new TPCDSMetadataHandler(System.getenv()), new TPCDSRecordHandler(System.getenv())); + super(new TPCDSMetadataHandler(new EnvironmentProperties().createEnvironment()), new TPCDSRecordHandler(new EnvironmentProperties().createEnvironment())); } } diff --git a/athena-udfs/athena-udfs.yaml b/athena-udfs/athena-udfs.yaml index facaba9bcb..8a36679838 100644 --- a/athena-udfs/athena-udfs.yaml +++ b/athena-udfs/athena-udfs.yaml @@ -25,7 +25,7 @@ Parameters: Description: 'Lambda memory in MB (min 128 - 3008 max).' Default: 3008 Type: Number - SecretNameOrPrefix: + SecretName: Description: 'The name or prefix of a set of names within Secrets Manager that this function should have access to. (e.g. database-*).' 
Type: String PermissionsBoundaryARN: @@ -54,5 +54,5 @@ Resources: - Action: - secretsmanager:GetSecretValue Effect: Allow - Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:*:*:secret:${SecretNameOrPrefix}' + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:*:*:secret:${SecretName}' Version: '2012-10-17' diff --git a/athena-vertica/athena-vertica-connection.yaml b/athena-vertica/athena-vertica-connection.yaml new file mode 100644 index 0000000000..502c3ac451 --- /dev/null +++ b/athena-vertica/athena-vertica-connection.yaml @@ -0,0 +1,177 @@ +Transform: 'AWS::Serverless-2016-10-31' + +Metadata: + AWS::ServerlessRepo::Application: + Name: AthenaVerticaConnectorWithGlueConnection + Description: 'This connector enables Amazon Athena to communicate with Vertica' + Author: 'default author' + SpdxLicenseId: Apache-2.0 + LicenseUrl: LICENSE.txt + ReadmeUrl: README.md + Labels: ['athena-federation'] + HomePageUrl: https://github.com/awslabs/aws-athena-query-federation + SemanticVersion: 2022.47.1 + SourceCodeUrl: https://github.com/awslabs/aws-athena-query-federation + +# Parameters are CloudFormation features to pass input +# to your template when you create a stack +Parameters: + LambdaFunctionName: + Description: 'This is the name of the lambda function that will be created. This name must satisfy the pattern ^[a-z0-9-_]{1,64}$' + Type: String + AllowedPattern: ^[a-z0-9-_]{1,64}$ + SpillBucket: + Description: 'The name of the bucket where this function can spill data.' + Type: String + ExportBucket: + Description: "The bucket where the Vertica Query results will be exported." + Type: String + GlueConnection: + Description: "Name of glue connection storing connection details for Federated Data source." + Type: String + SubnetIds: + Description: 'One or more Subnet IDs corresponding to the Subnet that the Lambda function can use to access you data source. (e.g. 
subnet1,subnet2)' + Type: 'List<AWS::EC2::Subnet::Id>' + SecurityGroupIds: + Description: 'One or more SecurityGroup IDs corresponding to the SecurityGroup that should be applied to the Lambda function. (e.g. sg1,sg2,sg3)' + Type: 'List<AWS::EC2::SecurityGroup::Id>' + SecretName: + Description: 'The name or prefix of a set of names within Secrets Manager that this function should have access to. (e.g. vertica-*).' + Type: String + Default: "vertica-*" + KmsKeyId: + Description: "(Optional) By default any data that is spilled to S3 is encrypted using AES-GCM and a randomly generated key. Setting a KMS Key ID allows your Lambda function to use KMS for key generation for a stronger source of encryption keys." + Type: String + Default: "" + LambdaRoleArn: + Description: "(Optional) A custom role to be used by the Connector lambda" + Type: String + Default: "" + +Conditions: + HasKmsKeyId: !Not [ !Equals [ !Ref KmsKeyId, "" ] ] + NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ] + CreateKmsPolicy: !And [ !Condition HasKmsKeyId, !Condition NotHasLambdaRole ] + IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"] + IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"] + +Resources: + ConnectorConfig: + Type: 'AWS::Serverless::Function' + Properties: + Environment: + Variables: + glue_connection: !Ref GlueConnection + FunctionName: !Ref LambdaFunctionName + PackageType: "Image" + ImageUri: !Sub + - '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-vertica:2022.47.1' + - Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]] + Description: "Amazon Athena Vertica Connector" + Timeout: 900 + MemorySize: 3008 + Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ] + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + FunctionRole: + Condition: NotHasLambdaRole + Type: AWS::IAM::Role + Properties: + ManagedPolicyArns: + - "arn:aws:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" +
AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - "sts:AssumeRole" + + FunctionExecutionPolicy: + Condition: NotHasLambdaRole + Type: "AWS::IAM::Policy" + Properties: + Roles: + - !Ref FunctionRole + PolicyName: FunctionExecutionPolicy + PolicyDocument: + Version: 2012-10-17 + Statement: + - Action: + - athena:GetQueryExecution + Effect: Allow + Resource: '*' + - Action: + - s3:ListBucket + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:s3:::${ExportBucket}' + - !Sub 'arn:${AWS::Partition}:s3:::${SpillBucket}' + - Action: + - secretsmanager:GetSecretValue + Effect: Allow + Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:*:*:secret:${SecretName}*' + - Action: + - ec2:CreateNetworkInterface + - ec2:DeleteNetworkInterface + - ec2:DescribeNetworkInterfaces + - ec2:DetachNetworkInterface + Effect: Allow + Resource: '*' + - Action: + - s3:GetObject + - s3:ListBucket + - s3:GetBucketLocation + - s3:GetObjectVersion + - s3:PutObject + - s3:PutObjectAcl + - s3:GetLifecycleConfiguration + - s3:PutLifecycleConfiguration + - s3:DeleteObject + Effect: Allow + Resource: + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${spillBucketName} + - spillBucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${spillBucketName}/* + - spillBucketName: + Ref: SpillBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${exportBucketName} + - exportBucketName: + Ref: ExportBucket + - Fn::Sub: + - arn:${AWS::Partition}:s3:::${exportBucketName}/* + - exportBucketName: + Ref: ExportBucket + - Action: + - glue:GetConnection + Effect: Allow + Resource: + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}' + - !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog' + + FunctionKmsPolicy: + Condition: CreateKmsPolicy + Type: "AWS::IAM::Policy" + Properties: + PolicyName: FunctionKmsPolicy + PolicyDocument: + Version: 
2012-10-17 + Statement: + - Effect: Allow + Action: + - kms:GenerateRandom + Resource: '*' + - Effect: Allow + Action: + - kms:GenerateDataKey + Resource: !Sub "arn:aws:kms:${AWS::Region}:${AWS::AccountId}:key/${KmsKeyId}" + Roles: + - !Ref FunctionRole \ No newline at end of file diff --git a/athena-vertica/athena-vertica.yaml b/athena-vertica/athena-vertica.yaml index 60775db042..472e70e253 100644 --- a/athena-vertica/athena-vertica.yaml +++ b/athena-vertica/athena-vertica.yaml @@ -128,4 +128,4 @@ Resources: SecurityGroupIds: #SecurityGroup that should be applied to the Lambda function - !Ref LambdaSecurityGroup - SubnetIds: !Ref SubnetIds + SubnetIds: !Ref SubnetIds \ No newline at end of file diff --git a/athena-vertica/pom.xml b/athena-vertica/pom.xml index a637ce0266..1b319fa6a2 100644 --- a/athena-vertica/pom.xml +++ b/athena-vertica/pom.xml @@ -80,6 +80,7 @@ com.amazonaws athena-jdbc 2022.47.1 + compile diff --git a/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaCompositeHandler.java b/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaCompositeHandler.java index 07467897b2..38212f254b 100644 --- a/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaCompositeHandler.java +++ b/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaCompositeHandler.java @@ -38,7 +38,7 @@ public class VerticaCompositeHandler { public VerticaCompositeHandler() throws CertificateEncodingException, IOException, NoSuchAlgorithmException, KeyStoreException { - super(new VerticaMetadataHandler(System.getenv()), new VerticaRecordHandler(System.getenv())); + super(new VerticaMetadataHandler(new VerticaEnvironmentProperties().createEnvironment()), new VerticaRecordHandler(new VerticaEnvironmentProperties().createEnvironment())); installCaCertificate(); setupNativeEnvironmentVariables(); } diff --git 
a/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaEnvironmentProperties.java b/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaEnvironmentProperties.java new file mode 100644 index 0000000000..5a9ec76437 --- /dev/null +++ b/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaEnvironmentProperties.java @@ -0,0 +1,33 @@ +/*- + * #%L + * Amazon Athena Query Federation SDK + * %% + * Copyright (C) 2019 - 2024 Amazon Web Services + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +package com.amazonaws.athena.connectors.vertica; + +import com.amazonaws.athena.connectors.jdbc.JdbcEnvironmentProperties; + +import java.util.Map; + +public class VerticaEnvironmentProperties extends JdbcEnvironmentProperties +{ + @Override + protected String getConnectionStringPrefix(Map connectionProperties) + { + return "vertica://jdbc:vertica://"; + } +} diff --git a/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaMetadataHandler.java b/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaMetadataHandler.java index da52327eaf..a3d5cf2406 100644 --- a/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaMetadataHandler.java +++ b/athena-vertica/src/main/java/com/amazonaws/athena/connectors/vertica/VerticaMetadataHandler.java @@ -369,16 +369,17 @@ public GetSplitsResponse doGetSplits(BlockAllocator allocator, GetSplitsRequest FieldReader fieldReaderAwsRegion = request.getPartitions().getFieldReader("awsRegionSql"); String awsRegionSql = fieldReaderAwsRegion.readText().toString(); + List s3ObjectsList = getlistExportedObjects(exportBucket, queryId); + if (s3ObjectsList.isEmpty()) { + // Execute queries on Vertica if S3 export bucket does not contain objects for given queryId + executeQueriesOnVertica(connection, sqlStatement, awsRegionSql); + // Retrieve the S3 objects list for given queryId + s3ObjectsList = getlistExportedObjects(exportBucket, queryId); + } - //execute the queries on Vertica - executeQueriesOnVertica(connection, sqlStatement, awsRegionSql); - - /* - * For each generated S3 object, create a split and add data to the split. 
- */ Split split; - List s3ObjectsList = getlistExportedObjects(exportBucket, queryId); + // Create a split for each s3 object if(!s3ObjectsList.isEmpty()) { for (S3Object s3Object : s3ObjectsList) diff --git a/pom.xml b/pom.xml index a0892ff62e..363e67d55f 100644 --- a/pom.xml +++ b/pom.xml @@ -14,29 +14,29 @@ 11 3.13.0 - 2.29.9 + 2.29.29 1.2.2 1.6.0 1.204.0 - 1.104.0 + 1.105.0 2.0.16 4.11.0 4.13.2 - 1.9.1 + 1.9.2 3.26.3 7.10.2 - 2.18.1 + 2.18.2 3.5.2 - 2.24.1 + 2.24.2 13.0.0 33.3.1-jre 3.25.3 4.3.4 2.15 4.5.14 - 12.8.1.jre11 + 12.9.0.jre11-preview 1.9.0 3.2.1 1.19.0 @@ -44,7 +44,7 @@ 3.6.0 3.6.0 3.3.1 - 3.11.1 + 3.11.2 3.4.2 none @@ -318,7 +318,7 @@ org.codehaus.mojo license-maven-plugin - 2.4.0 + 2.5.0 false false diff --git a/tools/bump_versions/common.py b/tools/bump_versions/common.py index 4bc750300d..d3882ddd85 100755 --- a/tools/bump_versions/common.py +++ b/tools/bump_versions/common.py @@ -41,8 +41,7 @@ def update_yaml(yaml_files, new_version): def update_dockerfile(dockerfiles, new_version): for file in dockerfiles: - subprocess.run(["sed", "-i", f"s|\(target\/.*-\)[0-9]*\.[0-9]*\.[0-9]*|\\1{new_version}|", file]) - subprocess.run(["sed", "-i", f"s|\(xf\s*.*-\)[0-9]*\.[0-9]*\.[0-9]*|\\1{new_version}|", file]) + subprocess.run(["sed", "-i", f"s|\(athena-.*\)-[0-9]*\.[0-9]*\.[0-9]*\.jar|\\1-{new_version}.jar|g", file]) def update_project_version(soup, new_version):