Skip to content

Commit

Permalink
[sdlf-dataset] handling of storage parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
cnfait committed Nov 25, 2024
1 parent c628976 commit e1b5757
Showing 1 changed file with 67 additions and 43 deletions.
110 changes: 67 additions & 43 deletions sdlf-dataset/src/dataset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,42 @@ Parameters:
Type: String
Description: A string uniquely identifying this deployment in this AWS account
Default: dev
pStorageDeploymentInstance:
Type: String
Description: The string uniquely identifying a sdlf-foundations deployment in this AWS account
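Default: "" # when set, org/domain/bucket/key/role values are resolved from /sdlf/storage/*/<this value> in SSM and the individual storage parameters below are ignored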
pOrg:
Description: Name of the organization owning the datalake
Type: String
Default: "{{resolve:ssm:/sdlf/storage/rOrganization/dev}}"
Default: "" # if not provided, pStorageDeploymentInstance must be specified
pDomain:
Description: Data domain name
Type: String
Default: "{{resolve:ssm:/sdlf/storage/rDomain/dev}}"
Default: "" # if not provided, pStorageDeploymentInstance must be specified
pBucketKey:
Description: KMS key set as bucket key for the solutions' buckets
Type: String
Default: "" # if not provided, pStorageDeploymentInstance must be specified
pRawBucket:
Description: The raw bucket for the solution
Description: Raw bucket
Type: String
Default: "{{resolve:ssm:/sdlf/storage/rRawBucket/dev}}"
Default: "" # if not provided, pStorageDeploymentInstance must be specified
pStageBucket:
Description: The stage bucket for the solution
Description: Stage bucket
Type: String
Default: "{{resolve:ssm:/sdlf/storage/rStageBucket/dev}}"
Default: "" # if not provided, pStorageDeploymentInstance must be specified
pAnalyticsBucket:
Description: The analytics bucket for the solution
Description: Analytics bucket
Type: String
Default: "{{resolve:ssm:/sdlf/storage/rAnalyticsBucket/dev}}"
Default: "" # if not provided, pStorageDeploymentInstance must be specified
pArtifactsBucket:
Description: S3 bucket used to store artifacts (from CICD or generated by data pipelines)
Type: String
Default: "{{resolve:ssm:/sdlf/storage/rArtifactsBucket/dev}}"
pBucketKey:
Description: KMS key set as bucket key for the solutions' buckets
Type: String
Default: "{{resolve:ssm:/sdlf/storage/rKMSKey/dev}}"
Default: "" # if not provided, pStorageDeploymentInstance must be specified
pLakeFormationDataAccessRole:
Description: https://docs.aws.amazon.com/lake-formation/latest/dg/registration-role.html
Type: String
Default: "{{resolve:ssm:/sdlf/storage/rLakeFormationDataAccessRoleArn/dev}}"
Default: "" # if not provided, pStorageDeploymentInstance must be specified
pDatasetName:
Description: The name of the dataset (all lowercase, no symbols or spaces)
Type: String
Expand All @@ -59,6 +64,7 @@ Parameters:
Default: false

Conditions:
FetchFromStorageSsm: !Not [!Equals [!Ref pStorageDeploymentInstance, ""]]
IsS3Prefix: !Not [!Equals [!Ref pS3Prefix, ""]]
RunInVpc: !Equals [!Ref pEnableVpc, true]

Expand Down Expand Up @@ -165,7 +171,7 @@ Resources:
- kms:Describe*
Effect: Allow
Principal:
AWS: !Ref pLakeFormationDataAccessRole
AWS: !If [FetchFromStorageSsm, !Sub "{{resolve:ssm:/sdlf/storage/rLakeFormationDataAccessRoleArn/${pStorageDeploymentInstance}}}", !Ref pLakeFormationDataAccessRole]
Resource: "*"

rKMSDataKeyAlias:
Expand Down Expand Up @@ -203,7 +209,7 @@ Resources:
KmsKeyArn: !If
- IsS3Prefix
- !GetAtt rKMSDataKey.Arn
- !Ref pBucketKey
# The rKMSKey SSM parameter is used as-is as a KMS key ARN (same as pBucketKey); it must not be wrapped in an S3 ARN prefix.
- !If [FetchFromStorageSsm, !Sub "{{resolve:ssm:/sdlf/storage/rKMSKey/${pStorageDeploymentInstance}}}", !Ref pBucketKey]

rGlueSecurityConfigurationSsm:
Type: AWS::SSM::Parameter
Expand Down Expand Up @@ -326,9 +332,9 @@ Resources:
- s3:PutObject
- s3:PutObjectVersion
Resource:
- !Sub arn:${AWS::Partition}:s3:::${pRawBucket}/${pS3Prefix}/*
- !Sub arn:${AWS::Partition}:s3:::${pStageBucket}/${pS3Prefix}/*
- !Sub arn:${AWS::Partition}:s3:::${pAnalyticsBucket}/${pS3Prefix}/*
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rRawBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}/*", !Sub "arn:${AWS::Partition}:s3:::${pRawBucket}/${pS3Prefix}/*"]
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rStageBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}/*", !Sub "arn:${AWS::Partition}:s3:::${pStageBucket}/${pS3Prefix}/*"]
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rAnalyticsBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}/*", !Sub "arn:${AWS::Partition}:s3:::${pAnalyticsBucket}/${pS3Prefix}/*"]
- Effect: Allow
Action:
- kms:DescribeKey
Expand All @@ -341,9 +347,9 @@ Resources:
- IsS3Prefix
- - !GetAtt rKMSInfraKey.Arn
- !GetAtt rKMSDataKey.Arn
- !Ref pBucketKey
# The rKMSKey SSM parameter is used as-is as a KMS key ARN (same as pBucketKey); it must not be wrapped in an S3 ARN prefix.
- !If [FetchFromStorageSsm, !Sub "{{resolve:ssm:/sdlf/storage/rKMSKey/${pStorageDeploymentInstance}}}", !Ref pBucketKey]
- - !GetAtt rKMSInfraKey.Arn
- !Ref pBucketKey
# The rKMSKey SSM parameter is used as-is as a KMS key ARN (same as pBucketKey); it must not be wrapped in an S3 ARN prefix.
- !If [FetchFromStorageSsm, !Sub "{{resolve:ssm:/sdlf/storage/rKMSKey/${pStorageDeploymentInstance}}}", !Ref pBucketKey]
- Effect: Allow
Action:
- lakeformation:GetDataAccess # W11 exception
Expand Down Expand Up @@ -379,7 +385,10 @@ Resources:
CatalogId: !Ref AWS::AccountId
DatabaseInput:
Description: !Sub "${pDatasetName} raw metadata catalog"
Name: !Sub ${pOrg}_${pDomain}_${pDatasetName}_raw
Name: !If
- FetchFromStorageSsm
- !Sub "{{resolve:ssm:/sdlf/storage/rOrganization/${pStorageDeploymentInstance}}}_{{resolve:ssm:/sdlf/storage/rDomain/${pStorageDeploymentInstance}}}_${pDatasetName}_raw"
- !Sub ${pOrg}_${pDomain}_${pDatasetName}_raw

rRawGlueDataCatalogSsm:
Type: AWS::SSM::Parameter
Expand Down Expand Up @@ -411,7 +420,7 @@ Resources:
Name: !Sub sdlf-${pDatasetName}-raw-crawler
Targets:
S3Targets:
- Path: !Sub s3://${pRawBucket}/${pS3Prefix}
- Path: !If [FetchFromStorageSsm, !Sub "s3://{{resolve:ssm:/sdlf/storage/rRawBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}", !Sub "s3://${pRawBucket}/${pS3Prefix}"]

rRawGlueCrawlerGlueLakeFormationPermissions:
Type: AWS::LakeFormation::Permissions
Expand All @@ -435,7 +444,7 @@ Resources:
- DATA_LOCATION_ACCESS
Resource:
DataLocationResource:
S3Resource: !Sub arn:${AWS::Partition}:s3:::${pRawBucket}/${pS3Prefix}/
S3Resource: !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rRawBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}/", !Sub "arn:${AWS::Partition}:s3:::${pRawBucket}/${pS3Prefix}/"]

rRawGlueCrawlerSsm:
Type: AWS::SSM::Parameter
Expand All @@ -451,7 +460,10 @@ Resources:
CatalogId: !Ref AWS::AccountId
DatabaseInput:
Description: !Sub "${pDatasetName} stage metadata catalog"
Name: !Sub ${pOrg}_${pDomain}_${pDatasetName}_stage
Name: !If
- FetchFromStorageSsm
- !Sub "{{resolve:ssm:/sdlf/storage/rOrganization/${pStorageDeploymentInstance}}}_{{resolve:ssm:/sdlf/storage/rDomain/${pStorageDeploymentInstance}}}_${pDatasetName}_stage"
- !Sub ${pOrg}_${pDomain}_${pDatasetName}_stage

rStageGlueDataCatalogSsm:
Type: AWS::SSM::Parameter
Expand Down Expand Up @@ -483,7 +495,7 @@ Resources:
Name: !Sub sdlf-${pDatasetName}-stage-crawler
Targets:
S3Targets:
- Path: !Sub s3://${pStageBucket}/${pS3Prefix}
- Path: !If [FetchFromStorageSsm, !Sub "s3://{{resolve:ssm:/sdlf/storage/rStageBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}", !Sub "s3://${pStageBucket}/${pS3Prefix}"]

rStageGlueCrawlerGlueLakeFormationPermissions:
Type: AWS::LakeFormation::Permissions
Expand All @@ -507,7 +519,7 @@ Resources:
- DATA_LOCATION_ACCESS
Resource:
DataLocationResource:
S3Resource: !Sub arn:${AWS::Partition}:s3:::${pStageBucket}/${pS3Prefix}/
S3Resource: !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rStageBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}/", !Sub "arn:${AWS::Partition}:s3:::${pStageBucket}/${pS3Prefix}/"]

rStageGlueCrawlerSsm:
Type: AWS::SSM::Parameter
Expand All @@ -523,7 +535,10 @@ Resources:
CatalogId: !Ref AWS::AccountId
DatabaseInput:
Description: !Sub "${pDatasetName} analytics metadata catalog"
Name: !Sub ${pOrg}_${pDomain}_${pDatasetName}_analytics
Name: !If
- FetchFromStorageSsm
- !Sub "{{resolve:ssm:/sdlf/storage/rOrganization/${pStorageDeploymentInstance}}}_{{resolve:ssm:/sdlf/storage/rDomain/${pStorageDeploymentInstance}}}_${pDatasetName}_analytics"
- !Sub ${pOrg}_${pDomain}_${pDatasetName}_analytics

rAnalyticsGlueDataCatalogSsm:
Type: AWS::SSM::Parameter
Expand Down Expand Up @@ -555,7 +570,7 @@ Resources:
Name: !Sub sdlf-${pDatasetName}-analytics-crawler
Targets:
S3Targets:
- Path: !Sub s3://${pAnalyticsBucket}/${pS3Prefix}
- Path: !If [FetchFromStorageSsm, !Sub "s3://{{resolve:ssm:/sdlf/storage/rAnalyticsBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}", !Sub "s3://${pAnalyticsBucket}/${pS3Prefix}"]

rAnalyticsGlueCrawlerGlueLakeFormationPermissions:
Type: AWS::LakeFormation::Permissions
Expand All @@ -579,7 +594,7 @@ Resources:
- DATA_LOCATION_ACCESS
Resource:
DataLocationResource:
S3Resource: !Sub arn:${AWS::Partition}:s3:::${pAnalyticsBucket}/${pS3Prefix}/
S3Resource: !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rAnalyticsBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}/", !Sub "arn:${AWS::Partition}:s3:::${pAnalyticsBucket}/${pS3Prefix}/"]

rAnalyticsGlueCrawlerSsm:
Type: AWS::SSM::Parameter
Expand Down Expand Up @@ -701,21 +716,21 @@ Resources:
Action:
- s3:ListBucket
Resource:
- !Sub arn:${AWS::Partition}:s3:::${pArtifactsBucket}
- !Sub arn:${AWS::Partition}:s3:::${pRawBucket}
- !Sub arn:${AWS::Partition}:s3:::${pStageBucket}
- !Sub arn:${AWS::Partition}:s3:::${pAnalyticsBucket}
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rArtifactsBucket/${pStorageDeploymentInstance}}}", !Sub "arn:${AWS::Partition}:s3:::${pArtifactsBucket}"]
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rRawBucket/${pStorageDeploymentInstance}}}", !Sub "arn:${AWS::Partition}:s3:::${pRawBucket}"]
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rStageBucket/${pStorageDeploymentInstance}}}", !Sub "arn:${AWS::Partition}:s3:::${pStageBucket}"]
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rAnalyticsBucket/${pStorageDeploymentInstance}}}", !Sub "arn:${AWS::Partition}:s3:::${pAnalyticsBucket}"]
- Sid: AllowTeamPrefixActions
Effect: Allow
Action:
- s3:DeleteObject
- s3:GetObject
- s3:PutObject
Resource:
- !Sub arn:${AWS::Partition}:s3:::${pArtifactsBucket}/${pS3Prefix}/*
- !Sub arn:${AWS::Partition}:s3:::${pRawBucket}/${pS3Prefix}/*
- !Sub arn:${AWS::Partition}:s3:::${pStageBucket}/${pS3Prefix}/*
- !Sub arn:${AWS::Partition}:s3:::${pAnalyticsBucket}/${pS3Prefix}/*
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rArtifactsBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}/*", !Sub "arn:${AWS::Partition}:s3:::${pArtifactsBucket}/${pS3Prefix}/*"]
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rRawBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}/*", !Sub "arn:${AWS::Partition}:s3:::${pRawBucket}/${pS3Prefix}/*"]
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rStageBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}/*", !Sub "arn:${AWS::Partition}:s3:::${pStageBucket}/${pS3Prefix}/*"]
- !If [FetchFromStorageSsm, !Sub "arn:${AWS::Partition}:s3:::{{resolve:ssm:/sdlf/storage/rAnalyticsBucket/${pStorageDeploymentInstance}}}/${pS3Prefix}/*", !Sub "arn:${AWS::Partition}:s3:::${pAnalyticsBucket}/${pS3Prefix}/*"]
- Sid: AllowTeamKMSDataKeyUsage
Effect: Allow
Action:
Expand All @@ -729,9 +744,9 @@ Resources:
- IsS3Prefix
- - !GetAtt rKMSInfraKey.Arn
- !GetAtt rKMSDataKey.Arn
- !Ref pBucketKey
# The rKMSKey SSM parameter is used as-is as a KMS key ARN (same as pBucketKey); it must not be wrapped in an S3 ARN prefix.
- !If [FetchFromStorageSsm, !Sub "{{resolve:ssm:/sdlf/storage/rKMSKey/${pStorageDeploymentInstance}}}", !Ref pBucketKey]
- - !GetAtt rKMSInfraKey.Arn
- !Ref pBucketKey
# The rKMSKey SSM parameter is used as-is as a KMS key ARN (same as pBucketKey); it must not be wrapped in an S3 ARN prefix.
- !If [FetchFromStorageSsm, !Sub "{{resolve:ssm:/sdlf/storage/rKMSKey/${pStorageDeploymentInstance}}}", !Ref pBucketKey]
- Effect: Allow
Action:
- ssm:GetParameter
Expand Down Expand Up @@ -793,11 +808,20 @@ Resources:
- glue:GetDataQualityRule*
Resource:
- !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog
- !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:database/${pOrg}_${pDomain}_${pDatasetName}_*
- !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:table/${pOrg}_${pDomain}_${pDatasetName}_*
- !If
- FetchFromStorageSsm
- !Sub "arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:database/{{resolve:ssm:/sdlf/storage/rOrganization/${pStorageDeploymentInstance}}}_{{resolve:ssm:/sdlf/storage/rDomain/${pStorageDeploymentInstance}}}_${pDatasetName}_*"
- !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:database/${pOrg}_${pDomain}_${pDatasetName}_*
- !If
- FetchFromStorageSsm
- !Sub "arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:table/{{resolve:ssm:/sdlf/storage/rOrganization/${pStorageDeploymentInstance}}}_{{resolve:ssm:/sdlf/storage/rDomain/${pStorageDeploymentInstance}}}_${pDatasetName}_*"
- !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:table/${pOrg}_${pDomain}_${pDatasetName}_*
- !If
- FetchFromStorageSsm
# Job names are dash-separated (<org>-<domain>-<dataset>-*); keep the suffix identical to the non-SSM branch of this !If.
- !Sub "arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:job/{{resolve:ssm:/sdlf/storage/rOrganization/${pStorageDeploymentInstance}}}-{{resolve:ssm:/sdlf/storage/rDomain/${pStorageDeploymentInstance}}}-${pDatasetName}-*"
- !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:job/${pOrg}-${pDomain}-${pDatasetName}-*
- !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:crawler/sdlf-${pDatasetName}-*
- !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:job/sdlf-${pDatasetName}-*
- !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:job/${pOrg}-${pDomain}-${pDatasetName}-*
- !Sub arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:dataQualityRuleset/* # glue:StartDataQualityRuleRecommendationRun requires dataQualityRuleset/*
- Effect: Allow
Action:
Expand Down

0 comments on commit e1b5757

Please sign in to comment.