From 2aaec3b6e9fadba4fad32e35b5ae80fefaf5d1b9 Mon Sep 17 00:00:00 2001 From: Pat Heard Date: Sat, 9 Nov 2024 22:53:04 +0000 Subject: [PATCH 1/4] fix: add JSON classifier to account tags crawler Add a custom classifier for crawling files that contain a JSON array of objects. --- terragrunt/aws/glue/crawlers.tf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/terragrunt/aws/glue/crawlers.tf b/terragrunt/aws/glue/crawlers.tf index 012fdcc..77dba27 100644 --- a/terragrunt/aws/glue/crawlers.tf +++ b/terragrunt/aws/glue/crawlers.tf @@ -56,6 +56,7 @@ resource "aws_glue_crawler" "operations_aws_production_account_tags" { description = "Classify the AWS Organization account tag extract" database_name = aws_glue_catalog_database.operations_aws_production.name table_prefix = "account_tags_" + classifiers = [aws_glue_classifier.json_object_array.name] role = aws_iam_role.glue_crawler.arn security_configuration = aws_glue_security_configuration.encryption_at_rest.name @@ -77,3 +78,12 @@ resource "aws_glue_crawler" "operations_aws_production_account_tags" { schedule = "cron(00 13 * * ? *)" # Pickup new accounts each day } + +# JSON classifier for arrays of objects +resource "aws_glue_classifier" "json_object_array" { + name = "json_object_array" + + json_classifier { + json_path = "$[*]" + } +} From b6b11738327d3b5ce2f3a70bb4d9d926abe9f018 Mon Sep 17 00:00:00 2001 From: Pat Heard Date: Sun, 10 Nov 2024 02:08:43 +0000 Subject: [PATCH 2/4] feat: add Glue ETL IAM role --- terragrunt/aws/glue/iam.tf | 66 +++++++++++++++++++++++++++++++++++--- terragrunt/aws/glue/kms.tf | 1 + 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/terragrunt/aws/glue/iam.tf b/terragrunt/aws/glue/iam.tf index 924a2e0..40d5100 100644 --- a/terragrunt/aws/glue/iam.tf +++ b/terragrunt/aws/glue/iam.tf @@ -4,13 +4,20 @@ resource "aws_iam_role" "glue_crawler" { name = "AWSGlueCrawler-DataLake" path = "/service-role/" - assume_role_policy = data.aws_iam_policy_document.glue_crawler_assume.json + assume_role_policy = data.aws_iam_policy_document.glue_assume.json } resource "aws_iam_policy" "glue_crawler" { name = "AWSGlueCrawler-DataLake" path = "/service-role/" - policy = data.aws_iam_policy_document.glue_crawler.json + policy = data.aws_iam_policy_document.glue_crawler_combined.json +} + +data "aws_iam_policy_document" "glue_crawler_combined" { + source_policy_documents = [ + data.aws_iam_policy_document.s3_read_data_lake.json, + data.aws_iam_policy_document.glue_kms.json + ] } resource "aws_iam_role_policy_attachment" "glue_crawler" { @@ -23,7 +30,43 @@ resource "aws_iam_role_policy_attachment" "aws_glue_service_role" { role = aws_iam_role.glue_crawler.name } -data "aws_iam_policy_document" "glue_crawler_assume" { +# +# Glue ETL role +# +resource "aws_iam_role" "glue_etl" { + name = "AWSGlueETL-DataLake" + path = "/service-role/" + assume_role_policy = data.aws_iam_policy_document.glue_assume.json +} + +resource "aws_iam_policy" "glue_etl" { + name = "AWSGlueETL-DataLake" + path = "/service-role/" + policy = data.aws_iam_policy_document.glue_etl_combined.json +} + +data "aws_iam_policy_document" "glue_etl_combined" { + source_policy_documents = [ + data.aws_iam_policy_document.s3_read_data_lake.json, + data.aws_iam_policy_document.s3_write_data_lake.json, + data.aws_iam_policy_document.glue_kms.json + ] +} + +resource "aws_iam_role_policy_attachment" "glue_etl" { + policy_arn = aws_iam_policy.glue_etl.arn + role = aws_iam_role.glue_etl.name +} + +resource "aws_iam_role_policy_attachment" "glue_etl" { + policy_arn = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole" + role = aws_iam_role.glue_etl.name +} + +# +# Custom policies +# +data "aws_iam_policy_document" "glue_assume" { statement { actions = [ "sts:AssumeRole", @@ -37,7 +80,7 @@ data "aws_iam_policy_document" "glue_crawler_assume" { } } -data "aws_iam_policy_document" "glue_crawler" { +data "aws_iam_policy_document" "s3_read_data_lake" { statement { sid = "ReadDataLakeS3Buckets" actions = [ @@ -49,7 +92,9 @@ data "aws_iam_policy_document" "glue_crawler" { "${var.transformed_bucket_arn}/*" ] } +} +data "aws_iam_policy_document" "glue_kms" { statement { sid = "UseGlueKey" effect = "Allow" @@ -80,3 +125,16 @@ data "aws_iam_policy_document" "glue_crawler" { ] } } + +data "aws_iam_policy_document" "s3_write_data_lake" { + statement { + sid = "WriteDataLakeS3TransformedBuckets" + actions = [ + "s3:PutObject", + ] + resources = [ + "${var.curated_bucket_arn}/*", + "${var.transformed_bucket_arn}/*" + ] + } +} diff --git a/terragrunt/aws/glue/kms.tf b/terragrunt/aws/glue/kms.tf index 2b02617..107dc0e 100644 --- a/terragrunt/aws/glue/kms.tf +++ b/terragrunt/aws/glue/kms.tf @@ -1,6 +1,7 @@ locals { glue_role_arns = [ aws_iam_role.glue_crawler.arn, + aws_iam_role.glue_etl.arn, ] } From d4e3087ccfbcf88bd270f13dbc207c7ca69dd8f4 Mon Sep 17 00:00:00 2001 From: Pat Heard Date: Sun, 10 Nov 2024 02:11:39 +0000 Subject: [PATCH 3/4] fix: TF resource name clash --- terragrunt/aws/glue/iam.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terragrunt/aws/glue/iam.tf b/terragrunt/aws/glue/iam.tf index 40d5100..48741b5 100644 --- a/terragrunt/aws/glue/iam.tf +++ b/terragrunt/aws/glue/iam.tf @@ -58,7 +58,7 @@ resource "aws_iam_role_policy_attachment" "glue_etl" { role = aws_iam_role.glue_etl.name } -resource "aws_iam_role_policy_attachment" "glue_etl" { +resource "aws_iam_role_policy_attachment" "glue_etl_service_role" { policy_arn = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole" role = aws_iam_role.glue_etl.name } From d94602a82b77c91f4597729ba259caecfd36688d Mon Sep 17 00:00:00 2001 From: Pat Heard Date: Tue, 12 Nov 2024 15:14:25 +0000 Subject: [PATCH 4/4] chore: add lock file --- .../env/production/alarms/.terraform.lock.hcl | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 terragrunt/env/production/alarms/.terraform.lock.hcl diff --git a/terragrunt/env/production/alarms/.terraform.lock.hcl b/terragrunt/env/production/alarms/.terraform.lock.hcl new file mode 100644 index 0000000..584839c --- /dev/null +++ b/terragrunt/env/production/alarms/.terraform.lock.hcl @@ -0,0 +1,25 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/aws" { + version = "5.75.1" + constraints = "~> 5.0" + hashes = [ + "h1:ijX5mwbQZOnPVQGxxVsJs6Yh6h2w+V3mQmKznB6pIkw=", + "zh:1075825e7311a8d2d233fd453a173910e891b0320e8a7698af44d1f90b02621d", + "zh:203c5d09a03fcaa946defb8459f01227f2fcda07df768f74777beb328d6751ae", + "zh:21bc79ccb09bfdeb711a3a5226c6c4a457ac7c4bb781dbda6ade7be38461739f", + "zh:2bac969855b62a0ff6716954be29387a1f9793626059122cda4681206396e309", + "zh:4b65ea5b51058f05b9ec8797f76184e19e5b38a609029fe2226af3fa4ad289b3", + "zh:5065d7df357fb3ee2b0a2520bbcff6335c0c47bfb9e8e9932bad088c3ab7efd3", + "zh:678a4015a4cd26af5c2b30dfd9290b8a01e900668fa0fec6585dfd1838f1cebd", + "zh:6ddc5dfdd4a0dddca027db99a7bfa9a0978933119d63af81acb6020728405119", + "zh:98c0d48b09842c444dbcbddd279e5b5b1e44113951817a8ecc28896bb4ad1dd7", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:aad169fea072842c0b54f1ff95f1ec6558d6c5af3ea4c159308583db59003b09", + "zh:bd2625ed8e1ff29ac6ed3a810d7b68a090add5fcb2fce4122669bd37e1eb9f1d", + "zh:c6f57625e26a6ef1ffb49bfa0e6148496ad12d80c857f6bb222e21f293a2a78a", + "zh:c7cd085326c5eb88804b11a4bc0fbc8376f06138f4b9624fb25cd06ea8687cdd", + "zh:f60c98139f983817d4d08f4138b1e53f31f91176ff638631e8dd38b6de36fce0", + ] +}