diff --git a/sm2a/infrastructure/configuration/airflow.cfg b/sm2a/infrastructure/configuration/airflow.cfg
deleted file mode 100755
index 49591f28..00000000
--- a/sm2a/infrastructure/configuration/airflow.cfg
+++ /dev/null
@@ -1,63 +0,0 @@
-[api]
-auth_backends = airflow.api.auth.backend.basic_auth
-
-[core]
-executor = CeleryExecutor
-dags_are_paused_at_creation = true
-load_examples = false
-load_default_connections = false
-# Allow airflow to run hundreds of tasks in parallel, because we will scale workers
-# automatically.
-# https://programmaticponderings.com/2020/12/29/amazon-managed-workflows-for-apache-airflow-configuration-understanding-amazon-mwaas-configuration-options/
-max_active_tasks_per_dag = 10000
-parallelism = 10000
-
-[celery]
-broker_url = sqs://
-celery_config_options = configuration.celery_config.CELERY_CONFIG
-
-
-[github_enterprise]
-api_rev = v3
-host = github.com
-client_id = Iv23lil9JEmXAM6QJlFe
-client_secret = 8cbd483d2cb4e73599dffba93dbd0295ef0830c5
-oauth_callback_route = /home
-allowed_teams = VEDA
-
-[webserver]
-authenticate = True
-auth_backends = airflow.contrib.auth.backends.github_enterprise_auth
-dag_default_view = grid
-expose_config = true
-dag_orientation = TB
-warn_deployment_exposure = false
-
-# On ECS, you can deploy the CloudWatch agent as a sidecar to your application container to collect metrics.
-# https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/deploy_servicelens_CloudWatch_agent_deploy_ECS.html
-# https://airflow.apache.org/docs/apache-airflow/stable/logging-monitoring/metrics.html
-# https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-custom-metrics-statsd.html
-# https://docs.aws.amazon.com/mwaa/latest/userguide/mwaa-autoscaling.html
-# https://docs.aws.amazon.com/mwaa/latest/userguide/access-metrics-cw-202.html#available-metrics-cw-v202
-# [metrics]
-# statsd_on = true
-# statsd_host = localhost
-# statsd_port = 8125
-# statsd_prefix = airflow
-
-[scheduler]
-catchup_by_default = false
-
-[logging]
-# logging_config_class = configuration.logging_config.STDOUT_LOGGING_CONFIG
-remote_logging = true
-# We set this value as an environment variable
-# remote_base_log_folder =
-
-[secrets]
-# AWS Secrets Manager Backend
-# https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/secrets-backends/aws-secrets-manager.html
-# Setting full_url_mode to false allows us to use multiple fields when storing connections
-# Source code: https://github.com/apache/airflow/blob/main/airflow/providers/amazon/aws/secrets/secrets_manager.py
-backend = airflow.providers.amazon.aws.secrets.secrets_manager.SecretsManagerBackend
-backend_kwargs = {"connections_prefix": "sm2a-dev/airflow/connections", "variables_prefix": "sm2a-dev/airflow/variables","connections_lookup_pattern": "_default$", "variables_lookup_pattern": "^aws_", "config_prefix": "sm2a-dev/airflow/config"}
diff --git a/sm2a/infrastructure/main.tf b/sm2a/infrastructure/main.tf
index 9089222b..3e735010 100644
--- a/sm2a/infrastructure/main.tf
+++ b/sm2a/infrastructure/main.tf
@@ -19,7 +19,7 @@ resource "random_password" "password" {
 
 
 module "sma-base" {
-  source     = "https://github.com/NASA-IMPACT/self-managed-apache-airflow/releases/download/v1.1.4/self-managed-apache-airflow.zip"
+  source     = "https://github.com/NASA-IMPACT/self-managed-apache-airflow/releases/download/v1.1.5/self-managed-apache-airflow.zip"
   project    = var.project_name
   airflow_db = var.airflow_db
   fernet_key = var.fernet_key
@@ -28,22 +28,22 @@ module "sma-base" {
   public_subnets_tagname         = var.public_subnets_tagname
   vpc_id                         = var.vpc_id
   state_bucketname               = var.state_bucketname
-  desired_max_workers_count     = var.workers_configuration[var.stage].max_desired_workers
+  desired_max_workers_count     = var.desired_max_workers_count
   airflow_admin_password         = random_password.password.result
   airflow_admin_username         = "admin"
   rds_publicly_accessible        = var.rds_publicly_accessible
   permission_boundaries_arn      = var.permission_boundaries_arn
   custom_worker_policy_statement = var.custom_worker_policy_statement
-  worker_cpu                    = var.workers_configuration[var.stage].cpu
-  worker_memory                 = var.workers_configuration[var.stage].memory
+  worker_cpu                    = tonumber(var.workers_cpu)
+  worker_memory                 = tonumber(var.workers_memory)
   number_of_schedulers           = var.number_of_schedulers
-  scheduler_cpu                 = var.scheduler_cpu
-  scheduler_memory              = var.scheduler_memory
-  rds_engine_version            = var.rds_configuration[var.stage].rds_engine_version
-  rds_instance_class            = var.rds_configuration[var.stage].rds_instance_class
-  rds_allocated_storage         = var.rds_configuration[var.stage].rds_allocated_storage
-  rds_max_allocated_storage     = var.rds_configuration[var.stage].rds_max_allocated_storage
-  workers_logs_retention_days   = var.workers_configuration[var.stage].workers_logs_retention_days
+  scheduler_cpu                 = tonumber(var.scheduler_cpu)
+  scheduler_memory              = tonumber(var.scheduler_memory)
+  rds_engine_version            = var.rds_engine_version
+  rds_instance_class            = var.rds_instance_class
+  rds_allocated_storage         = tonumber(var.rds_allocated_storage)
+  rds_max_allocated_storage     = tonumber(var.rds_max_allocated_storage)
+  workers_logs_retention_days   = tonumber(var.workers_logs_retention_days)
 
   extra_airflow_task_common_environment = [
     {
@@ -52,7 +52,7 @@ module "sma-base" {
     },
     {
       name  = "AIRFLOW__CORE__DEFAULT_TASK_RETRIES"
-      value = var.workers_configuration[var.stage].task_retries
+      value = var.workers_task_retries
     },
     {
      name  = "GH_CLIENT_ID"
diff --git a/sm2a/infrastructure/s3_event_bridge_lambda.tf b/sm2a/infrastructure/s3_event_bridge_lambda.tf
index a4ca3550..53882637 100644
--- a/sm2a/infrastructure/s3_event_bridge_lambda.tf
+++ b/sm2a/infrastructure/s3_event_bridge_lambda.tf
@@ -113,6 +113,7 @@ data "archive_file" "python_lambda_package" {
 
 
 resource "aws_lambda_function" "lambda" {
+  count    = var.eis_storage_bucket_name != null ? 1 : 0
   provider = aws.aws_current
 
   filename = "/tmp/s3_event_bridge_to_sfn_execute.zip"
@@ -127,17 +128,19 @@ resource "aws_lambda_function" "lambda" {
     variables = {
       TARGET_DAG_ID            = var.target_dag_id
       SM2A_SECRET_MANAGER_NAME = var.sm2a_secret_manager_name
-      STORAGE_BUCKET           = var.storage_bucket_name
-      S3_FILTER_PREFIX         = var.s3_invoke_filter_prefix
+      STORAGE_BUCKET           = var.eis_storage_bucket_name
+      S3_FILTER_PREFIX         = var.eis_s3_invoke_filter_prefix
     }
   }
 }
 
 
 resource "aws_cloudwatch_log_group" "group" {
+  count = var.eis_storage_bucket_name != null ? 1 : 0
+
   provider = aws.aws_current
 
-  name              = "/aws/lambda/${aws_lambda_function.lambda.function_name}"
+  name              = "/aws/lambda/${aws_lambda_function.lambda[count.index].function_name}"
   retention_in_days = 5
 }
 
@@ -146,25 +149,27 @@
 #####################################################
 
 resource "aws_lambda_permission" "s3_invoke" {
+  count         = var.eis_storage_bucket_name != null ? 1 : 0
   provider      = aws.aws_current
 
   action        = "lambda:InvokeFunction"
-  function_name = aws_lambda_function.lambda.function_name
+  function_name = aws_lambda_function.lambda[count.index].function_name
   principal     = "s3.amazonaws.com"
   statement_id  = "AllowInvocationFromS3Bucket-veda-${var.stage}"
-  source_arn    = "arn:aws:s3:::${var.storage_bucket_name}"
+  source_arn    = "arn:aws:s3:::${var.eis_storage_bucket_name}"
 }
 
 
 resource "aws_s3_bucket_notification" "bucket_notification" {
-  bucket = var.storage_bucket_name
+  count  = var.eis_storage_bucket_name != null ? 1 : 0
+  bucket = var.eis_storage_bucket_name
 
   lambda_function {
-    lambda_function_arn = aws_lambda_function.lambda.arn
+    lambda_function_arn = aws_lambda_function.lambda[count.index].arn
     events              = ["s3:ObjectCreated:*"]
-    filter_prefix       = var.s3_invoke_filter_prefix
+    filter_prefix       = var.eis_s3_invoke_filter_prefix
     filter_suffix       = ".gpkg"
   }
diff --git a/sm2a/infrastructure/variables.tf b/sm2a/infrastructure/variables.tf
index 8056c3d8..a6fe186a 100644
--- a/sm2a/infrastructure/variables.tf
+++ b/sm2a/infrastructure/variables.tf
@@ -65,102 +65,10 @@ variable "subdomain" {
   default = "null"
 }
 
-
-variable "rds_configuration" {
-  type = object({
-    dev = object({
-      rds_instance_class        = string,
-      rds_allocated_storage     = number,
-      rds_max_allocated_storage = number,
-      rds_engine_version        = string
-    })
-    staging = object({
-      rds_instance_class        = string,
-      rds_allocated_storage     = number,
-      rds_max_allocated_storage = number,
-      rds_engine_version        = string
-    })
-    prod = object({
-      rds_instance_class        = string,
-      rds_allocated_storage     = number,
-      rds_max_allocated_storage = number,
-      rds_engine_version        = string
-    })
-
-  })
-  default = {
-    dev = {
-      rds_instance_class        = "db.t4g.medium",
-      rds_allocated_storage     = 20,
-      rds_max_allocated_storage = 100,
-      rds_engine_version        = "13"
-    },
-    staging = {
-      rds_instance_class        = "db.t4g.large",
-      rds_allocated_storage     = 40,
-      rds_max_allocated_storage = 100,
-      rds_engine_version        = "13"
-    },
-    prod = {
-      rds_instance_class        = "db.r5.xlarge",
-      rds_allocated_storage     = 100,
-      rds_max_allocated_storage = 200,
-      rds_engine_version        = "13"
-    }
-  }
-}
-
-variable "workers_configuration" {
-  type = object({
-    dev = object({
-      cpu                         = number,
-      memory                      = number,
-      max_desired_workers         = string,
-      task_retries                = string,
-      workers_logs_retention_days = number
-
-    })
-    staging = object({
-      cpu                         = number,
-      memory                      = number,
-      max_desired_workers         = string,
-      task_retries                = string,
-      workers_logs_retention_days = number
-    })
-    prod = object({
-      cpu                         = number,
-      memory                      = number,
-      max_desired_workers         = string,
-      task_retries                = string,
-      workers_logs_retention_days = number
-    })
-  })
-  default = {
-    dev = {
-      cpu                         = 2048,
-      memory                      = 4096,
-      max_desired_workers         = "5"
-      task_retries                = "0"
-      workers_logs_retention_days = 1
-    },
-    staging = {
-      cpu                         = 4096,
-      memory                      = 8192,
-      max_desired_workers         = "10",
-      task_retries                = "1",
-      workers_logs_retention_days = 1
-    },
-    prod = {
-      cpu                         = 8192,
-      memory                      = 16384,
-      max_desired_workers         = "30",
-      task_retries                = "1",
-      workers_logs_retention_days = 14
-    }
-  }
+variable "desired_max_workers_count" {
+  default = "5"
 }
 
-
 variable "gh_app_client_id" {
 }
 
@@ -214,15 +122,53 @@ variable "stac_url" {
 }
 
 variable "vector_secret_name" {
+  type    = string
+  default = null
 }
 
-variable "storage_bucket_name" {
+variable "eis_storage_bucket_name" {
+  type    = string
+  default = null
 }
 
-variable "s3_invoke_filter_prefix" {
+variable "eis_s3_invoke_filter_prefix" {
+  type    = string
+  default = null
 }
 
 variable "sm2a_secret_manager_name" {
+  type    = string
+  default = null
 }
 variable "target_dag_id" {
+  type    = string
+  default = null
+}
+
+
+variable "workers_cpu" {
+  default = 2048
+}
+variable "workers_memory" {
+  default = 4096
+}
+
+variable "rds_engine_version" {
+  default = "13"
+}
+variable "rds_instance_class" {
+  default = "db.t4g.medium"
+}
+variable "rds_allocated_storage" {
+  default = 20
+}
+variable "rds_max_allocated_storage" {
+  default = 200
+}
+variable "workers_logs_retention_days" {
+  default = 1
+}
+
+variable "workers_task_retries" {
+  default = "1"
 }
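With the per-stage rds_configuration and workers_configuration maps removed, stage-specific sizing is no longer selected through var.stage; each deployment now supplies the flat variables directly (for example via a tfvars file or -var flags), and the new defaults roughly mirror the old dev entries. A minimal sketch of what a production deployment might pass, assuming a hypothetical prod.tfvars and reusing the values from the removed prod defaults:

# prod.tfvars -- illustrative sketch only; the file name is an assumption and the
# values are copied from the removed per-stage "prod" defaults in variables.tf.
workers_cpu                 = 8192
workers_memory              = 16384
desired_max_workers_count   = "30"
workers_task_retries        = "1"
workers_logs_retention_days = 14

rds_instance_class        = "db.r5.xlarge"
rds_engine_version        = "13"
rds_allocated_storage     = 100
rds_max_allocated_storage = 200

Because main.tf wraps the numeric inputs in tonumber(), these values can be supplied either as numbers or as numeric strings.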
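The renamed eis_* variables, together with sm2a_secret_manager_name and target_dag_id, now default to null, and the S3 event-bridge Lambda, its log group, invoke permission, and bucket notification are only created when eis_storage_bucket_name is set (via the count = ... ? 1 : 0 toggle above). A deployment that wants the trigger would set them explicitly; the sketch below uses placeholder values, not names from this repository:

# Optional EIS S3 trigger -- all values below are placeholders.
eis_storage_bucket_name     = "example-eis-bucket"
eis_s3_invoke_filter_prefix = "example/prefix/"
sm2a_secret_manager_name    = "example-sm2a-secret"
target_dag_id               = "example_dag_id"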