diff --git a/geostore/populate_catalog/task.py b/geostore/populate_catalog/task.py
index 0e83e8592..fd211480f 100644
--- a/geostore/populate_catalog/task.py
+++ b/geostore/populate_catalog/task.py
@@ -14,7 +14,7 @@
 from ..api_keys import EVENT_KEY
 from ..aws_keys import BODY_KEY
 from ..boto3_config import CONFIG
-from ..logging_keys import GIT_COMMIT
+from ..logging_keys import GIT_COMMIT, LOG_MESSAGE_LAMBDA_FAILURE
 from ..parameter_store import ParameterName, get_param
 from ..pystac_io_methods import S3StacIO
 from ..resources import Resource
@@ -74,27 +74,39 @@ def handle_message(metadata_key: str) -> None:
 
     storage_bucket_path = f"{S3_URL_PREFIX}{Resource.STORAGE_BUCKET_NAME.resource_name}"
 
-    results = S3_CLIENT.list_objects(
-        Bucket=Resource.STORAGE_BUCKET_NAME.resource_name, Prefix=CATALOG_FILENAME
-    )
+    # There could be a myriad of problems preventing the catalog from being populated,
+    # hence the rather broad `except Exception` clause used here.
+    # An exception raised here indicates stuck message(s) in the SQS queue.
+    # Logging is monitored by Elasticsearch, and alerting is set up to notify the team of a problem.
+    try:
+        dataset_metadata = read_file(f"{storage_bucket_path}/{metadata_key}")
+        assert isinstance(dataset_metadata, (Catalog, Collection))
 
-    # create root catalog if it doesn't exist
-    if CONTENTS_KEY in results:
-        root_catalog = Catalog.from_file(f"{storage_bucket_path}/{CATALOG_FILENAME}")
-
-    else:
-        root_catalog = Catalog(
-            id=ROOT_CATALOG_ID,
-            title=ROOT_CATALOG_TITLE,
-            description=ROOT_CATALOG_DESCRIPTION,
-            catalog_type=CatalogType.SELF_CONTAINED,
+        results = S3_CLIENT.list_objects(
+            Bucket=Resource.STORAGE_BUCKET_NAME.resource_name, Prefix=CATALOG_FILENAME
         )
-        root_catalog.set_self_href(f"{storage_bucket_path}/{CATALOG_FILENAME}")
 
-    dataset_metadata = read_file(f"{storage_bucket_path}/{metadata_key}")
-    assert isinstance(dataset_metadata, (Catalog, Collection))
+        # create root catalog if it doesn't exist
+        if CONTENTS_KEY in results:
+            root_catalog = Catalog.from_file(f"{storage_bucket_path}/{CATALOG_FILENAME}")
+
+        else:
+            root_catalog = Catalog(
+                id=ROOT_CATALOG_ID,
+                title=ROOT_CATALOG_TITLE,
+                description=ROOT_CATALOG_DESCRIPTION,
+                catalog_type=CatalogType.SELF_CONTAINED,
+            )
+            root_catalog.set_self_href(f"{storage_bucket_path}/{CATALOG_FILENAME}")
 
-    if root_catalog.get_child(dataset_metadata.id) is None:
-        root_catalog.add_child(child=dataset_metadata, strategy=GeostoreSTACLayoutStrategy())
+        if root_catalog.get_child(dataset_metadata.id) is None:
+            root_catalog.add_child(child=dataset_metadata, strategy=GeostoreSTACLayoutStrategy())
 
-    root_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)
+        root_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)
+
+    except Exception as error:
+        LOGGER.warning(
+            f"{LOG_MESSAGE_LAMBDA_FAILURE}: Unable to populate catalog due to “{error}”",
+            extra={GIT_COMMIT: get_param(ParameterName.GIT_COMMIT)},
+        )
+        raise
diff --git a/tests/test_populate_catalog.py b/tests/test_populate_catalog.py
index 0f038e69d..0a7744b13 100644
--- a/tests/test_populate_catalog.py
+++ b/tests/test_populate_catalog.py
@@ -2,15 +2,17 @@
 from datetime import timedelta
 from json import load
 from time import sleep
+from unittest.mock import MagicMock, patch
 from uuid import uuid4
 
 import smart_open
 from mypy_boto3_s3 import S3Client
 from mypy_boto3_sqs import SQSServiceResource
-from pytest import mark
+from pytest import mark, raises
 from pytest_subtests import SubTests
 
 from geostore.aws_keys import BODY_KEY
+from geostore.logging_keys import GIT_COMMIT, LOG_MESSAGE_LAMBDA_FAILURE
 from geostore.parameter_store import ParameterName, get_param
 from geostore.populate_catalog.task import (
     CATALOG_FILENAME,
@@ -18,6 +20,7 @@
     ROOT_CATALOG_DESCRIPTION,
     ROOT_CATALOG_ID,
     ROOT_CATALOG_TITLE,
+    handle_message,
     lambda_handler,
 )
 from geostore.resources import Resource
@@ -40,9 +43,9 @@
 from geostore.types import JsonList
 from geostore.update_root_catalog.task import SQS_MESSAGE_GROUP_ID
 
-from .aws_utils import Dataset, S3Object, any_lambda_context, delete_s3_key
+from .aws_utils import Dataset, S3Object, any_lambda_context, any_s3_url, delete_s3_key
 from .file_utils import json_dict_to_file_object
-from .general_generators import any_safe_filename
+from .general_generators import any_error_message, any_exception_class, any_safe_filename
 from .stac_generators import any_dataset_version_id
 from .stac_objects import (
     MINIMAL_VALID_STAC_CATALOG_OBJECT,
@@ -652,3 +655,25 @@ def should_add_link_to_root_catalog_in_series(
         expected_root_catalog_links
         == load(smart_open.open(root_url, mode="rb"))[STAC_LINKS_KEY]
     )
+
+
+@mark.infrastructure
+@patch("geostore.populate_catalog.task.read_file")
+def should_log_error_message_when_an_exception_is_raised_trying_to_update_catalog(
+    pystac_read_file_mock: MagicMock,
+) -> None:
+    exception_class = any_exception_class()
+    error_message = any_error_message()
+    error = exception_class(error_message)
+    expected_message = f"{LOG_MESSAGE_LAMBDA_FAILURE}: Unable to populate catalog due to “{error}”"
+
+    pystac_read_file_mock.side_effect = error
+
+    with patch("geostore.populate_catalog.task.LOGGER.warning") as logger_mock, raises(
+        exception_class
+    ):
+        handle_message(any_s3_url())
+
+    logger_mock.assert_any_call(
+        expected_message, extra={GIT_COMMIT: get_param(ParameterName.GIT_COMMIT)}
+    )
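
For reference, a minimal self-contained sketch of the log-and-reraise pattern the new `except` block relies on, and of how the test exercises it with mocks alone. The names here (`process_message`, `LAMBDA_FAILURE`, the logger, and the `extra` key) are illustrative stand-ins, not geostore APIs:

```python
# Sketch only: demonstrates "log a structured warning, then re-raise" plus a
# mock-based check that the warning fires and the exception still propagates.
import logging
from unittest.mock import MagicMock, patch

LOGGER = logging.getLogger("populate_catalog_sketch")
LAMBDA_FAILURE = "LAMBDA_FAILURE"  # stand-in for LOG_MESSAGE_LAMBDA_FAILURE


def process_message(read_file, metadata_key: str) -> None:
    try:
        read_file(metadata_key)  # any step here may fail: S3, pystac, validation...
    except Exception as error:
        # Log with structured context for the monitoring stack, then re-raise so
        # the Lambda invocation fails and the SQS message is retried (or DLQ'd).
        LOGGER.warning(
            "%s: Unable to populate catalog due to “%s”",
            LAMBDA_FAILURE,
            error,
            extra={"git_commit": "abc123"},  # hypothetical value
        )
        raise


def test_logs_then_reraises() -> None:
    reader = MagicMock(side_effect=ValueError("boom"))
    with patch.object(LOGGER, "warning") as warning_mock:
        try:
            process_message(reader, "datasets/x/catalog.json")
        except ValueError:
            pass  # the exception must propagate past the logging
        else:
            raise AssertionError("expected ValueError to propagate")
    warning_mock.assert_called_once()
```

Re-raising after logging is what makes the comment about "stuck message(s)" hold: the failed invocation leaves the SQS message undeleted, so it becomes visible again after its visibility timeout and is redelivered (or routed to a dead-letter queue if one is configured), while the logged warning gives the monitoring stack something to alert on.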