
feat: Better logging and exception handling in populate catalog #2084

Merged
geostore/populate_catalog/task.py (52 changes: 32 additions, 20 deletions)

@@ -14,7 +14,7 @@
 from ..api_keys import EVENT_KEY
 from ..aws_keys import BODY_KEY
 from ..boto3_config import CONFIG
-from ..logging_keys import GIT_COMMIT
+from ..logging_keys import GIT_COMMIT, LOG_MESSAGE_LAMBDA_FAILURE
 from ..parameter_store import ParameterName, get_param
 from ..pystac_io_methods import S3StacIO
 from ..resources import Resource
@@ -74,27 +74,39 @@ def handle_message(metadata_key: str) -> None:

     storage_bucket_path = f"{S3_URL_PREFIX}{Resource.STORAGE_BUCKET_NAME.resource_name}"

-    results = S3_CLIENT.list_objects(
-        Bucket=Resource.STORAGE_BUCKET_NAME.resource_name, Prefix=CATALOG_FILENAME
-    )
-
-    dataset_metadata = read_file(f"{storage_bucket_path}/{metadata_key}")
-    assert isinstance(dataset_metadata, (Catalog, Collection))
-
-    # create root catalog if it doesn't exist
-    if CONTENTS_KEY in results:
-        root_catalog = Catalog.from_file(f"{storage_bucket_path}/{CATALOG_FILENAME}")
-
-    else:
-        root_catalog = Catalog(
-            id=ROOT_CATALOG_ID,
-            title=ROOT_CATALOG_TITLE,
-            description=ROOT_CATALOG_DESCRIPTION,
-            catalog_type=CatalogType.SELF_CONTAINED,
-        )
-        root_catalog.set_self_href(f"{storage_bucket_path}/{CATALOG_FILENAME}")
-
-    if root_catalog.get_child(dataset_metadata.id) is None:
-        root_catalog.add_child(child=dataset_metadata, strategy=GeostoreSTACLayoutStrategy())
-
-    root_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)
+    # there could be a myriad of problems preventing catalog from being populated
+    # hence a rather broad try except exception clause is used
+    # an exception thrown here indicates stuck message(s) in the sqs queue
+    # logging is monitored by elasticsearch and alerting is set up to notify the team of a problem
+    try:
+        results = S3_CLIENT.list_objects(
+            Bucket=Resource.STORAGE_BUCKET_NAME.resource_name, Prefix=CATALOG_FILENAME
+        )
+
+        dataset_metadata = read_file(f"{storage_bucket_path}/{metadata_key}")
+        assert isinstance(dataset_metadata, (Catalog, Collection))
+
+        # create root catalog if it doesn't exist
+        if CONTENTS_KEY in results:
+            root_catalog = Catalog.from_file(f"{storage_bucket_path}/{CATALOG_FILENAME}")
+
+        else:
+            root_catalog = Catalog(
+                id=ROOT_CATALOG_ID,
+                title=ROOT_CATALOG_TITLE,
+                description=ROOT_CATALOG_DESCRIPTION,
+                catalog_type=CatalogType.SELF_CONTAINED,
+            )
+            root_catalog.set_self_href(f"{storage_bucket_path}/{CATALOG_FILENAME}")
+
+        if root_catalog.get_child(dataset_metadata.id) is None:
+            root_catalog.add_child(child=dataset_metadata, strategy=GeostoreSTACLayoutStrategy())
+
+        root_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)
+
+    except Exception as error:
+        LOGGER.warning(
+            f"{LOG_MESSAGE_LAMBDA_FAILURE}: Unable to populate catalog due to “{error}”",
+            extra={GIT_COMMIT: get_param(ParameterName.GIT_COMMIT)},
+        )
+        raise
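
The change wraps the whole catalog update in a single broad try/except that logs a structured warning and then re-raises. The re-raise is the important part: the Lambda invocation fails, so the SQS message is not deleted and will be redelivered rather than silently lost. A minimal standalone sketch of this log-then-re-raise pattern (all names below are illustrative, not from the geostore codebase):

import logging

LOGGER = logging.getLogger("populate_catalog_sketch")


def update_catalog() -> None:
    # Stand-in for the real work: reading STAC metadata, updating the
    # root catalog, and saving back to S3. Any of these steps can fail.
    raise OSError("S3 object not found")


def handle_message_sketch() -> None:
    try:
        update_catalog()
    except Exception as error:
        # Log first so external monitoring (e.g. Elasticsearch alerting)
        # records the failure...
        LOGGER.warning(f"Lambda failure: Unable to populate catalog due to {error}")
        # ...then re-raise so the invocation fails and the queue message
        # stays put for a retry instead of being dropped.
        raise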
tests/test_populate_catalog.py (31 changes: 28 additions, 3 deletions)

@@ -2,22 +2,25 @@
 from datetime import timedelta
 from json import load
 from time import sleep
+from unittest.mock import MagicMock, patch
 from uuid import uuid4

 import smart_open
 from mypy_boto3_s3 import S3Client
 from mypy_boto3_sqs import SQSServiceResource
-from pytest import mark
+from pytest import mark, raises
 from pytest_subtests import SubTests

 from geostore.aws_keys import BODY_KEY
+from geostore.logging_keys import GIT_COMMIT, LOG_MESSAGE_LAMBDA_FAILURE
 from geostore.parameter_store import ParameterName, get_param
 from geostore.populate_catalog.task import (
     CATALOG_FILENAME,
     RECORDS_KEY,
     ROOT_CATALOG_DESCRIPTION,
     ROOT_CATALOG_ID,
     ROOT_CATALOG_TITLE,
+    handle_message,
     lambda_handler,
 )
 from geostore.resources import Resource
@@ -40,9 +43,9 @@
 from geostore.types import JsonList
 from geostore.update_root_catalog.task import SQS_MESSAGE_GROUP_ID

-from .aws_utils import Dataset, S3Object, any_lambda_context, delete_s3_key
+from .aws_utils import Dataset, S3Object, any_lambda_context, any_s3_url, delete_s3_key
 from .file_utils import json_dict_to_file_object
-from .general_generators import any_safe_filename
+from .general_generators import any_error_message, any_exception_class, any_safe_filename
 from .stac_generators import any_dataset_version_id
 from .stac_objects import (
     MINIMAL_VALID_STAC_CATALOG_OBJECT,
@@ -652,3 +655,25 @@ def should_add_link_to_root_catalog_in_series(
         expected_root_catalog_links
         == load(smart_open.open(root_url, mode="rb"))[STAC_LINKS_KEY]
     )
+
+
+@mark.infrastructure
+@patch("geostore.populate_catalog.task.read_file")
+def should_log_error_message_when_an_exception_is_raised_trying_to_update_catalog(
+    pystac_read_file_mock: MagicMock,
+) -> None:
+    exception_class = any_exception_class()
+    error_message = any_error_message()
+    error = exception_class(error_message)
+    expected_message = f"{LOG_MESSAGE_LAMBDA_FAILURE}: Unable to populate catalog due to “{error}”"
+
+    pystac_read_file_mock.side_effect = error
+
+    with patch("geostore.populate_catalog.task.LOGGER.warning") as logger_mock, raises(
+        exception_class
+    ):
+        handle_message(any_s3_url())
+
+    logger_mock.assert_any_call(
+        expected_message, extra={GIT_COMMIT: get_param(ParameterName.GIT_COMMIT)}
+    )
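
The new test combines three pieces: patch read_file so the handler raises a generated exception, patch LOGGER.warning so the log call can be inspected afterwards, and wrap the call in raises to confirm the exception still propagates. The same pattern in a self-contained sketch (the handler below is hypothetical, not the geostore code):

import logging
from unittest.mock import patch

from pytest import raises

LOGGER = logging.getLogger("example")


def handler() -> None:
    try:
        raise ValueError("boom")
    except Exception as error:
        LOGGER.warning(f"Failure: {error}")
        raise  # propagate so the caller can retry


def test_logs_warning_and_reraises() -> None:
    # Patch the bound logger method so calls are recorded on a mock.
    with patch(f"{__name__}.LOGGER.warning") as logger_mock, raises(ValueError):
        handler()

    logger_mock.assert_any_call("Failure: boom")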