[Issue #3271] Setup structure of opportunity attachment transformation (minus file logic) #3443

Merged · 6 commits · Jan 15, 2025
api/src/data_migration/transformation/subtask/transform_opportunity_attachment.py
@@ -0,0 +1,144 @@
import logging
from typing import Sequence

import src.data_migration.transformation.transform_constants as transform_constants
import src.data_migration.transformation.transform_util as transform_util
from src.constants.lookup_constants import OpportunityAttachmentType
from src.data_migration.transformation.subtask.abstract_transform_subtask import (
    AbstractTransformSubTask,
)
from src.db.models.opportunity_models import Opportunity, OpportunityAttachment
from src.db.models.staging.attachment import TsynopsisAttachment

logger = logging.getLogger(__name__)


class TransformOpportunityAttachment(AbstractTransformSubTask):

    def transform_records(self) -> None:

        # Fetch staging attachment / our attachment / opportunity groups
        records = self.fetch_with_opportunity(
            TsynopsisAttachment,
            OpportunityAttachment,
            [TsynopsisAttachment.syn_att_id == OpportunityAttachment.attachment_id],
        )

        self.process_opportunity_attachment_group(records)

    def process_opportunity_attachment_group(
        self,
        records: Sequence[
            tuple[TsynopsisAttachment, OpportunityAttachment | None, Opportunity | None]
        ],
    ) -> None:
        for source_attachment, target_attachment, opportunity in records:
            try:
                self.process_opportunity_attachment(
                    source_attachment, target_attachment, opportunity
                )
            except ValueError:
                self.increment(
                    transform_constants.Metrics.TOTAL_ERROR_COUNT,
                    prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
                )
                logger.exception(
                    "Failed to process opportunity attachment",
                    extra=transform_util.get_log_extra_opportunity_attachment(source_attachment),
                )

    def process_opportunity_attachment(
        self,
        source_attachment: TsynopsisAttachment,
        target_attachment: OpportunityAttachment | None,
        opportunity: Opportunity | None,
    ) -> None:

        self.increment(
            transform_constants.Metrics.TOTAL_RECORDS_PROCESSED,
            prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
        )

        extra = transform_util.get_log_extra_opportunity_attachment(source_attachment)
        logger.info("Processing opportunity attachment", extra=extra)

        if source_attachment.is_deleted:
            # TODO - https://github.com/HHS/simpler-grants-gov/issues/3322
            # deletes are more complex because of s3
            # this just handles deleting the DB record at the moment
            self._handle_delete(
                source=source_attachment,
                target=target_attachment,
                record_type=transform_constants.OPPORTUNITY_ATTACHMENT,
                extra=extra,
            )

        elif opportunity is None:
            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
            # we'll make sure the opportunity actually exists
            raise ValueError(
                "Opportunity attachment cannot be processed as the opportunity for it does not exist"
            )

        else:
            # To avoid incrementing metrics for records we fail to transform, record
            # here whether it's an insert/update and we'll increment after transforming
            is_insert = target_attachment is None

            logger.info("Transforming and upserting opportunity attachment", extra=extra)

            transformed_opportunity_attachment = transform_opportunity_attachment(
                source_attachment, target_attachment
            )

            # TODO - we'll need to handle more with the s3 files here
            if is_insert:
                self.increment(
                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
                    prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
                )
                self.db_session.add(transformed_opportunity_attachment)
            else:
                self.increment(
                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
                    prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
                )
                self.db_session.merge(transformed_opportunity_attachment)

        logger.info("Processed opportunity attachment", extra=extra)
        source_attachment.transformed_at = self.transform_time


def transform_opportunity_attachment(
    source_attachment: TsynopsisAttachment, incoming_attachment: OpportunityAttachment | None
) -> OpportunityAttachment:

    log_extra = transform_util.get_log_extra_opportunity_attachment(source_attachment)

    if incoming_attachment is None:
        logger.info("Creating new opportunity attachment record", extra=log_extra)

    # We always create a new record here and merge it in the calling function
    # this way if there is any error doing the transformation, we don't modify the existing one.
    target_attachment = OpportunityAttachment(
        attachment_id=source_attachment.syn_att_id,
        opportunity_id=source_attachment.opportunity_id,
        # TODO - we'll eventually remove attachment type, for now just arbitrarily set the value
        opportunity_attachment_type=OpportunityAttachmentType.OTHER,
        # TODO - in https://github.com/HHS/simpler-grants-gov/issues/3322
        # we'll actually handle the file location logic with s3
        file_location="TODO",  # TODO - next PR
        mime_type=source_attachment.mime_type,
        file_name=source_attachment.file_name,
        file_description=source_attachment.file_desc,
        file_size_bytes=source_attachment.file_lob_size,
        created_by=source_attachment.creator_id,
        updated_by=source_attachment.last_upd_id,
        legacy_folder_id=source_attachment.syn_att_folder_id,
    )

    transform_util.transform_update_create_timestamp(
        source_attachment, target_attachment, log_extra=log_extra
    )

    return target_attachment
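For orientation, a small usage sketch of the module-level transform_opportunity_attachment helper. The staging row below is hypothetical: it assumes TsynopsisAttachment can be constructed directly with the columns read above plus the created_date / last_upd_date audit columns that transform_update_create_timestamp relies on (in the tests further down, factories build these rows instead).

from datetime import datetime, timezone

# Hypothetical staging row; attribute names mirror the ones read by the transform above.
staging_row = TsynopsisAttachment(
    syn_att_id=1001,
    opportunity_id=42,
    mime_type="application/pdf",
    file_name="NOFO.pdf",
    file_desc="Notice of funding opportunity",
    file_lob_size=123456,
    creator_id="EGRANTS",
    last_upd_id="EGRANTS",
    syn_att_folder_id=7,
    created_date=datetime(2025, 1, 1, tzinfo=timezone.utc),
    last_upd_date=datetime(2025, 1, 2, tzinfo=timezone.utc),
)

# With no existing target record, a brand-new OpportunityAttachment is built:
# ids and file metadata are copied one-to-one, the attachment type is hard-coded
# to OTHER for now, and file_location stays a placeholder until the s3 work lands.
attachment = transform_opportunity_attachment(staging_row, incoming_attachment=None)
assert attachment.attachment_id == staging_row.syn_att_id
assert attachment.file_location == "TODO"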
api/src/data_migration/transformation/transform_constants.py
@@ -35,6 +35,7 @@
FUNDING_CATEGORY = "funding_category"
FUNDING_INSTRUMENT = "funding_instrument"
AGENCY = "agency"
OPPORTUNITY_ATTACHMENT = "opportunity_attachment"


class Metrics(StrEnum):
@@ -22,6 +22,9 @@
    TransformFundingInstrument,
)
from src.data_migration.transformation.subtask.transform_opportunity import TransformOpportunity
from src.data_migration.transformation.subtask.transform_opportunity_attachment import (
    TransformOpportunityAttachment,
)
from src.data_migration.transformation.subtask.transform_opportunity_summary import (
    TransformOpportunitySummary,
)
@@ -42,6 +45,9 @@ class TransformOracleDataTaskConfig(PydanticBaseEnvConfig):
    enable_funding_category: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_CATEGORY
    enable_funding_instrument: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_INSTRUMENT
    enable_agency: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_AGENCY
    enable_opportunity_attachment: bool = (
        False  # TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY_ATTACHMENT
    )


class TransformOracleDataTask(Task):
@@ -85,3 +91,6 @@ def run_task(self) -> None:
        if self.transform_config.enable_agency:
            TransformAgency(self).run()
            TransformAgencyHierarchy(self).run()

        if self.transform_config.enable_opportunity_attachment:
            TransformOpportunityAttachment(self).run()
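Since the new subtask ships disabled by default, here is a short sketch of how the flag could be flipped, assuming TransformOracleDataTaskConfig lives in src.data_migration.transformation.transform_oracle_data_task and follows the usual pydantic settings behavior (keyword overrides, or the TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY_ATTACHMENT environment variable named in the inline comment). Both the module path and the override mechanics are assumptions here.

import os

from src.data_migration.transformation.transform_oracle_data_task import (  # assumed module path
    TransformOracleDataTaskConfig,
)

# Default: the attachment transformation does not run.
assert TransformOracleDataTaskConfig().enable_opportunity_attachment is False

# Assumed option 1: override the field directly when building the config.
config = TransformOracleDataTaskConfig(enable_opportunity_attachment=True)
assert config.enable_opportunity_attachment is True

# Assumed option 2: set the environment variable before the config is instantiated,
# which is what run_task() would see when deciding whether to run the subtask.
os.environ["TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY_ATTACHMENT"] = "true"
assert TransformOracleDataTaskConfig().enable_opportunity_attachment is True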
9 changes: 9 additions & 0 deletions api/src/data_migration/transformation/transform_util.py
@@ -23,6 +23,7 @@
    OpportunityAssistanceListing,
    OpportunitySummary,
)
from src.db.models.staging.attachment import TsynopsisAttachment
from src.db.models.staging.opportunity import Topportunity, TopportunityCfda
from src.db.models.staging.staging_base import StagingBase
from src.util import datetime_util
@@ -535,3 +536,11 @@ def get_log_extra_funding_instrument(source_funding_instrument: SourceFundingIns
        "revision_number": getattr(source_funding_instrument, "revision_number", None),
        "table_name": source_funding_instrument.__tablename__,
    }


def get_log_extra_opportunity_attachment(source_attachment: TsynopsisAttachment) -> dict:
    return {
        "opportunity_id": source_attachment.opportunity_id,
        "syn_att_id": source_attachment.syn_att_id,
        "att_revision_number": source_attachment.att_revision_number,
    }
64 changes: 64 additions & 0 deletions api/tests/src/data_migration/transformation/conftest.py
@@ -14,6 +14,7 @@
    LinkOpportunitySummaryFundingInstrument,
    Opportunity,
    OpportunityAssistanceListing,
    OpportunityAttachment,
    OpportunitySummary,
)
from tests.conftest import BaseTestClass
@@ -330,6 +331,33 @@ def setup_agency(
    return tgroups


def setup_opportunity_attachment(
    create_existing: bool,
    opportunity: Opportunity,
    is_delete: bool = False,
    is_already_processed: bool = False,
    source_values: dict | None = None,
):
    if source_values is None:
        source_values = {}

    synopsis_attachment = f.StagingTsynopsisAttachmentFactory.create(
        opportunity=None,
        opportunity_id=opportunity.opportunity_id,
        is_deleted=is_delete,
        already_transformed=is_already_processed,
        **source_values,
    )

    if create_existing:
        f.OpportunityAttachmentFactory.create(
            attachment_id=synopsis_attachment.syn_att_id,
            opportunity=opportunity,
        )

    return synopsis_attachment


def validate_matching_fields(
    source, destination, fields: list[Tuple[str, str]], expect_all_to_match: bool
):
@@ -760,3 +788,39 @@ def validate_agency(
    validate_matching_fields(
        tgroup_map, agency.agency_contact_info, agency_contact_field_mapping, expect_values_to_match
    )


def validate_opportunity_attachment(
    db_session,
    source_attachment,
    expect_in_db: bool = True,
    expect_values_to_match: bool = True,
):

    opportunity_attachment = (
        db_session.query(OpportunityAttachment)
        .filter(OpportunityAttachment.attachment_id == source_attachment.syn_att_id)
        .one_or_none()
    )

    if not expect_in_db:
        assert opportunity_attachment is None
        return

    assert opportunity_attachment is not None
    validate_matching_fields(
        source_attachment,
        opportunity_attachment,
        [
            ("syn_att_id", "attachment_id"),
            ("opportunity_id", "opportunity_id"),
            ("mime_type", "mime_type"),
            ("file_name", "file_name"),
            ("file_desc", "file_description"),
            ("file_lob_size", "file_size_bytes"),
            ("creator_id", "created_by"),
            ("last_upd_id", "updated_by"),
            ("syn_att_folder_id", "legacy_folder_id"),
        ],
        expect_values_to_match,
    )
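To show how these helpers fit together, a minimal test sketch in the style of the sibling transformation tests. The transform_opportunity_attachment fixture name and its run_subtask() call are assumptions mirroring how the other subtask tests drive their subtasks; only setup_opportunity_attachment, validate_opportunity_attachment, and the factories above come from this diff.

class TestTransformOpportunityAttachment(BaseTestClass):
    def test_transform_opportunity_attachment(self, db_session, transform_opportunity_attachment):
        opportunity = f.OpportunityFactory.create()

        # One new staging record, one with an existing target row, and one delete.
        insert_attachment = setup_opportunity_attachment(create_existing=False, opportunity=opportunity)
        update_attachment = setup_opportunity_attachment(create_existing=True, opportunity=opportunity)
        delete_attachment = setup_opportunity_attachment(
            create_existing=True, opportunity=opportunity, is_delete=True
        )

        transform_opportunity_attachment.run_subtask()

        validate_opportunity_attachment(db_session, insert_attachment)
        validate_opportunity_attachment(db_session, update_attachment)
        validate_opportunity_attachment(db_session, delete_attachment, expect_in_db=False)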