Skip to content

Commit

Permalink
Merge branch 'main' into features/36_github_action
Browse files: browse the repository at this point in the history
  • Loading branch information
Aldrian Harjati committed Aug 28, 2023
2 parents 1f191b5 + 810de45 commit 5ed7b87
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 3,439 deletions.
26 changes: 25 additions & 1 deletion src/validator/create_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
with validations listed in phase 1 and phase 2."""

from pandera import DataFrameSchema
from pandera.errors import SchemaErrors
from phase_validations import get_phase_1_and_2_validations_for_lei
from schema_template import get_template

Expand All @@ -13,11 +14,34 @@


def get_schema_by_phase_for_lei(template: dict, phase: str, lei: str = None):
    """Attach one phase's checks to ``template`` and wrap it in a schema.

    Args:
        template: Mapping of column name to a column object exposing a
            ``checks`` attribute. The column objects are updated in place.
        phase: Key selecting which list of checks is used for every column,
            e.g. ``"phase_1"``.
        lei: Optional Legal Entity Identifier, forwarded unchanged to
            ``get_phase_1_and_2_validations_for_lei``.

    Returns:
        A ``DataFrameSchema`` built from the updated ``template``.
    """
    # Build the validation mapping once: asking for it again on every column
    # would re-create the complete set of checks on each iteration.
    validations_by_column = get_phase_1_and_2_validations_for_lei(lei)
    for column, validations in validations_by_column.items():
        template[column].checks = validations[phase]
    return DataFrameSchema(template)


def print_schema_errors(errors: SchemaErrors, phase: str):
    """Print a short report for every failure collected in ``errors``.

    For each entry of ``errors.schema_errors`` three lines are written to
    standard output: a headline naming the failed check and the column,
    the check output, and an empty separator line.

    Args:
        errors: Object whose ``schema_errors`` attribute is an iterable of
            mappings holding the underlying failure under the ``"error"`` key.
        phase: Label placed at the start of each headline, e.g. ``"Phase 1"``.
    """
    for entry in errors.schema_errors:
        failure = entry["error"]
        # The schema attached to the failure carries the name of the
        # dataframe column that was being checked.
        column_name = failure.schema.name

        # Custom checks are objects with a ``name`` and come with a
        # ``check_output`` (a boolean series or a single bool). Built-in
        # checks such as unique=True lack those attributes, so fall back to
        # the raw check value and the first exception argument, which is a
        # plain string that would need to be parsed manually.
        try:
            details = (failure.check.name, failure.check_output)
        except AttributeError:
            details = (failure.check, failure.args[0])
        check_name, check_output = details

        headline = f"{phase} Validation `{check_name}` failed for column `{column_name}`"
        print(headline, check_output, "", sep="\n")


def get_phase_1_schema_for_lei(lei: str = None):
    """Return the schema that applies the phase 1 checks for ``lei``.

    Args:
        lei: Optional Legal Entity Identifier, passed through unchanged.

    Returns:
        The result of ``get_schema_by_phase_for_lei`` for the ``"phase_1"``
        checks applied to ``phase_1_template``.
    """
    phase_1_schema = get_schema_by_phase_for_lei(
        template=phase_1_template,
        phase="phase_1",
        lei=lei,
    )
    return phase_1_schema

Expand Down
36 changes: 16 additions & 20 deletions src/validator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@
import sys

import pandas as pd
from create_schemas import (
get_phase_1_schema_for_lei,
get_phase_2_schema_for_lei,
print_schema_errors,
)
from pandera.errors import SchemaErrors
from schema import get_schema_for_lei


def csv_to_df(path: str) -> pd.DataFrame:
Expand All @@ -28,29 +32,21 @@ def run_validation_on_df(df: pd.DataFrame, lei: str) -> None:
print(df)
print("")

sblar_schema = get_schema_for_lei(lei)
phase_1_failure_cases = None

phase_1_sblar_schema = get_phase_1_schema_for_lei(lei)
try:
sblar_schema(df, lazy=True)
phase_1_sblar_schema(df, lazy=True)
except SchemaErrors as errors:
for error in errors.schema_errors:
# Name of the column in the dataframe being checked
column_name = error["error"].schema.name
phase_1_failure_cases = errors.failure_cases
print_schema_errors(errors, "Phase 1")

# built in checks such as unique=True are different than custom
# checks unfortunately so the name needs to be accessed differently
try:
check_name = error["error"].check.name
# This will either be a boolean series or a single bool
check_output = error["error"].check_output
except AttributeError:
check_name = error["error"].check
# this is just a string that we'd need to parse manually
check_output = error["error"].args[0]

print(f"Validation `{check_name}` failed for column `{column_name}`")
print(check_output)
print("")
if phase_1_failure_cases is None:
phase_2_sblar_schema = get_phase_2_schema_for_lei(lei)
try:
phase_2_sblar_schema(df, lazy=True)
except SchemaErrors as errors:
print_schema_errors(errors, "Phase 2")


if __name__ == "__main__":
Expand Down
114 changes: 77 additions & 37 deletions src/validator/phase_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,18 @@


import global_data
from check_functions import (
has_correct_length,
has_no_conditional_field_conflict,
has_valid_enum_pair,
has_valid_fieldset_pair,
has_valid_format,
has_valid_multi_field_value_count,
has_valid_value_count,
is_date,
is_date_after,
is_date_before_in_days,
is_date_in_range,
is_greater_than,
is_greater_than_or_equal_to,
is_less_than,
is_number,
is_unique_column,
is_unique_in_field,
is_valid_code,
is_valid_enum,
meets_multi_value_field_restriction,
string_contains,
)
from check_functions import (has_correct_length,
has_no_conditional_field_conflict,
has_valid_enum_pair, has_valid_fieldset_pair,
has_valid_format,
has_valid_multi_field_value_count,
has_valid_value_count, is_date, is_date_after,
is_date_before_in_days, is_date_in_range,
is_greater_than, is_greater_than_or_equal_to,
is_less_than, is_number, is_unique_column,
is_unique_in_field, is_valid_code, is_valid_enum,
meets_multi_value_field_restriction,
string_contains)
from checks import SBLCheck

# read and populate global naics code (this should be called only once)
Expand Down Expand Up @@ -75,9 +64,9 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None):
string_contains,
name="uid.invalid_uid_lei",
description=(
"The first 20 characters of the 'unique identifier' "
"should match the Legal Entity Identifier (LEI) for "
"the financial institution."
"The first 20 characters of the 'unique identifier' should"
" match the Legal Entity Identifier (LEI) for the financial"
" institution."
),
element_wise=True,
containing_value=lei,
Expand Down Expand Up @@ -951,8 +940,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None):
description=(
"When 'interest rate type' does not equal 1"
" (adjustable interest rate, no initial rate period),"
" 3 (initial rate period > 12 months, adjustable interest "
"rate), or 5 (initial rate period <= 12 months, variable "
" 3 (initial rate period > 12 months, adjustable interest"
" rate), or 5 (initial rate period <= 12 months, variable "
"interest rate), 'adjustable rate transaction: margin' must "
"be blank. When 'interest rate type' equals 1, 3, or 5, "
"'variable rate transaction: margin' must not be blank."
Expand Down Expand Up @@ -1037,8 +1026,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None):
max_value=300,
name="pricing_adj_index_name_ff.invalid_text_length",
description=(
"'Adjustable rate transaction: index name: other' must "
"not exceed 300 characters in length."
"'Adjustable rate transaction: index name: other' must not"
" exceed 300 characters in length."
),
),
],
Expand Down Expand Up @@ -1132,20 +1121,19 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None):
],
"phase_2": [],
},
"pricing_mca_addcost_flag": {"phase_1": [], "phase_2": []},
"pricing_mca_addcost": {"phase_1": [], "phase_2": []},
"pricing_prepenalty_allowed": {
"pricing_mca_addcost_flag": {
"phase_1": [
SBLCheck(
is_valid_enum,
name="pricing_prepenalty_allowed.invalid_enum_value",
name="pricing_mca_addcost_flag.invalid_enum_value",
description=(
"'Prepayment penalty could be imposed' must equal 1, 2, or 999."
"'MCA/sales-based: additional cost for merchant cash "
"advances or other sales-based financing: NA flag' "
"must equal 900 or 999."
),
element_wise=True,
accepted_values=[
"1",
"2",
"900",
"999",
],
),
Expand Down Expand Up @@ -1173,6 +1161,58 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None):
),
],
},
"pricing_mca_addcost": {
"phase_1": [
SBLCheck(
is_number,
name="pricing_mca_addcost.invalid_numeric_format",
description=(
"When present, 'MCA/sales-based: additional cost for "
"merchant cash advances or other sales-based financing' "
"must be a numeric value"
),
element_wise=True,
accept_blank=True,
),
],
"phase_2": [
SBLCheck(
has_no_conditional_field_conflict,
name="pricing_mca_addcost.conditional_field_conflict",
description=(
"When 'MCA/sales-based: additional cost for merchant "
"cash advances or other sales-based financing: NA flag' "
"does not equal 900 (applicable), 'MCA/sales-based: "
"additional cost for merchant cash advances or other "
"sales-based financing' must be blank. When 'MCA/sales-based: "
"additional cost for merchant cash advances or other "
"sales-based financing: NA flag' equals 900, MCA/sales-based: "
"additional cost for merchant cash advances or other "
"sales-based financing’ must not be blank."
),
groupby="pricing_mca_addcost_flag",
condition_values={"900"},
),
],
},
"pricing_prepenalty_allowed": {
"phase_1": [
SBLCheck(
is_valid_enum,
name="pricing_prepenalty_allowed.invalid_enum_value",
description=(
"'Prepayment penalty could be imposed' must equal 1, 2, or 999."
),
element_wise=True,
accepted_values=[
"1",
"2",
"999",
],
),
],
"phase_2": [],
},
"pricing_prepenalty_exists": {
"phase_1": [
SBLCheck(
Expand Down
Loading

0 comments on commit 5ed7b87

Please sign in to comment.