diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt index ffc429af..bde28b32 100644 --- a/.devcontainer/requirements.txt +++ b/.devcontainer/requirements.txt @@ -1,8 +1,6 @@ -kedro==0.18.7 black==23.3.0 ruff==0.0.259 pandas==1.5.3 pandera==0.14.5 -ipykernel -openpyxl -pytest \ No newline at end of file +pytest==7.4.0 +pytest-cov==4.1.0 \ No newline at end of file diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 00000000..fd60e93e --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,31 @@ +name: Coverage + +on: + workflow_run: + workflows: ["Tests"] + types: + - completed + +jobs: + coverage: + name: Run tests & display coverage + runs-on: ubuntu-latest + if: github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion == 'success' + permissions: + # Gives the action the necessary permissions for publishing new + # comments in pull requests. + pull-requests: write + # Gives the action the necessary permissions for editing existing + # comments (to avoid publishing multiple comments in the same PR) + contents: write + # Gives the action the necessary permissions for looking up the + # workflow that launched this workflow, and download the related + # artifact that contains the comment to be published + actions: read + steps: + - name: Post comment + uses: py-cov-action/python-coverage-comment-action@v3 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_RUN_ID: ${{ github.event.workflow_run.id }} + verbose: true \ No newline at end of file diff --git a/.github/workflows/linters.yml b/.github/workflows/linters.yml new file mode 100644 index 00000000..2f242985 --- /dev/null +++ b/.github/workflows/linters.yml @@ -0,0 +1,15 @@ +name: Linters + +on: [push] + +jobs: + black: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: chartboost/ruff-action@v1 \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..b2e5558f --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,45 @@ +name: Tests + +on: + pull_request: + push: + branches: + - "main" + +jobs: + unit-tests: + runs-on: ubuntu-latest + permissions: + # Gives the action the necessary permissions for publishing new + # comments in pull requests. + pull-requests: write + # Gives the action the necessary permissions for pushing data to the + # python-coverage-comment-action branch, and for editing existing + # comments (to avoid publishing multiple comments in the same PR) + contents: write + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f .devcontainer/requirements.txt ]; then pip install -r .devcontainer/requirements.txt; fi + - name: Launch tests & generate report + run: pytest + - name: Coverage comment + id: coverage_comment + uses: py-cov-action/python-coverage-comment-action@v3 + with: + GITHUB_TOKEN: ${{ github.token }} + verbose: true + - name: Store Pull Request comment to be posted + uses: actions/upload-artifact@v3 + if: steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true' + with: + # If you use a different name, update COMMENT_ARTIFACT_NAME accordingly + name: python-coverage-comment-action + # If you use a different name, update COMMENT_FILENAME accordingly + path: python-coverage-comment-action.txt diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..8450e219 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,10 @@ +[tool:pytest] +addopts = + --cov-report term-missing --cov-branch --cov-report xml --cov-report term + --cov=src -vv --strict-markers -rfE + +testpaths = + src/tests + +[coverage:run] +relative_files = true \ No newline at end of file diff --git a/src/tests/__init__.py b/src/tests/__init__.py index 6e9e654f..238d07e8 100644 --- a/src/tests/__init__.py +++ b/src/tests/__init__.py @@ -3,4 +3,4 @@ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(os.path.join(ROOT_DIR, "validator")) \ No newline at end of file +sys.path.append(os.path.join(ROOT_DIR, "validator")) diff --git a/src/tests/test_check_functions.py b/src/tests/test_check_functions.py index 117e8a44..e40caeef 100644 --- a/src/tests/test_check_functions.py +++ b/src/tests/test_check_functions.py @@ -148,10 +148,6 @@ def test_outside_maxlength(self): result = has_valid_multi_field_value_count({"4": self.series}, 1) assert result.values == [False] - def test_inside_maxlength(self): - result = has_valid_multi_field_value_count({"4": self.series}, 5) - assert result.values == [True] - def test_valid_length_with_non_blank(self): result = has_valid_multi_field_value_count( {"4;1": self.multiple_values_series}, 5 @@ -302,7 +298,8 @@ def test_conditional_field_conflict_correct(self): def test_conditional_field_conflict_incorrect(self): # if ct_loan_term_flag != 900 then ct_loan_term must be blank - # in this test, ct_loan_term_flag is not 900 and ct_loan_term is NOT blank, so must return False + # in this test, ct_loan_term_flag is not 900 and ct_loan_term is + # NOT blank, so must return False series = pd.Series(["36"], name="ct_loan_term", index=[2]) condition_values: set[str] = {"900"} @@ -656,15 +653,15 @@ def test_with_valid_series(self): def test_with_multiple_valid_series(self): result = is_unique_column({"ABC123": self.series, "DEF456": self.other_series}) - assert result.values[0] == True and result.values[1] == True + assert result.values[0] and result.values[1] def test_with_invalid_series(self): result = is_unique_column({"ABC123": self.invalid_series}) - assert result.values.all() == False + assert not result.values.all() def test_with_multiple_items_series(self): result = is_unique_column({"GHI123": self.multi_invalid_series}) - assert result.values.all() == False + assert not result.values.all() def test_with_multiple_invalid_series(self): result = is_unique_column( @@ -672,11 +669,11 @@ def test_with_multiple_invalid_series(self): ) # ALL rows should be FALSE assert ( - result.values[0] == False - and result.values[1] == False - and result.values[2] == False - and result.values[3] == False - and result.values[4] == False + not result.values[0] + and not result.values[1] + and not result.values[2] + and not result.values[3] + and not result.values[4] ) def test_with_multiple_mix_series(self): @@ -684,11 +681,7 @@ def test_with_multiple_mix_series(self): {"ABC123": self.invalid_series, "DEF456": self.other_series} ) # first two rows should be FALSE and last Row should be TRUE - assert ( - result.values[0] == False - and result.values[1] == False - and result.values[2] == True - ) + assert not result.values[0] and not result.values[1] and result.values[2] def test_with_blank_value_series(self): result = is_unique_column({"": self.blank_value_series}) @@ -795,7 +788,7 @@ def test_with_incorrect_is_equal_and_not_equal_conditions(self): "field2": (1, True, "999"), "field3": (2, True, "0"), "field4": (3, False, ""), - "field4": (4, False, ""), + "field5": (4, False, ""), } series = pd.Series(["0"], name="num_principal_owners", index=[1]) diff --git a/src/validator/global_data.py b/src/validator/global_data.py index 69c1fc40..422851b2 100644 --- a/src/validator/global_data.py +++ b/src/validator/global_data.py @@ -1,5 +1,4 @@ import os -import re import sys import pandas as pd diff --git a/src/validator/phase_validations.py b/src/validator/phase_validations.py index 6c88afac..d5123ded 100644 --- a/src/validator/phase_validations.py +++ b/src/validator/phase_validations.py @@ -3,10 +3,6 @@ This mapping is used to populate the schema template object and create an instance of a PanderaSchema object for phase 1 and phase 2.""" -#! NOTE: "pricing_adj_margin", "pricing_adj_index_name": "pricing_adj_index_name_ff", -# and "pricing_adj_index_value" have been renamed. They used to be called -# pricing_var_xyz but are now called pricing_adj_xyz - import global_data from check_functions import ( @@ -138,8 +134,8 @@ def get_phase_1_and_2_validations_for_lei(lei: str = None): is_valid_enum, name="ct_credit_product.invalid_enum_value", description=( - "'Credit product' must equal 1, 2, 3, 4, 5, 6, 7, 8, " - "977, or 988." + "'Credit product' must equal 1, 2, 3, 4, 5, 6, " + "7, 8, 977, or 988." ), element_wise=True, accepted_values=[ diff --git a/tools/process_naics.py b/tools/process_naics.py index 8ac162ae..b202407c 100644 --- a/tools/process_naics.py +++ b/tools/process_naics.py @@ -4,8 +4,8 @@ import pandas as pd -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # noqa: E402 -sys.path.append(ROOT_DIR) # noqa: E402 +ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # noqa: E402 +sys.path.append(ROOT_DIR) # noqa: E402 import config # noqa: E402 @@ -21,30 +21,29 @@ CSV_PATH = config.NAICS_CSV_PATH CODE_COL = config.NAICS_CODE_COL TITLE_COL = config.NAICS_TITLE_COL - - #check for paths + + # check for paths if not os.path.isfile(EXCEL_PATH): error_msg = "Input excel file not existed" raise FileNotFoundError(error_msg) if os.path.isfile(CSV_PATH): error_msg = "Output csv file existed" raise FileExistsError(error_msg) - + df = pd.read_excel(EXCEL_PATH, dtype=str, na_filter=False) - - #add header + + # add header result = [["code", "title"]] - - #read excel file + + # read excel file # and create csv data list for index, row in df.iterrows(): code = str(row[CODE_COL]) if len(code) == 3: - a_row = [code , str(row[TITLE_COL])] + a_row = [code, str(row[TITLE_COL])] result.append(a_row) - - #output data to csv file - with open(CSV_PATH, 'w') as f: + + # output data to csv file + with open(CSV_PATH, "w") as f: writer = csv.writer(f) writer.writerows(result) - \ No newline at end of file