Skip to content

Commit

Permalink
Added authorized mods replacing MedianAge values: s/2002/6/
Browse files Browse the repository at this point in the history
  • Loading branch information
tptignor committed Sep 13, 2024
1 parent 2ce120b commit d5f2555
Show file tree
Hide file tree
Showing 3 changed files with 240 additions and 213 deletions.
2 changes: 2 additions & 0 deletions common/src/main/pyhmda/parse_census_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def conv_scf(val: str) -> str:
cfconverters = {k: v[1] for k, v in census_file_columns.items()}
parsed_census_df = pd.read_csv(args.censusfile, sep=',', header=None, usecols=cfkeys,
converters=cfconverters)[cfkeys].rename(cfcolnames, axis=1)
parsed_census_df = apply_authorized_modifications(census_file_authorized_modifications,
parsed_census_df)
logging.info(f"Parsed {args.censusfile}")

root, ext = os.path.splitext(args.delineationfile)
Expand Down
25 changes: 25 additions & 0 deletions common/src/main/pyhmda/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@

from datetime import datetime
import logging
import pandas as pd
import re
Expand Down Expand Up @@ -60,6 +61,8 @@ def conv_optpct(val: str) -> Any:


def prepare_file(read_file: str, write_file: str, pattern: str, expected_match: float=0.95) -> None:
"""Performs preprocessing on source files to insure clean reads by pandas.
"""
lc, mc = 0, 0
with open(read_file, 'r') as rf:
with open(write_file, 'w') as wf:
Expand All @@ -71,3 +74,25 @@ def prepare_file(read_file: str, write_file: str, pattern: str, expected_match:
if mc < expected_match * lc:
sys.exit(f"{read_file} pattern matched only {mc} of {lc} lines")
logging.info(f"Prepared file {write_file}")


def apply_authorized_modifications(modmap: dict, df: pd.DataFrame) -> pd.DataFrame:
    """Run every authorized modification in ``modmap`` against ``df``.

    These are changes to source data files that the CFPB does not own, made
    at the direction of outside agencies.

    Args:
        modmap: Maps a sortable key to a callable that accepts a DataFrame
            and returns a DataFrame.
        df: The frame to modify.

    Returns:
        The frame returned by the last modification, or ``df`` itself when
        ``modmap`` is empty.
    """
    # Ascending key order: each modification receives the output of the
    # modifications whose keys sort before its own.
    ordered = sorted(modmap.items(), key=lambda entry: entry[0])
    for _, modify in ordered:
        df = modify(df)
    return df


# Census Flat File Modifications

def replace_MedianAge_2002_values(df: pd.DataFrame) -> pd.DataFrame:
    """Overwrite every ``MedianAge`` value equal to 2002 with 6.

    The frame is edited in place, and the same object is returned so the
    function can be used as a DataFrame -> DataFrame modification step.
    """
    is_2002 = df["MedianAge"].eq(2002)
    df.loc[is_2002, "MedianAge"] = 6
    return df


# Modifications to published Census Flat Files directed by the US Census Bureau.
# Each entry maps a date to the function that performs the modification;
# apply_authorized_modifications runs the entries in ascending date order.
census_file_authorized_modifications = {
    datetime(2024, 9, 1): replace_MedianAge_2002_values,
}

Loading

0 comments on commit d5f2555

Please sign in to comment.