Skip to content

Commit

Permalink
Reverting census parser output csv header
Browse files Browse the repository at this point in the history
---

(.venv) MP-LF4T2CC2CX:main/pyhmda tignort$ diff CensusFlatFile2024-parsed.txt CensusFlatFile2024-parsed.txt.BEFORE
1c1
< Collection Year|MSA/MD|State|County|Census Tract|FFIEC Median Family Income|Population|Minority Population %|Number of Owner Occupied Units|Number of 1 to 4 Family Units|Tract MFI|Tract to MSA Income %|Median Age|Small County|MSA/MD Name
---
> CollectionYear|CBSACode|FIPSStateCode|FIPSCountyCode|CensusTract|FFIECMedianFamilyIncome|Population|MinorityPopulationPct|NumOwnerOccupiedUnits|Num1To4FamilyUnits|TractMFI|TractToMSAIncomePct|MedianAge|SmallCounty|MSAOrMDTitle
(.venv) MP-LF4T2CC2CX:main/pyhmda tignort$
(.venv) MP-LF4T2CC2CX:main/pyhmda tignort$ head -1 CensusFlatFile2024-parsed.txt > reverted-header.txt
(.venv) MP-LF4T2CC2CX:main/pyhmda tignort$ cat reverted-header.txt
Collection Year|MSA/MD|State|County|Census Tract|FFIEC Median Family Income|Population|Minority Population %|Number of Owner Occupied Units|Number of 1 to 4 Family Units|Tract MFI|Tract to MSA Income %|Median Age|Small County|MSA/MD Name
(.venv) MP-LF4T2CC2CX:main/pyhmda tignort$ cat expected.txt
Collection Year|MSA/MD|State|County|Census Tract|FFIEC Median Family Income|Population|Minority Population %|Number of Owner Occupied Units|Number of 1 to 4 Family Units|Tract MFI|Tract to MSA Income %|Median Age|Small County|MSA/MD Name
(.venv) MP-LF4T2CC2CX:main/pyhmda tignort$ diff reverted-header.txt expected.txt
(.venv) MP-LF4T2CC2CX:main/pyhmda tignort$
  • Loading branch information
tptignor committed Oct 30, 2024
1 parent 70f7b05 commit d2b8c1e
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 15 deletions.
27 changes: 13 additions & 14 deletions common/src/main/pyhmda/parse_census_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,14 @@ def conv_scf(val: str) -> str:
if val not in ["T", "S", "I"]: raise ValueError(f"invalid scf: \"{val}\"")
return val


# Census flat-file column table. Each entry is `key: [output column name, converter]`;
# the name (element 0) is what ends up in the pipe-delimited header of the parsed file.
# Entries are listed in output-header order, not in ascending key order.
# NOTE(review): keys look like positional column indexes into the raw census flat file --
# confirm against the reader call, which is outside this hunk.
# NOTE(review): this is a rendered diff with the +/- markers stripped. Per the page's
# "13 additions & 14 deletions" tally and the header diff quoted in the commit message,
# the first run of entries below (CamelCase names) is the removed side and the second run
# (spaced names such as "Collection Year") is the added side. The real file holds one run only.
census_file_columns = {
# Removed side: CamelCase header names.
0: ["CollectionYear", conv_num], 1: ["CBSACode", conv_dgstr], 2: ["FIPSStateCode", conv_dgstr],
3: ["FIPSCountyCode", conv_dgstr], 4: ["CensusTract", conv_dgstr],
13: ["FFIECMedianFamilyIncome", conv_num], 22: ["Population", conv_optnum],
28: ["MinorityPopulationPct", conv_optpct], 879: ["NumOwnerOccupiedUnits", conv_optnum],
899: ["Num1To4FamilyUnits", conv_optnum], 585: ["TractMFI", conv_optnum],
12: ["TractToMSAIncomePct", conv_optpct], 1057: ["MedianAge", conv_optnum],
6: ["SmallCounty", conv_scf]
# Added side: the reverted, human-readable header names.
0: ["Collection Year", conv_num], 1: ["MSA/MD", conv_dgstr], 2: ["State", conv_dgstr],
3: ["County", conv_dgstr], 4: ["Census Tract", conv_dgstr],
13: ["FFIEC Median Family Income", conv_num], 22: ["Population", conv_optnum],
28: ["Minority Population %", conv_optpct], 879: ["Number of Owner Occupied Units", conv_optnum],
899: ["Number of 1 to 4 Family Units", conv_optnum], 585: ["Tract MFI", conv_optnum],
12: ["Tract to MSA Income %", conv_optpct], 1057: ["Median Age", conv_optnum],
6: ["Small County", conv_scf]
}
cfkeys = census_file_columns.keys()
cfcolnames = {k: v[0] for k, v in census_file_columns.items()}
Expand All @@ -50,7 +49,7 @@ def conv_scf(val: str) -> str:

# Delineation-file column table, same `key: [column name, converter]` layout as the census
# table. The state/county names chosen here must match the census table's names, because
# the two parsed frames are later merged on exactly those two columns.
# NOTE(review): rendered diff with +/- markers stripped -- the "FIPSStateCode"/"FIPSCountyCode"
# line is the removed side, the "State"/"County" line is its replacement.
delineation_file_columns = {
3: ["CBSATitle", conv_str], 5: ["MDTitle", conv_optstr],
# Removed side.
9: ["FIPSStateCode", conv_dgstr], 10: ["FIPSCountyCode", conv_dgstr]
# Added side.
9: ["State", conv_dgstr], 10: ["County", conv_dgstr]
}
dfkeys = delineation_file_columns.keys()
dfcolnames = {k: v[0] for k, v in delineation_file_columns.items()}
Expand All @@ -59,17 +58,17 @@ def conv_scf(val: str) -> str:
converters=dfconverters).rename(dfcolnames, axis=1)
logging.info(f"Parsed {prepared_file}")

parsed_delin_df["MSAOrMDTitle"] = parsed_delin_df.apply(lambda row:
parsed_delin_df["MSA/MD Name"] = parsed_delin_df.apply(lambda row:
row.MDTitle if pd.notna(row.MDTitle) else row.CBSATitle, axis=1)
parsed_delin_df.drop(columns=["CBSATitle", "MDTitle"], inplace=True)
logging.info("Calculated MSAOrMDTitles")
logging.info("Calculated MSA/MD Names")

output_file = args.output_file if args.output_file \
else f"{os.path.splitext(args.censusfile)[0]}-parsed.txt"
output_df = parsed_census_df.merge(parsed_delin_df,
how="left", on=["FIPSStateCode", "FIPSCountyCode"])
output_df["MSAOrMDTitle"] = output_df.apply(lambda row:
"" if row.CBSACode == "99999" else row.MSAOrMDTitle, axis=1)
how="left", on=["State", "County"])
output_df["MSA/MD Name"] = output_df.apply(lambda row:
"" if row["MSA/MD"] == "99999" else row["MSA/MD Name"], axis=1)
output_df.to_csv(output_file, sep='|', index=False)
logging.info(f"Wrote output file {output_file}")
os.remove(prepared_file)
2 changes: 1 addition & 1 deletion common/src/main/pyhmda/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def apply_authorized_modifications(modmap: dict, df: pd.DataFrame) -> pd.DataFra
# Census Flat File Modifications

# Census flat-file modification: rows whose median-age column equals 2002 have that
# column overwritten with 6. The frame is modified in place and the same object is returned.
# NOTE(review): rendered diff with +/- markers stripped -- the "MedianAge" assignment is the
# removed line and the "Median Age" assignment is its replacement (1 addition & 1 deletion);
# only the latter exists in the file after this commit.
def replace_MedianAge_2002_values(df: pd.DataFrame) -> pd.DataFrame:
# Removed side: column name before the header revert.
df.loc[df["MedianAge"] == 2002, "MedianAge"] = 6
# Added side: column name matching the reverted output header.
df.loc[df["Median Age"] == 2002, "Median Age"] = 6
return df


Expand Down

0 comments on commit d2b8c1e

Please sign in to comment.