Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Use less common name as match suffix #13

Merged
merged 2 commits into from
Nov 6, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions sqlcompyre/analysis/table_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,13 @@ def row_matches(self) -> RowMatches:
@cached_property
def column_matches(self) -> ColumnMatches:
"""A comparison between the column values of the two tables."""
MATCH_SUFFIX = "_zzz_match"
inner_join = self._inner_join()

# Query for testing equality of column values in matching rows
cases = [
sa.case((self._is_equal(left_column, right_column), 1.0), else_=0.0).label(
f"{left_column}_match"
f"{left_column}_{MATCH_SUFFIX}"
)
for left_column, right_column in self.column_name_mapping.items()
if left_column not in self.join_columns
Expand All @@ -268,10 +269,10 @@ def column_matches(self) -> ColumnMatches:
case_stmt = select(*cases).select_from(inner_join).subquery()
borchero marked this conversation as resolved.
Show resolved Hide resolved

# Compute fraction of matching values
cols_to_avg = [col for col in case_stmt.c if "_match" in col.name]
cols_to_avg = [col for col in case_stmt.c if f"_{MATCH_SUFFIX}" in col.name]
avgs = select(
*[
sa.func.avg(col).label(f"{col.name.replace('_match', '')}")
sa.func.avg(col).label(f"{col.name.replace(f'_{MATCH_SUFFIX}', '')}")
for col in cols_to_avg
]
)
Expand Down