From bc43b3dbad0de6bbd8d38d8b3fd6668ed7ea218f Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Wed, 6 Nov 2024 12:56:43 +0100 Subject: [PATCH 01/11] fix: Column name mapping in missing left/right --- sqlcompyre/analysis/schema_comparison.py | 1 + sqlcompyre/analysis/table_comparison.py | 1 + sqlcompyre/results/names.py | 22 +++++++++++++++++++--- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/sqlcompyre/analysis/schema_comparison.py b/sqlcompyre/analysis/schema_comparison.py index 1b3d10f..070675e 100644 --- a/sqlcompyre/analysis/schema_comparison.py +++ b/sqlcompyre/analysis/schema_comparison.py @@ -77,6 +77,7 @@ def table_names(self) -> Names: return Names( left=set(self.left_tables.keys()), right=set(self.right_tables.keys()), + column_name_mapping=None, ignore_casing=self.ignore_casing, ) diff --git a/sqlcompyre/analysis/table_comparison.py b/sqlcompyre/analysis/table_comparison.py index 181b866..8ace769 100644 --- a/sqlcompyre/analysis/table_comparison.py +++ b/sqlcompyre/analysis/table_comparison.py @@ -156,6 +156,7 @@ def column_names(self) -> Names: return Names( left={col.name for col in self.left_table.columns}, right={col.name for col in self.right_table.columns}, + column_name_mapping=self.column_name_mapping, ignore_casing=self.ignore_casing, ) diff --git a/sqlcompyre/results/names.py b/sqlcompyre/results/names.py index ab4a19b..0dc5441 100644 --- a/sqlcompyre/results/names.py +++ b/sqlcompyre/results/names.py @@ -7,11 +7,18 @@ class Names: """Investigate the names of database objects.""" - def __init__(self, left: set[str], right: set[str], ignore_casing: bool): + def __init__( + self, + left: set[str], + right: set[str], + column_name_mapping: dict[str, str] | None, + ignore_casing: bool, + ): """ Args: left: Names from the "left" database object. right: Names from the "right" database object. + column_name_mapping: Mapping of column names from the "left" to the "right" database object. ignore_casing: Whether to ignore casing for name equality. """ if ignore_casing: @@ -20,6 +27,7 @@ def __init__(self, left: set[str], right: set[str], ignore_casing: bool): else: self._set_left = left self._set_right = right + self.column_name_mapping = column_name_mapping @cached_property def left(self) -> list[str]: @@ -39,12 +47,20 @@ def in_common(self) -> list[str]: @cached_property def missing_left(self) -> list[str]: """Ordered list of names provided only by the "right" database object.""" - return sorted(self._set_right - self._set_left) + renamed_keys = ( + set(self.column_name_mapping.keys()) if self.column_name_mapping else set() + ) + return sorted(self._set_right - self._set_left - renamed_keys) @cached_property def missing_right(self) -> list[str]: """Ordered list of names provided only by the "left" database object.""" - return sorted(self._set_left - self._set_right) + renamed_keys = ( + set(self.column_name_mapping.values()) + if self.column_name_mapping + else set() + ) + return sorted(self._set_left - self._set_right - renamed_keys) @cached_property def equal(self) -> bool: From b88f5d141882cef86e2cc6b1afc009f88b0e0a8c Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Wed, 6 Nov 2024 13:00:58 +0100 Subject: [PATCH 02/11] fix pre-commit --- tests/report/formatters/conftest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/report/formatters/conftest.py b/tests/report/formatters/conftest.py index 89df017..5cd932d 100644 --- a/tests/report/formatters/conftest.py +++ b/tests/report/formatters/conftest.py @@ -20,7 +20,10 @@ def metadata_description() -> Metadata: @pytest.fixture() def names() -> Names: return Names( - left={"hello", "world"}, right={"hello", "hi", "there"}, ignore_casing=False + left={"hello", "world"}, + right={"hello", "hi", "there"}, + column_name_mapping=None, + ignore_casing=False, ) From 40dd6802db791779661ee4a3b8ea8ab74731f3b8 Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Wed, 6 Nov 2024 13:12:47 +0100 Subject: [PATCH 03/11] add test case --- .../table_comparison/test_column_names.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/analysis/table_comparison/test_column_names.py b/tests/analysis/table_comparison/test_column_names.py index a2705ad..4b7beee 100644 --- a/tests/analysis/table_comparison/test_column_names.py +++ b/tests/analysis/table_comparison/test_column_names.py @@ -28,6 +28,46 @@ def test_column_names_equal_missing_2(engine: Engine, table_students: sa.Table): assert not comparison.column_names.missing_right +def test_column_names_equal_missing_renamed_1( + engine: Engine, + table_students_modified_2: sa.Table, + table_students_renamed: sa.Table, +): + # Check that there are no missing columns in two different tables with renamings + comparison = sc.compare_tables( + engine, + table_students_modified_2, + table_students_renamed, + column_name_mapping={ + "id": "id_v2", + "name": "name_v2", + "age": "age_v2", + "gpa": "gpa_v2", + }, + ) + assert not comparison.column_names.missing_left + + +def test_column_names_equal_missing_renamed_2( + engine: Engine, + table_students_modified_2: sa.Table, + table_students_renamed: sa.Table, +): + # Check that there are no missing columns in two different tables with renamings + comparison = sc.compare_tables( + engine, + table_students_modified_2, + table_students_renamed, + column_name_mapping={ + "id": "id_v2", + "name": "name_v2", + "age": "age_v2", + "gpa": "gpa_v2", + }, + ) + assert not comparison.column_names.missing_right + + def test_column_names_unequal_same_names( engine: Engine, table_students: sa.Table, From 050e39614c52fbecd1ffbdb8e1eee716631d2666 Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Wed, 6 Nov 2024 13:16:32 +0100 Subject: [PATCH 04/11] switch keys and values --- sqlcompyre/results/names.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sqlcompyre/results/names.py b/sqlcompyre/results/names.py index 0dc5441..d9b5b26 100644 --- a/sqlcompyre/results/names.py +++ b/sqlcompyre/results/names.py @@ -48,7 +48,9 @@ def in_common(self) -> list[str]: def missing_left(self) -> list[str]: """Ordered list of names provided only by the "right" database object.""" renamed_keys = ( - set(self.column_name_mapping.keys()) if self.column_name_mapping else set() + set(self.column_name_mapping.values()) + if self.column_name_mapping + else set() ) return sorted(self._set_right - self._set_left - renamed_keys) @@ -56,9 +58,7 @@ def missing_left(self) -> list[str]: def missing_right(self) -> list[str]: """Ordered list of names provided only by the "left" database object.""" renamed_keys = ( - set(self.column_name_mapping.values()) - if self.column_name_mapping - else set() + set(self.column_name_mapping.keys()) if self.column_name_mapping else set() ) return sorted(self._set_left - self._set_right - renamed_keys) From 7809009b6146e70e4f7bd708a61141119df4a027 Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Wed, 6 Nov 2024 13:19:51 +0100 Subject: [PATCH 05/11] adjust missing_left/_right values in test_partly_renaming --- tests/analysis/table_comparison/test_column_matching.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/analysis/table_comparison/test_column_matching.py b/tests/analysis/table_comparison/test_column_matching.py index 1465845..d6e096e 100644 --- a/tests/analysis/table_comparison/test_column_matching.py +++ b/tests/analysis/table_comparison/test_column_matching.py @@ -89,8 +89,8 @@ def test_partly_renaming( column_name_mapping={"age": "age_v2", "gpa": "gpa_v2"}, ) assert len(comparison.column_names.in_common) == 2 - assert len(comparison.column_names.missing_left) == 2 - assert len(comparison.column_names.missing_right) == 2 + assert len(comparison.column_names.missing_left) == 0 + assert len(comparison.column_names.missing_right) == 0 assert comparison.row_counts.diff == 1 # Ensure that all columns are matched, one is primary key, 3 per table left assert pd.read_sql(comparison.row_matches.joined_equal, con=engine).shape[1] == 7 @@ -102,8 +102,8 @@ def test_partly_renaming( column_name_mapping={"age_v2": "age", "gpa_v2": "gpa"}, ) assert len(comparison.column_names.in_common) == 2 - assert len(comparison.column_names.missing_left) == 2 - assert len(comparison.column_names.missing_right) == 2 + assert len(comparison.column_names.missing_left) == 0 + assert len(comparison.column_names.missing_right) == 0 assert comparison.row_counts.diff == 1 # Ensure that all columns are matched, one is primary key, 3 per table left assert pd.read_sql(comparison.row_matches.joined_equal, con=engine).shape[1] == 7 From a610fcdbf10c71655530f6cc0154b3f3b9a0c726 Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Wed, 6 Nov 2024 13:27:08 +0100 Subject: [PATCH 06/11] fine-tune implementation --- sqlcompyre/analysis/schema_comparison.py | 2 +- sqlcompyre/analysis/table_comparison.py | 2 +- sqlcompyre/results/names.py | 31 ++++++++++++++---------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/sqlcompyre/analysis/schema_comparison.py b/sqlcompyre/analysis/schema_comparison.py index 070675e..a8916cd 100644 --- a/sqlcompyre/analysis/schema_comparison.py +++ b/sqlcompyre/analysis/schema_comparison.py @@ -77,7 +77,7 @@ def table_names(self) -> Names: return Names( left=set(self.left_tables.keys()), right=set(self.right_tables.keys()), - column_name_mapping=None, + name_mapping=None, ignore_casing=self.ignore_casing, ) diff --git a/sqlcompyre/analysis/table_comparison.py b/sqlcompyre/analysis/table_comparison.py index 8ace769..58b381e 100644 --- a/sqlcompyre/analysis/table_comparison.py +++ b/sqlcompyre/analysis/table_comparison.py @@ -156,7 +156,7 @@ def column_names(self) -> Names: return Names( left={col.name for col in self.left_table.columns}, right={col.name for col in self.right_table.columns}, - column_name_mapping=self.column_name_mapping, + name_mapping=self.column_name_mapping, ignore_casing=self.ignore_casing, ) diff --git a/sqlcompyre/results/names.py b/sqlcompyre/results/names.py index d9b5b26..916f9cf 100644 --- a/sqlcompyre/results/names.py +++ b/sqlcompyre/results/names.py @@ -11,14 +11,14 @@ def __init__( self, left: set[str], right: set[str], - column_name_mapping: dict[str, str] | None, + name_mapping: dict[str, str] | None, ignore_casing: bool, ): """ Args: left: Names from the "left" database object. right: Names from the "right" database object. - column_name_mapping: Mapping of column names from the "left" to the "right" database object. + name_mapping: Mapping from the "left" to the "right" database object. ignore_casing: Whether to ignore casing for name equality. """ if ignore_casing: @@ -27,7 +27,10 @@ def __init__( else: self._set_left = left self._set_right = right - self.column_name_mapping = column_name_mapping + self.name_mapping = name_mapping + self.inverse_name_mapping = ( + {v: k for k, v in name_mapping.items()} if name_mapping else {} + ) @cached_property def left(self) -> list[str]: @@ -47,20 +50,22 @@ def in_common(self) -> list[str]: @cached_property def missing_left(self) -> list[str]: """Ordered list of names provided only by the "right" database object.""" - renamed_keys = ( - set(self.column_name_mapping.values()) - if self.column_name_mapping - else set() - ) - return sorted(self._set_right - self._set_left - renamed_keys) + if self.name_mapping: + right_renamed = { + self.inverse_name_mapping.get(k, k) for k in self._set_right + } + else: + right_renamed = self._set_right + return sorted(right_renamed - self._set_left) @cached_property def missing_right(self) -> list[str]: """Ordered list of names provided only by the "left" database object.""" - renamed_keys = ( - set(self.column_name_mapping.keys()) if self.column_name_mapping else set() - ) - return sorted(self._set_left - self._set_right - renamed_keys) + if self.name_mapping: + left_renamed = {self.name_mapping.get(k, k) for k in self._set_left} + else: + left_renamed = self._set_left + return sorted(self._set_left - left_renamed) @cached_property def equal(self) -> bool: From 045dee4efca23b6e7271e83c2d2021e6faefb9e2 Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Wed, 6 Nov 2024 13:30:22 +0100 Subject: [PATCH 07/11] fix --- sqlcompyre/results/names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlcompyre/results/names.py b/sqlcompyre/results/names.py index 916f9cf..427db2c 100644 --- a/sqlcompyre/results/names.py +++ b/sqlcompyre/results/names.py @@ -65,7 +65,7 @@ def missing_right(self) -> list[str]: left_renamed = {self.name_mapping.get(k, k) for k in self._set_left} else: left_renamed = self._set_left - return sorted(self._set_left - left_renamed) + return sorted(left_renamed - self._set_right) @cached_property def equal(self) -> bool: From 5f4dc733927e474186c893b1d9ca400791c9f679 Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Wed, 6 Nov 2024 13:33:29 +0100 Subject: [PATCH 08/11] fix kwarg --- tests/report/formatters/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/report/formatters/conftest.py b/tests/report/formatters/conftest.py index 5cd932d..06e866a 100644 --- a/tests/report/formatters/conftest.py +++ b/tests/report/formatters/conftest.py @@ -22,7 +22,7 @@ def names() -> Names: return Names( left={"hello", "world"}, right={"hello", "hi", "there"}, - column_name_mapping=None, + name_mapping=None, ignore_casing=False, ) From 7ab470b87b5d78db43305996dc18e2868682b79f Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Wed, 6 Nov 2024 13:36:49 +0100 Subject: [PATCH 09/11] simplify --- sqlcompyre/results/names.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sqlcompyre/results/names.py b/sqlcompyre/results/names.py index 427db2c..a4a39cd 100644 --- a/sqlcompyre/results/names.py +++ b/sqlcompyre/results/names.py @@ -27,8 +27,8 @@ def __init__( else: self._set_left = left self._set_right = right - self.name_mapping = name_mapping - self.inverse_name_mapping = ( + self._name_mapping = name_mapping + self._inverse_name_mapping = ( {v: k for k, v in name_mapping.items()} if name_mapping else {} ) @@ -50,22 +50,22 @@ def in_common(self) -> list[str]: @cached_property def missing_left(self) -> list[str]: """Ordered list of names provided only by the "right" database object.""" - if self.name_mapping: + if self._name_mapping: right_renamed = { - self.inverse_name_mapping.get(k, k) for k in self._set_right + self._inverse_name_mapping.get(k, k) for k in self._set_right } + return sorted(right_renamed - self._set_left) else: - right_renamed = self._set_right - return sorted(right_renamed - self._set_left) + return sorted(self._set_right - self._set_left) @cached_property def missing_right(self) -> list[str]: """Ordered list of names provided only by the "left" database object.""" - if self.name_mapping: - left_renamed = {self.name_mapping.get(k, k) for k in self._set_left} + if self._name_mapping: + left_renamed = {self._name_mapping.get(k, k) for k in self._set_left} + return sorted(left_renamed - self._set_right) else: - left_renamed = self._set_left - return sorted(left_renamed - self._set_right) + return sorted(self._set_left - self._set_right) @cached_property def equal(self) -> bool: From e43d78c2ee2261ce23373dd4b4bb496aa7ea710e Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Thu, 7 Nov 2024 12:22:08 +0100 Subject: [PATCH 10/11] feedback from OB --- sqlcompyre/results/names.py | 4 +- .../table_comparison/test_column_names.py | 40 ------------------- 2 files changed, 2 insertions(+), 42 deletions(-) diff --git a/sqlcompyre/results/names.py b/sqlcompyre/results/names.py index a4a39cd..2bba57f 100644 --- a/sqlcompyre/results/names.py +++ b/sqlcompyre/results/names.py @@ -50,7 +50,7 @@ def in_common(self) -> list[str]: @cached_property def missing_left(self) -> list[str]: """Ordered list of names provided only by the "right" database object.""" - if self._name_mapping: + if self._name_mapping is not None: right_renamed = { self._inverse_name_mapping.get(k, k) for k in self._set_right } @@ -61,7 +61,7 @@ def missing_left(self) -> list[str]: @cached_property def missing_right(self) -> list[str]: """Ordered list of names provided only by the "left" database object.""" - if self._name_mapping: + if self._name_mapping is not None: left_renamed = {self._name_mapping.get(k, k) for k in self._set_left} return sorted(left_renamed - self._set_right) else: diff --git a/tests/analysis/table_comparison/test_column_names.py b/tests/analysis/table_comparison/test_column_names.py index 4b7beee..a2705ad 100644 --- a/tests/analysis/table_comparison/test_column_names.py +++ b/tests/analysis/table_comparison/test_column_names.py @@ -28,46 +28,6 @@ def test_column_names_equal_missing_2(engine: Engine, table_students: sa.Table): assert not comparison.column_names.missing_right -def test_column_names_equal_missing_renamed_1( - engine: Engine, - table_students_modified_2: sa.Table, - table_students_renamed: sa.Table, -): - # Check that there are no missing columns in two different tables with renamings - comparison = sc.compare_tables( - engine, - table_students_modified_2, - table_students_renamed, - column_name_mapping={ - "id": "id_v2", - "name": "name_v2", - "age": "age_v2", - "gpa": "gpa_v2", - }, - ) - assert not comparison.column_names.missing_left - - -def test_column_names_equal_missing_renamed_2( - engine: Engine, - table_students_modified_2: sa.Table, - table_students_renamed: sa.Table, -): - # Check that there are no missing columns in two different tables with renamings - comparison = sc.compare_tables( - engine, - table_students_modified_2, - table_students_renamed, - column_name_mapping={ - "id": "id_v2", - "name": "name_v2", - "age": "age_v2", - "gpa": "gpa_v2", - }, - ) - assert not comparison.column_names.missing_right - - def test_column_names_unequal_same_names( engine: Engine, table_students: sa.Table, From 1e030f1e5b46658bcb2b925cbacff2ce4aecc6f0 Mon Sep 17 00:00:00 2001 From: Marius Merkle Date: Thu, 7 Nov 2024 12:52:29 +0100 Subject: [PATCH 11/11] undo is not None --- sqlcompyre/results/names.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlcompyre/results/names.py b/sqlcompyre/results/names.py index 2bba57f..a4a39cd 100644 --- a/sqlcompyre/results/names.py +++ b/sqlcompyre/results/names.py @@ -50,7 +50,7 @@ def in_common(self) -> list[str]: @cached_property def missing_left(self) -> list[str]: """Ordered list of names provided only by the "right" database object.""" - if self._name_mapping is not None: + if self._name_mapping: right_renamed = { self._inverse_name_mapping.get(k, k) for k in self._set_right } @@ -61,7 +61,7 @@ def missing_left(self) -> list[str]: @cached_property def missing_right(self) -> list[str]: """Ordered list of names provided only by the "left" database object.""" - if self._name_mapping is not None: + if self._name_mapping: left_renamed = {self._name_mapping.get(k, k) for k in self._set_left} return sorted(left_renamed - self._set_right) else: