From df96667d040c1900b1a930ff9f767fcd3da76868 Mon Sep 17 00:00:00 2001 From: Alexander Bruy Date: Thu, 24 Mar 2022 17:50:45 +0200 Subject: [PATCH] more sophisticated validation of the schema changes based on the geodiff representation of the database schema. --- Mergin/utils.py | 92 +++++++++++++++++++++++++++++++++++++++----- Mergin/validation.py | 6 ++- 2 files changed, 86 insertions(+), 12 deletions(-) diff --git a/Mergin/utils.py b/Mergin/utils.py index 98b47a07..ac3e6706 100644 --- a/Mergin/utils.py +++ b/Mergin/utils.py @@ -8,6 +8,8 @@ import platform import urllib.parse import urllib.request +import tempfile +import json from qgis.PyQt.QtCore import QSettings, QVariant from qgis.PyQt.QtWidgets import QMessageBox, QFileDialog @@ -42,7 +44,6 @@ try: - from .mergin import InvalidProject from .mergin.client import MerginClient, ClientError, LoginError, InvalidProject from .mergin.client_pull import download_project_async, download_project_is_running, \ download_project_finalize, download_project_cancel @@ -51,6 +52,7 @@ from .mergin.client_push import push_project_async, push_project_is_running, \ push_project_finalize, push_project_cancel from .mergin.report import create_report + from .mergin.deps import pygeodiff except ImportError: import sys this_dir = os.path.dirname(os.path.realpath(__file__)) @@ -64,6 +66,7 @@ from mergin.client_push import push_project_async, push_project_is_running, \ push_project_finalize, push_project_cancel from .mergin.report import create_report + from .mergin.deps import pygeodiff MERGIN_URL = 'https://public.cloudmergin.com' MERGIN_LOGS_URL = 'https://g4pfq226j0.execute-api.eu-west-1.amazonaws.com/mergin_client_log_submit' @@ -885,16 +888,85 @@ def is_number(s): def has_schema_change(mp, layer): - """Check if the layer has schema (attribute table) changes. """ - local_fields = layer.fields() + Check whether the layer has schema changes using schema representaion + in JSON format generated by geodiff. + """ - file_path = os.path.split(layer.publicSource().split("|")[0])[1] - meta_path = mp.fpath_meta(file_path) - meta_layer = QgsVectorLayer(meta_path, '', 'ogr') - meta_fields = meta_layer.fields() + geodiff = pygeodiff.GeoDiff() - if local_fields != meta_fields: - return True + local_path = layer.publicSource().split("|")[0] + f_name = os.path.split(local_path)[1] + base_path = mp.fpath_meta(f_name) - return False + tmp_file = tempfile.NamedTemporaryFile(delete=False) + tmp_file.close() + geodiff.schema('sqlite', '', local_path, tmp_file.name) + with open(tmp_file.name, encoding="utf-8") as f: + base_schema = json.load(f).get('geodiff_schema') + + geodiff.schema('sqlite', '', base_path, tmp_file.name) + with open(tmp_file.name, encoding="utf-8") as f: + local_schema = json.load(f).get('geodiff_schema') + + os.unlink(tmp_file.name) + + return same_schema(local_schema, base_schema) + + +def same_schema(schema_a, schema_b): + """ + Compares two JSON objects created by geodiff which represent database schemas. + + :param schema_a: first schema JSON + :type schema_a: dict + :param schema_b: second schema JSON + :type schema_b: dict + :returns: comparison result + :rtype: tuple(bool, str) + """ + + def compare(list_a, list_b, key): + """ + Test whether lists of dictionaries have the same number of keys and + these keys are the same. + + :param list_a: first list + :type list_a: list[dict] + :param list_b: second list + :type list_b: list[dict] + :param key: dictionary key used for comparison + :type key: str + :returns: comparison result + :rtype: tuple(bool, str) + """ + items_a = sorted([item[key] for item in list_a]) + items_b = sorted([item[key] for item in list_b]) + if items_a != items_b: + s1 = set(items_a) + s2 = set(items_b) + added = s2 - s1 + removed = s1 - s2 + msg = ["added: {}".format(', '.join(added)) if added else ''] + msg.append("removed: {}".format(', '.join(removed)) if removed else '') + if added or removed: + return False, '; '.join(filter(None, msg)) + + return True, '' + + equal, msg = compare(schema_a, schema_b, 'table') + if not equal: + return equal, "Tables added/removed: " + msg + + for table_a in schema_a: + table_b = next(item for item in schema_b if item["table"] == table_a["table"]) + equal, msg = compare(table_a["columns"], table_b["columns"], 'name') + if not equal: + return equal, "Fields in table '{}' added/removed: {}".format(table_a["table"], msg) + + for column_a in table_a["columns"]: + column_b = next(item for item in table_b["columns"] if item["name"] == column_a["name"]) + if column_a != column_b: + return False, "Definition of '{}' field in '{}' table is not the same".format(column_a["name"], table_a["table"]) + + return True, "No schema changes" diff --git a/Mergin/validation.py b/Mergin/validation.py index 2d25eb3f..2d171a9e 100644 --- a/Mergin/validation.py +++ b/Mergin/validation.py @@ -186,5 +186,7 @@ def check_db_schema(self): if lid not in self.editable: continue dp = layer.dataProvider() - if dp.storageType() == "GPKG" and has_schema_change(self.mp, layer): - self.issues[self.DATABASE_SCHEMA_CHANGE].append(lid) + if dp.storageType() == "GPKG": + has_change, msg = has_schema_change(self.mp, layer) + if not has_change: + self.issues[self.DATABASE_SCHEMA_CHANGE].append(lid)