From 754a19cbea2b6b6b3984dfca64f8b631663552cc Mon Sep 17 00:00:00 2001 From: Witek Bedyk Date: Tue, 12 Sep 2023 14:00:17 +0200 Subject: [PATCH 1/7] Add unique index for rhnpackagechangelogdata table In spite of client code trying to avoid duplicate records for changelog data, such records are observed in the database. The change enforces a unique combination of the `name`, `time` and `text` fields by updating the definition of the unique index at the database level. Log package details when an import fails. Clean up the logic for removing duplicate changelog entries. --- python/spacewalk/satellite_tools/reposync.py | 4 -- python/spacewalk/server/importlib/backend.py | 10 ++++- .../server/importlib/packageImport.py | 14 ++++--- ...alk-backend.changes.witek.add-unique-index | 1 + .../203-add-changelogdata-index.sql | 37 +++++++++++++++++++ 5 files changed, 56 insertions(+), 10 deletions(-) create mode 100644 python/spacewalk/spacewalk-backend.changes.witek.add-unique-index create mode 100644 schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql diff --git a/python/spacewalk/satellite_tools/reposync.py b/python/spacewalk/satellite_tools/reposync.py index 01de32847503..4a2afd7134ce 100644 --- a/python/spacewalk/satellite_tools/reposync.py +++ b/python/spacewalk/satellite_tools/reposync.py @@ -1366,10 +1366,6 @@ def import_package_batch(self, to_process, to_disassociate, is_non_local_repo, b raise except Exception as e: e_message = f'Exception: {e}' - if importer: - e_message += f'\nPackage: {repr(importer)}' - if src_importer: - e_message += f'\nSource package: {repr(src_importer)}' log2(0, 1, e_message, stream=sys.stderr) if self.fail: raise diff --git a/python/spacewalk/server/importlib/backend.py b/python/spacewalk/server/importlib/backend.py index cd0fdb099588..1606c1d87d5d 100644 --- a/python/spacewalk/server/importlib/backend.py +++ b/python/spacewalk/server/importlib/backend.py @@ -26,6 +26,7 @@ from 
spacewalk.common.rhnConfig import CFG from spacewalk.common.rhnException import rhnFault from spacewalk.common.rhnLog import log_debug +from spacewalk.satellite_tools import syncLib from spacewalk.server import rhnSQL, rhnChannel, taskomatic from .importLib import Diff, Package, IncompletePackage, Erratum, \ AlreadyUploadedError, InvalidPackageError, TransactionError, \ @@ -33,6 +34,7 @@ from .backendLib import TableCollection, sanitizeValue, TableDelete, \ TableUpdate, TableLookup, addHash, TableInsert + sequences = { 'rhnPackageCapability': 'rhn_pkg_capability_id_seq', 'rhnPackage': 'rhn_package_id_seq', @@ -984,10 +986,16 @@ def processPackages(self, packages, uploadForce=0, ignoreUploaded=0, package['header_start'] = -1 package['header_end'] = -1 - self.__processObjectCollection__([package, ], 'rhnPackage', tableList, + try: + self.__processObjectCollection__([package, ], 'rhnPackage', tableList, uploadForce=uploadForce, forceVerify=forceVerify, ignoreUploaded=ignoreUploaded, severityLimit=1, transactional=transactional) + except Exception as e: + syncLib.log(0, "Error during processing package %s-%s-%s:%s.%s.\n%s" % + (package['name'], package['version'], package['release'], package['epoch'], package['arch'], + str(e))) + raise def processErrata(self, errata): # Insert/update the packages diff --git a/python/spacewalk/server/importlib/packageImport.py b/python/spacewalk/server/importlib/packageImport.py index 41aca288f0e1..ea21d327d3df 100644 --- a/python/spacewalk/server/importlib/packageImport.py +++ b/python/spacewalk/server/importlib/packageImport.py @@ -285,16 +285,20 @@ def _processPackage(self, package): self.checksums[fchecksumTuple] = None # Uniquify changelog entries - unique_package_changelog_hash = {} + unique_package_changelog_hash = set() unique_package_changelog = [] for changelog in package['changelog']: - key = (self._fix_encoding(changelog['name'][:128]), self._fix_encoding(changelog['time']), self._fix_encoding(changelog['text'])[:3000]) + 
changelog_name = self._fix_encoding(changelog['name'][:128]) + changelog_time = self._fix_encoding(changelog['time']) + changelog_text = self._fix_encoding(changelog['text'])[:3000] + key = (changelog_name, changelog_time, changelog_text) if key not in unique_package_changelog_hash: self.changelog_data[key] = None - changelog['name'] = changelog['name'][:128] - changelog['text'] = changelog['text'][:3000] + changelog['name'] = changelog_name + changelog['text'] = changelog_text + changelog['time'] = changelog_time unique_package_changelog.append(changelog) - unique_package_changelog_hash[key] = 1 + unique_package_changelog_hash.add(key) package['changelog'] = unique_package_changelog # fix encoding issues in package summary and description diff --git a/python/spacewalk/spacewalk-backend.changes.witek.add-unique-index b/python/spacewalk/spacewalk-backend.changes.witek.add-unique-index new file mode 100644 index 000000000000..c635f8472974 --- /dev/null +++ b/python/spacewalk/spacewalk-backend.changes.witek.add-unique-index @@ -0,0 +1 @@ +- Add unique index for rhnpackagechangelogdata table diff --git a/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql b/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql new file mode 100644 index 000000000000..93d9b8582e1b --- /dev/null +++ b/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql @@ -0,0 +1,37 @@ +create or replace function remove_duplicate_changelogdata() +returns void as +$$ +declare original record; +declare duplicate record; +begin + for original in select min(id) as id + from rhnpackagechangelogdata + group by name, text, time + having count(*) > 1 loop + for duplicate in select data2.id + from rhnpackagechangelogdata data1, rhnpackagechangelogdata data2 + where data1.name = data2.name + and data1.text = data2.text + and data1.time = 
data2.time + and data1.id != data2.id + and data1.id = original.id loop + update rhnpackagechangelogrec + set changelog_data_id = original.id + where changelog_data_id = duplicate.id; + delete from rhnpackagechangelogdata + where id = duplicate.id; + end loop; + end loop; +end; +$$ language plpgsql; + +select remove_duplicate_changelogdata(); +drop function remove_duplicate_changelogdata(); + +drop index if exists rhn_pkg_cld_nt_idx; + +create extension pgcrypto; +create unique index concurrently rhn_pkg_cld_ntt_idx + on "rhnpackagechangelogdata" + using btree(name, digest("text", 'md5'::text), "time"); + From d0bce1dbbdf9ee2952a69d098ed31e1580e6c8f3 Mon Sep 17 00:00:00 2001 From: Witek Bedyk Date: Wed, 11 Oct 2023 10:19:55 +0200 Subject: [PATCH 2/7] Use sha512 hashing Use sha512 hashing to minimize collision risk --- .../203-add-changelogdata-index.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql b/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql index 93d9b8582e1b..d226289fc84e 100644 --- a/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql +++ b/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql @@ -30,8 +30,8 @@ drop function remove_duplicate_changelogdata(); drop index if exists rhn_pkg_cld_nt_idx; -create extension pgcrypto; +create extension if not exists pgcrypto; create unique index concurrently rhn_pkg_cld_ntt_idx on "rhnpackagechangelogdata" - using btree(name, digest("text", 'md5'::text), "time"); + using btree(name, digest("text", 'sha512'::text), "time"); From 49737d6c307b55c55ce1d81341d641ffc32417db Mon Sep 17 00:00:00 2001 From: Witek Bedyk Date: Wed, 11 Oct 2023 11:55:30 +0200 Subject: [PATCH 3/7] Drop the index before recreating it --- 
.../203-add-changelogdata-index.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql b/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql index d226289fc84e..5052829aaa31 100644 --- a/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql +++ b/schema/spacewalk/upgrade/susemanager-schema-4.4.6-to-susemanager-schema-4.4.7/203-add-changelogdata-index.sql @@ -29,6 +29,7 @@ select remove_duplicate_changelogdata(); drop function remove_duplicate_changelogdata(); drop index if exists rhn_pkg_cld_nt_idx; +drop index if exists rhn_pkg_cld_ntt_idx; create extension if not exists pgcrypto; create unique index concurrently rhn_pkg_cld_ntt_idx From c5747cc2dbf323fd71bb138019a51efcd533f957 Mon Sep 17 00:00:00 2001 From: Witek Bedyk Date: Fri, 20 Oct 2023 10:09:36 +0200 Subject: [PATCH 4/7] Add new unique index in rhnPackageChangeLogData --- schema/spacewalk/common/tables/rhnPackageChangeLogData.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql b/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql index 215255e0e3ff..912ec81351ab 100644 --- a/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql +++ b/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql @@ -28,8 +28,9 @@ CREATE TABLE rhnPackageChangeLogData ; -CREATE INDEX rhn_pkg_cld_nt_idx - ON rhnPackageChangeLogData (name, time) +CREATE UNIQUE INDEX CONCURRENTLY rhn_pkg_cld_ntt_idx + ON rhnPackageChangeLogData + USING btree(name, digest("text", 'sha512'::text), time) ; From a1754bb5c9ef05d79805141df28586f57cb65e65 Mon Sep 17 00:00:00 2001 From: Witek Bedyk Date: Fri, 20 Oct 2023 10:48:40 +0200 Subject: [PATCH 5/7] Remove CONCURRENTLY keyword --- schema/spacewalk/common/tables/rhnPackageChangeLogData.sql 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql b/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql index 912ec81351ab..86bf2bdc73c9 100644 --- a/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql +++ b/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql @@ -28,7 +28,7 @@ CREATE TABLE rhnPackageChangeLogData ; -CREATE UNIQUE INDEX CONCURRENTLY rhn_pkg_cld_ntt_idx +CREATE UNIQUE INDEX rhn_pkg_cld_ntt_idx ON rhnPackageChangeLogData USING btree(name, digest("text", 'sha512'::text), time) From 442f1a2088291e5c72035f857e41ed5680138031 Mon Sep 17 00:00:00 2001 From: Witek Bedyk Date: Fri, 20 Oct 2023 12:00:32 +0200 Subject: [PATCH 6/7] Initialize pgcrypto extension --- schema/spacewalk/common/tables/rhnPackageChangeLogData.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql b/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql index 86bf2bdc73c9..500c7440daaf 100644 --- a/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql +++ b/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql @@ -13,6 +13,7 @@ -- in this software or its documentation. 
-- +CREATE EXTENSION IF NOT EXISTS pgcrypto; CREATE TABLE rhnPackageChangeLogData ( From 796f5094041cc48c60046b8ee73a42a198e5d2b0 Mon Sep 17 00:00:00 2001 From: Witek Bedyk Date: Mon, 23 Oct 2023 16:38:13 +0200 Subject: [PATCH 7/7] Move adding extension to start.sql --- schema/spacewalk/common/tables/rhnPackageChangeLogData.sql | 2 -- schema/spacewalk/postgres/start.sql | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql b/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql index 500c7440daaf..67950a49fb25 100644 --- a/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql +++ b/schema/spacewalk/common/tables/rhnPackageChangeLogData.sql @@ -13,8 +13,6 @@ -- in this software or its documentation. -- -CREATE EXTENSION IF NOT EXISTS pgcrypto; - CREATE TABLE rhnPackageChangeLogData ( id NUMERIC NOT NULL diff --git a/schema/spacewalk/postgres/start.sql b/schema/spacewalk/postgres/start.sql index ea80ba2a3683..c92263d809ef 100644 --- a/schema/spacewalk/postgres/start.sql +++ b/schema/spacewalk/postgres/start.sql @@ -21,3 +21,5 @@ create temporary table store_searchpath as select setting from pg_settings where update pg_settings set setting = (select setting from store_searchpath) where name = 'search_path'; drop table store_searchpath; + +CREATE EXTENSION pgcrypto;