From ad46fbbf956b0e140b36c038b581eb5bc4ea86e6 Mon Sep 17 00:00:00 2001 From: amandine-sahl Date: Thu, 16 Jan 2025 12:07:25 +0100 Subject: [PATCH 1/3] Taxref v18 : Import + Migrate --- .../da3172cecdb1_taxref_taxref_v18.py | 31 +++ .../commands/migrate_taxref/commands_v18.py | 199 +++++++++++++++ .../0_taxref_import_data.sql | 72 ++++++ .../3.2_alter_taxref_data.sql | 237 ++++++++++++++++++ .../data/specific_taxref_v18/__init__.py | 0 .../migrate_taxref/test_commands_migrate.py | 166 ++++++++++-- apptax/taxonomie/commands/taxref.py | 9 +- apptax/taxonomie/commands/taxref_v18.py | 155 ++++++++++++ apptax/taxonomie/models.py | 2 + apptax/tests/test_taxref_last_version.py | 2 +- install_db.sh | 2 +- 11 files changed, 854 insertions(+), 21 deletions(-) create mode 100644 apptax/migrations/versions/da3172cecdb1_taxref_taxref_v18.py create mode 100644 apptax/taxonomie/commands/migrate_taxref/commands_v18.py create mode 100644 apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/0_taxref_import_data.sql create mode 100755 apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/3.2_alter_taxref_data.sql create mode 100644 apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/__init__.py create mode 100644 apptax/taxonomie/commands/taxref_v18.py diff --git a/apptax/migrations/versions/da3172cecdb1_taxref_taxref_v18.py b/apptax/migrations/versions/da3172cecdb1_taxref_taxref_v18.py new file mode 100644 index 000000000..9ef3ea08a --- /dev/null +++ b/apptax/migrations/versions/da3172cecdb1_taxref_taxref_v18.py @@ -0,0 +1,31 @@ +"""[taxref] Taxref v18 + +Revision ID: da3172cecdb1 +Revises: 2c68a907f74c +Create Date: 2025-01-14 11:44:12.356028 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "da3172cecdb1" +down_revision = "2c68a907f74c" +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column(table_name="taxref", column=sa.Column("cd_ba", sa.Integer()), schema="taxonomie") + op.add_column( + table_name="taxref", + column=sa.Column("nomenclatural_comment", sa.String(500)), + schema="taxonomie", + ) + + +def downgrade(): + op.drop_column(table_name="taxref", column_name="cd_ba", schema="taxonomie") + op.drop_column(table_name="taxref", column_name="nomenclatural_comment", schema="taxonomie") diff --git a/apptax/taxonomie/commands/migrate_taxref/commands_v18.py b/apptax/taxonomie/commands/migrate_taxref/commands_v18.py new file mode 100644 index 000000000..8dd0f2293 --- /dev/null +++ b/apptax/taxonomie/commands/migrate_taxref/commands_v18.py @@ -0,0 +1,199 @@ +import importlib +import click +from zipfile import ZipFile +from sqlalchemy import text +from flask.cli import with_appcontext + +from utils_flask_sqla.migrations.utils import open_remote_file + +from apptax.database import db +from apptax.taxonomie.commands.utils import ( + copy_from_csv, + truncate_bdc_statuts, + refresh_taxref_vm, + insert_taxref_numversion, +) +from apptax.taxonomie.commands.taxref_v18 import import_bdc_statuts_v18 +from .utils import save_data, analyse_taxref_changes +from . 
import logger + + +base_url = "http://geonature.fr/data/inpn/taxonomie/" + + +@click.group(help="Migrate to TaxRef v18.") +def migrate_to_v18(): + pass + + +@migrate_to_v18.command() +@with_appcontext +def import_taxref_v18(): + """ + Procédure de migration de taxref vers la version 18 + Test de la disparition des cd_noms + """ + # Prerequis : deps_test_fk_dependencies_cd_nom + query = text( + importlib.resources.read_text( + "apptax.taxonomie.commands.migrate_taxref.data.changes_detection", + "0.2_taxref_detection_repercussion_disparition_cd_nom.sql", + ) + ) + db.session.execute(query) + + # import taxref v18 data + import_data_taxref_v18() + db.session.commit() + + # Analyse des changements à venir + analyse_taxref_changes() + + +@migrate_to_v18.command() +@click.option("--keep-cdnom", is_flag=True) +@with_appcontext +def test_changes_detection(keep_cdnom): + """Analyse des répercussions de changement de taxref + + :param keep-cdnom: Indique si l'on souhaite concerver les cd_noms manquant au lieu de les supprimer + :type keep-cdnom: boolean + + 3 étapes : + - Detection des cd_noms manquants + - Création d'une copie de travail de bib_noms + - Analyse des modifications taxonomique (split, merge, ...) 
et + de leur répercussion sur les attributs et medias de taxhub + """ + # Analyse des changements à venir + analyse_taxref_changes(keep_missing_cd_nom=keep_cdnom) + + +@migrate_to_v18.command() +@click.option("--keep-oldtaxref", is_flag=True) +@click.option("--keep-oldbdc", is_flag=True) +@click.option("--keep-cdnom", is_flag=True) +@click.option("--taxref-region", type=str) +@click.option("--script_predetection", type=click.Path(exists=True)) +@click.option("--script_postdetection", type=click.Path(exists=True)) +@with_appcontext +def apply_changes( + keep_oldtaxref, + keep_oldbdc, + keep_cdnom, + taxref_region, + script_predetection, + script_postdetection, +): + """Procédure de migration de taxref vers la version 18 + Application des changements import des données dans les tables taxref et bdc_status + + + :param keep-oldtaxref: Indique si l'on souhaite concerver l'ancienne version du referentiel taxref + :type keep-oldtaxref: boolean + :param keep-oldbdc: Indique si l'on souhaite concerver l'ancienne version du referentiel bdc_status + :type keep-oldbdc: boolean + :param keep-cdnom: Indique si l'on souhaite concerver les cd_noms manquant au lieu de les supprimer + :type keep-cdnom: boolean + :param script_predetection: Emplacement d'un fichier sql de correction avant la detection des changements + :type script_predetection: Path + :param script_postdetection: Emplacement d'un fichier sql de correction après la detection des changements + :type script_postdetection: Path + """ + + # Analyse des changements à venir + analyse_taxref_changes( + keep_missing_cd_nom=keep_cdnom, + script_predetection=script_predetection, + script_postdetection=script_postdetection, + ) + + # Save taxref and bdc_status data + save_data(17, keep_oldtaxref, keep_oldbdc) + + # Update taxref v18 + logger.info("Migration of taxref ...") + try: + query = text( + importlib.resources.read_text( + "apptax.taxonomie.commands.migrate_taxref.data.specific_taxref_v18", + 
"3.2_alter_taxref_data.sql", + ) + ) + db.session.execute(query, {"keep_cd_nom": keep_cdnom, "taxref_region": taxref_region}) + db.session.commit() + logger.info("it's done") + except Exception as e: + logger.error(str(e)) + + # Import bdc status data and insert into taxhub tables + import_and_format_dbc_status() + + # Clean DB + logger.info("Clean DB") + query = text( + importlib.resources.read_text( + "apptax.taxonomie.commands.migrate_taxref.data", "5_clean_db.sql" + ) + ) + db.session.execute(query) + + logger.info("Refresh materialized views…") + refresh_taxref_vm() + + insert_taxref_numversion(18) + db.session.commit() + + +def import_data_taxref_v18(): + """ + Import des données brutes de taxref v18 en base + avant leur traitement + """ + print("sdfsdfsdf") + logger.info("Import TAXREFv18 into tmp table…") + + # Préparation création de table temporaire permettant d'importer taxref + query = text( + importlib.resources.read_text( + "apptax.taxonomie.commands.migrate_taxref.data.specific_taxref_v18", + "0_taxref_import_data.sql", + ) + ) + db.session.execute(query) + db.session.commit() + + with open_remote_file(base_url, "TAXREF_v18_2025.zip", open_fct=ZipFile) as archive: + with archive.open("TAXREFv18.txt") as f: + logger.info("Insert TAXREFv18 into taxonomie.import_taxref table…") + copy_from_csv( + f, + table_name="import_taxref", + delimiter="\t", + ) + with archive.open("CDNOM_DISPARUS.txt") as f: + logger.info("Insert missing cd_nom into taxonomie.cdnom_disparu table…") + copy_from_csv( + f, + table_name="cdnom_disparu", + delimiter="\t", + ) + + with archive.open("rangs_note.csv") as f: + logger.info("Insert rangs_note tmp table…") + copy_from_csv( + f, + table_name="import_taxref_rangs", + encoding="WIN1252", + delimiter=";", + ) + + +def import_and_format_dbc_status(): + """ + Import des données brutes de la base bdc_status en base + Puis traitement des données de façon à les ventiler dans les différentes tables + """ + pass + # 
truncate_bdc_statuts() + # import_bdc_statuts_v18(logger) diff --git a/apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/0_taxref_import_data.sql b/apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/0_taxref_import_data.sql new file mode 100644 index 000000000..db660a58c --- /dev/null +++ b/apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/0_taxref_import_data.sql @@ -0,0 +1,72 @@ +-- Créer la table import_taxref + + +DROP TABLE IF EXISTS taxonomie.import_taxref; +CREATE TABLE taxonomie.import_taxref +( + regne character varying(20), + phylum character varying(50), + classe character varying(50), + ordre character varying(50), + famille character varying(50), + sous_famille character varying(50), + tribu character varying(50), + group1_inpn character varying(50), + group2_inpn character varying(50), + group3_inpn character varying(50), + cd_nom integer NOT NULL, + cd_taxsup integer, + cd_sup integer, + cd_ref integer, + cd_ba integer, + rang character varying(10), + lb_nom character varying(100), + lb_auteur character varying(500), + nomenclatural_comment character varying(500), + nom_complet character varying(500), + nom_complet_html character varying(500), + nom_valide character varying(500), + nom_vern text, + nom_vern_eng character varying(500), + habitat character varying(10), + fr character varying(10), + gf character varying(10), + mar character varying(10), + gua character varying(10), + sm character varying(10), + sb character varying(10), + spm character varying(10), + may character varying(10), + epa character varying(10), + reu character varying(10), + sa character varying(10), + ta character varying(10), + taaf character varying(10), + pf character varying(10), + nc character varying(10), + wf character varying(10), + cli character varying(10), + url text, + url_inpn text +); + +ALTER TABLE taxonomie.import_taxref ADD CONSTRAINT pk_import_taxref PRIMARY KEY (cd_nom); + +-- Créer la table cdnom_disparus 
+DROP TABLE IF EXISTS taxonomie.cdnom_disparu; +CREATE TABLE taxonomie.cdnom_disparu ( + CD_NOM int, + PLUS_RECENTE_DIFFUSION character varying(50), + CD_NOM_REMPLACEMENT int, + CD_RAISON_SUPPRESSION int, + RAISON_SUPPRESSION text +); + + +DROP TABLE IF EXISTS taxonomie.import_taxref_rangs; +CREATE TABLE taxonomie.import_taxref_rangs ( + level int NOT NULL, + rang varchar(20) NOT NULL, + detail_fr varchar(50) NOT NULL, + detail_en varchar(50) NOT NULL +); diff --git a/apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/3.2_alter_taxref_data.sql b/apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/3.2_alter_taxref_data.sql new file mode 100755 index 000000000..689d1fe16 --- /dev/null +++ b/apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/3.2_alter_taxref_data.sqllter existing constraints +------------------------------------------------ +------------------------------------------------ +ALTER TABLE taxonomie.t_medias DROP CONSTRAINT IF EXISTS check_cd_ref_is_ref; +ALTER TABLE taxonomie.cor_taxon_attribut DROP CONSTRAINT IF EXISTS check_is_cd_ref; +ALTER TABLE taxonomie.cor_nom_liste DROP CONSTRAINT cor_nom_listes_taxref_fkey; + +------------------------------------------------ +------------------------------------------------ +-- UPDATE TAXREF +------------------------------------------------ +------------------------------------------------ + +-- UPDATE EXISTING CD_NOM +UPDATE taxonomie.taxref t + SET id_habitat = it.habitat::int, id_rang = it.rang, regne = it.regne, phylum = it.phylum, + classe = it.classe, ordre = it.ordre, famille = it.famille, cd_taxsup = it.cd_taxsup, + cd_sup = it.cd_sup, cd_ref = it.cd_ref, + lb_nom = it.lb_nom, lb_auteur = it.lb_auteur, nom_complet = it.nom_complet, + nom_complet_html = it.nom_complet_html, nom_valide = it.nom_valide, + nom_vern = it.nom_vern, nom_vern_eng = it.nom_vern_eng, group1_inpn = it.group1_inpn, + group2_inpn = it.group2_inpn, sous_famille = it.sous_famille, + tribu = 
it.tribu, url = it.url, group3_inpn = it.group3_inpn, + cd_ba = it.cd_ba, nomenclatural_comment = it.nomenclatural_comment +FROM taxonomie.import_taxref it +WHERE it.cd_nom = t.cd_nom; + +-- ADD NEW CD_NOM +INSERT INTO taxonomie.taxref( + cd_nom, id_habitat, id_rang, regne, phylum, classe, + ordre, famille, cd_taxsup, cd_sup, cd_ref, cd_ba, + lb_nom, lb_auteur, + nomenclatural_comment, nom_complet, nom_complet_html, nom_valide, nom_vern, nom_vern_eng, + group1_inpn, group2_inpn, sous_famille, tribu, url, group3_inpn) +SELECT it.cd_nom,it.habitat::int, it.rang, it.regne, it.phylum, it.classe, + it.ordre, it.famille, it.cd_taxsup, it.cd_sup, it.cd_ref, it.cd_ba, + it.lb_nom, it.lb_auteur, + it.nomenclatural_comment, it.nom_complet, it.nom_complet_html, it.nom_valide, it.nom_vern, it.nom_vern_eng, + it.group1_inpn, it.group2_inpn, it.sous_famille, it.tribu, it.url, it.group3_inpn +FROM taxonomie.import_taxref it +LEFT OUTER JOIN taxonomie.taxref t +ON it.cd_nom = t.cd_nom +WHERE t.cd_nom IS NULL; + +-- Regional Status + +DO $$ BEGIN + IF :taxref_region = 'gf' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.gf, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'mar' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.mar, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'gua' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.gua, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'sm' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.sm, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'sb' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.sb, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'spm' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.spm, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF 
:taxref_region = 'may' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.may, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'epa' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.epa, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'reu' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.reu, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'sa' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.sa, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'ta' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.ta, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'taaf' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.taaf, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'pf' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.pf, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'nc' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.nc, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'wf' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.wf, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSIF :taxref_region = 'cli' THEN UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.cli, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; + ELSE UPDATE taxonomie.taxref t SET id_statut = NULLIF(it.fr, '') FROM taxonomie.import_taxref it WHERE it.cd_nom = t.cd_nom; +END IF; +END $$; + +-------------------------------------------------------- +-- Cas des cd_ref supprimés avec un cd_nom de remplacement + +--- médias +WITH deleted_cd_ref AS ( + SELECT cd.cd_nom AS old_cd_ref, it.cd_ref AS new_cd_ref + FROM taxonomie.cdnom_disparu cd + JOIN taxonomie.taxref t + ON cd.cd_nom = 
t.cd_nom + AND t.cd_nom = t.cd_ref + AND cd.cd_raison_suppression = 1 + JOIN taxonomie.import_taxref it + ON cd.cd_nom_remplacement = it.cd_nom +) +UPDATE taxonomie.t_medias tm SET cd_ref = new_cd_ref +FROM deleted_cd_ref d +WHERE d.old_cd_ref = tm.cd_ref; + +--- attribut +-- @TODO cas de conflit lors de merge si le cd_ref de remplacement est déjà présent +WITH deleted_cd_ref AS ( + SELECT cd.cd_nom AS old_cd_ref, it.cd_ref AS new_cd_ref + FROM taxonomie.cdnom_disparu cd + JOIN taxonomie.taxref t + ON cd.cd_nom = t.cd_nom + AND t.cd_nom = t.cd_ref + AND cd.cd_raison_suppression = 1 + JOIN taxonomie.import_taxref it + ON cd.cd_nom_remplacement = it.cd_nom +) +UPDATE taxonomie.t_medias tm SET cd_ref = new_cd_ref +FROM deleted_cd_ref d +WHERE d.old_cd_ref = tm.cd_ref; + +-- DELETE MISSING CD_NOM if not keep_cdnom is specify +DO $$ BEGIN + IF :keep_cd_nom = FALSE THEN + DELETE FROM taxonomie.taxref + WHERE cd_nom IN ( + SELECT cd_nom + FROM taxonomie.cdnom_disparu + ); + + END IF; +END $$; + +---- ################################################################################# +---- ################################################################################# +---- REPERCUSSION des changements de taxref dans taxhub (attributs, médias) +---- ################################################################################# +---- ################################################################################# + +--- Sauvegarde des données au cas ou +DROP TABLE IF EXISTS tmp_taxref_changes.t_medias; +CREATE TABLE tmp_taxref_changes.t_medias AS +SELECT * FROM taxonomie.t_medias; + +DROP TABLE IF EXISTS tmp_taxref_changes.cor_taxon_attribut; +CREATE TABLE tmp_taxref_changes.cor_taxon_attribut AS +SELECT * FROM taxonomie.cor_taxon_attribut; + +DROP TABLE IF EXISTS tmp_taxref_changes.cor_nom_liste; +CREATE TABLE tmp_taxref_changes.cor_nom_liste AS +SELECT * FROM taxonomie.cor_nom_liste; + + +---- 
################################################################################# +--- cor_nom_liste +---- ################################################################################# +-- Remplacement des anciens cd_nom par leurs remplaçants dans cor_nom_liste +WITH d AS ( + SELECT cnl.id_liste , cnl.cd_nom, cd.cd_nom_remplacement + FROM taxonomie.cor_nom_liste AS cnl + JOIN taxonomie.cdnom_disparu AS cd + ON cnl.cd_nom = cd.cd_nom + LEFT OUTER JOIN taxonomie.cor_nom_liste AS repl + ON repl.cd_nom = cd.cd_nom_remplacement AND cnl.id_liste = repl.id_liste + WHERE repl.cd_nom IS NULL AND NOT cd.cd_nom_remplacement IS NULL +) +UPDATE taxonomie.cor_nom_liste l SET cd_nom = cd_nom_remplacement +FROM d +WHERE d.cd_nom = l.cd_nom AND d.id_liste = l.id_liste; + +-- supression dans les cas ou il n'y a pas de taxons de remplacements +-- Même si le paramètre keep_cd_nom est spécifié +-- de façon à ne pas autoriser la saisie de nouvelles données avec des cd_nom qui n'existent plus +DELETE FROM taxonomie.cor_nom_liste l +USING taxonomie.cdnom_disparu AS cd +WHERE l.cd_nom = cd.cd_nom AND cd.cd_nom_remplacement IS NULL; + + +---- ################################################################################# +---- MODIFICATIONS DES ATTRIBUTS ET DES MEDIAS +---- ################################################################################# + +--- Action : Update cd_ref no changes for attributes and medium +ALTER TABLE taxonomie.t_medias DISABLE TRIGGER USER; +UPDATE taxonomie.t_medias SET cd_ref = f_cd_ref +FROM tmp_taxref_changes.comp_grap +WHERE cas = 'update cd_ref' AND cd_ref = i_cd_ref; +ALTER TABLE taxonomie.t_medias ENABLE TRIGGER USER; + +UPDATE taxonomie.cor_taxon_attribut SET cd_ref = f_cd_ref +FROM tmp_taxref_changes.comp_grap +WHERE cas = 'update cd_ref' AND cd_ref = i_cd_ref; + +-- Action merge +UPDATE taxonomie.t_medias SET cd_ref = f_cd_ref +FROM tmp_taxref_changes.comp_grap +WHERE cas = 'merge' AND cd_ref = i_cd_ref; + +-- Suppression des potentiels 
doublons puis modification +WITH grp_del AS ( + SELECT f_cd_ref, id_attribut, count(*), array_agg(DISTINCT i_cd_ref) cd_refs, array_agg( DISTINCT valeur_attribut) AS valeur_attribut + FROM taxonomie.cor_taxon_attribut ia + JOIN tmp_taxref_changes.comp_grap cg + ON + cd_ref = i_cd_ref + GROUP BY f_cd_ref, id_attribut + HAVING count(*) > 1 +) , del AS ( + SELECT id_attribut as at, unnest(cd_refs[2:]) as i_cd_ref + FROM grp_del + WHERE array_length(valeur_attribut, 1) = 1 +) +DELETE FROM taxonomie.cor_taxon_attribut +USING del +WHERE cd_ref = i_cd_ref AND id_attribut = at; + +UPDATE taxonomie.cor_taxon_attribut SET cd_ref = f_cd_ref +FROM tmp_taxref_changes.comp_grap +WHERE cas = 'merge' AND cd_ref = i_cd_ref; + +------------------------------------------------ +------------------------------------------------ +-- REBUILD CONSTRAINTS +------------------------------------------------ +------------------------------------------------ + +UPDATE taxonomie.t_medias m SET cd_ref = t.cd_ref +FROM taxonomie.taxref t +WHERE m.cd_ref = t.cd_nom AND NOT t.cd_nom = t.cd_ref; + + +UPDATE taxonomie.cor_taxon_attribut m SET cd_ref = t.cd_ref +FROM taxonomie.taxref t +WHERE m.cd_ref = t.cd_nom + AND NOT t.cd_ref = t.cd_nom; + + + +ALTER TABLE taxonomie.t_medias + DROP CONSTRAINT IF EXISTS check_is_cd_ref, + ADD CONSTRAINT check_is_cd_ref CHECK (cd_ref = taxonomie.find_cdref(cd_ref)); + +ALTER TABLE taxonomie.cor_taxon_attribut + DROP CONSTRAINT IF EXISTS check_is_cd_ref, + ADD CONSTRAINT check_is_cd_ref CHECK (cd_ref = taxonomie.find_cdref(cd_ref)); + +ALTER TABLE taxonomie.cor_nom_liste ADD CONSTRAINT cor_nom_listes_taxref_fkey FOREIGN KEY (cd_nom) +REFERENCES taxonomie.taxref(cd_nom) ON UPDATE CASCADE ON DELETE NO ACTION; \ No newline at end of file diff --git a/apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/__init__.py b/apptax/taxonomie/commands/migrate_taxref/data/specific_taxref_v18/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/apptax/taxonomie/commands/migrate_taxref/test_commands_migrate.py b/apptax/taxonomie/commands/migrate_taxref/test_commands_migrate.py index 98dd8358a..6099b2a01 100644 --- a/apptax/taxonomie/commands/migrate_taxref/test_commands_migrate.py +++ b/apptax/taxonomie/commands/migrate_taxref/test_commands_migrate.py @@ -3,6 +3,7 @@ from click.testing import CliRunner from flask.cli import with_appcontext +from sqlalchemy import select, delete, func from sqlalchemy.orm.exc import NoResultFound from apptax.database import db @@ -60,11 +61,37 @@ def test_migrate_taxref(): (713870, 54376, 713870, "Leptidea sinapis sinapis (Linnaeus, 1758)", None), # split ] +# test migration taxref 17 vers 18 détection des différents cas +# cd_nom, cd_ref, new_cd_ref, nom_complet, attr_value +data_migration_taxref_v17_to_v18 = [ + (997791, 997791, None, "Poropila dubia J.Schiller, 1925", None), # cd_nom sans substition + (608162, 608162, 104384, "Juncus x langei Erdner, 1906", None), # cd_nom avec substition + (54502, 54502, 1042429, "Parnassius mnemosyne (Linnaeus, 1758)", "A"), # update cd_ref + (103749, 103749, 103749, "Iris lutescens Lam., 1789", "A"), # merge + (136857, 136857, 103749, "Iris lutescens subsp. lutescens Lam., 1789", "A"), # merge + (116456, 116456, 116456, "Pulsatilla rubra (Lam.) Delarbre, 1800", "A"), # merge with conflict + ( + 150342, + 150342, + 116456, + "Pulsatilla rubra var. rubra (Lam.) Delarbre, 1800", + "B", + ), # merge with conflict + (29552, 29574, 29552, "Leccinum brunneogriseolum Lannoy & Estadès, 1991", "A"), # split + ( + 29553, + 29574, + 29574, + "Leccinum brunneogriseolum f. 
chlorinum Lannoy & Estadès, 1993", + None, + ), # split +] + def populate_data(sample_data): - liste = BibListes.query.filter_by(code_liste="100").one() + liste = db.session.scalar(select(BibListes).where(BibListes.code_liste == "100")) + theme = db.session.scalar(select(BibThemes).where(BibThemes.nom_theme == "Mon territoire")) - theme = BibThemes.query.filter_by(nom_theme="Mon territoire").one() attribut = BibAttributs( nom_attribut="test", label_attribut="Test", @@ -81,9 +108,9 @@ def populate_data(sample_data): for cd_nom, cd_ref, new_cd_ref, nom_complet, attr_value in sample_data: # cor_nom_liste - nom = Taxref.query.get(cd_nom) + nom = db.session.scalar(select(Taxref).where(Taxref.cd_nom == cd_nom)) + nom.listes.append(liste) db.session.add(nom) - liste.noms.append(nom) # medias media = TMedias( @@ -110,21 +137,16 @@ def clean_data(sample_data): if nom: nom.listes = [] db.session.add(nom) - res = CorTaxonAttribut.query.filter(CorTaxonAttribut.cd_ref == new_cd_ref).all() - for c in res: - db.session.delete(c) - res = TMedias.query.filter(TMedias.cd_ref == new_cd_ref).all() - for c in res: - db.session.delete(c) + db.session.execute(delete(CorTaxonAttribut).where(CorTaxonAttribut.cd_ref == new_cd_ref)) + db.session.execute(delete(CorTaxonAttribut).where(CorTaxonAttribut.cd_ref == cd_ref)) + db.session.execute(delete(cor_nom_liste).where(cor_nom_liste.c.cd_nom == cd_nom)) + db.session.execute(delete(TMedias).where(TMedias.cd_ref == new_cd_ref)) + db.session.execute(delete(TMedias).where(TMedias.cd_ref == cd_ref)) - try: - for c in res: - db.session.delete(c) - attr = BibAttributs.query.filter(BibAttributs.nom_attribut == "test").one() + # Suppression attribut + db.session.execute(delete(BibAttributs).where(BibAttributs.nom_attribut == "test")) - db.session.delete(attr) - except NoResultFound: - pass + # Commit db.session.commit() @@ -331,6 +353,103 @@ def test_import_taxref_v17(): assert results == 5 +def test_import_taxref_v18(): + from 
apptax.taxonomie.commands.migrate_taxref.commands_v18 import ( + import_taxref_v18, + test_changes_detection, + apply_changes, + ) + + """Test des commandes de migration de taxref v17 vers taxref v18 + + Etapes : + - données de test migration_example : + - Erreur : merge de taxon avec attributs contradictoire + - Erreur : cd_nom disparu sans cd_nom de remplacement + - import de taxref v18 + - correction des erreurs + - migration des données + - vérification des modifications réalisées lors de l'import + """ + runner = CliRunner() + runner.invoke(import_taxref_v18, []) + + # Test generated files + data = open_csv_file("liste_changements.csv") + # Test 2 conflicts + conflict = [d for d in data if d["action"] == "Conflicts with attributes : test: A, test: B"] + assert len(conflict) == 2 + + # Test 4 merge + merge = [d for d in data if d["cas"] == "merge"] + assert len(merge) == 4 + # Test 1 update cd_ref + update_cd_ref = [d for d in data if d["cas"] == "update cd_ref"] + assert len(update_cd_ref) == 1 + + # Résolution des conflits : Erreur liée à la fusion des noms Pulsatilla rubra + # (150342, 150342, 116456, "Pulsatilla rubra var. rubra (Lam.) 
Delarbre, 1800", "B"), # merge with conflict + db.session.execute(delete(CorTaxonAttribut).where(CorTaxonAttribut.cd_ref == 150342)) + db.session.commit() + + runner.invoke(test_changes_detection, []) + data = open_csv_file("liste_changements.csv") + # Test plus de conflits + conflict = [d for d in data if d["action"] == "Conflicts with attributes : test: A, test: B"] + assert len(conflict) == 0 + + # test nom avec ou sans substition + # Missing 2 : cor_nom_liste et t_medias + data = open_csv_file("missing_cd_nom_into_database.csv") + + sans_substitution = [d for d in data if d["cd_nom_remplacement"] == ""] + assert len(sans_substitution) == 2 + avec_substitution = [d for d in data if not d["cd_nom_remplacement"] == ""] + assert len(avec_substitution) == 2 + + # Erreur liée au taxon sans substition + # (997791, 997791, None, "Poropila dubia J.Schiller, 1925", None), # cd_nom sans substition + tax_sans_substitution = db.session.scalar(select(Taxref).where(Taxref.cd_nom == 997791)) + tax_sans_substitution.listes = [] + + db.session.execute(delete(TMedias).where(TMedias.cd_ref == 997791)) + db.session.commit() + + runner.invoke(test_changes_detection, []) + data = open_csv_file("missing_cd_nom_into_database.csv") + sans_substitution = [d for d in data if d["cd_nom_remplacement"] == ""] + assert len(sans_substitution) == 0 + + # Migration de taxref + runner.invoke(apply_changes, ["--keep-oldtaxref"]) + + # Analyse de la migration + # cor_nom_liste : nb enregistrements initial = 9 ; final = 8 + # perte de 1 du à la suppression du cd_nom 997791 + nb_cor_liste = db.session.scalar(select(func.count()).select_from(cor_nom_liste)) + assert nb_cor_liste == 8 + + # cor_taxon_attribut : nb enregistrements initial = 6 ; final = 4 + # perte de 2 du au merge des taxons 103749 + 136857 et 116456 + 150342 + nb_attr = db.session.scalar(select(func.count()).select_from(CorTaxonAttribut)) + assert nb_attr == 4 + + # t_medias : + # nb media initial = 9 ; final = 8 + # nb de taxon 
initial = 8 ; final = 5 + # perte de 3 taxons : + # - 2 du au merge des taxons 103749 + 136857 et 116456 + 150342 + # - 1 du à la suppression du cd_nom 997791 + # perte de 1 média du à la suppression sans remplacement de 997791 + nb_media = db.session.scalar(select(func.count()).select_from(TMedias)) + assert nb_media == 8 + + nb_media_taxa = db.session.scalar( + select(func.count(TMedias.cd_ref.distinct())).select_from(TMedias) + ) + assert nb_media_taxa == 5 + + @test_migrate_taxref.command() @with_appcontext def test_taxref_v16_migration(): @@ -355,3 +474,16 @@ def test_taxref_v17_migration(): raise (e) finally: clean_data(data_migration_taxref_v16_to_v17) + + +@test_migrate_taxref.command() +@with_appcontext +def test_taxref_v18_migration(): + """Test des commandes de migration de taxref v17 vers taxref v18""" + populate_data(data_migration_taxref_v17_to_v18) + try: + test_import_taxref_v18() + except AssertionError as e: + raise (e) + finally: + clean_data(data_migration_taxref_v17_to_v18) diff --git a/apptax/taxonomie/commands/taxref.py b/apptax/taxonomie/commands/taxref.py index 711c56638..e1d6cab5f 100644 --- a/apptax/taxonomie/commands/taxref.py +++ b/apptax/taxonomie/commands/taxref.py @@ -7,7 +7,10 @@ from apptax.database import db +from apptax.taxonomie.commands.migrate_taxref.commands_v15 import migrate_to_v15 +from apptax.taxonomie.commands.migrate_taxref.commands_v16 import migrate_to_v16 from apptax.taxonomie.commands.migrate_taxref.commands_v17 import migrate_to_v17 +from apptax.taxonomie.commands.migrate_taxref.commands_v18 import migrate_to_v18 from apptax.taxonomie.models import Taxref, TaxrefBdcStatutText, TMetaTaxref from .utils import truncate_bdc_statuts @@ -22,8 +25,7 @@ import_v16, import_bdc_v16, ) -from .migrate_taxref.commands_v15 import migrate_to_v15 -from .migrate_taxref.commands_v16 import migrate_to_v16 +from .taxref_v18 import import_v18 from .migrate_taxref.test_commands_migrate import test_migrate_taxref from 
apptax.taxonomie.models import Taxref @@ -133,6 +135,7 @@ def import_inpn_media(file): taxref.add_command(import_bdc_v15) taxref.add_command(import_v16) taxref.add_command(import_v17) +taxref.add_command(import_v18) taxref.add_command(import_bdc_v16) taxref.add_command(import_bdc_v17) taxref.add_command(migrate_to_v15) @@ -142,3 +145,5 @@ def import_inpn_media(file): taxref.add_command(link_bdc_statut_to_areas) taxref.add_command(enable_bdc_statut_text) taxref.add_command(import_inpn_media) + +taxref.add_command(migrate_to_v18) diff --git a/apptax/taxonomie/commands/taxref_v18.py b/apptax/taxonomie/commands/taxref_v18.py new file mode 100644 index 000000000..bc00eec1a --- /dev/null +++ b/apptax/taxonomie/commands/taxref_v18.py @@ -0,0 +1,155 @@ +import os +import logging +from zipfile import ZipFile + +import click +from sqlalchemy.schema import MetaData +from flask.cli import with_appcontext + +from utils_flask_sqla.migrations.utils import open_remote_file + +from ref_geo.models import LAreas, BibAreasTypes + +from apptax.database import db +from apptax.taxonomie.commands.utils import ( + copy_from_csv, + refresh_taxref_vm, + import_bdc_statuts, + insert_taxref_numversion, +) +from apptax.taxonomie.commands.taxref_v15_v16 import import_bdc_statuts_v17 + + +base_url = "http://geonature.fr/data/inpn/taxonomie/" + + +def import_bdc_statuts_v18(logger): + import_bdc_statuts_v17(logger) + + +def import_taxref(logger, num_version, taxref_archive_name, taxref_file_name, taxref_region="fr"): + with open_remote_file(base_url, taxref_archive_name, open_fct=ZipFile) as archive: + with archive.open("habitats_note.csv") as f: + logger.info(f"Insert TAXREF v{num_version} habitats…") + copy_from_csv(f, "bib_taxref_habitats", encoding="WIN1252", delimiter=";") + with archive.open("rangs_note.csv") as f: + logger.info(f"Insert TAXREF v{num_version} rangs…") + copy_from_csv( + f, + "bib_taxref_rangs", + encoding="WIN1252", + delimiter=";", + dest_cols=("tri_rang", "id_rang", 
"nom_rang", "nom_rang_en"), + ) + with archive.open("statuts_note.csv") as f: + logger.info(f"Insert TAXREF v{num_version} statuts…") + copy_from_csv( + f, + "bib_taxref_statuts", + encoding="WIN1252", + delimiter=";", + dest_cols=("id_statut", "nom_statut"), + source_cols=("statut", "description"), + ) + with archive.open(taxref_file_name) as f: + logger.info(f"Insert TAXREF v{num_version} referentiel…") + copy_from_csv( + f, + "taxref", + delimiter="\t", + dest_cols=( + "cd_nom", + "id_statut", + "id_habitat", + "id_rang", + "regne", + "phylum", + "classe", + "ordre", + "famille", + "sous_famille", + "tribu", + "cd_taxsup", + "cd_sup", + "cd_ref", + "cd_ba", + "lb_nom", + "lb_auteur", + "nomenclatural_comment", + "nom_complet", + "nom_complet_html", + "nom_valide", + "nom_vern", + "nom_vern_eng", + "group1_inpn", + "group2_inpn", + "group3_inpn", + "url", + ), + source_cols=( + "cd_nom::int", + f"NULLIF({taxref_region}, '') as id_statut", + "habitat::int as id_habitat", + "rang as id_rang", + "regne", + "phylum", + "classe", + "ordre", + "famille", + "sous_famille", + "tribu", + "cd_taxsup::int", + "cd_sup::int", + "cd_ref::int", + "cd_ba::int", + "lb_nom", + "substring(lb_auteur, 1, 250)", + "nomenclatural_comment", + "nom_complet", + "nom_complet_html", + "nom_valide", + "substring(nom_vern,1,1000)", + "nom_vern_eng", + "group1_inpn", + "group2_inpn", + "group3_inpn", + "url", + ), + ) + insert_taxref_numversion(num_version) + db.session.commit() + + +@click.command() +@click.option("--skip-bdc-statuts", is_flag=True, help="Skip import of BDC Statuts") +@click.option("--taxref-region", type=str, default="fr", help="Taxref region : column status") +@with_appcontext +def import_v18(skip_bdc_statuts, taxref_region): + logger = logging.getLogger() + + import_taxref( + logger, + num_version="18", + taxref_archive_name="TAXREF_v18_2025.zip", + taxref_file_name="TAXREFv18.txt", + taxref_region=taxref_region, + ) + + if not skip_bdc_statuts: + 
import_bdc_statuts_v18(logger) + else: + logger.info("Skipping BDC statuts.") + + logger.info("Refresh materialized views…") + refresh_taxref_vm() + + logger.info("Committing…") + db.session.commit() + + +@click.command() +@with_appcontext +def import_bdc_v18(): + logger = logging.getLogger() + import_bdc_statuts_v18(logger) + db.session.commit() diff --git a/apptax/taxonomie/models.py b/apptax/taxonomie/models.py index aaffafe44..31ada2709 100644 --- a/apptax/taxonomie/models.py +++ b/apptax/taxonomie/models.py @@ -160,8 +160,10 @@ class Taxref(db.Model): cd_taxsup = db.Column(db.Integer) cd_sup = db.Column(db.Integer) cd_ref = db.Column(db.Integer) + cd_ba = db.Column(db.Integer) lb_nom = db.Column(db.Unicode) lb_auteur = db.Column(db.Unicode) + nomenclatural_comment = db.Column(db.Unicode) nom_complet = db.Column(db.Unicode) nom_complet_html = db.Column(db.Unicode) nom_vern = db.Column(db.Unicode) diff --git a/apptax/tests/test_taxref_last_version.py b/apptax/tests/test_taxref_last_version.py index 8cac30aa0..73fcd3f15 100644 --- a/apptax/tests/test_taxref_last_version.py +++ b/apptax/tests/test_taxref_last_version.py @@ -14,7 +14,7 @@ class TestPopulateTaxref: def test_count_taxref(self): nb_taxref = Taxref.query.count() - assert nb_taxref == 691281 + assert nb_taxref == 708685 def test_count_bdc_status(self): nb_bdc_texts = TaxrefBdcStatutText.query.count() diff --git a/install_db.sh b/install_db.sh index a104e1207..eb5804908 100755 --- a/install_db.sh +++ b/install_db.sh @@ -105,7 +105,7 @@ then flask db upgrade ref_geo_fr_departments@head flask db autoupgrade - flask taxref import-v17 --taxref-region=${taxref_region:-fr} + flask taxref import-v18 --taxref-region=${taxref_region:-fr} if $insert_geonatureatlas_data then From 934e03f80a31c8753c9d03054ad193b04b0df5e5 Mon Sep 17 00:00:00 2001 From: amandine-sahl Date: Thu, 16 Jan 2025 12:19:03 +0100 Subject: [PATCH 2/3] github action :Test install taxref v18 --- .github/workflows/pytest.yml | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index b7c6824fa..209fa82bf 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -93,7 +93,7 @@ jobs: TAXHUB_CONFIG_FILE: config/test_config.toml - name: Install taxref run: | - flask taxref import-v17 + flask taxref import-v18 env: TAXHUB_CONFIG_FILE: config/test_config.toml - name: Test with pytest From f3174dd0964076534c3699e4519f5e2ecd1f5982 Mon Sep 17 00:00:00 2001 From: amandine-sahl Date: Thu, 16 Jan 2025 12:26:02 +0100 Subject: [PATCH 3/3] [test] Update taxref_schema add cd_ba + nomenclatural_comment --- apptax/tests/test_taxref.py | 6 +++++- apptax/tests/test_taxref_last_version.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/apptax/tests/test_taxref.py b/apptax/tests/test_taxref.py index fe78249d7..27d8a00cb 100644 --- a/apptax/tests/test_taxref.py +++ b/apptax/tests/test_taxref.py @@ -46,6 +46,7 @@ class TestAPITaxref: "cd_taxsup": Or(None, int), "cd_sup": Or(None, int), "cd_ref": int, + "cd_ba": Or(None, int), "lb_nom": str, "lb_auteur": str, "nom_complet": str, @@ -53,6 +54,7 @@ class TestAPITaxref: "nom_vern": Or(None, str), "nom_valide": str, "nom_vern_eng": Or(None, str), + "nomenclatural_comment": Or(None, str), "group1_inpn": str, "group2_inpn": str, "group3_inpn": Or(None, str), @@ -76,6 +78,7 @@ class TestAPITaxref: "cd_taxsup": Or(None, int), "cd_sup": Or(None, int), "cd_ref": int, + "cd_ba": Or(None, int), "lb_nom": str, "lb_auteur": str, "nom_complet": str, @@ -83,6 +86,7 @@ class TestAPITaxref: "nom_vern": Or(None, str), "nom_valide": str, "nom_vern_eng": Or(None, str), + "nomenclatural_comment": Or(None, str), "group1_inpn": str, "group2_inpn": str, "group3_inpn": Or(None, str), @@ -268,7 +272,7 @@ def test_taxrefversion_routes(self): response = self.client.get(url_for("taxref.getTaxrefVersion")) assert response.status_code == 200 assert ( - json.loads(response.data)["version"] == 17 + 
json.loads(response.data)["version"] == 18 ) # FIXME: Comment faire si quelqu'un a besoin de taxref dans une différente version... def test_get_groupe3_inpn(self): diff --git a/apptax/tests/test_taxref_last_version.py b/apptax/tests/test_taxref_last_version.py index 73fcd3f15..fbe34eadc 100644 --- a/apptax/tests/test_taxref_last_version.py +++ b/apptax/tests/test_taxref_last_version.py @@ -42,4 +42,4 @@ def test_enable_bdc_statut(self): def test_taxref_version(self): taxref_version = TMetaTaxref.query.order_by(TMetaTaxref.update_date.desc()).scalar() - assert taxref_version.version == 17 + assert taxref_version.version == 18