From 6812ee88cc94f2d160289f0df995960012641511 Mon Sep 17 00:00:00 2001 From: "Huska, Matthew" Date: Thu, 26 Oct 2023 17:33:43 +0200 Subject: [PATCH] Automatically update parent-child lineage info when doing a match using --with-sublineage. Disable with --no-lineage-update. --- lib/Lineages_UPDATER.py | 11 ++++++----- sonar.py | 13 ++++++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/Lineages_UPDATER.py b/lib/Lineages_UPDATER.py index f67483c..dbdebe6 100644 --- a/lib/Lineages_UPDATER.py +++ b/lib/Lineages_UPDATER.py @@ -5,6 +5,7 @@ # We just adapt and change some parts to be used in covsonar, vocal etc. import json import os +import sys import pandas as pd import requests @@ -88,12 +89,12 @@ def download_source(tmp_dir): alias_key_url = "https://raw.githubusercontent.com/cov-lineages/pango-designation/master/pango_designation/alias_key.json" lineag = os.path.join(tmp_dir, "lineags.csv") alias_key = os.path.join(tmp_dir, "alias_key.json") - print("Download lineages") + print("Download lineages", file=sys.stderr) url_content = requests.get(lineages_url).content csv_file = open(lineag, "wb") csv_file.write(url_content) csv_file.close() - print("Download alias_key") + print("Download alias_key", file=sys.stderr) items = requests.get(alias_key_url) data = items.json() with open(alias_key, "w") as f: @@ -115,7 +116,7 @@ def process_lineage(alias_key_path, lineages_path, output): with open(alias_key_path ,'w') as nf: json.dump(data_dict, nf) """ - print("Create all lineages") + print("Create all lineages", file=sys.stderr) aliasor = Aliasor(alias_key_path) df_lineages = pd.read_csv(lineages_path) lineages = df_lineages.lineage.unique() @@ -127,7 +128,7 @@ def process_lineage(alias_key_path, lineages_path, output): uncompressed_lineages.sort(key=lts) sorted_lineages = list(map(aliasor.compress, uncompressed_lineages)) - print("Calculate parent-child relationship") + print("Calculate parent-child relationship", file=sys.stderr) # -- Fill the sub child -- # the current method is working, but it is not good in term of performance # the algoritm use 3 loops @@ -157,7 +158,7 @@ def process_lineage(alias_key_path, lineages_path, output): row_dict["lineage"] = _id row_dict["sublineage"] = "none" _final_list.append(row_dict) - print("Write output:", output) + print("Write output:", output, file=sys.stderr) df = pd.DataFrame.from_dict(_final_list, orient="columns") df = df[df.lineage != ""] df.sort_values(by=["lineage"]).to_csv(output, sep="\t", index=False) diff --git a/sonar.py b/sonar.py index adb94bc..2db042b 100755 --- a/sonar.py +++ b/sonar.py @@ -179,7 +179,6 @@ def parse_args(): help="recursively get all sublineages from a given lineage (--lineage) (only child) ", action="store_true", ) - # parser_match.add_argument('--recursion', help="recursively get all sublineages of a given lineage (--lineage). this will work only if '--with-sublineage' is used",action="store_true") parser_match.add_argument( "--lineage", metavar="STR", @@ -336,6 +335,11 @@ def parse_args(): parser_match.add_argument( "--debug", help="show database query for debugging", action="store_true" ) + parser_match.add_argument( + "--no-lineage-update", + help="do not automatically update parent-child lineage relationship information", + action="store_true", + ) # create the parser for the "restore" command parser_restore = subparsers.add_parser( @@ -975,13 +979,16 @@ def process_update_expressions(expr): else: debug = False # update-lineage-info - if args.tool == "update-lineage-info": + if args.tool == "update-lineage-info" or ( + args.tool == "match" and args.with_sublineage and not args.no_lineage_update + ): tmp_dirname = mkdtemp(prefix=".tmp_") alias_key, lineage = Lineages_UPDATER.download_source(tmp_dirname) Lineages_UPDATER.process_lineage(alias_key, lineage, "lib/lineage.all.tsv") if os.path.isdir(tmp_dirname): shutil.rmtree(tmp_dirname) - sys.exit("Complete!") + if args.tool == "update-lineage-info": + sys.exit("Complete!") if not args.db is None and args.tool != "add" and not os.path.isfile(args.db): sys.exit("input error: database does not exist.")