From 8ef4e6c032c5ddae967e4409c5f90b2b8150bdd0 Mon Sep 17 00:00:00 2001 From: Angie Hinrichs Date: Fri, 8 Apr 2022 10:42:53 -0700 Subject: [PATCH 1/6] Install pangolin-assignment from a web directory instead of github / git-lfs. Up to this point, all data dependencies have been github cov-lineages repositories. The cache file in pangolin-assignment exceeded the github file size limit so we changed the pangolin-assignment repository to use git-lfs. Thanks @pvanheus for pointing out that github has storage and bandwidth quotas for Git LFS usage, and that by default the pangolin-assignment release tarball from github does not include the cache file; it can be added to the release tarball, but will count further against the storage and bandwidth quotas. Since the cache file is generated at UCSC which has ample web server storage and bandwidth, this adds a new mechanism to search for the latest versioned tarball in a web directory (instead of querying the github API), compare its version to the locally installed package if present (using the same pip/__init__.py __version__ mechanism), and install the tarball from the web directory (instead of github). Note: currently the URL for pangolin-assignment uses the hgdownload-test server; this will need to be changed to hgdownload after some testing and before release. 
--- pangolin/utils/update.py | 66 +++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/pangolin/utils/update.py b/pangolin/utils/update.py index e1ceee9..bb27d1a 100644 --- a/pangolin/utils/update.py +++ b/pangolin/utils/update.py @@ -3,6 +3,7 @@ import os import sys import json +import re import shutil import tarfile import subprocess @@ -14,8 +15,10 @@ version_dict_keys = ['pangolin', 'scorpio', 'pangolin-data', 'constellations', 'pangolin-assignment'] +dependency_web_dir = { 'pangolin-assignment': 'https://hgdownload-test.gi.ucsc.edu/goldenPath/wuhCor1/pangolin-assignment' } -def get_latest_release(dependency): + +def get_latest_cov_lineages(dependency): """ Using the github releases API check for the latest release of dependency and its tarball """ @@ -43,31 +46,58 @@ def get_latest_release(dependency): return latest_release, latest_release_tarball -def git_lfs_install(): +def get_latest_web_dir(dependency, web_dir): """ - 'git-lfs install' must be run after installing git-lfs and before cloning a repo - that uses Git LFS. + Find the tarball url with the latest release from a web directory with versioned tarballs + instead of github. An HTTP GET of the web directory must return some text that contains + names of files in that directory, some of which are {dependency}-{version}.tar.gz. 
""" try: - subprocess.run(['git-lfs', 'install'], - check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) - except CalledProcessError as e: - sys.stderr.write(cyan(f'Error: "git-lfs install" failed: {e}')) + listing = request.urlopen(web_dir).read().decode('utf-8') + except: + sys.stderr.write(cyan(f"Unable to read {web_dir}")) + sys.exit(-1) + tarRe = re.compile(f"{dependency}-(.*?).tar.gz") + matches = list(set(tarRe.findall(listing))) + if not matches: + sys.stderr.write(cyan(f"Can't find {dependency}-.tar.gz files in listing of {web_dir}")) sys.exit(-1) + versions = [LooseVersion(v) for v in matches] + versions.sort() + latest_release = str(versions[-1]) + latest_release_tarball = f"{web_dir}/{dependency}-{latest_release}.tar.gz" + return latest_release, latest_release_tarball + + +def get_latest_release(dependency): + """ + If dependency comes from a web directory then find latest release and tarball there, otherwise + query github API for cov-lineages repo + """ + if dependency in dependency_web_dir: + return get_latest_web_dir(dependency, dependency_web_dir[dependency]) + else: + return get_latest_cov_lineages(dependency) -def pip_install_dep(dependency, release): + +def pip_install_url(url): """ - Use pip install to install a cov-lineages repository with the specificed release + Use pip install to install a package from a url. """ - url = f"git+https://github.com/cov-lineages/{dependency}.git@{release}" subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade', url], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) +def pip_install_cov_lineages(dependency, release): + """ + Use pip install to install a cov-lineages repository with the specified release + """ + url = f"git+https://github.com/cov-lineages/{dependency}.git@{release}" + pip_install_url(url) + + def install_pangolin_assignment(): """ If the pangolin-assignment repo has not been installed already then install the latest release. 
@@ -77,9 +107,8 @@ def install_pangolin_assignment(): print(f"pangolin-assignment already installed with version {pangolin_assignment.__version__}; use --update or --update-data if you wish to update it.", file=sys.stderr) except: - git_lfs_install() latest_release, tarball = get_latest_release('pangolin-assignment') - pip_install_dep('pangolin-assignment', latest_release) + pip_install_url(tarball) print(f"pangolin-assignment installed with latest release ({latest_release})") @@ -99,7 +128,7 @@ def update(version_dictionary, data_dir=None): Using the github releases API check for the latest current release of the set of dependencies provided e.g., pangolin, scorpio, pangolin-data and constellations for complete --update and just pangolearn and constellations - for --update_data. If pangolin-assignment has been added to the installation + for --update_data. If pangolin-assignment has been added to version_dictionary then it will be included in both --update and --update-data. Dictionary keys must be one of pangolin, scorpio, pangolin-data, constellations @@ -163,7 +192,10 @@ def update(version_dictionary, data_dir=None): shutil.rmtree(destination_directory) shutil.move(os.path.join(tempdir, extracted_dir, dependency_package), destination_directory) else: - pip_install_dep(dependency, latest_release) + if dependency in dependency_web_dir: + pip_install_url(latest_release_tarball) + else: + pip_install_cov_lineages(dependency, latest_release) print(f"{dependency} updated to {latest_release}", file=sys.stderr) elif version > latest_release_tidied: print(f"{dependency} ({version}) is newer than latest stable " From 4fd6c2a9b23f22e213bb0fa01bee31f7965b2650 Mon Sep 17 00:00:00 2001 From: Angie Hinrichs Date: Fri, 8 Apr 2022 15:27:20 -0700 Subject: [PATCH 2/6] Remove error exit in case of mismatching pangolin-data and pangolin-assignment versions. There may be patch releases that make sense for pangolin-data but not pangolin-assignment (e.g. 
pangoLEARN patch update), and the suggestion to run --update-data is not helpful because that's how the versions came to be installed in the first place. --- pangolin/utils/data_checks.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pangolin/utils/data_checks.py b/pangolin/utils/data_checks.py index 05278b5..80b1e6a 100644 --- a/pangolin/utils/data_checks.py +++ b/pangolin/utils/data_checks.py @@ -95,13 +95,6 @@ def get_assignment_cache(cache_file, config): 'pangolin-assignment repository (that will make future data updates slower).\n')) sys.exit(-1) - # Check versions of pangolin-data and pangolin-assignment to make sure they are consistent. - if pangolin_assignment.__version__.lstrip('v') != config[KEY_PANGOLIN_DATA_VERSION].lstrip('v'): - print(cyan(f'Error: pangolin_assignment cache version {pangolin_assignment.__version__} ' - f'does not match pangolin_data version {config[KEY_PANGOLIN_DATA_VERSION]}. ' - 'Run "pangolin --update-data" to fetch latest versions of both.')) - sys.exit(-1) - try: with gzip.open(cache, 'rt') as f: line = f.readline() From 5538368c6d3876e2eeae30d1cf52565e4d35423a Mon Sep 17 00:00:00 2001 From: Angie Hinrichs Date: Tue, 12 Apr 2022 12:07:03 -0700 Subject: [PATCH 3/6] Update pangolin-assignment URL to hgdownload (not -test). Also fix option name typo in github query exception message. 
--- pangolin/utils/update.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pangolin/utils/update.py b/pangolin/utils/update.py index 4ab811c..758103f 100644 --- a/pangolin/utils/update.py +++ b/pangolin/utils/update.py @@ -15,7 +15,7 @@ version_dict_keys = ['pangolin', 'scorpio', 'pangolin-data', 'constellations', 'pangolin-assignment'] -dependency_web_dir = { 'pangolin-assignment': 'https://hgdownload-test.gi.ucsc.edu/goldenPath/wuhCor1/pangolin-assignment' } +dependency_web_dir = { 'pangolin-assignment': 'https://hgdownload.gi.ucsc.edu/goldenPath/wuhCor1/pangolin-assignment' } def get_latest_cov_lineages(dependency): @@ -32,8 +32,8 @@ def get_latest_cov_lineages(dependency): # so if this is thrown and there is definitely connectivity then # double check the version labels except Exception as e: - sys.stderr.write(cyan("Unable to connect to reach github API " - "--update/--data_update requires internet " + sys.stderr.write(cyan("Unable to connect to reach github API. 
" + "--update/--update-data requires internet " "connectivity so may not work on certain " "systems or if your IP has exceeded the " f"5,000 request per hour limit\n{e}\n")) From 16156efc5878c4b26d4c7f808eca4668c86ae819 Mon Sep 17 00:00:00 2001 From: Peter van Heusden Date: Wed, 4 May 2022 15:37:13 +0200 Subject: [PATCH 4/6] Support download assignment cache to datadir and using from datadir --- .github/workflows/pangolin.yml | 11 +++++++++- pangolin/command.py | 14 +++++++----- pangolin/utils/config.py | 2 ++ pangolin/utils/data_checks.py | 8 +++---- pangolin/utils/initialising.py | 40 +++++++++++++++++++++++++++++----- pangolin/utils/update.py | 28 +++++++++--------------- 6 files changed, 68 insertions(+), 35 deletions(-) diff --git a/.github/workflows/pangolin.yml b/.github/workflows/pangolin.yml index ff3a4b1..c153092 100644 --- a/.github/workflows/pangolin.yml +++ b/.github/workflows/pangolin.yml @@ -55,4 +55,13 @@ jobs: run: pangolin --update-data 2>&1 | tee pangolin_update_data.log - name: Run pangolin verbose mode run: pangolin --verbose pangolin/test/test_seqs.fasta 2>&1 | tee pangolin_verbose.log - + - name: Add assignment cache + run: pangolin --add-assignment-cache + - name: Test use-assignment-cache + run: pangolin --use-assignment-cache pangolin/test/test_seqs.fasta 2>&1 | grep 'Using pangolin-assignment cache' + - name: remove assignment cache + run: pip uninstall -y pangolin-assignment + - name: Add assignment cache to datadir + run: mkdir ac && pangolin --add-assignment-cache --datadir ac + - name: Test use-assignment-cache with datadir + run: pangolin --use-assignment-cache --datadir ac pangolin/test/test_seqs.fasta 2>&1 | grep 'Using pangolin-assignment cache' diff --git a/pangolin/command.py b/pangolin/command.py index 2bd6abb..01eeb35 100644 --- a/pangolin/command.py +++ b/pangolin/command.py @@ -4,17 +4,17 @@ from pangolin.utils import data_checks try: import pangolin_data -except: +except ImportError: 
data_checks.install_error("pangolin_data", "https://github.com/cov-lineages/pangolin-data.git") try: import scorpio -except: +except ImportError: data_checks.install_error("scorpio", "https://github.com/cov-lineages/scorpio.git") try: import constellations -except: +except ImportError: data_checks.install_error("constellations", "https://github.com/cov-lineages/constellations.git") import os @@ -110,20 +110,22 @@ def main(sysargs = sys.argv[1:]): setup_data(args.datadir,config[KEY_ANALYSIS_MODE], config) if args.add_assignment_cache: - update.install_pangolin_assignment() + update.install_pangolin_assignment(config[KEY_PANGOLIN_ASSIGNMENT_VERSION], args.datadir) if args.update: version_dictionary = {'pangolin': __version__, 'pangolin-data': config[KEY_PANGOLIN_DATA_VERSION], 'constellations': config[KEY_CONSTELLATIONS_VERSION], 'scorpio': config[KEY_SCORPIO_VERSION]} - update.add_pangolin_assignment_if_installed(version_dictionary) + if config[KEY_PANGOLIN_ASSIGNMENT_VERSION] is not None: + version_dictionary['pangolin-assignment'] = config[KEY_PANGOLIN_ASSIGNMENT_VERSION] update.update(version_dictionary) if args.update_data: version_dictionary = {'pangolin-data': config[KEY_PANGOLIN_DATA_VERSION], 'constellations': config[KEY_CONSTELLATIONS_VERSION]} - update.add_pangolin_assignment_if_installed(version_dictionary) + if config[KEY_PANGOLIN_ASSIGNMENT_VERSION] is not None: + version_dictionary['pangolin-assignment'] = config[KEY_PANGOLIN_ASSIGNMENT_VERSION] update.update(version_dictionary, args.datadir) # install_pangolin_assignment doesn't exit so that --update/--update-data can be given at the diff --git a/pangolin/utils/config.py b/pangolin/utils/config.py index 1f0d0be..9f10f8e 100644 --- a/pangolin/utils/config.py +++ b/pangolin/utils/config.py @@ -40,6 +40,8 @@ KEY_PANGOLIN_VERSION="pangolin_version" KEY_CONSTELLATIONS_VERSION="constellation_version" KEY_SCORPIO_VERSION="scorpio_version" +KEY_PANGOLIN_ASSIGNMENT_VERSION="pangolin_assignment_version" 
+KEY_PANGOLIN_ASSIGNMENT_PATH="pangolin_assignment_path" KEY_VERBOSE="verbose" KEY_LOG_API = "log_api" diff --git a/pangolin/utils/data_checks.py b/pangolin/utils/data_checks.py index 80b1e6a..a639b83 100644 --- a/pangolin/utils/data_checks.py +++ b/pangolin/utils/data_checks.py @@ -79,9 +79,8 @@ def install_error(package, url): def get_assignment_cache(cache_file, config): cache = "" - try: - import pangolin_assignment - pangolin_assignment_dir = pangolin_assignment.__path__[0] + if config[KEY_PANGOLIN_ASSIGNMENT_VERSION] is not None: + pangolin_assignment_dir = config[KEY_PANGOLIN_ASSIGNMENT_PATH] for r, d, f in os.walk(pangolin_assignment_dir): for fn in f: if fn == cache_file and cache == "": @@ -89,7 +88,7 @@ def get_assignment_cache(cache_file, config): if not os.path.exists(cache): sys.stderr.write(cyan(f'Error: cannot find assignment cache file {cache_file} in pangolin_assignment\n')) sys.exit(-1) - except: + else: sys.stderr.write(cyan('\nError: "pangolin --add-assignment-cache" is required before ' '"pangolin --use-assignment-cache", in order to install optional ' 'pangolin-assignment repository (that will make future data updates slower).\n')) @@ -100,6 +99,7 @@ def get_assignment_cache(cache_file, config): line = f.readline() except: with open(cache, 'r') as f: + # this is legacy code from when the assignment cache was installed using pip and git-lfs line = f.readline() if "git-lfs.github.com" in line: sys.stderr.write(cyan( diff --git a/pangolin/utils/initialising.py b/pangolin/utils/initialising.py index e9221b9..017d550 100644 --- a/pangolin/utils/initialising.py +++ b/pangolin/utils/initialising.py @@ -11,6 +11,15 @@ from pangolin import __version__ import pangolin_data +pangolin_assignment_version = None +pangolin_assignment_path = None +try: + import pangolin_assignment + pangolin_assignment_version = pangolin_assignment.__version__ + pangolin_assignment_path = pangolin_assignment.__path__[0] +except ImportError: + # if we can't import the module, 
leave the variables as None + pass import scorpio import constellations @@ -52,7 +61,9 @@ def setup_config_dict(cwd): KEY_PANGOLIN_DATA_VERSION: pangolin_data.__version__, KEY_SCORPIO_VERSION: scorpio.__version__, KEY_CONSTELLATIONS_VERSION: constellations.__version__, - + KEY_PANGOLIN_ASSIGNMENT_VERSION: pangolin_assignment_version, + KEY_PANGOLIN_ASSIGNMENT_PATH: pangolin_assignment_path, + KEY_VERBOSE: False, KEY_LOG_API: "", KEY_THREADS: 1 @@ -116,7 +127,9 @@ def version_from_init(init_file): break return version -def setup_data(datadir_arg,analysis_mode, config): +def setup_data(datadir_arg, analysis_mode, config): + global pangolin_assignment_version + global pangolin_assignment_path datadir = check_datadir(datadir_arg) @@ -143,6 +156,8 @@ def setup_data(datadir_arg,analysis_mode, config): constellation_files.append(os.path.join(r, fn)) pangolin_data_version = pangolin_data.__version__ + + # pangolin_assignment_version and pangolin_assignment_path are set at module import time use_datadir = False datadir_too_old = False if datadir: @@ -150,7 +165,7 @@ def setup_data(datadir_arg,analysis_mode, config): for r,d,f in os.walk(datadir): for fn in f: # pangolin-data/__init__.py not constellations/__init__.py: - if r.endswith('data') and fn == "__init__.py": + if r.endswith('/pangolin_data') and fn == "__init__.py": # print("Found " + os.path.join(r, fn)) version = version_from_init(os.path.join(r, fn)) if not version: @@ -162,8 +177,19 @@ def setup_data(datadir_arg,analysis_mode, config): use_datadir = True else: datadir_too_old = True - sys.stderr.write(cyan(f"Warning: Ignoring specified datadir {datadir} - it contains pangoLEARN model files older ({version}) than those installed ({pangolin_data.__version__})\n")) + sys.stderr.write(cyan(f"Warning: Ignoring pangolin data in specified datadir {datadir} - it contains pangolin_data older ({version}) than those installed ({pangolin_data.__version__})\n")) + elif r.endswith('/pangolin_assignment') and fn == 
'__init__.py': + version = version_from_init(os.path.join(r, fn)) + if not version: + continue + if pangolin_assignment_version is None or LooseVersion(version) >= LooseVersion(pangolin_assignment_version): + # only use this if the version is >= than what we already have + pangolin_assignment_version = version + pangolin_assignment_path = r + else: + datadir_too_old = True + sys.stderr.write(cyan(f"Warning: Ignoring pangolin assignment in specified datadir {datadir} - it contains pangolin_assignment older ({version}) than those installed ({pangolin_assignment.__version__})\n")) if use_datadir == False: # we haven't got a viable datadir from searching args.datadir if datadir and not datadir_too_old: @@ -175,8 +201,10 @@ def setup_data(datadir_arg,analysis_mode, config): config[KEY_PANGOLIN_DATA_VERSION] = pangolin_data_version config[KEY_CONSTELLATIONS_VERSION] = constellations_version - config[KEY_DATADIR] = datadir + config[KEY_DATADIR] = datadir # this is the pangolin_data datadir, the naming is from when there was only a single datadir to worry about config[KEY_CONSTELLATION_FILES] = constellation_files + config[KEY_PANGOLIN_ASSIGNMENT_VERSION] = pangolin_assignment_version + config[KEY_PANGOLIN_ASSIGNMENT_PATH] = pangolin_assignment_path def parse_qc_thresholds(maxambig, minlen, reference_fasta, config): @@ -225,7 +253,7 @@ def print_versions_exit(config): # Report pangolin_assignment version if it is installed, otherwise ignore try: import pangolin_assignment - print(f"pangolin-assignment: {pangolin_assignment.__version__}") + print(f"pangolin-assignment: {config[KEY_PANGOLIN_ASSIGNMENT_VERSION]}") except: pass sys.exit(0) diff --git a/pangolin/utils/update.py b/pangolin/utils/update.py index 758103f..e7aafc9 100644 --- a/pangolin/utils/update.py +++ b/pangolin/utils/update.py @@ -104,31 +104,23 @@ def pip_install_cov_lineages(dependency, release): pip_install_url(url) -def install_pangolin_assignment(): +def 
install_pangolin_assignment(pangolin_assignment_version, datadir): """ If the pangolin-assignment repo has not been installed already then install the latest release. """ - try: - import pangolin_assignment - print(f"pangolin-assignment already installed with version {pangolin_assignment.__version__}; use --update or --update-data if you wish to update it.", file=sys.stderr) - - except: + if pangolin_assignment_version is not None: + print(f"pangolin-assignment already installed with version {pangolin_assignment_version}; use --update or --update-data if you wish to update it.", file=sys.stderr) + else: latest_release, tarball = get_latest_release('pangolin-assignment') - pip_install_url(tarball) + if datadir is not None and os.path.exists(datadir): + # install pangolin-assignment to datadir instead of using pip install + version_dictionary = {'pangolin-assignment': '0'} + update(version_dictionary, datadir) + else: + pip_install_url(tarball) print(f"pangolin-assignment installed with latest release ({latest_release})") -def add_pangolin_assignment_if_installed(version_dictionary): - """ - If pangolin_assignment has been installed then add it to version_dictionary, else ignore. 
- """ - try: - import pangolin_assignment - version_dictionary["pangolin-assignment"] = pangolin_assignment.__version__ - except: - pass - - def update(version_dictionary, data_dir=None): """ Using the github releases API check for the latest current release From eba85c94fa471182fba8180c3a6a6bde6c0b5ec4 Mon Sep 17 00:00:00 2001 From: Peter van Heusden Date: Wed, 4 May 2022 15:43:08 +0200 Subject: [PATCH 5/6] Remove useless warning about datadir --- pangolin/utils/initialising.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/pangolin/utils/initialising.py b/pangolin/utils/initialising.py index 017d550..d819b36 100644 --- a/pangolin/utils/initialising.py +++ b/pangolin/utils/initialising.py @@ -176,7 +176,6 @@ def setup_data(datadir_arg, analysis_mode, config): pangolin_data_version = version use_datadir = True else: - datadir_too_old = True sys.stderr.write(cyan(f"Warning: Ignoring pangolin data in specified datadir {datadir} - it contains pangolin_data older ({version}) than those installed ({pangolin_data.__version__})\n")) elif r.endswith('/pangolin_assignment') and fn == '__init__.py': version = version_from_init(os.path.join(r, fn)) @@ -188,14 +187,8 @@ def setup_data(datadir_arg, analysis_mode, config): pangolin_assignment_version = version pangolin_assignment_path = r else: - datadir_too_old = True - sys.stderr.write(cyan(f"Warning: Ignoring pangolin assignment in specified datadir {datadir} - it contains pangolin_assignment older ({version}) than those installed ({pangolin_assignment.__version__})\n")) +\ sys.stderr.write(cyan(f"Warning: Ignoring pangolin assignment in specified datadir {datadir} - it contains pangolin_assignment older ({version}) than those installed ({pangolin_assignment.__version__})\n")) if use_datadir == False: - # we haven't got a viable datadir from searching args.datadir - if datadir and not datadir_too_old: - sys.stderr.write(cyan( - f"Warning: Ignoring specified datadir {datadir} - could not find 
__init__.py file to check versions \n")) - pangolin_data_dir = pangolin_data.__path__[0] datadir = os.path.join(pangolin_data_dir,"data") @@ -233,11 +226,10 @@ def parse_qc_thresholds(maxambig, minlen, reference_fasta, config): print(green(f"Maximum ambiguity allowed is {config[KEY_MAXAMBIG]}.\n****")) - def print_ram_warning(analysis_mode): if analysis_mode == "pangolearn": print(cyan("Warning: pangoLEARN mode may use a significant amount of RAM, be aware that it will not suit every system.")) - + def print_alias_file_exit(alias_file): with open(alias_file, 'r') as handle: for line in handle: From bea4cc7e588fe07b8a01e03893270c93e672f890 Mon Sep 17 00:00:00 2001 From: Peter van Heusden Date: Wed, 4 May 2022 15:45:59 +0200 Subject: [PATCH 6/6] Fix typo --- pangolin/utils/initialising.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pangolin/utils/initialising.py b/pangolin/utils/initialising.py index d819b36..c1d0691 100644 --- a/pangolin/utils/initialising.py +++ b/pangolin/utils/initialising.py @@ -187,7 +187,7 @@ def setup_data(datadir_arg, analysis_mode, config): pangolin_assignment_version = version pangolin_assignment_path = r else: -\ sys.stderr.write(cyan(f"Warning: Ignoring pangolin assignment in specified datadir {datadir} - it contains pangolin_assignment older ({version}) than those installed ({pangolin_assignment.__version__})\n")) + sys.stderr.write(cyan(f"Warning: Ignoring pangolin assignment in specified datadir {datadir} - it contains pangolin_assignment older ({version}) than those installed ({pangolin_assignment.__version__})\n")) if use_datadir == False: pangolin_data_dir = pangolin_data.__path__[0] datadir = os.path.join(pangolin_data_dir,"data")