#!/usr/bin/env python3
#Provenance: this script was introduced by commit
#ccf8305284f1e67c8d2fee51e4aa9b7afe8eee40 ("draft BioModels compatibility table",
#Stella, 2024-11-05) as BioModels/test_biomodels_compatibility_biosimulators.py

md_description = \
'''
Download and run validation tests on all the curated models from BioModels https://www.ebi.ac.uk/biomodels.
The final step is to run the model in tellurium,
only models specified in SBML with a matching SEDML file are run in tellurium.
Errors or validation failures are reported at each step.
Outputs to the Markdown Table below.

'valid-sbml-units' enforces strict unit checking, 'broken-ref' indicates that the SEDML file contained
a broken source='model.xml' reference which was corrected to the name of the model's provided SBML file.
'''

import pyneuroml.sbml #for validate_sbml_files
import pyneuroml.sedml #for validate_sedml_files

import re
import os
import urllib
import urllib.parse #"import urllib" alone does not guarantee the parse submodule is loaded
import sys

sys.path.append("..")
import utils

API_URL: str = "https://www.ebi.ac.uk/biomodels"

out_format="json"

#0 for unlimited
#draft default of 1 keeps the behaviour of the former hard-coded model_ids[0:1] slice,
#but makes the limit visible and configurable in one place
max_count = 1

#local temporary storage of the model files
#this is independent of caching, and still happens when caching is turned off
#this allows the model to be executed and the files manually examined etc
tmp_dir = "tmplocalfiles"

#suppress stdout/err output from validation functions to make progress counter readable
suppress_stdout = True
suppress_stderr = True

#whether to replace "model.xml" in the sedml file with the name of the actual sbml file
fix_broken_ref = True

#skip tests that cause the script to be killed due to lack of RAM
#needs at least 8GB
#may hold model ids and/or 1-based positions in the model list (both are checked in main)
skip = set()

#matches "SEDML" / "SED-ML"; applied to upper-cased file names and descriptions
_SEDML_PATTERN = re.compile(r'SED[-]?ML')


def download_file(model_id,filename,output_file,cache):
    '''
    request the given file and save it to disk

    model_id: BioModels identifier placed in the download URL
    filename: name of the file belonging to the model, URL-quoted before use
    output_file: local path the response body is written to (binary mode)
    cache: object providing do_request(url); the result's .content is saved
    '''

    qfilename = urllib.parse.quote_plus(filename)

    response = cache.do_request(f'{API_URL}/model/download/{model_id}?filename={qfilename}').content

    with open(output_file,"wb") as fout:
        fout.write(response)


def replace_model_xml(sedml_path,sbml_filename):
    '''
    if the SEDML refers to a generic "model.xml" file
    and the SBML file is not called this
    replace the SEDML reference with the actual SBML filename

    method used assumes 'source="model.xml"' will only
    occur in the SBML file reference
    which was true at time of testing on current BioModels release

    returns True if the SBML reference already seemed valid (file left untouched)
    returns False if the file was rewritten with the corrected reference
    '''

    generic_ref = 'source="model.xml"'

    #the generic name is actually correct for this model
    if sbml_filename == "model.xml": return True

    with open(sedml_path,encoding='utf-8') as f:
        data = f.read()

    if generic_ref not in data: return True

    data = data.replace(generic_ref,f'source="{sbml_filename}"')

    with open(sedml_path,"w",encoding="utf-8") as fout:
        fout.write(data)

    return False


def validate_sbml_file(model_id,mtab,info,cache,sup):
    '''
    tasks relating to validating the SBML file
    return None to indicate aborting any further tests on this model
    otherwise return the SBML filename

    model_id: BioModels identifier
    mtab: results table, the 'valid_sbml' and 'valid_sbml_units' cells of the current row are set here
    info: model metadata as returned by the BioModels API (uses info['format']['name'] and info['files']['main'])
    cache: request cache passed through to download_file
    sup: output suppressor providing suppress() and restore()
    '''

    main_files = info['files']['main']

    #handle only single SBML files
    if info['format']['name'] != "SBML":
        mtab['valid_sbml'] = ['NonSBML', f"{info['format']['name']}:{main_files}"]
        return None

    if len(main_files) > 1:
        mtab['valid_sbml'] = ['MultipleSBMLs',f"{main_files}"]
        return None

    if len(main_files) < 1:
        mtab['valid_sbml'] = ['NoSBMLs',f"{main_files}"]
        return None

    #download the sbml file
    sbml_file = main_files[0]['name']
    try:
        download_file(model_id,sbml_file,sbml_file,cache)
    except Exception as e:
        mtab['valid_sbml'] = ['DownloadFail',f"{sbml_file} {e}"]
        return None

    #validate the sbml file
    sup.suppress() #suppress validation warning/error messages
    try:
        valid_sbml = pyneuroml.sbml.validate_sbml_files([sbml_file], strict_units=False)
        valid_sbml_units = pyneuroml.sbml.validate_sbml_files([sbml_file], strict_units=True)
    finally:
        #always restore, otherwise an exception in validation would leave output muted
        sup.restore()

    mtab['valid_sbml'] = ['pass' if valid_sbml else 'FAIL', f'[{sbml_file}]({API_URL}/{model_id}#Files)']
    mtab['valid_sbml_units'] = 'pass' if valid_sbml_units else 'FAIL'

    return sbml_file


def validate_sedml_file(model_id,mtab,info,cache,sup,sbml_file):
    '''
    tasks relating to validating the SEDML file
    return None to indicate aborting any further tests on this model
    otherwise return the SEDML filename

    model_id: BioModels identifier
    mtab: results table, the 'valid_sedml' and 'broken_ref' cells of the current row are set here
    info: model metadata as returned by the BioModels API (uses info['files']['additional'])
    cache: request cache passed through to download_file
    sup: output suppressor providing suppress() and restore()
    sbml_file: name of the model's SBML file, used to repair a generic "model.xml" reference
    '''

    #must have a SEDML file as well in order to be executed
    if 'additional' not in info['files']:
        mtab['valid_sedml'] = "NoSEDML"
        return None

    #a file counts as SEDML if its name or description mentions SEDML / SED-ML (case-insensitive)
    sedml_files = [
        file_info['name']
        for file_info in info['files']['additional']
        if _SEDML_PATTERN.search(f"{file_info['name']}|{file_info['description']}".upper())
    ]

    #require exactly one SEDML file
    if len(sedml_files) == 0:
        mtab['valid_sedml'] = "NoSEDML"
        return None

    if len(sedml_files) > 1:
        mtab['valid_sedml'] = ["MultipleSEDMLs",f"{sedml_files}"]
        return None

    #download sedml file
    sedml_file = sedml_files[0]
    try:
        download_file(model_id,sedml_file,sedml_file,cache)
    except Exception as e:
        #report the cause, consistent with the SBML download failure report
        mtab['valid_sedml'] = ["DownloadFail",f"{sedml_file} {e}"]
        return None

    #if the sedml file contains a generic 'source="model.xml"' replace it with the sbml filename
    if fix_broken_ref:
        ref_was_valid = replace_model_xml(sedml_file,sbml_file)
        mtab['broken_ref'] = 'pass' if ref_was_valid else 'FAIL'
    else:
        mtab['broken_ref'] = 'NA'

    sup.suppress()
    try:
        valid_sedml = pyneuroml.sedml.validate_sedml_files([sedml_file])
    finally:
        #always restore, otherwise an exception in validation would leave output muted
        sup.restore()
    mtab['valid_sedml'] = ['pass' if valid_sedml else 'FAIL', f'[{sedml_file}]({API_URL}/{model_id}#Files)']

    return sedml_file


def main():
    '''
    download the BioModel model files, run various validation steps
    then pass each model with exactly one SBML and one SEDML file to
    utils.run_biosimulators_remotely_and_locally for every engine in utils.ENGINES

    results are accumulated in a utils.MarkdownTable
    '''

    #caching is used to prevent the need to download the same responses from the remote server multiple times during testing
    #mode="off" to disable caching, "store" to wipe and store fresh results, "reuse" to use the stored cache
    #NOTE(review): "auto" is not one of the modes described above - confirm its meaning in utils.RequestCache
    cache = utils.RequestCache(mode="auto",direc="cache")

    #accumulate results in columns defined by keys which correspond to the local variable names to be used below
    #to allow automated loading into the columns
    column_labels = "Model |valid-sbml|valid-sbml-units|valid-sedml|broken-ref|tellurium"
    column_keys = "model_desc|valid_sbml|valid_sbml_units|valid_sedml|broken_ref|tellurium_outcome"
    mtab = utils.MarkdownTable(column_labels,column_keys)

    #allow stdout/stderr from validation tests to be suppressed to improve progress count visibility
    sup = utils.SuppressOutput(stdout=suppress_stdout,stderr=suppress_stderr)

    #get list of all available models
    model_ids = cache.do_request(f"{API_URL}/model/identifiers?format={out_format}").json()['models']
    count = 0
    starting_dir = os.getcwd()

    #same for every model, so set up once
    test_folder = 'tests'
    engine_list = list(utils.ENGINES.keys())

    try:
        for model_id in model_ids:
            #allow testing on a small sample of models
            if max_count > 0 and count >= max_count: break
            count += 1
            print(f"\r{model_id} {count}/{len(model_ids)}       ",end='')

            #only process curated models
            #BIOMD ids should be the curated models
            if 'BIOMD' not in model_id:
                continue

            #skip if on the list to be skipped
            if count in skip or model_id in skip:
                continue

            #from this point the model will create an output row even if not all tests are run
            mtab.new_row() #append empty placeholder row
            info = cache.do_request(f"{API_URL}/{model_id}?format={out_format}").json()

            #NOTE(review): the "<br/>" separator below is reconstructed - the extracted source
            #had a raw line break at this position inside the string; confirm against upstream
            model_link = f"[{model_id}]({API_URL}/{model_id})"
            if len(info['name']) > 36:
                #long names are truncated in the summary and shown in full inside a fold
                model_summary = f"{model_link}<br/>{info['name'][:30]}"
                model_details = f"{info['name']}"
                mtab['model_desc'] = mtab.make_fold(model_summary,model_details)
            else:
                mtab['model_desc'] = f"{model_link}<br/>{info['name']}"

            #make temporary downloads of the sbml and sedml files
            #model_dir is absolute, so chdir works regardless of the directory left by the previous model
            model_dir = os.path.join(starting_dir,tmp_dir,model_id)
            os.makedirs(model_dir,exist_ok=True)
            os.chdir(model_dir)

            #sbml file validation tasks, includes downloading a local copy
            sbml_file = validate_sbml_file(model_id,mtab,info,cache,sup)
            if not sbml_file: continue # no further tests possible

            sedml_file = validate_sedml_file(model_id,mtab,info,cache,sup,sbml_file)
            if not sedml_file: continue # no further tests possible

            #TODO(review): the return value is discarded and the 'tellurium_outcome' column is never filled
            utils.run_biosimulators_remotely_and_locally(engine_list,
                                os.path.basename(sedml_file),
                                os.path.basename(sbml_file),
                                os.path.join(test_folder,'d1_plots_remote'),
                                os.path.join(test_folder,'d1_plots_local'),
                                test_folder=test_folder)
    finally:
        #do not leave the process inside a per-model temporary directory
        os.chdir(starting_dir)

    #TODO(review): mtab is populated but never written out, and md_description is unused -
    #the README/markdown output step appears to be missing from this draft

if __name__ == "__main__":
    main()