From ccf8305284f1e67c8d2fee51e4aa9b7afe8eee40 Mon Sep 17 00:00:00 2001
From: Stella <30465823+stellaprins@users.noreply.github.com>
Date: Tue, 5 Nov 2024 11:07:58 +0000
Subject: [PATCH] draft BioModels compatibility table
---
...t_biomodels_compatibility_biosimulators.py | 247 ++++++++++++++++++
1 file changed, 247 insertions(+)
create mode 100644 BioModels/test_biomodels_compatibility_biosimulators.py
diff --git a/BioModels/test_biomodels_compatibility_biosimulators.py b/BioModels/test_biomodels_compatibility_biosimulators.py
new file mode 100644
index 0000000..4f50b5b
--- /dev/null
+++ b/BioModels/test_biomodels_compatibility_biosimulators.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python3
+
+#Markdown text summarising what this script does
+#NOTE(review): nothing in the visible part of this file reads md_description yet, and the text
+#still describes running models in tellurium whereas main() calls
+#utils.run_biosimulators_remotely_and_locally - presumably it is meant to head the generated
+#table, confirm and update the wording when the table output is wired in
+md_description = \
+'''
+Download and run validation tests on all the curated models from BioModels https://www.ebi.ac.uk/biomodels.
+The final step is to run the model in tellurium,
+only models specified in SBML with a matching SEDML file are run in tellurium.
+Errors or validation failures are reported at each step.
+Outputs to the Markdown Table below.
+
+'valid-sbml-units' enforces strict unit checking, 'broken-ref' indicates that the SEDML file contained
+a broken source='model.xml' reference which was corrected to the name of the model's provided SBML file.
+'''
+
+import pyneuroml.sbml #for validate_sbml_files
+import pyneuroml.sedml #for validate_sedml_files
+
+import re
+import os
+import urllib
+import sys
+
+sys.path.append("..")
+import utils
+
+#base URL that every BioModels request in this file is built from
+API_URL: str = "https://www.ebi.ac.uk/biomodels"
+
+#value sent as the "format" query parameter on the metadata requests
+out_format="json"
+#upper limit on the number of model ids visited by main(), 0 for unlimited
+#NOTE(review): main() currently iterates model_ids[0:1], so at most one model is visited whatever this is set to
+max_count = 0 #0 for unlimited
+
+#local temporary storage of the model files
+#this is independent of caching, and still happens when caching is turned off
+#this allows the model to be executed and the files manually examined etc
+#main() creates one subdirectory per model id under this directory and changes into it
+tmp_dir = "tmplocalfiles"
+
+#suppress stdout/err output from validation functions to make progress counter readable
+#both flags are passed to utils.SuppressOutput in main()
+suppress_stdout = True
+suppress_stderr = True
+
+#whether to replace "model.xml" in the sedml file with the name of the actual sbml file
+#when False the broken-ref column is filled with 'NA'
+fix_broken_ref = True
+
+#skip tests that cause the script to be killed due to lack of RAM
+#needs at least 8GB
+#main() only does membership tests on this, against both the running count and the model id
+skip = {}
+
+def download_file(model_id,filename,output_file,cache):
+    '''
+    request the given file from the BioModels download endpoint and save it to disk
+
+    model_id: model identifier placed in the download URL
+    filename: name of the file within the model, URL-quoted before being sent
+    output_file: local path the response body is written to, in binary mode
+    cache: object providing do_request(url), whose result exposes the body as .content
+
+    returns None, any exception from the request or the file write propagates to the caller
+    '''
+
+    #a bare "import urllib" does not guarantee that the parse submodule is loaded,
+    #so import it explicitly here rather than relying on another module having done so
+    import urllib.parse
+
+    qfilename = urllib.parse.quote_plus(filename)
+    url = f'{API_URL}/model/download/{model_id}?filename={qfilename}'
+
+    content = cache.do_request(url).content
+
+    with open(output_file,"wb") as fout:
+        fout.write(content)
+
+def replace_model_xml(sedml_path,sbml_filename):
+    '''
+    if the SEDML refers to a generic "model.xml" file
+    and the SBML file is not called this
+    replace the SEDML reference with the actual SBML filename
+
+    method used assumes 'source="model.xml"' will only
+    occur in the SBML file reference
+    which was true at time of testing on current BioModels release
+
+    sedml_path: path of the SEDML file, read and (only if changed) rewritten in place as utf-8
+    sbml_filename: name of the SBML file the SEDML should point at
+
+    returns True if the SBML reference already seemed valid (file left untouched)
+    returns False if the reference had to be rewritten
+    '''
+
+    generic_ref = 'source="model.xml"'
+
+    #the generic name is correct when the SBML file really is called model.xml
+    if sbml_filename == "model.xml":
+        return True
+
+    with open(sedml_path,encoding='utf-8') as f:
+        data = f.read()
+
+    if generic_ref not in data:
+        return True
+
+    #every occurrence is replaced, see the assumption in the docstring
+    with open(sedml_path,"w",encoding="utf-8") as fout:
+        fout.write(data.replace(generic_ref,f'source="{sbml_filename}"'))
+
+    return False
+
+def validate_sbml_file(model_id,mtab,info,cache,sup):
+    '''
+    tasks relating to validating the SBML file
+    return None to indicate aborting any further tests on this model
+    otherwise return the SBML filename
+
+    model_id: model identifier, used for the download and for the link in the table
+    mtab: results table, 'valid_sbml' is always set, 'valid_sbml_units' only if validation ran
+    info: model metadata, info['format']['name'] and info['files']['main'] are read
+    cache: request cache handed on to download_file
+    sup: object with suppress()/restore() used to silence the validator output
+
+    the SBML file is downloaded under its own name relative to the current working directory
+    '''
+
+    model_format = info['format']['name']
+    main_files = info['files']['main']
+
+    #handle only single SBML files
+    if model_format != "SBML":
+        mtab['valid_sbml'] = ['NonSBML', f"{model_format}:{main_files}"]
+        return None
+
+    if len(main_files) > 1:
+        mtab['valid_sbml'] = ['MultipleSBMLs',f"{main_files}"]
+        return None
+
+    if len(main_files) < 1:
+        mtab['valid_sbml'] = ['NoSBMLs',f"{main_files}"]
+        return None
+
+    #download the sbml file
+    sbml_file = main_files[0]['name']
+    try:
+        download_file(model_id,sbml_file,sbml_file,cache)
+    except Exception as e:
+        mtab['valid_sbml'] = ['DownloadFail',f"{sbml_file} {e}"]
+        return None
+
+    #validate the sbml file
+    sup.suppress() #suppress validation warning/error messages
+    try:
+        valid_sbml = pyneuroml.sbml.validate_sbml_files([sbml_file], strict_units=False)
+        valid_sbml_units = pyneuroml.sbml.validate_sbml_files([sbml_file], strict_units=True)
+    finally:
+        #always give the output streams back, even if a validator raises
+        sup.restore()
+
+    mtab['valid_sbml'] = ['pass' if valid_sbml else 'FAIL', f'[{sbml_file}]({API_URL}/{model_id}#Files)']
+    mtab['valid_sbml_units'] = 'pass' if valid_sbml_units else 'FAIL'
+
+    return sbml_file
+
+def validate_sedml_file(model_id,mtab,info,cache,sup,sbml_file):
+    '''
+    tasks relating to validating the SEDML file
+    return None to indicate aborting any further tests on this model
+    otherwise return the SEDML filename
+
+    model_id: model identifier, used for the download and for the link in the table
+    mtab: results table, 'valid_sedml' is always set, 'broken_ref' only once the file is downloaded
+    info: model metadata, info['files']['additional'] is searched for SEDML files
+    cache: request cache handed on to download_file
+    sup: object with suppress()/restore() used to silence the validator output
+    sbml_file: name of the model's SBML file, used to repair a generic model.xml reference
+
+    the SEDML file is downloaded under its own name relative to the current working directory
+    '''
+
+    #must have a SEDML file as well in order to be executed
+    if 'additional' not in info['files']:
+        mtab['valid_sedml'] = "NoSEDML"
+        return None
+
+    #a file counts as SEDML if its name or description mentions SEDML or SED-ML in any case
+    #a missing description is treated as empty rather than aborting the whole run
+    pattern = re.compile('SED[-]?ML')
+    candidates = [
+        file_info['name']
+        for file_info in info['files']['additional']
+        if pattern.search(f"{file_info['name']}|{file_info.get('description','')}".upper())
+    ]
+
+    #require exactly one SEDML file
+    if len(candidates) == 0:
+        mtab['valid_sedml'] = "NoSEDML"
+        return None
+
+    if len(candidates) > 1:
+        mtab['valid_sedml'] = ["MultipleSEDMLs",f"{candidates}"]
+        return None
+
+    #download sedml file
+    sedml_file = candidates[0]
+    try:
+        download_file(model_id,sedml_file,sedml_file,cache)
+    except Exception as e:
+        #record the reason, in the same way as the SBML download does
+        mtab['valid_sedml'] = ["DownloadFail",f"{sedml_file} {e}"]
+        return None
+
+    #if the sedml file contains a generic 'source="model.xml"' replace it with the sbml filename
+    if fix_broken_ref:
+        #replace_model_xml returns True when the reference did not need changing
+        broken_ref = replace_model_xml(sedml_file,sbml_file)
+        mtab['broken_ref'] = 'pass' if broken_ref else 'FAIL'
+    else:
+        mtab['broken_ref'] = 'NA'
+
+    sup.suppress()
+    try:
+        valid_sedml = pyneuroml.sedml.validate_sedml_files([sedml_file])
+    finally:
+        #always give the output streams back, even if the validator raises
+        sup.restore()
+    mtab['valid_sedml'] = ['pass' if valid_sedml else 'FAIL', f'[{sedml_file}]({API_URL}/{model_id}#Files)']
+
+    return sedml_file
+
+def main():
+    '''
+    download the BioModel model files and run the validation steps on them,
+    accumulating one table row per processed model
+
+    models that pass both the SBML and the SEDML step are handed to
+    utils.run_biosimulators_remotely_and_locally for every engine in utils.ENGINES
+
+    the working directory is changed to a per-model folder while a model is processed
+    and is restored to the starting directory before returning
+
+    NOTE(review): this draft only fills the table in memory, nothing here writes it out yet
+    '''
+
+    #caching is used to prevent the need to download the same responses from the remote server multiple times during testing
+    #mode="off" to disable caching, "store" to wipe and store fresh results, "reuse" to use the stored cache
+    #NOTE(review): "auto" is not one of the modes listed above, confirm its meaning in utils.RequestCache
+    cache = utils.RequestCache(mode="auto",direc="cache")
+
+    #accumulate results in columns defined by keys which correspond to the local variable names to be used below
+    #to allow automated loading into the columns
+    column_labels = "Model |valid-sbml|valid-sbml-units|valid-sedml|broken-ref|tellurium"
+    column_keys = "model_desc|valid_sbml|valid_sbml_units|valid_sedml|broken_ref|tellurium_outcome"
+    mtab = utils.MarkdownTable(column_labels,column_keys)
+
+    #allow stdout/stderr from validation tests to be suppressed to improve progress count visibility
+    sup = utils.SuppressOutput(stdout=suppress_stdout,stderr=suppress_stderr)
+
+    #get list of all available models
+    model_ids = cache.do_request(f"{API_URL}/model/identifiers?format={out_format}").json()['models']
+    count = 0
+    starting_dir = os.getcwd()
+
+    #the same engines and output folder are used for every model
+    test_folder = 'tests'
+    engine_list = list(utils.ENGINES.keys())
+
+    try:
+        #NOTE(review): the [0:1] slice restricts the run to the first model id and makes max_count
+        #redundant, kept because this is a draft - remove the slice to process the whole list
+        for model_id in model_ids[0:1]:
+            #allow testing on a small sample of models
+            if max_count > 0 and count >= max_count:
+                break
+            count += 1
+            print(f"\r{model_id} {count}/{len(model_ids)} ",end='')
+
+            #only process curated models
+            #BIOMD ids should be the curated models
+            if 'BIOMD' not in model_id:
+                continue
+
+            #skip if on the list to be skipped
+            if count in skip or model_id in skip:
+                continue
+
+            #from this point the model will create an output row even if not all tests are run
+            mtab.new_row() #append empty placeholder row
+            info = cache.do_request(f"{API_URL}/{model_id}?format={out_format}").json()
+
+            #NOTE(review): these strings arrived split over two lines, the lost inline break is
+            #reconstructed as <br/> - confirm against the upstream file, other markup may be missing too
+            if len(info['name']) > 36:
+                model_summary = f"[{model_id}]({API_URL}/{model_id})<br/>{info['name'][:30]}"
+                model_details = f"{info['name']}"
+                mtab['model_desc'] = mtab.make_fold(model_summary,model_details)
+            else:
+                mtab['model_desc'] = f"[{model_id}]({API_URL}/{model_id})<br/>{info['name']}"
+
+            #make temporary downloads of the sbml and sedml files
+            #the path is absolute so it stays valid even though the previous model changed directory
+            model_dir = os.path.join(starting_dir,tmp_dir,model_id)
+            os.makedirs(model_dir,exist_ok=True)
+            os.chdir(model_dir)
+
+            #sbml file validation tasks, includes downloading a local copy
+            sbml_file = validate_sbml_file(model_id,mtab,info,cache,sup)
+            if not sbml_file:
+                continue # no further tests possible
+
+            sedml_file = validate_sedml_file(model_id,mtab,info,cache,sup,sbml_file)
+            if not sedml_file:
+                continue # no further tests possible
+
+            utils.run_biosimulators_remotely_and_locally(engine_list,
+                                os.path.basename(sedml_file),
+                                os.path.basename(sbml_file),
+                                os.path.join(test_folder,'d1_plots_remote'),
+                                os.path.join(test_folder,'d1_plots_local'),
+                                test_folder=test_folder)
+    finally:
+        #do not leave the process inside the last model's temporary folder
+        os.chdir(starting_dir)
+
+#run the workflow only when this file is executed as a script, not when it is imported
+if __name__ == "__main__":
+ main()
\ No newline at end of file