From dedf30d6eaaca8f29a7a194f4035d904321cbe80 Mon Sep 17 00:00:00 2001 From: kt Date: Fri, 22 Mar 2024 10:02:13 -0400 Subject: [PATCH] fix #13 and #14 --- espfit/app/sampler.py | 67 +++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/espfit/app/sampler.py b/espfit/app/sampler.py index eb211e5..8efc348 100644 --- a/espfit/app/sampler.py +++ b/espfit/app/sampler.py @@ -38,8 +38,8 @@ class BaseSimulation(object): export_xml(exportSystem=True, exportState=True, exportIntegrator=True, output_directory_path=None): Export serialized system XML file and solvated pdb file. """ - def __init__(self, maxIterations=100, nsteps=250000, atomSubset='solute', - checkpoint_frequency=25000, logging_frequency=250000, netcdf_frequency=250000, + def __init__(self, maxIterations=100, nsteps=2500000, atomSubset='solute', + checkpoint_frequency=250000, logging_frequency=250000, netcdf_frequency=250000, output_directory_path=None, input_directory_path=None): """Initialize base simulation object. @@ -48,19 +48,19 @@ def __init__(self, maxIterations=100, nsteps=250000, atomSubset='solute', maxIterations : int, default=100 Maximum number of iterations to perform minimization. - nsteps : int, default=250000 (10 ns using 4 fs timestep) + nsteps : int, default=2500000 (10 ns using 4 fs timestep) Number of steps to run the simulation. atomSubset : str, default='solute' Subset of atoms to save. Default is 'solute'. Other options 'all' and 'not water'. - checkpoint_frequency : int, default=25000 (1 ns) + checkpoint_frequency : int, default=250000 (1 ns) Frequency (in steps) at which to write checkpoint files. - logging_frequency : int, default=250000 (10 ns) + logging_frequency : int, default=250000 (1 ns) Frequency (in steps) at which to write logging files. - netcdf_frequency : int, default=250000 (10 ns) + netcdf_frequency : int, default=250000 (1 ns) Frequency (in steps) at which to write netcdf files. output_directory_path : str, optional @@ -336,7 +336,7 @@ class SetupSampler(BaseSimulation): ['amber/protein.ff14SB.xml', 'amber/RNA.OL3.xml'] : pl-multi (TPO): NG, pl-single: OK, RNA: OK """ def __init__(self, - small_molecule_forcefield='espfit/data/forcefield/espaloma-0.3.2.pt', + small_molecule_forcefield='espaloma-0.3.2', forcefield_files = ['amber/ff14SB.xml', 'amber/phosaa14SB.xml'], water_model='tip3p', solvent_padding=9.0 * unit.angstroms, @@ -355,7 +355,8 @@ def __init__(self, Parameters ---------- small_molecule_forcefield : str, optional - The force field to be used for small molecules. Default is 'openff-2.1.0'. + The force field to be used for small molecules. Default is 'espaloma-0.3.2'. + Alternative recommended choice is 'openff-2.1.0'. forcefield_files : list, optional List of force field files. Default is ['amber14-all.xml']. water_model : str, optional @@ -378,8 +379,12 @@ def __init__(self, The integration timestep. Default is 4 * unit.femtoseconds. override_with_espaloma : bool, optional Whether to override the original parameters with espaloma. Default is True. + This will override all solute molecules with espaloma parameters. """ super(SetupSampler, self).__init__(**kwargs) + if small_molecule_forcefield == 'espaloma-0.3.2': + from importlib.resources import files + small_molecule_forcefield = str(files('espfit').joinpath("data/forcefield/espaloma-0.3.2.pt")) self.small_molecule_forcefield = small_molecule_forcefield self.water_model = water_model self.forcefield_files = self._update_forcefield_files(forcefield_files) @@ -397,9 +402,13 @@ def __init__(self, @classmethod - def from_toml(cls, filename, *args, **override_sampler_kwargs): + def _from_toml(cls, filename, *args, **override_sampler_kwargs): """Create SetupSampler from a TOML configuration file. + Note that this is designed for creating new systems with temporary espaloma models generated during + espaloma training. It supports multiple systems in the configuration file and returns a list of + SetupSampler instances. + Parameters ---------- filename : str @@ -429,30 +438,48 @@ def from_toml(cls, filename, *args, **override_sampler_kwargs): print(e) raise - config = config['sampler']['setup'] # list + config = config['sampler']['setup'] # this could be list to support multiple systems if config is None: raise ValueError("target is not specified in the configuration file") + # If the configuration file contains a single system, convert it to a list + if not isinstance(config, list): + config = [config] + samplers = [] _logger.debug(f'Found {len(config)} systems in the configuration file') for _config in config: sampler = cls() # Target information - target_class = _config['target_class'] - target_name = _config['target_name'] - + target_class = _config.get('target_class', None) + target_name = _config.get('target_name', None) sampler.target_class = target_class sampler.target_name = target_name - biopolymer_file = files('espfit').joinpath(f'data/target/{target_class}/{target_name}/target.pdb') - ligand_file = files('espfit').joinpath(f'data/target/{target_class}/{target_name}/ligand.sdf') - if not ligand_file.exists(): - ligand_file = None + # Get biopolymer and ligand file if given + # Priority 1: Use input files if given + biopolymer_file, ligand_file = None, None + if _config.get('biopolymer_file'): + biopolymer_file = _config['biopolymer_file'] + if not os.path.exists(biopolymer_file): + raise FileNotFoundError(f"File not found: {biopolymer_file}") + if _config.get('ligand_file'): + ligand_file = _config['ligand_file'] + if not os.path.exists(ligand_file): + raise FileNotFoundError(f"File not found: {ligand_file}") + # Priority 2: Search espfit/data/target directory if input files are not given + if biopolymer_file is None and ligand_file is None: + biopolymer_file = files('espfit').joinpath(f'data/target/{target_class}/{target_name}/target.pdb') + ligand_file = files('espfit').joinpath(f'data/target/{target_class}/{target_name}/ligand.sdf') + if not biopolymer_file.exists(): + raise FileNotFoundError(f"File not found: {biopolymer_file}") + if not ligand_file.exists(): + ligand_file = None # System settings for key, value in _config.items(): - if key not in ['target_class', 'target_name']: + if key not in ['target_class', 'target_name', 'biopolymer_file', 'ligand_file']: if hasattr(sampler, key): if isinstance(value, str) and "*" in value: _value = float(value.split('*')[0].strip()) @@ -684,7 +711,7 @@ def create_system(self, biopolymer_file=None, ligand_file=None): # As a workaround, we will delete the original `self._system_generator` and create a new one to regenerate the system with espaloma. # Only water and ion forcefield files will be used to regenerate the system. Solute molecules will be parametrized with espaloma. # - _logger.debug("Regenerate system with espaloma.") + _logger.info("Regenerate system with espaloma") # Re-create system generator del self._system_generator @@ -729,8 +756,6 @@ def _regenerate_espaloma_system(self): import mdtraj from openff.toolkit import Molecule - _logger.debug("Regenerate system with espaloma") - # Check biopolymer chains mdtop = mdtraj.Topology.from_openmm(self.modeller_solvated_topology) chain_indices = [ chain.index for chain in self.modeller_solvated_topology.chains() ]