diff --git a/benchmarks/benchmarks/topology.py b/benchmarks/benchmarks/topology.py index 8691515a938..45c3fcf95bf 100644 --- a/benchmarks/benchmarks/topology.py +++ b/benchmarks/benchmarks/topology.py @@ -1,6 +1,6 @@ import MDAnalysis import numpy as np -from MDAnalysis.topology import guessers +from MDAnalysis.guesser import DefaultGuesser try: from MDAnalysisTests.datafiles import GRO @@ -26,7 +26,7 @@ def setup(self, num_atoms): def time_guessbonds(self, num_atoms): """Benchmark for guessing bonds""" - guessers.guess_bonds(self.ag, self.ag.positions, + DefaultGuesser(None).guess_bonds(self.ag, self.ag.positions, box=self.ag.dimensions, vdwradii=self.vdwradii) diff --git a/package/CHANGELOG b/package/CHANGELOG index 1763df44ec3..64fcb63fe0e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -16,13 +16,14 @@ The rules for this file: ------------------------------------------------------------------------------- ??/??/?? IAlibay, HeetVekariya, marinegor, lilyminium, RMeli, ljwoods2, aditya292002, pstaerk, PicoCentauri, BFedder, - tyler.je.reddy, SampurnaM, leonwehrhan, kainszs, orionarcher, + tyler.je.reddy, SampurnaM, leonwehrhan, kainszs, orionarcher, yuxuanzhuang, PythonFZ, laksh-krishna-sharma, orbeckst, MattTDavies, - talagayev + talagayev, aya9aladdin * 2.8.0 Fixes + * Fix Bohrium (Bh) atomic mass in tables.py (PR #3753) * set `n_parts` to the total number of frames being analyzed if `n_parts` is bigger. (Issue #4685) * Catch higher dimensional indexing in GroupBase & ComponentBase (Issue #4647) @@ -56,6 +57,15 @@ Fixes * Fix groups.py doctests using sphinx directives (Issue #3925, PR #4374) Enhancements + * Removed type and mass guessing from all topology parsers (PR #3753) + * Added guess_TopologyAttrs() API to the Universe to handle attribute + guessing (PR #3753) + * Added the DefaultGuesser class, which is a general-purpose guesser with + the same functionalities as the existing guesser.py methods (PR #3753) + * Added is_value_missing() to `TopologyAttrs` to check for missing + values (PR #3753) + * Added guessed `Element` attribute to the ITPParser to preserve old mass + partial guessing behavior from being broken (PR #3753) * MDAnalysis now supports Python 3.13 (PR #4732) * Introduce parallelization API to `AnalysisBase` and to `analysis.rms.RMSD` class (Issue #4158, PR #4304) @@ -100,6 +110,9 @@ Changes numpy.testing.assert_allclose #4438) Deprecations + * Unknown masses are set to 0.0 for current version, this will be depracated + in version 3.0.0 and replaced by :class:`Masses`' no_value_label attribute(np.nan) + (PR #3753) * The MDAnalysis.anaylsis.encore module has been deprecated in favour of the mdaencore MDAKit and will be removed in version 3.0.0 (PR #4737) * The MMTF Reader is deprecated and will be removed in version 3.0 diff --git a/package/MDAnalysis/__init__.py b/package/MDAnalysis/__init__.py index 0e9e5574607..ca11be4bdf2 100644 --- a/package/MDAnalysis/__init__.py +++ b/package/MDAnalysis/__init__.py @@ -181,7 +181,7 @@ _TOPOLOGY_ATTRS: Dict = {} # {attrname: cls} _TOPOLOGY_TRANSPLANTS: Dict = {} # {name: [attrname, method, transplant class]} _TOPOLOGY_ATTRNAMES: Dict = {} # {lower case name w/o _ : name} - +_GUESSERS: Dict = {} # custom exceptions and warnings from .exceptions import ( diff --git a/package/MDAnalysis/analysis/bat.py b/package/MDAnalysis/analysis/bat.py index 5186cb6c882..c8a908f9ea5 100644 --- a/package/MDAnalysis/analysis/bat.py +++ b/package/MDAnalysis/analysis/bat.py @@ -175,7 +175,7 @@ class to calculate dihedral angles for a given set of atoms or residues def _sort_atoms_by_mass(atoms, reverse=False): - r"""Sorts a list of atoms by name and then by index + r"""Sorts a list of atoms by mass and then by index The atom index is used as a tiebreaker so that the ordering is reproducible. diff --git a/package/MDAnalysis/converters/OpenMMParser.py b/package/MDAnalysis/converters/OpenMMParser.py index eef92873783..b3402c448eb 100644 --- a/package/MDAnalysis/converters/OpenMMParser.py +++ b/package/MDAnalysis/converters/OpenMMParser.py @@ -25,6 +25,9 @@ =================================================================== .. versionadded:: 2.0.0 +.. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place + now through universe.guess_TopologyAttrs() API) Converts an @@ -59,8 +62,7 @@ import warnings from ..topology.base import TopologyReaderBase -from ..topology.tables import SYMB2Z -from ..topology.guessers import guess_types, guess_masses +from ..guesser.tables import SYMB2Z from ..core.topology import Topology from ..core.topologyattrs import ( Atomids, @@ -108,11 +110,6 @@ def _mda_topology_from_omm_topology(self, omm_topology): ------- top : MDAnalysis.core.topology.Topology - Note - ---- - When none of the elements are present in the openmm topolgy, their - atomtypes are guessed using their names and their masses are - then guessed using their atomtypes. When partial elements are present, values from available elements are used whereas the absent elements are assigned an empty string @@ -184,21 +181,32 @@ def _mda_topology_from_omm_topology(self, omm_topology): warnings.warn("Element information missing for some atoms. " "These have been given an empty element record ") if any(i == 'X' for i in atomtypes): - warnings.warn("For absent elements, atomtype has been " - "set to 'X' and mass has been set to 0.0. " - "If needed these can be guessed using " - "MDAnalysis.topology.guessers.") + warnings.warn( + "For absent elements, atomtype has been " + "set to 'X' and mass has been set to 0.0. " + "If needed these can be guessed using " + "universe.guess_TopologyAttrs(" + "to_guess=['masses', 'types']). " + "(for MDAnalysis version 2.x " + "this is done automatically," + " but it will be removed in 3.0).") + attrs.append(Elements(np.array(validated_elements, dtype=object))) else: - atomtypes = guess_types(atomnames) - masses = guess_masses(atomtypes) - wmsg = ("Element information is missing for all the atoms. " - "Elements attribute will not be populated. " - "Atomtype attribute will be guessed using atom " - "name and mass will be guessed using atomtype." - "See MDAnalysis.topology.guessers.") + wmsg = ( + "Element information is missing for all the atoms. " + "Elements attribute will not be populated. " + "Atomtype attribute will be guessed using atom " + "name and mass will be guessed using atomtype." + "For MDAnalysis version 2.x this is done automatically, " + "but it will be removed in MDAnalysis v3.0. " + "These can be guessed using " + "universe.guess_TopologyAttrs(" + "to_guess=['masses', 'types']) " + "See MDAnalysis.guessers.") + warnings.warn(wmsg) else: attrs.append(Elements(np.array(validated_elements, dtype=object))) diff --git a/package/MDAnalysis/converters/ParmEd.py b/package/MDAnalysis/converters/ParmEd.py index 174dc9fad3b..cc2e7a4cc52 100644 --- a/package/MDAnalysis/converters/ParmEd.py +++ b/package/MDAnalysis/converters/ParmEd.py @@ -81,12 +81,12 @@ import itertools import warnings +from ..guesser.tables import SYMB2Z import numpy as np from numpy.lib import NumpyVersion from . import base from ..coordinates.base import SingleFrameReaderBase -from ..topology.tables import SYMB2Z from ..core.universe import Universe from ..exceptions import NoDataError diff --git a/package/MDAnalysis/converters/ParmEdParser.py b/package/MDAnalysis/converters/ParmEdParser.py index 55dc48b2f1c..86de585fe53 100644 --- a/package/MDAnalysis/converters/ParmEdParser.py +++ b/package/MDAnalysis/converters/ParmEdParser.py @@ -90,7 +90,7 @@ import numpy as np from ..topology.base import TopologyReaderBase, change_squash -from ..topology.tables import Z2SYMB +from ..guesser.tables import Z2SYMB from ..core.topologyattrs import ( Atomids, Atomnames, diff --git a/package/MDAnalysis/converters/RDKit.py b/package/MDAnalysis/converters/RDKit.py index 139528440ab..b6d806df4c0 100644 --- a/package/MDAnalysis/converters/RDKit.py +++ b/package/MDAnalysis/converters/RDKit.py @@ -255,9 +255,7 @@ class RDKitConverter(base.ConverterBase): from MDAnalysisTests.datafiles import PSF, DCD from rdkit.Chem.Descriptors3D import Asphericity - u = mda.Universe(PSF, DCD) - elements = mda.topology.guessers.guess_types(u.atoms.names) - u.add_TopologyAttr('elements', elements) + u = mda.Universe(PSF, DCD, to_guess=['elements']) ag = u.select_atoms("resid 1-10") for ts in u.trajectory: diff --git a/package/MDAnalysis/converters/RDKitParser.py b/package/MDAnalysis/converters/RDKitParser.py index 841f979eeca..6bca57a43fe 100644 --- a/package/MDAnalysis/converters/RDKitParser.py +++ b/package/MDAnalysis/converters/RDKitParser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -47,7 +47,6 @@ import numpy as np from ..topology.base import TopologyReaderBase, change_squash -from ..topology import guessers from ..core.topologyattrs import ( Atomids, Atomnames, @@ -90,6 +89,7 @@ class RDKitParser(TopologyReaderBase): - Atomnames - Aromaticities - Elements + - Types - Masses - Bonds - Resids @@ -97,9 +97,6 @@ class RDKitParser(TopologyReaderBase): - RSChirality - Segids - Guesses the following: - - Atomtypes - Depending on RDKit's input, the following Attributes might be present: - Charges - Resnames @@ -156,6 +153,12 @@ class RDKitParser(TopologyReaderBase): .. versionadded:: 2.0.0 .. versionchanged:: 2.1.0 Added R/S chirality support + .. versionchanged:: 2.8.0 + Removed type guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). If atoms types is not + present in the input rdkit molecule as a _TriposAtomType property, + the type attribute get the same values as the element attribute. + """ format = 'RDKIT' @@ -303,8 +306,7 @@ def parse(self, **kwargs): if atomtypes: attrs.append(Atomtypes(np.array(atomtypes, dtype=object))) else: - atomtypes = guessers.guess_types(names) - attrs.append(Atomtypes(atomtypes, guessed=True)) + atomtypes = np.char.upper(elements) # Partial charges if charges: diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index bce51c43cdc..5e9530cac8a 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -155,7 +155,6 @@ from ..lib.util import store_init_arguments from . import base from .timestep import Timestep -from ..topology.core import guess_atom_element from ..exceptions import NoDataError diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 91b2c779304..7c9a3650dfd 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -3453,7 +3453,6 @@ def guess_bonds(self, vdwradii=None, fudge_factor=0.55, lower_bound=0.1): ---------- vdwradii : dict, optional Dict relating atom types: vdw radii - fudge_factor : float, optional The factor by which atoms must overlap each other to be considered a bond. Larger values will increase the number of bonds found. [0.55] @@ -3477,8 +3476,8 @@ def guess_bonds(self, vdwradii=None, fudge_factor=0.55, lower_bound=0.1): Corrected misleading docs, and now allows passing of `fudge_factor` and `lower_bound` arguments. """ - from ..topology.core import guess_bonds, guess_angles, guess_dihedrals from .topologyattrs import Bonds, Angles, Dihedrals + from ..guesser.default_guesser import DefaultGuesser def get_TopAttr(u, name, cls): """either get *name* or create one from *cls*""" @@ -3490,22 +3489,20 @@ def get_TopAttr(u, name, cls): return attr # indices of bonds - b = guess_bonds( - self.atoms, - self.atoms.positions, - vdwradii=vdwradii, - box=self.dimensions, - fudge_factor=fudge_factor, - lower_bound=lower_bound, - ) - bondattr = get_TopAttr(self.universe, "bonds", Bonds) + guesser = DefaultGuesser(None, fudge_factor=fudge_factor, + lower_bound=lower_bound, + box=self.dimensions, + vdwradii=vdwradii) + b = guesser.guess_bonds(self.atoms, self.atoms.positions) + + bondattr = get_TopAttr(self.universe, 'bonds', Bonds) bondattr._add_bonds(b, guessed=True) - a = guess_angles(self.bonds) + a = guesser.guess_angles(self.bonds) angleattr = get_TopAttr(self.universe, 'angles', Angles) angleattr._add_bonds(a, guessed=True) - d = guess_dihedrals(self.angles) + d = guesser.guess_dihedrals(self.angles) diheattr = get_TopAttr(self.universe, 'dihedrals', Dihedrals) diheattr._add_bonds(d) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 92fa25a5e5d..5e2621dc63d 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -518,6 +518,18 @@ def set_segments(self, sg, values): """Set segmentattributes for a given SegmentGroup""" raise NotImplementedError + @classmethod + def are_values_missing(cls, values): + """check if an attribute has a missing value + + .. versionadded:: 2.8.0 + """ + missing_value_label = getattr(cls, 'missing_value_label', None) + + if missing_value_label is np.nan: + return np.isnan(values) + else: + return values == missing_value_label # core attributes @@ -1441,6 +1453,7 @@ class Masses(AtomAttr): attrname = 'masses' singular = 'mass' per_object = 'atom' + missing_value_label = np.nan target_classes = [AtomGroup, ResidueGroup, SegmentGroup, Atom, Residue, Segment] transplants = defaultdict(list) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 739e0483395..c0ac6bcf6fd 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -86,7 +86,7 @@ from .topology import Topology from .topologyattrs import AtomAttr, ResidueAttr, SegmentAttr, BFACTOR_WARNING from .topologyobjects import TopologyObject - +from ..guesser.base import get_guesser logger = logging.getLogger("MDAnalysis.core.universe") @@ -238,6 +238,25 @@ class Universe(object): vdwradii: dict, ``None``, default ``None`` For use with *guess_bonds*. Supply a dict giving a vdwradii for each atom type which are used in guessing bonds. + context: str or :mod:`Guesser`, default ``'default'`` + Type of the Guesser to be used in guessing TopologyAttrs + to_guess: list[str] (optional, default ``['types', 'masses']``) + TopologyAttrs to be guessed. These TopologyAttrs will be wholly + guessed if they don't exist in the Universe. If they already exist in + the Universe, only empty or missing values will be guessed. + + .. warning:: + In MDAnalysis 2.x, types and masses are being automatically guessed + if they are missing (``to_guess=('types, 'masses')``). + However, starting with release 3.0 **no guessing will be done + by default** and it will be up to the user to request guessing + using ``to_guess`` and ``force_guess``. + + force_guess: list[str], (optional) + TopologyAttrs in this list will be force guessed. If the TopologyAttr + does not already exist in the Universe, this has no effect. If the + TopologyAttr does already exist, all values will be overwritten + by guessed values. fudge_factor: float, default [0.55] For use with *guess_bonds*. Supply the factor by which atoms must overlap each other to be considered a bond. @@ -267,7 +286,7 @@ class Universe(object): dimensions : numpy.ndarray system dimensions (simulation unit cell, if set in the trajectory) at the *current time step* - (see :attr:`MDAnalysis.coordinates.timestep.Timestep.dimensions`). + (see :attr:`MDAnalysis.coordinates.base.Timestep.dimensions`). The unit cell can be set for the current time step (but the change is not permanent unless written to a file). atoms : AtomGroup @@ -320,11 +339,18 @@ class Universe(object): .. versionchanged:: 2.5.0 Added fudge_factor and lower_bound parameters for use with *guess_bonds*. + + .. versionchanged:: 2.8.0 + Added :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` API + guessing masses and atom types after topology + is read from a registered parser. + """ def __init__(self, topology=None, *coordinates, all_coordinates=False, format=None, topology_format=None, transformations=None, guess_bonds=False, vdwradii=None, fudge_factor=0.55, - lower_bound=0.1, in_memory=False, + lower_bound=0.1, in_memory=False, context='default', + to_guess=('types', 'masses'), force_guess=(), in_memory_step=1, **kwargs): self._trajectory = None # managed attribute holding Reader @@ -333,7 +359,7 @@ def __init__(self, topology=None, *coordinates, all_coordinates=False, self.residues = None self.segments = None self.filename = None - + self._context = get_guesser(context) self._kwargs = { 'transformations': transformations, 'guess_bonds': guess_bonds, @@ -381,12 +407,17 @@ def __init__(self, topology=None, *coordinates, all_coordinates=False, self._trajectory.add_transformations(*transformations) if guess_bonds: - self.atoms.guess_bonds(vdwradii=vdwradii, fudge_factor=fudge_factor, - lower_bound=lower_bound) + force_guess = list(force_guess) + ['bonds', 'angles', 'dihedrals'] + + self.guess_TopologyAttrs( + context, to_guess, force_guess, vdwradii=vdwradii, **kwargs) + def copy(self): """Return an independent copy of this Universe""" - new = self.__class__(self._topology.copy()) + context = self._context.copy() + new = self.__class__(self._topology.copy(), + to_guess=(), context=context) new.trajectory = self.trajectory.copy() return new @@ -475,7 +506,7 @@ def empty(cls, n_atoms, n_residues=1, n_segments=1, n_frames=1, residue_segindex=residue_segindex, ) - u = cls(top) + u = cls(top, to_guess=()) if n_frames > 1 or trajectory: coords = np.zeros((n_frames, n_atoms, 3), dtype=np.float32) @@ -714,10 +745,10 @@ def __repr__(self): n_atoms=len(self.atoms)) @classmethod - def _unpickle_U(cls, top, traj): + def _unpickle_U(cls, top, traj, context): """Special method used by __reduce__ to deserialise a Universe""" # top is a Topology obj at this point, but Universe can handle that. - u = cls(top) + u = cls(top, to_guess=(), context=context) u.trajectory = traj return u @@ -727,7 +758,7 @@ def __reduce__(self): # transformation (that has AtomGroup inside). Use __reduce__ instead. # Universe's two "legs" of top and traj both serialise themselves. return (self._unpickle_U, (self._topology, - self._trajectory)) + self._trajectory, self._context.copy())) # Properties @property @@ -1459,13 +1490,114 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, "hydrogens with `addHs=True`") numConfs = rdkit_kwargs.pop("numConfs", numConfs) - if not (type(numConfs) is int and numConfs > 0): + if not (isinstance(numConfs, int) and numConfs > 0): raise SyntaxError("numConfs must be a non-zero positive " - "integer instead of {0}".format(numConfs)) + "integer instead of {0}".format(numConfs)) AllChem.EmbedMultipleConfs(mol, numConfs, **rdkit_kwargs) return cls(mol, **kwargs) + def guess_TopologyAttrs( + self, context=None, to_guess=None, force_guess=None, **kwargs): + """ + Guess and add attributes through a specific context-aware guesser. + + Parameters + ---------- + context: str or :mod:`Guesser` class + For calling a matching guesser class for this specific context + to_guess: Optional[list[str]] + TopologyAttrs to be guessed. These TopologyAttrs will be wholly + guessed if they don't exist in the Universe. If they already exist in + the Universe, only empty or missing values will be guessed. + + .. warning:: + In MDAnalysis 2.x, types and masses are being automatically guessed + if they are missing (``to_guess=('types, 'masses')``). + However, starting with release 3.0 **no guessing will be done + by default** and it will be up to the user to request guessing + using ``to_guess`` and ``force_guess``. + + force_guess: Optional[list[str]] + TopologyAttrs in this list will be force guessed. If the + TopologyAttr does not already exist in the Universe, this has no + effect. If the TopologyAttr does already exist, all values will + be overwritten by guessed values. + **kwargs: extra arguments to be passed to the guesser class + + Examples + -------- + To guess ``masses`` and ``types`` attributes:: + + u.guess_TopologyAttrs(context='default', to_guess=['masses', 'types']) + + .. versionadded:: 2.8.0 + + """ + if not context: + context = self._context + + guesser = get_guesser(context, self.universe, **kwargs) + self._context = guesser + + if to_guess is None: + to_guess = [] + if force_guess is None: + force_guess = [] + + total_guess = list(to_guess) + list(force_guess) + + # Removing duplicates from the guess list while keeping attributes + # order as it is more convenient to guess attributes + # in the same order that the user provided + total_guess = list(dict.fromkeys(total_guess)) + + objects = ['bonds', 'angles', 'dihedrals', 'impropers'] + + # Checking if the universe is empty to avoid errors + # from guesser methods + if self._topology.n_atoms > 0: + + topology_attrs = [att.attrname for att in + self._topology.read_attributes] + + common_attrs = set(to_guess) & set(topology_attrs) + common_attrs = ", ".join(attr for attr in common_attrs) + + if len(common_attrs) > 0: + logger.info( + f'The attribute(s) {common_attrs} have already been read ' + 'from the topology file. The guesser will ' + 'only guess empty values for this attribute, ' + 'if any exists. To overwrite it by completely ' + 'guessed values, you can pass the attribute to' + ' the force_guess parameter instead of ' + 'the to_guess one') + + for attr in total_guess: + if guesser.is_guessable(attr): + fg = attr in force_guess + values = guesser.guess_attr(attr, fg) + + if values is not None: + if attr in objects: + self._add_topology_objects( + attr, values, guessed=True) + else: + guessed_attr = _TOPOLOGY_ATTRS[attr](values, True) + self.add_TopologyAttr(guessed_attr) + logger.info( + f'attribute {attr} has been guessed' + ' successfully.') + + else: + raise ValueError(f'{context} guesser can not guess the' + f' following attribute: {attr}') + + else: + warnings.warn('Can not guess attributes ' + 'for universe with 0 atoms') + def Merge(*args): """Create a new new :class:`Universe` from one or more diff --git a/package/MDAnalysis/guesser/__init__.py b/package/MDAnalysis/guesser/__init__.py new file mode 100644 index 00000000000..b433356290f --- /dev/null +++ b/package/MDAnalysis/guesser/__init__.py @@ -0,0 +1,50 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" +Context-specific guessers --- :mod:`MDAnalysis.guesser` +======================================================== + +You can use guesser classes either directly by initiating an instance of it and use its guessing methods or through +the :meth:`guess_TopologyAttrs `: API of the universe. + +The following table lists the currently supported Context-aware Guessers along with +the attributes they can guess. + +.. table:: Table of Supported Guessers + + ============================================== ========== ===================== =================================================== + Name Context Attributes Remarks + ============================================== ========== ===================== =================================================== + :ref:`DefaultGuesser ` default types, elements, general purpose guesser + masses, bonds, + angles, dihedrals, + improper dihedrals + ============================================== ========== ===================== =================================================== + + + + +""" +from . import base +from .default_guesser import DefaultGuesser diff --git a/package/MDAnalysis/guesser/base.py b/package/MDAnalysis/guesser/base.py new file mode 100644 index 00000000000..aab0723aaa6 --- /dev/null +++ b/package/MDAnalysis/guesser/base.py @@ -0,0 +1,202 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +""" +Base guesser classes --- :mod:`MDAnalysis.guesser.base` +================================================================ + +Derive context-specific guesser classes from the base class in this module. + +Classes +------- + +.. autoclass:: GuesserBase + :members: + :inherited-members: + +.. autofunction:: get_guesser + +""" +from .. import _GUESSERS +import numpy as np +from .. import _TOPOLOGY_ATTRS +import logging +from typing import Dict +import copy + +logger = logging.getLogger("MDAnalysis.guesser.base") + + +class _GuesserMeta(type): + """Internal: guesser classes registration + + When classes which inherit from GuesserBase are *defined* + this metaclass makes it known to MDAnalysis. 'context' + attribute are read: + - `context` defines the context of the guesser class for example: + forcefield specific context as MartiniGuesser + and file specific context as PDBGuesser. + + Eg:: + + class FooGuesser(GuesserBase): + format = 'foo' + + .. versionadded:: 2.8.0 + """ + def __init__(cls, name, bases, classdict): + type.__init__(type, name, bases, classdict) + + _GUESSERS[classdict['context'].upper()] = cls + + +class GuesserBase(metaclass=_GuesserMeta): + """Base class for context-specific guessers to inherit from + + Parameters + ---------- + universe : Universe, optional + Supply a Universe to the Guesser. This then becomes the source of atom + attributes to be used in guessing processes. (this is relevant to how + the universe's guess_TopologyAttrs API works. + See :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs`). + **kwargs : dict, optional + To pass additional data to the guesser that can be used with + different methods. + + + .. versionadded:: 2.8.0 + + """ + context = 'base' + _guesser_methods: Dict = {} + + def __init__(self, universe=None, **kwargs): + self._universe = universe + self._kwargs = kwargs + + def update_kwargs(self, **kwargs): + self._kwargs.update(kwargs) + + def copy(self): + """Return a copy of this Guesser""" + kwargs = copy.deepcopy(self._kwargs) + new = self.__class__(universe=None, **kwargs) + return new + + def is_guessable(self, attr_to_guess): + """check if the passed atrribute can be guessed by the guesser class + + Parameters + ---------- + guess: str + Attribute to be guessed then added to the Universe + + Returns + ------- + bool + """ + if attr_to_guess.lower() in self._guesser_methods: + return True + + return False + + def guess_attr(self, attr_to_guess, force_guess=False): + """map the attribute to be guessed with the apporpiate guessing method + + Parameters + ---------- + attr_to_guess: str + an atrribute to be guessed then to be added to the universe + force_guess: bool + To indicate wether to only partialy guess the empty values of the + attribute or to overwrite all existing values by guessed one + + Returns + ------- + NDArray of guessed values + + """ + + # check if the topology already has the attribute to partially guess it + if hasattr(self._universe.atoms, attr_to_guess) and not force_guess: + attr_values = np.array( + getattr(self._universe.atoms, attr_to_guess, None)) + + top_attr = _TOPOLOGY_ATTRS[attr_to_guess] + + empty_values = top_attr.are_values_missing(attr_values) + + if True in empty_values: + # pass to the guesser_method boolean mask to only guess the + # empty values + attr_values[empty_values] = self._guesser_methods[attr_to_guess]( + indices_to_guess=empty_values) + return attr_values + + else: + logger.info( + f'There is no empty {attr_to_guess} values. Guesser did ' + f'not guess any new values for {attr_to_guess} attribute') + return None + else: + return np.array(self._guesser_methods[attr_to_guess]()) + + +def get_guesser(context, u=None, **kwargs): + """get an appropiate guesser to the Universe and pass + the Universe to the guesser + + Parameters + ---------- + u: Universe + to be passed to the guesser + context: str or Guesser + **kwargs : dict, optional + Extra arguments are passed to the guesser. + + Returns + ------- + Guesser class + + Raises + ------ + * :exc:`KeyError` upon failing to return a guesser class + + .. versionadded:: 2.8.0 + + """ + if isinstance(context, GuesserBase): + context._universe = u + context.update_kwargs(**kwargs) + return context + try: + if issubclass(context, GuesserBase): + return context(u, **kwargs) + except TypeError: + pass + + try: + guesser = _GUESSERS[context.upper()](u, **kwargs) + except KeyError: + raise KeyError("Unidentified guesser type {0}".format(context)) + return guesser diff --git a/package/MDAnalysis/guesser/default_guesser.py b/package/MDAnalysis/guesser/default_guesser.py new file mode 100644 index 00000000000..a64b023309e --- /dev/null +++ b/package/MDAnalysis/guesser/default_guesser.py @@ -0,0 +1,568 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding: utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 + +r""" +Default Guesser +================ +.. _DefaultGuesser: + +DefaultGuesser is a generic guesser class that has basic guessing methods. +This class is a general purpose guesser that can be used with most topologies, +but being generic makes it the less accurate among all guessers. + + + + + +Classes +------- + +.. autoclass:: DefaultGuesser + :members: + :inherited-members: + +""" +from .base import GuesserBase +import numpy as np +import warnings +import math + +import re + +from ..lib import distances +from . import tables + + +class DefaultGuesser(GuesserBase): + """ + This guesser holds generic methods (not directed to specific contexts) for + guessing different topology attribute. It has the same methods which where + originally found in Topology.guesser.py. The attributes that can be + guessed by this class are: + - masses + - types + - elements + - angles + - dihedrals + - bonds + - improper dihedrals + - aromaticities + + You can use this guesser either directly through an instance, or through + the :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` method. + + Examples + -------- + to guess bonds for a universe:: + + import MDAnalysis as mda + from MDAnalysisTests.datafiles import two_water_gro + + u = mda.Universe(two_water_gro, context='default', to_guess=['bonds']) + + .. versionadded:: 2.8.0 + + """ + context = 'default' + + def __init__(self, universe, **kwargs): + super().__init__(universe, **kwargs) + self._guesser_methods = { + 'masses': self.guess_masses, + 'types': self.guess_types, + 'elements': self.guess_types, + 'bonds': self.guess_bonds, + 'angles': self.guess_angles, + 'dihedrals': self.guess_dihedrals, + 'impropers': self.guess_improper_dihedrals, + 'aromaticities': self.guess_aromaticities, + } + + def guess_masses(self, atom_types=None, indices_to_guess=None): + """Guess the mass of many atoms based upon their type. + For guessing masses through :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs`: + + First try to guess masses from atom elements, if not available, + try to guess masses from types and if not available, try to guess + types. + + Parameters + ---------- + atom_types : Optional[np.ndarray] + Atom types/elements to guess masses from + indices_to_guess : Optional[np.ndarray] + Mask array for partially guess masses for certain atoms + + Returns + ------- + atom_masses : np.ndarray dtype float64 + + Raises + ------ + :exc:`ValueError` + If there are no atom types or elements to guess mass from. + + """ + if atom_types is None: + try: + atom_types = self._universe.atoms.elements + except AttributeError: + try: + atom_types = self._universe.atoms.types + except AttributeError: + try: + atom_types = self.guess_types( + atom_types=self._universe.atoms.names) + except ValueError: + raise ValueError( + "there is no reference attributes" + " (elements, types, or names)" + " in this universe to guess mass from") + + if indices_to_guess is not None: + atom_types = atom_types[indices_to_guess] + + masses = np.array([self.get_atom_mass(atom) + for atom in atom_types], dtype=np.float64) + return masses + + def get_atom_mass(self, element): + """Return the atomic mass in u for *element*. + Masses are looked up in :data:`MDAnalysis.guesser.tables.masses`. + + .. Warning:: Until version 3.0.0 unknown masses are set to 0.0 + + """ + try: + return tables.masses[element] + except KeyError: + try: + return tables.masses[element.upper()] + except KeyError: + warnings.warn( + "Unknown masses are set to 0.0 for current version, " + "this will be deprecated in version 3.0.0 and replaced by" + " Masse's no_value_label (np.nan)", + PendingDeprecationWarning) + return 0.0 + + def guess_atom_mass(self, atomname): + """Guess a mass based on the atom name. + + :func:`guess_atom_element` is used to determine the kind of atom. + + .. warning:: Until version 3.0.0 anything not recognized is simply + set to 0.0; if you rely on the masses you might want to double-check. + """ + return self.get_atom_mass(self.guess_atom_element(atomname)) + + def guess_types(self, atom_types=None, indices_to_guess=None): + """Guess the atom type of many atoms based on atom name + + Parameters + ---------- + atom_types (optional) + atoms names if types guessing is desired to be from names + indices_to_guess (optional) + Mask array for partially guess types for certain atoms + + Returns + ------- + atom_types : np.ndarray dtype object + + Raises + ------ + :exc:`ValueError` + If there is no names to guess types from. + + """ + if atom_types is None: + try: + atom_types = self._universe.atoms.names + except AttributeError: + raise ValueError( + "there is no reference attributes in this universe" + "to guess types from") + + if indices_to_guess is not None: + atom_types = atom_types[indices_to_guess] + + return np.array([self.guess_atom_element(atom) + for atom in atom_types], dtype=object) + + def guess_atom_element(self, atomname): + """Guess the element of the atom from the name. + + Looks in dict to see if element is found, otherwise it uses the first + character in the atomname. The table comes from CHARMM and AMBER atom + types, where the first character is not sufficient to determine the + atom type. Some GROMOS ions have also been added. + + .. Warning: The translation table is incomplete. + This will probably result in some mistakes, + but it still better than nothing! + + See Also + -------- + :func:`guess_atom_type` + :mod:`MDAnalysis.guesser.tables` + """ + NUMBERS = re.compile(r'[0-9]') # match numbers + SYMBOLS = re.compile(r'[*+-]') # match *, +, - + if atomname == '': + return '' + try: + return tables.atomelements[atomname.upper()] + except KeyError: + # strip symbols and numbers + no_symbols = re.sub(SYMBOLS, '', atomname) + name = re.sub(NUMBERS, '', no_symbols).upper() + + # just in case + if name in tables.atomelements: + return tables.atomelements[name] + + while name: + if name in tables.elements: + return name + if name[:-1] in tables.elements: + return name[:-1] + if name[1:] in tables.elements: + return name[1:] + if len(name) <= 2: + return name[0] + name = name[:-1] # probably element is on left not right + + # if it's numbers + return no_symbols + + def guess_bonds(self, atoms=None, coords=None): + r"""Guess if bonds exist between two atoms based on their distance. + + Bond between two atoms is created, if the two atoms are within + + .. math:: + + d < f \cdot (R_1 + R_2) + + of each other, where :math:`R_1` and :math:`R_2` are the VdW radii + of the atoms and :math:`f` is an ad-hoc *fudge_factor*. This is + the `same algorithm that VMD uses`_. + + Parameters + ---------- + atoms : AtomGroup + atoms for which bonds should be guessed + fudge_factor : float, optional + The factor by which atoms must overlap eachother to be considered a + bond. Larger values will increase the number of bonds found. [0.55] + vdwradii : dict, optional + To supply custom vdwradii for atoms in the algorithm. Must be a + dict of format {type:radii}. The default table of van der Waals + radii is hard-coded as :data:`MDAnalysis.guesser.tables.vdwradii`. + Any user defined vdwradii passed as an argument will supercede the + table values. [``None``] + lower_bound : float, optional + The minimum bond length. All bonds found shorter than this length + will be ignored. This is useful for parsing PDB with altloc records + where atoms with altloc A and B maybe very close together and + there should be no chemical bond between them. [0.1] + box : array_like, optional + Bonds are found using a distance search, if unit cell information + is given, periodic boundary conditions will be considered in the + distance search. [``None``] + + Returns + ------- + list + List of tuples suitable for use in Universe topology building. + + Warnings + -------- + No check is done after the bonds are guessed to see if Lewis + structure is correct. This is wrong and will burn somebody. + + Raises + ------ + :exc:`ValueError` + If inputs are malformed or `vdwradii` data is missing. + + + .. _`same algorithm that VMD uses`: + http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.1/ug/node26.html + + """ + if atoms is None: + atoms = self._universe.atoms + + if coords is None: + coords = self._universe.atoms.positions + + if len(atoms) != len(coords): + raise ValueError("'atoms' and 'coord' must be the same length") + + fudge_factor = self._kwargs.get('fudge_factor', 0.55) + + # so I don't permanently change it + vdwradii = tables.vdwradii.copy() + user_vdwradii = self._kwargs.get('vdwradii', None) + # this should make algo use their values over defaults + if user_vdwradii: + vdwradii.update(user_vdwradii) + + # Try using types, then elements + if hasattr(atoms, 'types'): + atomtypes = atoms.types + else: + atomtypes = self.guess_types(atom_types=atoms.names) + + # check that all types have a defined vdw + if not all(val in vdwradii for val in set(atomtypes)): + raise ValueError(("vdw radii for types: " + + ", ".join([t for t in set(atomtypes) if + t not in vdwradii]) + + ". These can be defined manually using the" + + f" keyword 'vdwradii'")) + + lower_bound = self._kwargs.get('lower_bound', 0.1) + + box = self._kwargs.get('box', None) + + if box is not None: + box = np.asarray(box) + + # to speed up checking, calculate what the largest possible bond + # atom that would warrant attention. + # then use this to quickly mask distance results later + max_vdw = max([vdwradii[t] for t in atomtypes]) + + bonds = [] + + pairs, dist = distances.self_capped_distance(coords, + max_cutoff=2.0 * max_vdw, + min_cutoff=lower_bound, + box=box) + for idx, (i, j) in enumerate(pairs): + d = (vdwradii[atomtypes[i]] + + vdwradii[atomtypes[j]]) * fudge_factor + if (dist[idx] < d): + bonds.append((atoms[i].index, atoms[j].index)) + return tuple(bonds) + + def guess_angles(self, bonds=None): + """Given a list of Bonds, find all angles that exist between atoms. + + Works by assuming that if atoms 1 & 2 are bonded, and 2 & 3 are bonded, + then (1,2,3) must be an angle. + + Parameters + ---------- + bonds : Bonds + from which angles should be guessed + + Returns + ------- + list of tuples + List of tuples defining the angles. + Suitable for use in u._topology + + + See Also + -------- + :meth:`guess_bonds` + + """ + from ..core.universe import Universe + + angles_found = set() + + if bonds is None: + if hasattr(self._universe.atoms, 'bonds'): + bonds = self._universe.atoms.bonds + else: + temp_u = Universe.empty(n_atoms=len(self._universe.atoms)) + temp_u.add_bonds(self.guess_bonds( + self._universe.atoms, self._universe.atoms.positions)) + bonds = temp_u.atoms.bonds + + for b in bonds: + for atom in b: + other_a = b.partner(atom) # who's my friend currently in Bond + for other_b in atom.bonds: + if other_b != b: # if not the same bond I start as + third_a = other_b.partner(atom) + desc = tuple( + [other_a.index, atom.index, third_a.index]) + # first index always less than last + if desc[0] > desc[-1]: + desc = desc[::-1] + angles_found.add(desc) + + return tuple(angles_found) + + def guess_dihedrals(self, angles=None): + """Given a list of Angles, find all dihedrals that exist between atoms. + + Works by assuming that if (1,2,3) is an angle, and 3 & 4 are bonded, + then (1,2,3,4) must be a dihedral. + + Parameters + ---------- + angles : Angles + from which dihedrals should be guessed + + Returns + ------- + list of tuples + List of tuples defining the dihedrals. + Suitable for use in u._topology + + """ + from ..core.universe import Universe + + if angles is None: + if hasattr(self._universe.atoms, 'angles'): + angles = self._universe.atoms.angles + + else: + temp_u = Universe.empty(n_atoms=len(self._universe.atoms)) + + temp_u.add_bonds(self.guess_bonds( + self._universe.atoms, self._universe.atoms.positions)) + + temp_u.add_angles(self.guess_angles(temp_u.atoms.bonds)) + + angles = temp_u.atoms.angles + + dihedrals_found = set() + + for b in angles: + a_tup = tuple([a.index for a in b]) # angle as tuple of numbers + # if searching with b[0], want tuple of (b[2], b[1], b[0], +new) + # search the first and last atom of each angle + for atom, prefix in zip([b.atoms[0], b.atoms[-1]], + [a_tup[::-1], a_tup]): + for other_b in atom.bonds: + if not other_b.partner(atom) in b: + third_a = other_b.partner(atom) + desc = prefix + (third_a.index,) + if desc[0] > desc[-1]: + desc = desc[::-1] + dihedrals_found.add(desc) + + return tuple(dihedrals_found) + + def guess_improper_dihedrals(self, angles=None): + """Given a list of Angles, find all improper dihedrals + that exist between atoms. + + Works by assuming that if (1,2,3) is an angle, and 2 & 4 are bonded, + then (2, 1, 3, 4) must be an improper dihedral. + ie the improper dihedral is the angle between the planes formed by + (1, 2, 3) and (1, 3, 4) + + Returns + ------- + List of tuples defining the improper dihedrals. + Suitable for use in u._topology + + """ + + from ..core.universe import Universe + + if angles is None: + if hasattr(self._universe.atoms, 'angles'): + angles = self._universe.atoms.angles + + else: + temp_u = Universe.empty(n_atoms=len(self._universe.atoms)) + + temp_u.add_bonds(self.guess_bonds( + self._universe.atoms, self._universe.atoms.positions)) + + temp_u.add_angles(self.guess_angles(temp_u.atoms.bonds)) + + angles = temp_u.atoms.angles + + dihedrals_found = set() + + for b in angles: + atom = b[1] # select middle atom in angle + # start of improper tuple + a_tup = tuple([b[a].index for a in [1, 2, 0]]) + # if searching with b[1], want tuple of (b[1], b[2], b[0], +new) + # search the first and last atom of each angle + for other_b in atom.bonds: + other_atom = other_b.partner(atom) + # if this atom isn't in the angle I started with + if other_atom not in b: + desc = a_tup + (other_atom.index,) + if desc[0] > desc[-1]: + desc = desc[::-1] + dihedrals_found.add(desc) + + return tuple(dihedrals_found) + + def guess_atom_charge(self, atoms): + """Guess atom charge from the name. + + .. Warning:: Not implemented; simply returns 0. + """ + # TODO: do something slightly smarter, at least use name/element + return 0.0 + + def guess_aromaticities(self, atomgroup=None): + """Guess aromaticity of atoms using RDKit + + Returns + ------- + aromaticities : numpy.ndarray + Array of boolean values for the aromaticity of each atom + + """ + if atomgroup is None: + atomgroup = self._universe.atoms + + mol = atomgroup.convert_to("RDKIT") + return np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()]) + + def guess_gasteiger_charges(self, atomgroup): + """Guess Gasteiger partial charges using RDKit + + Parameters + ---------- + atomgroup : mda.core.groups.AtomGroup + Atoms for which the charges will be guessed + + Returns + ------- + charges : numpy.ndarray + Array of float values representing the charge of each atom + + """ + + mol = atomgroup.convert_to("RDKIT") + from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges + ComputeGasteigerCharges(mol, throwOnParamFailure=True) + return np.array([atom.GetDoubleProp("_GasteigerCharge") + for atom in mol.GetAtoms()], + dtype=np.float32) diff --git a/package/MDAnalysis/topology/tables.py b/package/MDAnalysis/guesser/tables.py similarity index 99% rename from package/MDAnalysis/topology/tables.py rename to package/MDAnalysis/guesser/tables.py index ea46421cc42..5e373616b7e 100644 --- a/package/MDAnalysis/topology/tables.py +++ b/package/MDAnalysis/guesser/tables.py @@ -200,7 +200,7 @@ def kv2dict(s, convertor: Any = str): Bk 247 Be 9.012182 Bi 208.98037 -Bh 262 +Bh 264 B 10.811 BR 79.90400 Cd 112.411 diff --git a/package/MDAnalysis/topology/CRDParser.py b/package/MDAnalysis/topology/CRDParser.py index 05dbe9d4298..5e4406732f3 100644 --- a/package/MDAnalysis/topology/CRDParser.py +++ b/package/MDAnalysis/topology/CRDParser.py @@ -28,8 +28,7 @@ Read a list of atoms from a CHARMM CARD coordinate file (CRD_) to build a basic topology. Reads atom ids (ATOMNO), atom names (TYPES), resids (RESID), residue numbers (RESNO), residue names (RESNames), segment ids -(SEGID) and tempfactor (Weighting). Atom element and mass are guessed based on -the name of the atom. +(SEGID) and tempfactor (Weighting). Residues are detected through a change is either resid or resname while segments are detected according to changes in segid. @@ -49,13 +48,10 @@ from ..lib.util import openany, FORTRANReader from .base import TopologyReaderBase, change_squash -from . import guessers from ..core.topology import Topology from ..core.topologyattrs import ( Atomids, Atomnames, - Atomtypes, - Masses, Resids, Resnames, Resnums, @@ -76,9 +72,9 @@ class CRDParser(TopologyReaderBase): - Resnums - Segids - Guesses the following attributes: - - Atomtypes - - Masses + .. versionchanged:: 2.8.0 + Type and mass are not longer guessed here. Until 3.0 these will still be + set by default through through universe.guess_TopologyAttrs() API. """ format = 'CRD' @@ -141,10 +137,6 @@ def parse(self, **kwargs): resnums = np.array(resnums, dtype=np.int32) segids = np.array(segids, dtype=object) - # Guess some attributes - atomtypes = guessers.guess_types(atomnames) - masses = guessers.guess_masses(atomtypes) - atom_residx, (res_resids, res_resnames, res_resnums, res_segids) = change_squash( (resids, resnames), (resids, resnames, resnums, segids)) res_segidx, (seg_segids,) = change_squash( @@ -154,8 +146,6 @@ def parse(self, **kwargs): attrs=[ Atomids(atomids), Atomnames(atomnames), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Tempfactors(tempfactors), Resids(res_resids), Resnames(res_resnames), diff --git a/package/MDAnalysis/topology/DLPolyParser.py b/package/MDAnalysis/topology/DLPolyParser.py index 489e6675bde..b85a0d188cc 100644 --- a/package/MDAnalysis/topology/DLPolyParser.py +++ b/package/MDAnalysis/topology/DLPolyParser.py @@ -29,9 +29,6 @@ DLPoly files have the following Attributes: - Atomnames - Atomids -Guesses the following attributes: - - Atomtypes - - Masses .. _Poly: http://www.stfc.ac.uk/SCD/research/app/ccg/software/DL_POLY/44516.aspx @@ -46,14 +43,11 @@ """ import numpy as np -from . import guessers from .base import TopologyReaderBase from ..core.topology import Topology from ..core.topologyattrs import ( Atomids, Atomnames, - Atomtypes, - Masses, Resids, Resnums, Segids, @@ -65,6 +59,9 @@ class ConfigParser(TopologyReaderBase): """DL_Poly CONFIG file parser .. versionadded:: 0.10.1 + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). """ format = 'CONFIG' @@ -109,14 +106,9 @@ def parse(self, **kwargs): else: ids = np.arange(n_atoms) - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(atomtypes) - attrs = [ Atomnames(names), Atomids(ids), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), @@ -176,14 +168,9 @@ def parse(self, **kwargs): else: ids = np.arange(n_atoms) - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(atomtypes) - attrs = [ Atomnames(names), Atomids(ids), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), diff --git a/package/MDAnalysis/topology/DMSParser.py b/package/MDAnalysis/topology/DMSParser.py index 1240c9b7574..f165272fc26 100644 --- a/package/MDAnalysis/topology/DMSParser.py +++ b/package/MDAnalysis/topology/DMSParser.py @@ -44,7 +44,6 @@ import sqlite3 import os -from . import guessers from .base import TopologyReaderBase, change_squash from ..core.topology import Topology from ..core.topologyattrs import ( @@ -53,7 +52,6 @@ Bonds, Charges, ChainIDs, - Atomtypes, Masses, Resids, Resnums, @@ -87,12 +85,14 @@ class DMSParser(TopologyReaderBase): - Resids Segment: - Segids - Guesses the following attributes - - Atomtypes .. _DESRES: http://www.deshawresearch.com .. _Desmond: http://www.deshawresearch.com/resources_desmond.html .. _DMS: http://www.deshawresearch.com/Desmond_Users_Guide-0.7.pdf + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'DMS' @@ -161,7 +161,6 @@ def dict_factory(cursor, row): attrs['bond'] = bondlist attrs['bondorder'] = bondorder - atomtypes = guessers.guess_types(attrs['name']) topattrs = [] # Bundle in Atom level objects for attr, cls in [ @@ -173,7 +172,6 @@ def dict_factory(cursor, row): ('chain', ChainIDs), ]: topattrs.append(cls(attrs[attr])) - topattrs.append(Atomtypes(atomtypes, guessed=True)) # Residues atom_residx, (res_resids, diff --git a/package/MDAnalysis/topology/ExtendedPDBParser.py b/package/MDAnalysis/topology/ExtendedPDBParser.py index 2138d386923..ec6e1e527d2 100644 --- a/package/MDAnalysis/topology/ExtendedPDBParser.py +++ b/package/MDAnalysis/topology/ExtendedPDBParser.py @@ -78,8 +78,6 @@ class ExtendedPDBParser(PDBParser.PDBParser): - bonds - formalcharges - Guesses the following Attributes: - - masses See Also -------- diff --git a/package/MDAnalysis/topology/FHIAIMSParser.py b/package/MDAnalysis/topology/FHIAIMSParser.py index 688a3e1626c..fcf95691f33 100644 --- a/package/MDAnalysis/topology/FHIAIMSParser.py +++ b/package/MDAnalysis/topology/FHIAIMSParser.py @@ -47,15 +47,12 @@ """ import numpy as np -from . import guessers from ..lib.util import openany from .base import TopologyReaderBase from ..core.topology import Topology from ..core.topologyattrs import ( Atomnames, Atomids, - Atomtypes, - Masses, Resids, Resnums, Segids, @@ -69,10 +66,9 @@ class FHIAIMSParser(TopologyReaderBase): Creates the following attributes: - Atomnames - Guesses the following attributes: - - Atomtypes - - Masses - + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). """ format = ['IN', 'FHIAIMS'] @@ -100,14 +96,8 @@ def parse(self, **kwargs): names = np.asarray(names) natoms = len(names) - # Guessing time - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(names) - attrs = [Atomnames(names), Atomids(np.arange(natoms) + 1), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), diff --git a/package/MDAnalysis/topology/GMSParser.py b/package/MDAnalysis/topology/GMSParser.py index 022fb990708..812207ed674 100644 --- a/package/MDAnalysis/topology/GMSParser.py +++ b/package/MDAnalysis/topology/GMSParser.py @@ -48,15 +48,12 @@ import re import numpy as np -from . import guessers from ..lib.util import openany from .base import TopologyReaderBase from ..core.topology import Topology from ..core.topologyattrs import ( Atomids, Atomnames, - Atomtypes, - Masses, Resids, Resnums, Segids, @@ -75,11 +72,12 @@ class GMSParser(TopologyReaderBase): Creates the following Attributes: - names - atomic charges - Guesses: - - types - - masses .. versionadded:: 0.9.1 + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'GMS' @@ -112,15 +110,11 @@ def parse(self, **kwargs): at_charges.append(at_charge) #TODO: may be use coordinates info from _m.group(3-5) ?? - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(atomtypes) n_atoms = len(names) attrs = [ Atomids(np.arange(n_atoms) + 1), Atomnames(np.array(names, dtype=object)), AtomicCharges(np.array(at_charges, dtype=np.int32)), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), diff --git a/package/MDAnalysis/topology/GROParser.py b/package/MDAnalysis/topology/GROParser.py index 23fb0af5655..6bcaec24cb5 100644 --- a/package/MDAnalysis/topology/GROParser.py +++ b/package/MDAnalysis/topology/GROParser.py @@ -28,7 +28,6 @@ Read a list of atoms from a GROMOS/Gromacs GRO coordinate file to build a basic topology. -Atom types and masses are guessed. See Also -------- @@ -49,9 +48,7 @@ from ..lib.util import openany from ..core.topologyattrs import ( Atomnames, - Atomtypes, Atomids, - Masses, Resids, Resnames, Resnums, @@ -59,7 +56,6 @@ ) from ..core.topology import Topology from .base import TopologyReaderBase, change_squash -from . import guessers class GROParser(TopologyReaderBase): @@ -71,9 +67,10 @@ class GROParser(TopologyReaderBase): - atomids - atomnames - Guesses the following attributes - - atomtypes - - masses + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'GRO' @@ -129,9 +126,6 @@ def parse(self, **kwargs): for s in starts: resids[s:] += 100000 - # Guess types and masses - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(atomtypes) residx, (new_resids, new_resnames) = change_squash( (resids, resnames), (resids, resnames)) @@ -141,11 +135,9 @@ def parse(self, **kwargs): attrs = [ Atomnames(names), Atomids(indices), - Atomtypes(atomtypes, guessed=True), Resids(new_resids), Resnums(new_resids.copy()), Resnames(new_resnames), - Masses(masses, guessed=True), Segids(np.array(['SYSTEM'], dtype=object)) ] diff --git a/package/MDAnalysis/topology/GSDParser.py b/package/MDAnalysis/topology/GSDParser.py index f1ae72d287d..bd62d0f5f98 100644 --- a/package/MDAnalysis/topology/GSDParser.py +++ b/package/MDAnalysis/topology/GSDParser.py @@ -54,7 +54,6 @@ import os import numpy as np -from . import guessers from .base import TopologyReaderBase from ..core.topology import Topology from ..core.topologyattrs import ( diff --git a/package/MDAnalysis/topology/HoomdXMLParser.py b/package/MDAnalysis/topology/HoomdXMLParser.py index b8e7baa0613..f2d1cea9526 100644 --- a/package/MDAnalysis/topology/HoomdXMLParser.py +++ b/package/MDAnalysis/topology/HoomdXMLParser.py @@ -49,7 +49,6 @@ import xml.etree.ElementTree as ET import numpy as np -from . import guessers from ..lib.util import openany from .base import TopologyReaderBase from ..core.topology import Topology diff --git a/package/MDAnalysis/topology/ITPParser.py b/package/MDAnalysis/topology/ITPParser.py index 932430c0a45..d8552160278 100644 --- a/package/MDAnalysis/topology/ITPParser.py +++ b/package/MDAnalysis/topology/ITPParser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -27,7 +27,8 @@ Reads a GROMACS ITP_ or TOP_ file to build the system. The topology will contain atom IDs, segids, residue IDs, residue names, atom names, atom types, -charges, chargegroups, masses (guessed if not found), moltypes, and molnums. +charges, chargegroups, masses, moltypes, and molnums. +Any masses that are in the file will be read; any missing values will be guessed. Bonds, angles, dihedrals and impropers are also read from the file. If an ITP file is passed without a ``[ molecules ]`` directive, passing @@ -106,7 +107,7 @@ u = mda.Universe(ITP_tip5p, EXTRA_ATOMS=True, HW1_CHARGE=3, HW2_CHARGE=3) -These keyword variables are **case-sensitive**. Note that if you set keywords to +These keyword variables are **case-sensitive**. Note that if you set keywords to ``False`` or ``None``, they will be treated as if they are not defined in #ifdef conditions. For example, the universe below will only have 5 atoms. :: @@ -132,8 +133,10 @@ import logging import numpy as np +import warnings from ..lib.util import openany -from . import guessers +from ..guesser.tables import SYMB2Z +from ..guesser.default_guesser import DefaultGuesser from .base import TopologyReaderBase, change_squash, reduce_singular from ..core.topologyattrs import ( Atomids, @@ -152,6 +155,7 @@ Dihedrals, Impropers, AtomAttr, + Elements, ) from ..core.topology import Topology @@ -216,7 +220,7 @@ def define(self, line): except ValueError: _, variable = line.split() value = True - + # kwargs overrides files if variable not in self._original_defines: self.defines[variable] = value @@ -252,7 +256,7 @@ def skip_until_else(self, infile): break else: raise IOError('Missing #endif in {}'.format(self.current_file)) - + def skip_until_endif(self, infile): """Skip lines until #endif""" for line in self.clean_file_lines(infile): @@ -333,9 +337,9 @@ def parse_atoms(self, line): lst.append('') def parse_bonds(self, line): - self.add_param(line, self.bonds, n_funct=2, + self.add_param(line, self.bonds, n_funct=2, funct_values=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) - + def parse_angles(self, line): self.add_param(line, self.angles, n_funct=3, funct_values=(1, 2, 3, 4, 5, 6, 8, 10)) @@ -355,7 +359,7 @@ def parse_settles(self, line): # water molecules. # In ITP files this is defined with only the # oxygen atom index. The next two atoms are - # assumed to be hydrogens. Unlike TPRParser, + # assumed to be hydrogens. Unlike TPRParser, # the manual only lists this format (as of 2019). # These are treated as 2 bonds. # No angle component is included to avoid discrepancies @@ -471,6 +475,12 @@ class ITPParser(TopologyReaderBase): .. versionchanged:: 2.2.0 no longer adds angles for water molecules with SETTLE constraint + .. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + Added guessed elements if all elements are valid to preserve partial + mass guessing behavior + """ format = 'ITP' @@ -503,7 +513,7 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', self._molecules = [] # for order self.current_mol = None self.parser = self._pass - self.system_molecules = [] + self.system_molecules = [] # Open and check itp validity with openany(self.filename) as itpfile: @@ -523,10 +533,10 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', elif self.current_mol: self.parser = self.current_mol.parsers.get(section, self._pass) - + else: self.parser = self._pass - + else: self.parser(line) @@ -575,13 +585,21 @@ def parse(self, include_dir='/usr/local/gromacs/share/gromacs/top/', if not all(self.masses): empty = self.masses == '' - self.masses[empty] = guessers.guess_masses( - guessers.guess_types(self.types)[empty]) - attrs.append(Masses(np.array(self.masses, dtype=np.float64), - guessed=True)) + self.masses[empty] = Masses.missing_value_label + + attrs.append(Masses(np.array(self.masses, dtype=np.float64), + guessed=False)) + + self.elements = DefaultGuesser(None).guess_types(self.types) + if all(e.capitalize() in SYMB2Z for e in self.elements): + attrs.append(Elements(np.array(self.elements, + dtype=object), guessed=True)) + else: - attrs.append(Masses(np.array(self.masses, dtype=np.float64), - guessed=False)) + warnings.warn("Element information is missing, elements attribute " + "will not be populated. If needed these can be " + "guessed using universe.guess_TopologyAttrs(" + "to_guess=['elements']).") # residue stuff resids = np.array(self.resids, dtype=np.int32) diff --git a/package/MDAnalysis/topology/LAMMPSParser.py b/package/MDAnalysis/topology/LAMMPSParser.py index 85bf2dec8f6..62664b568bc 100644 --- a/package/MDAnalysis/topology/LAMMPSParser.py +++ b/package/MDAnalysis/topology/LAMMPSParser.py @@ -81,7 +81,6 @@ import functools import warnings -from . import guessers from ..lib.util import openany, conv_float from ..lib.mdamath import triclinic_box from .base import TopologyReaderBase, squash_by @@ -182,6 +181,10 @@ class as the topology and coordinate reader share many common see :ref:`atom_style_kwarg`. .. versionadded:: 0.9.0 + .. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'DATA' @@ -252,9 +255,9 @@ def _interpret_atom_style(atom_style): if missing_attrs: raise ValueError("atom_style string missing required field(s): {}" "".format(', '.join(missing_attrs))) - + return style_dict - + def parse(self, **kwargs): """Parses a LAMMPS_ DATA file. @@ -300,7 +303,7 @@ def parse(self, **kwargs): type, sect = self._parse_bond_section(sects[L], nentries, mapping) except KeyError: type, sect = [], [] - + top.add_TopologyAttr(attr(sect, type)) return top @@ -323,7 +326,7 @@ def read_DATA_timestep(self, n_atoms, TS_class, TS_kwargs, self.style_dict = None else: self.style_dict = self._interpret_atom_style(atom_style) - + header, sects = self.grab_datafile() unitcell = self._parse_box(header) @@ -361,7 +364,7 @@ def _parse_pos(self, datalines): style_dict = {'id': 0, 'x': 3, 'y': 4, 'z': 5} else: style_dict = self.style_dict - + for i, line in enumerate(datalines): line = line.split() @@ -520,10 +523,6 @@ def _parse_atoms(self, datalines, massdict=None): for i, at in enumerate(types): masses[i] = massdict[at] attrs.append(Masses(masses)) - else: - # Guess them - masses = guessers.guess_masses(types) - attrs.append(Masses(masses, guessed=True)) residx, resids = squash_by(resids)[:2] n_residues = len(resids) @@ -610,7 +609,7 @@ def parse(self, **kwargs): indices = np.zeros(natoms, dtype=int) types = np.zeros(natoms, dtype=object) - + atomline = fin.readline() # ITEM ATOMS attrs = atomline.split()[2:] # attributes on coordinate line col_ids = {attr: i for i, attr in enumerate(attrs)} # column ids diff --git a/package/MDAnalysis/topology/MMTFParser.py b/package/MDAnalysis/topology/MMTFParser.py index 51bc16c8ac0..e9332e9d689 100644 --- a/package/MDAnalysis/topology/MMTFParser.py +++ b/package/MDAnalysis/topology/MMTFParser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -38,6 +38,9 @@ .. versionchanged:: 2.0.0 Aliased ``bfactors`` topologyattribute to ``tempfactors``. ``tempfactors`` is deprecated and will be removed in 3.0 (Issue #1901) +.. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). Reads the following topology attributes: @@ -47,7 +50,6 @@ - tempfactor - bonds - charge - - masses (guessed) - name - occupancy - type @@ -76,7 +78,6 @@ from . import base -from . import guessers from ..core.topology import Topology from ..core.topologyattrs import ( AltLocs, @@ -87,7 +88,6 @@ Bonds, Charges, ICodes, - Masses, Occupancies, Resids, Resnames, @@ -188,8 +188,7 @@ def iter_atoms(field): charges = Charges(list(iter_atoms('formalChargeList'))) names = Atomnames(list(iter_atoms('atomNameList'))) types = Atomtypes(list(iter_atoms('elementList'))) - masses = Masses(guessers.guess_masses(types.values), guessed=True) - attrs.extend([charges, names, types, masses]) + attrs.extend([charges, names, types]) #optional are empty list if empty, sometimes arrays if len(mtop.atom_id_list): diff --git a/package/MDAnalysis/topology/MOL2Parser.py b/package/MDAnalysis/topology/MOL2Parser.py index 96d9dbdbd40..4345ca0efe7 100644 --- a/package/MDAnalysis/topology/MOL2Parser.py +++ b/package/MDAnalysis/topology/MOL2Parser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -44,7 +44,6 @@ import os import numpy as np -from . import guessers from ..lib.util import openany from .base import TopologyReaderBase, squash_by from ..core.topologyattrs import ( @@ -54,14 +53,13 @@ Bonds, Charges, Elements, - Masses, Resids, Resnums, Resnames, Segids, ) from ..core.topology import Topology -from .tables import SYBYL2SYMB +from ..guesser.tables import SYBYL2SYMB import warnings @@ -79,8 +77,6 @@ class MOL2Parser(TopologyReaderBase): - Bonds - Elements - Guesses the following: - - masses Notes ----- @@ -95,7 +91,7 @@ class MOL2Parser(TopologyReaderBase): 2. If no atoms have ``resname`` field, resnames attribute will not be set; If some atoms have ``resname`` while some do not, :exc:`ValueError` will occur. - + 3. If "NO_CHARGES" shows up in "@MOLECULE" section and no atoms have the ``charge`` field, charges attribute will not be set; If "NO_CHARGES" shows up while ``charge`` field appears, @@ -129,6 +125,10 @@ class MOL2Parser(TopologyReaderBase): Parse elements from atom types. .. versionchanged:: 2.2.0 Read MOL2 files with optional columns omitted. + .. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'MOL2' @@ -235,15 +235,12 @@ def parse(self, **kwargs): f"atoms: {invalid_elements}. " "These have been given an empty element record.") - masses = guessers.guess_masses(validated_elements) - attrs = [] attrs.append(Atomids(np.array(ids, dtype=np.int32))) attrs.append(Atomnames(np.array(names, dtype=object))) attrs.append(Atomtypes(np.array(types, dtype=object))) if has_charges: attrs.append(Charges(np.array(charges, dtype=np.float32))) - attrs.append(Masses(masses, guessed=True)) if not np.all(validated_elements == ''): attrs.append(Elements(validated_elements)) diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index e1e08dd04c6..bad6d2bc6d5 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -39,8 +39,8 @@ .. Note:: - The parser processes atoms and their names. Masses are guessed and set to 0 - if unknown. Partial charges are not set. Elements are parsed if they are + The parser processes atoms and their names. + Partial charges are not set. Elements are parsed if they are valid. If partially missing or incorrect, empty records are assigned. See Also @@ -61,8 +61,7 @@ import numpy as np import warnings -from .guessers import guess_masses, guess_types -from .tables import SYMB2Z +from ..guesser.tables import SYMB2Z from ..lib import util from .base import TopologyReaderBase, change_squash from ..core.topology import Topology @@ -75,7 +74,6 @@ Atomtypes, Elements, ICodes, - Masses, Occupancies, RecordTypes, Resids, @@ -169,9 +167,6 @@ class PDBParser(TopologyReaderBase): - bonds - formalcharges - Guesses the following Attributes: - - masses - See Also -------- :class:`MDAnalysis.coordinates.PDB.PDBReader` @@ -197,6 +192,9 @@ class PDBParser(TopologyReaderBase): .. versionchanged:: 2.5.0 Formal charges will not be populated if an unknown entry is encountered, instead a UserWarning is emitted. + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). """ format = ['PDB', 'ENT'] @@ -322,16 +320,8 @@ def _parseatoms(self): (occupancies, Occupancies, np.float32), ): attrs.append(Attr(np.array(vals, dtype=dtype))) - # Guessed attributes - # masses from types if they exist # OPT: We do this check twice, maybe could refactor to avoid this - if not any(elements): - atomtypes = guess_types(names) - attrs.append(Atomtypes(atomtypes, guessed=True)) - warnings.warn("Element information is missing, elements attribute " - "will not be populated. If needed these can be " - "guessed using MDAnalysis.topology.guessers.") - else: + if any(elements): # Feed atomtypes as raw element column, but validate elements atomtypes = elements attrs.append(Atomtypes(np.array(elements, dtype=object))) @@ -344,10 +334,16 @@ def _parseatoms(self): wmsg = (f"Unknown element {elem} found for some atoms. " f"These have been given an empty element record. " f"If needed they can be guessed using " - f"MDAnalysis.topology.guessers.") + f"universe.guess_TopologyAttrs(context='default'," + " to_guess=['elements']).") warnings.warn(wmsg) validated_elements.append('') attrs.append(Elements(np.array(validated_elements, dtype=object))) + else: + warnings.warn("Element information is missing, elements attribute " + "will not be populated. If needed these can be" + " guessed using universe.guess_TopologyAttrs(" + "context='default', to_guess=['elements']).") if any(formalcharges): try: @@ -374,9 +370,6 @@ def _parseatoms(self): else: attrs.append(FormalCharges(np.array(formalcharges, dtype=int))) - masses = guess_masses(atomtypes) - attrs.append(Masses(masses, guessed=True)) - # Residue level stuff from here resids = np.array(resids, dtype=np.int32) resnames = np.array(resnames, dtype=object) diff --git a/package/MDAnalysis/topology/PDBQTParser.py b/package/MDAnalysis/topology/PDBQTParser.py index a05ca35267a..97640820218 100644 --- a/package/MDAnalysis/topology/PDBQTParser.py +++ b/package/MDAnalysis/topology/PDBQTParser.py @@ -35,7 +35,7 @@ Notes ----- Only reads atoms and their names; connectivity is not -deduced. Masses are guessed and set to 0 if unknown. +deduced. See Also @@ -57,7 +57,6 @@ """ import numpy as np -from . import guessers from ..lib import util from .base import TopologyReaderBase, change_squash from ..core.topology import Topology @@ -68,7 +67,6 @@ Atomtypes, Charges, ICodes, - Masses, Occupancies, RecordTypes, Resids, @@ -97,14 +95,15 @@ class PDBQTParser(TopologyReaderBase): - tempfactors - charges - Guesses the following: - - masses .. versionchanged:: 0.18.0 Added parsing of Record types .. versionchanged:: 2.7.0 Columns 67 - 70 in ATOM records, corresponding to the field *footnote*, are now ignored. See Autodock's `reference`_. + .. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). .. _reference: https://autodock.scripps.edu/wp-content/uploads/sites/56/2021/10/AutoDock4.2.6_UserGuide.pdf @@ -151,8 +150,6 @@ def parse(self, **kwargs): n_atoms = len(serials) - masses = guessers.guess_masses(atomtypes) - attrs = [] for attrlist, Attr, dtype in ( (record_types, RecordTypes, object), @@ -165,7 +162,6 @@ def parse(self, **kwargs): (atomtypes, Atomtypes, object), ): attrs.append(Attr(np.array(attrlist, dtype=dtype))) - attrs.append(Masses(masses, guessed=True)) resids = np.array(resids, dtype=np.int32) icodes = np.array(icodes, dtype=object) diff --git a/package/MDAnalysis/topology/PQRParser.py b/package/MDAnalysis/topology/PQRParser.py index e5f7b6415b4..1adcd7fba2a 100644 --- a/package/MDAnalysis/topology/PQRParser.py +++ b/package/MDAnalysis/topology/PQRParser.py @@ -49,7 +49,6 @@ """ import numpy as np -from . import guessers from ..lib.util import openany from ..core.topologyattrs import ( Atomids, @@ -57,7 +56,6 @@ Atomtypes, Charges, ICodes, - Masses, Radii, RecordTypes, Resids, @@ -82,9 +80,6 @@ class PQRParser(TopologyReaderBase): - Resnames - Segids - Guesses the following: - - atomtypes (if not present, Gromacs generated PQR files have these) - - masses .. versionchanged:: 0.9.0 Read chainID from a PQR file and use it as segid (before we always used @@ -95,6 +90,10 @@ class PQRParser(TopologyReaderBase): Added parsing of Record types Can now read PQR files from Gromacs, these provide atom type as last column but don't have segids + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'PQR' @@ -191,20 +190,14 @@ def parse(self, **kwargs): n_atoms = len(serials) - if not elements: - atomtypes = guessers.guess_types(names) - guessed_types = True - else: + attrs = [] + if elements: atomtypes = elements - guessed_types = False - masses = guessers.guess_masses(atomtypes) + attrs.append(Atomtypes(atomtypes, False)) - attrs = [] attrs.append(Atomids(np.array(serials, dtype=np.int32))) attrs.append(Atomnames(np.array(names, dtype=object))) attrs.append(Charges(np.array(charges, dtype=np.float32))) - attrs.append(Atomtypes(atomtypes, guessed=guessed_types)) - attrs.append(Masses(masses, guessed=True)) attrs.append(RecordTypes(np.array(record_types, dtype=object))) attrs.append(Radii(np.array(radii, dtype=np.float32))) diff --git a/package/MDAnalysis/topology/PSFParser.py b/package/MDAnalysis/topology/PSFParser.py index 1961d21c7b0..70cd38d51fa 100644 --- a/package/MDAnalysis/topology/PSFParser.py +++ b/package/MDAnalysis/topology/PSFParser.py @@ -49,7 +49,7 @@ import numpy as np from ..lib.util import openany, atoi -from . import guessers + from .base import TopologyReaderBase, squash_by, change_squash from ..core.topologyattrs import ( Atomids, diff --git a/package/MDAnalysis/topology/TOPParser.py b/package/MDAnalysis/topology/TOPParser.py index 3153357b21b..9113750cf95 100644 --- a/package/MDAnalysis/topology/TOPParser.py +++ b/package/MDAnalysis/topology/TOPParser.py @@ -75,7 +75,7 @@ As of version 2.0.0, elements are no longer guessed if ATOMIC_NUMBER records are missing. In those scenarios, if elements are necessary, users will have to invoke the element guessers after parsing the topology file. Please see - :mod:`MDAnalysis.topology.guessers` for more details. + :mod:`MDAnalysis.guessers` for more details. .. _`PARM parameter/topology file specification`: https://ambermd.org/FileFormats.php#topo.cntrl @@ -91,7 +91,7 @@ import numpy as np import itertools -from .tables import Z2SYMB +from ..guesser.tables import Z2SYMB from ..lib.util import openany, FORTRANReader from .base import TopologyReaderBase, change_squash from ..core.topology import Topology @@ -294,14 +294,15 @@ def next_getter(): if 'elements' not in attrs: msg = ("ATOMIC_NUMBER record not found, elements attribute will " "not be populated. If needed these can be guessed using " - "MDAnalysis.topology.guessers.") + "universe.guess_TopologyAttrs(to_guess=['elements']).") logger.warning(msg) warnings.warn(msg) elif np.any(attrs['elements'].values == ""): # only send out one warning that some elements are unknown msg = ("Unknown ATOMIC_NUMBER value found for some atoms, these " "have been given an empty element record. If needed these " - "can be guessed using MDAnalysis.topology.guessers.") + "can be guessed using " + "universe.guess_TopologyAttrs(to_guess=['elements']).") logger.warning(msg) warnings.warn(msg) diff --git a/package/MDAnalysis/topology/TPRParser.py b/package/MDAnalysis/topology/TPRParser.py index d6f3717bf1a..396211d071f 100644 --- a/package/MDAnalysis/topology/TPRParser.py +++ b/package/MDAnalysis/topology/TPRParser.py @@ -166,7 +166,6 @@ __copyright__ = "GNU Public Licence, v2" -from . import guessers from ..lib.util import openany from .tpr import utils as tpr_utils from .tpr import setting as S diff --git a/package/MDAnalysis/topology/TXYZParser.py b/package/MDAnalysis/topology/TXYZParser.py index 56e6cc59b7c..0781488c9dc 100644 --- a/package/MDAnalysis/topology/TXYZParser.py +++ b/package/MDAnalysis/topology/TXYZParser.py @@ -47,17 +47,16 @@ import numpy as np import warnings -from . import guessers -from .tables import SYMB2Z +from ..guesser.tables import SYMB2Z from ..lib.util import openany from .base import TopologyReaderBase from ..core.topology import Topology +from ..guesser.tables import SYMB2Z from ..core.topologyattrs import ( Atomnames, Atomids, Atomtypes, Bonds, - Masses, Resids, Resnums, Segids, @@ -77,6 +76,10 @@ class TXYZParser(TopologyReaderBase): .. versionadded:: 0.17.0 .. versionchanged:: 2.4.0 Adding the `Element` attribute if all names are valid element symbols. + .. versionchanged:: 2.8.0 + Removed mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = ['TXYZ', 'ARC'] @@ -95,6 +98,7 @@ def parse(self, **kwargs): names = np.zeros(natoms, dtype=object) types = np.zeros(natoms, dtype=object) bonds = [] + # Find first atom line, maybe there's box information fline = inf.readline() try: @@ -120,26 +124,23 @@ def parse(self, **kwargs): if i < other_atom: bonds.append((i, other_atom)) - # Guessing time - masses = guessers.guess_masses(names) - attrs = [Atomnames(names), Atomids(atomids), Atomtypes(types), Bonds(tuple(bonds)), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), ] if all(n.capitalize() in SYMB2Z for n in names): attrs.append(Elements(np.array(names, dtype=object))) - + else: warnings.warn("Element information is missing, elements attribute " "will not be populated. If needed these can be " - "guessed using MDAnalysis.topology.guessers.") - + "guessed using universe.guess_TopologyAttrs(" + "to_guess=['elements']).") + top = Topology(natoms, 1, 1, attrs=attrs) diff --git a/package/MDAnalysis/topology/XYZParser.py b/package/MDAnalysis/topology/XYZParser.py index 4162e343517..cb0df129e08 100644 --- a/package/MDAnalysis/topology/XYZParser.py +++ b/package/MDAnalysis/topology/XYZParser.py @@ -1,5 +1,5 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # # MDAnalysis --- https://www.mdanalysis.org # Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors @@ -40,15 +40,12 @@ """ import numpy as np -from . import guessers from ..lib.util import openany from .base import TopologyReaderBase from ..core.topology import Topology from ..core.topologyattrs import ( Atomnames, Atomids, - Atomtypes, - Masses, Resids, Resnums, Segids, @@ -62,14 +59,15 @@ class XYZParser(TopologyReaderBase): Creates the following attributes: - Atomnames - Guesses the following attributes: - - Atomtypes - - Masses .. versionadded:: 0.9.1 .. versionchanged: 1.0.0 Store elements attribute, based on XYZ atom names + .. versionchanged:: 2.8.0 + Removed type and mass guessing (attributes guessing takes place now + through universe.guess_TopologyAttrs() API). + """ format = 'XYZ' @@ -91,14 +89,9 @@ def parse(self, **kwargs): name = inf.readline().split()[0] names[i] = name - # Guessing time - atomtypes = guessers.guess_types(names) - masses = guessers.guess_masses(names) attrs = [Atomnames(names), Atomids(np.arange(natoms) + 1), - Atomtypes(atomtypes, guessed=True), - Masses(masses, guessed=True), Resids(np.array([1])), Resnums(np.array([1])), Segids(np.array(['SYSTEM'], dtype=object)), diff --git a/package/MDAnalysis/topology/__init__.py b/package/MDAnalysis/topology/__init__.py index 32df510f47e..b1b756b5386 100644 --- a/package/MDAnalysis/topology/__init__.py +++ b/package/MDAnalysis/topology/__init__.py @@ -33,9 +33,8 @@ As a minimum, all topology parsers will provide atom ids, atom types, masses, resids, resnums and segids as well as assigning all atoms to residues and all residues to segments. For systems without residues and segments, this results -in there being a single residue and segment to which all atoms belong. Often -when data is not provided by a file, it will be guessed based on other data in -the file. In the event that this happens, a UserWarning will always be issued. +in there being a single residue and segment to which all atoms belong. +In the event that this happens, a UserWarning will always be issued. The following table lists the currently supported topology formats along with the attributes they provide. @@ -134,7 +133,7 @@ :mod:`MDAnalysis.topology.XYZParser` TXYZ [#a]_ txyz, names, atomids, Tinker_ XYZ File Parser. Reads atom labels, numbers - arc masses, types, and connectivity; masses are guessed from atoms names. + arc masses, types, and connectivity. bonds :mod:`MDAnalysis.topology.TXYZParser` GAMESS [#a]_ gms, names, GAMESS_ output parser. Read only atoms of assembly diff --git a/package/MDAnalysis/topology/core.py b/package/MDAnalysis/topology/core.py index 91596a0fefc..3ed1c7a3461 100644 --- a/package/MDAnalysis/topology/core.py +++ b/package/MDAnalysis/topology/core.py @@ -26,11 +26,6 @@ The various topology parsers make use of functions and classes in this module. They are mostly of use to developers. -See Also --------- -:mod:`MDAnalysis.topology.tables` - for some hard-coded atom information that is used by functions such as - :func:`guess_atom_type` and :func:`guess_atom_mass`. """ @@ -40,12 +35,6 @@ from collections import defaultdict # Local imports -from . import tables -from .guessers import ( - guess_atom_element, guess_atom_type, - get_atom_mass, guess_atom_mass, guess_atom_charge, - guess_bonds, guess_angles, guess_dihedrals, guess_improper_dihedrals, -) from ..core._get_readers import get_parser_for from ..lib.util import cached diff --git a/package/MDAnalysis/topology/guessers.py b/package/MDAnalysis/topology/guessers.py deleted file mode 100644 index a0847036de8..00000000000 --- a/package/MDAnalysis/topology/guessers.py +++ /dev/null @@ -1,526 +0,0 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# -""" -Guessing unknown Topology information --- :mod:`MDAnalysis.topology.guessers` -============================================================================= - -In general `guess_atom_X` returns the guessed value for a single value, -while `guess_Xs` will work on an array of many atoms. - - -Example uses of guessers ------------------------- - -Guessing elements from atom names -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Currently, it is possible to guess elements from atom names using -:func:`guess_atom_element` (or the synonymous :func:`guess_atom_type`). This can -be done in the following manner:: - - import MDAnalysis as mda - from MDAnalysis.topology.guessers import guess_atom_element - from MDAnalysisTests.datafiles import PRM7 - - u = mda.Universe(PRM7) - - print(u.atoms.names[1]) # returns the atom name H1 - - element = guess_atom_element(u.atoms.names[1]) - - print(element) # returns element H - -In the above example, we take an atom named H1 and use -:func:`guess_atom_element` to guess the element hydrogen (i.e. H). It is -important to note that element guessing is not always accurate. Indeed in cases -where the atom type is not recognised, we may end up with the wrong element. -For example:: - - import MDAnalysis as mda - from MDAnalysis.topology.guessers import guess_atom_element - from MDAnalysisTests.datafiles import PRM19SBOPC - - u = mda.Universe(PRM19SBOPC) - - print(u.atoms.names[-1]) # returns the atom name EPW - - element = guess_atom_element(u.atoms.names[-1]) - - print(element) # returns element P - -Here we find that virtual site atom 'EPW' was given the element P, which -would not be an expected result. We therefore always recommend that users -carefully check the outcomes of any guessers. - -In some cases, one may want to guess elements for an entire universe and add -this guess as a topology attribute. This can be done using :func:`guess_types` -in the following manner:: - - import MDAnalysis as mda - from MDAnalysis.topology.guessers import guess_types - from MDAnalysisTests.datafiles import PRM7 - - u = mda.Universe(PRM7) - - guessed_elements = guess_types(u.atoms.names) - - u.add_TopologyAttr('elements', guessed_elements) - - print(u.atoms.elements) # returns an array of guessed elements - -More information on adding topology attributes can found in the `user guide`_. - - -.. Links - -.. _user guide: https://www.mdanalysis.org/UserGuide/examples/constructing_universe.html#Adding-topology-attributes - -""" -import numpy as np -import warnings -import re - -from ..lib import distances -from . import tables - - -def guess_masses(atom_types): - """Guess the mass of many atoms based upon their type - - Parameters - ---------- - atom_types - Type of each atom - - Returns - ------- - atom_masses : np.ndarray dtype float64 - """ - validate_atom_types(atom_types) - masses = np.array([get_atom_mass(atom_t) for atom_t in atom_types], dtype=np.float64) - return masses - - -def validate_atom_types(atom_types): - """Vaildates the atom types based on whether they are available in our tables - - Parameters - ---------- - atom_types - Type of each atom - - Returns - ------- - None - - .. versionchanged:: 0.20.0 - Try uppercase atom type name as well - """ - for atom_type in np.unique(atom_types): - try: - tables.masses[atom_type] - except KeyError: - try: - tables.masses[atom_type.upper()] - except KeyError: - warnings.warn("Failed to guess the mass for the following atom types: {}".format(atom_type)) - - -def guess_types(atom_names): - """Guess the atom type of many atoms based on atom name - - Parameters - ---------- - atom_names - Name of each atom - - Returns - ------- - atom_types : np.ndarray dtype object - """ - return np.array([guess_atom_element(name) for name in atom_names], dtype=object) - - -def guess_atom_type(atomname): - """Guess atom type from the name. - - At the moment, this function simply returns the element, as - guessed by :func:`guess_atom_element`. - - - See Also - -------- - :func:`guess_atom_element` - :mod:`MDAnalysis.topology.tables` - - - """ - return guess_atom_element(atomname) - - -NUMBERS = re.compile(r'[0-9]') # match numbers -SYMBOLS = re.compile(r'[*+-]') # match *, +, - - -def guess_atom_element(atomname): - """Guess the element of the atom from the name. - - Looks in dict to see if element is found, otherwise it uses the first - character in the atomname. The table comes from CHARMM and AMBER atom - types, where the first character is not sufficient to determine the atom - type. Some GROMOS ions have also been added. - - .. Warning: The translation table is incomplete. This will probably result - in some mistakes, but it still better than nothing! - - See Also - -------- - :func:`guess_atom_type` - :mod:`MDAnalysis.topology.tables` - """ - if atomname == '': - return '' - try: - return tables.atomelements[atomname.upper()] - except KeyError: - # strip symbols - no_symbols = re.sub(SYMBOLS, '', atomname) - - # split name by numbers - no_numbers = re.split(NUMBERS, no_symbols) - no_numbers = list(filter(None, no_numbers)) #remove '' - # if no_numbers is not empty, use the first element of no_numbers - name = no_numbers[0].upper() if no_numbers else '' - - # just in case - if name in tables.atomelements: - return tables.atomelements[name] - - while name: - if name in tables.elements: - return name - if name[:-1] in tables.elements: - return name[:-1] - if name[1:] in tables.elements: - return name[1:] - if len(name) <= 2: - return name[0] - name = name[:-1] # probably element is on left not right - - # if it's numbers - return no_symbols - - -def guess_bonds(atoms, coords, box=None, **kwargs): - r"""Guess if bonds exist between two atoms based on their distance. - - Bond between two atoms is created, if the two atoms are within - - .. math:: - - d < f \cdot (R_1 + R_2) - - of each other, where :math:`R_1` and :math:`R_2` are the VdW radii - of the atoms and :math:`f` is an ad-hoc *fudge_factor*. This is - the `same algorithm that VMD uses`_. - - Parameters - ---------- - atoms : AtomGroup - atoms for which bonds should be guessed - coords : array - coordinates of the atoms (i.e., `AtomGroup.positions)`) - fudge_factor : float, optional - The factor by which atoms must overlap eachother to be considered a - bond. Larger values will increase the number of bonds found. [0.55] - vdwradii : dict, optional - To supply custom vdwradii for atoms in the algorithm. Must be a dict - of format {type:radii}. The default table of van der Waals radii is - hard-coded as :data:`MDAnalysis.topology.tables.vdwradii`. Any user - defined vdwradii passed as an argument will supercede the table - values. [``None``] - lower_bound : float, optional - The minimum bond length. All bonds found shorter than this length will - be ignored. This is useful for parsing PDB with altloc records where - atoms with altloc A and B maybe very close together and there should be - no chemical bond between them. [0.1] - box : array_like, optional - Bonds are found using a distance search, if unit cell information is - given, periodic boundary conditions will be considered in the distance - search. [``None``] - - Returns - ------- - list - List of tuples suitable for use in Universe topology building. - - Warnings - -------- - No check is done after the bonds are guessed to see if Lewis - structure is correct. This is wrong and will burn somebody. - - Raises - ------ - :exc:`ValueError` if inputs are malformed or `vdwradii` data is missing. - - - .. _`same algorithm that VMD uses`: - http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.1/ug/node26.html - - .. versionadded:: 0.7.7 - .. versionchanged:: 0.9.0 - Updated method internally to use more :mod:`numpy`, should work - faster. Should also use less memory, previously scaled as - :math:`O(n^2)`. *vdwradii* argument now augments table list - rather than replacing entirely. - """ - # why not just use atom.positions? - if len(atoms) != len(coords): - raise ValueError("'atoms' and 'coord' must be the same length") - - fudge_factor = kwargs.get('fudge_factor', 0.55) - - vdwradii = tables.vdwradii.copy() # so I don't permanently change it - user_vdwradii = kwargs.get('vdwradii', None) - if user_vdwradii: # this should make algo use their values over defaults - vdwradii.update(user_vdwradii) - - # Try using types, then elements - atomtypes = atoms.types - - # check that all types have a defined vdw - if not all(val in vdwradii for val in set(atomtypes)): - raise ValueError(("vdw radii for types: " + - ", ".join([t for t in set(atomtypes) if - not t in vdwradii]) + - ". These can be defined manually using the" + - " keyword 'vdwradii'")) - - lower_bound = kwargs.get('lower_bound', 0.1) - - if box is not None: - box = np.asarray(box) - - # to speed up checking, calculate what the largest possible bond - # atom that would warrant attention. - # then use this to quickly mask distance results later - max_vdw = max([vdwradii[t] for t in atomtypes]) - - bonds = [] - - pairs, dist = distances.self_capped_distance(coords, - max_cutoff=2.0*max_vdw, - min_cutoff=lower_bound, - box=box) - for idx, (i, j) in enumerate(pairs): - d = (vdwradii[atomtypes[i]] + vdwradii[atomtypes[j]])*fudge_factor - if (dist[idx] < d): - bonds.append((atoms[i].index, atoms[j].index)) - return tuple(bonds) - - -def guess_angles(bonds): - """Given a list of Bonds, find all angles that exist between atoms. - - Works by assuming that if atoms 1 & 2 are bonded, and 2 & 3 are bonded, - then (1,2,3) must be an angle. - - Returns - ------- - list of tuples - List of tuples defining the angles. - Suitable for use in u._topology - - - See Also - -------- - :meth:`guess_bonds` - - - .. versionadded 0.9.0 - """ - angles_found = set() - - for b in bonds: - for atom in b: - other_a = b.partner(atom) # who's my friend currently in Bond - for other_b in atom.bonds: - if other_b != b: # if not the same bond I start as - third_a = other_b.partner(atom) - desc = tuple([other_a.index, atom.index, third_a.index]) - if desc[0] > desc[-1]: # first index always less than last - desc = desc[::-1] - angles_found.add(desc) - - return tuple(angles_found) - - -def guess_dihedrals(angles): - """Given a list of Angles, find all dihedrals that exist between atoms. - - Works by assuming that if (1,2,3) is an angle, and 3 & 4 are bonded, - then (1,2,3,4) must be a dihedral. - - Returns - ------- - list of tuples - List of tuples defining the dihedrals. - Suitable for use in u._topology - - .. versionadded 0.9.0 - """ - dihedrals_found = set() - - for b in angles: - a_tup = tuple([a.index for a in b]) # angle as tuple of numbers - # if searching with b[0], want tuple of (b[2], b[1], b[0], +new) - # search the first and last atom of each angle - for atom, prefix in zip([b.atoms[0], b.atoms[-1]], - [a_tup[::-1], a_tup]): - for other_b in atom.bonds: - if not other_b.partner(atom) in b: - third_a = other_b.partner(atom) - desc = prefix + (third_a.index,) - if desc[0] > desc[-1]: - desc = desc[::-1] - dihedrals_found.add(desc) - - return tuple(dihedrals_found) - - -def guess_improper_dihedrals(angles): - """Given a list of Angles, find all improper dihedrals that exist between - atoms. - - Works by assuming that if (1,2,3) is an angle, and 2 & 4 are bonded, - then (2, 1, 3, 4) must be an improper dihedral. - ie the improper dihedral is the angle between the planes formed by - (1, 2, 3) and (1, 3, 4) - - Returns - ------- - List of tuples defining the improper dihedrals. - Suitable for use in u._topology - - .. versionadded 0.9.0 - """ - dihedrals_found = set() - - for b in angles: - atom = b[1] # select middle atom in angle - # start of improper tuple - a_tup = tuple([b[a].index for a in [1, 2, 0]]) - # if searching with b[1], want tuple of (b[1], b[2], b[0], +new) - # search the first and last atom of each angle - for other_b in atom.bonds: - other_atom = other_b.partner(atom) - # if this atom isn't in the angle I started with - if not other_atom in b: - desc = a_tup + (other_atom.index,) - if desc[0] > desc[-1]: - desc = desc[::-1] - dihedrals_found.add(desc) - - return tuple(dihedrals_found) - - -def get_atom_mass(element): - """Return the atomic mass in u for *element*. - - Masses are looked up in :data:`MDAnalysis.topology.tables.masses`. - - .. Warning:: Unknown masses are set to 0.0 - - .. versionchanged:: 0.20.0 - Try uppercase atom type name as well - """ - try: - return tables.masses[element] - except KeyError: - try: - return tables.masses[element.upper()] - except KeyError: - return 0.0 - - -def guess_atom_mass(atomname): - """Guess a mass based on the atom name. - - :func:`guess_atom_element` is used to determine the kind of atom. - - .. warning:: Anything not recognized is simply set to 0; if you rely on the - masses you might want to double check. - """ - return get_atom_mass(guess_atom_element(atomname)) - - -def guess_atom_charge(atomname): - """Guess atom charge from the name. - - .. Warning:: Not implemented; simply returns 0. - """ - # TODO: do something slightly smarter, at least use name/element - return 0.0 - - -def guess_aromaticities(atomgroup): - """Guess aromaticity of atoms using RDKit - - Parameters - ---------- - atomgroup : mda.core.groups.AtomGroup - Atoms for which the aromaticity will be guessed - - Returns - ------- - aromaticities : numpy.ndarray - Array of boolean values for the aromaticity of each atom - - - .. versionadded:: 2.0.0 - """ - mol = atomgroup.convert_to("RDKIT") - return np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()]) - - -def guess_gasteiger_charges(atomgroup): - """Guess Gasteiger partial charges using RDKit - - Parameters - ---------- - atomgroup : mda.core.groups.AtomGroup - Atoms for which the charges will be guessed - - Returns - ------- - charges : numpy.ndarray - Array of float values representing the charge of each atom - - - .. versionadded:: 2.0.0 - """ - mol = atomgroup.convert_to("RDKIT") - from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges - ComputeGasteigerCharges(mol, throwOnParamFailure=True) - return np.array([atom.GetDoubleProp("_GasteigerCharge") - for atom in mol.GetAtoms()], - dtype=np.float32) diff --git a/package/MDAnalysis/topology/tpr/obj.py b/package/MDAnalysis/topology/tpr/obj.py index 0524d77c6ec..5f5040c7db8 100644 --- a/package/MDAnalysis/topology/tpr/obj.py +++ b/package/MDAnalysis/topology/tpr/obj.py @@ -32,7 +32,7 @@ """ from collections import namedtuple -from ..tables import Z2SYMB +from ...guesser.tables import Z2SYMB TpxHeader = namedtuple( "TpxHeader", [ diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules.rst new file mode 100644 index 00000000000..7747fdc380f --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules.rst @@ -0,0 +1,61 @@ +.. Contains the formatted docstrings from the guesser modules located in 'mdanalysis/package/MDAnalysis/guesser' + +************************** +Guesser modules +************************** +This module contains the context-aware guessers, which are used by the :meth:`~MDAnalysis.core.Universe.Universe.guess_TopologyAttrs` API. Context-aware guessers' main purpose +is to be tailored guesser classes that target specific file format or force field (eg. PDB file format, or Martini forcefield). +Having such guessers makes attribute guessing more accurate and reliable than having generic guessing methods that doesn't fit all topologies. + +Example uses of guessers +------------------------ + +Guessing using :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` API +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +Guessing can be done through the Universe's :meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` as following:: + + import MDAnalysis as mda + from MDAnalysisTests.datafiles import PDB + + u = mda.Universe(PDB) + print(hasattr(u.atoms, 'elements')) # returns False + u.guess_TopologyAttrs(to_guess=['elements']) + print(u.atoms.elements) # print ['N' 'H' 'H' ... 'NA' 'NA' 'NA'] + +In the above example, we passed ``elements`` as the attribute we want to guess, and +:meth:`~MDAnalysis.core.universe.Universe.guess_TopologyAttrs` guess then add it as a topology +attribute to the ``AtomGroup`` of the universe. + +If the attribute already exist in the universe, passing the attribute of interest to the ``to_guess`` parameter will only fill the empty values of the attribute if any exists. +To override all the attribute values, you can pass the attribute to the ``force_guess`` parameter instead of the to_guess one as following:: + + import MDAnalysis as mda + from MDAnalysisTests.datafiles import PRM12 +  + u = mda.Universe(PRM12, context='default', to_guess=()) # types ['HW', 'OW', ..] + + u.guess_TopologyAttrs(force_guess=['types']) # types ['H', 'O', ..] + +N.B.: If you didn't pass any ``context`` to the API, it will use the :class:`~MDAnalysis.guesser.default_guesser.DefaultGuesser` + +.. rubric:: available guessers +.. toctree:: + :maxdepth: 1 + + guesser_modules/init + guesser_modules/default_guesser + + +.. rubric:: guesser core modules + +The remaining pages are primarily of interest to developers as they +contain functions and classes that are used in the implementation of +the context-specific guessers. + +.. toctree:: + :maxdepth: 1 + + guesser_modules/base + guesser_modules/tables diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules/base.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules/base.rst new file mode 100644 index 00000000000..cfba20f17be --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules/base.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.guesser.base diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules/default_guesser.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules/default_guesser.rst new file mode 100644 index 00000000000..a3f3f897152 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules/default_guesser.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.guesser.default_guesser diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules/init.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules/init.rst new file mode 100644 index 00000000000..6fa6449c5c3 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules/init.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.guesser.__init__ diff --git a/package/doc/sphinx/source/documentation_pages/guesser_modules/tables.rst b/package/doc/sphinx/source/documentation_pages/guesser_modules/tables.rst new file mode 100644 index 00000000000..6116739fe89 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/guesser_modules/tables.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.guesser.tables diff --git a/package/doc/sphinx/source/documentation_pages/topology/guessers.rst b/package/doc/sphinx/source/documentation_pages/topology/guessers.rst deleted file mode 100644 index e6449f5ddc8..00000000000 --- a/package/doc/sphinx/source/documentation_pages/topology/guessers.rst +++ /dev/null @@ -1,2 +0,0 @@ -.. automodule:: MDAnalysis.topology.guessers - :members: diff --git a/package/doc/sphinx/source/documentation_pages/topology/tables.rst b/package/doc/sphinx/source/documentation_pages/topology/tables.rst deleted file mode 100644 index f4d579ec9c8..00000000000 --- a/package/doc/sphinx/source/documentation_pages/topology/tables.rst +++ /dev/null @@ -1 +0,0 @@ -.. automodule:: MDAnalysis.topology.tables diff --git a/package/doc/sphinx/source/documentation_pages/topology_modules.rst b/package/doc/sphinx/source/documentation_pages/topology_modules.rst index ed8caba8ce6..01f3ab32e27 100644 --- a/package/doc/sphinx/source/documentation_pages/topology_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/topology_modules.rst @@ -28,10 +28,10 @@ topology file format in the *topology_format* keyword argument to topology/CRDParser topology/DLPolyParser topology/DMSParser - topology/FHIAIMSParser + topology/FHIAIMSParser topology/GMSParser topology/GROParser - topology/GSDParser + topology/GSDParser topology/HoomdXMLParser topology/ITPParser topology/LAMMPSParser @@ -59,6 +59,4 @@ the topology readers. topology/base topology/core - topology/guessers - topology/tables topology/tpr_util diff --git a/package/doc/sphinx/source/index.rst b/package/doc/sphinx/source/index.rst index e6171630c28..29e800d6a59 100644 --- a/package/doc/sphinx/source/index.rst +++ b/package/doc/sphinx/source/index.rst @@ -82,9 +82,9 @@ can be installed either with conda_ or pip_. conda ----- -First installation with conda_: +First installation with conda_: -.. code-block:: bash +.. code-block:: bash conda config --add channels conda-forge conda install mdanalysis @@ -93,7 +93,7 @@ which will automatically install a *full set of dependencies*. To upgrade later: -.. code-block:: bash +.. code-block:: bash conda update mdanalysis @@ -102,14 +102,14 @@ pip Installation with `pip`_ and a *minimal set of dependencies*: -.. code-block:: bash +.. code-block:: bash pip install --upgrade MDAnalysis To install with a *full set of dependencies* (which includes everything needed for :mod:`MDAnalysis.analysis`), add the ``[analysis]`` tag: -.. code-block:: bash +.. code-block:: bash pip install --upgrade MDAnalysis[analysis] @@ -121,7 +121,7 @@ If you want to `run the tests`_ or use example files to follow some of the examples in the documentation or the tutorials_, also install the ``MDAnalysisTests`` package: -.. code-block:: bash +.. code-block:: bash conda install mdanalysistests # with conda pip install --upgrade MDAnalysisTests # with pip @@ -187,12 +187,13 @@ Thank you! :caption: Documentation :numbered: :hidden: - + ./documentation_pages/overview ./documentation_pages/topology ./documentation_pages/selections ./documentation_pages/analysis_modules ./documentation_pages/topology_modules + ./documentation_pages/guesser_modules ./documentation_pages/coordinates_modules ./documentation_pages/converters ./documentation_pages/trajectory_transformations @@ -205,7 +206,7 @@ Thank you! ./documentation_pages/units ./documentation_pages/exceptions ./documentation_pages/references - + Indices and tables ================== @@ -213,4 +214,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/testsuite/MDAnalysisTests/analysis/test_base.py b/testsuite/MDAnalysisTests/analysis/test_base.py index 345e0dc671e..68b86fc9439 100644 --- a/testsuite/MDAnalysisTests/analysis/test_base.py +++ b/testsuite/MDAnalysisTests/analysis/test_base.py @@ -276,7 +276,7 @@ def test_parallelizable_transformations(): # pick any transformation that would allow # for parallelizable attribute from MDAnalysis.transformations import NoJump - u = mda.Universe(XTC) + u = mda.Universe(XTC, to_guess=()) u.trajectory.add_transformations(NoJump()) # test that serial works diff --git a/testsuite/MDAnalysisTests/analysis/test_dielectric.py b/testsuite/MDAnalysisTests/analysis/test_dielectric.py index ac3de34b659..21992cf5d5c 100644 --- a/testsuite/MDAnalysisTests/analysis/test_dielectric.py +++ b/testsuite/MDAnalysisTests/analysis/test_dielectric.py @@ -57,7 +57,7 @@ def test_temperature(self, ag): assert_allclose(eps.results['eps_mean'], 9.621, rtol=1e-03) def test_non_charges(self): - u = mda.Universe(DCD_TRICLINIC) + u = mda.Universe(DCD_TRICLINIC, to_guess=()) with pytest.raises(NoDataError, match="No charges defined given atomgroup."): DielectricConstant(u.atoms).run() diff --git a/testsuite/MDAnalysisTests/converters/test_openmm_parser.py b/testsuite/MDAnalysisTests/converters/test_openmm_parser.py index 5b67ca5924f..d3d923294f1 100644 --- a/testsuite/MDAnalysisTests/converters/test_openmm_parser.py +++ b/testsuite/MDAnalysisTests/converters/test_openmm_parser.py @@ -53,9 +53,15 @@ class OpenMMTopologyBase(ParserBase): "bonds", "chainIDs", "elements", + "types" ] expected_n_bonds = 0 + @pytest.fixture() + def top(self, filename): + with self.parser(filename) as p: + yield p.parse() + def test_creates_universe(self, filename): """Check that Universe works with this Parser""" u = mda.Universe(filename, topology_format="OPENMMTOPOLOGY") @@ -130,6 +136,14 @@ def test_masses(self, top): else: assert top.masses.values == [] + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, topology_format="OPENMMTOPOLOGY") + u_guessed_attrs = [attr.attrname for attr + in u._topology.guessed_attributes] + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + assert attr in u_guessed_attrs + class OpenMMAppTopologyBase(OpenMMTopologyBase): parser = mda.converters.OpenMMParser.OpenMMAppTopologyParser @@ -142,14 +156,25 @@ class OpenMMAppTopologyBase(OpenMMTopologyBase): "bonds", "chainIDs", "elements", + "types" ] expected_n_bonds = 0 + @pytest.fixture() + def top(self, filename): + with self.parser(filename) as p: + yield p.parse() + def test_creates_universe(self, filename): """Check that Universe works with this Parser""" u = mda.Universe(filename, topology_format="OPENMMAPP") assert isinstance(u, mda.Universe) + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, topology_format="OPENMMAPP") + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + class TestOpenMMTopologyParser(OpenMMTopologyBase): ref_filename = app.PDBFile(CONECT).topology @@ -171,10 +196,13 @@ def test_with_partial_elements(self): wmsg1 = ("Element information missing for some atoms. " "These have been given an empty element record ") - wmsg2 = ("For absent elements, atomtype has been " - "set to 'X' and mass has been set to 0.0. " - "If needed these can be guessed using " - "MDAnalysis.topology.guessers.") + wmsg2 = ( + "For absent elements, atomtype has been " + "set to 'X' and mass has been set to 0.0. " + "If needed these can be guessed using " + "universe.guess_TopologyAttrs(to_guess=['masses', 'types']). " + "(for MDAnalysis version 2.x this is done automatically," + " but it will be removed in 3.0).") with pytest.warns(UserWarning) as warnings: mda_top = self.parser(self.ref_filename).parse() @@ -182,6 +210,8 @@ def test_with_partial_elements(self): assert mda_top.types.values[3388] == 'X' assert mda_top.elements.values[3344] == '' assert mda_top.elements.values[3388] == '' + assert mda_top.masses.values[3344] == 0.0 + assert mda_top.masses.values[3388] == 0.0 assert len(warnings) == 2 assert str(warnings[0].message) == wmsg1 @@ -194,14 +224,20 @@ def test_no_elements_warn(): for a in omm_top.atoms(): a.element = None - wmsg = ("Element information is missing for all the atoms. " - "Elements attribute will not be populated. " - "Atomtype attribute will be guessed using atom " - "name and mass will be guessed using atomtype." - "See MDAnalysis.topology.guessers.") - - with pytest.warns(UserWarning, match=wmsg): + wmsg = ( + "Element information is missing for all the atoms. " + "Elements attribute will not be populated. " + "Atomtype attribute will be guessed using atom " + "name and mass will be guessed using atomtype." + "For MDAnalysis version 2.x this is done automatically, " + "but it will be removed in MDAnalysis v3.0. " + "These can be guessed using " + "universe.guess_TopologyAttrs(to_guess=['masses', 'types']) " + "See MDAnalysis.guessers.") + + with pytest.warns(UserWarning) as warnings: mda_top = parser(omm_top).parse() + assert str(warnings[0].message) == wmsg def test_invalid_element_symbols(): diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py index 85860162f7b..59c15af4c16 100644 --- a/testsuite/MDAnalysisTests/converters/test_rdkit.py +++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py @@ -26,9 +26,10 @@ from io import StringIO import MDAnalysis as mda +from MDAnalysis.guesser.default_guesser import DefaultGuesser + import numpy as np import pytest -from MDAnalysis.topology.guessers import guess_atom_element from MDAnalysisTests.datafiles import GRO, PDB_full, PDB_helix, mol2_molecule from MDAnalysisTests.util import import_not_available from numpy.testing import assert_allclose, assert_equal @@ -146,7 +147,8 @@ def pdb(self): def mol2(self): u = mda.Universe(mol2_molecule) # add elements - elements = np.array([guess_atom_element(x) for x in u.atoms.types], + guesser = DefaultGuesser(None) + elements = np.array([guesser.guess_atom_element(x) for x in u.atoms.types], dtype=object) u.add_TopologyAttr('elements', elements) return u @@ -154,7 +156,7 @@ def mol2(self): @pytest.fixture def peptide(self): u = mda.Universe(GRO) - elements = mda.topology.guessers.guess_types(u.atoms.names) + elements = mda.guesser.DefaultGuesser(None).guess_types(u.atoms.names) u.add_TopologyAttr('elements', elements) return u.select_atoms("resid 2-12") diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py b/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py index e54672902d9..e376ff09e37 100644 --- a/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py +++ b/testsuite/MDAnalysisTests/converters/test_rdkit_parser.py @@ -39,16 +39,19 @@ class RDKitParserBase(ParserBase): parser = mda.converters.RDKitParser.RDKitParser expected_attrs = ['ids', 'names', 'elements', 'masses', 'aromaticities', - 'resids', 'resnums', 'chiralities', - 'segids', - 'bonds', - ] - + 'resids', 'resnums', 'chiralities', 'segids', 'bonds', + ] + expected_n_atoms = 0 expected_n_residues = 1 expected_n_segments = 1 expected_n_bonds = 0 + @pytest.fixture() + def top(self, filename): + with self.parser(filename) as p: + yield p.parse() + def test_creates_universe(self, filename): u = mda.Universe(filename, format='RDKIT') assert isinstance(u, mda.Universe) @@ -56,11 +59,18 @@ def test_creates_universe(self, filename): def test_bonds_total_counts(self, top): assert len(top.bonds.values) == self.expected_n_bonds + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, format='RDKIT') + u_guessed_attrs = [a.attrname for a in u._topology.guessed_attributes] + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + assert attr in u_guessed_attrs + class TestRDKitParserMOL2(RDKitParserBase): ref_filename = mol2_molecule - expected_attrs = RDKitParserBase.expected_attrs + ['charges'] + expected_attrs = RDKitParserBase.expected_attrs + ['charges', 'types'] expected_n_atoms = 49 expected_n_residues = 1 @@ -138,6 +148,10 @@ def test_aromaticity(self, top, filename): atom.GetIsAromatic() for atom in filename.GetAtoms()]) assert_equal(expected, top.aromaticities.values) + def test_guessed_types(self, filename): + u = mda.Universe(filename, format='RDKIT') + assert_equal(u.atoms.types[:7], ['N.am', 'S.o2', + 'N.am', 'N.am', 'O.2', 'O.2', 'C.3']) class TestRDKitParserPDB(RDKitParserBase): ref_filename = PDB_helix @@ -145,7 +159,6 @@ class TestRDKitParserPDB(RDKitParserBase): expected_attrs = RDKitParserBase.expected_attrs + [ 'resnames', 'altLocs', 'chainIDs', 'occupancies', 'icodes', 'tempfactors'] - guessed_attrs = ['types'] expected_n_atoms = 137 expected_n_residues = 13 @@ -165,12 +178,14 @@ def test_partial_residueinfo_raise_error(self, filename): mh = Chem.AddHs(mol, addResidueInfo=True) mda.Universe(mh) + def test_guessed_types(self, filename): + u = mda.Universe(filename, format='RDKIT') + assert_equal(u.atoms.types[:7], ['N', 'H', 'C', 'H', 'C', 'H', 'H']) + class TestRDKitParserSMILES(RDKitParserBase): ref_filename = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" - guessed_attrs = ['types'] - expected_n_atoms = 24 expected_n_residues = 1 expected_n_segments = 1 @@ -186,8 +201,6 @@ def filename(self): class TestRDKitParserSDF(RDKitParserBase): ref_filename = SDF_molecule - guessed_attrs = ['types'] - expected_n_atoms = 49 expected_n_residues = 1 expected_n_segments = 1 @@ -200,3 +213,7 @@ def filename(self): def test_bond_orders(self, top, filename): expected = [bond.GetBondTypeAsDouble() for bond in filename.GetBonds()] assert top.bonds.order == expected + + def test_guessed_types(self, filename): + u = mda.Universe(filename, format='RDKIT') + assert_equal(u.atoms.types[:7], ['CA', 'C', 'C', 'C', 'C', 'C', 'O']) diff --git a/testsuite/MDAnalysisTests/coordinates/base.py b/testsuite/MDAnalysisTests/coordinates/base.py index 3de8cfb9ff6..39770e1460b 100644 --- a/testsuite/MDAnalysisTests/coordinates/base.py +++ b/testsuite/MDAnalysisTests/coordinates/base.py @@ -504,7 +504,7 @@ def test_timeseries_values(self, reader, slice): @pytest.mark.parametrize('asel', ("index 1", "index 2", "index 1 to 3")) def test_timeseries_asel_shape(self, reader, asel): - atoms = mda.Universe(reader.filename).select_atoms(asel) + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms(asel) timeseries = reader.timeseries(atoms, order='fac') assert(timeseries.shape[0] == len(reader)) assert(timeseries.shape[1] == len(atoms)) @@ -513,28 +513,28 @@ def test_timeseries_asel_shape(self, reader, asel): def test_timeseries_empty_asel(self, reader): with pytest.warns(UserWarning, match="Empty string to select atoms, empty group returned."): - atoms = mda.Universe(reader.filename).select_atoms(None) + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms(None) with pytest.raises(ValueError, match="Timeseries requires at least"): reader.timeseries(asel=atoms) def test_timeseries_empty_atomgroup(self, reader): with pytest.warns(UserWarning, match="Empty string to select atoms, empty group returned."): - atoms = mda.Universe(reader.filename).select_atoms(None) + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms(None) with pytest.raises(ValueError, match="Timeseries requires at least"): reader.timeseries(atomgroup=atoms) def test_timeseries_asel_warns_deprecation(self, reader): - atoms = mda.Universe(reader.filename).select_atoms("index 1") + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms("index 1") with pytest.warns(DeprecationWarning, match="asel argument to"): timeseries = reader.timeseries(asel=atoms, order='fac') def test_timeseries_atomgroup(self, reader): - atoms = mda.Universe(reader.filename).select_atoms("index 1") + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms("index 1") timeseries = reader.timeseries(atomgroup=atoms, order='fac') def test_timeseries_atomgroup_asel_mutex(self, reader): - atoms = mda.Universe(reader.filename).select_atoms("index 1") + atoms = mda.Universe(reader.filename, to_guess=()).select_atoms("index 1") with pytest.raises(ValueError, match="Cannot provide both"): timeseries = reader.timeseries(atomgroup=atoms, asel=atoms, order='fac') diff --git a/testsuite/MDAnalysisTests/coordinates/test_chainreader.py b/testsuite/MDAnalysisTests/coordinates/test_chainreader.py index 2049b687279..9f81b8c325c 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_chainreader.py +++ b/testsuite/MDAnalysisTests/coordinates/test_chainreader.py @@ -52,12 +52,12 @@ def transformed(ref): return mda.Universe(PSF, [DCD, CRD, DCD, CRD, DCD, CRD, CRD], transformations=[translate([10,10,10])]) - + def test_regular_repr(self): u = mda.Universe(PSF, [DCD, CRD, DCD]) assert_equal("", u.trajectory.__repr__()) - - + + def test_truncated_repr(self, universe): assert_equal("", universe.trajectory.__repr__()) @@ -135,8 +135,8 @@ def test_write_dcd(self, universe, tmpdir): ts_new._pos, self.prec, err_msg="Coordinates disagree at frame {0:d}".format( - ts_orig.frame)) - + ts_orig.frame)) + def test_transform_iteration(self, universe, transformed): vector = np.float32([10,10,10]) # # Are the transformations applied and @@ -151,7 +151,7 @@ def test_transform_iteration(self, universe, transformed): frame = ts.frame ref = universe.trajectory[frame].positions + vector assert_almost_equal(ts.positions, ref, decimal = 6) - + def test_transform_slice(self, universe, transformed): vector = np.float32([10,10,10]) # what happens when we slice the trajectory? @@ -159,7 +159,7 @@ def test_transform_slice(self, universe, transformed): frame = ts.frame ref = universe.trajectory[frame].positions + vector assert_almost_equal(ts.positions, ref, decimal = 6) - + def test_transform_switch(self, universe, transformed): vector = np.float32([10,10,10]) # grab a frame: @@ -170,7 +170,7 @@ def test_transform_switch(self, universe, transformed): assert_almost_equal(transformed.trajectory[10].positions, newref, decimal = 6) # what happens when we comeback to the previous frame? assert_almost_equal(transformed.trajectory[2].positions, ref, decimal = 6) - + def test_transfrom_rewind(self, universe, transformed): vector = np.float32([10,10,10]) ref = universe.trajectory[0].positions + vector @@ -221,13 +221,13 @@ def test_set_all_format_lammps(self): assert_equal(time_values, np.arange(11)) def test_set_format_tuples_and_format(self): - universe = mda.Universe(GRO, [(PDB, 'pdb'), GRO, GRO, (XTC, 'xtc'), + universe = mda.Universe(GRO, [(PDB, 'pdb'), GRO, GRO, (XTC, 'xtc'), (TRR, 'trr')], format='gro') assert universe.trajectory.n_frames == 23 assert_equal(universe.trajectory.filenames, [PDB, GRO, GRO, XTC, TRR]) - + with pytest.raises(TypeError) as errinfo: - mda.Universe(GRO, [(PDB, 'pdb'), GRO, GRO, (XTC, 'xtc'), + mda.Universe(GRO, [(PDB, 'pdb'), GRO, GRO, (XTC, 'xtc'), (TRR, 'trr')], format='pdb') assert 'Unable to read' in str(errinfo.value) @@ -268,7 +268,7 @@ def build_trajectories(folder, sequences, fmt='xtc'): fnames = [] for index, subseq in enumerate(sequences): coords = np.zeros((len(subseq), 1, 3), dtype=np.float32) + index - u = mda.Universe(utop._topology, coords) + u = mda.Universe(utop._topology, coords, to_guess=()) out_traj = mda.Writer(template.format(index), n_atoms=len(u.atoms)) fnames.append(out_traj.filename) with out_traj: @@ -320,7 +320,7 @@ def __init__(self, seq, n_frames, order): def test_order(self, seq_info, tmpdir, fmt): folder = str(tmpdir) utop, fnames = build_trajectories(folder, sequences=seq_info.seq, fmt=fmt) - u = mda.Universe(utop._topology, fnames, continuous=True) + u = mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) assert u.trajectory.n_frames == seq_info.n_frames for i, ts in enumerate(u.trajectory): assert_almost_equal(i, ts.time, decimal=4) @@ -331,14 +331,14 @@ def test_start_frames(self, tmpdir): folder = str(tmpdir) sequences = ([0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7]) utop, fnames = build_trajectories(folder, sequences=sequences,) - u = mda.Universe(utop._topology, fnames, continuous=True) + u = mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) assert_equal(u.trajectory._start_frames, [0, 2, 4]) def test_missing(self, tmpdir): folder = str(tmpdir) sequences = ([0, 1, 2, 3], [5, 6, 7, 8, 9]) utop, fnames = build_trajectories(folder, sequences=sequences,) - u = mda.Universe(utop._topology, fnames, continuous=True) + u = mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) assert u.trajectory.n_frames == 9 def test_warning(self, tmpdir): @@ -347,7 +347,7 @@ def test_warning(self, tmpdir): sequences = ([0, 1, 2, 3], [5, 6, 7]) utop, fnames = build_trajectories(folder, sequences=sequences,) with pytest.warns(UserWarning): - mda.Universe(utop._topology, fnames, continuous=True) + mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) def test_interleaving_error(self, tmpdir): folder = str(tmpdir) @@ -355,7 +355,7 @@ def test_interleaving_error(self, tmpdir): sequences = ([0, 2, 4, 6], [1, 3, 5, 7]) utop, fnames = build_trajectories(folder, sequences=sequences,) with pytest.raises(RuntimeError): - mda.Universe(utop._topology, fnames, continuous=True) + mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) def test_easy_trigger_warning(self, tmpdir): folder = str(tmpdir) @@ -372,14 +372,14 @@ def test_easy_trigger_warning(self, tmpdir): warnings.filterwarnings( action='ignore', category=ImportWarning) - mda.Universe(utop._topology, fnames, continuous=True) + mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) def test_single_frames(self, tmpdir): folder = str(tmpdir) sequences = ([0, 1, 2, 3], [5, ]) utop, fnames = build_trajectories(folder, sequences=sequences,) with pytest.raises(RuntimeError): - mda.Universe(utop._topology, fnames, continuous=True) + mda.Universe(utop._topology, fnames, continuous=True, to_guess=()) def test_mixed_filetypes(self): with pytest.raises(ValueError): @@ -388,9 +388,9 @@ def test_mixed_filetypes(self): def test_unsupported_filetypes(self): with pytest.raises(NotImplementedError): mda.Universe(PSF, [DCD, DCD], continuous=True) - # see issue 2353. The PDB reader has multiple format endings. To ensure - # the not implemented error is thrown we do a check here. A more - # careful test in the future would be a dummy reader with multiple + # see issue 2353. The PDB reader has multiple format endings. To ensure + # the not implemented error is thrown we do a check here. A more + # careful test in the future would be a dummy reader with multiple # formats, just in case PDB will allow continuous reading in the future. with pytest.raises(ValueError): mda.Universe(PDB, [PDB, XTC], continuous=True) diff --git a/testsuite/MDAnalysisTests/coordinates/test_h5md.py b/testsuite/MDAnalysisTests/coordinates/test_h5md.py index d3fec51f818..76e80a2a46d 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_h5md.py +++ b/testsuite/MDAnalysisTests/coordinates/test_h5md.py @@ -449,7 +449,7 @@ def test_parse_n_atoms(self, h5md_file, outfile, group1, group2): except KeyError: continue - u = mda.Universe(outfile) + u = mda.Universe(outfile, to_guess=()) assert_equal(u.atoms.n_atoms, n_atoms_in_dset) def test_parse_n_atoms_error(self, h5md_file, outfile): diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index c009682421e..7e365dad51a 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -492,7 +492,7 @@ def test_scale_factor_coordinates(self, tmpdir): expected = np.asarray(range(3), dtype=np.float32) * 2.0 with tmpdir.as_cwd(): self.create_ncdf(params) - u = mda.Universe(params['filename']) + u = mda.Universe(params['filename'], to_guess=()) for ts in u.trajectory: assert_almost_equal(ts.positions[0], expected, self.prec) @@ -502,7 +502,7 @@ def test_scale_factor_velocities(self, tmpdir): expected = np.asarray(range(3), dtype=np.float32) * 3.0 with tmpdir.as_cwd(): self.create_ncdf(params) - u = mda.Universe(params['filename']) + u = mda.Universe(params['filename'], to_guess=()) for ts in u.trajectory: assert_almost_equal(ts.velocities[0], expected, self.prec) @@ -512,7 +512,7 @@ def test_scale_factor_forces(self, tmpdir): expected = np.asarray(range(3), dtype=np.float32) * 10.0 * 4.184 with tmpdir.as_cwd(): self.create_ncdf(params) - u = mda.Universe(params['filename']) + u = mda.Universe(params['filename'], to_guess=()) for ts in u.trajectory: assert_almost_equal(ts.forces[0], expected, self.prec) @@ -526,7 +526,7 @@ def test_scale_factor_box(self, tmpdir, mutation, expected): params = self.gen_params(keypair=mutation, restart=False) with tmpdir.as_cwd(): self.create_ncdf(params) - u = mda.Universe(params['filename']) + u = mda.Universe(params['filename'], to_guess=()) for ts in u.trajectory: assert_almost_equal(ts.dimensions, expected, self.prec) @@ -599,7 +599,7 @@ def test_ioerror(self, tmpdir): with tmpdir.as_cwd(): self.create_ncdf(params) with pytest.raises(IOError): - u = mda.Universe(params['filename']) + u = mda.Universe(params['filename'], to_guess=()) u.trajectory.close() u.trajectory[-1] diff --git a/testsuite/MDAnalysisTests/coordinates/test_timestep_api.py b/testsuite/MDAnalysisTests/coordinates/test_timestep_api.py index 2d0228fcd58..423a255cc49 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_timestep_api.py +++ b/testsuite/MDAnalysisTests/coordinates/test_timestep_api.py @@ -119,7 +119,7 @@ def test_iter(self, ts): def test_repr(self, ts): assert_equal(type(repr(ts)), str) - + def test_repr_with_box(self, ts): assert("with unit cell dimensions" in repr(ts)) @@ -698,7 +698,7 @@ def test_dt(self, universe): def test_atomgroup_dims_access(uni): uni_args, uni_kwargs = uni # check that AtomGroup.dimensions always returns a copy - u = mda.Universe(*uni_args, **uni_kwargs) + u = mda.Universe(*uni_args, **uni_kwargs, to_guess=()) ag = u.atoms[:10] diff --git a/testsuite/MDAnalysisTests/coordinates/test_trz.py b/testsuite/MDAnalysisTests/coordinates/test_trz.py index e0803aa7978..e0f20f961a6 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_trz.py +++ b/testsuite/MDAnalysisTests/coordinates/test_trz.py @@ -162,7 +162,7 @@ def test_read_zero_box(self, tmpdir): with mda.Writer(outfile, n_atoms=10) as w: w.write(u) - u2 = mda.Universe(outfile, n_atoms=10) + u2 = mda.Universe(outfile, n_atoms=10, to_guess=()) assert u2.dimensions is None diff --git a/testsuite/MDAnalysisTests/core/test_atomgroup.py b/testsuite/MDAnalysisTests/core/test_atomgroup.py index 4456362c498..fe51cf24073 100644 --- a/testsuite/MDAnalysisTests/core/test_atomgroup.py +++ b/testsuite/MDAnalysisTests/core/test_atomgroup.py @@ -129,7 +129,7 @@ def test_write_frames(self, u, tmpdir, frames): ref_positions = np.stack([ts.positions.copy() for ts in selection]) u.atoms.write(destination, frames=frames) - u_new = mda.Universe(destination) + u_new = mda.Universe(destination, to_guess=()) new_positions = np.stack([ts.positions.copy() for ts in u_new.trajectory]) assert_array_almost_equal(new_positions, ref_positions) @@ -145,7 +145,7 @@ def test_write_frame_iterator(self, u, tmpdir, frames): ref_positions = np.stack([ts.positions.copy() for ts in selection]) u.atoms.write(destination, frames=selection) - u_new = mda.Universe(destination) + u_new = mda.Universe(destination, to_guess=()) new_positions = np.stack([ts.positions.copy() for ts in u_new.trajectory]) assert_array_almost_equal(new_positions, ref_positions) @@ -155,7 +155,7 @@ def test_write_frame_iterator(self, u, tmpdir, frames): def test_write_frame_none(self, u, tmpdir, extension, compression): destination = str(tmpdir / 'test.' + extension + compression) u.atoms.write(destination, frames=None) - u_new = mda.Universe(destination) + u_new = mda.Universe(destination, to_guess=()) new_positions = np.stack([ts.positions for ts in u_new.trajectory]) # Most format only save 3 decimals; XTC even has only 2. assert_array_almost_equal(u.atoms.positions[None, ...], @@ -165,7 +165,8 @@ def test_write_frame_none(self, u, tmpdir, extension, compression): def test_write_frames_all(self, u, tmpdir, compression): destination = str(tmpdir / 'test.dcd' + compression) u.atoms.write(destination, frames='all') - u_new = mda.Universe(destination) + + u_new = mda.Universe(destination, to_guess=()) ref_positions = np.stack([ts.positions.copy() for ts in u.trajectory]) new_positions = np.stack([ts.positions.copy() for ts in u_new.trajectory]) assert_array_almost_equal(new_positions, ref_positions) diff --git a/testsuite/MDAnalysisTests/core/test_topologyattrs.py b/testsuite/MDAnalysisTests/core/test_topologyattrs.py index a0c1acdf4bd..5489c381af2 100644 --- a/testsuite/MDAnalysisTests/core/test_topologyattrs.py +++ b/testsuite/MDAnalysisTests/core/test_topologyattrs.py @@ -82,7 +82,7 @@ def attr(self, top): @pytest.fixture() def universe(self, top): - return mda.Universe(top) + return mda.Universe(top, to_guess=()) def test_len(self, attr): assert len(attr) == len(attr.values) @@ -197,6 +197,17 @@ def test_next_emptyresidue(self, u): assert groupsize == 0 assert groupsize == len(u.residues[[]].atoms) + def test_missing_values(self, attr): + assert_equal(attr.are_values_missing(self.values), np.array( + [False, False, False, False, False, False, + False, False, False, False])) + + def test_missing_value_label(self): + self.attrclass.missing_value_label = 'FOO' + values = np.array(['NA', 'C', 'N', 'FOO']) + assert_equal(self.attrclass.are_values_missing(values), + np.array([False, False, False, True])) + class AggregationMixin(TestAtomAttr): def test_get_residues(self, attr): @@ -216,6 +227,10 @@ def test_get_segment(self, attr): class TestMasses(AggregationMixin): attrclass = tpattrs.Masses + def test_missing_masses(self): + values = [1., 2., np.nan, 3.] + assert_equal(self.attrclass.are_values_missing(values), + np.array([False, False, True, False])) class TestCharges(AggregationMixin): values = np.array([+2, -1, 0, -1, +1, +2, 0, 0, 0, -1]) diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 80259df5e79..3e41a38a967 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -47,7 +47,7 @@ GRO, TRR, two_water_gro, two_water_gro_nonames, TRZ, TRZ_psf, - PDB, MMTF, + PDB, MMTF, CONECT, ) import MDAnalysis as mda @@ -239,7 +239,7 @@ def test_from_bad_smiles(self): def test_no_Hs(self): smi = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" - u = mda.Universe.from_smiles(smi, addHs=False, + u = mda.Universe.from_smiles(smi, addHs=False, generate_coordinates=False, format='RDKIT') assert u.atoms.n_atoms == 14 assert len(u.bonds.indices) == 15 @@ -261,7 +261,7 @@ def test_generate_coordinates_numConfs(self): def test_rdkit_kwargs(self): # test for bad kwarg: # Unfortunately, exceptions from Boost cannot be passed to python, - # we cannot `from Boost.Python import ArgumentError` and use it with + # we cannot `from Boost.Python import ArgumentError` and use it with # pytest.raises(ArgumentError), so "this is the way" try: u = mda.Universe.from_smiles("CCO", rdkit_kwargs=dict(abc=42)) @@ -274,7 +274,7 @@ def test_rdkit_kwargs(self): u1 = mda.Universe.from_smiles("C", rdkit_kwargs=dict(randomSeed=42)) u2 = mda.Universe.from_smiles("C", rdkit_kwargs=dict(randomSeed=51)) with pytest.raises(AssertionError) as e: - assert_equal(u1.trajectory.coordinate_array, + assert_equal(u1.trajectory.coordinate_array, u2.trajectory.coordinate_array) assert "Mismatched elements: 15 / 15 (100%)" in str(e.value) @@ -385,6 +385,41 @@ def test_list(self): ref = translate([10,10,10])(uref.trajectory.ts) assert_almost_equal(u.trajectory.ts.positions, ref, decimal=6) + +class TestGuessTopologyAttrs(object): + def test_automatic_type_and_mass_guessing(self): + u = mda.Universe(PDB_small) + assert_equal(len(u.atoms.masses), 3341) + assert_equal(len(u.atoms.types), 3341) + + def test_no_type_and_mass_guessing(self): + u = mda.Universe(PDB_small, to_guess=()) + assert not hasattr(u.atoms, 'masses') + assert not hasattr(u.atoms, 'types') + + def test_invalid_context(self): + u = mda.Universe(PDB_small) + with pytest.raises(KeyError): + u.guess_TopologyAttrs(context='trash', to_guess=['masses']) + + def test_invalid_attributes(self): + u = mda.Universe(PDB_small) + with pytest.raises(ValueError): + u.guess_TopologyAttrs(to_guess=['trash']) + + def test_guess_masses_before_types(self): + u = mda.Universe(PDB_small, to_guess=('masses', 'types')) + assert_equal(len(u.atoms.masses), 3341) + assert_equal(len(u.atoms.types), 3341) + + def test_guessing_read_attributes(self): + u = mda.Universe(PSF) + old_types = u.atoms.types + u.guess_TopologyAttrs(force_guess=['types']) + with pytest.raises(AssertionError): + assert_equal(old_types, u.atoms.types) + + class TestGuessMasses(object): """Tests the Mass Guesser in topology.guessers """ @@ -433,7 +468,7 @@ def test_universe_guess_bonds_no_vdwradii(self): def test_universe_guess_bonds_with_vdwradii(self, vdw): """Unknown atom types, but with vdw radii here to save the day""" u = mda.Universe(two_water_gro_nonames, guess_bonds=True, - vdwradii=vdw) + vdwradii=vdw) self._check_universe(u) assert u.kwargs['guess_bonds'] assert_equal(vdw, u.kwargs['vdwradii']) @@ -450,7 +485,7 @@ def test_universe_guess_bonds_arguments(self): are being passed correctly. """ u = mda.Universe(two_water_gro, guess_bonds=True) - + self._check_universe(u) assert u.kwargs["guess_bonds"] assert u.kwargs["fudge_factor"] @@ -516,6 +551,17 @@ def test_guess_bonds_periodicity(self): self._check_atomgroup(ag, u) + def guess_bonds_with_to_guess(self): + u = mda.Universe(two_water_gro) + has_bonds = hasattr(u.atoms, 'bonds') + u.guess_TopologyAttrs(to_guess=['bonds']) + assert not has_bonds + assert u.atoms.bonds + + def test_guess_read_bonds(self): + u = mda.Universe(CONECT) + assert len(u.bonds) == 72 + class TestInMemoryUniverse(object): def test_reader_w_timeseries(self): @@ -747,10 +793,14 @@ def test_add_connection(self, universe, attr, values): ('impropers', [(1, 2, 3)]), ) ) - def add_connection_error(self, universe, attr, values): + def test_add_connection_error(self, universe, attr, values): with pytest.raises(ValueError): universe.add_TopologyAttr(attr, values) + def test_add_attr_length_error(self, universe): + with pytest.raises(ValueError): + universe.add_TopologyAttr('masses', np.array([1, 2, 3], dtype=np.float64)) + class TestDelTopologyAttr(object): @pytest.fixture() @@ -827,7 +877,7 @@ def potatoes(self): return "potoooooooo" transplants["Universe"].append(("potatoes", potatoes)) - + universe.add_TopologyAttr("tubers") assert universe.potatoes() == "potoooooooo" universe.del_TopologyAttr("tubers") @@ -1375,6 +1425,6 @@ def test_only_top(self): with pytest.warns(UserWarning, match="No coordinate reader found for"): - u = mda.Universe(t) + u = mda.Universe(t, to_guess=()) assert len(u.atoms) == 10 diff --git a/testsuite/MDAnalysisTests/guesser/test_base.py b/testsuite/MDAnalysisTests/guesser/test_base.py new file mode 100644 index 00000000000..b429826647f --- /dev/null +++ b/testsuite/MDAnalysisTests/guesser/test_base.py @@ -0,0 +1,102 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import pytest +import numpy as np +import MDAnalysis as mda +from MDAnalysis.guesser.base import GuesserBase, get_guesser +from MDAnalysis.core.topology import Topology +from MDAnalysis.core.topologyattrs import Masses, Atomnames, Atomtypes +import MDAnalysis.tests.datafiles as datafiles +from numpy.testing import assert_allclose, assert_equal + + +class TesttBaseGuesser(): + + def test_get_guesser(self): + class TestGuesser1(GuesserBase): + context = 'test1' + + class TestGuesser2(GuesserBase): + context = 'test2' + + assert get_guesser(TestGuesser1).context == 'test1' + assert get_guesser('test1').context == 'test1' + assert get_guesser(TestGuesser2()).context == 'test2' + + def test_get_guesser_with_universe(self): + class TestGuesser1(GuesserBase): + context = 'test1' + + u = mda.Universe.empty(n_atoms=5) + guesser = get_guesser(TestGuesser1(), u, foo=1) + + assert len(guesser._universe.atoms) == 5 + assert 'foo' in guesser._kwargs + + def test_guess_invalid_attribute(self): + with pytest.raises(ValueError, + match='default guesser can not guess ' + 'the following attribute: foo'): + mda.Universe(datafiles.PDB, to_guess=['foo']) + + def test_guess_attribute_with_missing_parent_attr(self): + names = Atomnames(np.array(['C', 'HB', 'HA', 'O'], dtype=object)) + masses = Masses( + np.array([np.nan, np.nan, np.nan, np.nan], dtype=np.float64)) + top = Topology(4, 1, 1, attrs=[names, masses, ]) + u = mda.Universe(top, to_guess=['masses']) + assert_allclose(u.atoms.masses, np.array( + [12.01100, 1.00800, 1.00800, 15.99900]), atol=0) + + def test_force_guessing(self): + names = Atomnames(np.array(['C', 'H', 'H', 'O'], dtype=object)) + types = Atomtypes(np.array(['1', '2', '3', '4'], dtype=object)) + top = Topology(4, 1, 1, attrs=[names, types, ]) + u = mda.Universe(top, force_guess=['types']) + assert_equal(u.atoms.types, ['C', 'H', 'H', 'O']) + + def test_partial_guessing(self): + types = Atomtypes(np.array(['C', 'H', 'H', 'O'], dtype=object)) + masses = Masses(np.array([0, np.nan, np.nan, 0], dtype=np.float64)) + top = Topology(4, 1, 1, attrs=[types, masses, ]) + u = mda.Universe(top, to_guess=['masses']) + assert_allclose(u.atoms.masses, np.array( + [0, 1.00800, 1.00800, 0]), atol=0) + + def test_force_guess_priority(self): + "check that passing the attribute to force_guess have higher power" + types = Atomtypes(np.array(['C', 'H', 'H', 'O'], dtype=object)) + masses = Masses(np.array([0, np.nan, np.nan, 0], dtype=np.float64)) + top = Topology(4, 1, 1, attrs=[types, masses, ]) + u = mda.Universe(top, to_guess=['masses'], force_guess=['masses']) + assert_allclose(u.atoms.masses, np.array( + [12.01100, 1.00800, 1.00800, 15.99900]), atol=0) + + def test_partial_guess_attr_with_unknown_no_value_label(self): + "trying to partially guess attribute tha doesn't have declared" + "no_value_label should gives no effect" + names = Atomnames(np.array(['C', 'H', 'H', 'O'], dtype=object)) + types = Atomtypes(np.array(['', '', '', ''], dtype=object)) + top = Topology(4, 1, 1, attrs=[names, types, ]) + u = mda.Universe(top, to_guess=['types']) + assert_equal(u.atoms.types, ['', '', '', '']) diff --git a/testsuite/MDAnalysisTests/guesser/test_default_guesser.py b/testsuite/MDAnalysisTests/guesser/test_default_guesser.py new file mode 100644 index 00000000000..2aacb28d4f1 --- /dev/null +++ b/testsuite/MDAnalysisTests/guesser/test_default_guesser.py @@ -0,0 +1,302 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the Lesser GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import pytest +from pytest import approx +import MDAnalysis as mda + +from numpy.testing import assert_equal, assert_allclose +import numpy as np +from MDAnalysis.core.topologyattrs import Angles, Atomtypes, Atomnames, Masses +from MDAnalysis.guesser.default_guesser import DefaultGuesser +from MDAnalysis.core.topology import Topology +from MDAnalysisTests import make_Universe +from MDAnalysisTests.core.test_fragments import make_starshape +import MDAnalysis.tests.datafiles as datafiles +from MDAnalysisTests.util import import_not_available + +try: + from rdkit import Chem + from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges +except ImportError: + pass + +requires_rdkit = pytest.mark.skipif(import_not_available("rdkit"), + reason="requires RDKit") + + +@pytest.fixture +def default_guesser(): + return DefaultGuesser(None) + + +class TestGuessMasses(object): + def test_guess_masses_from_universe(self): + topology = Topology(3, attrs=[Atomtypes(['C', 'C', 'H'])]) + u = mda.Universe(topology) + + assert isinstance(u.atoms.masses, np.ndarray) + assert_allclose(u.atoms.masses, np.array( + [12.011, 12.011, 1.008]), atol=0) + + def test_guess_masses_from_guesser_object(self, default_guesser): + elements = ['H', 'Ca', 'Am'] + values = np.array([1.008, 40.08000, 243.0]) + assert_allclose(default_guesser.guess_masses( + elements), values, atol=0) + + def test_guess_masses_warn(self): + topology = Topology(2, attrs=[Atomtypes(['X', 'Z'])]) + msg = "Unknown masses are set to 0.0 for current version, " + "this will be depracated in version 3.0.0 and replaced by" + " Masse's no_value_label (np.nan)" + with pytest.warns(PendingDeprecationWarning, match=msg): + u = mda.Universe(topology, to_guess=['masses']) + assert_allclose(u.atoms.masses, np.array([0.0, 0.0]), atol=0) + + @pytest.mark.parametrize('element, value', (('H', 1.008), ('XYZ', 0.0),)) + def test_get_atom_mass(self, element, value, default_guesser): + default_guesser.get_atom_mass(element) == approx(value) + + def test_guess_atom_mass(self, default_guesser): + assert default_guesser.guess_atom_mass('1H') == approx(1.008) + + def test_guess_masses_with_no_reference_elements(self): + u = mda.Universe.empty(3) + with pytest.raises(ValueError, + match=('there is no reference attributes ')): + u.guess_TopologyAttrs('default', ['masses']) + + +class TestGuessTypes(object): + + def test_guess_types(self): + topology = Topology(2, attrs=[Atomnames(['MG2+', 'C12'])]) + u = mda.Universe(topology, to_guess=['types']) + assert isinstance(u.atoms.types, np.ndarray) + assert_equal(u.atoms.types, np.array(['MG', 'C'], dtype=object)) + + def test_guess_atom_element(self, default_guesser): + assert default_guesser.guess_atom_element('MG2+') == 'MG' + + def test_guess_atom_element_empty(self, default_guesser): + assert default_guesser.guess_atom_element('') == '' + + def test_guess_atom_element_singledigit(self, default_guesser): + assert default_guesser.guess_atom_element('1') == '1' + + def test_guess_atom_element_1H(self, default_guesser): + assert default_guesser.guess_atom_element('1H') == 'H' + assert default_guesser.guess_atom_element('2H') == 'H' + + def test_partial_guess_elements(self, default_guesser): + names = np.array(['BR123', 'Hk', 'C12'], dtype=object) + elements = np.array(['BR', 'C'], dtype=object) + guessed_elements = default_guesser.guess_types( + atom_types=names, indices_to_guess=[True, False, True]) + assert_equal(elements, guessed_elements) + + def test_guess_elements_from_no_data(self): + top = Topology(5) + msg = "there is no reference attributes in this universe" + "to guess types from" + with pytest.raises(ValueError, match=(msg)): + mda.Universe(top, to_guess=['types']) + + @pytest.mark.parametrize('name, element', ( + ('AO5*', 'O'), + ('F-', 'F'), + ('HB1', 'H'), + ('OC2', 'O'), + ('1he2', 'H'), + ('3hg2', 'H'), + ('OH-', 'O'), + ('HO', 'H'), + ('he', 'H'), + ('zn', 'ZN'), + ('Ca2+', 'CA'), + ('CA', 'C'), + )) + def test_guess_element_from_name(self, name, element, default_guesser): + assert default_guesser.guess_atom_element(name) == element + + +def test_guess_charge(default_guesser): + # this always returns 0.0 + assert default_guesser.guess_atom_charge('this') == approx(0.0) + + +def test_guess_bonds_Error(): + u = make_Universe(trajectory=True) + msg = "This Universe does not contain name information" + with pytest.raises(ValueError, match=msg): + u.guess_TopologyAttrs(to_guess=['bonds']) + + +def test_guess_bond_vdw_error(): + u = mda.Universe(datafiles.PDB) + with pytest.raises(ValueError, match="vdw radii for types: DUMMY"): + DefaultGuesser(u).guess_bonds(u.atoms) + + +def test_guess_bond_coord_error(default_guesser): + msg = "atoms' and 'coord' must be the same length" + with pytest.raises(ValueError, match=msg): + default_guesser.guess_bonds(['N', 'O', 'C'], [[1, 2, 3]]) + + +def test_guess_angles_with_no_bonds(): + "Test guessing angles for atoms with no bonds" + " information without adding bonds to universe " + u = mda.Universe(datafiles.two_water_gro) + u.guess_TopologyAttrs(to_guess=['angles']) + assert hasattr(u, 'angles') + assert not hasattr(u, 'bonds') + + +def test_guess_impropers(default_guesser): + u = make_starshape() + + ag = u.atoms[:5] + guessed_angles = default_guesser.guess_angles(ag.bonds) + u.add_TopologyAttr(Angles(guessed_angles)) + + vals = default_guesser.guess_improper_dihedrals(ag.angles) + assert_equal(len(vals), 12) + + +def test_guess_dihedrals_with_no_angles(): + "Test guessing dihedrals for atoms with no angles " + "information without adding bonds or angles to universe" + u = mda.Universe(datafiles.two_water_gro) + u.guess_TopologyAttrs(to_guess=['dihedrals']) + assert hasattr(u, 'dihedrals') + assert not hasattr(u, 'angles') + assert not hasattr(u, 'bonds') + + +def test_guess_impropers_with_angles(): + "Test guessing impropers for atoms with angles " + "and bonds information " + u = mda.Universe(datafiles.two_water_gro, + to_guess=['bonds', 'angles', 'impropers']) + u.guess_TopologyAttrs(to_guess=['impropers']) + assert hasattr(u, 'impropers') + assert hasattr(u, 'angles') + assert hasattr(u, 'bonds') + + +def test_guess_impropers_with_no_angles(): + "Test guessing impropers for atoms with no angles " + "information without adding bonds or angles to universe" + u = mda.Universe(datafiles.two_water_gro) + u.guess_TopologyAttrs(to_guess=['impropers']) + assert hasattr(u, 'impropers') + assert not hasattr(u, 'angles') + assert not hasattr(u, 'bonds') + + +def bond_sort(arr): + # sort from low to high, also within a tuple + # e.g. ([5, 4], [0, 1], [0, 3]) -> ([0, 1], [0, 3], [4, 5]) + out = [] + for (i, j) in arr: + if i > j: + i, j = j, i + out.append((i, j)) + return sorted(out) + + +def test_guess_bonds_water(): + u = mda.Universe(datafiles.two_water_gro) + bonds = bond_sort(DefaultGuesser( + None, box=u.dimensions).guess_bonds(u.atoms, u.atoms.positions)) + assert_equal(bonds, ((0, 1), + (0, 2), + (3, 4), + (3, 5))) + + +def test_guess_bonds_adk(): + u = mda.Universe(datafiles.PSF, datafiles.DCD) + u.guess_TopologyAttrs(force_guess=['types']) + guesser = DefaultGuesser(None) + bonds = bond_sort(guesser.guess_bonds(u.atoms, u.atoms.positions)) + assert_equal(np.sort(u.bonds.indices, axis=0), + np.sort(bonds, axis=0)) + + +def test_guess_bonds_peptide(): + u = mda.Universe(datafiles.PSF_NAMD, datafiles.PDB_NAMD) + u.guess_TopologyAttrs(force_guess=['types']) + guesser = DefaultGuesser(None) + bonds = bond_sort(guesser.guess_bonds(u.atoms, u.atoms.positions)) + assert_equal(np.sort(u.bonds.indices, axis=0), + np.sort(bonds, axis=0)) + + +@pytest.mark.parametrize("smi", [ + "c1ccccc1", + "C1=CC=CC=C1", + "CCO", + "c1ccccc1Cc1ccccc1", + "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", +]) +@requires_rdkit +def test_guess_aromaticities(smi): + mol = Chem.MolFromSmiles(smi) + mol = Chem.AddHs(mol) + expected = np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()]) + u = mda.Universe(mol) + guesser = DefaultGuesser(None) + values = guesser.guess_aromaticities(u.atoms) + u.guess_TopologyAttrs(to_guess=['aromaticities']) + assert_equal(values, expected) + assert_equal(u.atoms.aromaticities, expected) + + +@pytest.mark.parametrize("smi", [ + "c1ccccc1", + "C1=CC=CC=C1", + "CCO", + "c1ccccc1Cc1ccccc1", + "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", +]) +@requires_rdkit +def test_guess_gasteiger_charges(smi): + mol = Chem.MolFromSmiles(smi) + mol = Chem.AddHs(mol) + ComputeGasteigerCharges(mol, throwOnParamFailure=True) + expected = np.array([atom.GetDoubleProp("_GasteigerCharge") + for atom in mol.GetAtoms()], dtype=np.float32) + u = mda.Universe(mol) + guesser = DefaultGuesser(None) + values = guesser.guess_gasteiger_charges(u.atoms) + assert_equal(values, expected) + + +@requires_rdkit +def test_aromaticity(): + u = mda.Universe(datafiles.PDB_small, + to_guess=['elements', 'aromaticities']) + c_aromatic = u.select_atoms('resname PHE and name CD1') + assert_equal(c_aromatic.aromaticities[0], True) diff --git a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py index 6b29aec6a57..e92ff80bf14 100644 --- a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py +++ b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py @@ -90,7 +90,7 @@ def u(request): if len(request.param) == 1: f = request.param[0] - return mda.Universe(f) + return mda.Universe(f, to_guess=()) else: top, trj = request.param return mda.Universe(top, trj) diff --git a/testsuite/MDAnalysisTests/topology/base.py b/testsuite/MDAnalysisTests/topology/base.py index 7833f0a51ed..142e7954abb 100644 --- a/testsuite/MDAnalysisTests/topology/base.py +++ b/testsuite/MDAnalysisTests/topology/base.py @@ -25,8 +25,7 @@ import MDAnalysis as mda from MDAnalysis.core.topology import Topology -mandatory_attrs = ['ids', 'masses', 'types', - 'resids', 'resnums', 'segids'] +mandatory_attrs = ['ids', 'resids', 'resnums', 'segids'] class ParserBase(object): @@ -62,25 +61,16 @@ def test_mandatory_attributes(self, top): def test_expected_attributes(self, top): # Extra attributes as declared in specific implementations - for attr in self.expected_attrs+self.guessed_attrs: + for attr in self.expected_attrs: assert hasattr(top, attr), 'Missing expected attribute: {}'.format(attr) - + def test_no_unexpected_attributes(self, top): attrs = set(self.expected_attrs - + self.guessed_attrs + mandatory_attrs - + ['indices', 'resindices', 'segindices']) + + ['indices', 'resindices', 'segindices'] + self.guessed_attrs) for attr in top.attrs: assert attr.attrname in attrs, 'Unexpected attribute: {}'.format(attr.attrname) - def test_guessed_attributes(self, top): - # guessed attributes must be declared as guessed - for attr in top.attrs: - val = attr.is_guessed - if not val in (True, False): # only for simple yes/no cases - continue - assert val == (attr.attrname in self.guessed_attrs), 'Attr "{}" guessed= {}'.format(attr, val) - def test_size(self, top): """Check that the Topology is correctly sized""" assert top.n_atoms == self.expected_n_atoms, '{} atoms read, {} expected in {}'.format( @@ -100,3 +90,13 @@ def test_creates_universe(self, filename): """Check that Universe works with this Parser""" u = mda.Universe(filename) assert isinstance(u, mda.Universe) + + def test_guessed_attributes(self, filename): + """check that the universe created with certain parser have the same + guessed attributes as when it was guessed inside the parser""" + u = mda.Universe(filename) + u_guessed_attrs = [attr.attrname for attr + in u._topology.guessed_attributes] + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + assert attr in u_guessed_attrs diff --git a/testsuite/MDAnalysisTests/topology/test_crd.py b/testsuite/MDAnalysisTests/topology/test_crd.py index 846bff496f1..3062ba12f3b 100644 --- a/testsuite/MDAnalysisTests/topology/test_crd.py +++ b/testsuite/MDAnalysisTests/topology/test_crd.py @@ -27,6 +27,8 @@ CRD, ) +from numpy.testing import assert_allclose + class TestCRDParser(ParserBase): parser = mda.topology.CRDParser.CRDParser @@ -35,6 +37,17 @@ class TestCRDParser(ParserBase): 'resids', 'resnames', 'resnums', 'segids'] guessed_attrs = ['masses', 'types'] + expected_n_atoms = 3341 expected_n_residues = 214 expected_n_segments = 1 + + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + expected = [14.007, 1.008, 1.008, 1.008, 12.011, 1.008, 12.011] + assert_allclose(u.atoms.masses[:7], expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['N', 'H', 'H', 'H', 'C', 'H', 'C'] + assert (u.atoms.types[:7] == expected).all() diff --git a/testsuite/MDAnalysisTests/topology/test_dlpoly.py b/testsuite/MDAnalysisTests/topology/test_dlpoly.py index 01ea6632b11..a21f7134ca1 100644 --- a/testsuite/MDAnalysisTests/topology/test_dlpoly.py +++ b/testsuite/MDAnalysisTests/topology/test_dlpoly.py @@ -20,7 +20,7 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import pytest import MDAnalysis as mda @@ -43,14 +43,30 @@ def test_creates_universe(self, filename): u = mda.Universe(filename, topology_format=self.format) assert isinstance(u, mda.Universe) + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, topology_format=self.format) + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + class DLPBase2(DLPUniverse): expected_attrs = ['ids', 'names'] - guessed_attrs = ['types', 'masses'] + guessed_attrs = ['masses', 'types'] + expected_n_atoms = 216 expected_n_residues = 1 expected_n_segments = 1 + def test_guesssed_masses(self, filename): + u = mda.Universe(filename, topology_format=self.format) + assert_allclose(u.atoms.masses[0], 39.102) + assert_allclose(u.atoms.masses[4], 35.45) + + def test_guessed_types(self, filename): + u = mda.Universe(filename, topology_format=self.format) + assert u.atoms.types[0] == 'K' + assert u.atoms.types[4] == 'CL' + def test_names(self, top): assert top.names.values[0] == 'K+' assert top.names.values[4] == 'Cl-' @@ -70,7 +86,6 @@ class TestDLPConfigParser(DLPBase2): class DLPBase(DLPUniverse): expected_attrs = ['ids', 'names'] - guessed_attrs = ['types', 'masses'] expected_n_atoms = 3 expected_n_residues = 1 expected_n_segments = 1 diff --git a/testsuite/MDAnalysisTests/topology/test_dms.py b/testsuite/MDAnalysisTests/topology/test_dms.py index 2740c392d26..d9f7944aaa0 100644 --- a/testsuite/MDAnalysisTests/topology/test_dms.py +++ b/testsuite/MDAnalysisTests/topology/test_dms.py @@ -21,7 +21,6 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # import MDAnalysis as mda - from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import DMS_DOMAINS, DMS_NO_SEGID @@ -62,6 +61,11 @@ def test_atomsels(self, filename): s5 = u.select_atoms("resname ALA") assert len(s5) == 190 + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['N', 'H', 'H', 'H', 'C', 'H', 'C'] + assert (u.atoms.types[:7] == expected).all() + class TestDMSParserNoSegid(TestDMSParser): ref_filename = DMS_NO_SEGID diff --git a/testsuite/MDAnalysisTests/topology/test_fhiaims.py b/testsuite/MDAnalysisTests/topology/test_fhiaims.py index 4596ccfa2bc..39097473871 100644 --- a/testsuite/MDAnalysisTests/topology/test_fhiaims.py +++ b/testsuite/MDAnalysisTests/topology/test_fhiaims.py @@ -20,13 +20,14 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -from numpy.testing import assert_equal, assert_almost_equal +from numpy.testing import assert_equal, assert_allclose import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import FHIAIMS + class TestFHIAIMS(ParserBase): parser = mda.topology.FHIAIMSParser.FHIAIMSParser expected_attrs = ['names', 'elements'] @@ -40,15 +41,17 @@ def test_names(self, top): assert_equal(top.names.values, ['O', 'H', 'H', 'O', 'H', 'H']) - def test_types(self, top): - assert_equal(top.types.values, + def test_guessed_types(self, filename): + u = mda.Universe(filename) + assert_equal(u.atoms.types, ['O', 'H', 'H', 'O', 'H', 'H']) + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + assert_allclose(u.atoms.masses, + [15.999, 1.008, 1.008, 15.999, + 1.008, 1.008]) + def test_elements(self, top): assert_equal(top.elements.values, ['O', 'H', 'H', 'O', 'H', 'H']) - - def test_masses(self, top): - assert_almost_equal(top.masses.values, - [15.999, 1.008, 1.008, 15.999, - 1.008, 1.008]) diff --git a/testsuite/MDAnalysisTests/topology/test_gms.py b/testsuite/MDAnalysisTests/topology/test_gms.py index 87b1795b352..cb187adad37 100644 --- a/testsuite/MDAnalysisTests/topology/test_gms.py +++ b/testsuite/MDAnalysisTests/topology/test_gms.py @@ -20,7 +20,7 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import MDAnalysis as mda @@ -52,6 +52,16 @@ def test_types(self, top): assert_equal(top.atomiccharges.values, [8, 1, 1, 8, 1, 1]) + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + expected = [15.999, 1.008, 1.008, 15.999, 1.008, 1.008] + assert_allclose(u.atoms.masses, expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['O', 'H', 'H', 'O', 'H', 'H'] + assert (u.atoms.types == expected).all() + class TestGMSSYMOPT(GMSBase): expected_n_atoms = 4 diff --git a/testsuite/MDAnalysisTests/topology/test_gro.py b/testsuite/MDAnalysisTests/topology/test_gro.py index 6a457cecb8d..f95deea52b0 100644 --- a/testsuite/MDAnalysisTests/topology/test_gro.py +++ b/testsuite/MDAnalysisTests/topology/test_gro.py @@ -34,13 +34,13 @@ GRO_residwrap_0base, GRO_sameresid_diffresname, ) -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose class TestGROParser(ParserBase): parser = mda.topology.GROParser.GROParser ref_filename = GRO - expected_attrs = ['ids', 'names', 'resids', 'resnames', 'masses'] + expected_attrs = ['ids', 'names', 'resids', 'resnames'] guessed_attrs = ['masses', 'types'] expected_n_atoms = 47681 expected_n_residues = 11302 @@ -52,6 +52,16 @@ def test_attr_size(self, top): assert len(top.resids) == top.n_residues assert len(top.resnames) == top.n_residues + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + expected = [14.007, 1.008, 1.008, 1.008, 12.011, 1.008, 12.011] + assert_allclose(u.atoms.masses[:7], expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['N', 'H', 'H', 'H', 'C', 'H', 'C'] + assert_equal(u.atoms.types[:7], expected) + class TestGROWideBox(object): """Tests for Issue #548""" @@ -75,6 +85,7 @@ def test_parse_missing_atomname_IOerror(): with pytest.raises(IOError): p.parse() + class TestGroResidWrapping(object): # resid is 5 digit field, so is limited to 100k # check that parser recognises when resids have wrapped diff --git a/testsuite/MDAnalysisTests/topology/test_gsd.py b/testsuite/MDAnalysisTests/topology/test_gsd.py index 41c3cb8b81d..b80df4ede11 100644 --- a/testsuite/MDAnalysisTests/topology/test_gsd.py +++ b/testsuite/MDAnalysisTests/topology/test_gsd.py @@ -28,7 +28,6 @@ from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import GSD from MDAnalysisTests.datafiles import GSD_bonds -from numpy.testing import assert_equal import os @@ -36,19 +35,19 @@ class GSDBase(ParserBase): parser = mda.topology.GSDParser.GSDParser expected_attrs = ['ids', 'names', 'resids', 'resnames', 'masses', - 'charges', 'radii', + 'charges', 'radii', 'types', 'bonds', 'angles', 'dihedrals', 'impropers'] expected_n_bonds = 0 expected_n_angles = 0 expected_n_dihedrals = 0 expected_n_impropers = 0 - + def test_attr_size(self, top): assert len(top.ids) == top.n_atoms assert len(top.names) == top.n_atoms assert len(top.resids) == top.n_residues assert len(top.resnames) == top.n_residues - + def test_atoms(self, top): assert top.n_atoms == self.expected_n_atoms @@ -72,7 +71,7 @@ def test_dihedrals(self, top): assert isinstance(top.angles.values[0], tuple) else: assert top.dihedrals.values == [] - + def test_impropers(self, top): assert len(top.impropers.values) == self.expected_n_impropers if self.expected_n_impropers: diff --git a/testsuite/MDAnalysisTests/topology/test_guessers.py b/testsuite/MDAnalysisTests/topology/test_guessers.py deleted file mode 100644 index 1d946f22c8c..00000000000 --- a/testsuite/MDAnalysisTests/topology/test_guessers.py +++ /dev/null @@ -1,205 +0,0 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# -import pytest -from numpy.testing import assert_equal -import numpy as np - -import MDAnalysis as mda -from MDAnalysis.topology import guessers -from MDAnalysis.core.topologyattrs import Angles - -from MDAnalysisTests import make_Universe -from MDAnalysisTests.core.test_fragments import make_starshape -import MDAnalysis.tests.datafiles as datafiles - -from MDAnalysisTests.util import import_not_available - - -try: - from rdkit import Chem - from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges -except ImportError: - pass - -requires_rdkit = pytest.mark.skipif(import_not_available("rdkit"), - reason="requires RDKit") - - -class TestGuessMasses(object): - def test_guess_masses(self): - out = guessers.guess_masses(['C', 'C', 'H']) - - assert isinstance(out, np.ndarray) - assert_equal(out, np.array([12.011, 12.011, 1.008])) - - def test_guess_masses_warn(self): - with pytest.warns(UserWarning): - guessers.guess_masses(['X']) - - def test_guess_masses_miss(self): - out = guessers.guess_masses(['X', 'Z']) - assert_equal(out, np.array([0.0, 0.0])) - - @pytest.mark.parametrize('element, value', (('H', 1.008), ('XYZ', 0.0), )) - def test_get_atom_mass(self, element, value): - assert guessers.get_atom_mass(element) == value - - def test_guess_atom_mass(self): - assert guessers.guess_atom_mass('1H') == 1.008 - - -class TestGuessTypes(object): - # guess_types - # guess_atom_type - # guess_atom_element - def test_guess_types(self): - out = guessers.guess_types(['MG2+', 'C12']) - - assert isinstance(out, np.ndarray) - assert_equal(out, np.array(['MG', 'C'], dtype=object)) - - def test_guess_atom_element(self): - assert guessers.guess_atom_element('MG2+') == 'MG' - - def test_guess_atom_element_empty(self): - assert guessers.guess_atom_element('') == '' - - def test_guess_atom_element_singledigit(self): - assert guessers.guess_atom_element('1') == '1' - - def test_guess_atom_element_1H(self): - assert guessers.guess_atom_element('1H') == 'H' - assert guessers.guess_atom_element('2H') == 'H' - - @pytest.mark.parametrize('name, element', ( - ('AO5*', 'O'), - ('F-', 'F'), - ('HB1', 'H'), - ('OC2', 'O'), - ('1he2', 'H'), - ('3hg2', 'H'), - ('OH-', 'O'), - ('HO', 'H'), - ('he', 'H'), - ('zn', 'ZN'), - ('Ca2+', 'CA'), - ('CA', 'C'), - ('N0A', 'N'), - ('C0U', 'C'), - ('C0S', 'C'), - ('Na+', 'NA'), - ('Cu2+', 'CU') - )) - def test_guess_element_from_name(self, name, element): - assert guessers.guess_atom_element(name) == element - - -def test_guess_charge(): - # this always returns 0.0 - assert guessers.guess_atom_charge('this') == 0.0 - - -def test_guess_bonds_Error(): - u = make_Universe(trajectory=True) - with pytest.raises(ValueError): - guessers.guess_bonds(u.atoms[:4], u.atoms.positions[:5]) - - -def test_guess_impropers(): - u = make_starshape() - - ag = u.atoms[:5] - - u.add_TopologyAttr(Angles(guessers.guess_angles(ag.bonds))) - - vals = guessers.guess_improper_dihedrals(ag.angles) - assert_equal(len(vals), 12) - - -def bond_sort(arr): - # sort from low to high, also within a tuple - # e.g. ([5, 4], [0, 1], [0, 3]) -> ([0, 1], [0, 3], [4, 5]) - out = [] - for (i, j) in arr: - if i > j: - i, j = j, i - out.append((i, j)) - return sorted(out) - -def test_guess_bonds_water(): - u = mda.Universe(datafiles.two_water_gro) - bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions, u.dimensions)) - assert_equal(bonds, ((0, 1), - (0, 2), - (3, 4), - (3, 5))) - -def test_guess_bonds_adk(): - u = mda.Universe(datafiles.PSF, datafiles.DCD) - u.atoms.types = guessers.guess_types(u.atoms.names) - bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions)) - assert_equal(np.sort(u.bonds.indices, axis=0), - np.sort(bonds, axis=0)) - -def test_guess_bonds_peptide(): - u = mda.Universe(datafiles.PSF_NAMD, datafiles.PDB_NAMD) - u.atoms.types = guessers.guess_types(u.atoms.names) - bonds = bond_sort(guessers.guess_bonds(u.atoms, u.atoms.positions)) - assert_equal(np.sort(u.bonds.indices, axis=0), - np.sort(bonds, axis=0)) - - -@pytest.mark.parametrize("smi", [ - "c1ccccc1", - "C1=CC=CC=C1", - "CCO", - "c1ccccc1Cc1ccccc1", - "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", -]) -@requires_rdkit -def test_guess_aromaticities(smi): - mol = Chem.MolFromSmiles(smi) - mol = Chem.AddHs(mol) - expected = np.array([atom.GetIsAromatic() for atom in mol.GetAtoms()]) - u = mda.Universe(mol) - values = guessers.guess_aromaticities(u.atoms) - assert_equal(values, expected) - - -@pytest.mark.parametrize("smi", [ - "c1ccccc1", - "C1=CC=CC=C1", - "CCO", - "c1ccccc1Cc1ccccc1", - "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", -]) -@requires_rdkit -def test_guess_gasteiger_charges(smi): - mol = Chem.MolFromSmiles(smi) - mol = Chem.AddHs(mol) - ComputeGasteigerCharges(mol, throwOnParamFailure=True) - expected = np.array([atom.GetDoubleProp("_GasteigerCharge") - for atom in mol.GetAtoms()], dtype=np.float32) - u = mda.Universe(mol) - values = guessers.guess_gasteiger_charges(u.atoms) - assert_equal(values, expected) diff --git a/testsuite/MDAnalysisTests/topology/test_hoomdxml.py b/testsuite/MDAnalysisTests/topology/test_hoomdxml.py index 018470647f4..d85a25c8465 100644 --- a/testsuite/MDAnalysisTests/topology/test_hoomdxml.py +++ b/testsuite/MDAnalysisTests/topology/test_hoomdxml.py @@ -21,7 +21,6 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from numpy.testing import assert_almost_equal - import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase @@ -34,6 +33,7 @@ class TestHoomdXMLParser(ParserBase): expected_attrs = [ 'types', 'masses', 'charges', 'radii', 'bonds', 'angles', 'dihedrals', 'impropers' ] + expected_n_atoms = 769 expected_n_residues = 1 expected_n_segments = 1 diff --git a/testsuite/MDAnalysisTests/topology/test_itp.py b/testsuite/MDAnalysisTests/topology/test_itp.py index 035c05bb98a..e5cea0e215d 100644 --- a/testsuite/MDAnalysisTests/topology/test_itp.py +++ b/testsuite/MDAnalysisTests/topology/test_itp.py @@ -49,6 +49,9 @@ class BaseITP(ParserBase): 'resids', 'resnames', 'segids', 'moltypes', 'molnums', 'bonds', 'angles', 'dihedrals', 'impropers'] + + guessed_attrs = ['elements', ] + expected_n_atoms = 63 expected_n_residues = 10 expected_n_segments = 1 @@ -64,13 +67,13 @@ def universe(self, filename): def test_bonds_total_counts(self, top): assert len(top.bonds.values) == self.expected_n_bonds - + def test_angles_total_counts(self, top): assert len(top.angles.values) == self.expected_n_angles def test_dihedrals_total_counts(self, top): assert len(top.dihedrals.values) == self.expected_n_dihedrals - + def test_impropers_total_counts(self, top): assert len(top.impropers.values) == self.expected_n_impropers @@ -86,7 +89,7 @@ class TestITP(BaseITP): expected_n_angles = 91 expected_n_dihedrals = 30 expected_n_impropers = 29 - + def test_bonds_atom_counts(self, universe): assert len(universe.atoms[[0]].bonds) == 3 assert len(universe.atoms[[42]].bonds) == 1 @@ -95,7 +98,7 @@ def test_bonds_values(self, top): vals = top.bonds.values for b in ((0, 1), (0, 2), (0, 3), (3, 4)): assert b in vals - + def test_bonds_type(self, universe): assert universe.bonds[0].type == 2 @@ -107,7 +110,7 @@ def test_angles_values(self, top): vals = top.angles.values for b in ((1, 0, 2), (1, 0, 3), (2, 0, 3)): assert (b in vals) or (b[::-1] in vals) - + def test_angles_type(self, universe): assert universe.angles[0].type == 2 @@ -123,7 +126,7 @@ def test_dihedrals_values(self, top): vals = top.dihedrals.values for b in ((1, 0, 3, 5), (0, 3, 5, 7)): assert (b in vals) or (b[::-1] in vals) - + def test_dihedrals_type(self, universe): assert universe.dihedrals[0].type == (1, 1) @@ -134,7 +137,7 @@ def test_impropers_values(self, top): vals = top.impropers.values for b in ((3, 0, 5, 4), (5, 3, 7, 6)): assert (b in vals) or (b[::-1] in vals) - + def test_impropers_type(self, universe): assert universe.impropers[0].type == 2 @@ -142,12 +145,13 @@ def test_impropers_type(self, universe): class TestITPNoMass(ParserBase): parser = mda.topology.ITPParser.ITPParser ref_filename = ITP_nomass - expected_attrs = ['ids', 'names', 'types', 'masses', + expected_attrs = ['ids', 'names', 'types', 'charges', 'chargegroups', 'resids', 'resnames', 'segids', 'moltypes', 'molnums', - 'bonds', 'angles', 'dihedrals', 'impropers'] - guessed_attrs = ['masses'] + 'bonds', 'angles', 'dihedrals', 'impropers', 'masses', ] + guessed_attrs = ['elements', ] + expected_n_atoms = 60 expected_n_residues = 1 expected_n_segments = 1 @@ -157,18 +161,18 @@ def universe(self, filename): return mda.Universe(filename) def test_mass_guess(self, universe): - assert universe.atoms[0].mass not in ('', None) + assert not np.isnan(universe.atoms[0].mass) class TestITPAtomtypes(ParserBase): parser = mda.topology.ITPParser.ITPParser ref_filename = ITP_atomtypes - expected_attrs = ['ids', 'names', 'types', 'masses', + expected_attrs = ['ids', 'names', 'types', 'charges', 'chargegroups', - 'resids', 'resnames', + 'resids', 'resnames', 'masses', 'segids', 'moltypes', 'molnums', 'bonds', 'angles', 'dihedrals', 'impropers'] - guessed_attrs = ['masses'] + expected_n_atoms = 4 expected_n_residues = 1 expected_n_segments = 1 @@ -202,7 +206,8 @@ class TestITPCharges(ParserBase): 'resids', 'resnames', 'segids', 'moltypes', 'molnums', 'bonds', 'angles', 'dihedrals', 'impropers'] - guessed_attrs = [] + guessed_attrs = ['elements', ] + expected_n_atoms = 9 expected_n_residues = 3 expected_n_segments = 1 @@ -220,6 +225,7 @@ def test_charge_parse(self, universe): def test_masses_are_read(self, universe): assert_allclose(universe.atoms.masses, [100] * 9) + class TestDifferentDirectivesITP(BaseITP): ref_filename = ITP_edited @@ -245,6 +251,13 @@ def test_dihedrals_identity(self, universe): class TestITPNoKeywords(BaseITP): + expected_attrs = ['ids', 'names', 'types', + 'charges', 'chargegroups', + 'resids', 'resnames', + 'segids', 'moltypes', 'molnums', + 'bonds', 'angles', 'dihedrals', 'impropers', 'masses', ] + guessed_attrs = ['elements', 'masses', ] + """ Test reading ITP files *without* defined keywords. @@ -253,7 +266,7 @@ class TestITPNoKeywords(BaseITP): #ifndef HW1_CHARGE #define HW1_CHARGE 0.241 #endif - + [ atoms ] 1 opls_118 1 SOL OW 1 0 2 opls_119 1 SOL HW1 1 HW1_CHARGE @@ -263,8 +276,6 @@ class TestITPNoKeywords(BaseITP): expected_n_residues = 1 expected_n_segments = 1 - guessed_attrs = ['masses'] - expected_n_bonds = 2 # FLEXIBLE not set -> SETTLE constraint -> water has no angle expected_n_angles = 0 @@ -284,7 +295,12 @@ def test_defines(self, top): assert_allclose(top.charges.values[1], 0.241) assert_allclose(top.charges.values[2], 0.241) - + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + assert_allclose(u.atoms.masses, + [15.999, 15.999, 15.999, 15.999, 15.999]) + + class TestITPKeywords(TestITPNoKeywords): """ Test reading ITP files *with* defined keywords. @@ -296,13 +312,13 @@ class TestITPKeywords(TestITPNoKeywords): @pytest.fixture def universe(self, filename): - return mda.Universe(filename, FLEXIBLE=True, EXTRA_ATOMS=True, + return mda.Universe(filename, FLEXIBLE=True, EXTRA_ATOMS=True, HW1_CHARGE=1, HW2_CHARGE=3) @pytest.fixture() def top(self, filename): with self.parser(filename) as p: - yield p.parse(FLEXIBLE=True, EXTRA_ATOMS=True, + yield p.parse(FLEXIBLE=True, EXTRA_ATOMS=True, HW1_CHARGE=1, HW2_CHARGE=3) def test_whether_settles_types(self, universe): @@ -341,7 +357,7 @@ def universe(self, filename): def top(self, filename): with self.parser(filename) as p: yield p.parse(HEAVY_H=True, EXTRA_ATOMS=True, HEAVY_SIX=True) - + def test_heavy_atom(self, universe): assert universe.atoms[5].mass > 40 @@ -380,6 +396,13 @@ def test_creates_universe(self, filename): """Check that Universe works with this Parser""" u = mda.Universe(filename, topology_format='ITP', include_dir=GMX_DIR) + def test_guessed_attributes(self, filename): + """check that the universe created with certain parser have the same + guessed attributes as when it was guessed inside the parser""" + u = mda.Universe(filename, topology_format='ITP', include_dir=GMX_DIR) + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + def test_sequential(self, universe): resids = np.array(list(range(2, 12)) + list(range(13, 23))) assert_equal(universe.residues.resids[:20], resids) @@ -453,3 +476,17 @@ def test_relative_path(self, tmpdir): with subsubdir.as_cwd(): u = mda.Universe("../test.itp") assert len(u.atoms) == 1 + + +def test_missing_elements_no_attribute(): + """Check that: + + 1) a warning is raised if elements are missing + 2) the elements attribute is not set + """ + wmsg = ("Element information is missing, elements attribute " + "will not be populated. If needed these can be ") + with pytest.warns(UserWarning, match=wmsg): + u = mda.Universe(ITP_atomtypes) + with pytest.raises(AttributeError): + _ = u.atoms.elements diff --git a/testsuite/MDAnalysisTests/topology/test_lammpsdata.py b/testsuite/MDAnalysisTests/topology/test_lammpsdata.py index 44a8af7236d..2f65eda3cbe 100644 --- a/testsuite/MDAnalysisTests/topology/test_lammpsdata.py +++ b/testsuite/MDAnalysisTests/topology/test_lammpsdata.py @@ -21,7 +21,7 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # import pytest -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import numpy as np from io import StringIO @@ -92,6 +92,11 @@ def test_improper_member(self, top): def test_creates_universe(self, filename): u = mda.Universe(filename, format='DATA') + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, format='DATA') + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + class TestLammpsData(LammpsBase): """Tests the reading of lammps .data topology files. @@ -263,8 +268,7 @@ def test_interpret_atom_style_missing(): class TestDumpParser(ParserBase): - expected_attrs = ['types'] - guessed_attrs = ['masses'] + expected_attrs = ['types', 'masses'] expected_n_atoms = 24 expected_n_residues = 1 expected_n_segments = 1 @@ -284,13 +288,28 @@ def test_masses_warning(self): with self.parser(self.ref_filename) as p: with pytest.warns(UserWarning, match='Guessed all Masses to 1.0'): p.parse() - + + def test_guessed_attributes(self, filename): + u = mda.Universe(filename, format='LAMMPSDUMP') + for attr in self.guessed_attrs: + assert hasattr(u.atoms, attr) + def test_id_ordering(self): # ids are nonsequential in file, but should get rearranged u = mda.Universe(self.ref_filename, format='LAMMPSDUMP') # the 4th in file has id==13, but should have been sorted assert u.atoms[3].id == 4 + def test_guessed_masses(self, filename): + u = mda.Universe(filename, format='LAMMPSDUMP') + expected = [1., 1., 1., 1., 1., 1., 1.] + assert_allclose(u.atoms.masses[:7], expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename, format='LAMMPSDUMP') + expected = ['2', '1', '1', '2', '1', '1', '2'] + assert (u.atoms.types[:7] == expected).all() + # this tests that topology can still be constructed if non-standard or uneven # column present. class TestDumpParserLong(TestDumpParser): diff --git a/testsuite/MDAnalysisTests/topology/test_minimal.py b/testsuite/MDAnalysisTests/topology/test_minimal.py index 6c275f0217c..60f009b44b0 100644 --- a/testsuite/MDAnalysisTests/topology/test_minimal.py +++ b/testsuite/MDAnalysisTests/topology/test_minimal.py @@ -60,7 +60,7 @@ def test_minimal_parser(filename, expected_n_atoms): @working_readers def test_universe_with_minimal(filename, expected_n_atoms): - u = mda.Universe(filename) + u = mda.Universe(filename, to_guess=()) assert len(u.atoms) == expected_n_atoms @@ -81,7 +81,7 @@ def test_minimal_parser_fail(filename,n_atoms): @nonworking_readers def test_minimal_n_atoms_kwarg(filename, n_atoms): # test that these can load when we supply the number of atoms - u = mda.Universe(filename, n_atoms=n_atoms) + u = mda.Universe(filename, n_atoms=n_atoms, to_guess=()) assert len(u.atoms) == n_atoms @@ -107,6 +107,6 @@ def test_memory_minimal_parser(array, order): @memory_reader def test_memory_universe(array, order): - u = mda.Universe(array, order=order) + u = mda.Universe(array, order=order, to_guess=()) assert len(u.atoms) == 10 diff --git a/testsuite/MDAnalysisTests/topology/test_mmtf.py b/testsuite/MDAnalysisTests/topology/test_mmtf.py index b05bd1767a4..9e2a85f7784 100644 --- a/testsuite/MDAnalysisTests/topology/test_mmtf.py +++ b/testsuite/MDAnalysisTests/topology/test_mmtf.py @@ -1,5 +1,5 @@ import pytest -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import mmtf from unittest import mock @@ -9,6 +9,7 @@ from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import MMTF, MMTF_gz, MMTF_skinny, MMTF_skinny2 + class MMTFBase(ParserBase): expected_attrs = [ 'ids', 'names', 'types', 'altLocs', 'tempfactors', 'occupancies', @@ -44,7 +45,7 @@ class TestMMTFSkinny(MMTFBase): expected_n_residues = 134 expected_n_segments = 2 - + class TestMMTFSkinny2(MMTFBase): parser = mda.topology.MMTFParser.MMTFParser ref_filename = MMTF_skinny2 @@ -96,6 +97,10 @@ def test_icodes(self, u): def test_altlocs(self, u): assert all(u.atoms.altLocs[:3] == '') + def test_guessed_masses(self, u): + expected = [15.999, 12.011, 12.011, 15.999, 12.011, 15.999, 12.011] + assert_allclose(u.atoms.masses[:7], expected) + class TestMMTFUniverseFromDecoder(TestMMTFUniverse): @pytest.fixture() @@ -119,6 +124,10 @@ def test_universe_models(self, u): assert isinstance(m, AtomGroup) assert len(m) == 570 + def test_guessed_masses(self, u): + expected = [15.999, 12.011, 12.011, 15.999, 12.011, 15.999, 12.011] + assert_allclose(u.atoms.masses[:7], expected) + class TestMMTFgzUniverseFromDecoder(TestMMTFgzUniverse): @pytest.fixture() @@ -128,7 +137,7 @@ def u(self): class TestSelectModels(object): - # tests for 'model' keyword in select_atoms + # tests for 'model' keyword in select_atoms @pytest.fixture() def u(self): return mda.Universe(MMTF_gz) diff --git a/testsuite/MDAnalysisTests/topology/test_mol2.py b/testsuite/MDAnalysisTests/topology/test_mol2.py index 7378cdb01f9..b6084b861ef 100644 --- a/testsuite/MDAnalysisTests/topology/test_mol2.py +++ b/testsuite/MDAnalysisTests/topology/test_mol2.py @@ -23,11 +23,12 @@ from io import StringIO import numpy as np -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import pytest import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase + from MDAnalysisTests.datafiles import ( mol2_molecule, mol2_molecules, @@ -178,6 +179,7 @@ class TestMOL2Base(ParserBase): 'ids', 'names', 'types', 'charges', 'resids', 'resnames', 'bonds', 'elements', ] + guessed_attrs = ['masses'] expected_n_atoms = 49 expected_n_residues = 1 @@ -235,11 +237,16 @@ def test_wrong_elements_warnings(): with pytest.warns(UserWarning, match='Unknown elements found') as record: u = mda.Universe(StringIO(mol2_wrong_element), format='MOL2') - # One warning from invalid elements, one from invalid masses - assert len(record) == 2 + # One warning from invalid elements, one from masses PendingDeprecationWarning + assert len(record) == 3 - expected = np.array(['N', '', ''], dtype=object) - assert_equal(u.atoms.elements, expected) + expected_elements = np.array(['N', '', ''], dtype=object) + guseed_masses = np.array([14.007, 0.0, 0.0], dtype=float) + gussed_types = np.array(['N.am', 'X.o2', 'XX.am']) + + assert_equal(u.atoms.elements, expected_elements) + assert_equal(u.atoms.types, gussed_types) + assert_allclose(u.atoms.masses, guseed_masses) def test_all_wrong_elements_warnings(): @@ -301,3 +308,9 @@ def test_unformat(): with pytest.raises(ValueError, match='Some atoms in the mol2 file'): u = mda.Universe(StringIO(mol2_resname_unformat), format='MOL2') + + +def test_guessed_masses(): + u = mda.Universe(mol2_molecules) + assert_allclose(u.atoms.masses[:7], [14.007, 32.06, + 14.007, 14.007, 15.999, 15.999, 12.011]) diff --git a/testsuite/MDAnalysisTests/topology/test_pdb.py b/testsuite/MDAnalysisTests/topology/test_pdb.py index 01fbcd8a5a4..c176d50be13 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdb.py +++ b/testsuite/MDAnalysisTests/topology/test_pdb.py @@ -24,7 +24,7 @@ import pytest import numpy as np -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase @@ -43,6 +43,7 @@ ) from MDAnalysis.topology.PDBParser import PDBParser from MDAnalysis import NoDataError +from MDAnalysis.guesser import tables _PDBPARSER = mda.topology.PDBParser.PDBParser @@ -252,8 +253,6 @@ def test_PDB_hex(): @pytest.mark.filterwarnings("error:Failed to guess the mass") def test_PDB_metals(): - from MDAnalysis.topology import tables - u = mda.Universe(StringIO(PDB_metals), format='PDB') assert len(u.atoms) == 4 @@ -310,11 +309,32 @@ def test_wrong_elements_warnings(): column which have been parsed and returns an appropriate warning. """ with pytest.warns(UserWarning, match='Unknown element XX found'): - u = mda.Universe(StringIO(PDB_wrong_ele), format='PDB') + u = mda.Universe(StringIO(PDB_wrong_ele,), format='PDB') + + expected_elements = np.array(['N', '', 'C', 'O', '', 'Cu', 'Fe', 'Mg'], + dtype=object) + gussed_types = np.array(['N', '', 'C', 'O', 'XX', 'CU', 'Fe', 'MG']) + guseed_masses = np.array([14.007, 0.0, 12.011, 15.999, 0.0, + 63.546, 55.847, 24.305], dtype=float) + + assert_equal(u.atoms.elements, expected_elements) + assert_equal(u.atoms.types, gussed_types) + assert_allclose(u.atoms.masses, guseed_masses) - expected = np.array(['N', '', 'C', 'O', '', 'Cu', 'Fe', 'Mg'], - dtype=object) - assert_equal(u.atoms.elements, expected) + +def test_guessed_masses_and_types_values(): + """Test that guessed masses and types have the expected values for universe + constructed from PDB file. + """ + u = mda.Universe(PDB, format='PDB') + gussed_types = np.array(['N', 'H', 'H', 'H', 'C', 'H', 'C', 'H', 'H', 'C']) + guseed_masses = [14.007, 1.008, 1.008, 1.008, + 12.011, 1.008, 12.011, 1.008, 1.008, 12.011] + failed_type_guesses = u.atoms.types == "" + + assert_allclose(u.atoms.masses[:10], guseed_masses) + assert_equal(u.atoms.types[:10], gussed_types) + assert not failed_type_guesses.any() def test_nobonds_error(): diff --git a/testsuite/MDAnalysisTests/topology/test_pdbqt.py b/testsuite/MDAnalysisTests/topology/test_pdbqt.py index c81f60cdc80..b2511a889e2 100644 --- a/testsuite/MDAnalysisTests/topology/test_pdbqt.py +++ b/testsuite/MDAnalysisTests/topology/test_pdbqt.py @@ -23,11 +23,14 @@ import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase + from MDAnalysisTests.datafiles import ( PDBQT_input, # pdbqt_inputpdbqt.pdbqt PDBQT_tyrosol, # tyrosol.pdbqt.bz2 ) +from numpy.testing import assert_allclose + class TestPDBQT(ParserBase): parser = mda.topology.PDBQTParser.PDBQTParser @@ -47,11 +50,17 @@ class TestPDBQT(ParserBase): "occupancies", "tempfactors", ] + guessed_attrs = ['masses'] expected_n_atoms = 1805 expected_n_residues = 199 # resids go 2-102 then 2-99 expected_n_segments = 2 # res2-102 are A, 2-99 are B + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + assert_allclose(u.atoms.masses[:7], [14.007, 0., + 0., 12.011, 12.011, 0., 12.011]) + def test_footnote(): """just test that the Universe is built even in the presence of a diff --git a/testsuite/MDAnalysisTests/topology/test_pqr.py b/testsuite/MDAnalysisTests/topology/test_pqr.py index 569b964e3ba..aa03d789ac4 100644 --- a/testsuite/MDAnalysisTests/topology/test_pqr.py +++ b/testsuite/MDAnalysisTests/topology/test_pqr.py @@ -21,8 +21,7 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from io import StringIO -from numpy.testing import assert_equal, assert_almost_equal -import pytest +from numpy.testing import assert_equal, assert_almost_equal, assert_allclose import MDAnalysis as mda from MDAnalysisTests.topology.base import ParserBase @@ -52,12 +51,26 @@ def test_attr_size(self, top): assert len(top.resnames) == top.n_residues assert len(top.segids) == top.n_segments + expected_masses = [14.007, 1.008, 1.008, 1.008, 12.011, 1.008, 12.011] + expected_types = ['N', 'H', 'H', 'H', 'C', 'H', 'C'] + + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + assert_allclose(u.atoms.masses[:7], self.expected_masses) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + assert (u.atoms.types[:7] == self.expected_types).all() + class TestPQRParser2(TestPQRParser): ref_filename = PQR_icodes expected_n_atoms = 5313 expected_n_residues = 474 + expected_masses = [14.007, 12.011, 12.011, 15.999, 12.011, 12.011, 12.011] + expected_types = ['N', 'C', 'C', 'O', 'C', 'C', 'C'] + def test_record_types(): u = mda.Universe(PQR_icodes) @@ -81,6 +94,7 @@ def test_record_types(): ENDMDL ''' + def test_gromacs_flavour(): u = mda.Universe(StringIO(GROMACS_PQR), format='PQR') @@ -88,7 +102,6 @@ def test_gromacs_flavour(): # topology things assert u.atoms[0].type == 'O' assert u.atoms[0].segid == 'SYSTEM' - assert not u._topology.types.is_guessed assert_almost_equal(u.atoms[0].radius, 1.48, decimal=5) assert_almost_equal(u.atoms[0].charge, -0.67, decimal=5) # coordinatey things diff --git a/testsuite/MDAnalysisTests/topology/test_psf.py b/testsuite/MDAnalysisTests/topology/test_psf.py index 72a4622c1b7..895f4185146 100644 --- a/testsuite/MDAnalysisTests/topology/test_psf.py +++ b/testsuite/MDAnalysisTests/topology/test_psf.py @@ -148,7 +148,7 @@ def test_angles_total_counts(self, top): def test_dihedrals_total_counts(self, top): assert len(top.dihedrals.values) == 0 - + def test_impropers_total_counts(self, top): assert len(top.impropers.values) == 0 diff --git a/testsuite/MDAnalysisTests/topology/test_tprparser.py b/testsuite/MDAnalysisTests/topology/test_tprparser.py index 023a52dee4e..bd1444a5661 100644 --- a/testsuite/MDAnalysisTests/topology/test_tprparser.py +++ b/testsuite/MDAnalysisTests/topology/test_tprparser.py @@ -73,6 +73,8 @@ class TPRAttrs(ParserBase): parser = MDAnalysis.topology.TPRParser.TPRParser expected_attrs = [ "ids", + "types", + "masses", "names", "elements", "resids", diff --git a/testsuite/MDAnalysisTests/topology/test_txyz.py b/testsuite/MDAnalysisTests/topology/test_txyz.py index 17ca447e9bc..72ab11d6525 100644 --- a/testsuite/MDAnalysisTests/topology/test_txyz.py +++ b/testsuite/MDAnalysisTests/topology/test_txyz.py @@ -26,12 +26,14 @@ from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import TXYZ, ARC, ARC_PBC -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose + class TestTXYZParser(ParserBase): parser = mda.topology.TXYZParser.TXYZParser guessed_attrs = ['masses'] expected_attrs = ['ids', 'names', 'bonds', 'types', 'elements'] + expected_n_residues = 1 expected_n_atoms = 9 expected_n_segments = 1 @@ -60,18 +62,24 @@ def test_TXYZ_elements(): u = mda.Universe(TXYZ, format='TXYZ') element_list = np.array(['C', 'H', 'H', 'O', 'H', 'C', 'H', 'H', 'H'], dtype=object) assert_equal(u.atoms.elements, element_list) - - + + def test_missing_elements_noattribute(): """Check that: 1) a warning is raised if elements are missing 2) the elements attribute is not set """ - wmsg = ("Element information is missing, elements attribute will not be " - "populated") + wmsg = ("Element information is missing, elements attribute " + "will not be populated. If needed these can be ") with pytest.warns(UserWarning, match=wmsg): u = mda.Universe(ARC_PBC) with pytest.raises(AttributeError): _ = u.atoms.elements + +def test_guessed_masses(): + u = mda.Universe(TXYZ) + expected = [12.011, 1.008, 1.008, 15.999, 1.008, 12.011, + 1.008, 1.008, 1.008] + assert_allclose(u.atoms.masses, expected) diff --git a/testsuite/MDAnalysisTests/topology/test_xpdb.py b/testsuite/MDAnalysisTests/topology/test_xpdb.py index 9bce73cd444..617d5caf7bf 100644 --- a/testsuite/MDAnalysisTests/topology/test_xpdb.py +++ b/testsuite/MDAnalysisTests/topology/test_xpdb.py @@ -27,6 +27,8 @@ XPDB_small, ) +from numpy.testing import assert_equal, assert_allclose + class TestXPDBParser(ParserBase): parser = mda.topology.ExtendedPDBParser.ExtendedPDBParser @@ -38,3 +40,13 @@ class TestXPDBParser(ParserBase): expected_n_atoms = 5 expected_n_residues = 5 expected_n_segments = 1 + + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + expected = [15.999, 15.999, 15.999, 15.999, 15.999] + assert_allclose(u.atoms.masses, expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['O', 'O', 'O', 'O', 'O'] + assert_equal(u.atoms.types, expected) diff --git a/testsuite/MDAnalysisTests/topology/test_xyz.py b/testsuite/MDAnalysisTests/topology/test_xyz.py index e45591a238c..8ce6ce45c72 100644 --- a/testsuite/MDAnalysisTests/topology/test_xyz.py +++ b/testsuite/MDAnalysisTests/topology/test_xyz.py @@ -30,13 +30,16 @@ XYZ_mini, ) +from numpy.testing import assert_equal, assert_allclose + class XYZBase(ParserBase): parser = mda.topology.XYZParser.XYZParser expected_n_residues = 1 expected_n_segments = 1 - expected_attrs = ['names', "elements"] - guessed_attrs = ['types', 'masses'] + expected_attrs = ['names', 'elements'] + guessed_attrs = ['masses', 'types'] + class TestXYZMini(XYZBase): ref_filename = XYZ_mini @@ -49,3 +52,13 @@ class TestXYZParser(XYZBase): @pytest.fixture(params=[XYZ, XYZ_bz2]) def filename(self, request): return request.param + + def test_guessed_masses(self, filename): + u = mda.Universe(filename) + expected = [1.008, 1.008, 1.008, 1.008, 1.008, 1.008, 1.008] + assert_allclose(u.atoms.masses[:7], expected) + + def test_guessed_types(self, filename): + u = mda.Universe(filename) + expected = ['H', 'H', 'H', 'H', 'H', 'H', 'H'] + assert_equal(u.atoms.types[:7], expected) diff --git a/testsuite/MDAnalysisTests/transformations/test_positionaveraging.py b/testsuite/MDAnalysisTests/transformations/test_positionaveraging.py index 3eec056a5bb..ba2c348bd06 100644 --- a/testsuite/MDAnalysisTests/transformations/test_positionaveraging.py +++ b/testsuite/MDAnalysisTests/transformations/test_positionaveraging.py @@ -13,7 +13,7 @@ def posaveraging_universes(): ''' Create the universe objects for the tests. ''' - u = md.Universe(datafiles.XTC_multi_frame) + u = md.Universe(datafiles.XTC_multi_frame, to_guess=()) transformation = PositionAverager(3) u.trajectory.add_transformations(transformation) return u @@ -24,7 +24,7 @@ def posaveraging_universes_noreset(): Create the universe objects for the tests. Position averaging reset is set to False. ''' - u = md.Universe(datafiles.XTC_multi_frame) + u = md.Universe(datafiles.XTC_multi_frame, to_guess=()) transformation = PositionAverager(3, check_reset=False) u.trajectory.add_transformations(transformation) return u @@ -106,6 +106,6 @@ def test_posavging_specific_noreset(posaveraging_universes_noreset): specr_avgd[...,idx] = ts.positions.copy() idx += 1 assert_array_almost_equal(ref_matrix_specr, specr_avgd[1,:,-1], decimal=5) - +