diff --git a/CAT/attachment/core_anchoring.py b/CAT/attachment/core_anchoring.py new file mode 100644 index 00000000..aced5d32 --- /dev/null +++ b/CAT/attachment/core_anchoring.py @@ -0,0 +1,115 @@ +"""A module designed for finding core functional groups. + +Index +----- +.. currentmodule:: CAT.attachment.core_anchoring +.. autosummary:: + set_core_anchors + find_core_substructure + +API +--- +.. autofunction:: set_core_anchors +.. autofunction:: find_core_substructure + +""" + +from typing import Tuple, Any, Mapping, TYPE_CHECKING + +import numpy as np +from scm.plams import Molecule, MoleculeError, to_rdmol + +from .distribution import distribute_idx +from ..utils import AllignmentEnum, AllignmentTup, AnchorTup + +if TYPE_CHECKING: + from numpy.typing import NDArray + from numpy import int64 as i8 + +__all__ = ["set_core_anchors", "find_core_substructure"] + + +def set_core_anchors( + mol: Molecule, + anchor_tup: AnchorTup, + allignment_tup: AllignmentTup, + subset_kwargs: "None | Mapping[str, Any]" = None, +) -> Tuple[str, str]: + """Identify and parse the core anchors within the passed molecule. + + Returns two strings: The (parsed) molecular formula and the anchor indices. 
+ + """ + # Get the molecular formula of the core before any atoms are deleted + formula = mol.get_formula() + + # Get the indices of all anchor atom ligand placeholders in the core + anchors = mol.properties.dummies + if not anchors: + anchor_idx, remove_idx = find_core_substructure(mol, anchor_tup) + else: + anchor_idx = np.fromiter(anchors, count=len(anchors), dtype=np.int64) + anchor_idx -= 1 + remove_idx = anchor_idx.copy() + if subset_kwargs: + anchor_idx = distribute_idx(mol, anchor_idx, **subset_kwargs) + if not len(anchor_idx): + raise MoleculeError(f"No valid anchoring groups found in the core {formula!r}") + + # Convert atomic indices into Atoms + anchor_idx += 1 + anchor_idx.sort() + mol.properties.dummies = [mol[i] for i in anchor_idx] + + # The "surface" allignment option requires at least 4 anchor atoms + if len(anchor_idx) < 4 and allignment_tup.kind == AllignmentEnum.SURFACE: + raise NotImplementedError( + '`optional.core.allignment = "surface"` is not supported for cores with less ' + f'than 4 anchor atoms ({mol.get_formula()}); consider using ' + '`optional.core.allignment = "sphere"`' + ) + + # Delete all core anchor atoms + if remove_idx is not None: + remove_idx += 1 + remove_idx.sort() + for i in reversed(remove_idx): + mol.delete_atom(mol[i]) + return formula, ' '.join(anchor_idx.astype(str)) + + +def find_core_substructure( + mol: Molecule, + anchor_tup: AnchorTup, +) -> Tuple["NDArray[i8]", "None | NDArray[i8]"]: + """Identify substructures within the passed core based on **anchor_tup**. + + Returns two index arrays, respectively containing the indices of the anchor + atoms and all to-be removed atoms. 
+ + """ + rdmol = to_rdmol(mol) + matches = rdmol.GetSubstructMatches(anchor_tup.mol, useChirality=True) + remove = anchor_tup.remove + + # Remove all duplicate matches, each heteroatom (match[0]) should have <= 1 entry + ref_set = set() + anchor_list = [] + remove_list = [] + for idx_tup in matches: + anchor_idx_tup = tuple(idx_tup[i] for i in anchor_tup.group_idx) + if anchor_idx_tup in ref_set: + continue # Skip duplicates + else: + ref_set.add(anchor_idx_tup) + + if remove is not None: + remove_list += [idx_tup[i] for i in remove] + anchor_list.append(anchor_idx_tup[0]) + + anchor_array = np.fromiter(anchor_list, dtype=np.int64, count=len(anchor_list)) + if remove is not None: + remove_array = np.fromiter(remove_list, dtype=np.int64, count=len(remove_list)) + return anchor_array, remove_array + else: + return anchor_array, None diff --git a/CAT/base.py b/CAT/base.py index 38600e36..7d3744b3 100644 --- a/CAT/base.py +++ b/CAT/base.py @@ -33,9 +33,7 @@ from .__version__ import __version__ from .logger import logger -from .mol_utils import to_symbol from .settings_dataframe import SettingsDataFrame -from .utils import AllignmentEnum from .data_handling.mol_import import read_mol from .data_handling.update_qd_df import update_qd_df @@ -44,9 +42,9 @@ from .multi_ligand import init_multi_ligand from .attachment.qd_opt import init_qd_opt from .attachment.ligand_opt import init_ligand_opt, allign_axis -from .attachment.distribution import distribute_idx from .attachment.ligand_attach import init_qd_construction from .attachment.ligand_anchoring import init_ligand_anchoring +from .attachment.core_anchoring import set_core_anchors from .workflows import MOL @@ -209,50 +207,13 @@ def prep_core(core_df: SettingsDataFrame) -> SettingsDataFrame: """ # Unpack arguments - anchor = core_df.settings.optional.core.anchor - subset = core_df.settings.optional.core.subset - - idx_tuples = [] - for core in core_df[MOL]: - # Checks the if the anchor is a string (atomic symbol) or 
integer (atomic number) - formula = core.get_formula() - - # Returns the indices of all anchor atom ligand placeholders in the core - if not core.properties.dummies: - at_idx = np.array([i for i, atom in enumerate(core) if atom.atnum == anchor]) - else: - dummies = core.properties.dummies - at_idx = np.fromiter(dummies, count=len(dummies), dtype=int) - at_idx -= 1 - if subset: - at_idx = distribute_idx(core, at_idx, **subset) - - # Convert atomic indices into Atoms - at_idx += 1 - at_idx.sort() - core.properties.dummies = dummies = [core[i] for i in at_idx] - - # Returns an error if no anchor atoms were found - if not dummies: - raise MoleculeError(f"{repr(to_symbol(anchor))} was specified as core anchor atom, yet " - f"no matching atoms were found in {core.properties.name} " - f"(formula: {formula})") - elif ( - len(dummies) < 4 and - core_df.settings.optional.core.allignment.kind == AllignmentEnum.SURFACE - ): - raise NotImplementedError( - '`optional.core.allignment = "surface"` is not supported for cores with less ' - f'than 4 anchor atoms ({core.get_formula()}); consider using ' - '`optional.core.allignment = "sphere"`' - ) - - # Delete all core anchor atoms - for at in dummies: - core.delete_atom(at) - idx_tuples.append( - (formula, ' '.join(at_idx.astype(str))) - ) + core_options = core_df.settings.optional.core + anchor_tup = core_options.anchor[0] + allignment_tup = core_options.allignment + subset = core_options.subset + + # Set the core anchors + idx_tuples = [set_core_anchors(i, anchor_tup, allignment_tup, subset) for i in core_df[MOL]] # Create and return a new dataframe idx = pd.MultiIndex.from_tuples(idx_tuples, names=['formula', 'anchor']) diff --git a/CAT/data_handling/anchor_parsing.py b/CAT/data_handling/anchor_parsing.py index 32efc229..dbe574ec 100644 --- a/CAT/data_handling/anchor_parsing.py +++ b/CAT/data_handling/anchor_parsing.py @@ -5,7 +5,7 @@ from typing import Union, Tuple, Iterable, SupportsFloat from rdkit.Chem import Mol -from 
scm.plams import Units +from scm.plams import Units, PT from schema import Schema, Use, Optional from typing_extensions import TypedDict, SupportsIndex @@ -103,25 +103,42 @@ def _parse_angle_offset( }) +#: All atom types that have to be encapsulated in square brackets when parsing SMILES strings +SQUARE_BRACKET_ATOMS = frozenset( + PT.symtonum.keys() - {'B', 'Br', 'C', 'Cl', 'F', 'I', 'N', 'O', 'P', 'S'} +) + + def parse_anchors( patterns: Union[ None, + SupportsIndex, str, Mol, AnchorTup, _UnparsedAnchorDict, - "Iterable[str | Mol | AnchorTup | _UnparsedAnchorDict]", + "Iterable[str | SupportsIndex | Mol | AnchorTup | _UnparsedAnchorDict]", ] = None, split: bool = True, + is_core: bool = False, ) -> Tuple[AnchorTup, ...]: """Parse the user-specified anchors.""" if patterns is None: + if is_core: + raise TypeError("`anchor=None` is not supported for core anchors") patterns = get_functional_groups(None, split) - elif isinstance(patterns, (Mol, str, dict, AnchorTup)): + elif isinstance(patterns, (Mol, str, dict, AnchorTup, SupportsIndex)): patterns = [patterns] ret = [] - for p in patterns: # type: _UnparsedAnchorDict | str | Mol | AnchorTup + for p in patterns: # type: _UnparsedAnchorDict | str | Mol | SupportsIndex | AnchorTup + try: + atnum = operator.index(p) # Check for atomic numbers + except TypeError: + pass + else: + p = PT.get_symbol(atnum) + if isinstance(p, AnchorTup): ret.append(p) elif isinstance(p, Mol): @@ -132,34 +149,51 @@ def parse_anchors( ret.append(AnchorTup(mol=mol, remove=remove)) elif isinstance(p, str): group = p + if group in SQUARE_BRACKET_ATOMS: + group = f"[{group}]" mol = _smiles_to_rdmol(group) remove = None if not split else (list(mol.GetAtoms())[-1].GetIdx(),) ret.append(AnchorTup(mol=mol, group=group, remove=remove)) else: kwargs: _AnchorDict = anchor_schema.validate(p) - # Check that `group_idx` and `remove` are disjoint group_idx = kwargs["group_idx"] remove = kwargs["remove"] - if remove is not None and not 
set(group_idx).isdisjoint(remove): - raise ValueError("`group_idx` and `remove` must be disjoint") - - # Check that at least 3 atoms are available for `angle_offset` - # (so a plane can be defined) angle_offset = kwargs["angle_offset"] - if angle_offset is not None and len(group_idx) < 3: - raise ValueError("`group_idx` must contain at least 3 atoms when " - "`angle_offset` is specified") - - # Check that at least 2 atoms are available for `dihedral` - # (so the third dihedral-defining vector can be defined) dihedral = kwargs["dihedral"] - if dihedral is not None and len(group_idx) < 2: - raise ValueError("`group_idx` must contain at least 2 atoms when " - "`dihedral` is specified") + + group = kwargs.pop("group") + if group in SQUARE_BRACKET_ATOMS: + group = f"[{group}]" + mol = _smiles_to_rdmol(group) + + # Dihedral and angle-offset options are not supported for core anchors + if is_core: + if dihedral is not None: + raise TypeError("`dihedral != None` is not supported for core anchors") + elif angle_offset is not None: + raise TypeError("`angle_offset != None` is not supported for core anchors") + elif kwargs["kind"] != KindEnum.FIRST: + raise NotImplementedError('`kind != "first"` is not yet supported') + else: + # Check that at least 3 atoms are available for `angle_offset` + # (so a plane can be defined) + if angle_offset is not None and len(group_idx) < 3: + raise ValueError("`group_idx` must contain at least 3 atoms when " + "`angle_offset` is specified") + + # Check that at least 2 atoms are available for `dihedral` + # (so the third dihedral-defining vector can be defined) + if dihedral is not None and len(group_idx) < 2: + raise ValueError("`group_idx` must contain at least 2 atoms when " + "`dihedral` is specified") + + # Check that `group_idx` and `remove` are disjoint + # TODO: Investigate if this check can be removed + if remove is not None and not set(group_idx).isdisjoint(remove): + raise ValueError("`group_idx` and `remove` must be disjoint") # 
Check that the indices in `group_idx` and `remove` are not out of bounds - mol = _smiles_to_rdmol(kwargs["group"]) atom_count = len(mol.GetAtoms()) if atom_count <= max(group_idx): raise IndexError(f"`group_idx` index {max(group_idx)} is out of bounds " @@ -167,5 +201,7 @@ def parse_anchors( elif remove is not None and atom_count <= max(remove): raise IndexError(f"`remove` index {max(remove)} is out of bounds " f"for a `group` with {atom_count} atoms") - ret.append(AnchorTup(**kwargs, mol=mol)) + ret.append(AnchorTup(**kwargs, group=group, mol=mol)) + if is_core and len(ret) > 1: + raise NotImplementedError("Cores with multiple anchor types aren't supported yet") return tuple(ret) diff --git a/CAT/data_handling/validate_input.py b/CAT/data_handling/validate_input.py index aab93936..8c99afc0 100644 --- a/CAT/data_handling/validate_input.py +++ b/CAT/data_handling/validate_input.py @@ -190,4 +190,5 @@ def validate_input(s: Settings, validate_only: bool = True) -> None: del s.optional.ligand.functional_groups split = s.optional.ligand.split - s.optional.ligand.anchor = parse_anchors(func_groups, split) + s.optional.ligand.anchor = parse_anchors(func_groups, split=split) + s.optional.core.anchor = parse_anchors(s.optional.core.anchor, split=True, is_core=True) diff --git a/CAT/data_handling/validation_schemas.py b/CAT/data_handling/validation_schemas.py index e9a75d22..658efc63 100644 --- a/CAT/data_handling/validation_schemas.py +++ b/CAT/data_handling/validation_schemas.py @@ -303,21 +303,9 @@ def _get_crsjob() -> type: And(str, error='optional.core.dirname expects a string'), # Alias for `optional.core.anchor` - Optional_('dummy', default=None): # Return a tuple of atomic numbers - Or( - None, - And(val_int, Use(lambda n: to_atnum(int(n)))), - And(str, Use(to_atnum)), - error='optional.core.dummy expects a valid atomic number (int) or symbol (string)' - ), + Optional_('dummy', default=None): object, - Optional_('anchor', default=None): # Return a tuple of atomic 
numbers - Or( - None, - And(val_int, Use(lambda n: to_atnum(int(n)))), - And(str, Use(to_atnum)), - error='optional.core.anchor expects a valid atomic number (int) or symbol (string)' - ), + Optional_('anchor', default=None): object, Optional_('subset', default=None): Or(None, dict, error="optional.core.subset epected 'None' or a dictionary"), diff --git a/docs/4_optional.rst b/docs/4_optional.rst index 72c2b716..6b38bf7e 100644 --- a/docs/4_optional.rst +++ b/docs/4_optional.rst @@ -264,7 +264,23 @@ Core replaced with ligands. Alternatively, anchor atoms can be manually specified with the core_indices variable. - This optiona can alternatively be provided as ``optional.core.dummy``. + Further customization can be achieved by passing a dictionary: + + * :attr:`anchor.group <optional.ligand.anchor.group>` + * :attr:`anchor.group_idx <optional.ligand.anchor.group_idx>` + * :attr:`anchor.remove <optional.ligand.anchor.remove>` + + .. note:: + + .. code:: yaml + + optional: + core: + anchor: + group: "[H]Cl" # Remove HCl and attach at previous Cl position + group_idx: 1 + remove: [0, 1] + .. 
attribute:: optional.core.allignment diff --git a/tests/test_files/core/Cd68Se55_HCl.pdb b/tests/test_files/core/Cd68Se55_HCl.pdb new file mode 100644 index 00000000..e3559b95 --- /dev/null +++ b/tests/test_files/core/Cd68Se55_HCl.pdb @@ -0,0 +1,158 @@ +HETATM 1 CD1 UNL 1 -2.289 -1.440 4.440 1.00 0.00 CD +HETATM 2 CD2 UNL 1 -0.318 2.329 4.326 1.00 0.00 CD +HETATM 3 CD3 UNL 1 -3.999 1.773 2.377 1.00 0.00 CD +HETATM 4 CD4 UNL 1 -0.588 -4.659 2.363 1.00 0.00 CD +HETATM 5 CD5 UNL 1 1.405 -0.938 2.426 1.00 0.00 CD +HETATM 6 CD6 UNL 1 -2.417 -1.505 0.205 1.00 0.00 CD +HETATM 7 CD7 UNL 1 3.231 2.938 2.365 1.00 0.00 CD +HETATM 8 CD8 UNL 1 -0.413 2.470 0.205 1.00 0.00 CD +HETATM 9 CD9 UNL 1 -3.999 1.773 -1.967 1.00 0.00 CD +HETATM 10 CD10 UNL 1 3.089 -4.107 0.205 1.00 0.00 CD +HETATM 11 CD11 UNL 1 -0.588 -4.659 -1.953 1.00 0.00 CD +HETATM 12 CD12 UNL 1 4.943 -0.333 0.205 1.00 0.00 CD +HETATM 13 CD13 UNL 1 1.405 -0.938 -2.016 1.00 0.00 CD +HETATM 14 CD14 UNL 1 -2.289 -1.440 -4.031 1.00 0.00 CD +HETATM 15 CD15 UNL 1 3.231 2.938 -1.955 1.00 0.00 CD +HETATM 16 CD16 UNL 1 -0.318 2.329 -3.916 1.00 0.00 CD +HETATM 17 SE1 UNL 1 0.100 -0.282 4.676 1.00 0.00 SE +HETATM 18 SE2 UNL 1 -3.892 -0.896 2.381 1.00 0.00 SE +HETATM 19 SE3 UNL 1 -1.810 3.291 2.343 1.00 0.00 SE +HETATM 20 SE4 UNL 1 1.871 -3.620 2.522 1.00 0.00 SE +HETATM 21 SE5 UNL 1 -2.093 -4.216 0.205 1.00 0.00 SE +HETATM 22 SE6 UNL 1 3.816 0.331 2.524 1.00 0.00 SE +HETATM 23 SE7 UNL 1 -0.056 -0.200 0.205 1.00 0.00 SE +HETATM 24 SE8 UNL 1 -3.892 -0.896 -1.971 1.00 0.00 SE +HETATM 25 SE9 UNL 1 1.948 3.845 0.205 1.00 0.00 SE +HETATM 26 SE10 UNL 1 -1.810 3.291 -1.934 1.00 0.00 SE +HETATM 27 SE11 UNL 1 1.871 -3.620 -2.112 1.00 0.00 SE +HETATM 28 SE12 UNL 1 3.816 0.331 -2.115 1.00 0.00 SE +HETATM 29 SE13 UNL 1 0.100 -0.282 -4.266 1.00 0.00 SE +HETATM 30 CD17 UNL 1 -1.958 -1.602 8.502 1.00 0.00 CD +HETATM 31 CD18 UNL 1 -5.894 -2.212 6.041 1.00 0.00 CD +HETATM 32 CD19 UNL 1 -0.611 2.056 8.666 1.00 0.00 CD +HETATM 33 CD20 UNL 1 -3.879 
1.823 6.886 1.00 0.00 CD +HETATM 34 CD21 UNL 1 -7.445 0.924 4.086 1.00 0.00 CD +HETATM 35 CD22 UNL 1 -1.958 5.757 6.001 1.00 0.00 CD +HETATM 36 CD23 UNL 1 -5.397 5.035 4.085 1.00 0.00 CD +HETATM 37 CD24 UNL 1 -0.434 -4.650 6.735 1.00 0.00 CD +HETATM 38 CD25 UNL 1 -4.378 -5.264 4.367 1.00 0.00 CD +HETATM 39 CD26 UNL 1 1.552 -1.013 7.027 1.00 0.00 CD +HETATM 40 CD27 UNL 1 -6.387 -2.283 2.267 1.00 0.00 CD +HETATM 41 CD28 UNL 1 3.058 2.736 6.875 1.00 0.00 CD +HETATM 42 CD29 UNL 1 -7.713 1.034 0.205 1.00 0.00 CD +HETATM 43 CD30 UNL 1 1.387 6.305 4.373 1.00 0.00 CD +HETATM 44 CD31 UNL 1 -2.218 6.121 2.257 1.00 0.00 CD +HETATM 45 CD32 UNL 1 -5.669 5.150 0.205 1.00 0.00 CD +HETATM 46 CD33 UNL 1 1.291 -7.362 4.651 1.00 0.00 CD +HETATM 47 CD34 UNL 1 -2.805 -8.005 2.236 1.00 0.00 CD +HETATM 48 CD35 UNL 1 3.300 -4.139 4.934 1.00 0.00 CD +HETATM 49 CD36 UNL 1 -4.692 -5.381 0.205 1.00 0.00 CD +HETATM 50 CD37 UNL 1 5.077 -0.479 4.944 1.00 0.00 CD +HETATM 51 CD38 UNL 1 -6.387 -2.283 -1.857 1.00 0.00 CD +HETATM 52 CD39 UNL 1 6.336 3.082 4.746 1.00 0.00 CD +HETATM 53 CD40 UNL 1 -7.445 0.924 -3.676 1.00 0.00 CD +HETATM 54 CD41 UNL 1 4.505 6.710 2.234 1.00 0.00 CD +HETATM 55 CD42 UNL 1 1.285 6.616 0.205 1.00 0.00 CD +HETATM 56 CD43 UNL 1 -2.218 6.121 -1.847 1.00 0.00 CD +HETATM 57 CD44 UNL 1 -5.397 5.035 -3.675 1.00 0.00 CD +HETATM 58 CD45 UNL 1 4.759 -6.818 2.615 1.00 0.00 CD +HETATM 59 CD46 UNL 1 1.104 -7.499 0.205 1.00 0.00 CD +HETATM 60 CD47 UNL 1 -2.805 -8.005 -1.827 1.00 0.00 CD +HETATM 61 CD48 UNL 1 6.676 -3.517 2.558 1.00 0.00 CD +HETATM 62 CD49 UNL 1 -4.378 -5.264 -3.957 1.00 0.00 CD +HETATM 63 CD50 UNL 1 8.088 0.036 2.636 1.00 0.00 CD +HETATM 64 CD51 UNL 1 -5.894 -2.212 -5.632 1.00 0.00 CD +HETATM 65 CD52 UNL 1 6.464 3.326 0.205 1.00 0.00 CD +HETATM 66 CD53 UNL 1 -3.879 1.823 -6.476 1.00 0.00 CD +HETATM 67 CD54 UNL 1 4.505 6.710 -1.824 1.00 0.00 CD +HETATM 68 CD55 UNL 1 1.387 6.305 -3.963 1.00 0.00 CD +HETATM 69 CD56 UNL 1 -1.958 5.757 -5.591 1.00 0.00 CD +HETATM 70 CD57 UNL 
1 4.759 -6.818 -2.205 1.00 0.00 CD +HETATM 71 CD58 UNL 1 1.291 -7.362 -4.241 1.00 0.00 CD +HETATM 72 CD59 UNL 1 6.676 -3.517 -2.148 1.00 0.00 CD +HETATM 73 CD60 UNL 1 3.300 -4.139 -4.524 1.00 0.00 CD +HETATM 74 CD61 UNL 1 -0.434 -4.650 -6.325 1.00 0.00 CD +HETATM 75 CD62 UNL 1 8.088 0.036 -2.226 1.00 0.00 CD +HETATM 76 CD63 UNL 1 5.077 -0.479 -4.534 1.00 0.00 CD +HETATM 77 CD64 UNL 1 1.552 -1.013 -6.618 1.00 0.00 CD +HETATM 78 CD65 UNL 1 -1.958 -1.602 -8.092 1.00 0.00 CD +HETATM 79 CD66 UNL 1 6.336 3.082 -4.337 1.00 0.00 CD +HETATM 80 CD67 UNL 1 3.058 2.736 -6.465 1.00 0.00 CD +HETATM 81 CD68 UNL 1 -0.611 2.056 -8.256 1.00 0.00 CD +HETATM 82 SE14 UNL 1 0.132 -0.361 9.272 1.00 0.00 SE +HETATM 83 SE15 UNL 1 -3.746 -0.855 6.701 1.00 0.00 SE +HETATM 84 SE16 UNL 1 -7.855 -1.567 4.535 1.00 0.00 SE +HETATM 85 SE17 UNL 1 -1.653 3.209 6.490 1.00 0.00 SE +HETATM 86 SE18 UNL 1 -5.419 2.478 4.739 1.00 0.00 SE +HETATM 87 SE19 UNL 1 -3.690 6.898 4.505 1.00 0.00 SE +HETATM 88 SE20 UNL 1 1.927 -3.659 7.126 1.00 0.00 SE +HETATM 89 SE21 UNL 1 -1.959 -4.118 4.577 1.00 0.00 SE +HETATM 90 SE22 UNL 1 -6.051 -4.914 2.418 1.00 0.00 SE +HETATM 91 SE23 UNL 1 3.865 0.281 7.153 1.00 0.00 SE +HETATM 92 SE24 UNL 1 -7.872 -1.556 0.205 1.00 0.00 SE +HETATM 93 SE25 UNL 1 1.977 3.694 4.583 1.00 0.00 SE +HETATM 94 SE26 UNL 1 -5.448 2.474 0.205 1.00 0.00 SE +HETATM 95 SE27 UNL 1 0.093 7.417 2.417 1.00 0.00 SE +HETATM 96 SE28 UNL 1 -3.712 6.854 0.205 1.00 0.00 SE +HETATM 97 SE29 UNL 1 3.784 -6.929 4.962 1.00 0.00 SE +HETATM 98 SE30 UNL 1 -0.201 -7.484 2.448 1.00 0.00 SE +HETATM 99 SE31 UNL 1 -4.328 -8.192 0.205 1.00 0.00 SE +HETATM 100 SE32 UNL 1 5.717 -3.056 4.915 1.00 0.00 SE +HETATM 101 SE33 UNL 1 -6.051 -4.914 -2.008 1.00 0.00 SE +HETATM 102 SE34 UNL 1 7.566 0.854 4.988 1.00 0.00 SE +HETATM 103 SE35 UNL 1 -7.855 -1.567 -4.125 1.00 0.00 SE +HETATM 104 SE36 UNL 1 5.703 4.348 2.470 1.00 0.00 SE +HETATM 105 SE37 UNL 1 -5.419 2.478 -4.329 1.00 0.00 SE +HETATM 106 SE38 UNL 1 3.732 8.039 0.205 1.00 0.00 
SE +HETATM 107 SE39 UNL 1 0.093 7.417 -2.007 1.00 0.00 SE +HETATM 108 SE40 UNL 1 -3.690 6.898 -4.095 1.00 0.00 SE +HETATM 109 SE41 UNL 1 3.635 -6.903 0.205 1.00 0.00 SE +HETATM 110 SE42 UNL 1 -0.201 -7.484 -2.038 1.00 0.00 SE +HETATM 111 SE43 UNL 1 5.501 -2.952 0.205 1.00 0.00 SE +HETATM 112 SE44 UNL 1 -1.959 -4.118 -4.168 1.00 0.00 SE +HETATM 113 SE45 UNL 1 7.487 0.938 0.205 1.00 0.00 SE +HETATM 114 SE46 UNL 1 -3.746 -0.855 -6.291 1.00 0.00 SE +HETATM 115 SE47 UNL 1 5.703 4.348 -2.061 1.00 0.00 SE +HETATM 116 SE48 UNL 1 1.977 3.694 -4.173 1.00 0.00 SE +HETATM 117 SE49 UNL 1 -1.653 3.209 -6.081 1.00 0.00 SE +HETATM 118 SE50 UNL 1 3.784 -6.929 -4.552 1.00 0.00 SE +HETATM 119 SE51 UNL 1 5.717 -3.056 -4.505 1.00 0.00 SE +HETATM 120 SE52 UNL 1 1.927 -3.659 -6.716 1.00 0.00 SE +HETATM 121 SE53 UNL 1 7.566 0.854 -4.578 1.00 0.00 SE +HETATM 122 SE54 UNL 1 3.865 0.281 -6.744 1.00 0.00 SE +HETATM 123 SE55 UNL 1 0.132 -0.361 -8.862 1.00 0.00 SE +HETATM 124 CL1 UNL 1 -3.323 1.946 -8.993 1.00 0.00 CL +HETATM 125 CL2 UNL 1 -3.745 -7.832 -4.119 1.00 0.00 CL +HETATM 126 CL3 UNL 1 -5.545 -4.630 6.617 1.00 0.00 CL +HETATM 127 CL4 UNL 1 3.807 7.373 -4.115 1.00 0.00 CL +HETATM 128 CL5 UNL 1 -3.323 1.946 9.403 1.00 0.00 CL +HETATM 129 CL6 UNL 1 8.979 -2.271 -1.867 1.00 0.00 CL +HETATM 130 CL7 UNL 1 -7.143 5.529 -1.941 1.00 0.00 CL +HETATM 131 CL8 UNL 1 -2.042 -4.119 -8.300 1.00 0.00 CL +HETATM 132 CL9 UNL 1 3.807 7.373 4.525 1.00 0.00 CL +HETATM 133 CL10 UNL 1 1.565 3.502 -8.419 1.00 0.00 CL +HETATM 134 CL11 UNL 1 -5.545 -4.630 -6.207 1.00 0.00 CL +HETATM 135 CL12 UNL 1 -2.042 -4.119 8.710 1.00 0.00 CL +HETATM 136 CL13 UNL 1 0.179 6.935 6.599 1.00 0.00 CL +HETATM 137 CL14 UNL 1 -7.143 5.529 2.351 1.00 0.00 CL +HETATM 138 CL15 UNL 1 5.308 4.220 -6.272 1.00 0.00 CL +HETATM 139 CL16 UNL 1 8.979 -2.271 2.277 1.00 0.00 CL +HETATM 140 CL17 UNL 1 5.308 4.220 6.682 1.00 0.00 CL +HETATM 141 CL18 UNL 1 7.121 -6.101 2.258 1.00 0.00 CL +HETATM 142 CL19 UNL 1 -0.268 -7.296 -6.169 1.00 0.00 CL 
+HETATM 143 CL20 UNL 1 -8.919 1.969 2.349 1.00 0.00 CL +HETATM 144 CL21 UNL 1 1.565 3.502 8.829 1.00 0.00 CL +HETATM 145 CL22 UNL 1 -3.745 -7.832 4.529 1.00 0.00 CL +HETATM 146 CL23 UNL 1 -0.268 -7.296 6.579 1.00 0.00 CL +HETATM 147 CL24 UNL 1 7.121 -6.101 -1.848 1.00 0.00 CL +HETATM 148 CL25 UNL 1 0.179 6.935 -6.189 1.00 0.00 CL +HETATM 149 CL26 UNL 1 -8.919 1.969 -1.939 1.00 0.00 CL +HETATM 150 H1 UNL 1 1.694 4.454 -10.173 1.00 0.00 H +HETATM 151 H2 UNL 1 6.335 5.225 -7.663 1.00 0.00 H +HETATM 152 H3 UNL 1 4.440 8.960 -5.154 1.00 0.00 H +HETATM 153 H4 UNL 1 0.043 8.404 -7.539 1.00 0.00 H +CONECT 133 150 +CONECT 138 151 +CONECT 127 152 +CONECT 148 153 +END diff --git a/tests/test_files/test_allignment.hdf5 b/tests/test_files/test_allignment.hdf5 index 53b5a744..0228bb55 100644 Binary files a/tests/test_files/test_allignment.hdf5 and b/tests/test_files/test_allignment.hdf5 differ diff --git a/tests/test_ligand_anchoring.py b/tests/test_ligand_anchoring.py index 03d24ab4..d578120f 100644 --- a/tests/test_ligand_anchoring.py +++ b/tests/test_ligand_anchoring.py @@ -275,11 +275,30 @@ def test_raise(self, inp: Any, exc_type: "type[Exception]") -> None: with pytest.raises(exc_type): parse_anchors(inp) + PARAM_RAISE_CORE: "OrderedDict[str, tuple[Any, type[Exception]]]" = OrderedDict( + none=(None, TypeError), + angle_offset=({"group": "Cl", "group_idx": 0, "angle_offset": 90}, TypeError), + dihedral=({"group": "Cl", "group_idx": 0, "dihedral": 90}, TypeError), + multiple=(["OC", "OCC"], NotImplementedError), + kind=({"group": "Cl", "group_idx": 0, "kind": "mean"}, NotImplementedError), + ) + + @pytest.mark.parametrize("inp,exc_type", PARAM_RAISE_CORE.values(), ids=PARAM_RAISE_CORE) + def test_raise_core(self, inp: Any, exc_type: "type[Exception]") -> None: + with pytest.raises(exc_type): + parse_anchors(inp, is_core=True) + _PARAM_PASS1 = OrderedDict( idx_scalar={"group": "OCC", "group_idx": 0}, idx_list={"group": "OCC", "group_idx": [0]}, list=[{"group": "OCC", 
"group_idx": 0}], - str=["O(C)[H]"], + str_COH=["O(C)[H]"], + str_Cd=["Cd"], + str_Cl=["Cl"], + int_Cd=[48], + int_Cl=[17], + group_cd={"group": "Cd", "group_idx": 0}, + group_cl={"group": "Cl", "group_idx": 0}, angle_unit={"group": "OCC", "group_idx": range(3), "angle_offset": "1 rad"}, angle_no_unit={"group": "OCC", "group_idx": range(3), "angle_offset": "180"}, angle_none={"group": "OCC", "group_idx": range(3), "angle_offset": None}, @@ -293,7 +312,13 @@ def test_raise(self, inp: Any, exc_type: "type[Exception]") -> None: idx_scalar=AnchorTup(None, group="OCC", group_idx=(0,)), idx_list=AnchorTup(None, group="OCC", group_idx=(0,)), list=AnchorTup(None, group="OCC", group_idx=(0,)), - str=AnchorTup(None, group="O(C)[H]", group_idx=(0,), remove=(2,)), + str_COH=AnchorTup(None, group="O(C)[H]", group_idx=(0,), remove=(2,)), + str_Cd=AnchorTup(None, group="[Cd]", group_idx=(0,), remove=(0,)), + str_Cl=AnchorTup(None, group="Cl", group_idx=(0,), remove=(0,)), + int_Cd=AnchorTup(None, group="[Cd]", group_idx=(0,), remove=(0,)), + int_Cl=AnchorTup(None, group="Cl", group_idx=(0,), remove=(0,)), + group_cd=AnchorTup(None, group="[Cd]", group_idx=(0,)), + group_cl=AnchorTup(None, group="Cl", group_idx=(0,)), angle_unit=AnchorTup(None, group="OCC", group_idx=(0, 1, 2), angle_offset=1.0), angle_no_unit=AnchorTup(None, group="OCC", group_idx=(0, 1, 2), angle_offset=math.pi), angle_none=AnchorTup(None, group="OCC", group_idx=(0, 1, 2), angle_offset=None), diff --git a/tests/test_ligand_attach.py b/tests/test_ligand_attach.py index 548b9621..caa7c26c 100644 --- a/tests/test_ligand_attach.py +++ b/tests/test_ligand_attach.py @@ -176,3 +176,58 @@ def test_bonds(self, output: AllignmentTup) -> None: np.testing.assert_array_equal(bonds.atom1, output.bonds_ref.atom1) np.testing.assert_array_equal(bonds.atom2, output.bonds_ref.atom2) np.testing.assert_allclose(bonds.order, output.bonds_ref.order) + + +class TestCoreAnchor: + PARAMS = { + "HCl": {"group": "[H]Cl", "group_idx": 0, 
"remove": 0} + } + + @pytest.fixture(scope="class", name="output", params=PARAMS.items(), ids=PARAMS) + def run_cat( + self, request: "_pytest.fixtures.SubRequest" + ) -> Generator[AllignmentTup, None, None]: + # Setup + name, kwargs = request.param # type: str, dict[str, Any] + yaml_path = PATH / 'CAT_allignment.yaml' + with open(yaml_path, 'r') as f1: + arg = Settings(yaml.load(f1, Loader=yaml.FullLoader)) + + arg.path = PATH + arg.input_cores = ["Cd68Se55_HCl.pdb"] + arg.optional.core.anchor = kwargs + qd_df, _, _ = prep(arg) + qd = qd_df[MOL].iloc[0] + + with h5py.File(PATH / "test_allignment.hdf5", "r") as f2: + atoms_ref = f2[f"TestCoreAnchor/{name}/atoms"][...].view(np.recarray) + bonds_ref = f2[f"TestCoreAnchor/{name}/bonds"][...].view(np.recarray) + yield AllignmentTup(qd, atoms_ref, bonds_ref, name) + + # Teardown + files = [LIG_PATH, QD_PATH, DB_PATH] + for file in files: + shutil.rmtree(file, ignore_errors=True) + + def test_atoms(self, output: AllignmentTup) -> None: + dtype = [("symbols", "S2"), ("coords", "f8", 3)] + iterator = ((at.symbol, at.coords) for at in output.mol) + atoms = np.fromiter(iterator, dtype=dtype).view(np.recarray) + + assertion.eq(atoms.dtype, output.atoms_ref.dtype) + np.testing.assert_array_equal(atoms.symbols, output.atoms_ref.symbols) + np.testing.assert_allclose(atoms.coords, output.atoms_ref.coords) + + def test_bonds(self, output: AllignmentTup) -> None: + dtype = [("atom1", "i8"), ("atom2", "i8"), ("order", "f8")] + try: + output.mol.set_atoms_id() + iterator = ((b.atom1.id, b.atom2.id, b.order) for b in output.mol.bonds) + bonds = np.fromiter(iterator, dtype=dtype).view(np.recarray) + finally: + output.mol.unset_atoms_id() + + assertion.eq(bonds.dtype, output.bonds_ref.dtype) + np.testing.assert_array_equal(bonds.atom1, output.bonds_ref.atom1) + np.testing.assert_array_equal(bonds.atom2, output.bonds_ref.atom2) + np.testing.assert_allclose(bonds.order, output.bonds_ref.order) diff --git a/tests/test_schemas.py 
b/tests/test_schemas.py index bc97f76a..40545b79 100644 --- a/tests/test_schemas.py +++ b/tests/test_schemas.py @@ -162,15 +162,6 @@ def test_core_schema() -> None: assertion.eq(core_schema.validate(core_dict), ref) - core_dict['anchor'] = 1.1 # Exception: incorrect value - assertion.assert_(core_schema.validate, core_dict, exception=SchemaError) - core_dict['anchor'] = 'H' - assertion.eq(core_schema.validate(core_dict)['anchor'], 1) - core_dict['anchor'] = 1 - assertion.eq(core_schema.validate(core_dict)['anchor'], 1) - core_dict['anchor'] = 1.0 - assertion.eq(core_schema.validate(core_dict)['anchor'], 1) - core_dict['allignment'] = 1.1 # Exception: incorrect type assertion.assert_(core_schema.validate, core_dict, exception=SchemaError) core_dict['allignment'] = 'bob' # Exception: incorrect value diff --git a/tests/test_validate_input.py b/tests/test_validate_input.py index 82eecb7e..3d41196d 100644 --- a/tests/test_validate_input.py +++ b/tests/test_validate_input.py @@ -46,7 +46,6 @@ def test_validate_input() -> None: ref = Settings() ref.core.dirname = join(PATH, 'core') - ref.core.anchor = 35 ref.core.allignment = AllignmentTup(AllignmentEnum.SURFACE, False) ref.core.subset = None @@ -98,11 +97,16 @@ def test_validate_input() -> None: ref.forcefield = Settings() - func_groups1 = s.optional.ligand.pop('anchor') - func_groups2 = s2.optional.ligand.pop('anchor') + lig_func_groups1 = s.optional.ligand.pop('anchor') + lig_func_groups2 = s2.optional.ligand.pop('anchor') + for tup in chain(lig_func_groups1, lig_func_groups2): + assertion.isinstance(tup.mol, Chem.Mol) - for tup in chain(func_groups1, func_groups2): + core_func_groups1 = s.optional.core.pop('anchor') + core_func_groups2 = s2.optional.core.pop('anchor') + for tup in chain(core_func_groups1, core_func_groups2): assertion.isinstance(tup.mol, Chem.Mol) + assertion.eq(s.optional, ref) assertion.eq(s2.optional, ref)