Skip to content

Commit

Permalink
Merge pull request #45 from alstonlo/master
Browse files Browse the repository at this point in the history
1.0.4
  • Loading branch information
MarioKrenn6240 authored Mar 24, 2021
2 parents 5e91b3d + 1e6685c commit f4e3d7a
Show file tree
Hide file tree
Showing 9 changed files with 163 additions and 17 deletions.
7 changes: 5 additions & 2 deletions docs/source/selfies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ By default, :mod:`selfies` operates under the following semantic constraints
+-----------+------------------------------+
| 4 | ``C`` |
+-----------+------------------------------+
| 6 | ``S`` |
| 5 | ``P`` |
+-----------+------------------------------+
| 7 | ``P`` |
| 6 | ``S`` |
+-----------+------------------------------+
| 8 | All other atoms |
+-----------+------------------------------+
Expand All @@ -65,5 +65,8 @@ of :mod:`selfies`.
Therefore, if custom semantic constraints are used, it is recommended to report
them for reproducibility reasons.

.. autofunction:: get_default_constraints
.. autofunction:: get_octet_rule_constraints
.. autofunction:: get_hypervalent_constraints
.. autofunction:: get_semantic_constraints
.. autofunction:: set_semantic_constraints
4 changes: 2 additions & 2 deletions docs/source/selfies_examples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@
"output_type": "stream",
"text": [
"Default Constraints:\n",
" {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'S': 6, 'S+1': 7, 'S-1': 5, 'P': 7, 'P+1': 8, 'P-1': 6, '?': 8}\n"
" {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'P': 5, 'P+1': 6, 'P-1': 4, 'S': 6, 'S+1': 7, 'S-1': 5, '?': 8}\n"
]
}
],
Expand Down Expand Up @@ -213,7 +213,7 @@
"output_type": "stream",
"text": [
"Updated Constraints:\n",
" {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'S': 2, 'S+1': 7, 'S-1': 5, 'P': 7, 'P+1': 8, 'P-1': 6, '?': 8, 'Li': 1}\n"
" {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'P': 5, 'P+1': 6, 'P-1': 4, 'S': 2, 'S+1': 7, 'S-1': 5, '?': 8, 'Li': 1}\n"
]
}
],
Expand Down
4 changes: 2 additions & 2 deletions examples/selfies_examples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@
"output_type": "stream",
"text": [
"Default Constraints:\n",
" {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'S': 6, 'S+1': 7, 'S-1': 5, 'P': 7, 'P+1': 8, 'P-1': 6, '?': 8}\n"
" {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'P': 5, 'P+1': 6, 'P-1': 4, 'S': 6, 'S+1': 7, 'S-1': 5, '?': 8}\n"
]
}
],
Expand Down Expand Up @@ -213,7 +213,7 @@
"output_type": "stream",
"text": [
"Updated Constraints:\n",
" {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'S': 2, 'S+1': 7, 'S-1': 5, 'P': 7, 'P+1': 8, 'P-1': 6, '?': 8, 'Li': 1}\n"
" {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'P': 5, 'P+1': 6, 'P-1': 4, 'S': 2, 'S+1': 7, 'S-1': 5, '?': 8, 'Li': 1}\n"
]
}
],
Expand Down
6 changes: 6 additions & 0 deletions selfies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
"encoder",
"decoder",
"get_semantic_robust_alphabet",
"get_default_constraints",
"get_octet_rule_constraints",
"get_hypervalent_constraints",
"get_semantic_constraints",
"set_semantic_constraints",
"len_selfies",
Expand All @@ -46,6 +49,9 @@
from .encoder import encoder
from .grammar_rules import (
get_semantic_robust_alphabet,
get_default_constraints,
get_octet_rule_constraints,
get_hypervalent_constraints,
get_semantic_constraints,
set_semantic_constraints,
)
Expand Down
52 changes: 47 additions & 5 deletions selfies/decoder.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
from collections import OrderedDict
from typing import Dict, Iterable, List, Optional, Tuple, Union

from selfies.grammar_rules import get_bond_from_num, get_n_from_symbols, \
get_next_branch_state, get_next_state, get_num_from_bond


def decoder(selfies: str, print_error: bool = False) -> Optional[str]:
from selfies.grammar_rules import (get_bond_from_num,
get_hypervalent_constraints,
get_n_from_symbols, get_next_branch_state,
get_next_state, get_num_from_bond,
get_octet_rule_constraints,
get_semantic_constraints,
set_semantic_constraints)


def decoder(selfies: str,
print_error: bool = False,
constraints: Optional[str] = None) -> Optional[str]:
"""Translates a SELFIES into a SMILES.
The SELFIES to SMILES translation operates based on the :mod:`selfies`
Expand All @@ -19,6 +26,10 @@ def decoder(selfies: str, print_error: bool = False) -> Optional[str]:
:param selfies: the SELFIES to be translated.
:param print_error: if True, error messages will be printed to console.
Defaults to False.
:param constraints: if ``'octet_rule'`` or ``'hypervalent'``,
the corresponding preset bond constraints will be used instead.
If ``None``, :func:`selfies.decoder` will use the
currently configured bond constraints. Defaults to ``None``.
:return: the SMILES translation of ``selfies``. If an error occurs,
and ``selfies`` cannot be translated, ``None`` is returned instead.
Expand All @@ -27,8 +38,33 @@ def decoder(selfies: str, print_error: bool = False) -> Optional[str]:
>>> import selfies
>>> selfies.decoder('[C][=C][F]')
'C=CF'
.. seealso:: The
`"octet_rule" <https://en.wikipedia.org/wiki/Octet_rule>`_
and
`"hypervalent" <https://en.wikipedia.org/wiki/Hypervalent_molecule>`_
preset bond constraints
can be viewed with :func:`selfies.get_octet_rule_constraints` and
:func:`selfies.get_hypervalent_constraints`, respectively. These
presets are variants of the "default" bond constraints, which can
be viewed with :func:`selfies.get_default_constraints`. Their
differences can be summarized as follows:
* def. : ``Cl``, ``Br``, ``I``: 1, ``N``: 3, ``P``: 5, ``P+1``: 6, ``P-1``: 4, ``S``: 6, ``S+1``: 7, ``S-1``: 5
* oct. : ``Cl``, ``Br``, ``I``: 1, ``N``: 3, ``P``: 3, ``P+1``: 4, ``P-1``: 2, ``S``: 2, ``S+1``: 3, ``S-1``: 1
* hyp. : ``Cl``, ``Br``, ``I``: 7, ``N``: 5, ``P``: 5, ``P+1``: 6, ``P-1``: 4, ``S``: 6, ``S+1``: 7, ``S-1``: 5
"""

old_constraints = get_semantic_constraints()
if constraints is None:
pass
elif constraints == 'octet_rule':
set_semantic_constraints(get_octet_rule_constraints())
elif constraints == 'hypervalent':
set_semantic_constraints(get_hypervalent_constraints())
else:
raise ValueError("unrecognized constraint type")

try:
all_smiles = [] # process dot-separated fragments separately

Expand All @@ -38,9 +74,15 @@ def decoder(selfies: str, print_error: bool = False) -> Optional[str]:
if smiles != "": # prevent malformed dots (e.g. [C]..[C], .[C][C])
all_smiles.append(smiles)

if constraints is not None: # restore old constraints
set_semantic_constraints(old_constraints)

return '.'.join(all_smiles)

except ValueError as err:
if constraints is not None: # restore old constraints
set_semantic_constraints(old_constraints)

if print_error:
print("Decoding error '{}': {}.".format(selfies, err))
return None
Expand Down
55 changes: 53 additions & 2 deletions selfies/grammar_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,21 @@
'O': 2, 'O+1': 3, 'O-1': 1,
'N': 3, 'N+1': 4, 'N-1': 2,
'C': 4, 'C+1': 5, 'C-1': 3,
'P': 5, 'P+1': 6, 'P-1': 4,
'S': 6, 'S+1': 7, 'S-1': 5,
'P': 7, 'P+1': 8, 'P-1': 6,
'?': 8,
'?': 8
}

# Preset "octet rule" table: the default table with S and P (and their
# +1 / -1 charged variants) lowered to octet-rule bond capacities.
octet_rule_bond_constraints = {
    **default_bond_constraints,
    'S': 2, 'S+1': 3, 'S-1': 1,
    'P': 3, 'P+1': 4, 'P-1': 2,
}

# Preset "hypervalent" table: the default table with Cl, Br and I set to a
# 7 bond capacity and N set to a 5 bond capacity.
hypervalent_bond_constraints = {
    **default_bond_constraints,
    'Cl': 7, 'Br': 7, 'I': 7,
    'N': 5,
}

# Table the module starts out operating on. This binds the very same dict
# object as default_bond_constraints; no copy is made here.
# NOTE(review): presumably the setter rebinds this name rather than mutating
# the dict in place -- confirm, otherwise the default preset could be altered.
_bond_constraints = default_bond_constraints


Expand Down Expand Up @@ -53,6 +63,47 @@ def get_semantic_robust_alphabet() -> Set[str]:
return alphabet_subset


def get_default_constraints() -> Dict[str, int]:
    """Returns a copy of the preset "default" bond constraint settings.

    A new dictionary is built on every call, so editing the returned
    mapping leaves the preset itself untouched.

    :return: a new dictionary holding the default constraint settings.
    """

    # Reading a module-level name needs no ``global`` declaration.
    return default_bond_constraints.copy()


def get_octet_rule_constraints() -> Dict[str, int]:
    """Returns a copy of the preset "octet rule" bond constraint settings.

    This preset is a stricter variant of the default constraints, chosen so
    that the `octet rule <https://en.wikipedia.org/wiki/Octet_rule>`_
    is obeyed. Specifically, ``S`` is limited to a 2 bond capacity and ``P``
    to a 3 bond capacity, with the charged variants ``S+``, ``S-``, ``P+``
    and ``P-`` adjusted accordingly.

    A new dictionary is built on every call, so editing the returned
    mapping leaves the preset itself untouched.

    :return: a new dictionary holding the octet rule constraint settings.
    """

    # Reading a module-level name needs no ``global`` declaration.
    return octet_rule_bond_constraints.copy()


def get_hypervalent_constraints() -> Dict[str, int]:
    """Returns a copy of the preset "hypervalent" bond constraint settings.

    This preset is a looser variant of the default constraints, intended to
    permit `hypervalent molecules
    <https://en.wikipedia.org/wiki/Hypervalent_molecule>`_.
    Specifically, ``Cl``, ``Br`` and ``I`` are given a 7 bond capacity, and
    ``N`` is given a 5 bond capacity.

    A new dictionary is built on every call, so editing the returned
    mapping leaves the preset itself untouched.

    :return: a new dictionary holding the hypervalent constraint settings.
    """

    # Reading a module-level name needs no ``global`` declaration.
    return hypervalent_bond_constraints.copy()


def get_semantic_constraints() -> Dict[str, int]:
"""Returns the semantic bond constraints that :mod:`selfies` is currently
operating on.
Expand Down
12 changes: 9 additions & 3 deletions tests/test_on_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,15 @@ def test_roundtrip_translation(test_name, column_name, dataset_samples):
"""

# modify semantic bond constraints
constraints = sf.get_semantic_constraints()
constraints['N'] = 6
sf.set_semantic_constraints(constraints)
sf.set_semantic_constraints({
'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1,
'O': 2, 'O+1': 3, 'O-1': 1,
'N': 6, 'N+1': 4, 'N-1': 2,
'C': 4, 'C+1': 5, 'C-1': 3,
'S': 6, 'S+1': 7, 'S-1': 5,
'P': 7, 'P+1': 8, 'P-1': 6,
'?': 8,
})

# file I/O
curr_dir = os.path.dirname(__file__)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_on_emolecules.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def test_roundtrip_translation():
"""

# modify constraints
constraints = sf.get_semantic_constraints()
constraints = sf.get_hypervalent_constraints()
constraints['N'] = 6
constraints['Br'] = 7
constraints['Cl'] = 7
Expand Down
Loading

0 comments on commit f4e3d7a

Please sign in to comment.