From 51266bb29af5988fefb49ada8f9cbd2a0366d196 Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 05:55:38 -0400 Subject: [PATCH 01/14] 1.0.4 --- docs/source/selfies.rst | 2 ++ selfies/__init__.py | 4 ++++ selfies/decoder.py | 32 +++++++++++++++++++++++++++++--- selfies/grammar_rules.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 3 deletions(-) diff --git a/docs/source/selfies.rst b/docs/source/selfies.rst index aaf2d921..dc0cbd81 100644 --- a/docs/source/selfies.rst +++ b/docs/source/selfies.rst @@ -65,5 +65,7 @@ of :mod:`selfies`. Therefore, if custom semantic constraints are used, it is recommended to report them for reproducibility reasons. +.. autofunction:: get_default_constraints +.. autofunction:: get_hypervalent_constraints .. autofunction:: get_semantic_constraints .. autofunction:: set_semantic_constraints diff --git a/selfies/__init__.py b/selfies/__init__.py index b0b85b3b..40cc7584 100644 --- a/selfies/__init__.py +++ b/selfies/__init__.py @@ -31,6 +31,8 @@ "encoder", "decoder", "get_semantic_robust_alphabet", + "get_default_constraints", + "get_hypervalent_constraints", "get_semantic_constraints", "set_semantic_constraints", "len_selfies", @@ -46,6 +48,8 @@ from .encoder import encoder from .grammar_rules import ( get_semantic_robust_alphabet, + get_default_constraints, + get_hypervalent_constraints, get_semantic_constraints, set_semantic_constraints, ) diff --git a/selfies/decoder.py b/selfies/decoder.py index 52d35b84..7ff19c41 100644 --- a/selfies/decoder.py +++ b/selfies/decoder.py @@ -1,11 +1,17 @@ from collections import OrderedDict from typing import Dict, Iterable, List, Optional, Tuple, Union -from selfies.grammar_rules import get_bond_from_num, get_n_from_symbols, \ - get_next_branch_state, get_next_state, get_num_from_bond +from selfies.grammar_rules import (get_bond_from_num, get_default_constraints, + get_hypervalent_constraints, + get_n_from_symbols, 
get_next_branch_state, + get_next_state, get_num_from_bond, + get_semantic_constraints, + set_semantic_constraints) -def decoder(selfies: str, print_error: bool = False) -> Optional[str]: +def decoder(selfies: str, + print_error: bool = False, + constraints: Optional[str] = None) -> Optional[str]: """Translates a SELFIES into a SMILES. The SELFIES to SMILES translation operates based on the :mod:`selfies` @@ -19,6 +25,10 @@ def decoder(selfies: str, print_error: bool = False) -> Optional[str]: :param selfies: the SELFIES to be translated. :param print_error: if True, error messages will be printed to console. Defaults to False. + :param constraints: if ``None``, :func:`selfies.decoder` will use the + currently configured bond constraints. If ``'default'`` or + ``'hypervalent'``, the corresponding preset bond constraints + will be used instead. :return: the SMILES translation of ``selfies``. If an error occurs, and ``selfies`` cannot be translated, ``None`` is returned instead. @@ -29,6 +39,16 @@ def decoder(selfies: str, print_error: bool = False) -> Optional[str]: 'C=CF' """ + old_constraints = get_semantic_constraints() + if constraints is None: + pass + elif constraints == 'default': + set_semantic_constraints(get_default_constraints()) + elif constraints == 'hypervalent': + set_semantic_constraints(get_hypervalent_constraints()) + else: + raise ValueError("unrecognized constraint type") + try: all_smiles = [] # process dot-separated fragments separately @@ -38,9 +58,15 @@ def decoder(selfies: str, print_error: bool = False) -> Optional[str]: if smiles != "": # prevent malformed dots (e.g. 
[C]..[C], .[C][C]) all_smiles.append(smiles) + if constraints is not None: # restore old constraints + set_semantic_constraints(old_constraints) + return '.'.join(all_smiles) except ValueError as err: + if constraints is not None: # restore old constraints + set_semantic_constraints(old_constraints) + if print_error: print("Decoding error '{}': {}.".format(selfies, err)) return None diff --git a/selfies/grammar_rules.py b/selfies/grammar_rules.py index 8a3d844f..3ac7b62a 100644 --- a/selfies/grammar_rules.py +++ b/selfies/grammar_rules.py @@ -2,6 +2,16 @@ from typing import Dict, List, Optional, Set, Tuple default_bond_constraints = { + 'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, + 'O': 2, 'O+1': 3, 'O-1': 1, + 'N': 3, 'N+1': 4, 'N-1': 2, + 'C': 4, 'C+1': 5, 'C-1': 3, + 'S': 2, 'S+1': 3, 'S-1': 1, + 'P': 3, 'P+1': 4, 'P-1': 2, + '?': 8, +} + +hypervalent_bond_constraints = { 'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, @@ -53,6 +63,24 @@ def get_semantic_robust_alphabet() -> Set[str]: return alphabet_subset +def get_default_constraints() -> Dict[str, int]: + """Returns the preset "default" bond constraint settings. + + :return: the default constraint settings. + """ + global default_bond_constraints + return dict(default_bond_constraints) + + +def get_hypervalent_constraints() -> Dict[str, int]: + """Returns the preset "hypervalent" bond constraint settings. + + :return: the hypervalent constraint settings. + """ + global hypervalent_bond_constraints + return dict(hypervalent_bond_constraints) + + def get_semantic_constraints() -> Dict[str, int]: """Returns the semantic bond constraints that :mod:`selfies` is currently operating on. 
From 4e00e172d03decc2840591892d693d80876ee999 Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 06:24:25 -0400 Subject: [PATCH 02/14] update tests --- tests/test_on_datasets.py | 12 +++++++++--- tests/test_on_emolecules.py | 2 +- tests/test_specific_cases.py | 38 ++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/tests/test_on_datasets.py b/tests/test_on_datasets.py index 9b2f7871..e4720eb8 100644 --- a/tests/test_on_datasets.py +++ b/tests/test_on_datasets.py @@ -32,9 +32,15 @@ def test_roundtrip_translation(test_name, column_name, dataset_samples): """ # modify semantic bond constraints - constraints = sf.get_semantic_constraints() - constraints['N'] = 6 - sf.set_semantic_constraints(constraints) + sf.set_semantic_constraints({ + 'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, + 'O': 2, 'O+1': 3, 'O-1': 1, + 'N': 6, 'N+1': 4, 'N-1': 2, + 'C': 4, 'C+1': 5, 'C-1': 3, + 'S': 6, 'S+1': 7, 'S-1': 5, + 'P': 7, 'P+1': 8, 'P-1': 6, + '?': 8, + }) # file I/O curr_dir = os.path.dirname(__file__) diff --git a/tests/test_on_emolecules.py b/tests/test_on_emolecules.py index 686f061f..3ac39373 100644 --- a/tests/test_on_emolecules.py +++ b/tests/test_on_emolecules.py @@ -40,7 +40,7 @@ def test_roundtrip_translation(): """ # modify constraints - constraints = sf.get_semantic_constraints() + constraints = sf.get_hypervalent_constraints() constraints['N'] = 6 constraints['Br'] = 7 constraints['Cl'] = 7 diff --git a/tests/test_specific_cases.py b/tests/test_specific_cases.py index 66f01140..96a8e7fc 100644 --- a/tests/test_specific_cases.py +++ b/tests/test_specific_cases.py @@ -3,11 +3,24 @@ import selfies as sf +def reset_alphabet(): + sf.set_semantic_constraints({ + 'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, + 'O': 2, 'O+1': 3, 'O-1': 1, + 'N': 6, 'N+1': 4, 'N-1': 2, + 'C': 4, 'C+1': 5, 'C-1': 3, + 'S': 6, 'S+1': 7, 'S-1': 5, + 'P': 7, 'P+1': 8, 'P-1': 6, + '?': 8, + }) + + def 
test_branch_and_ring_at_state_X0(): """Tests SELFIES with branches and rings at state X0 (i.e. at the very beginning of a SELFIES). These symbols should be skipped. """ + reset_alphabet() assert is_eq(sf.decoder("[Branch3_1][C][S][C][O]"), "CSCO") assert is_eq(sf.decoder("[Ring3][C][S][C][O]"), "CSCO") assert is_eq(sf.decoder("[Branch1_1][Ring1][Ring3][C][S][C][O]"), "CSCO") @@ -17,6 +30,8 @@ def test_branch_at_state_X1(): """Test SELFIES with branches at state X1 (i.e. at an atom that can only make one bond. In this case, the branch symbol should be skipped. """ + + reset_alphabet() assert is_eq(sf.decoder("[C][C][O][Branch1_1][C][I]"), "CCOCI") assert is_eq(sf.decoder("[C][C][C][O][Branch3_3][C][I]"), "CCCOCI") @@ -25,6 +40,7 @@ def test_branch_at_end_of_selfies(): """Test SELFIES that have a branch symbol as its very last symbol. """ + reset_alphabet() assert is_eq(sf.decoder("[C][C][C][C][Branch1_1]"), "CCCC") assert is_eq(sf.decoder("[C][C][C][C][Branch3_3]"), "CCCC") @@ -33,6 +49,7 @@ def test_ring_at_end_of_selfies(): """Test SELFIES that have a ring symbol as its very last symbol. """ + reset_alphabet() assert is_eq(sf.decoder("[C][C][C][C][C][Ring1]"), "CCCC=C") assert is_eq(sf.decoder("[C][C][C][C][C][Ring3]"), "CCCC=C") @@ -42,6 +59,7 @@ def test_branch_with_no_atoms(): Such branches should not be made in the outputted SMILES. """ + reset_alphabet() assert is_eq(sf.decoder("[C][Branch1_1][Ring2][Branch1_1]" "[Branch1_1][Branch1_1][F]"), "CF") @@ -62,6 +80,7 @@ def test_oversized_branch(): of the SELFIES """ + reset_alphabet() assert is_eq(sf.decoder("[C][Branch2_1][O][O][C][C][S][F][C]"), "C(CCSF)") assert is_eq(sf.decoder("[C][Branch2_3][O][O][#C][C][S][F]"), "C(#CCSF)") @@ -71,6 +90,7 @@ def test_oversized_ring(): previously derived atom does not exist. 
""" + reset_alphabet() assert is_eq(sf.decoder("[C][C][C][C][Ring1][O]"), "C1CCC1") assert is_eq(sf.decoder("[C][C][C][C][Ring2][O][C]"), "C1CCC1") @@ -86,6 +106,8 @@ def test_branch_at_beginning_of_branch(): """Test SELFIES that have a branch immediately at the start of a branch. """ + reset_alphabet() + # [C@]((Br)Cl)F assert is_eq(sf.decoder("[C@expl][Branch1_2][Branch1_1]" "[Branch1_1][C][Br]" @@ -111,6 +133,8 @@ def test_ring_at_beginning_of_branch(): """Test SELFIES that have a ring immediately at the start of a branch. """ + reset_alphabet() + # CC1CCC(1CCl)F assert is_eq(sf.decoder("[C][C][C][C][C][Branch1_1][Branch1_1]" "[Ring1][Ring2][C][Cl][F]"), @@ -127,6 +151,8 @@ def test_branch_and_ring_at_beginning_of_branch(): of a branch. """ + reset_alphabet() + # CC1CCCS((Br)1Cl)F assert is_eq(sf.decoder("[C][C][C][C][C][S][Branch1_2][Branch1_3]" "[Branch1_1][C][Br]" @@ -151,6 +177,8 @@ def test_ring_immediately_following_branch(): """Test SELFIES that have a ring immediately following after a branch. """ + reset_alphabet() + # CCC1CCCC(OCO)1 assert is_eq(sf.decoder("[C][C][C][C][C][C][C][Branch1_1][Ring2][O][C][O]" "[Ring1][Branch1_1]"), @@ -167,6 +195,8 @@ def test_ring_after_branch(): immediately after a branch. """ + reset_alphabet() + # CCCCCCC1(OCO)1 assert is_eq(sf.decoder("[C][C][C][C][C][C][C][Branch1_1][Ring2][O][C][O]" "[C][Ring1][Branch1_1]"), @@ -182,6 +212,8 @@ def test_ring_on_top_of_existing_bond(): in the main scaffold. """ + reset_alphabet() + # C1C1, C1C=1, C1C#1, ... assert is_eq(sf.decoder("[C][C][Ring1][C]"), "C=C") assert is_eq(sf.decoder("[C][/C][Ring1][C]"), "C=C") @@ -193,6 +225,7 @@ def test_consecutive_rings(): """Test SELFIES which have multiple consecutive rings. 
""" + reset_alphabet() assert is_eq(sf.decoder("[C][C][C][C][Ring1][Ring2][Ring1][Ring2]"), "C=1CCC=1") # 1 + 1 assert is_eq(sf.decoder("[C][C][C][C][Ring1][Ring2][Ring1][Ring2]" @@ -224,6 +257,7 @@ def test_unconstrained_symbols(): """Tests SELFIES with symbols that are not semantically constrained. """ + reset_alphabet() assert sf.decoder("[Xe-2expl][Branch1_1][C][F][Branch1_1][C][F]" "[Branch1_1][C][F][Branch1_1][C][F][Branch1_1][C][F]" "[Branch1_1][C][F][Branch1_1][C][F][Branch1_1][C][F]") \ @@ -247,6 +281,7 @@ def test_isotope_symbols(): constrained properly. """ + reset_alphabet() assert sf.decoder("[13Cexpl][Branch1_1][C][Cl][Branch1_1][C][F]" "[Branch1_1][C][Br][Branch1_1][C][I]") \ == "[13C](Cl)(F)(Br)CI" @@ -258,6 +293,7 @@ def test_chiral_symbols(): constrained properly. """ + reset_alphabet() assert sf.decoder("[C@@expl][Branch1_1][C][Cl][Branch1_1][C][F]" "[Branch1_1][C][Br][Branch1_1][C][I]") \ == "[C@@](Cl)(F)(Br)CI" @@ -271,6 +307,7 @@ def test_explicit_hydrogen_symbols(): are constrained properly. """ + reset_alphabet() assert sf.decoder("[CHexpl][Branch1_1][C][Cl][#C]") == "[CH](Cl)=C" assert sf.decoder("[CH3expl][=C]") == "[CH3]C" @@ -279,6 +316,7 @@ def test_charged_symbols(): """Tests that SELFIES symbols with charges are constrained properly. 
""" + reset_alphabet() constraints = sf.get_semantic_constraints() constraints['Sn+4'] = 1 constraints['O-2'] = 2 From e125c9a9f05ff6b7b1901b39a10ede740fa96479 Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 06:24:46 -0400 Subject: [PATCH 03/14] update docs --- docs/source/selfies.rst | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/docs/source/selfies.rst b/docs/source/selfies.rst index dc0cbd81..b7c727df 100644 --- a/docs/source/selfies.rst +++ b/docs/source/selfies.rst @@ -32,16 +32,12 @@ By default, :mod:`selfies` operates under the following semantic constraints +===========+==============================+ | 1 | ``F``, ``Cl``, ``Br``, ``I`` | +-----------+------------------------------+ - | 2 | ``O`` | + | 2 | ``O``, ``S`` | +-----------+------------------------------+ - | 3 | ``N`` | + | 3 | ``N``, ``P`` | +-----------+------------------------------+ | 4 | ``C`` | +-----------+------------------------------+ - | 6 | ``S`` | - +-----------+------------------------------+ - | 7 | ``P`` | - +-----------+------------------------------+ | 8 | All other atoms | +-----------+------------------------------+ From 6a99530b26e42fa1733ecb48516ee207956a6dba Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 14:44:43 -0400 Subject: [PATCH 04/14] Update grammar_rules.py --- selfies/grammar_rules.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/selfies/grammar_rules.py b/selfies/grammar_rules.py index 3ac7b62a..41c06ec6 100644 --- a/selfies/grammar_rules.py +++ b/selfies/grammar_rules.py @@ -12,12 +12,12 @@ } hypervalent_bond_constraints = { - 'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, + 'H': 1, 'F': 1, 'Cl': 7, 'Br': 7, 'I': 7, 'O': 2, 'O+1': 3, 'O-1': 1, - 'N': 3, 'N+1': 4, 'N-1': 2, + 'N': 5, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'S': 6, 'S+1': 7, 'S-1': 5, - 'P': 7, 'P+1': 8, 'P-1': 6, + 'P': 5, 
'P+1': 6, 'P-1': 4, '?': 8, } From 77e3ed4d62a2499fd059bcf46a58017bbf0e3a53 Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 15:04:55 -0400 Subject: [PATCH 05/14] spacing --- selfies/grammar_rules.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/selfies/grammar_rules.py b/selfies/grammar_rules.py index 41c06ec6..ed01b48e 100644 --- a/selfies/grammar_rules.py +++ b/selfies/grammar_rules.py @@ -68,6 +68,7 @@ def get_default_constraints() -> Dict[str, int]: :return: the default constraint settings. """ + global default_bond_constraints return dict(default_bond_constraints) @@ -77,6 +78,7 @@ def get_hypervalent_constraints() -> Dict[str, int]: :return: the hypervalent constraint settings. """ + global hypervalent_bond_constraints return dict(hypervalent_bond_constraints) From a291fe2fc9decdbf8e361082f88cfb8db29e2a7e Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 15:22:11 -0400 Subject: [PATCH 06/14] passing lint --- selfies/grammar_rules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selfies/grammar_rules.py b/selfies/grammar_rules.py index ed01b48e..cf575373 100644 --- a/selfies/grammar_rules.py +++ b/selfies/grammar_rules.py @@ -68,7 +68,7 @@ def get_default_constraints() -> Dict[str, int]: :return: the default constraint settings. 
""" - + global default_bond_constraints return dict(default_bond_constraints) From 7d4dcad2092fd8a35af85ec27e8533e015c9445e Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 16:25:47 -0400 Subject: [PATCH 07/14] remove constraint='default' --- selfies/decoder.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/selfies/decoder.py b/selfies/decoder.py index 7ff19c41..16622941 100644 --- a/selfies/decoder.py +++ b/selfies/decoder.py @@ -1,7 +1,7 @@ from collections import OrderedDict from typing import Dict, Iterable, List, Optional, Tuple, Union -from selfies.grammar_rules import (get_bond_from_num, get_default_constraints, +from selfies.grammar_rules import (get_bond_from_num, get_hypervalent_constraints, get_n_from_symbols, get_next_branch_state, get_next_state, get_num_from_bond, @@ -26,9 +26,8 @@ def decoder(selfies: str, :param print_error: if True, error messages will be printed to console. Defaults to False. :param constraints: if ``None``, :func:`selfies.decoder` will use the - currently configured bond constraints. If ``'default'`` or - ``'hypervalent'``, the corresponding preset bond constraints - will be used instead. + currently configured bond constraints. If ``'hypervalent'``, + the corresponding preset bond constraints will be used instead. :return: the SMILES translation of ``selfies``. If an error occurs, and ``selfies`` cannot be translated, ``None`` is returned instead. 
@@ -42,8 +41,6 @@ def decoder(selfies: str, old_constraints = get_semantic_constraints() if constraints is None: pass - elif constraints == 'default': - set_semantic_constraints(get_default_constraints()) elif constraints == 'hypervalent': set_semantic_constraints(get_hypervalent_constraints()) else: From f0f67263b4b3a0e0a8a5002416b12db9a09ecda8 Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 16:30:43 -0400 Subject: [PATCH 08/14] relax constraints of S --- docs/source/selfies.rst | 4 +++- selfies/grammar_rules.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/source/selfies.rst b/docs/source/selfies.rst index b7c727df..719cf413 100644 --- a/docs/source/selfies.rst +++ b/docs/source/selfies.rst @@ -32,12 +32,14 @@ By default, :mod:`selfies` operates under the following semantic constraints +===========+==============================+ | 1 | ``F``, ``Cl``, ``Br``, ``I`` | +-----------+------------------------------+ - | 2 | ``O``, ``S`` | + | 2 | ``O`` | +-----------+------------------------------+ | 3 | ``N``, ``P`` | +-----------+------------------------------+ | 4 | ``C`` | +-----------+------------------------------+ + | 6 | ``S`` | + +-----------+------------------------------+ | 8 | All other atoms | +-----------+------------------------------+ diff --git a/selfies/grammar_rules.py b/selfies/grammar_rules.py index cf575373..73a83b26 100644 --- a/selfies/grammar_rules.py +++ b/selfies/grammar_rules.py @@ -6,7 +6,7 @@ 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, - 'S': 2, 'S+1': 3, 'S-1': 1, + 'S': 6, 'S+1': 7, 'S-1': 5, 'P': 3, 'P+1': 4, 'P-1': 2, '?': 8, } From b5abb39a52eb298aeb56ea6630b5541eaa0f0a4d Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 17:11:40 -0400 Subject: [PATCH 09/14] adding octet rule --- docs/source/selfies.rst | 5 ++++- selfies/__init__.py | 2 ++ 
selfies/decoder.py | 5 +++-- selfies/grammar_rules.py | 32 +++++++++++++++++++++----------- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/docs/source/selfies.rst b/docs/source/selfies.rst index 719cf413..4b061f0d 100644 --- a/docs/source/selfies.rst +++ b/docs/source/selfies.rst @@ -34,10 +34,12 @@ By default, :mod:`selfies` operates under the following semantic constraints +-----------+------------------------------+ | 2 | ``O`` | +-----------+------------------------------+ - | 3 | ``N``, ``P`` | + | 3 | ``N`` | +-----------+------------------------------+ | 4 | ``C`` | +-----------+------------------------------+ + | 5 | ``P`` | + +-----------+------------------------------+ | 6 | ``S`` | +-----------+------------------------------+ | 8 | All other atoms | @@ -64,6 +66,7 @@ of :mod:`selfies`. them for reproducibility reasons. .. autofunction:: get_default_constraints +.. autofunction:: get_octet_rule_constraints .. autofunction:: get_hypervalent_constraints .. autofunction:: get_semantic_constraints .. autofunction:: set_semantic_constraints diff --git a/selfies/__init__.py b/selfies/__init__.py index 40cc7584..8b880c1b 100644 --- a/selfies/__init__.py +++ b/selfies/__init__.py @@ -32,6 +32,7 @@ "decoder", "get_semantic_robust_alphabet", "get_default_constraints", + "get_octet_rule_constraints", "get_hypervalent_constraints", "get_semantic_constraints", "set_semantic_constraints", @@ -49,6 +50,7 @@ from .grammar_rules import ( get_semantic_robust_alphabet, get_default_constraints, + get_octet_rule_constraints, get_hypervalent_constraints, get_semantic_constraints, set_semantic_constraints, diff --git a/selfies/decoder.py b/selfies/decoder.py index 16622941..a94a5a8d 100644 --- a/selfies/decoder.py +++ b/selfies/decoder.py @@ -25,9 +25,10 @@ def decoder(selfies: str, :param selfies: the SELFIES to be translated. :param print_error: if True, error messages will be printed to console. Defaults to False. 
- :param constraints: if ``None``, :func:`selfies.decoder` will use the - currently configured bond constraints. If ``'hypervalent'``, + :param constraints: if ``'octet_rule'`` or ``'hypervalent'``, the corresponding preset bond constraints will be used instead. + If ``None``, :func:`selfies.decoder` will use the + currently configured bond constraints. Defaults to ``None``. :return: the SMILES translation of ``selfies``. If an error occurs, and ``selfies`` cannot be translated, ``None`` is returned instead. diff --git a/selfies/grammar_rules.py b/selfies/grammar_rules.py index 73a83b26..2310a983 100644 --- a/selfies/grammar_rules.py +++ b/selfies/grammar_rules.py @@ -6,20 +6,20 @@ 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, + 'P': 5, 'P+1': 6, 'P-1': 4, 'S': 6, 'S+1': 7, 'S-1': 5, - 'P': 3, 'P+1': 4, 'P-1': 2, - '?': 8, + '?': 8 } -hypervalent_bond_constraints = { - 'H': 1, 'F': 1, 'Cl': 7, 'Br': 7, 'I': 7, - 'O': 2, 'O+1': 3, 'O-1': 1, - 'N': 5, 'N+1': 4, 'N-1': 2, - 'C': 4, 'C+1': 5, 'C-1': 3, - 'S': 6, 'S+1': 7, 'S-1': 5, - 'P': 5, 'P+1': 6, 'P-1': 4, - '?': 8, -} +octet_rule_bond_constraints = dict(default_bond_constraints) +octet_rule_bond_constraints.update( + {'S': 2, 'S+1': 3, 'S-1': 1, 'P': 3, 'P+1': 4, 'P-1': 2} +) + +hypervalent_bond_constraints = dict(default_bond_constraints) +hypervalent_bond_constraints.update( + {'Cl': 7, 'Br': 7, 'I': 7, 'N': 5} +) _bond_constraints = default_bond_constraints @@ -73,6 +73,16 @@ def get_default_constraints() -> Dict[str, int]: return dict(default_bond_constraints) +def get_octet_rule_constraints() -> Dict[str, int]: + """Returns the preset "octet rule" bond constraint settings. + + :return: the octet rule constraint settings. + """ + + global octet_rule_bond_constraints + return dict(octet_rule_bond_constraints) + + def get_hypervalent_constraints() -> Dict[str, int]: """Returns the preset "hypervalent" bond constraint settings. 
From da6fa07ed21306039bbb5008079cc17ac72efe86 Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 17:19:03 -0400 Subject: [PATCH 10/14] Update grammar_rules.py --- selfies/grammar_rules.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/selfies/grammar_rules.py b/selfies/grammar_rules.py index 2310a983..e830124b 100644 --- a/selfies/grammar_rules.py +++ b/selfies/grammar_rules.py @@ -74,7 +74,11 @@ def get_default_constraints() -> Dict[str, int]: def get_octet_rule_constraints() -> Dict[str, int]: - """Returns the preset "octet rule" bond constraint settings. + """Returns the preset "octet rule" bond constraint settings. These + constraints are a harsher version of the default constraints, so that + the octet rule is obeyed. In particular, ``S`` and ``P`` are + restricted to a 2 and 3 bond capacity, respectively (and similarly with + ``S+``, ``S-``, ``P+``, ``P-``). :return: the octet rule constraint settings. """ @@ -84,7 +88,11 @@ def get_octet_rule_constraints() -> Dict[str, int]: def get_hypervalent_constraints() -> Dict[str, int]: - """Returns the preset "hypervalent" bond constraint settings. + """Returns the preset "hypervalent" bond constraint settings. These + constraints are a relaxed version of the default constraints, to allow + for hypervalent molecules. In particular, ``Cl``, ``Br``, and ``I`` + are relaxed to a 7 bond capacity, and ``N`` is relaxed to a 5 bond + capacity. :return: the hypervalent constraint settings. 
""" From a42e13f4698a0f9e6c35fa13adbcb43457bc798d Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 17:21:50 -0400 Subject: [PATCH 11/14] Update decoder.py --- selfies/decoder.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/selfies/decoder.py b/selfies/decoder.py index a94a5a8d..8e49bffa 100644 --- a/selfies/decoder.py +++ b/selfies/decoder.py @@ -5,6 +5,7 @@ get_hypervalent_constraints, get_n_from_symbols, get_next_branch_state, get_next_state, get_num_from_bond, + get_octet_rule_constraints, get_semantic_constraints, set_semantic_constraints) @@ -42,6 +43,8 @@ def decoder(selfies: str, old_constraints = get_semantic_constraints() if constraints is None: pass + elif constraints == 'octet_rule': + set_semantic_constraints(get_octet_rule_constraints()) elif constraints == 'hypervalent': set_semantic_constraints(get_hypervalent_constraints()) else: From 09870ec166863cc23ca757bf55347dd2290dc9f9 Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 18:51:11 -0400 Subject: [PATCH 12/14] See also for decoder --- selfies/decoder.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/selfies/decoder.py b/selfies/decoder.py index 8e49bffa..d9a6a0e4 100644 --- a/selfies/decoder.py +++ b/selfies/decoder.py @@ -38,6 +38,17 @@ def decoder(selfies: str, >>> import selfies >>> selfies.decoder('[C][=C][F]') 'C=CF' + + .. seealso:: The "octet_rule" and "hypervalent" preset bond constraints + can be viewed with :func:`selfies.get_octet_rule_constraints` and + :func:`selfies.get_hypervalent_constraints`, respectively. These + presets are variants of the "default" bond constraints, which can + be viewed with :func:`selfies.get_default_constraints`. Their + differences can be summarized as follows: + + * def. : ``Cl``, ``Br``, ``I``: 1, ``N``: 3, ``P``: 5, ``P+1``: 6, ``P-1``: 4, ``S``: 6, ``S+1``: 7, ``S-1``: 5 + * oct. 
: ``Cl``, ``Br``, ``I``: 1, ``N``: 3, ``P``: 3, ``P+1``: 4, ``P-1``: 2, ``S``: 2, ``S+1``: 3, ``S-1``: 1 + * hyp. : ``Cl``, ``Br``, ``I``: 7, ``N``: 5, ``P``: 5, ``P+1``: 6, ``P-1``: 4, ``S``: 6, ``S+1``: 7, ``S-1``: 5 """ old_constraints = get_semantic_constraints() From 4aa690ba20c09ffcfafd891da25cf3c4d68833b0 Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 19:09:21 -0400 Subject: [PATCH 13/14] updating documentation --- docs/source/selfies_examples.ipynb | 4 ++-- examples/selfies_examples.ipynb | 4 ++-- selfies/decoder.py | 6 +++++- selfies/grammar_rules.py | 6 ++++-- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/source/selfies_examples.ipynb b/docs/source/selfies_examples.ipynb index b2cfabf6..395cb8af 100644 --- a/docs/source/selfies_examples.ipynb +++ b/docs/source/selfies_examples.ipynb @@ -144,7 +144,7 @@ "output_type": "stream", "text": [ "Default Constraints:\n", - " {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'S': 6, 'S+1': 7, 'S-1': 5, 'P': 7, 'P+1': 8, 'P-1': 6, '?': 8}\n" + " {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'P': 5, 'P+1': 6, 'P-1': 4, 'S': 6, 'S+1': 7, 'S-1': 5, '?': 8}\n" ] } ], @@ -213,7 +213,7 @@ "output_type": "stream", "text": [ "Updated Constraints:\n", - " {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'S': 2, 'S+1': 7, 'S-1': 5, 'P': 7, 'P+1': 8, 'P-1': 6, '?': 8, 'Li': 1}\n" + " {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'P': 5, 'P+1': 6, 'P-1': 4, 'S': 2, 'S+1': 7, 'S-1': 5, '?': 8, 'Li': 1}\n" ] } ], diff --git a/examples/selfies_examples.ipynb b/examples/selfies_examples.ipynb index b2cfabf6..395cb8af 100644 --- 
a/examples/selfies_examples.ipynb +++ b/examples/selfies_examples.ipynb @@ -144,7 +144,7 @@ "output_type": "stream", "text": [ "Default Constraints:\n", - " {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'S': 6, 'S+1': 7, 'S-1': 5, 'P': 7, 'P+1': 8, 'P-1': 6, '?': 8}\n" + " {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'P': 5, 'P+1': 6, 'P-1': 4, 'S': 6, 'S+1': 7, 'S-1': 5, '?': 8}\n" ] } ], @@ -213,7 +213,7 @@ "output_type": "stream", "text": [ "Updated Constraints:\n", - " {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'S': 2, 'S+1': 7, 'S-1': 5, 'P': 7, 'P+1': 8, 'P-1': 6, '?': 8, 'Li': 1}\n" + " {'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'O': 2, 'O+1': 3, 'O-1': 1, 'N': 3, 'N+1': 4, 'N-1': 2, 'C': 4, 'C+1': 5, 'C-1': 3, 'P': 5, 'P+1': 6, 'P-1': 4, 'S': 2, 'S+1': 7, 'S-1': 5, '?': 8, 'Li': 1}\n" ] } ], diff --git a/selfies/decoder.py b/selfies/decoder.py index d9a6a0e4..0c4a3830 100644 --- a/selfies/decoder.py +++ b/selfies/decoder.py @@ -39,7 +39,11 @@ def decoder(selfies: str, >>> selfies.decoder('[C][=C][F]') 'C=CF' - .. seealso:: The "octet_rule" and "hypervalent" preset bond constraints + .. seealso:: The + `"octet_rule" <https://en.wikipedia.org/wiki/Octet_rule>`_ + and + `"hypervalent" <https://en.wikipedia.org/wiki/Hypervalent_molecule>`_ + preset bond constraints can be viewed with :func:`selfies.get_octet_rule_constraints` and :func:`selfies.get_hypervalent_constraints`, respectively. These presets are variants of the "default" bond constraints, which can diff --git a/selfies/grammar_rules.py b/selfies/grammar_rules.py index e830124b..604f3aa3 100644 --- a/selfies/grammar_rules.py +++ b/selfies/grammar_rules.py @@ -76,7 +76,8 @@ def get_default_constraints() -> Dict[str, int]: def get_octet_rule_constraints() -> Dict[str, int]: """Returns the preset "octet rule" bond constraint settings. 
These constraints are a harsher version of the default constraints, so that - the octet rule is obeyed. In particular, ``S`` and ``P`` are + the `octet rule <https://en.wikipedia.org/wiki/Octet_rule>`_ + is obeyed. In particular, ``S`` and ``P`` are restricted to a 2 and 3 bond capacity, respectively (and similarly with ``S+``, ``S-``, ``P+``, ``P-``). @@ -90,7 +91,8 @@ def get_octet_rule_constraints() -> Dict[str, int]: def get_hypervalent_constraints() -> Dict[str, int]: """Returns the preset "hypervalent" bond constraint settings. These constraints are a relaxed version of the default constraints, to allow - for hypervalent molecules. In particular, ``Cl``, ``Br``, and ``I`` + for `hypervalent molecules <https://en.wikipedia.org/wiki/Hypervalent_molecule>`_. + In particular, ``Cl``, ``Br``, and ``I`` are relaxed to a 7 bond capacity, and ``N`` is relaxed to a 5 bond capacity. From 1e6685c6bbb405e043870c08baf8bdee3fc71bc0 Mon Sep 17 00:00:00 2001 From: alstonlo <40709307+alstonlo@users.noreply.github.com> Date: Tue, 23 Mar 2021 19:44:01 -0400 Subject: [PATCH 14/14] line break for linter --- selfies/grammar_rules.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/selfies/grammar_rules.py b/selfies/grammar_rules.py index 604f3aa3..3aa0de2c 100644 --- a/selfies/grammar_rules.py +++ b/selfies/grammar_rules.py @@ -91,7 +91,8 @@ def get_octet_rule_constraints() -> Dict[str, int]: def get_hypervalent_constraints() -> Dict[str, int]: """Returns the preset "hypervalent" bond constraint settings. These constraints are a relaxed version of the default constraints, to allow - for `hypervalent molecules <https://en.wikipedia.org/wiki/Hypervalent_molecule>`_. + for `hypervalent molecules + <https://en.wikipedia.org/wiki/Hypervalent_molecule>`_. In particular, ``Cl``, ``Br``, and ``I`` are relaxed to a 7 bond capacity, and ``N`` is relaxed to a 5 bond capacity.