Skip to content

Commit

Permalink
Improved handling of SP/TB/OH reording in SMILES/SMARTS. (rdkit#6777)
Browse files Browse the repository at this point in the history
* Improved handling of SP/TB/OH reording in SMILES/SMARTS.

- add a getMaxNbors(tag) utility function to avoid repeated logic in multiple places
- tweak getChiralPermutation() for handling implicit/missing ligands (uses -1), allow inverse lookup
- use the tweaked chiral ordering in the reading/writing from SMILES

* clang-format run

* Reviewer requested changes.
- curly braces on all if conditions
- null check raw pointers with precondition.

* Correct additional test case introduced in the last year
  • Loading branch information
johnmay authored Oct 21, 2024
1 parent cf29f08 commit 2553cfc
Show file tree
Hide file tree
Showing 9 changed files with 193 additions and 91 deletions.
33 changes: 28 additions & 5 deletions Code/GraphMol/Canon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,20 @@ void removeRedundantBondDirSpecs(ROMol &mol, MolStack &molStack,
}
}

// insert (-1) for hydrogens or missing ligands, where these are placed
// depends on if it is the first atom or not
static void insertImplicitNbors(INT_LIST &bonds, const Atom::ChiralType tag,
const bool firstAtom) {
unsigned int ref_max = Chirality::getMaxNbors(tag);
if (bonds.size() < ref_max) {
if (firstAtom) {
bonds.insert(bonds.begin(), ref_max - bonds.size(), -1);
} else {
bonds.insert(++bonds.begin(), ref_max - bonds.size(), -1);
}
}
}

void canonicalizeFragment(ROMol &mol, int atomIdx,
std::vector<AtomColors> &colors,
const UINT_VECT &ranks, MolStack &molStack,
Expand Down Expand Up @@ -1070,13 +1084,18 @@ void canonicalizeFragment(ROMol &mol, int atomIdx,

// Check if the atom can be chiral, and if chirality needs inversion
const INT_LIST &trueOrder = atomTraversalBondOrder[atom->getIdx()];
if (trueOrder.size() >= 3) {

// Extra check needed if/when @AL1/@AL2 supported
if (trueOrder.size() >= 3 || Chirality::hasNonTetrahedralStereo(atom)) {
int nSwaps = 0;
int perm = 0;
if (Chirality::hasNonTetrahedralStereo(atom)) {
atom->getPropIfPresent(common_properties::_chiralPermutation, perm);
}

const unsigned int firstIdx = molStack.begin()->obj.atom->getIdx();
const bool firstInPart = atom->getIdx() == firstIdx;

// We have to make sure that trueOrder contains all the
// bonds, even if they won't be written to the SMILES
if (trueOrder.size() < atom->getDegree()) {
Expand All @@ -1092,20 +1111,24 @@ void canonicalizeFragment(ROMol &mol, int atomIdx,
if (!perm) {
nSwaps = atom->getPerturbationOrder(tOrder);
} else {
insertImplicitNbors(tOrder, atom->getChiralTag(), firstInPart);
perm = Chirality::getChiralPermutation(atom, tOrder);
}
} else {
if (!perm) {
nSwaps = atom->getPerturbationOrder(trueOrder);
} else {
perm = Chirality::getChiralPermutation(atom, trueOrder);
INT_LIST tOrder = trueOrder;
insertImplicitNbors(tOrder, atom->getChiralTag(), firstInPart);
perm = Chirality::getChiralPermutation(atom, tOrder);
}
}
// FIX: handle this case for non-tet stereo too

// in future this should be moved up and simplified, there should not
// be an option to not do chiral inversions
if (doChiralInversions &&
chiralAtomNeedsTagInversion(
mol, atom,
molStack.begin()->obj.atom->getIdx() == atom->getIdx(),
mol, atom, firstInPart,
atomRingClosures[atom->getIdx()].size())) {
// This is a special case. Here's an example:
// Our internal representation of a chiral center is equivalent
Expand Down
14 changes: 12 additions & 2 deletions Code/GraphMol/Chirality.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
#ifndef RD_CHIRALITY_20AUG2008_H
#define RD_CHIRALITY_20AUG2008_H
#include <RDGeneral/types.h>
#include <GraphMol/Bond.h>
#include <GraphMol/Atom.h> /* for Atom:ChiralType enum */
#include <GraphMol/Bond.h> /* for Bond::BondDir enum */
#include <boost/dynamic_bitset.hpp>
#include <limits>

Expand Down Expand Up @@ -208,8 +209,17 @@ RDKIT_GRAPHMOL_EXPORT double getIdealAngleBetweenLigands(const Atom *center,
const Atom *lig1,
const Atom *lig2);

RDKIT_GRAPHMOL_EXPORT unsigned int getMaxNbors(const Atom::ChiralType tag);

//
// Get the chiral permutation from the storage order of bonds on an atom
// to the desired output order (probe). Missing/implicit neihgbors can be
// represented with (-1). To get the inverse order, i.e. from the probe to the
// current storage order set (inverse=true)
//
RDKIT_GRAPHMOL_EXPORT unsigned int getChiralPermutation(const Atom *center,
const INT_LIST &probe);
const INT_LIST &probe,
bool inverse = false);
//! @}

RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &oss,
Expand Down
6 changes: 4 additions & 2 deletions Code/GraphMol/Depictor/catch_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ TEST_CASE("trigonal bipyramidal", "[nontetrahedral]") {
CHECK(v2.length() > v4.length());
}
SECTION("TB1 missing ax") {
auto m = "S[As@TB1](F)(Cl)Br"_smiles;
// // S[As@TB1](F)(Cl)(Br)* => S[As@TB7](*)(F)(Cl)Br
auto m = "S[As@TB7](F)(Cl)Br"_smiles;
REQUIRE(m);

CHECK_THAT(
Expand Down Expand Up @@ -193,7 +194,8 @@ TEST_CASE("octahedral", "[nontetrahedral]") {
CHECK(v5.length() > v6.length());
}
SECTION("OH1 missing one ligand") {
auto m = "O[Co@OH1](Cl)(C)(N)F"_smiles;
// O[Co@OH1](Cl)(C)(N)(F)* => O[Co@OH25](*)(Cl)(C)(N)F
auto m = "O[Co@OH25](Cl)(C)(N)F"_smiles;
REQUIRE(m);
CHECK(RDDepict::compute2DCoords(*m) == 0);
// std::cerr << MolToV3KMolBlock(*m) << std::endl;
Expand Down
3 changes: 2 additions & 1 deletion Code/GraphMol/DistGeomHelpers/catch_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ TEST_CASE("nontetrahedral stereo", "[nontetrahedral]") {
}

{
auto m = "Cl[Pt@SP1]([35Cl])[36Cl]"_smiles;
// Cl[Pt@SP1]([35Cl])([36Cl])* => Cl[Pt@SP3](*)([35Cl])[36Cl]
auto m = "Cl[Pt@SP3]([35Cl])[36Cl]"_smiles;
REQUIRE(m);
CHECK(Chirality::getChiralAcrossAtom(m->getAtomWithIdx(1),
m->getAtomWithIdx(0))
Expand Down
67 changes: 47 additions & 20 deletions Code/GraphMol/NontetrahedralStereo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,28 @@ int isTrigonalBipyramidalAxialAtom(const Atom *cen, const Atom *qry) {
return isTrigonalBipyramidalAxialBond(cen, bnd);
}

unsigned int getMaxNbors(const Atom::ChiralType tag) {
switch (tag) {
case Atom::CHI_TETRAHEDRAL_CW:
case Atom::CHI_TETRAHEDRAL_CCW:
case Atom::CHI_TETRAHEDRAL: // fall through
return 4;
case Atom::CHI_ALLENE:
return 2; // not used other than SMI/SMA parsers?
case Atom::CHI_SQUAREPLANAR:
return 4;
case Atom::CHI_TRIGONALBIPYRAMIDAL:
return 5;
case Atom::CHI_OCTAHEDRAL:
return 6;
default:
BOOST_LOG(rdWarningLog)
<< "Warning: unexpected chiral tag getMaxNbors(): " << tag
<< std::endl;
return 0;
}
}

Bond *getChiralAcrossBond(const Atom *cen, const Bond *qry) {
PRECONDITION(cen, "bad center pointer");
PRECONDITION(qry, "bad query pointer");
Expand All @@ -291,22 +313,6 @@ Bond *getChiralAcrossBond(const Atom *cen, const Bond *qry) {
"center and query must come from the same molecule");

Atom::ChiralType tag = cen->getChiralTag();
unsigned int ref_max = 0;

switch (tag) {
case Atom::ChiralType::CHI_SQUAREPLANAR:
ref_max = 4;
break;
case Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL:
ref_max = 5;
break;
case Atom::ChiralType::CHI_OCTAHEDRAL:
ref_max = 6;
break;
default:
return nullptr;
}

unsigned int perm = 0;
cen->getPropIfPresent(common_properties::_chiralPermutation, perm);
if (!perm) {
Expand All @@ -318,6 +324,7 @@ Bond *getChiralAcrossBond(const Atom *cen, const Bond *qry) {
Bond *ref[6];
int found = -1;

unsigned int ref_max = getMaxNbors(tag);
for (auto bnd : mol.atomBonds(cen)) {
if (count == ref_max) {
return nullptr;
Expand Down Expand Up @@ -466,7 +473,8 @@ bool hasNonTetrahedralStereo(const Atom *cen) {
tag == Atom::ChiralType::CHI_OCTAHEDRAL;
}

unsigned int getChiralPermutation(const Atom *cen, const INT_LIST &probe) {
unsigned int getChiralPermutation(const Atom *cen, const INT_LIST &probe,
bool inverse) {
PRECONDITION(cen, "bad center pointer");
PRECONDITION(cen->hasOwningMol(), "no owning mol");

Expand All @@ -479,12 +487,21 @@ unsigned int getChiralPermutation(const Atom *cen, const INT_LIST &probe) {
decltype(&swap_octahedral) swap_func = nullptr;
switch (cen->getChiralTag()) {
case Atom::ChiralType::CHI_OCTAHEDRAL:
if (probe.size() > 6) {
return 0;
}
swap_func = swap_octahedral;
break;
case Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL:
if (probe.size() > 5) {
return 0;
}
swap_func = swap_trigonalbipyramidal;
break;
case Atom::ChiralType::CHI_SQUAREPLANAR:
if (probe.size() > 4) {
return 0;
}
swap_func = swap_squareplanar;
break;
default:
Expand All @@ -498,15 +515,25 @@ unsigned int getChiralPermutation(const Atom *cen, const INT_LIST &probe) {
for (const auto bnd : cen->getOwningMol().atomBonds(cen)) {
order[bnd->getIdx()] = nbrIdx++;
}
CHECK_INVARIANT(nbrIdx == probe.size(), "probe vector size does not match");

// nbrPerm maps original index to array position
std::vector<unsigned int> nbrPerm(nbrIdx);
std::iota(nbrPerm.begin(), nbrPerm.end(), 0);
std::vector<unsigned int> probePerm(nbrIdx);
std::vector<unsigned int> probePerm(probe.size());
nbrIdx = 0;
for (auto v : probe) {
probePerm[nbrIdx++] = order[v];
probePerm[nbrIdx++] = v < 0 ? -1 : order[v];
}

// Missing (implicit) neighbors are at the end when in storage order
if (nbrPerm.size() < nbrIdx)
nbrPerm.insert(nbrPerm.end(), nbrIdx - nbrPerm.size(), -1);

CHECK_INVARIANT(nbrPerm.size() == probePerm.size(),
"probe vector size does not match");

if (inverse) {
std::swap(nbrPerm, probePerm);
}

boost::dynamic_bitset<> swapped(probe.size());
Expand Down
Loading

0 comments on commit 2553cfc

Please sign in to comment.