diff --git a/Code/GraphMol/Canon.cpp b/Code/GraphMol/Canon.cpp index d48f05412e0..4faf0fda6b9 100644 --- a/Code/GraphMol/Canon.cpp +++ b/Code/GraphMol/Canon.cpp @@ -1001,6 +1001,20 @@ void removeRedundantBondDirSpecs(ROMol &mol, MolStack &molStack, } } +// insert (-1) for hydrogens or missing ligands, where these are placed +// depends on if it is the first atom or not +static void insertImplicitNbors(INT_LIST &bonds, const Atom::ChiralType tag, + const bool firstAtom) { + unsigned int ref_max = Chirality::getMaxNbors(tag); + if (bonds.size() < ref_max) { + if (firstAtom) { + bonds.insert(bonds.begin(), ref_max - bonds.size(), -1); + } else { + bonds.insert(++bonds.begin(), ref_max - bonds.size(), -1); + } + } +} + void canonicalizeFragment(ROMol &mol, int atomIdx, std::vector &colors, const UINT_VECT &ranks, MolStack &molStack, @@ -1070,13 +1084,18 @@ void canonicalizeFragment(ROMol &mol, int atomIdx, // Check if the atom can be chiral, and if chirality needs inversion const INT_LIST &trueOrder = atomTraversalBondOrder[atom->getIdx()]; - if (trueOrder.size() >= 3) { + + // Extra check needed if/when @AL1/@AL2 supported + if (trueOrder.size() >= 3 || Chirality::hasNonTetrahedralStereo(atom)) { int nSwaps = 0; int perm = 0; if (Chirality::hasNonTetrahedralStereo(atom)) { atom->getPropIfPresent(common_properties::_chiralPermutation, perm); } + const unsigned int firstIdx = molStack.begin()->obj.atom->getIdx(); + const bool firstInPart = atom->getIdx() == firstIdx; + // We have to make sure that trueOrder contains all the // bonds, even if they won't be written to the SMILES if (trueOrder.size() < atom->getDegree()) { @@ -1092,20 +1111,24 @@ void canonicalizeFragment(ROMol &mol, int atomIdx, if (!perm) { nSwaps = atom->getPerturbationOrder(tOrder); } else { + insertImplicitNbors(tOrder, atom->getChiralTag(), firstInPart); perm = Chirality::getChiralPermutation(atom, tOrder); } } else { if (!perm) { nSwaps = atom->getPerturbationOrder(trueOrder); } else { - perm = Chirality::getChiralPermutation(atom, trueOrder); + INT_LIST tOrder = trueOrder; + insertImplicitNbors(tOrder, atom->getChiralTag(), firstInPart); + perm = Chirality::getChiralPermutation(atom, tOrder); } } - // FIX: handle this case for non-tet stereo too + + // in future this should be moved up and simplified, there should not + // be an option to not do chiral inversions if (doChiralInversions && chiralAtomNeedsTagInversion( - mol, atom, - molStack.begin()->obj.atom->getIdx() == atom->getIdx(), + mol, atom, firstInPart, atomRingClosures[atom->getIdx()].size())) { // This is a special case. Here's an example: // Our internal representation of a chiral center is equivalent diff --git a/Code/GraphMol/Chirality.h b/Code/GraphMol/Chirality.h index 7284f924ec7..60e81befb6e 100644 --- a/Code/GraphMol/Chirality.h +++ b/Code/GraphMol/Chirality.h @@ -14,7 +14,8 @@ #ifndef RD_CHIRALITY_20AUG2008_H #define RD_CHIRALITY_20AUG2008_H #include -#include +#include /* for Atom:ChiralType enum */ +#include /* for Bond::BondDir enum */ #include #include @@ -208,8 +209,17 @@ RDKIT_GRAPHMOL_EXPORT double getIdealAngleBetweenLigands(const Atom *center, const Atom *lig1, const Atom *lig2); +RDKIT_GRAPHMOL_EXPORT unsigned int getMaxNbors(const Atom::ChiralType tag); + +// +// Get the chiral permutation from the storage order of bonds on an atom +// to the desired output order (probe). Missing/implicit neihgbors can be +// represented with (-1). To get the inverse order, i.e. from the probe to the +// current storage order set (inverse=true) +// RDKIT_GRAPHMOL_EXPORT unsigned int getChiralPermutation(const Atom *center, - const INT_LIST &probe); + const INT_LIST &probe, + bool inverse = false); //! @} RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &oss, diff --git a/Code/GraphMol/Depictor/catch_tests.cpp b/Code/GraphMol/Depictor/catch_tests.cpp index 8ce515985cb..d3e4f2bc111 100644 --- a/Code/GraphMol/Depictor/catch_tests.cpp +++ b/Code/GraphMol/Depictor/catch_tests.cpp @@ -120,7 +120,8 @@ TEST_CASE("trigonal bipyramidal", "[nontetrahedral]") { CHECK(v2.length() > v4.length()); } SECTION("TB1 missing ax") { - auto m = "S[As@TB1](F)(Cl)Br"_smiles; + // // S[As@TB1](F)(Cl)(Br)* => S[As@TB7](*)(F)(Cl)Br + auto m = "S[As@TB7](F)(Cl)Br"_smiles; REQUIRE(m); CHECK_THAT( @@ -193,7 +194,8 @@ TEST_CASE("octahedral", "[nontetrahedral]") { CHECK(v5.length() > v6.length()); } SECTION("OH1 missing one ligand") { - auto m = "O[Co@OH1](Cl)(C)(N)F"_smiles; + // O[Co@OH1](Cl)(C)(N)(F)* => O[Co@OH25](*)(Cl)(C)(N)F + auto m = "O[Co@OH25](Cl)(C)(N)F"_smiles; REQUIRE(m); CHECK(RDDepict::compute2DCoords(*m) == 0); // std::cerr << MolToV3KMolBlock(*m) << std::endl; diff --git a/Code/GraphMol/DistGeomHelpers/catch_tests.cpp b/Code/GraphMol/DistGeomHelpers/catch_tests.cpp index 3da666f7428..e3648610c57 100644 --- a/Code/GraphMol/DistGeomHelpers/catch_tests.cpp +++ b/Code/GraphMol/DistGeomHelpers/catch_tests.cpp @@ -244,7 +244,8 @@ TEST_CASE("nontetrahedral stereo", "[nontetrahedral]") { } { - auto m = "Cl[Pt@SP1]([35Cl])[36Cl]"_smiles; + // Cl[Pt@SP1]([35Cl])([36Cl])* => Cl[Pt@SP3](*)([35Cl])[36Cl] + auto m = "Cl[Pt@SP3]([35Cl])[36Cl]"_smiles; REQUIRE(m); CHECK(Chirality::getChiralAcrossAtom(m->getAtomWithIdx(1), m->getAtomWithIdx(0)) diff --git a/Code/GraphMol/NontetrahedralStereo.cpp b/Code/GraphMol/NontetrahedralStereo.cpp index 2689c1bf687..8df1cbc0ec7 100644 --- a/Code/GraphMol/NontetrahedralStereo.cpp +++ b/Code/GraphMol/NontetrahedralStereo.cpp @@ -283,6 +283,28 @@ int isTrigonalBipyramidalAxialAtom(const Atom *cen, const Atom *qry) { return isTrigonalBipyramidalAxialBond(cen, bnd); } +unsigned int getMaxNbors(const Atom::ChiralType tag) { + switch (tag) { + case Atom::CHI_TETRAHEDRAL_CW: + case Atom::CHI_TETRAHEDRAL_CCW: + case Atom::CHI_TETRAHEDRAL: // fall through + return 4; + case Atom::CHI_ALLENE: + return 2; // not used other than SMI/SMA parsers? + case Atom::CHI_SQUAREPLANAR: + return 4; + case Atom::CHI_TRIGONALBIPYRAMIDAL: + return 5; + case Atom::CHI_OCTAHEDRAL: + return 6; + default: + BOOST_LOG(rdWarningLog) + << "Warning: unexpected chiral tag getMaxNbors(): " << tag + << std::endl; + return 0; + } +} + Bond *getChiralAcrossBond(const Atom *cen, const Bond *qry) { PRECONDITION(cen, "bad center pointer"); PRECONDITION(qry, "bad query pointer"); @@ -291,22 +313,6 @@ Bond *getChiralAcrossBond(const Atom *cen, const Bond *qry) { "center and query must come from the same molecule"); Atom::ChiralType tag = cen->getChiralTag(); - unsigned int ref_max = 0; - - switch (tag) { - case Atom::ChiralType::CHI_SQUAREPLANAR: - ref_max = 4; - break; - case Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL: - ref_max = 5; - break; - case Atom::ChiralType::CHI_OCTAHEDRAL: - ref_max = 6; - break; - default: - return nullptr; - } - unsigned int perm = 0; cen->getPropIfPresent(common_properties::_chiralPermutation, perm); if (!perm) { @@ -318,6 +324,7 @@ Bond *getChiralAcrossBond(const Atom *cen, const Bond *qry) { Bond *ref[6]; int found = -1; + unsigned int ref_max = getMaxNbors(tag); for (auto bnd : mol.atomBonds(cen)) { if (count == ref_max) { return nullptr; @@ -466,7 +473,8 @@ bool hasNonTetrahedralStereo(const Atom *cen) { tag == Atom::ChiralType::CHI_OCTAHEDRAL; } -unsigned int getChiralPermutation(const Atom *cen, const INT_LIST &probe) { +unsigned int getChiralPermutation(const Atom *cen, const INT_LIST &probe, + bool inverse) { PRECONDITION(cen, "bad center pointer"); PRECONDITION(cen->hasOwningMol(), "no owning mol"); @@ -479,12 +487,21 @@ unsigned int getChiralPermutation(const Atom *cen, const INT_LIST &probe) { decltype(&swap_octahedral) swap_func = nullptr; switch (cen->getChiralTag()) { case Atom::ChiralType::CHI_OCTAHEDRAL: + if (probe.size() > 6) { + return 0; + } swap_func = swap_octahedral; break; case Atom::ChiralType::CHI_TRIGONALBIPYRAMIDAL: + if (probe.size() > 5) { + return 0; + } swap_func = swap_trigonalbipyramidal; break; case Atom::ChiralType::CHI_SQUAREPLANAR: + if (probe.size() > 4) { + return 0; + } swap_func = swap_squareplanar; break; default: @@ -498,15 +515,25 @@ unsigned int getChiralPermutation(const Atom *cen, const INT_LIST &probe) { for (const auto bnd : cen->getOwningMol().atomBonds(cen)) { order[bnd->getIdx()] = nbrIdx++; } - CHECK_INVARIANT(nbrIdx == probe.size(), "probe vector size does not match"); // nbrPerm maps original index to array position std::vector nbrPerm(nbrIdx); std::iota(nbrPerm.begin(), nbrPerm.end(), 0); - std::vector probePerm(nbrIdx); + std::vector probePerm(probe.size()); nbrIdx = 0; for (auto v : probe) { - probePerm[nbrIdx++] = order[v]; + probePerm[nbrIdx++] = v < 0 ? -1 : order[v]; + } + + // Missing (implicit) neighbors are at the end when in storage order + if (nbrPerm.size() < nbrIdx) + nbrPerm.insert(nbrPerm.end(), nbrIdx - nbrPerm.size(), -1); + + CHECK_INVARIANT(nbrPerm.size() == probePerm.size(), + "probe vector size does not match"); + + if (inverse) { + std::swap(nbrPerm, probePerm); } boost::dynamic_bitset<> swapped(probe.size()); diff --git a/Code/GraphMol/SmilesParse/SmilesParseOps.cpp b/Code/GraphMol/SmilesParse/SmilesParseOps.cpp index 4712bf2e8de..4cc50a7043d 100644 --- a/Code/GraphMol/SmilesParse/SmilesParseOps.cpp +++ b/Code/GraphMol/SmilesParse/SmilesParseOps.cpp @@ -205,67 +205,81 @@ bool operator<(const std::pair &p1, const std::pair &p2) { return p1.first < p2.first; } +// +// Helper function to get the SMILES bond ordering around a given atom, this is +// required to make sure the stereo information is correct as the storage order +// may not match how it is SMILES due to ring closures and implicit/missing +// ligands. +// +unsigned int GetBondOrdering(INT_LIST &bondOrdering, const RDKit::RWMol *mol, + const RDKit::Atom *atom) { + PRECONDITION(mol, "no mol"); + PRECONDITION(atom, "no atom"); + + // + // The atom is marked as chiral, set the SMILES-order of the + // atom's bonds. This is easy for non-ring-closure bonds, + // because the SMILES order is determined solely by the atom + // indices. Things are trickier for ring-closure bonds, which we + // need to insert into the list in a particular order + // + INT_VECT ringClosures; + atom->getPropIfPresent(common_properties::_RingClosures, ringClosures); + +#if 0 + std::cerr << "CLOSURES: "; + std::copy(ringClosures.begin(), ringClosures.end(), + std::ostream_iterator(std::cerr, " ")); + std::cerr << std::endl; +#endif + std::list neighbors; + // push this atom onto the list of neighbors (we'll use this + // to find our place later): + neighbors.emplace_back(atom->getIdx(), -1); + std::list bondOrder; + for (auto nbrIdx : boost::make_iterator_range(mol->getAtomNeighbors(atom))) { + const Bond *nbrBond = mol->getBondBetweenAtoms(atom->getIdx(), nbrIdx); + if (std::find(ringClosures.begin(), ringClosures.end(), + static_cast(nbrBond->getIdx())) == ringClosures.end()) { + neighbors.emplace_back(nbrIdx, nbrBond->getIdx()); + } + } + // sort the list of non-ring-closure bonds: + neighbors.sort(); + + // find the location of this atom. it pretty much has to be + // first in the list, e.g for smiles like [C@](F)(Cl)(Br)I, or + // second (everything else). + auto selfPos = neighbors.begin(); + if (selfPos->first != atom->getIdx()) { + ++selfPos; + } + CHECK_INVARIANT(selfPos->first == atom->getIdx(), "weird atom ordering"); + + // copy over the bond ids: + for (auto neighborIt = neighbors.begin(); neighborIt != neighbors.end(); + ++neighborIt) { + if (neighborIt != selfPos) { + bondOrdering.push_back(rdcast(neighborIt->second)); + } else { + // we are not going to add the atom itself, but we will push on + // ring closure bonds at this point (if required): + bondOrdering.insert(bondOrdering.end(), ringClosures.begin(), + ringClosures.end()); + } + } + + return ringClosures.size(); +} + void AdjustAtomChiralityFlags(RWMol *mol) { PRECONDITION(mol, "no molecule"); for (auto atom : mol->atoms()) { Atom::ChiralType chiralType = atom->getChiralTag(); if (chiralType == Atom::CHI_TETRAHEDRAL_CW || chiralType == Atom::CHI_TETRAHEDRAL_CCW) { - // - // The atom is marked as chiral, set the SMILES-order of the - // atom's bonds. This is easy for non-ring-closure bonds, - // because the SMILES order is determined solely by the atom - // indices. Things are trickier for ring-closure bonds, which we - // need to insert into the list in a particular order - // - INT_VECT ringClosures; - atom->getPropIfPresent(common_properties::_RingClosures, ringClosures); - -#if 0 - std::cerr << "CLOSURES: "; - std::copy(ringClosures.begin(), ringClosures.end(), - std::ostream_iterator(std::cerr, " ")); - std::cerr << std::endl; -#endif - std::list neighbors; - // push this atom onto the list of neighbors (we'll use this - // to find our place later): - neighbors.emplace_back(atom->getIdx(), -1); - std::list bondOrder; - for (auto nbrIdx : - boost::make_iterator_range(mol->getAtomNeighbors(atom))) { - Bond *nbrBond = mol->getBondBetweenAtoms(atom->getIdx(), nbrIdx); - if (std::find(ringClosures.begin(), ringClosures.end(), - static_cast(nbrBond->getIdx())) == - ringClosures.end()) { - neighbors.emplace_back(nbrIdx, nbrBond->getIdx()); - } - } - // sort the list of non-ring-closure bonds: - neighbors.sort(); - - // find the location of this atom. it pretty much has to be - // first in the list, e.g for smiles like [C@](F)(Cl)(Br)I, or - // second (everything else). - auto selfPos = neighbors.begin(); - if (selfPos->first != atom->getIdx()) { - ++selfPos; - } - CHECK_INVARIANT(selfPos->first == atom->getIdx(), "weird atom ordering"); - - // copy over the bond ids: INT_LIST bondOrdering; - for (auto neighborIt = neighbors.begin(); neighborIt != neighbors.end(); - ++neighborIt) { - if (neighborIt != selfPos) { - bondOrdering.push_back(rdcast(neighborIt->second)); - } else { - // we are not going to add the atom itself, but we will push on - // ring closure bonds at this point (if required): - bondOrdering.insert(bondOrdering.end(), ringClosures.begin(), - ringClosures.end()); - } - } + unsigned int numClosures = GetBondOrdering(bondOrdering, mol, atom); // ok, we now have the SMILES ordering of the bonds, figure out the // permutation order. @@ -297,7 +311,7 @@ void AdjustAtomChiralityFlags(RWMol *mol) { // if (Canon::chiralAtomNeedsTagInversion( *mol, atom, atom->hasProp(common_properties::_SmilesStart), - ringClosures.size())) { + numClosures)) { ++nSwaps; } // std::cerr << "nswaps " << atom->getIdx() << " " << nSwaps @@ -308,6 +322,26 @@ void AdjustAtomChiralityFlags(RWMol *mol) { if (nSwaps % 2) { atom->invertChirality(); } + } else if (chiralType == Atom::CHI_SQUAREPLANAR || + chiralType == Atom::CHI_TRIGONALBIPYRAMIDAL || + chiralType == Atom::CHI_OCTAHEDRAL) { + INT_LIST bonds; + GetBondOrdering(bonds, mol, atom); + + unsigned int ref_max = Chirality::getMaxNbors(chiralType); + + // insert (-1) for hydrogens or missing ligands, where these are placed + // depends on if it is the first atom or not + if (bonds.size() < ref_max) { + if (atom->hasProp(common_properties::_SmilesStart)) { + bonds.insert(bonds.begin(), ref_max - bonds.size(), -1); + } else { + bonds.insert(++bonds.begin(), ref_max - bonds.size(), -1); + } + } + + atom->setProp(common_properties::_chiralPermutation, + Chirality::getChiralPermutation(atom, bonds, true)); } } } // namespace SmilesParseOps diff --git a/Code/GraphMol/SmilesParse/nontetrahedral_tests.cpp b/Code/GraphMol/SmilesParse/nontetrahedral_tests.cpp index 4e3d5ec5f01..149880114a7 100644 --- a/Code/GraphMol/SmilesParse/nontetrahedral_tests.cpp +++ b/Code/GraphMol/SmilesParse/nontetrahedral_tests.cpp @@ -203,12 +203,12 @@ TEST_CASE("SP getChiralAcrossBond et al.") { REQUIRE(m); CHECK(Chirality::getChiralAcrossBond(m->getAtomWithIdx(1), m->getBondWithIdx(0)) - ->getIdx() == 2); + ->getIdx() == 1); CHECK(Chirality::getChiralAcrossBond(m->getAtomWithIdx(1), - m->getBondWithIdx(2)) + m->getBondWithIdx(1)) ->getIdx() == 0); CHECK(Chirality::getChiralAcrossBond(m->getAtomWithIdx(1), - m->getBondWithIdx(1)) == nullptr); + m->getBondWithIdx(2)) == nullptr); } } } diff --git a/Code/GraphMol/catch_chirality.cpp b/Code/GraphMol/catch_chirality.cpp index 0414852f17d..3abd59ecb0d 100644 --- a/Code/GraphMol/catch_chirality.cpp +++ b/Code/GraphMol/catch_chirality.cpp @@ -2339,7 +2339,8 @@ TEST_CASE("getIdealAngle", "[nontetrahedral]") { Catch::Matchers::WithinAbs(90, 0.001)); } SECTION("TB1 missing 1") { - auto m = "S[As@TB1](F)(Cl)Br"_smiles; + // S[As@TB1](F)(Cl)(Br)* => S[As@TB7](*)(F)(Cl)Br + auto m = "S[As@TB7](F)(Cl)Br"_smiles; REQUIRE(m); CHECK(Chirality::isTrigonalBipyramidalAxialAtom(m->getAtomWithIdx(1), @@ -2582,7 +2583,8 @@ TEST_CASE("nontetrahedral StereoInfo", "[nontetrahedral]") { std::vector{0, 2, 3, 4, 5, 6}); } SECTION("OH missing ligand") { - auto m = "C[Fe@OH9](F)(Cl)(O)N"_smiles; + // C[Fe@OH9](F)(Cl)(O)(N)* => C[Fe@OH4](*)(F)(Cl)(O)N + auto m = "C[Fe@OH4](F)(Cl)(O)N"_smiles; REQUIRE(m); auto sinfo = Chirality::findPotentialStereo(*m); REQUIRE(sinfo.size() == 1); diff --git a/Code/GraphMol/catch_graphmol.cpp b/Code/GraphMol/catch_graphmol.cpp index 9fe9ddc70dc..7c8a06943bb 100644 --- a/Code/GraphMol/catch_graphmol.cpp +++ b/Code/GraphMol/catch_graphmol.cpp @@ -3905,13 +3905,16 @@ TEST_CASE("atom output") { ss.str(""); } SECTION("chirality 2") { + // same as + // C[Pt@SP2]([H])(F)Cl which is stored internally as + // C[Pt@SP3](F)(Cl)[H] auto m = "C[Pt@SP2H](F)Cl"_smiles; REQUIRE(m); std::stringstream ss; ss << *m->getAtomWithIdx(1); CHECK( ss.str() == - "1 78 Pt chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2D chi: SqP(2) nbrs:[0 2 3]"); + "1 78 Pt chg: 0 deg: 3 exp: 4 imp: 0 hyb: SP2D chi: SqP(3) nbrs:[0 2 3]"); ss.str(""); } }