From 6db3f982cb0545feaeedd9adec88d9400a7dd970 Mon Sep 17 00:00:00 2001 From: David Cosgrove Date: Fri, 13 Dec 2024 06:20:24 +0000 Subject: [PATCH] Allow fragments of aromatic rings to match in RascalMCES (#8088) * Allow fragments of aromatic rings to match. * Use existing code for checking bond is in ring. --------- Co-authored-by: David Cosgrove --- Code/GraphMol/RascalMCES/RascalMCES.cpp | 29 ++++++++++++++++++------- Code/GraphMol/RascalMCES/mces_catch.cpp | 16 ++++++++++++++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/Code/GraphMol/RascalMCES/RascalMCES.cpp b/Code/GraphMol/RascalMCES/RascalMCES.cpp index 8306f23a2f6..4ee08112b3e 100644 --- a/Code/GraphMol/RascalMCES/RascalMCES.cpp +++ b/Code/GraphMol/RascalMCES/RascalMCES.cpp @@ -350,8 +350,17 @@ bool checkAromaticRings(const ROMol &mol1, if (!mol1Bond->getIsAromatic() || !mol2Bond->getIsAromatic()) { return true; } + + // If neither bond was in a ring, but they were marked aromatic, then + // the two mols are fragments so it's ok to match these bonds. const auto &mol1BondRings = mol1.getRingInfo()->bondRings(); const auto &mol2BondRings = mol2.getRingInfo()->bondRings(); + bool mol1BondInRing = mol1.getRingInfo()->numBondRings(mol1BondIdx); + bool mol2BondInRing = mol2.getRingInfo()->numBondRings(mol2BondIdx); + if (!mol1BondInRing && !mol2BondInRing) { + return true; + } + for (size_t i = 0u; i < mol1BondRings.size(); ++i) { if (std::find(mol1BondRings[i].begin(), mol1BondRings[i].end(), mol1BondIdx) == mol1BondRings[i].end()) { @@ -419,7 +428,8 @@ bool checkRingMatchesRing(const ROMol &mol1, int mol1BondIdx, const ROMol &mol2, return true; } -// Make the set of pairs of vertices, where they're a pair if the labels match. +// Make the set of pairs of vertices, where they're a pair if the labels +// match. void buildPairs(const ROMol &mol1, const std::vector &vtxLabels1, const ROMol &mol2, const std::vector &vtxLabels2, const RascalOptions &opts, @@ -427,7 +437,8 @@ void buildPairs(const ROMol &mol1, const std::vector &vtxLabels1, std::vector mol1RingSmiles, mol2RingSmiles; std::vector> mol1Rings, mol2Rings; // For these purposes, it is correct that n1cccc1 and [nH]1cccc1 match - the - // former would be from an N-substituted pyrrole, the latter from a plain one. + // former would be from an N-substituted pyrrole, the latter from a plain + // one. static const std::regex reg(R"(\[([np])H\])"); if (opts.completeAromaticRings) { extractRings(mol1, mol1Rings, mol1RingSmiles); @@ -660,7 +671,8 @@ RWMol *makeCliqueFrags(const ROMol &mol, return molFrags; } -// Calculate the shortest bond distance between the 2 fragments in the molecule. +// Calculate the shortest bond distance between the 2 fragments in the +// molecule. int minFragSeparation(const ROMol &mol, const ROMol &molFrags, std::vector &fragMapping, int frag1, int frag2) { auto extractFragAtoms = [&](int fragNum, std::vector &fragAtoms) { @@ -758,7 +770,8 @@ void updateMaxClique(const std::vector &clique, bool deltaYPoss, } } -// If the current time is beyond the timeout limit, throws a TimedOutException. +// If the current time is beyond the timeout limit, throws a +// TimedOutException. void checkTimeout( const std::chrono::time_point &startTime, @@ -924,8 +937,8 @@ void explorePartitions( bool deltaYExchangePossible(const ROMol &mol1, const ROMol &mol2) { // A Delta-y exchange is an incorrect match when a cyclopropyl ring (the // delta) is matched to a C(C)(C) group (the y) because they both have - // isomorphic line graphs. This checks to see if that's something we need to - // worry about for these molecules. + // isomorphic line graphs. This checks to see if that's something we need + // to worry about for these molecules. const static std::unique_ptr delta(SmartsToMol("C1CC1")); const static std::unique_ptr y(SmartsToMol("C(C)C")); return (hasSubstructMatch(mol1, *delta) && hasSubstructMatch(mol2, *y)) || @@ -1126,8 +1139,8 @@ std::vector findMCES(RascalStartPoint &starter, return results; } -// calculate the RASCAL MCES between the 2 molecules, provided it is within the -// similarity threshold given. +// calculate the RASCAL MCES between the 2 molecules, provided it is within +// the similarity threshold given. std::vector rascalMCES(const ROMol &mol1, const ROMol &mol2, const RascalOptions &opts) { auto starter = makeInitialPartitionSet(&mol1, &mol2, opts); diff --git a/Code/GraphMol/RascalMCES/mces_catch.cpp b/Code/GraphMol/RascalMCES/mces_catch.cpp index 7a317770e2d..bc3167e46b4 100644 --- a/Code/GraphMol/RascalMCES/mces_catch.cpp +++ b/Code/GraphMol/RascalMCES/mces_catch.cpp @@ -1412,3 +1412,19 @@ TEST_CASE("Atom aromaticity match") { check_smarts_ok(*m1, *m2, res.front()); } } + +TEST_CASE("Aromatic fragment match") { + v2::SmilesParse::SmilesParserParams params; + params.sanitize = false; + + auto m1 = v2::SmilesParse::MolFromSmiles("[1*]c([3*])nc[2*]", params); + REQUIRE(m1); + auto m2 = v2::SmilesParse::MolFromSmiles("[1*]c([3*])nc[2*]", params); + REQUIRE(m2); + + RascalOptions opts; + auto res = rascalMCES(*m1, *m2, opts); + REQUIRE(res.size() == 1); + CHECK(res.front().getAtomMatches().size() == 6); + CHECK(res.front().getBondMatches().size() == 5); +} \ No newline at end of file