Skip to content

Commit

Permalink
Allow fragments of aromatic rings to match in RascalMCES (rdkit#8088)
Browse files Browse the repository at this point in the history
* Allow fragments of aromatic rings to match.

* Use existing code for checking bond is in ring.

---------

Co-authored-by: David Cosgrove <[email protected]>
  • Loading branch information
DavidACosgrove and David Cosgrove authored Dec 13, 2024
1 parent 403cd55 commit 6db3f98
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 8 deletions.
29 changes: 21 additions & 8 deletions Code/GraphMol/RascalMCES/RascalMCES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,8 +350,17 @@ bool checkAromaticRings(const ROMol &mol1,
if (!mol1Bond->getIsAromatic() || !mol2Bond->getIsAromatic()) {
return true;
}

// If neither bond was in a ring, but they were marked aromatic, then
// the two mols are fragments so it's ok to match these bonds.
const auto &mol1BondRings = mol1.getRingInfo()->bondRings();
const auto &mol2BondRings = mol2.getRingInfo()->bondRings();
bool mol1BondInRing = mol1.getRingInfo()->numBondRings(mol1BondIdx);
bool mol2BondInRing = mol2.getRingInfo()->numBondRings(mol2BondIdx);
if (!mol1BondInRing && !mol2BondInRing) {
return true;
}

for (size_t i = 0u; i < mol1BondRings.size(); ++i) {
if (std::find(mol1BondRings[i].begin(), mol1BondRings[i].end(),
mol1BondIdx) == mol1BondRings[i].end()) {
Expand Down Expand Up @@ -419,15 +428,17 @@ bool checkRingMatchesRing(const ROMol &mol1, int mol1BondIdx, const ROMol &mol2,
return true;
}

// Make the set of pairs of vertices, where they're a pair if the labels match.
// Make the set of pairs of vertices, where they're a pair if the labels
// match.
void buildPairs(const ROMol &mol1, const std::vector<unsigned int> &vtxLabels1,
const ROMol &mol2, const std::vector<unsigned int> &vtxLabels2,
const RascalOptions &opts,
std::vector<std::pair<int, int>> &vtxPairs) {
std::vector<std::string> mol1RingSmiles, mol2RingSmiles;
std::vector<std::unique_ptr<ROMol>> mol1Rings, mol2Rings;
// For these purposes, it is correct that n1cccc1 and [nH]1cccc1 match - the
// former would be from an N-substituted pyrrole, the latter from a plain one.
// former would be from an N-substituted pyrrole, the latter from a plain
// one.
static const std::regex reg(R"(\[([np])H\])");
if (opts.completeAromaticRings) {
extractRings(mol1, mol1Rings, mol1RingSmiles);
Expand Down Expand Up @@ -660,7 +671,8 @@ RWMol *makeCliqueFrags(const ROMol &mol,
return molFrags;
}

// Calculate the shortest bond distance between the 2 fragments in the molecule.
// Calculate the shortest bond distance between the 2 fragments in the
// molecule.
int minFragSeparation(const ROMol &mol, const ROMol &molFrags,
std::vector<int> &fragMapping, int frag1, int frag2) {
auto extractFragAtoms = [&](int fragNum, std::vector<int> &fragAtoms) {
Expand Down Expand Up @@ -758,7 +770,8 @@ void updateMaxClique(const std::vector<unsigned int> &clique, bool deltaYPoss,
}
}

// If the current time is beyond the timeout limit, throws a TimedOutException.
// If the current time is beyond the timeout limit, throws a
// TimedOutException.
void checkTimeout(
const std::chrono::time_point<std::chrono::high_resolution_clock>
&startTime,
Expand Down Expand Up @@ -924,8 +937,8 @@ void explorePartitions(
bool deltaYExchangePossible(const ROMol &mol1, const ROMol &mol2) {
// A Delta-y exchange is an incorrect match when a cyclopropyl ring (the
// delta) is matched to a C(C)(C) group (the y) because they both have
// isomorphic line graphs. This checks to see if that's something we need to
// worry about for these molecules.
// isomorphic line graphs. This checks to see if that's something we need
// to worry about for these molecules.
const static std::unique_ptr<ROMol> delta(SmartsToMol("C1CC1"));
const static std::unique_ptr<ROMol> y(SmartsToMol("C(C)C"));
return (hasSubstructMatch(mol1, *delta) && hasSubstructMatch(mol2, *y)) ||
Expand Down Expand Up @@ -1126,8 +1139,8 @@ std::vector<RascalResult> findMCES(RascalStartPoint &starter,
return results;
}

// calculate the RASCAL MCES between the 2 molecules, provided it is within the
// similarity threshold given.
// calculate the RASCAL MCES between the 2 molecules, provided it is within
// the similarity threshold given.
std::vector<RascalResult> rascalMCES(const ROMol &mol1, const ROMol &mol2,
const RascalOptions &opts) {
auto starter = makeInitialPartitionSet(&mol1, &mol2, opts);
Expand Down
16 changes: 16 additions & 0 deletions Code/GraphMol/RascalMCES/mces_catch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1412,3 +1412,19 @@ TEST_CASE("Atom aromaticity match") {
check_smarts_ok(*m1, *m2, res.front());
}
}

TEST_CASE("Aromatic fragment match") {
v2::SmilesParse::SmilesParserParams params;
params.sanitize = false;

auto m1 = v2::SmilesParse::MolFromSmiles("[1*]c([3*])nc[2*]", params);
REQUIRE(m1);
auto m2 = v2::SmilesParse::MolFromSmiles("[1*]c([3*])nc[2*]", params);
REQUIRE(m2);

RascalOptions opts;
auto res = rascalMCES(*m1, *m2, opts);
REQUIRE(res.size() == 1);
CHECK(res.front().getAtomMatches().size() == 6);
CHECK(res.front().getBondMatches().size() == 5);
}

0 comments on commit 6db3f98

Please sign in to comment.