Skip to content

Commit

Permalink
SynthonSpace search timeout (rdkit#8070)
Browse files Browse the repository at this point in the history
* Add timeout to searches.

* Correct docstring.

* Include chrono header.

* Get it compiling with gcc.

* And then clang didn't like it...

* Revert to tmpnam as msktemp isn't available on Windows.

* Response to review.
Run time no longer saved in SearchResults.
Timeout check not tied to size of results.
Made the test timeout shorter.

* Fix the Python wrapper.

* Shamelessly steal the better timeout method from PR8110.

* suggested changes

* be more conservative about what does not time out

the CI machines can be surprisingly slow

---------

Co-authored-by: David Cosgrove <[email protected]>
Co-authored-by: Greg Landrum <[email protected]>
  • Loading branch information
3 people authored Dec 21, 2024
1 parent 9bac197 commit b8effb8
Show file tree
Hide file tree
Showing 9 changed files with 162 additions and 108 deletions.
16 changes: 12 additions & 4 deletions Code/GraphMol/SynthonSpaceSearch/SearchResults.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ namespace RDKit::SynthonSpaceSearch {
class RDKIT_SYNTHONSPACESEARCH_EXPORT SearchResults {
public:
explicit SearchResults() : d_maxNumResults(0) {}
SearchResults(std::vector<std::unique_ptr<ROMol>> &&mols, size_t maxNumRes);
SearchResults(std::vector<std::unique_ptr<ROMol>> &&mols, size_t maxNumRes,
bool timedOut);
SearchResults(const SearchResults &other);
SearchResults(SearchResults &&other) = default;
~SearchResults() = default;
Expand All @@ -45,20 +46,27 @@ class RDKIT_SYNTHONSPACESEARCH_EXPORT SearchResults {
return d_hitMolecules;
}

/*!
* Returns whether the search timed out or not,
* @return bool
*/
bool getTimedOut() const { return d_timedOut; }

private:
std::vector<std::unique_ptr<ROMol>> d_hitMolecules;
size_t d_maxNumResults;
bool d_timedOut{false};
};

inline SearchResults::SearchResults(std::vector<std::unique_ptr<ROMol>> &&mols,
const size_t maxNumRes)
: d_maxNumResults(maxNumRes) {
const size_t maxNumRes, bool timedOut)
: d_maxNumResults(maxNumRes), d_timedOut(timedOut) {
d_hitMolecules = std::move(mols);
mols.clear();
}

inline SearchResults::SearchResults(const SearchResults &other)
: d_maxNumResults(other.d_maxNumResults) {
: d_maxNumResults(other.d_maxNumResults), d_timedOut(other.d_timedOut) {
for (const auto &hm : other.d_hitMolecules) {
d_hitMolecules.emplace_back(new ROMol(*hm));
}
Expand Down
69 changes: 0 additions & 69 deletions Code/GraphMol/SynthonSpaceSearch/SubstructureResults.h

This file was deleted.

6 changes: 4 additions & 2 deletions Code/GraphMol/SynthonSpaceSearch/SynthonSpace.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ struct RDKIT_SYNTHONSPACESEARCH_EXPORT SynthonSpaceSearchParams {
// times, a lower number will give faster searches at the
// risk of missing some hits. The value you use should have
// a positive correlation with your FOMO.
std::uint64_t timeOut{600}; // Maximum number of seconds to spend on a single
// search. 0 means no maximum.
};

// Holds the information about a set of hits. The molecules can be built
Expand Down Expand Up @@ -121,7 +123,7 @@ class RDKIT_SYNTHONSPACESEARCH_EXPORT SynthonSpace {
*
* @param query : query molecule
* @param params : (optional) settings for the search
* @return : the hits as a SubstructureResults object.
* @return : the hits as a SearchResults object.
*/
SearchResults substructureSearch(
const ROMol &query,
Expand All @@ -136,7 +138,7 @@ class RDKIT_SYNTHONSPACESEARCH_EXPORT SynthonSpace {
* @param fpGen: a FingerprintGenerator object that will provide the
* fingerprints for the similarity calculation
* @param params : (optional) settings for the search
* @return : the hits as a SubstructureResults object.
* @return : the hits as a SearchResults object.
*/
SearchResults fingerprintSearch(
const ROMol &query, const FingerprintGenerator<std::uint64_t> &fpGen,
Expand Down
66 changes: 56 additions & 10 deletions Code/GraphMol/SynthonSpaceSearch/SynthonSpaceSearcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@

namespace RDKit::SynthonSpaceSearch {

namespace {
bool checkTimeOut(const TimePoint *endTime) {
if (endTime != nullptr && Clock::now() > *endTime) {
BOOST_LOG(rdWarningLog) << "Timed out.\n";
return true;
}
return false;
}
} // namespace

SearchResults SynthonSpaceSearcher::search() {
if (d_params.randomSample && d_params.maxHits == -1) {
throw std::runtime_error(
Expand All @@ -44,7 +54,18 @@ SearchResults SynthonSpaceSearcher::search() {
auto fragments = details::splitMolecule(d_query, d_params.maxBondSplits);
std::vector<SynthonSpaceHitSet> allHits;
size_t totHits = 0;
TimePoint *endTime = nullptr;
TimePoint endTimePt;
if (d_params.timeOut > 0) {
endTimePt = Clock::now() + std::chrono::seconds(d_params.timeOut);
endTime = &endTimePt;
}
bool timedOut = false;
for (auto &fragSet : fragments) {
timedOut = checkTimeOut(endTime);
if (timedOut) {
break;
}
if (auto theseHits = searchFragSet(fragSet); !theseHits.empty()) {
totHits += std::accumulate(
theseHits.begin(), theseHits.end(), 0,
Expand All @@ -55,10 +76,11 @@ SearchResults SynthonSpaceSearcher::search() {
}
}

if (d_params.buildHits) {
buildHits(allHits, totHits, results);
if (!timedOut && d_params.buildHits) {
buildHits(allHits, totHits, endTime, timedOut, results);
}
return SearchResults{std::move(results), totHits};

return SearchResults{std::move(results), totHits, timedOut};
}

std::unique_ptr<ROMol> SynthonSpaceSearcher::buildAndVerifyHit(
Expand Down Expand Up @@ -97,6 +119,7 @@ std::unique_ptr<ROMol> SynthonSpaceSearcher::buildAndVerifyHit(

void SynthonSpaceSearcher::buildHits(
std::vector<SynthonSpaceHitSet> &hitsets, const size_t totHits,
const TimePoint *endTime, bool &timedOut,
std::vector<std::unique_ptr<ROMol>> &results) const {
if (hitsets.empty()) {
return;
Expand All @@ -119,9 +142,9 @@ void SynthonSpaceSearcher::buildHits(
std::set<std::string> resultsNames;

if (d_params.randomSample) {
buildRandomHits(hitsets, totHits, resultsNames, results);
buildRandomHits(hitsets, totHits, resultsNames, endTime, timedOut, results);
} else {
buildAllHits(hitsets, resultsNames, results);
buildAllHits(hitsets, resultsNames, endTime, timedOut, results);
}
}

Expand All @@ -141,8 +164,9 @@ void sortHits(std::vector<std::unique_ptr<ROMol>> &hits) {

void SynthonSpaceSearcher::buildAllHits(
const std::vector<SynthonSpaceHitSet> &hitsets,
std::set<std::string> &resultsNames,
std::vector<std::unique_ptr<ROMol>> &results) const {
std::set<std::string> &resultsNames, const TimePoint *endTime,
bool &timedOut, std::vector<std::unique_ptr<ROMol>> &results) const {
std::uint64_t numTries = 100;
for (const auto &[reactionId, synthonsToUse, numHits] : hitsets) {
std::vector<std::vector<size_t>> synthonNums;
synthonNums.reserve(synthonsToUse.size());
Expand Down Expand Up @@ -174,6 +198,18 @@ void SynthonSpaceSearcher::buildAllHits(
return;
}
stepper.step();
// Don't check the time every go, as it's quite expensive.
--numTries;
if (!numTries) {
numTries = 100;
timedOut = checkTimeOut(endTime);
if (timedOut) {
break;
}
}
}
if (timedOut) {
break;
}
}
sortHits(results);
Expand Down Expand Up @@ -240,14 +276,15 @@ struct RandomHitSelector {

void SynthonSpaceSearcher::buildRandomHits(
const std::vector<SynthonSpaceHitSet> &hitsets, const size_t totHits,
std::set<std::string> &resultsNames,
std::vector<std::unique_ptr<ROMol>> &results) const {
std::set<std::string> &resultsNames, const TimePoint *endTime,
bool &timedOut, std::vector<std::unique_ptr<ROMol>> &results) const {
if (hitsets.empty()) {
return;
}
auto rhs = RandomHitSelector(hitsets, d_space);

uint64_t numFails = 0;
std::uint64_t numFails = 0;
std::uint64_t numTries = 100;
while (results.size() < std::min(static_cast<std::uint64_t>(d_params.maxHits),
static_cast<std::uint64_t>(totHits)) &&
numFails < totHits * d_params.numRandomSweeps) {
Expand All @@ -258,6 +295,15 @@ void SynthonSpaceSearcher::buildRandomHits(
} else {
numFails++;
}
// Don't check the time every go, as it's quite expensive.
--numTries;
if (!numTries) {
numTries = 100;
timedOut = checkTimeOut(endTime);
if (timedOut) {
break;
}
}
}
sortHits(results);
}
Expand Down
7 changes: 7 additions & 0 deletions Code/GraphMol/SynthonSpaceSearch/SynthonSpaceSearcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@
#ifndef SYNTHONSPACESEARCHER_H
#define SYNTHONSPACESEARCHER_H

#include <chrono>
#include <boost/random.hpp>

#include <RDGeneral/export.h>
#include <GraphMol/SynthonSpaceSearch/SynthonSpace.h>
#include <GraphMol/SynthonSpaceSearch/SearchResults.h>

using Clock = std::chrono::steady_clock;
using TimePoint = std::chrono::time_point<Clock>;

namespace RDKit {
class ROMol;

Expand Down Expand Up @@ -77,12 +81,15 @@ class SynthonSpaceSearcher {
// but duplicate SMILES from different reactions will be. Hitsets will
// be re-ordered on exit.
void buildHits(std::vector<SynthonSpaceHitSet> &hitsets, size_t totHits,
const TimePoint *endTime, bool &timedOut,
std::vector<std::unique_ptr<ROMol>> &results) const;
void buildAllHits(const std::vector<SynthonSpaceHitSet> &hitsets,
std::set<std::string> &resultsNames,
const TimePoint *endTime, bool &timedOut,
std::vector<std::unique_ptr<ROMol>> &results) const;
void buildRandomHits(const std::vector<SynthonSpaceHitSet> &hitsets,
size_t totHits, std::set<std::string> &resultsNames,
const TimePoint *endTime, bool &timedOut,
std::vector<std::unique_ptr<ROMol>> &results) const;
// get the subset of synthons for the given reaction to use for this
// enumeration.
Expand Down
22 changes: 14 additions & 8 deletions Code/GraphMol/SynthonSpaceSearch/Wrap/rdSynthonSpaceSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,15 @@ struct SearchResults_wrapper {
"SubstructureResult", docString.c_str(), python::no_init)
.def("GetHitMolecules", hitMolecules_helper, python::args("self"),
"A function returning hits from the search")
.def_readonly("GetMaxNumResults",
&SynthonSpaceSearch::SearchResults::getMaxNumResults,
"The upper bound on number of results possible. There"
" may be fewer than this in practice for several reasons"
" such as duplicate reagent sets being removed or the"
" final product not matching the query even though the"
" synthons suggested they would.");
.def("GetMaxNumResults",
&SynthonSpaceSearch::SearchResults::getMaxNumResults,
"The upper bound on number of results possible. There"
" may be fewer than this in practice for several reasons"
" such as duplicate reagent sets being removed or the"
" final product not matching the query even though the"
" synthons suggested they would.")
.def("GetTimedOut", &SynthonSpaceSearch::SearchResults::getTimedOut,
"Returns whether the search timed out or not.");
}
};

Expand Down Expand Up @@ -141,7 +143,11 @@ BOOST_PYTHON_MODULE(rdSynthonSpaceSearch) {
&SynthonSpaceSearch::SynthonSpaceSearchParams::fragSimilarityAdjuster,
"Similarities of fragments are generally low due to low bit"
" densities. For the fragment matching, reduce the similarity cutoff"
" off by this amount. Default=0.3.");
" off by this amount. Default=0.1.")
.def_readwrite(
"timeOut", &SynthonSpaceSearch::SynthonSpaceSearchParams::timeOut,
"Time limit for search, in seconds. Default is 600s, 0 means no"
" timeout. Requires an integer");

docString = "SynthonSpaceSearch object.";
python::class_<SynthonSpaceSearch::SynthonSpace, boost::noncopyable>(
Expand Down
Loading

0 comments on commit b8effb8

Please sign in to comment.