Skip to content

Commit

Permalink
Fixed all the tests etc.
Browse files Browse the repository at this point in the history
Signed-off-by: Johannes Kalmbach <[email protected]>
  • Loading branch information
joka921 committed Jan 20, 2025
1 parent acb6633 commit faf4699
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 28 deletions.
99 changes: 74 additions & 25 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,9 @@ std::vector<QueryPlanner::SubtreePlan> QueryPlanner::optimize(
// (it might be, that the last join was optional and introduced new variables)
if (!candidatePlans.empty()) {
applyFiltersIfPossible<true>(candidatePlans[0], rootPattern->_filters);
applyTextLimitsIfPossible(candidatePlans[0], rootPattern->textLimits_,
applyTextLimitsIfPossible(candidatePlans[0],
TextLimitVec{rootPattern->textLimits_.begin(),
rootPattern->textLimits_.end()},
true);
checkCancellation();
}
Expand Down Expand Up @@ -1232,7 +1234,7 @@ void QueryPlanner::applyFiltersIfPossible(

// _____________________________________________________________________________
void QueryPlanner::applyTextLimitsIfPossible(
vector<QueryPlanner::SubtreePlan>& row, const TextLimitMap& textLimits,
vector<QueryPlanner::SubtreePlan>& row, const TextLimitVec& textLimits,
bool replace) const {
// Apply text limits if possible.
// A text limit can be applied to a plan if:
Expand Down Expand Up @@ -1307,7 +1309,7 @@ size_t QueryPlanner::findUniqueNodeIds(
std::vector<QueryPlanner::SubtreePlan>
QueryPlanner::runDynamicProgrammingOnConnectedComponent(
std::vector<SubtreePlan> connectedComponent,
const vector<SparqlFilter>& filters, const TextLimitMap& textLimits,
const vector<SparqlFilter>& filters, const TextLimitVec& textLimits,
const TripleGraph& tg) const {
vector<vector<QueryPlanner::SubtreePlan>> dpTab;
// find the unique number of nodes in the current connected component
Expand All @@ -1333,7 +1335,10 @@ QueryPlanner::runDynamicProgrammingOnConnectedComponent(
// be nonempty.
AD_CORRECTNESS_CHECK(!dpTab[k - 1].empty());
}
return std::move(dpTab.back());
auto& result = dpTab.back();
applyFiltersIfPossible<true>(result, filters);
applyTextLimitsIfPossible(result, textLimits, true);
return std::move(result);
}

// _____________________________________________________________________________
Expand Down Expand Up @@ -1373,32 +1378,74 @@ size_t QueryPlanner::countSubgraphs(
std::vector<QueryPlanner::SubtreePlan>
QueryPlanner::runGreedyPlanningOnConnectedComponent(
std::vector<SubtreePlan> connectedComponent,
const vector<SparqlFilter>& filters, const TextLimitMap& textLimits,
const vector<SparqlFilter>& filters, const TextLimitVec& textLimits,
const TripleGraph& tg) const {
auto& result = connectedComponent;
applyFiltersIfPossible<true>(result, filters);
applyTextLimitsIfPossible(result, textLimits, true);
size_t numSeeds = findUniqueNodeIds(result);

while (numSeeds > 1) {
const size_t numSeeds = findUniqueNodeIds(result);

// Intermediate variables that will be filled by the `greedyStep` lambda
// below.
using Plans = std::vector<SubtreePlan>;
Plans precomputedCache;
Plans nextResult;

// Perform a single step of greedy query planning.
// `nextResult` contains the result of the last step of greedy query planning.
// `input` contains all plans that have been chosen/combined so far (which
// might still be the initial start plans), except for the most recently
// chosen plan, which is stored in `nextResult`. `cache` contains all the
// plans that can be obtained by combining two plans in `input`. The function
// then performs one additional step of greedy planning and reinforces the
// above pre-/postconditions. Exception: if `isFirstStep` then `cache` and
// `nextResult` must be empty, and the first step of greedy planning is
// performed, which also establishes the pre-/postconditions.
auto greedyStep = [this, tg, filters, textLimits](Plans* input, Plans* cache,
Plans* nextResult,
bool isFirstStep) {
checkCancellation();
auto newPlans = merge(result, result, tg);
// We already have all combinations of two nodes in `input` in the cache, so
// we only have to add the combinations between `input` and `nextResult`. In
// the first step, we initially fill the cache.
auto newPlans = isFirstStep ? merge(*input, *input, tg)
: merge(*input, *nextResult, tg);
applyFiltersIfPossible<true>(newPlans, filters);
applyTextLimitsIfPossible(newPlans, textLimits, true);
auto smallestIdx = findSmallestExecutionTree(newPlans);
auto& cheapestNewTree = newPlans.at(smallestIdx);
size_t oldSize = result.size();
std::erase_if(result, [&cheapestNewTree](const auto& plan) {
// TODO<joka921> We can also assert some other invariants here.
return (cheapestNewTree._idsOfIncludedNodes & plan._idsOfIncludedNodes) !=
0;
});
result.push_back(std::move(cheapestNewTree));
AD_CORRECTNESS_CHECK(result.size() < oldSize);
numSeeds--;
AD_CORRECTNESS_CHECK(!newPlans.empty());
ql::ranges::move(newPlans, std::back_inserter(*cache));
ql::ranges::move(*nextResult, std::back_inserter(*input));
// All candidates for the next greedy step are in the `cache`, choose the
// cheapest one, remove it from the cache and make it the `nextResult`
{
auto smallestIdxNew = findSmallestExecutionTree(*cache);
auto& cheapestNewTree = cache->at(smallestIdxNew);
std::swap(cheapestNewTree, cache->back());
nextResult->clear();
nextResult->push_back(std::move(cache->back()));
cache->pop_back();
}
// All plans which are not disjoint with the newly chosen plan have to be
// deleted from the `input` and therefore also from the `cache`.
auto shouldBeErased = [&nextTree = nextResult->front()](const auto& plan) {
return (nextTree._idsOfIncludedNodes & plan._idsOfIncludedNodes) != 0;
};
std::erase_if(*input, shouldBeErased);
std::erase_if(*cache, shouldBeErased);
};

bool first = true;
if (numSeeds <= 1) {
// Only 0 or 1 nodes in the input, nothing to plan.
return std::move(result);
}

for ([[maybe_unused]] size_t i : ad_utility::integerRange(numSeeds - 1)) {
bool isFirstStep = std::exchange(first, false);
greedyStep(&result, &precomputedCache, &nextResult, isFirstStep);
}
// TODO<joka921> Assert that all seeds are covered by the result.
return std::move(result);
return nextResult;
}

// _____________________________________________________________________________
Expand All @@ -1418,6 +1465,7 @@ vector<vector<QueryPlanner::SubtreePlan>> QueryPlanner::fillDpTab(
components[componentIndices.at(i)].push_back(std::move(initialPlans.at(i)));
}
vector<vector<SubtreePlan>> lastDpRowFromComponents;
TextLimitVec textLimitVec(textLimits.begin(), textLimits.end());
for (auto& component : components | ql::views::values) {
std::vector<const SubtreePlan*> g;
for (const auto& plan : component) {
Expand All @@ -1433,8 +1481,8 @@ vector<vector<QueryPlanner::SubtreePlan>> QueryPlanner::fillDpTab(
auto impl = useGreedyPlanning
? &QueryPlanner::runGreedyPlanningOnConnectedComponent
: &QueryPlanner::runDynamicProgrammingOnConnectedComponent;
lastDpRowFromComponents.push_back(
std::invoke(impl, this, std::move(component), filters, textLimits, tg));
lastDpRowFromComponents.push_back(std::invoke(
impl, this, std::move(component), filters, textLimitVec, tg));
checkCancellation();
}
size_t numConnectedComponents = lastDpRowFromComponents.size();
Expand All @@ -1447,7 +1495,8 @@ vector<vector<QueryPlanner::SubtreePlan>> QueryPlanner::fillDpTab(
if (numConnectedComponents == 1) {
// A Cartesian product is not needed if there is only one component.
applyFiltersIfPossible<true>(lastDpRowFromComponents.back(), filters);
applyTextLimitsIfPossible(lastDpRowFromComponents.back(), textLimits, true);
applyTextLimitsIfPossible(lastDpRowFromComponents.back(), textLimitVec,
true);
return lastDpRowFromComponents;
}
// More than one connected component, set up a Cartesian product.
Expand Down Expand Up @@ -1478,7 +1527,7 @@ vector<vector<QueryPlanner::SubtreePlan>> QueryPlanner::fillDpTab(
plan._idsOfIncludedFilters = filterIds;
plan.idsOfIncludedTextLimits_ = textLimitIds;
applyFiltersIfPossible<true>(result.at(0), filters);
applyTextLimitsIfPossible(result.at(0), textLimits, true);
applyTextLimitsIfPossible(result.at(0), textLimitVec, true);
return result;
}

Expand Down
8 changes: 5 additions & 3 deletions src/engine/QueryPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
class QueryPlanner {
using TextLimitMap =
ad_utility::HashMap<Variable, parsedQuery::TextLimitMetaObject>;
using TextLimitVec =
std::vector<std::pair<Variable, parsedQuery::TextLimitMetaObject>>;
using CancellationHandle = ad_utility::SharedCancellationHandle;
template <typename T>
using vector = std::vector<T>;
Expand Down Expand Up @@ -400,7 +402,7 @@ class QueryPlanner {
// 1) There is no text operation for the text record column left.
// 2) The text limit has not already been applied to the plan.
void applyTextLimitsIfPossible(std::vector<SubtreePlan>& row,
const TextLimitMap& textLimits,
const TextLimitVec& textLimits,
bool replaceInsteadOfAddPlans) const;

/**
Expand Down Expand Up @@ -470,15 +472,15 @@ class QueryPlanner {
std::vector<QueryPlanner::SubtreePlan>
runDynamicProgrammingOnConnectedComponent(
std::vector<SubtreePlan> connectedComponent,
const vector<SparqlFilter>& filters, const TextLimitMap& textLimits,
const vector<SparqlFilter>& filters, const TextLimitVec& textLimits,
const TripleGraph& tg) const;

// Same as `runDynamicProgrammingOnConnectedComponent`, but uses a greedy
// algorithm that always greedily chooses the smallest result of the possible
// join operations using the "Greedy Operator Ordering (GOO)" algorithm.
std::vector<QueryPlanner::SubtreePlan> runGreedyPlanningOnConnectedComponent(
std::vector<SubtreePlan> connectedComponent,
const vector<SparqlFilter>& filters, const TextLimitMap& textLimits,
const vector<SparqlFilter>& filters, const TextLimitVec& textLimits,
const TripleGraph& tg) const;

// Return the number of connected subgraphs is the `graph`, or `budget + 1`,
Expand Down

0 comments on commit faf4699

Please sign in to comment.