From faf469962fa2200f6560cfa1223b72a264a6ad1b Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Mon, 20 Jan 2025 17:13:43 +0100 Subject: [PATCH 1/4] Fixed all the tests etc. Signed-off-by: Johannes Kalmbach --- src/engine/QueryPlanner.cpp | 99 +++++++++++++++++++++++++++---------- src/engine/QueryPlanner.h | 8 +-- 2 files changed, 79 insertions(+), 28 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 9dd6b5599c..d38bd66ac3 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -246,7 +246,9 @@ std::vector QueryPlanner::optimize( // (it might be, that the last join was optional and introduced new variables) if (!candidatePlans.empty()) { applyFiltersIfPossible(candidatePlans[0], rootPattern->_filters); - applyTextLimitsIfPossible(candidatePlans[0], rootPattern->textLimits_, + applyTextLimitsIfPossible(candidatePlans[0], + TextLimitVec{rootPattern->textLimits_.begin(), + rootPattern->textLimits_.end()}, true); checkCancellation(); } @@ -1232,7 +1234,7 @@ void QueryPlanner::applyFiltersIfPossible( // _____________________________________________________________________________ void QueryPlanner::applyTextLimitsIfPossible( - vector& row, const TextLimitMap& textLimits, + vector& row, const TextLimitVec& textLimits, bool replace) const { // Apply text limits if possible. // A text limit can be applied to a plan if: @@ -1307,7 +1309,7 @@ size_t QueryPlanner::findUniqueNodeIds( std::vector QueryPlanner::runDynamicProgrammingOnConnectedComponent( std::vector connectedComponent, - const vector& filters, const TextLimitMap& textLimits, + const vector& filters, const TextLimitVec& textLimits, const TripleGraph& tg) const { vector> dpTab; // find the unique number of nodes in the current connected component @@ -1333,7 +1335,10 @@ QueryPlanner::runDynamicProgrammingOnConnectedComponent( // be nonempty. AD_CORRECTNESS_CHECK(!dpTab[k - 1].empty()); } - return std::move(dpTab.back()); + auto& result = dpTab.back(); + applyFiltersIfPossible(result, filters); + applyTextLimitsIfPossible(result, textLimits, true); + return std::move(result); } // _____________________________________________________________________________ @@ -1373,32 +1378,74 @@ size_t QueryPlanner::countSubgraphs( std::vector QueryPlanner::runGreedyPlanningOnConnectedComponent( std::vector connectedComponent, - const vector& filters, const TextLimitMap& textLimits, + const vector& filters, const TextLimitVec& textLimits, const TripleGraph& tg) const { auto& result = connectedComponent; applyFiltersIfPossible(result, filters); applyTextLimitsIfPossible(result, textLimits, true); - size_t numSeeds = findUniqueNodeIds(result); - - while (numSeeds > 1) { + const size_t numSeeds = findUniqueNodeIds(result); + + // Intermediate variables that will be filled by the `greedyStep` lambda + // below. + using Plans = std::vector; + Plans precomputedCache; + Plans nextResult; + + // Perform a single step of greedy query planning. + // `nextResult` contains the result of the last step of greedy query planning. + // `input` contains all plans that have been chosen/combined so far (which + // might still be the initial start plans), except for the most recently + // chosen plan, which is stored in `nextResult`. `cache` contains all the + // plans that can be obtained by combining two plans in `input`. The function + // then performs one additional step of greedy planning and reinforces the + // above pre-/postconditions. Exception: if `isFirstStep` then `cache` and + // `nextResult` must be empty, and the first step of greedy planning is + // performed, which also establishes the pre-/postconditions. + auto greedyStep = [this, tg, filters, textLimits](Plans* input, Plans* cache, + Plans* nextResult, + bool isFirstStep) { checkCancellation(); - auto newPlans = merge(result, result, tg); + // We already have all combinations of two nodes in `input` in the cache, so + // we only have to add the combinations between `input` and `nextResult`. In + // the first step, we initially fill the cache. + auto newPlans = isFirstStep ? merge(*input, *input, tg) + : merge(*input, *nextResult, tg); applyFiltersIfPossible(newPlans, filters); applyTextLimitsIfPossible(newPlans, textLimits, true); - auto smallestIdx = findSmallestExecutionTree(newPlans); - auto& cheapestNewTree = newPlans.at(smallestIdx); - size_t oldSize = result.size(); - std::erase_if(result, [&cheapestNewTree](const auto& plan) { - // TODO We can also assert some other invariants here. - return (cheapestNewTree._idsOfIncludedNodes & plan._idsOfIncludedNodes) != - 0; - }); - result.push_back(std::move(cheapestNewTree)); - AD_CORRECTNESS_CHECK(result.size() < oldSize); - numSeeds--; + AD_CORRECTNESS_CHECK(!newPlans.empty()); + ql::ranges::move(newPlans, std::back_inserter(*cache)); + ql::ranges::move(*nextResult, std::back_inserter(*input)); + // All candidates for the next greedy step are in the `cache`, choose the + // cheapest one, remove it from the cache and make it the `nextResult` + { + auto smallestIdxNew = findSmallestExecutionTree(*cache); + auto& cheapestNewTree = cache->at(smallestIdxNew); + std::swap(cheapestNewTree, cache->back()); + nextResult->clear(); + nextResult->push_back(std::move(cache->back())); + cache->pop_back(); + } + // All plans which are not disjoint with the newly chosen plan have to be + // deleted from the `input` and therefore also from the `cache`. + auto shouldBeErased = [&nextTree = nextResult->front()](const auto& plan) { + return (nextTree._idsOfIncludedNodes & plan._idsOfIncludedNodes) != 0; + }; + std::erase_if(*input, shouldBeErased); + std::erase_if(*cache, shouldBeErased); + }; + + bool first = true; + if (numSeeds <= 1) { + // Only 0 or 1 nodes in the input, nothing to plan. + return std::move(result); + } + + for ([[maybe_unused]] size_t i : ad_utility::integerRange(numSeeds - 1)) { + bool isFirstStep = std::exchange(first, false); + greedyStep(&result, &precomputedCache, &nextResult, isFirstStep); } // TODO Assert that all seeds are covered by the result. - return std::move(result); + return nextResult; } // _____________________________________________________________________________ @@ -1418,6 +1465,7 @@ vector> QueryPlanner::fillDpTab( components[componentIndices.at(i)].push_back(std::move(initialPlans.at(i))); } vector> lastDpRowFromComponents; + TextLimitVec textLimitVec(textLimits.begin(), textLimits.end()); for (auto& component : components | ql::views::values) { std::vector g; for (const auto& plan : component) { @@ -1433,8 +1481,8 @@ vector> QueryPlanner::fillDpTab( auto impl = useGreedyPlanning ? &QueryPlanner::runGreedyPlanningOnConnectedComponent : &QueryPlanner::runDynamicProgrammingOnConnectedComponent; - lastDpRowFromComponents.push_back( - std::invoke(impl, this, std::move(component), filters, textLimits, tg)); + lastDpRowFromComponents.push_back(std::invoke( + impl, this, std::move(component), filters, textLimitVec, tg)); checkCancellation(); } size_t numConnectedComponents = lastDpRowFromComponents.size(); @@ -1447,7 +1495,8 @@ vector> QueryPlanner::fillDpTab( if (numConnectedComponents == 1) { // A Cartesian product is not needed if there is only one component. applyFiltersIfPossible(lastDpRowFromComponents.back(), filters); - applyTextLimitsIfPossible(lastDpRowFromComponents.back(), textLimits, true); + applyTextLimitsIfPossible(lastDpRowFromComponents.back(), textLimitVec, + true); return lastDpRowFromComponents; } // More than one connected component, set up a Cartesian product. @@ -1478,7 +1527,7 @@ vector> QueryPlanner::fillDpTab( plan._idsOfIncludedFilters = filterIds; plan.idsOfIncludedTextLimits_ = textLimitIds; applyFiltersIfPossible(result.at(0), filters); - applyTextLimitsIfPossible(result.at(0), textLimits, true); + applyTextLimitsIfPossible(result.at(0), textLimitVec, true); return result; } diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 52ee540a0a..049babcfb7 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -17,6 +17,8 @@ class QueryPlanner { using TextLimitMap = ad_utility::HashMap; + using TextLimitVec = + std::vector>; using CancellationHandle = ad_utility::SharedCancellationHandle; template using vector = std::vector; @@ -400,7 +402,7 @@ class QueryPlanner { // 1) There is no text operation for the text record column left. // 2) The text limit has not already been applied to the plan. void applyTextLimitsIfPossible(std::vector& row, - const TextLimitMap& textLimits, + const TextLimitVec& textLimits, bool replaceInsteadOfAddPlans) const; /** @@ -470,7 +472,7 @@ class QueryPlanner { std::vector runDynamicProgrammingOnConnectedComponent( std::vector connectedComponent, - const vector& filters, const TextLimitMap& textLimits, + const vector& filters, const TextLimitVec& textLimits, const TripleGraph& tg) const; // Same as `runDynamicProgrammingOnConnectedComponent`, but uses a greedy @@ -478,7 +480,7 @@ class QueryPlanner { // join operations using the "Greedy Operator Ordering (GOO)" algorithm. std::vector runGreedyPlanningOnConnectedComponent( std::vector connectedComponent, - const vector& filters, const TextLimitMap& textLimits, + const vector& filters, const TextLimitVec& textLimits, const TripleGraph& tg) const; // Return the number of connected subgraphs is the `graph`, or `budget + 1`, From a8847361034279bdac72136501782dcf1776783c Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Mon, 20 Jan 2025 19:46:52 +0100 Subject: [PATCH 2/4] capture by reference. Signed-off-by: Johannes Kalmbach --- src/engine/QueryPlanner.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index d38bd66ac3..04ab4b4c4c 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1401,9 +1401,9 @@ QueryPlanner::runGreedyPlanningOnConnectedComponent( // above pre-/postconditions. Exception: if `isFirstStep` then `cache` and // `nextResult` must be empty, and the first step of greedy planning is // performed, which also establishes the pre-/postconditions. - auto greedyStep = [this, tg, filters, textLimits](Plans* input, Plans* cache, - Plans* nextResult, - bool isFirstStep) { + auto greedyStep = [this, &tg, &filters, &textLimits]( + Plans* input, Plans* cache, Plans* nextResult, + bool isFirstStep) { checkCancellation(); // We already have all combinations of two nodes in `input` in the cache, so // we only have to add the combinations between `input` and `nextResult`. In From edea5cc1fb3cdcd8781724bbe09210cf410a5877 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Mon, 20 Jan 2025 20:20:28 +0100 Subject: [PATCH 3/4] Fix another warning. Signed-off-by: Johannes Kalmbach --- src/engine/QueryPlanner.cpp | 68 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 04ab4b4c4c..db07f2d4ce 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1380,16 +1380,17 @@ QueryPlanner::runGreedyPlanningOnConnectedComponent( std::vector connectedComponent, const vector& filters, const TextLimitVec& textLimits, const TripleGraph& tg) const { - auto& result = connectedComponent; - applyFiltersIfPossible(result, filters); - applyTextLimitsIfPossible(result, textLimits, true); - const size_t numSeeds = findUniqueNodeIds(result); + applyFiltersIfPossible(connectedComponent, filters); + applyTextLimitsIfPossible(connectedComponent, textLimits, true); + const size_t numSeeds = findUniqueNodeIds(connectedComponent); + if (numSeeds <= 1) { + // Only 0 or 1 nodes in the input, nothing to plan. + return connectedComponent; + } // Intermediate variables that will be filled by the `greedyStep` lambda // below. using Plans = std::vector; - Plans precomputedCache; - Plans nextResult; // Perform a single step of greedy query planning. // `nextResult` contains the result of the last step of greedy query planning. @@ -1401,48 +1402,47 @@ QueryPlanner::runGreedyPlanningOnConnectedComponent( // above pre-/postconditions. Exception: if `isFirstStep` then `cache` and // `nextResult` must be empty, and the first step of greedy planning is // performed, which also establishes the pre-/postconditions. - auto greedyStep = [this, &tg, &filters, &textLimits]( - Plans* input, Plans* cache, Plans* nextResult, - bool isFirstStep) { + auto greedyStep = [this, &tg, &filters, &textLimits, + input = std::move(connectedComponent), cache = Plans{}]( + Plans& nextBestPlan, bool isFirstStep) mutable { checkCancellation(); - // We already have all combinations of two nodes in `input` in the cache, so - // we only have to add the combinations between `input` and `nextResult`. In - // the first step, we initially fill the cache. - auto newPlans = isFirstStep ? merge(*input, *input, tg) - : merge(*input, *nextResult, tg); + // Normally, we already have all combinations of two nodes in `input` in the + // cache, so we only have to add the combinations between `input` and + // `nextResult`. In the first step, we need to initially compute all + // possible combinations. + auto newPlans = + isFirstStep ? merge(input, input, tg) : merge(input, nextBestPlan, tg); applyFiltersIfPossible(newPlans, filters); applyTextLimitsIfPossible(newPlans, textLimits, true); AD_CORRECTNESS_CHECK(!newPlans.empty()); - ql::ranges::move(newPlans, std::back_inserter(*cache)); - ql::ranges::move(*nextResult, std::back_inserter(*input)); + ql::ranges::move(newPlans, std::back_inserter(cache)); + ql::ranges::move(nextBestPlan, std::back_inserter(input)); + // All candidates for the next greedy step are in the `cache`, choose the // cheapest one, remove it from the cache and make it the `nextResult` { - auto smallestIdxNew = findSmallestExecutionTree(*cache); - auto& cheapestNewTree = cache->at(smallestIdxNew); - std::swap(cheapestNewTree, cache->back()); - nextResult->clear(); - nextResult->push_back(std::move(cache->back())); - cache->pop_back(); - } - // All plans which are not disjoint with the newly chosen plan have to be + auto smallestIdxNew = findSmallestExecutionTree(cache); + auto& cheapestNewTree = cache.at(smallestIdxNew); + std::swap(cheapestNewTree, cache.back()); + nextBestPlan.clear(); + nextBestPlan.push_back(std::move(cache.back())); + cache.pop_back(); + } + + // All plans which have a node in common with the chosen plan have to be // deleted from the `input` and therefore also from the `cache`. - auto shouldBeErased = [&nextTree = nextResult->front()](const auto& plan) { + auto shouldBeErased = [&nextTree = nextBestPlan.front()](const auto& plan) { return (nextTree._idsOfIncludedNodes & plan._idsOfIncludedNodes) != 0; }; - std::erase_if(*input, shouldBeErased); - std::erase_if(*cache, shouldBeErased); + std::erase_if(input, shouldBeErased); + std::erase_if(cache, shouldBeErased); }; bool first = true; - if (numSeeds <= 1) { - // Only 0 or 1 nodes in the input, nothing to plan. - return std::move(result); - } - + Plans nextResult; for ([[maybe_unused]] size_t i : ad_utility::integerRange(numSeeds - 1)) { - bool isFirstStep = std::exchange(first, false); - greedyStep(&result, &precomputedCache, &nextResult, isFirstStep); + greedyStep(nextResult, first); + first = false; } // TODO Assert that all seeds are covered by the result. return nextResult; From 807324e3f7d85d1ee9b4d3a17e0f3560c4751608 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Mon, 20 Jan 2025 20:26:48 +0100 Subject: [PATCH 4/4] another fix. Signed-off-by: Johannes Kalmbach --- src/engine/QueryPlanner.cpp | 47 +++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index db07f2d4ce..416adca7cb 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1393,30 +1393,31 @@ QueryPlanner::runGreedyPlanningOnConnectedComponent( using Plans = std::vector; // Perform a single step of greedy query planning. - // `nextResult` contains the result of the last step of greedy query planning. - // `input` contains all plans that have been chosen/combined so far (which - // might still be the initial start plans), except for the most recently - // chosen plan, which is stored in `nextResult`. `cache` contains all the - // plans that can be obtained by combining two plans in `input`. The function - // then performs one additional step of greedy planning and reinforces the - // above pre-/postconditions. Exception: if `isFirstStep` then `cache` and - // `nextResult` must be empty, and the first step of greedy planning is - // performed, which also establishes the pre-/postconditions. + // `nextBestPlan` contains the result of the last step of greedy query + // planning. `currentPlans` contains all plans that have been chosen/combined + // so far (which might still be the initial start plans), except for the most + // recently chosen plan, which is stored in `nextResult`. `cache` contains all + // the plans that can be obtained by combining two plans in `input`. The + // function then performs one additional step of greedy planning and + // reinforces the above pre-/postconditions. Exception: if `isFirstStep` then + // `cache` and `nextResult` must be empty, and the first step of greedy + // planning is performed, which also establishes the pre-/postconditions. auto greedyStep = [this, &tg, &filters, &textLimits, - input = std::move(connectedComponent), cache = Plans{}]( - Plans& nextBestPlan, bool isFirstStep) mutable { + currentPlans = std::move(connectedComponent), + cache = Plans{}](Plans& nextBestPlan, + bool isFirstStep) mutable { checkCancellation(); - // Normally, we already have all combinations of two nodes in `input` in the - // cache, so we only have to add the combinations between `input` and - // `nextResult`. In the first step, we need to initially compute all - // possible combinations. - auto newPlans = - isFirstStep ? merge(input, input, tg) : merge(input, nextBestPlan, tg); + // Normally, we already have all combinations of two nodes in `currentPlans` + // in the cache, so we only have to add the combinations between + // `currentPlans` and `nextResult`. In the first step, we need to initially + // compute all possible combinations. + auto newPlans = isFirstStep ? merge(currentPlans, currentPlans, tg) + : merge(currentPlans, nextBestPlan, tg); applyFiltersIfPossible(newPlans, filters); applyTextLimitsIfPossible(newPlans, textLimits, true); AD_CORRECTNESS_CHECK(!newPlans.empty()); ql::ranges::move(newPlans, std::back_inserter(cache)); - ql::ranges::move(nextBestPlan, std::back_inserter(input)); + ql::ranges::move(nextBestPlan, std::back_inserter(currentPlans)); // All candidates for the next greedy step are in the `cache`, choose the // cheapest one, remove it from the cache and make it the `nextResult` @@ -1430,22 +1431,22 @@ QueryPlanner::runGreedyPlanningOnConnectedComponent( } // All plans which have a node in common with the chosen plan have to be - // deleted from the `input` and therefore also from the `cache`. + // deleted from the `currentPlans` and therefore also from the `cache`. auto shouldBeErased = [&nextTree = nextBestPlan.front()](const auto& plan) { return (nextTree._idsOfIncludedNodes & plan._idsOfIncludedNodes) != 0; }; - std::erase_if(input, shouldBeErased); + std::erase_if(currentPlans, shouldBeErased); std::erase_if(cache, shouldBeErased); }; bool first = true; - Plans nextResult; + Plans result; for ([[maybe_unused]] size_t i : ad_utility::integerRange(numSeeds - 1)) { - greedyStep(nextResult, first); + greedyStep(result, first); first = false; } // TODO Assert that all seeds are covered by the result. - return nextResult; + return result; } // _____________________________________________________________________________