From b10d0b739405eaf3f93ce9ba40388e9a13223ab8 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <joka921@users.noreply.github.com>
Date: Fri, 24 Jan 2025 00:29:53 +0100
Subject: [PATCH 1/2] Avoid reading and decompressing index blocks during query
 planning  (#1725)

Avoid reading and decompressing the first and last block of an index scan during query planning (which requires only a size estimate and not the exact size). This added significant performance overhead, when only few blocks were eventually read in the index scan. Instead, we now do the following: First check whether the first and last block fully match the scan specification (this is fast). Second, if this is not the case, assume that the number of elements is a fixed fraction of total number of elements in the block (which we know from the metadata). The fixed fraction is defined by the new runtime parameter `small-index-scan-size-estimate-divisor`.
---
 src/global/RuntimeParameters.h   |  7 ++-
 src/index/CompressedRelation.cpp | 82 +++++++++++++++++++++++++-------
 test/engine/IndexScanTest.cpp    |  3 +-
 test/engine/SpatialJoinTest.cpp  | 14 +++---
 4 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h
index 8e60725ffe..cce7e321b9 100644
--- a/src/global/RuntimeParameters.h
+++ b/src/global/RuntimeParameters.h
@@ -53,7 +53,12 @@ inline auto& RuntimeParameters() {
         Bool<"throw-on-unbound-variables">{false},
         // Control up until which size lazy results should be cached. Caching
         // does cause significant overhead for this case.
-        MemorySizeParameter<"lazy-result-max-cache-size">{5_MB}};
+        MemorySizeParameter<"lazy-result-max-cache-size">{5_MB},
+        // When the result of an index scan is smaller than a single block, then
+        // its size estimate will be the size of the block divided by this
+        // value.
+        SizeT<"small-index-scan-size-estimate-divisor">{5},
+    };
   }();
   return params;
 }
diff --git a/src/index/CompressedRelation.cpp b/src/index/CompressedRelation.cpp
index fc306e892f..c11081f66f 100644
--- a/src/index/CompressedRelation.cpp
+++ b/src/index/CompressedRelation.cpp
@@ -29,6 +29,35 @@ static auto getBeginAndEnd(auto& range) {
   return std::pair{ql::ranges::begin(range), ql::ranges::end(range)};
 }
 
+// Return true iff the `triple` is contained in the `scanSpec`. For example, the
+// triple ` 42 0 3 ` is contained in the specs `U U U`, `42 U U` and `42 0 U` ,
+// but not in `42 2 U` where `U` means "scan for all possible values".
+static auto isTripleInSpecification =
+    [](const ScanSpecification& scanSpec,
+       const CompressedBlockMetadata::PermutedTriple& triple) {
+      enum struct M { GuaranteedMatch, Mismatch, MustCheckNextElement };
+      auto checkElement = [](const auto& optId, Id id) {
+        if (!optId.has_value()) {
+          return M::GuaranteedMatch;
+        } else if (optId.value() != id) {
+          return M::Mismatch;
+        } else {
+          return M::MustCheckNextElement;
+        }
+      };
+      auto result = checkElement(scanSpec.col0Id(), triple.col0Id_);
+      if (result == M::MustCheckNextElement) {
+        result = checkElement(scanSpec.col1Id(), triple.col1Id_);
+      }
+      if (result == M::MustCheckNextElement) {
+        result = checkElement(scanSpec.col2Id(), triple.col2Id_);
+      }
+      // The case `result == M::MustCheckNextElement` can happen in the unlikely
+      // case that there only is a single triple in the block, which is scanned
+      // for explicitly.
+      return result != M::Mismatch;
+    };
+
 // modify the `block` according to the `limitOffset`. Also modify the
 // `limitOffset` to reflect the parts of the LIMIT and OFFSET that have been
 // performed by pruning this `block`.
@@ -631,21 +660,46 @@ std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
   // a part of these blocks is actually part of the result,
   // set up a lambda which allows us to read these blocks, and returns
   // the size of the result.
+  size_t numResults = 0;
+  // Determine the total size of the result.
+  // First accumulate the complete blocks in the "middle"
+  std::size_t inserted = 0;
+  std::size_t deleted = 0;
+
   auto readSizeOfPossiblyIncompleteBlock = [&](const auto& block) {
-    return readPossiblyIncompleteBlock(scanSpec, config, block, std::nullopt,
-                                       locatedTriplesPerBlock)
-        .numRows();
+    if (exactSize) {
+      numResults +=
+          readPossiblyIncompleteBlock(scanSpec, config, block, std::nullopt,
+                                      locatedTriplesPerBlock)
+              .numRows();
+    } else {
+      // If the first and last triple of the block match, then we know that the
+      // whole block belongs to the result.
+      bool isComplete = isTripleInSpecification(scanSpec, block.firstTriple_) &&
+                        isTripleInSpecification(scanSpec, block.lastTriple_);
+      size_t divisor =
+          isComplete ? 1
+                     : RuntimeParameters()
+                           .get<"small-index-scan-size-estimate-divisor">();
+      const auto [ins, del] =
+          locatedTriplesPerBlock.numTriples(block.blockIndex_);
+      auto trunc = [divisor](size_t num) {
+        return std::max(std::min(num, 1ul), num / divisor);
+      };
+      inserted += trunc(ins);
+      deleted += trunc(del);
+      numResults += trunc(block.numRows_);
+    }
   };
 
-  size_t numResults = 0;
   // The first and the last block might be incomplete, compute
   // and store the partial results from them.
   if (beginBlock < endBlock) {
-    numResults += readSizeOfPossiblyIncompleteBlock(*beginBlock);
+    readSizeOfPossiblyIncompleteBlock(*beginBlock);
     ++beginBlock;
   }
   if (beginBlock < endBlock) {
-    numResults += readSizeOfPossiblyIncompleteBlock(*(endBlock - 1));
+    readSizeOfPossiblyIncompleteBlock(*(endBlock - 1));
     --endBlock;
   }
 
@@ -653,10 +707,6 @@ std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
     return {numResults, numResults};
   }
 
-  // Determine the total size of the result.
-  // First accumulate the complete blocks in the "middle"
-  std::size_t inserted = 0;
-  std::size_t deleted = 0;
   ql::ranges::for_each(
       ql::ranges::subrange{beginBlock, endBlock}, [&](const auto& block) {
         const auto [ins, del] =
@@ -666,8 +716,8 @@ std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
           deleted += del;
           numResults += block.numRows_;
         } else {
-          // TODO<joka921> We could cache the exact size as soon as we have
-          // merged the block once since the last update.
+          // TODO<joka921> We could cache the exact size as soon as we
+          // have merged the block once since the last update.
           auto b = readAndDecompressBlock(block, config);
           numResults += b.has_value() ? b.value().block_.numRows() : 0u;
         }
@@ -1366,10 +1416,10 @@ auto CompressedRelationWriter::createPermutationPair(
         // relation as its overhead is far too high for small relations.
         relation.swapColumns(c1Idx, c2Idx);
 
-        // We only need to sort by the columns of the triple + the graph column,
-        // not the additional payload. Note: We could also use
-        // `compareWithoutLocalVocab` to compare the IDs cheaper, but this sort
-        // is far from being a performance bottleneck.
+        // We only need to sort by the columns of the triple + the graph
+        // column, not the additional payload. Note: We could also use
+        // `compareWithoutLocalVocab` to compare the IDs cheaper, but this
+        // sort is far from being a performance bottleneck.
         auto compare = [](const auto& a, const auto& b) {
           return std::tie(a[0], a[1], a[2], a[3]) <
                  std::tie(b[0], b[1], b[2], b[3]);
diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp
index 62f01647a0..395a21261a 100644
--- a/test/engine/IndexScanTest.cpp
+++ b/test/engine/IndexScanTest.cpp
@@ -498,7 +498,8 @@ TEST(IndexScan, getResultSizeOfScan) {
     SparqlTripleSimple scanTriple{I::fromIriref("<x2>"), I::fromIriref("<p>"),
                                   I::fromIriref("<s1>")};
     IndexScan scan{qec, Permutation::Enum::POS, scanTriple};
-    EXPECT_EQ(scan.getSizeEstimate(), 0);
+    EXPECT_EQ(scan.getSizeEstimate(), 1);
+    EXPECT_EQ(scan.getExactSize(), 0);
   }
   {
     SparqlTripleSimple scanTriple{I::fromIriref("<x>"), I::fromIriref("<p>"),
diff --git a/test/engine/SpatialJoinTest.cpp b/test/engine/SpatialJoinTest.cpp
index ed49d54e86..766801ee3b 100644
--- a/test/engine/SpatialJoinTest.cpp
+++ b/test/engine/SpatialJoinTest.cpp
@@ -926,22 +926,22 @@ class SpatialJoinMultiplicityAndSizeEstimateTest
         spatialJoin = static_cast<SpatialJoin*>(spJoin2.get());
         auto varColsMap = spatialJoin->getExternallyVisibleVariableColumns();
 
-        assertMultiplicity(subj1.getVariable(), 9.8, spatialJoin, varColsMap);
-        assertMultiplicity(obj1.getVariable(), 7.0, spatialJoin, varColsMap);
-        assertMultiplicity(subj2.getVariable(), 9.8, spatialJoin, varColsMap);
-        assertMultiplicity(obj2.getVariable(), 7.0, spatialJoin, varColsMap);
+        assertMultiplicity(subj1.getVariable(), 4.2, spatialJoin, varColsMap);
+        assertMultiplicity(obj1.getVariable(), 3.0, spatialJoin, varColsMap);
+        assertMultiplicity(subj2.getVariable(), 4.2, spatialJoin, varColsMap);
+        assertMultiplicity(obj2.getVariable(), 3.0, spatialJoin, varColsMap);
         ASSERT_TRUE(
             spatialJoin->onlyForTestingGetDistanceVariable().has_value());
         assertMultiplicity(Variable{"?distanceForTesting"}, 1, spatialJoin,
                            varColsMap);
       } else {
-        ASSERT_EQ(leftChild->getSizeEstimate(), 7);
-        ASSERT_EQ(rightChild->getSizeEstimate(), 7);
+        auto leftEstimate = leftChild->getSizeEstimate();
+        auto rightEstimate = rightChild->getSizeEstimate();
         auto spJoin1 = spatialJoin->addChild(firstChild, firstVariable);
         spatialJoin = static_cast<SpatialJoin*>(spJoin1.get());
         auto spJoin2 = spatialJoin->addChild(secondChild, secondVariable);
         spatialJoin = static_cast<SpatialJoin*>(spJoin2.get());
-        ASSERT_LE(spatialJoin->getSizeEstimate(), 49);
+        ASSERT_LE(spatialJoin->getSizeEstimate(), leftEstimate * rightEstimate);
       }
     }
   }

From ff479223aa6d7597e886b3fee24f1167e643cf5e Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <joka921@users.noreply.github.com>
Date: Fri, 24 Jan 2025 00:31:05 +0100
Subject: [PATCH 2/2] Various efficiency improvements for the query planner
 (#1724)

These are rather minor changes in the code, but with a very significant effect for "large" queries (that is, queries with many operations):

1. Replace `std::optional<const TripleGraph>` by `boost::optional<const TripleGraph&>`; before this change, many copies of `TripleGraph`s were made for large queries; note that `std::optional` does not work for `const &`
2. Avoid re-building the cache key from scratch for each call to `Operation::getCacheKey`
3. Avoid re-computing the result width from scratch for each call to `Operation::getResultWidth`
4. We can now call `setLimit` for a whole `QueryExecutionTree` (before: only for an individual `Operation`);  this invalidates the cache key and the result width
5. Make the variable name check (which calls the ANTLR parser) an expensive check, so that these are only executed in test and in debug mode
---
 src/engine/CartesianProductJoin.cpp |  9 +++++----
 src/engine/QueryExecutionTree.cpp   |  2 +-
 src/engine/QueryExecutionTree.h     | 18 +++++++++++++++++-
 src/engine/QueryPlanner.cpp         |  8 ++++----
 src/engine/QueryPlanner.h           |  3 ++-
 src/parser/data/Variable.cpp        |  2 +-
 test/SparqlDataTypesTest.cpp        | 15 ++++++++++-----
 test/parser/data/VariableTest.cpp   | 14 +++++++++-----
 8 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp
index cedb832648..d99285479c 100644
--- a/src/engine/CartesianProductJoin.cpp
+++ b/src/engine/CartesianProductJoin.cpp
@@ -255,13 +255,14 @@ CartesianProductJoin::calculateSubResults(bool requestLaziness) {
   auto children = childView();
   AD_CORRECTNESS_CHECK(!ql::ranges::empty(children));
   // Get all child results (possibly with limit, see above).
-  for (Operation& child : children) {
-    if (limitIfPresent.has_value() && child.supportsLimit()) {
-      child.setLimit(limitIfPresent.value());
+  for (std::shared_ptr<QueryExecutionTree>& childTree : children_) {
+    if (limitIfPresent.has_value() && childTree->supportsLimit()) {
+      childTree->setLimit(limitIfPresent.value());
     }
+    auto& child = *childTree->getRootOperation();
     // To preserve order of the columns we can only consume the first child
     // lazily. In the future this restriction may be lifted by permutating the
-    // columns afterwards.
+    // columns afterward.
     bool isLast = &child == &children.back();
     bool requestLazy = requestLaziness && isLast;
     auto result = child.getResult(
diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp
index 7f22de2020..9b58d0bb78 100644
--- a/src/engine/QueryExecutionTree.cpp
+++ b/src/engine/QueryExecutionTree.cpp
@@ -24,7 +24,7 @@ QueryExecutionTree::QueryExecutionTree(QueryExecutionContext* const qec)
 
 // _____________________________________________________________________________
 std::string QueryExecutionTree::getCacheKey() const {
-  return rootOperation_->getCacheKey();
+  return cacheKey_.value();
 }
 
 // _____________________________________________________________________________
diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h
index 0eac785f16..c8bd4f5fc9 100644
--- a/src/engine/QueryExecutionTree.h
+++ b/src/engine/QueryExecutionTree.h
@@ -25,6 +25,8 @@ class QueryExecutionTree {
                      std::shared_ptr<Operation> operation)
       : QueryExecutionTree(qec) {
     rootOperation_ = std::move(operation);
+    resultWidth_ = rootOperation_->getResultWidth();
+    cacheKey_ = rootOperation_->getCacheKey();
     readFromCache();
   }
 
@@ -58,7 +60,7 @@ class QueryExecutionTree {
   std::optional<size_t> getVariableColumnOrNullopt(
       const Variable& variable) const;
 
-  size_t getResultWidth() const { return rootOperation_->getResultWidth(); }
+  size_t getResultWidth() const { return resultWidth_.value(); }
 
   std::shared_ptr<const Result> getResult(bool requestLaziness = false) const {
     return rootOperation_->getResult(
@@ -203,11 +205,25 @@ class QueryExecutionTree {
     s << tree.getRootOperation()->getDescriptor();
   }
 
+  bool supportsLimit() const { return getRootOperation()->supportsLimit(); }
+
+  // Set the value of the `LIMIT` clause that will be applied to the result of
+  // this operation.
+  void setLimit(const LimitOffsetClause& limitOffsetClause) {
+    getRootOperation()->setLimit(limitOffsetClause);
+    // Setting the limit invalidates the `cacheKey` as well as the
+    // `sizeEstimate`.
+    cacheKey_ = getRootOperation()->getCacheKey();
+    sizeEstimate_ = getRootOperation()->getSizeEstimate();
+  }
+
  private:
   QueryExecutionContext* qec_;  // No ownership
   std::shared_ptr<Operation> rootOperation_ =
       nullptr;  // Owned child. Will be deleted at deconstruction.
   std::optional<size_t> sizeEstimate_ = std::nullopt;
+  std::optional<std::string> cacheKey_ = std::nullopt;
+  std::optional<size_t> resultWidth_ = std::nullopt;
   bool isRoot_ = false;  // used to distinguish the root from child
                          // operations/subtrees when pinning only the result.
 
diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp
index 9dd6b5599c..60ea4feeb6 100644
--- a/src/engine/QueryPlanner.cpp
+++ b/src/engine/QueryPlanner.cpp
@@ -185,7 +185,7 @@ std::vector<QueryPlanner::SubtreePlan> QueryPlanner::createExecutionTrees(
 
   for (auto& plan : lastRow) {
     if (plan._qet->getRootOperation()->supportsLimit()) {
-      plan._qet->getRootOperation()->setLimit(pq._limitOffset);
+      plan._qet->setLimit(pq._limitOffset);
     }
   }
 
@@ -1793,7 +1793,7 @@ size_t QueryPlanner::findSmallestExecutionTree(
 // _____________________________________________________________________________
 std::vector<QueryPlanner::SubtreePlan> QueryPlanner::createJoinCandidates(
     const SubtreePlan& ain, const SubtreePlan& bin,
-    std::optional<TripleGraph> tg) const {
+    boost::optional<const TripleGraph&> tg) const {
   bool swapForTesting = isInTestMode() && bin.type != SubtreePlan::OPTIONAL &&
                         ain._qet->getCacheKey() < bin._qet->getCacheKey();
   const auto& a = !swapForTesting ? ain : bin;
@@ -2261,7 +2261,7 @@ void QueryPlanner::GraphPatternPlanner::visitGroupOptionalOrMinus(
   // whether `b` is from an OPTIONAL or MINUS.
   for (const auto& a : candidatePlans_.at(0)) {
     for (const auto& b : candidates) {
-      auto vec = planner_.createJoinCandidates(a, b, std::nullopt);
+      auto vec = planner_.createJoinCandidates(a, b, boost::none);
       nextCandidates.insert(nextCandidates.end(),
                             std::make_move_iterator(vec.begin()),
                             std::make_move_iterator(vec.end()));
@@ -2572,7 +2572,7 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery(
   ql::ranges::for_each(candidatesForSubquery, setSelectedVariables);
   // A subquery must also respect LIMIT and OFFSET clauses
   ql::ranges::for_each(candidatesForSubquery, [&](SubtreePlan& plan) {
-    plan._qet->getRootOperation()->setLimit(arg.get()._limitOffset);
+    plan._qet->setLimit(arg.get()._limitOffset);
   });
   visitGroupOptionalOrMinus(std::move(candidatesForSubquery));
 }
diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h
index 52ee540a0a..201ad7e3c5 100644
--- a/src/engine/QueryPlanner.h
+++ b/src/engine/QueryPlanner.h
@@ -6,6 +6,7 @@
 
 #pragma once
 
+#include <boost/optional.hpp>
 #include <vector>
 
 #include "engine/CheckUsePatternTrick.h"
@@ -324,7 +325,7 @@ class QueryPlanner {
 
   [[nodiscard]] std::vector<QueryPlanner::SubtreePlan> createJoinCandidates(
       const SubtreePlan& a, const SubtreePlan& b,
-      std::optional<TripleGraph> tg) const;
+      boost::optional<const TripleGraph&> tg) const;
 
   // Used internally by `createJoinCandidates`. If `a` or `b` is a transitive
   // path operation and the other input can be bound to this transitive path
diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp
index 00371c3537..bf6a774008 100644
--- a/src/parser/data/Variable.cpp
+++ b/src/parser/data/Variable.cpp
@@ -14,7 +14,7 @@
 
 // ___________________________________________________________________________
 Variable::Variable(std::string name, bool checkName) : _name{std::move(name)} {
-  if (checkName) {
+  if (checkName && ad_utility::areExpensiveChecksEnabled) {
     AD_CONTRACT_CHECK(isValidVariableName(_name), [this]() {
       return absl::StrCat("\"", _name, "\" is not a valid SPARQL variable");
     });
diff --git a/test/SparqlDataTypesTest.cpp b/test/SparqlDataTypesTest.cpp
index a84e39f8d4..57a0ec1e85 100644
--- a/test/SparqlDataTypesTest.cpp
+++ b/test/SparqlDataTypesTest.cpp
@@ -198,11 +198,16 @@ TEST(SparqlDataTypesTest, VariableNormalizesDollarSign) {
 }
 
 TEST(SparqlDataTypesTest, VariableInvalidNamesThrowException) {
-  EXPECT_THROW(Variable{"no_leading_var_or_dollar"}, ad_utility::Exception);
-  EXPECT_THROW(Variable{""}, ad_utility::Exception);
-  EXPECT_THROW(Variable{"? var with space"}, ad_utility::Exception);
-  EXPECT_THROW(Variable{"?"}, ad_utility::Exception);
-  EXPECT_THROW(Variable{"$"}, ad_utility::Exception);
+  if constexpr (!ad_utility::areExpensiveChecksEnabled) {
+    GTEST_SKIP()
+        << "validity of variable names is only checked with expensive checks";
+  }
+  EXPECT_THROW(Variable("no_leading_var_or_dollar", true),
+               ad_utility::Exception);
+  EXPECT_THROW(Variable("", true), ad_utility::Exception);
+  EXPECT_THROW(Variable("? var with space", true), ad_utility::Exception);
+  EXPECT_THROW(Variable("?", true), ad_utility::Exception);
+  EXPECT_THROW(Variable("$", true), ad_utility::Exception);
 }
 
 TEST(SparqlDataTypesTest, VariableEvaluatesCorrectlyBasedOnContext) {
diff --git a/test/parser/data/VariableTest.cpp b/test/parser/data/VariableTest.cpp
index 38e16743ff..a3cf9812ae 100644
--- a/test/parser/data/VariableTest.cpp
+++ b/test/parser/data/VariableTest.cpp
@@ -9,14 +9,18 @@
 
 // _____________________________________________________________________________
 TEST(Variable, legalAndIllegalNames) {
-  EXPECT_NO_THROW(Variable("?x"));
-  EXPECT_NO_THROW(Variable("$x"));
-  EXPECT_NO_THROW(Variable("?ql_matching_word_thür"));
+  if constexpr (!ad_utility::areExpensiveChecksEnabled) {
+    GTEST_SKIP()
+        << "legality of variable names is only checked with expensive checks";
+  }
+  EXPECT_NO_THROW(Variable("?x", true));
+  EXPECT_NO_THROW(Variable("$x", true));
+  EXPECT_NO_THROW(Variable("?ql_matching_word_thür", true));
 
   // No leading ? or $
   auto matcher = ::testing::HasSubstr("not a valid SPARQL variable");
-  AD_EXPECT_THROW_WITH_MESSAGE(Variable("x"), matcher);
-  AD_EXPECT_THROW_WITH_MESSAGE(Variable("?x spaceInVar"), matcher);
+  AD_EXPECT_THROW_WITH_MESSAGE(Variable("x", true), matcher);
+  AD_EXPECT_THROW_WITH_MESSAGE(Variable("?x spaceInVar", true), matcher);
 }
 
 // _____________________________________________________________________________