Skip to content

Commit

Permalink
Merge branch 'master' into disable-websocket-updates
Browse files Browse the repository at this point in the history
  • Loading branch information
hannahbast authored Jan 23, 2025
2 parents 46dedd3 + ff47922 commit 517c7ef
Show file tree
Hide file tree
Showing 12 changed files with 128 additions and 46 deletions.
9 changes: 5 additions & 4 deletions src/engine/CartesianProductJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,13 +255,14 @@ CartesianProductJoin::calculateSubResults(bool requestLaziness) {
auto children = childView();
AD_CORRECTNESS_CHECK(!ql::ranges::empty(children));
// Get all child results (possibly with limit, see above).
for (Operation& child : children) {
if (limitIfPresent.has_value() && child.supportsLimit()) {
child.setLimit(limitIfPresent.value());
for (std::shared_ptr<QueryExecutionTree>& childTree : children_) {
if (limitIfPresent.has_value() && childTree->supportsLimit()) {
childTree->setLimit(limitIfPresent.value());
}
auto& child = *childTree->getRootOperation();
// To preserve order of the columns we can only consume the first child
// lazily. In the future this restriction may be lifted by permutating the
// columns afterwards.
// columns afterward.
bool isLast = &child == &children.back();
bool requestLazy = requestLaziness && isLast;
auto result = child.getResult(
Expand Down
2 changes: 1 addition & 1 deletion src/engine/QueryExecutionTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ QueryExecutionTree::QueryExecutionTree(QueryExecutionContext* const qec)

// _____________________________________________________________________________
std::string QueryExecutionTree::getCacheKey() const {
return rootOperation_->getCacheKey();
return cacheKey_.value();
}

// _____________________________________________________________________________
Expand Down
18 changes: 17 additions & 1 deletion src/engine/QueryExecutionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ class QueryExecutionTree {
std::shared_ptr<Operation> operation)
: QueryExecutionTree(qec) {
rootOperation_ = std::move(operation);
resultWidth_ = rootOperation_->getResultWidth();
cacheKey_ = rootOperation_->getCacheKey();
readFromCache();
}

Expand Down Expand Up @@ -58,7 +60,7 @@ class QueryExecutionTree {
std::optional<size_t> getVariableColumnOrNullopt(
const Variable& variable) const;

size_t getResultWidth() const { return rootOperation_->getResultWidth(); }
size_t getResultWidth() const { return resultWidth_.value(); }

std::shared_ptr<const Result> getResult(bool requestLaziness = false) const {
return rootOperation_->getResult(
Expand Down Expand Up @@ -203,11 +205,25 @@ class QueryExecutionTree {
s << tree.getRootOperation()->getDescriptor();
}

bool supportsLimit() const { return getRootOperation()->supportsLimit(); }

// Set the value of the `LIMIT` clause that will be applied to the result of
// this operation.
void setLimit(const LimitOffsetClause& limitOffsetClause) {
getRootOperation()->setLimit(limitOffsetClause);
// Setting the limit invalidates the `cacheKey` as well as the
// `sizeEstimate`.
cacheKey_ = getRootOperation()->getCacheKey();
sizeEstimate_ = getRootOperation()->getSizeEstimate();
}

private:
QueryExecutionContext* qec_; // No ownership
std::shared_ptr<Operation> rootOperation_ =
nullptr; // Owned child. Will be deleted at deconstruction.
std::optional<size_t> sizeEstimate_ = std::nullopt;
std::optional<std::string> cacheKey_ = std::nullopt;
std::optional<size_t> resultWidth_ = std::nullopt;
bool isRoot_ = false; // used to distinguish the root from child
// operations/subtrees when pinning only the result.

Expand Down
8 changes: 4 additions & 4 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ std::vector<QueryPlanner::SubtreePlan> QueryPlanner::createExecutionTrees(

for (auto& plan : lastRow) {
if (plan._qet->getRootOperation()->supportsLimit()) {
plan._qet->getRootOperation()->setLimit(pq._limitOffset);
plan._qet->setLimit(pq._limitOffset);
}
}

Expand Down Expand Up @@ -1793,7 +1793,7 @@ size_t QueryPlanner::findSmallestExecutionTree(
// _____________________________________________________________________________
std::vector<QueryPlanner::SubtreePlan> QueryPlanner::createJoinCandidates(
const SubtreePlan& ain, const SubtreePlan& bin,
std::optional<TripleGraph> tg) const {
boost::optional<const TripleGraph&> tg) const {
bool swapForTesting = isInTestMode() && bin.type != SubtreePlan::OPTIONAL &&
ain._qet->getCacheKey() < bin._qet->getCacheKey();
const auto& a = !swapForTesting ? ain : bin;
Expand Down Expand Up @@ -2261,7 +2261,7 @@ void QueryPlanner::GraphPatternPlanner::visitGroupOptionalOrMinus(
// whether `b` is from an OPTIONAL or MINUS.
for (const auto& a : candidatePlans_.at(0)) {
for (const auto& b : candidates) {
auto vec = planner_.createJoinCandidates(a, b, std::nullopt);
auto vec = planner_.createJoinCandidates(a, b, boost::none);
nextCandidates.insert(nextCandidates.end(),
std::make_move_iterator(vec.begin()),
std::make_move_iterator(vec.end()));
Expand Down Expand Up @@ -2572,7 +2572,7 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery(
ql::ranges::for_each(candidatesForSubquery, setSelectedVariables);
// A subquery must also respect LIMIT and OFFSET clauses
ql::ranges::for_each(candidatesForSubquery, [&](SubtreePlan& plan) {
plan._qet->getRootOperation()->setLimit(arg.get()._limitOffset);
plan._qet->setLimit(arg.get()._limitOffset);
});
visitGroupOptionalOrMinus(std::move(candidatesForSubquery));
}
Expand Down
3 changes: 2 additions & 1 deletion src/engine/QueryPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#pragma once

#include <boost/optional.hpp>
#include <vector>

#include "engine/CheckUsePatternTrick.h"
Expand Down Expand Up @@ -324,7 +325,7 @@ class QueryPlanner {

[[nodiscard]] std::vector<QueryPlanner::SubtreePlan> createJoinCandidates(
const SubtreePlan& a, const SubtreePlan& b,
std::optional<TripleGraph> tg) const;
boost::optional<const TripleGraph&> tg) const;

// Used internally by `createJoinCandidates`. If `a` or `b` is a transitive
// path operation and the other input can be bound to this transitive path
Expand Down
4 changes: 4 additions & 0 deletions src/global/RuntimeParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ inline auto& RuntimeParameters() {
// does cause significant overhead for this case.
MemorySizeParameter<"lazy-result-max-cache-size">{5_MB},
Bool<"websocket-updates-enabled">{true},
// When the result of an index scan is smaller than a single block, then
// its size estimate will be the size of the block divided by this
// value.
SizeT<"small-index-scan-size-estimate-divisor">{5},
};
}();
return params;
Expand Down
82 changes: 66 additions & 16 deletions src/index/CompressedRelation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,35 @@ static auto getBeginAndEnd(auto& range) {
return std::pair{ql::ranges::begin(range), ql::ranges::end(range)};
}

// Return true iff the `triple` is contained in the `scanSpec`. For example, the
// triple ` 42 0 3 ` is contained in the specs `U U U`, `42 U U` and `42 0 U` ,
// but not in `42 2 U` where `U` means "scan for all possible values".
static auto isTripleInSpecification =
[](const ScanSpecification& scanSpec,
const CompressedBlockMetadata::PermutedTriple& triple) {
enum struct M { GuaranteedMatch, Mismatch, MustCheckNextElement };
auto checkElement = [](const auto& optId, Id id) {
if (!optId.has_value()) {
return M::GuaranteedMatch;
} else if (optId.value() != id) {
return M::Mismatch;
} else {
return M::MustCheckNextElement;
}
};
auto result = checkElement(scanSpec.col0Id(), triple.col0Id_);
if (result == M::MustCheckNextElement) {
result = checkElement(scanSpec.col1Id(), triple.col1Id_);
}
if (result == M::MustCheckNextElement) {
result = checkElement(scanSpec.col2Id(), triple.col2Id_);
}
// The case `result == M::MustCheckNextElement` can happen in the unlikely
// case that there only is a single triple in the block, which is scanned
// for explicitly.
return result != M::Mismatch;
};

// modify the `block` according to the `limitOffset`. Also modify the
// `limitOffset` to reflect the parts of the LIMIT and OFFSET that have been
// performed by pruning this `block`.
Expand Down Expand Up @@ -631,32 +660,53 @@ std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
// a part of these blocks is actually part of the result,
// set up a lambda which allows us to read these blocks, and returns
// the size of the result.
size_t numResults = 0;
// Determine the total size of the result.
// First accumulate the complete blocks in the "middle"
std::size_t inserted = 0;
std::size_t deleted = 0;

auto readSizeOfPossiblyIncompleteBlock = [&](const auto& block) {
return readPossiblyIncompleteBlock(scanSpec, config, block, std::nullopt,
locatedTriplesPerBlock)
.numRows();
if (exactSize) {
numResults +=
readPossiblyIncompleteBlock(scanSpec, config, block, std::nullopt,
locatedTriplesPerBlock)
.numRows();
} else {
// If the first and last triple of the block match, then we know that the
// whole block belongs to the result.
bool isComplete = isTripleInSpecification(scanSpec, block.firstTriple_) &&
isTripleInSpecification(scanSpec, block.lastTriple_);
size_t divisor =
isComplete ? 1
: RuntimeParameters()
.get<"small-index-scan-size-estimate-divisor">();
const auto [ins, del] =
locatedTriplesPerBlock.numTriples(block.blockIndex_);
auto trunc = [divisor](size_t num) {
return std::max(std::min(num, 1ul), num / divisor);
};
inserted += trunc(ins);
deleted += trunc(del);
numResults += trunc(block.numRows_);
}
};

size_t numResults = 0;
// The first and the last block might be incomplete, compute
// and store the partial results from them.
if (beginBlock < endBlock) {
numResults += readSizeOfPossiblyIncompleteBlock(*beginBlock);
readSizeOfPossiblyIncompleteBlock(*beginBlock);
++beginBlock;
}
if (beginBlock < endBlock) {
numResults += readSizeOfPossiblyIncompleteBlock(*(endBlock - 1));
readSizeOfPossiblyIncompleteBlock(*(endBlock - 1));
--endBlock;
}

if (beginBlock == endBlock) {
return {numResults, numResults};
}

// Determine the total size of the result.
// First accumulate the complete blocks in the "middle"
std::size_t inserted = 0;
std::size_t deleted = 0;
ql::ranges::for_each(
ql::ranges::subrange{beginBlock, endBlock}, [&](const auto& block) {
const auto [ins, del] =
Expand All @@ -666,8 +716,8 @@ std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
deleted += del;
numResults += block.numRows_;
} else {
// TODO<joka921> We could cache the exact size as soon as we have
// merged the block once since the last update.
// TODO<joka921> We could cache the exact size as soon as we
// have merged the block once since the last update.
auto b = readAndDecompressBlock(block, config);
numResults += b.has_value() ? b.value().block_.numRows() : 0u;
}
Expand Down Expand Up @@ -1366,10 +1416,10 @@ auto CompressedRelationWriter::createPermutationPair(
// relation as its overhead is far too high for small relations.
relation.swapColumns(c1Idx, c2Idx);

// We only need to sort by the columns of the triple + the graph column,
// not the additional payload. Note: We could also use
// `compareWithoutLocalVocab` to compare the IDs cheaper, but this sort
// is far from being a performance bottleneck.
// We only need to sort by the columns of the triple + the graph
// column, not the additional payload. Note: We could also use
// `compareWithoutLocalVocab` to compare the IDs cheaper, but this
// sort is far from being a performance bottleneck.
auto compare = [](const auto& a, const auto& b) {
return std::tie(a[0], a[1], a[2], a[3]) <
std::tie(b[0], b[1], b[2], b[3]);
Expand Down
2 changes: 1 addition & 1 deletion src/parser/data/Variable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

// ___________________________________________________________________________
Variable::Variable(std::string name, bool checkName) : _name{std::move(name)} {
if (checkName) {
if (checkName && ad_utility::areExpensiveChecksEnabled) {
AD_CONTRACT_CHECK(isValidVariableName(_name), [this]() {
return absl::StrCat("\"", _name, "\" is not a valid SPARQL variable");
});
Expand Down
15 changes: 10 additions & 5 deletions test/SparqlDataTypesTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,16 @@ TEST(SparqlDataTypesTest, VariableNormalizesDollarSign) {
}

TEST(SparqlDataTypesTest, VariableInvalidNamesThrowException) {
EXPECT_THROW(Variable{"no_leading_var_or_dollar"}, ad_utility::Exception);
EXPECT_THROW(Variable{""}, ad_utility::Exception);
EXPECT_THROW(Variable{"? var with space"}, ad_utility::Exception);
EXPECT_THROW(Variable{"?"}, ad_utility::Exception);
EXPECT_THROW(Variable{"$"}, ad_utility::Exception);
if constexpr (!ad_utility::areExpensiveChecksEnabled) {
GTEST_SKIP()
<< "validity of variable names is only checked with expensive checks";
}
EXPECT_THROW(Variable("no_leading_var_or_dollar", true),
ad_utility::Exception);
EXPECT_THROW(Variable("", true), ad_utility::Exception);
EXPECT_THROW(Variable("? var with space", true), ad_utility::Exception);
EXPECT_THROW(Variable("?", true), ad_utility::Exception);
EXPECT_THROW(Variable("$", true), ad_utility::Exception);
}

TEST(SparqlDataTypesTest, VariableEvaluatesCorrectlyBasedOnContext) {
Expand Down
3 changes: 2 additions & 1 deletion test/engine/IndexScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,8 @@ TEST(IndexScan, getResultSizeOfScan) {
SparqlTripleSimple scanTriple{I::fromIriref("<x2>"), I::fromIriref("<p>"),
I::fromIriref("<s1>")};
IndexScan scan{qec, Permutation::Enum::POS, scanTriple};
EXPECT_EQ(scan.getSizeEstimate(), 0);
EXPECT_EQ(scan.getSizeEstimate(), 1);
EXPECT_EQ(scan.getExactSize(), 0);
}
{
SparqlTripleSimple scanTriple{I::fromIriref("<x>"), I::fromIriref("<p>"),
Expand Down
14 changes: 7 additions & 7 deletions test/engine/SpatialJoinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -926,22 +926,22 @@ class SpatialJoinMultiplicityAndSizeEstimateTest
spatialJoin = static_cast<SpatialJoin*>(spJoin2.get());
auto varColsMap = spatialJoin->getExternallyVisibleVariableColumns();

assertMultiplicity(subj1.getVariable(), 9.8, spatialJoin, varColsMap);
assertMultiplicity(obj1.getVariable(), 7.0, spatialJoin, varColsMap);
assertMultiplicity(subj2.getVariable(), 9.8, spatialJoin, varColsMap);
assertMultiplicity(obj2.getVariable(), 7.0, spatialJoin, varColsMap);
assertMultiplicity(subj1.getVariable(), 4.2, spatialJoin, varColsMap);
assertMultiplicity(obj1.getVariable(), 3.0, spatialJoin, varColsMap);
assertMultiplicity(subj2.getVariable(), 4.2, spatialJoin, varColsMap);
assertMultiplicity(obj2.getVariable(), 3.0, spatialJoin, varColsMap);
ASSERT_TRUE(
spatialJoin->onlyForTestingGetDistanceVariable().has_value());
assertMultiplicity(Variable{"?distanceForTesting"}, 1, spatialJoin,
varColsMap);
} else {
ASSERT_EQ(leftChild->getSizeEstimate(), 7);
ASSERT_EQ(rightChild->getSizeEstimate(), 7);
auto leftEstimate = leftChild->getSizeEstimate();
auto rightEstimate = rightChild->getSizeEstimate();
auto spJoin1 = spatialJoin->addChild(firstChild, firstVariable);
spatialJoin = static_cast<SpatialJoin*>(spJoin1.get());
auto spJoin2 = spatialJoin->addChild(secondChild, secondVariable);
spatialJoin = static_cast<SpatialJoin*>(spJoin2.get());
ASSERT_LE(spatialJoin->getSizeEstimate(), 49);
ASSERT_LE(spatialJoin->getSizeEstimate(), leftEstimate * rightEstimate);
}
}
}
Expand Down
14 changes: 9 additions & 5 deletions test/parser/data/VariableTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,18 @@

// _____________________________________________________________________________
TEST(Variable, legalAndIllegalNames) {
EXPECT_NO_THROW(Variable("?x"));
EXPECT_NO_THROW(Variable("$x"));
EXPECT_NO_THROW(Variable("?ql_matching_word_thür"));
if constexpr (!ad_utility::areExpensiveChecksEnabled) {
GTEST_SKIP()
<< "legality of variable names is only checked with expensive checks";
}
EXPECT_NO_THROW(Variable("?x", true));
EXPECT_NO_THROW(Variable("$x", true));
EXPECT_NO_THROW(Variable("?ql_matching_word_thür", true));

// No leading ? or $
auto matcher = ::testing::HasSubstr("not a valid SPARQL variable");
AD_EXPECT_THROW_WITH_MESSAGE(Variable("x"), matcher);
AD_EXPECT_THROW_WITH_MESSAGE(Variable("?x spaceInVar"), matcher);
AD_EXPECT_THROW_WITH_MESSAGE(Variable("x", true), matcher);
AD_EXPECT_THROW_WITH_MESSAGE(Variable("?x spaceInVar", true), matcher);
}

// _____________________________________________________________________________
Expand Down

0 comments on commit 517c7ef

Please sign in to comment.