diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index bdccf1448..276f04e9f 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -5,7 +5,7 @@ #include "Bind.h" #include "engine/CallFixedSize.h" -#include "engine/ExistsScan.h" +#include "engine/ExistsJoin.h" #include "engine/QueryExecutionTree.h" #include "engine/sparqlExpressions/SparqlExpression.h" #include "engine/sparqlExpressions/SparqlExpressionGenerators.h" @@ -16,7 +16,7 @@ Bind::Bind(QueryExecutionContext* qec, std::shared_ptr subtree, parsedQuery::Bind b) : Operation(qec), _subtree(std::move(subtree)), _bind(std::move(b)) { - _subtree = ExistsScan::addExistsScansToSubtree( + _subtree = ExistsJoin::addExistsScansToSubtree( _bind._expression, std::move(_subtree), getExecutionContext(), cancellationHandle_); } diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index c724a8fb3..a3750a07e 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -14,5 +14,5 @@ add_library(engine CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp - Describe.cpp ExistsScan.cpp) + Describe.cpp ExistsJoin.cpp) qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2) diff --git a/src/engine/ExistsScan.cpp b/src/engine/ExistsJoin.cpp similarity index 90% rename from src/engine/ExistsScan.cpp rename to src/engine/ExistsJoin.cpp index c416d1dc4..d8d3f564d 100644 --- a/src/engine/ExistsScan.cpp +++ b/src/engine/ExistsJoin.cpp @@ -2,7 +2,7 @@ // Chair of Algorithms and Data Structures. 
// Author: Johannes Kalmbach -#include "engine/ExistsScan.h" +#include "engine/ExistsJoin.h" #include "engine/QueryPlanner.h" #include "engine/sparqlExpressions/ExistsExpression.h" @@ -10,7 +10,7 @@ #include "util/JoinAlgorithms/JoinAlgorithms.h" // _____________________________________________________________________________ -ExistsScan::ExistsScan(QueryExecutionContext* qec, +ExistsJoin::ExistsJoin(QueryExecutionContext* qec, std::shared_ptr left, std::shared_ptr right, Variable existsVariable) @@ -24,16 +24,16 @@ ExistsScan::ExistsScan(QueryExecutionContext* qec, } // _____________________________________________________________________________ -string ExistsScan::getCacheKeyImpl() const { +string ExistsJoin::getCacheKeyImpl() const { return absl::StrCat("EXISTS SCAN left: ", left_->getCacheKey(), " right: ", right_->getCacheKey()); } // _____________________________________________________________________________ -string ExistsScan::getDescriptor() const { return "EXISTS scan"; } +string ExistsJoin::getDescriptor() const { return "EXISTS scan"; } // ____________________________________________________________________________ -VariableToColumnMap ExistsScan::computeVariableToColumnMap() const { +VariableToColumnMap ExistsJoin::computeVariableToColumnMap() const { auto res = left_->getVariableColumns(); AD_CONTRACT_CHECK( !res.contains(existsVariable_), @@ -43,18 +43,18 @@ VariableToColumnMap ExistsScan::computeVariableToColumnMap() const { } // ____________________________________________________________________________ -size_t ExistsScan::getResultWidth() const { +size_t ExistsJoin::getResultWidth() const { // We add one column to the input. 
return left_->getResultWidth() + 1; } // ____________________________________________________________________________ -vector ExistsScan::resultSortedOn() const { +vector ExistsJoin::resultSortedOn() const { return left_->resultSortedOn(); } // ____________________________________________________________________________ -float ExistsScan::getMultiplicity(size_t col) { +float ExistsJoin::getMultiplicity(size_t col) { if (col < getResultWidth() - 1) { return left_->getMultiplicity(col); } @@ -64,18 +64,18 @@ float ExistsScan::getMultiplicity(size_t col) { } // ____________________________________________________________________________ -uint64_t ExistsScan::getSizeEstimateBeforeLimit() { +uint64_t ExistsJoin::getSizeEstimateBeforeLimit() { return left_->getSizeEstimate(); } // ____________________________________________________________________________ -size_t ExistsScan::getCostEstimate() { +size_t ExistsJoin::getCostEstimate() { return left_->getCostEstimate() + right_->getCostEstimate() + left_->getSizeEstimate() + right_->getSizeEstimate(); } // ____________________________________________________________________________ -ProtoResult ExistsScan::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult ExistsJoin::computeResult([[maybe_unused]] bool requestLaziness) { auto leftRes = left_->getResult(); auto rightRes = right_->getResult(); const auto& left = leftRes->idTable(); @@ -139,7 +139,7 @@ ProtoResult ExistsScan::computeResult([[maybe_unused]] bool requestLaziness) { } // _____________________________________________________________________________ -std::shared_ptr ExistsScan::addExistsScansToSubtree( +std::shared_ptr ExistsJoin::addExistsScansToSubtree( const sparqlExpression::SparqlExpressionPimpl& expression, std::shared_ptr subtree, QueryExecutionContext* qec, const ad_utility::SharedCancellationHandle& cancellationHandle) { @@ -158,7 +158,7 @@ std::shared_ptr ExistsScan::addExistsScansToSubtree( auto pq = exists.argument(); auto tree = 
std::make_shared(qp.createExecutionTree(pq)); - subtree = ad_utility::makeExecutionTree( + subtree = ad_utility::makeExecutionTree( qec, std::move(subtree), std::move(tree), exists.variable()); } return subtree; diff --git a/src/engine/ExistsScan.h b/src/engine/ExistsJoin.h similarity index 95% rename from src/engine/ExistsScan.h rename to src/engine/ExistsJoin.h index dbd947d30..9b9c7483c 100644 --- a/src/engine/ExistsScan.h +++ b/src/engine/ExistsJoin.h @@ -7,7 +7,7 @@ #include "engine/Operation.h" #include "engine/QueryExecutionTree.h" -class ExistsScan : public Operation { +class ExistsJoin : public Operation { private: std::shared_ptr left_; std::shared_ptr right_; @@ -19,7 +19,7 @@ class ExistsScan : public Operation { std::vector> _matchedColumns; public: - ExistsScan(QueryExecutionContext* qec, + ExistsJoin(QueryExecutionContext* qec, std::shared_ptr left, std::shared_ptr right, Variable existsVariable); diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index ff8edc1fc..9da7c1272 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -10,7 +10,7 @@ #include "backports/algorithm.h" #include "engine/CallFixedSize.h" -#include "engine/ExistsScan.h" +#include "engine/ExistsJoin.h" #include "engine/QueryExecutionTree.h" #include "engine/QueryPlanner.h" #include "engine/sparqlExpressions/SparqlExpression.h" @@ -31,7 +31,7 @@ Filter::Filter(QueryExecutionContext* qec, : Operation(qec), _subtree(std::move(subtree)), _expression{std::move(expression)} { - _subtree = ExistsScan::addExistsScansToSubtree( + _subtree = ExistsJoin::addExistsScansToSubtree( _expression, std::move(_subtree), getExecutionContext(), cancellationHandle_); setPrefilterExpressionForChildren(); diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index cfa862170..3e8af1cb2 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -9,7 +9,7 @@ #include #include "engine/CallFixedSize.h" -#include "engine/ExistsScan.h" +#include "engine/ExistsJoin.h" #include 
"engine/IndexScan.h" #include "engine/Join.h" #include "engine/LazyGroupBy.h" @@ -55,7 +55,7 @@ GroupBy::GroupBy(QueryExecutionContext* qec, vector groupByVariables, auto sortColumns = computeSortColumns(subtree.get()); for (const auto& alias : _aliases) { - subtree = ExistsScan::addExistsScansToSubtree( + subtree = ExistsJoin::addExistsScansToSubtree( alias._expression, std::move(subtree), getExecutionContext(), cancellationHandle_); } diff --git a/src/util/JoinAlgorithms/FindUndefRanges.h b/src/util/JoinAlgorithms/FindUndefRanges.h index bf15685f3..6313bea88 100644 --- a/src/util/JoinAlgorithms/FindUndefRanges.h +++ b/src/util/JoinAlgorithms/FindUndefRanges.h @@ -196,35 +196,4 @@ struct FindSmallerUndefRanges { } }; constexpr FindSmallerUndefRanges findSmallerUndefRanges; -/* -template -auto findSmallerUndefRanges(const auto& row, It begin, It end, - bool& resultMightBeUnsorted) - -> cppcoro::generator { - size_t numLastUndefined = 0; - assert(row.size() > 0); - auto it = ql::ranges::rbegin(row); - auto rend = ql::ranges::rend(row); - for (; it < rend; ++it) { - if (*it != Id::makeUndefined()) { - break; - } - ++numLastUndefined; - } - - for (; it < rend; ++it) { - if (*it == Id::makeUndefined()) { - return findSmallerUndefRangesArbitrary(row, begin, end, - resultMightBeUnsorted); - } - } - if (numLastUndefined == 0) { - return findSmallerUndefRangesForRowsWithoutUndef(row, begin, end, - resultMightBeUnsorted); - } else { - return findSmallerUndefRangesForRowsWithUndefInLastColumns( - row, numLastUndefined, begin, end, resultMightBeUnsorted); - } -} -*/ } // namespace ad_utility diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 90462f3cc..c7d806319 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -2906,10 +2906,21 @@ TEST(QueryPlanner, Describe) { } // ____________________________________________________________________________ -TEST(QueryPlanner, GroupByRedundanteParensAndVariables) { +TEST(QueryPlanner, 
GroupByRedundantParensAndVariables) { auto matcher = h::GroupBy({Variable{"?x"}}, {}, h::IndexScanFromStrings("?x", "?y", "?z")); h::expect("SELECT ?x { ?x ?y ?z} GROUP BY (?x)", matcher); h::expect("SELECT ?x { ?x ?y ?z} GROUP BY ?x ?x", matcher); h::expect("SELECT ?x { ?x ?y ?z} GROUP BY ?x ?x (?x)", matcher); } + +// ____________________________________________________________________________ +TEST(QueryPlanner, Exists) { + auto xyz = h::IndexScanFromStrings("?x", "?y", "?z"); + auto a = h::IndexScanFromStrings("?x", "?y", "?z"); + h::expect( + "SELECT * { ?x ?y ?z FILTER EXISTS {?a ?b ?c}}", + h::Filter("EXISTS {?a ?b ?c}", + h::ExistsJoin(h::IndexScanFromStrings("?x", "?y", "?z"), + h::IndexScanFromStrings("?a", "?b", "?c")))); +} diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index c300bf0d5..f53f30c5b 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -15,6 +15,7 @@ #include "engine/CartesianProductJoin.h" #include "engine/CountAvailablePredicates.h" #include "engine/Describe.h" +#include "engine/ExistsJoin.h" #include "engine/Filter.h" #include "engine/GroupBy.h" #include "engine/IndexScan.h" @@ -405,6 +406,12 @@ inline QetMatcher Describe( AD_PROPERTY(::Describe, getDescribe, describeMatcher))); } +// Match an `ExistsJoin` +inline QetMatcher ExistsJoin(const QetMatcher& leftChild, + const QetMatcher& rightChild) { + return RootOperation<::ExistsJoin>(AllOf(children(leftChild, rightChild))); +} + // inline QetMatcher QetWithWarnings( const std::vector& warningSubstrings, diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp index 0803f96f0..f5a65169b 100644 --- a/test/SparqlAntlrParserTest.cpp +++ b/test/SparqlAntlrParserTest.cpp @@ -4,6 +4,7 @@ // Julian Mundhahs // Hannah Bast +#include #include #include @@ -14,6 +15,7 @@ #include "./SparqlExpressionTestHelpers.h" #include "./util/GTestHelpers.h" #include "./util/TripleComponentTestHelpers.h" +#include 
"QueryPlannerTestHelpers.h" #include "SparqlAntlrParserTestHelpers.h" #include "engine/sparqlExpressions/CountStarExpression.h" #include "engine/sparqlExpressions/GroupConcatExpression.h" @@ -1860,6 +1862,43 @@ TEST(SparqlParser, binaryStringExpressions) { expectBuiltInCall("STRBEFORE(?x, ?y)", makeMatcher(&makeStrBeforeExpression)); } +// Matchers for EXISTS and NOT EXISTS functions. +namespace existsTestHelpers { +using namespace sparqlExpression; +using namespace ::testing; + +// Match an EXISTS function +auto existsMatcher(Matcher pattern) { + return Pointee(WhenDynamicCastTo( + AD_PROPERTY(ExistsExpression, argument, pattern))); +} +// Match a NOT EXISTS function +auto notExistsMatcher(Matcher pattern) { + return builtInCallTestHelpers::matchNaryWithChildrenMatchers( + &makeUnaryNegateExpression, existsMatcher(pattern)); +} +} // namespace existsTestHelpers + +// _____________________________________________________________________________ +TEST(SparqlParser, Exists) { + using namespace existsTestHelpers; + auto expectBuiltInCall = ExpectCompleteParse<&Parser::builtInCall>{}; + // A matcher that matches the query `SELECT * { ?a ?foo}`, where the + // FROM and FROM NAMED clauses can still be specified via arguments. + using Graphs = ScanSpecificationAsTripleComponent::Graphs; + auto selectABarFooMatcher = [](Graphs defaultGraphs = std::nullopt, + Graphs namedGraphs = std::nullopt) { + return testing::AllOf(m::SelectQuery( + m::AsteriskSelect(), + m::GraphPattern(m::Triples({{Var{"?a"}, "", Var{"?foo"}}})), + defaultGraphs, namedGraphs)); + }; + expectBuiltInCall("EXISTS {?a ?foo}", + existsMatcher(selectABarFooMatcher())); + expectBuiltInCall("NOT EXISTS {?a ?foo}", + notExistsMatcher(selectABarFooMatcher())); +} + namespace aggregateTestHelpers { using namespace sparqlExpression;