Skip to content

Commit

Permalink
Minor refactorings for the Join class (#1560)
Browse files Browse the repository at this point in the history
These include removing dead code and unused arguments, making functions `const` or even `static`, and applying similar minor non-functional refactorings.
  • Loading branch information
RobinTF authored Oct 18, 2024
1 parent ef057ac commit 264919e
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 54 deletions.
1 change: 1 addition & 0 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ Permutation::IdTableGenerator IndexScan::lazyScanForJoinOfColumnWithScan(
std::span<const Id> joinColumn) const {
AD_EXPENSIVE_CHECK(std::ranges::is_sorted(joinColumn));
AD_CORRECTNESS_CHECK(numVariables_ <= 3 && numVariables_ > 0);
AD_CONTRACT_CHECK(joinColumn.empty() || !joinColumn[0].isUndefined());

auto metaBlocks1 = getMetadataForScan();

Expand Down
13 changes: 4 additions & 9 deletions src/engine/Join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ using std::string;
// _____________________________________________________________________________
Join::Join(QueryExecutionContext* qec, std::shared_ptr<QueryExecutionTree> t1,
std::shared_ptr<QueryExecutionTree> t2, ColumnIndex t1JoinCol,
ColumnIndex t2JoinCol, bool keepJoinColumn)
ColumnIndex t2JoinCol)
: Operation(qec) {
AD_CONTRACT_CHECK(t1 && t2);
// Currently all join algorithms require both inputs to be sorted, so we
Expand Down Expand Up @@ -55,7 +55,6 @@ Join::Join(QueryExecutionContext* qec, std::shared_ptr<QueryExecutionTree> t1,
_leftJoinCol = t1JoinCol;
_right = std::move(t2);
_rightJoinCol = t2JoinCol;
_keepJoinColumn = keepJoinColumn;
_sizeEstimate = 0;
_sizeEstimateComputed = false;
_multiplicities.clear();
Expand Down Expand Up @@ -92,10 +91,7 @@ string Join::getDescriptor() const { return "Join on " + _joinVar.name(); }
// _____________________________________________________________________________
ProtoResult Join::computeResult([[maybe_unused]] bool requestLaziness) {
LOG(DEBUG) << "Getting sub-results for join result computation..." << endl;
size_t leftWidth = _left->getResultWidth();
size_t rightWidth = _right->getResultWidth();
IdTable idTable{getExecutionContext()->getAllocator()};
idTable.setNumColumns(leftWidth + rightWidth - 1);
IdTable idTable{getResultWidth(), getExecutionContext()->getAllocator()};

if (_left->knownEmptyResult() || _right->knownEmptyResult()) {
_left->getRootOperation()->updateRuntimeInformationWhenOptimizedOut();
Expand Down Expand Up @@ -158,7 +154,7 @@ ProtoResult Join::computeResult([[maybe_unused]] bool requestLaziness) {
std::shared_ptr<const Result> leftRes =
leftResIfCached ? leftResIfCached : _left->getResult();
checkCancellation();
if (leftRes->idTable().size() == 0) {
if (leftRes->idTable().empty()) {
_right->getRootOperation()->updateRuntimeInformationWhenOptimizedOut();
// TODO<joka921, hannahbast, SPARQL update> When we add triples to the
// index, the vocabularies of index scans will not necessarily be empty and
Expand Down Expand Up @@ -206,8 +202,7 @@ VariableToColumnMap Join::computeVariableToColumnMap() const {

// _____________________________________________________________________________
size_t Join::getResultWidth() const {
size_t res = _left->getResultWidth() + _right->getResultWidth() -
(_keepJoinColumn ? 1 : 2);
size_t res = _left->getResultWidth() + _right->getResultWidth() - 1;
AD_CONTRACT_CHECK(res > 0);
return res;
}
Expand Down
10 changes: 3 additions & 7 deletions src/engine/Join.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ class Join : public Operation {

Variable _joinVar{"?notSet"};

bool _keepJoinColumn;

bool _sizeEstimateComputed;
size_t _sizeEstimate;

Expand All @@ -33,7 +31,7 @@ class Join : public Operation {
public:
Join(QueryExecutionContext* qec, std::shared_ptr<QueryExecutionTree> t1,
std::shared_ptr<QueryExecutionTree> t2, ColumnIndex t1JoinCol,
ColumnIndex t2JoinCol, bool keepJoinColumn = true);
ColumnIndex t2JoinCol);

// A very explicit constructor, which initializes an invalid join object (it
// has no subtrees, which violates class invariants). These invalid Join
Expand Down Expand Up @@ -108,8 +106,8 @@ class Join : public Operation {
* @return The result is only sorted, if the bigger table is sorted.
* Otherwise it is not sorted.
**/
void hashJoin(const IdTable& dynA, ColumnIndex jc1, const IdTable& dynB,
ColumnIndex jc2, IdTable* dynRes);
static void hashJoin(const IdTable& dynA, ColumnIndex jc1,
const IdTable& dynB, ColumnIndex jc2, IdTable* dynRes);

protected:
virtual string getCacheKeyImpl() const override;
Expand All @@ -134,8 +132,6 @@ class Join : public Operation {
IndexScan& scan,
ColumnIndex joinColScan);

using ScanMethodType = std::function<IdTable(Id)>;

/*
* @brief Combines 2 rows like in a join and inserts the result in the
* given table.
Expand Down
45 changes: 15 additions & 30 deletions src/util/JoinAlgorithms/JoinAlgorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,6 @@ namespace ad_utility {

// Some helper concepts.

// A predicate type `Pred` fulfills `BinaryRangePredicate<Range>` if it can be
// called with two values of the `Range`'s `value_type` and produces a result
// that can be converted to `bool`.
template <typename Pred, typename Range>
concept BinaryRangePredicate =
std::indirect_binary_predicate<Pred, std::ranges::iterator_t<Range>,
std::ranges::iterator_t<Range>>;

// A function `F` fulfills `UnaryIteratorFunction` if it can be called with a
// single argument of the `Range`'s iterator type (NOT value type).
template <typename F, typename Range>
Expand Down Expand Up @@ -667,14 +659,14 @@ class BlockAndSubrange {
template <typename Iterator, typename End, typename Projection>
struct JoinSide {
using CurrentBlocks =
std::vector<detail::BlockAndSubrange<typename Iterator::value_type>>;
std::vector<detail::BlockAndSubrange<std::iter_value_t<Iterator>>>;
Iterator it_;
[[no_unique_address]] const End end_;
const Projection& projection_;
CurrentBlocks currentBlocks_{};

// Type aliases for a single element from a block from the left/right input.
using value_type = std::ranges::range_value_t<typename Iterator::value_type>;
using value_type = std::ranges::range_value_t<std::iter_value_t<Iterator>>;
// Type alias for the result of the projection.
using ProjectedEl =
std::decay_t<std::invoke_result_t<const Projection&, value_type>>;
Expand All @@ -691,7 +683,7 @@ template <typename Blocks>
auto makeJoinSide(Blocks& blocks, const auto& projection) {
return JoinSide{std::ranges::begin(blocks), std::ranges::end(blocks),
projection};
};
}

// A concept to identify instantiations of the `JoinSide` template.
template <typename T>
Expand Down Expand Up @@ -759,14 +751,10 @@ struct BlockZipperJoinImpl {
using ProjectedEl = LeftSide::ProjectedEl;
static_assert(std::same_as<ProjectedEl, typename RightSide::ProjectedEl>);

// The largest element for which all blocks are currently stored in the buffer
// and processed.
std::optional<ProjectedEl> currentMinEl_ = std::nullopt;

// Create an equality comparison from the `lessThan` predicate.
bool eq(const auto& el1, const auto& el2) {
return !lessThan_(el1, el2) && !lessThan_(el2, el1);
};
}

// Recompute the `currentEl`. It is the minimum of the last element in the
// first block of either of the join sides.
Expand All @@ -775,7 +763,7 @@ struct BlockZipperJoinImpl {
return side.projection_(side.currentBlocks_.front().back());
};
return std::min(getFirst(leftSide_), getFirst(rightSide_), lessThan_);
};
}

// Fill `side.currentBlocks_` with blocks from the range `[side.it_,
// side.end_)` and advance `side.it_` for each read buffer until all elements
Expand Down Expand Up @@ -823,7 +811,7 @@ struct BlockZipperJoinImpl {
} else {
return BlockStatus::allFilled;
}
};
}

// Remove all elements from `blocks` (either `leftSide_.currentBlocks_` or
// `rightSide_.currentBlocks`) s.t. only elements `> lastProcessedElement`
Expand All @@ -848,7 +836,7 @@ struct BlockZipperJoinImpl {
if (blocks.at(0).empty()) {
blocks.clear();
}
};
}

// For one of the inputs (`leftSide_.currentBlocks_` or
// `rightSide_.currentBlocks_`) obtain a tuple of the following elements:
Expand All @@ -860,7 +848,7 @@ struct BlockZipperJoinImpl {
const auto& first = currentBlocks.at(0);
auto it = std::ranges::lower_bound(first.subrange(), currentEl, lessThan_);
return std::tuple{std::ref(first.fullBlock()), first.subrange(), it};
};
}

// Call `compatibleRowAction` for all pairs of elements in the Cartesian
// product of the blocks in `blocksLeft` and `blocksRight`.
Expand Down Expand Up @@ -892,7 +880,7 @@ struct BlockZipperJoinImpl {
}
}
compatibleRowAction_.flush();
};
}

// Return a vector of subranges of all elements in `blocks` that are equal to
// `currentEl`. Effectively, these subranges cover all the blocks completely
Expand All @@ -907,7 +895,7 @@ struct BlockZipperJoinImpl {
last.setSubrange(
std::ranges::equal_range(last.subrange(), currentEl, lessThan_));
return result;
};
}

// Join the first block in `currentBlocksLeft` with the first block in
// `currentBlocksRight`, but ignore all elements that are `>= currentEl`
Expand Down Expand Up @@ -949,7 +937,7 @@ struct BlockZipperJoinImpl {
// Remove the joined elements.
currentBlocksLeft.at(0).setSubrange(currentElItL, subrangeLeft.end());
currentBlocksRight.at(0).setSubrange(currentElItR, subrangeRight.end());
};
}

// If the `targetBuffer` is empty, read the next nonempty block from `[it,
// end)` if there is one.
Expand All @@ -965,7 +953,7 @@ struct BlockZipperJoinImpl {
}
++it;
}
};
}

// Fill both buffers (left and right) until they contain at least one block.
// Then recompute the `currentEl()` and keep on filling the buffers until at
Expand All @@ -988,9 +976,7 @@ struct BlockZipperJoinImpl {
}

// Add the remaining blocks such that condition 3 from above is fulfilled.
auto blockStatus = fillEqualToCurrentElBothSides(getCurrentEl());
currentMinEl_ = getCurrentEl();
return blockStatus;
return fillEqualToCurrentElBothSides(getCurrentEl());
}

// Combine the above functionality and perform one round of joining.
Expand All @@ -999,11 +985,10 @@ struct BlockZipperJoinImpl {
void joinBuffers(BlockStatus& blockStatus) {
auto& currentBlocksLeft = leftSide_.currentBlocks_;
auto& currentBlocksRight = rightSide_.currentBlocks_;
ProjectedEl currentEl = getCurrentEl();
joinAndRemoveLessThanCurrentEl<DoOptionalJoin>(
currentBlocksLeft, currentBlocksRight, getCurrentEl());
currentBlocksLeft, currentBlocksRight, currentEl);

// TODO<joka921> This should still be the same.
ProjectedEl currentEl = getCurrentEl();
// At this point the `currentBlocksLeft/Right` only consist of elements `>=
// currentEl`. We now obtain a view on the elements `== currentEl` which are
// needed for the next step (the Cartesian product). In the last block,
Expand Down
2 changes: 1 addition & 1 deletion src/util/JoinAlgorithms/JoinColumnMapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ struct IdTableAndFirstCol {
auto begin() const { return col().begin(); }
auto end() const { return col().end(); }

bool empty() { return col().empty(); }
bool empty() const { return col().empty(); }

const Id& operator[](size_t idx) const { return col()[idx]; }

Expand Down
3 changes: 1 addition & 2 deletions test/engine/SpatialJoinTestHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@ inline std::shared_ptr<QueryExecutionTree> buildJoin(
auto varCol2 = tree2->getVariableColumns();
size_t col1 = varCol1[joinVariable].columnIndex_;
size_t col2 = varCol2[joinVariable].columnIndex_;
return ad_utility::makeExecutionTree<Join>(qec, tree1, tree2, col1, col2,
true);
return ad_utility::makeExecutionTree<Join>(qec, tree1, tree2, col1, col2);
}

inline std::shared_ptr<QueryExecutionTree> buildMediumChild(
Expand Down
4 changes: 3 additions & 1 deletion test/engine/ValuesForTesting.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ class ValuesForTesting : public Operation {
}

size_t getResultWidth() const override {
return tables_.empty() ? 0 : tables_.at(0).numColumns();
// Assume a width of 1 if we have no tables and no other information to base
// it on because 0 would otherwise cause stuff to break.
return tables_.empty() ? 1 : tables_.at(0).numColumns();
}

vector<ColumnIndex> resultSortedOn() const override {
Expand Down
7 changes: 3 additions & 4 deletions test/util/JoinHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,8 @@ IdTable useJoinFunctionOnIdTables(const IdTableAndJoinColumn& tableA,
* `ad_utility::callFixedSize`.
*/
auto makeHashJoinLambda() {
Join J{Join::InvalidOnlyForTestingJoinTag{}, ad_utility::testing::getQec()};
return [J = std::move(J)]<int A, int B, int C>(auto&&... args) mutable {
return J.hashJoin(AD_FWD(args)...);
return []<int A, int B, int C>(auto&&... args) {
return Join::hashJoin(AD_FWD(args)...);
};
}

Expand All @@ -70,7 +69,7 @@ auto makeHashJoinLambda() {
*/
auto makeJoinLambda() {
Join J{Join::InvalidOnlyForTestingJoinTag{}, ad_utility::testing::getQec()};
return [J = std::move(J)]<int A, int B, int C>(auto&&... args) mutable {
return [J = std::move(J)]<int A, int B, int C>(auto&&... args) {
return J.join(AD_FWD(args)...);
};
}

0 comments on commit 264919e

Please sign in to comment.