Skip to content

Commit

Permalink
Add BlankNode support for SERVICE (#1504)
Browse files Browse the repository at this point in the history
With this commit, QLever supports adding new blank nodes during the evaluation of a query. This functionality is used to support blank nodes in the result of a `SERVICE` request. These blank nodes are distinct from all blank nodes in the index, and also from all blank nodes from other `SERVICE` requests, even if they came from the same server. This behavior is correct according to the SPARQL 1.1 federated query standard.
  • Loading branch information
UNEXENU authored Oct 18, 2024
1 parent e53d783 commit ef057ac
Show file tree
Hide file tree
Showing 21 changed files with 399 additions and 44 deletions.
11 changes: 11 additions & 0 deletions src/engine/LocalVocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,14 @@ std::vector<LocalVocab::LiteralOrIri> LocalVocab::getAllWordsForTesting()
}
return result;
}

// _____________________________________________________________________________
BlankNodeIndex LocalVocab::getBlankNodeIndex(
    ad_utility::BlankNodeManager* blankNodeManager) {
  // A null manager is a caller error.
  AD_CONTRACT_CHECK(blankNodeManager);
  // The `localBlankNodeManager_` is created lazily on the first request. On
  // later calls the already existing one is reused and the argument is only
  // checked for being non-null.
  if (!localBlankNodeManager_.has_value()) [[unlikely]] {
    localBlankNodeManager_.emplace(blankNodeManager);
  }
  // Draw a fresh id and wrap it into a `BlankNodeIndex`.
  const uint64_t freshId = localBlankNodeManager_->getId();
  return BlankNodeIndex::make(freshId);
}
8 changes: 8 additions & 0 deletions src/engine/LocalVocab.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "absl/container/node_hash_set.h"
#include "global/Id.h"
#include "parser/LiteralOrIri.h"
#include "util/BlankNodeManager.h"

// A class for maintaining a local vocabulary with contiguous (local) IDs. This
// is meant for words that are not part of the normal vocabulary (constructed
Expand All @@ -38,6 +39,9 @@ class LocalVocab {
auto& primaryWordSet() { return *primaryWordSet_; }
const auto& primaryWordSet() const { return *primaryWordSet_; }

std::optional<ad_utility::BlankNodeManager::LocalBlankNodeManager>
localBlankNodeManager_;

public:
// Create a new, empty local vocabulary.
LocalVocab() = default;
Expand Down Expand Up @@ -90,6 +94,10 @@ class LocalVocab {
// Return all the words from all the word sets as a vector.
std::vector<LiteralOrIri> getAllWordsForTesting() const;

// Get a new BlankNodeIndex using the LocalBlankNodeManager.
[[nodiscard]] BlankNodeIndex getBlankNodeIndex(
ad_utility::BlankNodeManager* blankNodeManager);

private:
// Common implementation for the two variants of
// `getIndexAndAddIfNotContainedImpl` above.
Expand Down
1 change: 0 additions & 1 deletion src/engine/QueryExecutionContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#pragma once

#include <memory>
#include <shared_mutex>
#include <string>

#include "engine/QueryPlanningCostFactors.h"
Expand Down
25 changes: 17 additions & 8 deletions src/engine/Service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "parser/RdfParser.h"
#include "parser/TokenizerCtre.h"
#include "util/Exception.h"
#include "util/HashMap.h"
#include "util/HashSet.h"
#include "util/StringUtils.h"
#include "util/http/HttpUtils.h"
Expand Down Expand Up @@ -204,14 +205,18 @@ void Service::writeJsonResult(const std::vector<std::string>& vars,
IdTableStatic<I> idTable = std::move(*idTablePtr).toStatic<I>();
checkCancellation();
std::vector<size_t> numLocalVocabPerColumn(idTable.numColumns());
// TODO<joka921> We should include a memory limit, as soon as we can do proper
// memory-limited HashMaps.
ad_utility::HashMap<std::string, Id> blankNodeMap;

auto writeBindings = [&](const nlohmann::json& bindings, size_t& rowIdx) {
for (const auto& binding : bindings) {
idTable.emplace_back();
for (size_t colIdx = 0; colIdx < vars.size(); ++colIdx) {
TripleComponent tc =
binding.contains(vars[colIdx])
? bindingToTripleComponent(binding[vars[colIdx]])
? bindingToTripleComponent(binding[vars[colIdx]], blankNodeMap,
localVocab)
: TripleComponent::UNDEF();

Id id = std::move(tc).toValueId(getIndex().getVocab(), *localVocab);
Expand Down Expand Up @@ -359,7 +364,9 @@ std::optional<std::string> Service::getSiblingValuesClause() const {

// ____________________________________________________________________________
TripleComponent Service::bindingToTripleComponent(
const nlohmann::json& binding) {
const nlohmann::json& binding,
ad_utility::HashMap<std::string, Id>& blankNodeMap,
LocalVocab* localVocab) const {
if (!binding.contains("type") || !binding.contains("value")) {
throw std::runtime_error(absl::StrCat(
"Missing type or value field in binding. The binding is: '",
Expand All @@ -368,6 +375,8 @@ TripleComponent Service::bindingToTripleComponent(

const auto type = binding["type"].get<std::string_view>();
const auto value = binding["value"].get<std::string_view>();
auto blankNodeManagerPtr =
getExecutionContext()->getIndex().getBlankNodeManager();

TripleComponent tc;
if (type == "literal") {
Expand All @@ -386,12 +395,12 @@ TripleComponent Service::bindingToTripleComponent(
} else if (type == "uri") {
tc = TripleComponent::Iri::fromIrirefWithoutBrackets(value);
} else if (type == "bnode") {
throw std::runtime_error(
"Blank nodes in the result of a SERVICE are currently not "
"supported. "
"For now, consider filtering them out using the ISBLANK function "
"or "
"converting them via the STR function.");
auto [it, wasNew] = blankNodeMap.try_emplace(value, Id());
if (wasNew) {
it->second = Id::makeFromBlankNodeIndex(
localVocab->getBlankNodeIndex(blankNodeManagerPtr));
}
tc = it->second;
} else {
throw std::runtime_error(absl::StrCat("Type ", type,
" is undefined. The binding is: '",
Expand Down
6 changes: 4 additions & 2 deletions src/engine/Service.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,10 @@ class Service : public Operation {
vector<QueryExecutionTree*> getChildren() override { return {}; }

// Convert the given binding to TripleComponent.
static TripleComponent bindingToTripleComponent(
const nlohmann::json& binding);
TripleComponent bindingToTripleComponent(
const nlohmann::json& binding,
ad_utility::HashMap<std::string, Id>& blankNodeMap,
LocalVocab* localVocab) const;

// Create a value for the VALUES-clause used in `getSiblingValuesClause` from
// id. If the id is of type blank node `std::nullopt` is returned.
Expand Down
1 change: 0 additions & 1 deletion src/global/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

#include <chrono>
#include <ctre.hpp>
#include <limits>
#include <stdexcept>
#include <string>
#include <string_view>
Expand Down
5 changes: 5 additions & 0 deletions src/index/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ auto Index::getTextVocab() const -> const TextVocab& {
return pimpl_->getTextVocab();
}

// ____________________________________________________________________________
ad_utility::BlankNodeManager* Index::getBlankNodeManager() const {
  // Pure forwarding to the implementation object; the returned pointer is
  // non-owning.
  ad_utility::BlankNodeManager* const manager = pimpl_->getBlankNodeManager();
  return manager;
}

// ____________________________________________________________________________
size_t Index::getCardinality(const TripleComponent& comp,
Permutation::Enum p) const {
Expand Down
3 changes: 3 additions & 0 deletions src/index/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ class Index {
Vocabulary<std::string, SimpleStringComparator, WordVocabIndex>;
[[nodiscard]] const TextVocab& getTextVocab() const;

// Get a (non-owning) pointer to the BlankNodeManager of this Index.
ad_utility::BlankNodeManager* getBlankNodeManager() const;

// --------------------------------------------------------------------------
// RDF RETRIEVAL
// --------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion src/index/IndexFormatVersion.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ struct IndexFormatVersion {
// The actual index version. Change it once the binary format of the index
// changes.
inline const IndexFormatVersion& indexFormatVersion{
1532, DateYearOrDuration{Date{2024, 10, 4}}};
1504, DateYearOrDuration{Date{2024, 10, 18}}};
} // namespace qlever
15 changes: 15 additions & 0 deletions src/index/IndexImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,9 @@ void IndexImpl::createFromFiles(
configurationJson_["has-all-permutations"] = true;
}

configurationJson_["num-blank-nodes-total"] =
indexBuilderData.vocabularyMetaData_.getNextBlankNodeIndex();

addInternalStatisticsToConfiguration(numTriplesInternal,
numPredicatesInternal);
LOG(INFO) << "Index build completed" << std::endl;
Expand Down Expand Up @@ -1077,6 +1080,12 @@ void IndexImpl::readConfiguration() {
loadDataMember("num-objects", numObjects_, NumNormalAndInternal{});
loadDataMember("num-triples", numTriples_, NumNormalAndInternal{});

// Initialize BlankNodeManager
uint64_t numBlankNodesTotal;
loadDataMember("num-blank-nodes-total", numBlankNodesTotal);
blankNodeManager_ =
std::make_unique<ad_utility::BlankNodeManager>(numBlankNodesTotal);

// Compute unique ID for this index.
//
// TODO: This is a simplistic way. It would be better to incorporate bytes
Expand Down Expand Up @@ -1686,3 +1695,9 @@ std::unique_ptr<ExternalSorter<Comparator, I>> IndexImpl::makeSorterPtr(
std::string_view permutationName) const {
return makeSorterImpl<Comparator, I, true>(permutationName);
}

// _____________________________________________________________________________
ad_utility::BlankNodeManager* IndexImpl::getBlankNodeManager() const {
  // The manager is only created in `readConfiguration`, so requesting it
  // before that point violates the contract of this function.
  AD_CONTRACT_CHECK(blankNodeManager_);
  // Hand out a non-owning pointer; the `unique_ptr` member keeps ownership.
  ad_utility::BlankNodeManager* const manager = blankNodeManager_.get();
  return manager;
}
5 changes: 5 additions & 0 deletions src/index/IndexImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ class IndexImpl {
std::optional<Id> idOfHasPatternDuringIndexBuilding_;
std::optional<Id> idOfInternalGraphDuringIndexBuilding_;

// BlankNodeManager, initialized during `readConfiguration`
std::unique_ptr<ad_utility::BlankNodeManager> blankNodeManager_{nullptr};

public:
explicit IndexImpl(ad_utility::AllocatorWithLimit<Id> allocator);

Expand Down Expand Up @@ -255,6 +258,8 @@ class IndexImpl {

const auto& getTextVocab() const { return textVocab_; };

ad_utility::BlankNodeManager* getBlankNodeManager() const;

// --------------------------------------------------------------------------
// -- RETRIEVAL ---
// --------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion src/index/VocabularyMerger.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ struct VocabularyMetaData {
Id begin() const { return begin_; }
Id end() const { return end_; }

// Return true iff the `id` belongs to this range.
// Return true if the `id` belongs to this range.
bool contains(Id id) const { return begin_ <= id && id < end_; }

private:
Expand Down
62 changes: 62 additions & 0 deletions src/util/BlankNodeManager.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Moritz Dom ([email protected])

#include "util/BlankNodeManager.h"

namespace ad_utility {

// _____________________________________________________________________________
BlankNodeManager::BlankNodeManager(uint64_t minIndex)
    : minIndex_(minIndex),
      // `totalAvailableBlocks_` is declared before `randBlockIndex_` and gets
      // its value from its default member initializer, so it is already set
      // when the generator for the range [0, totalAvailableBlocks_ - 1] is
      // constructed here.
      randBlockIndex_(0, totalAvailableBlocks_ - 1) {}

// _____________________________________________________________________________
// Reserve a currently unused block and return it. The block index is chosen
// at random; the first index of the returned block is
// `minIndex_ + blockIdx * blockSize_`.
BlankNodeManager::Block BlankNodeManager::allocateBlock() {
  // Take the exclusive lock once and keep it for the whole allocation, so
  // that the fill-level check below and the insertion see the same state of
  // the set (no other thread can reserve blocks in between).
  auto usedBlocksSetPtr = usedBlocksSet_.wlock();

  // The Random-Generation Algorithm's performance is reduced once the number of
  // used blocks exceeds a limit.
  auto numBlocks = usedBlocksSetPtr->size();
  AD_CORRECTNESS_CHECK(
      numBlocks < totalAvailableBlocks_ / 256,
      absl::StrCat("Critical high number of blank node blocks in use: ",
                   numBlocks, " blocks"));

  // Draw random block indices until one is found that is not yet in use.
  // `insert` reports via `.second` whether the index was newly added, which
  // combines the lookup and the reservation in a single hash operation.
  while (true) {
    auto blockIdx = randBlockIndex_();
    if (usedBlocksSetPtr->insert(blockIdx).second) {
      return Block(blockIdx, minIndex_ + blockIdx * blockSize_);
    }
  }
}

// _____________________________________________________________________________
BlankNodeManager::Block::Block(uint64_t blockIndex, uint64_t startIndex)
: blockIdx_(blockIndex), nextIdx_(startIndex) {}

// _____________________________________________________________________________
BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager(
BlankNodeManager* blankNodeManager)
: blankNodeManager_(blankNodeManager) {}

// _____________________________________________________________________________
// Give all blocks that were reserved by this object back to the
// `BlankNodeManager` by removing their indices from its set of used blocks.
BlankNodeManager::LocalBlankNodeManager::~LocalBlankNodeManager() {
  // Without reserved blocks (e.g. no id was ever requested) there is nothing
  // to give back, so the exclusive lock on the shared set is not needed.
  if (blocks_.empty()) {
    return;
  }
  auto ptr = blankNodeManager_->usedBlocksSet_.wlock();
  // Iterate by reference; copying the blocks is not necessary.
  for (const auto& block : blocks_) {
    // NOTE(review): if this check throws, the exception escapes a destructor,
    // which presumably terminates the program -- confirm that this is
    // intended.
    AD_CONTRACT_CHECK(ptr->contains(block.blockIdx_));
    ptr->erase(block.blockIdx_);
  }
}

// _____________________________________________________________________________
// Return the next unused id. A new block is requested from the
// `BlankNodeManager` if no block was reserved so far or if all ids of the
// most recently reserved block have been handed out.
uint64_t BlankNodeManager::LocalBlankNodeManager::getId() {
  const bool needsNewBlock =
      blocks_.empty() || blocks_.back().nextIdx_ == idxAfterCurrentBlock_;
  if (needsNewBlock) {
    auto& freshBlock = blocks_.emplace_back(blankNodeManager_->allocateBlock());
    // A fresh block starts at `nextIdx_` and spans `blockSize_` ids.
    idxAfterCurrentBlock_ = freshBlock.nextIdx_ + blockSize_;
  }
  auto& currentBlock = blocks_.back();
  const uint64_t id = currentBlock.nextIdx_;
  ++currentBlock.nextIdx_;
  return id;
}

} // namespace ad_utility
103 changes: 103 additions & 0 deletions src/util/BlankNodeManager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Moritz Dom ([email protected])

#pragma once

#include <gtest/gtest_prod.h>

#include <vector>

#include "global/ValueId.h"
#include "util/HashSet.h"
#include "util/Random.h"
#include "util/Synchronized.h"

namespace ad_utility {
/*
 * Manager class owned by an `Index` to manage currently available indices for
 * blank nodes to be added during runtime. The intention is to use the same
 * `BlankNodeIndex`-Datatype as for blank nodes given at indexing time, by
 * setting their count as the minimum index for the ones added at runtime.
 * A `LocalVocab` can register new blank nodes (e.g. resulting from a `Service`
 * operation) by obtaining a `Block` of currently unused indices using its own
 * `LocalBlankNodeManager` from the `BlankNodeManager`.
 */
class BlankNodeManager {
 public:
  // Minimum blank node index.
  const uint64_t minIndex_;

  // Number of indices that make up a single block. The type is the same
  // fixed-width type as for the indices that are combined with it.
  static constexpr uint64_t blockSize_ = 1000;

  // Number of blocks available. Must be declared after `minIndex_` and before
  // `randBlockIndex_`, because it is computed from the former and used to
  // construct the latter.
  const uint64_t totalAvailableBlocks_ =
      (ValueId::maxIndex - minIndex_ + 1) / blockSize_;

 private:
  // Int Generator yielding random block indices.
  SlowRandomIntGenerator<uint64_t> randBlockIndex_;

  // Tracks blocks currently used by instances of `LocalBlankNodeManager`.
  Synchronized<HashSet<uint64_t>> usedBlocksSet_;

 public:
  // Constructor, where `minIndex` is the minimum index such that all managed
  // indices are in [`minIndex_`, `ValueId::maxIndex`]. `minIndex_` is
  // determined by the number of BlankNodes in the current Index.
  explicit BlankNodeManager(uint64_t minIndex = 0);
  ~BlankNodeManager() = default;

  // A BlankNodeIndex Block of size `blockSize_`.
  class Block {
    // Intentional private constructor, allowing only the BlankNodeManager to
    // create Blocks (for a `LocalBlankNodeManager`).
    explicit Block(uint64_t blockIndex, uint64_t startIndex);
    friend class BlankNodeManager;

   public:
    ~Block() = default;
    // The index of this block.
    const uint64_t blockIdx_;
    // The next free index within this block.
    uint64_t nextIdx_;
  };

  // Manages the BlankNodes used within a LocalVocab.
  class LocalBlankNodeManager {
   public:
    explicit LocalBlankNodeManager(BlankNodeManager* blankNodeManager);
    ~LocalBlankNodeManager();

    // No copy, as the managed blocks shall not be duplicated.
    LocalBlankNodeManager(const LocalBlankNodeManager&) = delete;
    LocalBlankNodeManager& operator=(const LocalBlankNodeManager&) = delete;

    LocalBlankNodeManager(LocalBlankNodeManager&&) = default;

    // Move assignment is implemented by exchanging the complete state with
    // `other`. A member-wise move assignment would overwrite `blocks_`
    // without removing the previously reserved block indices from the set of
    // used blocks of the `BlankNodeManager`, so they would stay reserved
    // forever. With the exchange, the blocks previously held by `*this` are
    // released when `other` is destroyed.
    LocalBlankNodeManager& operator=(LocalBlankNodeManager&& other) noexcept {
      if (this != &other) {
        blocks_.swap(other.blocks_);

        BlankNodeManager* const previousManager = blankNodeManager_;
        blankNodeManager_ = other.blankNodeManager_;
        other.blankNodeManager_ = previousManager;

        const uint64_t previousIdxAfterBlock = idxAfterCurrentBlock_;
        idxAfterCurrentBlock_ = other.idxAfterCurrentBlock_;
        other.idxAfterCurrentBlock_ = previousIdxAfterBlock;
      }
      return *this;
    }

    // Get a new id.
    [[nodiscard]] uint64_t getId();

   private:
    // Reserved blocks.
    std::vector<BlankNodeManager::Block> blocks_;

    // Non-owning pointer to the BlankNodeManager, used to allocate new blocks
    // and to free the reserved blocks.
    BlankNodeManager* blankNodeManager_;

    // The first index after the current Block.
    uint64_t idxAfterCurrentBlock_{0};

    FRIEND_TEST(BlankNodeManager, LocalBlankNodeManagerGetID);
  };

  // Allocate and retrieve a block of free ids.
  [[nodiscard]] Block allocateBlock();

  FRIEND_TEST(BlankNodeManager, blockAllocationAndFree);
  FRIEND_TEST(BlankNodeManager, moveLocalBlankNodeManager);
};

} // namespace ad_utility
Loading

0 comments on commit ef057ac

Please sign in to comment.