From ef057ac92ebe6401cedefc6a883dc20ff4fe1422 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Fri, 18 Oct 2024 19:00:10 +0200 Subject: [PATCH] Add BlankNode support for SERVICE (#1504) With this commit, QLever supports adding new blank nodes during the evaluation of a query. This function is used to support blank nodes in the result of a `SERVICE` request. These blank nodes are distinct from all blank nodes in the index, and also from all blank nodes from other SERVICE requests, even if they came from the same server. This behavior is correct according to the SPARQL 1.1 federated query standard. --- src/engine/LocalVocab.cpp | 11 +++ src/engine/LocalVocab.h | 8 +++ src/engine/QueryExecutionContext.h | 1 - src/engine/Service.cpp | 25 ++++--- src/engine/Service.h | 6 +- src/global/Constants.h | 1 - src/index/Index.cpp | 5 ++ src/index/Index.h | 3 + src/index/IndexFormatVersion.h | 2 +- src/index/IndexImpl.cpp | 15 +++++ src/index/IndexImpl.h | 5 ++ src/index/VocabularyMerger.h | 2 +- src/util/BlankNodeManager.cpp | 62 +++++++++++++++++ src/util/BlankNodeManager.h | 103 +++++++++++++++++++++++++++++ src/util/CMakeLists.txt | 2 +- src/util/Synchronized.h | 2 +- test/BlankNodeManagerTest.cpp | 82 +++++++++++++++++++++++ test/CMakeLists.txt | 2 + test/IndexTest.cpp | 21 ++++++ test/LocalVocabTest.cpp | 9 +++ test/ServiceTest.cpp | 76 +++++++++++++-------- 21 files changed, 399 insertions(+), 44 deletions(-) create mode 100644 src/util/BlankNodeManager.cpp create mode 100644 src/util/BlankNodeManager.h create mode 100644 test/BlankNodeManagerTest.cpp diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index 3c87ff5bdc..ee0285c532 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -77,3 +77,14 @@ std::vector LocalVocab::getAllWordsForTesting() } return result; } + +// _____________________________________________________________________________ +BlankNodeIndex LocalVocab::getBlankNodeIndex( + 
ad_utility::BlankNodeManager* blankNodeManager) { + AD_CONTRACT_CHECK(blankNodeManager); + // Initialize the `localBlankNodeManager_` if it doesn't exist yet. + if (!localBlankNodeManager_) [[unlikely]] { + localBlankNodeManager_.emplace(blankNodeManager); + } + return BlankNodeIndex::make(localBlankNodeManager_->getId()); +} diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index d0ec7ccfaa..be9a7c4499 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -13,6 +13,7 @@ #include "absl/container/node_hash_set.h" #include "global/Id.h" #include "parser/LiteralOrIri.h" +#include "util/BlankNodeManager.h" // A class for maintaining a local vocabulary with contiguous (local) IDs. This // is meant for words that are not part of the normal vocabulary (constructed @@ -38,6 +39,9 @@ class LocalVocab { auto& primaryWordSet() { return *primaryWordSet_; } const auto& primaryWordSet() const { return *primaryWordSet_; } + std::optional + localBlankNodeManager_; + public: // Create a new, empty local vocabulary. LocalVocab() = default; @@ -90,6 +94,10 @@ class LocalVocab { // Return all the words from all the word sets as a vector. std::vector getAllWordsForTesting() const; + // Get a new BlankNodeIndex using the LocalBlankNodeManager. + [[nodiscard]] BlankNodeIndex getBlankNodeIndex( + ad_utility::BlankNodeManager* blankNodeManager); + private: // Common implementation for the two variants of // `getIndexAndAddIfNotContainedImpl` above. 
diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 0c17ea684b..e82e8fd0ef 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -7,7 +7,6 @@ #pragma once #include -#include #include #include "engine/QueryPlanningCostFactors.h" diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 200924e119..9533a69e4f 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -15,6 +15,7 @@ #include "parser/RdfParser.h" #include "parser/TokenizerCtre.h" #include "util/Exception.h" +#include "util/HashMap.h" #include "util/HashSet.h" #include "util/StringUtils.h" #include "util/http/HttpUtils.h" @@ -204,6 +205,9 @@ void Service::writeJsonResult(const std::vector& vars, IdTableStatic idTable = std::move(*idTablePtr).toStatic(); checkCancellation(); std::vector numLocalVocabPerColumn(idTable.numColumns()); + // TODO We should include a memory limit, as soon as we can do proper + // memory-limited HashMaps. + ad_utility::HashMap blankNodeMap; auto writeBindings = [&](const nlohmann::json& bindings, size_t& rowIdx) { for (const auto& binding : bindings) { @@ -211,7 +215,8 @@ void Service::writeJsonResult(const std::vector& vars, for (size_t colIdx = 0; colIdx < vars.size(); ++colIdx) { TripleComponent tc = binding.contains(vars[colIdx]) - ? bindingToTripleComponent(binding[vars[colIdx]]) + ? 
bindingToTripleComponent(binding[vars[colIdx]], blankNodeMap, + localVocab) : TripleComponent::UNDEF(); Id id = std::move(tc).toValueId(getIndex().getVocab(), *localVocab); @@ -359,7 +364,9 @@ std::optional Service::getSiblingValuesClause() const { // ____________________________________________________________________________ TripleComponent Service::bindingToTripleComponent( - const nlohmann::json& binding) { + const nlohmann::json& binding, + ad_utility::HashMap& blankNodeMap, + LocalVocab* localVocab) const { if (!binding.contains("type") || !binding.contains("value")) { throw std::runtime_error(absl::StrCat( "Missing type or value field in binding. The binding is: '", @@ -368,6 +375,8 @@ TripleComponent Service::bindingToTripleComponent( const auto type = binding["type"].get(); const auto value = binding["value"].get(); + auto blankNodeManagerPtr = + getExecutionContext()->getIndex().getBlankNodeManager(); TripleComponent tc; if (type == "literal") { @@ -386,12 +395,12 @@ TripleComponent Service::bindingToTripleComponent( } else if (type == "uri") { tc = TripleComponent::Iri::fromIrirefWithoutBrackets(value); } else if (type == "bnode") { - throw std::runtime_error( - "Blank nodes in the result of a SERVICE are currently not " - "supported. " - "For now, consider filtering them out using the ISBLANK function " - "or " - "converting them via the STR function."); + auto [it, wasNew] = blankNodeMap.try_emplace(value, Id()); + if (wasNew) { + it->second = Id::makeFromBlankNodeIndex( + localVocab->getBlankNodeIndex(blankNodeManagerPtr)); + } + tc = it->second; } else { throw std::runtime_error(absl::StrCat("Type ", type, " is undefined. The binding is: '", diff --git a/src/engine/Service.h b/src/engine/Service.h index 68643aae97..d11b0191ba 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -99,8 +99,10 @@ class Service : public Operation { vector getChildren() override { return {}; } // Convert the given binding to TripleComponent. 
- static TripleComponent bindingToTripleComponent( - const nlohmann::json& binding); + TripleComponent bindingToTripleComponent( + const nlohmann::json& binding, + ad_utility::HashMap& blankNodeMap, + LocalVocab* localVocab) const; // Create a value for the VALUES-clause used in `getSiblingValuesClause` from // id. If the id is of type blank node `std::nullopt` is returned. diff --git a/src/global/Constants.h b/src/global/Constants.h index 970cde9cd0..0e94c1ed99 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -9,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 0bb7d770fa..142ace17b5 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -51,6 +51,11 @@ auto Index::getTextVocab() const -> const TextVocab& { return pimpl_->getTextVocab(); } +// ____________________________________________________________________________ +ad_utility::BlankNodeManager* Index::getBlankNodeManager() const { + return pimpl_->getBlankNodeManager(); +} + // ____________________________________________________________________________ size_t Index::getCardinality(const TripleComponent& comp, Permutation::Enum p) const { diff --git a/src/index/Index.h b/src/index/Index.h index 991d6573dc..dd21b4eaec 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -112,6 +112,9 @@ class Index { Vocabulary; [[nodiscard]] const TextVocab& getTextVocab() const; + // Get a (non-owning) pointer to the BlankNodeManager of this Index. 
+ ad_utility::BlankNodeManager* getBlankNodeManager() const; + // -------------------------------------------------------------------------- // RDF RETRIEVAL // -------------------------------------------------------------------------- diff --git a/src/index/IndexFormatVersion.h b/src/index/IndexFormatVersion.h index 84bee08318..a21fcc96e0 100644 --- a/src/index/IndexFormatVersion.h +++ b/src/index/IndexFormatVersion.h @@ -36,5 +36,5 @@ struct IndexFormatVersion { // The actual index version. Change it once the binary format of the index // changes. inline const IndexFormatVersion& indexFormatVersion{ - 1532, DateYearOrDuration{Date{2024, 10, 4}}}; + 1504, DateYearOrDuration{Date{2024, 10, 18}}}; } // namespace qlever diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index c7eb2e8a4d..a2222b4b77 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -390,6 +390,9 @@ void IndexImpl::createFromFiles( configurationJson_["has-all-permutations"] = true; } + configurationJson_["num-blank-nodes-total"] = + indexBuilderData.vocabularyMetaData_.getNextBlankNodeIndex(); + addInternalStatisticsToConfiguration(numTriplesInternal, numPredicatesInternal); LOG(INFO) << "Index build completed" << std::endl; @@ -1077,6 +1080,12 @@ void IndexImpl::readConfiguration() { loadDataMember("num-objects", numObjects_, NumNormalAndInternal{}); loadDataMember("num-triples", numTriples_, NumNormalAndInternal{}); + // Initialize BlankNodeManager + uint64_t numBlankNodesTotal; + loadDataMember("num-blank-nodes-total", numBlankNodesTotal); + blankNodeManager_ = + std::make_unique(numBlankNodesTotal); + // Compute unique ID for this index. // // TODO: This is a simplistic way. 
It would be better to incorporate bytes @@ -1686,3 +1695,9 @@ std::unique_ptr> IndexImpl::makeSorterPtr( std::string_view permutationName) const { return makeSorterImpl(permutationName); } + +// _____________________________________________________________________________ +ad_utility::BlankNodeManager* IndexImpl::getBlankNodeManager() const { + AD_CONTRACT_CHECK(blankNodeManager_); + return blankNodeManager_.get(); +} diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index fb5da51e1b..c9fe149517 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -184,6 +184,9 @@ class IndexImpl { std::optional idOfHasPatternDuringIndexBuilding_; std::optional idOfInternalGraphDuringIndexBuilding_; + // BlankNodeManager, initialized during `readConfiguration` + std::unique_ptr blankNodeManager_{nullptr}; + public: explicit IndexImpl(ad_utility::AllocatorWithLimit allocator); @@ -255,6 +258,8 @@ class IndexImpl { const auto& getTextVocab() const { return textVocab_; }; + ad_utility::BlankNodeManager* getBlankNodeManager() const; + // -------------------------------------------------------------------------- // -- RETRIEVAL --- // -------------------------------------------------------------------------- diff --git a/src/index/VocabularyMerger.h b/src/index/VocabularyMerger.h index 11ba956e19..3c171d2c65 100644 --- a/src/index/VocabularyMerger.h +++ b/src/index/VocabularyMerger.h @@ -62,7 +62,7 @@ struct VocabularyMetaData { Id begin() const { return begin_; } Id end() const { return end_; } - // Return true iff the `id` belongs to this range. + // Return true if the `id` belongs to this range. bool contains(Id id) const { return begin_ <= id && id < end_; } private: diff --git a/src/util/BlankNodeManager.cpp b/src/util/BlankNodeManager.cpp new file mode 100644 index 0000000000..cff86c08fa --- /dev/null +++ b/src/util/BlankNodeManager.cpp @@ -0,0 +1,62 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Moritz Dom (domm@informatik.uni-freiburg.de) + +#include "util/BlankNodeManager.h" + +namespace ad_utility { + +// _____________________________________________________________________________ +BlankNodeManager::BlankNodeManager(uint64_t minIndex) + : minIndex_(minIndex), + randBlockIndex_( + SlowRandomIntGenerator(0, totalAvailableBlocks_ - 1)) {} + +// _____________________________________________________________________________ +BlankNodeManager::Block BlankNodeManager::allocateBlock() { + // The Random-Generation Algorithm's performance is reduced once the number of + // used blocks exceeds a limit. + auto numBlocks = usedBlocksSet_.rlock()->size(); + AD_CORRECTNESS_CHECK( + numBlocks < totalAvailableBlocks_ / 256, + absl::StrCat("Critical high number of blank node blocks in use: ", + numBlocks, " blocks")); + + auto usedBlocksSetPtr = usedBlocksSet_.wlock(); + while (true) { + auto blockIdx = randBlockIndex_(); + if (!usedBlocksSetPtr->contains(blockIdx)) { + usedBlocksSetPtr->insert(blockIdx); + return Block(blockIdx, minIndex_ + blockIdx * blockSize_); + } + } +} + +// _____________________________________________________________________________ +BlankNodeManager::Block::Block(uint64_t blockIndex, uint64_t startIndex) + : blockIdx_(blockIndex), nextIdx_(startIndex) {} + +// _____________________________________________________________________________ +BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager( + BlankNodeManager* blankNodeManager) + : blankNodeManager_(blankNodeManager) {} + +// _____________________________________________________________________________ +BlankNodeManager::LocalBlankNodeManager::~LocalBlankNodeManager() { + auto ptr = blankNodeManager_->usedBlocksSet_.wlock(); + for (auto block : blocks_) { + AD_CONTRACT_CHECK(ptr->contains(block.blockIdx_)); + ptr->erase(block.blockIdx_); + } +} + +// _____________________________________________________________________________ +uint64_t 
BlankNodeManager::LocalBlankNodeManager::getId() { + if (blocks_.empty() || blocks_.back().nextIdx_ == idxAfterCurrentBlock_) { + blocks_.emplace_back(blankNodeManager_->allocateBlock()); + idxAfterCurrentBlock_ = blocks_.back().nextIdx_ + blockSize_; + } + return blocks_.back().nextIdx_++; +} + +} // namespace ad_utility diff --git a/src/util/BlankNodeManager.h b/src/util/BlankNodeManager.h new file mode 100644 index 0000000000..a39616e94f --- /dev/null +++ b/src/util/BlankNodeManager.h @@ -0,0 +1,103 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Moritz Dom (domm@informatik.uni-freiburg.de) + +#pragma once + +#include + +#include + +#include "global/ValueId.h" +#include "util/HashSet.h" +#include "util/Random.h" +#include "util/Synchronized.h" + +namespace ad_utility { +/* + * Manager class owned by an `Index` to manage currently available indices for + * blank nodes to be added during runtime. The intention is to use the same + * `BlankNodeIndex`-Datatype as for blank nodes given at indexing time, by + * setting their count as the minimum index for the ones added at runtime. + * A `LocalVocab` can register new blank nodes (e.g. resulting from a `Service` + * operation) by obtaining a `Block` of currently unused indices using its own + * `LocalBlankNodeManager` from the `BlankNodeManager`. + */ +class BlankNodeManager { + public: + // Minimum blank node index. + const uint64_t minIndex_; + + // Number of indices that make up a single block. + static constexpr uint blockSize_ = 1000; + + // Number of blocks available. + const uint64_t totalAvailableBlocks_ = + (ValueId::maxIndex - minIndex_ + 1) / blockSize_; + + private: + // Int Generator yielding random block indices. + SlowRandomIntGenerator randBlockIndex_; + + // Tracks blocks currently used by instances of `LocalBlankNodeManager`. 
+ Synchronized> usedBlocksSet_; + + public: + // Constructor, where `minIndex` is the minimum index such that all managed + // indices are in [`minIndex_`, `ValueId::maxIndex`]. `minIndex_` is + // determined by the number of BlankNodes in the current Index. + explicit BlankNodeManager(uint64_t minIndex = 0); + ~BlankNodeManager() = default; + + // A BlankNodeIndex Block of size `blockSize_`. + class Block { + // Intentional private constructor, allowing only the BlankNodeManager to + // create Blocks (for a `LocalBlankNodeManager`). + explicit Block(uint64_t blockIndex, uint64_t startIndex); + friend class BlankNodeManager; + + public: + ~Block() = default; + // The index of this block. + const uint64_t blockIdx_; + // The next free index within this block. + uint64_t nextIdx_; + }; + + // Manages the BlankNodes used within a LocalVocab. + class LocalBlankNodeManager { + public: + explicit LocalBlankNodeManager(BlankNodeManager* blankNodeManager); + ~LocalBlankNodeManager(); + + // No copy, as the managed blocks shall not be duplicated. + LocalBlankNodeManager(const LocalBlankNodeManager&) = delete; + LocalBlankNodeManager& operator=(const LocalBlankNodeManager&) = delete; + + LocalBlankNodeManager(LocalBlankNodeManager&&) = default; + LocalBlankNodeManager& operator=(LocalBlankNodeManager&&) = default; + + // Get a new id. + [[nodiscard]] uint64_t getId(); + + private: + // Reserved blocks. + std::vector blocks_; + + // Reference of the BlankNodeManager, used to free the reserved blocks. + BlankNodeManager* blankNodeManager_; + + // The first index after the current Block. + uint64_t idxAfterCurrentBlock_{0}; + + FRIEND_TEST(BlankNodeManager, LocalBlankNodeManagerGetID); + }; + + // Allocate and retrieve a block of free ids. 
+ [[nodiscard]] Block allocateBlock(); + + FRIEND_TEST(BlankNodeManager, blockAllocationAndFree); + FRIEND_TEST(BlankNodeManager, moveLocalBlankNodeManager); +}; + +} // namespace ad_utility diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index e670b86a40..80547c4d37 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -1,5 +1,5 @@ add_subdirectory(ConfigManager) add_subdirectory(MemorySize) add_subdirectory(http) -add_library(util GeoSparqlHelpers.cpp antlr/ANTLRErrorHandling.cpp ParseException.cpp Conversions.cpp Date.cpp DateYearDuration.cpp Duration.cpp antlr/GenerateAntlrExceptionMetadata.cpp CancellationHandle.cpp StringUtils.cpp LazyJsonParser.cpp) +add_library(util GeoSparqlHelpers.cpp antlr/ANTLRErrorHandling.cpp ParseException.cpp Conversions.cpp Date.cpp DateYearDuration.cpp Duration.cpp antlr/GenerateAntlrExceptionMetadata.cpp CancellationHandle.cpp StringUtils.cpp LazyJsonParser.cpp BlankNodeManager.cpp) qlever_target_link_libraries(util re2::re2 s2) diff --git a/src/util/Synchronized.h b/src/util/Synchronized.h index d699e612ca..657abc210d 100644 --- a/src/util/Synchronized.h +++ b/src/util/Synchronized.h @@ -116,7 +116,7 @@ class Synchronized { return f(data_); } - /// const overload of with WriteLock + /// const overload of `withWriteLock` template auto withWriteLock(F f) const { std::lock_guard l(mutex()); diff --git a/test/BlankNodeManagerTest.cpp b/test/BlankNodeManagerTest.cpp new file mode 100644 index 0000000000..8a6e89ea6b --- /dev/null +++ b/test/BlankNodeManagerTest.cpp @@ -0,0 +1,82 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Moritz Dom (domm@informatik.uni-freiburg.de) + +#include + +#include "gmock/gmock.h" +#include "util/BlankNodeManager.h" +#include "util/GTestHelpers.h" + +namespace ad_utility { +// _____________________________________________________________________________ +TEST(BlankNodeManager, blockAllocationAndFree) { + BlankNodeManager bnm(0); + EXPECT_TRUE(bnm.usedBlocksSet_.rlock()->empty()); + + { + // LocalBlankNodeManager allocates a new block + BlankNodeManager::LocalBlankNodeManager lbnm(&bnm); + [[maybe_unused]] uint64_t id = lbnm.getId(); + EXPECT_EQ(bnm.usedBlocksSet_.rlock()->size(), 1); + } + + // Once the LocalBlankNodeManager is destroyed, all Blocks allocated through + // it are freed/removed from the BlankNodeManager's set. + EXPECT_TRUE(bnm.usedBlocksSet_.rlock()->empty()); + + // Mock randomIntGenerator to let the block index generation collide. + bnm.randBlockIndex_ = SlowRandomIntGenerator(0, 1); + [[maybe_unused]] auto _ = bnm.allocateBlock(); + for (int i = 0; i < 30; ++i) { + auto block = bnm.allocateBlock(); + bnm.usedBlocksSet_.wlock()->erase(block.blockIdx_); + } +} + +// _____________________________________________________________________________ +TEST(BlankNodeManager, LocalBlankNodeManagerGetID) { + BlankNodeManager bnm(0); + BlankNodeManager::LocalBlankNodeManager l(&bnm); + + // initially the LocalBlankNodeManager doesn't have any blocks + EXPECT_EQ(l.blocks_.size(), 0); + + // A new Block is allocated, if + // no blocks are allocated yet + uint64_t id = l.getId(); + EXPECT_EQ(l.blocks_.size(), 1); + + // or the ids of the last block are all used + l.blocks_.back().nextIdx_ = id + BlankNodeManager::blockSize_; + id = l.getId(); + EXPECT_EQ(l.blocks_.size(), 2); +} + +// _____________________________________________________________________________ +TEST(BlankNodeManager, maxNumOfBlocks) { + // Mock a high `minIndex_` to simulate reduced space in the `usedBlocksSet_` + BlankNodeManager bnm(ValueId::maxIndex - 256 * 
BlankNodeManager::blockSize_ + + 2); + AD_EXPECT_THROW_WITH_MESSAGE( + [[maybe_unused]] auto _ = bnm.allocateBlock(), + ::testing::HasSubstr( + "Critical high number of blank node blocks in use:")); +} + +// _____________________________________________________________________________ +TEST(BlankNodeManager, moveLocalBlankNodeManager) { + // This ensures that the `blocks_` of the `LocalBlankNodeManager` are moved + // correctly, such that they're freed/removed from the `BlankNodeManager` + // set only once. + BlankNodeManager bnm(0); + EXPECT_NO_THROW({ + BlankNodeManager::LocalBlankNodeManager l1(&bnm); + auto l2(std::move(l1)); + BlankNodeManager::LocalBlankNodeManager l3(&bnm); + l3 = std::move(l2); + }); + EXPECT_TRUE(bnm.usedBlocksSet_.rlock()->empty()); +} + +} // namespace ad_utility diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6db9886307..bcbee2b48e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -405,6 +405,8 @@ addLinkAndDiscoverTest(FilterTest engine) addLinkAndDiscoverTest(ResultTest engine) +addLinkAndDiscoverTest(BlankNodeManagerTest) + addLinkAndDiscoverTest(SparqlExpressionGeneratorsTest engine) addLinkAndDiscoverTest(UrlParserTest) diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 6ed660aae9..ecb72ec189 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -527,3 +527,24 @@ TEST(IndexTest, loggingAndSettingOfParallelParsing) { HasSubstr("but has to be specified")); } } + +TEST(IndexTest, getBlankNodeManager) { + // The `blankNodeManager_` is initialized after initializing the Index itself. + // Therefore we expect a throw when the getter is called by an + // uninitialized Index. 
+ Index index{ad_utility::makeUnlimitedAllocator()}; + EXPECT_ANY_THROW(index.getBlankNodeManager()); + + // Index is initialized -> no throw + const Index& index2 = getQec("")->getIndex(); + EXPECT_NO_THROW(index2.getBlankNodeManager()); + + // Given an Index, ensure that the BlankNodeManager's `minIndex_` is set to + // the number of blank nodes the Index is initialized with. + std::string kb = + "_:a .\n" + "_:b .\n" + "_:c ."; + const Index& index3 = getQec(kb)->getIndex(); + EXPECT_EQ(index3.getBlankNodeManager()->minIndex_, 3); +} diff --git a/test/LocalVocabTest.cpp b/test/LocalVocabTest.cpp index d381c05c8d..a9058d3a68 100644 --- a/test/LocalVocabTest.cpp +++ b/test/LocalVocabTest.cpp @@ -369,3 +369,12 @@ TEST(LocalVocab, propagation) { // TODO Maybe add tests for the new TextIndexScanFor... classes, // they never introduce any local vocab. } + +// _____________________________________________________________________________ +TEST(LocalVocab, getBlankNodeIndex) { + ad_utility::BlankNodeManager bnm(0); + LocalVocab v; + BlankNodeIndex a = v.getBlankNodeIndex(&bnm); + BlankNodeIndex b = v.getBlankNodeIndex(&bnm); + EXPECT_NE(a, b); +} diff --git a/test/ServiceTest.cpp b/test/ServiceTest.cpp index 7280b96043..ff66ca9938 100644 --- a/test/ServiceTest.cpp +++ b/test/ServiceTest.cpp @@ -10,6 +10,8 @@ #include #include "engine/Service.h" +#include "global/Constants.h" +#include "global/IndexTypes.h" #include "global/RuntimeParameters.h" #include "gmock/gmock.h" #include "parser/GraphPatternOperation.h" @@ -558,61 +560,79 @@ TEST_F(ServiceTest, getCacheKey) { EXPECT_NE(baseCacheKey, silentService.getCacheKey()); } -// Test that bindingToValueId behaves as expected. +// Test that bindingToTripleComponent behaves as expected. 
TEST_F(ServiceTest, bindingToTripleComponent) { - Index::Vocab vocabulary; - nlohmann::json binding; + ad_utility::HashMap blankNodeMap; + parsedQuery::Service parsedServiceClause{ + {Variable{"?x"}, Variable{"?y"}}, + TripleComponent::Iri::fromIriref(""), + "PREFIX doof: ", + "{ }", + false}; + Service service{testQec, parsedServiceClause}; + LocalVocab localVocab{}; + + auto bTTC = [&service, &blankNodeMap, + &localVocab](const nlohmann::json& binding) -> TripleComponent { + return service.bindingToTripleComponent(binding, blankNodeMap, &localVocab); + }; // Missing type or value. - AD_EXPECT_THROW_WITH_MESSAGE( - Service::bindingToTripleComponent({{"type", "literal"}}), - ::testing::HasSubstr("Missing type or value")); - AD_EXPECT_THROW_WITH_MESSAGE( - Service::bindingToTripleComponent({{"value", "v"}}), - ::testing::HasSubstr("Missing type or value")); + AD_EXPECT_THROW_WITH_MESSAGE(bTTC({{"type", "literal"}}), + ::testing::HasSubstr("Missing type or value")); + AD_EXPECT_THROW_WITH_MESSAGE(bTTC({{"value", "v"}}), + ::testing::HasSubstr("Missing type or value")); EXPECT_EQ( - Service::bindingToTripleComponent( - {{"type", "literal"}, {"value", "42"}, {"datatype", XSD_INT_TYPE}}), + bTTC({{"type", "literal"}, {"value", "42"}, {"datatype", XSD_INT_TYPE}}), 42); EXPECT_EQ( - Service::bindingToTripleComponent( - {{"type", "literal"}, {"value", "Hallo Welt"}, {"xml:lang", "de"}}), + bTTC({{"type", "literal"}, {"value", "Hallo Welt"}, {"xml:lang", "de"}}), TripleComponent::Literal::literalWithoutQuotes("Hallo Welt", "@de")); - EXPECT_EQ(Service::bindingToTripleComponent( - {{"type", "literal"}, {"value", "Hello World"}}), + EXPECT_EQ(bTTC({{"type", "literal"}, {"value", "Hello World"}}), TripleComponent::Literal::literalWithoutQuotes("Hello World")); // Test literals with escape characters (there used to be a bug for those) EXPECT_EQ( - Service::bindingToTripleComponent( - {{"type", "literal"}, {"value", "Hello \\World"}}), + bTTC({{"type", "literal"}, {"value", 
"Hello \\World"}}), TripleComponent::Literal::fromEscapedRdfLiteral("\"Hello \\\\World\"")); EXPECT_EQ( - Service::bindingToTripleComponent( + bTTC( {{"type", "literal"}, {"value", "Hallo \\Welt"}, {"xml:lang", "de"}}), TripleComponent::Literal::fromEscapedRdfLiteral("\"Hallo \\\\Welt\"", "@de")); - EXPECT_EQ(Service::bindingToTripleComponent( - {{"type", "literal"}, {"value", "a\"b\"c"}}), + EXPECT_EQ(bTTC({{"type", "literal"}, {"value", "a\"b\"c"}}), TripleComponent::Literal::fromEscapedRdfLiteral("\"a\\\"b\\\"c\"")); - EXPECT_EQ(Service::bindingToTripleComponent( - {{"type", "uri"}, {"value", "http://doof.org"}}), + EXPECT_EQ(bTTC({{"type", "uri"}, {"value", "http://doof.org"}}), TripleComponent::Iri::fromIrirefWithoutBrackets("http://doof.org")); - // Blank Node not supported yet. - EXPECT_ANY_THROW( - Service::bindingToTripleComponent({{"type", "bnode"}, {"value", "b"}})); + // Blank Nodes. + EXPECT_EQ(blankNodeMap.size(), 0); + + Id a = + bTTC({{"type", "bnode"}, {"value", "A"}}).toValueIdIfNotString().value(); + Id b = + bTTC({{"type", "bnode"}, {"value", "B"}}).toValueIdIfNotString().value(); + EXPECT_EQ(a.getDatatype(), Datatype::BlankNodeIndex); + EXPECT_EQ(b.getDatatype(), Datatype::BlankNodeIndex); + EXPECT_NE(a, b); + + EXPECT_EQ(blankNodeMap.size(), 2); + + // This BlankNode exists already, known Id will be used. + Id a2 = + bTTC({{"type", "bnode"}, {"value", "A"}}).toValueIdIfNotString().value(); + EXPECT_EQ(a, a2); + // Invalid type -> throw. AD_EXPECT_THROW_WITH_MESSAGE( - Service::bindingToTripleComponent( - {{"type", "INVALID_TYPE"}, {"value", "v"}}), - ::testing::HasSubstr("Type INVALID_TYPE is undefined")); + bTTC({{"type", "INVALID_TYPE"}, {"value", "v"}}), + ::testing::HasSubstr("Type INVALID_TYPE is undefined.")); } // ____________________________________________________________________________