-
Notifications
You must be signed in to change notification settings - Fork 56
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
So far, the buffer size of the parser was hard-coded in `src/index/ConstantsIndexBuilding.h` to `10 MiB`. This was too small for datasets like OSM, which have some very large triples (due to WKT literals of complex geometries). There is now an option `--parser-buffer-size` with which the default buffer size can be overridden. While at it, externalize all IRIs and literals (except for the QLever-internal ones) by default. This has been the default in all our Qleverfiles for quite some time now.
- Loading branch information
1 parent
6ac2e0e
commit acb6633
Showing
17 changed files
with
145 additions
and
96 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,6 @@ | ||
// Copyright 2023, University of Freiburg, | ||
// Copyright 2023 - 2025, University of Freiburg, | ||
// Chair of Algorithms and Data Structures. | ||
// | ||
// Authors: Björn Buchhold <[email protected]> | ||
// Authors: Björn Buchhold <[email protected]> [2014 - 2017] | ||
// Johannes Kalmbach <kalmbach@cs.uni-freiburg.de> | ||
// Hannah Bast <bast@cs.uni-freiburg.de> | ||
|
||
|
@@ -22,6 +21,7 @@ using namespace ad_utility::memory_literals; | |
constexpr inline ad_utility::MemorySize DEFAULT_MEMORY_LIMIT_INDEX_BUILDING = | ||
5_GB; | ||
constexpr inline ad_utility::MemorySize STXXL_DISK_SIZE_INDEX_BUILDER = 1_GB; | ||
constexpr inline ad_utility::MemorySize DEFAULT_PARSER_BUFFER_SIZE = 10_MB; | ||
|
||
constexpr inline ad_utility::MemorySize DEFAULT_MEM_FOR_QUERIES = 4_GB; | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
// Copyright 2014, University of Freiburg, | ||
// Copyright 2014 - 2025 University of Freiburg | ||
// Chair of Algorithms and Data Structures. | ||
// Author: | ||
// 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) | ||
// 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) | ||
// Authors: Björn Buchhold <[email protected]> [2014 - 2017] | ||
// Johannes Kalmbach <kalmbach@cs.uni-freiburg.de> | ||
// Hannah Bast <bast@cs.uni-freiburg.de> | ||
|
||
#include <boost/program_options.hpp> | ||
#include <cstdlib> | ||
|
@@ -165,6 +165,7 @@ int main(int argc, char** argv) { | |
bool onlyPsoAndPos = false; | ||
bool addWordsFromLiterals = false; | ||
std::optional<ad_utility::MemorySize> stxxlMemory; | ||
std::optional<ad_utility::MemorySize> parserBufferSize; | ||
optind = 1; | ||
|
||
Index index{ad_utility::makeUnlimitedAllocator<Id>()}; | ||
|
@@ -228,6 +229,9 @@ int main(int argc, char** argv) { | |
add("stxxl-memory,m", po::value(&stxxlMemory), | ||
"The amount of memory in to use for sorting during the index build. " | ||
"Decrease if the index builder runs out of memory."); | ||
add("parser-buffer-size,b", po::value(&parserBufferSize), | ||
"The size of the buffer used for parsing the input files. This must be " | ||
"large enough to hold a single input triple. Default: 10 MB."); | ||
add("keep-temporary-files,k", po::bool_switch(&keepTemporaryFiles), | ||
"Do not delete temporary files from index creation for debugging."); | ||
|
||
|
@@ -249,6 +253,9 @@ int main(int argc, char** argv) { | |
if (stxxlMemory.has_value()) { | ||
index.memoryLimitIndexBuilding() = stxxlMemory.value(); | ||
} | ||
if (parserBufferSize.has_value()) { | ||
index.parserBufferSize() = parserBufferSize.value(); | ||
} | ||
|
||
// If no text index name was specified, take the part of the wordsfile after | ||
// the last slash. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.