diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..c1b90b4cf
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,254 @@
+---
+Language: Cpp
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignArrayOfStructures: None
+AlignConsecutiveAssignments:
+  Enabled: false
+  AcrossEmptyLines: false
+  AcrossComments: false
+  AlignCompound: false
+  PadOperators: true
+AlignConsecutiveBitFields:
+  Enabled: false
+  AcrossEmptyLines: false
+  AcrossComments: false
+  AlignCompound: false
+  PadOperators: false
+AlignConsecutiveDeclarations:
+  Enabled: false
+  AcrossEmptyLines: false
+  AcrossComments: false
+  AlignCompound: false
+  PadOperators: false
+AlignConsecutiveMacros:
+  Enabled: false
+  AcrossEmptyLines: false
+  AcrossComments: false
+  AlignCompound: false
+  PadOperators: false
+AlignConsecutiveShortCaseStatements:
+  Enabled: false
+  AcrossEmptyLines: false
+  AcrossComments: false
+  AlignCaseColons: false
+AlignEscapedNewlines: Right
+AlignOperands: Align
+AlignTrailingComments:
+  Kind: Always
+  OverEmptyLines: 0
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortEnumsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: All
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: 'Yes'
+AttributeMacros:
+  - __capability
+BinPackArguments: true
+BinPackParameters: true
+BitFieldColonSpacing: Both
+BraceWrapping:
+  AfterCaseLabel: false
+  AfterClass: true
+  AfterControlStatement: Always
+  AfterEnum: true
+  AfterFunction: true
+  AfterNamespace: false
+  AfterStruct: true
+  AfterUnion: true
+  BeforeCatch: true
+  BeforeElse: true
+  BeforeLambdaBody: false
+  BeforeWhile: false
+  IndentBraces: false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakAfterAttributes: Never
+BreakAfterJavaFieldAnnotations: false
+BreakArrays: true
+BreakBeforeBinaryOperators: None
+BreakBeforeConceptDeclarations: Always
+BreakBeforeBraces: Custom
+BreakBeforeInlineASMColon: OnlyMultiline
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializers: AfterColon
+BreakInheritanceList: BeforeColon
+BreakStringLiterals: false
+ColumnLimit: 90
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat: false
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: LogicalBlock
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: false
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IfMacros:
+  - KJ_IF_MAYBE
+IncludeBlocks: Preserve
+IncludeCategories:
+  - Regex: '^(<|"(gtest|gmock|isl|json)/)'
+    Priority: 3
+    SortPriority: 0
+    CaseSensitive: false
+  - Regex: '<.+\.h>' # Platform specific headers.
+    Priority: 1
+  - Regex: '<[_[:alnum:]]+>' # C++ headers.
+    Priority: 2
+  - Regex: '<.+/.+>' # Third-party library headers.
+    Priority: 4
+  - Regex: '^"include/'
+    Priority: 5
+  - Regex: '^"mt-kahypar'
+    Priority: 5
+  - Regex: '^"mt-kahypar/partition'
+    Priority: 5
+  - Regex: '^"mt-kahypar/partition/preprocessing'
+    Priority: 6
+  - Regex: '^"mt-kahypar/partition/coarsening'
+    Priority: 7
+  - Regex: '^"mt-kahypar/partition/initial_partitioning'
+    Priority: 8
+  - Regex: '^"mt-kahypar/partition/refinement'
+    Priority: 9
+  - Regex: '^"mt-kahypar/partition/mapping'
+    Priority: 10
+  - Regex: '^"mt-kahypar/datastructures'
+    Priority: 11
+  - Regex: '^"mt-kahypar/parallel'
+    Priority: 12
+  - Regex: '^"mt-kahypar/io'
+    Priority: 13
+  - Regex: '^"mt-kahypar/utils'
+    Priority: 14
+IncludeIsMainRegex: '(Test)?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
+IndentCaseBlocks: false
+IndentCaseLabels: false
+IndentExternBlock: AfterExternBlock
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentRequiresClause: true
+IndentWidth: 2
+IndentWrappedFunctionNames: false
+InsertBraces: false
+InsertNewlineAtEOF: false
+InsertTrailingCommas: None
+IntegerLiteralSeparator:
+  Binary: 0
+  BinaryMinDigits: 0
+  Decimal: 0
+  DecimalMinDigits: 0
+  Hex: 0
+  HexMinDigits: 0
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+KeepEmptyLinesAtEOF: false
+LambdaBodyIndentation: Signature
+LineEnding: DeriveLF
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 2
+ObjCBreakBeforeNestedBlockParam: true
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PackConstructorInitializers: BinPack
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakOpenParenthesis: 0
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyIndentedWhitespace: 0
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Right
+PPIndentWidth: -1
+QualifierAlignment: Leave
+ReferenceAlignment: Pointer
+ReflowComments: true
+RemoveBracesLLVM: false
+RemoveParentheses: Leave
+RemoveSemicolon: false
+RequiresClausePosition: OwnLine
+RequiresExpressionIndentation: OuterScope
+SeparateDefinitionBlocks: Leave
+ShortNamespaceLines: 1
+SortIncludes: CaseSensitive
+SortJavaStaticImport: Before
+SortUsingDeclarations: LexicographicNumeric
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceAroundPointerQualifiers: Default
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeJsonColon: false
+SpaceBeforeParens: Custom
+SpaceBeforeParensOptions:
+  AfterControlStatements: false
+  AfterForeachMacros: false
+  AfterFunctionDefinitionName: false
+  AfterFunctionDeclarationName: false
+  AfterIfMacros: false
+  AfterOverloadedOperator: false
+  AfterRequiresInClause: false
+  AfterRequiresInExpression: false
+  BeforeNonEmptyParentheses: false
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyBlock: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: Never
+SpacesInContainerLiterals: true
+SpacesInLineCommentPrefix:
+  Minimum: 1
+  Maximum: -1
+SpacesInParens: Never
+SpacesInParensOptions:
+  InCStyleCasts: false
+  InConditionalStatements: false
+  InEmptyParentheses: false
+  Other: false
+SpacesInSquareBrackets: false
+Standard: c++03
+StatementAttributeLikeMacros:
+  - Q_EMIT
+StatementMacros:
+  - Q_UNUSED
+  - QT_REQUIRE_VERSION
+TabWidth: 8
+UseTab: Never
+VerilogBreakBetweenInstancePorts: true
+WhitespaceSensitiveMacros:
+  - BOOST_PP_STRINGIZE
+  - CF_SWIFT_NAME
+  - NS_SWIFT_NAME
+  - PP_STRINGIZE
+  - STRINGIZE
+...
+
diff --git a/mt-kahypar/application/mt_kahypar.cc.in b/mt-kahypar/application/mt_kahypar.cc.in
index 5cdd7bad2..d21b09ab7 100644
--- a/mt-kahypar/application/mt_kahypar.cc.in
+++ b/mt-kahypar/application/mt_kahypar.cc.in
@@ -13,8 +13,8 @@
  * copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
+ * The above copyright notice and this permission notice shall be included in
+ *all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
@@ -31,71 +31,89 @@
 #include "mt-kahypar/io/command_line_options.h"
 #include "mt-kahypar/io/hypergraph_factory.h"
 #include "mt-kahypar/io/partitioning_output.h"
-#include "mt-kahypar/partition/partitioner_facade.h"
-#include "mt-kahypar/partition/registries/register_memory_pool.h"
 #include "mt-kahypar/partition/conversion.h"
 #include "mt-kahypar/partition/mapping/target_graph.h"
+#include "mt-kahypar/partition/partitioner_facade.h"
+#include "mt-kahypar/partition/registries/register_memory_pool.h"
 #include "mt-kahypar/utils/cast.h"
 #include "mt-kahypar/utils/delete.h"
+#include "mt-kahypar/utils/exception.h"
 #include "mt-kahypar/utils/randomize.h"
 #include "mt-kahypar/utils/utilities.h"
-#include "mt-kahypar/utils/exception.h"
 
 using namespace mt_kahypar;
 
 #define MT_KAHYPAR_CONFIG_DIR "@PROJECT_SOURCE_DIR@/config/"
 
-static std::string getPresetFile(const Context& context) {
-  switch ( context.partition.preset_type ) {
-    case PresetType::deterministic: return std::string(MT_KAHYPAR_CONFIG_DIR) + "deterministic_preset.ini";
-    case PresetType::large_k: return std::string(MT_KAHYPAR_CONFIG_DIR) + "large_k_preset.ini";
-    case PresetType::default_preset: return std::string(MT_KAHYPAR_CONFIG_DIR) + "default_preset.ini";
-    case PresetType::quality: return std::string(MT_KAHYPAR_CONFIG_DIR) + "quality_preset.ini";
-    case PresetType::highest_quality: return std::string(MT_KAHYPAR_CONFIG_DIR) + "highest_quality_preset.ini";
-    case PresetType::UNDEFINED: return "";
+static std::string getPresetFile(const Context &context)
+{
+  switch(context.partition.preset_type)
+  {
+  case PresetType::deterministic:
+    return std::string(MT_KAHYPAR_CONFIG_DIR) + "deterministic_preset.ini";
+  case PresetType::large_k:
+    return std::string(MT_KAHYPAR_CONFIG_DIR) + "large_k_preset.ini";
+  case PresetType::default_preset:
+    return std::string(MT_KAHYPAR_CONFIG_DIR) + "default_preset.ini";
+  case PresetType::quality:
+    return std::string(MT_KAHYPAR_CONFIG_DIR) + "quality_preset.ini";
+  case PresetType::highest_quality:
+    return std::string(MT_KAHYPAR_CONFIG_DIR) + "highest_quality_preset.ini";
+  case PresetType::UNDEFINED:
+    return "";
   }
   return "";
 }
 
-int main(int argc, char* argv[]) {
+int main(int argc, char *argv[])
+{
   Context context(false);
   processCommandLineInput(context, argc, argv);
 
-  if ( context.partition.preset_file == "" ) {
-    if ( context.partition.preset_type != PresetType::UNDEFINED ) {
-      // Only a preset type specified => load context from corresponding ini file
+
if(context.partition.preset_file == "") + { + if(context.partition.preset_type != PresetType::UNDEFINED) + { + // Only a preset type specified => load context from corresponding ini + // file context.partition.preset_file = getPresetFile(context); processCommandLineInput(context, argc, argv); - } else { + } + else + { throw InvalidInputException("No preset specified"); } } // Determine instance (graph or hypergraph) and partition type - if ( context.partition.instance_type == InstanceType::UNDEFINED ) { + if(context.partition.instance_type == InstanceType::UNDEFINED) + { context.partition.instance_type = to_instance_type(context.partition.file_format); } - context.partition.partition_type = to_partition_c_type( - context.partition.preset_type, context.partition.instance_type); - + context.partition.partition_type = + to_partition_c_type(context.partition.preset_type, context.partition.instance_type); context.utility_id = utils::Utilities::instance().registerNewUtilityObjects(); - if (context.partition.verbose_output) { + if(context.partition.verbose_output) + { io::printBanner(); } utils::Randomize::instance().setSeed(context.partition.seed); - if ( context.shared_memory.use_localized_random_shuffle ) { + if(context.shared_memory.use_localized_random_shuffle) + { utils::Randomize::instance().enableLocalizedParallelShuffle( - context.shared_memory.shuffle_block_size); + context.shared_memory.shuffle_block_size); } size_t num_available_cpus = HardwareTopology::instance().num_cpus(); - if ( num_available_cpus < context.shared_memory.num_threads ) { + if(num_available_cpus < context.shared_memory.num_threads) + { WARNING("There are currently only" << num_available_cpus << "cpus available." - << "Setting number of threads from" << context.shared_memory.num_threads - << "to" << num_available_cpus); + << "Setting number of threads from" + << context.shared_memory.num_threads << "to" + << num_available_cpus); context.shared_memory.num_threads = num_available_cpus; } @@ -109,31 +127,35 @@ int main(int argc, char* argv[]) { hwloc_bitmap_free(cpuset); // Read Hypergraph - utils::Timer& timer = - utils::Utilities::instance().getTimer(context.utility_id); + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); timer.start_timer("io_hypergraph", "I/O Hypergraph"); - mt_kahypar_hypergraph_t hypergraph = io::readInputFile( - context.partition.graph_filename, context.partition.preset_type, - context.partition.instance_type, context.partition.file_format, - context.preprocessing.stable_construction_of_incident_edges); + mt_kahypar_hypergraph_t hypergraph = + io::readInputFile(context.partition.graph_filename, context.partition.preset_type, + context.partition.instance_type, context.partition.file_format, + context.preprocessing.stable_construction_of_incident_edges); timer.stop_timer("io_hypergraph"); // Read Target Graph std::unique_ptr target_graph; - if ( context.partition.objective == Objective::steiner_tree ) { - if ( context.mapping.target_graph_file != "" ) { - target_graph = std::make_unique( - io::readInputFile( + if(context.partition.objective == Objective::steiner_tree) + { + if(context.mapping.target_graph_file != "") + { + target_graph = std::make_unique(io::readInputFile( context.mapping.target_graph_file, FileFormat::Metis, true)); - } else { - throw InvalidInputException("No target graph file specified (use -g or --target-graph-file=)!"); + } + else + { + throw InvalidInputException("No target graph file specified (use -g " + " or --target-graph-file=)!"); } } - if 
( context.partition.fixed_vertex_filename != "" ) { + if(context.partition.fixed_vertex_filename != "") + { timer.start_timer("read_fixed_vertices", "Read Fixed Vertex File"); - io::addFixedVerticesFromFile(hypergraph, - context.partition.fixed_vertex_filename, context.partition.k); + io::addFixedVerticesFromFile(hypergraph, context.partition.fixed_vertex_filename, + context.partition.k); timer.stop_timer("read_fixed_vertices"); } @@ -143,27 +165,32 @@ int main(int argc, char* argv[]) { // Partition Hypergraph HighResClockTimepoint start = std::chrono::high_resolution_clock::now(); mt_kahypar_partitioned_hypergraph_t partitioned_hypergraph = - PartitionerFacade::partition(hypergraph, context, target_graph.get()); + PartitionerFacade::partition(hypergraph, context, target_graph.get()); HighResClockTimepoint end = std::chrono::high_resolution_clock::now(); // Print Stats std::chrono::duration elapsed_seconds(end - start); - PartitionerFacade::printPartitioningResults( - partitioned_hypergraph, context, elapsed_seconds); - - if ( context.partition.sp_process_output ) { - std::cout << PartitionerFacade::serializeResultLine( - partitioned_hypergraph, context, elapsed_seconds) << std::endl; + PartitionerFacade::printPartitioningResults(partitioned_hypergraph, context, + elapsed_seconds); + + if(context.partition.sp_process_output) + { + std::cout << PartitionerFacade::serializeResultLine(partitioned_hypergraph, context, + elapsed_seconds) + << std::endl; } - if ( context.partition.csv_output ) { - std::cout << PartitionerFacade::serializeCSV( - partitioned_hypergraph, context, elapsed_seconds) << std::endl; + if(context.partition.csv_output) + { + std::cout << PartitionerFacade::serializeCSV(partitioned_hypergraph, context, + elapsed_seconds) + << std::endl; } - if (context.partition.write_partition_file) { - PartitionerFacade::writePartitionFile( - partitioned_hypergraph, context.partition.graph_partition_filename); + if(context.partition.write_partition_file) + { + PartitionerFacade::writePartitionFile(partitioned_hypergraph, + context.partition.graph_partition_filename); } parallel::MemoryPool::instance().free_memory_chunks(); diff --git a/mt-kahypar/datastructures/array.h b/mt-kahypar/datastructures/array.h index 92a9c1da9..ca96f7f35 100644 --- a/mt-kahypar/datastructures/array.h +++ b/mt-kahypar/datastructures/array.h @@ -13,8 +13,8 @@ * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -27,13 +27,13 @@ #pragma once -#include -#include #include +#include +#include +#include "tbb//parallel_invoke.h" #include "tbb/parallel_for.h" #include "tbb/scalable_allocator.h" -#include "tbb//parallel_invoke.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/memory_pool.h" @@ -44,165 +44,142 @@ namespace mt_kahypar { namespace ds { template -class Array { +class Array +{ - class ArrayIterator { + class ArrayIterator + { - public: - using iterator_category = std::random_access_iterator_tag; - using value_type = T; - using reference = T&; - using pointer = T*; - using difference_type = std::ptrdiff_t; + public: + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using reference = T &; + using pointer = T *; + using difference_type = std::ptrdiff_t; - ArrayIterator() : _ptr(nullptr) { } - ArrayIterator(T* ptr) : _ptr(ptr) { } - ArrayIterator(const ArrayIterator& other) : _ptr(other._ptr) { } + ArrayIterator() : _ptr(nullptr) {} + ArrayIterator(T *ptr) : _ptr(ptr) {} + ArrayIterator(const ArrayIterator &other) : _ptr(other._ptr) {} - reference operator*() const { - return *_ptr; - } + reference operator*() const { return *_ptr; } - pointer operator->() const { - return _ptr; - } + pointer operator->() const { return _ptr; } - ArrayIterator& operator++() { - ++_ptr; - return *this; - } + ArrayIterator &operator++() + { + ++_ptr; + return *this; + } - ArrayIterator& operator--() { - --_ptr; - return *this; - } + ArrayIterator &operator--() + { + --_ptr; + return *this; + } - ArrayIterator operator++(int) { - ArrayIterator tmp_it(_ptr); - ++_ptr; - return tmp_it; - } + ArrayIterator operator++(int) + { + ArrayIterator tmp_it(_ptr); + ++_ptr; + return tmp_it; + } - ArrayIterator operator--(int) { - ArrayIterator tmp_it(_ptr); - --_ptr; - return tmp_it; - } + ArrayIterator operator--(int) + { + ArrayIterator tmp_it(_ptr); + --_ptr; + return tmp_it; + } - ArrayIterator operator+(const difference_type& n) const { - return ArrayIterator(_ptr + n); - } + ArrayIterator operator+(const difference_type &n) const + { + return ArrayIterator(_ptr + n); + } - ArrayIterator& operator+=(const difference_type& n) { - _ptr += n; - return *this; - } + ArrayIterator &operator+=(const difference_type &n) + { + _ptr += n; + return *this; + } - ArrayIterator operator-(const difference_type& n) const { - return ArrayIterator(_ptr - n); - } + ArrayIterator operator-(const difference_type &n) const + { + return ArrayIterator(_ptr - n); + } - ArrayIterator& operator-=(const difference_type& n) { - _ptr -= n; - return *this; - } + ArrayIterator &operator-=(const difference_type &n) + { + _ptr -= n; + return *this; + } - reference operator[](const difference_type& n) const { - return _ptr[n]; - } + reference operator[](const difference_type &n) const { return _ptr[n]; } - bool operator==(const ArrayIterator& other) const { - return _ptr == other._ptr; - } + bool operator==(const ArrayIterator &other) const { return _ptr == other._ptr; } - bool operator!=(const ArrayIterator& other) const { - return _ptr != other._ptr; - } + bool operator!=(const ArrayIterator &other) const { return _ptr != other._ptr; } - bool operator<(const ArrayIterator& other) const { - return _ptr < other._ptr; - } + bool operator<(const ArrayIterator &other) const { return _ptr < other._ptr; } - bool operator>(const ArrayIterator& other) const { 
- return _ptr > other._ptr; - } + bool operator>(const ArrayIterator &other) const { return _ptr > other._ptr; } - bool operator<=(const ArrayIterator& other) const { - return _ptr <= other._ptr; - } + bool operator<=(const ArrayIterator &other) const { return _ptr <= other._ptr; } - bool operator>=(const ArrayIterator& other) const { - return _ptr >= other._ptr; - } + bool operator>=(const ArrayIterator &other) const { return _ptr >= other._ptr; } - difference_type operator+(const ArrayIterator& other) const { - return ( _ptr + other._ptr ); - } - - difference_type operator-(const ArrayIterator& other) const { - return (_ptr - other._ptr); - } + difference_type operator+(const ArrayIterator &other) const + { + return (_ptr + other._ptr); + } - private: - T* _ptr; + difference_type operator-(const ArrayIterator &other) const + { + return (_ptr - other._ptr); + } + private: + T *_ptr; }; - public: - +public: // Type Traits - using value_type = T; - using size_type = size_t; - using reference = T&; - using const_reference = const T&; - using iterator = ArrayIterator; - using const_iterator = const ArrayIterator; - - Array() : - _group(""), - _key(""), - _size(0), - _data(nullptr), - _underlying_data(nullptr) { } - - Array(const size_type size, - const value_type init_value = value_type()) : - _group(""), - _key(""), - _size(0), - _data(nullptr), - _underlying_data(nullptr) { + using value_type = T; + using size_type = size_t; + using reference = T &; + using const_reference = const T &; + using iterator = ArrayIterator; + using const_iterator = const ArrayIterator; + + Array() : _group(""), _key(""), _size(0), _data(nullptr), _underlying_data(nullptr) {} + + Array(const size_type size, const value_type init_value = value_type()) : + _group(""), _key(""), _size(0), _data(nullptr), _underlying_data(nullptr) + { resize(size, init_value); } - Array(const std::string& group, - const std::string& key, - const size_type size, - const bool zero_initialize = false, - const bool assign_parallel = true) : - _group(""), - _key(""), - _size(size), - _data(nullptr), - _underlying_data(nullptr) { + Array(const std::string &group, const std::string &key, const size_type size, + const bool zero_initialize = false, const bool assign_parallel = true) : + _group(""), + _key(""), _size(size), _data(nullptr), _underlying_data(nullptr) + { resize(group, key, size, zero_initialize, assign_parallel); } - Array(const Array&) = delete; - Array & operator= (const Array &) = delete; + Array(const Array &) = delete; + Array &operator=(const Array &) = delete; - Array(Array&& other) : - _group(std::move(other._group)), - _key(std::move(other._key)), - _size(other._size), - _data(std::move(other._data)), - _underlying_data(std::move(other._underlying_data)) { + Array(Array &&other) : + _group(std::move(other._group)), _key(std::move(other._key)), _size(other._size), + _data(std::move(other._data)), _underlying_data(std::move(other._underlying_data)) + { other._size = 0; other._data = nullptr; other._underlying_data = nullptr; } - Array & operator=(Array&& other) { + Array &operator=(Array &&other) + { _group = std::move(other._group); _key = std::move(other._key); _size = other._size; @@ -214,8 +191,10 @@ class Array { return *this; } - ~Array() { - if ( !_data && _underlying_data && !_group.empty() && !_key.empty() ) { + ~Array() + { + if(!_data && _underlying_data && !_group.empty() && !_key.empty()) + { // Memory was allocated from memory pool // => Release Memory 
parallel::MemoryPool::instance().release_mem_chunk(_group, _key); @@ -225,148 +204,173 @@ class Array { // ####################### Access Operators ####################### // ! Returns a reference to the element at specified location pos - reference operator[](const size_type pos) { + reference operator[](const size_type pos) + { ASSERT(pos < _size); return _underlying_data[pos]; } // ! Returns a reference to the element at specified location pos - const_reference operator[](const size_type pos) const { + const_reference operator[](const size_type pos) const + { ASSERT(pos < _size); return _underlying_data[pos]; } - reference back() { + reference back() + { ASSERT(_underlying_data && _size > 0); return _underlying_data[_size - 1]; } - const_reference back() const { + const_reference back() const + { ASSERT(_underlying_data && _size > 0); return _underlying_data[_size - 1]; } - value_type* data() { + value_type *data() + { ASSERT(_underlying_data); return _underlying_data; } - const value_type* data() const { + const value_type *data() const + { ASSERT(_underlying_data); return _underlying_data; } // ####################### Iterators ####################### - iterator begin() { + iterator begin() + { ASSERT(_underlying_data); return iterator(_underlying_data); } - const_iterator cbegin() const { + const_iterator cbegin() const + { ASSERT(_underlying_data); return const_iterator(_underlying_data); } - iterator end() { + iterator end() + { ASSERT(_underlying_data); return iterator(_underlying_data + _size); } - const_iterator cend() const { + const_iterator cend() const + { ASSERT(_underlying_data); return const_iterator(_underlying_data + _size); } // ####################### Capacity ####################### - bool empty() const { - return _size == 0; - } + bool empty() const { return _size == 0; } - size_type size() const { - return _size; - } + size_type size() const { return _size; } // ####################### Initialization ####################### - void resize(const size_type size, - const value_type init_value = value_type(), - const bool assign_parallel = true) { - if ( _data || _underlying_data ) { + void resize(const size_type size, const value_type init_value = value_type(), + const bool assign_parallel = true) + { + if(_data || _underlying_data) + { throw SystemException("Memory of vector already allocated"); } allocate_data(size); assign(size, init_value, assign_parallel); } - void resizeNoAssign(const size_type size) { - if ( _data || _underlying_data ) { + void resizeNoAssign(const size_type size) + { + if(_data || _underlying_data) + { throw SystemException("Memory of vector already allocated"); } allocate_data(size); } - void resize(const std::string& group, - const std::string& key, - const size_type size, - const bool zero_initialize = false, - const bool assign_parallel = true) { + void resize(const std::string &group, const std::string &key, const size_type size, + const bool zero_initialize = false, const bool assign_parallel = true) + { _size = size; - char* data = parallel::MemoryPool::instance().request_mem_chunk( - group, key, size, sizeof(value_type)); - if ( data ) { + char *data = parallel::MemoryPool::instance().request_mem_chunk(group, key, size, + sizeof(value_type)); + if(data) + { _group = group; _key = key; - _underlying_data = reinterpret_cast(data); - if ( zero_initialize ) { + _underlying_data = reinterpret_cast(data); + if(zero_initialize) + { assign(size, value_type(), assign_parallel); } - } else { + } + else + { resize_with_unused_memory(size, 
zero_initialize, assign_parallel); } } - void resize_with_unused_memory(const size_type size, - const bool zero_initialize = false, - const bool assign_parallel = true) { + void resize_with_unused_memory(const size_type size, const bool zero_initialize = false, + const bool assign_parallel = true) + { _size = size; - char* data = parallel::MemoryPool::instance().request_unused_mem_chunk(size, sizeof(value_type)); - if ( data ) { - _underlying_data = reinterpret_cast(data); - if ( zero_initialize ) { + char *data = parallel::MemoryPool::instance().request_unused_mem_chunk( + size, sizeof(value_type)); + if(data) + { + _underlying_data = reinterpret_cast(data); + if(zero_initialize) + { assign(size, value_type(), assign_parallel); } - } else { + } + else + { resize(size, value_type(), assign_parallel); } } // ! Replaces the contents of the container - void assign(const size_type count, - const value_type value, - const bool assign_parallel = true) { - if ( _underlying_data ) { + void assign(const size_type count, const value_type value, + const bool assign_parallel = true) + { + if(_underlying_data) + { ASSERT(count <= _size); - if ( assign_parallel ) { + if(assign_parallel) + { const size_t step = std::max(count / std::thread::hardware_concurrency(), UL(1)); tbb::parallel_for(UL(0), count, step, [&](const size_type i) { - for ( size_t j = i; j < std::min(i + step, count); ++j ) { + for(size_t j = i; j < std::min(i + step, count); ++j) + { _underlying_data[j] = value; } }); - } else { - for ( size_t i = 0; i < count; ++i ) { + } + else + { + for(size_t i = 0; i < count; ++i) + { _underlying_data[i] = value; } } - } else { + } + else + { resize(count, value, assign_parallel); } } - private: - void allocate_data(const size_type size) { +private: + void allocate_data(const size_type size) + { _data = parallel::make_unique(size); _underlying_data = _data.get(); _size = size; @@ -376,118 +380,61 @@ class Array { std::string _key; size_type _size; parallel::tbb_unique_ptr _data; - value_type* _underlying_data; + value_type *_underlying_data; }; - -} // namespace ds - +} // namespace ds namespace parallel { - template - static inline void free(ds::Array& vec) { - ds::Array tmp_vec; - vec = std::move(tmp_vec); - } - - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(ds::Array& vec1, - ds::Array& vec2) { - tbb::parallel_invoke([&] { - free(vec1); - }, [&] { - free(vec2); - }); - } - - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(ds::Array& vec1, - ds::Array& vec2, - ds::Array& vec3) { - tbb::parallel_invoke([&] { - free(vec1); - }, [&] { - free(vec2); - }, [&] { - free(vec3); - }); - } - - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(ds::Array& vec1, - ds::Array& vec2, - ds::Array& vec3, - ds::Array& vec4) { - tbb::parallel_invoke([&] { - free(vec1); - }, [&] { - free(vec2); - }, [&] { - free(vec3); - }, [&] { - free(vec4); - }); - } +template +static inline void free(ds::Array &vec) +{ + ds::Array tmp_vec; + vec = std::move(tmp_vec); +} +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(ds::Array &vec1, + ds::Array &vec2) +{ + tbb::parallel_invoke([&] { free(vec1); }, [&] { free(vec2); }); +} - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(ds::Array& vec1, - ds::Array& vec2, - ds::Array& vec3, - ds::Array& vec4, - ds::Array& vec5) { - tbb::parallel_invoke([&] { - free(vec1); - }, [&] { - free(vec2); - }, [&] { - free(vec3); - }, [&] { - free(vec4); - }, [&] { - free(vec5); - 
}); - } +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void +parallel_free(ds::Array &vec1, ds::Array &vec2, ds::Array &vec3) +{ + tbb::parallel_invoke([&] { free(vec1); }, [&] { free(vec2); }, [&] { free(vec3); }); +} - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(ds::Array& vec1, - ds::Array& vec2, - ds::Array& vec3, - ds::Array& vec4, - ds::Array& vec5, - ds::Array& vec6) { - tbb::parallel_invoke([&] { - free(vec1); - }, [&] { - free(vec2); - }, [&] { - free(vec3); - }, [&] { - free(vec4); - }, [&] { - free(vec5); - }, [&] { - free(vec6); - }); - } +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void +parallel_free(ds::Array &vec1, ds::Array &vec2, ds::Array &vec3, + ds::Array &vec4) +{ + tbb::parallel_invoke([&] { free(vec1); }, [&] { free(vec2); }, [&] { free(vec3); }, + [&] { free(vec4); }); +} +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void +parallel_free(ds::Array &vec1, ds::Array &vec2, ds::Array &vec3, + ds::Array &vec4, ds::Array &vec5) +{ + tbb::parallel_invoke([&] { free(vec1); }, [&] { free(vec2); }, [&] { free(vec3); }, + [&] { free(vec4); }, [&] { free(vec5); }); +} +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void +parallel_free(ds::Array &vec1, ds::Array &vec2, ds::Array &vec3, + ds::Array &vec4, ds::Array &vec5, ds::Array &vec6) +{ + tbb::parallel_invoke([&] { free(vec1); }, [&] { free(vec2); }, [&] { free(vec3); }, + [&] { free(vec4); }, [&] { free(vec5); }, [&] { free(vec6); }); } -} // namespace mt_kahypar +} // namespace parallel + +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/bitset.h b/mt-kahypar/datastructures/bitset.h index 7a27762e3..546a68998 100644 --- a/mt-kahypar/datastructures/bitset.h +++ b/mt-kahypar/datastructures/bitset.h @@ -31,6 +31,7 @@ #include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/utils/bit_ops.h" namespace mt_kahypar { namespace ds { @@ -38,7 +39,8 @@ namespace ds { // Forward Declaration class StaticBitset; -class Bitset { +class Bitset +{ using Block = uint64_t; static constexpr int BITS_PER_BLOCK = std::numeric_limits::digits; @@ -46,67 +48,63 @@ class Bitset { static constexpr Block MOD_MASK = BITS_PER_BLOCK - 1; static constexpr Block DIV_SHIFT = utils::log2(BITS_PER_BLOCK); - public: - explicit Bitset() : - _size(0), - _bitset() { } +public: + explicit Bitset() : _size(0), _bitset() {} - explicit Bitset(const size_t size) : - _size(size), - _bitset() { - _bitset.assign(( size >> DIV_SHIFT ) + ( ( size & MOD_MASK ) != 0 ), 0); + explicit Bitset(const size_t size) : _size(size), _bitset() + { + _bitset.assign((size >> DIV_SHIFT) + ((size & MOD_MASK) != 0), 0); } - Bitset(const Bitset&) = delete; - Bitset & operator= (const Bitset &) = delete; - Bitset(Bitset&&) = default; - Bitset & operator= (Bitset &&) = default; + Bitset(const Bitset &) = delete; + Bitset &operator=(const Bitset &) = delete; + Bitset(Bitset &&) = default; + Bitset &operator=(Bitset &&) = default; - size_t numBlocks() const { - return _bitset.size(); - } + size_t numBlocks() const { return _bitset.size(); } - const Block* data() const { - return _bitset.data(); - } + const Block *data() const { return _bitset.data(); } - void reset() { - memset(_bitset.data(), 0, sizeof(Block) * _bitset.size()); - } + void reset() { memset(_bitset.data(), 0, sizeof(Block) * _bitset.size()); } - void resize(const size_t size) { + void resize(const size_t size) + { _size = size; - _bitset.assign(( size >> DIV_SHIFT ) + ( ( size & MOD_MASK ) != 0 ), 0); + 
_bitset.assign((size >> DIV_SHIFT) + ((size & MOD_MASK) != 0), 0); } - void copy(const size_t num_blocks, const Block* blocks) { + void copy(const size_t num_blocks, const Block *blocks) + { _size = num_blocks * BITS_PER_BLOCK; _bitset.resize(num_blocks); std::memcpy(_bitset.data(), blocks, sizeof(Block) * num_blocks); } - bool isSet(const size_t pos) { + bool isSet(const size_t pos) + { ASSERT(pos < _size); const size_t block_idx = pos >> DIV_SHIFT; // pos / BITS_PER_BLOCK; - const size_t idx = pos & MOD_MASK; // pos % BITS_PER_BLOCK; - return ( _bitset[block_idx] >> idx ) & UL(1); + const size_t idx = pos & MOD_MASK; // pos % BITS_PER_BLOCK; + return (_bitset[block_idx] >> idx) & UL(1); } - void set(const size_t pos) { + void set(const size_t pos) + { ASSERT(pos < _size); const size_t block_idx = pos >> DIV_SHIFT; // pos / BITS_PER_BLOCK; - const size_t idx = pos & MOD_MASK; // pos % BITS_PER_BLOCK; + const size_t idx = pos & MOD_MASK; // pos % BITS_PER_BLOCK; _bitset[block_idx] |= (static_cast(1) << idx); } - void unset(const size_t pos) { + void unset(const size_t pos) + { ASSERT(pos < _size); const size_t block_idx = pos >> DIV_SHIFT; // pos / BITS_PER_BLOCK; - const size_t idx = pos & MOD_MASK; // pos % BITS_PER_BLOCK; + const size_t idx = pos & MOD_MASK; // pos % BITS_PER_BLOCK; _bitset[block_idx] &= ~(static_cast(1) << idx); } - private: +private: friend class StaticBitset; size_t _size; diff --git a/mt-kahypar/datastructures/buffered_vector.h b/mt-kahypar/datastructures/buffered_vector.h index 86030a68b..ad02c8cf5 100644 --- a/mt-kahypar/datastructures/buffered_vector.h +++ b/mt-kahypar/datastructures/buffered_vector.h @@ -27,81 +27,92 @@ #pragma once -#include -#include #include +#include +#include namespace mt_kahypar::ds { -template -class BufferedVector { +template +class BufferedVector +{ public: - using vec_t = std::vector>; + using vec_t = std::vector >; BufferedVector(size_t max_size) : - data(max_size, T()), - buffers([&] { vec_t x; x.reserve(MAX_BUFFER_SIZE); return x; }) - { } + data(max_size, T()), buffers([&] { + vec_t x; + x.reserve(MAX_BUFFER_SIZE); + return x; + }) + { + } - void clear() { + void clear() + { back.store(0, std::memory_order_relaxed); - assert(std::all_of(buffers.begin(), buffers.end(), [&](vec_t& x) { return x.empty(); })); + assert( + std::all_of(buffers.begin(), buffers.end(), [&](vec_t &x) { return x.empty(); })); } - size_t size() const { - return back.load(std::memory_order_relaxed); - } + size_t size() const { return back.load(std::memory_order_relaxed); } - size_t capacity() const { - return data.size(); - } + size_t capacity() const { return data.size(); } - void adapt_capacity(size_t sz) { - if (sz > data.size()) { + void adapt_capacity(size_t sz) + { + if(sz > data.size()) + { data.resize(sz, T()); } } - void push_back_atomic(const T& element) { + void push_back_atomic(const T &element) + { size_t pos = back.fetch_add(1, std::memory_order_relaxed); assert(pos < data.size()); data[pos] = element; } - void push_back_buffered(const T& element) { - vec_t& buffer = buffers.local(); + void push_back_buffered(const T &element) + { + vec_t &buffer = buffers.local(); buffer.push_back(element); - if (buffer.size() == MAX_BUFFER_SIZE) { + if(buffer.size() == MAX_BUFFER_SIZE) + { flush_buffer(buffer); } } - void finalize() { - for (vec_t& buffer : buffers) { + void finalize() + { + for(vec_t &buffer : buffers) + { flush_buffer(buffer); } } auto begin() { return data.begin(); } auto end() { return data.begin() + size(); } - T& operator[](size_t pos) { 
return data[pos]; } - const T& operator[](size_t pos) const { return data[pos]; } + T &operator[](size_t pos) { return data[pos]; } + const T &operator[](size_t pos) const { return data[pos]; } - - struct RandomAccessRange { + struct RandomAccessRange + { size_t actual_size; - const vec_t& data_ref; - const T& operator[](size_t i) const { return data_ref[i]; } + const vec_t &data_ref; + const T &operator[](size_t i) const { return data_ref[i]; } size_t size() const { return actual_size; } }; RandomAccessRange range() const { return { size(), data }; } - const vec_t& getData() { return data; } + const vec_t &getData() { return data; } private: - - void flush_buffer(vec_t& buffer) { - if (!buffer.empty()) { + void flush_buffer(vec_t &buffer) + { + if(!buffer.empty()) + { size_t pos = back.fetch_add(buffer.size(), std::memory_order_relaxed); assert(pos + buffer.size() - 1 < data.size()); std::copy_n(buffer.begin(), buffer.size(), data.begin() + pos); @@ -110,7 +121,7 @@ class BufferedVector { } vec_t data; - std::atomic back{0}; + std::atomic back{ 0 }; tbb::enumerable_thread_specific buffers; static constexpr size_t MAX_BUFFER_SIZE = 1024; }; diff --git a/mt-kahypar/datastructures/concurrent_bucket_map.h b/mt-kahypar/datastructures/concurrent_bucket_map.h index 1cd2b41d7..26c140b79 100644 --- a/mt-kahypar/datastructures/concurrent_bucket_map.h +++ b/mt-kahypar/datastructures/concurrent_bucket_map.h @@ -28,8 +28,8 @@ #pragma once #include -#include #include +#include #include "tbb/task_arena.h" #include "tbb/task_group.h" @@ -37,8 +37,8 @@ #include "kahypar-resources/meta/mandatory.h" #include "mt-kahypar/macros.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" namespace mt_kahypar { namespace ds { @@ -59,93 +59,96 @@ namespace ds { * key must be of type uint64_t. */ template -class ConcurrentBucketMap { +class ConcurrentBucketMap +{ static constexpr bool debug = false; static constexpr size_t BUCKET_FACTOR = 128; using Bucket = parallel::scalable_vector; - public: - +public: ConcurrentBucketMap() : - _num_buckets(align_to_next_power_of_two(BUCKET_FACTOR * std::thread::hardware_concurrency())), - _mod_mask(_num_buckets - 1), - _spin_locks(_num_buckets), - _buckets(_num_buckets) { } - - ConcurrentBucketMap(const ConcurrentBucketMap&) = delete; - ConcurrentBucketMap & operator= (const ConcurrentBucketMap &) = delete; - - ConcurrentBucketMap(ConcurrentBucketMap&& other) : - _num_buckets(other._num_buckets), - _mod_mask(_num_buckets - 1), - _spin_locks(_num_buckets), - _buckets(std::move(other._buffer)) { } - - template - void doParallelForAllBuckets(const F& f) { - tbb::parallel_for(UL(0), _num_buckets, [&](const size_t i) { - f(i); - }); + _num_buckets(align_to_next_power_of_two(BUCKET_FACTOR * + std::thread::hardware_concurrency())), + _mod_mask(_num_buckets - 1), _spin_locks(_num_buckets), _buckets(_num_buckets) + { } - // ! Returns the number of buckets - size_t numBuckets() const { - return _num_buckets; + ConcurrentBucketMap(const ConcurrentBucketMap &) = delete; + ConcurrentBucketMap &operator=(const ConcurrentBucketMap &) = delete; + + ConcurrentBucketMap(ConcurrentBucketMap &&other) : + _num_buckets(other._num_buckets), _mod_mask(_num_buckets - 1), + _spin_locks(_num_buckets), _buckets(std::move(other._buffer)) + { + } + + template + void doParallelForAllBuckets(const F &f) + { + tbb::parallel_for(UL(0), _num_buckets, [&](const size_t i) { f(i); }); } + // ! 
Returns the number of buckets + size_t numBuckets() const { return _num_buckets; } + // ! Returns the corresponding bucket - Bucket& getBucket(const size_t bucket) { + Bucket &getBucket(const size_t bucket) + { ASSERT(bucket < _num_buckets); return _buckets[bucket]; } // ! Reserves memory in each bucket such that the estimated number of insertions - // ! can be handled without the need (with high probability) of expensive bucket resizing. - void reserve_for_estimated_number_of_insertions(const size_t estimated_num_insertions) { - // ! Assumption is that keys are evenly distributed among buckets (with a small buffer) + // ! can be handled without the need (with high probability) of expensive bucket + // resizing. + void reserve_for_estimated_number_of_insertions(const size_t estimated_num_insertions) + { + // ! Assumption is that keys are evenly distributed among buckets (with a small + // buffer) const size_t estimated_bucket_size = std::max( - static_cast( 1.5 * estimated_num_insertions ) / _num_buckets, UL(1)); + static_cast(1.5 * estimated_num_insertions) / _num_buckets, UL(1)); tbb::parallel_for(UL(0), _num_buckets, [&](const size_t i) { _buckets[i].reserve(estimated_bucket_size); }); } // ! Inserts a key-value pair - void insert(const size_t& key, Value&& value) { + void insert(const size_t &key, Value &&value) + { size_t bucket = key & _mod_mask; ASSERT(bucket < _num_buckets); _spin_locks[bucket].lock(); - _buckets[bucket].emplace_back( std::move(value) ); + _buckets[bucket].emplace_back(std::move(value)); _spin_locks[bucket].unlock(); } // ! Frees the memory of all buckets - void free() { - parallel::parallel_free(_buckets); - } + void free() { parallel::parallel_free(_buckets); } // ! Frees the memory of the corresponding bucket - void free(const size_t bucket) { + void free(const size_t bucket) + { ASSERT(bucket < _num_buckets); parallel::free(_buckets[bucket]); } // ! Clears the corresponding bucket - void clear(const size_t bucket) { + void clear(const size_t bucket) + { ASSERT(bucket < _num_buckets); _buckets[bucket].clear(); } - void clearParallel() { - doParallelForAllBuckets([&](const size_t i) { - clear(i); - }); + void clearParallel() + { + doParallelForAllBuckets([&](const size_t i) { clear(i); }); } - private: - size_t align_to_next_power_of_two(const size_t size) const { +private: + size_t align_to_next_power_of_two(const size_t size) const + { return std::pow(2.0, std::ceil(std::log2(static_cast(size)))); } @@ -154,5 +157,5 @@ class ConcurrentBucketMap { std::vector _spin_locks; parallel::scalable_vector _buckets; }; -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/concurrent_flat_map.h b/mt-kahypar/datastructures/concurrent_flat_map.h index 8cedd89fa..0b831b13b 100644 --- a/mt-kahypar/datastructures/concurrent_flat_map.h +++ b/mt-kahypar/datastructures/concurrent_flat_map.h @@ -28,10 +28,10 @@ #pragma once #include +#include #include #include #include -#include #include "kahypar-resources/macros.h" #include "kahypar-resources/meta/mandatory.h" @@ -41,72 +41,70 @@ namespace mt_kahypar { namespace ds { -template -class ConcurrentFlatMap { +template +class ConcurrentFlatMap +{ - struct MapElement { + struct MapElement + { Key key; Value value; int32_t timestamp; }; - public: +public: static constexpr size_t MAP_SIZE = 32768; // Size of sparse map is approx. 
1 MB - static_assert(MAP_SIZE && ((MAP_SIZE & (MAP_SIZE - 1)) == UL(0)), "Size of map is not a power of two!"); + static_assert(MAP_SIZE && ((MAP_SIZE & (MAP_SIZE - 1)) == UL(0)), + "Size of map is not a power of two!"); - explicit ConcurrentFlatMap() : - _map_size(0), - _data(nullptr), - _threshold(2), - _map() { + explicit ConcurrentFlatMap() : _map_size(0), _data(nullptr), _threshold(2), _map() + { allocate(MAP_SIZE); } explicit ConcurrentFlatMap(const size_t max_size) : - _map_size(0), - _data(nullptr), - _threshold(2), - _map(nullptr) { + _map_size(0), _data(nullptr), _threshold(2), _map(nullptr) + { allocate(max_size); } - ConcurrentFlatMap(const ConcurrentFlatMap&) = delete; - ConcurrentFlatMap& operator= (const ConcurrentFlatMap& other) = delete; + ConcurrentFlatMap(const ConcurrentFlatMap &) = delete; + ConcurrentFlatMap &operator=(const ConcurrentFlatMap &other) = delete; - ConcurrentFlatMap(ConcurrentFlatMap&& other) : - _map_size(other._map_size), - _data(std::move(other._data)), - _threshold(other._threshold), - _map(std::move(other._map)) { + ConcurrentFlatMap(ConcurrentFlatMap &&other) : + _map_size(other._map_size), _data(std::move(other._data)), + _threshold(other._threshold), _map(std::move(other._map)) + { other._data = nullptr; other._map = nullptr; } ~ConcurrentFlatMap() = default; - size_t capacity() const { - return _map_size; - } + size_t capacity() const { return _map_size; } - void setMaxSize(const size_t max_size) { - if ( 4 * max_size > _map_size ) { + void setMaxSize(const size_t max_size) + { + if(4 * max_size > _map_size) + { freeInternalData(); allocate(4 * max_size); } } - Value& operator[] (const Key key) { - size_t hash = key & ( _map_size - 1 ); - MapElement* elem = &_map[hash]; + Value &operator[](const Key key) + { + size_t hash = key & (_map_size - 1); + MapElement *elem = &_map[hash]; int32_t expected = elem->timestamp; int32_t desired = _threshold - 1; - while ( ! ( expected == _threshold && elem->key == key ) ) { - if ( expected < desired && - __atomic_compare_exchange_n( - &elem->timestamp, &expected, desired, false, - __ATOMIC_ACQ_REL, __ATOMIC_RELAXED ) ) { + while(!(expected == _threshold && elem->key == key)) + { + if(expected < desired && + __atomic_compare_exchange_n(&elem->timestamp, &expected, desired, false, + __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) + { elem->key = key; elem->value = Value(); elem->timestamp = _threshold; @@ -120,53 +118,64 @@ class ConcurrentFlatMap { return elem->value; } - Value* get_if_contained(const Key key) { - size_t hash = find(key, key & ( _map_size - 1 )); - MapElement* elem = &_map[hash]; + Value *get_if_contained(const Key key) + { + size_t hash = find(key, key & (_map_size - 1)); + MapElement *elem = &_map[hash]; return elem->timestamp == _threshold && elem->key == key ? 
&elem->value : nullptr; } - void clear() { - if ( _threshold >= std::numeric_limits::max() - 2 ) { + void clear() + { + if(_threshold >= std::numeric_limits::max() - 2) + { _threshold = 0; - for ( size_t i = 0; i < _map_size; ++i ) { + for(size_t i = 0; i < _map_size; ++i) + { _map[i].timestamp = 0; } } _threshold += 2; } - void freeInternalData() { + void freeInternalData() + { _map_size = 0; _threshold = 0; _data = nullptr; _map = nullptr; } - private: +private: MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t find(const Key key, - const size_t start_hash) const { - size_t hash = start_hash & ( _map_size - 1 ); - while ( _map[hash].timestamp == _threshold ) { - if ( _map[hash].key == key ) { + const size_t start_hash) const + { + size_t hash = start_hash & (_map_size - 1); + while(_map[hash].timestamp == _threshold) + { + if(_map[hash].key == key) + { return hash; } - hash = (hash + 1) & ( _map_size - 1 ); + hash = (hash + 1) & (_map_size - 1); } return hash; } - void allocate(const size_t size) { - if ( _data == nullptr ) { + void allocate(const size_t size) + { + if(_data == nullptr) + { _map_size = align_to_next_power_of_two(size); _data = std::make_unique(_map_size * sizeof(MapElement)); _threshold = 2; - _map = reinterpret_cast(_data.get()); + _map = reinterpret_cast(_data.get()); memset(_data.get(), 0, _map_size * sizeof(MapElement)); } } - size_t align_to_next_power_of_two(const size_t size) const { + size_t align_to_next_power_of_two(const size_t size) const + { return std::pow(2.0, std::ceil(std::log2(static_cast(size)))); } @@ -174,7 +183,7 @@ class ConcurrentFlatMap { std::unique_ptr _data; int32_t _threshold; - MapElement* _map; + MapElement *_map; }; } // namespace ds diff --git a/mt-kahypar/datastructures/connectivity_info.h b/mt-kahypar/datastructures/connectivity_info.h index 622e94f7c..ab54b4ba6 100644 --- a/mt-kahypar/datastructures/connectivity_info.h +++ b/mt-kahypar/datastructures/connectivity_info.h @@ -28,49 +28,48 @@ #include "tbb/parallel_invoke.h" -#include "mt-kahypar/datastructures/pin_count_in_part.h" #include "mt-kahypar/datastructures/connectivity_set.h" +#include "mt-kahypar/datastructures/pin_count_in_part.h" #include "mt-kahypar/datastructures/sparse_pin_counts.h" namespace mt_kahypar { namespace ds { -class ConnectivityInfo { +class ConnectivityInfo +{ - public: +public: using Iterator = typename ConnectivitySets::Iterator; - ConnectivityInfo() : - _pin_counts(), - _con_set() { } + ConnectivityInfo() : _pin_counts(), _con_set() {} - ConnectivityInfo(const HyperedgeID num_hyperedges, - const PartitionID k, + ConnectivityInfo(const HyperedgeID num_hyperedges, const PartitionID k, const HypernodeID max_value) : - _pin_counts(num_hyperedges, k, max_value, false), - _con_set(num_hyperedges, k, false) { } - - ConnectivityInfo(const HyperedgeID num_hyperedges, - const PartitionID k, - const HypernodeID max_value, - parallel_tag_t) : - _pin_counts(), - _con_set() { - tbb::parallel_invoke([&] { - _pin_counts.initialize(num_hyperedges, k, max_value, true); - }, [&] { - _con_set = ConnectivitySets(num_hyperedges, k, true); - }); - } - - ConnectivityInfo(const ConnectivityInfo&) = delete; - ConnectivityInfo & operator= (const ConnectivityInfo &) = delete; - - ConnectivityInfo(ConnectivityInfo&& other) : - _pin_counts(std::move(other._pin_counts)), - _con_set(std::move(other._con_set)) { } - - ConnectivityInfo & operator= (ConnectivityInfo&& other) { + _pin_counts(num_hyperedges, k, max_value, false), + _con_set(num_hyperedges, k, false) + { + } + + ConnectivityInfo(const 
HyperedgeID num_hyperedges, const PartitionID k, + const HypernodeID max_value, parallel_tag_t) : + _pin_counts(), + _con_set() + { + tbb::parallel_invoke( + [&] { _pin_counts.initialize(num_hyperedges, k, max_value, true); }, + [&] { _con_set = ConnectivitySets(num_hyperedges, k, true); }); + } + + ConnectivityInfo(const ConnectivityInfo &) = delete; + ConnectivityInfo &operator=(const ConnectivityInfo &) = delete; + + ConnectivityInfo(ConnectivityInfo &&other) : + _pin_counts(std::move(other._pin_counts)), _con_set(std::move(other._con_set)) + { + } + + ConnectivityInfo &operator=(ConnectivityInfo &&other) + { _pin_counts = std::move(other._pin_counts); _con_set = std::move(other._con_set); return *this; @@ -78,237 +77,249 @@ class ConnectivityInfo { // ################## Connectivity Set ################## - inline void addBlock(const HyperedgeID he, const PartitionID p) { - _con_set.add(he, p); - } + inline void addBlock(const HyperedgeID he, const PartitionID p) { _con_set.add(he, p); } - inline void removeBlock(const HyperedgeID he, const PartitionID p) { + inline void removeBlock(const HyperedgeID he, const PartitionID p) + { _con_set.remove(he, p); } - inline bool containsBlock(const HyperedgeID he, const PartitionID p) const { + inline bool containsBlock(const HyperedgeID he, const PartitionID p) const + { return _con_set.contains(he, p); } - inline void clear(const HyperedgeID he) { - _con_set.clear(he); - } + inline void clear(const HyperedgeID he) { _con_set.clear(he); } - inline PartitionID connectivity(const HyperedgeID he) const { + inline PartitionID connectivity(const HyperedgeID he) const + { return _con_set.connectivity(he); } - inline IteratorRange connectivitySet(const HyperedgeID he) const { + inline IteratorRange connectivitySet(const HyperedgeID he) const + { return _con_set.connectivitySet(he); } - inline StaticBitset& shallowCopy(const HyperedgeID he) const { + inline StaticBitset &shallowCopy(const HyperedgeID he) const + { return _con_set.shallowCopy(he); } - inline Bitset& deepCopy(const HyperedgeID he) const { - return _con_set.deepCopy(he); - } + inline Bitset &deepCopy(const HyperedgeID he) const { return _con_set.deepCopy(he); } // ################## Pin Count In Part ################## // ! Returns the pin count of the hyperedge in the corresponding block - inline HypernodeID pinCountInPart(const HyperedgeID he, - const PartitionID id) const { + inline HypernodeID pinCountInPart(const HyperedgeID he, const PartitionID id) const + { return _pin_counts.pinCountInPart(he, id); } // ! Sets the pin count of the hyperedge in the corresponding block to value - inline void setPinCountInPart(const HyperedgeID he, - const PartitionID id, - const HypernodeID value) { + inline void setPinCountInPart(const HyperedgeID he, const PartitionID id, + const HypernodeID value) + { _pin_counts.setPinCountInPart(he, id, value); } // ! Increments the pin count of the hyperedge in the corresponding block - inline HypernodeID incrementPinCountInPart(const HyperedgeID he, - const PartitionID id) { + inline HypernodeID incrementPinCountInPart(const HyperedgeID he, const PartitionID id) + { return _pin_counts.incrementPinCountInPart(he, id); } // ! Decrements the pin count of the hyperedge in the corresponding block - inline HypernodeID decrementPinCountInPart(const HyperedgeID he, - const PartitionID id) { + inline HypernodeID decrementPinCountInPart(const HyperedgeID he, const PartitionID id) + { return _pin_counts.decrementPinCountInPart(he, id); } // ! 
Returns a snapshot of the pin counts of the hyperedge - inline PinCountSnapshot& pinCountSnapshot(const HyperedgeID he) { + inline PinCountSnapshot &pinCountSnapshot(const HyperedgeID he) + { return _pin_counts.snapshot(he); } // ################## Miscellaneous ################## // ! Returns the size in bytes of this data structure - size_t size_in_bytes() const { + size_t size_in_bytes() const + { return _pin_counts.size_in_bytes() /* + connectivity set */; } - void reset(const bool reset_parallel = false) { - if ( reset_parallel ) { - tbb::parallel_invoke( - [&] { _pin_counts.reset(true); }, - [&] { _con_set.reset(true); }); - } else { + void reset(const bool reset_parallel = false) + { + if(reset_parallel) + { + tbb::parallel_invoke([&] { _pin_counts.reset(true); }, + [&] { _con_set.reset(true); }); + } + else + { _pin_counts.reset(false); _con_set.reset(false); } } - void freeInternalData() { - tbb::parallel_invoke( - [&] { _pin_counts.freeInternalData(); }, - [&] { _con_set.freeInternalData(); }); + void freeInternalData() + { + tbb::parallel_invoke([&] { _pin_counts.freeInternalData(); }, + [&] { _con_set.freeInternalData(); }); } - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); _pin_counts.memoryConsumption(parent); _con_set.memoryConsumption(parent); } - - private: +private: // ! For each hyperedge and each block, _pins_in_part stores the // ! number of pins in that block PinCountInPart _pin_counts; - // ! For each hyperedge, _connectivity_set stores the set of blocks that the hyperedge spans + // ! For each hyperedge, _connectivity_set stores the set of blocks that the hyperedge + // spans ConnectivitySets _con_set; }; -class SparseConnectivityInfo { +class SparseConnectivityInfo +{ - public: +public: using Iterator = typename SparsePinCounts::Iterator; - SparseConnectivityInfo() : - _pin_counts() { } + SparseConnectivityInfo() : _pin_counts() {} - SparseConnectivityInfo(const HyperedgeID num_hyperedges, - const PartitionID k, + SparseConnectivityInfo(const HyperedgeID num_hyperedges, const PartitionID k, const HypernodeID max_value) : - _pin_counts(num_hyperedges, k, max_value, false) { } + _pin_counts(num_hyperedges, k, max_value, false) + { + } - SparseConnectivityInfo(const HyperedgeID num_hyperedges, - const PartitionID k, - const HypernodeID max_value, - parallel_tag_t) : - _pin_counts() { + SparseConnectivityInfo(const HyperedgeID num_hyperedges, const PartitionID k, + const HypernodeID max_value, parallel_tag_t) : + _pin_counts() + { _pin_counts.initialize(num_hyperedges, k, max_value, true); } - SparseConnectivityInfo(const SparseConnectivityInfo&) = delete; - SparseConnectivityInfo & operator= (const SparseConnectivityInfo &) = delete; + SparseConnectivityInfo(const SparseConnectivityInfo &) = delete; + SparseConnectivityInfo &operator=(const SparseConnectivityInfo &) = delete; - SparseConnectivityInfo(SparseConnectivityInfo&& other) : - _pin_counts(std::move(other._pin_counts)) { } + SparseConnectivityInfo(SparseConnectivityInfo &&other) : + _pin_counts(std::move(other._pin_counts)) + { + } - SparseConnectivityInfo & operator= (SparseConnectivityInfo&& other) { + SparseConnectivityInfo &operator=(SparseConnectivityInfo &&other) + { _pin_counts = std::move(other._pin_counts); return *this; } // ################## Connectivity Set ################## - inline void addBlock(const HyperedgeID, const PartitionID) { + inline void addBlock(const HyperedgeID, const PartitionID) + { 
// Do nothing, handled by incrementPinCountInPart } - inline void removeBlock(const HyperedgeID, const PartitionID) { + inline void removeBlock(const HyperedgeID, const PartitionID) + { // Do nothing, handled by decrementPinCountInPart } - inline bool containsBlock(const HyperedgeID he, const PartitionID p) const { + inline bool containsBlock(const HyperedgeID he, const PartitionID p) const + { return _pin_counts.contains(he, p); } - inline void clear(const HyperedgeID he) { - _pin_counts.clear(he); - } + inline void clear(const HyperedgeID he) { _pin_counts.clear(he); } - inline PartitionID connectivity(const HyperedgeID he) const { + inline PartitionID connectivity(const HyperedgeID he) const + { return _pin_counts.connectivity(he); } - inline IteratorRange connectivitySet(const HyperedgeID he) const { + inline IteratorRange connectivitySet(const HyperedgeID he) const + { return _pin_counts.connectivitySet(he); } - inline StaticBitset& shallowCopy(const HyperedgeID he) const { + inline StaticBitset &shallowCopy(const HyperedgeID he) const + { return _pin_counts.shallowCopy(he); } - inline Bitset& deepCopy(const HyperedgeID he) const { - return _pin_counts.deepCopy(he); - } + inline Bitset &deepCopy(const HyperedgeID he) const { return _pin_counts.deepCopy(he); } // ################## Pin Count In Part ################## // ! Returns the pin count of the hyperedge in the corresponding block - inline HypernodeID pinCountInPart(const HyperedgeID he, - const PartitionID id) const { + inline HypernodeID pinCountInPart(const HyperedgeID he, const PartitionID id) const + { return _pin_counts.pinCountInPart(he, id); } // ! Sets the pin count of the hyperedge in the corresponding block to value - inline void setPinCountInPart(const HyperedgeID he, - const PartitionID id, - const HypernodeID value) { + inline void setPinCountInPart(const HyperedgeID he, const PartitionID id, + const HypernodeID value) + { _pin_counts.setPinCountInPart(he, id, value); } // ! Increments the pin count of the hyperedge in the corresponding block - inline HypernodeID incrementPinCountInPart(const HyperedgeID he, - const PartitionID id) { + inline HypernodeID incrementPinCountInPart(const HyperedgeID he, const PartitionID id) + { return _pin_counts.incrementPinCountInPart(he, id); } // ! Decrements the pin count of the hyperedge in the corresponding block - inline HypernodeID decrementPinCountInPart(const HyperedgeID he, - const PartitionID id) { + inline HypernodeID decrementPinCountInPart(const HyperedgeID he, const PartitionID id) + { return _pin_counts.decrementPinCountInPart(he, id); } // ! Returns a snapshot of the pin counts of the hyperedge - inline PinCountSnapshot& pinCountSnapshot(const HyperedgeID he) { + inline PinCountSnapshot &pinCountSnapshot(const HyperedgeID he) + { return _pin_counts.snapshot(he); } // ################## Miscellaneous ################## // ! 
Returns the size in bytes of this data structure - size_t size_in_bytes() const { - return _pin_counts.size_in_bytes(); - } + size_t size_in_bytes() const { return _pin_counts.size_in_bytes(); } - void reset(const bool reset_parallel = false) { - if ( reset_parallel ) { + void reset(const bool reset_parallel = false) + { + if(reset_parallel) + { _pin_counts.reset(true); - } else { + } + else + { _pin_counts.reset(false); } } - void freeInternalData() { - _pin_counts.freeInternalData(); - } + void freeInternalData() { _pin_counts.freeInternalData(); } - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); _pin_counts.memoryConsumption(parent); } - private: +private: // ! For each hyperedge and each block, _pins_in_part stores the // ! number of pins in that block and also the connectivity set SparsePinCounts _pin_counts; }; -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/connectivity_set.h b/mt-kahypar/datastructures/connectivity_set.h index 3f24e5b5d..3a3f7250f 100644 --- a/mt-kahypar/datastructures/connectivity_set.h +++ b/mt-kahypar/datastructures/connectivity_set.h @@ -28,17 +28,16 @@ #pragma once #include -#include -#include #include +#include +#include #include "tbb/enumerable_thread_specific.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/datastructures/array.h" #include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/parallel/atomic_wrapper.h" -#include "mt-kahypar/utils/bit_ops.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/utils/memory_tree.h" #include "mt-kahypar/utils/range.h" @@ -49,19 +48,21 @@ namespace mt_kahypar { namespace ds { /** - * The connectivity set of a hyperedge is the set of parts of the partition, that it has pins in. - * For each hyperedge we maintain its connectivity set in a packed format (std::vector) - * and implement the necessary bitset functionality ourselves, i.e. add, remove, contains, clear, iteration. - * That is because we want atomic updates to support safe parallel modification of the partition. - * Adding/removing a part are both implemented as a toggle of the corresponding bit, in case an add and - * a remove operation are interweaved. However, this means the user must ensure that no two threads simultaneously try - * to add a part. One correct way is to keep an atomic count of pins for each hyperedge and part. Then only the thread - * raising the counter from zero to one performs the add, and only the thread decreasing the counter from one to zero - * performs the removal. + * The connectivity set of a hyperedge is the set of parts of the partition, that it + * has pins in. For each hyperedge we maintain its connectivity set in a packed format + * (std::vector) and implement the necessary bitset functionality ourselves, + * i.e. add, remove, contains, clear, iteration. That is because we want atomic updates to + * support safe parallel modification of the partition. Adding/removing a part are both + * implemented as a toggle of the corresponding bit, in case an add and a remove operation + * are interweaved. However, this means the user must ensure that no two threads + * simultaneously try to add a part. One correct way is to keep an atomic count of pins + * for each hyperedge and part. 
Then only the thread raising the counter from zero to one + * performs the add, and only the thread decreasing the counter from one to zero performs + * the removal. */ -class ConnectivitySets { +class ConnectivitySets +{ public: - static constexpr bool debug = false; static constexpr int BITS_PER_BLOCK = StaticBitset::BITS_PER_BLOCK; @@ -69,44 +70,41 @@ class ConnectivitySets { using Iterator = typename StaticBitset::const_iterator; ConnectivitySets() : - _k(0), - _num_hyperedges(0), - _num_blocks_per_hyperedge(0), - _bits(), - _deep_copy_bitset(), - _shallow_copy_bitset() { } - - ConnectivitySets(const HyperedgeID num_hyperedges, - const PartitionID k, + _k(0), _num_hyperedges(0), _num_blocks_per_hyperedge(0), _bits(), + _deep_copy_bitset(), _shallow_copy_bitset() + { + } + + ConnectivitySets(const HyperedgeID num_hyperedges, const PartitionID k, const bool assign_parallel = true) : - _k(k), - _num_hyperedges(num_hyperedges), - _num_blocks_per_hyperedge(k / BITS_PER_BLOCK + (k % BITS_PER_BLOCK != 0)), - _bits(), - _deep_copy_bitset(), - _shallow_copy_bitset() { - if ( num_hyperedges > 0 ) { - _bits.resize("Refinement", "connectivity_set", - static_cast(num_hyperedges) * _num_blocks_per_hyperedge, true, assign_parallel); - } + _k(k), + _num_hyperedges(num_hyperedges), + _num_blocks_per_hyperedge(k / BITS_PER_BLOCK + (k % BITS_PER_BLOCK != 0)), _bits(), + _deep_copy_bitset(), _shallow_copy_bitset() + { + if(num_hyperedges > 0) + { + _bits.resize("Refinement", "connectivity_set", + static_cast(num_hyperedges) * _num_blocks_per_hyperedge, true, + assign_parallel); } + } - IteratorRange connectivitySet(const HyperedgeID he) const { + IteratorRange connectivitySet(const HyperedgeID he) const + { return IteratorRange( - Iterator(_num_blocks_per_hyperedge, _bits.data() + he * _num_blocks_per_hyperedge, -1), - Iterator(_num_blocks_per_hyperedge, _bits.data() + - he * _num_blocks_per_hyperedge, _num_blocks_per_hyperedge * BITS_PER_BLOCK)); + Iterator(_num_blocks_per_hyperedge, _bits.data() + he * _num_blocks_per_hyperedge, + -1), + Iterator(_num_blocks_per_hyperedge, _bits.data() + he * _num_blocks_per_hyperedge, + _num_blocks_per_hyperedge * BITS_PER_BLOCK)); } - void add(const HyperedgeID he, const PartitionID p) { - toggle(he, p); - } + void add(const HyperedgeID he, const PartitionID p) { toggle(he, p); } - void remove(const HyperedgeID he, const PartitionID p) { - toggle(he, p); - } + void remove(const HyperedgeID he, const PartitionID p) { toggle(he, p); } - bool contains(const HyperedgeID he, const PartitionID p) const { + bool contains(const HyperedgeID he, const PartitionID p) const + { const size_t div = p / BITS_PER_BLOCK; const size_t rem = p % BITS_PER_BLOCK; const size_t pos = static_cast(he) * _num_blocks_per_hyperedge + div; @@ -114,87 +112,96 @@ class ConnectivitySets { } // not threadsafe - void clear(const HyperedgeID he) { + void clear(const HyperedgeID he) + { const size_t start = static_cast(he) * _num_blocks_per_hyperedge; - const size_t end = ( static_cast(he) + 1 ) * _num_blocks_per_hyperedge; - for (size_t i = start; i < end; ++i) { + const size_t end = (static_cast(he) + 1) * _num_blocks_per_hyperedge; + for(size_t i = start; i < end; ++i) + { __atomic_store_n(&_bits[i], 0, __ATOMIC_RELAXED); } } - void reset(const bool reset_parallel = false) { - if ( reset_parallel ) { + void reset(const bool reset_parallel = false) + { + if(reset_parallel) + { tbb::parallel_for(UL(0), _bits.size(), [&](const size_t i) { __atomic_store_n(&_bits[i], 0, __ATOMIC_RELAXED); }); - } else { 
- for (size_t i = 0; i < _bits.size(); ++i) { + } + else + { + for(size_t i = 0; i < _bits.size(); ++i) + { __atomic_store_n(&_bits[i], 0, __ATOMIC_RELAXED); } } } - PartitionID connectivity(const HyperedgeID he) const { + PartitionID connectivity(const HyperedgeID he) const + { PartitionID conn = 0; const size_t start = static_cast(he) * _num_blocks_per_hyperedge; - const size_t end = ( static_cast(he) + 1 ) * _num_blocks_per_hyperedge; - for (size_t i = start; i < end; ++i) { + const size_t end = (static_cast(he) + 1) * _num_blocks_per_hyperedge; + for(size_t i = start; i < end; ++i) + { conn += utils::popcount_64(__atomic_load_n(&_bits[i], __ATOMIC_RELAXED)); } return conn; } // Creates a shallow copy of the connectivity set of hyperedge he - StaticBitset& shallowCopy(const HyperedgeID he) const { - StaticBitset& shallow_copy = _shallow_copy_bitset.local(); + StaticBitset &shallowCopy(const HyperedgeID he) const + { + StaticBitset &shallow_copy = _shallow_copy_bitset.local(); shallow_copy.set(_num_blocks_per_hyperedge, - &_bits[UL(he) * _num_blocks_per_hyperedge]); + &_bits[UL(he) * _num_blocks_per_hyperedge]); return shallow_copy; } // Creates a deep copy of the connectivity set of hyperedge he - Bitset& deepCopy(const HyperedgeID he) const { - Bitset& deep_copy = _deep_copy_bitset.local(); - deep_copy.copy(_num_blocks_per_hyperedge, - &_bits[UL(he) * _num_blocks_per_hyperedge]); + Bitset &deepCopy(const HyperedgeID he) const + { + Bitset &deep_copy = _deep_copy_bitset.local(); + deep_copy.copy(_num_blocks_per_hyperedge, &_bits[UL(he) * _num_blocks_per_hyperedge]); return deep_copy; } - void freeInternalData() { - parallel::free(_bits); - } + void freeInternalData() { parallel::free(_bits); } - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); parent->addChild("Connectivity Bit Vector", sizeof(UnsafeBlock) * _bits.size()); } - static size_t num_elements(const HyperedgeID num_hyperedges, - const PartitionID k) { - return static_cast(num_hyperedges) * (k / BITS_PER_BLOCK + (k % BITS_PER_BLOCK != 0)); + static size_t num_elements(const HyperedgeID num_hyperedges, const PartitionID k) + { + return static_cast(num_hyperedges) * + (k / BITS_PER_BLOCK + (k % BITS_PER_BLOCK != 0)); } private: - void toggle(const HyperedgeID he, const PartitionID p) { - assert(p < _k); - assert(he < _num_hyperedges); + void toggle(const HyperedgeID he, const PartitionID p) + { + assert(p < _k); + assert(he < _num_hyperedges); const size_t div = p / BITS_PER_BLOCK, rem = p % BITS_PER_BLOCK; const size_t idx = static_cast(he) * _num_blocks_per_hyperedge + div; assert(idx < _bits.size()); __atomic_xor_fetch(&_bits[idx], UnsafeBlock(1) << rem, __ATOMIC_RELAXED); - } + } - PartitionID _k; - HyperedgeID _num_hyperedges; - PartitionID _num_blocks_per_hyperedge; - Array _bits; + PartitionID _k; + HyperedgeID _num_hyperedges; + PartitionID _num_blocks_per_hyperedge; + Array _bits; // Bitsets to create shallow and deep copies of the connectivity set mutable tbb::enumerable_thread_specific _deep_copy_bitset; mutable tbb::enumerable_thread_specific _shallow_copy_bitset; }; - - -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/contraction_tree.cpp b/mt-kahypar/datastructures/contraction_tree.cpp index 9873276f0..a40c82b71 100644 --- a/mt-kahypar/datastructures/contraction_tree.cpp +++ b/mt-kahypar/datastructures/contraction_tree.cpp @@ -29,70 +29,87 @@ 
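// Minimal sketch of the caller-side protocol described in the ConnectivitySets
// comment above: because add() and remove() are both implemented as an atomic bit
// toggle, the caller must guarantee that only one thread performs the add (resp.
// remove) for a given (hyperedge, block) pair. The sketch assumes a hypothetical
// caller that owns one atomic pin counter per (hyperedge, block); the names and the
// function signature below are made up for illustration only.
#include <atomic>

void applyPinMove(ConnectivitySets &con_set,
                  std::atomic<int> &pins_in_from, // pin counter of (he, from)
                  std::atomic<int> &pins_in_to,   // pin counter of (he, to)
                  const HyperedgeID he, const PartitionID from, const PartitionID to)
{
  // Only the thread that raises the counter from zero to one toggles the bit on.
  if(pins_in_to.fetch_add(1, std::memory_order_relaxed) == 0)
  {
    con_set.add(he, to);
  }
  // Only the thread that lowers the counter from one to zero toggles the bit off.
  if(pins_in_from.fetch_sub(1, std::memory_order_relaxed) == 1)
  {
    con_set.remove(he, from);
  }
}
// With this guard, interleaved adds and removes by different threads never toggle
// the same bit twice for the same logical event.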
#include -#include -#include #include +#include +#include -#include "mt-kahypar/parallel/parallel_prefix_sum.h" #include "mt-kahypar/datastructures/streaming_vector.h" +#include "mt-kahypar/parallel/parallel_prefix_sum.h" #include "mt-kahypar/utils/timer.h" namespace mt_kahypar { namespace ds { // ! Initializes the data structure in parallel -void ContractionTree::initialize(const HypernodeID num_hypernodes) { +void ContractionTree::initialize(const HypernodeID num_hypernodes) +{ _num_hypernodes = num_hypernodes; - tbb::parallel_invoke([&] { - _tree.resize(_num_hypernodes); - tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID hn) { - node(hn).setParent(hn); - }); - }, [&] { - _out_degrees.assign(_num_hypernodes + 1, parallel::IntegralAtomicWrapper(0)); - }, [&] { - _incidence_array.resize(_num_hypernodes); - }); + tbb::parallel_invoke( + [&] { + _tree.resize(_num_hypernodes); + tbb::parallel_for(ID(0), _num_hypernodes, + [&](const HypernodeID hn) { node(hn).setParent(hn); }); + }, + [&] { + _out_degrees.assign(_num_hypernodes + 1, + parallel::IntegralAtomicWrapper(0)); + }, + [&] { _incidence_array.resize(_num_hypernodes); }); } // ! Finalizes the contraction tree which involve reversing the parent pointers // ! such that the contraction tree can be traversed in a top-down fashion and // ! computing the subtree sizes. -void ContractionTree::finalize(const size_t num_versions) { +void ContractionTree::finalize(const size_t num_versions) +{ ASSERT(!_finalized, "Contraction tree already finalized"); // Compute out degrees of each tree node tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID hn) { - ASSERT(node(hn).pendingContractions() == 0, "There are" - << node(hn).pendingContractions() << "pending contractions for node" << hn); + ASSERT(node(hn).pendingContractions() == 0, + "There are" << node(hn).pendingContractions() + << "pending contractions for node" << hn); const HypernodeID parent = node(hn).parent(); - if ( parent != hn ) { + if(parent != hn) + { ASSERT(parent + 1 <= _num_hypernodes, "Parent" << parent << "does not exist!"); ++_out_degrees[parent + 1]; } }); - // Compute prefix sum over out degrees which will be the index pointer into the incidence array - parallel::scalable_vector> incidence_array_pos; - parallel::TBBPrefixSum, parallel::scalable_vector> - out_degree_prefix_sum(_out_degrees); - tbb::parallel_invoke([&] { - tbb::parallel_scan(tbb::blocked_range(UL(0), _out_degrees.size()), out_degree_prefix_sum); - }, [&] { - incidence_array_pos.assign(_num_hypernodes, parallel::IntegralAtomicWrapper(0)); - }); - - // Reverse parent pointer of contraction tree such that it can be traversed in top-down fashion + // Compute prefix sum over out degrees which will be the index pointer into the + // incidence array + parallel::scalable_vector > + incidence_array_pos; + parallel::TBBPrefixSum, + parallel::scalable_vector> + out_degree_prefix_sum(_out_degrees); + tbb::parallel_invoke( + [&] { + tbb::parallel_scan(tbb::blocked_range(UL(0), _out_degrees.size()), + out_degree_prefix_sum); + }, + [&] { + incidence_array_pos.assign(_num_hypernodes, + parallel::IntegralAtomicWrapper(0)); + }); + + // Reverse parent pointer of contraction tree such that it can be traversed in top-down + // fashion StreamingVector tmp_roots; tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID hn) { const HypernodeID parent = node(hn).parent(); - if ( parent != hn ) { + if(parent != hn) + { const HypernodeID pos = _out_degrees[parent] + incidence_array_pos[parent]++; 
ASSERT(pos < _out_degrees[parent + 1]); _incidence_array[pos] = hn; - } else { + } + else + { // In that case node hn is a root const bool contains_subtree = (_out_degrees[hn + 1] - _out_degrees[hn]) > 0; - if ( contains_subtree ) { + if(contains_subtree) + { tmp_roots.stream(hn); } } @@ -104,32 +121,37 @@ void ContractionTree::finalize(const size_t num_versions) { // Compute roots for each version // Each contraction/edge in the contraction tree is associated with a version. // Later we want to be able to traverse the contraction tree for a specific version - // in a top-down fashion. Therefore, we compute for each version the corresponding roots. - // A vertex is a root of a version if contains a child with that version less than - // the version number of the vertex itself. Note, that for all vertices in the contraction - // tree version(u) <= version(parent(u)). - parallel::scalable_vector> tmp_version_roots(num_versions); + // in a top-down fashion. Therefore, we compute for each version the corresponding + // roots. A vertex is a root of a version if contains a child with that version less + // than the version number of the vertex itself. Note, that for all vertices in the + // contraction tree version(u) <= version(parent(u)). + parallel::scalable_vector > tmp_version_roots( + num_versions); tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID u) { std::sort(_incidence_array.begin() + _out_degrees[u], _incidence_array.begin() + _out_degrees[u + 1], - [&](const HypernodeID& u, const HypernodeID& v) { + [&](const HypernodeID &u, const HypernodeID &v) { const size_t u_version = version(u); const size_t v_version = version(v); - const Interval& u_ival = node(u).interval(); - const Interval& v_ival = node(v).interval(); + const Interval &u_ival = node(u).interval(); + const Interval &v_ival = node(v).interval(); return u_version < v_version || - ( u_version == v_version && u_ival.end > v_ival.end ) || - ( u_version == v_version && u_ival.end == v_ival.end && u_ival.start > v_ival.start ) || - ( u_version == v_version && u_ival.end == v_ival.end && u_ival.start == v_ival.start && u < v ); + (u_version == v_version && u_ival.end > v_ival.end) || + (u_version == v_version && u_ival.end == v_ival.end && + u_ival.start > v_ival.start) || + (u_version == v_version && u_ival.end == v_ival.end && + u_ival.start == v_ival.start && u < v); }); size_t version_u = _tree[u].version(); ASSERT(version_u <= _tree[_tree[u].parent()].version()); size_t last_version = kInvalidVersion; - for ( const HypernodeID& v : childs(u) ) { + for(const HypernodeID &v : childs(u)) + { size_t version_v = _tree[v].version(); ASSERT(version_v < num_versions, V(version_v) << V(num_versions)); - if ( version_v != last_version && version_v < version_u ) { + if(version_v != last_version && version_v < version_u) + { tmp_version_roots[version_v].stream(u); } last_version = version_v; @@ -145,108 +167,129 @@ void ContractionTree::finalize(const size_t num_versions) { tbb::parallel_for(UL(0), _roots.size(), [&](const size_t i) { parallel::scalable_vector dfs; dfs.push_back(_roots[i]); - while( !dfs.empty() ) { + while(!dfs.empty()) + { const HypernodeID u = dfs.back(); - if ( subtreeSize(u) == 0 ) { + if(subtreeSize(u) == 0) + { // Visit u for the first time => push all childs on the dfs stack - for ( const HypernodeID& v : childs(u)) { + for(const HypernodeID &v : childs(u)) + { dfs.push_back(v); } // Mark u as visited node(u).setSubtreeSize(1); - } else { + } + else + { // Visit u for second time => accumulate 
subtree sizes and pop u dfs.pop_back(); HypernodeID subtree_size = 0; - for ( const HypernodeID& v : childs(u) ) { - subtree_size += ( subtreeSize(v) + 1 ); + for(const HypernodeID &v : childs(u)) + { + subtree_size += (subtreeSize(v) + 1); } node(u).setSubtreeSize(subtree_size); } } }); - tbb::parallel_invoke([&] { - parallel::free(incidence_array_pos); - }, [&] { - tmp_roots.clear_parallel(); - }); + tbb::parallel_invoke([&] { parallel::free(incidence_array_pos); }, + [&] { tmp_roots.clear_parallel(); }); } // ####################### Copy ####################### // ! Copy contraction tree in parallel -ContractionTree ContractionTree::copy(parallel_tag_t) const { +ContractionTree ContractionTree::copy(parallel_tag_t) const +{ ContractionTree tree; tree._num_hypernodes = _num_hypernodes; tree._finalized = _finalized; - tbb::parallel_invoke([&] { - if (!_tree.empty()) { - tree._tree.resize(_tree.size()); - memcpy(tree._tree.data(), _tree.data(), sizeof(Node) * _tree.size()); - } - }, [&] { - if (!_roots.empty()) { - tree._roots.resize(_roots.size()); - memcpy(tree._roots.data(), _roots.data(), sizeof(HypernodeID) * _roots.size()); - } - }, [&] { - const size_t num_versions = _version_roots.size(); - tree._version_roots.resize(num_versions); - tbb::parallel_for(UL(0), num_versions, [&](const size_t i) { - if (!_version_roots[i].empty()) { - tree._version_roots[i].resize(_version_roots[i].size()); - memcpy(tree._version_roots[i].data(), _version_roots[i].data(), - sizeof(HypernodeID) * _version_roots[i].size()); - } - }); - }, [&] { - tree._out_degrees.resize(_out_degrees.size()); - for ( size_t i = 0; i < _out_degrees.size(); ++i ) { - tree._out_degrees[i] = _out_degrees[i]; - } - }, [&] { - if (!_incidence_array.empty()) { - tree._incidence_array.resize(_incidence_array.size()); - memcpy(tree._incidence_array.data(), _incidence_array.data(), - sizeof(HypernodeID) * _incidence_array.size()); - } - }); + tbb::parallel_invoke( + [&] { + if(!_tree.empty()) + { + tree._tree.resize(_tree.size()); + memcpy(tree._tree.data(), _tree.data(), sizeof(Node) * _tree.size()); + } + }, + [&] { + if(!_roots.empty()) + { + tree._roots.resize(_roots.size()); + memcpy(tree._roots.data(), _roots.data(), sizeof(HypernodeID) * _roots.size()); + } + }, + [&] { + const size_t num_versions = _version_roots.size(); + tree._version_roots.resize(num_versions); + tbb::parallel_for(UL(0), num_versions, [&](const size_t i) { + if(!_version_roots[i].empty()) + { + tree._version_roots[i].resize(_version_roots[i].size()); + memcpy(tree._version_roots[i].data(), _version_roots[i].data(), + sizeof(HypernodeID) * _version_roots[i].size()); + } + }); + }, + [&] { + tree._out_degrees.resize(_out_degrees.size()); + for(size_t i = 0; i < _out_degrees.size(); ++i) + { + tree._out_degrees[i] = _out_degrees[i]; + } + }, + [&] { + if(!_incidence_array.empty()) + { + tree._incidence_array.resize(_incidence_array.size()); + memcpy(tree._incidence_array.data(), _incidence_array.data(), + sizeof(HypernodeID) * _incidence_array.size()); + } + }); return tree; } // ! 
Copy contraction tree sequentially -ContractionTree ContractionTree::copy() const { +ContractionTree ContractionTree::copy() const +{ ContractionTree tree; tree._num_hypernodes = _num_hypernodes; tree._finalized = _finalized; - if (!_tree.empty()) { + if(!_tree.empty()) + { tree._tree.resize(_tree.size()); memcpy(tree._tree.data(), _tree.data(), sizeof(Node) * _tree.size()); } - if (!_roots.empty()) { + if(!_roots.empty()) + { tree._roots.resize(_roots.size()); memcpy(tree._roots.data(), _roots.data(), sizeof(HypernodeID) * _roots.size()); } const size_t num_versions = _version_roots.size(); tree._version_roots.resize(num_versions); - for ( size_t i = 0; i < num_versions; ++i ) { - if (!_version_roots[i].empty()) { + for(size_t i = 0; i < num_versions; ++i) + { + if(!_version_roots[i].empty()) + { tree._version_roots[i].resize(_version_roots[i].size()); memcpy(tree._version_roots[i].data(), _version_roots[i].data(), sizeof(HypernodeID) * _version_roots[i].size()); } } tree._out_degrees.resize(_out_degrees.size()); - for ( size_t i = 0; i < _out_degrees.size(); ++i ) { + for(size_t i = 0; i < _out_degrees.size(); ++i) + { tree._out_degrees[i] = _out_degrees[i]; } - if (!_incidence_array.empty()) { + if(!_incidence_array.empty()) + { tree._incidence_array.resize(_incidence_array.size()); memcpy(tree._incidence_array.data(), _incidence_array.data(), sizeof(HypernodeID) * _incidence_array.size()); @@ -256,30 +299,36 @@ ContractionTree ContractionTree::copy() const { } // ! Resets internal data structures -void ContractionTree::reset() { - tbb::parallel_invoke([&] { - tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID hn) { - _tree[hn].reset(hn); - _out_degrees[hn].store(0); - }); - _out_degrees[_num_hypernodes].store(0); - }, [&] { - parallel::parallel_free(_version_roots); - _roots.clear(); - }); +void ContractionTree::reset() +{ + tbb::parallel_invoke( + [&] { + tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID hn) { + _tree[hn].reset(hn); + _out_degrees[hn].store(0); + }); + _out_degrees[_num_hypernodes].store(0); + }, + [&] { + parallel::parallel_free(_version_roots); + _roots.clear(); + }); _finalized = false; } // ! 
Free internal data in parallel -void ContractionTree::freeInternalData() { - if ( _num_hypernodes > 0 ) { +void ContractionTree::freeInternalData() +{ + if(_num_hypernodes > 0) + { parallel::parallel_free(_tree, _roots, _out_degrees, _incidence_array); } _num_hypernodes = 0; _finalized = false; } -void ContractionTree::memoryConsumption(utils::MemoryTreeNode* parent) const { +void ContractionTree::memoryConsumption(utils::MemoryTreeNode *parent) const +{ ASSERT(parent); parent->addChild("Tree Nodes", sizeof(Node) * _tree.size()); @@ -288,65 +337,85 @@ void ContractionTree::memoryConsumption(utils::MemoryTreeNode* parent) const { parent->addChild("Incidence Array", sizeof(HypernodeID) * _incidence_array.size()); } - using ContractionInterval = typename ContractionTree::Interval; using ChildIterator = typename ContractionTree::ChildIterator; -struct PQBatchUncontractionElement { +struct PQBatchUncontractionElement +{ int64_t _objective; std::pair _iterator; }; -struct PQElementComparator { - bool operator()(const PQBatchUncontractionElement& lhs, const PQBatchUncontractionElement& rhs){ - return lhs._objective < rhs._objective; +struct PQElementComparator +{ + bool operator()(const PQBatchUncontractionElement &lhs, + const PQBatchUncontractionElement &rhs) + { + return lhs._objective < rhs._objective; } }; bool ContractionTree::verifyBatchIndexAssignments( - const BatchIndexAssigner& batch_assigner, - const parallel::scalable_vector>& local_batch_assignments) const { + const BatchIndexAssigner &batch_assigner, + const parallel::scalable_vector > + &local_batch_assignments) const +{ parallel::scalable_vector assignments; - for ( size_t i = 0; i < local_batch_assignments.size(); ++i ) { - for ( const BatchAssignment& batch_assign : local_batch_assignments[i] ) { + for(size_t i = 0; i < local_batch_assignments.size(); ++i) + { + for(const BatchAssignment &batch_assign : local_batch_assignments[i]) + { assignments.push_back(batch_assign); } } std::sort(assignments.begin(), assignments.end(), - [&](const BatchAssignment& lhs, const BatchAssignment& rhs) { - return lhs.batch_index < rhs.batch_index || - (lhs.batch_index == rhs.batch_index && lhs.batch_pos < rhs.batch_pos); - }); - - if ( assignments.size() > 0 ) { - if ( assignments[0].batch_index != 0 || assignments[0].batch_pos != 0 ) { + [&](const BatchAssignment &lhs, const BatchAssignment &rhs) { + return lhs.batch_index < rhs.batch_index || + (lhs.batch_index == rhs.batch_index && + lhs.batch_pos < rhs.batch_pos); + }); + + if(assignments.size() > 0) + { + if(assignments[0].batch_index != 0 || assignments[0].batch_pos != 0) + { LOG << "First uncontraction should start at batch 0 at position 0" << V(assignments[0].batch_index) << V(assignments[0].batch_pos); return false; } - for ( size_t i = 1; i < assignments.size(); ++i ) { - if ( assignments[i - 1].batch_index == assignments[i].batch_index ) { - if ( assignments[i - 1].batch_pos + 1 != assignments[i].batch_pos ) { - LOG << "Batch positions are not consecutive" - << V(i) << V(assignments[i - 1].batch_pos) << V(assignments[i].batch_pos); + for(size_t i = 1; i < assignments.size(); ++i) + { + if(assignments[i - 1].batch_index == assignments[i].batch_index) + { + if(assignments[i - 1].batch_pos + 1 != assignments[i].batch_pos) + { + LOG << "Batch positions are not consecutive" << V(i) + << V(assignments[i - 1].batch_pos) << V(assignments[i].batch_pos); return false; } - } else { - if ( assignments[i - 1].batch_index + 1 != assignments[i].batch_index ) { - LOG << "Batch indices are not 
consecutive" - << V(i) << V(assignments[i - 1].batch_index) << V(assignments[i].batch_index); + } + else + { + if(assignments[i - 1].batch_index + 1 != assignments[i].batch_index) + { + LOG << "Batch indices are not consecutive" << V(i) + << V(assignments[i - 1].batch_index) << V(assignments[i].batch_index); return false; } - if ( assignments[i].batch_pos != 0 ) { + if(assignments[i].batch_pos != 0) + { LOG << "First uncontraction of each batch should start at position 0" << V(assignments[i].batch_pos); return false; } - if ( assignments[i - 1].batch_pos + 1 != batch_assigner.batchSize(assignments[i - 1].batch_index) ) { - LOG << "Position of last uncontraction in batch" << assignments[i - 1].batch_index - << "does not match size of batch" - << V(assignments[i - 1].batch_pos) << V(batch_assigner.batchSize(assignments[i - 1].batch_index)); + if(assignments[i - 1].batch_pos + 1 != + batch_assigner.batchSize(assignments[i - 1].batch_index)) + { + LOG << "Position of last uncontraction in batch" + << assignments[i - 1].batch_index << "does not match size of batch" + << V(assignments[i - 1].batch_pos) + << V(batch_assigner.batchSize(assignments[i - 1].batch_index)); return false; } } @@ -356,32 +425,37 @@ bool ContractionTree::verifyBatchIndexAssignments( return true; } -BatchVector ContractionTree::createBatchUncontractionHierarchyForVersion(BatchIndexAssigner& batch_assigner, - const size_t version) { +BatchVector ContractionTree::createBatchUncontractionHierarchyForVersion( + BatchIndexAssigner &batch_assigner, const size_t version) +{ using PQ = std::priority_queue, PQElementComparator>; // Checks if two contraction intervals intersect - auto does_interval_intersect = [&](const ContractionInterval& i1, const ContractionInterval& i2) { - if (i1.start == kInvalidHypernode || i2.start == kInvalidHypernode) { + auto does_interval_intersect = [&](const ContractionInterval &i1, + const ContractionInterval &i2) { + if(i1.start == kInvalidHypernode || i2.start == kInvalidHypernode) + { return false; } return (i1.start <= i2.end && i1.end >= i2.end) || - (i2.start <= i1.end && i2.end >= i1.end); + (i2.start <= i1.end && i2.end >= i1.end); }; - auto push_into_pq = [&](PQ& prio_q, const HypernodeID& u) { + auto push_into_pq = [&](PQ &prio_q, const HypernodeID &u) { auto it = childs(u); auto current = it.begin(); auto end = it.end(); - while ( current != end && this->version(*current) != version ) { + while(current != end && this->version(*current) != version) + { ++current; } - if ( current != end ) { - prio_q.push(PQBatchUncontractionElement { - subtreeSize(*current), std::make_pair(current, end) } ); + if(current != end) + { + prio_q.push(PQBatchUncontractionElement{ subtreeSize(*current), + std::make_pair(current, end) }); } }; @@ -389,22 +463,24 @@ BatchVector ContractionTree::createBatchUncontractionHierarchyForVersion(BatchIn // each thread. 
const size_t num_hardware_threads = std::thread::hardware_concurrency(); parallel::scalable_vector local_pqs(num_hardware_threads); - const parallel::scalable_vector& roots = roots_of_version(version); + const parallel::scalable_vector &roots = roots_of_version(version); tbb::parallel_for(UL(0), roots.size(), [&](const size_t i) { const int cpu_id = THREAD_ID; push_into_pq(local_pqs[cpu_id], roots[i]); }); using LocalBatchAssignments = parallel::scalable_vector; - parallel::scalable_vector local_batch_assignments(num_hardware_threads); + parallel::scalable_vector local_batch_assignments( + num_hardware_threads); parallel::scalable_vector local_batch_indices(num_hardware_threads, 0); tbb::parallel_for(UL(0), num_hardware_threads, [&](const size_t i) { - size_t& current_batch_index = local_batch_indices[i]; - LocalBatchAssignments& batch_assignments = local_batch_assignments[i]; - PQ& pq = local_pqs[i]; + size_t ¤t_batch_index = local_batch_indices[i]; + LocalBatchAssignments &batch_assignments = local_batch_assignments[i]; + PQ &pq = local_pqs[i]; PQ next_pq; - while ( !pq.empty() ) { + while(!pq.empty()) + { // Iterator over the childs of a active vertex auto it = pq.top()._iterator; ASSERT(it.first != it.second); @@ -415,7 +491,7 @@ BatchVector ContractionTree::createBatchUncontractionHierarchyForVersion(BatchIn const size_t start_idx = batch_assignments.size(); size_t num_uncontractions = 1; const HypernodeID u = parent(v); - batch_assignments.push_back(BatchAssignment { u, v, UL(0), UL(0) }); + batch_assignments.push_back(BatchAssignment{ u, v, UL(0), UL(0) }); // Push contraction partner into pq for the next BFS level push_into_pq(next_pq, v); @@ -423,17 +499,21 @@ BatchVector ContractionTree::createBatchUncontractionHierarchyForVersion(BatchIn // (u,v) into the current batch ++it.first; ContractionInterval current_ival = interval(v); - while ( it.first != it.second && this->version(*it.first) == version ) { + while(it.first != it.second && this->version(*it.first) == version) + { const HypernodeID w = *it.first; const ContractionInterval w_ival = interval(w); - if ( does_interval_intersect(current_ival, w_ival) ) { + if(does_interval_intersect(current_ival, w_ival)) + { ASSERT(parent(w) == u); ++num_uncontractions; - batch_assignments.push_back(BatchAssignment { u, w, UL(0), UL(0) }); + batch_assignments.push_back(BatchAssignment{ u, w, UL(0), UL(0) }); current_ival.start = std::min(current_ival.start, w_ival.start); current_ival.end = std::max(current_ival.end, w_ival.end); push_into_pq(next_pq, w); - } else { + } + else + { break; } ++it.first; @@ -441,27 +521,31 @@ BatchVector ContractionTree::createBatchUncontractionHierarchyForVersion(BatchIn // If there are still childs left of u, we push the iterator again into the // priority queue of the current BFS level. 
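// Worked example for the does_interval_intersect predicate used above: two sibling
// uncontractions are grouped into the same batch only if their contraction intervals
// overlap, i.e. the end of one interval lies inside the other. Below is a
// self-contained mirror of the predicate (omitting the kInvalidHypernode guard) with
// two sample evaluations; the values are hypothetical and for illustration only.
struct IntervalSketch
{
  int start, end;
};
constexpr bool intersects(const IntervalSketch a, const IntervalSketch b)
{
  return (a.start <= b.end && a.end >= b.end) || (b.start <= a.end && b.end >= a.end);
}
static_assert(intersects({ 2, 5 }, { 4, 7 }), "end 5 lies inside [4,7]");
static_assert(!intersects({ 2, 5 }, { 6, 9 }), "neither end lies inside the other");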
- if ( it.first != it.second && this->version(*it.first) == version ) { - pq.push(PQBatchUncontractionElement { subtreeSize(*it.first), it }); + if(it.first != it.second && this->version(*it.first) == version) + { + pq.push(PQBatchUncontractionElement{ subtreeSize(*it.first), it }); } // Request batch index and its position within that batch - BatchAssignment assignment = batch_assigner.getBatchIndex( - current_batch_index, num_uncontractions); - for ( size_t j = start_idx; j < start_idx + num_uncontractions; ++j ) { + BatchAssignment assignment = + batch_assigner.getBatchIndex(current_batch_index, num_uncontractions); + for(size_t j = start_idx; j < start_idx + num_uncontractions; ++j) + { batch_assignments[j].batch_index = assignment.batch_index; batch_assignments[j].batch_pos = assignment.batch_pos + (j - start_idx); } current_batch_index = assignment.batch_index; - if ( pq.empty() ) { + if(pq.empty()) + { std::swap(pq, next_pq); // Compute minimum batch index to which a thread assigned last. // Afterwards, transmit information to batch assigner to speed up // batch index computation. ++current_batch_index; size_t min_batch_index = current_batch_index; - for ( const size_t& batch_index : local_batch_indices ) { + for(const size_t &batch_index : local_batch_indices) + { min_batch_index = std::min(min_batch_index, batch_index); } batch_assigner.increaseHighWaterMark(min_batch_index); @@ -469,7 +553,8 @@ } }); - ASSERT(verifyBatchIndexAssignments(batch_assigner, local_batch_assignments), "Batch asisignment failed"); + ASSERT(verifyBatchIndexAssignments(batch_assigner, local_batch_assignments), + "Batch assignment failed"); // In the previous step we have calculated for each uncontraction a batch index and // its position within that batch.
We have to write the uncontractions @@ -481,8 +566,9 @@ BatchVector ContractionTree::createBatchUncontractionHierarchyForVersion(BatchIn }); tbb::parallel_for(UL(0), num_hardware_threads, [&](const size_t i) { - LocalBatchAssignments& batch_assignments = local_batch_assignments[i]; - for ( const BatchAssignment& batch_assignment : batch_assignments ) { + LocalBatchAssignments &batch_assignments = local_batch_assignments[i]; + for(const BatchAssignment &batch_assignment : batch_assignments) + { const size_t batch_index = batch_assignment.batch_index; const size_t batch_pos = batch_assignment.batch_pos; ASSERT(batch_index < batches.size()); @@ -492,7 +578,8 @@ BatchVector ContractionTree::createBatchUncontractionHierarchyForVersion(BatchIn } }); - while ( !batches.empty() && batches.back().empty() ) { + while(!batches.empty() && batches.back().empty()) + { batches.pop_back(); } std::reverse(batches.begin(), batches.end()); @@ -500,5 +587,5 @@ BatchVector ContractionTree::createBatchUncontractionHierarchyForVersion(BatchIn return batches; } -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/contraction_tree.h b/mt-kahypar/datastructures/contraction_tree.h index 61f124395..7236cf604 100644 --- a/mt-kahypar/datastructures/contraction_tree.h +++ b/mt-kahypar/datastructures/contraction_tree.h @@ -29,11 +29,11 @@ #include -#include "mt-kahypar/parallel/stl/scalable_vector.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/datastructures/array.h" -#include "mt-kahypar/utils/memory_tree.h" #include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/utils/memory_tree.h" #include "mt-kahypar/utils/range.h" namespace mt_kahypar { @@ -41,7 +41,8 @@ namespace ds { // Represents a uncontraction that is assigned to a certain batch // and within that batch to a certain position. -struct BatchAssignment { +struct BatchAssignment +{ HypernodeID u; HypernodeID v; size_t batch_index; @@ -49,76 +50,93 @@ struct BatchAssignment { }; /*! - * Helper class that synchronizes assignements of uncontractions - * to batches. A batch has a certain maximum allowed batch size. The - * class provides functionality to compute such an assignment in a - * thread-safe manner. Several threads can request a batch index - * and a position within that batch for its uncontraction it wants - * to assign. The class guarantees that each combination of - * (batch_index, batch_position) is unique and consecutive. - * Furthermore, it is ensured that batch_position is always - * smaller than max_batch_size. - */ -class BatchIndexAssigner { + * Helper class that synchronizes assignements of uncontractions + * to batches. A batch has a certain maximum allowed batch size. The + * class provides functionality to compute such an assignment in a + * thread-safe manner. Several threads can request a batch index + * and a position within that batch for its uncontraction it wants + * to assign. The class guarantees that each combination of + * (batch_index, batch_position) is unique and consecutive. + * Furthermore, it is ensured that batch_position is always + * smaller than max_batch_size. 
+ */ +class BatchIndexAssigner +{ using AtomicCounter = parallel::IntegralAtomicWrapper; - public: +public: explicit BatchIndexAssigner(const HypernodeID num_hypernodes, const size_t max_batch_size) : - _max_batch_size(max_batch_size), - _high_water_mark(0), - _current_batch_counter(num_hypernodes, AtomicCounter(0)), - _current_batch_sizes(num_hypernodes, AtomicCounter(0)) { } + _max_batch_size(max_batch_size), + _high_water_mark(0), _current_batch_counter(num_hypernodes, AtomicCounter(0)), + _current_batch_sizes(num_hypernodes, AtomicCounter(0)) + { + } BatchAssignment getBatchIndex(const size_t min_required_batch, - const size_t num_uncontractions = 1) { - if ( min_required_batch <= _high_water_mark ) { + const size_t num_uncontractions = 1) + { + if(min_required_batch <= _high_water_mark) + { size_t current_high_water_mark = _high_water_mark.load(); - const BatchAssignment assignment = findBatchAssignment( - current_high_water_mark, num_uncontractions); + const BatchAssignment assignment = + findBatchAssignment(current_high_water_mark, num_uncontractions); // Update high water mark in case batch index is greater than // current high water mark size_t current_batch_index = assignment.batch_index; increaseHighWaterMark(current_batch_index); return assignment; - } else { + } + else + { return findBatchAssignment(min_required_batch, num_uncontractions); } } - size_t batchSize(const size_t batch_index) const { + size_t batchSize(const size_t batch_index) const + { ASSERT(batch_index < _current_batch_sizes.size()); return _current_batch_sizes[batch_index]; } - void increaseHighWaterMark(size_t new_high_water_mark) { + void increaseHighWaterMark(size_t new_high_water_mark) + { size_t current_high_water_mark = _high_water_mark.load(); - while ( new_high_water_mark > current_high_water_mark ) { - _high_water_mark.compare_exchange_strong( - current_high_water_mark, new_high_water_mark); + while(new_high_water_mark > current_high_water_mark) + { + _high_water_mark.compare_exchange_strong(current_high_water_mark, + new_high_water_mark); } } - size_t numberOfNonEmptyBatches() { + size_t numberOfNonEmptyBatches() + { size_t current_batch = _high_water_mark; - if ( _current_batch_sizes[_high_water_mark] == 0 ) { - while ( current_batch > 0 && _current_batch_sizes[current_batch] == 0 ) { + if(_current_batch_sizes[_high_water_mark] == 0) + { + while(current_batch > 0 && _current_batch_sizes[current_batch] == 0) + { --current_batch; } - if ( _current_batch_sizes[current_batch] > 0 ) { + if(_current_batch_sizes[current_batch] > 0) + { ++current_batch; } - } else { - while ( _current_batch_sizes[current_batch] > 0 ) { + } + else + { + while(_current_batch_sizes[current_batch] > 0) + { ++current_batch; } } return current_batch; } - void reset(const size_t num_batches) { + void reset(const size_t num_batches) + { ASSERT(num_batches <= _current_batch_sizes.size()); _high_water_mark = 0; tbb::parallel_for(UL(0), num_batches, [&](const size_t i) { @@ -127,24 +145,26 @@ class BatchIndexAssigner { }); } - private: +private: BatchAssignment findBatchAssignment(const size_t start_batch_index, - const size_t num_uncontractions) { + const size_t num_uncontractions) + { size_t current_batch_index = start_batch_index; size_t batch_pos = _current_batch_counter[current_batch_index].fetch_add( - num_uncontractions, std::memory_order_relaxed); + num_uncontractions, std::memory_order_relaxed); // Search for batch in which atomic update of the batch counter // return a position smaller than max_batch_size. 
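// Minimal usage sketch for BatchIndexAssigner; the numeric values are hypothetical
// and for illustration only. Several threads may call getBatchIndex concurrently;
// every returned (batch_index, batch_pos) combination is unique, positions inside one
// batch are consecutive, and batch_pos is always smaller than max_batch_size.
BatchIndexAssigner assigner(/* num_hypernodes */ 1000, /* max_batch_size */ 64);
// Reserve room for three uncontractions, starting no earlier than batch 0:
const BatchAssignment slot =
    assigner.getBatchIndex(/* min_required_batch */ 0, /* num_uncontractions */ 3);
// slot.batch_index is the chosen batch and slot.batch_pos the position of the first
// of the three uncontractions, so the caller fills positions
// slot.batch_pos .. slot.batch_pos + 2 of that batch, as the caller in
// contraction_tree.cpp above does.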
- while ( batch_pos >= _max_batch_size ) { + while(batch_pos >= _max_batch_size) + { ++current_batch_index; ASSERT(current_batch_index < _current_batch_counter.size()); batch_pos = _current_batch_counter[current_batch_index].fetch_add( - num_uncontractions, std::memory_order_relaxed); + num_uncontractions, std::memory_order_relaxed); } ASSERT(batch_pos < _max_batch_size); _current_batch_sizes[current_batch_index] += num_uncontractions; - return BatchAssignment { kInvalidHypernode, - kInvalidHypernode, current_batch_index, batch_pos }; + return BatchAssignment{ kInvalidHypernode, kInvalidHypernode, current_batch_index, + batch_pos }; } const size_t _max_batch_size; @@ -153,7 +173,8 @@ class BatchIndexAssigner { parallel::scalable_vector _current_batch_sizes; }; -class ContractionTree { +class ContractionTree +{ static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; @@ -161,123 +182,106 @@ class ContractionTree { using Timepoint = HypernodeID; - public: - struct Interval { - explicit Interval() : - start(kInvalidHypernode), - end(kInvalidHypernode) { } +public: + struct Interval + { + explicit Interval() : start(kInvalidHypernode), end(kInvalidHypernode) {} Timepoint start; Timepoint end; }; - private: +private: /** * Represents a node in contraction tree and contains all information * associated with that node. */ - class Node { - public: - Node() : - _parent(0), - _pending_contractions(0), - _subtree_size(0), - _version(kInvalidVersion), - _interval() { } - - inline HypernodeID parent() const { - return _parent; - } + class Node + { + public: + Node() : + _parent(0), _pending_contractions(0), _subtree_size(0), _version(kInvalidVersion), + _interval() + { + } - inline void setParent(const HypernodeID parent) { - _parent = parent; - } + inline HypernodeID parent() const { return _parent; } - inline HypernodeID pendingContractions() const { - return _pending_contractions; - } + inline void setParent(const HypernodeID parent) { _parent = parent; } - inline void incrementPendingContractions() { - ++_pending_contractions; - } + inline HypernodeID pendingContractions() const { return _pending_contractions; } - inline void decrementPendingContractions() { - --_pending_contractions; - } + inline void incrementPendingContractions() { ++_pending_contractions; } - inline HypernodeID subtreeSize() const { - return _subtree_size; - } + inline void decrementPendingContractions() { --_pending_contractions; } - inline void setSubtreeSize(const HypernodeID subtree_size) { - _subtree_size = subtree_size; - } + inline HypernodeID subtreeSize() const { return _subtree_size; } - inline size_t version() const { - return _version; - } + inline void setSubtreeSize(const HypernodeID subtree_size) + { + _subtree_size = subtree_size; + } - inline void setVersion(const size_t version) { - _version = version; - } + inline size_t version() const { return _version; } - inline Interval interval() const { - return _interval; - } + inline void setVersion(const size_t version) { _version = version; } - inline void setInterval(const Timepoint start, const Timepoint end) { - ASSERT(start < end); - _interval.start = start; - _interval.end = end; - } + inline Interval interval() const { return _interval; } - inline void reset(const HypernodeID u) { - _parent = u; - _pending_contractions = 0; - _subtree_size = 0; - _version = kInvalidVersion; - _interval.start = kInvalidHypernode; - _interval.end = kInvalidHypernode; - } + inline void setInterval(const Timepoint start, const Timepoint end) + { + 
ASSERT(start < end); + _interval.start = start; + _interval.end = end; + } - private: - // ! Parent in the contraction tree - HypernodeID _parent; - // ! Number of pending contractions - HypernodeID _pending_contractions; - // ! Size of the subtree - HypernodeID _subtree_size; - // ! Version number of the hypergraph for which contract the corresponding vertex - size_t _version; - // ! "Time" interval on which the contraction of this node takes place - Interval _interval; + inline void reset(const HypernodeID u) + { + _parent = u; + _pending_contractions = 0; + _subtree_size = 0; + _version = kInvalidVersion; + _interval.start = kInvalidHypernode; + _interval.end = kInvalidHypernode; + } + + private: + // ! Parent in the contraction tree + HypernodeID _parent; + // ! Number of pending contractions + HypernodeID _pending_contractions; + // ! Size of the subtree + HypernodeID _subtree_size; + // ! Version number of the hypergraph for which contract the corresponding vertex + size_t _version; + // ! "Time" interval on which the contraction of this node takes place + Interval _interval; }; - static_assert(std::is_trivially_copyable::value, "Node is not trivially copyable"); + static_assert(std::is_trivially_copyable::value, + "Node is not trivially copyable"); - public: +public: // ! Iterator to iterate over the childs of a tree node using ChildIterator = typename parallel::scalable_vector::const_iterator; explicit ContractionTree() : - _num_hypernodes(0), - _finalized(false), - _tree(), - _roots(), - _version_roots(), - _out_degrees(), - _incidence_array() { } - - ContractionTree(ContractionTree&& other) : - _num_hypernodes(other._num_hypernodes), - _finalized(other._finalized), - _tree(std::move(other._tree)), - _roots(std::move(other._roots)), - _version_roots(std::move(other._version_roots)), - _out_degrees(std::move(other._out_degrees)), - _incidence_array(std::move(other._incidence_array)) { } - - ContractionTree& operator= (ContractionTree&& other) { + _num_hypernodes(0), _finalized(false), _tree(), _roots(), _version_roots(), + _out_degrees(), _incidence_array() + { + } + + ContractionTree(ContractionTree &&other) : + _num_hypernodes(other._num_hypernodes), _finalized(other._finalized), + _tree(std::move(other._tree)), _roots(std::move(other._roots)), + _version_roots(std::move(other._version_roots)), + _out_degrees(std::move(other._out_degrees)), + _incidence_array(std::move(other._incidence_array)) + { + } + + ContractionTree &operator=(ContractionTree &&other) + { _num_hypernodes = other._num_hypernodes; _finalized = other._finalized; _tree = std::move(other._tree); @@ -288,55 +292,57 @@ class ContractionTree { return *this; } - ~ContractionTree() { - freeInternalData(); - } + ~ContractionTree() { freeInternalData(); } // ####################### Tree Node Information ####################### - HypernodeID num_hypernodes() const { - return _num_hypernodes; - } + HypernodeID num_hypernodes() const { return _num_hypernodes; } // ! Returns the parent of node u - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeID parent(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeID parent(const HypernodeID u) const + { return node(u).parent(); } // ! Number of pending contractions of node u - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeID pendingContractions(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeID + pendingContractions(const HypernodeID u) const + { return node(u).pendingContractions(); } // ! 
Subtree size of node u - HypernodeID subtreeSize(const HypernodeID u) const { + HypernodeID subtreeSize(const HypernodeID u) const + { ASSERT(_finalized, "Information currently not available"); return node(u).subtreeSize(); } - - size_t version(const HypernodeID u) const { + size_t version(const HypernodeID u) const + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); return _tree[u].version(); } // ! Degree/Number of childs of node u - HypernodeID degree(const HypernodeID u) const { + HypernodeID degree(const HypernodeID u) const + { ASSERT(_finalized, "Information currently not available"); ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); return _out_degrees[u + 1] - _out_degrees[u]; } - const parallel::scalable_vector& roots() const { - return _roots; - } + const parallel::scalable_vector &roots() const { return _roots; } - const parallel::scalable_vector& roots_of_version(const size_t version) const { + const parallel::scalable_vector & + roots_of_version(const size_t version) const + { ASSERT(version < _version_roots.size()); return _version_roots[version]; } - Interval interval(const HypernodeID u) const { + Interval interval(const HypernodeID u) const + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); return node(u).interval(); } @@ -344,21 +350,25 @@ class ContractionTree { // ####################### Iterators ####################### // ! Returns a range to loop over the childs of a tree node u. - IteratorRange childs(const HypernodeID u) const { + IteratorRange childs(const HypernodeID u) const + { ASSERT(_finalized, "Information currently not available"); ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); - return IteratorRange( - _incidence_array.cbegin() + _out_degrees[u], - _incidence_array.cbegin() + _out_degrees[u + 1]); + return IteratorRange(_incidence_array.cbegin() + _out_degrees[u], + _incidence_array.cbegin() + _out_degrees[u + 1]); } // ! Calls function f for each child of vertex u with the corresponding version - template - void doForEachChildOfVersion(const HypernodeID u, const size_t version, const F& f) const { + template + void doForEachChildOfVersion(const HypernodeID u, const size_t version, + const F &f) const + { ASSERT(_finalized, "Information currently not available"); ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); - for ( const HypernodeID& v : childs(u) ) { - if ( _tree[v].version() == version ) { + for(const HypernodeID &v : childs(u)) + { + if(_tree[v].version() == version) + { f(v); } } @@ -367,74 +377,93 @@ class ContractionTree { // ####################### Contraction Functions ####################### // ! 
Registers a contraction in the contraction tree - void registerContraction(const HypernodeID u, const HypernodeID v, const size_t version = 0) { + void registerContraction(const HypernodeID u, const HypernodeID v, + const size_t version = 0) + { node(u).incrementPendingContractions(); node(v).setParent(u); node(v).setVersion(version); } - template - bool registerContraction(const HypernodeID u, const HypernodeID v, const size_t version, A acquire, R release) { + template + bool registerContraction(const HypernodeID u, const HypernodeID v, const size_t version, + A acquire, R release) + { // Acquires ownership of vertex v that gives the calling thread exclusive rights // to modify the contraction tree entry of v acquire(v); // If there is no other contraction registered for vertex v // we try to determine its representative in the contraction tree - if ( parent(v) == v ) { + if(parent(v) == v) + { HypernodeID w = u; bool cycle_detected = false; - while ( true ) { + while(true) + { // Search for representative of u in the contraction tree. // It is either a root of the contraction tree or a vertex // with a reference count greater than zero, which indicates // that there are still ongoing contractions on this node that // have to be processed. - while ( parent(w) != w && pendingContractions(w) == 0 ) { + while(parent(w) != w && pendingContractions(w) == 0) + { w = parent(w); - if ( w == v ) { + if(w == v) + { cycle_detected = true; break; } } - if ( !cycle_detected ) { + if(!cycle_detected) + { // In case contraction of u and v does not induce any // cycle in the contraction tree we try to acquire vertex w - if ( w < v ) { + if(w < v) + { // Acquire ownership in correct order to prevent deadlocks release(v); acquire(w); acquire(v); - if ( parent(v) != v ) { + if(parent(v) != v) + { release(v); release(w); return false; } - } else { + } + else + { acquire(w); } // Double-check condition of while loop above after acquiring // ownership of w - if ( parent(w) != w && pendingContractions(w) == 0 ) { + if(parent(w) != w && pendingContractions(w) == 0) + { // In case something changed, we release ownership of w and // search again for the representative of u. release(w); - } else { + } + else + { // Otherwise we perform final cycle check to verify that // contraction of u and v will not introduce any new cycle. HypernodeID x = w; - do { + do + { x = parent(x); - if ( x == v ) { + if(x == v) + { cycle_detected = true; break; } - } while ( parent(x) != x ); + } while(parent(x) != x); - if ( cycle_detected ) { + if(cycle_detected) + { release(w); release(v); return false; @@ -444,7 +473,9 @@ class ContractionTree { // reference count of w and update the contraction tree break; } - } else { + } + else + { release(v); return false; } @@ -457,7 +488,9 @@ class ContractionTree { release(w); release(v); return true; - } else { + } + else + { release(v); return false; } @@ -466,30 +499,37 @@ class ContractionTree { // ! 
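// The templated registerContraction above expects two callables that acquire and
// release exclusive ownership of a node while its contraction tree entry is being
// modified. A minimal sketch of a caller, assuming a hypothetical per-node lock array;
// the library's actual ownership mechanism may differ.
#include <mutex>
#include <vector>

bool tryRegister(ContractionTree &tree, std::vector<std::mutex> &node_locks,
                 const HypernodeID u, const HypernodeID v)
{
  return tree.registerContraction(
      u, v, /* version */ 0,
      [&](const HypernodeID hn) { node_locks[hn].lock(); },    // acquire
      [&](const HypernodeID hn) { node_locks[hn].unlock(); }); // release
}
// registerContraction returns false if registering (u, v) would close a cycle in the
// tree or if another contraction involving v was registered concurrently; in that
// case no pending contraction is recorded.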
Unregisters a contraction in the contraction tree void unregisterContraction(const HypernodeID u, const HypernodeID v, const Timepoint start, const Timepoint end, - const bool failed = false) { + const bool failed = false) + { ASSERT(node(v).parent() == u, "Node" << u << "is not parent of node" << v); - ASSERT(node(u).pendingContractions() > 0, "There are no pending contractions for node" << u); + ASSERT(node(u).pendingContractions() > 0, + "There are no pending contractions for node" << u); node(u).decrementPendingContractions(); - if ( failed ) { + if(failed) + { node(v).setParent(v); node(v).setVersion(kInvalidVersion); - } else { + } + else + { node(v).setInterval(start, end); } } - BatchVector createBatchUncontractionHierarchyForVersion(BatchIndexAssigner& batch_assigner, - const size_t version); + BatchVector + createBatchUncontractionHierarchyForVersion(BatchIndexAssigner &batch_assigner, + const size_t version); // ! Only for testing - void setParent(const HypernodeID u, const HypernodeID v, const size_t version = 0) { + void setParent(const HypernodeID u, const HypernodeID v, const size_t version = 0) + { node(u).setParent(v); node(u).setVersion(version); } - // ! Only for testing - void decrementPendingContractions(const HypernodeID u) { + void decrementPendingContractions(const HypernodeID u) + { node(u).decrementPendingContractions(); } @@ -517,32 +557,35 @@ class ContractionTree { // ! Free internal data in parallel void freeInternalData(); - void memoryConsumption(utils::MemoryTreeNode* parent) const; + void memoryConsumption(utils::MemoryTreeNode *parent) const; - private: +private: // ! Accessor for contraction tree-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Node& node(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Node &node(const HypernodeID u) const + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); return _tree[u]; } // ! 
To avoid code duplication we implement non-const version in terms of const version - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Node& node(const HypernodeID u) { - return const_cast(static_cast(*this).node(u)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Node &node(const HypernodeID u) + { + return const_cast(static_cast(*this).node(u)); } bool verifyBatchIndexAssignments( - const BatchIndexAssigner& batch_assigner, - const parallel::scalable_vector>& local_batch_assignments) const; + const BatchIndexAssigner &batch_assigner, + const parallel::scalable_vector > + &local_batch_assignments) const; HypernodeID _num_hypernodes; bool _finalized; parallel::scalable_vector _tree; parallel::scalable_vector _roots; - parallel::scalable_vector> _version_roots; - parallel::scalable_vector> _out_degrees; + parallel::scalable_vector > _version_roots; + parallel::scalable_vector > _out_degrees; parallel::scalable_vector _incidence_array; }; -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/delta_connectivity_set.h b/mt-kahypar/datastructures/delta_connectivity_set.h index 27f2077c9..7ede70db0 100644 --- a/mt-kahypar/datastructures/delta_connectivity_set.h +++ b/mt-kahypar/datastructures/delta_connectivity_set.h @@ -28,114 +28,123 @@ #pragma once #include -#include -#include #include +#include +#include #include "tbb/enumerable_thread_specific.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/datastructures/array.h" -#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/datastructures/connectivity_set.h" #include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/utils/range.h" +#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/utils/range.h" namespace mt_kahypar { namespace ds { /** * Data structure maintains the connectivity set relative to an shared connectivity set in - * the global partition. It is used in the thread-local partition data structure to apply moves - * that are not visible to other threads. The shared and thread-local connectivity set store - * the connectivity set of a hyperedge as a bitset of size k. If a move adds or removes a block - * from the connectivity set of hyperedge, we set a bit representing the block to one. We then compute - * the thread-local connectivity set of hyperedge with a xor operation between the bitset in shared - * and thread-local partition. + * the global partition. It is used in the thread-local partition data structure to apply + * moves that are not visible to other threads. The shared and thread-local connectivity + * set store the connectivity set of a hyperedge as a bitset of size k. If a move adds or + * removes a block from the connectivity set of hyperedge, we set a bit representing the + * block to one. We then compute the thread-local connectivity set of hyperedge with a xor + * operation between the bitset in shared and thread-local partition. */ -template -class DeltaConnectivitySet { +template +class DeltaConnectivitySet +{ - public: +public: static constexpr bool debug = false; static constexpr int BITS_PER_BLOCK = StaticBitset::BITS_PER_BLOCK; using UnsafeBlock = StaticBitset::Block; - private: +private: // ! 
Iterator enumerates the position of all one bits in a bitset - class OneBitIterator { - public: + class OneBitIterator + { + public: using iterator_category = std::forward_iterator_tag; using value_type = PartitionID; - using reference = PartitionID&; - using pointer = PartitionID*; + using reference = PartitionID &; + using pointer = PartitionID *; using difference_type = std::ptrdiff_t; - OneBitIterator(const size_t num_blocks, - const UnsafeBlock* shared_bitset, - const UnsafeBlock* thread_local_bitset, + OneBitIterator(const size_t num_blocks, const UnsafeBlock *shared_bitset, + const UnsafeBlock *thread_local_bitset, const PartitionID start_block) : - _num_blocks(num_blocks), - _shared_bitset(shared_bitset), - _thread_local_bitset(thread_local_bitset), - _max_block_id(num_blocks * BITS_PER_BLOCK), - _current_block_id(start_block) { - if ( _current_block_id < _max_block_id ) { + _num_blocks(num_blocks), + _shared_bitset(shared_bitset), _thread_local_bitset(thread_local_bitset), + _max_block_id(num_blocks * BITS_PER_BLOCK), _current_block_id(start_block) + { + if(_current_block_id < _max_block_id) + { nextBlockID(); } } - PartitionID operator*() const { - return _current_block_id; - } + PartitionID operator*() const { return _current_block_id; } - OneBitIterator& operator++() { + OneBitIterator &operator++() + { nextBlockID(); return *this; } - OneBitIterator operator++(int ) { + OneBitIterator operator++(int) + { const OneBitIterator res = *this; nextBlockID(); return res; } - bool operator==(const OneBitIterator& o) const { + bool operator==(const OneBitIterator &o) const + { return _current_block_id == o._current_block_id; } - bool operator!=(const OneBitIterator& o) const { - return !operator==(o); - } + bool operator!=(const OneBitIterator &o) const { return !operator==(o); } - private: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void nextBlockID() { + private: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void nextBlockID() + { ++_current_block_id; UnsafeBlock b = _current_block_id < _max_block_id ? loadCurrentBlock() : 0; - while ( b >> ( _current_block_id % BITS_PER_BLOCK ) == 0 && _current_block_id < _max_block_id ) { + while(b >> (_current_block_id % BITS_PER_BLOCK) == 0 && + _current_block_id < _max_block_id) + { // no more one bits in current block -> load next block _current_block_id += (BITS_PER_BLOCK - (_current_block_id % BITS_PER_BLOCK)); b = _current_block_id < _max_block_id ? 
loadCurrentBlock() : 0; } - if ( _current_block_id < _max_block_id ) { - _current_block_id += utils::lowest_set_bit_64(b >> ( _current_block_id % BITS_PER_BLOCK )); - } else { + if(_current_block_id < _max_block_id) + { + _current_block_id += + utils::lowest_set_bit_64(b >> (_current_block_id % BITS_PER_BLOCK)); + } + else + { _current_block_id = _max_block_id; } } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE UnsafeBlock loadCurrentBlock() { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE UnsafeBlock loadCurrentBlock() + { ASSERT(static_cast(_current_block_id / BITS_PER_BLOCK) < _num_blocks); const size_t block_idx = _current_block_id / BITS_PER_BLOCK; - return __atomic_load_n(_shared_bitset + block_idx, __ATOMIC_RELAXED) ^ *( _thread_local_bitset + block_idx ); + return __atomic_load_n(_shared_bitset + block_idx, __ATOMIC_RELAXED) ^ + *(_thread_local_bitset + block_idx); } const size_t _num_blocks; - const UnsafeBlock* _shared_bitset; - const UnsafeBlock* _thread_local_bitset; + const UnsafeBlock *_shared_bitset; + const UnsafeBlock *_thread_local_bitset; const PartitionID _max_block_id; PartitionID _current_block_id; }; @@ -144,31 +153,28 @@ class DeltaConnectivitySet { using Iterator = OneBitIterator; DeltaConnectivitySet() : - _connectivity_set(nullptr), - _k(0), - _num_blocks_per_hyperedge(0), - _touched_hes(), - _delta_connectivity_set(), - _empty_connectivity_set(), - _deep_copy_bitset() { } + _connectivity_set(nullptr), _k(0), _num_blocks_per_hyperedge(0), _touched_hes(), + _delta_connectivity_set(), _empty_connectivity_set(), _deep_copy_bitset() + { + } DeltaConnectivitySet(const PartitionID k) : - _connectivity_set(nullptr), - _k(k), - _num_blocks_per_hyperedge(k / BITS_PER_BLOCK + (k % BITS_PER_BLOCK != 0)), - _touched_hes(), - _delta_connectivity_set(), - _empty_connectivity_set(), - _deep_copy_bitset() { + _connectivity_set(nullptr), _k(k), + _num_blocks_per_hyperedge(k / BITS_PER_BLOCK + (k % BITS_PER_BLOCK != 0)), + _touched_hes(), _delta_connectivity_set(), _empty_connectivity_set(), + _deep_copy_bitset() + { _empty_connectivity_set.assign(_num_blocks_per_hyperedge, 0); } - void setConnectivitySet(const ConnectivitySet* connectivity_set) { + void setConnectivitySet(const ConnectivitySet *connectivity_set) + { ASSERT(connectivity_set); _connectivity_set = connectivity_set; } - void setNumberOfBlocks(const PartitionID k) { + void setNumberOfBlocks(const PartitionID k) + { _k = k; _num_blocks_per_hyperedge = k / BITS_PER_BLOCK + (k % BITS_PER_BLOCK != 0); _empty_connectivity_set.clear(); @@ -176,100 +182,121 @@ class DeltaConnectivitySet { } // ! Returns an iterator over the connectivity set of the corresponding hyperedge - IteratorRange connectivitySet(const HyperedgeID he) const { + IteratorRange connectivitySet(const HyperedgeID he) const + { ASSERT(_connectivity_set); - const size_t* entry = _touched_hes.get_if_contained(he); - const UnsafeBlock* shared_connectivity_set = _connectivity_set->shallowCopy(he).data(); - const UnsafeBlock* thread_local_connectivity_set = entry ? - &_delta_connectivity_set[*entry] : _empty_connectivity_set.data(); + const size_t *entry = _touched_hes.get_if_contained(he); + const UnsafeBlock *shared_connectivity_set = + _connectivity_set->shallowCopy(he).data(); + const UnsafeBlock *thread_local_connectivity_set = + entry ? 
&_delta_connectivity_set[*entry] : _empty_connectivity_set.data(); return IteratorRange( - Iterator(_num_blocks_per_hyperedge, shared_connectivity_set, thread_local_connectivity_set, -1), - Iterator(_num_blocks_per_hyperedge, shared_connectivity_set, thread_local_connectivity_set, - _num_blocks_per_hyperedge * BITS_PER_BLOCK)); + Iterator(_num_blocks_per_hyperedge, shared_connectivity_set, + thread_local_connectivity_set, -1), + Iterator(_num_blocks_per_hyperedge, shared_connectivity_set, + thread_local_connectivity_set, + _num_blocks_per_hyperedge * BITS_PER_BLOCK)); } // ! Adds the block to the connectivity set of the hyperedge - void add(const HyperedgeID he, const PartitionID p) { + void add(const HyperedgeID he, const PartitionID p) + { ASSERT(p != kInvalidPartition && p < _k); toggle(he, p); } // ! Removes the block from the connectivity set of the hyperedge - void remove(const HyperedgeID he, const PartitionID p) { + void remove(const HyperedgeID he, const PartitionID p) + { ASSERT(p != kInvalidPartition && p < _k); toggle(he, p); } // ! Returns true, if the block is contained in the connectivity set of the hyperedge - bool contains(const HyperedgeID he, const PartitionID p) const { + bool contains(const HyperedgeID he, const PartitionID p) const + { ASSERT(_connectivity_set); ASSERT(p != kInvalidPartition && p < _k); return _connectivity_set->contains(he, p) ^ isSet(he, p); } // ! Clears all touched entries of the thread-local connectivity set - void reset() { + void reset() + { _touched_hes.clear(); _delta_connectivity_set.clear(); } // ! Returns the number of blocks contained in the hyperedge - PartitionID connectivity(const HyperedgeID he) const { + PartitionID connectivity(const HyperedgeID he) const + { ASSERT(_connectivity_set); - ds::StaticBitset& connectivity_set = _connectivity_set->shallowCopy(he); - const size_t* entry = _touched_hes.get_if_contained(he); - if ( entry ) { + ds::StaticBitset &connectivity_set = _connectivity_set->shallowCopy(he); + const size_t *entry = _touched_hes.get_if_contained(he); + if(entry) + { PartitionID connectivity = 0; - const UnsafeBlock* original_data = connectivity_set.data(); - const UnsafeBlock* delta_data = &_delta_connectivity_set[*entry]; - for ( size_t i = 0; i < _num_blocks_per_hyperedge; ++i ) { - connectivity += utils::popcount_64( *(original_data + i) ^ *(delta_data + i) ); + const UnsafeBlock *original_data = connectivity_set.data(); + const UnsafeBlock *delta_data = &_delta_connectivity_set[*entry]; + for(size_t i = 0; i < _num_blocks_per_hyperedge; ++i) + { + connectivity += utils::popcount_64(*(original_data + i) ^ *(delta_data + i)); } return connectivity; - } else { + } + else + { return connectivity_set.popcount(); } } - Bitset& deepCopy(const HyperedgeID he) const { + Bitset &deepCopy(const HyperedgeID he) const + { ASSERT(_connectivity_set); - StaticBitset& shared_con_set = _connectivity_set->shallowCopy(he); - const size_t* entry = _touched_hes.get_if_contained(he); - const UnsafeBlock* data = entry ? - &_delta_connectivity_set[*entry] : _empty_connectivity_set.data(); + StaticBitset &shared_con_set = _connectivity_set->shallowCopy(he); + const size_t *entry = _touched_hes.get_if_contained(he); + const UnsafeBlock *data = + entry ? 
&_delta_connectivity_set[*entry] : _empty_connectivity_set.data(); StaticBitset thread_local_con_set(_num_blocks_per_hyperedge, data); _deep_copy_bitset = shared_con_set ^ thread_local_con_set; return _deep_copy_bitset; } - size_t size_in_bytes() const { - return _touched_hes.size_in_bytes() + _delta_connectivity_set.capacity() * sizeof(UnsafeBlock); + size_t size_in_bytes() const + { + return _touched_hes.size_in_bytes() + + _delta_connectivity_set.capacity() * sizeof(UnsafeBlock); } - void freeInternalData() { + void freeInternalData() + { _touched_hes.freeInternalData(); _delta_connectivity_set.clear(); _delta_connectivity_set.shrink_to_fit(); } private: - void toggle(const HyperedgeID he, const PartitionID p) { - const size_t* entry = _touched_hes.get_if_contained(he); + void toggle(const HyperedgeID he, const PartitionID p) + { + const size_t *entry = _touched_hes.get_if_contained(he); size_t pos = entry ? *entry : _delta_connectivity_set.size(); - if ( !entry ) { + if(!entry) + { _touched_hes[he] = pos; _delta_connectivity_set.resize( - _delta_connectivity_set.size() + _num_blocks_per_hyperedge, 0); + _delta_connectivity_set.size() + _num_blocks_per_hyperedge, 0); } const size_t offset = p / BITS_PER_BLOCK; const size_t idx = p % BITS_PER_BLOCK; _delta_connectivity_set[pos + offset] ^= (UL(1) << idx); - } + } - bool isSet(const HyperedgeID he, const PartitionID p) const { + bool isSet(const HyperedgeID he, const PartitionID p) const + { bool is_set = false; - const size_t* entry = _touched_hes.get_if_contained(he); - if ( entry ) { + const size_t *entry = _touched_hes.get_if_contained(he); + if(entry) + { const size_t offset = p / BITS_PER_BLOCK; const size_t idx = p % BITS_PER_BLOCK; is_set = _delta_connectivity_set[*entry + offset] & (UnsafeBlock(1) << idx); @@ -277,9 +304,9 @@ class DeltaConnectivitySet { return is_set; } - const ConnectivitySet* _connectivity_set; - PartitionID _k; - size_t _num_blocks_per_hyperedge; + const ConnectivitySet *_connectivity_set; + PartitionID _k; + size_t _num_blocks_per_hyperedge; DynamicFlatMap _touched_hes; vec _delta_connectivity_set; @@ -289,7 +316,5 @@ class DeltaConnectivitySet { mutable Bitset _deep_copy_bitset; }; - - -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/delta_partitioned_graph.h b/mt-kahypar/datastructures/delta_partitioned_graph.h index 43e11c484..591d3deb6 100644 --- a/mt-kahypar/datastructures/delta_partitioned_graph.h +++ b/mt-kahypar/datastructures/delta_partitioned_graph.h @@ -33,10 +33,10 @@ #include "kahypar-resources/meta/mandatory.h" +#include "mt-kahypar/datastructures/connectivity_set.h" +#include "mt-kahypar/datastructures/delta_connectivity_set.h" #include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/datastructures/delta_connectivity_set.h" -#include "mt-kahypar/datastructures/connectivity_set.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/utils/exception.h" @@ -49,10 +49,10 @@ namespace ds { * This is a variant of DeltaPartitionedHypergraph specialized for graphs. * See delte_partitioned_hypergraph.h for more details. 
*/ -template -class DeltaPartitionedGraph { - private: +template +class DeltaPartitionedGraph +{ +private: static constexpr size_t MAP_SIZE_LARGE = 16384; static constexpr size_t MAP_SIZE_MOVE_DELTA = 8192; static constexpr size_t MAP_SIZE_SMALL = 128; @@ -64,40 +64,39 @@ class DeltaPartitionedGraph { using DummyConnectivitySet = DeltaConnectivitySet; using ConnectivitySetIterator = typename DummyConnectivitySet::Iterator; - public: +public: static constexpr bool supports_connectivity_set = false; - static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = PartitionedGraph::HIGH_DEGREE_THRESHOLD; - - DeltaPartitionedGraph(const Context& context) : - _k(context.partition.k), - _pg(nullptr), - _part_weights_delta(context.partition.k, 0), - _part_ids_delta(), - _dummy_connectivity_set() { - const bool top_level = context.type == ContextType::main; - _part_ids_delta.initialize(MAP_SIZE_SMALL); - } + static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = + PartitionedGraph::HIGH_DEGREE_THRESHOLD; + + DeltaPartitionedGraph(const Context &context) : + _k(context.partition.k), _pg(nullptr), _part_weights_delta(context.partition.k, 0), + _part_ids_delta(), _dummy_connectivity_set() + { + const bool top_level = context.type == ContextType::main; + _part_ids_delta.initialize(MAP_SIZE_SMALL); + } - DeltaPartitionedGraph(const DeltaPartitionedGraph&) = delete; - DeltaPartitionedGraph & operator= (const DeltaPartitionedGraph &) = delete; + DeltaPartitionedGraph(const DeltaPartitionedGraph &) = delete; + DeltaPartitionedGraph &operator=(const DeltaPartitionedGraph &) = delete; - DeltaPartitionedGraph(DeltaPartitionedGraph&& other) = default; - DeltaPartitionedGraph & operator= (DeltaPartitionedGraph&& other) = default; + DeltaPartitionedGraph(DeltaPartitionedGraph &&other) = default; + DeltaPartitionedGraph &operator=(DeltaPartitionedGraph &&other) = default; ~DeltaPartitionedGraph() = default; - void setPartitionedHypergraph(PartitionedGraph* pg) { - _pg = pg; - } + void setPartitionedHypergraph(PartitionedGraph *pg) { _pg = pg; } // ####################### Mapping ###################### - bool hasTargetGraph() const { + bool hasTargetGraph() const + { ASSERT(_pg); return _pg->hasTargetGraph(); } - const TargetGraph* targetGraph() const { + const TargetGraph *targetGraph() const + { ASSERT(_pg); return _pg->targetGraph(); } @@ -105,37 +104,43 @@ class DeltaPartitionedGraph { // ####################### Iterators ####################### // ! Returns an iterator over the set of active nodes of the hypergraph - IteratorRange nodes() const { + IteratorRange nodes() const + { ASSERT(_pg); return _pg->nodes(); } // ! Returns an iterator over the set of active edges of the hypergraph - IteratorRange edges() const { + IteratorRange edges() const + { ASSERT(_pg); return _pg->edges(); } // ! Returns a range to loop over the incident nets of hypernode u. - IteratorRange incidentEdges(const HypernodeID u) const { + IteratorRange incidentEdges(const HypernodeID u) const + { ASSERT(_pg); return _pg->incidentEdges(u); } // ! Returns a range to loop over the pins of hyperedge e. 
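// The following is a minimal, self-contained sketch of the XOR-based delta trick
// described in delta_connectivity_set.h above: the thread-local delta stores only the
// bits that were toggled locally, and the connectivity set seen by the local search is
// the XOR of the shared bitset and the delta bitset. The names and the fixed block
// count of 64 used here are illustrative assumptions, not the actual Mt-KaHyPar types.
#include <bitset>
#include <cassert>
#include <cstddef>

int main()
{
  constexpr std::size_t kMaxBlocks = 64;
  std::bitset<kMaxBlocks> shared; // connectivity set visible to all threads
  std::bitset<kMaxBlocks> delta;  // thread-local toggles (add == remove == flip)

  shared.set(0); // hyperedge currently connects blocks 0 and 3
  shared.set(3);

  delta.flip(3); // local move removes the last pin from block 3 ...
  delta.flip(5); // ... and adds a pin to block 5

  const std::bitset<kMaxBlocks> effective = shared ^ delta;
  assert(effective.test(0) && !effective.test(3) && effective.test(5));
  assert(effective.count() == 2); // connectivity as seen by this thread only
  return 0;
}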
- IteratorRange pins(const HyperedgeID e) const { + IteratorRange pins(const HyperedgeID e) const + { ASSERT(_pg); return _pg->pins(e); } // ####################### Hypernode Information ####################### - HypernodeWeight nodeWeight(const HypernodeID u) const { + HypernodeWeight nodeWeight(const HypernodeID u) const + { ASSERT(_pg); return _pg->nodeWeight(u); } - HyperedgeID nodeDegree(const HypernodeID u) const { + HyperedgeID nodeDegree(const HypernodeID u) const + { ASSERT(_pg); return _pg->nodeDegree(u); } @@ -143,28 +148,27 @@ class DeltaPartitionedGraph { // ####################### Hyperedge Information ####################### // ! Target of an edge - HypernodeID edgeTarget(const HyperedgeID e) const { - return _pg->edgeTarget(e); - } + HypernodeID edgeTarget(const HyperedgeID e) const { return _pg->edgeTarget(e); } // ! Source of an edge - HypernodeID edgeSource(const HyperedgeID e) const { - return _pg->edgeSource(e); - } + HypernodeID edgeSource(const HyperedgeID e) const { return _pg->edgeSource(e); } // ! Returns true, if the edge is selfloop - bool isSinglePin(const HyperedgeID e) const { + bool isSinglePin(const HyperedgeID e) const + { ASSERT(_pg); return _pg->isSinglePin(e); } // ! Number of pins of an edge - HypernodeID edgeSize(const HyperedgeID e) const { + HypernodeID edgeSize(const HyperedgeID e) const + { ASSERT(_pg); return _pg->edgeSize(e); } - HyperedgeWeight edgeWeight(const HyperedgeID e) const { + HyperedgeWeight edgeWeight(const HyperedgeID e) const + { ASSERT(_pg); return _pg->edgeWeight(e); } @@ -174,18 +178,17 @@ class DeltaPartitionedGraph { // ! Changes the block of hypernode u from 'from' to 'to'. // ! Move is successful, if it is not violating the balance // ! constraint specified by 'max_weight_to'. - template - bool changeNodePart(const HypernodeID u, - const PartitionID from, - const PartitionID to, - const HypernodeWeight max_weight_to, - DeltaFunc&& delta_func) { + template + bool changeNodePart(const HypernodeID u, const PartitionID from, const PartitionID to, + const HypernodeWeight max_weight_to, DeltaFunc &&delta_func) + { ASSERT(_pg); ASSERT(partID(u) == from); ASSERT(from != to); const HypernodeWeight weight = _pg->nodeWeight(u); - if (partWeight(to) + weight <= max_weight_to) { + if(partWeight(to) + weight <= max_weight_to) + { _part_ids_delta[u] = to; _part_weights_delta[to] += weight; _part_weights_delta[from] -= weight; @@ -194,7 +197,8 @@ class DeltaPartitionedGraph { sync_update.from = from; sync_update.to = to; sync_update.target_graph = _pg->targetGraph(); - for (const HyperedgeID edge : _pg->incidentEdges(u)) { + for(const HyperedgeID edge : _pg->incidentEdges(u)) + { const PartitionID target_part = partID(_pg->edgeTarget(edge)); sync_update.he = edge; sync_update.edge_weight = _pg->edgeWeight(edge); @@ -204,142 +208,160 @@ class DeltaPartitionedGraph { delta_func(sync_update); } return true; - } else { + } + else + { return false; } } - bool changeNodePart(const HypernodeID u, - const PartitionID from, - const PartitionID to, - const HypernodeWeight max_weight_to) { + bool changeNodePart(const HypernodeID u, const PartitionID from, const PartitionID to, + const HypernodeWeight max_weight_to) + { ASSERT(_pg); ASSERT(partID(u) == from); ASSERT(from != to); const HypernodeWeight weight = _pg->nodeWeight(u); - if (partWeight(to) + weight <= max_weight_to) { + if(partWeight(to) + weight <= max_weight_to) + { _part_ids_delta[u] = to; _part_weights_delta[to] += weight; _part_weights_delta[from] -= weight; return true; - } else { + } + 
else + { return false; } } // ! Returns the block of hypernode u - PartitionID partID(const HypernodeID u) const { + PartitionID partID(const HypernodeID u) const + { ASSERT(_pg); - const PartitionID* part_id = _part_ids_delta.get_if_contained(u); + const PartitionID *part_id = _part_ids_delta.get_if_contained(u); return part_id ? *part_id : _pg->partID(u); } // ! Returns if the node is a fixed vertex - bool isFixed(const HypernodeID u) const { + bool isFixed(const HypernodeID u) const + { ASSERT(_pg); return _pg->isFixed(u); } // ! Returns the total weight of block p - HypernodeWeight partWeight(const PartitionID p) const { + HypernodeWeight partWeight(const PartitionID p) const + { ASSERT(_pg); ASSERT(p != kInvalidPartition && p < _k); return _pg->partWeight(p) + _part_weights_delta[p]; } // ! Returns the number of pins of edge e in block p - HypernodeID pinCountInPart(const HyperedgeID e, const PartitionID p) const { + HypernodeID pinCountInPart(const HyperedgeID e, const PartitionID p) const + { ASSERT(_pg); ASSERT(e < _pg->initialNumEdges(), "Hyperedge" << e << "does not exist"); ASSERT(_pg->edgeIsEnabled(e), "Hyperedge" << e << "is disabled"); ASSERT(p != kInvalidPartition && p < _k); HypernodeID count = 0; - if (p == partID(edgeSource(e))) { + if(p == partID(edgeSource(e))) + { count++; } - if (!_pg->isSinglePin(e) && p == partID(edgeTarget(e))) { + if(!_pg->isSinglePin(e) && p == partID(edgeTarget(e))) + { count++; } return count; } // ! Returns an iterator over the connectivity set of hyperedge he (not supported) - IteratorRange connectivitySet(const HyperedgeID e) const { + IteratorRange connectivitySet(const HyperedgeID e) const + { throw NonSupportedOperationException("Not supported for graphs"); return _dummy_connectivity_set.connectivitySet(e); } // ! Returns the number of blocks contained in hyperedge he (not supported) - PartitionID connectivity(const HyperedgeID e) const { + PartitionID connectivity(const HyperedgeID e) const + { throw NonSupportedOperationException("Not supported for graphs"); return _dummy_connectivity_set.connectivity(e); } // ! Creates a deep copy of the connectivity set of hyperedge he (not supported) - Bitset& deepCopyOfConnectivitySet(const HyperedgeID he) const { + Bitset &deepCopyOfConnectivitySet(const HyperedgeID he) const + { throw NonSupportedOperationException("Not supported for graphs"); return _dummy_connectivity_set.deepCopy(he); } // ! 
Clears all deltas applied to the partitioned hypergraph - void clear() { + void clear() + { // O(k) _part_weights_delta.assign(_k, 0); // Constant Time _part_ids_delta.clear(); } - void dropMemory() { - if (!_memory_dropped) { + void dropMemory() + { + if(!_memory_dropped) + { _memory_dropped = true; _part_ids_delta.freeInternalData(); } } - size_t combinedMemoryConsumption() const { - return _part_ids_delta.size_in_bytes(); - } + size_t combinedMemoryConsumption() const { return _part_ids_delta.size_in_bytes(); } - PartitionID k() const { - return _k; - } + PartitionID k() const { return _k; } - void changeNumberOfBlocks(const PartitionID new_k) { - if ( new_k > _k ) { + void changeNumberOfBlocks(const PartitionID new_k) + { + if(new_k > _k) + { _part_weights_delta.assign(new_k, 0); } _k = new_k; } - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); - utils::MemoryTreeNode* delta_pg_node = parent->addChild("Delta Partitioned Hypergraph"); - utils::MemoryTreeNode* part_weights_node = delta_pg_node->addChild("Delta Part Weights"); - part_weights_node->updateSize(_part_weights_delta.capacity() * sizeof(HypernodeWeight)); - utils::MemoryTreeNode* part_ids_node = delta_pg_node->addChild("Delta Part IDs"); + utils::MemoryTreeNode *delta_pg_node = + parent->addChild("Delta Partitioned Hypergraph"); + utils::MemoryTreeNode *part_weights_node = + delta_pg_node->addChild("Delta Part Weights"); + part_weights_node->updateSize(_part_weights_delta.capacity() * + sizeof(HypernodeWeight)); + utils::MemoryTreeNode *part_ids_node = delta_pg_node->addChild("Delta Part IDs"); part_ids_node->updateSize(_part_ids_delta.size_in_bytes()); } - private: +private: bool _memory_dropped = false; // ! Number of blocks PartitionID _k; // ! Partitioned graph where all deltas are stored relative to - PartitionedGraph* _pg; + PartitionedGraph *_pg; // ! Delta for block weights - vec< HypernodeWeight > _part_weights_delta; + vec _part_weights_delta; // ! Stores for each locally moved node its new block id DynamicFlatMap _part_ids_delta; // ! Maintain the connectivity set is not supported in the delta partitioned graph. - // ! We therefore add here a dummy delta connectivity set to implement the same interface - // ! as the delta partitioned hypergraph + // ! We therefore add here a dummy delta connectivity set to implement the same + // interface ! as the delta partitioned hypergraph DummyConnectivitySet _dummy_connectivity_set; }; diff --git a/mt-kahypar/datastructures/delta_partitioned_hypergraph.h b/mt-kahypar/datastructures/delta_partitioned_hypergraph.h index 9eb52ab3d..396001b89 100644 --- a/mt-kahypar/datastructures/delta_partitioned_hypergraph.h +++ b/mt-kahypar/datastructures/delta_partitioned_hypergraph.h @@ -32,11 +32,11 @@ #include "kahypar-resources/meta/mandatory.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" -#include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/datastructures/delta_connectivity_set.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/context.h" +#include "mt-kahypar/datastructures/delta_connectivity_set.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/datastructures/sparse_map.h" namespace mt_kahypar { namespace ds { @@ -53,14 +53,15 @@ namespace ds { * to it. * The rationale behind this is that the majority of local searches to not yield to * an improvement and are immediatly reverted. 
However, applying them directly to global - * partitioned hypergraph would affect other local searches running concurrently, which build - * upon that state. This special partitioned hypergraph allows a local search to hide its - * current search state from other searches in a space efficient manner. + * partitioned hypergraph would affect other local searches running concurrently, which + * build upon that state. This special partitioned hypergraph allows a local search to + * hide its current search state from other searches in a space efficient manner. */ template -class DeltaPartitionedHypergraph { - private: +class DeltaPartitionedHypergraph +{ +private: static constexpr size_t MAP_SIZE_LARGE = 16384; static constexpr size_t MAP_SIZE_MOVE_DELTA = 8192; static constexpr size_t MAP_SIZE_SMALL = 128; @@ -72,114 +73,127 @@ class DeltaPartitionedHypergraph { using DeltaConSet = DeltaConnectivitySet; using ConnectivitySetIterator = typename DeltaConSet::Iterator; - public: +public: static constexpr bool supports_connectivity_set = false; - static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = PartitionedHypergraph::HIGH_DEGREE_THRESHOLD; - - DeltaPartitionedHypergraph(const Context& context) : - _k(context.partition.k), - _phg(nullptr), - _part_weights_delta(context.partition.k, 0), - _part_ids_delta(), - _pins_in_part_delta(), - _connectivity_set_delta(context.partition.k) { - const bool top_level = context.type == ContextType::main; - _part_ids_delta.initialize(MAP_SIZE_SMALL); - _pins_in_part_delta.initialize(MAP_SIZE_LARGE); - } + static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = + PartitionedHypergraph::HIGH_DEGREE_THRESHOLD; + + DeltaPartitionedHypergraph (const Context &context) : + _k (context.partition.k), _phg (nullptr), + _part_weights_delta (context.partition.k, 0), _part_ids_delta (), + _pins_in_part_delta (), _connectivity_set_delta (context.partition.k) + { + const bool top_level = context.type == ContextType::main; + _part_ids_delta.initialize (MAP_SIZE_SMALL); + _pins_in_part_delta.initialize (MAP_SIZE_LARGE); + } - DeltaPartitionedHypergraph(const DeltaPartitionedHypergraph&) = delete; - DeltaPartitionedHypergraph & operator= (const DeltaPartitionedHypergraph &) = delete; + DeltaPartitionedHypergraph (const DeltaPartitionedHypergraph &) = delete; + DeltaPartitionedHypergraph &operator= (const DeltaPartitionedHypergraph &) = delete; - DeltaPartitionedHypergraph(DeltaPartitionedHypergraph&& other) = default; - DeltaPartitionedHypergraph & operator= (DeltaPartitionedHypergraph&& other) = default; + DeltaPartitionedHypergraph (DeltaPartitionedHypergraph &&other) = default; + DeltaPartitionedHypergraph &operator= (DeltaPartitionedHypergraph &&other) = default; - ~DeltaPartitionedHypergraph() = default; + ~DeltaPartitionedHypergraph () = default; - void setPartitionedHypergraph(PartitionedHypergraph* phg) { + void setPartitionedHypergraph (PartitionedHypergraph *phg) + { _phg = phg; - _connectivity_set_delta.setConnectivitySet(&phg->getConnectivityInformation()); + _connectivity_set_delta.setConnectivitySet (&phg->getConnectivityInformation ()); } // ####################### Mapping ###################### - bool hasTargetGraph() const { - ASSERT(_phg); - return _phg->hasTargetGraph(); + bool hasTargetGraph () const + { + ASSERT (_phg); + return _phg->hasTargetGraph (); } - const TargetGraph* targetGraph() const { - ASSERT(_phg); - return _phg->targetGraph(); + const TargetGraph *targetGraph () const + { + ASSERT (_phg); + return _phg->targetGraph (); } // 
####################### Iterators ####################### // ! Returns an iterator over the set of active nodes of the hypergraph - IteratorRange nodes() const { - ASSERT(_phg); - return _phg->nodes(); + IteratorRange nodes () const + { + ASSERT (_phg); + return _phg->nodes (); } // ! Returns an iterator over the set of active edges of the hypergraph - IteratorRange edges() const { - ASSERT(_phg); - return _phg->edges(); + IteratorRange edges () const + { + ASSERT (_phg); + return _phg->edges (); } // ! Returns a range to loop over the incident nets of hypernode u. - IteratorRange incidentEdges(const HypernodeID u) const { - ASSERT(_phg); - return _phg->incidentEdges(u); + IteratorRange incidentEdges (const HypernodeID u) const + { + ASSERT (_phg); + return _phg->incidentEdges (u); } // ! Returns a range to loop over the pins of hyperedge e. - IteratorRange pins(const HyperedgeID e) const { - ASSERT(_phg); - return _phg->pins(e); + IteratorRange pins (const HyperedgeID e) const + { + ASSERT (_phg); + return _phg->pins (e); } // ####################### Hypernode Information ####################### - HypernodeWeight nodeWeight(const HypernodeID u) const { - ASSERT(_phg); - return _phg->nodeWeight(u); + HypernodeWeight nodeWeight (const HypernodeID u) const + { + ASSERT (_phg); + return _phg->nodeWeight (u); } - HyperedgeID nodeDegree(const HypernodeID u) const { - ASSERT(_phg); - return _phg->nodeDegree(u); + HyperedgeID nodeDegree (const HypernodeID u) const + { + ASSERT (_phg); + return _phg->nodeDegree (u); } // ####################### Hyperedge Information ####################### // ! Number of pins of a hyperedge - HypernodeID edgeSize(const HyperedgeID e) const { - ASSERT(_phg); - return _phg->edgeSize(e); + HypernodeID edgeSize (const HyperedgeID e) const + { + ASSERT (_phg); + return _phg->edgeSize (e); } - HyperedgeWeight edgeWeight(const HyperedgeID e) const { - ASSERT(_phg); - return _phg->edgeWeight(e); + HyperedgeWeight edgeWeight (const HyperedgeID e) const + { + ASSERT (_phg); + return _phg->edgeWeight (e); } // ! Returns true, if the hyperedge contains only a single pin - bool isSinglePin(const HyperedgeID e) const { - ASSERT(_phg); - return _phg->isSinglePin(e); + bool isSinglePin (const HyperedgeID e) const + { + ASSERT (_phg); + return _phg->isSinglePin (e); } // ! Target of an edge - HypernodeID edgeTarget(const HyperedgeID e) const { - ASSERT(_phg); - return _phg->edgeTarget(e); + HypernodeID edgeTarget (const HyperedgeID e) const + { + ASSERT (_phg); + return _phg->edgeTarget (e); } // ! Source of an edge - HypernodeID edgeSource(const HyperedgeID e) const { - ASSERT(_phg); - return _phg->edgeSource(e); + HypernodeID edgeSource (const HyperedgeID e) const + { + ASSERT (_phg); + return _phg->edgeSource (e); } // ####################### Partition Information ####################### @@ -187,17 +201,16 @@ class DeltaPartitionedHypergraph { // ! Changes the block of hypernode u from 'from' to 'to'. // ! Move is successful, if it is not violating the balance // ! constraint specified by 'max_weight_to'. 
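// A minimal standalone sketch of the balance-gated move that the changeNodePart
// overloads below implement: the move succeeds only if the target block stays within
// 'max_weight_to', and on success only thread-local deltas (new part id, +/- block
// weight) are recorded. The struct name, plain int types, and hard-coded weights are
// simplified assumptions for illustration, not the real interface.
#include <cassert>
#include <unordered_map>
#include <vector>

struct DeltaPartition
{
  std::vector<int> shared_part_weights;       // block weights in the shared partition
  std::vector<int> part_weight_delta;         // thread-local weight corrections
  std::unordered_map<int, int> part_id_delta; // node -> locally assigned block

  int partWeight(int p) const { return shared_part_weights[p] + part_weight_delta[p]; }

  bool changeNodePart(int u, int from, int to, int node_weight, int max_weight_to)
  {
    if(partWeight(to) + node_weight > max_weight_to)
      return false; // would violate the balance constraint
    part_id_delta[u] = to; // visible only to this thread until applied or cleared
    part_weight_delta[to] += node_weight;
    part_weight_delta[from] -= node_weight;
    return true;
  }
};

int main()
{
  DeltaPartition dp{ { 5, 5 }, { 0, 0 }, {} };
  assert(dp.changeNodePart(/*u=*/7, /*from=*/0, /*to=*/1, /*weight=*/2, /*max=*/8));
  assert(dp.partWeight(1) == 7 && dp.partWeight(0) == 3);
  assert(!dp.changeNodePart(8, 0, 1, 2, 8)); // 7 + 2 > 8 -> move rejected
  return 0;
}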
- template - bool changeNodePart(const HypernodeID u, - const PartitionID from, - const PartitionID to, - const HypernodeWeight max_weight_to, - DeltaFunc&& delta_func) { - ASSERT(_phg); - assert(partID(u) == from); - assert(from != to); - const HypernodeWeight wu = _phg->nodeWeight(u); - if ( partWeight(to) + wu <= max_weight_to ) { + template + bool changeNodePart (const HypernodeID u, const PartitionID from, const PartitionID to, + const HypernodeWeight max_weight_to, DeltaFunc &&delta_func) + { + ASSERT (_phg); + assert (partID (u) == from); + assert (from != to); + const HypernodeWeight wu = _phg->nodeWeight (u); + if (partWeight (to) + wu <= max_weight_to) + { _part_ids_delta[u] = to; _part_weights_delta[to] += wu; _part_weights_delta[from] -= wu; @@ -205,147 +218,173 @@ class DeltaPartitionedHypergraph { SynchronizedEdgeUpdate sync_update; sync_update.from = from; sync_update.to = to; - sync_update.target_graph = _phg->targetGraph(); - for ( const HyperedgeID& he : _phg->incidentEdges(u) ) { + sync_update.target_graph = _phg->targetGraph (); + for (const HyperedgeID &he : _phg->incidentEdges (u)) + { sync_update.he = he; - sync_update.edge_weight = edgeWeight(he); - sync_update.edge_size = edgeSize(he); - sync_update.pin_count_in_from_part_after = decrementPinCountOfBlock(he, from); - sync_update.pin_count_in_to_part_after = incrementPinCountOfBlock(he, to); - if constexpr ( maintain_connectivity_set ) { - updateConnectivitySet(he, sync_update); - sync_update.connectivity_set_after = &deepCopyOfConnectivitySet(he); + sync_update.edge_weight = edgeWeight (he); + sync_update.edge_size = edgeSize (he); + sync_update.pin_count_in_from_part_after = decrementPinCountOfBlock (he, from); + sync_update.pin_count_in_to_part_after = incrementPinCountOfBlock (he, to); + if constexpr (maintain_connectivity_set) + { + updateConnectivitySet (he, sync_update); + sync_update.connectivity_set_after = &deepCopyOfConnectivitySet (he); } - delta_func(sync_update); + delta_func (sync_update); } return true; - } else { + } + else + { return false; } } // curry - bool changeNodePart(const HypernodeID u, - const PartitionID from, - const PartitionID to, - const HypernodeWeight max_weight_to) { - return changeNodePart(u, from, to, max_weight_to, NoOpDeltaFunc()); + bool changeNodePart (const HypernodeID u, const PartitionID from, const PartitionID to, + const HypernodeWeight max_weight_to) + { + return changeNodePart (u, from, to, max_weight_to, NoOpDeltaFunc ()); } // ! Returns the block of hypernode u - PartitionID partID(const HypernodeID u) const { - ASSERT(_phg); - const PartitionID* part_id = _part_ids_delta.get_if_contained(u); - return part_id ? *part_id : _phg->partID(u); + PartitionID partID (const HypernodeID u) const + { + ASSERT (_phg); + const PartitionID *part_id = _part_ids_delta.get_if_contained (u); + return part_id ? *part_id : _phg->partID (u); } // ! Returns if the node is a fixed vertex - bool isFixed(const HypernodeID u) const { - ASSERT(_phg); - return _phg->isFixed(u); + bool isFixed (const HypernodeID u) const + { + ASSERT (_phg); + return _phg->isFixed (u); } // ! Returns the total weight of block p - HypernodeWeight partWeight(const PartitionID p) const { - ASSERT(_phg); - ASSERT(p != kInvalidPartition && p < _k); - return _phg->partWeight(p) + _part_weights_delta[p]; + HypernodeWeight partWeight (const PartitionID p) const + { + ASSERT (_phg); + ASSERT (p != kInvalidPartition && p < _k); + return _phg->partWeight (p) + _part_weights_delta[p]; } // ! 
Returns the number of pins of hyperedge e in block p - HypernodeID pinCountInPart(const HyperedgeID e, const PartitionID p) const { - ASSERT(_phg); - ASSERT(p != kInvalidPartition && p < _k); - const int32_t* pin_count_delta = _pins_in_part_delta.get_if_contained(e * _k + p); - return std::max(static_cast(_phg->pinCountInPart(e, p)) + - ( pin_count_delta ? *pin_count_delta : 0 ), 0); + HypernodeID pinCountInPart (const HyperedgeID e, const PartitionID p) const + { + ASSERT (_phg); + ASSERT (p != kInvalidPartition && p < _k); + const int32_t *pin_count_delta = _pins_in_part_delta.get_if_contained (e * _k + p); + return std::max (static_cast (_phg->pinCountInPart (e, p)) + + (pin_count_delta ? *pin_count_delta : 0), + 0); } // ! Returns an iterator over the connectivity set of hyperedge he - IteratorRange connectivitySet(const HyperedgeID e) const { - return _connectivity_set_delta.connectivitySet(e); + IteratorRange connectivitySet (const HyperedgeID e) const + { + return _connectivity_set_delta.connectivitySet (e); } // ! Returns the number of blocks contained in hyperedge he - PartitionID connectivity(const HyperedgeID e) const { - return _connectivity_set_delta.connectivity(e); + PartitionID connectivity (const HyperedgeID e) const + { + return _connectivity_set_delta.connectivity (e); } // ! Creates a deep copy of the connectivity set of hyperedge he - Bitset& deepCopyOfConnectivitySet(const HyperedgeID he) const { - return _connectivity_set_delta.deepCopy(he); + Bitset &deepCopyOfConnectivitySet (const HyperedgeID he) const + { + return _connectivity_set_delta.deepCopy (he); } // ! Clears all deltas applied to the partitioned hypergraph - void clear() { + void clear () + { // O(k) - _part_weights_delta.assign(_k, 0); + _part_weights_delta.assign (_k, 0); // Constant Time - _part_ids_delta.clear(); - _pins_in_part_delta.clear(); - _connectivity_set_delta.reset(); + _part_ids_delta.clear (); + _pins_in_part_delta.clear (); + _connectivity_set_delta.reset (); } - void dropMemory() { - if (!_memory_dropped) { + void dropMemory () + { + if (!_memory_dropped) + { _memory_dropped = true; - _part_ids_delta.freeInternalData(); - _pins_in_part_delta.freeInternalData(); - _connectivity_set_delta.freeInternalData(); + _part_ids_delta.freeInternalData (); + _pins_in_part_delta.freeInternalData (); + _connectivity_set_delta.freeInternalData (); } } - size_t combinedMemoryConsumption() const { - return _pins_in_part_delta.size_in_bytes() - + _part_ids_delta.size_in_bytes() - + _connectivity_set_delta.size_in_bytes(); + size_t combinedMemoryConsumption () const + { + return _pins_in_part_delta.size_in_bytes () + _part_ids_delta.size_in_bytes () + + _connectivity_set_delta.size_in_bytes (); } - PartitionID k() const { - return _k; - } + PartitionID k () const { return _k; } - void changeNumberOfBlocks(const PartitionID new_k) { - if ( new_k > _k ) { - _part_weights_delta.assign(new_k, 0); + void changeNumberOfBlocks (const PartitionID new_k) + { + if (new_k > _k) + { + _part_weights_delta.assign (new_k, 0); } - _connectivity_set_delta.setNumberOfBlocks(new_k); + _connectivity_set_delta.setNumberOfBlocks (new_k); _k = new_k; } - void memoryConsumption(utils::MemoryTreeNode* parent) const { - ASSERT(parent); + void memoryConsumption (utils::MemoryTreeNode *parent) const + { + ASSERT (parent); - utils::MemoryTreeNode* delta_phg_node = parent->addChild("Delta Partitioned Hypergraph"); - utils::MemoryTreeNode* part_weights_node = delta_phg_node->addChild("Delta Part Weights"); - 
part_weights_node->updateSize(_part_weights_delta.capacity() * sizeof(HypernodeWeight)); - utils::MemoryTreeNode* part_ids_node = delta_phg_node->addChild("Delta Part IDs"); - part_ids_node->updateSize(_part_ids_delta.size_in_bytes()); - utils::MemoryTreeNode* pins_in_part_node = delta_phg_node->addChild("Delta Pins In Part"); - pins_in_part_node->updateSize(_pins_in_part_delta.size_in_bytes()); + utils::MemoryTreeNode *delta_phg_node = + parent->addChild ("Delta Partitioned Hypergraph"); + utils::MemoryTreeNode *part_weights_node = + delta_phg_node->addChild ("Delta Part Weights"); + part_weights_node->updateSize (_part_weights_delta.capacity () * + sizeof (HypernodeWeight)); + utils::MemoryTreeNode *part_ids_node = delta_phg_node->addChild ("Delta Part IDs"); + part_ids_node->updateSize (_part_ids_delta.size_in_bytes ()); + utils::MemoryTreeNode *pins_in_part_node = + delta_phg_node->addChild ("Delta Pins In Part"); + pins_in_part_node->updateSize (_pins_in_part_delta.size_in_bytes ()); } - private: +private: MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HypernodeID decrementPinCountOfBlock(const HyperedgeID e, const PartitionID p) { - return std::max(static_cast( - _phg->pinCountInPart(e, p)) + --_pins_in_part_delta[e * _k + p], static_cast(0)); + HypernodeID decrementPinCountOfBlock (const HyperedgeID e, const PartitionID p) + { + return std::max (static_cast (_phg->pinCountInPart (e, p)) + + --_pins_in_part_delta[e * _k + p], + static_cast (0)); } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HypernodeID incrementPinCountOfBlock(const HyperedgeID e, const PartitionID p) { - return std::max(static_cast( - _phg->pinCountInPart(e, p)) + ++_pins_in_part_delta[e * _k + p], static_cast(0)); + HypernodeID incrementPinCountOfBlock (const HyperedgeID e, const PartitionID p) + { + return std::max (static_cast (_phg->pinCountInPart (e, p)) + + ++_pins_in_part_delta[e * _k + p], + static_cast (0)); } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void updateConnectivitySet(const HyperedgeID e, - const SynchronizedEdgeUpdate& sync_update) { - if ( sync_update.pin_count_in_from_part_after == 0 ) { - _connectivity_set_delta.remove(sync_update.he, sync_update.from); + void updateConnectivitySet (const HyperedgeID e, + const SynchronizedEdgeUpdate &sync_update) + { + if (sync_update.pin_count_in_from_part_after == 0) + { + _connectivity_set_delta.remove (sync_update.he, sync_update.from); } - if ( sync_update.pin_count_in_to_part_after == 1 ) { - _connectivity_set_delta.add(sync_update.he, sync_update.to); + if (sync_update.pin_count_in_to_part_after == 1) + { + _connectivity_set_delta.add (sync_update.he, sync_update.to); } } @@ -355,10 +394,10 @@ class DeltaPartitionedHypergraph { PartitionID _k; // ! Partitioned hypergraph where all deltas are stored relative to - PartitionedHypergraph* _phg; + PartitionedHypergraph *_phg; // ! Delta for block weights - vec< HypernodeWeight > _part_weights_delta; + vec _part_weights_delta; // ! 
Stores for each locally moved node, its new block id DynamicFlatMap _part_ids_delta; diff --git a/mt-kahypar/datastructures/dynamic_adjacency_array.cpp b/mt-kahypar/datastructures/dynamic_adjacency_array.cpp index 5a517c730..6efd550fe 100644 --- a/mt-kahypar/datastructures/dynamic_adjacency_array.cpp +++ b/mt-kahypar/datastructures/dynamic_adjacency_array.cpp @@ -33,17 +33,15 @@ namespace mt_kahypar { namespace ds { -IncidentEdgeIterator::IncidentEdgeIterator(const HypernodeID u, - const DynamicAdjacencyArray* dynamic_adjacency_array, - const size_t pos, - const bool end): +IncidentEdgeIterator::IncidentEdgeIterator( + const HypernodeID u, const DynamicAdjacencyArray *dynamic_adjacency_array, + const size_t pos, const bool end) : _u(u), - _current_u(u), - _current_size(dynamic_adjacency_array->header(u).size()), - _current_pos(pos), - _dynamic_adjacency_array(dynamic_adjacency_array), - _end(end) { - if ( end ) { + _current_u(u), _current_size(dynamic_adjacency_array->header(u).size()), + _current_pos(pos), _dynamic_adjacency_array(dynamic_adjacency_array), _end(end) +{ + if(end) + { _current_pos = _current_size; } @@ -51,28 +49,34 @@ IncidentEdgeIterator::IncidentEdgeIterator(const HypernodeID u, traverse_headers(); } -HyperedgeID IncidentEdgeIterator::operator* () const { +HyperedgeID IncidentEdgeIterator::operator*() const +{ return _dynamic_adjacency_array->firstActiveEdge(_current_u) + _current_pos; } -IncidentEdgeIterator & IncidentEdgeIterator::operator++ () { +IncidentEdgeIterator &IncidentEdgeIterator::operator++() +{ ASSERT(!_end); ++_current_pos; traverse_headers(); return *this; } -bool IncidentEdgeIterator::operator!= (const IncidentEdgeIterator& rhs) { +bool IncidentEdgeIterator::operator!=(const IncidentEdgeIterator &rhs) +{ return !(*this == rhs); } -bool IncidentEdgeIterator::operator== (const IncidentEdgeIterator& rhs) { +bool IncidentEdgeIterator::operator==(const IncidentEdgeIterator &rhs) +{ return _u == rhs._u && _end == rhs._end; } -void IncidentEdgeIterator::traverse_headers() { +void IncidentEdgeIterator::traverse_headers() +{ skip_invalid(); - while ( _current_pos >= _current_size ) { + while(_current_pos >= _current_size) + { const HypernodeID last_u = _current_u; _current_u = _dynamic_adjacency_array->header(last_u).it_next; _current_pos -= _current_size; @@ -82,8 +86,8 @@ void IncidentEdgeIterator::traverse_headers() { // changes. 
Therefore, we set the end flag if we reach the current // head of the list or it_next is equal with the current vertex (means // that list becomes empty due to a contraction) - if ( _dynamic_adjacency_array->header(_current_u).is_head || - last_u == _current_u ) { + if(_dynamic_adjacency_array->header(_current_u).is_head || last_u == _current_u) + { _end = true; break; } @@ -91,43 +95,46 @@ void IncidentEdgeIterator::traverse_headers() { } } -void IncidentEdgeIterator::skip_invalid() { - while (_current_pos < _current_size && - !_dynamic_adjacency_array->edge(**this).isValid()) { +void IncidentEdgeIterator::skip_invalid() +{ + while(_current_pos < _current_size && !_dynamic_adjacency_array->edge(**this).isValid()) + { ++_current_pos; } } EdgeIterator::EdgeIterator(const HypernodeID u, - const DynamicAdjacencyArray* dynamic_adjacency_array): + const DynamicAdjacencyArray *dynamic_adjacency_array) : _current_u(u), _current_id(dynamic_adjacency_array->firstActiveEdge(u)), _current_last_id(dynamic_adjacency_array->firstInactiveEdge(u)), - _dynamic_adjacency_array(dynamic_adjacency_array) { + _dynamic_adjacency_array(dynamic_adjacency_array) +{ traverse_headers(); } -HyperedgeID EdgeIterator::operator* () const { - return _current_id; -} +HyperedgeID EdgeIterator::operator*() const { return _current_id; } -EdgeIterator & EdgeIterator::operator++ () { +EdgeIterator &EdgeIterator::operator++() +{ ++_current_id; traverse_headers(); return *this; } -bool EdgeIterator::operator!= (const EdgeIterator& rhs) { - return !(*this == rhs); -} +bool EdgeIterator::operator!=(const EdgeIterator &rhs) { return !(*this == rhs); } -bool EdgeIterator::operator== (const EdgeIterator& rhs) { +bool EdgeIterator::operator==(const EdgeIterator &rhs) +{ return _current_id == rhs._current_id; } -void EdgeIterator::traverse_headers() { +void EdgeIterator::traverse_headers() +{ skip_invalid(); - while (_current_id == _current_last_id && _current_u < _dynamic_adjacency_array->_num_nodes) { + while(_current_id == _current_last_id && + _current_u < _dynamic_adjacency_array->_num_nodes) + { ++_current_u; _current_id = _dynamic_adjacency_array->firstActiveEdge(_current_u); _current_last_id = _dynamic_adjacency_array->firstInactiveEdge(_current_u); @@ -135,63 +142,66 @@ void EdgeIterator::traverse_headers() { } } -void EdgeIterator::skip_invalid() { - while (_current_id < _current_last_id && - !_dynamic_adjacency_array->edge(**this).isValid()) { +void EdgeIterator::skip_invalid() +{ + while(_current_id < _current_last_id && + !_dynamic_adjacency_array->edge(**this).isValid()) + { ++_current_id; } } -void DynamicAdjacencyArray::construct(const EdgeVector& edge_vector, const HyperedgeWeight* edge_weight) { +void DynamicAdjacencyArray::construct(const EdgeVector &edge_vector, + const HyperedgeWeight *edge_weight) +{ // Accumulate degree of each vertex thread local const HyperedgeID num_edges = edge_vector.size(); ThreadLocalCounter local_incident_nets_per_vertex(_num_nodes + 1, 0); Array node_degrees; AtomicCounter current_incident_net_pos; - tbb::parallel_invoke([&] { - tbb::parallel_for(ID(0), num_edges, [&](const size_t pos) { - parallel::scalable_vector& num_incident_nets_per_vertex = - local_incident_nets_per_vertex.local(); - ++num_incident_nets_per_vertex[edge_vector[pos].first]; - ++num_incident_nets_per_vertex[edge_vector[pos].second]; - }); - }, [&] { - _header_array.resize(_num_nodes + 1); - }, [&] { - _edges.resize(2 * num_edges); - }, [&] { - _removable_edges.setSize(2 * num_edges); - }, [&] { - _edge_mapping.resize(2 
* num_edges); - }, [&] { - node_degrees.resize(_num_nodes); - }, [&] { - current_incident_net_pos.assign( - _num_nodes, parallel::IntegralAtomicWrapper(0)); - }); + tbb::parallel_invoke( + [&] { + tbb::parallel_for(ID(0), num_edges, [&](const size_t pos) { + parallel::scalable_vector &num_incident_nets_per_vertex = + local_incident_nets_per_vertex.local(); + ++num_incident_nets_per_vertex[edge_vector[pos].first]; + ++num_incident_nets_per_vertex[edge_vector[pos].second]; + }); + }, + [&] { _header_array.resize(_num_nodes + 1); }, + [&] { _edges.resize(2 * num_edges); }, + [&] { _removable_edges.setSize(2 * num_edges); }, + [&] { _edge_mapping.resize(2 * num_edges); }, + [&] { node_degrees.resize(_num_nodes); }, + [&] { + current_incident_net_pos.assign(_num_nodes, + parallel::IntegralAtomicWrapper(0)); + }); // We sum up the number of incident nets per vertex only thread local. // To obtain the global number of incident nets per vertex, we iterate // over each thread local counter and sum it up. - for ( const parallel::scalable_vector& c : local_incident_nets_per_vertex ) { - tbb::parallel_for(ID(0), _num_nodes, [&](const size_t pos) { - node_degrees[pos] += c[pos]; - }); + for(const parallel::scalable_vector &c : local_incident_nets_per_vertex) + { + tbb::parallel_for(ID(0), _num_nodes, + [&](const size_t pos) { node_degrees[pos] += c[pos]; }); } // Compute start positon of the incident nets of each vertex via a parallel prefix sum parallel::TBBPrefixSum incident_net_prefix_sum(node_degrees); - tbb::parallel_scan(tbb::blocked_range( - ID(0), ID(_num_nodes)), incident_net_prefix_sum); + tbb::parallel_scan(tbb::blocked_range(ID(0), ID(_num_nodes)), + incident_net_prefix_sum); // Setup Header of each vertex tbb::parallel_for(ID(0), _num_nodes + 1, [&](const HypernodeID u) { - Header& head = header(u); + Header &head = header(u); head.prev = u; head.next = u; head.it_prev = u; head.it_next = u; - head.degree = (u == _num_nodes) ? 0 : incident_net_prefix_sum[u + 1] - incident_net_prefix_sum[u]; + head.degree = (u == _num_nodes) ? + 0 : + incident_net_prefix_sum[u + 1] - incident_net_prefix_sum[u]; head.first = incident_net_prefix_sum[u]; head.first_active = head.first; head.first_inactive = head.first + head.degree; @@ -205,12 +215,12 @@ void DynamicAdjacencyArray::construct(const EdgeVector& edge_vector, const Hyper const HyperedgeWeight weight = edge_weight == nullptr ? 
1 : edge_weight[he]; HyperedgeID id1 = firstEdge(source) + current_incident_net_pos[source].fetch_add(1); HyperedgeID id2 = firstEdge(target) + current_incident_net_pos[target].fetch_add(1); - Edge& e1 = edge(id1); + Edge &e1 = edge(id1); e1.source = source; e1.target = target; e1.weight = weight; e1.back_edge = id2; - Edge& e2 = edge(id2); + Edge &e2 = edge(id2); e2.source = target; e2.target = source; e2.weight = weight; @@ -218,21 +228,26 @@ void DynamicAdjacencyArray::construct(const EdgeVector& edge_vector, const Hyper }); } -void DynamicAdjacencyArray::contract(const HypernodeID u, - const HypernodeID v, - const AcquireLockFunc& acquire_lock, - const ReleaseLockFunc& release_lock) { +void DynamicAdjacencyArray::contract(const HypernodeID u, const HypernodeID v, + const AcquireLockFunc &acquire_lock, + const ReleaseLockFunc &release_lock) +{ // iterate over edges of v and update them - Header& head_v = header(v); - for (const HypernodeID& current_v: headers(v)) { + Header &head_v = header(v); + for(const HypernodeID ¤t_v : headers(v)) + { const HyperedgeID last = firstInactiveEdge(current_v); - for ( HyperedgeID curr_edge = firstActiveEdge(current_v); curr_edge < last; ++curr_edge ) { - Edge& e = edge(curr_edge); - if (e.isValid() && e.isSinglePin()) { + for(HyperedgeID curr_edge = firstActiveEdge(current_v); curr_edge < last; ++curr_edge) + { + Edge &e = edge(curr_edge); + if(e.isValid() && e.isSinglePin()) + { ASSERT(e.source == v); e.disable(); --head_v.degree; - } else if (e.isValid()) { + } + else if(e.isValid()) + { ASSERT(e.source == v && edge(e.back_edge).target == v); e.source = u; edge(e.back_edge).target = u; @@ -244,47 +259,55 @@ void DynamicAdjacencyArray::contract(const HypernodeID u, // Concatenate double-linked list of u and v append(u, v); header(u).degree += head_v.degree; - ASSERT(verifyIteratorPointers(u), "Iterator pointers of vertex" << u << "are corrupted"); + ASSERT(verifyIteratorPointers(u), + "Iterator pointers of vertex" << u << "are corrupted"); release_lock(u); } -void DynamicAdjacencyArray::uncontract(const HypernodeID u, - const HypernodeID v, - const AcquireLockFunc& acquire_lock, - const ReleaseLockFunc& release_lock) { - uncontract(u, v, [](HyperedgeID) { return false; }, [](HyperedgeID) {}, [](HyperedgeID) {}, - acquire_lock, release_lock); +void DynamicAdjacencyArray::uncontract(const HypernodeID u, const HypernodeID v, + const AcquireLockFunc &acquire_lock, + const ReleaseLockFunc &release_lock) +{ + uncontract( + u, v, [](HyperedgeID) { return false; }, [](HyperedgeID) {}, [](HyperedgeID) {}, + acquire_lock, release_lock); } -void DynamicAdjacencyArray::uncontract(const HypernodeID u, - const HypernodeID v, - const MarkEdgeFunc& mark_edge, - const CaseOneFunc& case_one_func, - const CaseTwoFunc& case_two_func, - const AcquireLockFunc& acquire_lock, - const ReleaseLockFunc& release_lock) { +void DynamicAdjacencyArray::uncontract(const HypernodeID u, const HypernodeID v, + const MarkEdgeFunc &mark_edge, + const CaseOneFunc &case_one_func, + const CaseTwoFunc &case_two_func, + const AcquireLockFunc &acquire_lock, + const ReleaseLockFunc &release_lock) +{ ASSERT(header(v).prev != v); - Header& head_u = header(u); - Header& head_v = header(v); + Header &head_u = header(u); + Header &head_v = header(v); acquire_lock(u); // Restores the incident list of v to the time before it was appended // to the double-linked list of u. 
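// A small sketch, under simplified assumptions, of the list surgery that contract() and
// uncontract() in dynamic_adjacency_array.cpp perform on the per-vertex header lists:
// contraction appends v's circular doubly-linked list to u's list, and uncontraction
// splices it back out, restoring the previous state. Plain int indices replace the real
// header bookkeeping (degrees, it_prev/it_next) here; this is illustrative only.
#include <cassert>
#include <vector>

struct CircularLists
{
  std::vector<int> next, prev;
  explicit CircularLists(int n) : next(n), prev(n)
  {
    for(int i = 0; i < n; ++i)
      next[i] = prev[i] = i; // every node starts as a singleton circular list
  }
  // Append the list headed by v to the list headed by u (as in contract(u, v)).
  void append(int u, int v)
  {
    const int tail_u = prev[u];
    const int tail_v = prev[v];
    next[tail_u] = v;
    prev[v] = tail_u;
    next[tail_v] = u;
    prev[u] = tail_v;
  }
  // Cut the most recently appended sublist headed by v back out (as in uncontract(u, v)).
  void splice(int u, int v)
  {
    const int tail_v = prev[u];   // v's old tail is currently u's predecessor
    const int before_v = prev[v]; // node that preceded v after the append
    next[before_v] = u;
    prev[u] = before_v;
    next[tail_v] = v;
    prev[v] = tail_v;
  }
};

int main()
{
  CircularLists lists(4);
  lists.append(2, 3); // list of vertex 2 is now {2, 3}
  lists.append(0, 2); // contract: list of vertex 0 is now {0, 2, 3}
  assert(lists.next[0] == 2 && lists.next[2] == 3 && lists.next[3] == 0);
  lists.splice(0, 2); // uncontract: restore {0} and {2, 3}
  assert(lists.next[0] == 0 && lists.next[2] == 3 && lists.next[3] == 2);
  return 0;
}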
splice(u, v); - ASSERT(verifyIteratorPointers(u), "Iterator pointers of vertex" << u << "are corrupted"); + ASSERT(verifyIteratorPointers(u), + "Iterator pointers of vertex" << u << "are corrupted"); ASSERT(head_u.degree >= head_v.degree, V(head_u.degree) << V(head_v.degree)); head_u.degree -= head_v.degree; release_lock(u); // iterate over edges of v, update backwards edges and restore removed edges HypernodeID last_non_empty_v = v; - for (const HypernodeID& current_v: headers(v)) { + for(const HypernodeID ¤t_v : headers(v)) + { const HyperedgeID first_inactive = firstInactiveEdge(current_v); - for (HyperedgeID curr_edge = firstActiveEdge(current_v); curr_edge < first_inactive; ++curr_edge) { - Edge& e = edge(curr_edge); + for(HyperedgeID curr_edge = firstActiveEdge(current_v); curr_edge < first_inactive; + ++curr_edge) + { + Edge &e = edge(curr_edge); ASSERT(e.source == u || !e.isValid()); - if (e.source == u) { + if(e.source == u) + { bool singlePin = false; - if (e.target == u) { + if(e.target == u) + { // If we use a gain cache, it is necessary to correctly attribute // which uncontraction changes an edge from single pin to two pins. // To achieve this, we introduce a synchronization point with mark_edge. @@ -292,27 +315,35 @@ void DynamicAdjacencyArray::uncontract(const HypernodeID u, } e.source = v; edge(e.back_edge).target = v; - if (singlePin) { + if(singlePin) + { case_one_func(curr_edge); - } else { + } + else + { case_two_func(curr_edge); } - } else if (e.source == v) { + } + else if(e.source == v) + { e.enable(); ++head_v.degree; } } - - if (header(current_v).size() > 0) { + if(header(current_v).size() > 0) + { restoreItLink(v, last_non_empty_v, current_v); last_non_empty_v = current_v; } } - ASSERT(verifyIteratorPointers(v), "Iterator pointers of vertex" << v << "are corrupted"); + ASSERT(verifyIteratorPointers(v), + "Iterator pointers of vertex" << v << "are corrupted"); } -parallel::scalable_vector DynamicAdjacencyArray::removeSinglePinAndParallelEdges() { +parallel::scalable_vector +DynamicAdjacencyArray::removeSinglePinAndParallelEdges() +{ // TODO(maas): special case for high degree nodes? StreamingVector tmp_removed_edges; _removable_edges.reset(); @@ -321,45 +352,58 @@ parallel::scalable_vector DynamicAdjacencyAr // Step one: We mark each edge that should be removed and // update the weight of the representative edges. 
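// A compact sketch of the duplicate-merging rule that "step one" below applies per
// vertex: incident edges are sorted by (target, unique id), the first edge of each
// target becomes the representative, and every further edge with the same target is
// marked removable while its weight is accumulated on the representative. The plain
// structs and sequential loop here are illustrative assumptions, not the real parallel
// edge layout.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct IncidentEdge
{
  int target;
  int unique_id;
  int weight;
  bool removed = false;
};

void mergeParallelEdges(std::vector<IncidentEdge> &edges)
{
  std::sort(edges.begin(), edges.end(),
            [](const IncidentEdge &a, const IncidentEdge &b) {
              return a.target < b.target ||
                     (a.target == b.target && a.unique_id < b.unique_id);
            });
  std::size_t representative = 0;
  for(std::size_t i = 1; i < edges.size(); ++i)
  {
    if(edges[i].target == edges[representative].target)
    {
      edges[representative].weight += edges[i].weight; // accumulate on the kept edge
      edges[i].removed = true;                         // parallel duplicate -> removable
    }
    else
    {
      representative = i; // new target -> new representative
    }
  }
}

int main()
{
  std::vector<IncidentEdge> edges = { { 4, 11, 1 }, { 2, 7, 2 }, { 4, 3, 1 }, { 2, 9, 5 } };
  mergeParallelEdges(edges);
  // After sorting: (2,7), (2,9), (4,3), (4,11); one representative per target remains.
  assert(edges[0].weight == 7 && !edges[0].removed && edges[1].removed);
  assert(edges[2].weight == 2 && !edges[2].removed && edges[3].removed);
  return 0;
}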
tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID u) { - if (header(u).is_head) { - vec& local_vec = _thread_local_vec.local(); + if(header(u).is_head) + { + vec &local_vec = _thread_local_vec.local(); local_vec.clear(); // mark single pin/invalid edges and sort all other incident edges - for (const HypernodeID& current_u: headers(u)) { + for(const HypernodeID ¤t_u : headers(u)) + { const HyperedgeID first_inactive = firstInactiveEdge(current_u); - for (HyperedgeID id = firstActiveEdge(current_u); id < first_inactive; ++id) { - const Edge& e = edge(id); - if (e.isValid() && !e.isSinglePin()) { + for(HyperedgeID id = firstActiveEdge(current_u); id < first_inactive; ++id) + { + const Edge &e = edge(id); + if(e.isValid() && !e.isSinglePin()) + { local_vec.emplace_back(e.target, id, uniqueEdgeID(id)); - } else { + } + else + { _removable_edges.set(id, true); - if (e.isValid()) { + if(e.isValid()) + { --header(u).degree; } } } } - std::sort(local_vec.begin(), local_vec.end(), [](const auto& e1, const auto& e2) { + std::sort(local_vec.begin(), local_vec.end(), [](const auto &e1, const auto &e2) { // we need a symmetric order on edges and backedges to ensure that the // kept forward and backward edge are actually the same edge - return e1.target < e2.target || (e1.target == e2.target && e1.unique_id < e2.unique_id); + return e1.target < e2.target || + (e1.target == e2.target && e1.unique_id < e2.unique_id); }); // mark all duplicate edges and update weight - if (!local_vec.empty()) { + if(!local_vec.empty()) + { HyperedgeID current_representative = local_vec[0].edge_id; - for (size_t i = 0; i + 1 < local_vec.size(); ++i) { - const ParallelEdgeInformation& e1 = local_vec[i]; - const ParallelEdgeInformation& e2 = local_vec[i + 1]; + for(size_t i = 0; i + 1 < local_vec.size(); ++i) + { + const ParallelEdgeInformation &e1 = local_vec[i]; + const ParallelEdgeInformation &e2 = local_vec[i + 1]; ASSERT(e2.target != kInvalidHypernode && e2.target != u); - if (e1.target == e2.target) { + if(e1.target == e2.target) + { // we abuse the source to save the representative edge edge(e2.edge_id).source = current_representative; edge(current_representative).weight += edge(e2.edge_id).weight; _removable_edges.set(e2.edge_id, true); --header(u).degree; - } else { + } + else + { current_representative = e2.edge_id; } } @@ -369,18 +413,21 @@ parallel::scalable_vector DynamicAdjacencyAr // Step two: Swap each marked edge and update the edge mapping accordingly. tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID u) { - Header& head = header(u); + Header &head = header(u); const HyperedgeID first_inactive = firstInactiveEdge(u); - for (HyperedgeID e = firstActiveEdge(u); e < first_inactive; ++e) { - if (_removable_edges[e]) { + for(HyperedgeID e = firstActiveEdge(u); e < first_inactive; ++e) + { + if(_removable_edges[e]) + { const HyperedgeID new_id = firstActiveEdge(u); swapAndUpdateMapping(e, new_id); ++head.first_active; - tmp_removed_edges.stream(RemovedEdge {new_id, e}); + tmp_removed_edges.stream(RemovedEdge{ new_id, e }); } } - if (head.size() == 0 && !head.is_head) { + if(head.size() == 0 && !head.is_head) + { head.it_next = u; head.it_prev = u; } @@ -388,43 +435,48 @@ parallel::scalable_vector DynamicAdjacencyAr // Step three: Update iterator pointers and back edges, collect removed edges. 
vec removed_edges; - tbb::parallel_invoke([&]() { - tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID u) { - if (header(u).is_head) { - restoreIteratorPointers(u); - } - }); - }, [&]() { - applyEdgeMapping(_edge_mapping); - }, [&]() { - removed_edges = tmp_removed_edges.copy_parallel(); - tmp_removed_edges.clear_parallel(); - }); + tbb::parallel_invoke( + [&]() { + tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID u) { + if(header(u).is_head) + { + restoreIteratorPointers(u); + } + }); + }, + [&]() { applyEdgeMapping(_edge_mapping); }, + [&]() { + removed_edges = tmp_removed_edges.copy_parallel(); + tmp_removed_edges.clear_parallel(); + }); HEAVY_COARSENING_ASSERT(verifyBackEdges()); return removed_edges; } void DynamicAdjacencyArray::restoreSinglePinAndParallelEdges( - const parallel::scalable_vector& edges_to_restore) { + const parallel::scalable_vector &edges_to_restore) +{ _removable_edges.reset(); initializeEdgeMapping(_edge_mapping); // Step one: We mark all edges that need to be restored and save their swap target. tbb::parallel_for(UL(0), edges_to_restore.size(), [&](const size_t i) { - const RemovedEdge& re = edges_to_restore[i]; + const RemovedEdge &re = edges_to_restore[i]; _removable_edges.set(re.edge_id, true); // we abuse the edge mapping to save the swap target _edge_mapping[re.edge_id] = re.old_id; }); - // Step two: We swap each marked edge (in reverse order to removeSinglePinAndParallelEdges), - // update the edge mapping accordingly and mark the edge again. + // Step two: We swap each marked edge (in reverse order to + // removeSinglePinAndParallelEdges), update the edge mapping accordingly and mark the + // edge again. tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID u) { - Header& head = header(u); + Header &head = header(u); const HyperedgeID first = firstEdge(u); - for (HyperedgeID curr = firstActiveEdge(u); - curr > first && _removable_edges[curr - 1]; --curr) { + for(HyperedgeID curr = firstActiveEdge(u); curr > first && _removable_edges[curr - 1]; + --curr) + { const HyperedgeID e = curr - 1; _removable_edges.set(e, false); _removable_edges.set(_edge_mapping[e], true); @@ -435,51 +487,61 @@ void DynamicAdjacencyArray::restoreSinglePinAndParallelEdges( // Step three: We update the node degrees, restore iterator pointers and the weights // of the representatives and update the back edges. - tbb::parallel_invoke([&]() { - tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID u) { - if (header(u).is_head) { - bool restore_it = false; - for (const HypernodeID& current_u: headers(u)) { - HyperedgeID num_restored = 0; - const HyperedgeID first_inactive = firstInactiveEdge(current_u); - for (HyperedgeID id = firstActiveEdge(current_u); id < first_inactive; ++id) { - Edge& e = edge(id); - // Note: We use e.target to check whether it is a single pin edge. - // Comparing e.source and e.target does not work, because e.source - // currently holds the representative edge and the id of the representative - // could accidentially be equal to e.target. 
- if (_removable_edges[id] && e.isValid() && e.target != u) { - Edge& representative = edge(e.source); - representative.weight -= e.weight; - e.source = u; - } else if (e.isValid()) { - ASSERT(e.source == u); + tbb::parallel_invoke( + [&]() { + tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID u) { + if(header(u).is_head) + { + bool restore_it = false; + for(const HypernodeID &current_u : headers(u)) + { + HyperedgeID num_restored = 0; + const HyperedgeID first_inactive = firstInactiveEdge(current_u); + for(HyperedgeID id = firstActiveEdge(current_u); id < first_inactive; ++id) + { + Edge &e = edge(id); + // Note: We use e.target to check whether it is a single pin edge. + // Comparing e.source and e.target does not work, because e.source + // currently holds the representative edge and the id of the + // representative could accidentially be equal to e.target. + if(_removable_edges[id] && e.isValid() && e.target != u) + { + Edge &representative = edge(e.source); + representative.weight -= e.weight; + e.source = u; + } + else if(e.isValid()) + { + ASSERT(e.source == u); + } + if(_removable_edges[id] && e.isValid()) + { + ++num_restored; + } + } + header(u).degree += num_restored; + restore_it |= (num_restored > 0); } - if (_removable_edges[id] && e.isValid()) { - ++num_restored; + + if(restore_it) + { + restoreIteratorPointers(u); } } - header(u).degree += num_restored; - restore_it |= (num_restored > 0); - } - - if (restore_it) { - restoreIteratorPointers(u); - } - } - }); - }, [&]() { - applyEdgeMapping(_edge_mapping); - }); + }); + }, + [&]() { applyEdgeMapping(_edge_mapping); }); HEAVY_REFINEMENT_ASSERT(verifyBackEdges()); } -void DynamicAdjacencyArray::reset() { +void DynamicAdjacencyArray::reset() +{ // Nothing to do here } -void DynamicAdjacencyArray::sortIncidentEdges() { +void DynamicAdjacencyArray::sortIncidentEdges() +{ // this is a bit complicated because we need to update the back edges Array edge_permutation; edge_permutation.resize(_edges.size()); @@ -490,15 +552,16 @@ void DynamicAdjacencyArray::sortIncidentEdges() { const HyperedgeID start = firstActiveEdge(u); const HyperedgeID end = firstInactiveEdge(u); std::sort(edge_permutation.data() + start, edge_permutation.data() + end, - [&](const auto& e1, const auto& e2) { - return edge(e1).target < edge(e2).target; - } - ); + [&](const auto &e1, const auto &e2) { + return edge(e1).target < edge(e2).target; + }); // apply permutation - for (size_t i = start; i < end; ++i) { + for(size_t i = start; i < end; ++i) + { HyperedgeID target = edge_permutation[i]; - while (target < i) { + while(target < i) + { target = edge_permutation[target]; } std::swap(_edges[i], _edges[target]); @@ -506,41 +569,43 @@ void DynamicAdjacencyArray::sortIncidentEdges() { }); // we need the reversed permutation for the back edges - tbb::parallel_for(ID(0), ID(edge_permutation.size()), [&](const HyperedgeID e) { - _edge_mapping[edge_permutation[e]] = e; - }); + tbb::parallel_for(ID(0), ID(edge_permutation.size()), + [&](const HyperedgeID e) { _edge_mapping[edge_permutation[e]] = e; }); applyEdgeMapping(_edge_mapping); HEAVY_PREPROCESSING_ASSERT(verifyBackEdges()); } -DynamicAdjacencyArray DynamicAdjacencyArray::copy(parallel_tag_t) const { +DynamicAdjacencyArray DynamicAdjacencyArray::copy(parallel_tag_t) const +{ DynamicAdjacencyArray adjacency_array; adjacency_array._num_nodes = _num_nodes; - tbb::parallel_invoke([&] { - adjacency_array._header_array.resize(_header_array.size()); - memcpy(adjacency_array._header_array.data(), _header_array.data(), -
sizeof(Header) * _header_array.size()); - }, [&] { - adjacency_array._edges.resize(_edges.size()); - memcpy(adjacency_array._edges.data(), _edges.data(), sizeof(Edge) * _edges.size()); - },[&] { - adjacency_array._removable_edges.setSize(_edges.size()); - }, [&] { - adjacency_array._edge_mapping.resize(_edge_mapping.size()); - }); + tbb::parallel_invoke( + [&] { + adjacency_array._header_array.resize(_header_array.size()); + memcpy(adjacency_array._header_array.data(), _header_array.data(), + sizeof(Header) * _header_array.size()); + }, + [&] { + adjacency_array._edges.resize(_edges.size()); + memcpy(adjacency_array._edges.data(), _edges.data(), + sizeof(Edge) * _edges.size()); + }, + [&] { adjacency_array._removable_edges.setSize(_edges.size()); }, + [&] { adjacency_array._edge_mapping.resize(_edge_mapping.size()); }); return adjacency_array; } -DynamicAdjacencyArray DynamicAdjacencyArray::copy() const { +DynamicAdjacencyArray DynamicAdjacencyArray::copy() const +{ DynamicAdjacencyArray adjacency_array; adjacency_array._num_nodes = _num_nodes; adjacency_array._header_array.resize(_header_array.size()); memcpy(adjacency_array._header_array.data(), _header_array.data(), - sizeof(Header) * _header_array.size()); + sizeof(Header) * _header_array.size()); adjacency_array._edges.resize(_edges.size()); memcpy(adjacency_array._edges.data(), _edges.data(), sizeof(Edge) * _edges.size()); adjacency_array._removable_edges.setSize(_edges.size()); @@ -548,9 +613,12 @@ DynamicAdjacencyArray DynamicAdjacencyArray::copy() const { return adjacency_array; } -void DynamicAdjacencyArray::swapAndUpdateMapping(const HyperedgeID e, const HyperedgeID new_id) { +void DynamicAdjacencyArray::swapAndUpdateMapping(const HyperedgeID e, + const HyperedgeID new_id) +{ HyperedgeID permutation_source = new_id; - while (_edge_mapping[permutation_source] != new_id) { + while(_edge_mapping[permutation_source] != new_id) + { permutation_source = _edge_mapping[permutation_source]; } ASSERT(_edge_mapping[permutation_source] == new_id); @@ -559,7 +627,8 @@ void DynamicAdjacencyArray::swapAndUpdateMapping(const HyperedgeID e, const Hype _edge_mapping[permutation_source] = e; } -void DynamicAdjacencyArray::append(const HypernodeID u, const HypernodeID v) { +void DynamicAdjacencyArray::append(const HypernodeID u, const HypernodeID v) +{ const HypernodeID tail_u = header(u).prev; const HypernodeID tail_v = header(v).prev; header(tail_u).next = v; @@ -577,25 +646,28 @@ void DynamicAdjacencyArray::append(const HypernodeID u, const HypernodeID v) { header(v).tail = tail_v; header(v).is_head = false; - if ( header(v).size() == 0 ) { + if(header(v).size() == 0) + { removeEmptyIncidentEdgeList(v); } } -void DynamicAdjacencyArray::splice(const HypernodeID u, const HypernodeID v) { +void DynamicAdjacencyArray::splice(const HypernodeID u, const HypernodeID v) +{ // Restore the iterator double-linked list of u such that it does not contain // any incident net list of v const HypernodeID tail = header(v).tail; HypernodeID non_empty_entry_prev_v = v; HypernodeID non_empty_entry_next_tail = tail; - while ( ( non_empty_entry_prev_v == v || - header(non_empty_entry_prev_v).size() == 0 ) && - non_empty_entry_prev_v != u ) { + while((non_empty_entry_prev_v == v || header(non_empty_entry_prev_v).size() == 0) && + non_empty_entry_prev_v != u) + { non_empty_entry_prev_v = header(non_empty_entry_prev_v).prev; } - while ( ( non_empty_entry_next_tail == tail || - header(non_empty_entry_next_tail).size() == 0 ) && - non_empty_entry_next_tail != u ) { + 
while((non_empty_entry_next_tail == tail || + header(non_empty_entry_next_tail).size() == 0) && + non_empty_entry_next_tail != u) + { non_empty_entry_next_tail = header(non_empty_entry_next_tail).next; } header(non_empty_entry_prev_v).it_next = non_empty_entry_next_tail; @@ -611,21 +683,25 @@ void DynamicAdjacencyArray::splice(const HypernodeID u, const HypernodeID v) { header(v).is_head = true; } -void DynamicAdjacencyArray::removeEmptyIncidentEdgeList(const HypernodeID u) { +void DynamicAdjacencyArray::removeEmptyIncidentEdgeList(const HypernodeID u) +{ ASSERT(!header(u).is_head); ASSERT(header(u).size() == 0, V(u) << V(header(u).size())); - Header& head = header(u); + Header &head = header(u); header(head.it_prev).it_next = head.it_next; header(head.it_next).it_prev = head.it_prev; head.it_next = u; head.it_prev = u; } -void DynamicAdjacencyArray::restoreIteratorPointers(const HypernodeID u) { +void DynamicAdjacencyArray::restoreIteratorPointers(const HypernodeID u) +{ ASSERT(header(u).is_head); HypernodeID last_non_empty_u = u; - for (const HypernodeID& current_u: headers(u)) { - if (header(current_u).size() > 0 || current_u == u) { + for(const HypernodeID &current_u : headers(u)) + { + if(header(current_u).size() > 0 || current_u == u) + { restoreItLink(u, last_non_empty_u, current_u); last_non_empty_u = current_u; } @@ -633,30 +709,44 @@ void DynamicAdjacencyArray::restoreIteratorPointers(const HypernodeID u) { ASSERT(verifyIteratorPointers(u)); } -void DynamicAdjacencyArray::restoreItLink(const HypernodeID u, const HypernodeID prev, const HypernodeID current) { +void DynamicAdjacencyArray::restoreItLink(const HypernodeID u, const HypernodeID prev, + const HypernodeID current) +{ header(prev).it_next = current; header(current).it_prev = prev; header(current).it_next = u; header(u).it_prev = current; } -bool DynamicAdjacencyArray::verifyIteratorPointers(const HypernodeID u) const { +bool DynamicAdjacencyArray::verifyIteratorPointers(const HypernodeID u) const +{ HypernodeID current_u = u; HypernodeID last_non_empty_entry = kInvalidHypernode; - do { - if ( header(current_u).size() > 0 || current_u == u ) { - if ( last_non_empty_entry != kInvalidHypernode ) { - if ( header(current_u).it_prev != last_non_empty_entry ) { + do + { + if(header(current_u).size() > 0 || current_u == u) + { + if(last_non_empty_entry != kInvalidHypernode) + { + if(header(current_u).it_prev != last_non_empty_entry) + { return false; - } else if ( header(last_non_empty_entry).it_next != current_u ) { + } + else if(header(last_non_empty_entry).it_next != current_u) + { return false; } } last_non_empty_entry = current_u; - } else { - if ( header(current_u).it_next != current_u ) { + } + else + { + if(header(current_u).it_next != current_u) + { return false; - } else if ( header(current_u).it_prev != current_u ) { + } + else if(header(current_u).it_prev != current_u) + { return false; } } @@ -664,23 +754,29 @@ bool DynamicAdjacencyArray::verifyIteratorPointers(const HypernodeID u) const { current_u = header(current_u).next; } while(current_u != u); - if ( header(u).it_prev != last_non_empty_entry ) { + if(header(u).it_prev != last_non_empty_entry) + { return false; - } else if ( header(last_non_empty_entry).it_next != u ) { + } + else if(header(last_non_empty_entry).it_next != u) + { return false; } return true; } -bool DynamicAdjacencyArray::verifyBackEdges() const { - for (HyperedgeID e = 0; e < _edges.size(); ++e) { - if (edge(edge(e).back_edge).back_edge != e) { +bool DynamicAdjacencyArray::verifyBackEdges() const +{
+ for(HyperedgeID e = 0; e < _edges.size(); ++e) + { + if(edge(edge(e).back_edge).back_edge != e) + { return false; } } return true; } -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/dynamic_adjacency_array.h b/mt-kahypar/datastructures/dynamic_adjacency_array.h index 61ea71b6f..2e785a4ec 100644 --- a/mt-kahypar/datastructures/dynamic_adjacency_array.h +++ b/mt-kahypar/datastructures/dynamic_adjacency_array.h @@ -33,13 +33,13 @@ #include "kahypar-resources/datastructure/fast_reset_flag_array.h" -#include "mt-kahypar/macros.h" +#include "mt-kahypar/datastructures/array.h" #include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/streaming_vector.h" -#include "mt-kahypar/datastructures/array.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" -#include "mt-kahypar/parallel/stl/scalable_unique_ptr.h" +#include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/parallel/stl/scalable_unique_ptr.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/utils/range.h" namespace mt_kahypar { @@ -49,34 +49,35 @@ namespace ds { class DynamicAdjacencyArray; // Iterator over the incident edges of a vertex u -class IncidentEdgeIterator { - public: - using iterator_category = std::forward_iterator_tag; - using value_type = HyperedgeID; - using reference = HyperedgeID&; - using pointer = const HyperedgeID*; - using difference_type = std::ptrdiff_t; +class IncidentEdgeIterator +{ +public: + using iterator_category = std::forward_iterator_tag; + using value_type = HyperedgeID; + using reference = HyperedgeID &; + using pointer = const HyperedgeID *; + using difference_type = std::ptrdiff_t; IncidentEdgeIterator(const HypernodeID u, - const DynamicAdjacencyArray* dynamic_adjacency_array, - const size_t pos, - const bool end); + const DynamicAdjacencyArray *dynamic_adjacency_array, + const size_t pos, const bool end); - HyperedgeID operator* () const; + HyperedgeID operator*() const; - IncidentEdgeIterator & operator++ (); + IncidentEdgeIterator &operator++(); - IncidentEdgeIterator operator++ (int) { + IncidentEdgeIterator operator++(int) + { IncidentEdgeIterator copy = *this; - operator++ (); + operator++(); return copy; } - bool operator!= (const IncidentEdgeIterator& rhs); + bool operator!=(const IncidentEdgeIterator &rhs); - bool operator== (const IncidentEdgeIterator& rhs); + bool operator==(const IncidentEdgeIterator &rhs); - private: +private: void traverse_headers(); void skip_invalid(); @@ -85,37 +86,38 @@ class IncidentEdgeIterator { HypernodeID _current_u; HypernodeID _current_size; HyperedgeID _current_pos; - const DynamicAdjacencyArray* _dynamic_adjacency_array; + const DynamicAdjacencyArray *_dynamic_adjacency_array; bool _end; }; // Iterator over all edges -class EdgeIterator { - public: +class EdgeIterator +{ +public: using iterator_category = std::forward_iterator_tag; using value_type = HyperedgeID; - using reference = HyperedgeID&; - using pointer = const HyperedgeID*; + using reference = HyperedgeID &; + using pointer = const HyperedgeID *; using difference_type = std::ptrdiff_t; - EdgeIterator(const HypernodeID u, - const DynamicAdjacencyArray* dynamic_adjacency_array); + EdgeIterator(const HypernodeID u, const DynamicAdjacencyArray *dynamic_adjacency_array); - HyperedgeID operator* () const; + HyperedgeID operator*() const; - EdgeIterator & operator++ (); + EdgeIterator &operator++(); - EdgeIterator 
operator++ (int) { + EdgeIterator operator++(int) + { EdgeIterator copy = *this; - operator++ (); + operator++(); return copy; } - bool operator!= (const EdgeIterator& rhs); + bool operator!=(const EdgeIterator &rhs); - bool operator== (const EdgeIterator& rhs); + bool operator==(const EdgeIterator &rhs); - private: +private: void traverse_headers(); void skip_invalid(); @@ -123,45 +125,45 @@ class EdgeIterator { HypernodeID _current_u; HyperedgeID _current_id; HyperedgeID _current_last_id; - const DynamicAdjacencyArray* _dynamic_adjacency_array; + const DynamicAdjacencyArray *_dynamic_adjacency_array; }; -class DynamicAdjacencyArray { - using HyperedgeVector = parallel::scalable_vector>; - using EdgeVector = parallel::scalable_vector>; - using ThreadLocalCounter = tbb::enumerable_thread_specific>; - using AtomicCounter = parallel::scalable_vector>; - - using AcquireLockFunc = std::function; - using ReleaseLockFunc = std::function; - using MarkEdgeFunc = std::function; - using CaseOneFunc = std::function; - using CaseTwoFunc = std::function; - #define NOOP_LOCK_FUNC [] (const HypernodeID) { } +class DynamicAdjacencyArray +{ + using HyperedgeVector = + parallel::scalable_vector >; + using EdgeVector = parallel::scalable_vector >; + using ThreadLocalCounter = + tbb::enumerable_thread_specific >; + using AtomicCounter = + parallel::scalable_vector >; + + using AcquireLockFunc = std::function; + using ReleaseLockFunc = std::function; + using MarkEdgeFunc = std::function; + using CaseOneFunc = std::function; + using CaseTwoFunc = std::function; +#define NOOP_LOCK_FUNC [](const HypernodeID) {} static constexpr bool enable_heavy_assert = false; - public: +public: // Represents one (directed) edge of a vertex. // Note that we maintain a direct link to the corresponding // backwards edge via its edge id, which is updated when any // edge ids change. - struct Edge { + struct Edge + { static_assert(sizeof(HyperedgeID) == sizeof(HypernodeID)); - bool isSinglePin() const { - return source == target; - } + bool isSinglePin() const { return source == target; } - bool isValid() const { - return target != kInvalidHypernode; - } + bool isValid() const { return target != kInvalidHypernode; } - void enable() { - target = source; - } + void enable() { target = source; } - void disable() { + void disable() + { ASSERT(isSinglePin()); target = kInvalidHypernode; } @@ -176,43 +178,33 @@ class DynamicAdjacencyArray { HyperedgeID back_edge; }; - struct RemovedEdge { + struct RemovedEdge + { // current id of removed edge HyperedgeID edge_id; // id of the edge before it was removed HyperedgeID old_id; }; - private: +private: // Header of the incident edge list of a vertex. The incident edge lists // contracted into one vertex are concatenated in a double linked list. - struct Header { + struct Header + { Header() : - prev(0), - next(0), - it_prev(0), - it_next(0), - tail(0), - first_active(0), - first_inactive(0), - degree(0), - is_head(false) { } + prev(0), next(0), it_prev(0), it_next(0), tail(0), first_active(0), + first_inactive(0), degree(0), is_head(false) + { + } explicit Header(const HypernodeID u) : - prev(u), - next(u), - it_prev(u), - it_next(u), - tail(u), - first_active(0), - first_inactive(0), - degree(0), - is_head(true) { } - - HyperedgeID size() const { - return first_inactive - first_active; + prev(u), next(u), it_prev(u), it_next(u), tail(u), first_active(0), + first_inactive(0), degree(0), is_head(true) + { } + HyperedgeID size() const { return first_inactive - first_active; } + // ! 
Previous incident edge list HypernodeID prev; // ! Next incident edge list @@ -240,11 +232,16 @@ class DynamicAdjacencyArray { // Used for detecting parallel edges. // Represents one edge with the required information // for detecting duplicates and removing the represented edge. - struct ParallelEdgeInformation { + struct ParallelEdgeInformation + { ParallelEdgeInformation() = default; - ParallelEdgeInformation(HypernodeID target, HyperedgeID edge_id, HyperedgeID unique_id): - target(target), edge_id(edge_id), unique_id(unique_id) { } + ParallelEdgeInformation(HypernodeID target, HyperedgeID edge_id, + HyperedgeID unique_id) : + target(target), + edge_id(edge_id), unique_id(unique_id) + { + } // ! Index of target node HypernodeID target; @@ -254,87 +251,88 @@ class DynamicAdjacencyArray { HyperedgeID unique_id; }; - using ThreadLocalParallelEdgeVector = tbb::enumerable_thread_specific>; + using ThreadLocalParallelEdgeVector = + tbb::enumerable_thread_specific >; - public: +public: using const_iterator = IncidentEdgeIterator; DynamicAdjacencyArray() : - _num_nodes(0), - _header_array(), - _edges(), - _removable_edges(), - _edge_mapping() { } - - DynamicAdjacencyArray(const HypernodeID num_nodes, - const EdgeVector& edge_vector, - const HyperedgeWeight* edge_weight = nullptr) : - _num_nodes(num_nodes), - _header_array(), - _edges(), - _thread_local_vec(), - _removable_edges(), - _edge_mapping() { + _num_nodes(0), _header_array(), _edges(), _removable_edges(), _edge_mapping() + { + } + + DynamicAdjacencyArray(const HypernodeID num_nodes, const EdgeVector &edge_vector, + const HyperedgeWeight *edge_weight = nullptr) : + _num_nodes(num_nodes), + _header_array(), _edges(), _thread_local_vec(), _removable_edges(), _edge_mapping() + { construct(edge_vector, edge_weight); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Edge& edge(const HyperedgeID e) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Edge &edge(const HyperedgeID e) const + { ASSERT(e < _edges.size(), "Edge" << e << "does not exist"); return _edges[e]; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Edge& edge(const HyperedgeID e) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Edge &edge(const HyperedgeID e) + { ASSERT(e <= _edges.size(), "Edge" << e << "does not exist"); return _edges[e]; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeID numNodes() const { - return _num_nodes; - } + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeID numNodes() const { return _num_nodes; } - HyperedgeID uniqueEdgeID(const HyperedgeID e) const { + HyperedgeID uniqueEdgeID(const HyperedgeID e) const + { return std::min(e, edge(e).back_edge); } // ! Degree of the vertex - HypernodeID nodeDegree(const HypernodeID u) const { + HypernodeID nodeDegree(const HypernodeID u) const + { ASSERT(u < _num_nodes, "Hypernode" << u << "does not exist"); return header(u).degree; } // ! Returns a range to loop over the incident edges of hypernode u. - IteratorRange incidentEdges(const HypernodeID u) const { + IteratorRange incidentEdges(const HypernodeID u) const + { ASSERT(u < _num_nodes, "Hypernode" << u << "does not exist"); return IteratorRange( - IncidentEdgeIterator(u, this, UL(0), false), - IncidentEdgeIterator(u, this, UL(0), true)); + IncidentEdgeIterator(u, this, UL(0), false), + IncidentEdgeIterator(u, this, UL(0), true)); } // ! Returns a range to loop over the incident edges of hypernode u. 
IteratorRange incidentEdges(const HypernodeID u, - const size_t pos) const { + const size_t pos) const + { ASSERT(u < _num_nodes, "Hypernode" << u << "does not exist"); return IteratorRange( - IncidentEdgeIterator(u, this, pos, false), - IncidentEdgeIterator(u, this, UL(0), true)); + IncidentEdgeIterator(u, this, pos, false), + IncidentEdgeIterator(u, this, UL(0), true)); } // ! Returns a range to loop over all edges. - IteratorRange edges() const { - return IteratorRange( - EdgeIterator(0, this), - EdgeIterator(_num_nodes, this)); + IteratorRange edges() const + { + return IteratorRange(EdgeIterator(0, this), + EdgeIterator(_num_nodes, this)); } - // ! Iterates in parallel over all active edges and calls function f // ! for each net - template - void doParallelForAllEdges(const F& f) const { - tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID& head) { + template + void doParallelForAllEdges(const F &f) const + { + tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID &head) { const HyperedgeID last = firstInactiveEdge(head); - for (HyperedgeID e = firstActiveEdge(head); e < last; ++e) { - if (edge(e).isValid()) { + for(HyperedgeID e = firstActiveEdge(head); e < last; ++e) + { + if(edge(e).isValid()) + { f(e); } } @@ -343,40 +341,37 @@ class DynamicAdjacencyArray { // ! Contracts two incident list of u and v, whereby u is the representative and // ! v the contraction partner of the contraction. The contraction involves to remove - // ! all incident edges shared between u and v from the incident edge list of v and append - // ! the list of v to u, while also updating the back edges of all affected edges. - void contract(const HypernodeID u, - const HypernodeID v, - const AcquireLockFunc& acquire_lock = NOOP_LOCK_FUNC, - const ReleaseLockFunc& release_lock = NOOP_LOCK_FUNC); + // ! all incident edges shared between u and v from the incident edge list of v and + // append ! the list of v to u, while also updating the back edges of all affected + // edges. + void contract(const HypernodeID u, const HypernodeID v, + const AcquireLockFunc &acquire_lock = NOOP_LOCK_FUNC, + const ReleaseLockFunc &release_lock = NOOP_LOCK_FUNC); // ! Uncontract two previously contracted vertices u and v. - // ! Uncontraction means restoring the incident edge list of v from the current list of u - // ! and updating all affected backward edges. - // ! Note, uncontraction must be done in relative contraction order - void uncontract(const HypernodeID u, - const HypernodeID v, - const AcquireLockFunc& acquire_lock = NOOP_LOCK_FUNC, - const ReleaseLockFunc& release_lock = NOOP_LOCK_FUNC); + // ! Uncontraction means restoring the incident edge list of v from the current list of + // u ! and updating all affected backward edges. ! Note, uncontraction must be done in + // relative contraction order + void uncontract(const HypernodeID u, const HypernodeID v, + const AcquireLockFunc &acquire_lock = NOOP_LOCK_FUNC, + const ReleaseLockFunc &release_lock = NOOP_LOCK_FUNC); // ! Uncontract two previously contracted vertices u and v. - // ! Uncontraction means restoring the incident edge list of v from the current list of u - // ! and updating all affected backward edges. - // ! Additionally it calls case_one_func for an edge e, if u and v were previously both - // ! adjacent to e and case_two_func if only v was previously adjacent to e. - // ! mark_edge must return whether the edge was already locked previously in this round of uncontractions. - // ! Note, uncontraction must be done in relative contraction order. 
- void uncontract(const HypernodeID u, - const HypernodeID v, - const MarkEdgeFunc& mark_edge, - const CaseOneFunc& case_one_func, - const CaseTwoFunc& case_two_func, - const AcquireLockFunc& acquire_lock, - const ReleaseLockFunc& release_lock); + // ! Uncontraction means restoring the incident edge list of v from the current list of + // u ! and updating all affected backward edges. ! Additionally it calls case_one_func + // for an edge e, if u and v were previously both ! adjacent to e and case_two_func if + // only v was previously adjacent to e. ! mark_edge must return whether the edge was + // already locked previously in this round of uncontractions. ! Note, uncontraction must + // be done in relative contraction order. + void uncontract(const HypernodeID u, const HypernodeID v, const MarkEdgeFunc &mark_edge, + const CaseOneFunc &case_one_func, const CaseTwoFunc &case_two_func, + const AcquireLockFunc &acquire_lock, + const ReleaseLockFunc &release_lock); parallel::scalable_vector removeSinglePinAndParallelEdges(); - void restoreSinglePinAndParallelEdges(const parallel::scalable_vector& edges_to_restore); + void restoreSinglePinAndParallelEdges( + const parallel::scalable_vector &edges_to_restore); DynamicAdjacencyArray copy(parallel_tag_t) const; @@ -386,112 +381,124 @@ class DynamicAdjacencyArray { void sortIncidentEdges(); - size_t size_in_bytes() const { - return _edges.size() * sizeof(Edge) - + _edge_mapping.size() * sizeof(HyperedgeID) - + _header_array.size() * sizeof(Header); + size_t size_in_bytes() const + { + return _edges.size() * sizeof(Edge) + _edge_mapping.size() * sizeof(HyperedgeID) + + _header_array.size() * sizeof(Header); } - private: +private: friend class IncidentEdgeIterator; friend class EdgeIterator; - class HeaderIterator { - public: + class HeaderIterator + { + public: using iterator_category = std::forward_iterator_tag; using value_type = HypernodeID; - using reference = HypernodeID&; - using pointer = const HypernodeID*; + using reference = HypernodeID &; + using pointer = const HypernodeID *; using difference_type = std::ptrdiff_t; HeaderIterator(const HypernodeID u, - const DynamicAdjacencyArray* dynamic_adjacency_array, - const bool end): - _u(u), - _current_u(u), - _dynamic_adjacency_array(dynamic_adjacency_array), - _end(end) { } - - HypernodeID operator* () const { - return _current_u; + const DynamicAdjacencyArray *dynamic_adjacency_array, const bool end) : + _u(u), + _current_u(u), _dynamic_adjacency_array(dynamic_adjacency_array), _end(end) + { } - HeaderIterator & operator++ () { + HypernodeID operator*() const { return _current_u; } + + HeaderIterator &operator++() + { _current_u = _dynamic_adjacency_array->header(_current_u).next; - if (_current_u == _u) { + if(_current_u == _u) + { _end = true; } return *this; } - HeaderIterator operator++ (int) { + HeaderIterator operator++(int) + { HeaderIterator copy = *this; - operator++ (); + operator++(); return copy; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool operator== (const HeaderIterator& rhs) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool operator==(const HeaderIterator &rhs) + { return _u == rhs._u && _end == rhs._end; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool operator!= (const HeaderIterator& rhs) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool operator!=(const HeaderIterator &rhs) + { return !(*this == rhs); } - private: + private: HypernodeID _u; HypernodeID _current_u; - const DynamicAdjacencyArray* _dynamic_adjacency_array; + const DynamicAdjacencyArray *_dynamic_adjacency_array; bool _end; }; - 
MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Header& header(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Header &header(const HypernodeID u) const + { ASSERT(u <= _num_nodes, "Hypernode" << u << "does not exist"); return _header_array[u]; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Header& header(const HypernodeID u) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Header &header(const HypernodeID u) + { ASSERT(u <= _num_nodes, "Hypernode" << u << "does not exist"); return _header_array[u]; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeID firstEdge(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeID firstEdge(const HypernodeID u) const + { ASSERT(u <= _num_nodes, "Hypernode" << u << "does not exist"); return header(u).first; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeID firstActiveEdge(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeID + firstActiveEdge(const HypernodeID u) const + { ASSERT(u <= _num_nodes, "Hypernode" << u << "does not exist"); return header(u).first_active; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeID firstInactiveEdge(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeID + firstInactiveEdge(const HypernodeID u) const + { ASSERT(u <= _num_nodes, "Hypernode" << u << "does not exist"); return header(u).first_inactive; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeID lastEdge(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeID lastEdge(const HypernodeID u) const + { ASSERT(u <= _num_nodes, "Hypernode" << u << "does not exist"); return header(u + 1).first; } // ! Returns a range to loop over the headers of node u. - IteratorRange headers(const HypernodeID u) const { + IteratorRange headers(const HypernodeID u) const + { ASSERT(u < _num_nodes, "Hypernode" << u << "does not exist"); - return IteratorRange( - HeaderIterator(u, this, false), - HeaderIterator(u, this, true)); + return IteratorRange(HeaderIterator(u, this, false), + HeaderIterator(u, this, true)); } - void initializeEdgeMapping(Array& mapping) { + void initializeEdgeMapping(Array &mapping) + { ASSERT(mapping.size() == _edges.size()); - tbb::parallel_for(ID(0), ID(mapping.size()), [&](const HyperedgeID e) { - mapping[e] = e; - }); + tbb::parallel_for(ID(0), ID(mapping.size()), + [&](const HyperedgeID e) { mapping[e] = e; }); } // ! 
Updates all backedges using the provided mapping - void applyEdgeMapping(Array& mapping) { + void applyEdgeMapping(Array &mapping) + { ASSERT(mapping.size() == _edges.size()); tbb::parallel_for(ID(0), ID(mapping.size()), [&](const HyperedgeID e) { edge(e).back_edge = mapping[edge(e).back_edge]; @@ -508,9 +515,11 @@ class DynamicAdjacencyArray { void restoreIteratorPointers(const HypernodeID u); - void restoreItLink(const HypernodeID u, const HypernodeID prev, const HypernodeID current); + void restoreItLink(const HypernodeID u, const HypernodeID prev, + const HypernodeID current); - void construct(const EdgeVector& edge_vector, const HyperedgeWeight* edge_weight = nullptr); + void construct(const EdgeVector &edge_vector, + const HyperedgeWeight *edge_weight = nullptr); bool verifyIteratorPointers(const HypernodeID u) const; @@ -525,5 +534,5 @@ class DynamicAdjacencyArray { Array _edge_mapping; }; -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/dynamic_graph.cpp b/mt-kahypar/datastructures/dynamic_graph.cpp index 13f2901c2..f8d10f462 100644 --- a/mt-kahypar/datastructures/dynamic_graph.cpp +++ b/mt-kahypar/datastructures/dynamic_graph.cpp @@ -29,38 +29,48 @@ #include "mt-kahypar/datastructures/dynamic_graph.h" #include "tbb/blocked_range.h" +#include "tbb/concurrent_queue.h" +#include "tbb/parallel_reduce.h" #include "tbb/parallel_scan.h" #include "tbb/parallel_sort.h" -#include "tbb/parallel_reduce.h" -#include "tbb/concurrent_queue.h" -#include "mt-kahypar/parallel/stl/scalable_queue.h" #include "mt-kahypar/datastructures/concurrent_bucket_map.h" #include "mt-kahypar/datastructures/streaming_vector.h" +#include "mt-kahypar/parallel/stl/scalable_queue.h" #include "mt-kahypar/utils/timer.h" namespace mt_kahypar { namespace ds { // ! Recomputes the total weight of the hypergraph (parallel) -void DynamicGraph::updateTotalWeight(parallel_tag_t) { - _total_weight = tbb::parallel_reduce(tbb::blocked_range(ID(0), numNodes()), 0, - [this](const tbb::blocked_range& range, HypernodeWeight init) { - HypernodeWeight weight = init; - for (HypernodeID hn = range.begin(); hn < range.end(); ++hn) { - if ( nodeIsEnabled(hn) ) { - weight += this->_nodes[hn].weight(); - } - } - return weight; - }, std::plus()) + _removed_degree_zero_hn_weight; +void DynamicGraph::updateTotalWeight(parallel_tag_t) +{ + _total_weight = + tbb::parallel_reduce( + tbb::blocked_range(ID(0), numNodes()), 0, + [this](const tbb::blocked_range &range, HypernodeWeight init) { + HypernodeWeight weight = init; + for(HypernodeID hn = range.begin(); hn < range.end(); ++hn) + { + if(nodeIsEnabled(hn)) + { + weight += this->_nodes[hn].weight(); + } + } + return weight; + }, + std::plus()) + + _removed_degree_zero_hn_weight; } // ! Recomputes the total weight of the hypergraph (sequential) -void DynamicGraph::updateTotalWeight() { +void DynamicGraph::updateTotalWeight() +{ _total_weight = 0; - for ( const HypernodeID& hn : nodes() ) { - if ( nodeIsEnabled(hn) ) { + for(const HypernodeID &hn : nodes()) + { + if(nodeIsEnabled(hn)) + { _total_weight += nodeWeight(hn); } } @@ -69,27 +79,29 @@ void DynamicGraph::updateTotalWeight() { /**! * Registers a contraction in the hypergraph whereas vertex u is the representative - * of the contraction and v its contraction partner. Several threads can call this function - * in parallel. 
The function adds the contraction of u and v to a contraction tree that determines - * a parallel execution order and synchronization points for all running contractions. - * The contraction can be executed by calling function contract(v, max_node_weight). + * of the contraction and v its contraction partner. Several threads can call this + * function in parallel. The function adds the contraction of u and v to a contraction + * tree that determines a parallel execution order and synchronization points for all + * running contractions. The contraction can be executed by calling function contract(v, + * max_node_weight). */ -bool DynamicGraph::registerContraction(const HypernodeID u, const HypernodeID v) { - return _contraction_tree.registerContraction(u, v, _version, - [&](HypernodeID u) { acquireHypernode(u); }, - [&](HypernodeID u) { releaseHypernode(u); }); +bool DynamicGraph::registerContraction(const HypernodeID u, const HypernodeID v) +{ + return _contraction_tree.registerContraction( + u, v, _version, [&](HypernodeID u) { acquireHypernode(u); }, + [&](HypernodeID u) { releaseHypernode(u); }); } /**! - * Contracts a previously registered contraction. Representative u of vertex v is looked up - * in the contraction tree and performed if there are no pending contractions in the subtree - * of v and the contractions respects the maximum allowed node weight. If (u,v) is the last - * pending contraction in the subtree of u then the function recursively contracts also - * u (if any contraction is registered). Therefore, function can return several contractions - * or also return an empty contraction vector. + * Contracts a previously registered contraction. Representative u of vertex v is looked + * up in the contraction tree and performed if there are no pending contractions in the + * subtree of v and the contractions respects the maximum allowed node weight. If (u,v) is + * the last pending contraction in the subtree of u then the function recursively + * contracts also u (if any contraction is registered). Therefore, function can return + * several contractions or also return an empty contraction vector. */ -size_t DynamicGraph::contract(const HypernodeID v, - const HypernodeWeight max_node_weight) { +size_t DynamicGraph::contract(const HypernodeID v, const HypernodeWeight max_node_weight) +{ ASSERT(_contraction_tree.parent(v) != v, "No contraction registered for node " << v); HypernodeID x = _contraction_tree.parent(v); @@ -98,10 +110,12 @@ size_t DynamicGraph::contract(const HypernodeID v, size_t num_contractions = 0; // We perform all contractions registered in the contraction tree // as long as there are no pending contractions - while ( x != y && res != ContractionResult::PENDING_CONTRACTIONS) { + while(x != y && res != ContractionResult::PENDING_CONTRACTIONS) + { // Perform Contraction res = contract(x, y, max_node_weight); - if ( res == ContractionResult::CONTRACTED ) { + if(res == ContractionResult::CONTRACTED) + { ++num_contractions; } y = x; @@ -119,15 +133,19 @@ size_t DynamicGraph::contract(const HypernodeID v, * greater than the maximum allowed node weight) or PENDING_CONTRACTIONS (in case * there are some unfinished contractions in the subtree of v) is returned. 
*/ -DynamicGraph::ContractionResult DynamicGraph::contract(const HypernodeID u, - const HypernodeID v, - const HypernodeWeight max_node_weight) { +DynamicGraph::ContractionResult +DynamicGraph::contract(const HypernodeID u, const HypernodeID v, + const HypernodeWeight max_node_weight) +{ // Acquire ownership in correct order to prevent deadlocks - if ( u < v ) { + if(u < v) + { acquireHypernode(u); acquireHypernode(v); - } else { + } + else + { acquireHypernode(v); acquireHypernode(u); } @@ -138,14 +156,16 @@ DynamicGraph::ContractionResult DynamicGraph::contract(const HypernodeID u, // 3.) Resulting node weight is less or equal than a predefined upper bound // 4.) Fixed Vertex Contraction is valid const bool contraction_partner_valid = - nodeIsEnabled(v) && _contraction_tree.pendingContractions(v) == 0; + nodeIsEnabled(v) && _contraction_tree.pendingContractions(v) == 0; const bool less_or_equal_than_max_node_weight = - hypernode(u).weight() + hypernode(v).weight() <= max_node_weight; + hypernode(u).weight() + hypernode(v).weight() <= max_node_weight; const bool valid_contraction = - contraction_partner_valid && less_or_equal_than_max_node_weight && - ( !hasFixedVertices() || - /** only run this if all previous checks were successful */ _fixed_vertices.contract(u, v) ); - if ( valid_contraction ) { + contraction_partner_valid && less_or_equal_than_max_node_weight && + (!hasFixedVertices() || + /** only run this if all previous checks were successful */ _fixed_vertices + .contract(u, v)); + if(valid_contraction) + { ASSERT(nodeIsEnabled(u), "Hypernode" << u << "is disabled!"); hypernode(u).setWeight(nodeWeight(u) + nodeWeight(v)); hypernode(v).disable(); @@ -155,28 +175,29 @@ DynamicGraph::ContractionResult DynamicGraph::contract(const HypernodeID u, HypernodeID contraction_start = _contraction_index.load(); // Contract incident net lists of u and v - _adjacency_array.contract(u, v, [&](const HypernodeID u) { - acquireHypernode(u); - }, [&](const HypernodeID u) { - releaseHypernode(u); - }); + _adjacency_array.contract( + u, v, [&](const HypernodeID u) { acquireHypernode(u); }, + [&](const HypernodeID u) { releaseHypernode(u); }); HypernodeID contraction_end = ++_contraction_index; acquireHypernode(u); _contraction_tree.unregisterContraction(u, v, contraction_start, contraction_end); releaseHypernode(u); return ContractionResult::CONTRACTED; - } else { + } + else + { ContractionResult res = ContractionResult::PENDING_CONTRACTIONS; const bool fixed_vertex_contraction_failed = - contraction_partner_valid && less_or_equal_than_max_node_weight; - if ( ( !less_or_equal_than_max_node_weight || fixed_vertex_contraction_failed ) && - nodeIsEnabled(v) && _contraction_tree.parent(v) == u ) { - _contraction_tree.unregisterContraction(u, v, - kInvalidHypernode, kInvalidHypernode, true /* failed */); + contraction_partner_valid && less_or_equal_than_max_node_weight; + if((!less_or_equal_than_max_node_weight || fixed_vertex_contraction_failed) && + nodeIsEnabled(v) && _contraction_tree.parent(v) == u) + { + _contraction_tree.unregisterContraction(u, v, kInvalidHypernode, kInvalidHypernode, + true /* failed */); res = fixed_vertex_contraction_failed ? 
- ContractionResult::INVALID_FIXED_VERTEX_CONTRACTION : - ContractionResult::WEIGHT_LIMIT_REACHED; + ContractionResult::INVALID_FIXED_VERTEX_CONTRACTION : + ContractionResult::WEIGHT_LIMIT_REACHED; } releaseHypernode(u); releaseHypernode(v); @@ -184,62 +205,68 @@ DynamicGraph::ContractionResult DynamicGraph::contract(const HypernodeID u, } } - /** - * Uncontracts a batch of contractions in parallel. The batches must be uncontracted exactly - * in the order computed by the function createBatchUncontractionHierarchy(...). - * The two uncontraction functions are required by the partitioned graph to update - * gain cache values. - */ -void DynamicGraph::uncontract(const Batch& batch, - const MarkEdgeFunc& mark_edge, - const UncontractionFunction& case_one_func, - const UncontractionFunction& case_two_func) { +/** + * Uncontracts a batch of contractions in parallel. The batches must be uncontracted + * exactly in the order computed by the function createBatchUncontractionHierarchy(...). + * The two uncontraction functions are required by the partitioned graph to update + * gain cache values. + */ +void DynamicGraph::uncontract(const Batch &batch, const MarkEdgeFunc &mark_edge, + const UncontractionFunction &case_one_func, + const UncontractionFunction &case_two_func) +{ ASSERT(batch.size() > UL(0)); - ASSERT([&] { - const HypernodeID expected_batch_index = hypernode(batch[0].v).batchIndex(); - for ( const Memento& memento : batch ) { - if ( hypernode(memento.v).batchIndex() != expected_batch_index ) { - LOG << "Batch contains uncontraction from different batches." - << "Hypernode" << memento.v << "with version" << hypernode(memento.v).batchIndex() - << "but expected is" << expected_batch_index; - return false; - } - if ( _contraction_tree.version(memento.v) != _version ) { - LOG << "Batch contains uncontraction from a different version." - << "Hypernode" << memento.v << "with version" << _contraction_tree.version(memento.v) - << "but expected is" << _version; - return false; - } - } - return true; - }(), "Batch contains uncontractions from different batches or from a different hypergraph version"); + ASSERT( + [&] { + const HypernodeID expected_batch_index = hypernode(batch[0].v).batchIndex(); + for(const Memento &memento : batch) + { + if(hypernode(memento.v).batchIndex() != expected_batch_index) + { + LOG << "Batch contains uncontraction from different batches." + << "Hypernode" << memento.v << "with version" + << hypernode(memento.v).batchIndex() << "but expected is" + << expected_batch_index; + return false; + } + if(_contraction_tree.version(memento.v) != _version) + { + LOG << "Batch contains uncontraction from a different version." 
+ << "Hypernode" << memento.v << "with version" + << _contraction_tree.version(memento.v) << "but expected is" << _version; + return false; + } + } + return true; + }(), + "Batch contains uncontractions from different batches or from a different " + "hypergraph version"); tbb::parallel_for(UL(0), batch.size(), [&](const size_t i) { - const Memento& memento = batch[i]; + const Memento &memento = batch[i]; ASSERT(!hypernode(memento.u).isDisabled(), "Hypernode" << memento.u << "is disabled"); - ASSERT(hypernode(memento.v).isDisabled(), "Hypernode" << memento.v << "is not invalid"); + ASSERT(hypernode(memento.v).isDisabled(), + "Hypernode" << memento.v << "is not invalid"); // Restore incident net list of u and v - _adjacency_array.uncontract(memento.u, memento.v, mark_edge, - [&](const HyperedgeID e) { - case_one_func(memento.u, memento.v, e); - }, [&](const HyperedgeID e) { - case_two_func(memento.u, memento.v, e); - }, [&](const HypernodeID u) { - acquireHypernode(u); - }, [&](const HypernodeID u) { - releaseHypernode(u); - }); + _adjacency_array.uncontract( + memento.u, memento.v, mark_edge, + [&](const HyperedgeID e) { case_one_func(memento.u, memento.v, e); }, + [&](const HyperedgeID e) { case_two_func(memento.u, memento.v, e); }, + [&](const HypernodeID u) { acquireHypernode(u); }, + [&](const HypernodeID u) { releaseHypernode(u); }); acquireHypernode(memento.u); // Restore hypernode v which includes enabling it and subtract its weight // from its representative hypernode(memento.v).enable(); - hypernode(memento.u).setWeight(hypernode(memento.u).weight() - hypernode(memento.v).weight()); + hypernode(memento.u).setWeight(hypernode(memento.u).weight() - + hypernode(memento.v).weight()); releaseHypernode(memento.u); // Revert contraction in fixed vertex support - if ( hasFixedVertices() ) { + if(hasFixedVertices()) + { _fixed_vertices.uncontract(memento.u, memento.v); } }); @@ -252,9 +279,12 @@ void DynamicGraph::uncontract(const Batch& batch, * single-pin and parallel net detection. Once we process all batches of a versioned * batch vector, we have to restore all previously removed single-pin and parallel nets * in order to uncontract the next batch vector. 
We create for each version of the - * hypergraph a seperate batch uncontraction hierarchy (see createBatchUncontractionHierarchyOfVersion(...)) + * hypergraph a seperate batch uncontraction hierarchy (see + * createBatchUncontractionHierarchyOfVersion(...)) */ -VersionedBatchVector DynamicGraph::createBatchUncontractionHierarchy(const size_t batch_size) { +VersionedBatchVector +DynamicGraph::createBatchUncontractionHierarchy(const size_t batch_size) +{ const size_t num_versions = _version + 1; // Finalizes the contraction tree such that it is traversable in a top-down fashion // and contains subtree size for each tree node @@ -263,12 +293,15 @@ VersionedBatchVector DynamicGraph::createBatchUncontractionHierarchy(const size_ VersionedBatchVector versioned_batches(num_versions); parallel::scalable_vector batch_sizes_prefix_sum(num_versions, 0); BatchIndexAssigner batch_index_assigner(numNodes(), batch_size); - for ( size_t version = 0; version < num_versions; ++version ) { + for(size_t version = 0; version < num_versions; ++version) + { versioned_batches[version] = - _contraction_tree.createBatchUncontractionHierarchyForVersion(batch_index_assigner, version); - if ( version > 0 ) { + _contraction_tree.createBatchUncontractionHierarchyForVersion( + batch_index_assigner, version); + if(version > 0) + { batch_sizes_prefix_sum[version] = - batch_sizes_prefix_sum[version - 1] + versioned_batches[version - 1].size(); + batch_sizes_prefix_sum[version - 1] + versioned_batches[version - 1].size(); } batch_index_assigner.reset(versioned_batches[version].size()); } @@ -281,22 +314,28 @@ VersionedBatchVector DynamicGraph::createBatchUncontractionHierarchy(const size_ * of a set of identical nets is aggregated in one representative hyperedge * and single-pin hyperedges are removed. Returns a vector of removed hyperedges. */ -parallel::scalable_vector DynamicGraph::removeSinglePinAndParallelHyperedges() { +parallel::scalable_vector +DynamicGraph::removeSinglePinAndParallelHyperedges() +{ ++_version; return _adjacency_array.removeSinglePinAndParallelEdges(); } /** - * Restores a previously removed set of singple-pin and parallel hyperedges. Note, that hes_to_restore - * must be exactly the same and given in the reverse order as returned by removeSinglePinAndParallelNets(...). + * Restores a previously removed set of singple-pin and parallel hyperedges. Note, that + * hes_to_restore must be exactly the same and given in the reverse order as returned by + * removeSinglePinAndParallelNets(...). */ -void DynamicGraph::restoreSinglePinAndParallelNets(const parallel::scalable_vector& hes_to_restore) { +void DynamicGraph::restoreSinglePinAndParallelNets( + const parallel::scalable_vector &hes_to_restore) +{ _adjacency_array.restoreSinglePinAndParallelEdges(hes_to_restore); --_version; } // ! 
Copy dynamic hypergraph in parallel -DynamicGraph DynamicGraph::copy(parallel_tag_t) const { +DynamicGraph DynamicGraph::copy(parallel_tag_t) const +{ DynamicGraph hypergraph; hypergraph._num_removed_nodes = _num_removed_nodes; @@ -306,28 +345,29 @@ DynamicGraph DynamicGraph::copy(parallel_tag_t) const { hypergraph._version = _version; hypergraph._contraction_index.store(_contraction_index.load()); - tbb::parallel_invoke([&] { - hypergraph._nodes.resize(_nodes.size()); - memcpy(hypergraph._nodes.data(), _nodes.data(), - sizeof(Node) * _nodes.size()); - }, [&] { - hypergraph._adjacency_array = _adjacency_array.copy(parallel_tag_t()); - }, [&] { - hypergraph._acquired_nodes.resize(_acquired_nodes.size()); - tbb::parallel_for(ID(0), numNodes(), [&](const HypernodeID& hn) { - hypergraph._acquired_nodes[hn] = _acquired_nodes[hn]; - }); - }, [&] { - hypergraph._contraction_tree = _contraction_tree.copy(parallel_tag_t()); - }, [&] { - hypergraph._fixed_vertices = _fixed_vertices.copy(); - hypergraph._fixed_vertices.setHypergraph(&hypergraph); - }); + tbb::parallel_invoke( + [&] { + hypergraph._nodes.resize(_nodes.size()); + memcpy(hypergraph._nodes.data(), _nodes.data(), sizeof(Node) * _nodes.size()); + }, + [&] { hypergraph._adjacency_array = _adjacency_array.copy(parallel_tag_t()); }, + [&] { + hypergraph._acquired_nodes.resize(_acquired_nodes.size()); + tbb::parallel_for(ID(0), numNodes(), [&](const HypernodeID &hn) { + hypergraph._acquired_nodes[hn] = _acquired_nodes[hn]; + }); + }, + [&] { hypergraph._contraction_tree = _contraction_tree.copy(parallel_tag_t()); }, + [&] { + hypergraph._fixed_vertices = _fixed_vertices.copy(); + hypergraph._fixed_vertices.setHypergraph(&hypergraph); + }); return hypergraph; } // ! Copy dynamic hypergraph sequential -DynamicGraph DynamicGraph::copy() const { +DynamicGraph DynamicGraph::copy() const +{ DynamicGraph hypergraph; hypergraph._num_removed_nodes = _num_removed_nodes; @@ -338,11 +378,11 @@ DynamicGraph DynamicGraph::copy() const { hypergraph._contraction_index.store(_contraction_index.load()); hypergraph._nodes.resize(_nodes.size()); - memcpy(hypergraph._nodes.data(), _nodes.data(), - sizeof(Node) * _nodes.size()); - hypergraph._adjacency_array = _adjacency_array.copy(parallel_tag_t()); + memcpy(hypergraph._nodes.data(), _nodes.data(), sizeof(Node) * _nodes.size()); + hypergraph._adjacency_array = _adjacency_array.copy(parallel_tag_t()); hypergraph._acquired_nodes.resize(numNodes()); - for ( HypernodeID hn = 0; hn < numNodes(); ++hn ) { + for(HypernodeID hn = 0; hn < numNodes(); ++hn) + { hypergraph._acquired_nodes[hn] = _acquired_nodes[hn]; } hypergraph._contraction_tree = _contraction_tree.copy(); @@ -352,54 +392,67 @@ DynamicGraph DynamicGraph::copy() const { return hypergraph; } -void DynamicGraph::memoryConsumption(utils::MemoryTreeNode* parent) const { +void DynamicGraph::memoryConsumption(utils::MemoryTreeNode *parent) const +{ ASSERT(parent); parent->addChild("Hypernodes", sizeof(Node) * _nodes.size()); parent->addChild("Incident Nets", _adjacency_array.size_in_bytes()); parent->addChild("Hypernode Ownership Vector", sizeof(bool) * _acquired_nodes.size()); - utils::MemoryTreeNode* contraction_tree_node = parent->addChild("Contraction Tree"); + utils::MemoryTreeNode *contraction_tree_node = parent->addChild("Contraction Tree"); _contraction_tree.memoryConsumption(contraction_tree_node); - if ( hasFixedVertices() ) { + if(hasFixedVertices()) + { parent->addChild("Fixed Vertex Support", _fixed_vertices.size_in_bytes()); } } // ! 
Only for testing -bool DynamicGraph::verifyIncidenceArrayAndIncidentNets() { +bool DynamicGraph::verifyIncidenceArrayAndIncidentNets() +{ bool success = true; - tbb::parallel_invoke([&] { - doParallelForAllNodes([&](const HypernodeID& hn) { - for ( const HyperedgeID& he : incidentEdges(hn) ) { - if (edgeSource(he) != hn) { - LOG << "Edge" << he << "has source" << edgeSource(he) << "but should be" << hn; - success = false; - } - const HypernodeID back_target = edge(edge(he).back_edge).target; - if (back_target != hn) { - LOG << "Backedge" << edge(he).back_edge << "(of edge" << he - << ") has target" << back_target << "but should be" << hn; - success = false; - } - } - }); - }, [&] { - doParallelForAllEdges([&](const HyperedgeID& he) { - bool found = false; - for ( const HyperedgeID& e : incidentEdges(edgeSource(he)) ) { - if ( e == he ) { - found = true; - break; - } - } - if ( !found ) { - LOG << "Edge" << he << "not found in incident nets of vertex" << edgeSource(he); - success = false; - } - }); - }); + tbb::parallel_invoke( + [&] { + doParallelForAllNodes([&](const HypernodeID &hn) { + for(const HyperedgeID &he : incidentEdges(hn)) + { + if(edgeSource(he) != hn) + { + LOG << "Edge" << he << "has source" << edgeSource(he) << "but should be" + << hn; + success = false; + } + const HypernodeID back_target = edge(edge(he).back_edge).target; + if(back_target != hn) + { + LOG << "Backedge" << edge(he).back_edge << "(of edge" << he + << ") has target" << back_target << "but should be" << hn; + success = false; + } + } + }); + }, + [&] { + doParallelForAllEdges([&](const HyperedgeID &he) { + bool found = false; + for(const HyperedgeID &e : incidentEdges(edgeSource(he))) + { + if(e == he) + { + found = true; + break; + } + } + if(!found) + { + LOG << "Edge" << he << "not found in incident nets of vertex" + << edgeSource(he); + success = false; + } + }); + }); return success; } diff --git a/mt-kahypar/datastructures/dynamic_graph.h b/mt-kahypar/datastructures/dynamic_graph.h index 90e867471..1d4559fa8 100644 --- a/mt-kahypar/datastructures/dynamic_graph.h +++ b/mt-kahypar/datastructures/dynamic_graph.h @@ -35,18 +35,18 @@ #include "include/libmtkahypartypes.h" -#include "kahypar-resources/meta/mandatory.h" #include "kahypar-resources/datastructure/fast_reset_flag_array.h" +#include "kahypar-resources/meta/mandatory.h" #include "kahypar-resources/utils/math.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" -#include "mt-kahypar/datastructures/fixed_vertex_support.h" -#include "mt-kahypar/datastructures/dynamic_adjacency_array.h" #include "mt-kahypar/datastructures/contraction_tree.h" +#include "mt-kahypar/datastructures/dynamic_adjacency_array.h" +#include "mt-kahypar/datastructures/fixed_vertex_support.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" -#include "mt-kahypar/utils/memory_tree.h" #include "mt-kahypar/utils/exception.h" +#include "mt-kahypar/utils/memory_tree.h" namespace mt_kahypar { namespace ds { @@ -56,7 +56,8 @@ class DynamicGraphFactory; template class PartitionedGraph; -class DynamicGraph { +class DynamicGraph +{ static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; @@ -66,84 +67,81 @@ class DynamicGraph { // ! In order to update gain cache correctly for an uncontraction (u,v), // ! the partitioned hypergraph has to know wheter v replaces u in a hyperedge - // ! 
or both a incident to that hyperedge after uncontraction. Therefore, the partitioned - // ! hypergraph passes two lambda functions to the batch uncontraction function, one for - // ! each case. - using UncontractionFunction = std::function; - using MarkEdgeFunc = std::function; - #define NOOP_BATCH_FUNC [] (const HypernodeID, const HypernodeID, const HyperedgeID) { } + // ! or both a incident to that hyperedge after uncontraction. Therefore, the + // partitioned ! hypergraph passes two lambda functions to the batch uncontraction + // function, one for ! each case. + using UncontractionFunction = + std::function; + using MarkEdgeFunc = std::function; +#define NOOP_BATCH_FUNC [](const HypernodeID, const HypernodeID, const HyperedgeID) {} // Represents a uncontraction that is assigned to a certain batch // and within that batch to a certain position. - struct BatchAssignment { + struct BatchAssignment + { HypernodeID u; HypernodeID v; size_t batch_index; size_t batch_pos; }; - private: +private: /** * Represents a hypernode of the hypergraph and contains all information * associated with a vertex. */ - class Node { - public: + class Node + { + public: using IDType = HypernodeID; Node() : - _weight(1), - _community_id(0), - _batch_idx(std::numeric_limits::max()), - _valid(false) { } + _weight(1), _community_id(0), _batch_idx(std::numeric_limits::max()), + _valid(false) + { + } Node(const bool valid) : - _weight(1), - _community_id(0), - _batch_idx(std::numeric_limits::max()), - _valid(valid) { } - - bool isDisabled() const { - return _valid == false; + _weight(1), _community_id(0), _batch_idx(std::numeric_limits::max()), + _valid(valid) + { } - void enable() { + bool isDisabled() const { return _valid == false; } + + void enable() + { ASSERT(isDisabled()); _valid = true; } - void disable() { + void disable() + { ASSERT(!isDisabled()); _valid = false; } - HyperedgeWeight weight() const { - return _weight; - } + HyperedgeWeight weight() const { return _weight; } - void setWeight(HyperedgeWeight weight) { + void setWeight(HyperedgeWeight weight) + { ASSERT(!isDisabled()); _weight = weight; } - PartitionID communityID() const { - return _community_id; - } + PartitionID communityID() const { return _community_id; } - void setCommunityID(const PartitionID community_id) { + void setCommunityID(const PartitionID community_id) + { ASSERT(!isDisabled()); _community_id = community_id; } - HypernodeID batchIndex() const { - return _batch_idx; - } + HypernodeID batchIndex() const { return _batch_idx; } - void setBatchIndex(const HypernodeID batch_idx) { - _batch_idx = batch_idx; - } + void setBatchIndex(const HypernodeID batch_idx) { _batch_idx = batch_idx; } - private: + private: // ! Hypernode weight HypernodeWeight _weight; // ! Community id @@ -171,13 +169,14 @@ class DynamicGraph { * hypernodes/hyperedges. */ template - class HypergraphElementIterator { - public: + class HypergraphElementIterator + { + public: using IDType = typename ElementType::IDType; using iterator_category = std::forward_iterator_tag; using value_type = IDType; - using reference = IDType&; - using pointer = const IDType*; + using reference = IDType &; + using pointer = const IDType *; using difference_type = std::ptrdiff_t; /*! 
@@ -191,52 +190,51 @@ class DynamicGraph { * \param id The index of the element the pointer points to * \param max_id The maximum index allowed */ - HypergraphElementIterator(const ElementType* start_element, IDType id, IDType max_id) : - _id(id), - _max_id(max_id), - _element(start_element) { - if (_id != _max_id && _element->isDisabled()) { - operator++ (); + HypergraphElementIterator(const ElementType *start_element, IDType id, + IDType max_id) : + _id(id), + _max_id(max_id), _element(start_element) + { + if(_id != _max_id && _element->isDisabled()) + { + operator++(); } } // ! Returns the id of the element the iterator currently points to. - IDType operator* () const { - return _id; - } + IDType operator*() const { return _id; } // ! Prefix increment. The iterator advances to the next valid element. - HypergraphElementIterator & operator++ () { + HypergraphElementIterator &operator++() + { ASSERT(_id < _max_id); - do { + do + { ++_id; ++_element; - } while (_id < _max_id && _element->isDisabled()); + } while(_id < _max_id && _element->isDisabled()); return *this; } // ! Postfix increment. The iterator advances to the next valid element. - HypergraphElementIterator operator++ (int) { + HypergraphElementIterator operator++(int) + { HypergraphElementIterator copy = *this; - operator++ (); + operator++(); return copy; } - bool operator!= (const HypergraphElementIterator& rhs) { - return _id != rhs._id; - } + bool operator!=(const HypergraphElementIterator &rhs) { return _id != rhs._id; } - bool operator== (const HypergraphElementIterator& rhs) { - return _id == rhs._id; - } + bool operator==(const HypergraphElementIterator &rhs) { return _id == rhs._id; } - private: + private: // Handle to the HypergraphElement the iterator currently points to IDType _id = 0; // Maximum allowed index IDType _max_id = 0; // HypergraphElement the iterator currently points to - const ElementType* _element = nullptr; + const ElementType *_element = nullptr; }; /*! @@ -244,35 +242,38 @@ class DynamicGraph { * * Note that because this is a graph, each edge has exactly two pins. */ - class PinIterator { - public: + class PinIterator + { + public: using iterator_category = std::forward_iterator_tag; using value_type = HypernodeID; - using reference = HypernodeID&; - using pointer = const HypernodeID*; + using reference = HypernodeID &; + using pointer = const HypernodeID *; using difference_type = std::ptrdiff_t; /*! * Constructs a pin iterator based on the IDs of the two nodes */ PinIterator(HypernodeID source, HypernodeID target, unsigned int iteration_count) : - _source(source), - _target(target), - _iteration_count(iteration_count) { + _source(source), _target(target), _iteration_count(iteration_count) + { // ASSERT(target != kInvalidHypernode); -- doesn't hold for parallel contractions } // ! Returns the id of the element the iterator currently points to. - HypernodeID operator* () const { + HypernodeID operator*() const + { ASSERT(_iteration_count < 2); return _iteration_count == 0 ? _source : _target; } // ! Prefix increment. The iterator advances to the next valid element. - PinIterator & operator++ () { + PinIterator &operator++() + { ASSERT(_iteration_count < 2); ++_iteration_count; - if (_iteration_count == 1 && (_source == _target || _target == kInvalidHypernode)) { + if(_iteration_count == 1 && (_source == _target || _target == kInvalidHypernode)) + { // the edge is a single pin edge ++_iteration_count; } @@ -280,23 +281,26 @@ class DynamicGraph { } // ! Postfix increment. 
The iterator advances to the next valid element. - PinIterator operator++ (int) { + PinIterator operator++(int) + { PinIterator copy = *this; - operator++ (); + operator++(); return copy; } - bool operator!= (const PinIterator& rhs) { - return _iteration_count != rhs._iteration_count || - _source != rhs._source || _target != rhs._target; + bool operator!=(const PinIterator &rhs) + { + return _iteration_count != rhs._iteration_count || _source != rhs._source || + _target != rhs._target; } - bool operator== (const PinIterator& rhs) { - return _iteration_count == rhs._iteration_count && - _source == rhs._source && _target == rhs._target; + bool operator==(const PinIterator &rhs) + { + return _iteration_count == rhs._iteration_count && _source == rhs._source && + _target == rhs._target; } - private: + private: // source node of the edge HypernodeID _source = 0; // target node of the edge @@ -305,18 +309,21 @@ class DynamicGraph { unsigned int _iteration_count = 0; }; - enum class ContractionResult : uint8_t { + enum class ContractionResult : uint8_t + { CONTRACTED = 0, PENDING_CONTRACTIONS = 1, WEIGHT_LIMIT_REACHED = 2, INVALID_FIXED_VERTEX_CONTRACTION = 3 }; - using OwnershipVector = parallel::scalable_vector>; - using ThreadLocalHyperedgeVector = tbb::enumerable_thread_specific>; + using OwnershipVector = + parallel::scalable_vector >; + using ThreadLocalHyperedgeVector = + tbb::enumerable_thread_specific >; using Edge = DynamicAdjacencyArray::Edge; - public: +public: static constexpr bool is_graph = true; static constexpr bool is_static_hypergraph = false; static constexpr bool is_partitioned = false; @@ -339,37 +346,30 @@ class DynamicGraph { using ParallelHyperedge = DynamicAdjacencyArray::RemovedEdge; explicit DynamicGraph() : - _num_removed_nodes(0), - _removed_degree_zero_hn_weight(0), - _num_edges(0), - _total_weight(0), - _version(0), - _contraction_index(0), - _nodes(), - _contraction_tree(), - _adjacency_array(), - _acquired_nodes(), - _fixed_vertices() { } - - DynamicGraph(const DynamicGraph&) = delete; - DynamicGraph & operator= (const DynamicGraph &) = delete; - - DynamicGraph(DynamicGraph&& other) : - _num_removed_nodes(other._num_removed_nodes), - _removed_degree_zero_hn_weight(other._removed_degree_zero_hn_weight), - _num_edges(other._num_edges), - _total_weight(other._total_weight), - _version(other._version), - _contraction_index(0), - _nodes(std::move(other._nodes)), - _contraction_tree(std::move(other._contraction_tree)), - _adjacency_array(std::move(other._adjacency_array)), - _acquired_nodes(std::move(other._acquired_nodes)), - _fixed_vertices(std::move(other._fixed_vertices)) { + _num_removed_nodes(0), _removed_degree_zero_hn_weight(0), _num_edges(0), + _total_weight(0), _version(0), _contraction_index(0), _nodes(), _contraction_tree(), + _adjacency_array(), _acquired_nodes(), _fixed_vertices() + { + } + + DynamicGraph(const DynamicGraph &) = delete; + DynamicGraph &operator=(const DynamicGraph &) = delete; + + DynamicGraph(DynamicGraph &&other) : + _num_removed_nodes(other._num_removed_nodes), + _removed_degree_zero_hn_weight(other._removed_degree_zero_hn_weight), + _num_edges(other._num_edges), _total_weight(other._total_weight), + _version(other._version), _contraction_index(0), _nodes(std::move(other._nodes)), + _contraction_tree(std::move(other._contraction_tree)), + _adjacency_array(std::move(other._adjacency_array)), + _acquired_nodes(std::move(other._acquired_nodes)), + _fixed_vertices(std::move(other._fixed_vertices)) + { _fixed_vertices.setHypergraph(this); } 
- DynamicGraph & operator= (DynamicGraph&& other) { + DynamicGraph &operator=(DynamicGraph &&other) + { _num_removed_nodes = other._num_removed_nodes; _num_edges = other._num_edges; _removed_degree_zero_hn_weight = other._removed_degree_zero_hn_weight; @@ -385,57 +385,43 @@ class DynamicGraph { return *this; } - ~DynamicGraph() { - freeInternalData(); - } + ~DynamicGraph() { freeInternalData(); } // ####################### General Hypergraph Stats ####################### // ! Initial number of hypernodes - HypernodeID initialNumNodes() const { - return numNodes(); - } + HypernodeID initialNumNodes() const { return numNodes(); } // ! Number of removed hypernodes - HypernodeID numRemovedHypernodes() const { - return _num_removed_nodes; - } + HypernodeID numRemovedHypernodes() const { return _num_removed_nodes; } // ! Weight of removed degree zero vertics - HypernodeWeight weightOfRemovedDegreeZeroVertices() const { + HypernodeWeight weightOfRemovedDegreeZeroVertices() const + { return _removed_degree_zero_hn_weight; } // ! Initial number of hyperedges - HyperedgeID initialNumEdges() const { - return _num_edges; - } + HyperedgeID initialNumEdges() const { return _num_edges; } // ! Number of removed hyperedges - HyperedgeID numRemovedHyperedges() const { - return 0; - } + HyperedgeID numRemovedHyperedges() const { return 0; } // ! Set the number of removed hyperedges - void setNumRemovedHyperedges(const HyperedgeID num_removed_hyperedges) { + void setNumRemovedHyperedges(const HyperedgeID num_removed_hyperedges) + { ASSERT(num_removed_hyperedges == 0); unused(num_removed_hyperedges); } // ! Initial number of pins - HypernodeID initialNumPins() const { - return _num_edges; - } + HypernodeID initialNumPins() const { return _num_edges; } // ! Initial sum of the degree of all vertices - HypernodeID initialTotalVertexDegree() const { - return _num_edges; - } + HypernodeID initialTotalVertexDegree() const { return _num_edges; } // ! Total weight of hypergraph - HypernodeWeight totalWeight() const { - return _total_weight; - } + HypernodeWeight totalWeight() const { return _total_weight; } // ! Recomputes the total weight of the hypergraph (parallel) void updateTotalWeight(parallel_tag_t); @@ -447,10 +433,12 @@ class DynamicGraph { // ! Iterates in parallel over all active nodes and calls function f // ! for each vertex - template - void doParallelForAllNodes(const F& f) const { - tbb::parallel_for(ID(0), numNodes(), [&](const HypernodeID& hn) { - if ( nodeIsEnabled(hn) ) { + template + void doParallelForAllNodes(const F &f) const + { + tbb::parallel_for(ID(0), numNodes(), [&](const HypernodeID &hn) { + if(nodeIsEnabled(hn)) + { f(hn); } }); @@ -458,89 +446,90 @@ class DynamicGraph { // ! Iterates in parallel over all active edges and calls function f // ! for each net - template - void doParallelForAllEdges(const F& f) const { + template + void doParallelForAllEdges(const F &f) const + { _adjacency_array.doParallelForAllEdges(f); } // ! Returns a range of the active nodes of the hypergraph - IteratorRange nodes() const { + IteratorRange nodes() const + { return IteratorRange( - HypernodeIterator(_nodes.data(), ID(0), numNodes()), - HypernodeIterator(_nodes.data() + numNodes(), numNodes(), numNodes())); + HypernodeIterator(_nodes.data(), ID(0), numNodes()), + HypernodeIterator(_nodes.data() + numNodes(), numNodes(), numNodes())); } // ! 
Returns a range of the active edges of the hypergraph - IteratorRange edges() const { - return _adjacency_array.edges(); - } + IteratorRange edges() const { return _adjacency_array.edges(); } // ! Returns a range to loop over the incident edges of node u. - IteratorRange incidentEdges(const HypernodeID u) const { + IteratorRange incidentEdges(const HypernodeID u) const + { ASSERT(u < numNodes(), "Hypernode" << u << "does not exist"); return _adjacency_array.incidentEdges(u); } // ! Returns a range to loop over the pins of hyperedge e. - IteratorRange pins(const HyperedgeID id) const { - const Edge& e = edge(id); + IteratorRange pins(const HyperedgeID id) const + { + const Edge &e = edge(id); const HypernodeID source = e.source; const HypernodeID target = e.target; - return IteratorRange( - IncidenceIterator(source, target, 0), - IncidenceIterator(source, target, 2)); + return IteratorRange(IncidenceIterator(source, target, 0), + IncidenceIterator(source, target, 2)); } // ####################### Hypernode Information ####################### // ! Weight of a vertex - HypernodeWeight nodeWeight(const HypernodeID u) const { + HypernodeWeight nodeWeight(const HypernodeID u) const + { ASSERT(u < numNodes(), "Hypernode" << u << "does not exist"); return hypernode(u).weight(); } // ! Sets the weight of a vertex - void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) { + void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); return hypernode(u).setWeight(weight); } // ! Degree of a hypernode - HyperedgeID nodeDegree(const HypernodeID u) const { + HyperedgeID nodeDegree(const HypernodeID u) const + { ASSERT(u < numNodes(), "Hypernode" << u << "does not exist"); return _adjacency_array.nodeDegree(u); } // ! Returns, whether a hypernode is enabled or not - bool nodeIsEnabled(const HypernodeID u) const { - return !hypernode(u).isDisabled(); - } + bool nodeIsEnabled(const HypernodeID u) const { return !hypernode(u).isDisabled(); } // ! Enables a hypernode (must be disabled before) - void enableHypernode(const HypernodeID u) { - hypernode(u).enable(); - } + void enableHypernode(const HypernodeID u) { hypernode(u).enable(); } // ! Disables a hypernode (must be enabled before) - void disableHypernode(const HypernodeID u) { - hypernode(u).disable(); - } + void disableHypernode(const HypernodeID u) { hypernode(u).disable(); } // ! Removes a hypernode (must be enabled before) - void removeHypernode(const HypernodeID u) { + void removeHypernode(const HypernodeID u) + { hypernode(u).disable(); ++_num_removed_nodes; } // ! Removes a degree zero hypernode - void removeDegreeZeroHypernode(const HypernodeID u) { + void removeDegreeZeroHypernode(const HypernodeID u) + { ASSERT(nodeDegree(u) == 0); removeHypernode(u); _removed_degree_zero_hn_weight += nodeWeight(u); } // ! Restores a degree zero hypernode - void restoreDegreeZeroHypernode(const HypernodeID u) { + void restoreDegreeZeroHypernode(const HypernodeID u) + { hypernode(u).enable(); ASSERT(nodeDegree(u) == 0); _removed_degree_zero_hn_weight -= nodeWeight(u); @@ -549,76 +538,66 @@ class DynamicGraph { // ####################### Hyperedge Information ####################### // ! 
Accessor for hyperedge-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Edge& edge(const HyperedgeID e) const { - const Edge& he = _adjacency_array.edge(e); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Edge &edge(const HyperedgeID e) const + { + const Edge &he = _adjacency_array.edge(e); // ASSERT(he.isValid()); -- doesn't hold for parallel contractions return he; } // ! To avoid code duplication we implement non-const version in terms of const version - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Hyperedge& edge(const HyperedgeID e) { - Hyperedge& he = _adjacency_array.edge(e); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Hyperedge &edge(const HyperedgeID e) + { + Hyperedge &he = _adjacency_array.edge(e); // ASSERT(he.isValid()); -- doesn't hold for parallel contractions return he; } // ! Weight of an edge - HypernodeWeight edgeWeight(const HyperedgeID e) const { - return edge(e).weight; - } + HypernodeWeight edgeWeight(const HyperedgeID e) const { return edge(e).weight; } // ! Unique id of a hyperedge - HyperedgeID uniqueEdgeID(const HyperedgeID e) const { + HyperedgeID uniqueEdgeID(const HyperedgeID e) const + { return _adjacency_array.uniqueEdgeID(e); } // ! Range of unique id edge ids - HyperedgeID maxUniqueID() const { - return initialNumEdges(); - } + HyperedgeID maxUniqueID() const { return initialNumEdges(); } // ! Sets the weight of a hyperedge - void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) { + void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) + { edge(e).weight = weight; } // ! Number of pins of a hyperedge - HypernodeID edgeSize(const HyperedgeID e) const { - return isSinglePin(e) ? 1 : 2; - } + HypernodeID edgeSize(const HyperedgeID e) const { return isSinglePin(e) ? 1 : 2; } // ! Maximum size of a hyperedge - HypernodeID maxEdgeSize() const { - return 2; - } + HypernodeID maxEdgeSize() const { return 2; } // ! Returns, whether a hyperedge is enabled or not - bool edgeIsEnabled(const HyperedgeID) const { - return true; - } + bool edgeIsEnabled(const HyperedgeID) const { return true; } // ! Enables a hyperedge (must be disabled before) - void enableHyperedge(const HyperedgeID) { + void enableHyperedge(const HyperedgeID) + { throw NonSupportedOperationException( - "enableHyperedge() is not supported in dynamic graph"); + "enableHyperedge() is not supported in dynamic graph"); } - HyperedgeID edgeSource(const HyperedgeID e) const { - return edge(e).source; - } + HyperedgeID edgeSource(const HyperedgeID e) const { return edge(e).source; } - HyperedgeID edgeTarget(const HyperedgeID e) const { - return edge(e).target; - } + HyperedgeID edgeTarget(const HyperedgeID e) const { return edge(e).target; } - bool isSinglePin(const HyperedgeID e) const { - return edgeSource(e) == edgeTarget(e); - } + bool isSinglePin(const HyperedgeID e) const { return edgeSource(e) == edgeTarget(e); } // ####################### Community Information ####################### // ! Community id which hypernode u is assigned to - PartitionID communityID(const HypernodeID u) const { + PartitionID communityID(const HypernodeID u) const + { ASSERT(u < numNodes(), "Hypernode" << u << "does not exist"); return hypernode(u).communityID(); } @@ -626,96 +605,102 @@ class DynamicGraph { // ! Assign a community to a hypernode // ! Note, in order to use all community-related functions, initializeCommunities() // ! 
have to be called after assigning to each vertex a community id - void setCommunityID(const HypernodeID u, const PartitionID community_id) { + void setCommunityID(const HypernodeID u, const PartitionID community_id) + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); return hypernode(u).setCommunityID(community_id); } // ! Reset internal community information - void setCommunityIDs(const parallel::scalable_vector& community_ids) { + void setCommunityIDs(const parallel::scalable_vector &community_ids) + { ASSERT(community_ids.size() == UI64(numNodes())); - doParallelForAllNodes([&](const HypernodeID& hn) { - hypernode(hn).setCommunityID(community_ids[hn]); - }); + doParallelForAllNodes( + [&](const HypernodeID &hn) { hypernode(hn).setCommunityID(community_ids[hn]); }); } // ####################### Fixed Vertex Support ####################### - void addFixedVertexSupport(FixedVertexSupport&& fixed_vertices) { + void addFixedVertexSupport(FixedVertexSupport &&fixed_vertices) + { _fixed_vertices = std::move(fixed_vertices); _fixed_vertices.setHypergraph(this); } - bool hasFixedVertices() const { - return _fixed_vertices.hasFixedVertices(); - } + bool hasFixedVertices() const { return _fixed_vertices.hasFixedVertices(); } - HypernodeWeight totalFixedVertexWeight() const { + HypernodeWeight totalFixedVertexWeight() const + { return _fixed_vertices.totalFixedVertexWeight(); } - HypernodeWeight fixedVertexBlockWeight(const PartitionID block) const { + HypernodeWeight fixedVertexBlockWeight(const PartitionID block) const + { return _fixed_vertices.fixedVertexBlockWeight(block); } - bool isFixed(const HypernodeID hn) const { - return _fixed_vertices.isFixed(hn); - } + bool isFixed(const HypernodeID hn) const { return _fixed_vertices.isFixed(hn); } - PartitionID fixedVertexBlock(const HypernodeID hn) const { + PartitionID fixedVertexBlock(const HypernodeID hn) const + { return _fixed_vertices.fixedVertexBlock(hn); } - void setMaxFixedVertexBlockWeight(const std::vector max_block_weights) { + void setMaxFixedVertexBlockWeight(const std::vector max_block_weights) + { _fixed_vertices.setMaxBlockWeight(max_block_weights); } - const FixedVertexSupport& fixedVertexSupport() const { + const FixedVertexSupport &fixedVertexSupport() const + { return _fixed_vertices; } - FixedVertexSupport copyOfFixedVertexSupport() const { + FixedVertexSupport copyOfFixedVertexSupport() const + { return _fixed_vertices.copy(); } // ####################### Contract / Uncontract ####################### - DynamicGraph contract(parallel::scalable_vector&, bool deterministic = false) { + DynamicGraph contract(parallel::scalable_vector &, + bool deterministic = false) + { throw NonSupportedOperationException( - "contract(c, id) is not supported in dynamic graph"); + "contract(c, id) is not supported in dynamic graph"); return DynamicGraph(); } /**! * Registers a contraction in the hypergraph whereas vertex u is the representative - * of the contraction and v its contraction partner. Several threads can call this function - * in parallel. The function adds the contraction of u and v to a contraction tree that determines - * a parallel execution order and synchronization points for all running contractions. - * The contraction can be executed by calling function contract(v, max_node_weight). + * of the contraction and v its contraction partner. Several threads can call this + * function in parallel. 
The function adds the contraction of u and v to a contraction + * tree that determines a parallel execution order and synchronization points for all + * running contractions. The contraction can be executed by calling function contract(v, + * max_node_weight). */ bool registerContraction(const HypernodeID u, const HypernodeID v); /**! - * Contracts a previously registered contraction. Representative u of vertex v is looked up - * in the contraction tree and performed if there are no pending contractions in the subtree - * of v and the contractions respects the maximum allowed node weight. If (u,v) is the last - * pending contraction in the subtree of u then the function recursively contracts also - * u (if any contraction is registered). Therefore, function can return several contractions - * or also return an empty contraction vector. + * Contracts a previously registered contraction. Representative u of vertex v is looked + * up in the contraction tree and performed if there are no pending contractions in the + * subtree of v and the contractions respects the maximum allowed node weight. If (u,v) + * is the last pending contraction in the subtree of u then the function recursively + * contracts also u (if any contraction is registered). Therefore, function can return + * several contractions or also return an empty contraction vector. */ - size_t contract(const HypernodeID v, - const HypernodeWeight max_node_weight = std::numeric_limits::max()); + size_t contract(const HypernodeID v, const HypernodeWeight max_node_weight = + std::numeric_limits::max()); /** - * Uncontracts a batch of contractions in parallel. The batches must be uncontracted exactly - * in the order computed by the function createBatchUncontractionHierarchy(...). + * Uncontracts a batch of contractions in parallel. The batches must be uncontracted + * exactly in the order computed by the function createBatchUncontractionHierarchy(...). * The two uncontraction functions are required by the partitioned graph to update * gain cache values. */ - void uncontract(const Batch& batch, - const MarkEdgeFunc& mark_edge, - const UncontractionFunction& case_one_func = NOOP_BATCH_FUNC, - const UncontractionFunction& case_two_func = NOOP_BATCH_FUNC); + void uncontract(const Batch &batch, const MarkEdgeFunc &mark_edge, + const UncontractionFunction &case_one_func = NOOP_BATCH_FUNC, + const UncontractionFunction &case_two_func = NOOP_BATCH_FUNC); /** * Computes a batch uncontraction hierarchy. A batch is a vector of mementos @@ -724,14 +709,16 @@ class DynamicGraph { * single-pin and parallel net detection. Once we process all batches of a versioned * batch vector, we have to restore all previously removed single-pin and parallel nets * in order to uncontract the next batch vector. We create for each version of the - * hypergraph a seperate batch uncontraction hierarchy (see createBatchUncontractionHierarchyOfVersion(...)) + * hypergraph a seperate batch uncontraction hierarchy (see + * createBatchUncontractionHierarchyOfVersion(...)) */ VersionedBatchVector createBatchUncontractionHierarchy(const size_t batch_size); // ! Only for testing - VersionedBatchVector createBatchUncontractionHierarchy(ContractionTree&& tree, + VersionedBatchVector createBatchUncontractionHierarchy(ContractionTree &&tree, const size_t batch_size, - const size_t num_versions = 1) { + const size_t num_versions = 1) + { ASSERT(num_versions > 0); _version = num_versions - 1; _contraction_tree = std::move(tree); @@ -739,52 +726,55 @@ class DynamicGraph { } // ! 
Only for testing - HypernodeID contractionTree(const HypernodeID u) const { + HypernodeID contractionTree(const HypernodeID u) const + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); return _contraction_tree.parent(u); } // ! Only for testing - HypernodeID pendingContractions(const HypernodeID u) const { + HypernodeID pendingContractions(const HypernodeID u) const + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); return _contraction_tree.pendingContractions(u); } // ! Only for testing - void decrementPendingContractions(const HypernodeID u) { + void decrementPendingContractions(const HypernodeID u) + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); _contraction_tree.decrementPendingContractions(u); } // ! Only for testing - void sortIncidentEdges() { - _adjacency_array.sortIncidentEdges(); - } + void sortIncidentEdges() { _adjacency_array.sortIncidentEdges(); } // ####################### Remove / Restore Hyperedges ####################### /*! - * (Not supported.) - */ - void removeEdge(const HyperedgeID) { - throw NonSupportedOperationException( - "removeEdge is not supported in dynamic graph"); + * (Not supported.) + */ + void removeEdge(const HyperedgeID) + { + throw NonSupportedOperationException("removeEdge is not supported in dynamic graph"); } /*! - * (Not supported.) - */ - void removeLargeEdge(const HyperedgeID) { + * (Not supported.) + */ + void removeLargeEdge(const HyperedgeID) + { throw NonSupportedOperationException( - "removeLargeEdge is not supported in dynamic graph"); + "removeLargeEdge is not supported in dynamic graph"); } /*! * (Not supported.) */ - void restoreLargeEdge(const HyperedgeID&) { + void restoreLargeEdge(const HyperedgeID &) + { throw NonSupportedOperationException( - "restoreLargeEdge is not supported in dynamic graph"); + "restoreLargeEdge is not supported in dynamic graph"); } /** @@ -794,10 +784,12 @@ class DynamicGraph { parallel::scalable_vector removeSinglePinAndParallelHyperedges(); /** - * Restores a previously removed set of singple-pin and parallel hyperedges. Note, that hes_to_restore - * must be exactly the same and given in the reverse order as returned by removeSinglePinAndParallelNets(...). + * Restores a previously removed set of singple-pin and parallel hyperedges. Note, that + * hes_to_restore must be exactly the same and given in the reverse order as returned by + * removeSinglePinAndParallelNets(...). */ - void restoreSinglePinAndParallelNets(const parallel::scalable_vector& hes_to_restore); + void restoreSinglePinAndParallelNets( + const parallel::scalable_vector &hes_to_restore); // ####################### Copy ####################### @@ -808,57 +800,62 @@ class DynamicGraph { DynamicGraph copy() const; // ! Reset internal data structure - void reset() { + void reset() + { _contraction_tree.reset(); _adjacency_array.reset(); _version = 0; } // ! Free internal data in parallel - void freeInternalData() { - _num_edges = 0; - } + void freeInternalData() { _num_edges = 0; } - void freeTmpContractionBuffer() { + void freeTmpContractionBuffer() + { throw NonSupportedOperationException( - "freeTmpContractionBuffer() is not supported in dynamic hypergraph"); + "freeTmpContractionBuffer() is not supported in dynamic hypergraph"); } - void memoryConsumption(utils::MemoryTreeNode* parent) const; + void memoryConsumption(utils::MemoryTreeNode *parent) const; // ! 
Only for testing bool verifyIncidenceArrayAndIncidentNets(); - private: +private: friend class DynamicGraphFactory; - template + template friend class CommunitySupport; template friend class PartitionedGraph; // ####################### Acquiring / Releasing Ownership ####################### - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeID numNodes() const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeID numNodes() const + { return _adjacency_array.numNodes(); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void acquireHypernode(const HypernodeID u) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void acquireHypernode(const HypernodeID u) + { ASSERT(u < numNodes(), "Hypernode" << u << "does not exist"); bool expected = false; bool desired = true; - while ( !_acquired_nodes[u].compare_exchange_strong(expected, desired) ) { + while(!_acquired_nodes[u].compare_exchange_strong(expected, desired)) + { expected = false; } } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool tryAcquireHypernode(const HypernodeID u) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool tryAcquireHypernode(const HypernodeID u) + { ASSERT(u < numNodes(), "Hypernode" << u << "does not exist"); bool expected = false; bool desired = true; return _acquired_nodes[u].compare_exchange_strong(expected, desired); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void releaseHypernode(const HypernodeID u) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void releaseHypernode(const HypernodeID u) + { ASSERT(u < numNodes(), "Hypernode" << u << "does not exist"); ASSERT(_acquired_nodes[u], "Hypernode" << u << "is not acquired!"); _acquired_nodes[u] = false; @@ -868,7 +865,8 @@ class DynamicGraph { MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE IteratorRange incident_nets_of(const HypernodeID u, - const size_t pos = 0) const { + const size_t pos = 0) const + { ASSERT(u < numNodes(), "Hypernode" << u << "does not exist"); return _adjacency_array.incidentEdges(u, pos); } @@ -876,14 +874,16 @@ class DynamicGraph { // ####################### Hypernode Information ####################### // ! Accessor for hypernode-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Node& hypernode(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Node &hypernode(const HypernodeID u) const + { ASSERT(u <= numNodes(), "Hypernode" << u << "does not exist"); return _nodes[u]; } // ! To avoid code duplication we implement non-const version in terms of const version - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Node& hypernode(const HypernodeID u) { - return const_cast(static_cast(*this).hypernode(u)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Node &hypernode(const HypernodeID u) + { + return const_cast(static_cast(*this).hypernode(u)); } // ####################### Contract / Uncontract ####################### @@ -897,8 +897,7 @@ class DynamicGraph { * greater than the maximum allowed node weight) or PENDING_CONTRACTIONS (in case * there are some unfinished contractions in the subtree of v) is returned. */ - ContractionResult contract(const HypernodeID u, - const HypernodeID v, + ContractionResult contract(const HypernodeID u, const HypernodeID v, const HypernodeWeight max_node_weight); // ! 
Number of removed hypernodes diff --git a/mt-kahypar/datastructures/dynamic_graph_factory.cpp b/mt-kahypar/datastructures/dynamic_graph_factory.cpp index 3708fb7d0..ae9063c6d 100644 --- a/mt-kahypar/datastructures/dynamic_graph_factory.cpp +++ b/mt-kahypar/datastructures/dynamic_graph_factory.cpp @@ -33,158 +33,174 @@ #include "mt-kahypar/parallel/parallel_prefix_sum.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" -#include "mt-kahypar/utils/timer.h" #include "mt-kahypar/utils/exception.h" +#include "mt-kahypar/utils/timer.h" namespace mt_kahypar::ds { DynamicGraph DynamicGraphFactory::construct( - const HypernodeID num_nodes, - const HyperedgeID num_edges, - const HyperedgeVector& edge_vector, - const HyperedgeWeight* edge_weight, - const HypernodeWeight* node_weight, - const bool stable_construction_of_incident_edges) { + const HypernodeID num_nodes, const HyperedgeID num_edges, + const HyperedgeVector &edge_vector, const HyperedgeWeight *edge_weight, + const HypernodeWeight *node_weight, const bool stable_construction_of_incident_edges) +{ ASSERT(edge_vector.size() == num_edges); EdgeVector edges; edges.resize(num_edges); tbb::parallel_for(UL(0), edge_vector.size(), [&](const size_t i) { - const auto& e = edge_vector[i]; - if (e.size() != 2) { + const auto &e = edge_vector[i]; + if(e.size() != 2) + { throw InvalidInputException( - "Using graph data structure; but the input hypergraph is not a graph."); + "Using graph data structure; but the input hypergraph is not a graph."); } edges[i] = std::make_pair(e[0], e[1]); }); - return construct_from_graph_edges(num_nodes, num_edges, edges, - edge_weight, node_weight, stable_construction_of_incident_edges); + return construct_from_graph_edges(num_nodes, num_edges, edges, edge_weight, node_weight, + stable_construction_of_incident_edges); } DynamicGraph DynamicGraphFactory::construct_from_graph_edges( - const HypernodeID num_nodes, - const HyperedgeID num_edges, - const EdgeVector& edge_vector, - const HyperedgeWeight* edge_weight, - const HypernodeWeight* node_weight, - const bool stable_construction_of_incident_edges) { + const HypernodeID num_nodes, const HyperedgeID num_edges, + const EdgeVector &edge_vector, const HyperedgeWeight *edge_weight, + const HypernodeWeight *node_weight, const bool stable_construction_of_incident_edges) +{ DynamicGraph graph; ASSERT(edge_vector.size() == num_edges); graph._num_edges = 2 * num_edges; // TODO: calculate required id range - tbb::parallel_invoke([&] { - graph._nodes.resize(num_nodes + 1); - tbb::parallel_for(ID(0), num_nodes, [&](const HypernodeID n) { - // setup nodes - DynamicGraph::Node& node = graph._nodes[n]; - node.enable(); - if ( node_weight ) { - node.setWeight(node_weight[n]); - } - }); - // Compute total weight of graph - graph.updateTotalWeight(parallel_tag_t()); - }, [&] { - graph._adjacency_array = DynamicAdjacencyArray(num_nodes, edge_vector, edge_weight); - if (stable_construction_of_incident_edges) { - graph._adjacency_array.sortIncidentEdges(); - } - }, [&] { - graph._acquired_nodes.assign( - num_nodes, parallel::IntegralAtomicWrapper(false)); - }, [&] { - graph._contraction_tree.initialize(num_nodes); - }); + tbb::parallel_invoke( + [&] { + graph._nodes.resize(num_nodes + 1); + tbb::parallel_for(ID(0), num_nodes, [&](const HypernodeID n) { + // setup nodes + DynamicGraph::Node &node = graph._nodes[n]; + node.enable(); + if(node_weight) + { + node.setWeight(node_weight[n]); + } + }); + // Compute total weight of graph + graph.updateTotalWeight(parallel_tag_t()); + }, + [&] { 
+ graph._adjacency_array = + DynamicAdjacencyArray(num_nodes, edge_vector, edge_weight); + if(stable_construction_of_incident_edges) + { + graph._adjacency_array.sortIncidentEdges(); + } + }, + [&] { + graph._acquired_nodes.assign(num_nodes, + parallel::IntegralAtomicWrapper(false)); + }, + [&] { graph._contraction_tree.initialize(num_nodes); }); // Compute total weight of the graph graph.updateTotalWeight(parallel_tag_t()); return graph; } - -std::pair > DynamicGraphFactory::compactify(const DynamicGraph& graph) { +std::pair > +DynamicGraphFactory::compactify(const DynamicGraph &graph) +{ HypernodeID num_nodes = 0; HyperedgeID num_edges = 0; parallel::scalable_vector hn_mapping; parallel::scalable_vector he_mapping; // Computes a mapping for vertices and edges to a consecutive range of IDs // in the compactified hypergraph via a parallel prefix sum - tbb::parallel_invoke([&] { - hn_mapping.assign(graph.numNodes() + 1, 0); - graph.doParallelForAllNodes([&](const HypernodeID hn) { - hn_mapping[hn + 1] = ID(1); - }); - - parallel::TBBPrefixSum hn_mapping_prefix_sum(hn_mapping); - tbb::parallel_scan(tbb::blocked_range( - UL(0), graph.numNodes() + 1), hn_mapping_prefix_sum); - num_nodes = hn_mapping_prefix_sum.total_sum(); - }, [&] { - he_mapping.assign(graph._num_edges + 1, 0); - graph.doParallelForAllEdges([&](const HyperedgeID& he) { - if (graph.edgeSource(he) < graph.edgeTarget(he)) { - he_mapping[he + 1] = ID(1); - } - }); - - parallel::TBBPrefixSum he_mapping_prefix_sum(he_mapping); - tbb::parallel_scan(tbb::blocked_range( - UL(0), graph._num_edges + 1), he_mapping_prefix_sum); - num_edges = he_mapping_prefix_sum.total_sum(); - }); + tbb::parallel_invoke( + [&] { + hn_mapping.assign(graph.numNodes() + 1, 0); + graph.doParallelForAllNodes( + [&](const HypernodeID hn) { hn_mapping[hn + 1] = ID(1); }); + + parallel::TBBPrefixSum + hn_mapping_prefix_sum(hn_mapping); + tbb::parallel_scan(tbb::blocked_range(UL(0), graph.numNodes() + 1), + hn_mapping_prefix_sum); + num_nodes = hn_mapping_prefix_sum.total_sum(); + }, + [&] { + he_mapping.assign(graph._num_edges + 1, 0); + graph.doParallelForAllEdges([&](const HyperedgeID &he) { + if(graph.edgeSource(he) < graph.edgeTarget(he)) + { + he_mapping[he + 1] = ID(1); + } + }); + + parallel::TBBPrefixSum + he_mapping_prefix_sum(he_mapping); + tbb::parallel_scan(tbb::blocked_range(UL(0), graph._num_edges + 1), + he_mapping_prefix_sum); + num_edges = he_mapping_prefix_sum.total_sum(); + }); // Remap pins of each hyperedge - parallel::scalable_vector> edge_vector; + parallel::scalable_vector > edge_vector; parallel::scalable_vector edge_weights; parallel::scalable_vector node_weights; - tbb::parallel_invoke([&] { - node_weights.resize(num_nodes); - graph.doParallelForAllNodes([&](const HypernodeID hn) { - const HypernodeID mapped_hn = hn_mapping[hn]; - ASSERT(mapped_hn < num_nodes); - node_weights[mapped_hn] = graph.nodeWeight(hn); - }); - }, [&] { - edge_vector.resize(num_edges); - edge_weights.resize(num_edges); - graph.doParallelForAllEdges([&](const HyperedgeID he) { - if (graph.edgeSource(he) < graph.edgeTarget(he)) { - const HyperedgeID mapped_he = he_mapping[he]; - ASSERT(mapped_he < num_edges); - edge_weights[mapped_he] = graph.edgeWeight(he); - edge_vector[mapped_he] = {hn_mapping[graph.edgeSource(he)], hn_mapping[graph.edgeTarget(he)]}; - } - }); - }); + tbb::parallel_invoke( + [&] { + node_weights.resize(num_nodes); + graph.doParallelForAllNodes([&](const HypernodeID hn) { + const HypernodeID mapped_hn = hn_mapping[hn]; + ASSERT(mapped_hn < 
num_nodes); + node_weights[mapped_hn] = graph.nodeWeight(hn); + }); + }, + [&] { + edge_vector.resize(num_edges); + edge_weights.resize(num_edges); + graph.doParallelForAllEdges([&](const HyperedgeID he) { + if(graph.edgeSource(he) < graph.edgeTarget(he)) + { + const HyperedgeID mapped_he = he_mapping[he]; + ASSERT(mapped_he < num_edges); + edge_weights[mapped_he] = graph.edgeWeight(he); + edge_vector[mapped_he] = { hn_mapping[graph.edgeSource(he)], + hn_mapping[graph.edgeTarget(he)] }; + } + }); + }); // Construct compactified graph DynamicGraph compactified_graph = DynamicGraphFactory::construct_from_graph_edges( - num_nodes, num_edges, edge_vector, edge_weights.data(), node_weights.data()); - compactified_graph._removed_degree_zero_hn_weight = graph._removed_degree_zero_hn_weight; + num_nodes, num_edges, edge_vector, edge_weights.data(), node_weights.data()); + compactified_graph._removed_degree_zero_hn_weight = + graph._removed_degree_zero_hn_weight; compactified_graph._total_weight += graph._removed_degree_zero_hn_weight; - tbb::parallel_invoke([&] { - // Set community ids - graph.doParallelForAllNodes([&](const HypernodeID& hn) { - const HypernodeID mapped_hn = hn_mapping[hn]; - compactified_graph.setCommunityID(mapped_hn, graph.communityID(hn)); - }); - }, [&] { - if ( graph.hasFixedVertices() ) { - // Set fixed vertices - ds::FixedVertexSupport fixed_vertices( - compactified_graph.initialNumNodes(), graph._fixed_vertices.numBlocks()); - fixed_vertices.setHypergraph(&compactified_graph); - graph.doParallelForAllNodes([&](const HypernodeID& hn) { - if ( graph.isFixed(hn) ) { + tbb::parallel_invoke( + [&] { + // Set community ids + graph.doParallelForAllNodes([&](const HypernodeID &hn) { const HypernodeID mapped_hn = hn_mapping[hn]; - fixed_vertices.fixToBlock(mapped_hn, graph.fixedVertexBlock(hn)); + compactified_graph.setCommunityID(mapped_hn, graph.communityID(hn)); + }); + }, + [&] { + if(graph.hasFixedVertices()) + { + // Set fixed vertices + ds::FixedVertexSupport fixed_vertices( + compactified_graph.initialNumNodes(), graph._fixed_vertices.numBlocks()); + fixed_vertices.setHypergraph(&compactified_graph); + graph.doParallelForAllNodes([&](const HypernodeID &hn) { + if(graph.isFixed(hn)) + { + const HypernodeID mapped_hn = hn_mapping[hn]; + fixed_vertices.fixToBlock(mapped_hn, graph.fixedVertexBlock(hn)); + } + }); + compactified_graph.addFixedVertexSupport(std::move(fixed_vertices)); } }); - compactified_graph.addFixedVertexSupport(std::move(fixed_vertices)); - } - }); parallel::parallel_free(he_mapping, edge_weights, node_weights, edge_vector); diff --git a/mt-kahypar/datastructures/dynamic_graph_factory.h b/mt-kahypar/datastructures/dynamic_graph_factory.h index b82b601c0..265d22f78 100644 --- a/mt-kahypar/datastructures/dynamic_graph_factory.h +++ b/mt-kahypar/datastructures/dynamic_graph_factory.h @@ -33,39 +33,41 @@ #include "mt-kahypar/datastructures/dynamic_graph.h" #include "mt-kahypar/parallel/atomic_wrapper.h" - namespace mt_kahypar { namespace ds { -class DynamicGraphFactory { - using EdgeVector = parallel::scalable_vector>; - using HyperedgeVector = parallel::scalable_vector>; +class DynamicGraphFactory +{ + using EdgeVector = parallel::scalable_vector >; + using HyperedgeVector = + parallel::scalable_vector >; using Counter = parallel::scalable_vector; - using AtomicCounter = parallel::scalable_vector>; + using AtomicCounter = + parallel::scalable_vector >; using ThreadLocalCounter = tbb::enumerable_thread_specific; - public: - static DynamicGraph construct(const 
HypernodeID num_nodes, - const HyperedgeID num_edges, - const HyperedgeVector& edge_vector, - const HyperedgeWeight* edge_weight = nullptr, - const HypernodeWeight* node_weight = nullptr, - const bool stable_construction_of_incident_edges = false); +public: + static DynamicGraph construct(const HypernodeID num_nodes, const HyperedgeID num_edges, + const HyperedgeVector &edge_vector, + const HyperedgeWeight *edge_weight = nullptr, + const HypernodeWeight *node_weight = nullptr, + const bool stable_construction_of_incident_edges = false); - // ! Provides a more performant construction method by using continuous space for the edges - // ! (instead of a separate vec per edge). - // ! No backwards edges allowed, i.e. each edge is unique - static DynamicGraph construct_from_graph_edges(const HypernodeID num_nodes, - const HyperedgeID num_edges, - const EdgeVector& edge_vector, - const HyperedgeWeight* edge_weight = nullptr, - const HypernodeWeight* node_weight = nullptr, - const bool stable_construction_of_incident_edges = false); + // ! Provides a more performant construction method by using continuous space for the + // edges ! (instead of a separate vec per edge). ! No backwards edges allowed, i.e. each + // edge is unique + static DynamicGraph + construct_from_graph_edges(const HypernodeID num_nodes, const HyperedgeID num_edges, + const EdgeVector &edge_vector, + const HyperedgeWeight *edge_weight = nullptr, + const HypernodeWeight *node_weight = nullptr, + const bool stable_construction_of_incident_edges = false); - static std::pair > compactify(const DynamicGraph&); + static std::pair > + compactify(const DynamicGraph &); - private: - DynamicGraphFactory() { } +private: + DynamicGraphFactory() {} }; } // namespace ds diff --git a/mt-kahypar/datastructures/dynamic_hypergraph.cpp b/mt-kahypar/datastructures/dynamic_hypergraph.cpp index 99afceec6..0f5cf976e 100644 --- a/mt-kahypar/datastructures/dynamic_hypergraph.cpp +++ b/mt-kahypar/datastructures/dynamic_hypergraph.cpp @@ -28,38 +28,48 @@ #include "mt-kahypar/datastructures/dynamic_hypergraph.h" #include "tbb/blocked_range.h" +#include "tbb/concurrent_queue.h" +#include "tbb/parallel_reduce.h" #include "tbb/parallel_scan.h" #include "tbb/parallel_sort.h" -#include "tbb/parallel_reduce.h" -#include "tbb/concurrent_queue.h" -#include "mt-kahypar/parallel/stl/scalable_queue.h" #include "mt-kahypar/datastructures/concurrent_bucket_map.h" #include "mt-kahypar/datastructures/streaming_vector.h" +#include "mt-kahypar/parallel/stl/scalable_queue.h" #include "mt-kahypar/utils/timer.h" namespace mt_kahypar { namespace ds { // ! 
Recomputes the total weight of the hypergraph (parallel) -void DynamicHypergraph::updateTotalWeight(parallel_tag_t) { - _total_weight = tbb::parallel_reduce(tbb::blocked_range(ID(0), _num_hypernodes), 0, - [this](const tbb::blocked_range& range, HypernodeWeight init) { - HypernodeWeight weight = init; - for (HypernodeID hn = range.begin(); hn < range.end(); ++hn) { - if ( nodeIsEnabled(hn) ) { - weight += this->_hypernodes[hn].weight(); - } - } - return weight; - }, std::plus()) + _removed_degree_zero_hn_weight; +void DynamicHypergraph::updateTotalWeight(parallel_tag_t) +{ + _total_weight = + tbb::parallel_reduce( + tbb::blocked_range(ID(0), _num_hypernodes), 0, + [this](const tbb::blocked_range &range, HypernodeWeight init) { + HypernodeWeight weight = init; + for(HypernodeID hn = range.begin(); hn < range.end(); ++hn) + { + if(nodeIsEnabled(hn)) + { + weight += this->_hypernodes[hn].weight(); + } + } + return weight; + }, + std::plus()) + + _removed_degree_zero_hn_weight; } // ! Recomputes the total weight of the hypergraph (sequential) -void DynamicHypergraph::updateTotalWeight() { +void DynamicHypergraph::updateTotalWeight() +{ _total_weight = 0; - for ( const HypernodeID& hn : nodes() ) { - if ( nodeIsEnabled(hn) ) { + for(const HypernodeID &hn : nodes()) + { + if(nodeIsEnabled(hn)) + { _total_weight += nodeWeight(hn); } } @@ -68,28 +78,32 @@ void DynamicHypergraph::updateTotalWeight() { /**! * Registers a contraction in the hypergraph whereas vertex u is the representative - * of the contraction and v its contraction partner. Several threads can call this function - * in parallel. The function adds the contraction of u and v to a contraction tree that determines - * a parallel execution order and synchronization points for all running contractions. - * The contraction can be executed by calling function contract(v, max_node_weight). + * of the contraction and v its contraction partner. Several threads can call this + * function in parallel. The function adds the contraction of u and v to a contraction + * tree that determines a parallel execution order and synchronization points for all + * running contractions. The contraction can be executed by calling function contract(v, + * max_node_weight). */ -bool DynamicHypergraph::registerContraction(const HypernodeID u, const HypernodeID v) { - return _contraction_tree.registerContraction(u, v, _version, - [&](HypernodeID u) { acquireHypernode(u); }, - [&](HypernodeID u) { releaseHypernode(u); }); +bool DynamicHypergraph::registerContraction(const HypernodeID u, const HypernodeID v) +{ + return _contraction_tree.registerContraction( + u, v, _version, [&](HypernodeID u) { acquireHypernode(u); }, + [&](HypernodeID u) { releaseHypernode(u); }); } /**! - * Contracts a previously registered contraction. Representative u of vertex v is looked up - * in the contraction tree and performed if there are no pending contractions in the subtree - * of v and the contractions respects the maximum allowed node weight. If (u,v) is the last - * pending contraction in the subtree of u then the function recursively contracts also - * u (if any contraction is registered). Therefore, function can return several contractions - * or also return an empty contraction vector. + * Contracts a previously registered contraction. Representative u of vertex v is looked + * up in the contraction tree and performed if there are no pending contractions in the + * subtree of v and the contractions respects the maximum allowed node weight. 
If (u,v) is + * the last pending contraction in the subtree of u then the function recursively + * contracts also u (if any contraction is registered). Therefore, function can return + * several contractions or also return an empty contraction vector. */ size_t DynamicHypergraph::contract(const HypernodeID v, - const HypernodeWeight max_node_weight) { - ASSERT(_contraction_tree.parent(v) != v, "No contraction registered for hypernode" << v); + const HypernodeWeight max_node_weight) +{ + ASSERT(_contraction_tree.parent(v) != v, + "No contraction registered for hypernode" << v); HypernodeID x = _contraction_tree.parent(v); HypernodeID y = v; @@ -97,10 +111,12 @@ size_t DynamicHypergraph::contract(const HypernodeID v, size_t num_contractions = 0; // We perform all contractions registered in the contraction tree // as long as there are no pending contractions - while ( x != y && res != ContractionResult::PENDING_CONTRACTIONS) { + while(x != y && res != ContractionResult::PENDING_CONTRACTIONS) + { // Perform Contraction res = contract(x, y, max_node_weight); - if ( res == ContractionResult::CONTRACTED ) { + if(res == ContractionResult::CONTRACTED) + { ++num_contractions; } y = x; @@ -109,79 +125,90 @@ size_t DynamicHypergraph::contract(const HypernodeID v, return num_contractions; } - /** - * Uncontracts a batch of contractions in parallel. The batches must be uncontracted exactly - * in the order computed by the function createBatchUncontractionHierarchy(...). + * Uncontracts a batch of contractions in parallel. The batches must be uncontracted + * exactly in the order computed by the function createBatchUncontractionHierarchy(...). * The two uncontraction functions are required by the partitioned hypergraph to restore * pin counts and gain cache values. */ -void DynamicHypergraph::uncontract(const Batch& batch, - const UncontractionFunction& case_one_func, - const UncontractionFunction& case_two_func) { +void DynamicHypergraph::uncontract(const Batch &batch, + const UncontractionFunction &case_one_func, + const UncontractionFunction &case_two_func) +{ ASSERT(batch.size() > UL(0)); - ASSERT([&] { - const HypernodeID expected_batch_index = hypernode(batch[0].v).batchIndex(); - for ( const Memento& memento : batch ) { - if ( hypernode(memento.v).batchIndex() != expected_batch_index ) { - LOG << "Batch contains uncontraction from different batches." - << "Hypernode" << memento.v << "with version" << hypernode(memento.v).batchIndex() - << "but expected is" << expected_batch_index; - return false; - } - if ( _contraction_tree.version(memento.v) != _version ) { - LOG << "Batch contains uncontraction from a different version." - << "Hypernode" << memento.v << "with version" << _contraction_tree.version(memento.v) - << "but expected is" << _version; - return false; - } - } - return true; - }(), "Batch contains uncontractions from different batches or from a different hypergraph version"); + ASSERT( + [&] { + const HypernodeID expected_batch_index = hypernode(batch[0].v).batchIndex(); + for(const Memento &memento : batch) + { + if(hypernode(memento.v).batchIndex() != expected_batch_index) + { + LOG << "Batch contains uncontraction from different batches." + << "Hypernode" << memento.v << "with version" + << hypernode(memento.v).batchIndex() << "but expected is" + << expected_batch_index; + return false; + } + if(_contraction_tree.version(memento.v) != _version) + { + LOG << "Batch contains uncontraction from a different version." 
+ << "Hypernode" << memento.v << "with version" + << _contraction_tree.version(memento.v) << "but expected is" << _version; + return false; + } + } + return true; + }(), + "Batch contains uncontractions from different batches or from a different " + "hypergraph version"); _hes_to_resize_flag_array.reset(); tbb::parallel_for(UL(0), batch.size(), [&](const size_t i) { - const Memento& memento = batch[i]; + const Memento &memento = batch[i]; ASSERT(!hypernode(memento.u).isDisabled(), "Hypernode" << memento.u << "is disabled"); - ASSERT(hypernode(memento.v).isDisabled(), "Hypernode" << memento.v << "is not invalid"); + ASSERT(hypernode(memento.v).isDisabled(), + "Hypernode" << memento.v << "is not invalid"); // Restore incident net list of u and v const HypernodeID batch_index = hypernode(batch[0].v).batchIndex(); - _incident_nets.uncontract(memento.u, memento.v, - [&](const HyperedgeID e) { - // In that case, u and v were both previously part of hyperedge e. - if ( !_hes_to_resize_flag_array[e] && - _hes_to_resize_flag_array.compare_and_set_to_true(e) ) { - // This part is only triggered once for each hyperedge per batch uncontraction. - // It restores all pins that are part of the current batch as contraction partners - // in hyperedge e - restoreHyperedgeSizeForBatch(e, batch_index, case_one_func); - } - }, [&](const HyperedgeID e) { - // In that case only v was part of hyperedge e before and - // u must be replaced by v in hyperedge e - const size_t slot_of_u = findPositionOfPinInIncidenceArray(memento.u, e); - - acquireHyperedge(e); - ASSERT(_incidence_array[slot_of_u] == memento.u); - _incidence_array[slot_of_u] = memento.v; - case_two_func(memento.u, memento.v, e); - releaseHyperedge(e); - }, [&](const HypernodeID u) { - acquireHypernode(u); - }, [&](const HypernodeID u) { - releaseHypernode(u); - }); + _incident_nets.uncontract( + memento.u, memento.v, + [&](const HyperedgeID e) { + // In that case, u and v were both previously part of hyperedge e. + if(!_hes_to_resize_flag_array[e] && + _hes_to_resize_flag_array.compare_and_set_to_true(e)) + { + // This part is only triggered once for each hyperedge per batch + // uncontraction. It restores all pins that are part of the current batch as + // contraction partners in hyperedge e + restoreHyperedgeSizeForBatch(e, batch_index, case_one_func); + } + }, + [&](const HyperedgeID e) { + // In that case only v was part of hyperedge e before and + // u must be replaced by v in hyperedge e + const size_t slot_of_u = findPositionOfPinInIncidenceArray(memento.u, e); + + acquireHyperedge(e); + ASSERT(_incidence_array[slot_of_u] == memento.u); + _incidence_array[slot_of_u] = memento.v; + case_two_func(memento.u, memento.v, e); + releaseHyperedge(e); + }, + [&](const HypernodeID u) { acquireHypernode(u); }, + [&](const HypernodeID u) { releaseHypernode(u); }); acquireHypernode(memento.u); // Restore hypernode v which includes enabling it and subtract its weight // from its representative hypernode(memento.v).enable(); - hypernode(memento.u).setWeight(hypernode(memento.u).weight() - hypernode(memento.v).weight()); + hypernode(memento.u).setWeight(hypernode(memento.u).weight() - + hypernode(memento.v).weight()); releaseHypernode(memento.u); // Revert contraction in fixed vertex support - if ( hasFixedVertices() ) { + if(hasFixedVertices()) + { _fixed_vertices.uncontract(memento.u, memento.v); } }); @@ -194,10 +221,13 @@ void DynamicHypergraph::uncontract(const Batch& batch, * single-pin and parallel net detection. 
Once we process all batches of a versioned * batch vector, we have to restore all previously removed single-pin and parallel nets * in order to uncontract the next batch vector. We create for each version of the - * hypergraph a seperate batch uncontraction hierarchy (see createBatchUncontractionHierarchyOfVersion(...)) + * hypergraph a seperate batch uncontraction hierarchy (see + * createBatchUncontractionHierarchyOfVersion(...)) */ -VersionedBatchVector DynamicHypergraph::createBatchUncontractionHierarchy(const size_t batch_size, - const bool test) { +VersionedBatchVector +DynamicHypergraph::createBatchUncontractionHierarchy(const size_t batch_size, + const bool test) +{ const size_t num_versions = _version + 1; // Finalizes the contraction tree such that it is traversable in a top-down fashion // and contains subtree size for each tree node @@ -206,41 +236,46 @@ VersionedBatchVector DynamicHypergraph::createBatchUncontractionHierarchy(const VersionedBatchVector versioned_batches(num_versions); parallel::scalable_vector batch_sizes_prefix_sum(num_versions, 0); BatchIndexAssigner batch_index_assigner(_num_hypernodes, batch_size); - for ( size_t version = 0; version < num_versions; ++version ) { + for(size_t version = 0; version < num_versions; ++version) + { versioned_batches[version] = - createBatchUncontractionHierarchyForVersion( - batch_index_assigner, version); - if ( version > 0 ) { + createBatchUncontractionHierarchyForVersion(batch_index_assigner, version); + if(version > 0) + { batch_sizes_prefix_sum[version] = - batch_sizes_prefix_sum[version - 1] + versioned_batches[version - 1].size(); + batch_sizes_prefix_sum[version - 1] + versioned_batches[version - 1].size(); } batch_index_assigner.reset(versioned_batches[version].size()); } - if ( !test ) { + if(!test) + { // Store the batch index of each vertex in its hypernode data structure tbb::parallel_for(UL(0), num_versions, [&](const size_t version) { - tbb::parallel_for(UL(0), versioned_batches[version].size(), [&](const size_t local_batch_idx) { - const size_t batch_idx = batch_sizes_prefix_sum[version] + local_batch_idx; - for ( const Memento& memento : versioned_batches[version][local_batch_idx] ) { - hypernode(memento.v).setBatchIndex(batch_idx); - } - }); + tbb::parallel_for( + UL(0), versioned_batches[version].size(), [&](const size_t local_batch_idx) { + const size_t batch_idx = batch_sizes_prefix_sum[version] + local_batch_idx; + for(const Memento &memento : versioned_batches[version][local_batch_idx]) + { + hypernode(memento.v).setBatchIndex(batch_idx); + } + }); }); // Sort the invalid part of each hyperedge according to the batch indices of its pins - tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID& he) { + tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID &he) { const size_t first_invalid_entry = hyperedge(he).firstInvalidEntry(); const size_t last_invalid_entry = hyperedge(he + 1).firstEntry(); - std::sort(_incidence_array.begin() + first_invalid_entry, - _incidence_array.begin() + last_invalid_entry, - [&](const HypernodeID u, const HypernodeID v) { - ASSERT(hypernode(u).batchIndex() != std::numeric_limits::max(), - "Hypernode" << u << "is not contained in the uncontraction hierarchy"); - ASSERT(hypernode(v).batchIndex() != std::numeric_limits::max(), - "Hypernode" << v << "is not contained in the uncontraction hierarchy"); - return hypernode(u).batchIndex() > hypernode(v).batchIndex(); - }); + std::sort( + _incidence_array.begin() + first_invalid_entry, + _incidence_array.begin() + 
last_invalid_entry, + [&](const HypernodeID u, const HypernodeID v) { + ASSERT(hypernode(u).batchIndex() != std::numeric_limits::max(), + "Hypernode" << u << "is not contained in the uncontraction hierarchy"); + ASSERT(hypernode(v).batchIndex() != std::numeric_limits::max(), + "Hypernode" << v << "is not contained in the uncontraction hierarchy"); + return hypernode(u).batchIndex() > hypernode(v).batchIndex(); + }); }); } @@ -252,7 +287,9 @@ VersionedBatchVector DynamicHypergraph::createBatchUncontractionHierarchy(const * of a set of identical nets is aggregated in one representative hyperedge * and single-pin hyperedges are removed. Returns a vector of removed hyperedges. */ -parallel::scalable_vector DynamicHypergraph::removeSinglePinAndParallelHyperedges() { +parallel::scalable_vector +DynamicHypergraph::removeSinglePinAndParallelHyperedges() +{ _removable_single_pin_and_parallel_nets.reset(); // Remove singple-pin hyperedges directly from the hypergraph and // insert all other hyperedges into a bucket data structure such that @@ -260,19 +297,22 @@ parallel::scalable_vector DynamicHypergrap StreamingVector tmp_removed_hyperedges; ConcurrentBucketMap hyperedge_hash_map; hyperedge_hash_map.reserve_for_estimated_number_of_insertions(_num_hyperedges); - doParallelForAllEdges([&](const HyperedgeID& he) { + doParallelForAllEdges([&](const HyperedgeID &he) { const HypernodeID edge_size = edgeSize(he); - if ( edge_size > 1 ) { - const Hyperedge& e = hyperedge(he); + if(edge_size > 1) + { + const Hyperedge &e = hyperedge(he); const size_t footprint = e.hash(); std::sort(_incidence_array.begin() + e.firstEntry(), _incidence_array.begin() + e.firstInvalidEntry()); - hyperedge_hash_map.insert(footprint, - ContractedHyperedgeInformation { he, footprint, edge_size, true }); - } else { + hyperedge_hash_map.insert( + footprint, ContractedHyperedgeInformation{ he, footprint, edge_size, true }); + } + else + { hyperedge(he).disable(); _removable_single_pin_and_parallel_nets.set(he, true); - tmp_removed_hyperedges.stream(ParallelHyperedge { he, kInvalidHyperedge }); + tmp_removed_hyperedges.stream(ParallelHyperedge{ he, kInvalidHyperedge }); } }); @@ -280,20 +320,25 @@ parallel::scalable_vector DynamicHypergrap // Note, pins inside the hyperedges are sorted. auto check_if_hyperedges_are_parallel = [&](const HyperedgeID lhs, const HyperedgeID rhs) { - const Hyperedge& lhs_he = hyperedge(lhs); - const Hyperedge& rhs_he = hyperedge(rhs); - if ( lhs_he.size() == rhs_he.size() ) { + const Hyperedge &lhs_he = hyperedge(lhs); + const Hyperedge &rhs_he = hyperedge(rhs); + if(lhs_he.size() == rhs_he.size()) + { const size_t lhs_start = lhs_he.firstEntry(); const size_t rhs_start = rhs_he.firstEntry(); - for ( size_t i = 0; i < lhs_he.size(); ++i ) { + for(size_t i = 0; i < lhs_he.size(); ++i) + { const size_t lhs_pos = lhs_start + i; const size_t rhs_pos = rhs_start + i; - if ( _incidence_array[lhs_pos] != _incidence_array[rhs_pos] ) { + if(_incidence_array[lhs_pos] != _incidence_array[rhs_pos]) + { return false; } } return true; - } else { + } + else + { return false; } }; @@ -305,32 +350,40 @@ parallel::scalable_vector DynamicHypergrap // hyperedges are detected by comparing the pins of hyperedges with // the same hash. 
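// --- Editorial sketch (not part of the clang-format diff) ---------------------------
// The parallel_for below compares the pins of hyperedges that share a hash in order to
// find parallel (identical) nets. A minimal sequential illustration of the same idea;
// the names Net and detect_parallel_nets are invented for this sketch only.
#include <algorithm>
#include <cstddef>
#include <unordered_map>
#include <utility>
#include <vector>

struct Net {
  int weight = 1;
  std::vector<int> pins;  // pin ids, sorted before hashing
  bool valid = true;
};

// Returns pairs (removed net index, representative index); the weight of each removed
// net is aggregated on its representative, mirroring the removal step in the diff above.
std::vector<std::pair<size_t, size_t>> detect_parallel_nets(std::vector<Net>& nets) {
  std::unordered_map<size_t, std::vector<size_t>> buckets;  // hash -> net indices
  for (size_t i = 0; i < nets.size(); ++i) {
    std::sort(nets[i].pins.begin(), nets[i].pins.end());
    size_t h = nets[i].pins.size();
    for (int p : nets[i].pins) h = h * 1000003u + static_cast<size_t>(p);
    buckets[h].push_back(i);
  }
  std::vector<std::pair<size_t, size_t>> removed;
  for (auto& bucket : buckets) {
    const std::vector<size_t>& ids = bucket.second;
    for (size_t a = 0; a < ids.size(); ++a) {
      Net& rep = nets[ids[a]];
      if (!rep.valid) continue;
      for (size_t b = a + 1; b < ids.size(); ++b) {
        Net& cand = nets[ids[b]];
        // An equal hash is only a hint; confirm by comparing the (sorted) pin lists.
        if (cand.valid && cand.pins == rep.pins) {
          rep.weight += cand.weight;
          cand.valid = false;
          removed.emplace_back(ids[b], ids[a]);
        }
      }
    }
  }
  return removed;
}
// ------------------------------------------------------------------------------------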
tbb::parallel_for(UL(0), hyperedge_hash_map.numBuckets(), [&](const size_t bucket) { - auto& hyperedge_bucket = hyperedge_hash_map.getBucket(bucket); + auto &hyperedge_bucket = hyperedge_hash_map.getBucket(bucket); std::sort(hyperedge_bucket.begin(), hyperedge_bucket.end(), - [&](const ContractedHyperedgeInformation& lhs, const ContractedHyperedgeInformation& rhs) { - return lhs.hash < rhs.hash || (lhs.hash == rhs.hash && lhs.size < rhs.size)|| - (lhs.hash == rhs.hash && lhs.size == rhs.size && lhs.he < rhs.he); - }); + [&](const ContractedHyperedgeInformation &lhs, + const ContractedHyperedgeInformation &rhs) { + return lhs.hash < rhs.hash || + (lhs.hash == rhs.hash && lhs.size < rhs.size) || + (lhs.hash == rhs.hash && lhs.size == rhs.size && lhs.he < rhs.he); + }); // Parallel Hyperedge Detection - for ( size_t i = 0; i < hyperedge_bucket.size(); ++i ) { - ContractedHyperedgeInformation& contracted_he_lhs = hyperedge_bucket[i]; - if ( contracted_he_lhs.valid ) { + for(size_t i = 0; i < hyperedge_bucket.size(); ++i) + { + ContractedHyperedgeInformation &contracted_he_lhs = hyperedge_bucket[i]; + if(contracted_he_lhs.valid) + { const HyperedgeID lhs_he = contracted_he_lhs.he; HyperedgeWeight lhs_weight = hyperedge(lhs_he).weight(); - for ( size_t j = i + 1; j < hyperedge_bucket.size(); ++j ) { - ContractedHyperedgeInformation& contracted_he_rhs = hyperedge_bucket[j]; + for(size_t j = i + 1; j < hyperedge_bucket.size(); ++j) + { + ContractedHyperedgeInformation &contracted_he_rhs = hyperedge_bucket[j]; const HyperedgeID rhs_he = contracted_he_rhs.he; - if ( contracted_he_rhs.valid && - contracted_he_lhs.hash == contracted_he_rhs.hash && - check_if_hyperedges_are_parallel(lhs_he, rhs_he) ) { - // Hyperedges are parallel - lhs_weight += hyperedge(rhs_he).weight(); - hyperedge(rhs_he).disable(); - _removable_single_pin_and_parallel_nets.set(rhs_he, true); - contracted_he_rhs.valid = false; - tmp_removed_hyperedges.stream( ParallelHyperedge { rhs_he, lhs_he } ); - } else if ( contracted_he_lhs.hash != contracted_he_rhs.hash ) { + if(contracted_he_rhs.valid && + contracted_he_lhs.hash == contracted_he_rhs.hash && + check_if_hyperedges_are_parallel(lhs_he, rhs_he)) + { + // Hyperedges are parallel + lhs_weight += hyperedge(rhs_he).weight(); + hyperedge(rhs_he).disable(); + _removable_single_pin_and_parallel_nets.set(rhs_he, true); + contracted_he_rhs.valid = false; + tmp_removed_hyperedges.stream(ParallelHyperedge{ rhs_he, lhs_he }); + } + else if(contracted_he_lhs.hash != contracted_he_rhs.hash) + { // In case, hash of both are not equal we go to the next hyperedge // because we compared it with all hyperedges that had an equal hash break; @@ -343,11 +396,12 @@ parallel::scalable_vector DynamicHypergrap }); // Remove single-pin and parallel nets from incident net vector of vertices - doParallelForAllNodes([&](const HypernodeID& u) { + doParallelForAllNodes([&](const HypernodeID &u) { _incident_nets.removeIncidentNets(u, _removable_single_pin_and_parallel_nets); }); - parallel::scalable_vector removed_hyperedges = tmp_removed_hyperedges.copy_parallel(); + parallel::scalable_vector removed_hyperedges = + tmp_removed_hyperedges.copy_parallel(); tmp_removed_hyperedges.clear_parallel(); ++_version; @@ -355,35 +409,39 @@ parallel::scalable_vector DynamicHypergrap } /** - * Restores a previously removed set of singple-pin and parallel hyperedges. Note, that hes_to_restore - * must be exactly the same and given in the reverse order as returned by removeSinglePinAndParallelNets(...). 
+ * Restores a previously removed set of singple-pin and parallel hyperedges. Note, that + * hes_to_restore must be exactly the same and given in the reverse order as returned by + * removeSinglePinAndParallelNets(...). */ -void DynamicHypergraph::restoreSinglePinAndParallelNets(const parallel::scalable_vector& hes_to_restore) { +void DynamicHypergraph::restoreSinglePinAndParallelNets( + const parallel::scalable_vector &hes_to_restore) +{ // Restores all previously removed hyperedges tbb::parallel_for(UL(0), hes_to_restore.size(), [&](const size_t i) { - const ParallelHyperedge& parallel_he = hes_to_restore[i]; + const ParallelHyperedge ¶llel_he = hes_to_restore[i]; const HyperedgeID he = parallel_he.removed_hyperedge; ASSERT(!edgeIsEnabled(he), "Hyperedge" << he << "should be disabled"); const bool is_parallel_net = parallel_he.representative != kInvalidHyperedge; hyperedge(he).enable(); - if ( is_parallel_net ) { + if(is_parallel_net) + { const HyperedgeID rep = parallel_he.representative; ASSERT(edgeIsEnabled(rep), "Hyperedge" << rep << "should be enabled"); - Hyperedge& rep_he = hyperedge(rep); + Hyperedge &rep_he = hyperedge(rep); acquireHyperedge(rep); rep_he.setWeight(rep_he.weight() - hyperedge(he).weight()); releaseHyperedge(rep); } }); - doParallelForAllNodes([&](const HypernodeID u) { - _incident_nets.restoreIncidentNets(u); - }); + doParallelForAllNodes( + [&](const HypernodeID u) { _incident_nets.restoreIncidentNets(u); }); --_version; } // ! Copy dynamic hypergraph in parallel -DynamicHypergraph DynamicHypergraph::copy(parallel_tag_t) const { +DynamicHypergraph DynamicHypergraph::copy(parallel_tag_t) const +{ DynamicHypergraph hypergraph; hypergraph._num_hypernodes = _num_hypernodes; @@ -398,51 +456,58 @@ DynamicHypergraph DynamicHypergraph::copy(parallel_tag_t) const { hypergraph._version = _version; hypergraph._contraction_index.store(_contraction_index.load()); - tbb::parallel_invoke([&] { - hypergraph._hypernodes.resize(_hypernodes.size()); - memcpy(hypergraph._hypernodes.data(), _hypernodes.data(), - sizeof(Hypernode) * _hypernodes.size()); - }, [&] { - tbb::parallel_invoke([&] { - hypergraph._incident_nets = _incident_nets.copy(parallel_tag_t()); - }, [&] { - hypergraph._acquired_hns.resize(_acquired_hns.size()); - tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID& hn) { - hypergraph._acquired_hns[hn] = _acquired_hns[hn]; + tbb::parallel_invoke( + [&] { + hypergraph._hypernodes.resize(_hypernodes.size()); + memcpy(hypergraph._hypernodes.data(), _hypernodes.data(), + sizeof(Hypernode) * _hypernodes.size()); + }, + [&] { + tbb::parallel_invoke( + [&] { hypergraph._incident_nets = _incident_nets.copy(parallel_tag_t()); }, + [&] { + hypergraph._acquired_hns.resize(_acquired_hns.size()); + tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID &hn) { + hypergraph._acquired_hns[hn] = _acquired_hns[hn]; + }); + }); + }, + [&] { hypergraph._contraction_tree = _contraction_tree.copy(parallel_tag_t()); }, + [&] { + hypergraph._hyperedges.resize(_hyperedges.size()); + memcpy(hypergraph._hyperedges.data(), _hyperedges.data(), + sizeof(Hyperedge) * _hyperedges.size()); + }, + [&] { + hypergraph._incidence_array.resize(_incidence_array.size()); + memcpy(hypergraph._incidence_array.data(), _incidence_array.data(), + sizeof(HypernodeID) * _incidence_array.size()); + }, + [&] { + hypergraph._acquired_hes.resize(_num_hyperedges); + tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID &he) { + hypergraph._acquired_hes[he] = _acquired_hes[he]; + }); + }, + 
[&] { + hypergraph._hes_to_resize_flag_array = + ThreadSafeFastResetFlagArray<>(_num_hyperedges); + }, + [&] { hypergraph._he_bitset = ThreadLocalBitset(_num_hyperedges); }, + [&] { + hypergraph._removable_single_pin_and_parallel_nets = + kahypar::ds::FastResetFlagArray<>(_num_hyperedges); + }, + [&] { + hypergraph._fixed_vertices = _fixed_vertices.copy(); + hypergraph._fixed_vertices.setHypergraph(&hypergraph); }); - }); - }, [&] { - hypergraph._contraction_tree = _contraction_tree.copy(parallel_tag_t()); - }, [&] { - hypergraph._hyperedges.resize(_hyperedges.size()); - memcpy(hypergraph._hyperedges.data(), _hyperedges.data(), - sizeof(Hyperedge) * _hyperedges.size()); - }, [&] { - hypergraph._incidence_array.resize(_incidence_array.size()); - memcpy(hypergraph._incidence_array.data(), _incidence_array.data(), - sizeof(HypernodeID) * _incidence_array.size()); - }, [&] { - hypergraph._acquired_hes.resize(_num_hyperedges); - tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID& he) { - hypergraph._acquired_hes[he] = _acquired_hes[he]; - }); - }, [&] { - hypergraph._hes_to_resize_flag_array = - ThreadSafeFastResetFlagArray<>(_num_hyperedges); - }, [&] { - hypergraph._he_bitset = ThreadLocalBitset(_num_hyperedges); - }, [&] { - hypergraph._removable_single_pin_and_parallel_nets = - kahypar::ds::FastResetFlagArray<>(_num_hyperedges); - }, [&] { - hypergraph._fixed_vertices = _fixed_vertices.copy(); - hypergraph._fixed_vertices.setHypergraph(&hypergraph); - }); return hypergraph; } // ! Copy dynamic hypergraph sequential -DynamicHypergraph DynamicHypergraph::copy() const { +DynamicHypergraph DynamicHypergraph::copy() const +{ DynamicHypergraph hypergraph; hypergraph._num_hypernodes = _num_hypernodes; @@ -459,28 +524,29 @@ DynamicHypergraph DynamicHypergraph::copy() const { hypergraph._hypernodes.resize(_hypernodes.size()); memcpy(hypergraph._hypernodes.data(), _hypernodes.data(), - sizeof(Hypernode) * _hypernodes.size()); + sizeof(Hypernode) * _hypernodes.size()); hypergraph._incident_nets = _incident_nets.copy(); hypergraph._acquired_hns.resize(_num_hypernodes); - for ( HypernodeID hn = 0; hn < _num_hypernodes; ++hn ) { + for(HypernodeID hn = 0; hn < _num_hypernodes; ++hn) + { hypergraph._acquired_hns[hn] = _acquired_hns[hn]; } hypergraph._contraction_tree = _contraction_tree.copy(); hypergraph._hyperedges.resize(_hyperedges.size()); memcpy(hypergraph._hyperedges.data(), _hyperedges.data(), - sizeof(Hyperedge) * _hyperedges.size()); + sizeof(Hyperedge) * _hyperedges.size()); hypergraph._incidence_array.resize(_incidence_array.size()); memcpy(hypergraph._incidence_array.data(), _incidence_array.data(), - sizeof(HypernodeID) * _incidence_array.size()); + sizeof(HypernodeID) * _incidence_array.size()); hypergraph._acquired_hes.resize(_num_hyperedges); - for ( HyperedgeID he = 0; he < _num_hyperedges; ++he ) { + for(HyperedgeID he = 0; he < _num_hyperedges; ++he) + { hypergraph._acquired_hes[he] = _acquired_hes[he]; } - hypergraph._hes_to_resize_flag_array = - ThreadSafeFastResetFlagArray<>(_num_hyperedges); + hypergraph._hes_to_resize_flag_array = ThreadSafeFastResetFlagArray<>(_num_hyperedges); hypergraph._he_bitset = ThreadLocalBitset(_num_hyperedges); hypergraph._removable_single_pin_and_parallel_nets = - kahypar::ds::FastResetFlagArray<>(_num_hyperedges); + kahypar::ds::FastResetFlagArray<>(_num_hyperedges); hypergraph._fixed_vertices = _fixed_vertices.copy(); hypergraph._fixed_vertices.setHypergraph(&hypergraph); @@ -488,7 +554,8 @@ DynamicHypergraph DynamicHypergraph::copy() 
const { return hypergraph; } -void DynamicHypergraph::memoryConsumption(utils::MemoryTreeNode* parent) const { +void DynamicHypergraph::memoryConsumption(utils::MemoryTreeNode *parent) const +{ ASSERT(parent); parent->addChild("Hypernodes", sizeof(Hypernode) * _hypernodes.size()); @@ -497,53 +564,65 @@ void DynamicHypergraph::memoryConsumption(utils::MemoryTreeNode* parent) const { parent->addChild("Hyperedges", sizeof(Hyperedge) * _hyperedges.size()); parent->addChild("Incidence Array", sizeof(HypernodeID) * _incidence_array.size()); parent->addChild("Hyperedge Ownership Vector", sizeof(bool) * _acquired_hes.size()); - parent->addChild("Bitsets", - ( _num_hyperedges * _he_bitset.size() ) / size_t(8) + sizeof(uint16_t) * _num_hyperedges); + parent->addChild("Bitsets", (_num_hyperedges * _he_bitset.size()) / size_t(8) + + sizeof(uint16_t) * _num_hyperedges); - utils::MemoryTreeNode* contraction_tree_node = parent->addChild("Contraction Tree"); + utils::MemoryTreeNode *contraction_tree_node = parent->addChild("Contraction Tree"); _contraction_tree.memoryConsumption(contraction_tree_node); - if ( hasFixedVertices() ) { + if(hasFixedVertices()) + { parent->addChild("Fixed Vertex Support", _fixed_vertices.size_in_bytes()); } } // ! Only for testing -bool DynamicHypergraph::verifyIncidenceArrayAndIncidentNets() { +bool DynamicHypergraph::verifyIncidenceArrayAndIncidentNets() +{ bool success = true; - tbb::parallel_invoke([&] { - doParallelForAllNodes([&](const HypernodeID& hn) { - for ( const HyperedgeID& he : incidentEdges(hn) ) { - bool found = false; - for ( const HypernodeID& pin : pins(he) ) { - if ( pin == hn ) { - found = true; - break; + tbb::parallel_invoke( + [&] { + doParallelForAllNodes([&](const HypernodeID &hn) { + for(const HyperedgeID &he : incidentEdges(hn)) + { + bool found = false; + for(const HypernodeID &pin : pins(he)) + { + if(pin == hn) + { + found = true; + break; + } + } + if(!found) + { + LOG << "Hypernode" << hn << "not found in incidence array of net" << he; + success = false; + } } - } - if ( !found ) { - LOG << "Hypernode" << hn << "not found in incidence array of net" << he; - success = false; - } - } - }); - }, [&] { - doParallelForAllEdges([&](const HyperedgeID& he) { - for ( const HypernodeID& pin : pins(he) ) { - bool found = false; - for ( const HyperedgeID& e : incidentEdges(pin) ) { - if ( e == he ) { - found = true; - break; + }); + }, + [&] { + doParallelForAllEdges([&](const HyperedgeID &he) { + for(const HypernodeID &pin : pins(he)) + { + bool found = false; + for(const HyperedgeID &e : incidentEdges(pin)) + { + if(e == he) + { + found = true; + break; + } + } + if(!found) + { + LOG << "Hyperedge" << he << "not found in incident nets of vertex" << pin; + success = false; + } } - } - if ( !found ) { - LOG << "Hyperedge" << he << "not found in incident nets of vertex" << pin; - success = false; - } - } - }); - }); + }); + }); return success; } @@ -556,15 +635,19 @@ bool DynamicHypergraph::verifyIncidenceArrayAndIncidentNets() { * greater than the maximum allowed node weight) or PENDING_CONTRACTIONS (in case * there are some unfinished contractions in the subtree of v) is returned. 
*/ -DynamicHypergraph::ContractionResult DynamicHypergraph::contract(const HypernodeID u, - const HypernodeID v, - const HypernodeWeight max_node_weight) { +DynamicHypergraph::ContractionResult +DynamicHypergraph::contract(const HypernodeID u, const HypernodeID v, + const HypernodeWeight max_node_weight) +{ // Acquire ownership in correct order to prevent deadlocks - if ( u < v ) { + if(u < v) + { acquireHypernode(u); acquireHypernode(v); - } else { + } + else + { acquireHypernode(v); acquireHypernode(u); } @@ -574,14 +657,16 @@ DynamicHypergraph::ContractionResult DynamicHypergraph::contract(const Hypernode // 2.) There are no pending contractions on v // 4.) Resulting node weight is less or equal than a predefined upper bound const bool contraction_partner_valid = - nodeIsEnabled(v) && _contraction_tree.pendingContractions(v) == 0; + nodeIsEnabled(v) && _contraction_tree.pendingContractions(v) == 0; const bool less_or_equal_than_max_node_weight = - hypernode(u).weight() + hypernode(v).weight() <= max_node_weight; + hypernode(u).weight() + hypernode(v).weight() <= max_node_weight; const bool valid_contraction = - contraction_partner_valid && less_or_equal_than_max_node_weight && - ( !hasFixedVertices() || - /** only run this if all previous checks were successful */ _fixed_vertices.contract(u, v) ); - if ( valid_contraction ) { + contraction_partner_valid && less_or_equal_than_max_node_weight && + (!hasFixedVertices() || + /** only run this if all previous checks were successful */ _fixed_vertices + .contract(u, v)); + if(valid_contraction) + { ASSERT(nodeIsEnabled(u), "Hypernode" << u << "is disabled!"); hypernode(u).setWeight(nodeWeight(u) + nodeWeight(v)); hypernode(v).disable(); @@ -589,34 +674,38 @@ DynamicHypergraph::ContractionResult DynamicHypergraph::contract(const Hypernode releaseHypernode(v); HypernodeID contraction_start = _contraction_index.load(); - kahypar::ds::FastResetFlagArray<>& shared_incident_nets_u_and_v = _he_bitset.local(); + kahypar::ds::FastResetFlagArray<> &shared_incident_nets_u_and_v = _he_bitset.local(); shared_incident_nets_u_and_v.reset(); - parallel::scalable_vector& failed_hyperedge_contractions = _failed_hyperedge_contractions.local(); - for ( const HyperedgeID& he : incidentEdges(v) ) { + parallel::scalable_vector &failed_hyperedge_contractions = + _failed_hyperedge_contractions.local(); + for(const HyperedgeID &he : incidentEdges(v)) + { // Try to acquire ownership of hyperedge. In case of success, we perform the // contraction and otherwise, we remember the hyperedge and try later again. 
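// --- Editorial sketch (not part of the clang-format diff) ---------------------------
// The contraction loop below first tries to acquire each incident hyperedge and defers
// those whose lock is currently held, then processes the deferred ones with a blocking
// acquire. A minimal illustration of that two-phase pattern with std::mutex; the names
// process_with_retry, locks and items are invented for this sketch only.
#include <mutex>
#include <vector>

template <typename F>
void process_with_retry(std::vector<std::mutex>& locks, const std::vector<int>& items,
                        F&& process) {
  std::vector<int> deferred;
  // Phase 1: opportunistic try-lock, remember items we could not lock right away.
  for (int item : items) {
    if (locks[item].try_lock()) {
      process(item);
      locks[item].unlock();
    } else {
      deferred.push_back(item);
    }
  }
  // Phase 2: blocking lock for the remaining items (no busy spinning in phase 1).
  for (int item : deferred) {
    locks[item].lock();
    process(item);
    locks[item].unlock();
  }
}
// ------------------------------------------------------------------------------------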
- if ( tryAcquireHyperedge(he) ) { + if(tryAcquireHyperedge(he)) + { contractHyperedge(u, v, he, shared_incident_nets_u_and_v); releaseHyperedge(he); - } else { + } + else + { failed_hyperedge_contractions.push_back(he); } } // Perform contraction on which we failed to acquire ownership on the first try - for ( const HyperedgeID& he : failed_hyperedge_contractions ) { + for(const HyperedgeID &he : failed_hyperedge_contractions) + { acquireHyperedge(he); contractHyperedge(u, v, he, shared_incident_nets_u_and_v); releaseHyperedge(he); } // Contract incident net lists of u and v - _incident_nets.contract(u, v, shared_incident_nets_u_and_v, - [&](const HypernodeID u) { - acquireHypernode(u); - }, [&](const HypernodeID u) { - releaseHypernode(u); - }); + _incident_nets.contract( + u, v, shared_incident_nets_u_and_v, + [&](const HypernodeID u) { acquireHypernode(u); }, + [&](const HypernodeID u) { releaseHypernode(u); }); shared_incident_nets_u_and_v.reset(); failed_hyperedge_contractions.clear(); @@ -625,17 +714,20 @@ DynamicHypergraph::ContractionResult DynamicHypergraph::contract(const Hypernode _contraction_tree.unregisterContraction(u, v, contraction_start, contraction_end); releaseHypernode(u); return ContractionResult::CONTRACTED; - } else { + } + else + { ContractionResult res = ContractionResult::PENDING_CONTRACTIONS; const bool fixed_vertex_contraction_failed = - contraction_partner_valid && less_or_equal_than_max_node_weight; - if ( ( !less_or_equal_than_max_node_weight || fixed_vertex_contraction_failed ) && - nodeIsEnabled(v) && _contraction_tree.parent(v) == u ) { - _contraction_tree.unregisterContraction(u, v, - kInvalidHypernode, kInvalidHypernode, true /* failed */); + contraction_partner_valid && less_or_equal_than_max_node_weight; + if((!less_or_equal_than_max_node_weight || fixed_vertex_contraction_failed) && + nodeIsEnabled(v) && _contraction_tree.parent(v) == u) + { + _contraction_tree.unregisterContraction(u, v, kInvalidHypernode, kInvalidHypernode, + true /* failed */); res = fixed_vertex_contraction_failed ? - ContractionResult::INVALID_FIXED_VERTEX_CONTRACTION : - ContractionResult::WEIGHT_LIMIT_REACHED; + ContractionResult::INVALID_FIXED_VERTEX_CONTRACTION : + ContractionResult::WEIGHT_LIMIT_REACHED; } releaseHypernode(u); releaseHypernode(v); @@ -644,29 +736,34 @@ DynamicHypergraph::ContractionResult DynamicHypergraph::contract(const Hypernode } // ! 
Performs the contraction of (u,v) inside hyperedge he -void DynamicHypergraph::contractHyperedge(const HypernodeID u, - const HypernodeID v, - const HyperedgeID he, - kahypar::ds::FastResetFlagArray<>& shared_incident_nets_u_and_v) { - Hyperedge& e = hyperedge(he); +void DynamicHypergraph::contractHyperedge( + const HypernodeID u, const HypernodeID v, const HyperedgeID he, + kahypar::ds::FastResetFlagArray<> &shared_incident_nets_u_and_v) +{ + Hyperedge &e = hyperedge(he); const HypernodeID pins_begin = e.firstEntry(); const HypernodeID pins_end = e.firstInvalidEntry(); HypernodeID slot_of_u = pins_end - 1; HypernodeID last_pin_slot = pins_end - 1; - for (HypernodeID idx = pins_begin; idx != last_pin_slot; ++idx) { + for(HypernodeID idx = pins_begin; idx != last_pin_slot; ++idx) + { const HypernodeID pin = _incidence_array[idx]; - if (pin == v) { + if(pin == v) + { std::swap(_incidence_array[idx], _incidence_array[last_pin_slot]); --idx; - } else if (pin == u) { + } + else if(pin == u) + { slot_of_u = idx; } } ASSERT(_incidence_array[last_pin_slot] == v, "v is not last entry in adjacency array!"); - if (slot_of_u != last_pin_slot) { + if(slot_of_u != last_pin_slot) + { // Case 1: // Hyperedge e contains both u and v. Thus we don't need to connect u to e and // can just cut off the last entry in the edge array of e that now contains v. @@ -674,11 +771,14 @@ void DynamicHypergraph::contractHyperedge(const HypernodeID u, e.hash() -= kahypar::math::hash(v); e.decrementSize(); shared_incident_nets_u_and_v.set(he, true); - } else { + } + else + { DBG << V(he) << ": Case 2"; // Case 2: - // Hyperedge e does not contain u. Therefore we have to connect e to the representative u. - // This reuses the pin slot of v in e's incidence array (i.e. last_pin_slot!) + // Hyperedge e does not contain u. Therefore we have to connect e to the + // representative u. This reuses the pin slot of v in e's incidence array (i.e. + // last_pin_slot!) e.hash() -= kahypar::math::hash(v); e.hash() += kahypar::math::hash(u); _incidence_array[last_pin_slot] = u; @@ -688,16 +788,19 @@ void DynamicHypergraph::contractHyperedge(const HypernodeID u, // ! Restore the size of the hyperedge to the size before the batch with // ! index batch_index was contracted. After each size increment, we call case_one_func // ! that triggers updates in the partitioned hypergraph and gain cache -void DynamicHypergraph::restoreHyperedgeSizeForBatch(const HyperedgeID he, - const HypernodeID batch_index, - const UncontractionFunction& case_one_func) { +void DynamicHypergraph::restoreHyperedgeSizeForBatch( + const HyperedgeID he, const HypernodeID batch_index, + const UncontractionFunction &case_one_func) +{ const size_t first_invalid_entry = hyperedge(he).firstInvalidEntry(); const size_t last_invalid_entry = hyperedge(he + 1).firstEntry(); ASSERT(hypernode(_incidence_array[first_invalid_entry]).batchIndex() == batch_index); - for ( size_t pos = first_invalid_entry; pos < last_invalid_entry; ++pos ) { + for(size_t pos = first_invalid_entry; pos < last_invalid_entry; ++pos) + { const HypernodeID pin = _incidence_array[pos]; ASSERT(hypernode(pin).batchIndex() <= batch_index, V(he)); - if ( hypernode(pin).batchIndex() != batch_index ) { + if(hypernode(pin).batchIndex() != batch_index) + { break; } const HypernodeID rep = _contraction_tree.parent(pin); @@ -710,19 +813,22 @@ void DynamicHypergraph::restoreHyperedgeSizeForBatch(const HyperedgeID he, // ! 
Search for the position of pin u in hyperedge he in the incidence array size_t DynamicHypergraph::findPositionOfPinInIncidenceArray(const HypernodeID u, - const HyperedgeID he) { + const HyperedgeID he) +{ const size_t first_valid_entry = hyperedge(he).firstEntry(); const size_t first_invalid_entry = hyperedge(he).firstInvalidEntry(); size_t slot_of_u = first_invalid_entry; - for ( size_t pos = first_invalid_entry - 1; pos != first_valid_entry - 1; --pos ) { - if ( u == _incidence_array[pos] ) { + for(size_t pos = first_invalid_entry - 1; pos != first_valid_entry - 1; --pos) + { + if(u == _incidence_array[pos]) + { slot_of_u = pos; break; } } ASSERT(slot_of_u != first_invalid_entry, - "Hypernode" << u << "is not incident to hyperedge" << he); + "Hypernode" << u << "is not incident to hyperedge" << he); return slot_of_u; } @@ -733,33 +839,38 @@ size_t DynamicHypergraph::findPositionOfPinInIncidenceArray(const HypernodeID u, * the hypergraph. * A batch of uncontractions that is uncontracted in parallel must satisfy two conditions: * 1.) All representatives must be active vertices of the hypergraph - * 2.) For a specific representative its contraction partners must be uncontracted in reverse - * contraction order. Meaning that a contraction (u, v) that happens before a contraction (u, w) - * must be uncontracted in a batch that is part of the same batch or a batch uncontracted after the - * batch which (u, w) is part of. This ensures that a parallel batch uncontraction does not - * increase the objective function. - * We use the contraction tree to create a batch uncontraction order. Note, uncontractions from - * different subtrees can be interleaved abitrary. To ensure condition 1.) we peform a BFS starting - * from all roots of the contraction tree. Each BFS level induces a new batch. Since we contract - * vertices in parallel its not possible to create a relative order of the contractions which is - * neccassary for condition 2.). However, during a contraction we store a start and end "timepoint" - * of a contraction. If two contractions time intervals do not intersect, we can determine - * which contraction happens strictly before the other. If they intersect, it is not possible to - * give a relative order. To ensure condition 2.) we sort the childs of a vertex in the contraction tree - * after its time intervals. Once we add a uncontraction (u,v) to a batch, we also add all uncontractions - * (u,w) to the batch which intersect the time interval of (u,v). To merge uncontractions of different - * subtrees in a batch, we insert all eligble uncontractions into a max priority queue with the subtree - * size of the contraction partner as key. We insert uncontractions into the current batch as long - * as the maximum batch size is not reached or the PQ is empty. Once the batch reaches its maximum - * batch size, we create a new empty batch. If the PQ is empty, we replace it with the PQ of the next - * BFS level. With this approach heavy vertices are uncontracted earlier (subtree size in the PQ as key = weight of - * a vertex for an unweighted hypergraph) such that average node weight of the hypergraph decreases faster and - * local searches are more effective in early stages of the uncontraction hierarchy where hyperedge sizes are - * usually smaller than on the original hypergraph. + * 2.) For a specific representative its contraction partners must be uncontracted in + * reverse contraction order. 
Meaning that a contraction (u, v) that happens before a + * contraction (u, w) must be uncontracted in a batch that is part of the same batch or a + * batch uncontracted after the batch which (u, w) is part of. This ensures that a + * parallel batch uncontraction does not increase the objective function. We use the + * contraction tree to create a batch uncontraction order. Note, uncontractions from + * different subtrees can be interleaved abitrary. To ensure condition 1.) we peform a BFS + * starting from all roots of the contraction tree. Each BFS level induces a new batch. + * Since we contract vertices in parallel its not possible to create a relative order of + * the contractions which is neccassary for condition 2.). However, during a contraction + * we store a start and end "timepoint" of a contraction. If two contractions time + * intervals do not intersect, we can determine which contraction happens strictly before + * the other. If they intersect, it is not possible to give a relative order. To ensure + * condition 2.) we sort the childs of a vertex in the contraction tree after its time + * intervals. Once we add a uncontraction (u,v) to a batch, we also add all uncontractions + * (u,w) to the batch which intersect the time interval of (u,v). To merge uncontractions + * of different subtrees in a batch, we insert all eligble uncontractions into a max + * priority queue with the subtree size of the contraction partner as key. We insert + * uncontractions into the current batch as long as the maximum batch size is not reached + * or the PQ is empty. Once the batch reaches its maximum batch size, we create a new + * empty batch. If the PQ is empty, we replace it with the PQ of the next BFS level. With + * this approach heavy vertices are uncontracted earlier (subtree size in the PQ as key = + * weight of a vertex for an unweighted hypergraph) such that average node weight of the + * hypergraph decreases faster and local searches are more effective in early stages of + * the uncontraction hierarchy where hyperedge sizes are usually smaller than on the + * original hypergraph. 
*/ -BatchVector DynamicHypergraph::createBatchUncontractionHierarchyForVersion(BatchIndexAssigner& batch_assigner, - const size_t version) { - return _contraction_tree.createBatchUncontractionHierarchyForVersion(batch_assigner, version); +BatchVector DynamicHypergraph::createBatchUncontractionHierarchyForVersion( + BatchIndexAssigner &batch_assigner, const size_t version) +{ + return _contraction_tree.createBatchUncontractionHierarchyForVersion(batch_assigner, + version); } } // namespace ds diff --git a/mt-kahypar/datastructures/dynamic_hypergraph.h b/mt-kahypar/datastructures/dynamic_hypergraph.h index c36ac98a4..4f147e46a 100644 --- a/mt-kahypar/datastructures/dynamic_hypergraph.h +++ b/mt-kahypar/datastructures/dynamic_hypergraph.h @@ -34,29 +34,29 @@ #include "include/libmtkahypartypes.h" -#include "kahypar-resources/meta/mandatory.h" #include "kahypar-resources/datastructure/fast_reset_flag_array.h" +#include "kahypar-resources/meta/mandatory.h" #include "kahypar-resources/utils/math.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/datastructures/contraction_tree.h" #include "mt-kahypar/datastructures/fixed_vertex_support.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/incident_net_array.h" -#include "mt-kahypar/datastructures/contraction_tree.h" #include "mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" -#include "mt-kahypar/utils/memory_tree.h" #include "mt-kahypar/utils/exception.h" +#include "mt-kahypar/utils/memory_tree.h" namespace mt_kahypar { namespace ds { // Forward class DynamicHypergraphFactory; -template +template class PartitionedHypergraph; -class DynamicHypergraph { +class DynamicHypergraph +{ static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; @@ -66,85 +66,82 @@ class DynamicHypergraph { // ! In order to update gain cache correctly for an uncontraction (u,v), // ! the partitioned hypergraph has to know wheter v replaces u in a hyperedge - // ! or both a incident to that hyperedge after uncontraction. Therefore, the partitioned - // ! hypergraph passes two lambda functions to the batch uncontraction function, one for - // ! each case. - using UncontractionFunction = std::function; - #define NOOP_BATCH_FUNC [] (const HypernodeID, const HypernodeID, const HyperedgeID) { } + // ! or both a incident to that hyperedge after uncontraction. Therefore, the + // partitioned ! hypergraph passes two lambda functions to the batch uncontraction + // function, one for ! each case. + using UncontractionFunction = + std::function; +#define NOOP_BATCH_FUNC [](const HypernodeID, const HypernodeID, const HyperedgeID) {} /*! - * This struct is used during multilevel coarsening to efficiently - * detect parallel hyperedges. - */ - struct ContractedHyperedgeInformation { + * This struct is used during multilevel coarsening to efficiently + * detect parallel hyperedges. + */ + struct ContractedHyperedgeInformation + { HyperedgeID he = kInvalidHyperedge; size_t hash = kEdgeHashSeed; size_t size = std::numeric_limits::max(); bool valid = false; }; - private: +private: /** * Represents a hypernode of the hypergraph and contains all information * associated with a vertex. 
*/ - class Hypernode { - public: + class Hypernode + { + public: using IDType = HypernodeID; Hypernode() : - _weight(1), - _community_id(0), - _batch_idx(std::numeric_limits::max()), - _valid(false) { } + _weight(1), _community_id(0), _batch_idx(std::numeric_limits::max()), + _valid(false) + { + } Hypernode(const bool valid) : - _weight(1), - _community_id(0), - _batch_idx(std::numeric_limits::max()), - _valid(valid) { } - - bool isDisabled() const { - return _valid == false; + _weight(1), _community_id(0), _batch_idx(std::numeric_limits::max()), + _valid(valid) + { } - void enable() { + bool isDisabled() const { return _valid == false; } + + void enable() + { ASSERT(isDisabled()); _valid = true; } - void disable() { + void disable() + { ASSERT(!isDisabled()); _valid = false; } - HyperedgeWeight weight() const { - return _weight; - } + HyperedgeWeight weight() const { return _weight; } - void setWeight(HyperedgeWeight weight) { + void setWeight(HyperedgeWeight weight) + { ASSERT(!isDisabled()); _weight = weight; } - PartitionID communityID() const { - return _community_id; - } + PartitionID communityID() const { return _community_id; } - void setCommunityID(const PartitionID community_id) { + void setCommunityID(const PartitionID community_id) + { ASSERT(!isDisabled()); _community_id = community_id; } - HypernodeID batchIndex() const { - return _batch_idx; - } + HypernodeID batchIndex() const { return _batch_idx; } - void setBatchIndex(const HypernodeID batch_idx) { - _batch_idx = batch_idx; - } + void setBatchIndex(const HypernodeID batch_idx) { _batch_idx = batch_idx; } - private: + private: // ! Hypernode weight HyperedgeWeight _weight; // ! Community id @@ -159,104 +156,99 @@ class DynamicHypergraph { * Represents a hyperedge of the hypergraph and contains all information * associated with a net (except connectivity information). */ - class Hyperedge { - public: + class Hyperedge + { + public: using IDType = HyperedgeID; - Hyperedge() : - _begin(0), - _size(0), - _weight(1), - _hash(kEdgeHashSeed), - _valid(false) { } + Hyperedge() : _begin(0), _size(0), _weight(1), _hash(kEdgeHashSeed), _valid(false) {} // Sentinel Constructor Hyperedge(const size_t begin) : - _begin(begin), - _size(0), - _weight(1), - _hash(kEdgeHashSeed), - _valid(false) { } + _begin(begin), _size(0), _weight(1), _hash(kEdgeHashSeed), _valid(false) + { + } // ! Disables the hypernode/hyperedge. Disable hypernodes/hyperedges will be skipped // ! when iterating over the set of all nodes/edges. - void disable() { + void disable() + { ASSERT(!isDisabled()); _valid = false; } - void enable() { + void enable() + { ASSERT(isDisabled()); _valid = true; } - bool isDisabled() const { - return _valid == false; - } + bool isDisabled() const { return _valid == false; } // ! Returns the index of the first element in _incidence_array - size_t firstEntry() const { - return _begin; - } + size_t firstEntry() const { return _begin; } // ! Sets the index of the first element in _incidence_array to begin - void setFirstEntry(size_t begin) { + void setFirstEntry(size_t begin) + { ASSERT(!isDisabled()); _begin = begin; } // ! 
Returns the index of the first element in _incidence_array - size_t firstInvalidEntry() const { - return _begin + _size; - } + size_t firstInvalidEntry() const { return _begin + _size; } - size_t size() const { + size_t size() const + { ASSERT(!isDisabled()); return _size; } - void setSize(size_t size) { + void setSize(size_t size) + { ASSERT(!isDisabled()); _size = size; } - void incrementSize() { + void incrementSize() + { ASSERT(!isDisabled()); ++_size; } - void decrementSize() { + void decrementSize() + { ASSERT(!isDisabled()); --_size; } - HyperedgeWeight weight() const { + HyperedgeWeight weight() const + { ASSERT(!isDisabled()); return _weight; } - void setWeight(HyperedgeWeight weight) { + void setWeight(HyperedgeWeight weight) + { ASSERT(!isDisabled()); _weight = weight; } - size_t& hash() { - return _hash; - } + size_t &hash() { return _hash; } - size_t hash() const { - return _hash; - } + size_t hash() const { return _hash; } - bool operator== (const Hyperedge& rhs) const { + bool operator==(const Hyperedge &rhs) const + { return _begin == rhs._begin && _size == rhs._size && _weight == rhs._weight; } - bool operator!= (const Hyperedge& rhs) const { + bool operator!=(const Hyperedge &rhs) const + { return _begin != rhs._begin || _size != rhs._size || _weight != rhs._weight; } - private: + private: // ! Index of the first element in _incidence_array size_t _begin; // ! Number of pins @@ -287,13 +279,14 @@ class DynamicHypergraph { * */ template - class HypergraphElementIterator { - public: + class HypergraphElementIterator + { + public: using IDType = typename ElementType::IDType; using iterator_category = std::forward_iterator_tag; using value_type = IDType; - using reference = IDType&; - using pointer = const IDType*; + using reference = IDType &; + using pointer = const IDType *; using difference_type = std::ptrdiff_t; /*! @@ -307,58 +300,60 @@ class DynamicHypergraph { * \param id The index of the element the pointer points to * \param max_id The maximum index allowed */ - HypergraphElementIterator(const ElementType* start_element, IDType id, IDType max_id) : - _id(id), - _max_id(max_id), - _element(start_element) { - if (_id != _max_id && _element->isDisabled()) { - operator++ (); + HypergraphElementIterator(const ElementType *start_element, IDType id, + IDType max_id) : + _id(id), + _max_id(max_id), _element(start_element) + { + if(_id != _max_id && _element->isDisabled()) + { + operator++(); } } // ! Returns the id of the element the iterator currently points to. - IDType operator* () const { - return _id; - } + IDType operator*() const { return _id; } // ! Prefix increment. The iterator advances to the next valid element. - HypergraphElementIterator & operator++ () { + HypergraphElementIterator &operator++() + { ASSERT(_id < _max_id); - do { + do + { ++_id; ++_element; - } while (_id < _max_id && _element->isDisabled()); + } while(_id < _max_id && _element->isDisabled()); return *this; } // ! Postfix increment. The iterator advances to the next valid element. 
- HypergraphElementIterator operator++ (int) { + HypergraphElementIterator operator++(int) + { HypergraphElementIterator copy = *this; - operator++ (); + operator++(); return copy; } - bool operator!= (const HypergraphElementIterator& rhs) { - return _id != rhs._id; - } + bool operator!=(const HypergraphElementIterator &rhs) { return _id != rhs._id; } - bool operator== (const HypergraphElementIterator& rhs) { - return _id == rhs._id; - } + bool operator==(const HypergraphElementIterator &rhs) { return _id == rhs._id; } - private: + private: // Handle to the HypergraphElement the iterator currently points to IDType _id = 0; // Maximum allowed index IDType _max_id = 0; // HypergraphElement the iterator currently points to - const ElementType* _element = nullptr; + const ElementType *_element = nullptr; }; - static_assert(std::is_trivially_copyable::value, "Hypernode is not trivially copyable"); - static_assert(std::is_trivially_copyable::value, "Hyperedge is not trivially copyable"); + static_assert(std::is_trivially_copyable::value, + "Hypernode is not trivially copyable"); + static_assert(std::is_trivially_copyable::value, + "Hyperedge is not trivially copyable"); - enum class ContractionResult : uint8_t { + enum class ContractionResult : uint8_t + { CONTRACTED = 0, PENDING_CONTRACTIONS = 1, WEIGHT_LIMIT_REACHED = 2, @@ -368,24 +363,32 @@ class DynamicHypergraph { using ContractionInterval = typename ContractionTree::Interval; using ChildIterator = typename ContractionTree::ChildIterator; - struct PQBatchUncontractionElement { + struct PQBatchUncontractionElement + { int64_t _objective; std::pair _iterator; }; - struct PQElementComparator { - bool operator()(const PQBatchUncontractionElement& lhs, const PQBatchUncontractionElement& rhs){ - return lhs._objective < rhs._objective; + struct PQElementComparator + { + bool operator()(const PQBatchUncontractionElement &lhs, + const PQBatchUncontractionElement &rhs) + { + return lhs._objective < rhs._objective; } }; using IncidenceArray = Array; - using OwnershipVector = parallel::scalable_vector>; - using ThreadLocalHyperedgeVector = tbb::enumerable_thread_specific>; - using ThreadLocalBitset = tbb::enumerable_thread_specific>; - using ThreadLocalBitvector = tbb::enumerable_thread_specific>; - - public: + using OwnershipVector = + parallel::scalable_vector >; + using ThreadLocalHyperedgeVector = + tbb::enumerable_thread_specific >; + using ThreadLocalBitset = + tbb::enumerable_thread_specific >; + using ThreadLocalBitvector = + tbb::enumerable_thread_specific >; + +public: static constexpr bool is_graph = false; static constexpr bool is_static_hypergraph = false; static constexpr bool is_partitioned = false; @@ -404,67 +407,54 @@ class DynamicHypergraph { // ! 
Iterator to iterate over the incident nets of a hypernode using IncidentNetsIterator = typename IncidentNetArray::const_iterator; - struct ParallelHyperedge { + struct ParallelHyperedge + { HyperedgeID removed_hyperedge; HyperedgeID representative; }; explicit DynamicHypergraph() : - _num_hypernodes(0), - _num_removed_hypernodes(0), - _removed_degree_zero_hn_weight(0), - _num_hyperedges(0), - _num_removed_hyperedges(0), - _max_edge_size(0), - _num_pins(0), - _total_degree(0), - _total_weight(0), - _version(0), - _contraction_index(0), - _hypernodes(), - _contraction_tree(), - _incident_nets(), - _acquired_hns(), - _hyperedges(), - _incidence_array(), - _acquired_hes(), - _hes_to_resize_flag_array(), - _failed_hyperedge_contractions(), - _he_bitset(), - _removable_single_pin_and_parallel_nets(), - _fixed_vertices() { } - - DynamicHypergraph(const DynamicHypergraph&) = delete; - DynamicHypergraph & operator= (const DynamicHypergraph &) = delete; - - DynamicHypergraph(DynamicHypergraph&& other) : - _num_hypernodes(other._num_hypernodes), - _num_removed_hypernodes(other._num_removed_hypernodes), - _removed_degree_zero_hn_weight(other._removed_degree_zero_hn_weight), - _num_hyperedges(other._num_hyperedges), - _num_removed_hyperedges(other._num_removed_hyperedges), - _max_edge_size(other._max_edge_size), - _num_pins(other._num_pins), - _total_degree(other._total_degree), - _total_weight(other._total_weight), - _version(other._version), - _contraction_index(0), - _hypernodes(std::move(other._hypernodes)), - _contraction_tree(std::move(other._contraction_tree)), - _incident_nets(std::move(other._incident_nets)), - _acquired_hns(std::move(other._acquired_hns)), - _hyperedges(std::move(other._hyperedges)), - _incidence_array(std::move(other._incidence_array)), - _acquired_hes(std::move(other._acquired_hes)), - _hes_to_resize_flag_array(std::move(other._hes_to_resize_flag_array)), - _failed_hyperedge_contractions(std::move(other._failed_hyperedge_contractions)), - _he_bitset(std::move(other._he_bitset)), - _removable_single_pin_and_parallel_nets(std::move(other._removable_single_pin_and_parallel_nets)), - _fixed_vertices(std::move(other._fixed_vertices)) { + _num_hypernodes(0), _num_removed_hypernodes(0), _removed_degree_zero_hn_weight(0), + _num_hyperedges(0), _num_removed_hyperedges(0), _max_edge_size(0), _num_pins(0), + _total_degree(0), _total_weight(0), _version(0), _contraction_index(0), + _hypernodes(), _contraction_tree(), _incident_nets(), _acquired_hns(), + _hyperedges(), _incidence_array(), _acquired_hes(), _hes_to_resize_flag_array(), + _failed_hyperedge_contractions(), _he_bitset(), + _removable_single_pin_and_parallel_nets(), _fixed_vertices() + { + } + + DynamicHypergraph(const DynamicHypergraph &) = delete; + DynamicHypergraph &operator=(const DynamicHypergraph &) = delete; + + DynamicHypergraph(DynamicHypergraph &&other) : + _num_hypernodes(other._num_hypernodes), + _num_removed_hypernodes(other._num_removed_hypernodes), + _removed_degree_zero_hn_weight(other._removed_degree_zero_hn_weight), + _num_hyperedges(other._num_hyperedges), + _num_removed_hyperedges(other._num_removed_hyperedges), + _max_edge_size(other._max_edge_size), _num_pins(other._num_pins), + _total_degree(other._total_degree), _total_weight(other._total_weight), + _version(other._version), _contraction_index(0), + _hypernodes(std::move(other._hypernodes)), + _contraction_tree(std::move(other._contraction_tree)), + _incident_nets(std::move(other._incident_nets)), + _acquired_hns(std::move(other._acquired_hns)), + 
_hyperedges(std::move(other._hyperedges)), + _incidence_array(std::move(other._incidence_array)), + _acquired_hes(std::move(other._acquired_hes)), + _hes_to_resize_flag_array(std::move(other._hes_to_resize_flag_array)), + _failed_hyperedge_contractions(std::move(other._failed_hyperedge_contractions)), + _he_bitset(std::move(other._he_bitset)), + _removable_single_pin_and_parallel_nets( + std::move(other._removable_single_pin_and_parallel_nets)), + _fixed_vertices(std::move(other._fixed_vertices)) + { _fixed_vertices.setHypergraph(this); } - DynamicHypergraph & operator= (DynamicHypergraph&& other) { + DynamicHypergraph &operator=(DynamicHypergraph &&other) + { _num_hypernodes = other._num_hypernodes; _num_removed_hypernodes = other._num_removed_hypernodes; _num_hyperedges = other._num_hyperedges; @@ -486,62 +476,49 @@ class DynamicHypergraph { _hes_to_resize_flag_array = std::move(other._hes_to_resize_flag_array); _failed_hyperedge_contractions = std::move(other._failed_hyperedge_contractions); _he_bitset = std::move(other._he_bitset); - _removable_single_pin_and_parallel_nets = std::move(other._removable_single_pin_and_parallel_nets); + _removable_single_pin_and_parallel_nets = + std::move(other._removable_single_pin_and_parallel_nets); _fixed_vertices = std::move(other._fixed_vertices); _fixed_vertices.setHypergraph(this); return *this; } - ~DynamicHypergraph() { - freeInternalData(); - } + ~DynamicHypergraph() { freeInternalData(); } // ####################### General Hypergraph Stats ####################### // ! Initial number of hypernodes - HypernodeID initialNumNodes() const { - return _num_hypernodes; - } + HypernodeID initialNumNodes() const { return _num_hypernodes; } // ! Number of removed hypernodes - HypernodeID numRemovedHypernodes() const { - return _num_removed_hypernodes; - } + HypernodeID numRemovedHypernodes() const { return _num_removed_hypernodes; } // ! Weight of removed degree zero vertics - HypernodeWeight weightOfRemovedDegreeZeroVertices() const { + HypernodeWeight weightOfRemovedDegreeZeroVertices() const + { return _removed_degree_zero_hn_weight; } // ! Initial number of hyperedges - HyperedgeID initialNumEdges() const { - return _num_hyperedges; - } + HyperedgeID initialNumEdges() const { return _num_hyperedges; } // ! Number of removed hyperedges - HyperedgeID numRemovedHyperedges() const { - return _num_removed_hyperedges; - } + HyperedgeID numRemovedHyperedges() const { return _num_removed_hyperedges; } // ! Set the number of removed hyperedges - void setNumRemovedHyperedges(const HyperedgeID num_removed_hyperedges) { + void setNumRemovedHyperedges(const HyperedgeID num_removed_hyperedges) + { _num_removed_hyperedges = num_removed_hyperedges; } // ! Initial number of pins - HypernodeID initialNumPins() const { - return _num_pins; - } + HypernodeID initialNumPins() const { return _num_pins; } // ! Initial sum of the degree of all vertices - HypernodeID initialTotalVertexDegree() const { - return _total_degree; - } + HypernodeID initialTotalVertexDegree() const { return _total_degree; } // ! Total weight of hypergraph - HypernodeWeight totalWeight() const { - return _total_weight; - } + HypernodeWeight totalWeight() const { return _total_weight; } // ! Recomputes the total weight of the hypergraph (parallel) void updateTotalWeight(parallel_tag_t); @@ -553,17 +530,20 @@ class DynamicHypergraph { // ! Iterates in parallel over all active nodes and calls function f // ! 
for each vertex - template - void doParallelForAllNodes(const F& f) { - static_cast(*this).doParallelForAllNodes(f); + template + void doParallelForAllNodes(const F &f) + { + static_cast(*this).doParallelForAllNodes(f); } // ! Iterates in parallel over all active nodes and calls function f // ! for each vertex - template - void doParallelForAllNodes(const F& f) const { - tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID& hn) { - if ( nodeIsEnabled(hn) ) { + template + void doParallelForAllNodes(const F &f) const + { + tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID &hn) { + if(nodeIsEnabled(hn)) + { f(hn); } }); @@ -571,101 +551,110 @@ class DynamicHypergraph { // ! Iterates in parallel over all active edges and calls function f // ! for each net - template - void doParallelForAllEdges(const F& f) { - static_cast(*this).doParallelForAllEdges(f); + template + void doParallelForAllEdges(const F &f) + { + static_cast(*this).doParallelForAllEdges(f); } // ! Iterates in parallel over all active edges and calls function f // ! for each net - template - void doParallelForAllEdges(const F& f) const { - tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID& he) { - if ( edgeIsEnabled(he) ) { + template + void doParallelForAllEdges(const F &f) const + { + tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID &he) { + if(edgeIsEnabled(he)) + { f(he); } }); } // ! Returns a range of the active nodes of the hypergraph - IteratorRange nodes() const { + IteratorRange nodes() const + { return IteratorRange( - HypernodeIterator(_hypernodes.data(), ID(0), _num_hypernodes), - HypernodeIterator(_hypernodes.data() + _num_hypernodes, _num_hypernodes, _num_hypernodes)); + HypernodeIterator(_hypernodes.data(), ID(0), _num_hypernodes), + HypernodeIterator(_hypernodes.data() + _num_hypernodes, _num_hypernodes, + _num_hypernodes)); } // ! Returns a range of the active edges of the hypergraph - IteratorRange edges() const { + IteratorRange edges() const + { return IteratorRange( - HyperedgeIterator(_hyperedges.data(), ID(0), _num_hyperedges), - HyperedgeIterator(_hyperedges.data() + _num_hyperedges, _num_hyperedges, _num_hyperedges)); + HyperedgeIterator(_hyperedges.data(), ID(0), _num_hyperedges), + HyperedgeIterator(_hyperedges.data() + _num_hyperedges, _num_hyperedges, + _num_hyperedges)); } // ! Returns a range to loop over the incident nets of hypernode u. - IteratorRange incidentEdges(const HypernodeID u) const { + IteratorRange incidentEdges(const HypernodeID u) const + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); return _incident_nets.incidentEdges(u); } // ! Returns a range to loop over the pins of hyperedge e. - IteratorRange pins(const HyperedgeID e) const { + IteratorRange pins(const HyperedgeID e) const + { ASSERT(!hyperedge(e).isDisabled(), "Hyperedge" << e << "is disabled"); - const Hyperedge& he = hyperedge(e); - return IteratorRange( - _incidence_array.cbegin() + he.firstEntry(), - _incidence_array.cbegin() + he.firstInvalidEntry()); + const Hyperedge &he = hyperedge(e); + return IteratorRange(_incidence_array.cbegin() + he.firstEntry(), + _incidence_array.cbegin() + + he.firstInvalidEntry()); } // ####################### Hypernode Information ####################### // ! Weight of a vertex - HypernodeWeight nodeWeight(const HypernodeID u) const { + HypernodeWeight nodeWeight(const HypernodeID u) const + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); return hypernode(u).weight(); } // ! 
Sets the weight of a vertex - void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) { + void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); return hypernode(u).setWeight(weight); } // ! Degree of a hypernode - HyperedgeID nodeDegree(const HypernodeID u) const { + HyperedgeID nodeDegree(const HypernodeID u) const + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); return _incident_nets.nodeDegree(u); } // ! Returns, whether a hypernode is enabled or not - bool nodeIsEnabled(const HypernodeID u) const { - return !hypernode(u).isDisabled(); - } + bool nodeIsEnabled(const HypernodeID u) const { return !hypernode(u).isDisabled(); } // ! Enables a hypernode (must be disabled before) - void enableHypernode(const HypernodeID u) { - hypernode(u).enable(); - } + void enableHypernode(const HypernodeID u) { hypernode(u).enable(); } // ! Disables a hypernode (must be enabled before) - void disableHypernode(const HypernodeID u) { - hypernode(u).disable(); - } + void disableHypernode(const HypernodeID u) { hypernode(u).disable(); } // ! Removes a hypernode (must be enabled before) - void removeHypernode(const HypernodeID u) { + void removeHypernode(const HypernodeID u) + { hypernode(u).disable(); ++_num_removed_hypernodes; } // ! Removes a degree zero hypernode - void removeDegreeZeroHypernode(const HypernodeID u) { + void removeDegreeZeroHypernode(const HypernodeID u) + { ASSERT(nodeDegree(u) == 0); removeHypernode(u); _removed_degree_zero_hn_weight += nodeWeight(u); } // ! Restores a degree zero hypernode - void restoreDegreeZeroHypernode(const HypernodeID u) { + void restoreDegreeZeroHypernode(const HypernodeID u) + { hypernode(u).enable(); ASSERT(nodeDegree(u) == 0); _removed_degree_zero_hn_weight -= nodeWeight(u); @@ -674,53 +663,50 @@ class DynamicHypergraph { // ####################### Hyperedge Information ####################### // ! Weight of a hyperedge - HypernodeWeight edgeWeight(const HyperedgeID e) const { + HypernodeWeight edgeWeight(const HyperedgeID e) const + { ASSERT(!hyperedge(e).isDisabled(), "Hyperedge" << e << "is disabled"); return hyperedge(e).weight(); } // ! Sets the weight of a hyperedge - void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) { + void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) + { ASSERT(!hyperedge(e).isDisabled(), "Hyperedge" << e << "is disabled"); return hyperedge(e).setWeight(weight); } // ! Number of pins of a hyperedge - HypernodeID edgeSize(const HyperedgeID e) const { + HypernodeID edgeSize(const HyperedgeID e) const + { ASSERT(!hyperedge(e).isDisabled(), "Hyperedge" << e << "is disabled"); return hyperedge(e).size(); } // ! Maximum size of a hyperedge - HypernodeID maxEdgeSize() const { - return _max_edge_size; - } + HypernodeID maxEdgeSize() const { return _max_edge_size; } // ! Hash value defined over the pins of a hyperedge - size_t edgeHash(const HyperedgeID e) const { + size_t edgeHash(const HyperedgeID e) const + { ASSERT(!hyperedge(e).isDisabled(), "Hyperedge" << e << "is disabled"); return hyperedge(e).hash(); } // ! Returns, whether a hyperedge is enabled or not - bool edgeIsEnabled(const HyperedgeID e) const { - return !hyperedge(e).isDisabled(); - } + bool edgeIsEnabled(const HyperedgeID e) const { return !hyperedge(e).isDisabled(); } // ! 
Enables a hyperedge (must be disabled before) - void enableHyperedge(const HyperedgeID e) { - hyperedge(e).enable(); - } + void enableHyperedge(const HyperedgeID e) { hyperedge(e).enable(); } // ! Disabled a hyperedge (must be enabled before) - void disableHyperedge(const HyperedgeID e) { - hyperedge(e).disable(); - } + void disableHyperedge(const HyperedgeID e) { hyperedge(e).disable(); } // ####################### Community Information ####################### // ! Community id which hypernode u is assigned to - PartitionID communityID(const HypernodeID u) const { + PartitionID communityID(const HypernodeID u) const + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); return hypernode(u).communityID(); } @@ -728,87 +714,94 @@ class DynamicHypergraph { // ! Assign a community to a hypernode // ! Note, in order to use all community-related functions, initializeCommunities() // ! have to be called after assigning to each vertex a community id - void setCommunityID(const HypernodeID u, const PartitionID community_id) { + void setCommunityID(const HypernodeID u, const PartitionID community_id) + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); return hypernode(u).setCommunityID(community_id); } // ####################### Fixed Vertex Support ####################### - void addFixedVertexSupport(FixedVertexSupport&& fixed_vertices) { + void addFixedVertexSupport(FixedVertexSupport &&fixed_vertices) + { _fixed_vertices = std::move(fixed_vertices); _fixed_vertices.setHypergraph(this); } - bool hasFixedVertices() const { - return _fixed_vertices.hasFixedVertices(); - } + bool hasFixedVertices() const { return _fixed_vertices.hasFixedVertices(); } - HypernodeWeight totalFixedVertexWeight() const { + HypernodeWeight totalFixedVertexWeight() const + { return _fixed_vertices.totalFixedVertexWeight(); } - HypernodeWeight fixedVertexBlockWeight(const PartitionID block) const { + HypernodeWeight fixedVertexBlockWeight(const PartitionID block) const + { return _fixed_vertices.fixedVertexBlockWeight(block); } - bool isFixed(const HypernodeID hn) const { - return _fixed_vertices.isFixed(hn); - } + bool isFixed(const HypernodeID hn) const { return _fixed_vertices.isFixed(hn); } - PartitionID fixedVertexBlock(const HypernodeID hn) const { + PartitionID fixedVertexBlock(const HypernodeID hn) const + { return _fixed_vertices.fixedVertexBlock(hn); } - void setMaxFixedVertexBlockWeight(const std::vector max_block_weights) { + void setMaxFixedVertexBlockWeight(const std::vector max_block_weights) + { _fixed_vertices.setMaxBlockWeight(max_block_weights); } - const FixedVertexSupport& fixedVertexSupport() const { + const FixedVertexSupport &fixedVertexSupport() const + { return _fixed_vertices; } - FixedVertexSupport copyOfFixedVertexSupport() const { + FixedVertexSupport copyOfFixedVertexSupport() const + { return _fixed_vertices.copy(); } // ####################### Contract / Uncontract ####################### - DynamicHypergraph contract(parallel::scalable_vector&, bool deterministic = false) { + DynamicHypergraph contract(parallel::scalable_vector &, + bool deterministic = false) + { throw NonSupportedOperationException( - "contract(c, id) is not supported in dynamic hypergraph"); + "contract(c, id) is not supported in dynamic hypergraph"); return DynamicHypergraph(); } /**! * Registers a contraction in the hypergraph whereas vertex u is the representative - * of the contraction and v its contraction partner. Several threads can call this function - * in parallel. 
The function adds the contraction of u and v to a contraction tree that determines - * a parallel execution order and synchronization points for all running contractions. - * The contraction can be executed by calling function contract(v, max_node_weight). + * of the contraction and v its contraction partner. Several threads can call this + * function in parallel. The function adds the contraction of u and v to a contraction + * tree that determines a parallel execution order and synchronization points for all + * running contractions. The contraction can be executed by calling function contract(v, + * max_node_weight). */ bool registerContraction(const HypernodeID u, const HypernodeID v); /**! - * Contracts a previously registered contraction. Representative u of vertex v is looked up - * in the contraction tree and performed if there are no pending contractions in the subtree - * of v and the contractions respects the maximum allowed node weight. If (u,v) is the last - * pending contraction in the subtree of u then the function recursively contracts also - * u (if any contraction is registered). Therefore, function can return several contractions - * or also return an empty contraction vector. + * Contracts a previously registered contraction. Representative u of vertex v is looked + * up in the contraction tree and performed if there are no pending contractions in the + * subtree of v and the contraction respects the maximum allowed node weight. If (u,v) + * is the last pending contraction in the subtree of u then the function recursively + * contracts also u (if any contraction is registered). Therefore, the function can return + * several contractions or also return an empty contraction vector. */ - size_t contract(const HypernodeID v, - const HypernodeWeight max_node_weight = std::numeric_limits<HypernodeWeight>::max()); + size_t contract(const HypernodeID v, const HypernodeWeight max_node_weight = + std::numeric_limits<HypernodeWeight>::max()); /** - * Uncontracts a batch of contractions in parallel. The batches must be uncontracted exactly - * in the order computed by the function createBatchUncontractionHierarchy(...). + * Uncontracts a batch of contractions in parallel. The batches must be uncontracted + * exactly in the order computed by the function createBatchUncontractionHierarchy(...). * The two uncontraction functions are required by the partitioned hypergraph to restore * pin counts and gain cache values. */ - void uncontract(const Batch& batch, - const UncontractionFunction& case_one_func = NOOP_BATCH_FUNC, - const UncontractionFunction& case_two_func = NOOP_BATCH_FUNC); + void uncontract(const Batch &batch, + const UncontractionFunction &case_one_func = NOOP_BATCH_FUNC, + const UncontractionFunction &case_two_func = NOOP_BATCH_FUNC); /** * Computes a batch uncontraction hierarchy. A batch is a vector of mementos @@ -817,15 +810,17 @@ class DynamicHypergraph { * single-pin and parallel net detection. Once we process all batches of a versioned * batch vector, we have to restore all previously removed single-pin and parallel nets * in order to uncontract the next batch vector. We create for each version of the - * hypergraph a seperate batch uncontraction hierarchy (see createBatchUncontractionHierarchyOfVersion(...)) + * hypergraph a separate batch uncontraction hierarchy (see + * createBatchUncontractionHierarchyOfVersion(...)) */ VersionedBatchVector createBatchUncontractionHierarchy(const size_t batch_size, const bool test = false); // !
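To make the register / contract / uncontract protocol described in the comments above concrete, the following rough sketch drives the API on a toy instance. It only uses calls that appear in this patch (DynamicHypergraphFactory::construct, registerContraction, contract, createBatchUncontractionHierarchy, uncontract); the sample edge set, node count, weight limit and the single uncontracted batch are illustrative assumptions, and a real uncoarsening pass processes every version and batch in the documented order and restores removed single-pin/parallel nets between versions.

#include "mt-kahypar/datastructures/dynamic_hypergraph.h"
#include "mt-kahypar/datastructures/dynamic_hypergraph_factory.h"

void contraction_protocol_example()
{
  using namespace mt_kahypar;
  using ds::DynamicHypergraph;
  using ds::DynamicHypergraphFactory;

  // Two hyperedges {0,1,2} and {1,2,3} over four unit-weight nodes (illustrative data).
  parallel::scalable_vector<parallel::scalable_vector<HypernodeID> > edges = {
    { 0, 1, 2 }, { 1, 2, 3 }
  };
  DynamicHypergraph hg = DynamicHypergraphFactory::construct(4, 2, edges);

  // Register the contraction of v = 1 onto its representative u = 0; several threads
  // may call registerContraction concurrently, as described above.
  if(hg.registerContraction(0, 1))
  {
    // Execute the registered contraction, allowing a maximum node weight of 2.
    hg.contract(1, 2);
  }

  // Compute the versioned batch hierarchy and uncontract a single batch; the full
  // traversal over all versions and batches is elided in this sketch.
  auto versioned_batches = hg.createBatchUncontractionHierarchy(2);
  if(!versioned_batches.empty() && !versioned_batches.back().empty())
  {
    hg.uncontract(versioned_batches.back().back());
  }
}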
Only for testing - VersionedBatchVector createBatchUncontractionHierarchy(ContractionTree&& tree, + VersionedBatchVector createBatchUncontractionHierarchy(ContractionTree &&tree, const size_t batch_size, - const size_t num_versions = 1) { + const size_t num_versions = 1) + { ASSERT(num_versions > 0); _version = num_versions - 1; _contraction_tree = std::move(tree); @@ -833,19 +828,22 @@ class DynamicHypergraph { } // ! Only for testing - HypernodeID contractionTree(const HypernodeID u) const { + HypernodeID contractionTree(const HypernodeID u) const + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); return _contraction_tree.parent(u); } // ! Only for testing - HypernodeID pendingContractions(const HypernodeID u) const { + HypernodeID pendingContractions(const HypernodeID u) const + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); return _contraction_tree.pendingContractions(u); } // ! Only for testing - void decrementPendingContractions(const HypernodeID u) { + void decrementPendingContractions(const HypernodeID u) + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); _contraction_tree.decrementPendingContractions(u); } @@ -853,17 +851,19 @@ class DynamicHypergraph { // ####################### Remove / Restore Hyperedges ####################### /*! - * Removes a hyperedge from the hypergraph. This includes the removal of he from all - * of its pins and to disable the hyperedge. - * - * NOTE, this function is not thread-safe and should only be called in a single-threaded - * setting. - */ - void removeEdge(const HyperedgeID he) { + * Removes a hyperedge from the hypergraph. This includes the removal of he from all + * of its pins and to disable the hyperedge. + * + * NOTE, this function is not thread-safe and should only be called in a single-threaded + * setting. + */ + void removeEdge(const HyperedgeID he) + { ASSERT(edgeIsEnabled(he), "Hyperedge" << he << "is disabled"); - kahypar::ds::FastResetFlagArray<>& he_to_remove = _he_bitset.local(); + kahypar::ds::FastResetFlagArray<> &he_to_remove = _he_bitset.local(); he_to_remove.set(he, true); - for ( const HypernodeID& pin : pins(he) ) { + for(const HypernodeID &pin : pins(he)) + { _incident_nets.removeIncidentNets(pin, he_to_remove); } ++_num_removed_hyperedges; @@ -871,18 +871,19 @@ class DynamicHypergraph { } /*! - * Removes a hyperedge from the hypergraph. This includes the removal of he from all - * of its pins and to disable the hyperedge. Note, in contrast to removeEdge, this function - * removes hyperedge from all its pins in parallel. - * - * NOTE, this function is not thread-safe and should only be called in a single-threaded - * setting. - */ - void removeLargeEdge(const HyperedgeID he) { + * Removes a hyperedge from the hypergraph. This includes the removal of he from all + * of its pins and to disable the hyperedge. Note, in contrast to removeEdge, this + * function removes hyperedge from all its pins in parallel. + * + * NOTE, this function is not thread-safe and should only be called in a single-threaded + * setting. 
+ */ + void removeLargeEdge(const HyperedgeID he) + { ASSERT(edgeIsEnabled(he), "Hyperedge" << he << "is disabled"); const size_t incidence_array_start = hyperedge(he).firstEntry(); const size_t incidence_array_end = hyperedge(he).firstInvalidEntry(); - kahypar::ds::FastResetFlagArray<>& he_to_remove = _he_bitset.local(); + kahypar::ds::FastResetFlagArray<> &he_to_remove = _he_bitset.local(); he_to_remove.set(he, true); tbb::parallel_for(incidence_array_start, incidence_array_end, [&](const size_t pos) { const HypernodeID pin = _incidence_array[pos]; @@ -894,7 +895,8 @@ class DynamicHypergraph { /*! * Restores a large hyperedge previously removed from the hypergraph. */ - void restoreLargeEdge(const HyperedgeID& he) { + void restoreLargeEdge(const HyperedgeID &he) + { ASSERT(!edgeIsEnabled(he), "Hyperedge" << he << "is enabled"); enableHyperedge(he); const size_t incidence_array_start = hyperedge(he).firstEntry(); @@ -913,20 +915,21 @@ class DynamicHypergraph { parallel::scalable_vector removeSinglePinAndParallelHyperedges(); /** - * Restores a previously removed set of singple-pin and parallel hyperedges. Note, that hes_to_restore - * must be exactly the same and given in the reverse order as returned by removeSinglePinAndParallelNets(...). + * Restores a previously removed set of singple-pin and parallel hyperedges. Note, that + * hes_to_restore must be exactly the same and given in the reverse order as returned by + * removeSinglePinAndParallelNets(...). */ - void restoreSinglePinAndParallelNets(const parallel::scalable_vector& hes_to_restore); + void restoreSinglePinAndParallelNets( + const parallel::scalable_vector &hes_to_restore); // ####################### Initialization / Reset Functions ####################### // ! Reset internal community information - void setCommunityIDs(const parallel::scalable_vector& community_ids) { + void setCommunityIDs(const parallel::scalable_vector &community_ids) + { ASSERT(community_ids.size() == UI64(_num_hypernodes)); - doParallelForAllNodes([&](const HypernodeID& hn) { - hypernode(hn).setCommunityID(community_ids[hn]); - }); - + doParallelForAllNodes( + [&](const HypernodeID &hn) { hypernode(hn).setCommunityID(community_ids[hn]); }); } // ####################### Copy ####################### @@ -938,77 +941,87 @@ class DynamicHypergraph { DynamicHypergraph copy() const; // ! Reset internal data structure - void reset() { + void reset() + { _contraction_tree.reset(); _incident_nets.reset(); _version = 0; } // ! Free internal data in parallel - void freeInternalData() { + void freeInternalData() + { _num_hypernodes = 0; _num_hyperedges = 0; } - void freeTmpContractionBuffer() { + void freeTmpContractionBuffer() + { throw NonSupportedOperationException( - "freeTmpContractionBuffer() is not supported in dynamic hypergraph"); + "freeTmpContractionBuffer() is not supported in dynamic hypergraph"); } - void memoryConsumption(utils::MemoryTreeNode* parent) const; + void memoryConsumption(utils::MemoryTreeNode *parent) const; // ! 
Only for testing bool verifyIncidenceArrayAndIncidentNets(); - private: +private: friend class DynamicHypergraphFactory; - template + template friend class CommunitySupport; - template + template friend class PartitionedHypergraph; // ####################### Acquiring / Releasing Ownership ####################### - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void acquireHypernode(const HypernodeID u) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void acquireHypernode(const HypernodeID u) + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); bool expected = false; bool desired = true; - while ( !_acquired_hns[u].compare_exchange_strong(expected, desired) ) { + while(!_acquired_hns[u].compare_exchange_strong(expected, desired)) + { expected = false; } } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool tryAcquireHypernode(const HypernodeID u) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool tryAcquireHypernode(const HypernodeID u) + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); bool expected = false; bool desired = true; return _acquired_hns[u].compare_exchange_strong(expected, desired); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void releaseHypernode(const HypernodeID u) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void releaseHypernode(const HypernodeID u) + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); ASSERT(_acquired_hns[u], "Hypernode" << u << "is not acquired!"); _acquired_hns[u] = false; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void acquireHyperedge(const HyperedgeID e) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void acquireHyperedge(const HyperedgeID e) + { ASSERT(e < _num_hyperedges, "Hyperedge" << e << "does not exist"); bool expected = false; bool desired = true; - while ( !_acquired_hes[e].compare_exchange_strong(expected, desired) ) { + while(!_acquired_hes[e].compare_exchange_strong(expected, desired)) + { expected = false; } } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool tryAcquireHyperedge(const HyperedgeID e) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool tryAcquireHyperedge(const HyperedgeID e) + { ASSERT(e < _num_hyperedges, "Hyperedge" << e << "does not exist"); bool expected = false; bool desired = true; return _acquired_hes[e].compare_exchange_strong(expected, desired); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void releaseHyperedge(const HyperedgeID e) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void releaseHyperedge(const HyperedgeID e) + { ASSERT(e < _num_hyperedges, "Hyperedge" << e << "does not exist"); ASSERT(_acquired_hes[e], "Hyperedge" << e << "is not acquired!"); _acquired_hes[e] = false; @@ -1017,32 +1030,39 @@ class DynamicHypergraph { // ####################### Hypernode Information ####################### // ! Accessor for hypernode-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Hypernode& hypernode(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Hypernode &hypernode(const HypernodeID u) const + { ASSERT(u <= _num_hypernodes, "Hypernode" << u << "does not exist"); return _hypernodes[u]; } // ! 
To avoid code duplication we implement non-const version in terms of const version - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Hypernode& hypernode(const HypernodeID u) { - return const_cast(static_cast(*this).hypernode(u)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Hypernode &hypernode(const HypernodeID u) + { + return const_cast( + static_cast(*this).hypernode(u)); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE IteratorRange incident_nets_of(const HypernodeID u, - const size_t pos = 0) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE IteratorRange + incident_nets_of(const HypernodeID u, const size_t pos = 0) const + { return _incident_nets.incidentEdges(u, pos); } // ####################### Hyperedge Information ####################### // ! Accessor for hyperedge-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Hyperedge& hyperedge(const HyperedgeID e) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Hyperedge &hyperedge(const HyperedgeID e) const + { ASSERT(e <= _num_hyperedges, "Hyperedge" << e << "does not exist"); return _hyperedges[e]; } // ! To avoid code duplication we implement non-const version in terms of const version - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Hyperedge& hyperedge(const HyperedgeID e) { - return const_cast(static_cast(*this).hyperedge(e)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Hyperedge &hyperedge(const HyperedgeID e) + { + return const_cast( + static_cast(*this).hyperedge(e)); } // ####################### Contract / Uncontract ####################### @@ -1056,58 +1076,62 @@ class DynamicHypergraph { * greater than the maximum allowed node weight) or PENDING_CONTRACTIONS (in case * there are some unfinished contractions in the subtree of v) is returned. */ - ContractionResult contract(const HypernodeID u, - const HypernodeID v, + ContractionResult contract(const HypernodeID u, const HypernodeID v, const HypernodeWeight max_node_weight); // ! Performs the contraction of (u,v) inside hyperedge he - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void contractHyperedge(const HypernodeID u, const HypernodeID v, const HyperedgeID he, - kahypar::ds::FastResetFlagArray<>& shared_incident_nets_u_and_v); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + contractHyperedge(const HypernodeID u, const HypernodeID v, const HyperedgeID he, + kahypar::ds::FastResetFlagArray<> &shared_incident_nets_u_and_v); // ! Restore the size of the hyperedge to the size before the batch with // ! index batch_index was contracted. After each size increment, we call case_one_func // ! that triggers updates in the partitioned hypergraph and gain cache - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void restoreHyperedgeSizeForBatch(const HyperedgeID he, - const HypernodeID batch_index, - const UncontractionFunction& case_one_func); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + restoreHyperedgeSizeForBatch(const HyperedgeID he, const HypernodeID batch_index, + const UncontractionFunction &case_one_func); // ! Search for the position of pin u in hyperedge he in the incidence array - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t findPositionOfPinInIncidenceArray(const HypernodeID u, - const HyperedgeID he); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t + findPositionOfPinInIncidenceArray(const HypernodeID u, const HyperedgeID he); /** * Computes a batch uncontraction hierarchy for a specific version of the hypergraph. * A batch is a vector of mementos (uncontractions) that are uncontracted in parallel. * Each time we perform single-pin and parallel net detection we create a new version of * the hypergraph. 
- * A batch of uncontractions that is uncontracted in parallel must satisfy two conditions: - * 1.) All representatives must be active vertices of the hypergraph - * 2.) For a specific representative its contraction partners must be uncontracted in reverse - * contraction order. Meaning that a contraction (u, v) that happens before a contraction (u, w) - * must be uncontracted in a batch that is part of the same batch or a batch uncontracted after the - * batch which (u, w) is part of. This ensures that a parallel batch uncontraction does not - * increase the objective function. - * We use the contraction tree to create a batch uncontraction order. Note, uncontractions from - * different subtrees can be interleaved abitrary. To ensure condition 1.) we peform a BFS starting - * from all roots of the contraction tree. Each BFS level induces a new batch. Since we contract - * vertices in parallel its not possible to create a relative order of the contractions which is - * neccassary for condition 2.). However, during a contraction we store a start and end "timepoint" - * of a contraction. If two contractions time intervals do not intersect, we can determine - * which contraction happens strictly before the other. If they intersect, it is not possible to - * give a relative order. To ensure condition 2.) we sort the childs of a vertex in the contraction tree - * after its time intervals. Once we add a uncontraction (u,v) to a batch, we also add all uncontractions - * (u,w) to the batch which intersect the time interval of (u,v). To merge uncontractions of different - * subtrees in a batch, we insert all eligble uncontractions into a max priority queue with the subtree - * size of the contraction partner as key. We insert uncontractions into the current batch as long - * as the maximum batch size is not reached or the PQ is empty. Once the batch reaches its maximum - * batch size, we create a new empty batch. If the PQ is empty, we replace it with the PQ of the next - * BFS level. With this approach heavy vertices are uncontracted earlier (subtree size in the PQ as key = weight of - * a vertex for an unweighted hypergraph) such that average node weight of the hypergraph decreases faster and - * local searches are more effective in early stages of the uncontraction hierarchy where hyperedge sizes are - * usually smaller than on the original hypergraph. + * A batch of uncontractions that is uncontracted in parallel must satisfy two + * conditions: 1.) All representatives must be active vertices of the hypergraph 2.) For + * a specific representative its contraction partners must be uncontracted in reverse + * contraction order. Meaning that a contraction (u, v) that happens before a + * contraction (u, w) must be uncontracted in a batch that is part of the same batch or + * a batch uncontracted after the batch which (u, w) is part of. This ensures that a + * parallel batch uncontraction does not increase the objective function. We use the + * contraction tree to create a batch uncontraction order. Note, uncontractions from + * different subtrees can be interleaved abitrary. To ensure condition 1.) we peform a + * BFS starting from all roots of the contraction tree. Each BFS level induces a new + * batch. Since we contract vertices in parallel its not possible to create a relative + * order of the contractions which is neccassary for condition 2.). However, during a + * contraction we store a start and end "timepoint" of a contraction. 
If two + * contractions time intervals do not intersect, we can determine which contraction + * happens strictly before the other. If they intersect, it is not possible to give a + * relative order. To ensure condition 2.) we sort the childs of a vertex in the + * contraction tree after its time intervals. Once we add a uncontraction (u,v) to a + * batch, we also add all uncontractions (u,w) to the batch which intersect the time + * interval of (u,v). To merge uncontractions of different subtrees in a batch, we + * insert all eligble uncontractions into a max priority queue with the subtree size of + * the contraction partner as key. We insert uncontractions into the current batch as + * long as the maximum batch size is not reached or the PQ is empty. Once the batch + * reaches its maximum batch size, we create a new empty batch. If the PQ is empty, we + * replace it with the PQ of the next BFS level. With this approach heavy vertices are + * uncontracted earlier (subtree size in the PQ as key = weight of a vertex for an + * unweighted hypergraph) such that average node weight of the hypergraph decreases + * faster and local searches are more effective in early stages of the uncontraction + * hierarchy where hyperedge sizes are usually smaller than on the original hypergraph. */ - BatchVector createBatchUncontractionHierarchyForVersion(BatchIndexAssigner& batch_assigner, - const size_t version); + BatchVector + createBatchUncontractionHierarchyForVersion(BatchIndexAssigner &batch_assigner, + const size_t version); // ! Number of hypernodes HypernodeID _num_hypernodes; @@ -1142,7 +1166,6 @@ class DynamicHypergraph { // ! Atomic bool vector used to acquire unique ownership of hypernodes OwnershipVector _acquired_hns; - // ! Hyperedges Array _hyperedges; // ! 
Incident nets of hypernodes diff --git a/mt-kahypar/datastructures/dynamic_hypergraph_factory.cpp b/mt-kahypar/datastructures/dynamic_hypergraph_factory.cpp index 6e31fdea1..50f89ec56 100644 --- a/mt-kahypar/datastructures/dynamic_hypergraph_factory.cpp +++ b/mt-kahypar/datastructures/dynamic_hypergraph_factory.cpp @@ -40,26 +40,23 @@ namespace mt_kahypar { namespace ds { DynamicHypergraph DynamicHypergraphFactory::construct( - const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const HyperedgeVector& edge_vector, - const HyperedgeWeight* hyperedge_weight, - const HypernodeWeight* hypernode_weight, - const bool) { + const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, + const HyperedgeVector &edge_vector, const HyperedgeWeight *hyperedge_weight, + const HypernodeWeight *hypernode_weight, const bool) +{ DynamicHypergraph hypergraph; hypergraph._num_hypernodes = num_hypernodes; hypergraph._num_hyperedges = num_hyperedges; - tbb::parallel_invoke([&] { - hypergraph._hypernodes.resize(num_hypernodes); - }, [&] { - hypergraph._hyperedges.resize(num_hyperedges + 1); - }, [&] { - hypergraph._removable_single_pin_and_parallel_nets = - kahypar::ds::FastResetFlagArray<>(num_hyperedges); - }, [&] { - hypergraph._hes_to_resize_flag_array = - ThreadSafeFastResetFlagArray<>(num_hyperedges); - }); + tbb::parallel_invoke([&] { hypergraph._hypernodes.resize(num_hypernodes); }, + [&] { hypergraph._hyperedges.resize(num_hyperedges + 1); }, + [&] { + hypergraph._removable_single_pin_and_parallel_nets = + kahypar::ds::FastResetFlagArray<>(num_hyperedges); + }, + [&] { + hypergraph._hes_to_resize_flag_array = + ThreadSafeFastResetFlagArray<>(num_hyperedges); + }); hypergraph._he_bitset = ThreadLocalBitset(num_hyperedges); ASSERT(edge_vector.size() == num_hyperedges); @@ -69,72 +66,76 @@ DynamicHypergraph DynamicHypergraphFactory::construct( tbb::enumerable_thread_specific local_max_edge_size(UL(0)); tbb::parallel_for(ID(0), num_hyperedges, [&](const size_t pos) { num_pins_per_hyperedge[pos] = edge_vector[pos].size(); - local_max_edge_size.local() = std::max( - local_max_edge_size.local(), edge_vector[pos].size()); + local_max_edge_size.local() = + std::max(local_max_edge_size.local(), edge_vector[pos].size()); }); hypergraph._max_edge_size = local_max_edge_size.combine( - [&](const size_t lhs, const size_t rhs) { - return std::max(lhs, rhs); - }); + [&](const size_t lhs, const size_t rhs) { return std::max(lhs, rhs); }); // Compute prefix sum over the number of pins per hyperedge and the. // The prefix sum is used than as // start position for each hyperedge in the incidence array. 
parallel::TBBPrefixSum pin_prefix_sum(num_pins_per_hyperedge); - tbb::parallel_scan(tbb::blocked_range( - UL(0), UI64(num_hyperedges)), pin_prefix_sum); + tbb::parallel_scan(tbb::blocked_range(UL(0), UI64(num_hyperedges)), + pin_prefix_sum); hypergraph._num_pins = pin_prefix_sum.total_sum(); hypergraph._total_degree = pin_prefix_sum.total_sum(); hypergraph._incidence_array.resize(hypergraph._num_pins); - tbb::parallel_invoke([&] { - hypergraph._acquired_hes.assign( - num_hyperedges, parallel::IntegralAtomicWrapper(false)); - tbb::parallel_for(ID(0), num_hyperedges, [&](const size_t pos) { - // Setup hyperedges - DynamicHypergraph::Hyperedge& hyperedge = hypergraph._hyperedges[pos]; - hyperedge.enable(); - hyperedge.setFirstEntry(pin_prefix_sum[pos]); - hyperedge.setSize(pin_prefix_sum.value(pos)); - if ( hyperedge_weight ) { - hyperedge.setWeight(hyperedge_weight[pos]); - } - - size_t incidence_array_pos = hyperedge.firstEntry(); - size_t hash = kEdgeHashSeed; - for ( const HypernodeID& pin : edge_vector[pos] ) { - ASSERT(incidence_array_pos < hyperedge.firstInvalidEntry()); - ASSERT(pin < num_hypernodes); - // Compute hash of hyperedge - hash += kahypar::math::hash(pin); - // Add pin to incidence array - hypergraph._incidence_array[incidence_array_pos++] = pin; - } - hyperedge.hash() = hash; - }); - // Sentinel - hypergraph._hyperedges[num_hyperedges].enable(); - hypergraph._hyperedges[num_hyperedges].setFirstEntry(hypergraph._num_pins); - }, [&] { - tbb::parallel_invoke([&] { - hypergraph._acquired_hns.assign( - num_hypernodes, parallel::IntegralAtomicWrapper(false)); - }, [&] { - hypergraph._contraction_tree.initialize(num_hypernodes); - }); - tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID hn) { - // Setup hypernodes - DynamicHypergraph::Hypernode& hypernode = hypergraph._hypernodes[hn]; - hypernode.enable(); - if ( hypernode_weight ) { - hypernode.setWeight(hypernode_weight[hn]); - } - }); - }, [&] { - // Construct incident net array - hypergraph._incident_nets = IncidentNetArray(num_hypernodes, edge_vector); - }); + tbb::parallel_invoke( + [&] { + hypergraph._acquired_hes.assign(num_hyperedges, + parallel::IntegralAtomicWrapper(false)); + tbb::parallel_for(ID(0), num_hyperedges, [&](const size_t pos) { + // Setup hyperedges + DynamicHypergraph::Hyperedge &hyperedge = hypergraph._hyperedges[pos]; + hyperedge.enable(); + hyperedge.setFirstEntry(pin_prefix_sum[pos]); + hyperedge.setSize(pin_prefix_sum.value(pos)); + if(hyperedge_weight) + { + hyperedge.setWeight(hyperedge_weight[pos]); + } + + size_t incidence_array_pos = hyperedge.firstEntry(); + size_t hash = kEdgeHashSeed; + for(const HypernodeID &pin : edge_vector[pos]) + { + ASSERT(incidence_array_pos < hyperedge.firstInvalidEntry()); + ASSERT(pin < num_hypernodes); + // Compute hash of hyperedge + hash += kahypar::math::hash(pin); + // Add pin to incidence array + hypergraph._incidence_array[incidence_array_pos++] = pin; + } + hyperedge.hash() = hash; + }); + // Sentinel + hypergraph._hyperedges[num_hyperedges].enable(); + hypergraph._hyperedges[num_hyperedges].setFirstEntry(hypergraph._num_pins); + }, + [&] { + tbb::parallel_invoke( + [&] { + hypergraph._acquired_hns.assign( + num_hypernodes, parallel::IntegralAtomicWrapper(false)); + }, + [&] { hypergraph._contraction_tree.initialize(num_hypernodes); }); + tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID hn) { + // Setup hypernodes + DynamicHypergraph::Hypernode &hypernode = hypergraph._hypernodes[hn]; + hypernode.enable(); + if(hypernode_weight) + { 
+ hypernode.setWeight(hypernode_weight[hn]); + } + }); + }, + [&] { + // Construct incident net array + hypergraph._incident_nets = IncidentNetArray(num_hypernodes, edge_vector); + }); // Compute total weight of hypergraph hypergraph.updateTotalWeight(parallel_tag_t()); @@ -142,100 +143,113 @@ DynamicHypergraph DynamicHypergraphFactory::construct( } /** - * Compactifies a given hypergraph such that it only contains enabled vertices and hyperedges within - * a consecutive range of IDs. + * Compactifies a given hypergraph such that it only contains enabled vertices and + * hyperedges within a consecutive range of IDs. */ std::pair > -DynamicHypergraphFactory::compactify(const DynamicHypergraph& hypergraph) { +DynamicHypergraphFactory::compactify(const DynamicHypergraph &hypergraph) +{ HypernodeID num_hypernodes = 0; HyperedgeID num_hyperedges = 0; parallel::scalable_vector hn_mapping; parallel::scalable_vector he_mapping; // Computes a mapping for vertices and hyperedges to a consecutive range of IDs // in the compactified hypergraph via a parallel prefix sum - tbb::parallel_invoke([&] { - hn_mapping.assign(hypergraph._num_hypernodes + 1, 0); - hypergraph.doParallelForAllNodes([&](const HypernodeID hn) { - hn_mapping[hn + 1] = ID(1); - }); - - parallel::TBBPrefixSum hn_mapping_prefix_sum(hn_mapping); - tbb::parallel_scan(tbb::blocked_range( - UL(0), hypergraph._num_hypernodes + 1), hn_mapping_prefix_sum); - num_hypernodes = hn_mapping_prefix_sum.total_sum(); - hn_mapping.resize(hypergraph._num_hypernodes); - }, [&] { - he_mapping.assign(hypergraph._num_hyperedges + 1, 0); - hypergraph.doParallelForAllEdges([&](const HyperedgeID& he) { - he_mapping[he + 1] = ID(1); - }); - - parallel::TBBPrefixSum he_mapping_prefix_sum(he_mapping); - tbb::parallel_scan(tbb::blocked_range( - UL(0), hypergraph._num_hyperedges + 1), he_mapping_prefix_sum); - num_hyperedges = he_mapping_prefix_sum.total_sum(); - he_mapping.resize(hypergraph._num_hyperedges); - }); + tbb::parallel_invoke( + [&] { + hn_mapping.assign(hypergraph._num_hypernodes + 1, 0); + hypergraph.doParallelForAllNodes( + [&](const HypernodeID hn) { hn_mapping[hn + 1] = ID(1); }); + + parallel::TBBPrefixSum + hn_mapping_prefix_sum(hn_mapping); + tbb::parallel_scan( + tbb::blocked_range(UL(0), hypergraph._num_hypernodes + 1), + hn_mapping_prefix_sum); + num_hypernodes = hn_mapping_prefix_sum.total_sum(); + hn_mapping.resize(hypergraph._num_hypernodes); + }, + [&] { + he_mapping.assign(hypergraph._num_hyperedges + 1, 0); + hypergraph.doParallelForAllEdges( + [&](const HyperedgeID &he) { he_mapping[he + 1] = ID(1); }); + + parallel::TBBPrefixSum + he_mapping_prefix_sum(he_mapping); + tbb::parallel_scan( + tbb::blocked_range(UL(0), hypergraph._num_hyperedges + 1), + he_mapping_prefix_sum); + num_hyperedges = he_mapping_prefix_sum.total_sum(); + he_mapping.resize(hypergraph._num_hyperedges); + }); // Remap pins of each hyperedge - using HyperedgeVector = parallel::scalable_vector>; + using HyperedgeVector = + parallel::scalable_vector >; HyperedgeVector edge_vector; parallel::scalable_vector hyperedge_weights; parallel::scalable_vector hypernode_weights; - tbb::parallel_invoke([&] { - hypernode_weights.resize(num_hypernodes); - hypergraph.doParallelForAllNodes([&](const HypernodeID hn) { - const HypernodeID mapped_hn = hn_mapping[hn]; - ASSERT(mapped_hn < num_hypernodes); - hypernode_weights[mapped_hn] = hypergraph.nodeWeight(hn); - }); - }, [&] { - edge_vector.resize(num_hyperedges); - hyperedge_weights.resize(num_hyperedges); - 
hypergraph.doParallelForAllEdges([&](const HyperedgeID he) { - const HyperedgeID mapped_he = he_mapping[he]; - ASSERT(mapped_he < num_hyperedges); - hyperedge_weights[mapped_he] = hypergraph.edgeWeight(he); - for ( const HypernodeID pin : hypergraph.pins(he) ) { - edge_vector[mapped_he].push_back(hn_mapping[pin]); - } - }); - }); + tbb::parallel_invoke( + [&] { + hypernode_weights.resize(num_hypernodes); + hypergraph.doParallelForAllNodes([&](const HypernodeID hn) { + const HypernodeID mapped_hn = hn_mapping[hn]; + ASSERT(mapped_hn < num_hypernodes); + hypernode_weights[mapped_hn] = hypergraph.nodeWeight(hn); + }); + }, + [&] { + edge_vector.resize(num_hyperedges); + hyperedge_weights.resize(num_hyperedges); + hypergraph.doParallelForAllEdges([&](const HyperedgeID he) { + const HyperedgeID mapped_he = he_mapping[he]; + ASSERT(mapped_he < num_hyperedges); + hyperedge_weights[mapped_he] = hypergraph.edgeWeight(he); + for(const HypernodeID pin : hypergraph.pins(he)) + { + edge_vector[mapped_he].push_back(hn_mapping[pin]); + } + }); + }); // Construct compactified hypergraph DynamicHypergraph compactified_hypergraph = DynamicHypergraphFactory::construct( - num_hypernodes, num_hyperedges, edge_vector, hyperedge_weights.data(), hypernode_weights.data()); - compactified_hypergraph._removed_degree_zero_hn_weight = hypergraph._removed_degree_zero_hn_weight; + num_hypernodes, num_hyperedges, edge_vector, hyperedge_weights.data(), + hypernode_weights.data()); + compactified_hypergraph._removed_degree_zero_hn_weight = + hypergraph._removed_degree_zero_hn_weight; compactified_hypergraph._total_weight += hypergraph._removed_degree_zero_hn_weight; - tbb::parallel_invoke([&] { - // Set community ids - hypergraph.doParallelForAllNodes([&](const HypernodeID& hn) { - const HypernodeID mapped_hn = hn_mapping[hn]; - compactified_hypergraph.setCommunityID(mapped_hn, hypergraph.communityID(hn)); - }); - }, [&] { - if ( hypergraph.hasFixedVertices() ) { - // Set fixed vertices - ds::FixedVertexSupport fixed_vertices( - compactified_hypergraph.initialNumNodes(), hypergraph._fixed_vertices.numBlocks()); - fixed_vertices.setHypergraph(&compactified_hypergraph); - hypergraph.doParallelForAllNodes([&](const HypernodeID& hn) { - if ( hypergraph.isFixed(hn) ) { + tbb::parallel_invoke( + [&] { + // Set community ids + hypergraph.doParallelForAllNodes([&](const HypernodeID &hn) { const HypernodeID mapped_hn = hn_mapping[hn]; - fixed_vertices.fixToBlock(mapped_hn, hypergraph.fixedVertexBlock(hn)); + compactified_hypergraph.setCommunityID(mapped_hn, hypergraph.communityID(hn)); + }); + }, + [&] { + if(hypergraph.hasFixedVertices()) + { + // Set fixed vertices + ds::FixedVertexSupport fixed_vertices( + compactified_hypergraph.initialNumNodes(), + hypergraph._fixed_vertices.numBlocks()); + fixed_vertices.setHypergraph(&compactified_hypergraph); + hypergraph.doParallelForAllNodes([&](const HypernodeID &hn) { + if(hypergraph.isFixed(hn)) + { + const HypernodeID mapped_hn = hn_mapping[hn]; + fixed_vertices.fixToBlock(mapped_hn, hypergraph.fixedVertexBlock(hn)); + } + }); + compactified_hypergraph.addFixedVertexSupport(std::move(fixed_vertices)); } }); - compactified_hypergraph.addFixedVertexSupport(std::move(fixed_vertices)); - } - }); - tbb::parallel_invoke([&] { - parallel::parallel_free(he_mapping, - hyperedge_weights, hypernode_weights); - }, [&] { - parallel::parallel_free(edge_vector); - }); + tbb::parallel_invoke( + [&] { parallel::parallel_free(he_mapping, hyperedge_weights, hypernode_weights); }, + [&] { 
parallel::parallel_free(edge_vector); }); return std::make_pair(std::move(compactified_hypergraph), std::move(hn_mapping)); } diff --git a/mt-kahypar/datastructures/dynamic_hypergraph_factory.h b/mt-kahypar/datastructures/dynamic_hypergraph_factory.h index 5e23f9e70..d982cde48 100644 --- a/mt-kahypar/datastructures/dynamic_hypergraph_factory.h +++ b/mt-kahypar/datastructures/dynamic_hypergraph_factory.h @@ -29,41 +29,44 @@ #include "tbb/enumerable_thread_specific.h" - #include "mt-kahypar/datastructures/dynamic_hypergraph.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" namespace mt_kahypar { namespace ds { -class DynamicHypergraphFactory { +class DynamicHypergraphFactory +{ - using HyperedgeVector = parallel::scalable_vector>; + using HyperedgeVector = + parallel::scalable_vector >; using Counter = parallel::scalable_vector; - using AtomicCounter = parallel::scalable_vector>; + using AtomicCounter = + parallel::scalable_vector >; using ThreadLocalCounter = tbb::enumerable_thread_specific; - using ThreadLocalBitset = tbb::enumerable_thread_specific>; - using ThreadLocalBitvector = tbb::enumerable_thread_specific>; - + using ThreadLocalBitset = + tbb::enumerable_thread_specific >; + using ThreadLocalBitvector = + tbb::enumerable_thread_specific >; - public: - static DynamicHypergraph construct(const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const HyperedgeVector& edge_vector, - const HyperedgeWeight* hyperedge_weight = nullptr, - const HypernodeWeight* hypernode_weight = nullptr, - const bool stable_construction_of_incident_edges = false); +public: + static DynamicHypergraph + construct(const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, + const HyperedgeVector &edge_vector, + const HyperedgeWeight *hyperedge_weight = nullptr, + const HypernodeWeight *hypernode_weight = nullptr, + const bool stable_construction_of_incident_edges = false); /** - * Compactifies a given hypergraph such that it only contains enabled vertices and hyperedges within - * a consecutive range of IDs. + * Compactifies a given hypergraph such that it only contains enabled vertices and + * hyperedges within a consecutive range of IDs. 
*/ - static std::pair > compactify(const DynamicHypergraph& hypergraph); + static std::pair > + compactify(const DynamicHypergraph &hypergraph); - private: - DynamicHypergraphFactory() { } +private: + DynamicHypergraphFactory() {} }; } // namespace ds diff --git a/mt-kahypar/datastructures/fixed_vertex_support.cpp b/mt-kahypar/datastructures/fixed_vertex_support.cpp index 36482b667..174dc6e57 100644 --- a/mt-kahypar/datastructures/fixed_vertex_support.cpp +++ b/mt-kahypar/datastructures/fixed_vertex_support.cpp @@ -31,8 +31,9 @@ namespace mt_kahypar { namespace ds { -template -bool FixedVertexSupport::contract(const HypernodeID u, const HypernodeID v) { +template +bool FixedVertexSupport::contract(const HypernodeID u, const HypernodeID v) +{ ASSERT(_hg); ASSERT(u < _num_nodes && v < _num_nodes); bool success = true; @@ -47,40 +48,51 @@ bool FixedVertexSupport::contract(const HypernodeID u, const Hyperno // => we therefore do not have to lock v const bool is_fixed_u = isFixed(u); const bool both_fixed = is_fixed_u && is_fixed_v; - if ( !is_fixed_u && is_fixed_v ) { + if(!is_fixed_u && is_fixed_v) + { // u becomes a fixed vertex since v is a fixed vertex fixed_vertex_block = fixedVertexBlock(v); u_becomes_fixed = true; - } else if ( is_fixed_u && !is_fixed_v ) { + } + else if(is_fixed_u && !is_fixed_v) + { // v becomes a fixed vertex since it is contracted onto a fixed vertex fixed_vertex_block = fixedVertexBlock(u); v_becomes_fixed = true; - } else if ( both_fixed ) { - if ( fixedVertexBlock(u) == fixedVertexBlock(v) ) { + } + else if(both_fixed) + { + if(fixedVertexBlock(u) == fixedVertexBlock(v)) + { ASSERT(_fixed_vertex_data[u].fixed_vertex_contraction_cnt > 0); ASSERT(_fixed_vertex_data[v].fixed_vertex_contraction_cnt > 0); ++_fixed_vertex_data[u].fixed_vertex_contraction_cnt; - } else { + } + else + { // Both nodes are fixed vertices, but are assigned to different blocks // => contraction is not allowed success = false; } } - if ( success && ( u_becomes_fixed || v_becomes_fixed ) ) { + if(success && (u_becomes_fixed || v_becomes_fixed)) + { ASSERT(fixed_vertex_block != kInvalidPartition); ASSERT(!(u_becomes_fixed && v_becomes_fixed)); - // Either u or v becomes a fixed vertex. Therefore, the fixed vertex block weight changes. - // To guarantee that we find a feasible initial partition, we ensure that the new block weight - // is smaller than the maximum allowed block weight. + // Either u or v becomes a fixed vertex. Therefore, the fixed vertex block weight + // changes. To guarantee that we find a feasible initial partition, we ensure that the + // new block weight is smaller than the maximum allowed block weight. 
const HypernodeWeight delta_weight = - u_becomes_fixed * weight_of_u + v_becomes_fixed * weight_of_v; + u_becomes_fixed * weight_of_u + v_becomes_fixed * weight_of_v; const HypernodeWeight block_weight_after = - _fixed_vertex_block_weights[fixed_vertex_block].add_fetch( - delta_weight, std::memory_order_relaxed); - if ( likely( block_weight_after <= _max_block_weights[fixed_vertex_block] ) ) { + _fixed_vertex_block_weights[fixed_vertex_block].add_fetch( + delta_weight, std::memory_order_relaxed); + if(likely(block_weight_after <= _max_block_weights[fixed_vertex_block])) + { _total_fixed_vertex_weight.fetch_add(delta_weight, std::memory_order_relaxed); - if ( u_becomes_fixed ) { + if(u_becomes_fixed) + { ASSERT(isFixed(v)); ASSERT(_fixed_vertex_data[u].fixed_vertex_contraction_cnt == 0); // Block weight update was successful => set fixed vertex block of u @@ -88,58 +100,65 @@ bool FixedVertexSupport::contract(const HypernodeID u, const Hyperno _fixed_vertex_data[u].fixed_vertex_contraction_cnt = 1; _fixed_vertex_data[u].fixed_vertex_weight = weight_of_u; } - } else { + } + else + { // The new fixed vertex block weight is larger than the maximum allowed bock weight // => revert block weight update and forbid contraction _fixed_vertex_block_weights[fixed_vertex_block].sub_fetch( - delta_weight, std::memory_order_relaxed); + delta_weight, std::memory_order_relaxed); v_becomes_fixed = false; success = false; } } _fixed_vertex_data[u].sync.unlock(); - if ( v_becomes_fixed ) { + if(v_becomes_fixed) + { // Our contraction algorithm ensures that there are no concurrent contractions onto v - // if v is contracted onto another node. We therefore can set the fixed vertex block of - // v outside the lock + // if v is contracted onto another node. We therefore can set the fixed vertex block + // of v outside the lock _fixed_vertex_data[v].block = fixed_vertex_block; _fixed_vertex_data[v].fixed_vertex_weight = weight_of_v; } return success; } -template -void FixedVertexSupport::uncontract(const HypernodeID u, const HypernodeID v) { +template +void FixedVertexSupport::uncontract(const HypernodeID u, const HypernodeID v) +{ ASSERT(_hg); ASSERT(u < _num_nodes && v < _num_nodes); - if ( isFixed(v) ) { - if ( _fixed_vertex_data[v].fixed_vertex_contraction_cnt > 0 ) { + if(isFixed(v)) + { + if(_fixed_vertex_data[v].fixed_vertex_contraction_cnt > 0) + { // v was fixed before the contraction _fixed_vertex_data[u].sync.lock(); ASSERT(_fixed_vertex_data[u].fixed_vertex_contraction_cnt > 0); const HypernodeID contraction_cnt_of_u_after = - --_fixed_vertex_data[u].fixed_vertex_contraction_cnt; + --_fixed_vertex_data[u].fixed_vertex_contraction_cnt; _fixed_vertex_data[u].sync.unlock(); - if ( contraction_cnt_of_u_after == 0 ) { + if(contraction_cnt_of_u_after == 0) + { // u was not fixed before the contraction const PartitionID fixed_vertex_block_of_u = _fixed_vertex_data[u].block; const HypernodeWeight weight_of_u = _fixed_vertex_data[u].fixed_vertex_weight; _fixed_vertex_block_weights[fixed_vertex_block_of_u].fetch_sub( - weight_of_u, std::memory_order_relaxed); - _total_fixed_vertex_weight.fetch_sub( - weight_of_u, std::memory_order_relaxed); + weight_of_u, std::memory_order_relaxed); + _total_fixed_vertex_weight.fetch_sub(weight_of_u, std::memory_order_relaxed); // Make u a not fixed vertex again _fixed_vertex_data[u].block = kInvalidPartition; } - } else { + } + else + { // v was not fixed before the contraction const PartitionID fixed_vertex_block_of_v = _fixed_vertex_data[v].block; const HypernodeWeight 
weight_of_v = _fixed_vertex_data[v].fixed_vertex_weight; _fixed_vertex_block_weights[fixed_vertex_block_of_v].fetch_sub( - weight_of_v, std::memory_order_relaxed); - _total_fixed_vertex_weight.fetch_sub( - weight_of_v, std::memory_order_relaxed); + weight_of_v, std::memory_order_relaxed); + _total_fixed_vertex_weight.fetch_sub(weight_of_v, std::memory_order_relaxed); // Make v a not fixed vertex again _fixed_vertex_data[v].block = kInvalidPartition; } @@ -149,10 +168,10 @@ void FixedVertexSupport::uncontract(const HypernodeID u, const Hyper } // namespace ds } // namespace mt_kahypar -#include "mt-kahypar/datastructures/static_graph.h" -#include "mt-kahypar/datastructures/static_hypergraph.h" #include "mt-kahypar/datastructures/dynamic_graph.h" #include "mt-kahypar/datastructures/dynamic_hypergraph.h" +#include "mt-kahypar/datastructures/static_graph.h" +#include "mt-kahypar/datastructures/static_hypergraph.h" template class mt_kahypar::ds::FixedVertexSupport; template class mt_kahypar::ds::FixedVertexSupport; diff --git a/mt-kahypar/datastructures/fixed_vertex_support.h b/mt-kahypar/datastructures/fixed_vertex_support.h index 938683a39..90ab88223 100644 --- a/mt-kahypar/datastructures/fixed_vertex_support.h +++ b/mt-kahypar/datastructures/fixed_vertex_support.h @@ -29,18 +29,20 @@ #include #include "mt-kahypar/datastructures/hypergraph_common.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" namespace mt_kahypar { namespace ds { -template -class FixedVertexSupport { +template +class FixedVertexSupport +{ static constexpr bool debug = false; - struct FixedVertexData { + struct FixedVertexData + { // ! Fixed vertex block ID PartitionID block; // ! 
Number of fixed vertices contracted onto this node @@ -51,59 +53,57 @@ class FixedVertexSupport { SpinLock sync; }; - public: +public: FixedVertexSupport() : - _num_nodes(0), - _k(kInvalidPartition), - _hg(nullptr), - _total_fixed_vertex_weight(0), - _fixed_vertex_block_weights(), - _max_block_weights(), - _fixed_vertex_data() { } - - FixedVertexSupport(const HypernodeID num_nodes, - const PartitionID k) : - _num_nodes(num_nodes), - _k(k), - _hg(nullptr), - _total_fixed_vertex_weight(0), - _fixed_vertex_block_weights(k, CAtomic(0) ), - _max_block_weights(k, std::numeric_limits::max()), - _fixed_vertex_data(num_nodes, FixedVertexData { kInvalidPartition, 0, 0, SpinLock() }) { } - - FixedVertexSupport(const FixedVertexSupport&) = delete; - FixedVertexSupport & operator= (const FixedVertexSupport &) = delete; - - FixedVertexSupport(FixedVertexSupport&&) = default; - FixedVertexSupport & operator= (FixedVertexSupport &&) = default; - - void setHypergraph(const Hypergraph* hg) { - _hg = hg; + _num_nodes(0), _k(kInvalidPartition), _hg(nullptr), _total_fixed_vertex_weight(0), + _fixed_vertex_block_weights(), _max_block_weights(), _fixed_vertex_data() + { } - void setMaxBlockWeight(const std::vector max_block_weights) { - if ( hasFixedVertices() ) { + FixedVertexSupport(const HypernodeID num_nodes, const PartitionID k) : + _num_nodes(num_nodes), _k(k), _hg(nullptr), _total_fixed_vertex_weight(0), + _fixed_vertex_block_weights(k, CAtomic(0)), + _max_block_weights(k, std::numeric_limits::max()), + _fixed_vertex_data(num_nodes, + FixedVertexData{ kInvalidPartition, 0, 0, SpinLock() }) + { + } + + FixedVertexSupport(const FixedVertexSupport &) = delete; + FixedVertexSupport &operator=(const FixedVertexSupport &) = delete; + + FixedVertexSupport(FixedVertexSupport &&) = default; + FixedVertexSupport &operator=(FixedVertexSupport &&) = default; + + void setHypergraph(const Hypergraph *hg) { _hg = hg; } + + void setMaxBlockWeight(const std::vector max_block_weights) + { + if(hasFixedVertices()) + { ASSERT(max_block_weights.size() >= static_cast(_k)); _max_block_weights = max_block_weights; } } - PartitionID numBlocks() const { - return _k; - } + PartitionID numBlocks() const { return _k; } // ####################### Fixed Vertex Block Weights ####################### - bool hasFixedVertices() const { + bool hasFixedVertices() const + { return _total_fixed_vertex_weight.load(std::memory_order_relaxed) > 0; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeWeight totalFixedVertexWeight() const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeWeight totalFixedVertexWeight() const + { return _total_fixed_vertex_weight.load(std::memory_order_relaxed); } // ! Returns the weight of all fixed vertices assigned to the corresponding block - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeWeight fixedVertexBlockWeight(const PartitionID block) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HypernodeWeight + fixedVertexBlockWeight(const PartitionID block) const + { ASSERT(block != kInvalidPartition && block < _k); return _fixed_vertex_block_weights[block].load(std::memory_order_relaxed); } @@ -111,35 +111,42 @@ class FixedVertexSupport { // ####################### Fixed Vertex Information ####################### // ! 
Fixes a node to a block - void fixToBlock(const HypernodeID hn, const PartitionID block) { + void fixToBlock(const HypernodeID hn, const PartitionID block) + { ASSERT(_hg); ASSERT(hn < _num_nodes); ASSERT(block != kInvalidPartition && block < _k); PartitionID expected = kInvalidPartition; PartitionID desired = block; - if ( __atomic_compare_exchange_n(&_fixed_vertex_data[hn].block, - &expected, desired, false, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) ) { + if(__atomic_compare_exchange_n(&_fixed_vertex_data[hn].block, &expected, desired, + false, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) + { const HypernodeWeight weight_of_hn = _hg->nodeWeight(hn); _fixed_vertex_data[hn].fixed_vertex_contraction_cnt = 1; _fixed_vertex_data[hn].fixed_vertex_weight = weight_of_hn; - _fixed_vertex_block_weights[block].fetch_add( - weight_of_hn, std::memory_order_relaxed); - _total_fixed_vertex_weight.fetch_add( - weight_of_hn, std::memory_order_relaxed); - } else { + _fixed_vertex_block_weights[block].fetch_add(weight_of_hn, + std::memory_order_relaxed); + _total_fixed_vertex_weight.fetch_add(weight_of_hn, std::memory_order_relaxed); + } + else + { ASSERT(_fixed_vertex_data[hn].block == block, - "Try to fix hypernode" << hn << "to block" << block - << ", but it is already fixed to block" << _fixed_vertex_data[hn].block); + "Try to fix hypernode" << hn << "to block" << block + << ", but it is already fixed to block" + << _fixed_vertex_data[hn].block); } } // ! Returns whether or not the node is fixed to a block - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool isFixed(const HypernodeID hn) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool isFixed(const HypernodeID hn) const + { return hn < _num_nodes && fixedVertexBlock(hn) != kInvalidPartition; } // ! Returns the fixed vertex block of the node - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PartitionID fixedVertexBlock(const HypernodeID hn) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PartitionID + fixedVertexBlock(const HypernodeID hn) const + { ASSERT(hn < _num_nodes); return __atomic_load_n(&_fixed_vertex_data[hn].block, __ATOMIC_RELAXED); } @@ -148,15 +155,18 @@ class FixedVertexSupport { // ! Contracts v onto u. If v is a fixed vertex than u becomes also an fixed vertex. // ! If u and v are fixed vertices, then both must be assigned to same block - // ! The function returns false, if u and v are fixed and are assigned to different blocks + // ! The function returns false, if u and v are fixed and are assigned to different + // blocks bool contract(const HypernodeID u, const HypernodeID v); - // ! Uncontract v from u. This reverts the corresponding contraction operation of v onto u. + // ! Uncontract v from u. This reverts the corresponding contraction operation of v onto + // u. void uncontract(const HypernodeID u, const HypernodeID v); // ####################### Miscellaneous ####################### - FixedVertexSupport copy() const { + FixedVertexSupport copy() const + { FixedVertexSupport cpy; cpy._num_nodes = _num_nodes; cpy._k = _k; @@ -168,12 +178,13 @@ class FixedVertexSupport { return cpy; } - size_t size_in_bytes() const { - return ( sizeof(CAtomic) + sizeof(HypernodeWeight)) * _k + - sizeof(FixedVertexData) * _num_nodes; + size_t size_in_bytes() const + { + return (sizeof(CAtomic) + sizeof(HypernodeWeight)) * _k + + sizeof(FixedVertexData) * _num_nodes; } - private: +private: // ! Number of nodes HypernodeID _num_nodes; @@ -181,13 +192,13 @@ class FixedVertexSupport { PartitionID _k; // ! Underlying hypergraph - const Hypergraph* _hg; + const Hypergraph *_hg; // ! 
Total weight of all fixed vertices CAtomic _total_fixed_vertex_weight; // ! Weight of all vertices fixed to a block - vec< CAtomic > _fixed_vertex_block_weights; + vec > _fixed_vertex_block_weights; // ! Maximum allowed fixed vertex block weight std::vector _max_block_weights; @@ -196,5 +207,5 @@ class FixedVertexSupport { vec _fixed_vertex_data; }; -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/graph.cpp b/mt-kahypar/datastructures/graph.cpp index 5b609e67c..80adcd103 100644 --- a/mt-kahypar/datastructures/graph.cpp +++ b/mt-kahypar/datastructures/graph.cpp @@ -27,197 +27,209 @@ #include "graph.h" - +#include #include -#include #include -#include +#include #include "mt-kahypar/definitions.h" -#include "mt-kahypar/parallel/parallel_prefix_sum.h" #include "mt-kahypar/parallel/atomic_wrapper.h" -#include "mt-kahypar/utils/timer.h" #include "mt-kahypar/parallel/parallel_counting_sort.h" +#include "mt-kahypar/parallel/parallel_prefix_sum.h" #include "mt-kahypar/utils/exception.h" +#include "mt-kahypar/utils/timer.h" namespace mt_kahypar::ds { - template - Graph::Graph(Hypergraph& hypergraph, const LouvainEdgeWeight edge_weight_type, bool is_graph) : +template +Graph::Graph(Hypergraph &hypergraph, const LouvainEdgeWeight edge_weight_type, + bool is_graph) : _num_nodes(0), - _num_arcs(0), - _total_volume(0), - _max_degree(0), - _indices(), - _arcs(), - _node_volumes(), - _tmp_graph_buffer(nullptr) { - - switch( edge_weight_type ) { - case LouvainEdgeWeight::uniform: - construct(hypergraph, is_graph, - [&](const HyperedgeWeight edge_weight, - const HypernodeID, - const HyperedgeID) { - return static_cast(edge_weight); - }); - break; - case LouvainEdgeWeight::non_uniform: - construct(hypergraph, is_graph, - [&](const HyperedgeWeight edge_weight, - const HypernodeID edge_size, - const HyperedgeID) { - return static_cast(edge_weight) / - static_cast(edge_size); - }); - break; - case LouvainEdgeWeight::degree: - construct(hypergraph, is_graph, - [&](const HyperedgeWeight edge_weight, - const HypernodeID edge_size, - const HyperedgeID node_degree) { - return static_cast(edge_weight) * - (static_cast(node_degree) / - static_cast(edge_size)); - }); - break; - case LouvainEdgeWeight::hybrid: - case LouvainEdgeWeight::UNDEFINED: - throw InvalidInputException("No valid louvain edge weight"); - } + _num_arcs(0), _total_volume(0), _max_degree(0), _indices(), _arcs(), _node_volumes(), + _tmp_graph_buffer(nullptr) +{ + + switch(edge_weight_type) + { + case LouvainEdgeWeight::uniform: + construct(hypergraph, is_graph, + [&](const HyperedgeWeight edge_weight, const HypernodeID, + const HyperedgeID) { return static_cast(edge_weight); }); + break; + case LouvainEdgeWeight::non_uniform: + construct(hypergraph, is_graph, + [&](const HyperedgeWeight edge_weight, const HypernodeID edge_size, + const HyperedgeID) { + return static_cast(edge_weight) / + static_cast(edge_size); + }); + break; + case LouvainEdgeWeight::degree: + construct(hypergraph, is_graph, + [&](const HyperedgeWeight edge_weight, const HypernodeID edge_size, + const HyperedgeID node_degree) { + return static_cast(edge_weight) * + (static_cast(node_degree) / + static_cast(edge_size)); + }); + break; + case LouvainEdgeWeight::hybrid: + case LouvainEdgeWeight::UNDEFINED: + throw InvalidInputException("No valid louvain edge weight"); } +} - template - Graph::Graph(Graph&& other) : - _num_nodes(other._num_nodes), - _num_arcs(other._num_arcs), - 
_total_volume(other._total_volume), - _max_degree(other._max_degree), - _indices(std::move(other._indices)), - _arcs(std::move(other._arcs)), +template +Graph::Graph(Graph &&other) : + _num_nodes(other._num_nodes), _num_arcs(other._num_arcs), + _total_volume(other._total_volume), _max_degree(other._max_degree), + _indices(std::move(other._indices)), _arcs(std::move(other._arcs)), _node_volumes(std::move(other._node_volumes)), - _tmp_graph_buffer(other._tmp_graph_buffer) { - other._num_nodes = 0; - other._num_arcs = 0; - other._total_volume = 0; - other._max_degree = 0; - other._tmp_graph_buffer = nullptr; - } - - template - Graph& Graph::operator= (Graph&& other) { - _num_nodes = other._num_nodes; - _num_arcs = other._num_arcs; - _total_volume = other._total_volume; - _max_degree = other._max_degree; - _indices = std::move(other._indices); - _arcs = std::move(other._arcs); - _node_volumes = std::move(other._node_volumes); - _tmp_graph_buffer = std::move(other._tmp_graph_buffer); - other._num_nodes = 0; - other._num_arcs = 0; - other._total_volume = 0; - other._max_degree = 0; - other._tmp_graph_buffer = nullptr; - return *this; - } - - template - Graph::~Graph() { - if ( _tmp_graph_buffer ) { - delete(_tmp_graph_buffer); - } + _tmp_graph_buffer(other._tmp_graph_buffer) +{ + other._num_nodes = 0; + other._num_arcs = 0; + other._total_volume = 0; + other._max_degree = 0; + other._tmp_graph_buffer = nullptr; +} + +template +Graph &Graph::operator=(Graph &&other) +{ + _num_nodes = other._num_nodes; + _num_arcs = other._num_arcs; + _total_volume = other._total_volume; + _max_degree = other._max_degree; + _indices = std::move(other._indices); + _arcs = std::move(other._arcs); + _node_volumes = std::move(other._node_volumes); + _tmp_graph_buffer = std::move(other._tmp_graph_buffer); + other._num_nodes = 0; + other._num_arcs = 0; + other._total_volume = 0; + other._max_degree = 0; + other._tmp_graph_buffer = nullptr; + return *this; +} + +template +Graph::~Graph() +{ + if(_tmp_graph_buffer) + { + delete (_tmp_graph_buffer); } - - template - Graph Graph::contract_low_memory(Clustering& communities) { - // map cluster IDs to consecutive range - vec mapping(numNodes(), 0); // TODO use memory pool? - tbb::parallel_for(UL(0), numNodes(), [&](NodeID u) { mapping[communities[u]] = 1; }); - parallel_prefix_sum(mapping.begin(), mapping.begin() + numNodes(), mapping.begin(), std::plus<>(), 0); - NodeID num_coarse_nodes = mapping[numNodes() - 1]; - // apply mapping to cluster IDs. 
subtract one because prefix sum is inclusive - tbb::parallel_for(UL(0), numNodes(), [&](NodeID u) { communities[u] = mapping[communities[u]] - 1; }); - - // sort nodes by cluster - auto get_cluster = [&](NodeID u) { assert(u < communities.size()); return communities[u]; }; - vec nodes_sorted_by_cluster(std::move(mapping)); // reuse memory from mapping since it's no longer needed - auto cluster_bounds = parallel::counting_sort(nodes(), nodes_sorted_by_cluster, num_coarse_nodes, - get_cluster, TBBInitializer::instance().total_number_of_threads()); - - Graph coarse_graph; - coarse_graph._num_nodes = num_coarse_nodes; - coarse_graph._indices.resize(num_coarse_nodes + 1); - coarse_graph._node_volumes.resize(num_coarse_nodes); - coarse_graph._total_volume = totalVolume(); - - struct ClearList { - vec used; - vec values; - - ClearList(size_t n) : values(n, 0.0) { } - }; - tbb::enumerable_thread_specific clear_lists(num_coarse_nodes); - tbb::enumerable_thread_specific local_max_degree(0); - - // first pass generating unique coarse arcs to determine coarse node degrees - tbb::parallel_for(0U, num_coarse_nodes, [&](NodeID cu) { - auto& clear_list = clear_lists.local(); - ArcWeight volume_cu = 0.0; - for (auto i = cluster_bounds[cu]; i < cluster_bounds[cu + 1]; ++i) { - NodeID fu = nodes_sorted_by_cluster[i]; - volume_cu += nodeVolume(fu); - for (const Arc& arc : arcsOf(fu)) { - NodeID cv = get_cluster(arc.head); - if (cv != cu && clear_list.values[cv] == 0.0) { - clear_list.used.push_back(cv); - clear_list.values[cv] = 1.0; - } +} + +template +Graph Graph::contract_low_memory(Clustering &communities) +{ + // map cluster IDs to consecutive range + vec mapping(numNodes(), 0); // TODO use memory pool? + tbb::parallel_for(UL(0), numNodes(), [&](NodeID u) { mapping[communities[u]] = 1; }); + parallel_prefix_sum(mapping.begin(), mapping.begin() + numNodes(), mapping.begin(), + std::plus<>(), 0); + NodeID num_coarse_nodes = mapping[numNodes() - 1]; + // apply mapping to cluster IDs. 
subtract one because prefix sum is inclusive + tbb::parallel_for(UL(0), numNodes(), + [&](NodeID u) { communities[u] = mapping[communities[u]] - 1; }); + + // sort nodes by cluster + auto get_cluster = [&](NodeID u) { + assert(u < communities.size()); + return communities[u]; + }; + vec nodes_sorted_by_cluster( + std::move(mapping)); // reuse memory from mapping since it's no longer needed + auto cluster_bounds = parallel::counting_sort( + nodes(), nodes_sorted_by_cluster, num_coarse_nodes, get_cluster, + TBBInitializer::instance().total_number_of_threads()); + + Graph coarse_graph; + coarse_graph._num_nodes = num_coarse_nodes; + coarse_graph._indices.resize(num_coarse_nodes + 1); + coarse_graph._node_volumes.resize(num_coarse_nodes); + coarse_graph._total_volume = totalVolume(); + + struct ClearList + { + vec used; + vec values; + + ClearList(size_t n) : values(n, 0.0) {} + }; + tbb::enumerable_thread_specific clear_lists(num_coarse_nodes); + tbb::enumerable_thread_specific local_max_degree(0); + + // first pass generating unique coarse arcs to determine coarse node degrees + tbb::parallel_for(0U, num_coarse_nodes, [&](NodeID cu) { + auto &clear_list = clear_lists.local(); + ArcWeight volume_cu = 0.0; + for(auto i = cluster_bounds[cu]; i < cluster_bounds[cu + 1]; ++i) + { + NodeID fu = nodes_sorted_by_cluster[i]; + volume_cu += nodeVolume(fu); + for(const Arc &arc : arcsOf(fu)) + { + NodeID cv = get_cluster(arc.head); + if(cv != cu && clear_list.values[cv] == 0.0) + { + clear_list.used.push_back(cv); + clear_list.values[cv] = 1.0; } } - coarse_graph._indices[cu + 1] = clear_list.used.size(); - local_max_degree.local() = std::max(local_max_degree.local(), clear_list.used.size()); - for (const NodeID cv : clear_list.used) { - clear_list.values[cv] = 0.0; - } - clear_list.used.clear(); - coarse_graph._node_volumes[cu] = volume_cu; - }); - - // prefix sum coarse node degrees for offsets to write the coarse arcs in second pass - parallel_prefix_sum(coarse_graph._indices.begin(), coarse_graph._indices.end(), coarse_graph._indices.begin(), std::plus<>(), UL(0)); - size_t num_coarse_arcs = coarse_graph._indices.back(); - coarse_graph._arcs.resize(num_coarse_arcs); - coarse_graph._num_arcs = num_coarse_arcs; - coarse_graph._max_degree = local_max_degree.combine([](size_t lhs, size_t rhs) { return std::max(lhs, rhs); }); - - // second pass generating unique coarse arcs - tbb::parallel_for(0U, num_coarse_nodes, [&](NodeID cu) { - auto& clear_list = clear_lists.local(); - for (auto i = cluster_bounds[cu]; i < cluster_bounds[cu+1]; ++i) { - for (const Arc& arc : arcsOf(nodes_sorted_by_cluster[i])) { - NodeID cv = get_cluster(arc.head); - if (cv != cu) { - if (clear_list.values[cv] == 0.0) { - clear_list.used.push_back(cv); - } - clear_list.values[cv] += arc.weight; + } + coarse_graph._indices[cu + 1] = clear_list.used.size(); + local_max_degree.local() = std::max(local_max_degree.local(), clear_list.used.size()); + for(const NodeID cv : clear_list.used) + { + clear_list.values[cv] = 0.0; + } + clear_list.used.clear(); + coarse_graph._node_volumes[cu] = volume_cu; + }); + + // prefix sum coarse node degrees for offsets to write the coarse arcs in second pass + parallel_prefix_sum(coarse_graph._indices.begin(), coarse_graph._indices.end(), + coarse_graph._indices.begin(), std::plus<>(), UL(0)); + size_t num_coarse_arcs = coarse_graph._indices.back(); + coarse_graph._arcs.resize(num_coarse_arcs); + coarse_graph._num_arcs = num_coarse_arcs; + coarse_graph._max_degree = + local_max_degree.combine([](size_t 
lhs, size_t rhs) { return std::max(lhs, rhs); }); + + // second pass generating unique coarse arcs + tbb::parallel_for(0U, num_coarse_nodes, [&](NodeID cu) { + auto &clear_list = clear_lists.local(); + for(auto i = cluster_bounds[cu]; i < cluster_bounds[cu + 1]; ++i) + { + for(const Arc &arc : arcsOf(nodes_sorted_by_cluster[i])) + { + NodeID cv = get_cluster(arc.head); + if(cv != cu) + { + if(clear_list.values[cv] == 0.0) + { + clear_list.used.push_back(cv); } + clear_list.values[cv] += arc.weight; } } - size_t pos = coarse_graph._indices[cu]; - for (const NodeID cv : clear_list.used) { - coarse_graph._arcs[pos++] = Arc(cv, clear_list.values[cv]); - clear_list.values[cv] = 0.0; - } - clear_list.used.clear(); - }); - - return coarse_graph; - } + } + size_t pos = coarse_graph._indices[cu]; + for(const NodeID cv : clear_list.used) + { + coarse_graph._arcs[pos++] = Arc(cv, clear_list.values[cv]); + clear_list.values[cv] = 0.0; + } + clear_list.used.clear(); + }); + return coarse_graph; +} - /*! +/*! * Contracts the graph based on the community structure passed as argument. * In the first step the community ids are compactified (via parallel prefix sum) * which also determines the node ids in the coarse graph. Afterwards, we create @@ -225,314 +237,361 @@ namespace mt_kahypar::ds { * coarse graph. Finally, the weights of each multiedge in that temporary graph * are aggregated and the result is written to the final contracted graph. */ - template - Graph Graph::contract(Clustering& communities, bool low_memory) { - if (low_memory) { - return contract_low_memory(communities); - } - ASSERT(canBeUsed()); - ASSERT(_num_nodes == communities.size()); - if ( !_tmp_graph_buffer ) { - allocateContractionBuffers(); - } - Graph coarse_graph; - coarse_graph._total_volume = _total_volume; - - // #################### STAGE 1 #################### - // Compute node ids of coarse graph with a parallel prefix sum - parallel::scalable_vector mapping(_num_nodes, UL(0)); - ds::Array>& tmp_pos = _tmp_graph_buffer->tmp_pos; - ds::Array>& tmp_indices = _tmp_graph_buffer->tmp_indices; - ds::Array>& coarse_node_volumes = _tmp_graph_buffer->tmp_node_volumes; - tbb::parallel_for(0U, static_cast(_num_nodes), [&](const NodeID u) { - ASSERT(static_cast(communities[u]) < _num_nodes); - mapping[communities[u]] = UL(1); - tmp_pos[u] = 0; - tmp_indices[u] = 0; - coarse_node_volumes[u].store(0.0); - }); - - // Prefix sum determines node ids in coarse graph - parallel::TBBPrefixSum mapping_prefix_sum(mapping); - tbb::parallel_scan(tbb::blocked_range(UL(0), _num_nodes), mapping_prefix_sum); - - // Remap community ids - coarse_graph._num_nodes = mapping_prefix_sum.total_sum(); - tbb::parallel_for(0U, static_cast(_num_nodes), [&](const NodeID u) { - communities[u] = mapping_prefix_sum[communities[u]]; - }); - - // #################### STAGE 2 #################### - // Write all arcs, that will not form a selfloop in the coarse graph, into a tmp - // adjacence array. For that, we compute a prefix sum over the sum of all arcs - // in each community (which are no selfloop) and write them in parallel to - // the tmp adjacence array. - // Compute number of arcs in tmp adjacence array with parallel prefix sum - ASSERT(coarse_graph._num_nodes <= coarse_node_volumes.size()); - tbb::parallel_for(0U, static_cast(_num_nodes), [&](const NodeID u) { - const NodeID coarse_u = communities[u]; - ASSERT(static_cast(coarse_u) < coarse_graph._num_nodes); - coarse_node_volumes[coarse_u] += nodeVolume(u); // not deterministic! 
- for ( const Arc& arc : arcsOf(u) ) { - const NodeID coarse_v = communities[arc.head]; - if ( coarse_u != coarse_v ) { - ++tmp_indices[coarse_u]; - } - } - }); - - parallel::TBBPrefixSum, ds::Array> tmp_indices_prefix_sum(tmp_indices); - tbb::parallel_scan(tbb::blocked_range(UL(0), _num_nodes), tmp_indices_prefix_sum); - - // Write all arcs into corresponding tmp adjacence array blocks - ds::Array& tmp_arcs = _tmp_graph_buffer->tmp_arcs; - ds::Array& valid_arcs = _tmp_graph_buffer->valid_arcs; - tbb::parallel_for(0U, static_cast(_num_nodes), [&](const NodeID u) { - const NodeID coarse_u = communities[u]; - ASSERT(static_cast(coarse_u) < coarse_graph._num_nodes); - for ( const Arc& arc : arcsOf(u) ) { - const NodeID coarse_v = communities[arc.head]; - if ( coarse_u != coarse_v ) { - const size_t tmp_arcs_pos = tmp_indices_prefix_sum[coarse_u] + tmp_pos[coarse_u]++; - ASSERT(tmp_arcs_pos < tmp_indices_prefix_sum[coarse_u + 1]); - tmp_arcs[tmp_arcs_pos] = Arc { coarse_v, arc.weight }; - valid_arcs[tmp_arcs_pos] = UL(1); - } +template +Graph Graph::contract(Clustering &communities, bool low_memory) +{ + if(low_memory) + { + return contract_low_memory(communities); + } + ASSERT(canBeUsed()); + ASSERT(_num_nodes == communities.size()); + if(!_tmp_graph_buffer) + { + allocateContractionBuffers(); + } + Graph coarse_graph; + coarse_graph._total_volume = _total_volume; + + // #################### STAGE 1 #################### + // Compute node ids of coarse graph with a parallel prefix sum + parallel::scalable_vector mapping(_num_nodes, UL(0)); + ds::Array > &tmp_pos = + _tmp_graph_buffer->tmp_pos; + ds::Array > &tmp_indices = + _tmp_graph_buffer->tmp_indices; + ds::Array > &coarse_node_volumes = + _tmp_graph_buffer->tmp_node_volumes; + tbb::parallel_for(0U, static_cast(_num_nodes), [&](const NodeID u) { + ASSERT(static_cast(communities[u]) < _num_nodes); + mapping[communities[u]] = UL(1); + tmp_pos[u] = 0; + tmp_indices[u] = 0; + coarse_node_volumes[u].store(0.0); + }); + + // Prefix sum determines node ids in coarse graph + parallel::TBBPrefixSum mapping_prefix_sum(mapping); + tbb::parallel_scan(tbb::blocked_range(UL(0), _num_nodes), mapping_prefix_sum); + + // Remap community ids + coarse_graph._num_nodes = mapping_prefix_sum.total_sum(); + tbb::parallel_for(0U, static_cast(_num_nodes), [&](const NodeID u) { + communities[u] = mapping_prefix_sum[communities[u]]; + }); + + // #################### STAGE 2 #################### + // Write all arcs, that will not form a selfloop in the coarse graph, into a tmp + // adjacence array. For that, we compute a prefix sum over the sum of all arcs + // in each community (which are no selfloop) and write them in parallel to + // the tmp adjacence array. + // Compute number of arcs in tmp adjacence array with parallel prefix sum + ASSERT(coarse_graph._num_nodes <= coarse_node_volumes.size()); + tbb::parallel_for(0U, static_cast(_num_nodes), [&](const NodeID u) { + const NodeID coarse_u = communities[u]; + ASSERT(static_cast(coarse_u) < coarse_graph._num_nodes); + coarse_node_volumes[coarse_u] += nodeVolume(u); // not deterministic! + for(const Arc &arc : arcsOf(u)) + { + const NodeID coarse_v = communities[arc.head]; + if(coarse_u != coarse_v) + { + ++tmp_indices[coarse_u]; } - }); - - // #################### STAGE 3 #################### - // Aggregate weights of arcs that are equal in each community. - // Therefore, we sort the arcs according to their endpoints - // and aggregate weight of arcs with equal endpoints. 
- tbb::enumerable_thread_specific local_max_degree(0); - tbb::parallel_for(0U, static_cast(coarse_graph._num_nodes), [&](const NodeID u) { - const size_t tmp_arc_start = tmp_indices_prefix_sum[u]; - const size_t tmp_arc_end = tmp_indices_prefix_sum[u + 1]; - // commented out comparison is needed for deterministic arc weights - // auto comp = [](const Arc& lhs, const Arc& rhs) { return std::tie(lhs.head, lhs.weight) < std::tie(rhs.head, rhs.weight); }; - auto comp = [](const Arc& lhs, const Arc& rhs) { return lhs.head < rhs.head; }; - std::sort(tmp_arcs.begin() + tmp_arc_start, tmp_arcs.begin() + tmp_arc_end, comp); - - size_t arc_rep = tmp_arc_start; - size_t degree = tmp_arc_start < tmp_arc_end ? 1 : 0; - for ( size_t pos = tmp_arc_start + 1; pos < tmp_arc_end; ++pos ) { - if ( tmp_arcs[arc_rep].head == tmp_arcs[pos].head ) { - tmp_arcs[arc_rep].weight += tmp_arcs[pos].weight; - valid_arcs[pos] = UL(0); - } else { - arc_rep = pos; - ++degree; - } + } + }); + + parallel::TBBPrefixSum, ds::Array> + tmp_indices_prefix_sum(tmp_indices); + tbb::parallel_scan(tbb::blocked_range(UL(0), _num_nodes), + tmp_indices_prefix_sum); + + // Write all arcs into corresponding tmp adjacence array blocks + ds::Array &tmp_arcs = _tmp_graph_buffer->tmp_arcs; + ds::Array &valid_arcs = _tmp_graph_buffer->valid_arcs; + tbb::parallel_for(0U, static_cast(_num_nodes), [&](const NodeID u) { + const NodeID coarse_u = communities[u]; + ASSERT(static_cast(coarse_u) < coarse_graph._num_nodes); + for(const Arc &arc : arcsOf(u)) + { + const NodeID coarse_v = communities[arc.head]; + if(coarse_u != coarse_v) + { + const size_t tmp_arcs_pos = + tmp_indices_prefix_sum[coarse_u] + tmp_pos[coarse_u]++; + ASSERT(tmp_arcs_pos < tmp_indices_prefix_sum[coarse_u + 1]); + tmp_arcs[tmp_arcs_pos] = Arc{ coarse_v, arc.weight }; + valid_arcs[tmp_arcs_pos] = UL(1); } - local_max_degree.local() = std::max(local_max_degree.local(), degree); - }); - coarse_graph._max_degree = local_max_degree.combine( - [&](const size_t& lhs, const size_t& rhs) { - return std::max(lhs, rhs); - }); - - // Write all arcs to coarse graph - parallel::TBBPrefixSum valid_arcs_prefix_sum(valid_arcs); - tbb::parallel_scan(tbb::blocked_range(UL(0), - tmp_indices_prefix_sum.total_sum()), valid_arcs_prefix_sum); - coarse_graph._num_arcs = valid_arcs_prefix_sum.total_sum(); - - // Move memory down to coarse graph - coarse_graph._indices = std::move(_indices); - coarse_graph._arcs = std::move(_arcs); - coarse_graph._node_volumes = std::move(_node_volumes); - - tbb::parallel_invoke([&] { - const size_t tmp_num_arcs = tmp_indices_prefix_sum.total_sum(); - tbb::parallel_for(UL(0), tmp_num_arcs, [&](const size_t i) { - if ( valid_arcs_prefix_sum.value(i) ) { - const size_t pos = valid_arcs_prefix_sum[i]; - ASSERT(pos < coarse_graph._num_arcs); - coarse_graph._arcs[pos] = tmp_arcs[i]; + } + }); + + // #################### STAGE 3 #################### + // Aggregate weights of arcs that are equal in each community. + // Therefore, we sort the arcs according to their endpoints + // and aggregate weight of arcs with equal endpoints. 
+ tbb::enumerable_thread_specific local_max_degree(0); + tbb::parallel_for( + 0U, static_cast(coarse_graph._num_nodes), [&](const NodeID u) { + const size_t tmp_arc_start = tmp_indices_prefix_sum[u]; + const size_t tmp_arc_end = tmp_indices_prefix_sum[u + 1]; + // commented out comparison is needed for deterministic arc weights + // auto comp = [](const Arc& lhs, const Arc& rhs) { return std::tie(lhs.head, + // lhs.weight) < std::tie(rhs.head, rhs.weight); }; + auto comp = [](const Arc &lhs, const Arc &rhs) { return lhs.head < rhs.head; }; + std::sort(tmp_arcs.begin() + tmp_arc_start, tmp_arcs.begin() + tmp_arc_end, comp); + + size_t arc_rep = tmp_arc_start; + size_t degree = tmp_arc_start < tmp_arc_end ? 1 : 0; + for(size_t pos = tmp_arc_start + 1; pos < tmp_arc_end; ++pos) + { + if(tmp_arcs[arc_rep].head == tmp_arcs[pos].head) + { + tmp_arcs[arc_rep].weight += tmp_arcs[pos].weight; + valid_arcs[pos] = UL(0); + } + else + { + arc_rep = pos; + ++degree; + } } + local_max_degree.local() = std::max(local_max_degree.local(), degree); }); - }, [&] { - tbb::parallel_for(0U, static_cast(coarse_graph._num_nodes), [&](const NodeID u) { - const size_t start_index_pos = valid_arcs_prefix_sum[tmp_indices_prefix_sum[u]]; - ASSERT(start_index_pos <= coarse_graph._num_arcs); - coarse_graph._indices[u] = start_index_pos; - coarse_graph._node_volumes[u] = coarse_node_volumes[u]; + coarse_graph._max_degree = local_max_degree.combine( + [&](const size_t &lhs, const size_t &rhs) { return std::max(lhs, rhs); }); + + // Write all arcs to coarse graph + parallel::TBBPrefixSum valid_arcs_prefix_sum(valid_arcs); + tbb::parallel_scan( + tbb::blocked_range(UL(0), tmp_indices_prefix_sum.total_sum()), + valid_arcs_prefix_sum); + coarse_graph._num_arcs = valid_arcs_prefix_sum.total_sum(); + + // Move memory down to coarse graph + coarse_graph._indices = std::move(_indices); + coarse_graph._arcs = std::move(_arcs); + coarse_graph._node_volumes = std::move(_node_volumes); + + tbb::parallel_invoke( + [&] { + const size_t tmp_num_arcs = tmp_indices_prefix_sum.total_sum(); + tbb::parallel_for(UL(0), tmp_num_arcs, [&](const size_t i) { + if(valid_arcs_prefix_sum.value(i)) + { + const size_t pos = valid_arcs_prefix_sum[i]; + ASSERT(pos < coarse_graph._num_arcs); + coarse_graph._arcs[pos] = tmp_arcs[i]; + } + }); + }, + [&] { + tbb::parallel_for(0U, static_cast(coarse_graph._num_nodes), + [&](const NodeID u) { + const size_t start_index_pos = + valid_arcs_prefix_sum[tmp_indices_prefix_sum[u]]; + ASSERT(start_index_pos <= coarse_graph._num_arcs); + coarse_graph._indices[u] = start_index_pos; + coarse_graph._node_volumes[u] = coarse_node_volumes[u]; + }); + coarse_graph._indices[coarse_graph._num_nodes] = coarse_graph._num_arcs; }); - coarse_graph._indices[coarse_graph._num_nodes] = coarse_graph._num_arcs; - }); - coarse_graph._tmp_graph_buffer = _tmp_graph_buffer; - _tmp_graph_buffer = nullptr; + coarse_graph._tmp_graph_buffer = _tmp_graph_buffer; + _tmp_graph_buffer = nullptr; - return coarse_graph; - } - - template - Graph::Graph() : - _num_nodes(0), - _num_arcs(0), - _total_volume(0), - _max_degree(0), - _indices(), - _arcs(), - _node_volumes(), - _tmp_graph_buffer(nullptr) { } - - /*! - * Constructs a graph from a given hypergraph. 
- */ - template - template - void Graph::construct(const Hypergraph& hypergraph, - const bool is_graph, - const F& edge_weight_func) { - if ( is_graph ) { - ASSERT(hypergraph.maxEdgeSize() == 2); - _num_nodes = hypergraph.initialNumNodes(); - _num_arcs = hypergraph.initialNumPins(); - constructGraph(hypergraph, edge_weight_func); - } else { - _num_nodes = hypergraph.initialNumNodes() + hypergraph.initialNumEdges(); - _num_arcs = 2 * hypergraph.initialNumPins(); - constructBipartiteGraph(hypergraph, edge_weight_func); - } + return coarse_graph; +} - // deterministic reduce of node volumes since double addition is not commutative or associative - // node volumes are computed in for loop because deterministic reduce does not have dynamic load balancing - // whereas for loop does. this important since each node incurs O(degree) time - tbb::parallel_for(0U, NodeID(numNodes()), [&](NodeID u) { computeNodeVolume(u); }); +template +Graph::Graph() : + _num_nodes(0), _num_arcs(0), _total_volume(0), _max_degree(0), _indices(), _arcs(), + _node_volumes(), _tmp_graph_buffer(nullptr) +{ +} - auto aggregate_volume = [&](const tbb::blocked_range& r, ArcWeight partial_volume) -> ArcWeight { - for (NodeID u = r.begin(); u < r.end(); ++u) { - partial_volume += nodeVolume(u); - } - return partial_volume; - }; - auto r = tbb::blocked_range(0U, numNodes(), 1000); - _total_volume = tbb::parallel_deterministic_reduce(r, 0.0, aggregate_volume, std::plus<>()); +/*! + * Constructs a graph from a given hypergraph. + */ +template +template +void Graph::construct(const Hypergraph &hypergraph, const bool is_graph, + const F &edge_weight_func) +{ + if(is_graph) + { + ASSERT(hypergraph.maxEdgeSize() == 2); + _num_nodes = hypergraph.initialNumNodes(); + _num_arcs = hypergraph.initialNumPins(); + constructGraph(hypergraph, edge_weight_func); + } + else + { + _num_nodes = hypergraph.initialNumNodes() + hypergraph.initialNumEdges(); + _num_arcs = 2 * hypergraph.initialNumPins(); + constructBipartiteGraph(hypergraph, edge_weight_func); } - template - template - void Graph::constructBipartiteGraph(const Hypergraph& hypergraph, - F& edge_weight_func) { - _indices.resize("Preprocessing", "indices", _num_nodes + 1); - _arcs.resize("Preprocessing", "arcs", _num_arcs); - _node_volumes.resize("Preprocessing", "node_volumes", _num_nodes); - - // Initialize data structure - const HypernodeID num_hypernodes = hypergraph.initialNumNodes(); - const HypernodeID num_hyperedges = hypergraph.initialNumEdges(); - tbb::parallel_invoke([&] { - tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID u) { - ASSERT(u + 1 < _indices.size()); - _indices[u + 1] = hypergraph.nodeDegree(u); - }); - }, [&] { - tbb::parallel_for(num_hypernodes, num_hypernodes + num_hyperedges, [&](const HyperedgeID u) { - ASSERT(u + 1 < _indices.size()); - const HyperedgeID he = u - num_hypernodes; - _indices[u + 1] = hypergraph.edgeSize(he); - }); - }); - - parallel::TBBPrefixSum indices_prefix_sum(_indices); - tbb::parallel_scan(tbb::blocked_range(UL(0), _indices.size()), indices_prefix_sum); - - tbb::enumerable_thread_specific local_max_degree(0); - tbb::parallel_invoke([&] { - tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID u) { - ASSERT(u + 1 < _indices.size()); - size_t pos = _indices[u]; - const HypernodeID hn = u; - const HyperedgeID node_degree = hypergraph.nodeDegree(hn); - local_max_degree.local() = std::max( - local_max_degree.local(), static_cast(node_degree)); - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn) ) { - const 
NodeID v = he + num_hypernodes; - const HyperedgeWeight edge_weight = hypergraph.edgeWeight(he); - const HypernodeID edge_size = hypergraph.edgeSize(he); - ASSERT(pos < _indices[u + 1]); - _arcs[pos++] = Arc(v, edge_weight_func(edge_weight, edge_size, node_degree)); - } - }); - }, [&] { - tbb::parallel_for(num_hypernodes, num_hypernodes + num_hyperedges, [&](const HyperedgeID u) { - ASSERT(u + 1 < _indices.size()); - size_t pos = _indices[u]; - const HyperedgeID he = u - num_hypernodes; - const HyperedgeWeight edge_weight = hypergraph.edgeWeight(he); - const HypernodeID edge_size = hypergraph.edgeSize(he); - local_max_degree.local() = std::max( - local_max_degree.local(), static_cast(edge_size)); - for ( const HypernodeID& pin : hypergraph.pins(he) ) { - const NodeID v = pin; - const HyperedgeID node_degree = hypergraph.nodeDegree(pin); - ASSERT(pos < _indices[u + 1]); - _arcs[pos++] = Arc(v, edge_weight_func(edge_weight, edge_size, node_degree)); - } + // deterministic reduce of node volumes since double addition is not commutative or + // associative node volumes are computed in for loop because deterministic reduce does + // not have dynamic load balancing whereas for loop does. this important since each node + // incurs O(degree) time + tbb::parallel_for(0U, NodeID(numNodes()), [&](NodeID u) { computeNodeVolume(u); }); + + auto aggregate_volume = [&](const tbb::blocked_range &r, + ArcWeight partial_volume) -> ArcWeight { + for(NodeID u = r.begin(); u < r.end(); ++u) + { + partial_volume += nodeVolume(u); + } + return partial_volume; + }; + auto r = tbb::blocked_range(0U, numNodes(), 1000); + _total_volume = + tbb::parallel_deterministic_reduce(r, 0.0, aggregate_volume, std::plus<>()); +} + +template +template +void Graph::constructBipartiteGraph(const Hypergraph &hypergraph, + F &edge_weight_func) +{ + _indices.resize("Preprocessing", "indices", _num_nodes + 1); + _arcs.resize("Preprocessing", "arcs", _num_arcs); + _node_volumes.resize("Preprocessing", "node_volumes", _num_nodes); + + // Initialize data structure + const HypernodeID num_hypernodes = hypergraph.initialNumNodes(); + const HypernodeID num_hyperedges = hypergraph.initialNumEdges(); + tbb::parallel_invoke( + [&] { + tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID u) { + ASSERT(u + 1 < _indices.size()); + _indices[u + 1] = hypergraph.nodeDegree(u); + }); + }, + [&] { + tbb::parallel_for(num_hypernodes, num_hypernodes + num_hyperedges, + [&](const HyperedgeID u) { + ASSERT(u + 1 < _indices.size()); + const HyperedgeID he = u - num_hypernodes; + _indices[u + 1] = hypergraph.edgeSize(he); + }); }); - }); - _max_degree = local_max_degree.combine([&](const size_t& lhs, const size_t& rhs) { - return std::max(lhs, rhs); - }); - } - template - template - void Graph::constructGraph(const Hypergraph& hypergraph, const F& edge_weight_func) { - _indices.resize("Preprocessing", "indices", _num_nodes + 1); - _arcs.resize("Preprocessing", "arcs", _num_arcs); - _node_volumes.resize("Preprocessing", "node_volumes", _num_nodes); - - // Initialize data structure - const HypernodeID num_hypernodes = hypergraph.initialNumNodes(); - tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID u) { - ASSERT(u + 1 < _indices.size()); - _indices[u + 1] = hypergraph.nodeDegree(u); - }); - - parallel::TBBPrefixSum indices_prefix_sum(_indices); - tbb::parallel_scan(tbb::blocked_range(UL(0), num_hypernodes + 1), indices_prefix_sum); - - tbb::enumerable_thread_specific local_max_degree(0); - tbb::parallel_for(ID(0), num_hypernodes, 
[&](const HypernodeID u) { - ASSERT(u + 1 < _indices.size()); - size_t pos = _indices[u]; - const HypernodeID hn = u; - const HyperedgeID node_degree = hypergraph.nodeDegree(hn); - local_max_degree.local() = std::max( - local_max_degree.local(), static_cast(node_degree)); - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn) ) { - const HyperedgeWeight edge_weight = hypergraph.edgeWeight(he); - NodeID v = std::numeric_limits::max(); - for ( const HypernodeID& pin : hypergraph.pins(he) ) { - if ( pin != hn ) { - v = pin; - break; + parallel::TBBPrefixSum indices_prefix_sum(_indices); + tbb::parallel_scan(tbb::blocked_range(UL(0), _indices.size()), + indices_prefix_sum); + + tbb::enumerable_thread_specific local_max_degree(0); + tbb::parallel_invoke( + [&] { + tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID u) { + ASSERT(u + 1 < _indices.size()); + size_t pos = _indices[u]; + const HypernodeID hn = u; + const HyperedgeID node_degree = hypergraph.nodeDegree(hn); + local_max_degree.local() = + std::max(local_max_degree.local(), static_cast(node_degree)); + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + const NodeID v = he + num_hypernodes; + const HyperedgeWeight edge_weight = hypergraph.edgeWeight(he); + const HypernodeID edge_size = hypergraph.edgeSize(he); + ASSERT(pos < _indices[u + 1]); + _arcs[pos++] = Arc(v, edge_weight_func(edge_weight, edge_size, node_degree)); } + }); + }, + [&] { + tbb::parallel_for( + num_hypernodes, num_hypernodes + num_hyperedges, [&](const HyperedgeID u) { + ASSERT(u + 1 < _indices.size()); + size_t pos = _indices[u]; + const HyperedgeID he = u - num_hypernodes; + const HyperedgeWeight edge_weight = hypergraph.edgeWeight(he); + const HypernodeID edge_size = hypergraph.edgeSize(he); + local_max_degree.local() = + std::max(local_max_degree.local(), static_cast(edge_size)); + for(const HypernodeID &pin : hypergraph.pins(he)) + { + const NodeID v = pin; + const HyperedgeID node_degree = hypergraph.nodeDegree(pin); + ASSERT(pos < _indices[u + 1]); + _arcs[pos++] = + Arc(v, edge_weight_func(edge_weight, edge_size, node_degree)); + } + }); + }); + _max_degree = local_max_degree.combine( + [&](const size_t &lhs, const size_t &rhs) { return std::max(lhs, rhs); }); +} + +template +template +void Graph::constructGraph(const Hypergraph &hypergraph, + const F &edge_weight_func) +{ + _indices.resize("Preprocessing", "indices", _num_nodes + 1); + _arcs.resize("Preprocessing", "arcs", _num_arcs); + _node_volumes.resize("Preprocessing", "node_volumes", _num_nodes); + + // Initialize data structure + const HypernodeID num_hypernodes = hypergraph.initialNumNodes(); + tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID u) { + ASSERT(u + 1 < _indices.size()); + _indices[u + 1] = hypergraph.nodeDegree(u); + }); + + parallel::TBBPrefixSum indices_prefix_sum(_indices); + tbb::parallel_scan(tbb::blocked_range(UL(0), num_hypernodes + 1), + indices_prefix_sum); + + tbb::enumerable_thread_specific local_max_degree(0); + tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID u) { + ASSERT(u + 1 < _indices.size()); + size_t pos = _indices[u]; + const HypernodeID hn = u; + const HyperedgeID node_degree = hypergraph.nodeDegree(hn); + local_max_degree.local() = + std::max(local_max_degree.local(), static_cast(node_degree)); + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + const HyperedgeWeight edge_weight = hypergraph.edgeWeight(he); + NodeID v = std::numeric_limits::max(); + for(const HypernodeID &pin : 
hypergraph.pins(he)) + { + if(pin != hn) + { + v = pin; + break; } - ASSERT(v != std::numeric_limits::max()); - ASSERT(pos < _indices[u + 1]); - _arcs[pos++] = Arc(v, edge_weight_func(edge_weight, ID(2), node_degree)); } - }); - _max_degree = local_max_degree.combine([&](const size_t& lhs, const size_t& rhs) { - return std::max(lhs, rhs); - }); - } - - template - bool Graph::canBeUsed(const bool verbose) const { - const bool result = _indices.size() >= numNodes() + 1 && _arcs.size() >= numArcs() && _node_volumes.size() >= numNodes(); - if (verbose && !result) { - LOG << "Some of the graph's members were stolen. For example the contract function does this. " - "Make sure you're calling functions with a fresh graph or catch this condition and reinitialize." - "If you do reinitialize, feel free to silence this warning by passing false to the canBeUsed function"; + ASSERT(v != std::numeric_limits::max()); + ASSERT(pos < _indices[u + 1]); + _arcs[pos++] = Arc(v, edge_weight_func(edge_weight, ID(2), node_degree)); } - return result; + }); + _max_degree = local_max_degree.combine( + [&](const size_t &lhs, const size_t &rhs) { return std::max(lhs, rhs); }); +} + +template +bool Graph::canBeUsed(const bool verbose) const +{ + const bool result = _indices.size() >= numNodes() + 1 && _arcs.size() >= numArcs() && + _node_volumes.size() >= numNodes(); + if(verbose && !result) + { + LOG << "Some of the graph's members were stolen. For example the contract function " + "does this. " + "Make sure you're calling functions with a fresh graph or catch this " + "condition and reinitialize." + "If you do reinitialize, feel free to silence this warning by passing false " + "to the canBeUsed function"; } + return result; +} - INSTANTIATE_CLASS_WITH_HYPERGRAPHS(Graph) +INSTANTIATE_CLASS_WITH_HYPERGRAPHS(Graph) } // namespace mt_kahypar::ds \ No newline at end of file diff --git a/mt-kahypar/datastructures/graph.h b/mt-kahypar/datastructures/graph.h index 3a9c07557..d99a792e4 100644 --- a/mt-kahypar/datastructures/graph.h +++ b/mt-kahypar/datastructures/graph.h @@ -27,106 +27,104 @@ #pragma once -#include -#include #include +#include +#include #include "mt-kahypar/datastructures/array.h" #include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/partition/context_enum_classes.h" #include "mt-kahypar/utils/range.h" - namespace mt_kahypar { namespace ds { /*! 
* CSR Graph Data Structure */ -template -class Graph { +template +class Graph +{ static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - struct TmpGraphBuffer { - explicit TmpGraphBuffer(const size_t num_nodes, - const size_t num_arcs) : - tmp_indices("Preprocessing", "tmp_indices", num_nodes + 1), - tmp_pos("Preprocessing", "tmp_pos", num_nodes), - tmp_node_volumes("Preprocessing", "tmp_node_volumes", num_nodes), - tmp_arcs("Preprocessing", "tmp_arcs", num_arcs), - valid_arcs("Preprocessing", "valid_arcs", num_arcs) { } - - ds::Array> tmp_indices; - ds::Array> tmp_pos; - ds::Array> tmp_node_volumes; + struct TmpGraphBuffer + { + explicit TmpGraphBuffer(const size_t num_nodes, const size_t num_arcs) : + tmp_indices("Preprocessing", "tmp_indices", num_nodes + 1), + tmp_pos("Preprocessing", "tmp_pos", num_nodes), + tmp_node_volumes("Preprocessing", "tmp_node_volumes", num_nodes), + tmp_arcs("Preprocessing", "tmp_arcs", num_arcs), + valid_arcs("Preprocessing", "valid_arcs", num_arcs) + { + } + + ds::Array > tmp_indices; + ds::Array > tmp_pos; + ds::Array > tmp_node_volumes; ds::Array tmp_arcs; ds::Array valid_arcs; }; - public: +public: using AdjacenceIterator = typename ds::Array::const_iterator; - public: - Graph(Hypergraph& hypergraph, const LouvainEdgeWeight edge_weight_type, bool is_graph = false); - Graph(Graph&& other); - Graph& operator= (Graph&& other); +public: + Graph(Hypergraph &hypergraph, const LouvainEdgeWeight edge_weight_type, + bool is_graph = false); + Graph(Graph &&other); + Graph &operator=(Graph &&other); ~Graph(); // ! Number of nodes in the graph - size_t numNodes() const { - return _num_nodes; - } + size_t numNodes() const { return _num_nodes; } // ! Number of arcs in the graph - size_t numArcs() const { - return _num_arcs; - } + size_t numArcs() const { return _num_arcs; } // ! Iterator over all nodes of the graph - auto nodes() const { - return boost::irange(0, static_cast(numNodes())); - } + auto nodes() const { return boost::irange(0, static_cast(numNodes())); } // ! Iterator over all adjacent vertices of u // ! If 'n' is set, then only an iterator over the first n elements is returned - IteratorRange arcsOf(const NodeID u, - const size_t n = std::numeric_limits::max()) const { + IteratorRange + arcsOf(const NodeID u, const size_t n = std::numeric_limits::max()) const + { ASSERT(u < _num_nodes); const size_t start = _indices[u]; size_t end = _indices[u + 1]; - if ( n < ( end - start ) ) { + if(n < (end - start)) + { end = start + n; } - return IteratorRange( - _arcs.cbegin() + start, _arcs.cbegin() + end); + return IteratorRange(_arcs.cbegin() + start, _arcs.cbegin() + end); } // ! Degree of vertex u - size_t degree(const NodeID u) const { + size_t degree(const NodeID u) const + { ASSERT(u < _num_nodes); return _indices[u + 1] - _indices[u]; } // ! Maximum degree of a vertex - size_t max_degree() const { - return _max_degree; - } + size_t max_degree() const { return _max_degree; } // ! Total Volume of the graph - ArcWeight totalVolume() const { - return _total_volume; - } + ArcWeight totalVolume() const { return _total_volume; } // ! Node volume of vertex u - ArcWeight nodeVolume(const NodeID u) const { + ArcWeight nodeVolume(const NodeID u) const + { ASSERT(u < _num_nodes); return _node_volumes[u]; } - // ! Projects the clustering of the (likely bipartite star-expansion) graph to the hypergraph - void restrictClusteringToHypernodes(const Hypergraph& hg, ds::Clustering& C) const { + // ! 
Projects the clustering of the (likely bipartite star-expansion) graph to the + // hypergraph + void restrictClusteringToHypernodes(const Hypergraph &hg, ds::Clustering &C) const + { C.resize(hg.initialNumNodes()); } @@ -140,34 +138,37 @@ class Graph { * coarse graph. Finally, the weights of each multiedge in that temporary graph * are aggregated and the result is written to the final contracted graph. */ - Graph contract(Clustering& communities, bool low_memory); + Graph contract(Clustering &communities, bool low_memory); - Graph contract_low_memory(Clustering& communities); + Graph contract_low_memory(Clustering &communities); - void allocateContractionBuffers() { + void allocateContractionBuffers() + { _tmp_graph_buffer = new TmpGraphBuffer(_num_nodes, _num_arcs); } - private: +private: Graph(); /*! * Constructs a graph from a given hypergraph. */ - template - void construct(const Hypergraph& hypergraph, const bool is_graph, const F& edge_weight_func); + template + void construct(const Hypergraph &hypergraph, const bool is_graph, + const F &edge_weight_func); - template - void constructBipartiteGraph(const Hypergraph& hypergraph, F& edge_weight_func); + template + void constructBipartiteGraph(const Hypergraph &hypergraph, F &edge_weight_func); - template - void constructGraph(const Hypergraph& hypergraph, - const F& edge_weight_func); + template + void constructGraph(const Hypergraph &hypergraph, const F &edge_weight_func); - ArcWeight computeNodeVolume(const NodeID u) { + ArcWeight computeNodeVolume(const NodeID u) + { ASSERT(u < _num_nodes); ArcWeight x = 0.0; - for (const Arc& arc : arcsOf(u)) { + for(const Arc &arc : arcsOf(u)) + { x += arc.weight; } _node_volumes[u] = x; @@ -191,13 +192,13 @@ class Graph { ds::Array _node_volumes; // ! Data that is reused throughout the louvain method // ! 
to construct and contract a graph and to prevent expensive allocations - TmpGraphBuffer* _tmp_graph_buffer; + TmpGraphBuffer *_tmp_graph_buffer; }; -} // namespace ds +} // namespace ds // expose -template +template using Graph = ds::Graph; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/hypergraph_common.h b/mt-kahypar/datastructures/hypergraph_common.h index ee7720433..9a89a741e 100644 --- a/mt-kahypar/datastructures/hypergraph_common.h +++ b/mt-kahypar/datastructures/hypergraph_common.h @@ -32,27 +32,29 @@ #include "include/libmtkahypartypes.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/datastructures/array.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/parallel/hardware_topology.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/parallel/tbb_initializer.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" -#include "mt-kahypar/datastructures/array.h" namespace mt_kahypar { using HardwareTopology = mt_kahypar::parallel::HardwareTopology<>; using TBBInitializer = mt_kahypar::parallel::TBBInitializer; -#define UI64(X) static_cast(X) +#define UI64(X) static_cast (X) -struct parallel_tag_t { }; +struct parallel_tag_t +{ +}; using RatingType = double; #if KAHYPAR_USE_64_BIT_IDS -#define ID(X) static_cast(X) +#define ID(X) static_cast (X) using HypernodeID = uint64_t; using HyperedgeID = uint64_t; #else -#define ID(X) static_cast(X) +#define ID(X) static_cast (X) using HypernodeID = uint32_t; using HyperedgeID = uint32_t; #endif @@ -65,55 +67,51 @@ using Gain = HyperedgeWeight; using NodeID = uint32_t; using ArcWeight = double; -struct Arc { +struct Arc +{ NodeID head; ArcWeight weight; - Arc() : - head(0), - weight(0) { } + Arc () : head (0), weight (0) {} - Arc(NodeID head, ArcWeight weight) : - head(head), - weight(weight) { } + Arc (NodeID head, ArcWeight weight) : head (head), weight (weight) {} }; // Constant Declarations static constexpr PartitionID kInvalidPartition = -1; -static constexpr HypernodeID kInvalidHypernode = std::numeric_limits::max(); -static constexpr HypernodeID kInvalidHyperedge = std::numeric_limits::max(); -static constexpr Gain kInvalidGain = std::numeric_limits::min(); +static constexpr HypernodeID kInvalidHypernode = std::numeric_limits::max (); +static constexpr HypernodeID kInvalidHyperedge = std::numeric_limits::max (); +static constexpr Gain kInvalidGain = std::numeric_limits::min (); static constexpr size_t kEdgeHashSeed = 42; -static constexpr HypernodeID invalidNode = std::numeric_limits::max(); -static constexpr Gain invalidGain = std::numeric_limits::min(); +static constexpr HypernodeID invalidNode = std::numeric_limits::max (); +static constexpr Gain invalidGain = std::numeric_limits::min (); namespace ds { - using Clustering = vec; +using Clustering = vec; } -struct Move { +struct Move +{ PartitionID from = kInvalidPartition; PartitionID to = kInvalidPartition; HypernodeID node = invalidNode; Gain gain = invalidGain; - bool isValid() const { - return from != kInvalidPartition; - } + bool isValid () const { return from != kInvalidPartition; } - void invalidate() { - from = kInvalidPartition; - } + void invalidate () { from = kInvalidPartition; } }; -struct Memento { +struct Memento +{ HypernodeID u; // representative HypernodeID v; // contraction partner }; -template -struct ExtractedHypergraph { +template +struct ExtractedHypergraph +{ Hypergraph hg; vec hn_mapping; vec already_cut; @@ -139,7 +137,8 @@ class 
ConnectivityInfo; class SparseConnectivityInfo; } -struct SynchronizedEdgeUpdate { +struct SynchronizedEdgeUpdate +{ HyperedgeID he = kInvalidHyperedge; PartitionID from = kInvalidPartition; PartitionID to = kInvalidPartition; @@ -148,50 +147,57 @@ struct SynchronizedEdgeUpdate { HypernodeID pin_count_in_from_part_after = kInvalidHypernode; HypernodeID pin_count_in_to_part_after = kInvalidHypernode; PartitionID block_of_other_node = kInvalidPartition; - mutable ds::Bitset* connectivity_set_after = nullptr; - mutable ds::PinCountSnapshot* pin_counts_after = nullptr; - const TargetGraph* target_graph = nullptr; - ds::Array* edge_locks = nullptr; + mutable ds::Bitset *connectivity_set_after = nullptr; + mutable ds::PinCountSnapshot *pin_counts_after = nullptr; + const TargetGraph *target_graph = nullptr; + ds::Array *edge_locks = nullptr; }; -struct NoOpDeltaFunc { - void operator() (const SynchronizedEdgeUpdate&) { } +struct NoOpDeltaFunc +{ + void operator() (const SynchronizedEdgeUpdate &) {} }; -template -struct PartitionedHypergraphType { +template +struct PartitionedHypergraphType +{ static constexpr mt_kahypar_partition_type_t TYPE = NULLPTR_PARTITION; }; -template<> -struct PartitionedHypergraphType { +template <> +struct PartitionedHypergraphType +{ static constexpr mt_kahypar_partition_type_t TYPE = MULTILEVEL_HYPERGRAPH_PARTITIONING; }; -template<> -struct PartitionedHypergraphType { +template <> +struct PartitionedHypergraphType +{ static constexpr mt_kahypar_partition_type_t TYPE = LARGE_K_PARTITIONING; }; -template<> -struct PartitionedHypergraphType { +template <> +struct PartitionedHypergraphType +{ static constexpr mt_kahypar_partition_type_t TYPE = N_LEVEL_HYPERGRAPH_PARTITIONING; }; -template -struct PartitionedGraphType { +template +struct PartitionedGraphType +{ static constexpr mt_kahypar_partition_type_t TYPE = NULLPTR_PARTITION; }; -template<> -struct PartitionedGraphType { +template <> +struct PartitionedGraphType +{ static constexpr mt_kahypar_partition_type_t TYPE = MULTILEVEL_GRAPH_PARTITIONING; }; -template<> -struct PartitionedGraphType { +template <> +struct PartitionedGraphType +{ static constexpr mt_kahypar_partition_type_t TYPE = N_LEVEL_GRAPH_PARTITIONING; }; - } // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/incident_net_array.cpp b/mt-kahypar/datastructures/incident_net_array.cpp index 5c2351a1e..f2adfb554 100644 --- a/mt-kahypar/datastructures/incident_net_array.cpp +++ b/mt-kahypar/datastructures/incident_net_array.cpp @@ -33,33 +33,37 @@ namespace mt_kahypar { namespace ds { IncidentNetIterator::IncidentNetIterator(const HypernodeID u, - const IncidentNetArray* incident_net_array, - const size_t pos, - const bool end) : - _u(u), - _current_u(u), - _current_size(incident_net_array->header(u)->size), - _current_pos(0), - _incident_net_array(incident_net_array), - _end(end) { - if ( end ) { + const IncidentNetArray *incident_net_array, + const size_t pos, const bool end) : + _u(u), + _current_u(u), _current_size(incident_net_array->header(u)->size), _current_pos(0), + _incident_net_array(incident_net_array), _end(end) +{ + if(end) + { _current_pos = _current_size; } - if ( !end && _current_pos == _current_size ) { + if(!end && _current_pos == _current_size) + { next_iterator(); } - if ( pos > 0 ) { + if(pos > 0) + { ASSERT(pos <= incident_net_array->nodeDegree(u)); size_t c_pos = pos; - while ( c_pos != 0 ) { + while(c_pos != 0) + { const size_t current_size = _current_size; - if ( c_pos >= current_size ) { + if(c_pos >= current_size) + { 
c_pos -= current_size; _current_pos = current_size; next_iterator(); - } else { + } + else + { _current_pos = c_pos; c_pos = 0; } @@ -67,30 +71,37 @@ IncidentNetIterator::IncidentNetIterator(const HypernodeID u, } } -HyperedgeID IncidentNetIterator::operator* () const { +HyperedgeID IncidentNetIterator::operator*() const +{ ASSERT(!_end); return _incident_net_array->firstEntry(_current_u)[_current_pos].e; } -IncidentNetIterator & IncidentNetIterator::operator++ () { +IncidentNetIterator &IncidentNetIterator::operator++() +{ ASSERT(!_end); ++_current_pos; - if ( _current_pos == _current_size) { + if(_current_pos == _current_size) + { next_iterator(); } return *this; } -bool IncidentNetIterator::operator!= (const IncidentNetIterator& rhs) { +bool IncidentNetIterator::operator!=(const IncidentNetIterator &rhs) +{ return _u != rhs._u || _end != rhs._end; } -bool IncidentNetIterator::operator== (const IncidentNetIterator& rhs) { +bool IncidentNetIterator::operator==(const IncidentNetIterator &rhs) +{ return _u == rhs._u && _end == rhs._end; } -void IncidentNetIterator::next_iterator() { - while ( _current_pos == _current_size ) { +void IncidentNetIterator::next_iterator() +{ + while(_current_pos == _current_size) + { const HypernodeID last_u = _current_u; _current_u = _incident_net_array->header(_current_u)->it_next; _current_pos = 0; @@ -100,8 +111,8 @@ void IncidentNetIterator::next_iterator() { // changes. Therefore, we set the end flag if we reach the current // head of the list or it_next is equal with the current vertex (means // that list becomes empty due to a contraction) - if ( _incident_net_array->header(_current_u)->is_head || - last_u == _current_u ) { + if(_incident_net_array->header(_current_u)->is_head || last_u == _current_u) + { _end = true; break; } @@ -112,11 +123,11 @@ void IncidentNetIterator::next_iterator() { // ! v the contraction partner of the contraction. The contraction involves to remove // ! all incident nets shared between u and v from the incident net list of v and append // ! the list of v to u. -void IncidentNetArray::contract(const HypernodeID u, - const HypernodeID v, - const kahypar::ds::FastResetFlagArray<>& shared_hes_of_u_and_v, - const AcquireLockFunc& acquire_lock, - const ReleaseLockFunc& release_lock) { +void IncidentNetArray::contract( + const HypernodeID u, const HypernodeID v, + const kahypar::ds::FastResetFlagArray<> &shared_hes_of_u_and_v, + const AcquireLockFunc &acquire_lock, const ReleaseLockFunc &release_lock) +{ // Remove all HEs flagged in shared_hes_of_u_and_v from v removeIncidentNets(v, shared_hes_of_u_and_v); @@ -124,41 +135,44 @@ void IncidentNetArray::contract(const HypernodeID u, // Concatenate double-linked list of u and v append(u, v); header(u)->degree += header(v)->degree; - ASSERT(verifyIteratorPointers(u), "Iterator pointers of vertex" << u << "are corrupted"); + ASSERT(verifyIteratorPointers(u), + "Iterator pointers of vertex" << u << "are corrupted"); release_lock(u); } // ! Uncontract two previously contracted vertices u and v. -// ! Uncontraction involves to decrement the version number of all incident lists contained -// ! in v and restore all incident nets with a version number equal to the new version. -// ! Note, uncontraction must be done in relative contraction order -void IncidentNetArray::uncontract(const HypernodeID u, - const HypernodeID v, - const AcquireLockFunc& acquire_lock, - const ReleaseLockFunc& release_lock) { - uncontract(u, v, [](HyperedgeID) {}, [](HyperedgeID) {}, acquire_lock, release_lock); +// ! 
Uncontraction involves to decrement the version number of all incident lists +// contained ! in v and restore all incident nets with a version number equal to the new +// version. ! Note, uncontraction must be done in relative contraction order +void IncidentNetArray::uncontract(const HypernodeID u, const HypernodeID v, + const AcquireLockFunc &acquire_lock, + const ReleaseLockFunc &release_lock) +{ + uncontract( + u, v, [](HyperedgeID) {}, [](HyperedgeID) {}, acquire_lock, release_lock); } // ! Uncontract two previously contracted vertices u and v. -// ! Uncontraction involves to decrement the version number of all incident lists contained -// ! in v and restore all incident nets with a version number equal to the new version. -// ! Additionally it calls case_one_func for a hyperedge he, if u and v were previously both -// ! adjacent to he and case_two_func if only v was previously adjacent to he. -// ! Note, uncontraction must be done in relative contraction order -void IncidentNetArray::uncontract(const HypernodeID u, - const HypernodeID v, - const CaseOneFunc& case_one_func, - const CaseTwoFunc& case_two_func, - const AcquireLockFunc& acquire_lock, - const ReleaseLockFunc& release_lock) { +// ! Uncontraction involves to decrement the version number of all incident lists +// contained ! in v and restore all incident nets with a version number equal to the new +// version. ! Additionally it calls case_one_func for a hyperedge he, if u and v were +// previously both ! adjacent to he and case_two_func if only v was previously adjacent to +// he. ! Note, uncontraction must be done in relative contraction order +void IncidentNetArray::uncontract(const HypernodeID u, const HypernodeID v, + const CaseOneFunc &case_one_func, + const CaseTwoFunc &case_two_func, + const AcquireLockFunc &acquire_lock, + const ReleaseLockFunc &release_lock) +{ ASSERT(header(v)->prev != v); - Header* head_v = header(v); + Header *head_v = header(v); acquire_lock(u); // Restores the incident list of v to the time before it was appended // to the double-linked list of u. splice(u, v); header(u)->degree -= head_v->degree; - ASSERT(verifyIteratorPointers(u), "Iterator pointers of vertex" << u << "are corrupted"); + ASSERT(verifyIteratorPointers(u), + "Iterator pointers of vertex" << u << "are corrupted"); release_lock(u); // Restore all incident nets of v removed by the contraction of u and v @@ -166,35 +180,45 @@ void IncidentNetArray::uncontract(const HypernodeID u, } // ! Removes all incidents nets of u flagged in hes_to_remove. 
-void IncidentNetArray::removeIncidentNets(const HypernodeID u, - const kahypar::ds::FastResetFlagArray<>& hes_to_remove) { +void IncidentNetArray::removeIncidentNets( + const HypernodeID u, const kahypar::ds::FastResetFlagArray<> &hes_to_remove) +{ HypernodeID current_u = u; - Header* head_u = header(u); - do { - Header* head = header(current_u); + Header *head_u = header(u); + do + { + Header *head = header(current_u); const HypernodeID new_version = ++head->current_version; - Entry* last_entry = lastEntry(current_u); - for ( Entry* current_entry = firstEntry(current_u); current_entry != last_entry; ++current_entry ) { - if ( hes_to_remove[current_entry->e] ) { + Entry *last_entry = lastEntry(current_u); + for(Entry *current_entry = firstEntry(current_u); current_entry != last_entry; + ++current_entry) + { + if(hes_to_remove[current_entry->e]) + { // Hyperedge should be removed => decrement size of incident net list swap(current_entry--, --last_entry); ASSERT(head->size > 0); --head->size; --head_u->degree; - } else { - // Vertex is non-shared between u and v => adapt version number of current incident net + } + else + { + // Vertex is non-shared between u and v => adapt version number of current + // incident net current_entry->version = new_version; } } - if ( head->size == 0 && current_u != u ) { + if(head->size == 0 && current_u != u) + { // Current list becomes empty => remove it from the iterator double linked list // such that iteration over the incident nets is more efficient removeEmptyIncidentNetList(current_u); } current_u = head->next; - } while ( current_u != u ); - ASSERT(verifyIteratorPointers(u), "Iterator pointers of vertex" << u << "are corrupted"); + } while(current_u != u); + ASSERT(verifyIteratorPointers(u), + "Iterator pointers of vertex" << u << "are corrupted"); } // ! Restores all previously removed incident nets @@ -202,8 +226,10 @@ void IncidentNetArray::removeIncidentNets(const HypernodeID u, // ! removeIncidentNets(...) and all uncontraction that happens // ! between two consecutive calls to removeIncidentNets(...) must // ! be processed. -void IncidentNetArray::restoreIncidentNets(const HypernodeID u) { - restoreIncidentNets(u, [](HyperedgeID) {}, [](HyperedgeID) {}); +void IncidentNetArray::restoreIncidentNets(const HypernodeID u) +{ + restoreIncidentNets( + u, [](HyperedgeID) {}, [](HyperedgeID) {}); } // ! Restores all previously removed incident nets @@ -212,97 +238,114 @@ void IncidentNetArray::restoreIncidentNets(const HypernodeID u) { // ! between two consecutive calls to removeIncidentNets(...) must // ! be processed. 
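To make the reverse-order contract stated above concrete before the definition that follows, here is a purely illustrative sketch. The names `nets`, `u`, `first_batch` and `second_batch` are invented; it assumes a constructed IncidentNetArray and two FastResetFlagArray instances flagging the nets to drop.

```cpp
#include "kahypar-resources/datastructure/fast_reset_flag_array.h"

#include "mt-kahypar/datastructures/incident_net_array.h"

using namespace mt_kahypar;

// Sketch only: remove two batches of incident nets from vertex u and undo
// both removals. Restores must mirror the removals in reverse (LIFO) order,
// because each removal bumps the version counter that restore decrements.
void remove_and_restore(ds::IncidentNetArray &nets, const HypernodeID u,
                        kahypar::ds::FastResetFlagArray<> &first_batch,
                        kahypar::ds::FastResetFlagArray<> &second_batch)
{
  nets.removeIncidentNets(u, first_batch);  // version of u's lists: 0 -> 1
  nets.removeIncidentNets(u, second_batch); // version: 1 -> 2

  nets.restoreIncidentNets(u); // undoes the second removal (2 -> 1)
  nets.restoreIncidentNets(u); // undoes the first removal  (1 -> 0)
}
```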
void IncidentNetArray::restoreIncidentNets(const HypernodeID u, - const CaseOneFunc& case_one_func, - const CaseTwoFunc& case_two_func) { - Header* head_u = header(u); + const CaseOneFunc &case_one_func, + const CaseTwoFunc &case_two_func) +{ + Header *head_u = header(u); HypernodeID current_u = u; HypernodeID last_non_empty_entry = kInvalidHypernode; - do { - Header* head = header(current_u); + do + { + Header *head = header(current_u); ASSERT(head->current_version > 0); const HypernodeID new_version = --head->current_version; // Iterate over all active entries and call case_two_func // => After an uncontraction only u was part of them not its representative - for ( Entry* current_entry = firstEntry(current_u); - current_entry != lastEntry(current_u); - ++current_entry ) { + for(Entry *current_entry = firstEntry(current_u); + current_entry != lastEntry(current_u); ++current_entry) + { case_two_func(current_entry->e); } // Iterate over non-active entries (and activate them) until the version number // is not equal to the new version of the list - const Entry* last_entry = reinterpret_cast(header(current_u + 1)); - for ( Entry* current_entry = lastEntry(current_u); - current_entry != last_entry; - ++current_entry ) { - if ( current_entry->version == new_version ) { + const Entry *last_entry = reinterpret_cast(header(current_u + 1)); + for(Entry *current_entry = lastEntry(current_u); current_entry != last_entry; + ++current_entry) + { + if(current_entry->version == new_version) + { ++head->size; ++head_u->degree; case_one_func(current_entry->e); - } else { + } + else + { break; } } // Restore iterator double-linked list which only contains // non-empty incident net lists - if ( head->size > 0 || current_u == u ) { - if ( last_non_empty_entry != kInvalidHypernode && - head->it_prev != last_non_empty_entry ) { + if(head->size > 0 || current_u == u) + { + if(last_non_empty_entry != kInvalidHypernode && + head->it_prev != last_non_empty_entry) + { header(last_non_empty_entry)->it_next = current_u; head->it_prev = last_non_empty_entry; } last_non_empty_entry = current_u; } current_u = head->next; - } while ( current_u != u ); + } while(current_u != u); ASSERT(last_non_empty_entry != kInvalidHypernode); head_u->it_prev = last_non_empty_entry; header(last_non_empty_entry)->it_next = u; - ASSERT(verifyIteratorPointers(u), "Iterator pointers of vertex" << u << "are corrupted"); + ASSERT(verifyIteratorPointers(u), + "Iterator pointers of vertex" << u << "are corrupted"); } -IncidentNetArray IncidentNetArray::copy(parallel_tag_t) const { +IncidentNetArray IncidentNetArray::copy(parallel_tag_t) const +{ IncidentNetArray incident_nets; incident_nets._num_hypernodes = _num_hypernodes; incident_nets._size_in_bytes = _size_in_bytes; - tbb::parallel_invoke([&] { - incident_nets._index_array.resize(_index_array.size()); - memcpy(incident_nets._index_array.data(), _index_array.data(), - sizeof(size_t) * _index_array.size()); - }, [&] { - incident_nets._incident_net_array = parallel::make_unique(_size_in_bytes); - memcpy(incident_nets._incident_net_array.get(), _incident_net_array.get(), _size_in_bytes); - }); + tbb::parallel_invoke( + [&] { + incident_nets._index_array.resize(_index_array.size()); + memcpy(incident_nets._index_array.data(), _index_array.data(), + sizeof(size_t) * _index_array.size()); + }, + [&] { + incident_nets._incident_net_array = parallel::make_unique(_size_in_bytes); + memcpy(incident_nets._incident_net_array.get(), _incident_net_array.get(), + _size_in_bytes); + }); return 
incident_nets; } -IncidentNetArray IncidentNetArray::copy() const { +IncidentNetArray IncidentNetArray::copy() const +{ IncidentNetArray incident_nets; incident_nets._num_hypernodes = _num_hypernodes; incident_nets._size_in_bytes = _size_in_bytes; incident_nets._index_array.resize(_index_array.size()); memcpy(incident_nets._index_array.data(), _index_array.data(), - sizeof(size_t) * _index_array.size()); + sizeof(size_t) * _index_array.size()); incident_nets._incident_net_array = parallel::make_unique(_size_in_bytes); - memcpy(incident_nets._incident_net_array.get(), _incident_net_array.get(), _size_in_bytes); + memcpy(incident_nets._incident_net_array.get(), _incident_net_array.get(), + _size_in_bytes); return incident_nets; } -void IncidentNetArray::reset() { +void IncidentNetArray::reset() +{ tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID u) { header(u)->current_version = 0; - for ( Entry* entry = firstEntry(u); entry != lastEntry(u); ++entry ) { + for(Entry *entry = firstEntry(u); entry != lastEntry(u); ++entry) + { entry->version = 0; } }); } -void IncidentNetArray::append(const HypernodeID u, const HypernodeID v) { +void IncidentNetArray::append(const HypernodeID u, const HypernodeID v) +{ const HypernodeID tail_u = header(u)->prev; const HypernodeID tail_v = header(v)->prev; header(tail_u)->next = v; @@ -319,25 +362,28 @@ void IncidentNetArray::append(const HypernodeID u, const HypernodeID v) { header(it_tail_v)->it_next = u; header(v)->is_head = false; - if ( header(v)->size == 0 ) { + if(header(v)->size == 0) + { removeEmptyIncidentNetList(v); } } -void IncidentNetArray::splice(const HypernodeID u, const HypernodeID v) { +void IncidentNetArray::splice(const HypernodeID u, const HypernodeID v) +{ // Restore the iterator double-linked list of u such that it does not contain // any incident net list of v const HypernodeID tail = header(v)->tail; HypernodeID non_empty_entry_prev_v = v; HypernodeID non_empty_entry_next_tail = tail; - while ( ( non_empty_entry_prev_v == v || - header(non_empty_entry_prev_v)->size == 0 ) && - non_empty_entry_prev_v != u ) { + while((non_empty_entry_prev_v == v || header(non_empty_entry_prev_v)->size == 0) && + non_empty_entry_prev_v != u) + { non_empty_entry_prev_v = header(non_empty_entry_prev_v)->prev; } - while ( ( non_empty_entry_next_tail == tail || - header(non_empty_entry_next_tail)->size == 0 ) && - non_empty_entry_next_tail != u ) { + while((non_empty_entry_next_tail == tail || + header(non_empty_entry_next_tail)->size == 0) && + non_empty_entry_next_tail != u) + { non_empty_entry_next_tail = header(non_empty_entry_next_tail)->next; } header(non_empty_entry_prev_v)->it_next = non_empty_entry_next_tail; @@ -353,40 +399,46 @@ void IncidentNetArray::splice(const HypernodeID u, const HypernodeID v) { header(v)->is_head = true; } -void IncidentNetArray::removeEmptyIncidentNetList(const HypernodeID u) { +void IncidentNetArray::removeEmptyIncidentNetList(const HypernodeID u) +{ ASSERT(!header(u)->is_head); ASSERT(header(u)->size == 0, V(u) << V(header(u)->size)); - Header* head = header(u); + Header *head = header(u); header(head->it_prev)->it_next = head->it_next; header(head->it_next)->it_prev = head->it_prev; head->it_next = u; head->it_prev = u; } -void IncidentNetArray::construct(const HyperedgeVector& edge_vector) { +void IncidentNetArray::construct(const HyperedgeVector &edge_vector) +{ // Accumulate degree of each vertex thread local const HyperedgeID num_hyperedges = edge_vector.size(); ThreadLocalCounter 
local_incident_nets_per_vertex(_num_hypernodes + 1, 0); AtomicCounter current_incident_net_pos; - tbb::parallel_invoke([&] { - tbb::parallel_for(ID(0), num_hyperedges, [&](const size_t pos) { - parallel::scalable_vector& num_incident_nets_per_vertex = - local_incident_nets_per_vertex.local(); - for ( const HypernodeID& pin : edge_vector[pos] ) { - ASSERT(pin < _num_hypernodes, V(pin) << V(_num_hypernodes)); - ++num_incident_nets_per_vertex[pin + 1]; - } - }); - }, [&] { - _index_array.assign(_num_hypernodes + 1, sizeof(Header)); - current_incident_net_pos.assign( - _num_hypernodes, parallel::IntegralAtomicWrapper(0)); - }); + tbb::parallel_invoke( + [&] { + tbb::parallel_for(ID(0), num_hyperedges, [&](const size_t pos) { + parallel::scalable_vector &num_incident_nets_per_vertex = + local_incident_nets_per_vertex.local(); + for(const HypernodeID &pin : edge_vector[pos]) + { + ASSERT(pin < _num_hypernodes, V(pin) << V(_num_hypernodes)); + ++num_incident_nets_per_vertex[pin + 1]; + } + }); + }, + [&] { + _index_array.assign(_num_hypernodes + 1, sizeof(Header)); + current_incident_net_pos.assign(_num_hypernodes, + parallel::IntegralAtomicWrapper(0)); + }); // We sum up the number of incident nets per vertex only thread local. // To obtain the global number of incident nets per vertex, we iterate // over each thread local counter and sum it up. - for ( const parallel::scalable_vector& c : local_incident_nets_per_vertex ) { + for(const parallel::scalable_vector &c : local_incident_nets_per_vertex) + { tbb::parallel_for(ID(0), _num_hypernodes + 1, [&](const size_t pos) { _index_array[pos] += c[pos] * sizeof(Entry); }); @@ -394,15 +446,16 @@ void IncidentNetArray::construct(const HyperedgeVector& edge_vector) { // Compute start positon of the incident nets of each vertex via a parallel prefix sum parallel::TBBPrefixSum incident_net_prefix_sum(_index_array); - tbb::parallel_scan(tbb::blocked_range( - UL(0), UI64(_num_hypernodes + 1)), incident_net_prefix_sum); + tbb::parallel_scan(tbb::blocked_range(UL(0), UI64(_num_hypernodes + 1)), + incident_net_prefix_sum); _size_in_bytes = incident_net_prefix_sum.total_sum(); _incident_net_array = parallel::make_unique(_size_in_bytes); // Insert incident nets into incidence array tbb::parallel_for(ID(0), num_hyperedges, [&](const HyperedgeID he) { - for ( const HypernodeID& pin : edge_vector[he] ) { - Entry* entry = firstEntry(pin) + current_incident_net_pos[pin]++; + for(const HypernodeID &pin : edge_vector[he]) + { + Entry *entry = firstEntry(pin) + current_incident_net_pos[pin]++; entry->e = he; entry->version = 0; } @@ -410,7 +463,7 @@ void IncidentNetArray::construct(const HyperedgeVector& edge_vector) { // Setup Header of each vertex tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID u) { - Header* head = header(u); + Header *head = header(u); head->prev = u; head->next = u; head->it_prev = u; @@ -422,23 +475,35 @@ void IncidentNetArray::construct(const HyperedgeVector& edge_vector) { }); } -bool IncidentNetArray::verifyIteratorPointers(const HypernodeID u) const { +bool IncidentNetArray::verifyIteratorPointers(const HypernodeID u) const +{ HypernodeID current_u = u; HypernodeID last_non_empty_entry = kInvalidHypernode; - do { - if ( header(current_u)->size > 0 || current_u == u ) { - if ( last_non_empty_entry != kInvalidHypernode ) { - if ( header(current_u)->it_prev != last_non_empty_entry ) { + do + { + if(header(current_u)->size > 0 || current_u == u) + { + if(last_non_empty_entry != kInvalidHypernode) + { + if(header(current_u)->it_prev 
!= last_non_empty_entry) + { return false; - } else if ( header(last_non_empty_entry)->it_next != current_u ) { + } + else if(header(last_non_empty_entry)->it_next != current_u) + { return false; } } last_non_empty_entry = current_u; - } else { - if ( header(current_u)->it_next != current_u ) { + } + else + { + if(header(current_u)->it_next != current_u) + { return false; - } else if ( header(current_u)->it_prev != current_u ) { + } + else if(header(current_u)->it_prev != current_u) + { return false; } } @@ -446,14 +511,17 @@ bool IncidentNetArray::verifyIteratorPointers(const HypernodeID u) const { current_u = header(current_u)->next; } while(current_u != u); - if ( header(u)->it_prev != last_non_empty_entry ) { + if(header(u)->it_prev != last_non_empty_entry) + { return false; - } else if ( header(last_non_empty_entry)->it_next != u ) { + } + else if(header(last_non_empty_entry)->it_next != u) + { return false; } return true; } -} // namespace ds -} // namespace mt_kahypar \ No newline at end of file +} // namespace ds +} // namespace mt_kahypar \ No newline at end of file diff --git a/mt-kahypar/datastructures/incident_net_array.h b/mt-kahypar/datastructures/incident_net_array.h index efefa3294..2ad6b189a 100644 --- a/mt-kahypar/datastructures/incident_net_array.h +++ b/mt-kahypar/datastructures/incident_net_array.h @@ -35,13 +35,13 @@ #include "kahypar-resources/datastructure/fast_reset_flag_array.h" -#include "mt-kahypar/macros.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" -#include "mt-kahypar/parallel/stl/scalable_unique_ptr.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/parallel/parallel_prefix_sum.h" +#include "mt-kahypar/parallel/stl/scalable_unique_ptr.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/utils/range.h" namespace mt_kahypar { @@ -51,56 +51,60 @@ namespace ds { class IncidentNetArray; // Iterator over the incident nets of a vertex u -class IncidentNetIterator { - public: +class IncidentNetIterator +{ +public: using iterator_category = std::forward_iterator_tag; using value_type = HyperedgeID; - using reference = HyperedgeID&; - using pointer = const HyperedgeID*; + using reference = HyperedgeID &; + using pointer = const HyperedgeID *; using difference_type = std::ptrdiff_t; - IncidentNetIterator(const HypernodeID u, - const IncidentNetArray* incident_net_array, - const size_t pos, - const bool end); + IncidentNetIterator(const HypernodeID u, const IncidentNetArray *incident_net_array, + const size_t pos, const bool end); - HyperedgeID operator* () const; + HyperedgeID operator*() const; - IncidentNetIterator & operator++ (); + IncidentNetIterator &operator++(); - IncidentNetIterator operator++ (int) { + IncidentNetIterator operator++(int) + { IncidentNetIterator copy = *this; - operator++ (); + operator++(); return copy; } - bool operator!= (const IncidentNetIterator& rhs); + bool operator!=(const IncidentNetIterator &rhs); - bool operator== (const IncidentNetIterator& rhs); + bool operator==(const IncidentNetIterator &rhs); - private: +private: void next_iterator(); HypernodeID _u; HypernodeID _current_u; HypernodeID _current_size; size_t _current_pos; - const IncidentNetArray* _incident_net_array; + const IncidentNetArray *_incident_net_array; bool _end; }; // ! 
Class allows in-place contraction and uncontraction of the incident net array -class IncidentNetArray { - - using HyperedgeVector = parallel::scalable_vector>; - using ThreadLocalCounter = tbb::enumerable_thread_specific>; - using AtomicCounter = parallel::scalable_vector>; - - using AcquireLockFunc = std::function; - using ReleaseLockFunc = std::function; - using CaseOneFunc = std::function; - using CaseTwoFunc = std::function; - #define NOOP_LOCK_FUNC [] (const HypernodeID) { } +class IncidentNetArray +{ + + using HyperedgeVector = + parallel::scalable_vector >; + using ThreadLocalCounter = + tbb::enumerable_thread_specific >; + using AtomicCounter = + parallel::scalable_vector >; + + using AcquireLockFunc = std::function; + using ReleaseLockFunc = std::function; + using CaseOneFunc = std::function; + using CaseTwoFunc = std::function; +#define NOOP_LOCK_FUNC [](const HypernodeID) {} static_assert(sizeof(char) == 1); @@ -108,24 +112,21 @@ class IncidentNetArray { // A incident net is associated with a version number. Incident nets // with a version number greater or equal than the version number in // header (see Header -> current_version) are active. - struct Entry { + struct Entry + { HyperedgeID e; HypernodeID version; }; // Header of the incident net list of a vertex. The incident net lists // contracted into one vertex are concatenated in a double linked list. - struct Header { + struct Header + { explicit Header(const HypernodeID u) : - prev(u), - next(u), - it_prev(u), - it_next(u), - tail(u), - size(0), - degree(0), - current_version(0), - is_head(true) { } + prev(u), next(u), it_prev(u), it_next(u), tail(u), size(0), degree(0), + current_version(0), is_head(true) + { + } // ! Previous incident net list HypernodeID prev; @@ -149,82 +150,76 @@ class IncidentNetArray { bool is_head; }; - public: +public: using const_iterator = IncidentNetIterator; IncidentNetArray() : - _num_hypernodes(0), - _size_in_bytes(0), - _index_array(), - _incident_net_array(nullptr) { } - - IncidentNetArray(const HypernodeID num_hypernodes, - const HyperedgeVector& edge_vector) : - _num_hypernodes(num_hypernodes), - _size_in_bytes(0), - _index_array(), - _incident_net_array(nullptr) { + _num_hypernodes(0), _size_in_bytes(0), _index_array(), _incident_net_array(nullptr) + { + } + + IncidentNetArray(const HypernodeID num_hypernodes, const HyperedgeVector &edge_vector) : + _num_hypernodes(num_hypernodes), _size_in_bytes(0), _index_array(), + _incident_net_array(nullptr) + { construct(edge_vector); } // ! Degree of the vertex - HypernodeID nodeDegree(const HypernodeID u) const { + HypernodeID nodeDegree(const HypernodeID u) const + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); return header(u)->degree; } // ! Returns a range to loop over the incident nets of hypernode u. - IteratorRange incidentEdges(const HypernodeID u) const { + IteratorRange incidentEdges(const HypernodeID u) const + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); - return IteratorRange( - IncidentNetIterator(u, this, UL(0), false), - IncidentNetIterator(u, this, UL(0), true)); + return IteratorRange(IncidentNetIterator(u, this, UL(0), false), + IncidentNetIterator(u, this, UL(0), true)); } // ! Returns a range to loop over the incident nets of hypernode u. 
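For orientation, the constructor, nodeDegree() and incidentEdges() shown above might be exercised as in the following small sketch; the hyperedge contents and the `main` driver are invented for illustration, and the positional incidentEdges overload continues below.

```cpp
#include <iostream>

#include "mt-kahypar/datastructures/incident_net_array.h"

using namespace mt_kahypar;

int main()
{
  // Three illustrative hyperedges over four hypernodes (0..3).
  parallel::scalable_vector<parallel::scalable_vector<HypernodeID> > edge_vector = {
    { 0, 1, 2 }, { 1, 3 }, { 0, 3 }
  };
  ds::IncidentNetArray nets(4, edge_vector);

  for(HypernodeID u = 0; u < 4; ++u)
  {
    std::cout << "nets of " << u << " (degree " << nets.nodeDegree(u) << "):";
    for(const HyperedgeID he : nets.incidentEdges(u))
    {
      std::cout << " " << he;
    }
    std::cout << "\n";
  }
  return 0;
}
```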
IteratorRange incidentEdges(const HypernodeID u, - const size_t pos) const { + const size_t pos) const + { ASSERT(u < _num_hypernodes, "Hypernode" << u << "does not exist"); - return IteratorRange( - IncidentNetIterator(u, this, pos, false), - IncidentNetIterator(u, this, UL(0), true)); + return IteratorRange(IncidentNetIterator(u, this, pos, false), + IncidentNetIterator(u, this, UL(0), true)); } // ! Contracts two incident list of u and v, whereby u is the representative and // ! v the contraction partner of the contraction. The contraction involves to remove // ! all incident nets shared between u and v from the incident net list of v and append // ! the list of v to u. - void contract(const HypernodeID u, - const HypernodeID v, - const kahypar::ds::FastResetFlagArray<>& shared_hes_of_u_and_v, - const AcquireLockFunc& acquire_lock = NOOP_LOCK_FUNC, - const ReleaseLockFunc& release_lock = NOOP_LOCK_FUNC); + void contract(const HypernodeID u, const HypernodeID v, + const kahypar::ds::FastResetFlagArray<> &shared_hes_of_u_and_v, + const AcquireLockFunc &acquire_lock = NOOP_LOCK_FUNC, + const ReleaseLockFunc &release_lock = NOOP_LOCK_FUNC); // ! Uncontract two previously contracted vertices u and v. - // ! Uncontraction involves to decrement the version number of all incident lists contained - // ! in v and restore all incident nets with a version number equal to the new version. - // ! Note, uncontraction must be done in relative contraction order - void uncontract(const HypernodeID u, - const HypernodeID v, - const AcquireLockFunc& acquire_lock = NOOP_LOCK_FUNC, - const ReleaseLockFunc& release_lock = NOOP_LOCK_FUNC); + // ! Uncontraction involves to decrement the version number of all incident lists + // contained ! in v and restore all incident nets with a version number equal to the new + // version. ! Note, uncontraction must be done in relative contraction order + void uncontract(const HypernodeID u, const HypernodeID v, + const AcquireLockFunc &acquire_lock = NOOP_LOCK_FUNC, + const ReleaseLockFunc &release_lock = NOOP_LOCK_FUNC); // ! Uncontract two previously contracted vertices u and v. - // ! Uncontraction involves to decrement the version number of all incident lists contained - // ! in v and restore all incident nets with a version number equal to the new version. - // ! Additionally it calls case_one_func for a hyperedge he, if u and v were previously both - // ! adjacent to he and case_two_func if only v was previously adjacent to he. - // ! Note, uncontraction must be done in relative contraction order - void uncontract(const HypernodeID u, - const HypernodeID v, - const CaseOneFunc& case_one_func, - const CaseTwoFunc& case_two_func, - const AcquireLockFunc& acquire_lock, - const ReleaseLockFunc& release_lock); + // ! Uncontraction involves to decrement the version number of all incident lists + // contained ! in v and restore all incident nets with a version number equal to the new + // version. ! Additionally it calls case_one_func for a hyperedge he, if u and v were + // previously both ! adjacent to he and case_two_func if only v was previously adjacent + // to he. ! Note, uncontraction must be done in relative contraction order + void uncontract(const HypernodeID u, const HypernodeID v, + const CaseOneFunc &case_one_func, const CaseTwoFunc &case_two_func, + const AcquireLockFunc &acquire_lock, + const ReleaseLockFunc &release_lock); // ! Removes all incidents nets of u flagged in hes_to_remove. 
void removeIncidentNets(const HypernodeID u, - const kahypar::ds::FastResetFlagArray<>& hes_to_remove); + const kahypar::ds::FastResetFlagArray<> &hes_to_remove); // ! Restores all previously removed incident nets // ! Note, function must be called in reverse order of calls to @@ -239,42 +234,53 @@ class IncidentNetArray { void reset(); - size_t size_in_bytes() const { + size_t size_in_bytes() const + { return _size_in_bytes + sizeof(size_t) * _index_array.size(); } - private: +private: friend class IncidentNetIterator; - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Header* header(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Header *header(const HypernodeID u) const + { ASSERT(u <= _num_hypernodes, "Hypernode" << u << "does not exist"); - return reinterpret_cast(_incident_net_array.get() + _index_array[u]); + return reinterpret_cast(_incident_net_array.get() + _index_array[u]); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Header* header(const HypernodeID u) { - return const_cast(static_cast(*this).header(u)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Header *header(const HypernodeID u) + { + return const_cast
(static_cast(*this).header(u)); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Entry* firstEntry(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Entry *firstEntry(const HypernodeID u) const + { ASSERT(u <= _num_hypernodes, "Hypernode" << u << "does not exist"); - return reinterpret_cast(_incident_net_array.get() + _index_array[u] + sizeof(Header)); + return reinterpret_cast(_incident_net_array.get() + _index_array[u] + + sizeof(Header)); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Entry* firstEntry(const HypernodeID u) { - return const_cast(static_cast(*this).firstEntry(u)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Entry *firstEntry(const HypernodeID u) + { + return const_cast( + static_cast(*this).firstEntry(u)); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Entry* lastEntry(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Entry *lastEntry(const HypernodeID u) const + { ASSERT(u <= _num_hypernodes, "Hypernode" << u << "does not exist"); - return reinterpret_cast(_incident_net_array.get() + - _index_array[u] + sizeof(Header) + header(u)->size * sizeof(Entry)); + return reinterpret_cast(_incident_net_array.get() + _index_array[u] + + sizeof(Header) + + header(u)->size * sizeof(Entry)); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Entry* lastEntry(const HypernodeID u) { - return const_cast(static_cast(*this).lastEntry(u)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Entry *lastEntry(const HypernodeID u) + { + return const_cast(static_cast(*this).lastEntry(u)); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void swap(Entry* lhs, Entry* rhs) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void swap(Entry *lhs, Entry *rhs) + { Entry tmp_lhs = *lhs; *lhs = *rhs; *rhs = tmp_lhs; @@ -285,9 +291,8 @@ class IncidentNetArray { // ! removeIncidentNets(...) and all uncontraction that happens // ! between two consecutive calls to removeIncidentNets(...) must // ! be processed. 
- void restoreIncidentNets(const HypernodeID u, - const CaseOneFunc& case_one_func, - const CaseTwoFunc& case_two_func); + void restoreIncidentNets(const HypernodeID u, const CaseOneFunc &case_one_func, + const CaseTwoFunc &case_two_func); void append(const HypernodeID u, const HypernodeID v); @@ -295,7 +300,7 @@ class IncidentNetArray { void removeEmptyIncidentNetList(const HypernodeID u); - void construct(const HyperedgeVector& edge_vector); + void construct(const HyperedgeVector &edge_vector); bool verifyIteratorPointers(const HypernodeID u) const; @@ -305,5 +310,5 @@ class IncidentNetArray { parallel::tbb_unique_ptr _incident_net_array; }; -} // namespace ds -} // namespace mt_kahypar \ No newline at end of file +} // namespace ds +} // namespace mt_kahypar \ No newline at end of file diff --git a/mt-kahypar/datastructures/partitioned_graph.h b/mt-kahypar/datastructures/partitioned_graph.h index 1bc3acd9d..6e414b196 100644 --- a/mt-kahypar/datastructures/partitioned_graph.h +++ b/mt-kahypar/datastructures/partitioned_graph.h @@ -29,15 +29,15 @@ #pragma once #include -#include #include +#include #include "tbb/parallel_invoke.h" #include "kahypar-resources/meta/mandatory.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/connectivity_set.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h" #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" @@ -53,20 +53,21 @@ class TargetGraph; namespace ds { // Forward -template +template class DeltaPartitionedGraph; template -class PartitionedGraph { +class PartitionedGraph +{ private: - static_assert(!Hypergraph::is_partitioned, "Only unpartitioned hypergraphs are allowed"); + static_assert(!Hypergraph::is_partitioned, + "Only unpartitioned hypergraphs are allowed"); using Self = PartitionedGraph; - using NotificationFunc = std::function; - using DeltaFunction = std::function; - #define NOOP_NOTIFY_FUNC [] (const SynchronizedEdgeUpdate&) { } - #define NOOP_FUNC [] (const SynchronizedEdgeUpdate&) { } + using NotificationFunc = std::function; + using DeltaFunction = std::function; +#define NOOP_NOTIFY_FUNC [](const SynchronizedEdgeUpdate &) {} +#define NOOP_FUNC [](const SynchronizedEdgeUpdate &) {} // Factory using HypergraphFactory = typename Hypergraph::Factory; @@ -74,88 +75,95 @@ class PartitionedGraph { static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - class ConnectivityIterator { - public: + class ConnectivityIterator + { + public: using iterator_category = std::forward_iterator_tag; using value_type = PartitionID; - using reference = PartitionID&; - using pointer = const PartitionID*; + using reference = PartitionID &; + using pointer = const PartitionID *; using difference_type = std::ptrdiff_t; /*! 
* Constructs a connectivity iterator based on a pin iterator */ ConnectivityIterator(PartitionID first, PartitionID second, unsigned int count) : - _first(first), - _second(second), - _iteration_count(count) { - if (_first == _second) { - ++_iteration_count; - } - if (_first == kInvalidPartition) { - ++_iteration_count; - } else if (_second == kInvalidPartition) { - ++_iteration_count; - _second = _first; - } - _iteration_count = std::min(_iteration_count, 2); + _first(first), _second(second), _iteration_count(count) + { + if(_first == _second) + { + ++_iteration_count; + } + if(_first == kInvalidPartition) + { + ++_iteration_count; + } + else if(_second == kInvalidPartition) + { + ++_iteration_count; + _second = _first; + } + _iteration_count = std::min(_iteration_count, 2); } // ! Returns the current partiton id. - PartitionID operator* () const { + PartitionID operator*() const + { ASSERT(_iteration_count < 2); return _iteration_count == 0 ? _first : _second; } // ! Prefix increment. The iterator advances to the next valid element. - ConnectivityIterator & operator++ () { + ConnectivityIterator &operator++() + { ASSERT(_iteration_count < 2); ++_iteration_count; return *this; } // ! Postfix increment. The iterator advances to the next valid element. - ConnectivityIterator operator++ (int) { + ConnectivityIterator operator++(int) + { ConnectivityIterator copy = *this; - operator++ (); + operator++(); return copy; } - bool operator!= (const ConnectivityIterator& rhs) { + bool operator!=(const ConnectivityIterator &rhs) + { return _first != rhs._first || _second != rhs._second || _iteration_count != rhs._iteration_count; } - bool operator== (const ConnectivityIterator& rhs) { + bool operator==(const ConnectivityIterator &rhs) + { return _first == rhs._first && _second == rhs._second && _iteration_count == rhs._iteration_count; } - - private: + private: PartitionID _first = 0; PartitionID _second = 0; // state of the iterator unsigned int _iteration_count = 0; }; - struct EdgeMove { - EdgeMove() : - u(kInvalidHypernode), - to(kInvalidPartition), - version(0) { } + struct EdgeMove + { + EdgeMove() : u(kInvalidHypernode), to(kInvalidPartition), version(0) {} HypernodeID u; PartitionID to; uint32_t version; }; - public: +public: static constexpr bool is_static_hypergraph = Hypergraph::is_static_hypergraph; static constexpr bool is_graph = Hypergraph::is_graph; static constexpr bool is_partitioned = true; static constexpr bool supports_connectivity_set = true; - static constexpr mt_kahypar_partition_type_t TYPE = PartitionedGraphType::TYPE; + static constexpr mt_kahypar_partition_type_t TYPE = + PartitionedGraphType::TYPE; static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = ID(100000); static constexpr size_t SIZE_OF_EDGE_LOCK = sizeof(EdgeMove); @@ -165,228 +173,191 @@ class PartitionedGraph { using HyperedgeIterator = typename Hypergraph::HyperedgeIterator; using IncidenceIterator = typename Hypergraph::IncidenceIterator; using IncidentNetsIterator = typename Hypergraph::IncidentNetsIterator; - template - using DeltaPartition = DeltaPartitionedGraph, maintain_connectivity_set>; + template + using DeltaPartition = + DeltaPartitionedGraph, maintain_connectivity_set>; using ExtractedBlock = ExtractedHypergraph; PartitionedGraph() = default; - explicit PartitionedGraph(const PartitionID k, - Hypergraph& hypergraph) : - _input_num_nodes(hypergraph.initialNumNodes()), - _input_num_edges(hypergraph.initialNumEdges()), - _input_unique_ids(hypergraph.maxUniqueID()), - _k(k), - _hg(&hypergraph), - 
_target_graph(nullptr), - _part_weights(k, CAtomic(0)), - _part_ids( - "Refinement", "part_ids", hypergraph.initialNumNodes(), false, false), - _edge_sync_version(0), - _edge_sync( - "Refinement", "edge_sync", hypergraph.maxUniqueID(), false, false), - _edge_locks( - "Refinement", "edge_locks", hypergraph.maxUniqueID(), false, false), - _edge_markers(Hypergraph::is_static_hypergraph ? 0 : hypergraph.maxUniqueID()) { + explicit PartitionedGraph(const PartitionID k, Hypergraph &hypergraph) : + _input_num_nodes(hypergraph.initialNumNodes()), + _input_num_edges(hypergraph.initialNumEdges()), + _input_unique_ids(hypergraph.maxUniqueID()), _k(k), _hg(&hypergraph), + _target_graph(nullptr), _part_weights(k, CAtomic(0)), + _part_ids("Refinement", "part_ids", hypergraph.initialNumNodes(), false, false), + _edge_sync_version(0), + _edge_sync("Refinement", "edge_sync", hypergraph.maxUniqueID(), false, false), + _edge_locks("Refinement", "edge_locks", hypergraph.maxUniqueID(), false, false), + _edge_markers(Hypergraph::is_static_hypergraph ? 0 : hypergraph.maxUniqueID()) + { _part_ids.assign(hypergraph.initialNumNodes(), kInvalidPartition, false); _edge_sync.assign(hypergraph.maxUniqueID(), EdgeMove(), false); _edge_locks.assign(hypergraph.maxUniqueID(), SpinLock(), false); } - explicit PartitionedGraph(const PartitionID k, - Hypergraph& hypergraph, - parallel_tag_t) : - _input_num_nodes(hypergraph.initialNumNodes()), - _input_num_edges(hypergraph.initialNumEdges()), - _input_unique_ids(hypergraph.maxUniqueID()), - _k(k), - _hg(&hypergraph), - _target_graph(nullptr), - _part_weights(k, CAtomic(0)), - _part_ids(), - _edge_sync_version(0), - _edge_sync(), - _edge_locks(), - _edge_markers() { - tbb::parallel_invoke([&] { - _part_ids.resize( - "Refinement", "part_ids", hypergraph.initialNumNodes()); - _part_ids.assign(hypergraph.initialNumNodes(), kInvalidPartition); - }, [&] { - _edge_sync.resize( - "Refinement", "edge_sync", static_cast(hypergraph.maxUniqueID())); - _edge_sync.assign(hypergraph.maxUniqueID(), EdgeMove()); - }, [&] { - _edge_locks.resize( - "Refinement", "edge_locks", static_cast(hypergraph.maxUniqueID())); - _edge_locks.assign(hypergraph.maxUniqueID(), SpinLock()); - }, [&] { - if (!Hypergraph::is_static_hypergraph) { - _edge_markers.setSize(hypergraph.maxUniqueID()); - } - }); + explicit PartitionedGraph(const PartitionID k, Hypergraph &hypergraph, parallel_tag_t) : + _input_num_nodes(hypergraph.initialNumNodes()), + _input_num_edges(hypergraph.initialNumEdges()), + _input_unique_ids(hypergraph.maxUniqueID()), _k(k), _hg(&hypergraph), + _target_graph(nullptr), _part_weights(k, CAtomic(0)), _part_ids(), + _edge_sync_version(0), _edge_sync(), _edge_locks(), _edge_markers() + { + tbb::parallel_invoke( + [&] { + _part_ids.resize("Refinement", "part_ids", hypergraph.initialNumNodes()); + _part_ids.assign(hypergraph.initialNumNodes(), kInvalidPartition); + }, + [&] { + _edge_sync.resize("Refinement", "edge_sync", + static_cast(hypergraph.maxUniqueID())); + _edge_sync.assign(hypergraph.maxUniqueID(), EdgeMove()); + }, + [&] { + _edge_locks.resize("Refinement", "edge_locks", + static_cast(hypergraph.maxUniqueID())); + _edge_locks.assign(hypergraph.maxUniqueID(), SpinLock()); + }, + [&] { + if(!Hypergraph::is_static_hypergraph) + { + _edge_markers.setSize(hypergraph.maxUniqueID()); + } + }); } - PartitionedGraph(const PartitionedGraph&) = delete; - PartitionedGraph & operator= (const PartitionedGraph &) = delete; + PartitionedGraph(const PartitionedGraph &) = delete; + PartitionedGraph 
&operator=(const PartitionedGraph &) = delete; - PartitionedGraph(PartitionedGraph&& other) = default; - PartitionedGraph & operator= (PartitionedGraph&& other) = default; + PartitionedGraph(PartitionedGraph &&other) = default; + PartitionedGraph &operator=(PartitionedGraph &&other) = default; - ~PartitionedGraph() { - freeInternalData(); - } + ~PartitionedGraph() { freeInternalData(); } - void resetData() { - tbb::parallel_invoke([&] { - }, [&] { - _part_ids.assign(_part_ids.size(), kInvalidPartition); - }, [&] { - for (auto& x : _part_weights) x.store(0, std::memory_order_relaxed); - }, [&] { - _edge_sync.assign(_hg->maxUniqueID(), EdgeMove()); - }); + void resetData() + { + tbb::parallel_invoke([&] {}, + [&] { _part_ids.assign(_part_ids.size(), kInvalidPartition); }, + [&] { + for(auto &x : _part_weights) + x.store(0, std::memory_order_relaxed); + }, + [&] { _edge_sync.assign(_hg->maxUniqueID(), EdgeMove()); }); } // ####################### General Hypergraph Stats ###################### - Hypergraph& hypergraph() { + Hypergraph &hypergraph() + { ASSERT(_hg); return *_hg; } - void setHypergraph(Hypergraph& hypergraph) { - _hg = &hypergraph; - } + void setHypergraph(Hypergraph &hypergraph) { _hg = &hypergraph; } // ! Initial number of hypernodes - HypernodeID initialNumNodes() const { - return _hg->initialNumNodes(); - } + HypernodeID initialNumNodes() const { return _hg->initialNumNodes(); } // ! Number of nodes of the input hypergraph - HypernodeID topLevelNumNodes() const { - return _input_num_nodes; - } + HypernodeID topLevelNumNodes() const { return _input_num_nodes; } // ! Number of removed hypernodes - HypernodeID numRemovedHypernodes() const { - return _hg->numRemovedHypernodes(); - } + HypernodeID numRemovedHypernodes() const { return _hg->numRemovedHypernodes(); } // ! Initial number of hyperedges - HyperedgeID initialNumEdges() const { - return _hg->initialNumEdges(); - } + HyperedgeID initialNumEdges() const { return _hg->initialNumEdges(); } // ! Number of edges of the input hypergraph - HyperedgeID topLevelNumEdges() const { - return _input_num_edges; - } + HyperedgeID topLevelNumEdges() const { return _input_num_edges; } // ! Number of unique edge ids of the input hypergraph - HyperedgeID topLevelNumUniqueIds() const { - return _input_unique_ids; - } + HyperedgeID topLevelNumUniqueIds() const { return _input_unique_ids; } // ! Initial number of pins - HypernodeID initialNumPins() const { - return _hg->initialNumPins(); - } + HypernodeID initialNumPins() const { return _hg->initialNumPins(); } // ! Initial sum of the degree of all vertices - HypernodeID initialTotalVertexDegree() const { - return _hg->initialTotalVertexDegree(); - } + HypernodeID initialTotalVertexDegree() const { return _hg->initialTotalVertexDegree(); } // ! Total weight of hypergraph - HypernodeWeight totalWeight() const { - return _hg->totalWeight(); - } + HypernodeWeight totalWeight() const { return _hg->totalWeight(); } // ! 
Number of blocks this hypergraph is partitioned into - PartitionID k() const { - return _k; - } + PartitionID k() const { return _k; } // ####################### Mapping ###################### - void setTargetGraph(const TargetGraph* target_graph) { - _target_graph = target_graph; - } + void setTargetGraph(const TargetGraph *target_graph) { _target_graph = target_graph; } - bool hasTargetGraph() const { - return _target_graph != nullptr; - } + bool hasTargetGraph() const { return _target_graph != nullptr; } - const TargetGraph* targetGraph() const { - return _target_graph; - } + const TargetGraph *targetGraph() const { return _target_graph; } // ####################### Iterators ####################### // ! Iterates in parallel over all active nodes and calls function f // ! for each vertex - template - void doParallelForAllNodes(const F& f) const { + template + void doParallelForAllNodes(const F &f) const + { _hg->doParallelForAllNodes(f); } // ! Iterates in parallel over all active edges and calls function f // ! for each net - template - void doParallelForAllEdges(const F& f) const { + template + void doParallelForAllEdges(const F &f) const + { _hg->doParallelForAllEdges(f); } // ! Returns an iterator over the set of active nodes of the hypergraph - IteratorRange nodes() const { - return _hg->nodes(); - } + IteratorRange nodes() const { return _hg->nodes(); } // ! Returns an iterator over the set of active edges of the hypergraph - IteratorRange edges() const { - return _hg->edges(); - } + IteratorRange edges() const { return _hg->edges(); } // ! Returns a range to loop over the incident nets of hypernode u. - IteratorRange incidentEdges(const HypernodeID u) const { + IteratorRange incidentEdges(const HypernodeID u) const + { return _hg->incidentEdges(u); } // ! Returns a range to loop over the incident nets of hypernode u. IteratorRange incidentEdges(const HypernodeID u, - const size_t pos) const { + const size_t pos) const + { return _hg->incident_nets_of(u, pos); } // ! Returns a range to loop over the pins of hyperedge e. - IteratorRange pins(const HyperedgeID e) const { + IteratorRange pins(const HyperedgeID e) const + { return _hg->pins(e); } // ! Returns a range to loop over the set of block ids contained in hyperedge e. - IteratorRange connectivitySet(const HyperedgeID e) const { + IteratorRange connectivitySet(const HyperedgeID e) const + { ASSERT(_hg->edgeIsEnabled(e), "Hyperedge" << e << "is disabled"); ASSERT(e < _hg->initialNumEdges(), "Hyperedge" << e << "does not exist"); PartitionID first = partID(edgeSource(e)); PartitionID second = partID(edgeTarget(e)); - return IteratorRange( - ConnectivityIterator(first, second, 0), - ConnectivityIterator(first, second, 2)); + return IteratorRange(ConnectivityIterator(first, second, 0), + ConnectivityIterator(first, second, 2)); } // ####################### Hypernode Information ####################### // ! Weight of a vertex - HypernodeWeight nodeWeight(const HypernodeID u) const { - return _hg->nodeWeight(u); - } + HypernodeWeight nodeWeight(const HypernodeID u) const { return _hg->nodeWeight(u); } // ! 
Sets the weight of a vertex - void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) { + void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) + { const PartitionID block = partID(u); - if ( block != kInvalidPartition ) { + if(block != kInvalidPartition) + { ASSERT(block < _k); const HypernodeWeight delta = weight - _hg->nodeWeight(u); _part_weights[block] += delta; @@ -395,17 +366,14 @@ class PartitionedGraph { } // ! Degree of a hypernode - HyperedgeID nodeDegree(const HypernodeID u) const { - return _hg->nodeDegree(u); - } + HyperedgeID nodeDegree(const HypernodeID u) const { return _hg->nodeDegree(u); } // ! Returns, whether a hypernode is enabled or not - bool nodeIsEnabled(const HypernodeID u) const { - return _hg->nodeIsEnabled(u); - } + bool nodeIsEnabled(const HypernodeID u) const { return _hg->nodeIsEnabled(u); } // ! Restores a degree zero hypernode - void restoreDegreeZeroHypernode(const HypernodeID u, const PartitionID to) { + void restoreDegreeZeroHypernode(const HypernodeID u, const PartitionID to) + { _hg->restoreDegreeZeroHypernode(u); setNodePart(u, to); } @@ -413,52 +381,40 @@ class PartitionedGraph { // ####################### Hyperedge Information ####################### // ! Target of an edge - HypernodeID edgeTarget(const HyperedgeID e) const { - return _hg->edgeTarget(e); - } + HypernodeID edgeTarget(const HyperedgeID e) const { return _hg->edgeTarget(e); } // ! Source of an edge - HypernodeID edgeSource(const HyperedgeID e) const { - return _hg->edgeSource(e); - } + HypernodeID edgeSource(const HyperedgeID e) const { return _hg->edgeSource(e); } // ! Whether the edge is a single pin edge - bool isSinglePin(const HyperedgeID e) const { - return _hg->isSinglePin(e); - } + bool isSinglePin(const HyperedgeID e) const { return _hg->isSinglePin(e); } // ! Weight of a hyperedge - HypernodeWeight edgeWeight(const HyperedgeID e) const { - return _hg->edgeWeight(e); - } + HypernodeWeight edgeWeight(const HyperedgeID e) const { return _hg->edgeWeight(e); } // ! Unique id of a hyperedge - HyperedgeID uniqueEdgeID(const HyperedgeID e) const { - return _hg->uniqueEdgeID(e); - } + HyperedgeID uniqueEdgeID(const HyperedgeID e) const { return _hg->uniqueEdgeID(e); } // ! Sets the weight of a hyperedge - void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) { + void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) + { _hg->setEdgeWeight(e, weight); } // ! Number of pins of a hyperedge - HypernodeID edgeSize(const HyperedgeID e) const { - return _hg->edgeSize(e); - } + HypernodeID edgeSize(const HyperedgeID e) const { return _hg->edgeSize(e); } // ! 
Returns, whether a hyperedge is enabled or not - bool edgeIsEnabled(const HyperedgeID e) const { - return _hg->edgeIsEnabled(e); - } + bool edgeIsEnabled(const HyperedgeID e) const { return _hg->edgeIsEnabled(e); } // ####################### Uncontraction ####################### - template - void uncontract(const Batch& batch, GainCache& gain_cache) { + template + void uncontract(const Batch &batch, GainCache &gain_cache) + { // Set block ids of contraction partners tbb::parallel_for(UL(0), batch.size(), [&](const size_t i) { - const Memento& memento = batch[i]; + const Memento &memento = batch[i]; ASSERT(nodeIsEnabled(memento.u)); ASSERT(!nodeIsEnabled(memento.v)); const PartitionID part_id = partID(memento.u); @@ -466,20 +422,24 @@ class PartitionedGraph { setOnlyNodePart(memento.v, part_id); }); - _hg->uncontract(batch, - [&](const HyperedgeID e) { return !_edge_markers.compare_and_set_to_true(uniqueEdgeID(e)); }, - [&](const HypernodeID u, const HypernodeID v, const HyperedgeID e) { - // In this case, e was a single pin edge before uncontraction - gain_cache.uncontractUpdateAfterRestore(*this, u, v, e, 0); - }, - [&](const HypernodeID u, const HypernodeID v, const HyperedgeID e) { - // In this case, u is replaced by v in e - gain_cache.uncontractUpdateAfterReplacement(*this, u, v, e); - }); + _hg->uncontract( + batch, + [&](const HyperedgeID e) { + return !_edge_markers.compare_and_set_to_true(uniqueEdgeID(e)); + }, + [&](const HypernodeID u, const HypernodeID v, const HyperedgeID e) { + // In this case, e was a single pin edge before uncontraction + gain_cache.uncontractUpdateAfterRestore(*this, u, v, e, 0); + }, + [&](const HypernodeID u, const HypernodeID v, const HyperedgeID e) { + // In this case, u is replaced by v in e + gain_cache.uncontractUpdateAfterReplacement(*this, u, v, e); + }); - if constexpr ( GainCache::initializes_gain_cache_entry_after_batch_uncontractions ) { + if constexpr(GainCache::initializes_gain_cache_entry_after_batch_uncontractions) + { tbb::parallel_for(UL(0), batch.size(), [&](const size_t i) { - const Memento& memento = batch[i]; + const Memento &memento = batch[i]; gain_cache.initializeGainCacheEntryForNode(*this, memento.v); }); } @@ -489,13 +449,13 @@ class PartitionedGraph { // ####################### Restore Hyperedges ####################### - void restoreLargeEdge(const HyperedgeID& he) { - _hg->restoreLargeEdge(he); - } + void restoreLargeEdge(const HyperedgeID &he) { _hg->restoreLargeEdge(he); } - template - void restoreSinglePinAndParallelNets(const vec& hes_to_restore, - GainCache& gain_cache) { + template + void restoreSinglePinAndParallelNets( + const vec &hes_to_restore, + GainCache &gain_cache) + { _edge_markers.reset(); _hg->restoreSinglePinAndParallelNets(hes_to_restore); @@ -503,13 +463,16 @@ class PartitionedGraph { const HyperedgeID he = hes_to_restore[i].old_id; ASSERT(edgeIsEnabled(he)); const bool is_single_pin_he = edgeSize(he) == 1; - if ( is_single_pin_he ) { + if(is_single_pin_he) + { // Restore single-pin net HypernodeID single_vertex_of_he = edgeSource(he); const PartitionID block_of_single_pin = partID(single_vertex_of_he); - gain_cache.restoreSinglePinHyperedge( - single_vertex_of_he, block_of_single_pin, edgeWeight(he)); - } else if ( nodeIsEnabled(edgeSource(he)) ) { + gain_cache.restoreSinglePinHyperedge(single_vertex_of_he, block_of_single_pin, + edgeWeight(he)); + } + else if(nodeIsEnabled(edgeSource(he))) + { // Restore parallel net gain_cache.restoreIdenticalHyperedge(*this, he); } @@ -519,35 +482,40 @@ class 
PartitionedGraph { // ####################### Partition Information ####################### // ! Block that vertex u belongs to - PartitionID partID(const HypernodeID u) const { + PartitionID partID(const HypernodeID u) const + { ASSERT(u < initialNumNodes(), "Hypernode" << u << "does not exist"); return _part_ids[u]; } - void extractPartIDs(Array& part_ids) { + void extractPartIDs(Array &part_ids) + { // If we pass the input hypergraph to initial partitioning, then initial partitioning // will pass an part ID vector of size |V'|, where V' are the number of nodes of - // smallest hypergraph, while the _part_ids vector of the input hypergraph is initialized - // with the original number of nodes. This can cause segmentation fault when we simply swap them - // during main uncoarsening. - if ( _part_ids.size() == part_ids.size() ) { + // smallest hypergraph, while the _part_ids vector of the input hypergraph is + // initialized with the original number of nodes. This can cause segmentation fault + // when we simply swap them during main uncoarsening. + if(_part_ids.size() == part_ids.size()) + { std::swap(_part_ids, part_ids); - } else { + } + else + { ASSERT(part_ids.size() <= _part_ids.size()); - tbb::parallel_for(UL(0), part_ids.size(), [&](const size_t i) { - part_ids[i] = _part_ids[i]; - }); + tbb::parallel_for(UL(0), part_ids.size(), + [&](const size_t i) { part_ids[i] = _part_ids[i]; }); } } - - void setOnlyNodePart(const HypernodeID u, PartitionID p) { + void setOnlyNodePart(const HypernodeID u, PartitionID p) + { ASSERT(p != kInvalidPartition && p < _k); ASSERT(_part_ids[u] == kInvalidPartition); _part_ids[u] = p; } - void setNodePart(const HypernodeID u, PartitionID p) { + void setNodePart(const HypernodeID u, PartitionID p) + { ASSERT(_part_ids[u] == kInvalidPartition); setOnlyNodePart(u, p); _part_weights[p].fetch_add(nodeWeight(u), std::memory_order_relaxed); @@ -555,71 +523,74 @@ class PartitionedGraph { // ! Changes the block id of vertex u from block 'from' to block 'to' // ! Returns true, if move of vertex u to corresponding block succeeds. 
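Ahead of the overloads that follow, a hypothetical sketch of how such moves are typically driven; `move_node_to_block`, the block ids and the lambda body are invented, and it assumes a freshly constructed partitioned graph with k >= 2 whose nodes are still unassigned.

```cpp
#include "mt-kahypar/datastructures/partitioned_graph.h"

// Sketch only: assign every node to block 0, then move one node to block 1
// while observing the synchronized edge updates caused by the move.
template <typename PartitionedGraphT>
bool move_node_to_block(PartitionedGraphT &phg, const mt_kahypar::HypernodeID v)
{
  using namespace mt_kahypar;

  // setNodePart() also accumulates the block weights, so no explicit
  // initializePartition() call is needed on this code path.
  for(const HypernodeID u : phg.nodes())
  {
    phg.setNodePart(u, 0);
  }

  // The delta function receives one SynchronizedEdgeUpdate per incident edge
  // and can be used, e.g., to maintain an objective value incrementally.
  return phg.changeNodePart(v, /*from=*/0, /*to=*/1,
                            [](const SynchronizedEdgeUpdate &sync_update) {
                              // Pin counts after the move are available here,
                              // e.g. sync_update.pin_count_in_to_part_after.
                            });
}
```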
- template - bool changeNodePart(const HypernodeID u, - PartitionID from, - PartitionID to, - HypernodeWeight max_weight_to, - SuccessFunc&& report_success, - const DeltaFunction& delta_func) { - return changeNodePartImpl(u, from, to, - max_weight_to, report_success, delta_func, NOOP_NOTIFY_FUNC); - } - - bool changeNodePart(const HypernodeID u, - PartitionID from, - PartitionID to, - const DeltaFunction& delta_func = NOOP_FUNC, - const bool force_moving_fixed_vertices = false) { - return changeNodePartImpl(u, from, to, - std::numeric_limits::max(), []{}, - delta_func, NOOP_NOTIFY_FUNC, force_moving_fixed_vertices); - } - - template - bool changeNodePart(GainCache& gain_cache, - const HypernodeID u, - PartitionID from, - PartitionID to, - HypernodeWeight max_weight_to, - SuccessFunc&& report_success, - const DeltaFunction& delta_func) { - auto my_delta_func = [&](const SynchronizedEdgeUpdate& sync_update) { + template + bool changeNodePart(const HypernodeID u, PartitionID from, PartitionID to, + HypernodeWeight max_weight_to, SuccessFunc &&report_success, + const DeltaFunction &delta_func) + { + return changeNodePartImpl(u, from, to, max_weight_to, report_success, + delta_func, NOOP_NOTIFY_FUNC); + } + + bool changeNodePart(const HypernodeID u, PartitionID from, PartitionID to, + const DeltaFunction &delta_func = NOOP_FUNC, + const bool force_moving_fixed_vertices = false) + { + return changeNodePartImpl( + u, from, to, std::numeric_limits::max(), [] {}, delta_func, + NOOP_NOTIFY_FUNC, force_moving_fixed_vertices); + } + + template + bool changeNodePart(GainCache &gain_cache, const HypernodeID u, PartitionID from, + PartitionID to, HypernodeWeight max_weight_to, + SuccessFunc &&report_success, const DeltaFunction &delta_func) + { + auto my_delta_func = [&](const SynchronizedEdgeUpdate &sync_update) { delta_func(sync_update); gain_cache.deltaGainUpdate(*this, sync_update); }; - if constexpr ( !GainCache::requires_notification_before_update ) { - return changeNodePartImpl(u, from, to, max_weight_to, - report_success, my_delta_func, NOOP_NOTIFY_FUNC); - } else { - return changeNodePartImpl(u, from, to, max_weight_to, - report_success, my_delta_func, [&](SynchronizedEdgeUpdate& sync_update) { - gain_cache.notifyBeforeDeltaGainUpdate(*this, sync_update); - }); + if constexpr(!GainCache::requires_notification_before_update) + { + return changeNodePartImpl(u, from, to, max_weight_to, report_success, + my_delta_func, NOOP_NOTIFY_FUNC); + } + else + { + return changeNodePartImpl( + u, from, to, max_weight_to, report_success, my_delta_func, + [&](SynchronizedEdgeUpdate &sync_update) { + gain_cache.notifyBeforeDeltaGainUpdate(*this, sync_update); + }); } } - template - bool changeNodePart(GainCache& gain_cache, - const HypernodeID u, - PartitionID from, - PartitionID to) { - return changeNodePart(gain_cache, u, from, to, - std::numeric_limits::max(), []{}, NoOpDeltaFunc()); + template + bool changeNodePart(GainCache &gain_cache, const HypernodeID u, PartitionID from, + PartitionID to) + { + return changeNodePart( + gain_cache, u, from, to, std::numeric_limits::max(), [] {}, + NoOpDeltaFunc()); } // ! Weight of a block - HypernodeWeight partWeight(const PartitionID p) const { + HypernodeWeight partWeight(const PartitionID p) const + { ASSERT(p != kInvalidPartition && p < _k); return _part_weights[p].load(std::memory_order_relaxed); } // ! Returns whether hypernode u is adjacent to a least one cut hyperedge. 
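The predicate documented above (and defined right below) is commonly used to seed a boundary work list; a sketch with the invented helper `collect_border_nodes`:

```cpp
#include <vector>

#include "mt-kahypar/datastructures/partitioned_graph.h"

// Sketch only: gather all nodes that touch at least one cut edge.
template <typename PartitionedGraphT>
std::vector<mt_kahypar::HypernodeID>
collect_border_nodes(const PartitionedGraphT &phg)
{
  std::vector<mt_kahypar::HypernodeID> border_nodes;
  for(const mt_kahypar::HypernodeID u : phg.nodes())
  {
    // A node is a border node iff some incident edge has its other endpoint
    // in a different block, i.e. the edge is a cut edge.
    if(phg.isBorderNode(u))
    {
      border_nodes.push_back(u);
    }
  }
  return border_nodes;
}
```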
- bool isBorderNode(const HypernodeID u) const { + bool isBorderNode(const HypernodeID u) const + { const PartitionID part_id = partID(u); - if ( nodeDegree(u) <= HIGH_DEGREE_THRESHOLD ) { - for ( const HyperedgeID& he : incidentEdges(u) ) { - if ( partID(edgeTarget(he)) != part_id ) { + if(nodeDegree(u) <= HIGH_DEGREE_THRESHOLD) + { + for(const HyperedgeID &he : incidentEdges(u)) + { + if(partID(edgeTarget(he)) != part_id) + { return true; } } @@ -627,11 +598,14 @@ class PartitionedGraph { return false; } - HypernodeID numIncidentCutHyperedges(const HypernodeID u) const { + HypernodeID numIncidentCutHyperedges(const HypernodeID u) const + { const PartitionID part_id = partID(u); HypernodeID num_incident_cut_hyperedges = 0; - for ( const HyperedgeID& he : incidentEdges(u) ) { - if ( partID(edgeTarget(he)) != part_id ) { + for(const HyperedgeID &he : incidentEdges(u)) + { + if(partID(edgeTarget(he)) != part_id) + { ++num_incident_cut_hyperedges; } } @@ -639,99 +613,118 @@ class PartitionedGraph { } // ! Number of blocks which pins of hyperedge e belongs to - PartitionID connectivity(const HyperedgeID e) const { + PartitionID connectivity(const HyperedgeID e) const + { ASSERT(e < _hg->initialNumEdges(), "Hyperedge" << e << "does not exist"); ASSERT(edgeIsEnabled(e), "Hyperedge" << e << "is disabled"); const PartitionID source_id = partID(edgeSource(e)); const PartitionID target_id = partID(edgeTarget(e)); PartitionID sum = 0; - if (source_id != kInvalidPartition) { + if(source_id != kInvalidPartition) + { ++sum; } - if (target_id != kInvalidPartition && target_id != source_id) { + if(target_id != kInvalidPartition && target_id != source_id) + { ++sum; } return sum; } // ! Returns the number pins of hyperedge e that are part of block id - HypernodeID pinCountInPart(const HyperedgeID e, const PartitionID p) const { + HypernodeID pinCountInPart(const HyperedgeID e, const PartitionID p) const + { ASSERT(e < _hg->initialNumEdges(), "Hyperedge" << e << "does not exist"); ASSERT(edgeIsEnabled(e), "Hyperedge" << e << "is disabled"); ASSERT(p != kInvalidPartition && p < _k); HypernodeID count = 0; - if (p == partID(edgeSource(e))) { + if(p == partID(edgeSource(e))) + { count++; } - if (!isSinglePin(e) && p == partID(edgeTarget(e))) { + if(!isSinglePin(e) && p == partID(edgeTarget(e))) + { count++; } return count; } // ! Creates a shallow copy of the connectivity set of hyperedge he - StaticBitset& shallowCopyOfConnectivitySet(const HyperedgeID he) const { + StaticBitset &shallowCopyOfConnectivitySet(const HyperedgeID he) const + { // Shallow copy not possible for graph data structure - Bitset& deep_copy = deepCopyOfConnectivitySet(he); - StaticBitset& shallow_copy = _shallow_copy_bitset.local(); + Bitset &deep_copy = deepCopyOfConnectivitySet(he); + StaticBitset &shallow_copy = _shallow_copy_bitset.local(); shallow_copy.set(deep_copy.numBlocks(), deep_copy.data()); return shallow_copy; } // ! 
Creates a deep copy of the connectivity set of hyperedge he - Bitset& deepCopyOfConnectivitySet(const HyperedgeID he) const { - Bitset& deep_copy = _deep_copy_bitset.local(); + Bitset &deepCopyOfConnectivitySet(const HyperedgeID he) const + { + Bitset &deep_copy = _deep_copy_bitset.local(); deep_copy.resize(_k); const PartitionID source_block = partID(edgeSource(he)); const PartitionID target_block = partID(edgeTarget(he)); - if ( source_block != kInvalidPartition ) deep_copy.set(source_block); - if ( target_block != kInvalidPartition ) deep_copy.set(target_block); + if(source_block != kInvalidPartition) + deep_copy.set(source_block); + if(target_block != kInvalidPartition) + deep_copy.set(target_block); return deep_copy; } // ! Initializes the partition of the hypergraph, if block ids are assigned with - // ! setOnlyNodePart(...). In that case, block weights must be initialized explicitly here. - void initializePartition() { - initializeBlockWeights(); - } + // ! setOnlyNodePart(...). In that case, block weights must be initialized explicitly + // here. + void initializePartition() { initializeBlockWeights(); } // ! Reset partition (not thread-safe) - void resetPartition() { + void resetPartition() + { _part_ids.assign(_part_ids.size(), kInvalidPartition, false); _edge_sync.assign(_hg->maxUniqueID(), EdgeMove(), false); - for (auto& weight : _part_weights) { + for(auto &weight : _part_weights) + { weight.store(0, std::memory_order_relaxed); } } // ! Only for testing - void recomputePartWeights() { - for (PartitionID p = 0; p < _k; ++p) { + void recomputePartWeights() + { + for(PartitionID p = 0; p < _k; ++p) + { _part_weights[p].store(0); } - for (HypernodeID u : nodes()) { - _part_weights[ partID(u) ] += nodeWeight(u); + for(HypernodeID u : nodes()) + { + _part_weights[partID(u)] += nodeWeight(u); } } - void recomputeMoveFromPenalty(const HypernodeID) { + void recomputeMoveFromPenalty(const HypernodeID) + { // Nothing to do here } // ! Only for testing - bool checkTrackedPartitionInformation() { + bool checkTrackedPartitionInformation() + { bool success = true; - for (HyperedgeID e : edges()) { + for(HyperedgeID e : edges()) + { PartitionID expected_connectivity = 0; - for (PartitionID i = 0; i < k(); ++i) { + for(PartitionID i = 0; i < k(); ++i) + { expected_connectivity += (pinCountInPart(e, i) > 0); } - if ( expected_connectivity != connectivity(e) ) { - LOG << "Connectivity of hyperedge" << e << "=>" << - "Expected:" << V(expected_connectivity) << "," << - "Actual:" << V(connectivity(e)); + if(expected_connectivity != connectivity(e)) + { + LOG << "Connectivity of hyperedge" << e << "=>" + << "Expected:" << V(expected_connectivity) << "," + << "Actual:" << V(connectivity(e)); success = false; } } @@ -740,51 +733,64 @@ class PartitionedGraph { } // ! 
Only for testing - template - bool checkTrackedPartitionInformation(GainCache& gain_cache) { + template + bool checkTrackedPartitionInformation(GainCache &gain_cache) + { bool success = true; - for (HyperedgeID e : edges()) { + for(HyperedgeID e : edges()) + { PartitionID expected_connectivity = 0; - for (PartitionID i = 0; i < k(); ++i) { + for(PartitionID i = 0; i < k(); ++i) + { expected_connectivity += (pinCountInPart(e, i) > 0); } - if ( expected_connectivity != connectivity(e) ) { - LOG << "Connectivity of hyperedge" << e << "=>" << - "Expected:" << V(expected_connectivity) << "," << - "Actual:" << V(connectivity(e)); + if(expected_connectivity != connectivity(e)) + { + LOG << "Connectivity of hyperedge" << e << "=>" + << "Expected:" << V(expected_connectivity) << "," + << "Actual:" << V(connectivity(e)); success = false; } } - if ( gain_cache.isInitialized() ) { - for (HypernodeID u : nodes()) { + if(gain_cache.isInitialized()) + { + for(HypernodeID u : nodes()) + { const PartitionID block_of_u = partID(u); - if ( gain_cache.penaltyTerm(u, block_of_u) != - gain_cache.recomputePenaltyTerm(*this, u) ) { - LOG << "Penalty term of hypernode" << u << "=>" << - "Expected:" << V(gain_cache.recomputePenaltyTerm(*this, u)) << ", " << - "Actual:" << V(gain_cache.penaltyTerm(u, block_of_u)); - for ( const HyperedgeID& e : incidentEdges(u) ) { - LOG << V(u) << V(partID(u)) << V(e) << V(edgeSize(e)) - << V(edgeWeight(e)) << V(pinCountInPart(e, partID(u))); + if(gain_cache.penaltyTerm(u, block_of_u) != + gain_cache.recomputePenaltyTerm(*this, u)) + { + LOG << "Penalty term of hypernode" << u << "=>" + << "Expected:" << V(gain_cache.recomputePenaltyTerm(*this, u)) << ", " + << "Actual:" << V(gain_cache.penaltyTerm(u, block_of_u)); + for(const HyperedgeID &e : incidentEdges(u)) + { + LOG << V(u) << V(partID(u)) << V(e) << V(edgeSize(e)) << V(edgeWeight(e)) + << V(pinCountInPart(e, partID(u))); } success = false; } - for ( const PartitionID& i : gain_cache.adjacentBlocks(u) ) { - if (partID(u) != i) { - if ( gain_cache.benefitTerm(u, i) != - gain_cache.recomputeBenefitTerm(*this, u, i) ) { - LOG << "Benefit term of hypernode" << u << "in block" << i << "=>" << - "Expected:" << V(gain_cache.recomputeBenefitTerm(*this, u, i)) << ", " << - "Actual:" << V(gain_cache.benefitTerm(u, i)); + for(const PartitionID &i : gain_cache.adjacentBlocks(u)) + { + if(partID(u) != i) + { + if(gain_cache.benefitTerm(u, i) != + gain_cache.recomputeBenefitTerm(*this, u, i)) + { + LOG << "Benefit term of hypernode" << u << "in block" << i << "=>" + << "Expected:" << V(gain_cache.recomputeBenefitTerm(*this, u, i)) + << ", " + << "Actual:" << V(gain_cache.benefitTerm(u, i)); success = false; } } } } - if ( !gain_cache.verifyTrackedAdjacentBlocksOfNodes(*this) ) { + if(!gain_cache.verifyTrackedAdjacentBlocksOfNodes(*this)) + { success = false; } } @@ -793,21 +799,19 @@ class PartitionedGraph { // ####################### Fixed Vertex Support ####################### - bool hasFixedVertices() const { - return _hg->hasFixedVertices(); - } + bool hasFixedVertices() const { return _hg->hasFixedVertices(); } - bool isFixed(const HypernodeID hn) const { - return _hg->isFixed(hn); - } + bool isFixed(const HypernodeID hn) const { return _hg->isFixed(hn); } - PartitionID fixedVertexBlock(const HypernodeID hn) const { + PartitionID fixedVertexBlock(const HypernodeID hn) const + { return _hg->fixedVertexBlock(hn); } // ####################### Memory Consumption ####################### - void memoryConsumption(utils::MemoryTreeNode* 
parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); parent->addChild("Part Weights", sizeof(CAtomic) * _k); parent->addChild("Part IDs", sizeof(PartitionID) * _hg->initialNumNodes()); @@ -826,78 +830,93 @@ class PartitionedGraph { // ! the original hypergraph. // ! If cut_net_splitting is activated, then cut hyperedges are splitted containing // ! only the pins of the corresponding block. Otherwise, they are discarded. - ExtractedBlock extract(const PartitionID block, - const vec* already_cut, + ExtractedBlock extract(const PartitionID block, const vec *already_cut, bool /*cut_net_splitting*/, - bool stable_construction_of_incident_edges) { + bool stable_construction_of_incident_edges) + { ASSERT(block != kInvalidPartition && block < _k); ASSERT(!already_cut || already_cut->size() == _hg->initialNumEdges()); // Compactify vertex ids ExtractedBlock extracted_block; - vec& node_mapping = extracted_block.hn_mapping; + vec &node_mapping = extracted_block.hn_mapping; node_mapping.assign(_hg->initialNumNodes(), kInvalidHypernode); vec he_mapping(_hg->initialNumEdges(), kInvalidHyperedge); HypernodeID num_nodes = 0; HypernodeID num_edges = 0; - tbb::parallel_invoke([&] { - for (const HypernodeID& node : nodes()) { - if (partID(node) == block) { - node_mapping[node] = num_nodes++; - } - } - }, [&] { - for (const HyperedgeID& edge : edges()) { - const HypernodeID source = edgeSource(edge); - const HypernodeID target = edgeTarget(edge); - if (partID(source) == block && partID(target) == block && source < target) { - he_mapping[edge] = num_edges++; - } - } - }); + tbb::parallel_invoke( + [&] { + for(const HypernodeID &node : nodes()) + { + if(partID(node) == block) + { + node_mapping[node] = num_nodes++; + } + } + }, + [&] { + for(const HyperedgeID &edge : edges()) + { + const HypernodeID source = edgeSource(edge); + const HypernodeID target = edgeTarget(edge); + if(partID(source) == block && partID(target) == block && source < target) + { + he_mapping[edge] = num_edges++; + } + } + }); // Extract plain hypergraph data for corresponding block - using EdgeVector = vec>; + using EdgeVector = vec >; EdgeVector edge_vector; vec edge_weight; vec node_weight; - tbb::parallel_invoke([&] { - edge_vector.resize(num_edges); - edge_weight.resize(num_edges); - doParallelForAllEdges([&](const HyperedgeID edge) { - const HypernodeID source = edgeSource(edge); - const HypernodeID target = edgeTarget(edge); - if (partID(source) == block && partID(target) == block && source < target) { - ASSERT(he_mapping[edge] < num_edges); - edge_weight[he_mapping[edge]] = edgeWeight(edge); - for (const HypernodeID& pin : pins(edge)) { - unused(pin); - edge_vector[he_mapping[edge]] = {node_mapping[source], node_mapping[target]}; + tbb::parallel_invoke( + [&] { + edge_vector.resize(num_edges); + edge_weight.resize(num_edges); + doParallelForAllEdges([&](const HyperedgeID edge) { + const HypernodeID source = edgeSource(edge); + const HypernodeID target = edgeTarget(edge); + if(partID(source) == block && partID(target) == block && source < target) + { + ASSERT(he_mapping[edge] < num_edges); + edge_weight[he_mapping[edge]] = edgeWeight(edge); + for(const HypernodeID &pin : pins(edge)) + { + unused(pin); + edge_vector[he_mapping[edge]] = { node_mapping[source], + node_mapping[target] }; + } + } + }); + }, + [&] { + node_weight.resize(num_nodes); + doParallelForAllNodes([&](const HypernodeID node) { + if(partID(node) == block) + { + node_weight[node_mapping[node]] = nodeWeight(node); + } + 
}); + }, + [&] { + if(already_cut) + { + // Extracted graph only contains non-cut edges + extracted_block.already_cut.assign(num_edges, 0); } - } - }); - }, [&] { - node_weight.resize(num_nodes); - doParallelForAllNodes([&](const HypernodeID node) { - if (partID(node) == block) { - node_weight[node_mapping[node]] = nodeWeight(node); - } - }); - }, [&] { - if ( already_cut ) { - // Extracted graph only contains non-cut edges - extracted_block.already_cut.assign(num_edges, 0); - } - }); + }); // Construct hypergraph extracted_block.hg = HypergraphFactory::construct_from_graph_edges( - num_nodes, num_edges, edge_vector, edge_weight.data(), node_weight.data(), - stable_construction_of_incident_edges); + num_nodes, num_edges, edge_vector, edge_weight.data(), node_weight.data(), + stable_construction_of_incident_edges); // Set community ids - doParallelForAllNodes([&](const HypernodeID& node) { - if (partID(node) == block) { + doParallelForAllNodes([&](const HypernodeID &node) { + if(partID(node) == block) + { const HypernodeID extracted_node = node_mapping[node]; extracted_block.hg.setCommunityID(extracted_node, _hg->communityID(node)); } @@ -907,124 +926,142 @@ class PartitionedGraph { // ! Extracts all blocks of the partition (from block 0 to block k). // ! This function has running time linear in the size of the original hypergraph - // ! and should be used instead of extract(...) when more than two blocks should be extracted. - std::pair, vec> extractAllBlocks(const PartitionID k, - const vec* already_cut, - const bool /*cut_net_splitting*/, - const bool stable_construction_of_incident_edges) { + // ! and should be used instead of extract(...) when more than two blocks should be + // extracted. + std::pair, vec > + extractAllBlocks(const PartitionID k, const vec *already_cut, + const bool /*cut_net_splitting*/, + const bool stable_construction_of_incident_edges) + { ASSERT(k <= _k); // Compactify node and edge ids vec hn_mapping(_hg->initialNumNodes(), kInvalidHypernode); vec he_mapping(_hg->initialNumEdges(), kInvalidHyperedge); - vec> nodes_cnt( - k, parallel::AtomicWrapper(0)); - vec> edges_cnt( - k, parallel::AtomicWrapper(0)); - if ( stable_construction_of_incident_edges ) { + vec > nodes_cnt( + k, parallel::AtomicWrapper(0)); + vec > edges_cnt( + k, parallel::AtomicWrapper(0)); + if(stable_construction_of_incident_edges) + { // Stable construction for deterministic behavior requires // to determine node and edge IDs sequentially - tbb::parallel_invoke([&] { - for ( const HypernodeID& hn : nodes() ) { - const PartitionID block = partID(hn); - if ( block < k ) { - hn_mapping[hn] = nodes_cnt[block]++; - } - } - }, [&] { - for ( const HyperedgeID& he : edges() ) { - const HypernodeID source = edgeSource(he); - const HypernodeID target = edgeTarget(he); - const PartitionID sourceBlock = partID(source); - const PartitionID targetBlock = partID(target); - if (source < target && sourceBlock == targetBlock && sourceBlock < k) { - he_mapping[he] = edges_cnt[sourceBlock]++; - } - } - }); - } else { - tbb::parallel_invoke([&] { - doParallelForAllNodes([&](const HypernodeID& hn) { - const PartitionID block = partID(hn); - if ( block < k ) { - hn_mapping[hn] = nodes_cnt[block]++; - } - }); - }, [&] { - doParallelForAllEdges([&](const HyperedgeID& he) { - const HypernodeID source = edgeSource(he); - const HypernodeID target = edgeTarget(he); - const PartitionID sourceBlock = partID(source); - const PartitionID targetBlock = partID(target); - if (source < target && sourceBlock == targetBlock && 
sourceBlock < k) { - he_mapping[he] = edges_cnt[sourceBlock]++; - } - }); - }); + tbb::parallel_invoke( + [&] { + for(const HypernodeID &hn : nodes()) + { + const PartitionID block = partID(hn); + if(block < k) + { + hn_mapping[hn] = nodes_cnt[block]++; + } + } + }, + [&] { + for(const HyperedgeID &he : edges()) + { + const HypernodeID source = edgeSource(he); + const HypernodeID target = edgeTarget(he); + const PartitionID sourceBlock = partID(source); + const PartitionID targetBlock = partID(target); + if(source < target && sourceBlock == targetBlock && sourceBlock < k) + { + he_mapping[he] = edges_cnt[sourceBlock]++; + } + } + }); + } + else + { + tbb::parallel_invoke( + [&] { + doParallelForAllNodes([&](const HypernodeID &hn) { + const PartitionID block = partID(hn); + if(block < k) + { + hn_mapping[hn] = nodes_cnt[block]++; + } + }); + }, + [&] { + doParallelForAllEdges([&](const HyperedgeID &he) { + const HypernodeID source = edgeSource(he); + const HypernodeID target = edgeTarget(he); + const PartitionID sourceBlock = partID(source); + const PartitionID targetBlock = partID(target); + if(source < target && sourceBlock == targetBlock && sourceBlock < k) + { + he_mapping[he] = edges_cnt[sourceBlock]++; + } + }); + }); } - using EdgeVector = vec>; + using EdgeVector = vec >; vec extracted_blocks(k); vec edge_vector(k); - vec> edge_weight(k); - vec> node_weight(k); + vec > edge_weight(k); + vec > node_weight(k); // Allocate auxilliary graph data structures tbb::parallel_for(static_cast(0), k, [&](const PartitionID p) { const HypernodeID num_nodes = nodes_cnt[p]; const HyperedgeID num_edges = edges_cnt[p]; - tbb::parallel_invoke([&] { - edge_vector[p].resize(num_edges); - }, [&] { - edge_weight[p].resize(num_edges); - }, [&] { - node_weight[p].resize(num_nodes); - }, [&] { - if ( already_cut ) { - // Extracted graph only contains non-cut edges - extracted_blocks[p].already_cut.assign(num_edges, 0); - } - }); + tbb::parallel_invoke([&] { edge_vector[p].resize(num_edges); }, + [&] { edge_weight[p].resize(num_edges); }, + [&] { node_weight[p].resize(num_nodes); }, + [&] { + if(already_cut) + { + // Extracted graph only contains non-cut edges + extracted_blocks[p].already_cut.assign(num_edges, 0); + } + }); }); // Write blocks to auxilliary graph data structure - tbb::parallel_invoke([&] { - doParallelForAllEdges([&](const HyperedgeID& he) { - const HyperedgeID mapped_he = he_mapping[he]; - const HypernodeID source = edgeSource(he); - const HypernodeID target = edgeTarget(he); - const PartitionID sourceBlock = partID(source); - const PartitionID targetBlock = partID(target); - if (source < target && sourceBlock == targetBlock && sourceBlock < k) { - ASSERT(UL(mapped_he) < edge_weight[sourceBlock].size()); - edge_weight[sourceBlock][mapped_he] = edgeWeight(he); - edge_vector[sourceBlock][mapped_he] = - { hn_mapping[edgeSource(he)], hn_mapping[edgeTarget(he)] }; - } - }); - }, [&] { - doParallelForAllNodes([&](const HypernodeID& hn) { - const PartitionID block = partID(hn); - const HypernodeID mapped_hn = hn_mapping[hn]; - if ( block < k ) { - ASSERT(UL(mapped_hn) < node_weight[block].size()); - node_weight[block][mapped_hn] = nodeWeight(hn); - } - }); - }); + tbb::parallel_invoke( + [&] { + doParallelForAllEdges([&](const HyperedgeID &he) { + const HyperedgeID mapped_he = he_mapping[he]; + const HypernodeID source = edgeSource(he); + const HypernodeID target = edgeTarget(he); + const PartitionID sourceBlock = partID(source); + const PartitionID targetBlock = partID(target); + if(source < 
target && sourceBlock == targetBlock && sourceBlock < k) + { + ASSERT(UL(mapped_he) < edge_weight[sourceBlock].size()); + edge_weight[sourceBlock][mapped_he] = edgeWeight(he); + edge_vector[sourceBlock][mapped_he] = { hn_mapping[edgeSource(he)], + hn_mapping[edgeTarget(he)] }; + } + }); + }, + [&] { + doParallelForAllNodes([&](const HypernodeID &hn) { + const PartitionID block = partID(hn); + const HypernodeID mapped_hn = hn_mapping[hn]; + if(block < k) + { + ASSERT(UL(mapped_hn) < node_weight[block].size()); + node_weight[block][mapped_hn] = nodeWeight(hn); + } + }); + }); // Construct graph of each block tbb::parallel_for(static_cast(0), k, [&](const PartitionID p) { const HypernodeID num_nodes = nodes_cnt[p]; const HyperedgeID num_edges = edges_cnt[p]; extracted_blocks[p].hg = HypergraphFactory::construct_from_graph_edges( - num_nodes, num_edges, edge_vector[p], edge_weight[p].data(), node_weight[p].data(), - stable_construction_of_incident_edges); + num_nodes, num_edges, edge_vector[p], edge_weight[p].data(), + node_weight[p].data(), stable_construction_of_incident_edges); }); // Set community ids - doParallelForAllNodes([&](const HypernodeID& hn) { + doParallelForAllNodes([&](const HypernodeID &hn) { const PartitionID block = partID(hn); - if ( block < k ) { + if(block < k) + { extracted_blocks[block].hg.setCommunityID(hn_mapping[hn], _hg->communityID(hn)); } }); @@ -1035,74 +1072,87 @@ class PartitionedGraph { return std::make_pair(std::move(extracted_blocks), std::move(hn_mapping)); } - void freeInternalData() { - if ( _k > 0 ) { + void freeInternalData() + { + if(_k > 0) + { parallel::parallel_free(_part_ids, _edge_sync, _edge_locks); } _k = 0; } - private: - template - bool changeNodePartImpl(const HypernodeID u, - PartitionID from, - PartitionID to, - HypernodeWeight max_weight_to, - SuccessFunc&& report_success, - const DeltaFunction& delta_func, - const NotificationFunc& notify_func, - const bool force_moving_fixed_vertices = false) { +private: + template + bool changeNodePartImpl(const HypernodeID u, PartitionID from, PartitionID to, + HypernodeWeight max_weight_to, SuccessFunc &&report_success, + const DeltaFunction &delta_func, + const NotificationFunc ¬ify_func, + const bool force_moving_fixed_vertices = false) + { unused(force_moving_fixed_vertices); ASSERT(partID(u) == from); ASSERT(from != to); ASSERT(force_moving_fixed_vertices || !isFixed(u)); const HypernodeWeight weight = nodeWeight(u); - const HypernodeWeight to_weight_after = _part_weights[to].add_fetch(weight, std::memory_order_relaxed); - if (to_weight_after <= max_weight_to) { + const HypernodeWeight to_weight_after = + _part_weights[to].add_fetch(weight, std::memory_order_relaxed); + if(to_weight_after <= max_weight_to) + { _part_weights[from].fetch_sub(weight, std::memory_order_relaxed); report_success(); - DBG << "<<< Start changing node part: " << V(u) << " - " << V(from) << " - " << V(to); + DBG << "<<< Start changing node part: " << V(u) << " - " << V(from) << " - " + << V(to); SynchronizedEdgeUpdate sync_update; sync_update.from = from; sync_update.to = to; sync_update.target_graph = _target_graph; sync_update.edge_locks = &_edge_locks; - for (const HyperedgeID edge : incidentEdges(u)) { - if (!isSinglePin(edge)) { + for(const HyperedgeID edge : incidentEdges(u)) + { + if(!isSinglePin(edge)) + { sync_update.he = edge; sync_update.edge_weight = edgeWeight(edge); sync_update.edge_size = edgeSize(edge); synchronizeMoveOnEdge(sync_update, edge, u, to, notify_func); - sync_update.pin_count_in_from_part_after = 
sync_update.block_of_other_node == from ? 1 : 0; - sync_update.pin_count_in_to_part_after = sync_update.block_of_other_node == to ? 2 : 1; + sync_update.pin_count_in_from_part_after = + sync_update.block_of_other_node == from ? 1 : 0; + sync_update.pin_count_in_to_part_after = + sync_update.block_of_other_node == to ? 2 : 1; delta_func(sync_update); } } _part_ids[u] = to; DBG << "Done changing node part: " << V(u) << " >>>"; return true; - } else { + } + else + { _part_weights[to].fetch_sub(weight, std::memory_order_relaxed); return false; } } - void initializeBlockWeights() { - tbb::parallel_for(tbb::blocked_range(HypernodeID(0), initialNumNodes()), - [&](tbb::blocked_range& r) { - // this is not enumerable_thread_specific because of the static partitioner - parallel::scalable_vector part_weight_deltas(_k, 0); - for (HypernodeID node = r.begin(); node < r.end(); ++node) { - if (nodeIsEnabled(node)) { - part_weight_deltas[partID(node)] += nodeWeight(node); + void initializeBlockWeights() + { + tbb::parallel_for( + tbb::blocked_range(HypernodeID(0), initialNumNodes()), + [&](tbb::blocked_range &r) { + // this is not enumerable_thread_specific because of the static partitioner + parallel::scalable_vector part_weight_deltas(_k, 0); + for(HypernodeID node = r.begin(); node < r.end(); ++node) + { + if(nodeIsEnabled(node)) + { + part_weight_deltas[partID(node)] += nodeWeight(node); + } } - } - for (PartitionID p = 0; p < _k; ++p) { - _part_weights[p].fetch_add(part_weight_deltas[p], std::memory_order_relaxed); - } - }, - tbb::static_partitioner() - ); + for(PartitionID p = 0; p < _k; ++p) + { + _part_weights[p].fetch_add(part_weight_deltas[p], std::memory_order_relaxed); + } + }, + tbb::static_partitioner()); } // ####################### Edge Locks ####################### @@ -1110,19 +1160,18 @@ class PartitionedGraph { // This function synchronizes a move on an edge and returns the block ID // of the target node of the corresponding edge. The function assumes that // node u is moved to the block 'to'. - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - PartitionID synchronizeMoveOnEdge(SynchronizedEdgeUpdate& sync_update, - const HyperedgeID edge, - const HypernodeID u, - const PartitionID to, - const NotificationFunc& notify_func) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PartitionID synchronizeMoveOnEdge( + SynchronizedEdgeUpdate &sync_update, const HyperedgeID edge, const HypernodeID u, + const PartitionID to, const NotificationFunc ¬ify_func) + { const HyperedgeID unique_id = uniqueEdgeID(edge); const HypernodeID v = edgeTarget(edge); PartitionID block_of_v = partID(v); - EdgeMove& edge_move = _edge_sync[unique_id]; + EdgeMove &edge_move = _edge_sync[unique_id]; _edge_locks[unique_id].lock(); - if ( edge_move.u == v && edge_move.version == _edge_sync_version ) { + if(edge_move.u == v && edge_move.version == _edge_sync_version) + { ASSERT(edge_move.to < _k && edge_move.to != kInvalidPartition); block_of_v = edge_move.to; } @@ -1130,7 +1179,8 @@ class PartitionedGraph { edge_move.to = to; edge_move.version = _edge_sync_version; sync_update.block_of_other_node = block_of_v; - if constexpr ( notify ) { + if constexpr(notify) + { notify_func(sync_update); } _edge_locks[unique_id].unlock(); @@ -1147,26 +1197,26 @@ class PartitionedGraph { PartitionID _k = 0; // ! Underlying graph - Hypergraph* _hg = nullptr; + Hypergraph *_hg = nullptr; // ! Target graph on which this graph is mapped - const TargetGraph* _target_graph; + const TargetGraph *_target_graph; // ! 
Weight and information for all blocks. - parallel::scalable_vector< CAtomic > _part_weights; + parallel::scalable_vector > _part_weights; // ! Current block IDs of the vertices - Array< PartitionID > _part_ids; + Array _part_ids; // ! Incrementing this counter invalidates all EdgeMove objects (see _edge_sync) // ! with a version < _edge_sync_version uint32_t _edge_sync_version; // ! Used to syncronize moves on edges - Array< EdgeMove > _edge_sync; + Array _edge_sync; // ! Lock to syncronize moves on edges - Array< SpinLock > _edge_locks; + Array _edge_locks; // ! We need to synchronize uncontractions via atomic markers ThreadSafeFastResetFlagArray _edge_markers; diff --git a/mt-kahypar/datastructures/partitioned_hypergraph.h b/mt-kahypar/datastructures/partitioned_hypergraph.h index b8826a080..2b6b2ceae 100644 --- a/mt-kahypar/datastructures/partitioned_hypergraph.h +++ b/mt-kahypar/datastructures/partitioned_hypergraph.h @@ -28,22 +28,22 @@ #pragma once #include -#include #include +#include #include "tbb/parallel_invoke.h" #include "kahypar-resources/meta/mandatory.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/connectivity_info.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/streaming_vector.h" #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/parallel/stl/thread_locals.h" +#include "mt-kahypar/utils/exception.h" #include "mt-kahypar/utils/range.h" #include "mt-kahypar/utils/timer.h" -#include "mt-kahypar/utils/exception.h" namespace mt_kahypar { @@ -53,36 +53,39 @@ class TargetGraph; namespace ds { // Forward -template +template class DeltaPartitionedHypergraph; template -class PartitionedHypergraph { - private: - static_assert(!Hypergraph::is_partitioned, "Only unpartitioned hypergraphs are allowed"); +class PartitionedHypergraph +{ +private: + static_assert(!Hypergraph::is_partitioned, + "Only unpartitioned hypergraphs are allowed"); - using NotificationFunc = std::function; - using DeltaFunction = std::function; - #define NOOP_NOTIFY_FUNC [] (const SynchronizedEdgeUpdate&) { } - #define NOOP_FUNC [] (const SynchronizedEdgeUpdate&) { } + using NotificationFunc = std::function; + using DeltaFunction = std::function; +#define NOOP_NOTIFY_FUNC [](const SynchronizedEdgeUpdate &) {} +#define NOOP_FUNC [](const SynchronizedEdgeUpdate &) {} // Factory using HypergraphFactory = typename Hypergraph::Factory; - // REVIEW NOTE: Can't we use a lambda in changeNodePart. And write a second function that calls the first with a lambda that does nothing. - // Then we could guarantee inlining - // This would also reduce the code/documentation copy-pasta for with or without gain updates + // REVIEW NOTE: Can't we use a lambda in changeNodePart. And write a second function + // that calls the first with a lambda that does nothing. 
Then we could guarantee + // inlining This would also reduce the code/documentation copy-pasta for with or without + // gain updates static constexpr bool enable_heavy_assert = false; - public: +public: static constexpr bool is_static_hypergraph = Hypergraph::is_static_hypergraph; static constexpr bool is_graph = Hypergraph::is_graph; static constexpr bool is_partitioned = true; static constexpr bool supports_connectivity_set = true; static constexpr mt_kahypar_partition_type_t TYPE = - PartitionedHypergraphType::TYPE; + PartitionedHypergraphType::TYPE; static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = ID(100000); @@ -94,209 +97,178 @@ class PartitionedHypergraph { using IncidentNetsIterator = typename Hypergraph::IncidentNetsIterator; using ConInfo = ConnectivityInformation; using ConnectivitySetIterator = typename ConnectivityInformation::Iterator; - template + template using DeltaPartition = DeltaPartitionedHypergraph; using ExtractedBlock = ExtractedHypergraph; PartitionedHypergraph() = default; - explicit PartitionedHypergraph(const PartitionID k, - Hypergraph& hypergraph) : - _input_num_nodes(hypergraph.initialNumNodes()), - _input_num_edges(hypergraph.initialNumEdges()), - _k(k), - _hg(&hypergraph), - _target_graph(nullptr), - _part_weights(k, CAtomic(0)), - _part_ids( - "Refinement", "part_ids", hypergraph.initialNumNodes(), false, false), - _con_info(hypergraph.initialNumEdges(), k, hypergraph.maxEdgeSize()), - _pin_count_update_ownership( - "Refinement", "pin_count_update_ownership", hypergraph.initialNumEdges(), true, false) { + explicit PartitionedHypergraph(const PartitionID k, Hypergraph &hypergraph) : + _input_num_nodes(hypergraph.initialNumNodes()), + _input_num_edges(hypergraph.initialNumEdges()), _k(k), _hg(&hypergraph), + _target_graph(nullptr), _part_weights(k, CAtomic(0)), + _part_ids("Refinement", "part_ids", hypergraph.initialNumNodes(), false, false), + _con_info(hypergraph.initialNumEdges(), k, hypergraph.maxEdgeSize()), + _pin_count_update_ownership("Refinement", "pin_count_update_ownership", + hypergraph.initialNumEdges(), true, false) + { _part_ids.assign(hypergraph.initialNumNodes(), kInvalidPartition, false); } - explicit PartitionedHypergraph(const PartitionID k, - Hypergraph& hypergraph, + explicit PartitionedHypergraph(const PartitionID k, Hypergraph &hypergraph, parallel_tag_t) : - _input_num_nodes(hypergraph.initialNumNodes()), - _input_num_edges(hypergraph.initialNumEdges()), - _k(k), - _hg(&hypergraph), - _target_graph(nullptr), - _part_weights(k, CAtomic(0)), - _part_ids(), - _con_info(), - _pin_count_update_ownership() { - tbb::parallel_invoke([&] { - _part_ids.resize( - "Refinement", "vertex_part_info", hypergraph.initialNumNodes()); - _part_ids.assign(hypergraph.initialNumNodes(), kInvalidPartition); - }, [&] { - _con_info = ConnectivityInformation( - hypergraph.initialNumEdges(), k, hypergraph.maxEdgeSize(), parallel_tag_t { }); - }, [&] { - _pin_count_update_ownership.resize( - "Refinement", "pin_count_update_ownership", hypergraph.initialNumEdges(), true); - }); + _input_num_nodes(hypergraph.initialNumNodes()), + _input_num_edges(hypergraph.initialNumEdges()), _k(k), _hg(&hypergraph), + _target_graph(nullptr), _part_weights(k, CAtomic(0)), _part_ids(), + _con_info(), _pin_count_update_ownership() + { + tbb::parallel_invoke( + [&] { + _part_ids.resize("Refinement", "vertex_part_info", + hypergraph.initialNumNodes()); + _part_ids.assign(hypergraph.initialNumNodes(), kInvalidPartition); + }, + [&] { + _con_info = 
ConnectivityInformation(hypergraph.initialNumEdges(), k, + hypergraph.maxEdgeSize(), parallel_tag_t{}); + }, + [&] { + _pin_count_update_ownership.resize("Refinement", "pin_count_update_ownership", + hypergraph.initialNumEdges(), true); + }); } - // REVIEW NOTE why do we delete copy assignment/construction? wouldn't it be useful to make a copy, e.g. for initial partitioning - PartitionedHypergraph(const PartitionedHypergraph&) = delete; - PartitionedHypergraph & operator= (const PartitionedHypergraph &) = delete; + // REVIEW NOTE why do we delete copy assignment/construction? wouldn't it be useful to + // make a copy, e.g. for initial partitioning + PartitionedHypergraph(const PartitionedHypergraph &) = delete; + PartitionedHypergraph &operator=(const PartitionedHypergraph &) = delete; - PartitionedHypergraph(PartitionedHypergraph&& other) = default; - PartitionedHypergraph & operator= (PartitionedHypergraph&& other) = default; + PartitionedHypergraph(PartitionedHypergraph &&other) = default; + PartitionedHypergraph &operator=(PartitionedHypergraph &&other) = default; - ~PartitionedHypergraph() { - freeInternalData(); - } + ~PartitionedHypergraph() { freeInternalData(); } - void resetData() { - tbb::parallel_invoke([&] { - }, [&] { - _part_ids.assign(_part_ids.size(), kInvalidPartition); - }, [&] { - _con_info.reset(); - }, [&] { - for (auto& x : _part_weights) x.store(0, std::memory_order_relaxed); - }); + void resetData() + { + tbb::parallel_invoke([&] {}, + [&] { _part_ids.assign(_part_ids.size(), kInvalidPartition); }, + [&] { _con_info.reset(); }, + [&] { + for(auto &x : _part_weights) + x.store(0, std::memory_order_relaxed); + }); } // ####################### General Hypergraph Stats ###################### - Hypergraph& hypergraph() { + Hypergraph &hypergraph() + { ASSERT(_hg); return *_hg; } - void setHypergraph(Hypergraph& hypergraph) { - _hg = &hypergraph; - } + void setHypergraph(Hypergraph &hypergraph) { _hg = &hypergraph; } // ! Initial number of hypernodes - HypernodeID initialNumNodes() const { - return _hg->initialNumNodes(); - } + HypernodeID initialNumNodes() const { return _hg->initialNumNodes(); } // ! Number of nodes of the input hypergraph - HypernodeID topLevelNumNodes() const { - return _input_num_nodes; - } + HypernodeID topLevelNumNodes() const { return _input_num_nodes; } // ! Number of removed hypernodes - HypernodeID numRemovedHypernodes() const { - return _hg->numRemovedHypernodes(); - } + HypernodeID numRemovedHypernodes() const { return _hg->numRemovedHypernodes(); } // ! Initial number of hyperedges - HyperedgeID initialNumEdges() const { - return _hg->initialNumEdges(); - } + HyperedgeID initialNumEdges() const { return _hg->initialNumEdges(); } // ! Number of nodes of the input hypergraph - HyperedgeID topLevelNumEdges() const { - return _input_num_edges; - } + HyperedgeID topLevelNumEdges() const { return _input_num_edges; } // ! Number of unique edge ids of the input hypergraph - HyperedgeID topLevelNumUniqueIds() const { - return _input_num_edges; - } + HyperedgeID topLevelNumUniqueIds() const { return _input_num_edges; } // ! Initial number of pins - HypernodeID initialNumPins() const { - return _hg->initialNumPins(); - } + HypernodeID initialNumPins() const { return _hg->initialNumPins(); } // ! Initial sum of the degree of all vertices - HypernodeID initialTotalVertexDegree() const { - return _hg->initialTotalVertexDegree(); - } + HypernodeID initialTotalVertexDegree() const { return _hg->initialTotalVertexDegree(); } // ! 
Total weight of hypergraph - HypernodeWeight totalWeight() const { - return _hg->totalWeight(); - } + HypernodeWeight totalWeight() const { return _hg->totalWeight(); } // ! Number of blocks this hypergraph is partitioned into - PartitionID k() const { - return _k; - } - + PartitionID k() const { return _k; } // ####################### Mapping ###################### - void setTargetGraph(const TargetGraph* target_graph) { - _target_graph = target_graph; - } + void setTargetGraph(const TargetGraph *target_graph) { _target_graph = target_graph; } - bool hasTargetGraph() const { - return _target_graph != nullptr; - } + bool hasTargetGraph() const { return _target_graph != nullptr; } - const TargetGraph* targetGraph() const { - return _target_graph; - } + const TargetGraph *targetGraph() const { return _target_graph; } // ####################### Iterators ####################### // ! Iterates in parallel over all active nodes and calls function f // ! for each vertex - template - void doParallelForAllNodes(const F& f) { - static_cast(*this).doParallelForAllNodes(f); + template + void doParallelForAllNodes(const F &f) + { + static_cast(*this).doParallelForAllNodes(f); } // ! Iterates in parallel over all active nodes and calls function f // ! for each vertex - template - void doParallelForAllNodes(const F& f) const { + template + void doParallelForAllNodes(const F &f) const + { _hg->doParallelForAllNodes(f); } // ! Iterates in parallel over all active edges and calls function f // ! for each net - template - void doParallelForAllEdges(const F& f) { - static_cast(*this).doParallelForAllEdges(f); + template + void doParallelForAllEdges(const F &f) + { + static_cast(*this).doParallelForAllEdges(f); } // ! Iterates in parallel over all active edges and calls function f // ! for each net - template - void doParallelForAllEdges(const F& f) const { + template + void doParallelForAllEdges(const F &f) const + { _hg->doParallelForAllEdges(f); } // ! Returns an iterator over the set of active nodes of the hypergraph - IteratorRange nodes() const { - return _hg->nodes(); - } + IteratorRange nodes() const { return _hg->nodes(); } // ! Returns an iterator over the set of active edges of the hypergraph - IteratorRange edges() const { - return _hg->edges(); - } + IteratorRange edges() const { return _hg->edges(); } // ! Returns a range to loop over the incident nets of hypernode u. - IteratorRange incidentEdges(const HypernodeID u) const { + IteratorRange incidentEdges(const HypernodeID u) const + { return _hg->incidentEdges(u); } // ! Returns a range to loop over the incident nets of hypernode u. IteratorRange incidentEdges(const HypernodeID u, - const size_t pos) const { + const size_t pos) const + { return _hg->incident_nets_of(u, pos); } // ! Returns a range to loop over the pins of hyperedge e. - IteratorRange pins(const HyperedgeID e) const { + IteratorRange pins(const HyperedgeID e) const + { return _hg->pins(e); } // ! Returns a range to loop over the set of block ids contained in hyperedge e. - IteratorRange connectivitySet(const HyperedgeID e) const { + IteratorRange connectivitySet(const HyperedgeID e) const + { ASSERT(_hg->edgeIsEnabled(e), "Hyperedge" << e << "is disabled"); ASSERT(e < _hg->initialNumEdges(), "Hyperedge" << e << "does not exist"); return _con_info.connectivitySet(e); @@ -305,14 +277,14 @@ class PartitionedHypergraph { // ####################### Hypernode Information ####################### // ! 
Weight of a vertex - HypernodeWeight nodeWeight(const HypernodeID u) const { - return _hg->nodeWeight(u); - } + HypernodeWeight nodeWeight(const HypernodeID u) const { return _hg->nodeWeight(u); } // ! Sets the weight of a vertex - void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) { + void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) + { const PartitionID block = partID(u); - if ( block != kInvalidPartition ) { + if(block != kInvalidPartition) + { ASSERT(block < _k); const HypernodeWeight delta = weight - _hg->nodeWeight(u); _part_weights[block] += delta; @@ -321,27 +293,20 @@ class PartitionedHypergraph { } // ! Degree of a hypernode - HyperedgeID nodeDegree(const HypernodeID u) const { - return _hg->nodeDegree(u); - } + HyperedgeID nodeDegree(const HypernodeID u) const { return _hg->nodeDegree(u); } // ! Returns, whether a hypernode is enabled or not - bool nodeIsEnabled(const HypernodeID u) const { - return _hg->nodeIsEnabled(u); - } + bool nodeIsEnabled(const HypernodeID u) const { return _hg->nodeIsEnabled(u); } // ! Enables a hypernode (must be disabled before) - void enableHypernode(const HypernodeID u) { - _hg->enableHypernode(u); - } + void enableHypernode(const HypernodeID u) { _hg->enableHypernode(u); } // ! Disable a hypernode (must be enabled before) - void disableHypernode(const HypernodeID u) { - _hg->disableHypernode(u); - } + void disableHypernode(const HypernodeID u) { _hg->disableHypernode(u); } // ! Restores a degree zero hypernode - void restoreDegreeZeroHypernode(const HypernodeID u, const PartitionID to) { + void restoreDegreeZeroHypernode(const HypernodeID u, const PartitionID to) + { _hg->restoreDegreeZeroHypernode(u); setNodePart(u, to); } @@ -349,72 +314,65 @@ class PartitionedHypergraph { // ####################### Hyperedge Information ####################### // ! Weight of a hyperedge - HypernodeWeight edgeWeight(const HyperedgeID e) const { - return _hg->edgeWeight(e); - } + HypernodeWeight edgeWeight(const HyperedgeID e) const { return _hg->edgeWeight(e); } // ! Unique id of a hyperedge - HyperedgeID uniqueEdgeID(const HyperedgeID e) const { - return e; - } + HyperedgeID uniqueEdgeID(const HyperedgeID e) const { return e; } // ! Sets the weight of a hyperedge - void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) { + void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) + { _hg->setEdgeWeight(e, weight); } // ! Number of pins of a hyperedge - HypernodeID edgeSize(const HyperedgeID e) const { - return _hg->edgeSize(e); - } + HypernodeID edgeSize(const HyperedgeID e) const { return _hg->edgeSize(e); } // ! Returns, whether a hyperedge is enabled or not - bool edgeIsEnabled(const HyperedgeID e) const { - return _hg->edgeIsEnabled(e); - } + bool edgeIsEnabled(const HyperedgeID e) const { return _hg->edgeIsEnabled(e); } // ! Enables a hyperedge (must be disabled before) - void enableHyperedge(const HyperedgeID e) { - _hg->enableHyperedge(e); - } + void enableHyperedge(const HyperedgeID e) { _hg->enableHyperedge(e); } // ! Disabled a hyperedge (must be enabled before) - void disableHyperedge(const HyperedgeID e) { - _hg->disableHyperedge(e); - } + void disableHyperedge(const HyperedgeID e) { _hg->disableHyperedge(e); } - // ! Target of an edge - HypernodeID edgeTarget(const HyperedgeID) const { + // ! 
Target of an edge + HypernodeID edgeTarget(const HyperedgeID) const + { throw NonSupportedOperationException( - "edgeTarget(e) is only supported on graph data structure"); + "edgeTarget(e) is only supported on graph data structure"); return kInvalidHypernode; } // ! Source of an edge - HypernodeID edgeSource(const HyperedgeID) const { + HypernodeID edgeSource(const HyperedgeID) const + { throw NonSupportedOperationException( - "edgeSource(e) is only supported on graph data structure"); + "edgeSource(e) is only supported on graph data structure"); return kInvalidHypernode; } // ! Whether the edge is a single pin edge - bool isSinglePin(const HyperedgeID) const { + bool isSinglePin(const HyperedgeID) const + { throw NonSupportedOperationException( - "isSinglePin(e) is only supported on graph data structure"); + "isSinglePin(e) is only supported on graph data structure"); return false; } // ####################### Uncontraction ####################### /** - * Uncontracts a batch of contractions in parallel. The batches must be uncontracted exactly - * in the order computed by the function createBatchUncontractionHierarchy(...). + * Uncontracts a batch of contractions in parallel. The batches must be uncontracted + * exactly in the order computed by the function createBatchUncontractionHierarchy(...). */ - template - void uncontract(const Batch& batch, GainCache& gain_cache) { + template + void uncontract(const Batch &batch, GainCache &gain_cache) + { // Set block ids of contraction partners tbb::parallel_for(UL(0), batch.size(), [&](const size_t i) { - const Memento& memento = batch[i]; + const Memento &memento = batch[i]; ASSERT(nodeIsEnabled(memento.u)); ASSERT(!nodeIsEnabled(memento.v)); const PartitionID part_id = partID(memento.u); @@ -422,21 +380,24 @@ class PartitionedHypergraph { setOnlyNodePart(memento.v, part_id); }); - _hg->uncontract(batch, - [&](const HypernodeID u, const HypernodeID v, const HyperedgeID he) { - // In this case, u and v are incident to hyperedge he after uncontraction - const PartitionID block = partID(u); - const HypernodeID pin_count_in_part_after = incrementPinCountOfBlock(he, block); - ASSERT(pin_count_in_part_after > 1, V(u) << V(v) << V(he)); - gain_cache.uncontractUpdateAfterRestore(*this, u, v, he, pin_count_in_part_after); - }, - [&](const HypernodeID u, const HypernodeID v, const HyperedgeID he) { - gain_cache.uncontractUpdateAfterReplacement(*this, u, v, he); - }); + _hg->uncontract( + batch, + [&](const HypernodeID u, const HypernodeID v, const HyperedgeID he) { + // In this case, u and v are incident to hyperedge he after uncontraction + const PartitionID block = partID(u); + const HypernodeID pin_count_in_part_after = incrementPinCountOfBlock(he, block); + ASSERT(pin_count_in_part_after > 1, V(u) << V(v) << V(he)); + gain_cache.uncontractUpdateAfterRestore(*this, u, v, he, + pin_count_in_part_after); + }, + [&](const HypernodeID u, const HypernodeID v, const HyperedgeID he) { + gain_cache.uncontractUpdateAfterReplacement(*this, u, v, he); + }); - if constexpr ( GainCache::initializes_gain_cache_entry_after_batch_uncontractions ) { + if constexpr(GainCache::initializes_gain_cache_entry_after_batch_uncontractions) + { tbb::parallel_for(UL(0), batch.size(), [&](const size_t i) { - const Memento& memento = batch[i]; + const Memento &memento = batch[i]; gain_cache.initializeGainCacheEntryForNode(*this, memento.v); }); } @@ -448,13 +409,14 @@ class PartitionedHypergraph { /*! * Restores a large hyperedge previously removed from the hypergraph. 
*/ - void restoreLargeEdge(const HyperedgeID& he) { + void restoreLargeEdge(const HyperedgeID &he) + { _hg->restoreLargeEdge(he); // Recalculate pin count in parts const size_t incidence_array_start = _hg->hyperedge(he).firstEntry(); const size_t incidence_array_end = _hg->hyperedge(he).firstInvalidEntry(); - tbb::enumerable_thread_specific< vec > ets_pin_count_in_part(_k, 0); + tbb::enumerable_thread_specific > ets_pin_count_in_part(_k, 0); tbb::parallel_for(incidence_array_start, incidence_array_end, [&](const size_t pos) { const HypernodeID pin = _hg->_incidence_array[pos]; const PartitionID block = partID(pin); @@ -462,13 +424,16 @@ class PartitionedHypergraph { }); // Aggregate local pin count for each block - for ( PartitionID block = 0; block < _k; ++block ) { + for(PartitionID block = 0; block < _k; ++block) + { HypernodeID pin_count_in_part = 0; - for ( const vec& local_pin_count : ets_pin_count_in_part ) { + for(const vec &local_pin_count : ets_pin_count_in_part) + { pin_count_in_part += local_pin_count[block]; } - if ( pin_count_in_part > 0 ) { + if(pin_count_in_part > 0) + { _con_info.setPinCountInPart(he, block, pin_count_in_part); _con_info.addBlock(he, block); } @@ -476,28 +441,34 @@ class PartitionedHypergraph { } /** - * Restores a previously removed set of singple-pin and parallel hyperedges. Note, that hes_to_restore - * must be exactly the same and given in the reverse order as returned by removeSinglePinAndParallelNets(...). + * Restores a previously removed set of singple-pin and parallel hyperedges. Note, that + * hes_to_restore must be exactly the same and given in the reverse order as returned by + * removeSinglePinAndParallelNets(...). */ - template - void restoreSinglePinAndParallelNets(const vec& hes_to_restore, - GainCache& gain_cache) { + template + void restoreSinglePinAndParallelNets( + const vec &hes_to_restore, + GainCache &gain_cache) + { // Restore hyperedges in hypergraph _hg->restoreSinglePinAndParallelNets(hes_to_restore); - // Compute pin counts of restored hyperedges and gain cache values of vertices contained - // single-pin hyperedges. Note, that restoring parallel hyperedges does not change any - // value in the gain cache, since it already contributes to the gain via its representative. - tls_enumerable_thread_specific< vec > ets_pin_count_in_part(_k, 0); + // Compute pin counts of restored hyperedges and gain cache values of vertices + // contained single-pin hyperedges. Note, that restoring parallel hyperedges does not + // change any value in the gain cache, since it already contributes to the gain via + // its representative. 
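    // Illustrative example (hypothetical net ids): assume net 7 was removed as a
    // parallel copy of representative net 3, and net 9 was removed as a single-pin net
    // whose only pin v lies in block 2. Per net, the parallel loop below then performs:
    //
    //   // net 9 (single pin): its sole block is the block of v; the gain cache entry
    //   // of v is updated via restoreSinglePinHyperedge
    //   _con_info.addBlock(9, 2);
    //   _con_info.setPinCountInPart(9, 2, 1);
    //   gain_cache.restoreSinglePinHyperedge(v, 2, edgeWeight(9));
    //
    //   // net 7 (parallel copy): pin counts are copied from representative net 3; the
    //   // gain cache is left untouched, since net 3 already covers this distribution
    //   for(const PartitionID &block : connectivitySet(3))
    //   {
    //     _con_info.addBlock(7, block);
    //     _con_info.setPinCountInPart(7, block, pinCountInPart(3, block));
    //   }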
+ tls_enumerable_thread_specific > ets_pin_count_in_part(_k, 0); tbb::parallel_for(UL(0), hes_to_restore.size(), [&](const size_t i) { const HyperedgeID he = hes_to_restore[i].removed_hyperedge; const HyperedgeID representative = hes_to_restore[i].representative; ASSERT(edgeIsEnabled(he)); const bool is_single_pin_he = edgeSize(he) == 1; - if ( is_single_pin_he ) { + if(is_single_pin_he) + { // Restore single-pin net HypernodeID single_vertex_of_he = kInvalidHypernode; - for ( const HypernodeID& pin : pins(he) ) { + for(const HypernodeID &pin : pins(he)) + { single_vertex_of_he = pin; } ASSERT(single_vertex_of_he != kInvalidHypernode); @@ -505,22 +476,27 @@ class PartitionedHypergraph { const PartitionID block_of_single_pin = partID(single_vertex_of_he); _con_info.addBlock(he, block_of_single_pin); _con_info.setPinCountInPart(he, block_of_single_pin, 1); - gain_cache.restoreSinglePinHyperedge( - single_vertex_of_he, block_of_single_pin, edgeWeight(he)); - } else { + gain_cache.restoreSinglePinHyperedge(single_vertex_of_he, block_of_single_pin, + edgeWeight(he)); + } + else + { // Restore parallel net => pin count information given by representative ASSERT(edgeIsEnabled(representative)); - for ( const PartitionID& block : connectivitySet(representative) ) { + for(const PartitionID &block : connectivitySet(representative)) + { _con_info.addBlock(he, block); _con_info.setPinCountInPart(he, block, pinCountInPart(representative, block)); } gain_cache.restoreIdenticalHyperedge(*this, he); HEAVY_REFINEMENT_ASSERT([&] { - for ( PartitionID block = 0; block < _k; ++block ) { - if ( pinCountInPart(he, block) != pinCountInPartRecomputed(he, block) ) { - LOG << "Pin count in part of hyperedge" << he << "in block" << block - << "is" << pinCountInPart(he, block) << ", but should be" + for(PartitionID block = 0; block < _k; ++block) + { + if(pinCountInPart(he, block) != pinCountInPartRecomputed(he, block)) + { + LOG << "Pin count in part of hyperedge" << he << "in block" << block << "is" + << pinCountInPart(he, block) << ", but should be" << pinCountInPartRecomputed(he, block); return false; } @@ -534,59 +510,66 @@ class PartitionedHypergraph { // ####################### Partition Information ####################### // ! Block that vertex u belongs to - PartitionID partID(const HypernodeID u) const { + PartitionID partID(const HypernodeID u) const + { ASSERT(u < initialNumNodes(), "Hypernode" << u << "does not exist"); return _part_ids[u]; } - void extractPartIDs(Array& part_ids) { + void extractPartIDs(Array &part_ids) + { // If we pass the input hypergraph to initial partitioning, then initial partitioning // will pass an part ID vector of size |V'|, where V' are the number of nodes of - // smallest hypergraph, while the _part_ids vector of the input hypergraph is initialized - // with the original number of nodes. This can cause segmentation fault when we simply swap them - // during main uncoarsening. - if ( _part_ids.size() == part_ids.size() ) { + // smallest hypergraph, while the _part_ids vector of the input hypergraph is + // initialized with the original number of nodes. This can cause segmentation fault + // when we simply swap them during main uncoarsening. 
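    // Worked example (hypothetical sizes and names): if _part_ids was allocated for the
    // original 1,000,000 nodes while the part ID vector handed in by initial
    // partitioning covers only the 50,000 nodes of the smallest hypergraph, swapping
    // would leave this hypergraph with a too-small _part_ids array. In that case only
    // the first part_ids.size() entries are copied below:
    //
    //   Array<PartitionID> coarse_ids;        // size 50,000, provided by the caller
    //   input_phg.extractPartIDs(coarse_ids); // _part_ids.size() == 1,000,000
    //   // => sizes differ, the copy branch is taken instead of std::swap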
+ if(_part_ids.size() == part_ids.size()) + { std::swap(_part_ids, part_ids); - } else { + } + else + { ASSERT(part_ids.size() <= _part_ids.size()); - tbb::parallel_for(UL(0), part_ids.size(), [&](const size_t i) { - part_ids[i] = _part_ids[i]; - }); + tbb::parallel_for(UL(0), part_ids.size(), + [&](const size_t i) { part_ids[i] = _part_ids[i]; }); } } - void setOnlyNodePart(const HypernodeID u, PartitionID p) { + void setOnlyNodePart(const HypernodeID u, PartitionID p) + { ASSERT(p != kInvalidPartition && p < _k); ASSERT(_part_ids[u] == kInvalidPartition); _part_ids[u] = p; } - void setNodePart(const HypernodeID u, PartitionID p) { + void setNodePart(const HypernodeID u, PartitionID p) + { setOnlyNodePart(u, p); _part_weights[p].fetch_add(nodeWeight(u), std::memory_order_relaxed); - for (HyperedgeID he : incidentEdges(u)) { + for(HyperedgeID he : incidentEdges(u)) + { incrementPinCountOfBlock(he, p); } } // ! Changes the block id of vertex u from block 'from' to block 'to' // ! Returns true, if move of vertex u to corresponding block succeeds. - template - bool changeNodePart(const HypernodeID u, - PartitionID from, - PartitionID to, - HypernodeWeight max_weight_to, - SuccessFunc&& report_success, - const DeltaFunction& delta_func, - const NotificationFunc& notify_func = NOOP_NOTIFY_FUNC, - const bool force_moving_fixed_vertices = false) { + template + bool changeNodePart(const HypernodeID u, PartitionID from, PartitionID to, + HypernodeWeight max_weight_to, SuccessFunc &&report_success, + const DeltaFunction &delta_func, + const NotificationFunc ¬ify_func = NOOP_NOTIFY_FUNC, + const bool force_moving_fixed_vertices = false) + { unused(force_moving_fixed_vertices); ASSERT(partID(u) == from); ASSERT(from != to); ASSERT(force_moving_fixed_vertices || !isFixed(u)); const HypernodeWeight wu = nodeWeight(u); - const HypernodeWeight to_weight_after = _part_weights[to].add_fetch(wu, std::memory_order_relaxed); - if (to_weight_after <= max_weight_to) { + const HypernodeWeight to_weight_after = + _part_weights[to].add_fetch(wu, std::memory_order_relaxed); + if(to_weight_after <= max_weight_to) + { _part_ids[u] = to; _part_weights[from].fetch_sub(wu, std::memory_order_relaxed); report_success(); @@ -595,76 +578,87 @@ class PartitionedHypergraph { sync_update.to = to; sync_update.target_graph = _target_graph; sync_update.edge_locks = &_pin_count_update_ownership; - for ( const HyperedgeID he : incidentEdges(u) ) { + for(const HyperedgeID he : incidentEdges(u)) + { updatePinCountOfHyperedge(he, from, to, sync_update, delta_func, notify_func); } return true; - } else { + } + else + { _part_weights[to].fetch_sub(wu, std::memory_order_relaxed); return false; } } // curry - bool changeNodePart(const HypernodeID u, - PartitionID from, - PartitionID to, - const DeltaFunction& delta_func = NOOP_FUNC, - const bool force_moving_fixed_vertex = false) { - return changeNodePart(u, from, to, - std::numeric_limits::max(), []{}, - delta_func, NOOP_NOTIFY_FUNC, force_moving_fixed_vertex); - } - - template - bool changeNodePart(GainCache& gain_cache, - const HypernodeID u, - PartitionID from, - PartitionID to, - HypernodeWeight max_weight_to, - SuccessFunc&& report_success, - const DeltaFunction& delta_func) { - auto my_delta_func = [&](const SynchronizedEdgeUpdate& sync_update) { + bool changeNodePart(const HypernodeID u, PartitionID from, PartitionID to, + const DeltaFunction &delta_func = NOOP_FUNC, + const bool force_moving_fixed_vertex = false) + { + return changeNodePart( + u, from, to, 
std::numeric_limits::max(), [] {}, delta_func, + NOOP_NOTIFY_FUNC, force_moving_fixed_vertex); + } + + template + bool changeNodePart(GainCache &gain_cache, const HypernodeID u, PartitionID from, + PartitionID to, HypernodeWeight max_weight_to, + SuccessFunc &&report_success, const DeltaFunction &delta_func) + { + auto my_delta_func = [&](const SynchronizedEdgeUpdate &sync_update) { delta_func(sync_update); gain_cache.deltaGainUpdate(*this, sync_update); }; - if constexpr ( !GainCache::requires_notification_before_update ) { + if constexpr(!GainCache::requires_notification_before_update) + { return changeNodePart(u, from, to, max_weight_to, report_success, my_delta_func); - } else { + } + else + { return changeNodePart(u, from, to, max_weight_to, report_success, my_delta_func, - [&](SynchronizedEdgeUpdate& sync_update) { - sync_update.pin_count_in_from_part_after = pinCountInPart(sync_update.he, from) - 1; - sync_update.pin_count_in_to_part_after = pinCountInPart(sync_update.he, to) + 1; - gain_cache.notifyBeforeDeltaGainUpdate(*this, sync_update); - }); + [&](SynchronizedEdgeUpdate &sync_update) { + sync_update.pin_count_in_from_part_after = + pinCountInPart(sync_update.he, from) - 1; + sync_update.pin_count_in_to_part_after = + pinCountInPart(sync_update.he, to) + 1; + gain_cache.notifyBeforeDeltaGainUpdate(*this, sync_update); + }); } } - template - bool changeNodePart(GainCache& gain_cache, - const HypernodeID u, - PartitionID from, - PartitionID to) { - return changeNodePart(gain_cache, u, from, to, - std::numeric_limits::max(), []{}, NoOpDeltaFunc()); + template + bool changeNodePart(GainCache &gain_cache, const HypernodeID u, PartitionID from, + PartitionID to) + { + return changeNodePart( + gain_cache, u, from, to, std::numeric_limits::max(), [] {}, + NoOpDeltaFunc()); } // ! Weight of a block - HypernodeWeight partWeight(const PartitionID p) const { + HypernodeWeight partWeight(const PartitionID p) const + { ASSERT(p != kInvalidPartition && p < _k); return _part_weights[p].load(std::memory_order_relaxed); } // ! Returns, whether hypernode u is adjacent to a least one cut hyperedge. - bool isBorderNode(const HypernodeID u) const { - if ( nodeDegree(u) <= HIGH_DEGREE_THRESHOLD ) { - for ( const HyperedgeID& he : incidentEdges(u) ) { - if ( connectivity(he) > 1 ) { + bool isBorderNode(const HypernodeID u) const + { + if(nodeDegree(u) <= HIGH_DEGREE_THRESHOLD) + { + for(const HyperedgeID &he : incidentEdges(u)) + { + if(connectivity(he) > 1) + { return true; } } return false; - } else { + } + else + { // TODO maybe we should allow these in label propagation? definitely not in FM // In case u is a high degree vertex, we omit the border node check and // and return false. Assumption is that it is very unlikely that such a @@ -673,10 +667,13 @@ class PartitionedHypergraph { } } - HypernodeID numIncidentCutHyperedges(const HypernodeID u) const { + HypernodeID numIncidentCutHyperedges(const HypernodeID u) const + { HypernodeID num_incident_cut_hyperedges = 0; - for ( const HyperedgeID& he : incidentEdges(u) ) { - if ( connectivity(he) > 1 ) { + for(const HyperedgeID &he : incidentEdges(u)) + { + if(connectivity(he) > 1) + { ++num_incident_cut_hyperedges; } } @@ -684,14 +681,16 @@ class PartitionedHypergraph { } // ! 
Number of blocks which pins of hyperedge e belongs to - PartitionID connectivity(const HyperedgeID e) const { + PartitionID connectivity(const HyperedgeID e) const + { ASSERT(e < _hg->initialNumEdges(), "Hyperedge" << e << "does not exist"); ASSERT(edgeIsEnabled(e), "Hyperedge" << e << "is disabled"); return _con_info.connectivity(e); } // ! Returns the number pins of hyperedge e that are part of block id - HypernodeID pinCountInPart(const HyperedgeID e, const PartitionID p) const { + HypernodeID pinCountInPart(const HyperedgeID e, const PartitionID p) const + { ASSERT(e < _hg->initialNumEdges(), "Hyperedge" << e << "does not exist"); ASSERT(edgeIsEnabled(e), "Hyperedge" << e << "is disabled"); ASSERT(p != kInvalidPartition && p < _k); @@ -699,69 +698,78 @@ class PartitionedHypergraph { } // ! Creates a shallow copy of the connectivity set of hyperedge he - StaticBitset& shallowCopyOfConnectivitySet(const HyperedgeID he) const { + StaticBitset &shallowCopyOfConnectivitySet(const HyperedgeID he) const + { return _con_info.shallowCopy(he); } // ! Creates a deep copy of the connectivity set of hyperedge he - Bitset& deepCopyOfConnectivitySet(const HyperedgeID he) const { + Bitset &deepCopyOfConnectivitySet(const HyperedgeID he) const + { return _con_info.deepCopy(he); } - const ConInfo& getConnectivityInformation() const { - return _con_info; - } + const ConInfo &getConnectivityInformation() const { return _con_info; } // ! Initializes the partition of the hypergraph, if block ids are assigned with // ! setOnlyNodePart(...). In that case, block weights and pin counts in part for // ! each hyperedge must be initialized explicitly here. - void initializePartition() { - tbb::parallel_invoke( - [&] { initializeBlockWeights(); }, - [&] { initializePinCountInPart(); } - ); + void initializePartition() + { + tbb::parallel_invoke([&] { initializeBlockWeights(); }, + [&] { initializePinCountInPart(); }); } // ! Reset partition (not thread-safe) - void resetPartition() { + void resetPartition() + { _part_ids.assign(_part_ids.size(), kInvalidPartition, false); - for (auto& x : _part_weights) x.store(0, std::memory_order_relaxed); + for(auto &x : _part_weights) + x.store(0, std::memory_order_relaxed); // Reset pin count in part and connectivity set _con_info.reset(false); } // ! Only for testing - void recomputePartWeights() { - for (PartitionID p = 0; p < _k; ++p) { + void recomputePartWeights() + { + for(PartitionID p = 0; p < _k; ++p) + { _part_weights[p].store(0); } - for (HypernodeID u : nodes()) { - _part_weights[ partID(u) ] += nodeWeight(u); + for(HypernodeID u : nodes()) + { + _part_weights[partID(u)] += nodeWeight(u); } } // ! 
Only for testing - bool checkTrackedPartitionInformation() { + bool checkTrackedPartitionInformation() + { bool success = true; - for (HyperedgeID e : edges()) { + for(HyperedgeID e : edges()) + { PartitionID expected_connectivity = 0; - for (PartitionID i = 0; i < k(); ++i) { + for(PartitionID i = 0; i < k(); ++i) + { const HypernodeID actual_pin_count_in_part = pinCountInPart(e, i); - if ( actual_pin_count_in_part != pinCountInPartRecomputed(e, i) ) { - LOG << "Pin count of hyperedge" << e << "in block" << i << "=>" << - "Expected:" << V(pinCountInPartRecomputed(e, i)) << "," << - "Actual:" << V(pinCountInPart(e, i)); + if(actual_pin_count_in_part != pinCountInPartRecomputed(e, i)) + { + LOG << "Pin count of hyperedge" << e << "in block" << i << "=>" + << "Expected:" << V(pinCountInPartRecomputed(e, i)) << "," + << "Actual:" << V(pinCountInPart(e, i)); success = false; } expected_connectivity += (actual_pin_count_in_part > 0); } - if ( expected_connectivity != connectivity(e) ) { - LOG << "Connectivity of hyperedge" << e << "=>" << - "Expected:" << V(expected_connectivity) << "," << - "Actual:" << V(connectivity(e)); + if(expected_connectivity != connectivity(e)) + { + LOG << "Connectivity of hyperedge" << e << "=>" + << "Expected:" << V(expected_connectivity) << "," + << "Actual:" << V(connectivity(e)); success = false; } } @@ -769,58 +777,72 @@ class PartitionedHypergraph { } // ! Only for testing - template - bool checkTrackedPartitionInformation(GainCache& gain_cache) { + template + bool checkTrackedPartitionInformation(GainCache &gain_cache) + { bool success = true; - for (HyperedgeID e : edges()) { + for(HyperedgeID e : edges()) + { PartitionID expected_connectivity = 0; - for (PartitionID i = 0; i < k(); ++i) { + for(PartitionID i = 0; i < k(); ++i) + { const HypernodeID actual_pin_count_in_part = pinCountInPart(e, i); - if ( actual_pin_count_in_part != pinCountInPartRecomputed(e, i) ) { - LOG << "Pin count of hyperedge" << e << "in block" << i << "=>" << - "Expected:" << V(pinCountInPartRecomputed(e, i)) << "," << - "Actual:" << V(pinCountInPart(e, i)); + if(actual_pin_count_in_part != pinCountInPartRecomputed(e, i)) + { + LOG << "Pin count of hyperedge" << e << "in block" << i << "=>" + << "Expected:" << V(pinCountInPartRecomputed(e, i)) << "," + << "Actual:" << V(pinCountInPart(e, i)); success = false; } expected_connectivity += (actual_pin_count_in_part > 0); } - if ( expected_connectivity != connectivity(e) ) { - LOG << "Connectivity of hyperedge" << e << "=>" << - "Expected:" << V(expected_connectivity) << "," << - "Actual:" << V(connectivity(e)); + if(expected_connectivity != connectivity(e)) + { + LOG << "Connectivity of hyperedge" << e << "=>" + << "Expected:" << V(expected_connectivity) << "," + << "Actual:" << V(connectivity(e)); success = false; } } - if ( gain_cache.isInitialized() ) { - for (HypernodeID u : nodes()) { + if(gain_cache.isInitialized()) + { + for(HypernodeID u : nodes()) + { const PartitionID block_of_u = partID(u); - if ( gain_cache.penaltyTerm(u, block_of_u) != - gain_cache.recomputePenaltyTerm(*this, u) ) { - LOG << "Penalty term of hypernode" << u << "=>" << - "Expected:" << V(gain_cache.recomputePenaltyTerm(*this, u)) << ", " << - "Actual:" << V(gain_cache.penaltyTerm(u, block_of_u)); - for ( const HyperedgeID& e : incidentEdges(u) ) { - LOG << V(u) << V(partID(u)) << V(e) << V(edgeSize(e)) - << V(edgeWeight(e)) << V(pinCountInPart(e, partID(u))); + if(gain_cache.penaltyTerm(u, block_of_u) != + gain_cache.recomputePenaltyTerm(*this, u)) + { + LOG 
<< "Penalty term of hypernode" << u << "=>" + << "Expected:" << V(gain_cache.recomputePenaltyTerm(*this, u)) << ", " + << "Actual:" << V(gain_cache.penaltyTerm(u, block_of_u)); + for(const HyperedgeID &e : incidentEdges(u)) + { + LOG << V(u) << V(partID(u)) << V(e) << V(edgeSize(e)) << V(edgeWeight(e)) + << V(pinCountInPart(e, partID(u))); } success = false; } - for (const PartitionID& i : gain_cache.adjacentBlocks(u)) { - if (partID(u) != i) { - if ( gain_cache.benefitTerm(u, i) != - gain_cache.recomputeBenefitTerm(*this, u, i) ) { - LOG << "Benefit term of hypernode" << u << "in block" << i << "=>" << - "Expected:" << V(gain_cache.recomputeBenefitTerm(*this, u, i)) << ", " << - "Actual:" << V(gain_cache.benefitTerm(u, i)); + for(const PartitionID &i : gain_cache.adjacentBlocks(u)) + { + if(partID(u) != i) + { + if(gain_cache.benefitTerm(u, i) != + gain_cache.recomputeBenefitTerm(*this, u, i)) + { + LOG << "Benefit term of hypernode" << u << "in block" << i << "=>" + << "Expected:" << V(gain_cache.recomputeBenefitTerm(*this, u, i)) + << ", " + << "Actual:" << V(gain_cache.benefitTerm(u, i)); success = false; } } } } - if ( !gain_cache.verifyTrackedAdjacentBlocksOfNodes(*this) ) { + if(!gain_cache.verifyTrackedAdjacentBlocksOfNodes(*this)) + { success = false; } } @@ -829,26 +851,25 @@ class PartitionedHypergraph { // ####################### Fixed Vertex Support ####################### - bool hasFixedVertices() const { - return _hg->hasFixedVertices(); - } + bool hasFixedVertices() const { return _hg->hasFixedVertices(); } - bool isFixed(const HypernodeID hn) const { - return _hg->isFixed(hn); - } + bool isFixed(const HypernodeID hn) const { return _hg->isFixed(hn); } - PartitionID fixedVertexBlock(const HypernodeID hn) const { + PartitionID fixedVertexBlock(const HypernodeID hn) const + { return _hg->fixedVertexBlock(hn); } // ####################### Memory Consumption ####################### - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); - utils::MemoryTreeNode* hypergraph_node = parent->addChild("Hypergraph"); + utils::MemoryTreeNode *hypergraph_node = parent->addChild("Hypergraph"); _hg->memoryConsumption(hypergraph_node); - utils::MemoryTreeNode* connectivity_info_node = parent->addChild("Connectivity Information"); + utils::MemoryTreeNode *connectivity_info_node = + parent->addChild("Connectivity Information"); _con_info.memoryConsumption(connectivity_info_node); parent->addChild("Part Weights", sizeof(CAtomic) * _k); @@ -866,83 +887,100 @@ class PartitionedHypergraph { // ! the original hypergraph. // ! If cut_net_splitting is activated, then cut hyperedges are splitted containing // ! only the pins of the corresponding block. Otherwise, they are discarded. 
- ExtractedBlock extract(const PartitionID block, - const vec* already_cut, + ExtractedBlock extract(const PartitionID block, const vec *already_cut, bool cut_net_splitting, - bool stable_construction_of_incident_edges) { + bool stable_construction_of_incident_edges) + { ASSERT(block != kInvalidPartition && block < _k); ASSERT(!already_cut || already_cut->size() == _hg->initialNumEdges()); // Compactify vertex ids ExtractedBlock extracted_block; - vec& hn_mapping = extracted_block.hn_mapping; + vec &hn_mapping = extracted_block.hn_mapping; hn_mapping.assign(_hg->initialNumNodes(), kInvalidHypernode); vec he_mapping(_hg->initialNumEdges(), kInvalidHyperedge); HypernodeID num_hypernodes = 0; HypernodeID num_hyperedges = 0; - tbb::parallel_invoke([&] { - for ( const HypernodeID& hn : nodes() ) { - if ( partID(hn) == block ) { - hn_mapping[hn] = num_hypernodes++; - } - } - }, [&] { - for ( const HyperedgeID& he : edges() ) { - if ( pinCountInPart(he, block) > 1 && - (cut_net_splitting || connectivity(he) == 1) ) { - he_mapping[he] = num_hyperedges++; - } - } - }); + tbb::parallel_invoke( + [&] { + for(const HypernodeID &hn : nodes()) + { + if(partID(hn) == block) + { + hn_mapping[hn] = num_hypernodes++; + } + } + }, + [&] { + for(const HyperedgeID &he : edges()) + { + if(pinCountInPart(he, block) > 1 && + (cut_net_splitting || connectivity(he) == 1)) + { + he_mapping[he] = num_hyperedges++; + } + } + }); // Extract plain hypergraph data for corresponding block - using HyperedgeVector = vec>; + using HyperedgeVector = vec >; HyperedgeVector edge_vector; vec hyperedge_weight; vec hypernode_weight; vec extracted_already_cut; - tbb::parallel_invoke([&] { - edge_vector.resize(num_hyperedges); - hyperedge_weight.resize(num_hyperedges); - doParallelForAllEdges([&](const HyperedgeID he) { - if ( pinCountInPart(he, block) > 1 && - (cut_net_splitting || connectivity(he) == 1) ) { - ASSERT(he_mapping[he] < num_hyperedges); - hyperedge_weight[he_mapping[he]] = edgeWeight(he); - for ( const HypernodeID& pin : pins(he) ) { - if ( partID(pin) == block ) { - edge_vector[he_mapping[he]].push_back(hn_mapping[pin]); + tbb::parallel_invoke( + [&] { + edge_vector.resize(num_hyperedges); + hyperedge_weight.resize(num_hyperedges); + doParallelForAllEdges([&](const HyperedgeID he) { + if(pinCountInPart(he, block) > 1 && + (cut_net_splitting || connectivity(he) == 1)) + { + ASSERT(he_mapping[he] < num_hyperedges); + hyperedge_weight[he_mapping[he]] = edgeWeight(he); + for(const HypernodeID &pin : pins(he)) + { + if(partID(pin) == block) + { + edge_vector[he_mapping[he]].push_back(hn_mapping[pin]); + } + } } - } - } - }); - }, [&] { - hypernode_weight.resize(num_hypernodes); - doParallelForAllNodes([&](const HypernodeID hn) { - if ( partID(hn) == block ) { - hypernode_weight[hn_mapping[hn]] = nodeWeight(hn); - } - }); - }, [&] { - if ( already_cut ) { - extracted_block.already_cut.resize(num_hyperedges); - const vec& already_cut_hes = *already_cut; - doParallelForAllEdges([&](const HyperedgeID he) { - if ( he_mapping[he] != kInvalidHyperedge ) { - ASSERT(he_mapping[he] < num_hyperedges); - extracted_block.already_cut[he_mapping[he]] = already_cut_hes[he]; + }); + }, + [&] { + hypernode_weight.resize(num_hypernodes); + doParallelForAllNodes([&](const HypernodeID hn) { + if(partID(hn) == block) + { + hypernode_weight[hn_mapping[hn]] = nodeWeight(hn); + } + }); + }, + [&] { + if(already_cut) + { + extracted_block.already_cut.resize(num_hyperedges); + const vec &already_cut_hes = *already_cut; + 
doParallelForAllEdges([&](const HyperedgeID he) { + if(he_mapping[he] != kInvalidHyperedge) + { + ASSERT(he_mapping[he] < num_hyperedges); + extracted_block.already_cut[he_mapping[he]] = already_cut_hes[he]; + } + }); } }); - } - }); // Construct hypergraph - extracted_block.hg = HypergraphFactory::construct(num_hypernodes, num_hyperedges, - edge_vector, hyperedge_weight.data(), hypernode_weight.data(), stable_construction_of_incident_edges); + extracted_block.hg = HypergraphFactory::construct( + num_hypernodes, num_hyperedges, edge_vector, hyperedge_weight.data(), + hypernode_weight.data(), stable_construction_of_incident_edges); // Set community ids - doParallelForAllNodes([&](const HypernodeID& hn) { - if ( partID(hn) == block ) { + doParallelForAllNodes([&](const HypernodeID &hn) { + if(partID(hn) == block) + { const HypernodeID extracted_hn = hn_mapping[hn]; extracted_block.hg.setCommunityID(extracted_hn, _hg->communityID(hn)); } @@ -952,135 +990,158 @@ class PartitionedHypergraph { // ! Extracts all blocks of the partition (from block 0 to block k). // ! This function has running time linear in the size of the original hypergraph - // ! and should be used instead of extract(...) when more than two blocks should be extracted. - std::pair, vec> extractAllBlocks(const PartitionID k, - const vec* already_cut, - const bool cut_net_splitting, - const bool stable_construction_of_incident_edges) { + // ! and should be used instead of extract(...) when more than two blocks should be + // extracted. + std::pair, vec > + extractAllBlocks(const PartitionID k, const vec *already_cut, + const bool cut_net_splitting, + const bool stable_construction_of_incident_edges) + { ASSERT(k <= _k); vec hn_mapping(_hg->initialNumNodes(), kInvalidHypernode); - vec> nodes_cnt( - k, parallel::AtomicWrapper(0)); - vec> hes2block(k); + vec > nodes_cnt( + k, parallel::AtomicWrapper(0)); + vec > hes2block(k); - if ( stable_construction_of_incident_edges ) { + if(stable_construction_of_incident_edges) + { // Stable construction for deterministic behavior requires // to determine node and edge IDs sequentially - tbb::parallel_invoke([&] { - // Compactify node IDs - for ( const HypernodeID& hn : nodes() ) { - const PartitionID block = partID(hn); - if ( block < k ) { - hn_mapping[hn] = nodes_cnt[block]++; - } - } - }, [&] { - // Get hyperedges contained in each block - for ( const HyperedgeID& he : edges() ) { - for ( const PartitionID& block : connectivitySet(he) ) { - if ( pinCountInPart(he, block) > 1 && - (cut_net_splitting || connectivity(he) == 1) ) { - hes2block[block].push_back(he); + tbb::parallel_invoke( + [&] { + // Compactify node IDs + for(const HypernodeID &hn : nodes()) + { + const PartitionID block = partID(hn); + if(block < k) + { + hn_mapping[hn] = nodes_cnt[block]++; + } } - } - } - }); - } else { - vec> hes2block_stream(k); - tbb::parallel_invoke([&] { - // Compactify node IDs - doParallelForAllNodes([&](const HypernodeID& hn) { - const PartitionID block = partID(hn); - if ( block < k ) { - hn_mapping[hn] = nodes_cnt[block]++; - } - }); - }, [&] { - // Get hyperedges contained in each block - doParallelForAllEdges([&](const HyperedgeID& he) { - for ( const PartitionID& block : connectivitySet(he) ) { - if ( pinCountInPart(he, block) > 1 && - (cut_net_splitting || connectivity(he) == 1) ) { - hes2block_stream[block].stream(he); + }, + [&] { + // Get hyperedges contained in each block + for(const HyperedgeID &he : edges()) + { + for(const PartitionID &block : connectivitySet(he)) + { + 
if(pinCountInPart(he, block) > 1 && + (cut_net_splitting || connectivity(he) == 1)) + { + hes2block[block].push_back(he); + } + } } - } - }); - // Copy hyperedges of a block into one vector - tbb::parallel_for(static_cast(0), k, [&](const PartitionID p) { - hes2block[p] = hes2block_stream[p].copy_parallel(); - }); - }); + }); + } + else + { + vec > hes2block_stream(k); + tbb::parallel_invoke( + [&] { + // Compactify node IDs + doParallelForAllNodes([&](const HypernodeID &hn) { + const PartitionID block = partID(hn); + if(block < k) + { + hn_mapping[hn] = nodes_cnt[block]++; + } + }); + }, + [&] { + // Get hyperedges contained in each block + doParallelForAllEdges([&](const HyperedgeID &he) { + for(const PartitionID &block : connectivitySet(he)) + { + if(pinCountInPart(he, block) > 1 && + (cut_net_splitting || connectivity(he) == 1)) + { + hes2block_stream[block].stream(he); + } + } + }); + // Copy hyperedges of a block into one vector + tbb::parallel_for(static_cast(0), k, [&](const PartitionID p) { + hes2block[p] = hes2block_stream[p].copy_parallel(); + }); + }); } // Extract plain hypergraph data for corresponding block - using HyperedgeVector = vec>; + using HyperedgeVector = vec >; vec extracted_blocks(k); vec edge_vector(k); - vec> he_weight(k); - vec> hn_weight(k); + vec > he_weight(k); + vec > hn_weight(k); // Allocate auxilliary graph data structures tbb::parallel_for(static_cast(0), k, [&](const PartitionID p) { const HypernodeID num_nodes = nodes_cnt[p]; const HyperedgeID num_edges = hes2block[p].size(); - tbb::parallel_invoke([&] { - edge_vector[p].resize(num_edges); - }, [&] { - he_weight[p].resize(num_edges); - }, [&] { - hn_weight[p].resize(num_nodes); - }, [&] { - if ( already_cut ) { - extracted_blocks[p].already_cut.resize(num_edges); - } - }); + tbb::parallel_invoke([&] { edge_vector[p].resize(num_edges); }, + [&] { he_weight[p].resize(num_edges); }, + [&] { hn_weight[p].resize(num_nodes); }, + [&] { + if(already_cut) + { + extracted_blocks[p].already_cut.resize(num_edges); + } + }); }); // Write blocks to auxilliary graph data structure - tbb::parallel_invoke([&] { - tbb::parallel_for(static_cast(0), k, [&](const PartitionID p) { - tbb::parallel_for(UL(0), hes2block[p].size(), [&, p](const size_t i) { - const HyperedgeID he = hes2block[p][i]; - he_weight[p][i] = edgeWeight(he); - for ( const HypernodeID& pin : pins(he) ) { - if ( partID(pin) == p ) { - edge_vector[p][i].push_back(hn_mapping[pin]); + tbb::parallel_invoke( + [&] { + tbb::parallel_for(static_cast(0), k, [&](const PartitionID p) { + tbb::parallel_for(UL(0), hes2block[p].size(), [&, p](const size_t i) { + const HyperedgeID he = hes2block[p][i]; + he_weight[p][i] = edgeWeight(he); + for(const HypernodeID &pin : pins(he)) + { + if(partID(pin) == p) + { + edge_vector[p][i].push_back(hn_mapping[pin]); + } + } + }); + }); + }, + [&] { + doParallelForAllNodes([&](const HypernodeID &hn) { + const PartitionID block = partID(hn); + const HypernodeID mapped_hn = hn_mapping[hn]; + if(block < k) + { + ASSERT(UL(mapped_hn) < hn_weight[block].size()); + hn_weight[block][mapped_hn] = nodeWeight(hn); } - } - }); - }); - }, [&] { - doParallelForAllNodes([&](const HypernodeID& hn) { - const PartitionID block = partID(hn); - const HypernodeID mapped_hn = hn_mapping[hn]; - if ( block < k ) { - ASSERT(UL(mapped_hn) < hn_weight[block].size()); - hn_weight[block][mapped_hn] = nodeWeight(hn); - } - }); - }, [&] { - if ( already_cut ) { - const vec& already_cut_hes = *already_cut; - tbb::parallel_for(static_cast(0), k, [&](const 
PartitionID p) { - tbb::parallel_for(UL(0), hes2block[p].size(), [&, p](const size_t i) { - extracted_blocks[p].already_cut[i] = already_cut_hes[hes2block[p][i]]; }); + }, + [&] { + if(already_cut) + { + const vec &already_cut_hes = *already_cut; + tbb::parallel_for(static_cast(0), k, [&](const PartitionID p) { + tbb::parallel_for(UL(0), hes2block[p].size(), [&, p](const size_t i) { + extracted_blocks[p].already_cut[i] = already_cut_hes[hes2block[p][i]]; + }); + }); + } }); - } - }); tbb::parallel_for(static_cast(0), k, [&](const PartitionID p) { const HypernodeID num_nodes = nodes_cnt[p]; const HyperedgeID num_hyperedges = hes2block[p].size(); - extracted_blocks[p].hg = HypergraphFactory::construct(num_nodes, num_hyperedges, - edge_vector[p], he_weight[p].data(), hn_weight[p].data(), - stable_construction_of_incident_edges); + extracted_blocks[p].hg = HypergraphFactory::construct( + num_nodes, num_hyperedges, edge_vector[p], he_weight[p].data(), + hn_weight[p].data(), stable_construction_of_incident_edges); }); // Set community ids - doParallelForAllNodes([&](const HypernodeID& hn) { + doParallelForAllNodes([&](const HypernodeID &hn) { const PartitionID block = partID(hn); - if ( block < k ) { + if(block < k) + { extracted_blocks[block].hg.setCommunityID(hn_mapping[hn], _hg->communityID(hn)); } }); @@ -1091,31 +1152,38 @@ class PartitionedHypergraph { return std::make_pair(std::move(extracted_blocks), std::move(hn_mapping)); } - void freeInternalData() { - if ( _k > 0 ) { - tbb::parallel_invoke( [&] { - parallel::parallel_free(_part_ids, _pin_count_update_ownership); - }, [&] { - _con_info.freeInternalData(); - } ); + void freeInternalData() + { + if(_k > 0) + { + tbb::parallel_invoke( + [&] { parallel::parallel_free(_part_ids, _pin_count_update_ownership); }, + [&] { _con_info.freeInternalData(); }); } _k = 0; } - private: - void applyPartWeightUpdates(vec& part_weight_deltas) { - for (PartitionID p = 0; p < _k; ++p) { +private: + void applyPartWeightUpdates(vec &part_weight_deltas) + { + for(PartitionID p = 0; p < _k; ++p) + { _part_weights[p].fetch_add(part_weight_deltas[p], std::memory_order_relaxed); } } - void initializeBlockWeights() { - auto accumulate = [&](tbb::blocked_range& r) { - vec pws(_k, 0); // this is not enumerable_thread_specific because of the static partitioner - for (HypernodeID u = r.begin(); u < r.end(); ++u) { - if ( nodeIsEnabled(u) ) { - const PartitionID pu = partID( u ); - const HypernodeWeight wu = nodeWeight( u ); + void initializeBlockWeights() + { + auto accumulate = [&](tbb::blocked_range &r) { + vec pws( + _k, + 0); // this is not enumerable_thread_specific because of the static partitioner + for(HypernodeID u = r.begin(); u < r.end(); ++u) + { + if(nodeIsEnabled(u)) + { + const PartitionID pu = partID(u); + const HypernodeWeight wu = nodeWeight(u); pws[pu] += wu; } } @@ -1123,25 +1191,29 @@ class PartitionedHypergraph { }; tbb::parallel_for(tbb::blocked_range(HypernodeID(0), initialNumNodes()), - accumulate, - tbb::static_partitioner() - ); + accumulate, tbb::static_partitioner()); } - void initializePinCountInPart() { - tls_enumerable_thread_specific< vec > ets_pin_count_in_part(_k, 0); + void initializePinCountInPart() + { + tls_enumerable_thread_specific > ets_pin_count_in_part(_k, 0); - auto assign = [&](tbb::blocked_range& r) { - vec& pin_counts = ets_pin_count_in_part.local(); - for (HyperedgeID he = r.begin(); he < r.end(); ++he) { - if ( edgeIsEnabled(he) ) { - for (const HypernodeID& pin : pins(he)) { + auto assign = [&](tbb::blocked_range &r) 
{ + vec &pin_counts = ets_pin_count_in_part.local(); + for(HyperedgeID he = r.begin(); he < r.end(); ++he) + { + if(edgeIsEnabled(he)) + { + for(const HypernodeID &pin : pins(he)) + { ++pin_counts[partID(pin)]; } - for (PartitionID p = 0; p < _k; ++p) { + for(PartitionID p = 0; p < _k; ++p) + { ASSERT(pinCountInPart(he, p) == 0); - if (pin_counts[p] > 0) { + if(pin_counts[p] > 0) + { _con_info.addBlock(he, p); _con_info.setPinCountInPart(he, p, pin_counts[p]); } @@ -1151,13 +1223,17 @@ class PartitionedHypergraph { } }; - tbb::parallel_for(tbb::blocked_range(HyperedgeID(0), initialNumEdges()), assign); + tbb::parallel_for(tbb::blocked_range(HyperedgeID(0), initialNumEdges()), + assign); } - HypernodeID pinCountInPartRecomputed(const HyperedgeID e, PartitionID p) const { + HypernodeID pinCountInPartRecomputed(const HyperedgeID e, PartitionID p) const + { HypernodeID pcip = 0; - for (HypernodeID u : pins(e)) { - if (partID(u) == p) { + for(HypernodeID u : pins(e)) + { + if(partID(u) == p) + { pcip++; } } @@ -1165,12 +1241,12 @@ class PartitionedHypergraph { } // ! Updates pin count in part using a spinlock. - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void updatePinCountOfHyperedge(const HyperedgeID he, - const PartitionID from, - const PartitionID to, - SynchronizedEdgeUpdate& sync_update, - const DeltaFunction& delta_func, - const NotificationFunc& notify_func) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + updatePinCountOfHyperedge(const HyperedgeID he, const PartitionID from, + const PartitionID to, SynchronizedEdgeUpdate &sync_update, + const DeltaFunction &delta_func, + const NotificationFunc ¬ify_func) + { ASSERT(he < _pin_count_update_ownership.size()); sync_update.he = he; sync_update.edge_weight = edgeWeight(he); @@ -1179,37 +1255,42 @@ class PartitionedHypergraph { notify_func(sync_update); sync_update.pin_count_in_from_part_after = decrementPinCountOfBlock(he, from); sync_update.pin_count_in_to_part_after = incrementPinCountOfBlock(he, to); - sync_update.connectivity_set_after = hasTargetGraph() ? &deepCopyOfConnectivitySet(he) : nullptr; - sync_update.pin_counts_after = hasTargetGraph() ? &_con_info.pinCountSnapshot(he) : nullptr; + sync_update.connectivity_set_after = + hasTargetGraph() ? &deepCopyOfConnectivitySet(he) : nullptr; + sync_update.pin_counts_after = + hasTargetGraph() ? 
&_con_info.pinCountSnapshot(he) : nullptr; _pin_count_update_ownership[he].unlock(); delta_func(sync_update); } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HypernodeID decrementPinCountOfBlock(const HyperedgeID e, const PartitionID p) { + HypernodeID decrementPinCountOfBlock(const HyperedgeID e, const PartitionID p) + { ASSERT(e < _hg->initialNumEdges(), "Hyperedge" << e << "does not exist"); ASSERT(edgeIsEnabled(e), "Hyperedge" << e << "is disabled"); ASSERT(p != kInvalidPartition && p < _k); const HypernodeID pin_count_after = _con_info.decrementPinCountInPart(e, p); - if ( pin_count_after == 0 ) { + if(pin_count_after == 0) + { _con_info.removeBlock(e, p); } return pin_count_after; } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HypernodeID incrementPinCountOfBlock(const HyperedgeID e, const PartitionID p) { + HypernodeID incrementPinCountOfBlock(const HyperedgeID e, const PartitionID p) + { ASSERT(e < _hg->initialNumEdges(), "Hyperedge" << e << "does not exist"); ASSERT(edgeIsEnabled(e), "Hyperedge" << e << "is disabled"); ASSERT(p != kInvalidPartition && p < _k); const HypernodeID pin_count_after = _con_info.incrementPinCountInPart(e, p); - if ( pin_count_after == 1 ) { + if(pin_count_after == 1) + { _con_info.addBlock(e, p); } return pin_count_after; } - // ! Number of nodes of the top level hypergraph HypernodeID _input_num_nodes = 0; @@ -1220,16 +1301,16 @@ class PartitionedHypergraph { PartitionID _k = 0; // ! Underlying hypergraph - Hypergraph* _hg = nullptr; + Hypergraph *_hg = nullptr; // ! Target graph on which this hypergraph is mapped - const TargetGraph* _target_graph; + const TargetGraph *_target_graph; // ! Weight and information for all blocks. - vec< CAtomic > _part_weights; + vec > _part_weights; // ! Current block IDs of the vertices - Array< PartitionID > _part_ids; + Array _part_ids; // ! Stores the pin count values and connectivity sets ConnectivityInformation _con_info; diff --git a/mt-kahypar/datastructures/pin_count_in_part.h b/mt-kahypar/datastructures/pin_count_in_part.h index b901391d5..648e2a525 100644 --- a/mt-kahypar/datastructures/pin_count_in_part.h +++ b/mt-kahypar/datastructures/pin_count_in_part.h @@ -25,18 +25,16 @@ * SOFTWARE. ******************************************************************************/ - #pragma once #include #include "tbb/enumerable_thread_specific.h" -#include "mt-kahypar/macros.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/pin_count_snapshot.h" - +#include "mt-kahypar/macros.h" namespace mt_kahypar { namespace ds { @@ -53,55 +51,47 @@ namespace ds { * of a hyperedge must be done exclusively. Different hyperedges can be updated * concurrently. 
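For intuition, a worked example of the packing scheme described above (the concrete numbers are made up): with max_value = 50 pins per hyperedge, each counter needs ceil(log2(51)) = 6 bits, so one 64-bit Value holds min(64 / 6, k) = 10 counters. With k = 32 blocks this gives 32 / 10 rounded up = 4 Values per hyperedge, and the counter of block id for hyperedge he is located as sketched here (he, id and pin_count_in_part are illustrative locals mirroring pinCountInPart below):

    const size_t word = he * 4 + id / 10;  // values_per_hyperedge = 4, entries_per_value = 10
    const size_t bit = (id % 10) * 6;      // bits_per_element = 6
    const uint64_t count = (pin_count_in_part[word] >> bit) & 0x3F;  // 0x3F = 6-bit extraction mask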
*/ -class PinCountInPart { +class PinCountInPart +{ static constexpr bool debug = false; - public: +public: using Value = uint64_t; PinCountInPart() : - _num_hyperedges(0), - _k(0), - _max_value(0), - _bits_per_element(0), - _entries_per_value(0), - _values_per_hyperedge(0), - _extraction_mask(0), - _pin_count_in_part(), - _ets_pin_counts([&] { return initPinCountSnapshot(); }) { } - - PinCountInPart(const HyperedgeID num_hyperedges, - const PartitionID k, - const HypernodeID max_value, - const bool assign_parallel = true) : - _num_hyperedges(0), - _k(0), - _max_value(0), - _bits_per_element(0), - _entries_per_value(0), - _values_per_hyperedge(0), - _extraction_mask(0), - _pin_count_in_part(), - _ets_pin_counts([&] { return initPinCountSnapshot(); }) { + _num_hyperedges(0), _k(0), _max_value(0), _bits_per_element(0), + _entries_per_value(0), _values_per_hyperedge(0), _extraction_mask(0), + _pin_count_in_part(), _ets_pin_counts([&] { return initPinCountSnapshot(); }) + { + } + + PinCountInPart(const HyperedgeID num_hyperedges, const PartitionID k, + const HypernodeID max_value, const bool assign_parallel = true) : + _num_hyperedges(0), + _k(0), _max_value(0), _bits_per_element(0), _entries_per_value(0), + _values_per_hyperedge(0), _extraction_mask(0), _pin_count_in_part(), + _ets_pin_counts([&] { return initPinCountSnapshot(); }) + { initialize(num_hyperedges, k, max_value, assign_parallel); } - PinCountInPart(const PinCountInPart&) = delete; - PinCountInPart & operator= (const PinCountInPart &) = delete; + PinCountInPart(const PinCountInPart &) = delete; + PinCountInPart &operator=(const PinCountInPart &) = delete; - PinCountInPart(PinCountInPart&& other) : - _num_hyperedges(other._num_hyperedges), - _k(other._k), - _max_value(other._max_value), - _bits_per_element(other._bits_per_element), - _entries_per_value(other._entries_per_value), - _values_per_hyperedge(other._values_per_hyperedge), - _extraction_mask(other._extraction_mask), - _pin_count_in_part(std::move(other._pin_count_in_part)), - _ets_pin_counts([&] { return initPinCountSnapshot(); }) { } + PinCountInPart(PinCountInPart &&other) : + _num_hyperedges(other._num_hyperedges), _k(other._k), _max_value(other._max_value), + _bits_per_element(other._bits_per_element), + _entries_per_value(other._entries_per_value), + _values_per_hyperedge(other._values_per_hyperedge), + _extraction_mask(other._extraction_mask), + _pin_count_in_part(std::move(other._pin_count_in_part)), + _ets_pin_counts([&] { return initPinCountSnapshot(); }) + { + } - PinCountInPart & operator= (PinCountInPart&& other) { + PinCountInPart &operator=(PinCountInPart &&other) + { _num_hyperedges = other._num_hyperedges; _k = other._k; _max_value = other._max_value; @@ -110,17 +100,18 @@ class PinCountInPart { _values_per_hyperedge = other._values_per_hyperedge; _extraction_mask = other._extraction_mask; _pin_count_in_part = std::move(other._pin_count_in_part); - _ets_pin_counts = tbb::enumerable_thread_specific([&] { return initPinCountSnapshot(); }); + _ets_pin_counts = tbb::enumerable_thread_specific( + [&] { return initPinCountSnapshot(); }); return *this; } // ! 
Initializes the data structure - void initialize(const HyperedgeID num_hyperedges, - const PartitionID k, - const HypernodeID max_value, - const bool assign_parallel = true) { + void initialize(const HyperedgeID num_hyperedges, const PartitionID k, + const HypernodeID max_value, const bool assign_parallel = true) + { ASSERT(_num_hyperedges == 0); - if ( num_hyperedges > 0 ) { + if(num_hyperedges > 0) + { _num_hyperedges = num_hyperedges; _k = k; _max_value = max_value; @@ -129,24 +120,27 @@ class PinCountInPart { _values_per_hyperedge = num_values_per_hyperedge(k, max_value); _extraction_mask = std::pow(2UL, _bits_per_element) - UL(1); _pin_count_in_part.resize("Refinement", "pin_count_in_part", - num_hyperedges * _values_per_hyperedge, true, assign_parallel); + num_hyperedges * _values_per_hyperedge, true, + assign_parallel); } } - void reset(const bool assign_parallel = true) { + void reset(const bool assign_parallel = true) + { _pin_count_in_part.assign(_pin_count_in_part.size(), 0, assign_parallel); } // ! Returns a snapshot of the connectivity set of hyperedge he - inline PinCountSnapshot& snapshot(const HyperedgeID he) { - PinCountSnapshot& cpy = _ets_pin_counts.local(); + inline PinCountSnapshot &snapshot(const HyperedgeID he) + { + PinCountSnapshot &cpy = _ets_pin_counts.local(); cpy.snapshot(_pin_count_in_part.data() + he * _values_per_hyperedge); return cpy; } // ! Returns the pin count of the hyperedge in the corresponding block - inline HypernodeID pinCountInPart(const HyperedgeID he, - const PartitionID id) const { + inline HypernodeID pinCountInPart(const HyperedgeID he, const PartitionID id) const + { ASSERT(he < _num_hyperedges); ASSERT(id != kInvalidPartition && id < _k); const size_t value_pos = he * _values_per_hyperedge + id / _entries_per_value; @@ -156,9 +150,9 @@ class PinCountInPart { } // ! Sets the pin count of the hyperedge in the corresponding block to value - inline void setPinCountInPart(const HyperedgeID he, - const PartitionID id, - const HypernodeID value) { + inline void setPinCountInPart(const HyperedgeID he, const PartitionID id, + const HypernodeID value) + { ASSERT(he < _num_hyperedges); ASSERT(id != kInvalidPartition && id < _k); const size_t value_pos = he * _values_per_hyperedge + id / _entries_per_value; @@ -167,14 +161,14 @@ class PinCountInPart { } // ! Increments the pin count of the hyperedge in the corresponding block - inline HypernodeID incrementPinCountInPart(const HyperedgeID he, - const PartitionID id) { + inline HypernodeID incrementPinCountInPart(const HyperedgeID he, const PartitionID id) + { ASSERT(he < _num_hyperedges); ASSERT(id != kInvalidPartition && id < _k); const size_t value_pos = he * _values_per_hyperedge + id / _entries_per_value; const size_t bit_pos = (id % _entries_per_value) * _bits_per_element; const Value mask = _extraction_mask << bit_pos; - Value& current_value = _pin_count_in_part[value_pos]; + Value ¤t_value = _pin_count_in_part[value_pos]; Value pin_count_in_part = (current_value & mask) >> bit_pos; ASSERT(pin_count_in_part + 1 <= _max_value); updateEntry(current_value, bit_pos, pin_count_in_part + 1); @@ -182,14 +176,14 @@ class PinCountInPart { } // ! 
Decrements the pin count of the hyperedge in the corresponding block - inline HypernodeID decrementPinCountInPart(const HyperedgeID he, - const PartitionID id) { + inline HypernodeID decrementPinCountInPart(const HyperedgeID he, const PartitionID id) + { ASSERT(he < _num_hyperedges); ASSERT(id != kInvalidPartition && id < _k); const size_t value_pos = he * _values_per_hyperedge + id / _entries_per_value; const size_t bit_pos = (id % _entries_per_value) * _bits_per_element; const Value mask = _extraction_mask << bit_pos; - Value& current_value = _pin_count_in_part[value_pos]; + Value ¤t_value = _pin_count_in_part[value_pos]; Value pin_count_in_part = (current_value & mask) >> bit_pos; ASSERT(pin_count_in_part > UL(0)); updateEntry(current_value, bit_pos, pin_count_in_part - 1); @@ -197,55 +191,53 @@ class PinCountInPart { } // ! Returns the size in bytes of this data structure - size_t size_in_bytes() const { - return sizeof(Value) * _pin_count_in_part.size(); - } + size_t size_in_bytes() const { return sizeof(Value) * _pin_count_in_part.size(); } - void freeInternalData() { - parallel::free(_pin_count_in_part); - } + void freeInternalData() { parallel::free(_pin_count_in_part); } - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); parent->addChild("Pin Count Values", sizeof(Value) * _pin_count_in_part.size()); } - static size_t num_elements(const HyperedgeID num_hyperedges, - const PartitionID k, - const HypernodeID max_value) { + static size_t num_elements(const HyperedgeID num_hyperedges, const PartitionID k, + const HypernodeID max_value) + { return num_hyperedges * num_values_per_hyperedge(k, max_value); } - private: - inline void updateEntry(Value& value, - const size_t bit_pos, - const Value new_value) { +private: + inline void updateEntry(Value &value, const size_t bit_pos, const Value new_value) + { ASSERT(new_value <= _max_value); const Value zero_mask = ~(_extraction_mask << bit_pos); const Value value_mask = new_value << bit_pos; value = (value & zero_mask) | value_mask; } - PinCountSnapshot initPinCountSnapshot() const { + PinCountSnapshot initPinCountSnapshot() const + { return PinCountSnapshot(_k, _max_value); } - static size_t num_values_per_hyperedge(const PartitionID k, - const HypernodeID max_value) { + static size_t num_values_per_hyperedge(const PartitionID k, const HypernodeID max_value) + { const size_t entries_per_value = num_entries_per_value(k, max_value); ASSERT(entries_per_value <= static_cast(k)); - return k / entries_per_value + (k % entries_per_value!= 0); + return k / entries_per_value + (k % entries_per_value != 0); } - static size_t num_entries_per_value(const PartitionID k, - const HypernodeID max_value) { + static size_t num_entries_per_value(const PartitionID k, const HypernodeID max_value) + { const size_t bits_per_element = num_bits_per_element(max_value); const size_t bits_per_value = sizeof(Value) * 8UL; ASSERT(bits_per_element <= bits_per_value); return std::min(bits_per_value / bits_per_element, static_cast(k)); } - static size_t num_bits_per_element(const HypernodeID max_value) { + static size_t num_bits_per_element(const HypernodeID max_value) + { return std::ceil(std::log2(static_cast(max_value + 1))); } @@ -258,7 +250,6 @@ class PinCountInPart { Value _extraction_mask; Array _pin_count_in_part; tbb::enumerable_thread_specific _ets_pin_counts; - }; -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git 
a/mt-kahypar/datastructures/pin_count_snapshot.h b/mt-kahypar/datastructures/pin_count_snapshot.h index ce16c19f0..176f53b86 100644 --- a/mt-kahypar/datastructures/pin_count_snapshot.h +++ b/mt-kahypar/datastructures/pin_count_snapshot.h @@ -25,51 +25,47 @@ * SOFTWARE. ******************************************************************************/ - #pragma once #include - -#include "mt-kahypar/macros.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" - +#include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/macros.h" namespace mt_kahypar { namespace ds { -class PinCountSnapshot { +class PinCountSnapshot +{ static constexpr bool debug = false; - public: +public: using Value = uint64_t; - PinCountSnapshot(const PartitionID k, - const HypernodeID max_value) : - _k(k), - _max_value(max_value), - _bits_per_element(num_bits_per_element(max_value)), - _entries_per_value(num_entries_per_value(k, max_value)), - _extraction_mask(0), - _pin_counts() { + PinCountSnapshot(const PartitionID k, const HypernodeID max_value) : + _k(k), _max_value(max_value), _bits_per_element(num_bits_per_element(max_value)), + _entries_per_value(num_entries_per_value(k, max_value)), _extraction_mask(0), + _pin_counts() + { _extraction_mask = std::pow(2UL, _bits_per_element) - UL(1); _pin_counts.assign(num_values_per_hyperedge(k, max_value), 0); } - PinCountSnapshot(const PinCountSnapshot&) = delete; - PinCountSnapshot & operator= (const PinCountSnapshot &) = delete; + PinCountSnapshot(const PinCountSnapshot &) = delete; + PinCountSnapshot &operator=(const PinCountSnapshot &) = delete; - PinCountSnapshot(PinCountSnapshot&& other) : - _k(other._k), - _max_value(other._max_value), - _bits_per_element(other._bits_per_element), - _entries_per_value(other._entries_per_value), - _extraction_mask(other._extraction_mask), - _pin_counts(std::move(other._pin_counts)) { } + PinCountSnapshot(PinCountSnapshot &&other) : + _k(other._k), _max_value(other._max_value), + _bits_per_element(other._bits_per_element), + _entries_per_value(other._entries_per_value), + _extraction_mask(other._extraction_mask), _pin_counts(std::move(other._pin_counts)) + { + } - PinCountSnapshot & operator= (PinCountSnapshot&& other) { + PinCountSnapshot &operator=(PinCountSnapshot &&other) + { _k = other._k; _max_value = other._max_value; _bits_per_element = other._bits_per_element; @@ -79,16 +75,16 @@ class PinCountSnapshot { return *this; } - void reset() { - memset(_pin_counts.data(), 0, sizeof(Value) * _pin_counts.size()); - } + void reset() { memset(_pin_counts.data(), 0, sizeof(Value) * _pin_counts.size()); } - void snapshot(const Value* src) { + void snapshot(const Value *src) + { std::memcpy(_pin_counts.data(), src, sizeof(Value) * _pin_counts.size()); } // ! Returns the pin count of the hyperedge in the corresponding block - inline HypernodeID pinCountInPart(const PartitionID id) const { + inline HypernodeID pinCountInPart(const PartitionID id) const + { ASSERT(id != kInvalidPartition && id < _k); const size_t value_pos = id / _entries_per_value; const size_t bit_pos = (id % _entries_per_value) * _bits_per_element; @@ -97,8 +93,8 @@ class PinCountSnapshot { } // ! 
Sets the pin count of the hyperedge in the corresponding block to value - inline void setPinCountInPart(const PartitionID id, - const HypernodeID value) { + inline void setPinCountInPart(const PartitionID id, const HypernodeID value) + { ASSERT(id != kInvalidPartition && id < _k); const size_t value_pos = id / _entries_per_value; const size_t bit_pos = (id % _entries_per_value) * _bits_per_element; @@ -106,12 +102,13 @@ class PinCountSnapshot { } // ! Increments the pin count of the hyperedge in the corresponding block - inline HypernodeID incrementPinCountInPart(const PartitionID id) { + inline HypernodeID incrementPinCountInPart(const PartitionID id) + { ASSERT(id != kInvalidPartition && id < _k); const size_t value_pos = id / _entries_per_value; const size_t bit_pos = (id % _entries_per_value) * _bits_per_element; const Value mask = _extraction_mask << bit_pos; - Value& current_value = _pin_counts[value_pos]; + Value ¤t_value = _pin_counts[value_pos]; Value pin_count_in_part = (current_value & mask) >> bit_pos; ASSERT(pin_count_in_part + 1 <= _max_value); updateEntry(current_value, bit_pos, pin_count_in_part + 1); @@ -119,44 +116,45 @@ class PinCountSnapshot { } // ! Decrements the pin count of the hyperedge in the corresponding block - inline HypernodeID decrementPinCountInPart(const PartitionID id) { + inline HypernodeID decrementPinCountInPart(const PartitionID id) + { ASSERT(id != kInvalidPartition && id < _k); const size_t value_pos = id / _entries_per_value; const size_t bit_pos = (id % _entries_per_value) * _bits_per_element; const Value mask = _extraction_mask << bit_pos; - Value& current_value = _pin_counts[value_pos]; + Value ¤t_value = _pin_counts[value_pos]; Value pin_count_in_part = (current_value & mask) >> bit_pos; ASSERT(pin_count_in_part > UL(0)); updateEntry(current_value, bit_pos, pin_count_in_part - 1); return pin_count_in_part - 1; } - private: - inline void updateEntry(Value& value, - const size_t bit_pos, - const Value new_value) { +private: + inline void updateEntry(Value &value, const size_t bit_pos, const Value new_value) + { ASSERT(new_value <= _max_value); const Value zero_mask = ~(_extraction_mask << bit_pos); const Value value_mask = new_value << bit_pos; value = (value & zero_mask) | value_mask; } - static size_t num_values_per_hyperedge(const PartitionID k, - const HypernodeID max_value) { + static size_t num_values_per_hyperedge(const PartitionID k, const HypernodeID max_value) + { const size_t entries_per_value = num_entries_per_value(k, max_value); ASSERT(entries_per_value <= static_cast(k)); - return k / entries_per_value + (k % entries_per_value!= 0); + return k / entries_per_value + (k % entries_per_value != 0); } - static size_t num_entries_per_value(const PartitionID k, - const HypernodeID max_value) { + static size_t num_entries_per_value(const PartitionID k, const HypernodeID max_value) + { const size_t bits_per_element = num_bits_per_element(max_value); const size_t bits_per_value = sizeof(Value) * size_t(8); ASSERT(bits_per_element <= bits_per_value); return std::min(bits_per_value / bits_per_element, static_cast(k)); } - static size_t num_bits_per_element(const HypernodeID max_value) { + static size_t num_bits_per_element(const HypernodeID max_value) + { return std::ceil(std::log2(static_cast(max_value + 1))); } @@ -167,5 +165,5 @@ class PinCountSnapshot { Value _extraction_mask; vec _pin_counts; }; -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git 
a/mt-kahypar/datastructures/priority_queue.h b/mt-kahypar/datastructures/priority_queue.h index 6d22d179b..280e9bce6 100644 --- a/mt-kahypar/datastructures/priority_queue.h +++ b/mt-kahypar/datastructures/priority_queue.h @@ -26,11 +26,11 @@ ******************************************************************************/ #pragma once -#include -#include -#include #include #include +#include +#include +#include #include @@ -41,27 +41,26 @@ static constexpr PosT invalid_position = std::numeric_limits::max(); namespace ds { -template, uint32_t arity = 4> -class Heap { -static constexpr bool enable_heavy_assert = false; +template , + uint32_t arity = 4> +class Heap +{ + static constexpr bool enable_heavy_assert = false; + public: static_assert(arity > 1); - explicit Heap(PosT* positions, size_t positions_size) : - comp(), - heap(), - positions(positions), - positions_size(positions_size) { } - - IdT top() const { - return heap[0].id; + explicit Heap(PosT *positions, size_t positions_size) : + comp(), heap(), positions(positions), positions_size(positions_size) + { } - KeyT topKey() const { - return heap[0].key; - } + IdT top() const { return heap[0].id; } + + KeyT topKey() const { return heap[0].key; } - void deleteTop() { + void deleteTop() + { assert(!empty()); positions[heap[0].id] = invalid_position; positions[heap.back().id] = 0; @@ -70,16 +69,18 @@ static constexpr bool enable_heavy_assert = false; siftDown(0); } - void insert(const IdT e, const KeyT k) { + void insert(const IdT e, const KeyT k) + { ASSERT(!contains(e)); ASSERT(size() < positions_size); const PosT pos = size(); positions[e] = pos; - heap.push_back({k, e}); + heap.push_back({ k, e }); siftUp(pos); } - void remove(const IdT e) { + void remove(const IdT e) + { assert(!empty() && contains(e)); PosT pos = positions[e]; const KeyT removedKey = heap[pos].key, lastKey = heap.back().key; @@ -87,15 +88,21 @@ static constexpr bool enable_heavy_assert = false; positions[heap.back().id] = pos; positions[e] = invalid_position; heap.pop_back(); - if (comp(removedKey, lastKey)) { + if(comp(removedKey, lastKey)) + { siftUp(pos); - } else { + } + else + { siftDown(pos); } } - // assumes semantics of comp = std::less, i.e. we have a MaxHeap and increaseKey moves the element up the tree. if comp = std::greater, increaseKey will still move the element up the tree - void increaseKey(const IdT e, const KeyT newKey) { + // assumes semantics of comp = std::less, i.e. we have a MaxHeap and increaseKey moves + // the element up the tree. if comp = std::greater, increaseKey will still move the + // element up the tree + void increaseKey(const IdT e, const KeyT newKey) + { assert(contains(e)); const PosT pos = positions[e]; assert(comp(heap[pos].key, newKey)); @@ -103,8 +110,10 @@ static constexpr bool enable_heavy_assert = false; siftUp(pos); } - // assumes semantics of comp = std::less, i.e. we have a MaxHeap and decreaseKey moves the element down the tree - void decreaseKey(const IdT e, const KeyT newKey) { + // assumes semantics of comp = std::less, i.e. 
we have a MaxHeap and decreaseKey moves + // the element down the tree + void decreaseKey(const IdT e, const KeyT newKey) + { assert(contains(e)); const PosT pos = positions[e]; assert(comp(newKey, heap[pos].key)); @@ -112,118 +121,120 @@ static constexpr bool enable_heavy_assert = false; siftDown(pos); } - void adjustKey(const IdT e, const KeyT newKey) { + void adjustKey(const IdT e, const KeyT newKey) + { assert(contains(e)); const PosT pos = positions[e]; - if (comp(heap[pos].key, newKey)) { + if(comp(heap[pos].key, newKey)) + { increaseKey(e, newKey); - } else if (comp(newKey, heap[pos].key)) { + } + else if(comp(newKey, heap[pos].key)) + { decreaseKey(e, newKey); } } - KeyT getKey(const IdT e) const { + KeyT getKey(const IdT e) const + { assert(contains(e)); return heap[positions[e]].key; } - void insertOrAdjustKey(const IdT e, const KeyT newKey) { - if (contains(e)) { + void insertOrAdjustKey(const IdT e, const KeyT newKey) + { + if(contains(e)) + { adjustKey(e, newKey); - } else { + } + else + { insert(e, newKey); } } - void clear() { - heap.clear(); - } + void clear() { heap.clear(); } - bool contains(const IdT e) const { + bool contains(const IdT e) const + { assert(fits(e)); return positions[e] < heap.size() && heap[positions[e]].id == e; } - PosT size() const { - return static_cast(heap.size()); - } + PosT size() const { return static_cast(heap.size()); } - bool empty() const { - return size() == 0; - } + bool empty() const { return size() == 0; } - KeyT keyAtPos(const PosT pos) const { - return heap[pos].key; - } + KeyT keyAtPos(const PosT pos) const { return heap[pos].key; } - KeyT keyOf(const IdT id) const { - return heap[positions[id]].key; - } + KeyT keyOf(const IdT id) const { return heap[positions[id]].key; } - IdT at(const PosT pos) const { - return heap[pos].id; - } + IdT at(const PosT pos) const { return heap[pos].id; } - void setHandle(PosT* pos, size_t pos_size) { + void setHandle(PosT *pos, size_t pos_size) + { clear(); positions = pos; positions_size = pos_size; } - void print() { - for (PosT i = 0; i < size(); ++i) { - std::cout << "(" << heap[i].id << "," << heap[i].key << ")" << " "; + void print() + { + for(PosT i = 0; i < size(); ++i) + { + std::cout << "(" << heap[i].id << "," << heap[i].key << ")" + << " "; } std::cout << std::endl; } - size_t size_in_bytes() const { - return heap.capacity() * sizeof(HeapElement); - } - + size_t size_in_bytes() const { return heap.capacity() * sizeof(HeapElement); } protected: - - bool isHeap() const { - for (PosT i = 1; i < size(); ++i) { - if (comp(heap[parent(i)].key, heap[i].key)) { - LOG << "heap property violation" << V(i) << V(parent(i)) << V(arity) << V(heap[i].key) << V(heap[parent(i)].key); + bool isHeap() const + { + for(PosT i = 1; i < size(); ++i) + { + if(comp(heap[parent(i)].key, heap[i].key)) + { + LOG << "heap property violation" << V(i) << V(parent(i)) << V(arity) + << V(heap[i].key) << V(heap[parent(i)].key); return false; } } return true; } - bool positionsMatch() const { - for (PosT i = 0; i < size(); ++i) { + bool positionsMatch() const + { + for(PosT i = 0; i < size(); ++i) + { assert(size_t(heap[i].id) < positions_size); - if (positions[heap[i].id] != i) { - LOG << "position mismatch" << V(heap.size()) << V(i) << V(heap[i].id) << V(positions[heap[i].id]) << V(positions_size); + if(positions[heap[i].id] != i) + { + LOG << "position mismatch" << V(heap.size()) << V(i) << V(heap[i].id) + << V(positions[heap[i].id]) << V(positions_size); return false; } } return true; } - bool fits(const IdT id) const { - 
return static_cast(id) < positions_size; - } + bool fits(const IdT id) const { return static_cast(id) < positions_size; } - PosT parent(const PosT pos) const { - return (pos - 1) / arity; - } + PosT parent(const PosT pos) const { return (pos - 1) / arity; } - PosT firstChild(const PosT pos) const { - return pos*arity + 1; - } + PosT firstChild(const PosT pos) const { return pos * arity + 1; } - void siftUp(PosT pos) { + void siftUp(PosT pos) + { const KeyT k = heap[pos].key; const IdT id = heap[pos].id; PosT parent_pos = parent(pos); - while (pos > 0 && comp(heap[parent_pos].key, k)) { // eliminate pos > 0 check by a sentinel at position zero? - positions[ heap[parent_pos].id ] = pos; + while(pos > 0 && comp(heap[parent_pos].key, k)) + { // eliminate pos > 0 check by a sentinel at position zero? + positions[heap[parent_pos].id] = pos; heap[pos] = heap[parent_pos]; pos = parent_pos; parent_pos = parent(pos); @@ -232,106 +243,124 @@ static constexpr bool enable_heavy_assert = false; heap[pos].id = id; heap[pos].key = k; - //HEAVY_REFINEMENT_ASSERT(isHeap()); - //HEAVY_REFINEMENT_ASSERT(positionsMatch()); + // HEAVY_REFINEMENT_ASSERT(isHeap()); + // HEAVY_REFINEMENT_ASSERT(positionsMatch()); } - void siftDown(PosT pos) { + void siftDown(PosT pos) + { const KeyT k = heap[pos].key; const IdT id = heap[pos].id; const PosT initial_pos = pos; PosT first = firstChild(pos); - while (first < size() && first != pos) { + while(first < size() && first != pos) + { PosT largestChild; - if constexpr (arity > 2) { + if constexpr(arity > 2) + { largestChild = first; KeyT largestChildKey = heap[largestChild].key; // find child with largest key for MaxHeap / smallest key for MinHeap const PosT firstInvalid = std::min(size(), firstChild(pos + 1)); - for (PosT c = first + 1; c < firstInvalid; ++c) { - if ( comp(largestChildKey, heap[c].key) ) { + for(PosT c = first + 1; c < firstInvalid; ++c) + { + if(comp(largestChildKey, heap[c].key)) + { largestChildKey = heap[c].key; largestChild = c; } } - if (comp(largestChildKey, k) || largestChildKey == k) { + if(comp(largestChildKey, k) || largestChildKey == k) + { break; } - - } else { + } + else + { assert(arity == 2); - const PosT second = std::min(first + 1, size() - 1); // TODO this branch is not cool. maybe make the while loop condition secondChild(pos) < size() ? + const PosT second = std::min( + first + 1, size() - 1); // TODO this branch is not cool. maybe make the while + // loop condition secondChild(pos) < size() ? const KeyT k1 = heap[first].key, k2 = heap[second].key; const bool c2IsLarger = comp(k1, k2); const KeyT largestChildKey = c2IsLarger ? k2 : k1; - if (comp(largestChildKey, k) || largestChildKey == k) { + if(comp(largestChildKey, k) || largestChildKey == k) + { break; } largestChild = c2IsLarger ? 
second : first; } - positions[ heap[largestChild].id ] = pos; + positions[heap[largestChild].id] = pos; heap[pos] = heap[largestChild]; pos = largestChild; first = firstChild(pos); } - if (pos != initial_pos) { + if(pos != initial_pos) + { positions[id] = pos; heap[pos].key = k; heap[pos].id = id; } - //HEAVY_REFINEMENT_ASSERT(isHeap()); - //HEAVY_REFINEMENT_ASSERT(positionsMatch()); + // HEAVY_REFINEMENT_ASSERT(isHeap()); + // HEAVY_REFINEMENT_ASSERT(positionsMatch()); } - struct HeapElement { + struct HeapElement + { KeyT key; IdT id; }; - Comparator comp; // comp(heap[parent(pos)].key, heap[pos].key) returns true if the element at pos should move upward --> comp = std::less for MaxHeaps - // similarly comp(heap[child(pos)].key, heap[pos].key) returns false if the element at pos should move downward + Comparator comp; // comp(heap[parent(pos)].key, heap[pos].key) returns true if the + // element at pos should move upward --> comp = std::less for MaxHeaps + // similarly comp(heap[child(pos)].key, heap[pos].key) returns false if + // the element at pos should move downward vec heap; - PosT* positions; + PosT *positions; size_t positions_size; }; - // used to initialize handles in ExclusiveHandleHeap before handing a ref to Heap -struct HandlesPBase { - explicit HandlesPBase(size_t n) : - handles(n, invalid_position) { } +struct HandlesPBase +{ + explicit HandlesPBase(size_t n) : handles(n, invalid_position) {} vec handles; }; -template -class ExclusiveHandleHeap : protected HandlesPBase, public HeapT { +template +class ExclusiveHandleHeap : protected HandlesPBase, public HeapT +{ public: explicit ExclusiveHandleHeap(size_t nHandles) : - HandlesPBase(nHandles), - HeapT(this->handles.data(), this->handles.size()) { } + HandlesPBase(nHandles), HeapT(this->handles.data(), this->handles.size()) + { + } - //at this point this->handles is already a deep copy of other.handles - ExclusiveHandleHeap(const ExclusiveHandleHeap& other) : - HandlesPBase(other), - HeapT(this->handles.data(), this->handles.size()) { } + // at this point this->handles is already a deep copy of other.handles + ExclusiveHandleHeap(const ExclusiveHandleHeap &other) : + HandlesPBase(other), HeapT(this->handles.data(), this->handles.size()) + { + } - void resize(const size_t new_n) { - if ( this->handles.size() < new_n ) { + void resize(const size_t new_n) + { + if(this->handles.size() < new_n) + { this->handles.assign(new_n, invalid_position); this->setHandle(this->handles.data(), new_n); } } }; -template +template using MaxHeap = Heap, 2>; } diff --git a/mt-kahypar/datastructures/sparse_map.h b/mt-kahypar/datastructures/sparse_map.h index 6131f9e12..b82563275 100644 --- a/mt-kahypar/datastructures/sparse_map.h +++ b/mt-kahypar/datastructures/sparse_map.h @@ -34,18 +34,18 @@ #pragma once #include +#include #include #include #include -#include #include "kahypar-resources/macros.h" #include "kahypar-resources/meta/mandatory.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/memory_pool.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/parallel/stl/scalable_unique_ptr.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" namespace mt_kahypar { namespace ds { @@ -56,148 +56,147 @@ namespace ds { * ACM Letters on Programming Languages and Systems (LOPLAS) 2.1-4 (1993): 59-69. 
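A generic sketch of the sparse-set trick from the citation above, expressed with the fields declared below (the real membership test lives in the derived classes' containsImpl, so this is illustrative only):

    // A key is present iff its slot in _sparse points at a dense entry that points back.
    bool contains_sketch(const Key key) const {
      const size_t idx = _sparse[key];               // may hold a stale index
      return idx < _size && _dense[idx].key == key;  // the pair of checks makes stale data harmless
    }

The point of the technique is that _sparse never has to be reinitialized: resetting _size alone empties the map.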
*/ -template -class SparseMapBase { - protected: - struct MapElement { +class SparseMapBase +{ +protected: + struct MapElement + { Key key; Value value; }; - public: - SparseMapBase(const SparseMapBase&) = delete; - SparseMapBase& operator= (const SparseMapBase&) = delete; +public: + SparseMapBase(const SparseMapBase &) = delete; + SparseMapBase &operator=(const SparseMapBase &) = delete; - SparseMapBase& operator= (SparseMapBase&&) = delete; + SparseMapBase &operator=(SparseMapBase &&) = delete; - size_t size() const { - return _size; - } + size_t size() const { return _size; } - void setMaxSize(const size_t max_size) { + void setMaxSize(const size_t max_size) + { ASSERT(_sparse); - _dense = reinterpret_cast(_sparse + max_size); + _dense = reinterpret_cast(_sparse + max_size); } - bool contains(const Key key) const { - return static_cast(this)->containsImpl(key); + bool contains(const Key key) const + { + return static_cast(this)->containsImpl(key); } - void add(const Key key, const Value value) { - static_cast(this)->addImpl(key, value); + void add(const Key key, const Value value) + { + static_cast(this)->addImpl(key, value); } - const MapElement* begin() const { - return _dense; - } + const MapElement *begin() const { return _dense; } - const MapElement* end() const { - return _dense + _size; - } + const MapElement *end() const { return _dense + _size; } - MapElement* begin() { - return _dense; - } + MapElement *begin() { return _dense; } - MapElement* end() { - return _dense + _size; - } + MapElement *end() { return _dense + _size; } - void clear() { - static_cast(this)->clearImpl(); - } + void clear() { static_cast(this)->clearImpl(); } - Value& operator[] (const Key key) { + Value &operator[](const Key key) + { const size_t index = _sparse[key]; - if (!contains(key)) { - _dense[_size] = MapElement { key, Value() }; + if(!contains(key)) + { + _dense[_size] = MapElement{ key, Value() }; _sparse[key] = _size++; return _dense[_size - 1].value; } return _dense[index].value; } - const Value & get(const Key key) const { + const Value &get(const Key key) const + { ASSERT(contains(key), V(key)); return _dense[_sparse[key]].value; } - Value getOrDefault(const Key key) const { + Value getOrDefault(const Key key) const + { const size_t index = _sparse[key]; - if (!contains(key)) { + if(!contains(key)) + { return Value(); } return _dense[index].value; } - void freeInternalData() { + void freeInternalData() + { _size = 0; _data = nullptr; _sparse = nullptr; _dense = nullptr; } - protected: +protected: explicit SparseMapBase(const size_t max_size) : - _size(0), - _data(nullptr), - _sparse(nullptr), - _dense(nullptr) { + _size(0), _data(nullptr), _sparse(nullptr), _dense(nullptr) + { allocate_data(max_size); } ~SparseMapBase() = default; - SparseMapBase(SparseMapBase&& other) : - _size(other._size), - _data(std::move(other._data)), - _sparse(std::move(other._sparse)), - _dense(std::move(other._dense)) { + SparseMapBase(SparseMapBase &&other) : + _size(other._size), _data(std::move(other._data)), + _sparse(std::move(other._sparse)), _dense(std::move(other._dense)) + { other._size = 0; other._data = nullptr; other._sparse = nullptr; other._dense = nullptr; } - void allocate_data(const size_t max_size) { + void allocate_data(const size_t max_size) + { ASSERT(!_data && !_sparse); - const size_t num_elements = (max_size * sizeof(MapElement) + max_size * sizeof(size_t)) / sizeof(size_t); - char* data = parallel::MemoryPool::instance().request_unused_mem_chunk(num_elements, sizeof(size_t)); - if ( data ) 
{ - _sparse = reinterpret_cast(data); - } else { + const size_t num_elements = + (max_size * sizeof(MapElement) + max_size * sizeof(size_t)) / sizeof(size_t); + char *data = parallel::MemoryPool::instance().request_unused_mem_chunk( + num_elements, sizeof(size_t)); + if(data) + { + _sparse = reinterpret_cast(data); + } + else + { _data = parallel::make_unique(num_elements); - _sparse = reinterpret_cast(_data.get()); + _sparse = reinterpret_cast(_data.get()); } - _dense = reinterpret_cast(_sparse + max_size); + _dense = reinterpret_cast(_sparse + max_size); } size_t _size; parallel::tbb_unique_ptr _data; - size_t* _sparse; - MapElement* _dense; + size_t *_sparse; + MapElement *_dense; }; - -template -class SparseMap final : public SparseMapBase >{ +template +class SparseMap final : public SparseMapBase > +{ using Base = SparseMapBase >; friend Base; - public: - explicit SparseMap(const Key max_size) : - Base(max_size) { } +public: + explicit SparseMap(const Key max_size) : Base(max_size) {} - SparseMap(const SparseMap&) = delete; - SparseMap& operator= (const SparseMap& other) = delete; + SparseMap(const SparseMap &) = delete; + SparseMap &operator=(const SparseMap &other) = delete; - SparseMap(SparseMap&& other) : - Base(std::move(other)) { } + SparseMap(SparseMap &&other) : Base(std::move(other)) {} - SparseMap& operator= (SparseMap&& other) { + SparseMap &operator=(SparseMap &&other) + { _data = std::move(other._data); _sparse = std::move(other._sparse); _size = 0; @@ -211,105 +210,97 @@ class SparseMap final : public SparseMapBase > ~SparseMap() = default; - void remove(const Key key) { + void remove(const Key key) + { const size_t index = _sparse[key]; - if (index < _size && _dense[index].key == key) { + if(index < _size && _dense[index].key == key) + { std::swap(_dense[index], _dense[_size - 1]); _sparse[_dense[index].key] = index; --_size; } } - private: - bool containsImpl(const Key key) const { +private: + bool containsImpl(const Key key) const + { const size_t index = _sparse[key]; return index < _size && _dense[index].key == key; } - - void addImpl(const Key key, const Value value) { + void addImpl(const Key key, const Value value) + { const size_t index = _sparse[key]; - if (index >= _size || _dense[index].key != key) { + if(index >= _size || _dense[index].key != key) + { _dense[_size] = { key, value }; _sparse[key] = _size++; } } - void clearImpl() { - _size = 0; - } + void clearImpl() { _size = 0; } using Base::_data; - using Base::_sparse; using Base::_dense; using Base::_size; + using Base::_sparse; }; /*! * Sparse map implementation that uses a fixed size. * In contrast to the implementation in KaHyPar (see kahypar/datastructure/sparse_map.h), - * which uses as size the cardinality of the key universe, hash collisions have to be handled - * explicitly. Hash collisions are resolved with linear probing. - * Advantage of the implementation is that it uses significantly less space than the - * version in KaHyPar and should be therefore more cache-efficient. - * Note, there is no fallback strategy if all slots of the sparse map are occupied by an - * element. Please make sure that no more than MAP_SIZE elements are inserted into the - * sparse map. Otherwise, the behavior is undefined. + * which uses as size the cardinality of the key universe, hash collisions have to be + * handled explicitly. Hash collisions are resolved with linear probing. 
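`FixedSizeSparseMap`, whose description starts in the comment above, combines a power-of-two hash table probed linearly via `hash & (_map_size - 1)` with a per-slot timestamp, so clearing is a single counter increment instead of a memset. A rough standalone sketch of those two mechanisms follows, under the same assumption as the real class that the table never fills up completely; `TimestampedMap` and its members are hypothetical names, not part of this patch.

```cpp
// Sketch of (1) linear probing in a power-of-two table via bit masking and
// (2) timestamp-based O(1) clear. The real class additionally keeps a dense
// array of MapElements; this sketch only stores values in the slots.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

class TimestampedMap {
  struct Slot { uint32_t key; int value; uint64_t timestamp; };
  std::vector<Slot> slots;
  uint64_t now = 1;    // slots with an older timestamp count as empty
  size_t mask;

public:
  explicit TimestampedMap(size_t capacity_pow2)
      : slots(capacity_pow2, Slot{0, 0, 0}), mask(capacity_pow2 - 1) {}

  int& operator[](uint32_t key) {
    size_t h = key & mask;                       // requires power-of-two capacity
    while (slots[h].timestamp == now && slots[h].key != key)
      h = (h + 1) & mask;                        // linear probing
    if (slots[h].timestamp != now)               // first touch in this "epoch"
      slots[h] = Slot{key, 0, now};
    return slots[h].value;
  }

  void clear() { ++now; }                        // O(1) reset of the whole table
};

int main() {
  TimestampedMap m(1024);
  m[17] += 3; m[17] += 4; m[1041] += 1;          // 1041 collides with 17 (mod 1024)
  std::cout << m[17] << ' ' << m[1041] << '\n';  // prints: 7 1
  m.clear();
  std::cout << m[17] << '\n';                    // prints: 0
}
```

The `& mask` trick is also why the real class asserts that `MAP_SIZE` is a power of two.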
Advantage of the + * implementation is that it uses significantly less space than the version in KaHyPar and + * should be therefore more cache-efficient. Note, there is no fallback strategy if all + * slots of the sparse map are occupied by an element. Please make sure that no more than + * MAP_SIZE elements are inserted into the sparse map. Otherwise, the behavior is + * undefined. */ -template -class FixedSizeSparseMap { +template +class FixedSizeSparseMap +{ - struct MapElement { + struct MapElement + { Key key; Value value; }; - struct SparseElement { - MapElement* element; + struct SparseElement + { + MapElement *element; size_t timestamp; }; - public: - +public: static constexpr size_t MAP_SIZE = 32768; // Size of sparse map is approx. 1 MB - static_assert(MAP_SIZE && ((MAP_SIZE & (MAP_SIZE - 1)) == UL(0)), "Size of map is not a power of two!"); + static_assert(MAP_SIZE && ((MAP_SIZE & (MAP_SIZE - 1)) == UL(0)), + "Size of map is not a power of two!"); explicit FixedSizeSparseMap(const Value initial_value) : - _map_size(0), - _initial_value(initial_value), - _data(nullptr), - _size(0), - _timestamp(1), - _sparse(nullptr), - _dense(nullptr) { + _map_size(0), _initial_value(initial_value), _data(nullptr), _size(0), + _timestamp(1), _sparse(nullptr), _dense(nullptr) + { allocate(MAP_SIZE); } - explicit FixedSizeSparseMap(const size_t max_size, - const Value initial_value) : - _map_size(0), - _initial_value(initial_value), - _data(nullptr), - _size(0), - _timestamp(1), - _sparse(nullptr), - _dense(nullptr) { + explicit FixedSizeSparseMap(const size_t max_size, const Value initial_value) : + _map_size(0), _initial_value(initial_value), _data(nullptr), _size(0), + _timestamp(1), _sparse(nullptr), _dense(nullptr) + { allocate(max_size); } - FixedSizeSparseMap(const FixedSizeSparseMap&) = delete; - FixedSizeSparseMap& operator= (const FixedSizeSparseMap& other) = delete; + FixedSizeSparseMap(const FixedSizeSparseMap &) = delete; + FixedSizeSparseMap &operator=(const FixedSizeSparseMap &other) = delete; - FixedSizeSparseMap(FixedSizeSparseMap&& other) : - _map_size(other._map_size), - _initial_value(other._initial_value), - _data(std::move(other._data)), - _size(other._size), - _timestamp(other._timestamp), - _sparse(std::move(other._sparse)), - _dense(std::move(other._dense)) { + FixedSizeSparseMap(FixedSizeSparseMap &&other) : + _map_size(other._map_size), _initial_value(other._initial_value), + _data(std::move(other._data)), _size(other._size), _timestamp(other._timestamp), + _sparse(std::move(other._sparse)), _dense(std::move(other._dense)) + { other._data = nullptr; other._sparse = nullptr; other._dense = nullptr; @@ -317,63 +308,61 @@ class FixedSizeSparseMap { ~FixedSizeSparseMap() = default; - size_t capacity() const { - return _map_size; - } + size_t capacity() const { return _map_size; } - size_t size() const { - return _size; - } + size_t size() const { return _size; } - const MapElement* begin() const { - return _dense; - } + const MapElement *begin() const { return _dense; } - const MapElement* end() const { - return _dense + _size; - } + const MapElement *end() const { return _dense + _size; } - MapElement* begin() { - return _dense; - } + MapElement *begin() { return _dense; } - MapElement* end() { - return _dense + _size; - } + MapElement *end() { return _dense + _size; } - void setMaxSize(const size_t max_size) { - if ( max_size > _map_size ) { + void setMaxSize(const size_t max_size) + { + if(max_size > _map_size) + { freeInternalData(); allocate(max_size); } } - bool 
contains(const Key key) const { - SparseElement* s = find(key); + bool contains(const Key key) const + { + SparseElement *s = find(key); return containsValidElement(key, s); } - Value& operator[] (const Key key) { - SparseElement* s = find(key); - if ( containsValidElement(key, s) ) { + Value &operator[](const Key key) + { + SparseElement *s = find(key); + if(containsValidElement(key, s)) + { ASSERT(s->element); return s->element->value; - } else { + } + else + { return addElement(key, _initial_value, s)->value; } } - const Value & get(const Key key) const { + const Value &get(const Key key) const + { ASSERT(contains(key)); return find(key)->element->value; } - void clear() { + void clear() + { _size = 0; ++_timestamp; } - void freeInternalData() { + void freeInternalData() + { _size = 0; _timestamp = 0; _data = nullptr; @@ -381,22 +370,25 @@ class FixedSizeSparseMap { _dense = nullptr; } - private: - inline SparseElement* find(const Key key) const { +private: + inline SparseElement *find(const Key key) const + { ASSERT(_size < _map_size); - size_t hash = key & ( _map_size - 1 ); - while ( _sparse[hash].timestamp == _timestamp ) { + size_t hash = key & (_map_size - 1); + while(_sparse[hash].timestamp == _timestamp) + { ASSERT(_sparse[hash].element); - if ( _sparse[hash].element->key == key ) { + if(_sparse[hash].element->key == key) + { return &_sparse[hash]; } - hash = (hash + 1) & ( _map_size - 1 ); + hash = (hash + 1) & (_map_size - 1); } return &_sparse[hash]; } - inline bool containsValidElement(const Key key, - const SparseElement* s) const { + inline bool containsValidElement(const Key key, const SparseElement *s) const + { unused(key); ASSERT(s); const bool is_contained = s->timestamp == _timestamp; @@ -404,30 +396,32 @@ class FixedSizeSparseMap { return is_contained; } - inline MapElement* addElement(const Key key, - const Value value, - SparseElement* s) { + inline MapElement *addElement(const Key key, const Value value, SparseElement *s) + { ASSERT(find(key) == s); - _dense[_size] = MapElement { key, value }; - *s = SparseElement { &_dense[_size++], _timestamp }; + _dense[_size] = MapElement{ key, value }; + *s = SparseElement{ &_dense[_size++], _timestamp }; return s->element; } - void allocate(const size_t size) { - if ( _data == nullptr ) { + void allocate(const size_t size) + { + if(_data == nullptr) + { _map_size = align_to_next_power_of_two(size); - _data = std::make_unique( - _map_size * sizeof(MapElement) + _map_size * sizeof(SparseElement)); + _data = std::make_unique(_map_size * sizeof(MapElement) + + _map_size * sizeof(SparseElement)); _size = 0; _timestamp = 1; - _sparse = reinterpret_cast(_data.get()); - _dense = reinterpret_cast(_data.get() + + sizeof(SparseElement) * _map_size); + _sparse = reinterpret_cast(_data.get()); + _dense = reinterpret_cast(_data.get() + + +sizeof(SparseElement) * _map_size); memset(_data.get(), 0, _map_size * (sizeof(MapElement) + sizeof(SparseElement))); } - } - size_t align_to_next_power_of_two(const size_t size) const { + size_t align_to_next_power_of_two(const size_t size) const + { return std::pow(2.0, std::ceil(std::log2(static_cast(size)))); } @@ -437,178 +431,181 @@ class FixedSizeSparseMap { size_t _size; size_t _timestamp; - SparseElement* _sparse; - MapElement* _dense; + SparseElement *_sparse; + MapElement *_dense; }; - -template -class DynamicMapBase { +class DynamicMapBase +{ - public: - static constexpr size_t INVALID_POS_MASK = ~(std::numeric_limits::max() >> 1); // MSB is set +public: + static constexpr size_t 
INVALID_POS_MASK = + ~(std::numeric_limits::max() >> 1); // MSB is set static constexpr size_t INITIAL_CAPACITY = 16; - explicit DynamicMapBase() : - _capacity(32), - _size(0), - _timestamp(1), - _data(nullptr) { + explicit DynamicMapBase() : _capacity(32), _size(0), _timestamp(1), _data(nullptr) + { initialize(INITIAL_CAPACITY); } - DynamicMapBase(const DynamicMapBase&) = delete; - DynamicMapBase& operator= (const DynamicMapBase& other) = delete; + DynamicMapBase(const DynamicMapBase &) = delete; + DynamicMapBase &operator=(const DynamicMapBase &other) = delete; - DynamicMapBase(DynamicMapBase&& other) = default; - DynamicMapBase& operator= (DynamicMapBase&& other) = default; + DynamicMapBase(DynamicMapBase &&other) = default; + DynamicMapBase &operator=(DynamicMapBase &&other) = default; ~DynamicMapBase() = default; - size_t capacity() const { - return _capacity; - } + size_t capacity() const { return _capacity; } - size_t size() const { - return _size; - } + size_t size() const { return _size; } - void initialize(const size_t capacity) { + void initialize(const size_t capacity) + { _size = 0; _capacity = align_to_next_power_of_two(capacity); _timestamp = 1; - const size_t alloc_size = static_cast(this)->size_in_bytes(); + const size_t alloc_size = static_cast(this)->size_in_bytes(); _data = std::make_unique(alloc_size); memset(_data.get(), 0, alloc_size); - static_cast(this)->initializeImpl(); + static_cast(this)->initializeImpl(); } - bool contains(const Key key) const { + bool contains(const Key key) const + { const size_t pos = find(key); return pos < INVALID_POS_MASK; } - Value& operator[] (const Key key) { + Value &operator[](const Key key) + { size_t pos = find(key); - if ( pos < INVALID_POS_MASK ) { + if(pos < INVALID_POS_MASK) + { return getValue(pos); - } else { - if (_size + 1 > (_capacity * 2) / 5) { + } + else + { + if(_size + 1 > (_capacity * 2) / 5) + { grow(); pos = find(key); } - return static_cast(this)->addElementImpl(key, Value(), pos & ~INVALID_POS_MASK); + return static_cast(this)->addElementImpl(key, Value(), + pos & ~INVALID_POS_MASK); } } - const Value& get(const Key key) const { + const Value &get(const Key key) const + { ASSERT(contains(key)); return getValue(find(key)); } - const Value* get_if_contained(const Key key) const { + const Value *get_if_contained(const Key key) const + { const size_t pos = find(key); - if ( pos < INVALID_POS_MASK ) { + if(pos < INVALID_POS_MASK) + { return &getValue(pos); - } else { + } + else + { return nullptr; } } - void clear() { + void clear() + { _size = 0; ++_timestamp; } - private: - inline size_t find(const Key key) const { - return static_cast(this)->findImpl(key); +private: + inline size_t find(const Key key) const + { + return static_cast(this)->findImpl(key); } - void grow() { + void grow() + { const size_t old_size = _size; const size_t old_capacity = _capacity; const size_t old_timestamp = _timestamp; const size_t new_capacity = 2UL * _capacity; const std::unique_ptr old_data = std::move(_data); - const uint8_t* old_data_begin = old_data.get(); + const uint8_t *old_data_begin = old_data.get(); initialize(new_capacity); - static_cast(this)->rehashImpl( - old_data_begin, old_size, old_capacity, old_timestamp); + static_cast(this)->rehashImpl(old_data_begin, old_size, old_capacity, + old_timestamp); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Value& getValue(const size_t pos) const { - return static_cast(this)->valueAtPos(pos); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Value &getValue(const size_t pos) const + { + return 
static_cast(this)->valueAtPos(pos); } - constexpr size_t align_to_next_power_of_two(const size_t size) const { + constexpr size_t align_to_next_power_of_two(const size_t size) const + { ASSERT(size > 0); return std::pow(2.0, std::ceil(std::log2(static_cast(size)))); } - protected: +protected: size_t _capacity; size_t _size; size_t _timestamp; std::unique_ptr _data; }; +template +class DynamicSparseMap final + : public DynamicMapBase > +{ - -template -class DynamicSparseMap final : public DynamicMapBase> { - - struct MapElement { + struct MapElement + { Key key; Value value; }; - struct SparseElement { - MapElement* element; + struct SparseElement + { + MapElement *element; size_t timestamp; }; - using Base = DynamicMapBase>; + using Base = DynamicMapBase >; using Base::INVALID_POS_MASK; friend Base; - public: - explicit DynamicSparseMap() : - Base(), - _sparse(nullptr), - _dense(nullptr) { +public: + explicit DynamicSparseMap() : Base(), _sparse(nullptr), _dense(nullptr) + { Base::initialize(_capacity); } - DynamicSparseMap(const DynamicSparseMap&) = delete; - DynamicSparseMap& operator= (const DynamicSparseMap& other) = delete; + DynamicSparseMap(const DynamicSparseMap &) = delete; + DynamicSparseMap &operator=(const DynamicSparseMap &other) = delete; - DynamicSparseMap(DynamicSparseMap&& other) = default; - DynamicSparseMap& operator= (DynamicSparseMap&& other) = default; + DynamicSparseMap(DynamicSparseMap &&other) = default; + DynamicSparseMap &operator=(DynamicSparseMap &&other) = default; ~DynamicSparseMap() = default; - const MapElement* begin() const { - return _dense; - } + const MapElement *begin() const { return _dense; } - const MapElement* end() const { - return _dense + _size; - } + const MapElement *end() const { return _dense + _size; } - MapElement* begin() { - return _dense; - } + MapElement *begin() { return _dense; } - MapElement* end() { - return _dense + _size; - } + MapElement *end() { return _dense + _size; } - void freeInternalData() { + void freeInternalData() + { _size = 0; _timestamp = 0; _data = nullptr; @@ -616,15 +613,19 @@ class DynamicSparseMap final : public DynamicMapBasekey == key ) { + while(_sparse[hash].timestamp == _timestamp) + { + if(_sparse[hash].element->key == key) + { return hash; } hash = (hash + 1) & (_capacity - 1); @@ -632,93 +633,96 @@ class DynamicSparseMap final : public DynamicMapBasevalue; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Value& addElementImpl(const Key key, const Value value, const size_t pos) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Value & + addElementImpl(const Key key, const Value value, const size_t pos) + { ASSERT(pos < _capacity); - _dense[_size] = MapElement { key, value }; - _sparse[pos] = SparseElement { &_dense[_size++], _timestamp }; + _dense[_size] = MapElement{ key, value }; + _sparse[pos] = SparseElement{ &_dense[_size++], _timestamp }; return _sparse[pos].element->value; } - void initializeImpl() { - _sparse = reinterpret_cast(_data.get()); - _dense = reinterpret_cast(_data.get() + sizeof(SparseElement) * _capacity); + void initializeImpl() + { + _sparse = reinterpret_cast(_data.get()); + _dense = + reinterpret_cast(_data.get() + sizeof(SparseElement) * _capacity); } - void rehashImpl(const uint8_t* old_data_begin, - const size_t old_size, - const size_t old_capacity, - const size_t) { - const MapElement* elements = reinterpret_cast( - old_data_begin + sizeof(SparseElement) * old_capacity); - for (size_t i = 0; i < old_size; ++i ) { + void rehashImpl(const uint8_t *old_data_begin, const size_t old_size, + const 
size_t old_capacity, const size_t) + { + const MapElement *elements = reinterpret_cast( + old_data_begin + sizeof(SparseElement) * old_capacity); + for(size_t i = 0; i < old_size; ++i) + { const size_t pos = findImpl(elements[i].key) & ~INVALID_POS_MASK; addElementImpl(elements[i].key, elements[i].value, pos); } ASSERT(old_size == _size); } - using Base::_capacity; + using Base::_data; using Base::_size; using Base::_timestamp; - using Base::_data; - SparseElement* _sparse; - MapElement* _dense; + SparseElement *_sparse; + MapElement *_dense; }; +template +class DynamicFlatMap final + : public DynamicMapBase > +{ -template -class DynamicFlatMap final : public DynamicMapBase> { - - struct MapElement { + struct MapElement + { Key key; Value value; size_t timestamp; }; - using Base = DynamicMapBase>; + using Base = DynamicMapBase >; using Base::INVALID_POS_MASK; friend Base; - public: - explicit DynamicFlatMap() : - Base(), - _elements(nullptr) { - initializeImpl(); - } +public: + explicit DynamicFlatMap() : Base(), _elements(nullptr) { initializeImpl(); } - DynamicFlatMap(const DynamicFlatMap&) = delete; - DynamicFlatMap& operator= (const DynamicFlatMap& other) = delete; + DynamicFlatMap(const DynamicFlatMap &) = delete; + DynamicFlatMap &operator=(const DynamicFlatMap &other) = delete; - DynamicFlatMap(DynamicFlatMap&& other) = default; - DynamicFlatMap& operator= (DynamicFlatMap&& other) = default; + DynamicFlatMap(DynamicFlatMap &&other) = default; + DynamicFlatMap &operator=(DynamicFlatMap &&other) = default; ~DynamicFlatMap() = default; - void freeInternalData() { + void freeInternalData() + { _size = 0; _timestamp = 0; _data = nullptr; _elements = nullptr; } - size_t size_in_bytes() const { - return _capacity * sizeof(MapElement); - } + size_t size_in_bytes() const { return _capacity * sizeof(MapElement); } - private: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t findImpl(const Key key) const { +private: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t findImpl(const Key key) const + { size_t hash = key & (_capacity - 1); - while ( _elements[hash].timestamp == _timestamp ) { - if ( _elements[hash].key == key ) { + while(_elements[hash].timestamp == _timestamp) + { + if(_elements[hash].key == key) + { return hash; } hash = (hash + 1) & (_capacity - 1); @@ -726,31 +730,33 @@ class DynamicFlatMap final : public DynamicMapBase(_data.get()); - } + void initializeImpl() { _elements = reinterpret_cast(_data.get()); } - void rehashImpl(const uint8_t* old_data_begin, - const size_t old_size, - const size_t old_capacity, - const size_t old_timestamp) { + void rehashImpl(const uint8_t *old_data_begin, const size_t old_size, + const size_t old_capacity, const size_t old_timestamp) + { unused(old_size); - const MapElement* elements = reinterpret_cast(old_data_begin); - for (size_t i = 0; i < old_capacity; ++i ) { - if ( elements[i].timestamp == old_timestamp ) { + const MapElement *elements = reinterpret_cast(old_data_begin); + for(size_t i = 0; i < old_capacity; ++i) + { + if(elements[i].timestamp == old_timestamp) + { const size_t pos = findImpl(elements[i].key) & ~INVALID_POS_MASK; addElementImpl(elements[i].key, elements[i].value, pos); } @@ -759,15 +765,16 @@ class DynamicFlatMap final : public DynamicMapBase +struct EmptyStruct +{ +}; +template using DynamicSparseSet = DynamicSparseMap; } // namespace ds diff --git a/mt-kahypar/datastructures/sparse_pin_counts.h b/mt-kahypar/datastructures/sparse_pin_counts.h index 5eb69774d..1bf0d3b96 100644 --- a/mt-kahypar/datastructures/sparse_pin_counts.h +++ 
b/mt-kahypar/datastructures/sparse_pin_counts.h
@@ -24,21 +24,19 @@
  * SOFTWARE.
  ******************************************************************************/
-
 #pragma once
 #include
-#include
 #include
+#include
-#include "mt-kahypar/macros.h"
-#include "mt-kahypar/datastructures/hypergraph_common.h"
 #include "mt-kahypar/datastructures/array.h"
+#include "mt-kahypar/datastructures/hypergraph_common.h"
 #include "mt-kahypar/datastructures/pin_count_snapshot.h"
+#include "mt-kahypar/macros.h"
 #include "mt-kahypar/parallel/atomic_wrapper.h"
 #include "mt-kahypar/parallel/stl/scalable_vector.h"
-
 namespace mt_kahypar {
 namespace ds {
@@ -47,23 +45,26 @@ namespace ds {
 * Our original data structure for the pin count values takes O(k*|E|) space which would
 * be not feasible when k is large. The sparse implementation uses the observation
 * that the connectivity for most hyperedges is small in real-world hypergraphs.
- * The data structure stores for each hyperedge at most c <= k tuples of the form (block, pin_count),
- * where pin_count is the number of nodes part of the given block. If the connectivity of a
- * hyperedge is larger then c, then the data structure explicitly stores all pin count values
- * in an external pin count list. However, this external list is only initialized when the connectivity becomes larger then
- * c, which should happen rarely in practice. Thus, the data structures takes O(c * |E|) space when
- * overflows are negligible.
+ * The data structure stores for each hyperedge at most c <= k tuples of the form (block,
+ * pin_count), where pin_count is the number of nodes part of the given block. If the
+ * connectivity of a hyperedge is larger than c, then the data structure explicitly stores
+ * all pin count values in an external pin count list. However, this external list is only
+ * initialized when the connectivity becomes larger than c, which should happen rarely in
+ * practice. Thus, the data structure takes O(c * |E|) space when overflows are
+ * negligible.
 *
- * The data structure supports concurrent read, but only one thread can modify the pin count values of
- * a hyperedge. Multiple writes to different hyperedges are supported.
+ * The data structure supports concurrent read, but only one thread can modify the pin
+ * count values of a hyperedge. Multiple writes to different hyperedges are supported.
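A compact way to see the O(c * |E|) space argument from this comment: each hyperedge owns a fixed budget of c inline (block, pin_count) entries and only spills into an external list once its connectivity exceeds c. The sketch below reproduces that overflow behaviour with plain vectors; `TinyPinCounts` and the constant `c = 4` are hypothetical, and the packed char-array layout, the atomics, and the single-writer requirement of the real implementation are deliberately left out.

```cpp
// Sketch of "at most c inline entries, overflow to an external list".
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

struct Entry { int32_t block; uint32_t pin_count; };

class TinyPinCounts {
  static constexpr size_t c = 4;           // inline budget per hyperedge
  struct PerEdge {
    bool external = false;
    std::vector<Entry> inline_entries;     // stands in for the packed array
    std::vector<Entry> overflow;           // only filled after an overflow
  };
  std::vector<PerEdge> edges;

public:
  explicit TinyPinCounts(size_t num_edges) : edges(num_edges) {}

  void increment(size_t he, int32_t block) {
    PerEdge& e = edges[he];
    auto& list = e.external ? e.overflow : e.inline_entries;
    for (Entry& entry : list)
      if (entry.block == block) { ++entry.pin_count; return; }
    if (!e.external && e.inline_entries.size() == c) {  // connectivity exceeds c
      e.overflow = e.inline_entries;                    // copy entries out once
      e.external = true;
      e.overflow.push_back({block, 1});
      return;
    }
    list.push_back({block, 1});
  }

  size_t connectivity(size_t he) const {
    const PerEdge& e = edges[he];
    return e.external ? e.overflow.size() : e.inline_entries.size();
  }
};

int main() {
  TinyPinCounts pc(1);
  for (int32_t b : {0, 1, 2, 3, 4, 4}) pc.increment(0, b);  // 5 blocks => overflow
  std::cout << pc.connectivity(0) << '\n';                  // prints: 5
}
```

With c = 8 entries of a few bytes each in the real class, the inline part stays small per hyperedge, which is where the O(c * |E|) bound for the common case comes from.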
*/ -class SparsePinCounts { +class SparsePinCounts +{ static constexpr bool debug = false; static constexpr size_t MAX_ENTRIES_PER_HYPEREDGE = 8; // = c - struct PinCountHeader { + struct PinCountHeader + { // Stores the connectivity of a hyperedge PartitionID connectivity; // Flag that indicates whether or not pin counts are stored @@ -72,90 +73,91 @@ class SparsePinCounts { }; // Stores the number of pins contained in a block - struct PinCountEntry { + struct PinCountEntry + { PartitionID block; HypernodeID pin_count; }; - public: +public: using Value = char; - class Iterator { - public: + class Iterator + { + public: using iterator_category = std::forward_iterator_tag; using value_type = PartitionID; - using reference = PartitionID&; - using pointer = PartitionID*; + using reference = PartitionID &; + using pointer = PartitionID *; using difference_type = std::ptrdiff_t; - Iterator(const size_t start, const size_t end, const PartitionID k, const PinCountEntry* data) : - _cur_entry( { kInvalidPartition, 0 } ), - _cur(start), - _end(end), - _k(k), - _pin_count_list(data), - _ext_pin_count_list(nullptr) { + Iterator(const size_t start, const size_t end, const PartitionID k, + const PinCountEntry *data) : + _cur_entry({ kInvalidPartition, 0 }), + _cur(start), _end(end), _k(k), _pin_count_list(data), _ext_pin_count_list(nullptr) + { next_valid_entry(); } Iterator(const size_t start, const size_t end, const PartitionID k, - const tbb::concurrent_vector* data) : - _cur_entry( { kInvalidPartition, 0 } ), - _cur(start), - _end(end), - _k(k), - _pin_count_list(nullptr), - _ext_pin_count_list(data) { + const tbb::concurrent_vector *data) : + _cur_entry({ kInvalidPartition, 0 }), + _cur(start), _end(end), _k(k), _pin_count_list(nullptr), _ext_pin_count_list(data) + { next_valid_entry(); } - PartitionID operator*() const { - return _cur_entry.block; - } + PartitionID operator*() const { return _cur_entry.block; } - Iterator& operator++() { + Iterator &operator++() + { ++_cur; next_valid_entry(); return *this; } - Iterator operator++(int ) { + Iterator operator++(int) + { const Iterator res = *this; ++_cur; next_valid_entry(); return res; } - bool operator==(const Iterator& o) const { - return _cur == o._cur && _end == o._end; - } + bool operator==(const Iterator &o) const { return _cur == o._cur && _end == o._end; } - bool operator!=(const Iterator& o) const { - return !operator==(o); - } + bool operator!=(const Iterator &o) const { return !operator==(o); } - private: - inline void next_valid_entry() { + private: + inline void next_valid_entry() + { // Note that the pin list can change due to concurrent writes. 
// Therefore, we only return valid pin count entries get_current_entry(); - while ( !is_valid() && _cur < _end ) { + while(!is_valid() && _cur < _end) + { ++_cur; get_current_entry(); } } - inline void get_current_entry() { - if ( _cur < _end ) { - if ( _pin_count_list ) { + inline void get_current_entry() + { + if(_cur < _end) + { + if(_pin_count_list) + { _cur_entry = *(_pin_count_list + _cur); - } else { + } + else + { _cur_entry = (*_ext_pin_count_list)[_cur]; } } } - inline bool is_valid() { + inline bool is_valid() + { return _cur_entry.block >= 0 && _cur_entry.block < _k && _cur_entry.pin_count > 0; } @@ -163,58 +165,48 @@ class SparsePinCounts { size_t _cur; const size_t _end; const PartitionID _k; - const PinCountEntry* _pin_count_list; - const tbb::concurrent_vector* _ext_pin_count_list; + const PinCountEntry *_pin_count_list; + const tbb::concurrent_vector *_ext_pin_count_list; }; SparsePinCounts() : - _num_hyperedges(0), - _k(0), - _max_hyperedge_size(0), - _entries_per_hyperedge(0), - _size_of_pin_counts_per_he(0), - _pin_count_in_part(), - _pin_count_ptr(nullptr), - _ext_pin_count_list(), - _deep_copy_bitset(), - _shallow_copy_bitset(), - _pin_count_snapshot([&] { return initPinCountSnapshot(); }) { } - - SparsePinCounts(const HyperedgeID num_hyperedges, - const PartitionID k, - const HypernodeID max_value, - const bool assign_parallel = true) : - _num_hyperedges(0), - _k(0), - _max_hyperedge_size(0), - _entries_per_hyperedge(0), - _size_of_pin_counts_per_he(0), - _pin_count_in_part(), - _pin_count_ptr(nullptr), - _ext_pin_count_list(), - _deep_copy_bitset(), - _shallow_copy_bitset(), - _pin_count_snapshot([&] { return initPinCountSnapshot(); }) { + _num_hyperedges(0), _k(0), _max_hyperedge_size(0), _entries_per_hyperedge(0), + _size_of_pin_counts_per_he(0), _pin_count_in_part(), _pin_count_ptr(nullptr), + _ext_pin_count_list(), _deep_copy_bitset(), _shallow_copy_bitset(), + _pin_count_snapshot([&] { return initPinCountSnapshot(); }) + { + } + + SparsePinCounts(const HyperedgeID num_hyperedges, const PartitionID k, + const HypernodeID max_value, const bool assign_parallel = true) : + _num_hyperedges(0), + _k(0), _max_hyperedge_size(0), _entries_per_hyperedge(0), + _size_of_pin_counts_per_he(0), _pin_count_in_part(), _pin_count_ptr(nullptr), + _ext_pin_count_list(), _deep_copy_bitset(), _shallow_copy_bitset(), + _pin_count_snapshot([&] { return initPinCountSnapshot(); }) + { initialize(num_hyperedges, k, max_value, assign_parallel); } - SparsePinCounts(const SparsePinCounts&) = delete; - SparsePinCounts & operator= (const SparsePinCounts &) = delete; - - SparsePinCounts(SparsePinCounts&& other) : - _num_hyperedges(other._num_hyperedges), - _k(other._k), - _max_hyperedge_size(other._max_hyperedge_size), - _entries_per_hyperedge(other._entries_per_hyperedge), - _size_of_pin_counts_per_he(other._size_of_pin_counts_per_he), - _pin_count_in_part(std::move(other._pin_count_in_part)), - _pin_count_ptr(std::move(other._pin_count_ptr)), - _ext_pin_count_list(std::move(other._ext_pin_count_list)), - _deep_copy_bitset(std::move(other._deep_copy_bitset)), - _shallow_copy_bitset(std::move(other._shallow_copy_bitset)), - _pin_count_snapshot([&] { return initPinCountSnapshot(); }) { } - - SparsePinCounts & operator= (SparsePinCounts&& other) { + SparsePinCounts(const SparsePinCounts &) = delete; + SparsePinCounts &operator=(const SparsePinCounts &) = delete; + + SparsePinCounts(SparsePinCounts &&other) : + _num_hyperedges(other._num_hyperedges), _k(other._k), + 
_max_hyperedge_size(other._max_hyperedge_size), + _entries_per_hyperedge(other._entries_per_hyperedge), + _size_of_pin_counts_per_he(other._size_of_pin_counts_per_he), + _pin_count_in_part(std::move(other._pin_count_in_part)), + _pin_count_ptr(std::move(other._pin_count_ptr)), + _ext_pin_count_list(std::move(other._ext_pin_count_list)), + _deep_copy_bitset(std::move(other._deep_copy_bitset)), + _shallow_copy_bitset(std::move(other._shallow_copy_bitset)), + _pin_count_snapshot([&] { return initPinCountSnapshot(); }) + { + } + + SparsePinCounts &operator=(SparsePinCounts &&other) + { _num_hyperedges = other._num_hyperedges; _k = other._k; _max_hyperedge_size = other._max_hyperedge_size; @@ -225,59 +217,66 @@ class SparsePinCounts { _ext_pin_count_list = std::move(other._ext_pin_count_list); _deep_copy_bitset = std::move(other._deep_copy_bitset); _shallow_copy_bitset = std::move(other._shallow_copy_bitset); - _pin_count_snapshot = tbb::enumerable_thread_specific([&] { - return initPinCountSnapshot(); - }); + _pin_count_snapshot = tbb::enumerable_thread_specific( + [&] { return initPinCountSnapshot(); }); return *this; } // ################## Connectivity Set ################## - inline bool contains(const HyperedgeID he, const PartitionID p) const { + inline bool contains(const HyperedgeID he, const PartitionID p) const + { ASSERT(he < _num_hyperedges); ASSERT(p < _k); return find_entry(he, p) != nullptr; } - inline void clear(const HyperedgeID he) { + inline void clear(const HyperedgeID he) + { ASSERT(he < _num_hyperedges); init_pin_count_of_hyperedge(he); } - inline PartitionID connectivity(const HyperedgeID he) const { + inline PartitionID connectivity(const HyperedgeID he) const + { ASSERT(he < _num_hyperedges); - const PinCountHeader* head = header(he); + const PinCountHeader *head = header(he); return head->connectivity; } - IteratorRange connectivitySet(const HyperedgeID he) const { + IteratorRange connectivitySet(const HyperedgeID he) const + { ASSERT(he < _num_hyperedges); - const PinCountHeader* head = header(he); + const PinCountHeader *head = header(he); const size_t con = head->connectivity; - if ( likely(!head->is_external) ) { - return IteratorRange( - Iterator(UL(0), con, _k, entry(he, 0)), - Iterator(con, con, _k, entry(he, 0))); - } else { - return IteratorRange( - Iterator(UL(0), con, _k, &_ext_pin_count_list[he]), - Iterator(con, con, _k, &_ext_pin_count_list[he])); + if(likely(!head->is_external)) + { + return IteratorRange(Iterator(UL(0), con, _k, entry(he, 0)), + Iterator(con, con, _k, entry(he, 0))); + } + else + { + return IteratorRange(Iterator(UL(0), con, _k, &_ext_pin_count_list[he]), + Iterator(con, con, _k, &_ext_pin_count_list[he])); } } - StaticBitset& shallowCopy(const HyperedgeID he) const { + StaticBitset &shallowCopy(const HyperedgeID he) const + { // Shallow copy not possible for sparse pin count data structure - Bitset& deep_copy = deepCopy(he); - StaticBitset& shallow_copy = _shallow_copy_bitset.local(); + Bitset &deep_copy = deepCopy(he); + StaticBitset &shallow_copy = _shallow_copy_bitset.local(); shallow_copy.set(deep_copy.numBlocks(), deep_copy.data()); return shallow_copy; } // Creates a deep copy of the connectivity set of hyperedge he - Bitset& deepCopy(const HyperedgeID he) const { - Bitset& deep_copy = _deep_copy_bitset.local(); + Bitset &deepCopy(const HyperedgeID he) const + { + Bitset &deep_copy = _deep_copy_bitset.local(); deep_copy.resize(_k); - for ( const PartitionID& block : connectivitySet(he) ) { + for(const PartitionID &block : 
connectivitySet(he)) + { deep_copy.set(block); } return deep_copy; @@ -286,33 +285,36 @@ class SparsePinCounts { // ################## Pin Count In Part ################## // ! Returns the pin count of the hyperedge in the corresponding block - inline HypernodeID pinCountInPart(const HyperedgeID he, - const PartitionID p) const { + inline HypernodeID pinCountInPart(const HyperedgeID he, const PartitionID p) const + { ASSERT(he < _num_hyperedges); ASSERT(p < _k); - const PinCountEntry* val = find_entry(he, p); + const PinCountEntry *val = find_entry(he, p); return val ? val->pin_count : 0; } // ! Sets the pin count of the hyperedge in the corresponding block to value - inline void setPinCountInPart(const HyperedgeID he, - const PartitionID p, - const HypernodeID value) { + inline void setPinCountInPart(const HyperedgeID he, const PartitionID p, + const HypernodeID value) + { ASSERT(he < _num_hyperedges); ASSERT(p < _k); add_pin_count_entry(he, p, value); } // ! Increments the pin count of the hyperedge in the corresponding block - inline HypernodeID incrementPinCountInPart(const HyperedgeID he, - const PartitionID p) { + inline HypernodeID incrementPinCountInPart(const HyperedgeID he, const PartitionID p) + { ASSERT(he < _num_hyperedges); ASSERT(p < _k); - PinCountEntry* val = find_entry(he, p); + PinCountEntry *val = find_entry(he, p); HypernodeID inc_pin_count = 0; - if ( val ) { + if(val) + { inc_pin_count = ++val->pin_count; - } else { + } + else + { inc_pin_count = 1; add_pin_count_entry(he, p, inc_pin_count); } @@ -320,34 +322,41 @@ class SparsePinCounts { } // ! Decrements the pin count of the hyperedge in the corresponding block - inline HypernodeID decrementPinCountInPart(const HyperedgeID he, - const PartitionID p) { + inline HypernodeID decrementPinCountInPart(const HyperedgeID he, const PartitionID p) + { ASSERT(he < _num_hyperedges); ASSERT(p < _k); - PinCountEntry* val = find_entry(he, p); + PinCountEntry *val = find_entry(he, p); ASSERT(val); const HypernodeID dec_pin_count = --val->pin_count; - if ( dec_pin_count == 0 ) { + if(dec_pin_count == 0) + { // Remove pin count entry // Note that only one thread can modify the pin count list of // a hyperedge at the same time. Therefore, this operation is thread-safe. - PinCountHeader* head = header(he); + PinCountHeader *head = header(he); --head->connectivity; - if ( likely(!head->is_external) ) { - PinCountEntry* back = entry(he, head->connectivity); + if(likely(!head->is_external)) + { + PinCountEntry *back = entry(he, head->connectivity); *val = *back; back->block = kInvalidPartition; back->pin_count = 0; - } else { + } + else + { // Note that in case the connectivity becomes smaller than c, // we do not fallback to the smaller pin count list bounded by c. 
size_t pos = 0; - for ( ; pos < _ext_pin_count_list[he].size(); ++pos ) { - if ( _ext_pin_count_list[he][pos].block == p ) { + for(; pos < _ext_pin_count_list[he].size(); ++pos) + { + if(_ext_pin_count_list[he][pos].block == p) + { break; } } - std::swap(_ext_pin_count_list[he][pos], _ext_pin_count_list[he][head->connectivity]); + std::swap(_ext_pin_count_list[he][pos], + _ext_pin_count_list[he][head->connectivity]); _ext_pin_count_list[he][head->connectivity].block = kInvalidPartition; _ext_pin_count_list[he][head->connectivity].pin_count = 0; } @@ -355,10 +364,12 @@ class SparsePinCounts { return dec_pin_count; } - PinCountSnapshot& snapshot(const HyperedgeID he) { - PinCountSnapshot& cpy = _pin_count_snapshot.local(); + PinCountSnapshot &snapshot(const HyperedgeID he) + { + PinCountSnapshot &cpy = _pin_count_snapshot.local(); cpy.reset(); - for ( const PartitionID block : connectivitySet(he) ) { + for(const PartitionID block : connectivitySet(he)) + { cpy.setPinCountInPart(block, pinCountInPart(he, block)); } return cpy; @@ -367,148 +378,175 @@ class SparsePinCounts { // ################## Miscellaneous ################## // ! Initializes the data structure - void initialize(const HyperedgeID num_hyperedges, - const PartitionID k, - const HypernodeID max_value, - const bool assign_parallel = true) { + void initialize(const HyperedgeID num_hyperedges, const PartitionID k, + const HypernodeID max_value, const bool assign_parallel = true) + { _num_hyperedges = num_hyperedges; _k = k; _max_hyperedge_size = max_value; - _entries_per_hyperedge = std::min( - static_cast(k), MAX_ENTRIES_PER_HYPEREDGE); - _size_of_pin_counts_per_he = sizeof(PinCountHeader) + - sizeof(PinCountEntry) * _entries_per_hyperedge; + _entries_per_hyperedge = std::min(static_cast(k), MAX_ENTRIES_PER_HYPEREDGE); + _size_of_pin_counts_per_he = + sizeof(PinCountHeader) + sizeof(PinCountEntry) * _entries_per_hyperedge; _pin_count_in_part.resize("Refinement", "pin_count_in_part", - _size_of_pin_counts_per_he * num_hyperedges, false, assign_parallel); + _size_of_pin_counts_per_he * num_hyperedges, false, + assign_parallel); _pin_count_ptr = _pin_count_in_part.data(); _ext_pin_count_list.resize(_num_hyperedges); reset(assign_parallel); } - void reset(const bool assign_parallel = true) { - if ( assign_parallel ) { - tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID he) { - init_pin_count_of_hyperedge(he); - }); - } else { - for ( HyperedgeID he = 0; he < _num_hyperedges; ++he ) { + void reset(const bool assign_parallel = true) + { + if(assign_parallel) + { + tbb::parallel_for(ID(0), _num_hyperedges, + [&](const HyperedgeID he) { init_pin_count_of_hyperedge(he); }); + } + else + { + for(HyperedgeID he = 0; he < _num_hyperedges; ++he) + { init_pin_count_of_hyperedge(he); } } } // ! 
Returns the size in bytes of this data structure - size_t size_in_bytes() const { + size_t size_in_bytes() const + { // TODO: size of external list is missing return sizeof(char) * _pin_count_in_part.size(); } - void freeInternalData() { - parallel::free(_pin_count_in_part); - } + void freeInternalData() { parallel::free(_pin_count_in_part); } - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); parent->addChild("Pin Count Values", sizeof(char) * _pin_count_in_part.size()); tbb::enumerable_thread_specific ext_pin_count_entries(0); tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID he) { ext_pin_count_entries.local() += _ext_pin_count_list[he].size(); }); - parent->addChild("External Pin Count Values", sizeof(PinCountEntry) * - ext_pin_count_entries.combine(std::plus())); + parent->addChild("External Pin Count Values", + sizeof(PinCountEntry) * + ext_pin_count_entries.combine(std::plus())); } - static size_t num_elements(const HyperedgeID num_hyperedges, - const PartitionID k, - const HypernodeID) { - const size_t entries_per_hyperedge = std::min( - static_cast(k), MAX_ENTRIES_PER_HYPEREDGE); - const size_t size_of_pin_counts_per_he = sizeof(PinCountHeader) + - sizeof(PinCountEntry) * entries_per_hyperedge; + static size_t num_elements(const HyperedgeID num_hyperedges, const PartitionID k, + const HypernodeID) + { + const size_t entries_per_hyperedge = + std::min(static_cast(k), MAX_ENTRIES_PER_HYPEREDGE); + const size_t size_of_pin_counts_per_he = + sizeof(PinCountHeader) + sizeof(PinCountEntry) * entries_per_hyperedge; return size_of_pin_counts_per_he * num_hyperedges; } - private: - inline void init_pin_count_of_hyperedge(const HyperedgeID& he) { - PinCountHeader* head = header(he); +private: + inline void init_pin_count_of_hyperedge(const HyperedgeID &he) + { + PinCountHeader *head = header(he); head->connectivity = 0; head->is_external = false; - for ( size_t i = 0; i < _entries_per_hyperedge; ++i ) { - PinCountEntry* pin_count = entry(he, i); + for(size_t i = 0; i < _entries_per_hyperedge; ++i) + { + PinCountEntry *pin_count = entry(he, i); pin_count->block = kInvalidPartition; pin_count->pin_count = 0; } _ext_pin_count_list[he].clear(); } - inline void add_pin_count_entry(const HyperedgeID he, - const PartitionID p, - const HypernodeID value) { + inline void add_pin_count_entry(const HyperedgeID he, const PartitionID p, + const HypernodeID value) + { // Assumes that the block with the given ID does not exist // and inserts it at the end of the pin count list // Note that only one thread can modify the pin count list of // a hyperedge at the same time. Therefore, this operation is thread-safe. 
- PinCountHeader* head = header(he); - if ( likely(!head->is_external) ) { + PinCountHeader *head = header(he); + if(likely(!head->is_external)) + { const size_t connectivity = head->connectivity; - if ( connectivity < _entries_per_hyperedge ) { + if(connectivity < _entries_per_hyperedge) + { // Still enough entries to add the pin count entry - PinCountEntry* pin_count = entry(he, connectivity); + PinCountEntry *pin_count = entry(he, connectivity); pin_count->block = p; pin_count->pin_count = value; - } else { + } + else + { // Connecitivity is now larger than c // => copy entries to external pin count list handle_overflow(he); add_pin_count_entry_to_external(he, p, value); } - } else { + } + else + { add_pin_count_entry_to_external(he, p, value); } ++head->connectivity; } - inline void handle_overflow(const HyperedgeID& he) { - PinCountHeader* head = header(he); + inline void handle_overflow(const HyperedgeID &he) + { + PinCountHeader *head = header(he); // Copy entries to external pin count list - for ( size_t i = 0; i < _entries_per_hyperedge; ++i ) { + for(size_t i = 0; i < _entries_per_hyperedge; ++i) + { _ext_pin_count_list[he].push_back(*entry(he, i)); } head->is_external = true; } - inline void add_pin_count_entry_to_external(const HyperedgeID he, - const PartitionID p, - const HypernodeID value) { - PinCountHeader* head = header(he); + inline void add_pin_count_entry_to_external(const HyperedgeID he, const PartitionID p, + const HypernodeID value) + { + PinCountHeader *head = header(he); ASSERT(head->is_external); - if ( static_cast(head->connectivity) < _ext_pin_count_list[he].size() ) { + if(static_cast(head->connectivity) < _ext_pin_count_list[he].size()) + { // Reuse existing entry that was removed due to decrementing the pin count ASSERT(_ext_pin_count_list[he][head->connectivity].block == kInvalidPartition); _ext_pin_count_list[he][head->connectivity].block = p; _ext_pin_count_list[he][head->connectivity].pin_count = value; - } else { - _ext_pin_count_list[he].push_back(PinCountEntry { p, value }); + } + else + { + _ext_pin_count_list[he].push_back(PinCountEntry{ p, value }); } } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const PinCountEntry* find_entry(const HyperedgeID he, const PartitionID p) const { - const PinCountHeader* head = header(he); - if ( likely(!head->is_external) ) { - // Due to concurrent writes, the connectivity can become larger than MAX_ENTRIES_PER_HYPEREDGE. + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const PinCountEntry * + find_entry(const HyperedgeID he, const PartitionID p) const + { + const PinCountHeader *head = header(he); + if(likely(!head->is_external)) + { + // Due to concurrent writes, the connectivity can become larger than + // MAX_ENTRIES_PER_HYPEREDGE. 
const size_t connectivity = - std::min(static_cast(head->connectivity), MAX_ENTRIES_PER_HYPEREDGE); - for ( size_t i = 0; i < connectivity; ++i ) { - const PinCountEntry* value = entry(he, i); - if ( value->block == p ) { + std::min(static_cast(head->connectivity), MAX_ENTRIES_PER_HYPEREDGE); + for(size_t i = 0; i < connectivity; ++i) + { + const PinCountEntry *value = entry(he, i); + if(value->block == p) + { return value; } } - } else { + } + else + { const size_t num_entries = head->connectivity; - for ( size_t i = 0; i < num_entries; ++i ) { - const PinCountEntry& value = _ext_pin_count_list[he][i]; - if ( value.block == p ) { + for(size_t i = 0; i < num_entries; ++i) + { + const PinCountEntry &value = _ext_pin_count_list[he][i]; + if(value.block == p) + { return &value; } } @@ -516,32 +554,45 @@ class SparsePinCounts { return nullptr; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PinCountEntry* find_entry(const HyperedgeID he, const PartitionID p) { - return const_cast(static_cast(*this).find_entry(he, p)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PinCountEntry *find_entry(const HyperedgeID he, + const PartitionID p) + { + return const_cast( + static_cast(*this).find_entry(he, p)); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const PinCountHeader* header(const HyperedgeID he) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const PinCountHeader * + header(const HyperedgeID he) const + { ASSERT(he <= _num_hyperedges, "Hyperedge" << he << "does not exist"); - return reinterpret_cast(_pin_count_ptr + he * _size_of_pin_counts_per_he); + return reinterpret_cast(_pin_count_ptr + + he * _size_of_pin_counts_per_he); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PinCountHeader* header(const HyperedgeID he) { - return const_cast(static_cast(*this).header(he)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PinCountHeader *header(const HyperedgeID he) + { + return const_cast( + static_cast(*this).header(he)); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const PinCountEntry* entry(const HyperedgeID he, - const size_t idx) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const PinCountEntry *entry(const HyperedgeID he, + const size_t idx) const + { ASSERT(he <= _num_hyperedges, "Hyperedge" << he << "does not exist"); - return reinterpret_cast(_pin_count_ptr + - he * _size_of_pin_counts_per_he + sizeof(PinCountHeader) + sizeof(PinCountEntry) * idx); + return reinterpret_cast( + _pin_count_ptr + he * _size_of_pin_counts_per_he + sizeof(PinCountHeader) + + sizeof(PinCountEntry) * idx); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PinCountEntry* entry(const HyperedgeID he, - const size_t idx) { - return const_cast(static_cast(*this).entry(he, idx)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PinCountEntry *entry(const HyperedgeID he, + const size_t idx) + { + return const_cast( + static_cast(*this).entry(he, idx)); } - PinCountSnapshot initPinCountSnapshot() const { + PinCountSnapshot initPinCountSnapshot() const + { return PinCountSnapshot(_k, _max_hyperedge_size); } @@ -562,18 +613,18 @@ class SparsePinCounts { // ! Stores the pin count list bounded by c Array _pin_count_in_part; - char* _pin_count_ptr; + char *_pin_count_ptr; // ! External pin count list that stores the pin count values when // ! the connectivity becomes larger than c. // ! Note that we have to use concurrent_vector since we allow concurrent // ! read while modyfing the entries. 
- vec> _ext_pin_count_list; + vec > _ext_pin_count_list; // Bitsets to create shallow and deep copies of the connectivity set mutable tbb::enumerable_thread_specific _deep_copy_bitset; mutable tbb::enumerable_thread_specific _shallow_copy_bitset; mutable tbb::enumerable_thread_specific _pin_count_snapshot; }; -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/static_bitset.h b/mt-kahypar/datastructures/static_bitset.h index 7db3c36d5..61273c3f9 100644 --- a/mt-kahypar/datastructures/static_bitset.h +++ b/mt-kahypar/datastructures/static_bitset.h @@ -29,172 +29,180 @@ #include #include +#include "mt-kahypar/datastructures/bitset.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/utils/bit_ops.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" -#include "mt-kahypar/datastructures/bitset.h" namespace mt_kahypar { namespace ds { -class StaticBitset { +class StaticBitset +{ - public: +public: using Block = uint64_t; static constexpr Block BITS_PER_BLOCK = std::numeric_limits::digits; static_assert(__builtin_popcountll(BITS_PER_BLOCK) == 1); static constexpr Block MOD_MASK = BITS_PER_BLOCK - 1; static constexpr Block DIV_SHIFT = utils::log2(BITS_PER_BLOCK); - private: +private: // ! Iterator enumerates the position of all one bits in the bitset - class OneBitIterator { - public: + class OneBitIterator + { + public: using iterator_category = std::forward_iterator_tag; using value_type = PartitionID; - using reference = PartitionID&; - using pointer = PartitionID*; + using reference = PartitionID &; + using pointer = PartitionID *; using difference_type = std::ptrdiff_t; - OneBitIterator(const size_t num_blocks, - const Block* bitset, + OneBitIterator(const size_t num_blocks, const Block *bitset, const PartitionID start_block) : - _num_blocks(num_blocks), - _bitset(bitset), - _max_block_id(num_blocks * BITS_PER_BLOCK), - _current_block_id(start_block) { - if ( _current_block_id < _max_block_id ) { + _num_blocks(num_blocks), + _bitset(bitset), _max_block_id(num_blocks * BITS_PER_BLOCK), + _current_block_id(start_block) + { + if(_current_block_id < _max_block_id) + { nextBlockID(); } } - PartitionID operator*() const { - return _current_block_id; - } + PartitionID operator*() const { return _current_block_id; } - OneBitIterator& operator++() { + OneBitIterator &operator++() + { nextBlockID(); return *this; } - OneBitIterator operator++(int ) { + OneBitIterator operator++(int) + { const OneBitIterator res = *this; nextBlockID(); return res; } - bool operator==(const OneBitIterator& o) const { + bool operator==(const OneBitIterator &o) const + { return _current_block_id == o._current_block_id; } - bool operator!=(const OneBitIterator& o) const { - return !operator==(o); - } + bool operator!=(const OneBitIterator &o) const { return !operator==(o); } - private: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void nextBlockID() { + private: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void nextBlockID() + { ++_current_block_id; Block b = loadCurrentBlock(); - while ( b >> ( _current_block_id & MOD_MASK ) == 0 && _current_block_id < _max_block_id ) { + while(b >> (_current_block_id & MOD_MASK) == 0 && _current_block_id < _max_block_id) + { // no more one bits in current block -> load next block _current_block_id += (BITS_PER_BLOCK - (_current_block_id & MOD_MASK)); - b = ( _current_block_id < _max_block_id ) * loadCurrentBlock(); + b = (_current_block_id < _max_block_id) * 
loadCurrentBlock(); } const bool reached_max_id = _current_block_id == _max_block_id; // Avoid if statement here - _current_block_id = (1 - reached_max_id) * ( _current_block_id + - utils::lowest_set_bit_64( - std::max(b >> ( _current_block_id & MOD_MASK ), static_cast(1)))) + - reached_max_id * _max_block_id; + _current_block_id = + (1 - reached_max_id) * + (_current_block_id + + utils::lowest_set_bit_64(std::max(b >> (_current_block_id & MOD_MASK), + static_cast(1)))) + + reached_max_id * _max_block_id; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Block loadCurrentBlock() { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Block loadCurrentBlock() + { ASSERT(static_cast(_current_block_id >> DIV_SHIFT) <= _num_blocks); - return __atomic_load_n(_bitset + ( _current_block_id >> DIV_SHIFT ), __ATOMIC_RELAXED); + return __atomic_load_n(_bitset + (_current_block_id >> DIV_SHIFT), + __ATOMIC_RELAXED); } const size_t _num_blocks; - const Block* _bitset; + const Block *_bitset; const PartitionID _max_block_id; PartitionID _current_block_id; }; - public: +public: using iterator = OneBitIterator; using const_iterator = const OneBitIterator; - StaticBitset() : - _num_blocks(0), - _bitset(nullptr) { } + StaticBitset() : _num_blocks(0), _bitset(nullptr) {} - StaticBitset(const size_t num_blocks, - const Block* bitset) : - _num_blocks(num_blocks), - _bitset(bitset) { } + StaticBitset(const size_t num_blocks, const Block *bitset) : + _num_blocks(num_blocks), _bitset(bitset) + { + } - void set(const size_t size, const Block* block) { + void set(const size_t size, const Block *block) + { _num_blocks = size; _bitset = block; } - iterator begin() const { - return iterator(_num_blocks, _bitset, -1); - } + iterator begin() const { return iterator(_num_blocks, _bitset, -1); } - iterator end() const { + iterator end() const + { return iterator(_num_blocks, _bitset, _num_blocks * BITS_PER_BLOCK); } - const_iterator cbegin() const { - return const_iterator(_num_blocks, _bitset, -1); - } + const_iterator cbegin() const { return const_iterator(_num_blocks, _bitset, -1); } - const_iterator cend() const { + const_iterator cend() const + { return const_iterator(_num_blocks, _bitset, _num_blocks * BITS_PER_BLOCK); } - const Block* data() const { - return _bitset; - } + const Block *data() const { return _bitset; } - bool isSet(const size_t pos) const { + bool isSet(const size_t pos) const + { ASSERT(pos < _num_blocks * BITS_PER_BLOCK); const size_t block_idx = pos >> DIV_SHIFT; const size_t idx = pos & MOD_MASK; - return ( *(_bitset + block_idx) >> idx ) & UL(1); + return (*(_bitset + block_idx) >> idx) & UL(1); } // ! 
Returns the number of one bits in the bitset - int popcount() const { + int popcount() const + { int cnt = 0; - for ( size_t i = 0; i < _num_blocks; ++i ) { - cnt += utils::popcount_64( - __atomic_load_n(_bitset + i, __ATOMIC_RELAXED)); + for(size_t i = 0; i < _num_blocks; ++i) + { + cnt += utils::popcount_64(__atomic_load_n(_bitset + i, __ATOMIC_RELAXED)); } return cnt; } - Bitset copy() const { + Bitset copy() const + { Bitset res(_num_blocks * BITS_PER_BLOCK); - for ( size_t i = 0; i < _num_blocks; ++i ) { - res._bitset[i] = *( _bitset + i ); + for(size_t i = 0; i < _num_blocks; ++i) + { + res._bitset[i] = *(_bitset + i); } return res; } - Bitset operator^(const StaticBitset& other) const { + Bitset operator^(const StaticBitset &other) const + { ASSERT(_num_blocks == other._num_blocks); Bitset res(_num_blocks * BITS_PER_BLOCK); - for ( size_t i = 0; i < _num_blocks; ++i ) { - res._bitset[i] = *( _bitset + i ) ^ *( other._bitset + i ); + for(size_t i = 0; i < _num_blocks; ++i) + { + res._bitset[i] = *(_bitset + i) ^ *(other._bitset + i); } return res; } - private: +private: size_t _num_blocks; - const Block* _bitset; + const Block *_bitset; }; -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/static_graph.cpp b/mt-kahypar/datastructures/static_graph.cpp index 4a2b4db87..e9a07fd65 100644 --- a/mt-kahypar/datastructures/static_graph.cpp +++ b/mt-kahypar/datastructures/static_graph.cpp @@ -28,500 +28,576 @@ #include "static_graph.h" +#include "mt-kahypar/datastructures/concurrent_bucket_map.h" #include "mt-kahypar/parallel/chunking.h" #include "mt-kahypar/parallel/parallel_prefix_sum.h" -#include "mt-kahypar/datastructures/concurrent_bucket_map.h" -#include "mt-kahypar/utils/timer.h" #include "mt-kahypar/utils/memory_tree.h" +#include "mt-kahypar/utils/timer.h" #include #include #include - namespace mt_kahypar::ds { - /*! - * Contracts a given community structure. All vertices with the same label - * are collapsed into the same vertex. The resulting single-pin and parallel - * hyperedges are removed from the contracted graph. The function returns - * the contracted hypergraph and a mapping which specifies a mapping from - * community label (given in 'communities') to a vertex in the coarse hypergraph. - * - * \param communities Community structure that should be contracted - */ - StaticGraph StaticGraph::contract(parallel::scalable_vector& communities, bool /*deterministic*/) { - ASSERT(communities.size() == _num_nodes); - - if ( !_tmp_contraction_buffer ) { - allocateTmpContractionBuffer(); +/*! + * Contracts a given community structure. All vertices with the same label + * are collapsed into the same vertex. The resulting single-pin and parallel + * hyperedges are removed from the contracted graph. The function returns + * the contracted hypergraph and a mapping which specifies a mapping from + * community label (given in 'communities') to a vertex in the coarse hypergraph. 
+ * + * \param communities Community structure that should be contracted + */ +StaticGraph StaticGraph::contract(parallel::scalable_vector &communities, + bool /*deterministic*/) +{ + ASSERT(communities.size() == _num_nodes); + + if(!_tmp_contraction_buffer) + { + allocateTmpContractionBuffer(); + } + + // AUXILIARY BUFFERS - Reused during multilevel hierarchy to prevent expensive + // allocations + Array &mapping = _tmp_contraction_buffer->mapping; + Array &tmp_nodes = _tmp_contraction_buffer->tmp_nodes; + Array &node_sizes = _tmp_contraction_buffer->node_sizes; + Array > &tmp_num_incident_edges = + _tmp_contraction_buffer->tmp_num_incident_edges; + Array > &node_weights = + _tmp_contraction_buffer->node_weights; + Array &tmp_edges = _tmp_contraction_buffer->tmp_edges; + Array &edge_id_mapping = _tmp_contraction_buffer->edge_id_mapping; + + ASSERT(static_cast(_num_nodes) <= mapping.size()); + ASSERT(static_cast(_num_nodes) <= tmp_nodes.size()); + ASSERT(static_cast(_num_nodes) <= node_sizes.size()); + ASSERT(static_cast(_num_nodes) <= tmp_num_incident_edges.size()); + ASSERT(static_cast(_num_nodes) <= node_weights.size()); + ASSERT(static_cast(_num_edges) <= tmp_edges.size()); + ASSERT(static_cast(_num_edges / 2) <= edge_id_mapping.size()); + + // #################### STAGE 1 #################### + // Compute vertex ids of coarse graph with a parallel prefix sum + mapping.assign(_num_nodes, 0); + + doParallelForAllNodes([&](const HypernodeID &node) { + ASSERT(static_cast(communities[node]) < mapping.size()); + mapping[communities[node]] = UL(1); + }); + + // Prefix sum determines vertex ids in coarse graph + parallel::TBBPrefixSum mapping_prefix_sum(mapping); + tbb::parallel_scan(tbb::blocked_range(ID(0), _num_nodes), mapping_prefix_sum); + HypernodeID coarsened_num_nodes = mapping_prefix_sum.total_sum(); + + // Remap community ids + tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID &node) { + if(nodeIsEnabled(node)) + { + communities[node] = mapping_prefix_sum[communities[node]]; + } + else + { + communities[node] = kInvalidHypernode; } - // AUXILIARY BUFFERS - Reused during multilevel hierarchy to prevent expensive allocations - Array& mapping = _tmp_contraction_buffer->mapping; - Array& tmp_nodes = _tmp_contraction_buffer->tmp_nodes; - Array& node_sizes = _tmp_contraction_buffer->node_sizes; - Array>& tmp_num_incident_edges = - _tmp_contraction_buffer->tmp_num_incident_edges; - Array>& node_weights = - _tmp_contraction_buffer->node_weights; - Array& tmp_edges = _tmp_contraction_buffer->tmp_edges; - Array& edge_id_mapping = _tmp_contraction_buffer->edge_id_mapping; - - ASSERT(static_cast(_num_nodes) <= mapping.size()); - ASSERT(static_cast(_num_nodes) <= tmp_nodes.size()); - ASSERT(static_cast(_num_nodes) <= node_sizes.size()); - ASSERT(static_cast(_num_nodes) <= tmp_num_incident_edges.size()); - ASSERT(static_cast(_num_nodes) <= node_weights.size()); - ASSERT(static_cast(_num_edges) <= tmp_edges.size()); - ASSERT(static_cast(_num_edges / 2) <= edge_id_mapping.size()); - - - // #################### STAGE 1 #################### - // Compute vertex ids of coarse graph with a parallel prefix sum - mapping.assign(_num_nodes, 0); - - doParallelForAllNodes([&](const HypernodeID& node) { - ASSERT(static_cast(communities[node]) < mapping.size()); - mapping[communities[node]] = UL(1); - }); + // Reset tmp contraction buffer + if(node < coarsened_num_nodes) + { + node_weights[node] = 0; + tmp_nodes[node] = Node(true); + node_sizes[node] = 0; + tmp_num_incident_edges[node] = 0; + } + }); 
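Note on stage 1 above: coarse node IDs are obtained by marking every community label that actually occurs with a 1 and taking a prefix sum over that marker array, so each used label receives a dense ID in [0, number of coarse nodes). A minimal sequential sketch of this renumbering, using plain std::vector as a stand-in for the project's Array and TBBPrefixSum types (all names below are illustrative, not the mt-kahypar API; labels are assumed to lie in [0, communities.size())):

#include <cstdint>
#include <vector>

// Illustrative only: dense renumbering of community labels via a 0/1 marker
// array and an exclusive prefix sum (the real code runs the scan in parallel
// with tbb::parallel_scan and skips disabled nodes).
std::uint32_t renumberCommunities(std::vector<std::uint32_t>& communities) {
  // 1) Mark every label that actually occurs.
  std::vector<std::uint32_t> mapping(communities.size(), 0);
  for (std::uint32_t label : communities) mapping[label] = 1;

  // 2) Exclusive prefix sum: a used label l is assigned the number of used
  //    labels smaller than l, i.e. a dense coarse node ID.
  std::uint32_t running = 0;
  for (std::uint32_t& m : mapping) {
    const std::uint32_t was_used = m;
    m = running;
    running += was_used;
  }

  // 3) Rewrite every fine node's label to its coarse node ID.
  for (std::uint32_t& label : communities) label = mapping[label];
  return running;  // number of coarse nodes
}

The invariant matches the code above: after the scan, the prefix value stored for a used label is exactly the coarse node ID it is remapped to.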
+ + // Mapping from a vertex id of the current hypergraph to its + // id in the coarse hypergraph + auto map_to_coarse_graph = [&](const HypernodeID node) { + ASSERT(node < communities.size()); + return communities[node]; + }; + + doParallelForAllNodes([&](const HypernodeID &node) { + const HypernodeID coarse_node = map_to_coarse_graph(node); + ASSERT(coarse_node < coarsened_num_nodes, V(coarse_node) << V(coarsened_num_nodes)); + // Weight vector is atomic => thread-safe + node_weights[coarse_node] += nodeWeight(node); + // Aggregate upper bound for number of incident nets of the contracted vertex + tmp_num_incident_edges[coarse_node] += nodeDegree(node); + }); + + // #################### STAGE 2 #################### + // In this step the incident edges of vertices are processed and stored inside the + // temporary buffer. The vertex ids of the targets are remapped and edges that are + // contained inside one community after contraction are marked as invalid. Note that + // parallel edges are not invalidated yet. + + // Compute start position the incident nets of a coarse vertex in the + // temporary incident nets array with a parallel prefix sum + parallel::scalable_vector > + tmp_incident_edges_pos; + parallel::TBBPrefixSum, Array> + tmp_incident_edges_prefix_sum(tmp_num_incident_edges); + tbb::parallel_invoke( + [&] { + tbb::parallel_scan( + tbb::blocked_range(ID(0), static_cast(coarsened_num_nodes)), + tmp_incident_edges_prefix_sum); + }, + [&] { + tmp_incident_edges_pos.assign(coarsened_num_nodes, + parallel::IntegralAtomicWrapper(0)); + }); - // Prefix sum determines vertex ids in coarse graph - parallel::TBBPrefixSum mapping_prefix_sum(mapping); - tbb::parallel_scan(tbb::blocked_range(ID(0), _num_nodes), mapping_prefix_sum); - HypernodeID coarsened_num_nodes = mapping_prefix_sum.total_sum(); - - // Remap community ids - tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID& node) { - if ( nodeIsEnabled(node) ) { - communities[node] = mapping_prefix_sum[communities[node]]; - } else { - communities[node] = kInvalidHypernode; + // Write the incident edges of each contracted vertex to the temporary edge array + doParallelForAllNodes([&](const HypernodeID &node) { + const HypernodeID coarse_node = map_to_coarse_graph(node); + const HyperedgeID node_degree = nodeDegree(node); + const size_t coarse_edges_pos = + tmp_incident_edges_prefix_sum[coarse_node] + + tmp_incident_edges_pos[coarse_node].fetch_add(node_degree); + const size_t edges_pos = _nodes[node].firstEntry(); + ASSERT(coarse_edges_pos + node_degree <= + tmp_incident_edges_prefix_sum[coarse_node + 1]); + ASSERT(edges_pos + node_degree <= _edges.size()); + for(size_t i = 0; i < static_cast(node_degree); ++i) + { + const Edge &edge = _edges[edges_pos + i]; + const HyperedgeID unique_id = _unique_edge_ids[edges_pos + i]; + const HypernodeID target = map_to_coarse_graph(edge.target()); + const bool is_valid = target != coarse_node; + if(is_valid) + { + tmp_edges[coarse_edges_pos + i] = + TmpEdgeInformation(target, edge.weight(), unique_id); } - - // Reset tmp contraction buffer - if ( node < coarsened_num_nodes ) { - node_weights[node] = 0; - tmp_nodes[node] = Node(true); - node_sizes[node] = 0; - tmp_num_incident_edges[node] = 0; + else + { + tmp_edges[coarse_edges_pos + i] = TmpEdgeInformation(); } - }); + } + }); + + // #################### STAGE 3 #################### + // In this step, we deduplicate parallel edges. To this end, the incident edges + // of each vertex are sorted and aggregated. 
However, there is a special treatment + // for vertices with extremely high degree, as they might become a bottleneck + // otherwise. Afterwards, for all parallel edges all but one are invalidated and + // the weight of the remaining edge is set to the sum of the weights. + + // A list of high degree vertices that are processed afterwards + parallel::scalable_vector high_degree_vertices; + std::mutex high_degree_vertex_mutex; + tbb::parallel_for(ID(0), coarsened_num_nodes, [&](const HypernodeID &coarse_node) { + const size_t incident_edges_start = tmp_incident_edges_prefix_sum[coarse_node]; + const size_t incident_edges_end = tmp_incident_edges_prefix_sum[coarse_node + 1]; + const size_t tmp_degree = incident_edges_end - incident_edges_start; + if(tmp_degree <= HIGH_DEGREE_CONTRACTION_THRESHOLD) + { + // if the degree is small enough, we directly deduplicate the edges + node_sizes[coarse_node] = deduplicateTmpEdges( + tmp_edges.data() + incident_edges_start, tmp_edges.data() + incident_edges_end); + } + else + { + std::lock_guard lock(high_degree_vertex_mutex); + high_degree_vertices.push_back(coarse_node); + } + tmp_nodes[coarse_node].setWeight(node_weights[coarse_node]); + tmp_nodes[coarse_node].setFirstEntry(incident_edges_start); + }); + + if(!high_degree_vertices.empty()) + { + // High degree vertices are treated special, because sorting and afterwards + // removing duplicates can become a major sequential bottleneck + ConcurrentBucketMap incident_edges_map; + size_t max_degree = 0; + for(const HypernodeID &coarse_node : high_degree_vertices) + { + const size_t incident_edges_start = tmp_incident_edges_prefix_sum[coarse_node]; + const size_t incident_edges_end = tmp_incident_edges_prefix_sum[coarse_node + 1]; + max_degree = std::max(max_degree, incident_edges_end - incident_edges_start); + } + incident_edges_map.reserve_for_estimated_number_of_insertions(max_degree); - // Mapping from a vertex id of the current hypergraph to its - // id in the coarse hypergraph - auto map_to_coarse_graph = [&](const HypernodeID node) { - ASSERT(node < communities.size()); - return communities[node]; - }; - - - doParallelForAllNodes([&](const HypernodeID& node) { - const HypernodeID coarse_node = map_to_coarse_graph(node); - ASSERT(coarse_node < coarsened_num_nodes, V(coarse_node) << V(coarsened_num_nodes)); - // Weight vector is atomic => thread-safe - node_weights[coarse_node] += nodeWeight(node); - // Aggregate upper bound for number of incident nets of the contracted vertex - tmp_num_incident_edges[coarse_node] += nodeDegree(node); - }); + parallel::scalable_vector > resulting_ranges; + for(const HypernodeID &coarse_node : high_degree_vertices) + { + const size_t incident_edges_start = tmp_incident_edges_prefix_sum[coarse_node]; + const size_t incident_edges_end = tmp_incident_edges_prefix_sum[coarse_node + 1]; + const size_t size_of_range = incident_edges_end - incident_edges_start; + + // Insert incident edges into concurrent bucket map + const size_t num_chunks = tbb::this_task_arena::max_concurrency(); + const size_t chunk_size = parallel::chunking::idiv_ceil(size_of_range, num_chunks); + tbb::parallel_for( + UL(0), num_chunks, + [&](const size_t chunk_id) { + auto [start_offset, end_offset] = + parallel::chunking::bounds(chunk_id, size_of_range, chunk_size); + const size_t start = incident_edges_start + start_offset; + const size_t end = incident_edges_start + end_offset; + // First, we apply a deduplication step to the thread-local range. 
The reason + // is that on large irregular graphs, extremely large clusters (thousands of + // nodes) can be created during coarsening. In this case, there can be + // thousands of edges with the same target, which creates a massive imbalance + // and thus high contention in the bucket map if we insert them directly. The + // local deduplication avoids this problem by ensuring that each edge appears + // at most t times. + const HyperedgeID local_degree = + deduplicateTmpEdges(tmp_edges.data() + start, tmp_edges.data() + end); + for(size_t pos = start; pos < start + local_degree; ++pos) + { + const TmpEdgeInformation &edge = tmp_edges[pos]; + ASSERT(edge.isValid()); + incident_edges_map.insert(edge.getTarget(), TmpEdgeInformation(edge)); + } + }, + tbb::static_partitioner()); + + // Process each bucket in parallel and deduplicate the edges + std::atomic incident_edges_pos(incident_edges_start); + tbb::parallel_for(UL(0), incident_edges_map.numBuckets(), [&](const size_t bucket) { + auto &incident_edges_bucket = incident_edges_map.getBucket(bucket); + const HyperedgeID bucket_degree = deduplicateTmpEdges( + incident_edges_bucket.data(), + incident_edges_bucket.data() + incident_edges_bucket.size()); + const size_t tmp_incident_edges_pos = incident_edges_pos.fetch_add(bucket_degree); + memcpy(tmp_edges.data() + tmp_incident_edges_pos, incident_edges_bucket.data(), + sizeof(TmpEdgeInformation) * bucket_degree); + incident_edges_map.clear(bucket); + }); - // #################### STAGE 2 #################### - // In this step the incident edges of vertices are processed and stored inside the temporary - // buffer. The vertex ids of the targets are remapped and edges that are contained inside - // one community after contraction are marked as invalid. Note that parallel edges are not - // invalidated yet. 
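Note on the stage-2 description above: each coarse node owns a slice of the temporary edge array whose start is given by the prefix sum over the per-node degree upper bounds, and every fine node reserves a contiguous chunk inside its coarse node's slice with a single atomic fetch_add before writing its remapped edges there; edges that stay inside the cluster are written as invalid placeholders. A simplified, self-contained sketch of that reservation pattern (illustrative names and types; the real code also stores a unique edge id per entry):

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <vector>

struct TmpEdge { std::uint32_t target; std::int32_t weight; };  // simplified stand-in

// Illustrative only: 'offsets[c]' is the start of coarse node c's slice of
// 'tmp_edges' (from the prefix sum); 'cursor[c]' is the per-coarse-node atomic
// write position inside that slice.
void writeNodeEdges(std::uint32_t coarse_node,
                    const std::vector<TmpEdge>& fine_edges,         // targets already remapped
                    const std::vector<std::size_t>& offsets,
                    std::vector<std::atomic<std::size_t>>& cursor,
                    std::vector<TmpEdge>& tmp_edges) {
  // Reserve a chunk for this fine node with one fetch_add (thread-safe).
  const std::size_t begin =
      offsets[coarse_node] + cursor[coarse_node].fetch_add(fine_edges.size());
  for (std::size_t i = 0; i < fine_edges.size(); ++i) {
    const TmpEdge& e = fine_edges[i];
    const bool is_self_loop = (e.target == coarse_node);
    // UINT32_MAX plays the role of the 'invalid edge' sentinel in this sketch.
    tmp_edges[begin + i] = is_self_loop ? TmpEdge{UINT32_MAX, 0} : e;
  }
}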
- - // Compute start position the incident nets of a coarse vertex in the - // temporary incident nets array with a parallel prefix sum - parallel::scalable_vector> tmp_incident_edges_pos; - parallel::TBBPrefixSum, Array> - tmp_incident_edges_prefix_sum(tmp_num_incident_edges); - tbb::parallel_invoke([&] { - tbb::parallel_scan(tbb::blocked_range( - ID(0), static_cast(coarsened_num_nodes)), tmp_incident_edges_prefix_sum); - }, [&] { - tmp_incident_edges_pos.assign(coarsened_num_nodes, parallel::IntegralAtomicWrapper(0)); - }); + const size_t contracted_size = incident_edges_pos.load() - incident_edges_start; + node_sizes[coarse_node] = contracted_size; + resulting_ranges.emplace_back(incident_edges_start, incident_edges_pos.load()); + } - // Write the incident edges of each contracted vertex to the temporary edge array - doParallelForAllNodes([&](const HypernodeID& node) { - const HypernodeID coarse_node = map_to_coarse_graph(node); - const HyperedgeID node_degree = nodeDegree(node); - const size_t coarse_edges_pos = tmp_incident_edges_prefix_sum[coarse_node] + - tmp_incident_edges_pos[coarse_node].fetch_add(node_degree); - const size_t edges_pos = _nodes[node].firstEntry(); - ASSERT(coarse_edges_pos + node_degree <= tmp_incident_edges_prefix_sum[coarse_node + 1]); - ASSERT(edges_pos + node_degree <= _edges.size()); - for (size_t i = 0; i < static_cast(node_degree); ++i) { - const Edge& edge = _edges[edges_pos + i]; - const HyperedgeID unique_id = _unique_edge_ids[edges_pos + i]; - const HypernodeID target = map_to_coarse_graph(edge.target()); - const bool is_valid = target != coarse_node; - if (is_valid) { - tmp_edges[coarse_edges_pos + i] = TmpEdgeInformation(target, edge.weight(), unique_id); - } else { - tmp_edges[coarse_edges_pos + i] = TmpEdgeInformation(); - } + // We still sort the adjacent edges since this results in better cache locality when + // accessing the neighbors. Also, sorting is necessary for deterministic partitioning. + tbb::parallel_for(UL(0), resulting_ranges.size(), [&](const size_t i) { + auto [start, end] = resulting_ranges[i]; + auto comparator = [](const TmpEdgeInformation &e1, const TmpEdgeInformation &e2) { + return e1._target < e2._target; + }; + if(end - start > HIGH_DEGREE_CONTRACTION_THRESHOLD || + resulting_ranges.size() < + 2 * static_cast(tbb::this_task_arena::max_concurrency())) + { + tbb::parallel_sort(tmp_edges.begin() + start, tmp_edges.begin() + end, + comparator); } - }); - - - // #################### STAGE 3 #################### - // In this step, we deduplicate parallel edges. To this end, the incident edges - // of each vertex are sorted and aggregated. However, there is a special treatment - // for vertices with extremely high degree, as they might become a bottleneck - // otherwise. Afterwards, for all parallel edges all but one are invalidated and - // the weight of the remaining edge is set to the sum of the weights. 
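Note on the stage-3 description above: deduplicating parallel edges is a sort by target followed by a single forward pass that collapses runs of equal targets and sums their weights, which is what deduplicateTmpEdges() implements. A self-contained sketch of that pass (simplified to target/weight pairs; the real routine additionally keeps the minimum unique edge id of each run, marks dropped slots invalid, and relies on invalid edges sorting to the end):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

struct TmpEdge { std::uint32_t target; std::int32_t weight; };

// Illustrative only: returns the deduplicated degree; edges[0..result) is the
// compacted, weight-aggregated adjacency of one coarse node.
std::size_t deduplicate(std::vector<TmpEdge>& edges) {
  if (edges.empty()) return 0;
  std::sort(edges.begin(), edges.end(),
            [](const TmpEdge& a, const TmpEdge& b) { return a.target < b.target; });
  std::size_t valid = 0;  // last position of the deduplicated prefix
  for (std::size_t i = 1; i < edges.size(); ++i) {
    if (edges[i].target == edges[valid].target) {
      edges[valid].weight += edges[i].weight;  // parallel edge: aggregate weight
    } else {
      edges[++valid] = edges[i];               // new target: extend the prefix
    }
  }
  return valid + 1;
}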
- - // A list of high degree vertices that are processed afterwards - parallel::scalable_vector high_degree_vertices; - std::mutex high_degree_vertex_mutex; - tbb::parallel_for(ID(0), coarsened_num_nodes, [&](const HypernodeID& coarse_node) { - const size_t incident_edges_start = tmp_incident_edges_prefix_sum[coarse_node]; - const size_t incident_edges_end = tmp_incident_edges_prefix_sum[coarse_node + 1]; - const size_t tmp_degree = incident_edges_end - incident_edges_start; - if (tmp_degree <= HIGH_DEGREE_CONTRACTION_THRESHOLD) { - // if the degree is small enough, we directly deduplicate the edges - node_sizes[coarse_node] = deduplicateTmpEdges(tmp_edges.data() + incident_edges_start, - tmp_edges.data() + incident_edges_end); - } else { - std::lock_guard lock(high_degree_vertex_mutex); - high_degree_vertices.push_back(coarse_node); + else + { + std::sort(tmp_edges.begin() + start, tmp_edges.begin() + end, comparator); } - tmp_nodes[coarse_node].setWeight(node_weights[coarse_node]); - tmp_nodes[coarse_node].setFirstEntry(incident_edges_start); }); + } - if ( !high_degree_vertices.empty() ) { - // High degree vertices are treated special, because sorting and afterwards - // removing duplicates can become a major sequential bottleneck - ConcurrentBucketMap incident_edges_map; - size_t max_degree = 0; - for ( const HypernodeID& coarse_node : high_degree_vertices ) { - const size_t incident_edges_start = tmp_incident_edges_prefix_sum[coarse_node]; - const size_t incident_edges_end = tmp_incident_edges_prefix_sum[coarse_node + 1]; - max_degree = std::max(max_degree, incident_edges_end - incident_edges_start); - } - incident_edges_map.reserve_for_estimated_number_of_insertions(max_degree); - - parallel::scalable_vector> resulting_ranges; - for ( const HypernodeID& coarse_node : high_degree_vertices ) { - const size_t incident_edges_start = tmp_incident_edges_prefix_sum[coarse_node]; - const size_t incident_edges_end = tmp_incident_edges_prefix_sum[coarse_node + 1]; - const size_t size_of_range = incident_edges_end - incident_edges_start; - - // Insert incident edges into concurrent bucket map - const size_t num_chunks = tbb::this_task_arena::max_concurrency(); - const size_t chunk_size = parallel::chunking::idiv_ceil(size_of_range, num_chunks); - tbb::parallel_for(UL(0), num_chunks, [&](const size_t chunk_id) { - auto [start_offset, end_offset] = parallel::chunking::bounds(chunk_id, size_of_range, chunk_size); - const size_t start = incident_edges_start + start_offset; - const size_t end = incident_edges_start + end_offset; - // First, we apply a deduplication step to the thread-local range. The reason is that on large irregular - // graphs, extremely large clusters (thousands of nodes) can be created during coarsening. In this case, - // there can be thousands of edges with the same target, which creates a massive imbalance and thus high - // contention in the bucket map if we insert them directly. The local deduplication avoids this problem - // by ensuring that each edge appears at most t times. 
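Note on the high-degree path above: after the thread-local deduplication, the remaining edges are scattered into buckets keyed by their target, so all surviving copies of a parallel edge meet in the same bucket and each bucket can be deduplicated independently (in the real code by different threads via ConcurrentBucketMap, whose actual interface differs from this sketch). A rough, self-contained illustration of the bucketing idea:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

struct TmpEdge { std::uint32_t target; std::int32_t weight; };

// Illustrative only: same target -> same bucket, so per-bucket deduplication
// is safe and independent; the deduplicated buckets are concatenated at the end.
std::vector<TmpEdge> deduplicateHighDegree(const std::vector<TmpEdge>& edges,
                                           std::size_t num_buckets) {
  std::vector<std::vector<TmpEdge>> buckets(num_buckets);
  for (const TmpEdge& e : edges) {
    buckets[e.target % num_buckets].push_back(e);
  }
  std::vector<TmpEdge> result;
  for (std::vector<TmpEdge>& bucket : buckets) {  // each iteration is independent work
    std::sort(bucket.begin(), bucket.end(),
              [](const TmpEdge& a, const TmpEdge& b) { return a.target < b.target; });
    for (const TmpEdge& e : bucket) {
      // Different buckets hold disjoint target sets, so merging against
      // result.back() never crosses a bucket boundary.
      if (!result.empty() && result.back().target == e.target) {
        result.back().weight += e.weight;  // merge parallel edge
      } else {
        result.push_back(e);
      }
    }
  }
  return result;
}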
- const HyperedgeID local_degree = deduplicateTmpEdges(tmp_edges.data() + start, tmp_edges.data() + end); - for (size_t pos = start; pos < start + local_degree; ++pos) { - const TmpEdgeInformation& edge = tmp_edges[pos]; - ASSERT(edge.isValid()); - incident_edges_map.insert(edge.getTarget(), TmpEdgeInformation(edge)); - } - }, tbb::static_partitioner()); - - // Process each bucket in parallel and deduplicate the edges - std::atomic incident_edges_pos(incident_edges_start); - tbb::parallel_for(UL(0), incident_edges_map.numBuckets(), [&](const size_t bucket) { - auto& incident_edges_bucket = incident_edges_map.getBucket(bucket); - const HyperedgeID bucket_degree = deduplicateTmpEdges(incident_edges_bucket.data(), - incident_edges_bucket.data() + incident_edges_bucket.size()); - const size_t tmp_incident_edges_pos = incident_edges_pos.fetch_add(bucket_degree); - memcpy(tmp_edges.data() + tmp_incident_edges_pos, - incident_edges_bucket.data(), sizeof(TmpEdgeInformation) * bucket_degree); - incident_edges_map.clear(bucket); - }); - - const size_t contracted_size = incident_edges_pos.load() - incident_edges_start; - node_sizes[coarse_node] = contracted_size; - resulting_ranges.emplace_back(incident_edges_start, incident_edges_pos.load()); + // #################### STAGE 4 #################### + // Coarsened graph is constructed here by writting data from temporary + // buffers to corresponding members in coarsened graph. We compute + // a prefix sum over the vertex sizes to determine the start index + // of the edges in the edge array, removing all invalid edges. + // Additionally, we need to calculate new unique edge ids. + + StaticGraph hypergraph; + + // Compute number of edges in coarse graph (those flagged as valid) + parallel::TBBPrefixSum degree_mapping(node_sizes); + tbb::parallel_scan( + tbb::blocked_range(ID(0), static_cast(coarsened_num_nodes)), + degree_mapping); + const HyperedgeID coarsened_num_edges = degree_mapping.total_sum(); + hypergraph._num_nodes = coarsened_num_nodes; + hypergraph._num_edges = coarsened_num_edges; + + HEAVY_COARSENING_ASSERT([&] { + HyperedgeID last_end = 0; + for(size_t i = 0; i < coarsened_num_nodes; ++i) + { + const HyperedgeID tmp_edges_start = tmp_nodes[i].firstEntry(); + if(last_end > tmp_edges_start) + { + return false; } - - // We still sort the adjacent edges since this results in better cache locality when accessing the neighbors. - // Also, sorting is necessary for deterministic partitioning. - tbb::parallel_for(UL(0), resulting_ranges.size(), [&](const size_t i) { - auto [start, end] = resulting_ranges[i]; - auto comparator = [](const TmpEdgeInformation& e1, const TmpEdgeInformation& e2) { - return e1._target < e2._target; - }; - if (end - start > HIGH_DEGREE_CONTRACTION_THRESHOLD - || resulting_ranges.size() < 2 * static_cast(tbb::this_task_arena::max_concurrency())) { - tbb::parallel_sort(tmp_edges.begin() + start, tmp_edges.begin() + end, comparator); - } else { - std::sort(tmp_edges.begin() + start, tmp_edges.begin() + end, comparator); - } - }); + last_end = tmp_edges_start + degree_mapping.value(i); } - - // #################### STAGE 4 #################### - // Coarsened graph is constructed here by writting data from temporary - // buffers to corresponding members in coarsened graph. We compute - // a prefix sum over the vertex sizes to determine the start index - // of the edges in the edge array, removing all invalid edges. - // Additionally, we need to calculate new unique edge ids. 
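Note on the stage-4 description above: the final, gap-free edge array is laid out by an exclusive prefix sum over the deduplicated node degrees, which yields every coarse node's start offset and, as the running total, the number of coarse edges stored in the sentinel node; the new unique edge ids are then derived with the same 0/1-marker-plus-prefix-sum trick as the node renumbering in stage 1. A minimal, sequential sketch of the offset computation (illustrative names only):

#include <cstddef>
#include <vector>

// Illustrative only: sizes[c] is the deduplicated degree of coarse node c;
// offsets[c] becomes the start of node c's edges in the compacted edge array,
// and offsets[sizes.size()] is the total number of coarse edges (the sentinel).
std::vector<std::size_t> edgeStartOffsets(const std::vector<std::size_t>& sizes) {
  std::vector<std::size_t> offsets(sizes.size() + 1, 0);
  for (std::size_t c = 0; c < sizes.size(); ++c) {
    offsets[c + 1] = offsets[c] + sizes[c];
  }
  return offsets;
}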
- - StaticGraph hypergraph; - - // Compute number of edges in coarse graph (those flagged as valid) - parallel::TBBPrefixSum degree_mapping(node_sizes); - tbb::parallel_scan(tbb::blocked_range( - ID(0), static_cast(coarsened_num_nodes)), degree_mapping); - const HyperedgeID coarsened_num_edges = degree_mapping.total_sum(); - hypergraph._num_nodes = coarsened_num_nodes; - hypergraph._num_edges = coarsened_num_edges; - - HEAVY_COARSENING_ASSERT( - [&]{ - HyperedgeID last_end = 0; - for (size_t i = 0; i < coarsened_num_nodes; ++i) { - const HyperedgeID tmp_edges_start = tmp_nodes[i].firstEntry(); - if (last_end > tmp_edges_start) { - return false; - } - last_end = tmp_edges_start + degree_mapping.value(i); - } - return true; - }() - ); - - tbb::parallel_invoke([&] { - // Copy edges - edge_id_mapping.assign(_num_edges / 2, 0); - hypergraph._edges.resizeNoAssign(coarsened_num_edges); - hypergraph._unique_edge_ids.resizeNoAssign(coarsened_num_edges); - tbb::parallel_for(ID(0), coarsened_num_nodes, [&](const HyperedgeID& coarse_node) { - const HyperedgeID tmp_edges_start = tmp_nodes[coarse_node].firstEntry(); - const HyperedgeID edges_start = degree_mapping[coarse_node]; - auto handle_edge = [&](const HyperedgeID& index) { - ASSERT(tmp_edges_start + index < tmp_edges.size() && edges_start + index < hypergraph._edges.size()); - const TmpEdgeInformation& tmp_edge = tmp_edges[tmp_edges_start + index]; - Edge& edge = hypergraph.edge(edges_start + index); - edge.setTarget(tmp_edge.getTarget()); - edge.setSource(coarse_node); - edge.setWeight(tmp_edge.getWeight()); - hypergraph._unique_edge_ids[edges_start + index] = tmp_edge.getID(); - ASSERT(static_cast(tmp_edge.getID()) < edge_id_mapping.size()); - edge_id_mapping[tmp_edge.getID()] = UL(1); - }; - - if (degree_mapping.value(coarse_node) > HIGH_DEGREE_CONTRACTION_THRESHOLD / 8) { - tbb::parallel_for(ID(0), degree_mapping.value(coarse_node), handle_edge); - } else { - for (size_t index = 0; index < degree_mapping.value(coarse_node); ++index) { - handle_edge(index); - } - } - }); - }, [&] { - hypergraph._nodes.resize(coarsened_num_nodes + 1); - tbb::parallel_for(ID(0), coarsened_num_nodes, [&](const HyperedgeID& coarse_node) { - Node& node = hypergraph.node(coarse_node); - node.enable(); - node.setFirstEntry(degree_mapping[coarse_node]); - node.setWeight(tmp_nodes[coarse_node].weight()); - }); - hypergraph._nodes.back() = Node(static_cast(coarsened_num_edges)); - }, [&] { - hypergraph._community_ids.resize(coarsened_num_nodes); - doParallelForAllNodes([&](HypernodeID fine_node) { - hypergraph.setCommunityID(map_to_coarse_graph(fine_node), communityID(fine_node)); + return true; + }()); + + tbb::parallel_invoke( + [&] { + // Copy edges + edge_id_mapping.assign(_num_edges / 2, 0); + hypergraph._edges.resizeNoAssign(coarsened_num_edges); + hypergraph._unique_edge_ids.resizeNoAssign(coarsened_num_edges); + tbb::parallel_for( + ID(0), coarsened_num_nodes, [&](const HyperedgeID &coarse_node) { + const HyperedgeID tmp_edges_start = tmp_nodes[coarse_node].firstEntry(); + const HyperedgeID edges_start = degree_mapping[coarse_node]; + auto handle_edge = [&](const HyperedgeID &index) { + ASSERT(tmp_edges_start + index < tmp_edges.size() && + edges_start + index < hypergraph._edges.size()); + const TmpEdgeInformation &tmp_edge = tmp_edges[tmp_edges_start + index]; + Edge &edge = hypergraph.edge(edges_start + index); + edge.setTarget(tmp_edge.getTarget()); + edge.setSource(coarse_node); + edge.setWeight(tmp_edge.getWeight()); + 
hypergraph._unique_edge_ids[edges_start + index] = tmp_edge.getID(); + ASSERT(static_cast(tmp_edge.getID()) < edge_id_mapping.size()); + edge_id_mapping[tmp_edge.getID()] = UL(1); + }; + + if(degree_mapping.value(coarse_node) > + HIGH_DEGREE_CONTRACTION_THRESHOLD / 8) + { + tbb::parallel_for(ID(0), degree_mapping.value(coarse_node), handle_edge); + } + else + { + for(size_t index = 0; index < degree_mapping.value(coarse_node); ++index) + { + handle_edge(index); + } + } + }); + }, + [&] { + hypergraph._nodes.resize(coarsened_num_nodes + 1); + tbb::parallel_for(ID(0), coarsened_num_nodes, + [&](const HyperedgeID &coarse_node) { + Node &node = hypergraph.node(coarse_node); + node.enable(); + node.setFirstEntry(degree_mapping[coarse_node]); + node.setWeight(tmp_nodes[coarse_node].weight()); + }); + hypergraph._nodes.back() = Node(static_cast(coarsened_num_edges)); + }, + [&] { + hypergraph._community_ids.resize(coarsened_num_nodes); + doParallelForAllNodes([&](HypernodeID fine_node) { + hypergraph.setCommunityID(map_to_coarse_graph(fine_node), + communityID(fine_node)); + }); }); - }); - - // Remap unique edge ids via prefix sum - parallel::TBBPrefixSum edge_id_prefix_sum(edge_id_mapping); - tbb::parallel_scan(tbb::blocked_range(ID(0), _num_edges / 2), edge_id_prefix_sum); - ASSERT(edge_id_prefix_sum.total_sum() == coarsened_num_edges / 2); - tbb::parallel_for(ID(0), coarsened_num_edges, [&](const HyperedgeID& e) { - HyperedgeID& unique_id = hypergraph._unique_edge_ids[e]; - unique_id = edge_id_prefix_sum[unique_id]; + // Remap unique edge ids via prefix sum + parallel::TBBPrefixSum edge_id_prefix_sum(edge_id_mapping); + tbb::parallel_scan(tbb::blocked_range(ID(0), _num_edges / 2), + edge_id_prefix_sum); + ASSERT(edge_id_prefix_sum.total_sum() == coarsened_num_edges / 2); + + tbb::parallel_for(ID(0), coarsened_num_edges, [&](const HyperedgeID &e) { + HyperedgeID &unique_id = hypergraph._unique_edge_ids[e]; + unique_id = edge_id_prefix_sum[unique_id]; + }); + + if(hasFixedVertices()) + { + // Map fixed vertices to coarse graph + FixedVertexSupport coarse_fixed_vertices(hypergraph.initialNumNodes(), + _fixed_vertices.numBlocks()); + coarse_fixed_vertices.setHypergraph(&hypergraph); + doParallelForAllNodes([&](const HypernodeID hn) { + if(isFixed(hn)) + { + coarse_fixed_vertices.fixToBlock(communities[hn], fixedVertexBlock(hn)); + } }); + hypergraph.addFixedVertexSupport(std::move(coarse_fixed_vertices)); + } - if ( hasFixedVertices() ) { - // Map fixed vertices to coarse graph - FixedVertexSupport coarse_fixed_vertices( - hypergraph.initialNumNodes(), _fixed_vertices.numBlocks()); - coarse_fixed_vertices.setHypergraph(&hypergraph); - doParallelForAllNodes([&](const HypernodeID hn) { - if ( isFixed(hn) ) { - coarse_fixed_vertices.fixToBlock(communities[hn], fixedVertexBlock(hn)); - } - }); - hypergraph.addFixedVertexSupport(std::move(coarse_fixed_vertices)); - } - - HEAVY_COARSENING_ASSERT( - [&](){ - parallel::scalable_vector covered_ids(hypergraph.initialNumEdges() / 2, false); - for (HyperedgeID e : hypergraph.edges()) { + HEAVY_COARSENING_ASSERT( + [&]() { + parallel::scalable_vector covered_ids(hypergraph.initialNumEdges() / 2, + false); + for(HyperedgeID e : hypergraph.edges()) + { HyperedgeID id = hypergraph.uniqueEdgeID(e); covered_ids.at(id) = true; bool success = false; - for (HyperedgeID b_edge : hypergraph.incidentEdges(hypergraph.edgeTarget(e))) { - if (hypergraph.edgeTarget(b_edge) == hypergraph.edgeSource(e)) { - if (hypergraph.uniqueEdgeID(b_edge) != id) { + for(HyperedgeID b_edge 
: hypergraph.incidentEdges(hypergraph.edgeTarget(e))) + { + if(hypergraph.edgeTarget(b_edge) == hypergraph.edgeSource(e)) + { + if(hypergraph.uniqueEdgeID(b_edge) != id) + { return false; } success = true; break; } } - if (!success) { + if(!success) + { return false; } } - for (bool val : covered_ids) { - if (!val) { + for(bool val : covered_ids) + { + if(!val) + { return false; } } return true; }(), - "Unique edge IDs are not initialized correctly." - ); - - hypergraph._total_weight = _total_weight; - hypergraph._tmp_contraction_buffer = _tmp_contraction_buffer; - _tmp_contraction_buffer = nullptr; - return hypergraph; - } - - - size_t StaticGraph::deduplicateTmpEdges(TmpEdgeInformation* edge_start, TmpEdgeInformation* edge_end) { - ASSERT(std::distance(edge_start, edge_end) >= 0); - std::sort(edge_start, edge_end, - [](const TmpEdgeInformation& e1, const TmpEdgeInformation& e2) { - return e1._target < e2._target; - }); - - // Deduplicate, aggregate weights and calculate minimum unique id - // - // <-- deduplicated --> <-- already processed --> <-- to be processed --> <-- invalid edges --> - // ^ ^ - // valid_edge_index --- tmp_edge_index --- - size_t valid_edge_index = 0; - size_t tmp_edge_index = 1; - while (tmp_edge_index < static_cast(std::distance(edge_start, edge_end)) && edge_start[tmp_edge_index].isValid()) { - HEAVY_COARSENING_ASSERT( - [&](){ + "Unique edge IDs are not initialized correctly."); + + hypergraph._total_weight = _total_weight; + hypergraph._tmp_contraction_buffer = _tmp_contraction_buffer; + _tmp_contraction_buffer = nullptr; + return hypergraph; +} + +size_t StaticGraph::deduplicateTmpEdges(TmpEdgeInformation *edge_start, + TmpEdgeInformation *edge_end) +{ + ASSERT(std::distance(edge_start, edge_end) >= 0); + std::sort(edge_start, edge_end, + [](const TmpEdgeInformation &e1, const TmpEdgeInformation &e2) { + return e1._target < e2._target; + }); + + // Deduplicate, aggregate weights and calculate minimum unique id + // + // <-- deduplicated --> <-- already processed --> <-- to be processed --> <-- invalid + // edges --> + // ^ ^ + // valid_edge_index --- tmp_edge_index --- + size_t valid_edge_index = 0; + size_t tmp_edge_index = 1; + while(tmp_edge_index < static_cast(std::distance(edge_start, edge_end)) && + edge_start[tmp_edge_index].isValid()) + { + HEAVY_COARSENING_ASSERT( + [&]() { size_t i = 0; - for (; i <= valid_edge_index; ++i) { - if (!edge_start[i].isValid()) { + for(; i <= valid_edge_index; ++i) + { + if(!edge_start[i].isValid()) + { return false; - } else if ((i + 1 <= valid_edge_index) && - edge_start[i].getTarget() >= edge_start[i + 1].getTarget()) { + } + else if((i + 1 <= valid_edge_index) && + edge_start[i].getTarget() >= edge_start[i + 1].getTarget()) + { return false; } } return true; }(), - "Invariant violated while deduplicating incident edges!" 
- ); - - TmpEdgeInformation& valid_edge = edge_start[valid_edge_index]; - TmpEdgeInformation& next_edge = edge_start[tmp_edge_index]; - if (valid_edge.getTarget() == next_edge.getTarget()) { - valid_edge.addWeight(next_edge.getWeight()); - valid_edge.updateID(next_edge.getID()); - next_edge.invalidate(); - } else { - edge_start[++valid_edge_index] = next_edge; - } - ++tmp_edge_index; + "Invariant violated while deduplicating incident edges!"); + + TmpEdgeInformation &valid_edge = edge_start[valid_edge_index]; + TmpEdgeInformation &next_edge = edge_start[tmp_edge_index]; + if(valid_edge.getTarget() == next_edge.getTarget()) + { + valid_edge.addWeight(next_edge.getWeight()); + valid_edge.updateID(next_edge.getID()); + next_edge.invalidate(); } - const bool is_non_empty = (std::distance(edge_start, edge_end) > 0) && edge_start[0].isValid(); - return is_non_empty ? (valid_edge_index + 1) : 0; - } - - // ! Copy static hypergraph in parallel - StaticGraph StaticGraph::copy(parallel_tag_t) const { - StaticGraph hypergraph; - - hypergraph._num_nodes = _num_nodes; - hypergraph._num_removed_nodes = _num_removed_nodes; - hypergraph._num_edges = _num_edges; - hypergraph._total_weight = _total_weight; - - tbb::parallel_invoke([&] { - hypergraph._nodes.resize(_nodes.size()); - memcpy(hypergraph._nodes.data(), _nodes.data(), - sizeof(Node) * _nodes.size()); - }, [&] { - hypergraph._edges.resize(_edges.size()); - memcpy(hypergraph._edges.data(), _edges.data(), - sizeof(Edge) * _edges.size()); - }, [&] { - hypergraph._unique_edge_ids.resize(_unique_edge_ids.size()); - memcpy(hypergraph._unique_edge_ids.data(), _unique_edge_ids.data(), - sizeof(HyperedgeID) * _unique_edge_ids.size()); - }, [&] { - hypergraph._community_ids = _community_ids; - }, [&] { - hypergraph.addFixedVertexSupport(_fixed_vertices.copy()); - }); - return hypergraph; - } - - // ! Copy static hypergraph sequential - StaticGraph StaticGraph::copy() const { - StaticGraph hypergraph; - - hypergraph._num_nodes = _num_nodes; - hypergraph._num_removed_nodes = _num_removed_nodes; - hypergraph._num_edges = _num_edges; - hypergraph._total_weight = _total_weight; - - hypergraph._nodes.resize(_nodes.size()); - memcpy(hypergraph._nodes.data(), _nodes.data(), - sizeof(Node) * _nodes.size()); - - hypergraph._edges.resize(_edges.size()); - memcpy(hypergraph._edges.data(), _edges.data(), - sizeof(Edge) * _edges.size()); - - hypergraph._unique_edge_ids.resize(_unique_edge_ids.size()); - memcpy(hypergraph._unique_edge_ids.data(), _unique_edge_ids.data(), - sizeof(HyperedgeID) * _unique_edge_ids.size()); - - hypergraph._community_ids = _community_ids; - hypergraph.addFixedVertexSupport(_fixed_vertices.copy()); - - return hypergraph; - } - - void StaticGraph::memoryConsumption(utils::MemoryTreeNode* parent) const { - ASSERT(parent); - parent->addChild("Hypernodes", sizeof(Node) * _nodes.size()); - parent->addChild("Hyperedges", 2 * sizeof(Edge) * _edges.size()); - parent->addChild("Communities", sizeof(PartitionID) * _community_ids.capacity()); - if ( hasFixedVertices() ) { - parent->addChild("Fixed Vertex Support", _fixed_vertices.size_in_bytes()); + else + { + edge_start[++valid_edge_index] = next_edge; } + ++tmp_edge_index; } - - // ! 
Computes the total node weight of the hypergraph - void StaticGraph::computeAndSetTotalNodeWeight(parallel_tag_t) { - _total_weight = tbb::parallel_reduce(tbb::blocked_range(ID(0), _num_nodes), 0, - [this](const tbb::blocked_range& range, HypernodeWeight init) { - HypernodeWeight weight = init; - for (HypernodeID hn = range.begin(); hn < range.end(); ++hn) { - if (nodeIsEnabled(hn)) { - weight += this->_nodes[hn].weight(); - } - } - return weight; - }, std::plus<>()); + const bool is_non_empty = + (std::distance(edge_start, edge_end) > 0) && edge_start[0].isValid(); + return is_non_empty ? (valid_edge_index + 1) : 0; +} + +// ! Copy static hypergraph in parallel +StaticGraph StaticGraph::copy(parallel_tag_t) const +{ + StaticGraph hypergraph; + + hypergraph._num_nodes = _num_nodes; + hypergraph._num_removed_nodes = _num_removed_nodes; + hypergraph._num_edges = _num_edges; + hypergraph._total_weight = _total_weight; + + tbb::parallel_invoke( + [&] { + hypergraph._nodes.resize(_nodes.size()); + memcpy(hypergraph._nodes.data(), _nodes.data(), sizeof(Node) * _nodes.size()); + }, + [&] { + hypergraph._edges.resize(_edges.size()); + memcpy(hypergraph._edges.data(), _edges.data(), sizeof(Edge) * _edges.size()); + }, + [&] { + hypergraph._unique_edge_ids.resize(_unique_edge_ids.size()); + memcpy(hypergraph._unique_edge_ids.data(), _unique_edge_ids.data(), + sizeof(HyperedgeID) * _unique_edge_ids.size()); + }, + [&] { hypergraph._community_ids = _community_ids; }, + [&] { hypergraph.addFixedVertexSupport(_fixed_vertices.copy()); }); + return hypergraph; +} + +// ! Copy static hypergraph sequential +StaticGraph StaticGraph::copy() const +{ + StaticGraph hypergraph; + + hypergraph._num_nodes = _num_nodes; + hypergraph._num_removed_nodes = _num_removed_nodes; + hypergraph._num_edges = _num_edges; + hypergraph._total_weight = _total_weight; + + hypergraph._nodes.resize(_nodes.size()); + memcpy(hypergraph._nodes.data(), _nodes.data(), sizeof(Node) * _nodes.size()); + + hypergraph._edges.resize(_edges.size()); + memcpy(hypergraph._edges.data(), _edges.data(), sizeof(Edge) * _edges.size()); + + hypergraph._unique_edge_ids.resize(_unique_edge_ids.size()); + memcpy(hypergraph._unique_edge_ids.data(), _unique_edge_ids.data(), + sizeof(HyperedgeID) * _unique_edge_ids.size()); + + hypergraph._community_ids = _community_ids; + hypergraph.addFixedVertexSupport(_fixed_vertices.copy()); + + return hypergraph; +} + +void StaticGraph::memoryConsumption(utils::MemoryTreeNode *parent) const +{ + ASSERT(parent); + parent->addChild("Hypernodes", sizeof(Node) * _nodes.size()); + parent->addChild("Hyperedges", 2 * sizeof(Edge) * _edges.size()); + parent->addChild("Communities", sizeof(PartitionID) * _community_ids.capacity()); + if(hasFixedVertices()) + { + parent->addChild("Fixed Vertex Support", _fixed_vertices.size_in_bytes()); } +} + +// ! 
Computes the total node weight of the hypergraph +void StaticGraph::computeAndSetTotalNodeWeight(parallel_tag_t) +{ + _total_weight = tbb::parallel_reduce( + tbb::blocked_range(ID(0), _num_nodes), 0, + [this](const tbb::blocked_range &range, HypernodeWeight init) { + HypernodeWeight weight = init; + for(HypernodeID hn = range.begin(); hn < range.end(); ++hn) + { + if(nodeIsEnabled(hn)) + { + weight += this->_nodes[hn].weight(); + } + } + return weight; + }, + std::plus<>()); +} } // namespace diff --git a/mt-kahypar/datastructures/static_graph.h b/mt-kahypar/datastructures/static_graph.h index 0d49ee9ff..4d9e5dee3 100644 --- a/mt-kahypar/datastructures/static_graph.h +++ b/mt-kahypar/datastructures/static_graph.h @@ -34,16 +34,16 @@ #include "include/libmtkahypartypes.h" -#include "mt-kahypar/macros.h" #include "mt-kahypar/datastructures/array.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/fixed_vertex_support.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/context_enum_classes.h" +#include "mt-kahypar/utils/exception.h" #include "mt-kahypar/utils/memory_tree.h" #include "mt-kahypar/utils/range.h" -#include "mt-kahypar/utils/exception.h" namespace mt_kahypar { namespace ds { @@ -53,7 +53,8 @@ class StaticGraphFactory; template class PartitionedGraph; -class StaticGraph { +class StaticGraph +{ static constexpr bool enable_heavy_assert = false; @@ -70,69 +71,60 @@ class StaticGraph { using AtomicHypernodeID = parallel::IntegralAtomicWrapper; using AtomicHypernodeWeight = parallel::IntegralAtomicWrapper; - using UncontractionFunction = std::function; - using MarkEdgeFunc = std::function; - #define NOOP_BATCH_FUNC [] (const HypernodeID, const HypernodeID, const HyperedgeID) { } + using UncontractionFunction = + std::function; + using MarkEdgeFunc = std::function; +#define NOOP_BATCH_FUNC [](const HypernodeID, const HypernodeID, const HyperedgeID) {} /** * Represents a hypernode of the hypergraph and contains all information * associated with a vertex. */ - class Node { - public: + class Node + { + public: using IDType = HypernodeID; - Node() : - _begin(0), - _weight(1), - _valid(false) { } + Node() : _begin(0), _weight(1), _valid(false) {} - explicit Node(const bool valid) : - _begin(0), - _weight(1), - _valid(valid) { } + explicit Node(const bool valid) : _begin(0), _weight(1), _valid(valid) {} // Sentinel Constructor - explicit Node(const size_t begin) : - _begin(begin), - _weight(1), - _valid(false) { } + explicit Node(const size_t begin) : _begin(begin), _weight(1), _valid(false) {} - bool isDisabled() const { - return _valid == false; - } + bool isDisabled() const { return _valid == false; } - void enable() { + void enable() + { ASSERT(isDisabled()); _valid = true; } - void disable() { + void disable() + { ASSERT(!isDisabled()); _valid = false; } // ! Returns the index of the first element in _incident_nets - HyperedgeID firstEntry() const { - return _begin; - } + HyperedgeID firstEntry() const { return _begin; } // ! 
Sets the index of the first element in _incident_nets to begin - void setFirstEntry(size_t begin) { + void setFirstEntry(size_t begin) + { ASSERT(!isDisabled()); _begin = begin; } - HypernodeWeight weight() const { - return _weight; - } + HypernodeWeight weight() const { return _weight; } - void setWeight(HypernodeWeight weight) { + void setWeight(HypernodeWeight weight) + { ASSERT(!isDisabled()); _weight = weight; } - private: + private: // ! Index of the first element in _edges HyperedgeID _begin; // ! Node weight @@ -145,57 +137,45 @@ class StaticGraph { * Represents a hyperedge of the hypergraph and contains all information * associated with a net (except connectivity information). */ - class Edge { - public: + class Edge + { + public: using IDType = HyperedgeID; - Edge() : - _target(0), - _source(0), - _weight(1) { } + Edge() : _target(0), _source(0), _weight(1) {} explicit Edge(HypernodeID target, HypernodeID source) : - _target(target), - _source(source), - _weight(1) { } + _target(target), _source(source), _weight(1) + { + } // ! Returns the index of the target node - HypernodeID target() const { - return _target; - } + HypernodeID target() const { return _target; } // ! Sets the index of the target node - void setTarget(HypernodeID target) { - _target = target; - } + void setTarget(HypernodeID target) { _target = target; } // ! Returns the index of the source node - HypernodeID source() const { - return _source; - } + HypernodeID source() const { return _source; } // ! Sets the index of the source node - void setSource(HypernodeID source) { - _source = source; - } + void setSource(HypernodeID source) { _source = source; } - HyperedgeWeight weight() const { - return _weight; - } + HyperedgeWeight weight() const { return _weight; } - void setWeight(HyperedgeWeight weight) { - _weight = weight; - } + void setWeight(HyperedgeWeight weight) { _weight = weight; } - bool operator== (const Edge& rhs) const { + bool operator==(const Edge &rhs) const + { return _target == rhs._target && _source == rhs._source && _weight == rhs._weight; } - bool operator!= (const Edge& rhs) const { + bool operator!=(const Edge &rhs) const + { return _target != rhs._target || _source != rhs._source || _weight != rhs._weight; } - private: + private: // ! Index of target node HypernodeID _target; // ! Index of source node @@ -218,12 +198,13 @@ class StaticGraph { * internal representation. Instead only handles to the respective elements * are returned, i.e. the IDs of the corresponding hypernodes/hyperedges. */ - class NodeIterator { - public: + class NodeIterator + { + public: using iterator_category = std::forward_iterator_tag; using value_type = HypernodeID; - using reference = HypernodeID&; - using pointer = const HypernodeID*; + using reference = HypernodeID &; + using pointer = const HypernodeID *; using difference_type = std::ptrdiff_t; /*! @@ -234,52 +215,49 @@ class StaticGraph { * \param id The index of the element the pointer points to * \param max_id The maximum index allowed */ - NodeIterator(const Node* start_element, HypernodeID id, HypernodeID max_id) : - _id(id), - _max_id(max_id), - _node(start_element) { - if (_id != _max_id && _node->isDisabled()) { - operator++ (); + NodeIterator(const Node *start_element, HypernodeID id, HypernodeID max_id) : + _id(id), _max_id(max_id), _node(start_element) + { + if(_id != _max_id && _node->isDisabled()) + { + operator++(); } } // ! Returns the id of the element the iterator currently points to. 
- HypernodeID operator* () const { - return _id; - } + HypernodeID operator*() const { return _id; } // ! Prefix increment. The iterator advances to the next valid element. - NodeIterator & operator++ () { + NodeIterator &operator++() + { ASSERT(_id < _max_id); - do { + do + { ++_id; ++_node; - } while (_id < _max_id && _node->isDisabled()); + } while(_id < _max_id && _node->isDisabled()); return *this; } // ! Postfix increment. The iterator advances to the next valid element. - NodeIterator operator++ (int) { + NodeIterator operator++(int) + { NodeIterator copy = *this; - operator++ (); + operator++(); return copy; } - bool operator!= (const NodeIterator& rhs) { - return _id != rhs._id; - } + bool operator!=(const NodeIterator &rhs) { return _id != rhs._id; } - bool operator== (const NodeIterator& rhs) { - return _id == rhs._id; - } + bool operator==(const NodeIterator &rhs) { return _id == rhs._id; } - private: + private: // Handle to the node the iterator currently points to HypernodeID _id = 0; // Maximum allowed index HypernodeID _max_id = 0; // node the iterator currently points to - const Node* _node = nullptr; + const Node *_node = nullptr; }; /*! @@ -287,54 +265,59 @@ class StaticGraph { * * Note that because this is a graph, each edge has exactly two pins. */ - class PinIterator { - public: + class PinIterator + { + public: using iterator_category = std::forward_iterator_tag; using value_type = HypernodeID; - using reference = HypernodeID&; - using pointer = const HypernodeID*; + using reference = HypernodeID &; + using pointer = const HypernodeID *; using difference_type = std::ptrdiff_t; /*! * Constructs a pin iterator based on the IDs of the two nodes */ PinIterator(HypernodeID source, HypernodeID target, unsigned int iteration_count) : - _source(source), - _target(target), - _iteration_count(iteration_count) { + _source(source), _target(target), _iteration_count(iteration_count) + { } // ! Returns the id of the element the iterator currently points to. - HypernodeID operator* () const { + HypernodeID operator*() const + { ASSERT(_iteration_count < 2); return _iteration_count == 0 ? _source : _target; } // ! Prefix increment. The iterator advances to the next valid element. - PinIterator & operator++ () { + PinIterator &operator++() + { ASSERT(_iteration_count < 2); ++_iteration_count; return *this; } // ! Postfix increment. The iterator advances to the next valid element. 
- PinIterator operator++ (int) { + PinIterator operator++(int) + { PinIterator copy = *this; - operator++ (); + operator++(); return copy; } - bool operator!= (const PinIterator& rhs) { - return _iteration_count != rhs._iteration_count || - _source != rhs._source || _target != rhs._target; + bool operator!=(const PinIterator &rhs) + { + return _iteration_count != rhs._iteration_count || _source != rhs._source || + _target != rhs._target; } - bool operator== (const PinIterator& rhs) { - return _iteration_count == rhs._iteration_count && - _source == rhs._source && _target == rhs._target; + bool operator==(const PinIterator &rhs) + { + return _iteration_count == rhs._iteration_count && _source == rhs._source && + _target == rhs._target; } - private: + private: // source node of the edge HypernodeID _source = 0; // target node of the edge @@ -343,55 +326,57 @@ class StaticGraph { unsigned int _iteration_count = 0; }; - static_assert(std::is_trivially_copyable::value, "Node is not trivially copyable"); - static_assert(std::is_trivially_copyable::value, "Hyperedge is not trivially copyable"); + static_assert(std::is_trivially_copyable::value, + "Node is not trivially copyable"); + static_assert(std::is_trivially_copyable::value, + "Hyperedge is not trivially copyable"); - private: - struct TmpEdgeInformation { +private: + struct TmpEdgeInformation + { // ! invalid edge TmpEdgeInformation() : - _target(kInvalidHyperedge), - _valid_or_weight(0), - _id(kInvalidHyperedge) { + _target(kInvalidHyperedge), _valid_or_weight(0), _id(kInvalidHyperedge) + { } // ! valid edge TmpEdgeInformation(HyperedgeID target, HyperedgeWeight weight, HyperedgeID id) : - _target(target), - _valid_or_weight(weight), - _id(id) { + _target(target), _valid_or_weight(weight), _id(id) + { ASSERT(isValid()); } - bool isValid() const { - return _valid_or_weight != 0; - } + bool isValid() const { return _valid_or_weight != 0; } - HyperedgeID getTarget() const { + HyperedgeID getTarget() const + { ASSERT(isValid()); return _target; } - HyperedgeWeight getWeight() const { + HyperedgeWeight getWeight() const + { ASSERT(isValid()); return _valid_or_weight; } - HyperedgeID getID() const { + HyperedgeID getID() const + { ASSERT(isValid()); return _id; } - void invalidate() { - _valid_or_weight = 0; - } + void invalidate() { _valid_or_weight = 0; } - void addWeight(HyperedgeWeight weight) { + void addWeight(HyperedgeWeight weight) + { ASSERT(isValid()); _valid_or_weight += weight; } - void updateID(HyperedgeID id) { + void updateID(HyperedgeID id) + { ASSERT(isValid()); _id = std::min(_id, id); } @@ -404,36 +389,36 @@ class StaticGraph { // ! Contains buffers that are needed during multilevel contractions. // ! Struct is allocated on top level hypergraph and passed to each contracted // ! hypergraph such that memory can be reused in consecutive contractions. 
- struct TmpContractionBuffer { + struct TmpContractionBuffer + { explicit TmpContractionBuffer(const HypernodeID num_nodes, - const HyperedgeID num_edges) { - tbb::parallel_invoke([&] { - mapping.resize("Coarsening", "mapping", num_nodes); - }, [&] { - tmp_nodes.resize("Coarsening", "tmp_nodes", num_nodes); - }, [&] { - node_sizes.resize("Coarsening", "node_sizes", num_nodes); - }, [&] { - tmp_num_incident_edges.resize("Coarsening", "tmp_num_incident_edges", num_nodes); - }, [&] { - node_weights.resize("Coarsening", "node_weights", num_nodes); - }, [&] { - tmp_edges.resize("Coarsening", "tmp_edges", num_edges); - }, [&] { - edge_id_mapping.resize("Coarsening", "edge_id_mapping", num_edges / 2); - }); + const HyperedgeID num_edges) + { + tbb::parallel_invoke( + [&] { mapping.resize("Coarsening", "mapping", num_nodes); }, + [&] { tmp_nodes.resize("Coarsening", "tmp_nodes", num_nodes); }, + [&] { node_sizes.resize("Coarsening", "node_sizes", num_nodes); }, + [&] { + tmp_num_incident_edges.resize("Coarsening", "tmp_num_incident_edges", + num_nodes); + }, + [&] { node_weights.resize("Coarsening", "node_weights", num_nodes); }, + [&] { tmp_edges.resize("Coarsening", "tmp_edges", num_edges); }, + [&] { + edge_id_mapping.resize("Coarsening", "edge_id_mapping", num_edges / 2); + }); } Array mapping; Array tmp_nodes; Array node_sizes; - Array> tmp_num_incident_edges; - Array> node_weights; + Array > tmp_num_incident_edges; + Array > node_weights; Array tmp_edges; Array edge_id_mapping; }; - public: +public: static constexpr bool is_graph = true; static constexpr bool is_static_hypergraph = true; static constexpr bool is_partitioned = false; @@ -453,42 +438,37 @@ class StaticGraph { using IncidentNetsIterator = boost::range_detail::integer_iterator; // ! static graph does not support explicit parallel edge detection - struct ParallelHyperedge { + struct ParallelHyperedge + { HyperedgeID edge_id; HyperedgeID old_id; }; explicit StaticGraph() : - _num_nodes(0), - _num_removed_nodes(0), - _num_edges(0), - _total_weight(0), - _nodes(), - _edges(), - _unique_edge_ids(), - _community_ids(), - _fixed_vertices(), - _tmp_contraction_buffer(nullptr) { } - - StaticGraph(const StaticGraph&) = delete; - StaticGraph & operator= (const StaticGraph &) = delete; - - StaticGraph(StaticGraph&& other) : - _num_nodes(other._num_nodes), - _num_removed_nodes(other._num_removed_nodes), - _num_edges(other._num_edges), - _total_weight(other._total_weight), - _nodes(std::move(other._nodes)), - _edges(std::move(other._edges)), - _unique_edge_ids(std::move(other._unique_edge_ids)), - _community_ids(std::move(other._community_ids)), - _fixed_vertices(std::move(other._fixed_vertices)), - _tmp_contraction_buffer(std::move(other._tmp_contraction_buffer)) { + _num_nodes(0), _num_removed_nodes(0), _num_edges(0), _total_weight(0), _nodes(), + _edges(), _unique_edge_ids(), _community_ids(), _fixed_vertices(), + _tmp_contraction_buffer(nullptr) + { + } + + StaticGraph(const StaticGraph &) = delete; + StaticGraph &operator=(const StaticGraph &) = delete; + + StaticGraph(StaticGraph &&other) : + _num_nodes(other._num_nodes), _num_removed_nodes(other._num_removed_nodes), + _num_edges(other._num_edges), _total_weight(other._total_weight), + _nodes(std::move(other._nodes)), _edges(std::move(other._edges)), + _unique_edge_ids(std::move(other._unique_edge_ids)), + _community_ids(std::move(other._community_ids)), + _fixed_vertices(std::move(other._fixed_vertices)), + _tmp_contraction_buffer(std::move(other._tmp_contraction_buffer)) + { 
_fixed_vertices.setHypergraph(this); other._tmp_contraction_buffer = nullptr; } - StaticGraph & operator= (StaticGraph&& other) { + StaticGraph &operator=(StaticGraph &&other) + { _num_nodes = other._num_nodes; _num_removed_nodes = other._num_removed_nodes; _num_edges = other._num_edges; @@ -504,9 +484,11 @@ class StaticGraph { return *this; } - ~StaticGraph() { - if ( _tmp_contraction_buffer ) { - delete(_tmp_contraction_buffer); + ~StaticGraph() + { + if(_tmp_contraction_buffer) + { + delete (_tmp_contraction_buffer); _tmp_contraction_buffer = nullptr; } freeInternalData(); @@ -515,45 +497,32 @@ class StaticGraph { // ####################### General Hypergraph Stats ####################### // ! Initial number of hypernodes - HypernodeID initialNumNodes() const { - return _num_nodes; - } + HypernodeID initialNumNodes() const { return _num_nodes; } // ! Number of removed hypernodes - HypernodeID numRemovedHypernodes() const { - return _num_removed_nodes; - } + HypernodeID numRemovedHypernodes() const { return _num_removed_nodes; } // ! Initial number of hyperedges - HyperedgeID initialNumEdges() const { - return _num_edges; - } + HyperedgeID initialNumEdges() const { return _num_edges; } // ! Number of removed hyperedges - HyperedgeID numRemovedHyperedges() const { - return 0; - } + HyperedgeID numRemovedHyperedges() const { return 0; } // ! Set the number of removed hyperedges - void setNumRemovedHyperedges(const HyperedgeID num_removed_hyperedges) { + void setNumRemovedHyperedges(const HyperedgeID num_removed_hyperedges) + { ASSERT(num_removed_hyperedges == 0); unused(num_removed_hyperedges); } // ! Initial number of pins - HypernodeID initialNumPins() const { - return _num_edges; - } + HypernodeID initialNumPins() const { return _num_edges; } // ! Initial sum of the degree of all vertices - HypernodeID initialTotalVertexDegree() const { - return _num_edges; - } + HypernodeID initialTotalVertexDegree() const { return _num_edges; } // ! Total weight of hypergraph - HypernodeWeight totalWeight() const { - return _total_weight; - } + HypernodeWeight totalWeight() const { return _total_weight; } // ! Computes the total node weight of the hypergraph void computeAndSetTotalNodeWeight(parallel_tag_t); @@ -562,10 +531,12 @@ class StaticGraph { // ! Iterates in parallel over all active nodes and calls function f // ! for each vertex - template - void doParallelForAllNodes(const F& f) const { - tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID& hn) { - if ( nodeIsEnabled(hn) ) { + template + void doParallelForAllNodes(const F &f) const + { + tbb::parallel_for(ID(0), _num_nodes, [&](const HypernodeID &hn) { + if(nodeIsEnabled(hn)) + { f(hn); } }); @@ -573,73 +544,75 @@ class StaticGraph { // ! Iterates in parallel over all active edges and calls function f // ! for each net - template - void doParallelForAllEdges(const F& f) const { - tbb::parallel_for(ID(0), _num_edges, [&](const HyperedgeID& e) { - f(e); - }); + template + void doParallelForAllEdges(const F &f) const + { + tbb::parallel_for(ID(0), _num_edges, [&](const HyperedgeID &e) { f(e); }); } // ! Returns a range of the active nodes of the hypergraph - IteratorRange nodes() const { + IteratorRange nodes() const + { return IteratorRange( - HypernodeIterator(_nodes.data(), ID(0), _num_nodes), - HypernodeIterator(_nodes.data() + _num_nodes, _num_nodes, _num_nodes)); + HypernodeIterator(_nodes.data(), ID(0), _num_nodes), + HypernodeIterator(_nodes.data() + _num_nodes, _num_nodes, _num_nodes)); } // ! 
Returns a range of the active edges of the hypergraph - IteratorRange edges() const { + IteratorRange edges() const + { return IteratorRange( - boost::range_detail::integer_iterator(0), - boost::range_detail::integer_iterator(_num_edges)); + boost::range_detail::integer_iterator(0), + boost::range_detail::integer_iterator(_num_edges)); } // ! Returns a range to loop over the incident nets of hypernode u. - IteratorRange incidentEdges(const HypernodeID u) const { + IteratorRange incidentEdges(const HypernodeID u) const + { return incident_nets_of(u, 0); } // ! Returns a range to loop over the pins of hyperedge e. - IteratorRange pins(const HyperedgeID id) const { - const Edge& e = edge(id); + IteratorRange pins(const HyperedgeID id) const + { + const Edge &e = edge(id); const HypernodeID source = e.source(); const HypernodeID target = e.target(); - return IteratorRange( - IncidenceIterator(source, target, 0), - IncidenceIterator(source, target, 2)); + return IteratorRange(IncidenceIterator(source, target, 0), + IncidenceIterator(source, target, 2)); } - // ####################### Node Information ####################### + // ####################### Node Information ####################### // ! Weight of a vertex - HypernodeWeight nodeWeight(const HypernodeID u) const { - return node(u).weight(); - } + HypernodeWeight nodeWeight(const HypernodeID u) const { return node(u).weight(); } // ! Sets the weight of a vertex - void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) { + void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) + { return node(u).setWeight(weight); } // ! Degree of a hypernode - HyperedgeID nodeDegree(const HypernodeID u) const { + HyperedgeID nodeDegree(const HypernodeID u) const + { return node(u + 1).firstEntry() - node(u).firstEntry(); } // ! Returns whether a hypernode is enabled or not - bool nodeIsEnabled(const HypernodeID u) const { - return !node(u).isDisabled(); - } + bool nodeIsEnabled(const HypernodeID u) const { return !node(u).isDisabled(); } // ! Removes a degree zero hypernode - void removeDegreeZeroHypernode(const HypernodeID u) { + void removeDegreeZeroHypernode(const HypernodeID u) + { ASSERT(nodeDegree(u) == 0); node(u).disable(); ++_num_removed_nodes; } // ! Restores a degree zero hypernode - void restoreDegreeZeroHypernode(const HypernodeID u) { + void restoreDegreeZeroHypernode(const HypernodeID u) + { node(u).enable(); ASSERT(nodeDegree(u) == 0); } @@ -647,26 +620,19 @@ class StaticGraph { // ####################### Hyperedge Information ####################### // ! Target of an edge - HypernodeID edgeTarget(const HyperedgeID e) const { - return edge(e).target(); - } + HypernodeID edgeTarget(const HyperedgeID e) const { return edge(e).target(); } // ! Source of an edge - HypernodeID edgeSource(const HyperedgeID e) const { - return edge(e).source(); - } + HypernodeID edgeSource(const HyperedgeID e) const { return edge(e).source(); } - bool isSinglePin(const HyperedgeID) const { - return false; - } + bool isSinglePin(const HyperedgeID) const { return false; } // ! Weight of a hyperedge - HypernodeWeight edgeWeight(const HyperedgeID e) const { - return edge(e).weight(); - } + HypernodeWeight edgeWeight(const HyperedgeID e) const { return edge(e).weight(); } // ! 
Unique id of a hyperedge, in the range of [0, initialNumEdges() / 2) - HyperedgeID uniqueEdgeID(const HyperedgeID e) const { + HyperedgeID uniqueEdgeID(const HyperedgeID e) const + { ASSERT(e <= _edges.size(), "Hyperedge" << e << "does not exist"); const HyperedgeID id = _unique_edge_ids[e]; ASSERT(id < initialNumEdges() / 2); @@ -674,84 +640,83 @@ class StaticGraph { } // ! Range of unique id edge ids - HyperedgeID maxUniqueID() const { - return initialNumEdges() / 2; - } + HyperedgeID maxUniqueID() const { return initialNumEdges() / 2; } // ! Sets the weight of a hyperedge - void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) { + void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) + { return edge(e).setWeight(weight); } // ! Number of pins of a hyperedge - HypernodeID edgeSize(const HyperedgeID e) const { + HypernodeID edgeSize(const HyperedgeID e) const + { ASSERT(e <= _edges.size(), "Hyperedge" << e << "does not exist"); unused(e); return 2; } // ! Maximum size of a hyperedge - HypernodeID maxEdgeSize() const { - return 2; - } + HypernodeID maxEdgeSize() const { return 2; } // ! Returns whether a hyperedge is enabled or not - bool edgeIsEnabled(const HyperedgeID) const { - return true; - } + bool edgeIsEnabled(const HyperedgeID) const { return true; } // ! Enables a hyperedge (must be disabled before) - void enableHyperedge(const HyperedgeID) { + void enableHyperedge(const HyperedgeID) + { throw NonSupportedOperationException( - "enableHyperedge() is not supported in static graph"); + "enableHyperedge() is not supported in static graph"); } // ! Community id which hypernode u is assigned to - PartitionID communityID(const HypernodeID u) const { - return _community_ids[u]; - } + PartitionID communityID(const HypernodeID u) const { return _community_ids[u]; } // ! 
Assign a community to a hypernode - void setCommunityID(const HypernodeID u, const PartitionID community_id) { + void setCommunityID(const HypernodeID u, const PartitionID community_id) + { _community_ids[u] = community_id; } // ####################### Fixed Vertex Support ####################### - void addFixedVertexSupport(FixedVertexSupport&& fixed_vertices) { + void addFixedVertexSupport(FixedVertexSupport &&fixed_vertices) + { _fixed_vertices = std::move(fixed_vertices); _fixed_vertices.setHypergraph(this); } - bool hasFixedVertices() const { - return _fixed_vertices.hasFixedVertices(); - } + bool hasFixedVertices() const { return _fixed_vertices.hasFixedVertices(); } - HypernodeWeight totalFixedVertexWeight() const { + HypernodeWeight totalFixedVertexWeight() const + { return _fixed_vertices.totalFixedVertexWeight(); } - HypernodeWeight fixedVertexBlockWeight(const PartitionID block) const { + HypernodeWeight fixedVertexBlockWeight(const PartitionID block) const + { return _fixed_vertices.fixedVertexBlockWeight(block); } - bool isFixed(const HypernodeID hn) const { - return _fixed_vertices.isFixed(hn); - } + bool isFixed(const HypernodeID hn) const { return _fixed_vertices.isFixed(hn); } - PartitionID fixedVertexBlock(const HypernodeID hn) const { + PartitionID fixedVertexBlock(const HypernodeID hn) const + { return _fixed_vertices.fixedVertexBlock(hn); } - void setMaxFixedVertexBlockWeight(const std::vector max_block_weights) { + void setMaxFixedVertexBlockWeight(const std::vector max_block_weights) + { _fixed_vertices.setMaxBlockWeight(max_block_weights); } - const FixedVertexSupport& fixedVertexSupport() const { + const FixedVertexSupport &fixedVertexSupport() const + { return _fixed_vertices; } - FixedVertexSupport copyOfFixedVertexSupport() const { + FixedVertexSupport copyOfFixedVertexSupport() const + { return _fixed_vertices.copy(); } @@ -766,84 +731,94 @@ class StaticGraph { * * \param communities Community structure that should be contracted */ - StaticGraph contract(parallel::scalable_vector& communities, bool deterministic = false); + StaticGraph contract(parallel::scalable_vector &communities, + bool deterministic = false); - bool registerContraction(const HypernodeID, const HypernodeID) { + bool registerContraction(const HypernodeID, const HypernodeID) + { throw NonSupportedOperationException( - "registerContraction(u, v) is not supported in static graph"); + "registerContraction(u, v) is not supported in static graph"); return false; } - size_t contract(const HypernodeID, - const HypernodeWeight max_node_weight = std::numeric_limits::max()) { + size_t contract(const HypernodeID, const HypernodeWeight max_node_weight = + std::numeric_limits::max()) + { unused(max_node_weight); throw NonSupportedOperationException( - "contract(v, max_node_weight) is not supported in static graph"); + "contract(v, max_node_weight) is not supported in static graph"); return 0; } - void uncontract(const Batch&, - const MarkEdgeFunc& mark_edge, - const UncontractionFunction& case_one_func = NOOP_BATCH_FUNC, - const UncontractionFunction& case_two_func = NOOP_BATCH_FUNC) { + void uncontract(const Batch &, const MarkEdgeFunc &mark_edge, + const UncontractionFunction &case_one_func = NOOP_BATCH_FUNC, + const UncontractionFunction &case_two_func = NOOP_BATCH_FUNC) + { unused(mark_edge); unused(case_one_func); unused(case_two_func); throw NonSupportedOperationException( - "uncontract(batch) is not supported in static graph"); + "uncontract(batch) is not supported in static graph"); } - 
VersionedBatchVector createBatchUncontractionHierarchy(const size_t) { + VersionedBatchVector createBatchUncontractionHierarchy(const size_t) + { throw NonSupportedOperationException( - "createBatchUncontractionHierarchy(batch_size) is not supported in static graph"); - return { }; + "createBatchUncontractionHierarchy(batch_size) is not supported in static graph"); + return {}; } // ####################### Remove / Restore Hyperedges ####################### /*! - * Removes a hyperedge from the hypergraph. This includes the removal of he from all - * of its pins and to disable the hyperedge. Noze, in contrast to removeEdge, this function - * removes hyperedge from all its pins in parallel. - * - * NOTE, this function is not thread-safe and should only be called in a single-threaded - * setting. - */ - void removeLargeEdge(const HyperedgeID) { + * Removes a hyperedge from the hypergraph. This includes the removal of he from all + * of its pins and to disable the hyperedge. Noze, in contrast to removeEdge, this + * function removes hyperedge from all its pins in parallel. + * + * NOTE, this function is not thread-safe and should only be called in a single-threaded + * setting. + */ + void removeLargeEdge(const HyperedgeID) + { throw NonSupportedOperationException( - "removeLargeEdge() is not supported in static graph"); + "removeLargeEdge() is not supported in static graph"); } /*! * Restores a large hyperedge previously removed from the hypergraph. */ - void restoreLargeEdge(const HyperedgeID&) { + void restoreLargeEdge(const HyperedgeID &) + { throw NonSupportedOperationException( - "restoreLargeEdge() is not supported in static graph"); + "restoreLargeEdge() is not supported in static graph"); } - parallel::scalable_vector removeSinglePinAndParallelHyperedges() { + parallel::scalable_vector removeSinglePinAndParallelHyperedges() + { throw NonSupportedOperationException( - "removeSinglePinAndParallelHyperedges() is not supported in static graph"); - return { }; + "removeSinglePinAndParallelHyperedges() is not supported in static graph"); + return {}; } - void restoreSinglePinAndParallelNets(const parallel::scalable_vector&) { - throw NonSupportedOperationException( - "restoreSinglePinAndParallelNets(hes_to_restore) is not supported in static graph"); + void + restoreSinglePinAndParallelNets(const parallel::scalable_vector &) + { + throw NonSupportedOperationException("restoreSinglePinAndParallelNets(hes_to_" + "restore) is not supported in static graph"); } // ####################### Initialization / Reset Functions ####################### // ! Reset internal community information - void copyCommunityIDs(const parallel::scalable_vector& community_ids) { + void copyCommunityIDs(const parallel::scalable_vector &community_ids) + { ASSERT(community_ids.size() == UI64(_num_nodes)); - doParallelForAllNodes([&](const HypernodeID& hn) { - _community_ids[hn] = community_ids[hn]; - }); + doParallelForAllNodes( + [&](const HypernodeID &hn) { _community_ids[hn] = community_ids[hn]; }); } - void setCommunityIDs(ds::Clustering&& communities) { + void setCommunityIDs(ds::Clustering &&communities) + { ASSERT(communities.size() == initialNumNodes()); _community_ids = std::move(communities); } @@ -855,36 +830,41 @@ class StaticGraph { StaticGraph copy() const; // ! Reset internal data structure - void reset() { } + void reset() {} // ! 
Free internal data in parallel - void freeInternalData() { - if ( _num_nodes > 0 || _num_edges > 0 ) { + void freeInternalData() + { + if(_num_nodes > 0 || _num_edges > 0) + { freeTmpContractionBuffer(); } _num_nodes = 0; _num_edges = 0; } - void freeTmpContractionBuffer() { - if ( _tmp_contraction_buffer ) { - delete(_tmp_contraction_buffer); + void freeTmpContractionBuffer() + { + if(_tmp_contraction_buffer) + { + delete (_tmp_contraction_buffer); _tmp_contraction_buffer = nullptr; } } - void memoryConsumption(utils::MemoryTreeNode* parent) const; + void memoryConsumption(utils::MemoryTreeNode *parent) const; - // ! Only for testing - bool verifyIncidenceArrayAndIncidentNets() { + // ! Only for testing + bool verifyIncidenceArrayAndIncidentNets() + { throw NonSupportedOperationException( - "verifyIncidenceArrayAndIncidentNets() not supported in static graph"); + "verifyIncidenceArrayAndIncidentNets() not supported in static graph"); return false; } - private: +private: friend class StaticGraphFactory; - template + template friend class CommunitySupport; template friend class PartitionedGraph; @@ -892,44 +872,53 @@ class StaticGraph { // ####################### Node Information ####################### // ! Accessor for node-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Node& node(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Node &node(const HypernodeID u) const + { ASSERT(u <= _num_nodes, "Node" << u << "does not exist"); return _nodes[u]; } // ! Accessor for node-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Node& node(const HypernodeID u) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Node &node(const HypernodeID u) + { ASSERT(u <= _num_nodes, "Node" << u << "does not exist"); return _nodes[u]; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE IteratorRange incident_nets_of(const HypernodeID u, - const size_t pos = 0) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE IteratorRange + incident_nets_of(const HypernodeID u, const size_t pos = 0) const + { return IteratorRange( - boost::range_detail::integer_iterator(node(u).firstEntry() + pos), - boost::range_detail::integer_iterator(node(u + 1).firstEntry())); + boost::range_detail::integer_iterator(node(u).firstEntry() + pos), + boost::range_detail::integer_iterator(node(u + 1).firstEntry())); } // ####################### Hyperedge Information ####################### // ! Accessor for hyperedge-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Edge& edge(const HyperedgeID e) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Edge &edge(const HyperedgeID e) const + { ASSERT(e <= _edges.size(), "Hyperedge" << e << "does not exist"); return _edges[e]; } // ! Accessor for hyperedge-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Edge& edge(const HyperedgeID e) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Edge &edge(const HyperedgeID e) + { ASSERT(e <= _edges.size(), "Hyperedge" << e << "does not exist"); return _edges[e]; } - // ! Helper function for deduplication of temporary edges. Returns the number of remaining edges - static size_t deduplicateTmpEdges(TmpEdgeInformation* edge_start, TmpEdgeInformation* edge_end); + // ! Helper function for deduplication of temporary edges. Returns the number of + // remaining edges + static size_t deduplicateTmpEdges(TmpEdgeInformation *edge_start, + TmpEdgeInformation *edge_end); // ! 
Allocate the temporary contraction buffer - void allocateTmpContractionBuffer() { - if ( !_tmp_contraction_buffer ) { + void allocateTmpContractionBuffer() + { + if(!_tmp_contraction_buffer) + { _tmp_contraction_buffer = new TmpContractionBuffer(_num_nodes, _num_edges); } } @@ -958,7 +947,7 @@ class StaticGraph { // ! Data that is reused throughout the multilevel hierarchy // ! to contract the hypergraph and to prevent expensive allocations - TmpContractionBuffer* _tmp_contraction_buffer; + TmpContractionBuffer *_tmp_contraction_buffer; }; } // namespace ds diff --git a/mt-kahypar/datastructures/static_graph_factory.cpp b/mt-kahypar/datastructures/static_graph_factory.cpp index 47f62618d..973dd4e23 100644 --- a/mt-kahypar/datastructures/static_graph_factory.cpp +++ b/mt-kahypar/datastructures/static_graph_factory.cpp @@ -33,156 +33,165 @@ #include "mt-kahypar/parallel/parallel_prefix_sum.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" -#include "mt-kahypar/utils/timer.h" #include "mt-kahypar/utils/exception.h" +#include "mt-kahypar/utils/timer.h" namespace mt_kahypar::ds { - void StaticGraphFactory::sort_incident_edges(StaticGraph& graph) { - parallel::scalable_vector edge_ids_of_node; - edge_ids_of_node.resize(graph._edges.size()); - // sort incident edges of each node, so their ordering is independent of scheduling - // (and the same as a typical sequential implementation) - tbb::parallel_for(ID(0), graph._num_nodes, [&](HypernodeID u) { - const HyperedgeID start = graph.node(u).firstEntry(); - const HyperedgeID end = graph.node(u + 1).firstEntry(); - for (HyperedgeID id = 0; id < end - start; ++id) { - edge_ids_of_node[start + id] = id; - } - std::sort(edge_ids_of_node.begin() + start, edge_ids_of_node.begin() + end, [&](HyperedgeID& a, HyperedgeID& b) { - return graph.edge(start + a).target() < graph.edge(start + b).target(); - }); - - // apply permutation - // (yes, this applies the permutation defined by edge_ids_of_node, don't think about it) - for (size_t i = 0; i < end - start; ++i) { - HyperedgeID target = edge_ids_of_node[start + i]; - while (target < i) { - target = edge_ids_of_node[start + target]; - } - std::swap(graph._edges[start + i], graph._edges[start + target]); - std::swap(graph._unique_edge_ids[start + i], graph._unique_edge_ids[start + target]); +void StaticGraphFactory::sort_incident_edges(StaticGraph &graph) +{ + parallel::scalable_vector edge_ids_of_node; + edge_ids_of_node.resize(graph._edges.size()); + // sort incident edges of each node, so their ordering is independent of scheduling + // (and the same as a typical sequential implementation) + tbb::parallel_for(ID(0), graph._num_nodes, [&](HypernodeID u) { + const HyperedgeID start = graph.node(u).firstEntry(); + const HyperedgeID end = graph.node(u + 1).firstEntry(); + for(HyperedgeID id = 0; id < end - start; ++id) + { + edge_ids_of_node[start + id] = id; + } + std::sort(edge_ids_of_node.begin() + start, edge_ids_of_node.begin() + end, + [&](HyperedgeID &a, HyperedgeID &b) { + return graph.edge(start + a).target() < graph.edge(start + b).target(); + }); + + // apply permutation + // (yes, this applies the permutation defined by edge_ids_of_node, don't think about + // it) + for(size_t i = 0; i < end - start; ++i) + { + HyperedgeID target = edge_ids_of_node[start + i]; + while(target < i) + { + target = edge_ids_of_node[start + target]; } - }); - } - - StaticGraph StaticGraphFactory::construct( - const HypernodeID num_nodes, - const HyperedgeID num_edges, - const HyperedgeVector& edge_vector, - const 
HyperedgeWeight* edge_weight, - const HypernodeWeight* node_weight, - const bool stable_construction_of_incident_edges) { - ASSERT(edge_vector.size() == num_edges); - - EdgeVector edges; - edges.resize(num_edges); - tbb::parallel_for(UL(0), edge_vector.size(), [&](const size_t i) { - const auto& e = edge_vector[i]; - if (e.size() != 2) { - throw InvalidInputException( + std::swap(graph._edges[start + i], graph._edges[start + target]); + std::swap(graph._unique_edge_ids[start + i], + graph._unique_edge_ids[start + target]); + } + }); +} + +StaticGraph StaticGraphFactory::construct( + const HypernodeID num_nodes, const HyperedgeID num_edges, + const HyperedgeVector &edge_vector, const HyperedgeWeight *edge_weight, + const HypernodeWeight *node_weight, const bool stable_construction_of_incident_edges) +{ + ASSERT(edge_vector.size() == num_edges); + + EdgeVector edges; + edges.resize(num_edges); + tbb::parallel_for(UL(0), edge_vector.size(), [&](const size_t i) { + const auto &e = edge_vector[i]; + if(e.size() != 2) + { + throw InvalidInputException( "Using graph data structure; but the input hypergraph is not a graph."); - } - edges[i] = std::make_pair(e[0], e[1]); - }); - return construct_from_graph_edges(num_nodes, num_edges, edges, - edge_weight, node_weight, stable_construction_of_incident_edges); + } + edges[i] = std::make_pair(e[0], e[1]); + }); + return construct_from_graph_edges(num_nodes, num_edges, edges, edge_weight, node_weight, + stable_construction_of_incident_edges); +} + +StaticGraph StaticGraphFactory::construct_from_graph_edges( + const HypernodeID num_nodes, const HyperedgeID num_edges, + const EdgeVector &edge_vector, const HyperedgeWeight *edge_weight, + const HypernodeWeight *node_weight, const bool stable_construction_of_incident_edges) +{ + StaticGraph graph; + graph._num_nodes = num_nodes; + graph._num_edges = 2 * num_edges; + graph._nodes.resize(num_nodes + 1); + graph._edges.resize(2 * num_edges); + graph._unique_edge_ids.resize(2 * num_edges); + + ASSERT(edge_vector.size() == num_edges); + + // Compute degree for each vertex + ThreadLocalCounter local_degree_per_vertex(num_nodes); + tbb::parallel_for(ID(0), num_edges, [&](const size_t pos) { + Counter &num_degree_per_vertex = local_degree_per_vertex.local(); + const HypernodeID pins[2] = { edge_vector[pos].first, edge_vector[pos].second }; + for(const HypernodeID &pin : pins) + { + ASSERT(pin < num_nodes, V(pin) << V(num_nodes)); + ++num_degree_per_vertex[pin]; + } + }); + + // We sum up the degree per vertex only thread local. To obtain the + // global degree, we iterate over each thread local counter and sum it up. + Counter num_degree_per_vertex(num_nodes, 0); + for(Counter &c : local_degree_per_vertex) + { + tbb::parallel_for(ID(0), num_nodes, + [&](const size_t pos) { num_degree_per_vertex[pos] += c[pos]; }); } - StaticGraph StaticGraphFactory::construct_from_graph_edges( - const HypernodeID num_nodes, - const HyperedgeID num_edges, - const EdgeVector& edge_vector, - const HyperedgeWeight* edge_weight, - const HypernodeWeight* node_weight, - const bool stable_construction_of_incident_edges) { - StaticGraph graph; - graph._num_nodes = num_nodes; - graph._num_edges = 2 * num_edges; - graph._nodes.resize(num_nodes + 1); - graph._edges.resize(2 * num_edges); - graph._unique_edge_ids.resize(2 * num_edges); - - ASSERT(edge_vector.size() == num_edges); - - // Compute degree for each vertex - ThreadLocalCounter local_degree_per_vertex(num_nodes); + // Compute prefix sum over the degrees. 
The prefix sum is used than + // as start position for each node in the edge array. + parallel::TBBPrefixSum degree_prefix_sum(num_degree_per_vertex); + tbb::parallel_scan(tbb::blocked_range(UL(0), UI64(num_nodes)), + degree_prefix_sum); + + ASSERT(degree_prefix_sum.total_sum() == 2 * num_edges); + + AtomicCounter incident_edges_position(num_nodes, + parallel::IntegralAtomicWrapper(0)); + + auto setup_edges = [&] { tbb::parallel_for(ID(0), num_edges, [&](const size_t pos) { - Counter& num_degree_per_vertex = local_degree_per_vertex.local(); - const HypernodeID pins[2] = {edge_vector[pos].first, edge_vector[pos].second}; - for (const HypernodeID& pin : pins) { - ASSERT(pin < num_nodes, V(pin) << V(num_nodes)); - ++num_degree_per_vertex[pin]; + const HypernodeID pin0 = edge_vector[pos].first; + const HyperedgeID incident_edges_pos0 = + degree_prefix_sum[pin0] + incident_edges_position[pin0]++; + ASSERT(incident_edges_pos0 < graph._edges.size()); + StaticGraph::Edge &edge0 = graph._edges[incident_edges_pos0]; + const HypernodeID pin1 = edge_vector[pos].second; + const HyperedgeID incident_edges_pos1 = + degree_prefix_sum[pin1] + incident_edges_position[pin1]++; + ASSERT(incident_edges_pos1 < graph._edges.size()); + StaticGraph::Edge &edge1 = graph._edges[incident_edges_pos1]; + + edge0.setTarget(pin1); + edge0.setSource(pin0); + edge1.setTarget(pin0); + edge1.setSource(pin1); + + graph._unique_edge_ids[incident_edges_pos0] = pos; + graph._unique_edge_ids[incident_edges_pos1] = pos; + + if(edge_weight) + { + edge0.setWeight(edge_weight[pos]); + edge1.setWeight(edge_weight[pos]); } }); + }; + + auto setup_nodes = [&] { + tbb::parallel_for(ID(0), num_nodes, [&](const size_t pos) { + StaticGraph::Node &node = graph._nodes[pos]; + node.enable(); + node.setFirstEntry(degree_prefix_sum[pos]); + if(node_weight) + { + node.setWeight(node_weight[pos]); + } + }); + }; - // We sum up the degree per vertex only thread local. To obtain the - // global degree, we iterate over each thread local counter and sum it up. - Counter num_degree_per_vertex(num_nodes, 0); - for (Counter& c : local_degree_per_vertex) { - tbb::parallel_for(ID(0), num_nodes, [&](const size_t pos) { - num_degree_per_vertex[pos] += c[pos]; - }); - } + auto init_communities = [&] { graph._community_ids.resize(num_nodes, 0); }; - // Compute prefix sum over the degrees. The prefix sum is used than - // as start position for each node in the edge array. 
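// A minimal sequential sketch of the CSR-style construction the comment above refers
// to: an exclusive prefix sum over the vertex degrees gives, for every node, the offset
// of its first incident edge in the flat edge array. The helper name buildOffsets and
// the plain std::vector types are illustrative simplifications; the surrounding code
// computes the same offsets in parallel via parallel::TBBPrefixSum and
// tbb::parallel_scan and then scatters the edges using atomic per-node counters.
#include <cstddef>
#include <utility>
#include <vector>

using GraphEdge = std::pair<std::size_t, std::size_t>;

std::vector<std::size_t> buildOffsets(const std::size_t num_nodes,
                                      const std::vector<GraphEdge> &edges)
{
  std::vector<std::size_t> degree(num_nodes, 0);
  for(const GraphEdge &e : edges)
  {
    ++degree[e.first]; // every undirected edge contributes to both endpoints
    ++degree[e.second];
  }
  std::vector<std::size_t> offsets(num_nodes + 1, 0);
  for(std::size_t v = 0; v < num_nodes; ++v)
  {
    offsets[v + 1] = offsets[v] + degree[v]; // exclusive prefix sum over the degrees
  }
  // offsets[v] is the first edge slot of node v; offsets[num_nodes] equals 2 * |E|,
  // which mirrors the assertion on the total degree sum in the factory code.
  return offsets;
}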
- parallel::TBBPrefixSum degree_prefix_sum(num_degree_per_vertex); - tbb::parallel_scan(tbb::blocked_range( UL(0), UI64(num_nodes)), degree_prefix_sum); - - ASSERT(degree_prefix_sum.total_sum() == 2 * num_edges); - - AtomicCounter incident_edges_position(num_nodes, - parallel::IntegralAtomicWrapper(0)); - - auto setup_edges = [&] { - tbb::parallel_for(ID(0), num_edges, [&](const size_t pos) { - const HypernodeID pin0 = edge_vector[pos].first; - const HyperedgeID incident_edges_pos0 = degree_prefix_sum[pin0] + incident_edges_position[pin0]++; - ASSERT(incident_edges_pos0 < graph._edges.size()); - StaticGraph::Edge& edge0 = graph._edges[incident_edges_pos0]; - const HypernodeID pin1 = edge_vector[pos].second; - const HyperedgeID incident_edges_pos1 = degree_prefix_sum[pin1] + incident_edges_position[pin1]++; - ASSERT(incident_edges_pos1 < graph._edges.size()); - StaticGraph::Edge& edge1 = graph._edges[incident_edges_pos1]; - - edge0.setTarget(pin1); - edge0.setSource(pin0); - edge1.setTarget(pin0); - edge1.setSource(pin1); - - graph._unique_edge_ids[incident_edges_pos0] = pos; - graph._unique_edge_ids[incident_edges_pos1] = pos; - - if (edge_weight) { - edge0.setWeight(edge_weight[pos]); - edge1.setWeight(edge_weight[pos]); - } - }); - }; - - auto setup_nodes = [&] { - tbb::parallel_for(ID(0), num_nodes, [&](const size_t pos) { - StaticGraph::Node& node = graph._nodes[pos]; - node.enable(); - node.setFirstEntry(degree_prefix_sum[pos]); - if ( node_weight ) { - node.setWeight(node_weight[pos]); - } - }); - }; - - auto init_communities = [&] { - graph._community_ids.resize(num_nodes, 0); - }; - - tbb::parallel_invoke(setup_edges, setup_nodes, init_communities); - - // Add Sentinel - graph._nodes.back() = StaticGraph::Node(graph._edges.size()); - if (stable_construction_of_incident_edges) { - sort_incident_edges(graph); - } - graph.computeAndSetTotalNodeWeight(parallel_tag_t()); - return graph; + tbb::parallel_invoke(setup_edges, setup_nodes, init_communities); + + // Add Sentinel + graph._nodes.back() = StaticGraph::Node(graph._edges.size()); + if(stable_construction_of_incident_edges) + { + sort_incident_edges(graph); } + graph.computeAndSetTotalNodeWeight(parallel_tag_t()); + return graph; +} } \ No newline at end of file diff --git a/mt-kahypar/datastructures/static_graph_factory.h b/mt-kahypar/datastructures/static_graph_factory.h index 1ecc2a4bd..b6c17fa5f 100644 --- a/mt-kahypar/datastructures/static_graph_factory.h +++ b/mt-kahypar/datastructures/static_graph_factory.h @@ -34,44 +34,46 @@ #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/utils/exception.h" - namespace mt_kahypar { namespace ds { -class StaticGraphFactory { - using EdgeVector = parallel::scalable_vector>; - using HyperedgeVector = parallel::scalable_vector>; +class StaticGraphFactory +{ + using EdgeVector = parallel::scalable_vector >; + using HyperedgeVector = + parallel::scalable_vector >; using Counter = parallel::scalable_vector; - using AtomicCounter = parallel::scalable_vector>; + using AtomicCounter = + parallel::scalable_vector >; using ThreadLocalCounter = tbb::enumerable_thread_specific; - public: - static StaticGraph construct(const HypernodeID num_nodes, - const HyperedgeID num_edges, - const HyperedgeVector& edge_vector, - const HyperedgeWeight* edge_weight = nullptr, - const HypernodeWeight* node_weight = nullptr, +public: + static StaticGraph construct(const HypernodeID num_nodes, const HyperedgeID num_edges, + const HyperedgeVector &edge_vector, + const HyperedgeWeight *edge_weight = 
nullptr, + const HypernodeWeight *node_weight = nullptr, const bool stable_construction_of_incident_edges = false); - // ! Provides a more performant construction method by using continuous space for the edges - // ! (instead of a separate vec per edge). - // ! No backwards edges allowed, i.e. each edge is unique - static StaticGraph construct_from_graph_edges(const HypernodeID num_nodes, - const HyperedgeID num_edges, - const EdgeVector& edge_vector, - const HyperedgeWeight* edge_weight = nullptr, - const HypernodeWeight* node_weight = nullptr, - const bool stable_construction_of_incident_edges = false); + // ! Provides a more performant construction method by using continuous space for the + // edges ! (instead of a separate vec per edge). ! No backwards edges allowed, i.e. each + // edge is unique + static StaticGraph + construct_from_graph_edges(const HypernodeID num_nodes, const HyperedgeID num_edges, + const EdgeVector &edge_vector, + const HyperedgeWeight *edge_weight = nullptr, + const HypernodeWeight *node_weight = nullptr, + const bool stable_construction_of_incident_edges = false); - static std::pair > compactify(const StaticGraph&) { - throw NonSupportedOperationException( - "Compactify not implemented for static graph."); + static std::pair > + compactify(const StaticGraph &) + { + throw NonSupportedOperationException("Compactify not implemented for static graph."); } - private: - StaticGraphFactory() { } +private: + StaticGraphFactory() {} - static void sort_incident_edges(StaticGraph& graph); + static void sort_incident_edges(StaticGraph &graph); }; } // namespace ds diff --git a/mt-kahypar/datastructures/static_hypergraph.cpp b/mt-kahypar/datastructures/static_hypergraph.cpp index e230c85fd..0fdd22585 100644 --- a/mt-kahypar/datastructures/static_hypergraph.cpp +++ b/mt-kahypar/datastructures/static_hypergraph.cpp @@ -27,573 +27,653 @@ #include "static_hypergraph.h" -#include "mt-kahypar/parallel/parallel_prefix_sum.h" #include "mt-kahypar/datastructures/concurrent_bucket_map.h" -#include "mt-kahypar/utils/timer.h" +#include "mt-kahypar/parallel/parallel_prefix_sum.h" #include "mt-kahypar/utils/memory_tree.h" +#include "mt-kahypar/utils/timer.h" #include #include namespace mt_kahypar::ds { +/*! + * This struct is used during multilevel coarsening to efficiently + * detect parallel hyperedges. + */ +struct ContractedHyperedgeInformation +{ + HyperedgeID he = kInvalidHyperedge; + size_t hash = kEdgeHashSeed; + size_t size = std::numeric_limits::max(); + bool valid = false; +}; + +/*! + * Contracts a given community structure. All vertices with the same label + * are collapsed into the same vertex. The resulting single-pin and parallel + * hyperedges are removed from the contracted graph. The function returns + * the contracted hypergraph and a mapping which specifies a mapping from + * community label (given in 'communities') to a vertex in the coarse hypergraph. + * + * \param communities Community structure that should be contracted + */ +StaticHypergraph +StaticHypergraph::contract(parallel::scalable_vector &communities, + bool deterministic) +{ + + ASSERT(communities.size() == _num_hypernodes); + + if(!_tmp_contraction_buffer) + { + allocateTmpContractionBuffer(); + } - /*! - * This struct is used during multilevel coarsening to efficiently - * detect parallel hyperedges. - */ - struct ContractedHyperedgeInformation { - HyperedgeID he = kInvalidHyperedge; - size_t hash = kEdgeHashSeed; - size_t size = std::numeric_limits::max(); - bool valid = false; - }; - - /*! 
- * Contracts a given community structure. All vertices with the same label - * are collapsed into the same vertex. The resulting single-pin and parallel - * hyperedges are removed from the contracted graph. The function returns - * the contracted hypergraph and a mapping which specifies a mapping from - * community label (given in 'communities') to a vertex in the coarse hypergraph. - * - * \param communities Community structure that should be contracted - */ - StaticHypergraph StaticHypergraph::contract(parallel::scalable_vector& communities, bool deterministic) { - - ASSERT(communities.size() == _num_hypernodes); - - if ( !_tmp_contraction_buffer ) { - allocateTmpContractionBuffer(); + // Auxiliary buffers - reused during multilevel hierarchy to prevent expensive + // allocations + Array &mapping = _tmp_contraction_buffer->mapping; + Array &tmp_hypernodes = _tmp_contraction_buffer->tmp_hypernodes; + IncidentNets &tmp_incident_nets = _tmp_contraction_buffer->tmp_incident_nets; + Array > &tmp_num_incident_nets = + _tmp_contraction_buffer->tmp_num_incident_nets; + Array > &hn_weights = + _tmp_contraction_buffer->hn_weights; + Array &tmp_hyperedges = _tmp_contraction_buffer->tmp_hyperedges; + IncidenceArray &tmp_incidence_array = _tmp_contraction_buffer->tmp_incidence_array; + Array &he_sizes = _tmp_contraction_buffer->he_sizes; + Array &valid_hyperedges = _tmp_contraction_buffer->valid_hyperedges; + + ASSERT(static_cast(_num_hypernodes) <= mapping.size()); + ASSERT(static_cast(_num_hypernodes) <= tmp_hypernodes.size()); + ASSERT(static_cast(_total_degree) <= tmp_incident_nets.size()); + ASSERT(static_cast(_num_hypernodes) <= tmp_num_incident_nets.size()); + ASSERT(static_cast(_num_hypernodes) <= hn_weights.size()); + ASSERT(static_cast(_num_hyperedges) <= tmp_hyperedges.size()); + ASSERT(static_cast(_num_pins) <= tmp_incidence_array.size()); + ASSERT(static_cast(_num_hyperedges) <= he_sizes.size()); + ASSERT(static_cast(_num_hyperedges) <= valid_hyperedges.size()); + + // #################### STAGE 1 #################### + // Compute vertex ids of coarse hypergraph with a parallel prefix sum + mapping.assign(_num_hypernodes, 0); + + doParallelForAllNodes([&](const HypernodeID &hn) { + ASSERT(static_cast(communities[hn]) < mapping.size()); + mapping[communities[hn]] = UL(1); + }); + + // Prefix sum determines vertex ids in coarse hypergraph + parallel::TBBPrefixSum mapping_prefix_sum(mapping); + tbb::parallel_scan(tbb::blocked_range(UL(0), _num_hypernodes), + mapping_prefix_sum); + HypernodeID num_hypernodes = mapping_prefix_sum.total_sum(); + + // Remap community ids + tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID &hn) { + if(nodeIsEnabled(hn)) + { + communities[hn] = mapping_prefix_sum[communities[hn]]; + } + else + { + communities[hn] = kInvalidHypernode; } - // Auxiliary buffers - reused during multilevel hierarchy to prevent expensive allocations - Array& mapping = _tmp_contraction_buffer->mapping; - Array& tmp_hypernodes = _tmp_contraction_buffer->tmp_hypernodes; - IncidentNets& tmp_incident_nets = _tmp_contraction_buffer->tmp_incident_nets; - Array>& tmp_num_incident_nets = - _tmp_contraction_buffer->tmp_num_incident_nets; - Array>& hn_weights = - _tmp_contraction_buffer->hn_weights; - Array& tmp_hyperedges = _tmp_contraction_buffer->tmp_hyperedges; - IncidenceArray& tmp_incidence_array = _tmp_contraction_buffer->tmp_incidence_array; - Array& he_sizes = _tmp_contraction_buffer->he_sizes; - Array& valid_hyperedges = _tmp_contraction_buffer->valid_hyperedges; - - 
ASSERT(static_cast(_num_hypernodes) <= mapping.size()); - ASSERT(static_cast(_num_hypernodes) <= tmp_hypernodes.size()); - ASSERT(static_cast(_total_degree) <= tmp_incident_nets.size()); - ASSERT(static_cast(_num_hypernodes) <= tmp_num_incident_nets.size()); - ASSERT(static_cast(_num_hypernodes) <= hn_weights.size()); - ASSERT(static_cast(_num_hyperedges) <= tmp_hyperedges.size()); - ASSERT(static_cast(_num_pins) <= tmp_incidence_array.size()); - ASSERT(static_cast(_num_hyperedges) <= he_sizes.size()); - ASSERT(static_cast(_num_hyperedges) <= valid_hyperedges.size()); - - - // #################### STAGE 1 #################### - // Compute vertex ids of coarse hypergraph with a parallel prefix sum - mapping.assign(_num_hypernodes, 0); - - doParallelForAllNodes([&](const HypernodeID& hn) { - ASSERT(static_cast(communities[hn]) < mapping.size()); - mapping[communities[hn]] = UL(1); - }); - - // Prefix sum determines vertex ids in coarse hypergraph - parallel::TBBPrefixSum mapping_prefix_sum(mapping); - tbb::parallel_scan(tbb::blocked_range(UL(0), _num_hypernodes), mapping_prefix_sum); - HypernodeID num_hypernodes = mapping_prefix_sum.total_sum(); - - // Remap community ids - tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID& hn) { - if ( nodeIsEnabled(hn) ) { - communities[hn] = mapping_prefix_sum[communities[hn]]; - } else { - communities[hn] = kInvalidHypernode; - } - - // Reset tmp contraction buffer - if ( hn < num_hypernodes ) { - hn_weights[hn] = 0; - tmp_hypernodes[hn] = Hypernode(true); - tmp_num_incident_nets[hn] = 0; - } - }); - - // Mapping from a vertex id of the current hypergraph to its - // id in the coarse hypergraph - auto map_to_coarse_hypergraph = [&](const HypernodeID hn) { - ASSERT(hn < communities.size()); - return communities[hn]; - }; - - doParallelForAllNodes([&](const HypernodeID& hn) { - const HypernodeID coarse_hn = map_to_coarse_hypergraph(hn); - ASSERT(coarse_hn < num_hypernodes, V(coarse_hn) << V(num_hypernodes)); - // Weight vector is atomic => thread-safe - hn_weights[coarse_hn] += nodeWeight(hn); - // Aggregate upper bound for number of incident nets of the contracted vertex - tmp_num_incident_nets[coarse_hn] += nodeDegree(hn); - }); - - // #################### STAGE 2 #################### - // In this step hyperedges and incident nets of vertices are contracted inside the temporary - // buffers. The vertex ids of pins are already remapped to the vertex ids in the coarse - // graph and duplicates are removed. Also nets that become single-pin hyperedges are marked - // as invalid. All incident nets of vertices that are collapsed into one vertex in the coarse - // graph are also aggregate in a consecutive memory range and duplicates are removed. Note - // that parallel and single-pin hyperedges are not removed from the incident nets (will be done - // in a postprocessing step). 
- auto cs2 = [](const HypernodeID x) { return x * x; }; - ConcurrentBucketMap hyperedge_hash_map; - hyperedge_hash_map.reserve_for_estimated_number_of_insertions(_num_hyperedges); - tbb::parallel_invoke([&] { - // Contract Hyperedges - tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID& he) { - if ( edgeIsEnabled(he) ) { - // Copy hyperedge and pins to temporary buffer - const Hyperedge& e = _hyperedges[he]; - ASSERT(static_cast(he) < tmp_hyperedges.size()); - ASSERT(e.firstInvalidEntry() <= tmp_incidence_array.size()); - tmp_hyperedges[he] = e; - valid_hyperedges[he] = 1; - - // Map pins to vertex ids in coarse graph - const size_t incidence_array_start = tmp_hyperedges[he].firstEntry(); - const size_t incidence_array_end = tmp_hyperedges[he].firstInvalidEntry(); - for ( size_t pos = incidence_array_start; pos < incidence_array_end; ++pos ) { - const HypernodeID pin = _incidence_array[pos]; - ASSERT(pos < tmp_incidence_array.size()); - tmp_incidence_array[pos] = map_to_coarse_hypergraph(pin); - } + // Reset tmp contraction buffer + if(hn < num_hypernodes) + { + hn_weights[hn] = 0; + tmp_hypernodes[hn] = Hypernode(true); + tmp_num_incident_nets[hn] = 0; + } + }); - // Remove duplicates and disabled vertices - auto first_entry_it = tmp_incidence_array.begin() + incidence_array_start; - std::sort(first_entry_it, tmp_incidence_array.begin() + incidence_array_end); - auto first_invalid_entry_it = std::unique(first_entry_it, tmp_incidence_array.begin() + incidence_array_end); - while ( first_entry_it != first_invalid_entry_it && *(first_invalid_entry_it - 1) == kInvalidHypernode ) { - --first_invalid_entry_it; - } + // Mapping from a vertex id of the current hypergraph to its + // id in the coarse hypergraph + auto map_to_coarse_hypergraph = [&](const HypernodeID hn) { + ASSERT(hn < communities.size()); + return communities[hn]; + }; - // Update size of hyperedge in temporary hyperedge buffer - const size_t contracted_size = std::distance( - tmp_incidence_array.begin() + incidence_array_start, first_invalid_entry_it); - tmp_hyperedges[he].setSize(contracted_size); + doParallelForAllNodes([&](const HypernodeID &hn) { + const HypernodeID coarse_hn = map_to_coarse_hypergraph(hn); + ASSERT(coarse_hn < num_hypernodes, V(coarse_hn) << V(num_hypernodes)); + // Weight vector is atomic => thread-safe + hn_weights[coarse_hn] += nodeWeight(hn); + // Aggregate upper bound for number of incident nets of the contracted vertex + tmp_num_incident_nets[coarse_hn] += nodeDegree(hn); + }); + + // #################### STAGE 2 #################### + // In this step hyperedges and incident nets of vertices are contracted inside the + // temporary buffers. The vertex ids of pins are already remapped to the vertex ids in + // the coarse graph and duplicates are removed. Also nets that become single-pin + // hyperedges are marked as invalid. All incident nets of vertices that are collapsed + // into one vertex in the coarse graph are also aggregate in a consecutive memory range + // and duplicates are removed. Note that parallel and single-pin hyperedges are not + // removed from the incident nets (will be done in a postprocessing step). 
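// A small sketch of the per-hyperedge contraction step that the STAGE 2 comment above
// describes: pins are remapped to coarse vertex ids, sorted, deduplicated, and an edge
// that shrinks to a single pin is dropped. The helper name contractPins and the plain
// std::vector types are illustrative; the surrounding code does the same work in-place
// inside the reusable tmp_incidence_array buffer and additionally strips pins of
// disabled vertices (kInvalidHypernode).
#include <algorithm>
#include <cstdint>
#include <vector>

using NodeID = std::uint32_t;

// Returns the contracted pin list, or an empty list if the edge becomes single-pin.
std::vector<NodeID> contractPins(const std::vector<NodeID> &pins,
                                 const std::vector<NodeID> &communities)
{
  std::vector<NodeID> coarse;
  coarse.reserve(pins.size());
  for(const NodeID pin : pins)
  {
    coarse.push_back(communities[pin]); // remap fine pin to its coarse vertex id
  }
  std::sort(coarse.begin(), coarse.end());
  coarse.erase(std::unique(coarse.begin(), coarse.end()), coarse.end());
  if(coarse.size() <= 1)
  {
    coarse.clear(); // single-pin hyperedge: flagged invalid in the real code
  }
  return coarse;
}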
+ auto cs2 = [](const HypernodeID x) { return x * x; }; + ConcurrentBucketMap hyperedge_hash_map; + hyperedge_hash_map.reserve_for_estimated_number_of_insertions(_num_hyperedges); + tbb::parallel_invoke( + [&] { + // Contract Hyperedges + tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID &he) { + if(edgeIsEnabled(he)) + { + // Copy hyperedge and pins to temporary buffer + const Hyperedge &e = _hyperedges[he]; + ASSERT(static_cast(he) < tmp_hyperedges.size()); + ASSERT(e.firstInvalidEntry() <= tmp_incidence_array.size()); + tmp_hyperedges[he] = e; + valid_hyperedges[he] = 1; + + // Map pins to vertex ids in coarse graph + const size_t incidence_array_start = tmp_hyperedges[he].firstEntry(); + const size_t incidence_array_end = tmp_hyperedges[he].firstInvalidEntry(); + for(size_t pos = incidence_array_start; pos < incidence_array_end; ++pos) + { + const HypernodeID pin = _incidence_array[pos]; + ASSERT(pos < tmp_incidence_array.size()); + tmp_incidence_array[pos] = map_to_coarse_hypergraph(pin); + } + // Remove duplicates and disabled vertices + auto first_entry_it = tmp_incidence_array.begin() + incidence_array_start; + std::sort(first_entry_it, tmp_incidence_array.begin() + incidence_array_end); + auto first_invalid_entry_it = std::unique( + first_entry_it, tmp_incidence_array.begin() + incidence_array_end); + while(first_entry_it != first_invalid_entry_it && + *(first_invalid_entry_it - 1) == kInvalidHypernode) + { + --first_invalid_entry_it; + } - if ( contracted_size > 1 ) { - // Compute hash of contracted hyperedge - size_t footprint = kEdgeHashSeed; - for ( size_t pos = incidence_array_start; pos < incidence_array_start + contracted_size; ++pos ) { - footprint += cs2(tmp_incidence_array[pos]); + // Update size of hyperedge in temporary hyperedge buffer + const size_t contracted_size = + std::distance(tmp_incidence_array.begin() + incidence_array_start, + first_invalid_entry_it); + tmp_hyperedges[he].setSize(contracted_size); + + if(contracted_size > 1) + { + // Compute hash of contracted hyperedge + size_t footprint = kEdgeHashSeed; + for(size_t pos = incidence_array_start; + pos < incidence_array_start + contracted_size; ++pos) + { + footprint += cs2(tmp_incidence_array[pos]); + } + hyperedge_hash_map.insert( + footprint, + ContractedHyperedgeInformation{ he, footprint, contracted_size, true }); } - hyperedge_hash_map.insert(footprint, - ContractedHyperedgeInformation{ he, footprint, contracted_size, true }); - } else { - // Hyperedge becomes a single-pin hyperedge + else + { + // Hyperedge becomes a single-pin hyperedge + valid_hyperedges[he] = 0; + tmp_hyperedges[he].disable(); + } + } + else + { valid_hyperedges[he] = 0; - tmp_hyperedges[he].disable(); } - } else { - valid_hyperedges[he] = 0; - } - }); - }, [&] { - // Contract Incident Nets - // Compute start position the incident nets of a coarse vertex in the - // temporary incident nets array with a parallel prefix sum - parallel::scalable_vector> tmp_incident_nets_pos; - parallel::TBBPrefixSum, Array> - tmp_incident_nets_prefix_sum(tmp_num_incident_nets); - tbb::parallel_invoke([&] { - tbb::parallel_scan(tbb::blocked_range( - UL(0), UI64(num_hypernodes)), tmp_incident_nets_prefix_sum); - }, [&] { - tmp_incident_nets_pos.assign(num_hypernodes, parallel::IntegralAtomicWrapper(0)); - }); - - // Write the incident nets of each contracted vertex to the temporary incident net array - doParallelForAllNodes([&](const HypernodeID& hn) { - const HypernodeID coarse_hn = map_to_coarse_hypergraph(hn); - const HyperedgeID 
node_degree = nodeDegree(hn); - size_t incident_nets_pos = tmp_incident_nets_prefix_sum[coarse_hn] + - tmp_incident_nets_pos[coarse_hn].fetch_add(node_degree); - ASSERT(incident_nets_pos + node_degree <= tmp_incident_nets_prefix_sum[coarse_hn + 1]); - memcpy(tmp_incident_nets.data() + incident_nets_pos, - _incident_nets.data() + _hypernodes[hn].firstEntry(), - sizeof(HyperedgeID) * node_degree); - }); - - // Setup temporary hypernodes - std::mutex high_degree_vertex_mutex; - parallel::scalable_vector high_degree_vertices; - tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID& coarse_hn) { - // Remove duplicates - const size_t incident_nets_start = tmp_incident_nets_prefix_sum[coarse_hn]; - const size_t incident_nets_end = tmp_incident_nets_prefix_sum[coarse_hn + 1]; - const size_t tmp_degree = incident_nets_end - incident_nets_start; - if ( tmp_degree <= HIGH_DEGREE_CONTRACTION_THRESHOLD ) { - std::sort(tmp_incident_nets.begin() + incident_nets_start, - tmp_incident_nets.begin() + incident_nets_end); - auto first_invalid_entry_it = std::unique(tmp_incident_nets.begin() + incident_nets_start, - tmp_incident_nets.begin() + incident_nets_end); - - // Setup pointers to temporary incident nets - const size_t contracted_size = std::distance(tmp_incident_nets.begin() + incident_nets_start, - first_invalid_entry_it); - tmp_hypernodes[coarse_hn].setSize(contracted_size); - } else { - std::lock_guard lock(high_degree_vertex_mutex); - high_degree_vertices.push_back(coarse_hn); - } - tmp_hypernodes[coarse_hn].setWeight(hn_weights[coarse_hn]); - tmp_hypernodes[coarse_hn].setFirstEntry(incident_nets_start); - }); + }); + }, + [&] { + // Contract Incident Nets + // Compute start position the incident nets of a coarse vertex in the + // temporary incident nets array with a parallel prefix sum + parallel::scalable_vector > + tmp_incident_nets_pos; + parallel::TBBPrefixSum, Array> + tmp_incident_nets_prefix_sum(tmp_num_incident_nets); + tbb::parallel_invoke( + [&] { + tbb::parallel_scan(tbb::blocked_range(UL(0), UI64(num_hypernodes)), + tmp_incident_nets_prefix_sum); + }, + [&] { + tmp_incident_nets_pos.assign(num_hypernodes, + parallel::IntegralAtomicWrapper(0)); + }); + + // Write the incident nets of each contracted vertex to the temporary incident net + // array + doParallelForAllNodes([&](const HypernodeID &hn) { + const HypernodeID coarse_hn = map_to_coarse_hypergraph(hn); + const HyperedgeID node_degree = nodeDegree(hn); + size_t incident_nets_pos = + tmp_incident_nets_prefix_sum[coarse_hn] + + tmp_incident_nets_pos[coarse_hn].fetch_add(node_degree); + ASSERT(incident_nets_pos + node_degree <= + tmp_incident_nets_prefix_sum[coarse_hn + 1]); + memcpy(tmp_incident_nets.data() + incident_nets_pos, + _incident_nets.data() + _hypernodes[hn].firstEntry(), + sizeof(HyperedgeID) * node_degree); + }); - if ( !high_degree_vertices.empty() ) { - // High degree vertices are treated special, because sorting and afterwards - // removing duplicates can become a major sequential bottleneck. Therefore, - // we distribute the incident nets of a high degree vertex into our concurrent - // bucket map. As a result all equal incident nets reside in the same bucket - // afterwards. In a second step, we process each bucket in parallel and apply - // for each bucket the duplicate removal procedure from above. 
- ConcurrentBucketMap duplicate_incident_nets_map; - for ( const HypernodeID& coarse_hn : high_degree_vertices ) { + // Setup temporary hypernodes + std::mutex high_degree_vertex_mutex; + parallel::scalable_vector high_degree_vertices; + tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID &coarse_hn) { + // Remove duplicates const size_t incident_nets_start = tmp_incident_nets_prefix_sum[coarse_hn]; const size_t incident_nets_end = tmp_incident_nets_prefix_sum[coarse_hn + 1]; const size_t tmp_degree = incident_nets_end - incident_nets_start; + if(tmp_degree <= HIGH_DEGREE_CONTRACTION_THRESHOLD) + { + std::sort(tmp_incident_nets.begin() + incident_nets_start, + tmp_incident_nets.begin() + incident_nets_end); + auto first_invalid_entry_it = + std::unique(tmp_incident_nets.begin() + incident_nets_start, + tmp_incident_nets.begin() + incident_nets_end); + + // Setup pointers to temporary incident nets + const size_t contracted_size = std::distance( + tmp_incident_nets.begin() + incident_nets_start, first_invalid_entry_it); + tmp_hypernodes[coarse_hn].setSize(contracted_size); + } + else + { + std::lock_guard lock(high_degree_vertex_mutex); + high_degree_vertices.push_back(coarse_hn); + } + tmp_hypernodes[coarse_hn].setWeight(hn_weights[coarse_hn]); + tmp_hypernodes[coarse_hn].setFirstEntry(incident_nets_start); + }); - // Insert incident nets into concurrent bucket map - duplicate_incident_nets_map.reserve_for_estimated_number_of_insertions(tmp_degree); - tbb::parallel_for(incident_nets_start, incident_nets_end, [&](const size_t pos) { - HyperedgeID he = tmp_incident_nets[pos]; - duplicate_incident_nets_map.insert(he, std::move(he)); - }); - - // Process each bucket in parallel and remove duplicates - std::atomic incident_nets_pos(incident_nets_start); - tbb::parallel_for(UL(0), duplicate_incident_nets_map.numBuckets(), [&](const size_t bucket) { - auto& incident_net_bucket = duplicate_incident_nets_map.getBucket(bucket); - std::sort(incident_net_bucket.begin(), incident_net_bucket.end()); - auto first_invalid_entry_it = std::unique(incident_net_bucket.begin(), incident_net_bucket.end()); - const size_t bucket_degree = std::distance(incident_net_bucket.begin(), first_invalid_entry_it); - const size_t tmp_incident_nets_pos = incident_nets_pos.fetch_add(bucket_degree); - memcpy(tmp_incident_nets.data() + tmp_incident_nets_pos, - incident_net_bucket.data(), sizeof(HyperedgeID) * bucket_degree); - duplicate_incident_nets_map.clear(bucket); - }); + if(!high_degree_vertices.empty()) + { + // High degree vertices are treated special, because sorting and afterwards + // removing duplicates can become a major sequential bottleneck. Therefore, + // we distribute the incident nets of a high degree vertex into our concurrent + // bucket map. As a result all equal incident nets reside in the same bucket + // afterwards. In a second step, we process each bucket in parallel and apply + // for each bucket the duplicate removal procedure from above. 
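// Sketch of the bucket-based duplicate removal described in the comment above: incident
// net ids of a high-degree vertex are partitioned by a hash so that equal ids always
// land in the same bucket, and each bucket can then be sorted and deduplicated
// independently (which the real code does in parallel via ConcurrentBucketMap). The
// helper name dedupHighDegree and the fixed NUM_BUCKETS constant are illustrative.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

using EdgeID = std::uint32_t;

std::vector<EdgeID> dedupHighDegree(const std::vector<EdgeID> &incident_nets)
{
  constexpr std::size_t NUM_BUCKETS = 64;
  std::vector<std::vector<EdgeID> > buckets(NUM_BUCKETS);
  for(const EdgeID he : incident_nets)
  {
    buckets[he % NUM_BUCKETS].push_back(he); // duplicates always share a bucket
  }
  std::vector<EdgeID> result;
  for(std::vector<EdgeID> &bucket : buckets) // per-bucket work is independent
  {
    std::sort(bucket.begin(), bucket.end());
    bucket.erase(std::unique(bucket.begin(), bucket.end()), bucket.end());
    result.insert(result.end(), bucket.begin(), bucket.end());
  }
  return result;
}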
+ ConcurrentBucketMap duplicate_incident_nets_map; + for(const HypernodeID &coarse_hn : high_degree_vertices) + { + const size_t incident_nets_start = tmp_incident_nets_prefix_sum[coarse_hn]; + const size_t incident_nets_end = tmp_incident_nets_prefix_sum[coarse_hn + 1]; + const size_t tmp_degree = incident_nets_end - incident_nets_start; + + // Insert incident nets into concurrent bucket map + duplicate_incident_nets_map.reserve_for_estimated_number_of_insertions( + tmp_degree); + tbb::parallel_for(incident_nets_start, incident_nets_end, + [&](const size_t pos) { + HyperedgeID he = tmp_incident_nets[pos]; + duplicate_incident_nets_map.insert(he, std::move(he)); + }); + + // Process each bucket in parallel and remove duplicates + std::atomic incident_nets_pos(incident_nets_start); + tbb::parallel_for( + UL(0), duplicate_incident_nets_map.numBuckets(), + [&](const size_t bucket) { + auto &incident_net_bucket = + duplicate_incident_nets_map.getBucket(bucket); + std::sort(incident_net_bucket.begin(), incident_net_bucket.end()); + auto first_invalid_entry_it = + std::unique(incident_net_bucket.begin(), incident_net_bucket.end()); + const size_t bucket_degree = + std::distance(incident_net_bucket.begin(), first_invalid_entry_it); + const size_t tmp_incident_nets_pos = + incident_nets_pos.fetch_add(bucket_degree); + memcpy(tmp_incident_nets.data() + tmp_incident_nets_pos, + incident_net_bucket.data(), sizeof(HyperedgeID) * bucket_degree); + duplicate_incident_nets_map.clear(bucket); + }); - // Update number of incident nets of high degree vertex - const size_t contracted_size = incident_nets_pos.load() - incident_nets_start; - tmp_hypernodes[coarse_hn].setSize(contracted_size); + // Update number of incident nets of high degree vertex + const size_t contracted_size = incident_nets_pos.load() - incident_nets_start; + tmp_hypernodes[coarse_hn].setSize(contracted_size); - if (deterministic) { - // sort for determinism - tbb::parallel_sort(tmp_incident_nets.begin() + incident_nets_start, - tmp_incident_nets.begin() + incident_nets_start + contracted_size); + if(deterministic) + { + // sort for determinism + tbb::parallel_sort(tmp_incident_nets.begin() + incident_nets_start, + tmp_incident_nets.begin() + incident_nets_start + + contracted_size); + } } + duplicate_incident_nets_map.free(); } - duplicate_incident_nets_map.free(); - } - }); + }); - // #################### STAGE 3 #################### - // In the step before we aggregated hyperedges within a bucket data structure. - // Hyperedges with the same hash/footprint are stored inside the same bucket. - // We iterate now in parallel over each bucket and sort each bucket - // after its hash. A bucket is processed by one thread and parallel - // hyperedges are detected by comparing the pins of hyperedges with - // the same hash. - - // Helper function that checks if two hyperedges are parallel - // Note, pins inside the hyperedges are sorted. 
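// Sketch of the fingerprint scheme the STAGE 3 comment above relies on: every contracted
// hyperedge gets a cheap, order-independent footprint (as in the code, a seed plus the
// sum of squared pin ids), and only edges whose footprints collide are compared pin by
// pin, which is exact because the pin lists were sorted during contraction. footprintOf
// and areParallel are illustrative helper names, not part of the class interface.
#include <cstddef>
#include <cstdint>
#include <vector>

using NodeID = std::uint32_t;

std::size_t footprintOf(const std::vector<NodeID> &sorted_pins, const std::size_t seed)
{
  std::size_t footprint = seed;
  for(const NodeID pin : sorted_pins)
  {
    footprint += static_cast<std::size_t>(pin) * pin; // cs2(pin) from the code above
  }
  return footprint;
}

bool areParallel(const std::vector<NodeID> &lhs, const std::vector<NodeID> &rhs)
{
  // Element-wise comparison of the sorted pin lists; sizes are checked implicitly.
  return lhs == rhs;
}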
- auto check_if_hyperedges_are_parallel = [&](const HyperedgeID lhs, - const HyperedgeID rhs) { - const Hyperedge& lhs_he = tmp_hyperedges[lhs]; - const Hyperedge& rhs_he = tmp_hyperedges[rhs]; - if ( lhs_he.size() == rhs_he.size() ) { - const size_t lhs_start = lhs_he.firstEntry(); - const size_t rhs_start = rhs_he.firstEntry(); - for ( size_t i = 0; i < lhs_he.size(); ++i ) { - const size_t lhs_pos = lhs_start + i; - const size_t rhs_pos = rhs_start + i; - if ( tmp_incidence_array[lhs_pos] != tmp_incidence_array[rhs_pos] ) { - return false; - } + // #################### STAGE 3 #################### + // In the step before we aggregated hyperedges within a bucket data structure. + // Hyperedges with the same hash/footprint are stored inside the same bucket. + // We iterate now in parallel over each bucket and sort each bucket + // after its hash. A bucket is processed by one thread and parallel + // hyperedges are detected by comparing the pins of hyperedges with + // the same hash. + + // Helper function that checks if two hyperedges are parallel + // Note, pins inside the hyperedges are sorted. + auto check_if_hyperedges_are_parallel = [&](const HyperedgeID lhs, + const HyperedgeID rhs) { + const Hyperedge &lhs_he = tmp_hyperedges[lhs]; + const Hyperedge &rhs_he = tmp_hyperedges[rhs]; + if(lhs_he.size() == rhs_he.size()) + { + const size_t lhs_start = lhs_he.firstEntry(); + const size_t rhs_start = rhs_he.firstEntry(); + for(size_t i = 0; i < lhs_he.size(); ++i) + { + const size_t lhs_pos = lhs_start + i; + const size_t rhs_pos = rhs_start + i; + if(tmp_incidence_array[lhs_pos] != tmp_incidence_array[rhs_pos]) + { + return false; } - return true; - } else { - return false; } - }; + return true; + } + else + { + return false; + } + }; - tbb::parallel_for(UL(0), hyperedge_hash_map.numBuckets(), [&](const size_t bucket) { - auto& hyperedge_bucket = hyperedge_hash_map.getBucket(bucket); - std::sort(hyperedge_bucket.begin(), hyperedge_bucket.end(), - [&](const ContractedHyperedgeInformation& lhs, const ContractedHyperedgeInformation& rhs) { - return std::tie(lhs.hash, lhs.size, lhs.he) < std::tie(rhs.hash, rhs.size, rhs.he); - }); + tbb::parallel_for(UL(0), hyperedge_hash_map.numBuckets(), [&](const size_t bucket) { + auto &hyperedge_bucket = hyperedge_hash_map.getBucket(bucket); + std::sort(hyperedge_bucket.begin(), hyperedge_bucket.end(), + [&](const ContractedHyperedgeInformation &lhs, + const ContractedHyperedgeInformation &rhs) { + return std::tie(lhs.hash, lhs.size, lhs.he) < + std::tie(rhs.hash, rhs.size, rhs.he); + }); - // Parallel Hyperedge Detection - for ( size_t i = 0; i < hyperedge_bucket.size(); ++i ) { - ContractedHyperedgeInformation& contracted_he_lhs = hyperedge_bucket[i]; - if ( contracted_he_lhs.valid ) { - const HyperedgeID lhs_he = contracted_he_lhs.he; - HyperedgeWeight lhs_weight = tmp_hyperedges[lhs_he].weight(); - for ( size_t j = i + 1; j < hyperedge_bucket.size(); ++j ) { - ContractedHyperedgeInformation& contracted_he_rhs = hyperedge_bucket[j]; - const HyperedgeID rhs_he = contracted_he_rhs.he; - if ( contracted_he_rhs.valid && - contracted_he_lhs.hash == contracted_he_rhs.hash && - check_if_hyperedges_are_parallel(lhs_he, rhs_he) ) { - // Hyperedges are parallel - lhs_weight += tmp_hyperedges[rhs_he].weight(); - contracted_he_rhs.valid = false; - valid_hyperedges[rhs_he] = false; - } else if ( contracted_he_lhs.hash != contracted_he_rhs.hash ) { - // In case, hash of both are not equal we go to the next hyperedge - // because we compared it with all hyperedges 
that had an equal hash - break; - } + // Parallel Hyperedge Detection + for(size_t i = 0; i < hyperedge_bucket.size(); ++i) + { + ContractedHyperedgeInformation &contracted_he_lhs = hyperedge_bucket[i]; + if(contracted_he_lhs.valid) + { + const HyperedgeID lhs_he = contracted_he_lhs.he; + HyperedgeWeight lhs_weight = tmp_hyperedges[lhs_he].weight(); + for(size_t j = i + 1; j < hyperedge_bucket.size(); ++j) + { + ContractedHyperedgeInformation &contracted_he_rhs = hyperedge_bucket[j]; + const HyperedgeID rhs_he = contracted_he_rhs.he; + if(contracted_he_rhs.valid && + contracted_he_lhs.hash == contracted_he_rhs.hash && + check_if_hyperedges_are_parallel(lhs_he, rhs_he)) + { + // Hyperedges are parallel + lhs_weight += tmp_hyperedges[rhs_he].weight(); + contracted_he_rhs.valid = false; + valid_hyperedges[rhs_he] = false; + } + else if(contracted_he_lhs.hash != contracted_he_rhs.hash) + { + // In case the hashes are not equal, we go to the next hyperedge + // because we already compared it with all hyperedges that had an equal hash + break; } - tmp_hyperedges[lhs_he].setWeight(lhs_weight); } - hyperedge_hash_map.free(bucket); - }); - - // #################### STAGE 4 #################### - // Coarsened hypergraph is constructed here by writting data from temporary - // buffers to corresponding members in coarsened hypergraph. For the - // incidence array, we compute a prefix sum over the hyperedge sizes in - // the contracted hypergraph which determines the start position of the pins - // of each net in the incidence array. Furthermore, we postprocess the incident - // nets of each vertex by removing invalid hyperedges and remapping hyperedge ids. - // Incident nets are also written to the incident nets array with the help of a prefix - // sum over the node degrees. - - StaticHypergraph hypergraph; - - // Compute number of hyperedges in coarse graph (those flagged as valid) - parallel::TBBPrefixSum he_mapping(valid_hyperedges); - tbb::parallel_invoke([&] { - tbb::parallel_scan(tbb::blocked_range(size_t(0), size_t(_num_hyperedges)), he_mapping); - }, [&] { - hypergraph._hypernodes.resize(num_hypernodes); + } + hyperedge_hash_map.free(bucket); + }); + + // #################### STAGE 4 #################### + // Coarsened hypergraph is constructed here by writing data from temporary + // buffers to corresponding members in coarsened hypergraph. For the + // incidence array, we compute a prefix sum over the hyperedge sizes in + // the contracted hypergraph which determines the start position of the pins + // of each net in the incidence array. Furthermore, we postprocess the incident + // nets of each vertex by removing invalid hyperedges and remapping hyperedge ids. + // Incident nets are also written to the incident nets array with the help of a prefix + // sum over the node degrees.
+ + StaticHypergraph hypergraph; + + // Compute number of hyperedges in coarse graph (those flagged as valid) + parallel::TBBPrefixSum he_mapping(valid_hyperedges); + tbb::parallel_invoke( + [&] { + tbb::parallel_scan(tbb::blocked_range(size_t(0), size_t(_num_hyperedges)), + he_mapping); + }, + [&] { hypergraph._hypernodes.resize(num_hypernodes); }); + + const HyperedgeID num_hyperedges = he_mapping.total_sum(); + hypergraph._num_hypernodes = num_hypernodes; + hypergraph._num_hyperedges = num_hyperedges; + + auto assign_communities = [&] { + hypergraph._community_ids.resize(num_hypernodes, 0); + doParallelForAllNodes([&](HypernodeID fine_hn) { + hypergraph.setCommunityID(map_to_coarse_hypergraph(fine_hn), communityID(fine_hn)); }); + }; - const HyperedgeID num_hyperedges = he_mapping.total_sum(); - hypergraph._num_hypernodes = num_hypernodes; - hypergraph._num_hyperedges = num_hyperedges; - - auto assign_communities = [&] { - hypergraph._community_ids.resize(num_hypernodes, 0); - doParallelForAllNodes([&](HypernodeID fine_hn) { - hypergraph.setCommunityID(map_to_coarse_hypergraph(fine_hn), communityID(fine_hn)); - }); - }; - - auto setup_hyperedges = [&] { - // Compute start position of each hyperedge in incidence array - parallel::TBBPrefixSum num_pins_prefix_sum(he_sizes); - tbb::parallel_invoke([&] { - tbb::parallel_for(HyperedgeID(0), _num_hyperedges, [&](HyperedgeID id) { - if ( he_mapping.value(id) ) { - he_sizes[id] = tmp_hyperedges[id].size(); - } else { - he_sizes[id] = 0; - } - }); - - tbb::parallel_scan(tbb::blocked_range(UL(0), UI64(_num_hyperedges)), num_pins_prefix_sum); + auto setup_hyperedges = [&] { + // Compute start position of each hyperedge in incidence array + parallel::TBBPrefixSum num_pins_prefix_sum(he_sizes); + tbb::parallel_invoke( + [&] { + tbb::parallel_for(HyperedgeID(0), _num_hyperedges, [&](HyperedgeID id) { + if(he_mapping.value(id)) + { + he_sizes[id] = tmp_hyperedges[id].size(); + } + else + { + he_sizes[id] = 0; + } + }); - const size_t num_pins = num_pins_prefix_sum.total_sum(); - hypergraph._num_pins = num_pins; - hypergraph._incidence_array.resize(num_pins); - }, [&] { - hypergraph._hyperedges.resize(num_hyperedges); - }); + tbb::parallel_scan(tbb::blocked_range(UL(0), UI64(_num_hyperedges)), + num_pins_prefix_sum); + + const size_t num_pins = num_pins_prefix_sum.total_sum(); + hypergraph._num_pins = num_pins; + hypergraph._incidence_array.resize(num_pins); + }, + [&] { hypergraph._hyperedges.resize(num_hyperedges); }); + + // Write hyperedges from temporary buffers to incidence array + tbb::enumerable_thread_specific local_max_edge_size(UL(0)); + tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID &id) { + if(he_mapping.value(id) > 0 /* hyperedge is valid */) + { + const size_t he_pos = he_mapping[id]; + const size_t incidence_array_start = num_pins_prefix_sum[id]; + Hyperedge &he = hypergraph._hyperedges[he_pos]; + he = tmp_hyperedges[id]; + const size_t tmp_incidence_array_start = he.firstEntry(); + const size_t edge_size = he.size(); + local_max_edge_size.local() = std::max(local_max_edge_size.local(), edge_size); + std::memcpy(hypergraph._incidence_array.data() + incidence_array_start, + tmp_incidence_array.data() + tmp_incidence_array_start, + sizeof(HypernodeID) * edge_size); + he.setFirstEntry(incidence_array_start); + } + }); + hypergraph._max_edge_size = local_max_edge_size.combine( + [&](const size_t lhs, const size_t rhs) { return std::max(lhs, rhs); }); + }; - // Write hyperedges from temporary buffers to incidence array - 
tbb::enumerable_thread_specific local_max_edge_size(UL(0)); - tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID& id) { - if ( he_mapping.value(id) > 0 /* hyperedge is valid */ ) { - const size_t he_pos = he_mapping[id]; - const size_t incidence_array_start = num_pins_prefix_sum[id]; - Hyperedge& he = hypergraph._hyperedges[he_pos]; - he = tmp_hyperedges[id]; - const size_t tmp_incidence_array_start = he.firstEntry(); - const size_t edge_size = he.size(); - local_max_edge_size.local() = std::max(local_max_edge_size.local(), edge_size); - std::memcpy(hypergraph._incidence_array.data() + incidence_array_start, - tmp_incidence_array.data() + tmp_incidence_array_start, - sizeof(HypernodeID) * edge_size); - he.setFirstEntry(incidence_array_start); + auto setup_hypernodes = [&] { + // Remap hyperedge ids in temporary incident nets to hyperedge ids of the + // coarse hypergraph and remove single-pin/parallel hyperedges. + tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID &id) { + const size_t incident_nets_start = tmp_hypernodes[id].firstEntry(); + size_t incident_nets_end = tmp_hypernodes[id].firstInvalidEntry(); + for(size_t pos = incident_nets_start; pos < incident_nets_end; ++pos) + { + const HyperedgeID he = tmp_incident_nets[pos]; + if(he_mapping.value(he) > 0 /* hyperedge is valid */) + { + tmp_incident_nets[pos] = he_mapping[he]; } - }); - hypergraph._max_edge_size = local_max_edge_size.combine( - [&](const size_t lhs, const size_t rhs) { - return std::max(lhs, rhs); - }); - }; - - auto setup_hypernodes = [&] { - // Remap hyperedge ids in temporary incident nets to hyperedge ids of the - // coarse hypergraph and remove singple-pin/parallel hyperedges. - tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID& id) { - const size_t incident_nets_start = tmp_hypernodes[id].firstEntry(); - size_t incident_nets_end = tmp_hypernodes[id].firstInvalidEntry(); - for ( size_t pos = incident_nets_start; pos < incident_nets_end; ++pos ) { - const HyperedgeID he = tmp_incident_nets[pos]; - if ( he_mapping.value(he) > 0 /* hyperedge is valid */ ) { - tmp_incident_nets[pos] = he_mapping[he]; - } else { - std::swap(tmp_incident_nets[pos--], tmp_incident_nets[--incident_nets_end]); - } + else + { + std::swap(tmp_incident_nets[pos--], tmp_incident_nets[--incident_nets_end]); } - const size_t incident_nets_size = incident_nets_end - incident_nets_start; - tmp_hypernodes[id].setSize(incident_nets_size); - tmp_num_incident_nets[id] = incident_nets_size; - }); + } + const size_t incident_nets_size = incident_nets_end - incident_nets_start; + tmp_hypernodes[id].setSize(incident_nets_size); + tmp_num_incident_nets[id] = incident_nets_size; + }); - // Compute start position of the incident nets for each vertex inside - // the coarsened incident net array - parallel::TBBPrefixSum, Array> - num_incident_nets_prefix_sum(tmp_num_incident_nets); - tbb::parallel_scan(tbb::blocked_range( - UL(0), UI64(num_hypernodes)), num_incident_nets_prefix_sum); - const size_t total_degree = num_incident_nets_prefix_sum.total_sum(); - hypergraph._total_degree = total_degree; - hypergraph._incident_nets.resize(total_degree); - // Write incident nets from temporary buffer to incident nets array - tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID& id) { - const size_t incident_nets_start = num_incident_nets_prefix_sum[id]; - Hypernode& hn = hypergraph._hypernodes[id]; - hn = tmp_hypernodes[id]; - const size_t tmp_incident_nets_start = hn.firstEntry(); -
std::memcpy(hypergraph._incident_nets.data() + incident_nets_start, - tmp_incident_nets.data() + tmp_incident_nets_start, - sizeof(HyperedgeID) * hn.size()); - hn.setFirstEntry(incident_nets_start); - }); - }; + // Compute start position of the incident nets for each vertex inside + // the coarsened incident net array + parallel::TBBPrefixSum, Array> + num_incident_nets_prefix_sum(tmp_num_incident_nets); + tbb::parallel_scan(tbb::blocked_range(UL(0), UI64(num_hypernodes)), + num_incident_nets_prefix_sum); + const size_t total_degree = num_incident_nets_prefix_sum.total_sum(); + hypergraph._total_degree = total_degree; + hypergraph._incident_nets.resize(total_degree); + // Write incident nets from temporary buffer to incident nets array + tbb::parallel_for(ID(0), num_hypernodes, [&](const HypernodeID &id) { + const size_t incident_nets_start = num_incident_nets_prefix_sum[id]; + Hypernode &hn = hypergraph._hypernodes[id]; + hn = tmp_hypernodes[id]; + const size_t tmp_incident_nets_start = hn.firstEntry(); + std::memcpy(hypergraph._incident_nets.data() + incident_nets_start, + tmp_incident_nets.data() + tmp_incident_nets_start, + sizeof(HyperedgeID) * hn.size()); + hn.setFirstEntry(incident_nets_start); + }); + }; - tbb::parallel_invoke(assign_communities, setup_hyperedges, setup_hypernodes); + tbb::parallel_invoke(assign_communities, setup_hyperedges, setup_hypernodes); - if ( hasFixedVertices() ) { - // Map fixed vertices to coarse hypergraph - FixedVertexSupport coarse_fixed_vertices( + if(hasFixedVertices()) + { + // Map fixed vertices to coarse hypergraph + FixedVertexSupport coarse_fixed_vertices( hypergraph.initialNumNodes(), _fixed_vertices.numBlocks()); - coarse_fixed_vertices.setHypergraph(&hypergraph); - doParallelForAllNodes([&](const HypernodeID hn) { - if ( isFixed(hn) ) { - coarse_fixed_vertices.fixToBlock(communities[hn], fixedVertexBlock(hn)); - } - }); - hypergraph.addFixedVertexSupport(std::move(coarse_fixed_vertices)); - } - - hypergraph._total_weight = _total_weight; // didn't lose any vertices - hypergraph._tmp_contraction_buffer = _tmp_contraction_buffer; - _tmp_contraction_buffer = nullptr; - return hypergraph; - } - - - // ! 
Copy static hypergraph in parallel - StaticHypergraph StaticHypergraph::copy(parallel_tag_t) const { - StaticHypergraph hypergraph; - - hypergraph._num_hypernodes = _num_hypernodes; - hypergraph._num_removed_hypernodes = _num_removed_hypernodes; - hypergraph._num_hyperedges = _num_hyperedges; - hypergraph._num_removed_hyperedges = _num_removed_hyperedges; - hypergraph._max_edge_size = _max_edge_size; - hypergraph._num_pins = _num_pins; - hypergraph._total_degree = _total_degree; - hypergraph._total_weight = _total_weight; - - tbb::parallel_invoke([&] { - hypergraph._hypernodes.resize(_hypernodes.size()); - memcpy(hypergraph._hypernodes.data(), _hypernodes.data(), - sizeof(Hypernode) * _hypernodes.size()); - }, [&] { - hypergraph._incident_nets.resize(_incident_nets.size()); - memcpy(hypergraph._incident_nets.data(), _incident_nets.data(), - sizeof(HyperedgeID) * _incident_nets.size()); - }, [&] { - hypergraph._hyperedges.resize(_hyperedges.size()); - memcpy(hypergraph._hyperedges.data(), _hyperedges.data(), - sizeof(Hyperedge) * _hyperedges.size()); - }, [&] { - hypergraph._incidence_array.resize(_incidence_array.size()); - memcpy(hypergraph._incidence_array.data(), _incidence_array.data(), - sizeof(HypernodeID) * _incidence_array.size()); - }, [&] { - hypergraph._community_ids = _community_ids; - }, [&] { - hypergraph.addFixedVertexSupport(_fixed_vertices.copy()); + coarse_fixed_vertices.setHypergraph(&hypergraph); + doParallelForAllNodes([&](const HypernodeID hn) { + if(isFixed(hn)) + { + coarse_fixed_vertices.fixToBlock(communities[hn], fixedVertexBlock(hn)); + } }); - return hypergraph; + hypergraph.addFixedVertexSupport(std::move(coarse_fixed_vertices)); } - // ! Copy static hypergraph sequential - StaticHypergraph StaticHypergraph::copy() const { - StaticHypergraph hypergraph; - - hypergraph._num_hypernodes = _num_hypernodes; - hypergraph._num_removed_hypernodes = _num_removed_hypernodes; - hypergraph._num_hyperedges = _num_hyperedges; - hypergraph._num_removed_hyperedges = _num_removed_hyperedges; - hypergraph._max_edge_size = _max_edge_size; - hypergraph._num_pins = _num_pins; - hypergraph._total_degree = _total_degree; - hypergraph._total_weight = _total_weight; - - hypergraph._hypernodes.resize(_hypernodes.size()); - memcpy(hypergraph._hypernodes.data(), _hypernodes.data(), - sizeof(Hypernode) * _hypernodes.size()); - hypergraph._incident_nets.resize(_incident_nets.size()); - memcpy(hypergraph._incident_nets.data(), _incident_nets.data(), - sizeof(HyperedgeID) * _incident_nets.size()); - - hypergraph._hyperedges.resize(_hyperedges.size()); - memcpy(hypergraph._hyperedges.data(), _hyperedges.data(), - sizeof(Hyperedge) * _hyperedges.size()); - hypergraph._incidence_array.resize(_incidence_array.size()); - memcpy(hypergraph._incidence_array.data(), _incidence_array.data(), - sizeof(HypernodeID) * _incidence_array.size()); - - hypergraph._community_ids = _community_ids; - hypergraph.addFixedVertexSupport(_fixed_vertices.copy()); - - return hypergraph; - } - - void StaticHypergraph::memoryConsumption(utils::MemoryTreeNode* parent) const { - ASSERT(parent); - parent->addChild("Hypernodes", sizeof(Hypernode) * _hypernodes.size()); - parent->addChild("Incident Nets", sizeof(HyperedgeID) * _incident_nets.size()); - parent->addChild("Hyperedges", sizeof(Hyperedge) * _hyperedges.size()); - parent->addChild("Incidence Array", sizeof(HypernodeID) * _incidence_array.size()); - parent->addChild("Communities", sizeof(PartitionID) * _community_ids.capacity()); - if ( hasFixedVertices() ) { - 
parent->addChild("Fixed Vertex Support", _fixed_vertices.size_in_bytes()); - } - } - - // ! Computes the total node weight of the hypergraph - void StaticHypergraph::computeAndSetTotalNodeWeight(parallel_tag_t) { - _total_weight = tbb::parallel_reduce(tbb::blocked_range(ID(0), _num_hypernodes), 0, - [this](const tbb::blocked_range& range, HypernodeWeight init) { - HypernodeWeight weight = init; - for (HypernodeID hn = range.begin(); hn < range.end(); ++hn) { - if (nodeIsEnabled(hn)) { - weight += this->_hypernodes[hn].weight(); - } - } - return weight; - }, std::plus<>()); + hypergraph._total_weight = _total_weight; // didn't lose any vertices + hypergraph._tmp_contraction_buffer = _tmp_contraction_buffer; + _tmp_contraction_buffer = nullptr; + return hypergraph; +} + +// ! Copy static hypergraph in parallel +StaticHypergraph StaticHypergraph::copy(parallel_tag_t) const +{ + StaticHypergraph hypergraph; + + hypergraph._num_hypernodes = _num_hypernodes; + hypergraph._num_removed_hypernodes = _num_removed_hypernodes; + hypergraph._num_hyperedges = _num_hyperedges; + hypergraph._num_removed_hyperedges = _num_removed_hyperedges; + hypergraph._max_edge_size = _max_edge_size; + hypergraph._num_pins = _num_pins; + hypergraph._total_degree = _total_degree; + hypergraph._total_weight = _total_weight; + + tbb::parallel_invoke( + [&] { + hypergraph._hypernodes.resize(_hypernodes.size()); + memcpy(hypergraph._hypernodes.data(), _hypernodes.data(), + sizeof(Hypernode) * _hypernodes.size()); + }, + [&] { + hypergraph._incident_nets.resize(_incident_nets.size()); + memcpy(hypergraph._incident_nets.data(), _incident_nets.data(), + sizeof(HyperedgeID) * _incident_nets.size()); + }, + [&] { + hypergraph._hyperedges.resize(_hyperedges.size()); + memcpy(hypergraph._hyperedges.data(), _hyperedges.data(), + sizeof(Hyperedge) * _hyperedges.size()); + }, + [&] { + hypergraph._incidence_array.resize(_incidence_array.size()); + memcpy(hypergraph._incidence_array.data(), _incidence_array.data(), + sizeof(HypernodeID) * _incidence_array.size()); + }, + [&] { hypergraph._community_ids = _community_ids; }, + [&] { hypergraph.addFixedVertexSupport(_fixed_vertices.copy()); }); + return hypergraph; +} + +// ! 
Copy static hypergraph sequential +StaticHypergraph StaticHypergraph::copy() const +{ + StaticHypergraph hypergraph; + + hypergraph._num_hypernodes = _num_hypernodes; + hypergraph._num_removed_hypernodes = _num_removed_hypernodes; + hypergraph._num_hyperedges = _num_hyperedges; + hypergraph._num_removed_hyperedges = _num_removed_hyperedges; + hypergraph._max_edge_size = _max_edge_size; + hypergraph._num_pins = _num_pins; + hypergraph._total_degree = _total_degree; + hypergraph._total_weight = _total_weight; + + hypergraph._hypernodes.resize(_hypernodes.size()); + memcpy(hypergraph._hypernodes.data(), _hypernodes.data(), + sizeof(Hypernode) * _hypernodes.size()); + hypergraph._incident_nets.resize(_incident_nets.size()); + memcpy(hypergraph._incident_nets.data(), _incident_nets.data(), + sizeof(HyperedgeID) * _incident_nets.size()); + + hypergraph._hyperedges.resize(_hyperedges.size()); + memcpy(hypergraph._hyperedges.data(), _hyperedges.data(), + sizeof(Hyperedge) * _hyperedges.size()); + hypergraph._incidence_array.resize(_incidence_array.size()); + memcpy(hypergraph._incidence_array.data(), _incidence_array.data(), + sizeof(HypernodeID) * _incidence_array.size()); + + hypergraph._community_ids = _community_ids; + hypergraph.addFixedVertexSupport(_fixed_vertices.copy()); + + return hypergraph; +} + +void StaticHypergraph::memoryConsumption(utils::MemoryTreeNode *parent) const +{ + ASSERT(parent); + parent->addChild("Hypernodes", sizeof(Hypernode) * _hypernodes.size()); + parent->addChild("Incident Nets", sizeof(HyperedgeID) * _incident_nets.size()); + parent->addChild("Hyperedges", sizeof(Hyperedge) * _hyperedges.size()); + parent->addChild("Incidence Array", sizeof(HypernodeID) * _incidence_array.size()); + parent->addChild("Communities", sizeof(PartitionID) * _community_ids.capacity()); + if(hasFixedVertices()) + { + parent->addChild("Fixed Vertex Support", _fixed_vertices.size_in_bytes()); } +} + +// ! 
Computes the total node weight of the hypergraph +void StaticHypergraph::computeAndSetTotalNodeWeight(parallel_tag_t) +{ + _total_weight = tbb::parallel_reduce( + tbb::blocked_range(ID(0), _num_hypernodes), 0, + [this](const tbb::blocked_range &range, HypernodeWeight init) { + HypernodeWeight weight = init; + for(HypernodeID hn = range.begin(); hn < range.end(); ++hn) + { + if(nodeIsEnabled(hn)) + { + weight += this->_hypernodes[hn].weight(); + } + } + return weight; + }, + std::plus<>()); +} } // namespace diff --git a/mt-kahypar/datastructures/static_hypergraph.h b/mt-kahypar/datastructures/static_hypergraph.h index c0184ec8c..9ae10b4b2 100644 --- a/mt-kahypar/datastructures/static_hypergraph.h +++ b/mt-kahypar/datastructures/static_hypergraph.h @@ -27,32 +27,31 @@ #pragma once - #include "tbb/parallel_for.h" #include "include/libmtkahypartypes.h" -#include "mt-kahypar/macros.h" #include "mt-kahypar/datastructures/array.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/fixed_vertex_support.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/context_enum_classes.h" +#include "mt-kahypar/utils/exception.h" #include "mt-kahypar/utils/memory_tree.h" #include "mt-kahypar/utils/range.h" -#include "mt-kahypar/utils/exception.h" namespace mt_kahypar { namespace ds { // Forward class StaticHypergraphFactory; -template +template class PartitionedHypergraph; -class StaticHypergraph { +class StaticHypergraph +{ static constexpr bool enable_heavy_assert = false; @@ -69,86 +68,74 @@ class StaticHypergraph { using AtomicHypernodeID = parallel::IntegralAtomicWrapper; using AtomicHypernodeWeight = parallel::IntegralAtomicWrapper; - using UncontractionFunction = std::function; - #define NOOP_BATCH_FUNC [] (const HypernodeID, const HypernodeID, const HyperedgeID) { } + using UncontractionFunction = + std::function; +#define NOOP_BATCH_FUNC [](const HypernodeID, const HypernodeID, const HyperedgeID) {} /** * Represents a hypernode of the hypergraph and contains all information * associated with a vertex. */ - class Hypernode { - public: + class Hypernode + { + public: using IDType = HypernodeID; - Hypernode() : - _begin(0), - _size(0), - _weight(1), - _valid(false) { } + Hypernode() : _begin(0), _size(0), _weight(1), _valid(false) {} - Hypernode(const bool valid) : - _begin(0), - _size(0), - _weight(1), - _valid(valid) { } + Hypernode(const bool valid) : _begin(0), _size(0), _weight(1), _valid(valid) {} // Sentinel Constructor - Hypernode(const size_t begin) : - _begin(begin), - _size(0), - _weight(1), - _valid(false) { } - - bool isDisabled() const { - return _valid == false; - } + Hypernode(const size_t begin) : _begin(begin), _size(0), _weight(1), _valid(false) {} + + bool isDisabled() const { return _valid == false; } - void enable() { + void enable() + { ASSERT(isDisabled()); _valid = true; } - void disable() { + void disable() + { ASSERT(!isDisabled()); _valid = false; } // ! Returns the index of the first element in _incident_nets - size_t firstEntry() const { - return _begin; - } + size_t firstEntry() const { return _begin; } // ! Sets the index of the first element in _incident_nets to begin - void setFirstEntry(size_t begin) { + void setFirstEntry(size_t begin) + { ASSERT(!isDisabled()); _begin = begin; } // ! 
Returns the index of the first element in _incident_nets - size_t firstInvalidEntry() const { - return _begin + _size; - } + size_t firstInvalidEntry() const { return _begin + _size; } - size_t size() const { + size_t size() const + { ASSERT(!isDisabled()); return _size; } - void setSize(size_t size) { + void setSize(size_t size) + { ASSERT(!isDisabled()); _size = size; } - HyperedgeWeight weight() const { - return _weight; - } + HyperedgeWeight weight() const { return _weight; } - void setWeight(HyperedgeWeight weight) { + void setWeight(HyperedgeWeight weight) + { ASSERT(!isDisabled()); _weight = weight; } - private: + private: // ! Index of the first element in _incident_nets size_t _begin; // ! Number of incident nets @@ -163,84 +150,80 @@ class StaticHypergraph { * Represents a hyperedge of the hypergraph and contains all information * associated with a net (except connectivity information). */ - class Hyperedge { - public: + class Hyperedge + { + public: using IDType = HyperedgeID; - Hyperedge() : - _begin(0), - _size(0), - _weight(1), - _valid(false) { } + Hyperedge() : _begin(0), _size(0), _weight(1), _valid(false) {} // Sentinel Constructor - Hyperedge(const size_t begin) : - _begin(begin), - _size(0), - _weight(1), - _valid(false) { } + Hyperedge(const size_t begin) : _begin(begin), _size(0), _weight(1), _valid(false) {} // ! Disables the hypernode/hyperedge. Disable hypernodes/hyperedges will be skipped // ! when iterating over the set of all nodes/edges. - void disable() { + void disable() + { ASSERT(!isDisabled()); _valid = false; } - void enable() { + void enable() + { ASSERT(isDisabled()); _valid = true; } - bool isDisabled() const { - return _valid == false; - } + bool isDisabled() const { return _valid == false; } // ! Returns the index of the first element in _incidence_array - size_t firstEntry() const { - return _begin; - } + size_t firstEntry() const { return _begin; } // ! Sets the index of the first element in _incidence_array to begin - void setFirstEntry(size_t begin) { + void setFirstEntry(size_t begin) + { ASSERT(!isDisabled()); _begin = begin; } // ! Returns the index of the first element in _incidence_array - size_t firstInvalidEntry() const { - return _begin + _size; - } + size_t firstInvalidEntry() const { return _begin + _size; } - size_t size() const { + size_t size() const + { ASSERT(!isDisabled()); return _size; } - void setSize(size_t size) { + void setSize(size_t size) + { ASSERT(!isDisabled()); _size = size; } - HyperedgeWeight weight() const { + HyperedgeWeight weight() const + { ASSERT(!isDisabled()); return _weight; } - void setWeight(HyperedgeWeight weight) { + void setWeight(HyperedgeWeight weight) + { ASSERT(!isDisabled()); _weight = weight; } - bool operator== (const Hyperedge& rhs) const { + bool operator==(const Hyperedge &rhs) const + { return _begin == rhs._begin && _size == rhs._size && _weight == rhs._weight; } - bool operator!= (const Hyperedge& rhs) const { + bool operator!=(const Hyperedge &rhs) const + { return _begin != rhs._begin || _size != rhs._size || _weight != rhs._weight; } - private: + private: // ! Index of the first element in _incidence_array size_t _begin; // ! 
Number of pins @@ -269,13 +252,14 @@ class StaticHypergraph { * */ template - class HypergraphElementIterator { - public: + class HypergraphElementIterator + { + public: using IDType = typename ElementType::IDType; using iterator_category = std::forward_iterator_tag; using value_type = IDType; - using reference = IDType&; - using pointer = const IDType*; + using reference = IDType &; + using pointer = const IDType *; using difference_type = std::ptrdiff_t; /*! @@ -289,56 +273,57 @@ class StaticHypergraph { * \param id The index of the element the pointer points to * \param max_id The maximum index allowed */ - HypergraphElementIterator(const ElementType* start_element, IDType id, IDType max_id) : - _id(id), - _max_id(max_id), - _element(start_element) { - if (_id != _max_id && _element->isDisabled()) { - operator++ (); + HypergraphElementIterator(const ElementType *start_element, IDType id, + IDType max_id) : + _id(id), + _max_id(max_id), _element(start_element) + { + if(_id != _max_id && _element->isDisabled()) + { + operator++(); } } // ! Returns the id of the element the iterator currently points to. - IDType operator* () const { - return _id; - } + IDType operator*() const { return _id; } // ! Prefix increment. The iterator advances to the next valid element. - HypergraphElementIterator & operator++ () { + HypergraphElementIterator &operator++() + { ASSERT(_id < _max_id); - do { + do + { ++_id; ++_element; - } while (_id < _max_id && _element->isDisabled()); + } while(_id < _max_id && _element->isDisabled()); return *this; } // ! Postfix increment. The iterator advances to the next valid element. - HypergraphElementIterator operator++ (int) { + HypergraphElementIterator operator++(int) + { HypergraphElementIterator copy = *this; - operator++ (); + operator++(); return copy; } - bool operator!= (const HypergraphElementIterator& rhs) { - return _id != rhs._id; - } + bool operator!=(const HypergraphElementIterator &rhs) { return _id != rhs._id; } - bool operator== (const HypergraphElementIterator& rhs) { - return _id == rhs._id; - } + bool operator==(const HypergraphElementIterator &rhs) { return _id == rhs._id; } - private: + private: // Handle to the HypergraphElement the iterator currently points to IDType _id = 0; // Maximum allowed index IDType _max_id = 0; // HypergraphElement the iterator currently points to - const ElementType* _element = nullptr; + const ElementType *_element = nullptr; }; - static_assert(std::is_trivially_copyable::value, "Hypernode is not trivially copyable"); - static_assert(std::is_trivially_copyable::value, "Hyperedge is not trivially copyable"); + static_assert(std::is_trivially_copyable::value, + "Hypernode is not trivially copyable"); + static_assert(std::is_trivially_copyable::value, + "Hyperedge is not trivially copyable"); using IncidenceArray = Array; using IncidentNets = Array; @@ -346,43 +331,43 @@ class StaticHypergraph { // ! Contains buffers that are needed during multilevel contractions. // ! Struct is allocated on top level hypergraph and passed to each contracted // ! hypergraph such that memory can be reused in consecutive contractions. 
- struct TmpContractionBuffer { + struct TmpContractionBuffer + { explicit TmpContractionBuffer(const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, - const HyperedgeID num_pins) { - tbb::parallel_invoke([&] { - mapping.resize("Coarsening", "mapping", num_hypernodes); - }, [&] { - tmp_hypernodes.resize("Coarsening", "tmp_hypernodes", num_hypernodes); - }, [&] { - tmp_incident_nets.resize("Coarsening", "tmp_incident_nets", num_pins); - }, [&] { - tmp_num_incident_nets.resize("Coarsening", "tmp_num_incident_nets", num_hypernodes); - }, [&] { - hn_weights.resize("Coarsening", "hn_weights", num_hypernodes); - }, [&] { - tmp_hyperedges.resize("Coarsening", "tmp_hyperedges", num_hyperedges); - }, [&] { - tmp_incidence_array.resize("Coarsening", "tmp_incidence_array", num_pins); - }, [&] { - he_sizes.resize("Coarsening", "he_sizes", num_hyperedges); - }, [&] { - valid_hyperedges.resize("Coarsening", "valid_hyperedges", num_hyperedges); - }); + const HyperedgeID num_pins) + { + tbb::parallel_invoke( + [&] { mapping.resize("Coarsening", "mapping", num_hypernodes); }, + [&] { tmp_hypernodes.resize("Coarsening", "tmp_hypernodes", num_hypernodes); }, + [&] { tmp_incident_nets.resize("Coarsening", "tmp_incident_nets", num_pins); }, + [&] { + tmp_num_incident_nets.resize("Coarsening", "tmp_num_incident_nets", + num_hypernodes); + }, + [&] { hn_weights.resize("Coarsening", "hn_weights", num_hypernodes); }, + [&] { tmp_hyperedges.resize("Coarsening", "tmp_hyperedges", num_hyperedges); }, + [&] { + tmp_incidence_array.resize("Coarsening", "tmp_incidence_array", num_pins); + }, + [&] { he_sizes.resize("Coarsening", "he_sizes", num_hyperedges); }, + [&] { + valid_hyperedges.resize("Coarsening", "valid_hyperedges", num_hyperedges); + }); } Array mapping; Array tmp_hypernodes; IncidentNets tmp_incident_nets; - Array> tmp_num_incident_nets; - Array> hn_weights; + Array > tmp_num_incident_nets; + Array > hn_weights; Array tmp_hyperedges; IncidenceArray tmp_incidence_array; Array he_sizes; Array valid_hyperedges; }; - public: +public: static constexpr bool is_graph = false; static constexpr bool is_static_hypergraph = true; static constexpr bool is_partitioned = false; @@ -401,54 +386,46 @@ class StaticHypergraph { // ! 
Iterator to iterate over the incident nets of a hypernode using IncidentNetsIterator = typename IncidentNets::const_iterator; - struct ParallelHyperedge { + struct ParallelHyperedge + { HyperedgeID removed_hyperedge; HyperedgeID representative; }; explicit StaticHypergraph() : - _num_hypernodes(0), - _num_removed_hypernodes(0), - _removed_degree_zero_hn_weight(0), - _num_hyperedges(0), - _num_removed_hyperedges(0), - _max_edge_size(0), - _num_pins(0), - _total_degree(0), - _total_weight(0), - _hypernodes(), - _incident_nets(), - _hyperedges(), - _incidence_array(), - _community_ids(0), - _fixed_vertices(), - _tmp_contraction_buffer(nullptr) { } - - StaticHypergraph(const StaticHypergraph&) = delete; - StaticHypergraph & operator= (const StaticHypergraph &) = delete; - - StaticHypergraph(StaticHypergraph&& other) : - _num_hypernodes(other._num_hypernodes), - _num_removed_hypernodes(other._num_removed_hypernodes), - _removed_degree_zero_hn_weight(other._removed_degree_zero_hn_weight), - _num_hyperedges(other._num_hyperedges), - _num_removed_hyperedges(other._num_removed_hyperedges), - _max_edge_size(other._max_edge_size), - _num_pins(other._num_pins), - _total_degree(other._total_degree), - _total_weight(other._total_weight), - _hypernodes(std::move(other._hypernodes)), - _incident_nets(std::move(other._incident_nets)), - _hyperedges(std::move(other._hyperedges)), - _incidence_array(std::move(other._incidence_array)), - _community_ids(std::move(other._community_ids)), - _fixed_vertices(std::move(other._fixed_vertices)), - _tmp_contraction_buffer(std::move(other._tmp_contraction_buffer)) { + _num_hypernodes(0), _num_removed_hypernodes(0), _removed_degree_zero_hn_weight(0), + _num_hyperedges(0), _num_removed_hyperedges(0), _max_edge_size(0), _num_pins(0), + _total_degree(0), _total_weight(0), _hypernodes(), _incident_nets(), _hyperedges(), + _incidence_array(), _community_ids(0), _fixed_vertices(), + _tmp_contraction_buffer(nullptr) + { + } + + StaticHypergraph(const StaticHypergraph &) = delete; + StaticHypergraph &operator=(const StaticHypergraph &) = delete; + + StaticHypergraph(StaticHypergraph &&other) : + _num_hypernodes(other._num_hypernodes), + _num_removed_hypernodes(other._num_removed_hypernodes), + _removed_degree_zero_hn_weight(other._removed_degree_zero_hn_weight), + _num_hyperedges(other._num_hyperedges), + _num_removed_hyperedges(other._num_removed_hyperedges), + _max_edge_size(other._max_edge_size), _num_pins(other._num_pins), + _total_degree(other._total_degree), _total_weight(other._total_weight), + _hypernodes(std::move(other._hypernodes)), + _incident_nets(std::move(other._incident_nets)), + _hyperedges(std::move(other._hyperedges)), + _incidence_array(std::move(other._incidence_array)), + _community_ids(std::move(other._community_ids)), + _fixed_vertices(std::move(other._fixed_vertices)), + _tmp_contraction_buffer(std::move(other._tmp_contraction_buffer)) + { _fixed_vertices.setHypergraph(this); other._tmp_contraction_buffer = nullptr; } - StaticHypergraph & operator= (StaticHypergraph&& other) { + StaticHypergraph &operator=(StaticHypergraph &&other) + { _num_hypernodes = other._num_hypernodes; _num_removed_hypernodes = other._num_removed_hypernodes; _removed_degree_zero_hn_weight = other._removed_degree_zero_hn_weight; @@ -470,9 +447,11 @@ class StaticHypergraph { return *this; } - ~StaticHypergraph() { - if ( _tmp_contraction_buffer ) { - delete(_tmp_contraction_buffer); + ~StaticHypergraph() + { + if(_tmp_contraction_buffer) + { + delete (_tmp_contraction_buffer); 
_tmp_contraction_buffer = nullptr; } freeInternalData(); @@ -481,49 +460,37 @@ class StaticHypergraph { // ####################### General Hypergraph Stats ####################### // ! Initial number of hypernodes - HypernodeID initialNumNodes() const { - return _num_hypernodes; - } + HypernodeID initialNumNodes() const { return _num_hypernodes; } // ! Number of removed hypernodes - HypernodeID numRemovedHypernodes() const { - return _num_removed_hypernodes; - } + HypernodeID numRemovedHypernodes() const { return _num_removed_hypernodes; } // ! Weight of removed degree zero vertics - HypernodeWeight weightOfRemovedDegreeZeroVertices() const { + HypernodeWeight weightOfRemovedDegreeZeroVertices() const + { return _removed_degree_zero_hn_weight; } // ! Initial number of hyperedges - HyperedgeID initialNumEdges() const { - return _num_hyperedges; - } + HyperedgeID initialNumEdges() const { return _num_hyperedges; } // ! Number of removed hyperedges - HyperedgeID numRemovedHyperedges() const { - return _num_removed_hyperedges; - } + HyperedgeID numRemovedHyperedges() const { return _num_removed_hyperedges; } // ! Set the number of removed hyperedges - void setNumRemovedHyperedges(const HyperedgeID num_removed_hyperedges) { + void setNumRemovedHyperedges(const HyperedgeID num_removed_hyperedges) + { _num_removed_hyperedges = num_removed_hyperedges; } // ! Initial number of pins - HypernodeID initialNumPins() const { - return _num_pins; - } + HypernodeID initialNumPins() const { return _num_pins; } // ! Initial sum of the degree of all vertices - HypernodeID initialTotalVertexDegree() const { - return _total_degree; - } + HypernodeID initialTotalVertexDegree() const { return _total_degree; } // ! Total weight of hypergraph - HypernodeWeight totalWeight() const { - return _total_weight; - } + HypernodeWeight totalWeight() const { return _total_weight; } // ! Computes the total node weight of the hypergraph void computeAndSetTotalNodeWeight(parallel_tag_t); @@ -532,10 +499,12 @@ class StaticHypergraph { // ! Iterates in parallel over all active nodes and calls function f // ! for each vertex - template - void doParallelForAllNodes(const F& f) const { - tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID& hn) { - if ( nodeIsEnabled(hn) ) { + template + void doParallelForAllNodes(const F &f) const + { + tbb::parallel_for(ID(0), _num_hypernodes, [&](const HypernodeID &hn) { + if(nodeIsEnabled(hn)) + { f(hn); } }); @@ -543,96 +512,101 @@ class StaticHypergraph { // ! Iterates in parallel over all active edges and calls function f // ! for each net - template - void doParallelForAllEdges(const F& f) const { - tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID& he) { - if ( edgeIsEnabled(he) ) { + template + void doParallelForAllEdges(const F &f) const + { + tbb::parallel_for(ID(0), _num_hyperedges, [&](const HyperedgeID &he) { + if(edgeIsEnabled(he)) + { f(he); } }); } // ! Returns a range of the active nodes of the hypergraph - IteratorRange nodes() const { + IteratorRange nodes() const + { return IteratorRange( - HypernodeIterator(_hypernodes.data(), ID(0), _num_hypernodes), - HypernodeIterator(_hypernodes.data() + _num_hypernodes, _num_hypernodes, _num_hypernodes)); + HypernodeIterator(_hypernodes.data(), ID(0), _num_hypernodes), + HypernodeIterator(_hypernodes.data() + _num_hypernodes, _num_hypernodes, + _num_hypernodes)); } // ! 
Returns a range of the active edges of the hypergraph - IteratorRange edges() const { + IteratorRange edges() const + { return IteratorRange( - HyperedgeIterator(_hyperedges.data(), ID(0), _num_hyperedges), - HyperedgeIterator(_hyperedges.data() + _num_hyperedges, _num_hyperedges, _num_hyperedges)); + HyperedgeIterator(_hyperedges.data(), ID(0), _num_hyperedges), + HyperedgeIterator(_hyperedges.data() + _num_hyperedges, _num_hyperedges, + _num_hyperedges)); } // ! Returns a range to loop over the incident nets of hypernode u. - IteratorRange incidentEdges(const HypernodeID u) const { + IteratorRange incidentEdges(const HypernodeID u) const + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); - const Hypernode& hn = hypernode(u); - return IteratorRange( - _incident_nets.cbegin() + hn.firstEntry(), - _incident_nets.cbegin() + hn.firstInvalidEntry()); + const Hypernode &hn = hypernode(u); + return IteratorRange(_incident_nets.cbegin() + hn.firstEntry(), + _incident_nets.cbegin() + + hn.firstInvalidEntry()); } // ! Returns a range to loop over the pins of hyperedge e. - IteratorRange pins(const HyperedgeID e) const { + IteratorRange pins(const HyperedgeID e) const + { ASSERT(!hyperedge(e).isDisabled(), "Hyperedge" << e << "is disabled"); - const Hyperedge& he = hyperedge(e); - return IteratorRange( - _incidence_array.cbegin() + he.firstEntry(), - _incidence_array.cbegin() + he.firstInvalidEntry()); + const Hyperedge &he = hyperedge(e); + return IteratorRange(_incidence_array.cbegin() + he.firstEntry(), + _incidence_array.cbegin() + + he.firstInvalidEntry()); } - // ####################### Hypernode Information ####################### + // ####################### Hypernode Information ####################### // ! Weight of a vertex - HypernodeWeight nodeWeight(const HypernodeID u) const { - return hypernode(u).weight(); - } + HypernodeWeight nodeWeight(const HypernodeID u) const { return hypernode(u).weight(); } // ! Sets the weight of a vertex - void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) { + void setNodeWeight(const HypernodeID u, const HypernodeWeight weight) + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); return hypernode(u).setWeight(weight); } // ! Degree of a hypernode - HyperedgeID nodeDegree(const HypernodeID u) const { + HyperedgeID nodeDegree(const HypernodeID u) const + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); return hypernode(u).size(); } // ! Returns, whether a hypernode is enabled or not - bool nodeIsEnabled(const HypernodeID u) const { - return !hypernode(u).isDisabled(); - } + bool nodeIsEnabled(const HypernodeID u) const { return !hypernode(u).isDisabled(); } // ! Enables a hypernode (must be disabled before) - void enableHypernode(const HypernodeID u) { - hypernode(u).enable(); - } + void enableHypernode(const HypernodeID u) { hypernode(u).enable(); } // ! Disables a hypernode (must be enabled before) - void disableHypernode(const HypernodeID u) { - hypernode(u).disable(); - } + void disableHypernode(const HypernodeID u) { hypernode(u).disable(); } // ! Removes a hypernode (must be enabled before) - void removeHypernode(const HypernodeID u) { + void removeHypernode(const HypernodeID u) + { hypernode(u).disable(); ++_num_removed_hypernodes; } // ! 
Removes a degree zero hypernode - void removeDegreeZeroHypernode(const HypernodeID u) { + void removeDegreeZeroHypernode(const HypernodeID u) + { ASSERT(nodeDegree(u) == 0); _removed_degree_zero_hn_weight += nodeWeight(u); removeHypernode(u); } // ! Restores a degree zero hypernode - void restoreDegreeZeroHypernode(const HypernodeID u) { + void restoreDegreeZeroHypernode(const HypernodeID u) + { hypernode(u).enable(); ASSERT(nodeDegree(u) == 0); _removed_degree_zero_hn_weight -= nodeWeight(u); @@ -641,89 +615,86 @@ class StaticHypergraph { // ####################### Hyperedge Information ####################### // ! Weight of a hyperedge - HypernodeWeight edgeWeight(const HyperedgeID e) const { + HypernodeWeight edgeWeight(const HyperedgeID e) const + { ASSERT(!hyperedge(e).isDisabled(), "Hyperedge" << e << "is disabled"); return hyperedge(e).weight(); } // ! Sets the weight of a hyperedge - void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) { + void setEdgeWeight(const HyperedgeID e, const HyperedgeWeight weight) + { ASSERT(!hyperedge(e).isDisabled(), "Hyperedge" << e << "is disabled"); return hyperedge(e).setWeight(weight); } // ! Number of pins of a hyperedge - HypernodeID edgeSize(const HyperedgeID e) const { + HypernodeID edgeSize(const HyperedgeID e) const + { ASSERT(!hyperedge(e).isDisabled(), "Hyperedge" << e << "is disabled"); return hyperedge(e).size(); } // ! Maximum size of a hyperedge - HypernodeID maxEdgeSize() const { - return _max_edge_size; - } + HypernodeID maxEdgeSize() const { return _max_edge_size; } // ! Returns, whether a hyperedge is enabled or not - bool edgeIsEnabled(const HyperedgeID e) const { - return !hyperedge(e).isDisabled(); - } + bool edgeIsEnabled(const HyperedgeID e) const { return !hyperedge(e).isDisabled(); } // ! Enables a hyperedge (must be disabled before) - void enableHyperedge(const HyperedgeID e) { - hyperedge(e).enable(); - } + void enableHyperedge(const HyperedgeID e) { hyperedge(e).enable(); } // ! Disabled a hyperedge (must be enabled before) - void disableHyperedge(const HyperedgeID e) { - hyperedge(e).disable(); - } + void disableHyperedge(const HyperedgeID e) { hyperedge(e).disable(); } // ! Community id which hypernode u is assigned to - PartitionID communityID(const HypernodeID u) const { - return _community_ids[u]; - } + PartitionID communityID(const HypernodeID u) const { return _community_ids[u]; } // ! 
Assign a community to a hypernode - void setCommunityID(const HypernodeID u, const PartitionID community_id) { + void setCommunityID(const HypernodeID u, const PartitionID community_id) + { _community_ids[u] = community_id; } // ####################### Fixed Vertex Support ####################### - void addFixedVertexSupport(FixedVertexSupport&& fixed_vertices) { + void addFixedVertexSupport(FixedVertexSupport &&fixed_vertices) + { _fixed_vertices = std::move(fixed_vertices); _fixed_vertices.setHypergraph(this); } - bool hasFixedVertices() const { - return _fixed_vertices.hasFixedVertices(); - } + bool hasFixedVertices() const { return _fixed_vertices.hasFixedVertices(); } - HypernodeWeight totalFixedVertexWeight() const { + HypernodeWeight totalFixedVertexWeight() const + { return _fixed_vertices.totalFixedVertexWeight(); } - HypernodeWeight fixedVertexBlockWeight(const PartitionID block) const { + HypernodeWeight fixedVertexBlockWeight(const PartitionID block) const + { return _fixed_vertices.fixedVertexBlockWeight(block); } - bool isFixed(const HypernodeID hn) const { - return _fixed_vertices.isFixed(hn); - } + bool isFixed(const HypernodeID hn) const { return _fixed_vertices.isFixed(hn); } - PartitionID fixedVertexBlock(const HypernodeID hn) const { + PartitionID fixedVertexBlock(const HypernodeID hn) const + { return _fixed_vertices.fixedVertexBlock(hn); } - void setMaxFixedVertexBlockWeight(const std::vector max_block_weights) { + void setMaxFixedVertexBlockWeight(const std::vector max_block_weights) + { _fixed_vertices.setMaxBlockWeight(max_block_weights); } - const FixedVertexSupport& fixedVertexSupport() const { + const FixedVertexSupport &fixedVertexSupport() const + { return _fixed_vertices; } - FixedVertexSupport copyOfFixedVertexSupport() const { + FixedVertexSupport copyOfFixedVertexSupport() const + { return _fixed_vertices.copy(); } @@ -738,49 +709,56 @@ class StaticHypergraph { * * \param communities Community structure that should be contracted */ - StaticHypergraph contract(parallel::scalable_vector& communities, bool deterministic = false); + StaticHypergraph contract(parallel::scalable_vector &communities, + bool deterministic = false); - bool registerContraction(const HypernodeID, const HypernodeID) { + bool registerContraction(const HypernodeID, const HypernodeID) + { throw NonSupportedOperationException( - "registerContraction(u, v) is not supported in static hypergraph"); + "registerContraction(u, v) is not supported in static hypergraph"); return false; } - size_t contract(const HypernodeID, - const HypernodeWeight max_node_weight = std::numeric_limits::max()) { + size_t contract(const HypernodeID, const HypernodeWeight max_node_weight = + std::numeric_limits::max()) + { unused(max_node_weight); throw NonSupportedOperationException( - "contract(v, max_node_weight) is not supported in static hypergraph"); + "contract(v, max_node_weight) is not supported in static hypergraph"); return 0; } - void uncontract(const Batch&, - const UncontractionFunction& case_one_func = NOOP_BATCH_FUNC, - const UncontractionFunction& case_two_func = NOOP_BATCH_FUNC) { + void uncontract(const Batch &, + const UncontractionFunction &case_one_func = NOOP_BATCH_FUNC, + const UncontractionFunction &case_two_func = NOOP_BATCH_FUNC) + { unused(case_one_func); unused(case_two_func); throw NonSupportedOperationException( - "uncontract(batch) is not supported in static hypergraph"); + "uncontract(batch) is not supported in static hypergraph"); } - VersionedBatchVector 
createBatchUncontractionHierarchy(const size_t) { - throw NonSupportedOperationException( - "createBatchUncontractionHierarchy(batch_size) is not supported in static hypergraph"); - return { }; + VersionedBatchVector createBatchUncontractionHierarchy(const size_t) + { + throw NonSupportedOperationException("createBatchUncontractionHierarchy(batch_size) " + "is not supported in static hypergraph"); + return {}; } // ####################### Remove / Restore Hyperedges ####################### /*! - * Removes a hyperedge from the hypergraph. This includes the removal of he from all - * of its pins and to disable the hyperedge. - * - * NOTE, this function is not thread-safe and should only be called in a single-threaded - * setting. - */ - void removeEdge(const HyperedgeID he) { + * Removes a hyperedge from the hypergraph. This includes removing he from all + * of its pins and disabling the hyperedge. + * + * NOTE, this function is not thread-safe and should only be called in a single-threaded + * setting. + */ + void removeEdge(const HyperedgeID he) + { ASSERT(edgeIsEnabled(he), "Hyperedge" << he << "is disabled"); - for ( const HypernodeID& pin : pins(he) ) { + for(const HypernodeID &pin : pins(he)) + { removeIncidentEdgeFromHypernode(he, pin); } ++_num_removed_hyperedges; @@ -788,14 +766,15 @@ class StaticHypergraph { } /*! - * Removes a hyperedge from the hypergraph. This includes the removal of he from all - * of its pins and to disable the hyperedge. Noze, in contrast to removeEdge, this function - * removes hyperedge from all its pins in parallel. - * - * NOTE, this function is not thread-safe and should only be called in a single-threaded - * setting. - */ - void removeLargeEdge(const HyperedgeID he) { + * Removes a hyperedge from the hypergraph. This includes removing he from all + * of its pins and disabling the hyperedge. Note that, in contrast to removeEdge, this + * function removes the hyperedge from all its pins in parallel. + * + * NOTE, this function is not thread-safe and should only be called in a single-threaded + * setting. + */ + void removeLargeEdge(const HyperedgeID he) + { ASSERT(edgeIsEnabled(he), "Hyperedge" << he << "is disabled"); const size_t incidence_array_start = hyperedge(he).firstEntry(); const size_t incidence_array_end = hyperedge(he).firstInvalidEntry(); @@ -809,7 +788,8 @@ class StaticHypergraph { /*! * Restores a large hyperedge previously removed from the hypergraph.
*/ - void restoreLargeEdge(const HyperedgeID& he) { + void restoreLargeEdge(const HyperedgeID &he) + { ASSERT(!edgeIsEnabled(he), "Hyperedge" << he << "is enabled"); enableHyperedge(he); const size_t incidence_array_start = hyperedge(he).firstEntry(); @@ -820,28 +800,33 @@ class StaticHypergraph { }); } - parallel::scalable_vector removeSinglePinAndParallelHyperedges() { + parallel::scalable_vector removeSinglePinAndParallelHyperedges() + { throw NonSupportedOperationException( - "removeSinglePinAndParallelHyperedges() is not supported in static hypergraph"); - return { }; + "removeSinglePinAndParallelHyperedges() is not supported in static hypergraph"); + return {}; } - void restoreSinglePinAndParallelNets(const parallel::scalable_vector&) { + void + restoreSinglePinAndParallelNets(const parallel::scalable_vector &) + { throw NonSupportedOperationException( - "restoreSinglePinAndParallelNets(hes_to_restore) is not supported in static hypergraph"); + "restoreSinglePinAndParallelNets(hes_to_restore) is not supported in static " + "hypergraph"); } // ####################### Initialization / Reset Functions ####################### // ! Reset internal community information - void copyCommunityIDs(const parallel::scalable_vector& community_ids) { + void copyCommunityIDs(const parallel::scalable_vector &community_ids) + { ASSERT(community_ids.size() == UI64(_num_hypernodes)); - doParallelForAllNodes([&](const HypernodeID& hn) { - _community_ids[hn] = community_ids[hn]; - }); + doParallelForAllNodes( + [&](const HypernodeID &hn) { _community_ids[hn] = community_ids[hn]; }); } - void setCommunityIDs(ds::Clustering&& communities) { + void setCommunityIDs(ds::Clustering &&communities) + { ASSERT(communities.size() == initialNumNodes()); _community_ids = std::move(communities); } @@ -853,88 +838,102 @@ class StaticHypergraph { StaticHypergraph copy() const; // ! Reset internal data structure - void reset() { } + void reset() {} // ! Free internal data in parallel - void freeInternalData() { - if ( _num_hypernodes > 0 || _num_hyperedges > 0 ) { + void freeInternalData() + { + if(_num_hypernodes > 0 || _num_hyperedges > 0) + { freeTmpContractionBuffer(); } _num_hypernodes = 0; _num_hyperedges = 0; } - void freeTmpContractionBuffer() { - if ( _tmp_contraction_buffer ) { - delete(_tmp_contraction_buffer); + void freeTmpContractionBuffer() + { + if(_tmp_contraction_buffer) + { + delete (_tmp_contraction_buffer); _tmp_contraction_buffer = nullptr; } } - void memoryConsumption(utils::MemoryTreeNode* parent) const; + void memoryConsumption(utils::MemoryTreeNode *parent) const; - // ! Only for testing - bool verifyIncidenceArrayAndIncidentNets() { + // ! Only for testing + bool verifyIncidenceArrayAndIncidentNets() + { throw NonSupportedOperationException( - "verifyIncidenceArrayAndIncidentNets() not supported in static hypergraph"); + "verifyIncidenceArrayAndIncidentNets() not supported in static hypergraph"); return false; } - private: +private: friend class StaticHypergraphFactory; - template + template friend class CommunitySupport; - template + template friend class PartitionedHypergraph; // ####################### Hypernode Information ####################### // ! Accessor for hypernode-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Hypernode& hypernode(const HypernodeID u) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Hypernode &hypernode(const HypernodeID u) const + { ASSERT(u <= _num_hypernodes, "Hypernode" << u << "does not exist"); return _hypernodes[u]; } // ! 
To avoid code duplication we implement non-const version in terms of const version - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Hypernode& hypernode(const HypernodeID u) { - return const_cast(static_cast(*this).hypernode(u)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Hypernode &hypernode(const HypernodeID u) + { + return const_cast( + static_cast(*this).hypernode(u)); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE IteratorRange incident_nets_of(const HypernodeID u, - const size_t pos = 0) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE IteratorRange + incident_nets_of(const HypernodeID u, const size_t pos = 0) const + { ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); - const Hypernode& hn = hypernode(u); + const Hypernode &hn = hypernode(u); return IteratorRange( - _incident_nets.cbegin() + hn.firstEntry() + pos, - _incident_nets.cbegin() + hn.firstInvalidEntry()); + _incident_nets.cbegin() + hn.firstEntry() + pos, + _incident_nets.cbegin() + hn.firstInvalidEntry()); } // ####################### Hyperedge Information ####################### // ! Accessor for hyperedge-related information - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Hyperedge& hyperedge(const HyperedgeID e) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE const Hyperedge &hyperedge(const HyperedgeID e) const + { ASSERT(e <= _num_hyperedges, "Hyperedge" << e << "does not exist"); return _hyperedges[e]; } // ! To avoid code duplication we implement non-const version in terms of const version - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Hyperedge& hyperedge(const HyperedgeID e) { - return const_cast(static_cast(*this).hyperedge(e)); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE Hyperedge &hyperedge(const HyperedgeID e) + { + return const_cast( + static_cast(*this).hyperedge(e)); } // ####################### Remove / Restore Hyperedges ####################### // ! Removes hyperedge e from the incident nets of vertex hn - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void removeIncidentEdgeFromHypernode(const HyperedgeID e, - const HypernodeID u) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + removeIncidentEdgeFromHypernode(const HyperedgeID e, const HypernodeID u) + { using std::swap; ASSERT(!hypernode(u).isDisabled(), "Hypernode" << u << "is disabled"); - Hypernode& hn = hypernode(u); + Hypernode &hn = hypernode(u); size_t incident_nets_pos = hn.firstEntry(); - for ( ; incident_nets_pos < hn.firstInvalidEntry(); ++incident_nets_pos ) { - if ( _incident_nets[incident_nets_pos] == e ) { + for(; incident_nets_pos < hn.firstInvalidEntry(); ++incident_nets_pos) + { + if(_incident_nets[incident_nets_pos] == e) + { break; } } @@ -944,19 +943,23 @@ class StaticHypergraph { } // ! 
Inserts hyperedge he to incident nets array of vertex hn - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void insertIncidentEdgeToHypernode(const HyperedgeID e, - const HypernodeID u) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + insertIncidentEdgeToHypernode(const HyperedgeID e, const HypernodeID u) + { using std::swap; - Hypernode& hn = hypernode(u); + Hypernode &hn = hypernode(u); ASSERT(!hn.isDisabled(), "Hypernode" << u << "is disabled"); HEAVY_REFINEMENT_ASSERT(std::count(_incident_nets.cbegin() + hn.firstEntry(), - _incident_nets.cbegin() + hn.firstInvalidEntry(), e) == 0, - "HN" << u << "is already connected to HE" << e); + _incident_nets.cbegin() + hn.firstInvalidEntry(), + e) == 0, + "HN" << u << "is already connected to HE" << e); const size_t incident_nets_start = hn.firstInvalidEntry(); const size_t incident_nets_end = hypernode(u + 1).firstEntry(); size_t incident_nets_pos = incident_nets_start; - for ( ; incident_nets_pos < incident_nets_end; ++incident_nets_pos ) { - if ( _incident_nets[incident_nets_pos] == e ) { + for(; incident_nets_pos < incident_nets_end; ++incident_nets_pos) + { + if(_incident_nets[incident_nets_pos] == e) + { break; } } @@ -966,10 +969,12 @@ class StaticHypergraph { } // ! Allocate the temporary contraction buffer - void allocateTmpContractionBuffer() { - if ( !_tmp_contraction_buffer ) { - _tmp_contraction_buffer = new TmpContractionBuffer( - _num_hypernodes, _num_hyperedges, _num_pins); + void allocateTmpContractionBuffer() + { + if(!_tmp_contraction_buffer) + { + _tmp_contraction_buffer = + new TmpContractionBuffer(_num_hypernodes, _num_hyperedges, _num_pins); } } @@ -1009,7 +1014,7 @@ class StaticHypergraph { // ! Data that is reused throughout the multilevel hierarchy // ! to contract the hypergraph and to prevent expensive allocations - TmpContractionBuffer* _tmp_contraction_buffer; + TmpContractionBuffer *_tmp_contraction_buffer; }; } // namespace ds diff --git a/mt-kahypar/datastructures/static_hypergraph_factory.cpp b/mt-kahypar/datastructures/static_hypergraph_factory.cpp index eb40e7d5d..9dcdb3bbf 100644 --- a/mt-kahypar/datastructures/static_hypergraph_factory.cpp +++ b/mt-kahypar/datastructures/static_hypergraph_factory.cpp @@ -35,132 +35,140 @@ namespace mt_kahypar::ds { - StaticHypergraph StaticHypergraphFactory::construct( - const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const HyperedgeVector& edge_vector, - const HyperedgeWeight* hyperedge_weight, - const HypernodeWeight* hypernode_weight, - const bool stable_construction_of_incident_edges) { - StaticHypergraph hypergraph; - hypergraph._num_hypernodes = num_hypernodes; - hypergraph._num_hyperedges = num_hyperedges; - hypergraph._hypernodes.resize(num_hypernodes + 1); - hypergraph._hyperedges.resize(num_hyperedges + 1); - - ASSERT(edge_vector.size() == num_hyperedges); - - // Compute number of pins per hyperedge and number - // of incident nets per vertex - Counter num_pins_per_hyperedge(num_hyperedges, 0); - ThreadLocalCounter local_incident_nets_per_vertex(num_hypernodes, 0); - tbb::enumerable_thread_specific local_max_edge_size(UL(0)); - tbb::parallel_for(ID(0), num_hyperedges, [&](const size_t pos) { - Counter& num_incident_nets_per_vertex = local_incident_nets_per_vertex.local(); - num_pins_per_hyperedge[pos] = edge_vector[pos].size(); - local_max_edge_size.local() = std::max( - local_max_edge_size.local(), edge_vector[pos].size()); - for ( const HypernodeID& pin : edge_vector[pos] ) { - ASSERT(pin < num_hypernodes, V(pin) << V(num_hypernodes)); - 
++num_incident_nets_per_vertex[pin]; - } - }); - hypergraph._max_edge_size = local_max_edge_size.combine( - [&](const size_t lhs, const size_t rhs) { - return std::max(lhs, rhs); - }); - - // We sum up the number of incident nets per vertex only thread local. - // To obtain the global number of incident nets per vertex, we iterate - // over each thread local counter and sum it up. - Counter num_incident_nets_per_vertex(num_hypernodes, 0); - for ( Counter& c : local_incident_nets_per_vertex ) { - tbb::parallel_for(ID(0), num_hypernodes, [&](const size_t pos) { - num_incident_nets_per_vertex[pos] += c[pos]; - }); +StaticHypergraph StaticHypergraphFactory::construct( + const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, + const HyperedgeVector &edge_vector, const HyperedgeWeight *hyperedge_weight, + const HypernodeWeight *hypernode_weight, + const bool stable_construction_of_incident_edges) +{ + StaticHypergraph hypergraph; + hypergraph._num_hypernodes = num_hypernodes; + hypergraph._num_hyperedges = num_hyperedges; + hypergraph._hypernodes.resize(num_hypernodes + 1); + hypergraph._hyperedges.resize(num_hyperedges + 1); + + ASSERT(edge_vector.size() == num_hyperedges); + + // Compute number of pins per hyperedge and number + // of incident nets per vertex + Counter num_pins_per_hyperedge(num_hyperedges, 0); + ThreadLocalCounter local_incident_nets_per_vertex(num_hypernodes, 0); + tbb::enumerable_thread_specific local_max_edge_size(UL(0)); + tbb::parallel_for(ID(0), num_hyperedges, [&](const size_t pos) { + Counter &num_incident_nets_per_vertex = local_incident_nets_per_vertex.local(); + num_pins_per_hyperedge[pos] = edge_vector[pos].size(); + local_max_edge_size.local() = + std::max(local_max_edge_size.local(), edge_vector[pos].size()); + for(const HypernodeID &pin : edge_vector[pos]) + { + ASSERT(pin < num_hypernodes, V(pin) << V(num_hypernodes)); + ++num_incident_nets_per_vertex[pin]; } - - // Compute prefix sum over the number of pins per hyperedge and the - // number of incident nets per vertex. The prefix sum is used than as - // start position for each hyperedge resp. hypernode in the incidence - // resp. incident nets array. - parallel::TBBPrefixSum pin_prefix_sum(num_pins_per_hyperedge); - parallel::TBBPrefixSum incident_net_prefix_sum(num_incident_nets_per_vertex); - tbb::parallel_invoke([&] { - tbb::parallel_scan(tbb::blocked_range( - UL(0), UI64(num_hyperedges)), pin_prefix_sum); - }, [&] { - tbb::parallel_scan(tbb::blocked_range( - UL(0), UI64(num_hypernodes)), incident_net_prefix_sum); + }); + hypergraph._max_edge_size = local_max_edge_size.combine( + [&](const size_t lhs, const size_t rhs) { return std::max(lhs, rhs); }); + + // We sum up the number of incident nets per vertex only thread local. + // To obtain the global number of incident nets per vertex, we iterate + // over each thread local counter and sum it up. 
+ Counter num_incident_nets_per_vertex(num_hypernodes, 0); + for(Counter &c : local_incident_nets_per_vertex) + { + tbb::parallel_for(ID(0), num_hypernodes, [&](const size_t pos) { + num_incident_nets_per_vertex[pos] += c[pos]; }); + } - ASSERT(pin_prefix_sum.total_sum() == incident_net_prefix_sum.total_sum()); - hypergraph._num_pins = pin_prefix_sum.total_sum(); - hypergraph._total_degree = incident_net_prefix_sum.total_sum(); - hypergraph._incident_nets.resize(hypergraph._num_pins); - hypergraph._incidence_array.resize(hypergraph._num_pins); - - AtomicCounter incident_nets_position(num_hypernodes, - parallel::IntegralAtomicWrapper(0)); - - auto setup_hyperedges = [&] { - tbb::parallel_for(ID(0), num_hyperedges, [&](const size_t pos) { - StaticHypergraph::Hyperedge& hyperedge = hypergraph._hyperedges[pos]; - hyperedge.enable(); - hyperedge.setFirstEntry(pin_prefix_sum[pos]); - hyperedge.setSize(pin_prefix_sum.value(pos)); - if ( hyperedge_weight ) { - hyperedge.setWeight(hyperedge_weight[pos]); - } - - const HyperedgeID he = pos; - size_t incidence_array_pos = hyperedge.firstEntry(); - for ( const HypernodeID& pin : edge_vector[pos] ) { - ASSERT(incidence_array_pos < hyperedge.firstInvalidEntry()); - ASSERT(pin < num_hypernodes); - // Add pin to incidence array - hypergraph._incidence_array[incidence_array_pos++] = pin; - // Add hyperedge he as a incident net to pin - const size_t incident_nets_pos = incident_net_prefix_sum[pin] + incident_nets_position[pin]++; - ASSERT(incident_nets_pos < incident_net_prefix_sum[pin + 1]); - hypergraph._incident_nets[incident_nets_pos] = he; - } - }); - }; - - auto setup_hypernodes = [&] { - tbb::parallel_for(ID(0), num_hypernodes, [&](const size_t pos) { - StaticHypergraph::Hypernode& hypernode = hypergraph._hypernodes[pos]; - hypernode.enable(); - hypernode.setFirstEntry(incident_net_prefix_sum[pos]); - hypernode.setSize(incident_net_prefix_sum.value(pos)); - if ( hypernode_weight ) { - hypernode.setWeight(hypernode_weight[pos]); - } + // Compute prefix sum over the number of pins per hyperedge and the + // number of incident nets per vertex. The prefix sum is used than as + // start position for each hyperedge resp. hypernode in the incidence + // resp. incident nets array. 
+ parallel::TBBPrefixSum pin_prefix_sum(num_pins_per_hyperedge); + parallel::TBBPrefixSum incident_net_prefix_sum(num_incident_nets_per_vertex); + tbb::parallel_invoke( + [&] { + tbb::parallel_scan(tbb::blocked_range(UL(0), UI64(num_hyperedges)), + pin_prefix_sum); + }, + [&] { + tbb::parallel_scan(tbb::blocked_range(UL(0), UI64(num_hypernodes)), + incident_net_prefix_sum); }); - }; - auto init_communities = [&] { - hypergraph._community_ids.resize(num_hypernodes, 0); - }; + ASSERT(pin_prefix_sum.total_sum() == incident_net_prefix_sum.total_sum()); + hypergraph._num_pins = pin_prefix_sum.total_sum(); + hypergraph._total_degree = incident_net_prefix_sum.total_sum(); + hypergraph._incident_nets.resize(hypergraph._num_pins); + hypergraph._incidence_array.resize(hypergraph._num_pins); - tbb::parallel_invoke(setup_hyperedges, setup_hypernodes, init_communities); + AtomicCounter incident_nets_position(num_hypernodes, + parallel::IntegralAtomicWrapper(0)); - if (stable_construction_of_incident_edges) { - // sort incident hyperedges of each node, so their ordering is independent of scheduling (and the same as a typical sequential implementation) - tbb::parallel_for(ID(0), num_hypernodes, [&](HypernodeID u) { - auto b = hypergraph._incident_nets.begin() + hypergraph.hypernode(u).firstEntry(); - auto e = hypergraph._incident_nets.begin() + hypergraph.hypernode(u).firstInvalidEntry(); - std::sort(b, e); - }); - } + auto setup_hyperedges = [&] { + tbb::parallel_for(ID(0), num_hyperedges, [&](const size_t pos) { + StaticHypergraph::Hyperedge &hyperedge = hypergraph._hyperedges[pos]; + hyperedge.enable(); + hyperedge.setFirstEntry(pin_prefix_sum[pos]); + hyperedge.setSize(pin_prefix_sum.value(pos)); + if(hyperedge_weight) + { + hyperedge.setWeight(hyperedge_weight[pos]); + } + + const HyperedgeID he = pos; + size_t incidence_array_pos = hyperedge.firstEntry(); + for(const HypernodeID &pin : edge_vector[pos]) + { + ASSERT(incidence_array_pos < hyperedge.firstInvalidEntry()); + ASSERT(pin < num_hypernodes); + // Add pin to incidence array + hypergraph._incidence_array[incidence_array_pos++] = pin; + // Add hyperedge he as a incident net to pin + const size_t incident_nets_pos = + incident_net_prefix_sum[pin] + incident_nets_position[pin]++; + ASSERT(incident_nets_pos < incident_net_prefix_sum[pin + 1]); + hypergraph._incident_nets[incident_nets_pos] = he; + } + }); + }; + + auto setup_hypernodes = [&] { + tbb::parallel_for(ID(0), num_hypernodes, [&](const size_t pos) { + StaticHypergraph::Hypernode &hypernode = hypergraph._hypernodes[pos]; + hypernode.enable(); + hypernode.setFirstEntry(incident_net_prefix_sum[pos]); + hypernode.setSize(incident_net_prefix_sum.value(pos)); + if(hypernode_weight) + { + hypernode.setWeight(hypernode_weight[pos]); + } + }); + }; + + auto init_communities = [&] { hypergraph._community_ids.resize(num_hypernodes, 0); }; - // Add Sentinels - hypergraph._hypernodes.back() = StaticHypergraph::Hypernode(hypergraph._incident_nets.size()); - hypergraph._hyperedges.back() = StaticHypergraph::Hyperedge(hypergraph._incidence_array.size()); + tbb::parallel_invoke(setup_hyperedges, setup_hypernodes, init_communities); - hypergraph.computeAndSetTotalNodeWeight(parallel_tag_t()); - return hypergraph; + if(stable_construction_of_incident_edges) + { + // sort incident hyperedges of each node, so their ordering is independent of + // scheduling (and the same as a typical sequential implementation) + tbb::parallel_for(ID(0), num_hypernodes, [&](HypernodeID u) { + auto b = 
hypergraph._incident_nets.begin() + hypergraph.hypernode(u).firstEntry(); + auto e = + hypergraph._incident_nets.begin() + hypergraph.hypernode(u).firstInvalidEntry(); + std::sort(b, e); + }); } + // Add Sentinels + hypergraph._hypernodes.back() = + StaticHypergraph::Hypernode(hypergraph._incident_nets.size()); + hypergraph._hyperedges.back() = + StaticHypergraph::Hyperedge(hypergraph._incidence_array.size()); + + hypergraph.computeAndSetTotalNodeWeight(parallel_tag_t()); + return hypergraph; +} + } \ No newline at end of file diff --git a/mt-kahypar/datastructures/static_hypergraph_factory.h b/mt-kahypar/datastructures/static_hypergraph_factory.h index d583035f6..75ece0409 100644 --- a/mt-kahypar/datastructures/static_hypergraph_factory.h +++ b/mt-kahypar/datastructures/static_hypergraph_factory.h @@ -33,31 +33,35 @@ #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/utils/exception.h" - namespace mt_kahypar::ds { -class StaticHypergraphFactory { +class StaticHypergraphFactory +{ - using HyperedgeVector = parallel::scalable_vector>; + using HyperedgeVector = + parallel::scalable_vector >; using Counter = parallel::scalable_vector; - using AtomicCounter = parallel::scalable_vector>; + using AtomicCounter = + parallel::scalable_vector >; using ThreadLocalCounter = tbb::enumerable_thread_specific; - public: - static StaticHypergraph construct(const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const HyperedgeVector& edge_vector, - const HyperedgeWeight* hyperedge_weight = nullptr, - const HypernodeWeight* hypernode_weight = nullptr, - const bool stable_construction_of_incident_edges = false); +public: + static StaticHypergraph + construct(const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, + const HyperedgeVector &edge_vector, + const HyperedgeWeight *hyperedge_weight = nullptr, + const HypernodeWeight *hypernode_weight = nullptr, + const bool stable_construction_of_incident_edges = false); - static std::pair> compactify(const StaticHypergraph&) { + static std::pair > + compactify(const StaticHypergraph &) + { throw NonSupportedOperationException( - "Compactify not implemented for static hypergraph."); + "Compactify not implemented for static hypergraph."); } - private: - StaticHypergraphFactory() { } +private: + StaticHypergraphFactory() {} }; } // namespace mt_kahypar \ No newline at end of file diff --git a/mt-kahypar/datastructures/streaming_vector.h b/mt-kahypar/datastructures/streaming_vector.h index d2137670a..fafdc313a 100644 --- a/mt-kahypar/datastructures/streaming_vector.h +++ b/mt-kahypar/datastructures/streaming_vector.h @@ -51,45 +51,54 @@ namespace ds { * that the calling threads are all scheduled on an unique CPU. 
*/ template -class StreamingVector { - static_assert(std::is_trivially_copyable::value, "Value must be trivially copyable"); +class StreamingVector +{ + static_assert(std::is_trivially_copyable::value, + "Value must be trivially copyable"); static constexpr bool debug = false; using Buffer = parallel::scalable_vector >; - public: +public: StreamingVector() : - _cpu_buffer(std::thread::hardware_concurrency()), - _prefix_sum(std::thread::hardware_concurrency()) { } + _cpu_buffer(std::thread::hardware_concurrency()), + _prefix_sum(std::thread::hardware_concurrency()) + { + } - StreamingVector(const StreamingVector&) = delete; - StreamingVector & operator= (const StreamingVector &) = delete; + StreamingVector(const StreamingVector &) = delete; + StreamingVector &operator=(const StreamingVector &) = delete; - StreamingVector(StreamingVector&& other) = default; - StreamingVector & operator= (StreamingVector &&) = default; + StreamingVector(StreamingVector &&other) = default; + StreamingVector &operator=(StreamingVector &&) = default; - template - void stream(Args&& ... args) { + template + void stream(Args &&...args) + { int cpu_id = THREAD_ID; _cpu_buffer[cpu_id].emplace_back(std::forward(args)...); } - parallel::scalable_vector copy_sequential() { + parallel::scalable_vector copy_sequential() + { parallel::scalable_vector values; size_t total_size = init_prefix_sum(); values.resize(total_size); - for (int cpu_id = 0; cpu_id < (int)_cpu_buffer.size(); ++cpu_id) { + for(int cpu_id = 0; cpu_id < (int)_cpu_buffer.size(); ++cpu_id) + { memcpy_from_cpu_buffer_to_destination(values, cpu_id, _prefix_sum[cpu_id]); } return values; } - parallel::scalable_vector copy_parallel() { + parallel::scalable_vector copy_parallel() + { parallel::scalable_vector values; size_t total_size = init_prefix_sum(); - if (total_size == 0) { + if(total_size == 0) + { return values; } values.resize(total_size); @@ -100,11 +109,13 @@ class StreamingVector { return values; } - size_t copy_parallel(parallel::scalable_vector& values) { + size_t copy_parallel(parallel::scalable_vector &values) + { const size_t size = init_prefix_sum(); // Resize if necassary - if ( size > values.size() ) { + if(size > values.size()) + { values.resize(size); } @@ -114,42 +125,48 @@ class StreamingVector { return size; } - const Value& value(const size_t cpu_id, const size_t idx) { + const Value &value(const size_t cpu_id, const size_t idx) + { ASSERT(cpu_id < _cpu_buffer.size()); ASSERT(idx < _cpu_buffer[cpu_id].size()); return _cpu_buffer[cpu_id][idx]; } - size_t num_buffers() const { - return _cpu_buffer.size(); - } + size_t num_buffers() const { return _cpu_buffer.size(); } - size_t size() const { + size_t size() const + { size_t size = 0; - for (size_t i = 0; i < _cpu_buffer.size(); ++i) { + for(size_t i = 0; i < _cpu_buffer.size(); ++i) + { size += _cpu_buffer[i].size(); } return size; } - size_t size(const size_t cpu_id) const { + size_t size(const size_t cpu_id) const + { ASSERT(cpu_id < _cpu_buffer.size()); return _cpu_buffer[cpu_id].size(); } - size_t prefix_sum(const size_t cpu_id) const { + size_t prefix_sum(const size_t cpu_id) const + { ASSERT(cpu_id < _prefix_sum.size()); return _prefix_sum[cpu_id]; } - void clear_sequential() { - for ( int cpu_id = 0; cpu_id < static_cast(_cpu_buffer.size()); ++cpu_id ) { + void clear_sequential() + { + for(int cpu_id = 0; cpu_id < static_cast(_cpu_buffer.size()); ++cpu_id) + { _cpu_buffer[cpu_id].clear(); } _prefix_sum.assign(_cpu_buffer.size(), 0); } - void clear_parallel() { + void 
clear_parallel() + { tbb::parallel_for(0, static_cast(_cpu_buffer.size()), [&](const int cpu_id) { parallel::scalable_vector tmp_value; _cpu_buffer[cpu_id] = std::move(tmp_value); @@ -157,22 +174,26 @@ class StreamingVector { _prefix_sum.assign(_cpu_buffer.size(), 0); } - private: - size_t init_prefix_sum() { +private: + size_t init_prefix_sum() + { size_t total_size = 0; - for (size_t i = 0; i < _cpu_buffer.size(); ++i) { + for(size_t i = 0; i < _cpu_buffer.size(); ++i) + { _prefix_sum[i] = total_size; total_size += _cpu_buffer[i].size(); } return total_size; } - void memcpy_from_cpu_buffer_to_destination(parallel::scalable_vector& destination, - const int cpu_id, - const size_t position) { + void + memcpy_from_cpu_buffer_to_destination(parallel::scalable_vector &destination, + const int cpu_id, const size_t position) + { DBG << "Copy buffer of cpu" << cpu_id << "of size" << _cpu_buffer[cpu_id].size() << "to position" << position << "in dest ( CPU =" << THREAD_ID << " )"; - if (_cpu_buffer[cpu_id].empty()) { + if(_cpu_buffer[cpu_id].empty()) + { return; } memcpy(destination.data() + position, _cpu_buffer[cpu_id].data(), @@ -182,5 +203,5 @@ class StreamingVector { Buffer _cpu_buffer; parallel::scalable_vector _prefix_sum; }; -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h b/mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h index 8a0fe25fe..b57d5f76e 100644 --- a/mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h +++ b/mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h @@ -41,103 +41,109 @@ namespace mt_kahypar { namespace ds { template -class ThreadSafeFastResetFlagArray { +class ThreadSafeFastResetFlagArray +{ public: explicit ThreadSafeFastResetFlagArray(const size_t size) : - _v(std::make_unique(size)), - _threshold(1), - _size(size) { + _v(std::make_unique(size)), _threshold(1), _size(size) + { initialize(); } - ThreadSafeFastResetFlagArray() : - _v(nullptr), - _threshold(1), - _size(0) { } + ThreadSafeFastResetFlagArray() : _v(nullptr), _threshold(1), _size(0) {} - ThreadSafeFastResetFlagArray(const ThreadSafeFastResetFlagArray&) = delete; - ThreadSafeFastResetFlagArray& operator= (const ThreadSafeFastResetFlagArray&) = delete; + ThreadSafeFastResetFlagArray(const ThreadSafeFastResetFlagArray &) = delete; + ThreadSafeFastResetFlagArray &operator=(const ThreadSafeFastResetFlagArray &) = delete; - ThreadSafeFastResetFlagArray(ThreadSafeFastResetFlagArray&&) = default; - ThreadSafeFastResetFlagArray& operator= (ThreadSafeFastResetFlagArray&&) = default; + ThreadSafeFastResetFlagArray(ThreadSafeFastResetFlagArray &&) = default; + ThreadSafeFastResetFlagArray &operator=(ThreadSafeFastResetFlagArray &&) = default; ~ThreadSafeFastResetFlagArray() = default; - void swap(ThreadSafeFastResetFlagArray& other) { + void swap(ThreadSafeFastResetFlagArray &other) + { using std::swap; swap(_v, other._v); swap(_threshold, other._threshold); } - bool operator[] (const size_t i) const { - return isSet(i); - } + bool operator[](const size_t i) const { return isSet(i); } // ! Changes value of entry i from false to true and returns true, if the value // ! 
hold on position i was false and was successfully set to true - bool compare_and_set_to_true(const size_t i) { + bool compare_and_set_to_true(const size_t i) + { Type expected = __atomic_load_n(&_v[i], __ATOMIC_RELAXED); Type desired = _threshold; - if ( expected != _threshold && - __atomic_compare_exchange(&_v[i], &expected, &desired, - false, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) ) { + if(expected != _threshold && + __atomic_compare_exchange(&_v[i], &expected, &desired, false, __ATOMIC_ACQ_REL, + __ATOMIC_RELAXED)) + { // Value was successfully set from false to true return true; - } else { + } + else + { // Either expected == _threshold or compare_exchange_strong failed, which means that // an other thread set _v[i] to true before. return false; } } - void set(const size_t i, const bool value) { + void set(const size_t i, const bool value) + { __atomic_store_n(&_v[i], value ? _threshold : 0, __ATOMIC_RELAXED); } - void setUnsafe(const size_t i, const bool value) { - _v[i] = value ? _threshold : 0; - } + void setUnsafe(const size_t i, const bool value) { _v[i] = value ? _threshold : 0; } - void reset() { - if (_threshold == std::numeric_limits::max()) { + void reset() + { + if(_threshold == std::numeric_limits::max()) + { initialize(); _threshold = 0; } ++_threshold; } - size_t size() const { - return _size; - } + size_t size() const { return _size; } - void setSize(const size_t size, const bool init = false) { + void setSize(const size_t size, const bool init = false) + { ASSERT(_v == nullptr, "Error"); _v = std::make_unique(size); _size = size; initialize(init); } - void resize(const size_t size, const bool init = false) { - if ( size > _size ) { - std::unique_ptr tmp_v = - std::make_unique(size); + void resize(const size_t size, const bool init = false) + { + if(size > _size) + { + std::unique_ptr tmp_v = std::make_unique(size); std::swap(_v, tmp_v); _size = size; initialize(init); - } else { + } + else + { _size = size; } } - private: - bool isSet(size_t i) const { +private: + bool isSet(size_t i) const + { return __atomic_load_n(&_v[i], __ATOMIC_RELAXED) == _threshold; } - void initialize(const bool init = false) { + void initialize(const bool init = false) + { const Type init_value = init ? 
_threshold : 0; - for ( size_t i = 0; i < _size; ++i ) { + for(size_t i = 0; i < _size; ++i) + { __atomic_store_n(&_v[i], init_value, __ATOMIC_RELAXED); } } @@ -148,9 +154,9 @@ class ThreadSafeFastResetFlagArray { }; template -void swap(ThreadSafeFastResetFlagArray& a, - ThreadSafeFastResetFlagArray& b) { +void swap(ThreadSafeFastResetFlagArray &a, ThreadSafeFastResetFlagArray &b) +{ a.swap(b); } -} // namespace ds -} // namespace mt_kahypar +} // namespace ds +} // namespace mt_kahypar diff --git a/mt-kahypar/definitions.h b/mt-kahypar/definitions.h index 9f0fc7e20..3aae017c5 100644 --- a/mt-kahypar/definitions.h +++ b/mt-kahypar/definitions.h @@ -32,92 +32,99 @@ #include "include/libmtkahypartypes.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/datastructures/delta_partitioned_graph.h" +#include "mt-kahypar/datastructures/delta_partitioned_hypergraph.h" #include "mt-kahypar/datastructures/dynamic_graph.h" #include "mt-kahypar/datastructures/dynamic_graph_factory.h" -#include "mt-kahypar/datastructures/static_graph.h" -#include "mt-kahypar/datastructures/static_graph_factory.h" -#include "mt-kahypar/datastructures/partitioned_graph.h" -#include "mt-kahypar/datastructures/delta_partitioned_graph.h" #include "mt-kahypar/datastructures/dynamic_hypergraph.h" #include "mt-kahypar/datastructures/dynamic_hypergraph_factory.h" +#include "mt-kahypar/datastructures/partitioned_graph.h" +#include "mt-kahypar/datastructures/partitioned_hypergraph.h" +#include "mt-kahypar/datastructures/static_graph.h" +#include "mt-kahypar/datastructures/static_graph_factory.h" #include "mt-kahypar/datastructures/static_hypergraph.h" #include "mt-kahypar/datastructures/static_hypergraph_factory.h" -#include "mt-kahypar/datastructures/partitioned_hypergraph.h" -#include "mt-kahypar/datastructures/delta_partitioned_hypergraph.h" namespace mt_kahypar { using StaticPartitionedGraph = ds::PartitionedGraph; using DynamicPartitionedGraph = ds::PartitionedGraph; -using StaticPartitionedHypergraph = ds::PartitionedHypergraph; -using DynamicPartitionedHypergraph = ds::PartitionedHypergraph; -using StaticSparsePartitionedHypergraph = ds::PartitionedHypergraph; - -struct StaticGraphTypeTraits : public kahypar::meta::PolicyBase { +using StaticPartitionedHypergraph = + ds::PartitionedHypergraph; +using DynamicPartitionedHypergraph = + ds::PartitionedHypergraph; +using StaticSparsePartitionedHypergraph = + ds::PartitionedHypergraph; + +struct StaticGraphTypeTraits : public kahypar::meta::PolicyBase +{ using Hypergraph = ds::StaticGraph; using PartitionedHypergraph = StaticPartitionedGraph; }; -struct DynamicGraphTypeTraits : public kahypar::meta::PolicyBase { +struct DynamicGraphTypeTraits : public kahypar::meta::PolicyBase +{ using Hypergraph = ds::DynamicGraph; using PartitionedHypergraph = DynamicPartitionedGraph; }; -struct StaticHypergraphTypeTraits : public kahypar::meta::PolicyBase { +struct StaticHypergraphTypeTraits : public kahypar::meta::PolicyBase +{ using Hypergraph = ds::StaticHypergraph; using PartitionedHypergraph = StaticPartitionedHypergraph; }; -struct DynamicHypergraphTypeTraits : public kahypar::meta::PolicyBase { +struct DynamicHypergraphTypeTraits : public kahypar::meta::PolicyBase +{ using Hypergraph = ds::DynamicHypergraph; using PartitionedHypergraph = DynamicPartitionedHypergraph; }; -struct LargeKHypergraphTypeTraits : public kahypar::meta::PolicyBase { +struct LargeKHypergraphTypeTraits : public kahypar::meta::PolicyBase +{ using Hypergraph = ds::StaticHypergraph; using PartitionedHypergraph = 
StaticSparsePartitionedHypergraph; }; -using TypeTraitsList = kahypar::meta::Typelist; +using TypeTraitsList = kahypar::meta::Typelist< + StaticHypergraphTypeTraits ENABLE_GRAPHS(COMMA StaticGraphTypeTraits) + ENABLE_HIGHEST_QUALITY(COMMA DynamicHypergraphTypeTraits) + ENABLE_HIGHEST_QUALITY_FOR_GRAPHS(COMMA DynamicGraphTypeTraits) + ENABLE_LARGE_K(COMMA LargeKHypergraphTypeTraits)>; -#define INSTANTIATE_FUNC_WITH_HYPERGRAPHS(FUNC) \ - template FUNC(ds::StaticHypergraph); \ - ENABLE_GRAPHS(template FUNC(ds::StaticGraph);) \ - ENABLE_HIGHEST_QUALITY(template FUNC(ds::DynamicHypergraph);) \ +#define INSTANTIATE_FUNC_WITH_HYPERGRAPHS(FUNC) \ + template FUNC(ds::StaticHypergraph); \ + ENABLE_GRAPHS(template FUNC(ds::StaticGraph);) \ + ENABLE_HIGHEST_QUALITY(template FUNC(ds::DynamicHypergraph);) \ ENABLE_HIGHEST_QUALITY_FOR_GRAPHS(template FUNC(ds::DynamicGraph);) -#define INSTANTIATE_CLASS_WITH_HYPERGRAPHS(C) \ - template class C; \ - ENABLE_GRAPHS(template class C;) \ - ENABLE_HIGHEST_QUALITY(template class C;) \ +#define INSTANTIATE_CLASS_WITH_HYPERGRAPHS(C) \ + template class C; \ + ENABLE_GRAPHS(template class C;) \ + ENABLE_HIGHEST_QUALITY(template class C;) \ ENABLE_HIGHEST_QUALITY_FOR_GRAPHS(template class C;) -#define INSTANTIATE_FUNC_WITH_PARTITIONED_HG(FUNC) \ - template FUNC(StaticPartitionedHypergraph); \ - ENABLE_GRAPHS(template FUNC(StaticPartitionedGraph);) \ - ENABLE_LARGE_K(template FUNC(StaticSparsePartitionedHypergraph);) \ - ENABLE_HIGHEST_QUALITY(template FUNC(DynamicPartitionedHypergraph);) \ +#define INSTANTIATE_FUNC_WITH_PARTITIONED_HG(FUNC) \ + template FUNC(StaticPartitionedHypergraph); \ + ENABLE_GRAPHS(template FUNC(StaticPartitionedGraph);) \ + ENABLE_LARGE_K(template FUNC(StaticSparsePartitionedHypergraph);) \ + ENABLE_HIGHEST_QUALITY(template FUNC(DynamicPartitionedHypergraph);) \ ENABLE_HIGHEST_QUALITY_FOR_GRAPHS(template FUNC(DynamicPartitionedGraph);) -#define INSTANTIATE_CLASS_WITH_PARTITIONED_HG(C) \ - template class C; \ - ENABLE_GRAPHS(template class C;) \ - ENABLE_LARGE_K(template class C;) \ - ENABLE_HIGHEST_QUALITY(template class C;) \ +#define INSTANTIATE_CLASS_WITH_PARTITIONED_HG(C) \ + template class C; \ + ENABLE_GRAPHS(template class C;) \ + ENABLE_LARGE_K(template class C;) \ + ENABLE_HIGHEST_QUALITY(template class C;) \ ENABLE_HIGHEST_QUALITY_FOR_GRAPHS(template class C;) -#define INSTANTIATE_CLASS_WITH_TYPE_TRAITS(C) \ - template class C; \ - ENABLE_GRAPHS(template class C;) \ - ENABLE_HIGHEST_QUALITY(template class C;) \ - ENABLE_HIGHEST_QUALITY_FOR_GRAPHS(template class C;) \ +#define INSTANTIATE_CLASS_WITH_TYPE_TRAITS(C) \ + template class C; \ + ENABLE_GRAPHS(template class C;) \ + ENABLE_HIGHEST_QUALITY(template class C;) \ + ENABLE_HIGHEST_QUALITY_FOR_GRAPHS(template class C;) \ ENABLE_LARGE_K(template class C;) - using HighResClockTimepoint = std::chrono::time_point; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/io/command_line_options.cpp b/mt-kahypar/io/command_line_options.cpp index f177d1644..b391b17e2 100644 --- a/mt-kahypar/io/command_line_options.cpp +++ b/mt-kahypar/io/command_line_options.cpp @@ -29,8 +29,8 @@ #include #ifdef _WIN32 -#include #include +#include #else #include #endif @@ -43,866 +43,1233 @@ namespace po = boost::program_options; namespace mt_kahypar { - namespace platform { - int getTerminalWidth() { - int columns = 0; - #if defined(_WIN32) - CONSOLE_SCREEN_BUFFER_INFO csbi; - GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi); - columns = csbi.srWindow.Right - 
csbi.srWindow.Left + 1; - #else - struct winsize w = { }; - ioctl(0, TIOCGWINSZ, &w); - columns = w.ws_col; - #endif - return columns; - } +namespace platform { +int getTerminalWidth() +{ + int columns = 0; +#if defined(_WIN32) + CONSOLE_SCREEN_BUFFER_INFO csbi; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi); + columns = csbi.srWindow.Right - csbi.srWindow.Left + 1; +#else + struct winsize w = {}; + ioctl(0, TIOCGWINSZ, &w); + columns = w.ws_col; +#endif + return columns; +} - int getProcessID() { - #if defined(_WIN32) - return _getpid(); - #else - return getpid(); - #endif - } - } // namespace platform - - po::options_description createGeneralOptionsDescription(Context& context, const int num_columns) { - po::options_description options("General Options", num_columns); - options.add_options() - ("help", "show help message") - ("deterministic", po::value(&context.partition.deterministic)->value_name("")->default_value(false), - "Enables deterministic mode.") - ("verbose,v", po::value(&context.partition.verbose_output)->value_name("")->default_value(true), - "Verbose main partitioning output") - ("fixed,f", - po::value(&context.partition.fixed_vertex_filename)->value_name(""), - "Fixed vertex filename") - ("write-partition-file", - po::value(&context.partition.write_partition_file)->value_name("")->default_value(false), - "If true, then partition output file is generated") - ("partition-output-folder", - po::value(&context.partition.graph_partition_output_folder)->value_name(""), - "Output folder for partition file") - ("mode,m", - po::value()->value_name("")->notifier( - [&](const std::string& mode) { - context.partition.mode = modeFromString(mode); - }), - "Partitioning mode: \n" - " - direct: direct k-way partitioning\n" - " - rb: recursive bipartitioning\n" - " - deep: deep multilevel partitioning") - ("input-file-format", - po::value()->value_name("")->notifier([&](const std::string& s) { - if (s == "hmetis") { - context.partition.file_format = FileFormat::hMetis; - } else if (s == "metis") { - context.partition.file_format = FileFormat::Metis; - } - }), - "Input file format: \n" - " - hmetis : hMETIS hypergraph file format \n" - " - metis : METIS graph file format") - ("instance-type", - po::value()->value_name("")->notifier([&](const std::string& type) { - context.partition.instance_type = instanceTypeFromString(type); - }), - "Instance Type: \n" - " - graph\n" - " - hypergraph") - ("preset-type", - po::value()->value_name("")->notifier([&](const std::string& type) { - context.partition.preset_type = presetTypeFromString(type); - }), - "Preset Types: \n" - " - deterministic\n" - " - large_k\n" - " - default\n" - " - quality\n" - " - highest_quality" - ) - ("seed", - po::value(&context.partition.seed)->value_name("")->default_value(0), - "Seed for random number generator") - ("num-vcycles", - po::value(&context.partition.num_vcycles)->value_name("")->default_value(0), - "Number of V-Cycles") - ("perform-parallel-recursion-in-deep-multilevel", - po::value(&context.partition.perform_parallel_recursion_in_deep_multilevel)->value_name("")->default_value(true), - "If true, then we perform parallel recursion within the deep multilevel scheme.") - ("smallest-maxnet-threshold", - po::value(&context.partition.smallest_large_he_size_threshold)->value_name(""), - "No hyperedge whose size is smaller than this threshold is removed in the large hyperedge removal step (see maxnet-removal-factor)") - ("maxnet-removal-factor", - 
po::value(&context.partition.large_hyperedge_size_threshold_factor)->value_name( - "")->default_value(0.01), - "Hyperedges larger than max(|V| * (this factor), p-smallest-maxnet-threshold) are removed before partitioning.") - ("maxnet-ignore", - po::value(&context.partition.ignore_hyperedge_size_threshold)->value_name( - "")->default_value(1000), - "Hyperedges larger than this threshold are ignored during partitioning.") - ("show-detailed-timings", - po::value(&context.partition.show_detailed_timings)->value_name("")->default_value(false), - "If true, shows detailed subtimings of each multilevel phase at the end of the partitioning process.") - ("show-detailed-clustering-timings", - po::value(&context.partition.show_detailed_clustering_timings)->value_name("")->default_value( - false), - "If true, shows detailed timings of each clustering iteration.") - ("measure-detailed-uncontraction-timings", - po::value(&context.partition.measure_detailed_uncontraction_timings)->value_name("")->default_value( - false), - "If true, measure and show detailed timings for n-level uncontraction.") - ("timings-output-depth", - po::value(&context.partition.timings_output_depth)->value_name(""), - "Number of levels shown in timing output") - ("show-memory-consumption", - po::value(&context.partition.show_memory_consumption)->value_name("")->default_value(false), - "If true, shows detailed information on how much memory was allocated and how memory was reused throughout partitioning.") - ("show-advanced-cut-analysis", - po::value(&context.partition.show_advanced_cut_analysis)->value_name("")->default_value(false), - "If true, calculates cut matrix, potential positive gain move matrix and connected cut hyperedge components after partitioning.") - ("enable-progress-bar", - po::value(&context.partition.enable_progress_bar)->value_name("")->default_value(false), - "If true, shows a progress bar during coarsening and refinement phase.") - ("time-limit", po::value(&context.partition.time_limit)->value_name(""), - "Time limit in seconds (currently not supported)") - ("sp-process,s", - po::value(&context.partition.sp_process_output)->value_name("")->default_value(false), - "Summarize partitioning results in RESULT line compatible with sqlplottools " - "(https://github.com/bingmann/sqlplottools)") - ("csv", po::value(&context.partition.csv_output)->value_name("")->default_value(false), - "Summarize results in CSV format") - ("algorithm-name", - po::value(&context.algorithm_name)->value_name("")->default_value("MT-KaHyPar"), - "An algorithm name to print into the summarized output (csv or sqlplottools). 
") - ("part-weights", - po::value >(&context.partition.max_part_weights)->multitoken()->notifier( - [&](auto) { - context.partition.use_individual_part_weights = true; - }), - "Use the specified individual part weights instead of epsilon."); - return options; - } +int getProcessID() +{ +#if defined(_WIN32) + return _getpid(); +#else + return getpid(); +#endif +} +} // namespace platform - po::options_description createPreprocessingOptionsDescription(Context& context, const int num_columns) { - po::options_description options("Preprocessing Options", num_columns); - options.add_options() - ("p-stable-io", - po::value(&context.preprocessing.stable_construction_of_incident_edges)->value_name( - "")->default_value(false), - "If true, the incident edges of a vertex are sorted after construction, so that the hypergraph " - "data structure is independent of scheduling during construction.") - ("p-enable-community-detection", - po::value(&context.preprocessing.use_community_detection)->value_name("")->default_value(true), - "If true, community detection is used as preprocessing step to restrict contractions to densely coupled regions in coarsening phase") - ("p-disable-community-detection-on-mesh-graphs", - po::value(&context.preprocessing.disable_community_detection_for_mesh_graphs)->value_name("")->default_value(true), - "If true, community detection is dynamically disabled for mesh graphs (as it is not effective for this type of graphs).") - ("p-louvain-edge-weight-function", - po::value()->value_name("")->notifier( - [&](const std::string& type) { - context.preprocessing.community_detection.edge_weight_function = louvainEdgeWeightFromString( - type); - })->default_value("hybrid"), - "Louvain edge weight functions:\n" - "- hybrid\n" - "- uniform\n" - "- non_uniform\n" - "- degree") - ("p-max-louvain-pass-iterations", - po::value(&context.preprocessing.community_detection.max_pass_iterations)->value_name( - "")->default_value(5), - "Maximum number of iterations over all nodes of one louvain pass") - ("p-louvain-low-memory-contraction", - po::value(&context.preprocessing.community_detection.low_memory_contraction)->value_name( - "")->default_value(false), - "Maximum number of iterations over all nodes of one louvain pass") - ("p-louvain-min-vertex-move-fraction", - po::value(&context.preprocessing.community_detection.min_vertex_move_fraction)->value_name( - "")->default_value(0.01), - "Louvain pass terminates if less than that fraction of nodes moves during a pass") - ("p-vertex-degree-sampling-threshold", - po::value(&context.preprocessing.community_detection.vertex_degree_sampling_threshold)->value_name( - "")->default_value(std::numeric_limits::max()), - "If set, then neighbors of a vertex are sampled during rating if its degree is greater than this threshold.") - ("p-num-sub-rounds", - po::value(&context.preprocessing.community_detection.num_sub_rounds_deterministic)->value_name( - "")->default_value(16), - "Number of sub-rounds used for deterministic community detection in preprocessing."); - return options; - } +po::options_description createGeneralOptionsDescription(Context &context, + const int num_columns) +{ + po::options_description options("General Options", num_columns); + options.add_options()("help", "show help message")( + "deterministic", + po::value(&context.partition.deterministic) + ->value_name("") + ->default_value(false), + "Enables deterministic mode.")("verbose,v", + po::value(&context.partition.verbose_output) + ->value_name("") + ->default_value(true), + "Verbose main 
partitioning output")( + "fixed,f", + po::value(&context.partition.fixed_vertex_filename) + ->value_name(""), + "Fixed vertex filename")("write-partition-file", + po::value(&context.partition.write_partition_file) + ->value_name("") + ->default_value(false), + "If true, then partition output file is generated")( + "partition-output-folder", + po::value(&context.partition.graph_partition_output_folder) + ->value_name(""), + "Output folder for partition file")("mode,m", + po::value() + ->value_name("") + ->notifier([&](const std::string &mode) { + context.partition.mode = + modeFromString(mode); + }), + "Partitioning mode: \n" + " - direct: direct k-way partitioning\n" + " - rb: recursive bipartitioning\n" + " - deep: deep multilevel partitioning")( + "input-file-format", + po::value() + ->value_name("") + ->notifier([&](const std::string &s) { + if(s == "hmetis") + { + context.partition.file_format = FileFormat::hMetis; + } + else if(s == "metis") + { + context.partition.file_format = FileFormat::Metis; + } + }), + "Input file format: \n" + " - hmetis : hMETIS hypergraph file format \n" + " - metis : METIS graph file format")( + "instance-type", + po::value() + ->value_name("") + ->notifier([&](const std::string &type) { + context.partition.instance_type = instanceTypeFromString(type); + }), + "Instance Type: \n" + " - graph\n" + " - hypergraph")("preset-type", + po::value() + ->value_name("") + ->notifier([&](const std::string &type) { + context.partition.preset_type = presetTypeFromString(type); + }), + "Preset Types: \n" + " - deterministic\n" + " - large_k\n" + " - default\n" + " - quality\n" + " - highest_quality")( + "seed", + po::value(&context.partition.seed)->value_name("")->default_value(0), + "Seed for random number generator")( + "num-vcycles", + po::value(&context.partition.num_vcycles) + ->value_name("") + ->default_value(0), + "Number of V-Cycles")( + "perform-parallel-recursion-in-deep-multilevel", + po::value(&context.partition.perform_parallel_recursion_in_deep_multilevel) + ->value_name("") + ->default_value(true), + "If true, then we perform parallel recursion within the deep multilevel scheme.")( + "smallest-maxnet-threshold", + po::value(&context.partition.smallest_large_he_size_threshold) + ->value_name(""), + "No hyperedge whose size is smaller than this threshold is removed in the large " + "hyperedge removal step (see maxnet-removal-factor)")( + "maxnet-removal-factor", + po::value(&context.partition.large_hyperedge_size_threshold_factor) + ->value_name("") + ->default_value(0.01), + "Hyperedges larger than max(|V| * (this factor), p-smallest-maxnet-threshold) are " + "removed before partitioning.")( + "maxnet-ignore", + po::value(&context.partition.ignore_hyperedge_size_threshold) + ->value_name("") + ->default_value(1000), + "Hyperedges larger than this threshold are ignored during partitioning.")( + "show-detailed-timings", + po::value(&context.partition.show_detailed_timings) + ->value_name("") + ->default_value(false), + "If true, shows detailed subtimings of each multilevel phase at the end of the " + "partitioning process.")( + "show-detailed-clustering-timings", + po::value(&context.partition.show_detailed_clustering_timings) + ->value_name("") + ->default_value(false), + "If true, shows detailed timings of each clustering iteration.")( + "measure-detailed-uncontraction-timings", + po::value(&context.partition.measure_detailed_uncontraction_timings) + ->value_name("") + ->default_value(false), + "If true, measure and show detailed timings for n-level 
uncontraction.")( + "timings-output-depth", + po::value(&context.partition.timings_output_depth)->value_name(""), + "Number of levels shown in timing output")( + "show-memory-consumption", + po::value(&context.partition.show_memory_consumption) + ->value_name("") + ->default_value(false), + "If true, shows detailed information on how much memory was allocated and how " + "memory was reused throughout partitioning.")( + "show-advanced-cut-analysis", + po::value(&context.partition.show_advanced_cut_analysis) + ->value_name("") + ->default_value(false), + "If true, calculates cut matrix, potential positive gain move matrix and connected " + "cut hyperedge components after partitioning.")( + "enable-progress-bar", + po::value(&context.partition.enable_progress_bar) + ->value_name("") + ->default_value(false), + "If true, shows a progress bar during coarsening and refinement phase.")( + "time-limit", po::value(&context.partition.time_limit)->value_name(""), + "Time limit in seconds (currently not supported)")( + "sp-process,s", + po::value(&context.partition.sp_process_output) + ->value_name("") + ->default_value(false), + "Summarize partitioning results in RESULT line compatible with sqlplottools " + "(https://github.com/bingmann/sqlplottools)")( + "csv", + po::value(&context.partition.csv_output) + ->value_name("") + ->default_value(false), + "Summarize results in CSV format")( + "algorithm-name", + po::value(&context.algorithm_name) + ->value_name("") + ->default_value("MT-KaHyPar"), + "An algorithm name to print into the summarized output (csv or sqlplottools). ")( + "part-weights", + po::value >(&context.partition.max_part_weights) + ->multitoken() + ->notifier([&](auto) { context.partition.use_individual_part_weights = true; }), + "Use the specified individual part weights instead of epsilon."); + return options; +} - po::options_description createCoarseningOptionsDescription(Context& context, - const int num_columns) { - po::options_description options("Coarsening Options", num_columns); - options.add_options() - ("c-type", - po::value()->value_name("")->notifier( - [&](const std::string& ctype) { - context.coarsening.algorithm = mt_kahypar::coarseningAlgorithmFromString(ctype); - })->default_value("multilevel_coarsener"), - "Coarsening Algorithm:\n" - " - multilevel_coarsener" - " - nlevel_coarsener" - " - deterministic_multilevel_coarsener" - ) - ("c-use-adaptive-edge-size", - po::value(&context.coarsening.use_adaptive_edge_size)->value_name("")->default_value(true), - "If true, the rating function uses the number of distinct cluster IDs of a net as edge size rather\n" - "than its original size during multilevel coarsing") - ("c-s", - po::value(&context.coarsening.max_allowed_weight_multiplier)->value_name( - "")->default_value(1), - "The maximum weight of a vertex in the coarsest hypergraph H is:\n" - "(s * w(H)) / (t * k)\n") - ("c-t", - po::value(&context.coarsening.contraction_limit_multiplier)->value_name( - "")->default_value(160), - "Coarsening stops when there are no more than t * k hypernodes left") - ("c-deep-t", - po::value(&context.coarsening.deep_ml_contraction_limit_multiplier)->value_name(""), - "Deep multilevel performs coarsening until 2 * deep-t hypernodes are left for bipartitioning calls") - ("c-min-shrink-factor", - po::value(&context.coarsening.minimum_shrink_factor)->value_name("")->default_value(1.01), - "Minimum factor a hypergraph must shrink in a multilevel pass. 
Otherwise, we terminate coarsening phase.") - ("c-max-shrink-factor", - po::value(&context.coarsening.maximum_shrink_factor)->value_name("")->default_value(2.5), - "Maximum factor a hypergraph is allowed to shrink in a clustering pass") - ("c-rating-score", - po::value()->value_name("")->notifier( - [&](const std::string& rating_score) { - context.coarsening.rating.rating_function = - mt_kahypar::ratingFunctionFromString(rating_score); - })->default_value("heavy_edge"), - "Rating function used to calculate scores for vertex pairs:\n" - #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES - "- sameness\n" - #endif - "- heavy_edge") - ("c-rating-heavy-node-penalty", - po::value()->value_name("")->notifier( - [&](const std::string& penalty) { - context.coarsening.rating.heavy_node_penalty_policy = - heavyNodePenaltyFromString(penalty); - })->default_value("no_penalty"), - "Penalty function to discourage heavy vertices:\n" - #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES - "- multiplicative\n" - "- edge_frequency_penalty\n" - #endif - "- no_penalty") - ("c-rating-acceptance-criterion", - po::value()->value_name("")->notifier( - [&](const std::string& crit) { - context.coarsening.rating.acceptance_policy = - acceptanceCriterionFromString(crit); - })->default_value("best_prefer_unmatched"), - "Acceptance/Tiebreaking criterion for contraction partners having the same score:\n" - #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES - "- best\n" - #endif - "- best_prefer_unmatched") - ("c-vertex-degree-sampling-threshold", - po::value(&context.coarsening.vertex_degree_sampling_threshold)->value_name( - "")->default_value(std::numeric_limits::max()), - "If set, then neighbors of a vertex are sampled during rating if its degree is greater than this threshold.") - ("c-num-sub-rounds", - po::value(&context.coarsening.num_sub_rounds_deterministic)->value_name( - "")->default_value(16), - "Number of sub-rounds used for deterministic coarsening."); - return options; - } +po::options_description createPreprocessingOptionsDescription(Context &context, + const int num_columns) +{ + po::options_description options("Preprocessing Options", num_columns); + options.add_options()( + "p-stable-io", + po::value(&context.preprocessing.stable_construction_of_incident_edges) + ->value_name("") + ->default_value(false), + "If true, the incident edges of a vertex are sorted after construction, so that " + "the hypergraph " + "data structure is independent of scheduling during construction.")( + "p-enable-community-detection", + po::value(&context.preprocessing.use_community_detection) + ->value_name("") + ->default_value(true), + "If true, community detection is used as preprocessing step to restrict " + "contractions to densely coupled regions in coarsening phase")( + "p-disable-community-detection-on-mesh-graphs", + po::value(&context.preprocessing.disable_community_detection_for_mesh_graphs) + ->value_name("") + ->default_value(true), + "If true, community detection is dynamically disabled for mesh graphs (as it is " + "not effective for this type of graphs).")( + "p-louvain-edge-weight-function", + po::value() + ->value_name("") + ->notifier([&](const std::string &type) { + context.preprocessing.community_detection.edge_weight_function = + louvainEdgeWeightFromString(type); + }) + ->default_value("hybrid"), + "Louvain edge weight functions:\n" + "- hybrid\n" + "- uniform\n" + "- non_uniform\n" + "- degree")( + "p-max-louvain-pass-iterations", + po::value(&context.preprocessing.community_detection.max_pass_iterations) + ->value_name("") 
+ ->default_value(5), + "Maximum number of iterations over all nodes of one louvain pass")( + "p-louvain-low-memory-contraction", + po::value(&context.preprocessing.community_detection.low_memory_contraction) + ->value_name("") + ->default_value(false), + "Maximum number of iterations over all nodes of one louvain pass")( + "p-louvain-min-vertex-move-fraction", + po::value( + &context.preprocessing.community_detection.min_vertex_move_fraction) + ->value_name("") + ->default_value(0.01), + "Louvain pass terminates if less than that fraction of nodes moves during a pass")( + "p-vertex-degree-sampling-threshold", + po::value( + &context.preprocessing.community_detection.vertex_degree_sampling_threshold) + ->value_name("") + ->default_value(std::numeric_limits::max()), + "If set, then neighbors of a vertex are sampled during rating if its degree is " + "greater than this threshold.")( + "p-num-sub-rounds", + po::value( + &context.preprocessing.community_detection.num_sub_rounds_deterministic) + ->value_name("") + ->default_value(16), + "Number of sub-rounds used for deterministic community detection in " + "preprocessing."); + return options; +} - po::options_description createRefinementOptionsDescription(Context& context, - const int num_columns, - const bool initial_partitioning) { - po::options_description options("Refinement Options", num_columns); - options.add_options() - ((initial_partitioning ? "i-r-refine-until-no-improvement" : "r-refine-until-no-improvement"), - po::value((!initial_partitioning ? &context.refinement.refine_until_no_improvement : - &context.initial_partitioning.refinement.refine_until_no_improvement))->value_name( - "")->default_value(false), - "Executes all refinement algorithms as long as they find an improvement on the current partition.") - ((initial_partitioning ? "i-r-relative-improvement-threshold" : "r-relative-improvement-threshold"), - po::value((!initial_partitioning ? &context.refinement.relative_improvement_threshold : - &context.initial_partitioning.refinement.relative_improvement_threshold))->value_name( - "")->default_value(0.0), - "If the relative improvement during a refinement pass is less than this threshold, than refinement is aborted.") - (( initial_partitioning ? "i-r-max-batch-size" : "r-max-batch-size"), - po::value((!initial_partitioning ? &context.refinement.max_batch_size : - &context.initial_partitioning.refinement.max_batch_size))->value_name("")->default_value(1000), - "Maximum size of an uncontraction batch (n-Level Partitioner).") - (( initial_partitioning ? "i-r-min-border-vertices-per-thread" : "r-min-border-vertices-per-thread"), - po::value((!initial_partitioning ? &context.refinement.min_border_vertices_per_thread : - &context.initial_partitioning.refinement.min_border_vertices_per_thread))->value_name("")->default_value(0), - "Minimum number of border vertices per thread with which we perform a localized search (n-Level Partitioner).") - ((initial_partitioning ? "i-r-lp-type" : "r-lp-type"), - po::value()->value_name("")->notifier( - [&, initial_partitioning](const std::string& type) { - if (initial_partitioning) { - context.initial_partitioning.refinement.label_propagation.algorithm = - labelPropagationAlgorithmFromString(type); - } else { - context.refinement.label_propagation.algorithm = - labelPropagationAlgorithmFromString(type); - } - })->default_value("label_propagation"), - "Label Propagation Algorithm:\n" - "- label_propagation\n" - "- deterministic\n" - "- do_nothing") - ((initial_partitioning ? 
"i-r-lp-maximum-iterations" : "r-lp-maximum-iterations"), - po::value((!initial_partitioning ? &context.refinement.label_propagation.maximum_iterations : - &context.initial_partitioning.refinement.label_propagation.maximum_iterations))->value_name( - "")->default_value(5), - "Maximum number of label propagation rounds") - ((initial_partitioning ? "i-r-sync-lp-sub-rounds" : "r-sync-lp-sub-rounds"), - po::value((!initial_partitioning ? &context.refinement.deterministic_refinement.num_sub_rounds_sync_lp : - &context.initial_partitioning.refinement.deterministic_refinement.num_sub_rounds_sync_lp))->value_name( - "")->default_value(5), - "Number of sub-rounds for deterministic synchronous label propagation") - ((initial_partitioning ? "i-r-sync-lp-active-nodeset" : "r-sync-lp-active-nodeset"), - po::value((!initial_partitioning ? &context.refinement.deterministic_refinement.use_active_node_set : - &context.initial_partitioning.refinement.deterministic_refinement.use_active_node_set))->value_name( - "")->default_value(true), - "Use active nodeset in synchronous label propagation") - ((initial_partitioning ? "i-r-lp-rebalancing" : "r-lp-rebalancing"), - po::value((!initial_partitioning ? &context.refinement.label_propagation.rebalancing : - &context.initial_partitioning.refinement.label_propagation.rebalancing))->value_name( - "")->default_value(true), - "If true, then zero gain moves are only performed if they improve the balance of the solution (only in label propagation)") - ((initial_partitioning ? "i-r-lp-unconstrained" : "r-lp-unconstrained"), - po::value((!initial_partitioning ? &context.refinement.label_propagation.unconstrained : - &context.initial_partitioning.refinement.label_propagation.unconstrained))->value_name( - "")->default_value(false), - "If true, then unconstrained label propagation (including rebalancing) is used.") - ((initial_partitioning ? "i-r-lp-he-size-activation-threshold" : "r-lp-he-size-activation-threshold"), - po::value( - (!initial_partitioning ? &context.refinement.label_propagation.hyperedge_size_activation_threshold - : - &context.initial_partitioning.refinement.label_propagation.hyperedge_size_activation_threshold))->value_name( - "")->default_value(100), - "LP refiner activates only neighbors of moved vertices that are part of hyperedges with a size less than this threshold") - ((initial_partitioning ? "i-r-lp-relative-improvement-threshold" : "r-lp-relative-improvement-threshold"), - po::value((!initial_partitioning ? &context.refinement.label_propagation.relative_improvement_threshold : - &context.initial_partitioning.refinement.label_propagation.relative_improvement_threshold))->value_name( - "")->default_value(-1.0), - "Relative improvement threshold for label propagation.") - ((initial_partitioning ? "i-r-fm-type" : "r-fm-type"), - po::value()->value_name("")->notifier( - [&, initial_partitioning](const std::string& type) { - if (initial_partitioning) { - context.initial_partitioning.refinement.fm.algorithm = fmAlgorithmFromString(type); - } else { - context.refinement.fm.algorithm = fmAlgorithmFromString(type); - } - })->default_value("kway_fm"), - "FM Algorithm:\n" - "- kway_fm\n" - "- unconstrained_fm\n" - "- do_nothing") - ((initial_partitioning ? "i-r-fm-multitry-rounds" : "r-fm-multitry-rounds"), - po::value((initial_partitioning ? 
&context.initial_partitioning.refinement.fm.multitry_rounds : - &context.refinement.fm.multitry_rounds))->value_name("")->default_value(10), - "Number of FM rounds within one level of the multilevel hierarchy.") - ((initial_partitioning ? "i-r-fm-seed-nodes" : "r-fm-seed-nodes"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.num_seed_nodes : - &context.refinement.fm.num_seed_nodes))->value_name("")->default_value(25), - "Number of nodes to start the 'highly localized FM' with.") - (( initial_partitioning ? "i-r-fm-rollback-parallel" : "r-fm-rollback-parallel"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.rollback_parallel : - &context.refinement.fm.rollback_parallel)) - ->value_name("")->default_value(true), - "Perform gain and balance recalculation, and reverting to best prefix in parallel.") - (( initial_partitioning ? - "i-r-fm-iter-moves-on-recalc" : +po::options_description createCoarseningOptionsDescription(Context &context, + const int num_columns) +{ + po::options_description options("Coarsening Options", num_columns); + options.add_options()("c-type", + po::value() + ->value_name("") + ->notifier([&](const std::string &ctype) { + context.coarsening.algorithm = + mt_kahypar::coarseningAlgorithmFromString(ctype); + }) + ->default_value("multilevel_coarsener"), + "Coarsening Algorithm:\n" + " - multilevel_coarsener" + " - nlevel_coarsener" + " - deterministic_multilevel_coarsener")( + "c-use-adaptive-edge-size", + po::value(&context.coarsening.use_adaptive_edge_size) + ->value_name("") + ->default_value(true), + "If true, the rating function uses the number of distinct cluster IDs of a net as " + "edge size rather\n" + "than its original size during multilevel coarsing")( + "c-s", + po::value(&context.coarsening.max_allowed_weight_multiplier) + ->value_name("") + ->default_value(1), + "The maximum weight of a vertex in the coarsest hypergraph H is:\n" + "(s * w(H)) / (t * k)\n")( + "c-t", + po::value(&context.coarsening.contraction_limit_multiplier) + ->value_name("") + ->default_value(160), + "Coarsening stops when there are no more than t * k hypernodes left")( + "c-deep-t", + po::value(&context.coarsening.deep_ml_contraction_limit_multiplier) + ->value_name(""), + "Deep multilevel performs coarsening until 2 * deep-t hypernodes are left for " + "bipartitioning calls")( + "c-min-shrink-factor", + po::value(&context.coarsening.minimum_shrink_factor) + ->value_name("") + ->default_value(1.01), + "Minimum factor a hypergraph must shrink in a multilevel pass. 
Otherwise, we " + "terminate coarsening phase.")( + "c-max-shrink-factor", + po::value(&context.coarsening.maximum_shrink_factor) + ->value_name("") + ->default_value(2.5), + "Maximum factor a hypergraph is allowed to shrink in a clustering pass")( + "c-rating-score", + po::value() + ->value_name("") + ->notifier([&](const std::string &rating_score) { + context.coarsening.rating.rating_function = + mt_kahypar::ratingFunctionFromString(rating_score); + }) + ->default_value("heavy_edge"), + "Rating function used to calculate scores for vertex pairs:\n" +#ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES + "- sameness\n" +#endif + "- heavy_edge")("c-rating-heavy-node-penalty", + po::value() + ->value_name("") + ->notifier([&](const std::string &penalty) { + context.coarsening.rating.heavy_node_penalty_policy = + heavyNodePenaltyFromString(penalty); + }) + ->default_value("no_penalty"), + "Penalty function to discourage heavy vertices:\n" +#ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES + "- multiplicative\n" + "- edge_frequency_penalty\n" +#endif + "- no_penalty")( + "c-rating-acceptance-criterion", + po::value() + ->value_name("") + ->notifier([&](const std::string &crit) { + context.coarsening.rating.acceptance_policy = + acceptanceCriterionFromString(crit); + }) + ->default_value("best_prefer_unmatched"), + "Acceptance/Tiebreaking criterion for contraction partners having the same score:\n" +#ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES + "- best\n" +#endif + "- best_prefer_unmatched")( + "c-vertex-degree-sampling-threshold", + po::value(&context.coarsening.vertex_degree_sampling_threshold) + ->value_name("") + ->default_value(std::numeric_limits::max()), + "If set, then neighbors of a vertex are sampled during rating if its degree is " + "greater than this threshold.")( + "c-num-sub-rounds", + po::value(&context.coarsening.num_sub_rounds_deterministic) + ->value_name("") + ->default_value(16), + "Number of sub-rounds used for deterministic coarsening."); + return options; +} + +po::options_description +createRefinementOptionsDescription(Context &context, const int num_columns, + const bool initial_partitioning) +{ + po::options_description options("Refinement Options", num_columns); + options.add_options()( + (initial_partitioning ? "i-r-refine-until-no-improvement" : + "r-refine-until-no-improvement"), + po::value( + (!initial_partitioning ? + &context.refinement.refine_until_no_improvement : + &context.initial_partitioning.refinement.refine_until_no_improvement)) + ->value_name("") + ->default_value(false), + "Executes all refinement algorithms as long as they find an improvement on the " + "current partition.")( + (initial_partitioning ? "i-r-relative-improvement-threshold" : + "r-relative-improvement-threshold"), + po::value( + (!initial_partitioning ? + &context.refinement.relative_improvement_threshold : + &context.initial_partitioning.refinement.relative_improvement_threshold)) + ->value_name("") + ->default_value(0.0), + "If the relative improvement during a refinement pass is less than this threshold, " + "than refinement is aborted.")( + (initial_partitioning ? "i-r-max-batch-size" : "r-max-batch-size"), + po::value((!initial_partitioning ? + &context.refinement.max_batch_size : + &context.initial_partitioning.refinement.max_batch_size)) + ->value_name("") + ->default_value(1000), + "Maximum size of an uncontraction batch (n-Level Partitioner).")( + (initial_partitioning ? "i-r-min-border-vertices-per-thread" : + "r-min-border-vertices-per-thread"), + po::value( + (!initial_partitioning ? 
+ &context.refinement.min_border_vertices_per_thread : + &context.initial_partitioning.refinement.min_border_vertices_per_thread)) + ->value_name("") + ->default_value(0), + "Minimum number of border vertices per thread with which we perform a localized " + "search (n-Level Partitioner).")( + (initial_partitioning ? "i-r-lp-type" : "r-lp-type"), + po::value() + ->value_name("") + ->notifier([&, initial_partitioning](const std::string &type) { + if(initial_partitioning) + { + context.initial_partitioning.refinement.label_propagation.algorithm = + labelPropagationAlgorithmFromString(type); + } + else + { + context.refinement.label_propagation.algorithm = + labelPropagationAlgorithmFromString(type); + } + }) + ->default_value("label_propagation"), + "Label Propagation Algorithm:\n" + "- label_propagation\n" + "- deterministic\n" + "- do_nothing")( + (initial_partitioning ? "i-r-lp-maximum-iterations" : "r-lp-maximum-iterations"), + po::value((!initial_partitioning ? + &context.refinement.label_propagation.maximum_iterations : + &context.initial_partitioning.refinement.label_propagation + .maximum_iterations)) + ->value_name("") + ->default_value(5), + "Maximum number of label propagation rounds")( + (initial_partitioning ? "i-r-sync-lp-sub-rounds" : "r-sync-lp-sub-rounds"), + po::value( + (!initial_partitioning ? + &context.refinement.deterministic_refinement.num_sub_rounds_sync_lp : + &context.initial_partitioning.refinement.deterministic_refinement + .num_sub_rounds_sync_lp)) + ->value_name("") + ->default_value(5), + "Number of sub-rounds for deterministic synchronous label propagation")( + (initial_partitioning ? "i-r-sync-lp-active-nodeset" : "r-sync-lp-active-nodeset"), + po::value( + (!initial_partitioning ? + &context.refinement.deterministic_refinement.use_active_node_set : + &context.initial_partitioning.refinement.deterministic_refinement + .use_active_node_set)) + ->value_name("") + ->default_value(true), + "Use active nodeset in synchronous label propagation")( + (initial_partitioning ? "i-r-lp-rebalancing" : "r-lp-rebalancing"), + po::value( + (!initial_partitioning ? + &context.refinement.label_propagation.rebalancing : + &context.initial_partitioning.refinement.label_propagation.rebalancing)) + ->value_name("") + ->default_value(true), + "If true, then zero gain moves are only performed if they improve the balance of " + "the solution (only in label propagation)")( + (initial_partitioning ? "i-r-lp-unconstrained" : "r-lp-unconstrained"), + po::value( + (!initial_partitioning ? + &context.refinement.label_propagation.unconstrained : + &context.initial_partitioning.refinement.label_propagation.unconstrained)) + ->value_name("") + ->default_value(false), + "If true, then unconstrained label propagation (including rebalancing) is used.")( + (initial_partitioning ? "i-r-lp-he-size-activation-threshold" : + "r-lp-he-size-activation-threshold"), + po::value( + (!initial_partitioning ? + &context.refinement.label_propagation.hyperedge_size_activation_threshold : + &context.initial_partitioning.refinement.label_propagation + .hyperedge_size_activation_threshold)) + ->value_name("") + ->default_value(100), + "LP refiner activates only neighbors of moved vertices that are part of hyperedges " + "with a size less than this threshold")( + (initial_partitioning ? "i-r-lp-relative-improvement-threshold" : + "r-lp-relative-improvement-threshold"), + po::value( + (!initial_partitioning ? 
+ &context.refinement.label_propagation.relative_improvement_threshold : + &context.initial_partitioning.refinement.label_propagation + .relative_improvement_threshold)) + ->value_name("") + ->default_value(-1.0), + "Relative improvement threshold for label propagation.")( + (initial_partitioning ? "i-r-fm-type" : "r-fm-type"), + po::value() + ->value_name("") + ->notifier([&, initial_partitioning](const std::string &type) { + if(initial_partitioning) + { + context.initial_partitioning.refinement.fm.algorithm = + fmAlgorithmFromString(type); + } + else + { + context.refinement.fm.algorithm = fmAlgorithmFromString(type); + } + }) + ->default_value("kway_fm"), + "FM Algorithm:\n" + "- kway_fm\n" + "- unconstrained_fm\n" + "- do_nothing")( + (initial_partitioning ? "i-r-fm-multitry-rounds" : "r-fm-multitry-rounds"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.fm.multitry_rounds : + &context.refinement.fm.multitry_rounds)) + ->value_name("") + ->default_value(10), + "Number of FM rounds within one level of the multilevel hierarchy.")( + (initial_partitioning ? "i-r-fm-seed-nodes" : "r-fm-seed-nodes"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.fm.num_seed_nodes : + &context.refinement.fm.num_seed_nodes)) + ->value_name("") + ->default_value(25), + "Number of nodes to start the 'highly localized FM' with.")( + (initial_partitioning ? "i-r-fm-rollback-parallel" : "r-fm-rollback-parallel"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.fm.rollback_parallel : + &context.refinement.fm.rollback_parallel)) + ->value_name("") + ->default_value(true), + "Perform gain and balance recalculation, and reverting to best prefix in " + "parallel.")( + (initial_partitioning ? "i-r-fm-iter-moves-on-recalc" : "r-fm-iter-moves-on-recalc"), - po::value((initial_partitioning ? - &context.initial_partitioning.refinement.fm.iter_moves_on_recalc : - &context.refinement.fm.iter_moves_on_recalc)) - ->value_name("")->default_value(false), - "Touch only incident hyperedges of moved vertices for parallel gain recalculation.") - ((initial_partitioning ? "i-r-fm-rollback-balance-violation-factor" - : "r-fm-rollback-balance-violation-factor"), - po::value((initial_partitioning - ? &context.initial_partitioning.refinement.fm.rollback_balance_violation_factor : - &context.refinement.fm.rollback_balance_violation_factor))->value_name( - "")->default_value(1.25), - "Used to relax or disable the balance constraint during the rollback phase of parallel FM." - "Set to 0 for disabling. Set to a value > 1.0 to multiply epsilon with this value.") - ((initial_partitioning ? "i-r-fm-min-improvement" : "r-fm-min-improvement"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.min_improvement : - &context.refinement.fm.min_improvement))->value_name("")->default_value(-1.0), - "Min improvement for FM (default disabled)") - ((initial_partitioning ? "i-r-fm-release-nodes" : "r-fm-release-nodes"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.release_nodes : - &context.refinement.fm.release_nodes))->value_name("")->default_value(true), - "FM releases nodes that weren't moved, so they might be found by another search.") - ((initial_partitioning ? "i-r-fm-threshold-border-node-inclusion" : "r-fm-threshold-border-node-inclusion"), - po::value((initial_partitioning ? 
&context.initial_partitioning.refinement.fm.treshold_border_node_inclusion : - &context.refinement.fm.treshold_border_node_inclusion))->value_name("")->default_value(0.75), - "Threshold for block-internal incident weight when deciding whether to include border nodes for rebalancing estimation.") - ((initial_partitioning ? "i-r-fm-unconstrained-upper-bound" : "r-fm-unconstrained-upper-bound"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.unconstrained_upper_bound : - &context.refinement.fm.unconstrained_upper_bound))->value_name("")->default_value(0.0), - "Still use upper limit for imbalance with unconstrained FM, expressed as a factor of the max part weight (default = 0 = no limit).") - ((initial_partitioning ? "i-r-fm-unconstrained-rounds" : "r-fm-unconstrained-rounds"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.unconstrained_rounds : - &context.refinement.fm.unconstrained_rounds))->value_name("")->default_value(8), - "Unconstrained FM: Number of rounds that are unconstrained.") - ((initial_partitioning ? "i-r-fm-imbalance-penalty-min" : "r-fm-imbalance-penalty-min"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.imbalance_penalty_min : - &context.refinement.fm.imbalance_penalty_min))->value_name("")->default_value(0.2), - "Unconstrained FM: Minimum (starting) penalty factor.") - ((initial_partitioning ? "i-r-fm-imbalance-penalty-max" : "r-fm-imbalance-penalty-max"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.imbalance_penalty_max : - &context.refinement.fm.imbalance_penalty_max))->value_name("")->default_value(1.0), - "Unconstrained FM: Maximum (final) penalty factor.") - ((initial_partitioning ? "i-r-fm-unconstrained-upper-bound-min" : "r-fm-unconstrained-upper-bound-min"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.unconstrained_upper_bound_min : - &context.refinement.fm.unconstrained_upper_bound_min))->value_name("")->default_value(0.0), - "Unconstrained FM: Minimum (final) upper bound (default = 0 = equal to start).") - ((initial_partitioning ? "i-r-fm-activate-unconstrained-dynamically" : "r-fm-activate-unconstrained-dynamically"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.activate_unconstrained_dynamically : - &context.refinement.fm.activate_unconstrained_dynamically))->value_name("")->default_value(false), - "Decide dynamically (based on first two rounds) whether to use unconstrained FM.") - ((initial_partitioning ? "i-r-fm-penalty-for-activation-test" : "r-fm-penalty-for-activation-test"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.penalty_for_activation_test : - &context.refinement.fm.penalty_for_activation_test))->value_name("")->default_value(0.5), - "If unconstrained FM is activated dynamically, determines the penalty factor used for the test round.") - ((initial_partitioning ? "i-r-fm-unconstrained-min-improvement" : "r-fm-unconstrained-min-improvement"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.unconstrained_min_improvement : - &context.refinement.fm.unconstrained_min_improvement))->value_name("")->default_value(-1.0), - "Switch to constrained FM if relative improvement of unconstrained FM is below this treshold.") - ((initial_partitioning ? "i-r-fm-obey-minimal-parallelism" : "r-fm-obey-minimal-parallelism"), - po::value( - (initial_partitioning ? 
&context.initial_partitioning.refinement.fm.obey_minimal_parallelism : - &context.refinement.fm.obey_minimal_parallelism))->value_name("")->default_value(true), - "If true, then parallel FM refinement stops if more than a certain number of threads are finished.") - ((initial_partitioning ? "i-r-fm-time-limit-factor" : "r-fm-time-limit-factor"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.fm.time_limit_factor : - &context.refinement.fm.time_limit_factor))->value_name("")->default_value(0.25), - "If the FM time exceeds time_limit := k * factor * coarsening_time, than the FM config is switched into a light version." - "If the FM refiner exceeds 2 * time_limit, than the current multitry FM run is aborted and the algorithm proceeds to" - "the next finer level.") - ((initial_partitioning ? "i-r-use-global-fm" : "r-use-global-fm"), - po::value((!initial_partitioning ? &context.refinement.global_fm.use_global_fm : - &context.initial_partitioning.refinement.global_fm.use_global_fm))->value_name( - "")->default_value(false), - "If true, than we execute a globalized FM local search interleaved with the localized searches." - "Note, gobalized FM local searches are performed in multilevel style (not after each batch uncontraction)") - ((initial_partitioning ? "i-r-global-refine-until-no-improvement" : "r-global-refine-until-no-improvement"), - po::value((!initial_partitioning ? &context.refinement.global_fm.refine_until_no_improvement : - &context.initial_partitioning.refinement.global_fm.refine_until_no_improvement))->value_name( - "")->default_value(false), - "Executes a globalized FM local search as long as it finds an improvement on the current partition.") - ((initial_partitioning ? "i-r-global-fm-seed-nodes" : "r-global-fm-seed-nodes"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.global_fm.num_seed_nodes : - &context.refinement.global_fm.num_seed_nodes))->value_name("")->default_value(25), - "Number of nodes to start the 'highly localized FM' with during the globalized FM local search.") - ((initial_partitioning ? "i-r-global-fm-obey-minimal-parallelism" : "r-global-fm-obey-minimal-parallelism"), - po::value( - (initial_partitioning ? &context.initial_partitioning.refinement.global_fm.obey_minimal_parallelism : - &context.refinement.global_fm.obey_minimal_parallelism))->value_name("")->default_value(true), - "If true, then the globalized FM local search stops if more than a certain number of threads are finished.") - ((initial_partitioning ? "i-r-rebalancer-type" : "r-rebalancer-type"), - po::value()->value_name("")->notifier( - [&, initial_partitioning](const std::string& type) { - if (initial_partitioning) { - context.initial_partitioning.refinement.rebalancer = rebalancingAlgorithmFromString(type); - } else { - context.refinement.rebalancer = rebalancingAlgorithmFromString(type); - } - })->default_value("do_nothing"), - "Rebalancer Algorithm:\n" - "- simple_rebalancer\n" - "- advanced_rebalancer\n" - "- do_nothing"); - return options; - } + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.fm.iter_moves_on_recalc : + &context.refinement.fm.iter_moves_on_recalc)) + ->value_name("") + ->default_value(false), + "Touch only incident hyperedges of moved vertices for parallel gain " + "recalculation.")( + (initial_partitioning ? "i-r-fm-rollback-balance-violation-factor" : + "r-fm-rollback-balance-violation-factor"), + po::value((initial_partitioning ? 
+ &context.initial_partitioning.refinement.fm + .rollback_balance_violation_factor : + &context.refinement.fm.rollback_balance_violation_factor)) + ->value_name("") + ->default_value(1.25), + "Used to relax or disable the balance constraint during the rollback phase of " + "parallel FM." + "Set to 0 for disabling. Set to a value > 1.0 to multiply epsilon with this " + "value.")( + (initial_partitioning ? "i-r-fm-min-improvement" : "r-fm-min-improvement"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.fm.min_improvement : + &context.refinement.fm.min_improvement)) + ->value_name("") + ->default_value(-1.0), + "Min improvement for FM (default disabled)")( + (initial_partitioning ? "i-r-fm-release-nodes" : "r-fm-release-nodes"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.fm.release_nodes : + &context.refinement.fm.release_nodes)) + ->value_name("") + ->default_value(true), + "FM releases nodes that weren't moved, so they might be found by another search.")( + (initial_partitioning ? "i-r-fm-threshold-border-node-inclusion" : + "r-fm-threshold-border-node-inclusion"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.fm + .treshold_border_node_inclusion : + &context.refinement.fm.treshold_border_node_inclusion)) + ->value_name("") + ->default_value(0.75), + "Threshold for block-internal incident weight when deciding whether to include " + "border nodes for rebalancing estimation.")( + (initial_partitioning ? "i-r-fm-unconstrained-upper-bound" : + "r-fm-unconstrained-upper-bound"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.fm.unconstrained_upper_bound : + &context.refinement.fm.unconstrained_upper_bound)) + ->value_name("") + ->default_value(0.0), + "Still use upper limit for imbalance with unconstrained FM, expressed as a factor " + "of the max part weight (default = 0 = no limit).")( + (initial_partitioning ? "i-r-fm-unconstrained-rounds" : + "r-fm-unconstrained-rounds"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.fm.unconstrained_rounds : + &context.refinement.fm.unconstrained_rounds)) + ->value_name("") + ->default_value(8), + "Unconstrained FM: Number of rounds that are unconstrained.")( + (initial_partitioning ? "i-r-fm-imbalance-penalty-min" : + "r-fm-imbalance-penalty-min"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.fm.imbalance_penalty_min : + &context.refinement.fm.imbalance_penalty_min)) + ->value_name("") + ->default_value(0.2), + "Unconstrained FM: Minimum (starting) penalty factor.")( + (initial_partitioning ? "i-r-fm-imbalance-penalty-max" : + "r-fm-imbalance-penalty-max"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.fm.imbalance_penalty_max : + &context.refinement.fm.imbalance_penalty_max)) + ->value_name("") + ->default_value(1.0), + "Unconstrained FM: Maximum (final) penalty factor.")( + (initial_partitioning ? "i-r-fm-unconstrained-upper-bound-min" : + "r-fm-unconstrained-upper-bound-min"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.fm.unconstrained_upper_bound_min : + &context.refinement.fm.unconstrained_upper_bound_min)) + ->value_name("") + ->default_value(0.0), + "Unconstrained FM: Minimum (final) upper bound (default = 0 = equal to start).")( + (initial_partitioning ? 
"i-r-fm-activate-unconstrained-dynamically" : + "r-fm-activate-unconstrained-dynamically"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.fm + .activate_unconstrained_dynamically : + &context.refinement.fm.activate_unconstrained_dynamically)) + ->value_name("") + ->default_value(false), + "Decide dynamically (based on first two rounds) whether to use unconstrained FM.")( + (initial_partitioning ? "i-r-fm-penalty-for-activation-test" : + "r-fm-penalty-for-activation-test"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.fm.penalty_for_activation_test : + &context.refinement.fm.penalty_for_activation_test)) + ->value_name("") + ->default_value(0.5), + "If unconstrained FM is activated dynamically, determines the penalty factor used " + "for the test round.")( + (initial_partitioning ? "i-r-fm-unconstrained-min-improvement" : + "r-fm-unconstrained-min-improvement"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.fm.unconstrained_min_improvement : + &context.refinement.fm.unconstrained_min_improvement)) + ->value_name("") + ->default_value(-1.0), + "Switch to constrained FM if relative improvement of unconstrained FM is below " + "this treshold.")( + (initial_partitioning ? "i-r-fm-obey-minimal-parallelism" : + "r-fm-obey-minimal-parallelism"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.fm.obey_minimal_parallelism : + &context.refinement.fm.obey_minimal_parallelism)) + ->value_name("") + ->default_value(true), + "If true, then parallel FM refinement stops if more than a certain number of " + "threads are finished.")( + (initial_partitioning ? "i-r-fm-time-limit-factor" : "r-fm-time-limit-factor"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.fm.time_limit_factor : + &context.refinement.fm.time_limit_factor)) + ->value_name("") + ->default_value(0.25), + "If the FM time exceeds time_limit := k * factor * coarsening_time, than the FM " + "config is switched into a light version." + "If the FM refiner exceeds 2 * time_limit, than the current multitry FM run is " + "aborted and the algorithm proceeds to" + "the next finer level.")( + (initial_partitioning ? "i-r-use-global-fm" : "r-use-global-fm"), + po::value( + (!initial_partitioning ? + &context.refinement.global_fm.use_global_fm : + &context.initial_partitioning.refinement.global_fm.use_global_fm)) + ->value_name("") + ->default_value(false), + "If true, than we execute a globalized FM local search interleaved with the " + "localized searches." + "Note, gobalized FM local searches are performed in multilevel style (not after " + "each batch uncontraction)")( + (initial_partitioning ? "i-r-global-refine-until-no-improvement" : + "r-global-refine-until-no-improvement"), + po::value((!initial_partitioning ? + &context.refinement.global_fm.refine_until_no_improvement : + &context.initial_partitioning.refinement.global_fm + .refine_until_no_improvement)) + ->value_name("") + ->default_value(false), + "Executes a globalized FM local search as long as it finds an improvement on the " + "current partition.")( + (initial_partitioning ? "i-r-global-fm-seed-nodes" : "r-global-fm-seed-nodes"), + po::value( + (initial_partitioning ? 
+ &context.initial_partitioning.refinement.global_fm.num_seed_nodes : + &context.refinement.global_fm.num_seed_nodes)) + ->value_name("") + ->default_value(25), + "Number of nodes to start the 'highly localized FM' with during the globalized FM " + "local search.")( + (initial_partitioning ? "i-r-global-fm-obey-minimal-parallelism" : + "r-global-fm-obey-minimal-parallelism"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.global_fm + .obey_minimal_parallelism : + &context.refinement.global_fm.obey_minimal_parallelism)) + ->value_name("") + ->default_value(true), + "If true, then the globalized FM local search stops if more than a certain number " + "of threads are finished.")( + (initial_partitioning ? "i-r-rebalancer-type" : "r-rebalancer-type"), + po::value() + ->value_name("") + ->notifier([&, initial_partitioning](const std::string &type) { + if(initial_partitioning) + { + context.initial_partitioning.refinement.rebalancer = + rebalancingAlgorithmFromString(type); + } + else + { + context.refinement.rebalancer = rebalancingAlgorithmFromString(type); + } + }) + ->default_value("do_nothing"), + "Rebalancer Algorithm:\n" + "- simple_rebalancer\n" + "- advanced_rebalancer\n" + "- do_nothing"); + return options; +} - po::options_description createFlowRefinementOptionsDescription(Context& context, - const int num_columns, - const bool initial_partitioning) { - po::options_description options("Initial Partitioning Options", num_columns); - options.add_options() - ((initial_partitioning ? "i-r-flow-algo" : "r-flow-algo"), - po::value()->value_name("")->notifier( - [&, initial_partitioning](const std::string& algo) { - if ( initial_partitioning ) { - context.initial_partitioning.refinement.flows.algorithm = flowAlgorithmFromString(algo); - } else { - context.refinement.flows.algorithm = flowAlgorithmFromString(algo); - } - })->default_value("do_nothing"), - "Flow Algorithms:\n" - "- do_nothing\n" - "- flow_cutter") - ((initial_partitioning ? "i-r-flow-parallel-search-multiplier" : "r-flow-parallel-search-multiplier"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.flows.parallel_searches_multiplier : - &context.refinement.flows.parallel_searches_multiplier))->value_name(""), - "Active block scheduling starts min(num_threads, mult * k) parallel searches") - ((initial_partitioning ? "i-r-flow-max-bfs-distance" : "r-flow-max-bfs-distance"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.flows.max_bfs_distance : - &context.refinement.flows.max_bfs_distance))->value_name(""), - "Flow problems are constructed via BFS search. The maximum BFS distance is the\n" - "maximum distance from a cut hyperedge to any vertex of the problem.") - ((initial_partitioning ? "i-r-flow-min-relative-improvement-per-round" : "r-flow-min-relative-improvement-per-round"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.flows.min_relative_improvement_per_round : - &context.refinement.flows.min_relative_improvement_per_round))->value_name(""), - "Minimum relative improvement per active block scheduling round. If improvement is smaller than flow algorithm terminates.") - ((initial_partitioning ? "i-r-flow-time-limit-factor" : "r-flow-time-limit-factor"), - po::value((initial_partitioning ? 
&context.initial_partitioning.refinement.flows.time_limit_factor : - &context.refinement.flows.time_limit_factor))->value_name(""), - "The time limit for each flow problem is time_limit_factor * average running time of all previous searches.") - ((initial_partitioning ? "i-r-flow-skip-small-cuts" : "r-flow-skip-small-cuts"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.flows.skip_small_cuts : - &context.refinement.flows.skip_small_cuts))->value_name(""), - "If true, than blocks with a cut <= 10 are not considered for refinement") - ((initial_partitioning ? "i-r-flow-skip-unpromising-blocks" : "r-flow-skip-unpromising-blocks"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.flows.skip_unpromising_blocks : - &context.refinement.flows.skip_unpromising_blocks))->value_name(""), - "If true, than blocks for which we never found an improvement are skipped") - ((initial_partitioning ? "i-r-flow-pierce-in-bulk" : "r-flow-pierce-in-bulk"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.flows.pierce_in_bulk : - &context.refinement.flows.pierce_in_bulk))->value_name(""), - "If true, then FlowCutter is accelerated by piercing multiple nodes at a time") - ((initial_partitioning ? "i-r-flow-scaling" : "r-flow-scaling"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.flows.alpha : - &context.refinement.flows.alpha))->value_name(""), - "Size constraint for flow problem: (1 + alpha * epsilon) * c(V) / k - c(V_1) (alpha = r-flow-scaling)") - ((initial_partitioning ? "i-r-flow-max-num-pins" : "r-flow-max-num-pins"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.flows.max_num_pins : - &context.refinement.flows.max_num_pins))->value_name(""), - "Maximum number of pins a flow problem is allowed to contain") - ((initial_partitioning ? "i-r-flow-find-most-balanced-cut" : "r-flow-find-most-balanced-cut"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.flows.find_most_balanced_cut : - &context.refinement.flows.find_most_balanced_cut))->value_name(""), - "If true, than hyperflowcutter searches for the most balanced minimum cut.") - ((initial_partitioning ? "i-r-flow-determine-distance-from-cut" : "r-flow-determine-distance-from-cut"), - po::value((initial_partitioning ? &context.initial_partitioning.refinement.flows.determine_distance_from_cut : - &context.refinement.flows.determine_distance_from_cut))->value_name(""), - "If true, than flow refiner determines distance of each node from cut which improves the piercing heuristic used in WHFC.") - ((initial_partitioning ? "i-r-flow-process-mapping-policy" : "r-flow-process-mapping-policy"), - po::value()->value_name("")->notifier( - [&, initial_partitioning](const std::string& policy) { - if ( initial_partitioning ) { - context.initial_partitioning.refinement.flows.steiner_tree_policy = - steinerTreeFlowValuePolicyFromString(policy); - } else { - context.refinement.flows.steiner_tree_policy = - steinerTreeFlowValuePolicyFromString(policy); - } - }), - "This option is only important for the Steiner tree metric. For flow-based refinement on hypergraphs, we cannot.\n" - "guarantee that the improvement found by solving the flow problem matches the exact improvement when we\n" - "applied on the hypergraph. However, we can either guarantee that improvement is an lower or upper bound for\n" - "the actual improvement. 
Therefore, the supported options are:\n" - "- lower_bound\n" - "- upper_bound"); - return options; - } +po::options_description +createFlowRefinementOptionsDescription(Context &context, const int num_columns, + const bool initial_partitioning) +{ + po::options_description options("Flow Refinement Options", num_columns); + options.add_options()( + (initial_partitioning ? "i-r-flow-algo" : "r-flow-algo"), + po::value() + ->value_name("") + ->notifier([&, initial_partitioning](const std::string &algo) { + if(initial_partitioning) + { + context.initial_partitioning.refinement.flows.algorithm = + flowAlgorithmFromString(algo); + } + else + { + context.refinement.flows.algorithm = flowAlgorithmFromString(algo); + } + }) + ->default_value("do_nothing"), + "Flow Algorithms:\n" + "- do_nothing\n" + "- flow_cutter")( + (initial_partitioning ? "i-r-flow-parallel-search-multiplier" : + "r-flow-parallel-search-multiplier"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.flows + .parallel_searches_multiplier : + &context.refinement.flows.parallel_searches_multiplier)) + ->value_name(""), + "Active block scheduling starts min(num_threads, mult * k) parallel searches")( + (initial_partitioning ? "i-r-flow-max-bfs-distance" : "r-flow-max-bfs-distance"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.flows.max_bfs_distance : + &context.refinement.flows.max_bfs_distance)) + ->value_name(""), + "Flow problems are constructed via BFS search. The maximum BFS distance is the\n" + "maximum distance from a cut hyperedge to any vertex of the problem.")( + (initial_partitioning ? "i-r-flow-min-relative-improvement-per-round" : + "r-flow-min-relative-improvement-per-round"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.flows + .min_relative_improvement_per_round : + &context.refinement.flows.min_relative_improvement_per_round)) + ->value_name(""), + "Minimum relative improvement per active block scheduling round. If the improvement " + "is smaller, the flow algorithm terminates.")( + (initial_partitioning ? "i-r-flow-time-limit-factor" : "r-flow-time-limit-factor"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.flows.time_limit_factor : + &context.refinement.flows.time_limit_factor)) + ->value_name(""), + "The time limit for each flow problem is time_limit_factor * average running time " + "of all previous searches.")( + (initial_partitioning ? "i-r-flow-skip-small-cuts" : "r-flow-skip-small-cuts"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.flows.skip_small_cuts : + &context.refinement.flows.skip_small_cuts)) + ->value_name(""), + "If true, then blocks with a cut <= 10 are not considered for refinement")( + (initial_partitioning ? "i-r-flow-skip-unpromising-blocks" : + "r-flow-skip-unpromising-blocks"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.flows.skip_unpromising_blocks : + &context.refinement.flows.skip_unpromising_blocks)) + ->value_name(""), + "If true, then blocks for which we never found an improvement are skipped")( + (initial_partitioning ? "i-r-flow-pierce-in-bulk" : "r-flow-pierce-in-bulk"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.flows.pierce_in_bulk : + &context.refinement.flows.pierce_in_bulk)) + ->value_name(""), + "If true, then FlowCutter is accelerated by piercing multiple nodes at a time")( + (initial_partitioning ? 
"i-r-flow-scaling" : "r-flow-scaling"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.flows.alpha : + &context.refinement.flows.alpha)) + ->value_name(""), + "Size constraint for flow problem: (1 + alpha * epsilon) * c(V) / k - c(V_1) " + "(alpha = r-flow-scaling)")( + (initial_partitioning ? "i-r-flow-max-num-pins" : "r-flow-max-num-pins"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.flows.max_num_pins : + &context.refinement.flows.max_num_pins)) + ->value_name(""), + "Maximum number of pins a flow problem is allowed to contain")( + (initial_partitioning ? "i-r-flow-find-most-balanced-cut" : + "r-flow-find-most-balanced-cut"), + po::value( + (initial_partitioning ? + &context.initial_partitioning.refinement.flows.find_most_balanced_cut : + &context.refinement.flows.find_most_balanced_cut)) + ->value_name(""), + "If true, than hyperflowcutter searches for the most balanced minimum cut.")( + (initial_partitioning ? "i-r-flow-determine-distance-from-cut" : + "r-flow-determine-distance-from-cut"), + po::value((initial_partitioning ? + &context.initial_partitioning.refinement.flows + .determine_distance_from_cut : + &context.refinement.flows.determine_distance_from_cut)) + ->value_name(""), + "If true, than flow refiner determines distance of each node from cut which " + "improves the piercing heuristic used in WHFC.")( + (initial_partitioning ? "i-r-flow-process-mapping-policy" : + "r-flow-process-mapping-policy"), + po::value() + ->value_name("") + ->notifier([&, initial_partitioning](const std::string &policy) { + if(initial_partitioning) + { + context.initial_partitioning.refinement.flows.steiner_tree_policy = + steinerTreeFlowValuePolicyFromString(policy); + } + else + { + context.refinement.flows.steiner_tree_policy = + steinerTreeFlowValuePolicyFromString(policy); + } + }), + "This option is only important for the Steiner tree metric. For flow-based " + "refinement on hypergraphs, we cannot.\n" + "guarantee that the improvement found by solving the flow problem matches the " + "exact improvement when we\n" + "applied on the hypergraph. However, we can either guarantee that improvement is " + "an lower or upper bound for\n" + "the actual improvement. Therefore, the supported options are:\n" + "- lower_bound\n" + "- upper_bound"); + return options; +} - po::options_description createInitialPartitioningOptionsDescription(Context& context, const int num_columns) { - po::options_description options("Initial Partitioning Options", num_columns); - options.add_options() - ("i-mode", - po::value()->value_name("")->notifier( - [&](const std::string& mode) { - context.initial_partitioning.mode = modeFromString(mode); - })->default_value("rb"), - "Mode of initial partitioning:\n" - "- direct\n" - "- deep\n" - "- rb") - ("i-enabled-ip-algos", - po::value >(&context.initial_partitioning.enabled_ip_algos)->multitoken(), - "Indicate which IP algorithms should be executed. E.g. i-enabled-ip-algos=1 1 0 1 0 1 1 1 0\n" - "indicates that\n" - " 1.) greedy_round_robin_fm (is executed)\n" - " 2.) greedy_global_fm (is executed)\n" - " 3.) greedy_sequential_fm (is NOT executed)\n" - " 4.) random (is executed)\n" - " 5.) bfs (is NOT executed)\n" - " 6.) label_propagation (is executed)\n" - " 7.) greedy_round_robin_max_net (is executed)\n" - " 8.) greedy_global_max_net (is executed)\n" - " 9.) 
greedy_sequential_max_net (is NOT executed)\n" - "Note vector must exactly contain 9 values otherwise partitioner will exit with failure") - ("i-runs", - po::value(&context.initial_partitioning.runs)->value_name("")->default_value(20), - "Number of runs for each bipartitioning algorithm.") - ("i-use-adaptive-ip-runs", - po::value(&context.initial_partitioning.use_adaptive_ip_runs)->value_name("")->default_value(true), - "If true, than each initial partitioner decides if it should further continue partitioning based on the" - "quality produced by itself compared to the quality of the other partitioners. If it is not likely that the partitioner" - "will produce a solution with a quality better than the current best, further runs of that partitioner are omitted.") - ("i-min-adaptive-ip-runs", - po::value(&context.initial_partitioning.min_adaptive_ip_runs)->value_name("")->default_value(5), - "If adaptive IP runs is enabled, than each initial partitioner performs minimum min_adaptive_ip_runs runs before\n" - "it decides if it should terminate.") - ("i-population-size", - po::value(&context.initial_partitioning.population_size)->value_name("")->default_value(16), - "Size of population of flat bipartitions to perform secondary FM refinement on in deterministic mode." - "Values < num threads are set to num threads. Does not affect behavior in non-deterministic mode.") - ("i-perform-refinement-on-best-partitions", - po::value(&context.initial_partitioning.perform_refinement_on_best_partitions)->value_name("")->default_value(false), - "If true, then we perform an additional refinement on the best thread local partitions after IP.") - ("i-fm-refinement-rounds", - po::value(&context.initial_partitioning.fm_refinment_rounds)->value_name("")->default_value(1), - "Maximum number of 2-way FM local searches on each bipartition produced by an initial partitioner.") - ("i-remove-degree-zero-hns-before-ip", - po::value(&context.initial_partitioning.remove_degree_zero_hns_before_ip)->value_name("")->default_value(true), - "If true, degree-zero vertices are removed before initial partitioning.") - ("i-lp-maximum-iterations", - po::value(&context.initial_partitioning.lp_maximum_iterations)->value_name( - "")->default_value(20), - "Maximum number of iterations of label propagation initial partitioner") - ("i-lp-initial-block-size", - po::value(&context.initial_partitioning.lp_initial_block_size)->value_name( - "")->default_value(5), - "Initial block size used for label propagation initial partitioner"); - options.add(createRefinementOptionsDescription(context, num_columns, true)); - options.add(createFlowRefinementOptionsDescription(context, num_columns, true)); - return options; - } +po::options_description createInitialPartitioningOptionsDescription(Context &context, + const int num_columns) +{ + po::options_description options("Initial Partitioning Options", num_columns); + options.add_options()("i-mode", + po::value() + ->value_name("") + ->notifier([&](const std::string &mode) { + context.initial_partitioning.mode = modeFromString(mode); + }) + ->default_value("rb"), + "Mode of initial partitioning:\n" + "- direct\n" + "- deep\n" + "- rb")( + "i-enabled-ip-algos", + po::value >(&context.initial_partitioning.enabled_ip_algos) + ->multitoken(), + "Indicate which IP algorithms should be executed. E.g. i-enabled-ip-algos=1 1 0 1 " + "0 1 1 1 0\n" + "indicates that\n" + " 1.) greedy_round_robin_fm (is executed)\n" + " 2.) greedy_global_fm (is executed)\n" + " 3.) greedy_sequential_fm (is NOT executed)\n" + " 4.) 
random (is executed)\n" + " 5.) bfs (is NOT executed)\n" + " 6.) label_propagation (is executed)\n" + " 7.) greedy_round_robin_max_net (is executed)\n" + " 8.) greedy_global_max_net (is executed)\n" + " 9.) greedy_sequential_max_net (is NOT executed)\n" + "Note: the vector must contain exactly 9 values, otherwise the partitioner will " + "exit with failure")("i-runs", + po::value(&context.initial_partitioning.runs) + ->value_name("") + ->default_value(20), + "Number of runs for each bipartitioning algorithm.")( + "i-use-adaptive-ip-runs", + po::value(&context.initial_partitioning.use_adaptive_ip_runs) + ->value_name("") + ->default_value(true), + "If true, then each initial partitioner decides if it should further continue " + "partitioning based on the " + "quality produced by itself compared to the quality of the other partitioners. If " + "it is not likely that the partitioner " + "will produce a solution with a quality better than the current best, further runs " + "of that partitioner are omitted.")( + "i-min-adaptive-ip-runs", + po::value(&context.initial_partitioning.min_adaptive_ip_runs) + ->value_name("") + ->default_value(5), + "If adaptive IP runs is enabled, then each initial partitioner performs at least " + "min_adaptive_ip_runs runs before\n" + "it decides if it should terminate.")( + "i-population-size", + po::value(&context.initial_partitioning.population_size) + ->value_name("") + ->default_value(16), + "Size of population of flat bipartitions to perform secondary FM refinement on in " + "deterministic mode. " + "Values < num threads are set to num threads. Does not affect behavior in " + "non-deterministic mode.")( + "i-perform-refinement-on-best-partitions", + po::value(&context.initial_partitioning.perform_refinement_on_best_partitions) + ->value_name("") + ->default_value(false), + "If true, then we perform an additional refinement on the best thread local " + "partitions after IP.")( + "i-fm-refinement-rounds", + po::value(&context.initial_partitioning.fm_refinment_rounds) + ->value_name("") + ->default_value(1), + "Maximum number of 2-way FM local searches on each bipartition produced by an " + "initial partitioner.")( + "i-remove-degree-zero-hns-before-ip", + po::value(&context.initial_partitioning.remove_degree_zero_hns_before_ip) + ->value_name("") + ->default_value(true), + "If true, degree-zero vertices are removed before initial partitioning.")( + "i-lp-maximum-iterations", + po::value(&context.initial_partitioning.lp_maximum_iterations) + ->value_name("") + ->default_value(20), + "Maximum number of iterations of label propagation initial partitioner")( + "i-lp-initial-block-size", + po::value(&context.initial_partitioning.lp_initial_block_size) + ->value_name("") + ->default_value(5), + "Initial block size used for label propagation initial partitioner"); + options.add(createRefinementOptionsDescription(context, num_columns, true)); + options.add(createFlowRefinementOptionsDescription(context, num_columns, true)); + return options; +} - po::options_description createMappingOptionsDescription(Context& context, - const int num_columns) { - po::options_description mapping_options("Mapping Options", num_columns); - mapping_options.add_options() - ("target-graph-file,g", - po::value(&context.mapping.target_graph_file)->value_name(""), - "Path to a target architecture graph in Metis file format.") - ("one-to-one-mapping-strategy", - po::value()->value_name("")->notifier( - [&](const std::string& strategy) { - context.mapping.strategy = 
oneToOneMappingStrategyFromString(strategy); - }), - "Strategy for solving the one-to-one mapping problem after initial partitioning.\n" - "Available strategies:\n" - " - greedy_mapping\n" - " - identity") - ("mapping-use-local-search", - po::value(&context.mapping.use_local_search)->value_name(""), - "If true, uses local search to improve the initial mapping.") - ("use-two-phase-approach", - po::value(&context.mapping.use_two_phase_approach)->value_name(""), - "If true, then we first compute a k-way partition via optimizing the connectivity metric.\n" - "Afterwards, each block of the partition is mapped onto a block of the target architecture graph.") - ("max-steiner-tree-size", - po::value(&context.mapping.max_steiner_tree_size)->value_name(""), - "We precompute all optimal steiner trees up to this size in the target graph.") - ("mapping-largest-he-fraction", - po::value(&context.mapping.largest_he_fraction)->value_name(""), - "If x% (x = process-mapping-largest-he-fraction) of the largest hyperedges covers more than y% of the pins\n" - "(y = process-mapping-min-pin-coverage), then we ignore hyperedges larger than the x%-percentile in\n" - "when counting adjacent blocks of a node.") - ("mapping-min-pin-coverage", - po::value(&context.mapping.min_pin_coverage_of_largest_hes)->value_name(""), - "If x% (x = process-mapping-largest-he-fraction) of the largest hyperedges covers more than y% of the pins\n" - "(y = process-mapping-min-pin-coverage), then we ignore hyperedges larger than the x%-percentile in\n" - "when counting adjacent blocks of a node."); - return mapping_options; - } +po::options_description createMappingOptionsDescription(Context &context, + const int num_columns) +{ + po::options_description mapping_options("Mapping Options", num_columns); + mapping_options.add_options()( + "target-graph-file,g", + po::value(&context.mapping.target_graph_file)->value_name(""), + "Path to a target architecture graph in Metis file format.")( + "one-to-one-mapping-strategy", + po::value() + ->value_name("") + ->notifier([&](const std::string &strategy) { + context.mapping.strategy = oneToOneMappingStrategyFromString(strategy); + }), + "Strategy for solving the one-to-one mapping problem after initial partitioning.\n" + "Available strategies:\n" + " - greedy_mapping\n" + " - identity")( + "mapping-use-local-search", + po::value(&context.mapping.use_local_search)->value_name(""), + "If true, uses local search to improve the initial mapping.")( + "use-two-phase-approach", + po::value(&context.mapping.use_two_phase_approach)->value_name(""), + "If true, then we first compute a k-way partition via optimizing the connectivity " + "metric.\n" + "Afterwards, each block of the partition is mapped onto a block of the target " + "architecture graph.")( + "max-steiner-tree-size", + po::value(&context.mapping.max_steiner_tree_size)->value_name(""), + "We precompute all optimal steiner trees up to this size in the target graph.")( + "mapping-largest-he-fraction", + po::value(&context.mapping.largest_he_fraction)->value_name(""), + "If x% (x = process-mapping-largest-he-fraction) of the largest hyperedges covers " + "more than y% of the pins\n" + "(y = process-mapping-min-pin-coverage), then we ignore hyperedges larger than the " + "x%-percentile in\n" + "when counting adjacent blocks of a node.")( + "mapping-min-pin-coverage", + po::value(&context.mapping.min_pin_coverage_of_largest_hes) + ->value_name(""), + "If x% (x = process-mapping-largest-he-fraction) of the largest hyperedges covers " + "more than y% 
of the pins\n" + "(y = process-mapping-min-pin-coverage), then we ignore hyperedges larger than the " + "x%-percentile in\n" + "when counting adjacent blocks of a node."); + return mapping_options; +} - po::options_description createSharedMemoryOptionsDescription(Context& context, - const int num_columns) { - po::options_description shared_memory_options("Shared Memory Options", num_columns); - shared_memory_options.add_options() - ("s-num-threads,t", - po::value()->value_name("")->notifier([&](const size_t num_threads) { - context.shared_memory.num_threads = num_threads; - context.shared_memory.original_num_threads = num_threads; - }), - "Number of Threads") - ("s-static-balancing-work-packages", - po::value(&context.shared_memory.static_balancing_work_packages)->value_name(""), - "Some sub-routines (sorting, shuffling) used in the deterministic presets employ static load balancing." - "This parameter sets the number of work packages, in order to achieve deterministic results across different numbers of threads." - "The default value is 128, and these sub-routines have little work, so there should rarely be a reason to change it. Max value is 256." - "It does not affect the non-deterministic configs, unless you activate one of the deterministic algorithms." - ) - ("s-use-localized-random-shuffle", - po::value(&context.shared_memory.use_localized_random_shuffle)->value_name(""), - "If true, localized parallel random shuffle is performed.") - ("s-shuffle-block-size", - po::value(&context.shared_memory.shuffle_block_size)->value_name(""), - "If we perform a localized random shuffle in parallel, we perform a parallel for over blocks of size" - "'shuffle_block_size' and shuffle them sequential."); - - return shared_memory_options; - } +po::options_description createSharedMemoryOptionsDescription(Context &context, + const int num_columns) +{ + po::options_description shared_memory_options("Shared Memory Options", num_columns); + shared_memory_options.add_options()("s-num-threads,t", + po::value() + ->value_name("") + ->notifier([&](const size_t num_threads) { + context.shared_memory.num_threads = + num_threads; + context.shared_memory.original_num_threads = + num_threads; + }), + "Number of Threads")( + "s-static-balancing-work-packages", + po::value(&context.shared_memory.static_balancing_work_packages) + ->value_name(""), + "Some sub-routines (sorting, shuffling) used in the deterministic presets employ " + "static load balancing." + "This parameter sets the number of work packages, in order to achieve " + "deterministic results across different numbers of threads." + "The default value is 128, and these sub-routines have little work, so there " + "should rarely be a reason to change it. Max value is 256." 
+ "It does not affect the non-deterministic configs, unless you activate one of the " + "deterministic algorithms.")( + "s-use-localized-random-shuffle", + po::value(&context.shared_memory.use_localized_random_shuffle) + ->value_name(""), + "If true, localized parallel random shuffle is performed.")( + "s-shuffle-block-size", + po::value(&context.shared_memory.shuffle_block_size) + ->value_name(""), + "If we perform a localized random shuffle in parallel, we perform a parallel for " + "over blocks of size" + "'shuffle_block_size' and shuffle them sequential."); + return shared_memory_options; +} +void processCommandLineInput(Context &context, int argc, char *argv[]) +{ + const int num_columns = platform::getTerminalWidth(); - void processCommandLineInput(Context& context, int argc, char *argv[]) { - const int num_columns = platform::getTerminalWidth(); - - - po::options_description required_options("Required Options", num_columns); - required_options.add_options() - ("hypergraph,h", - po::value(&context.partition.graph_filename)->value_name("")->required(), - "Hypergraph filename") - ("blocks,k", - po::value(&context.partition.k)->value_name("")->required(), - "Number of blocks") - ("epsilon,e", - po::value(&context.partition.epsilon)->value_name("")->required(), - "Imbalance parameter epsilon") - ("objective,o", - po::value()->value_name("")->required()->notifier([&](const std::string& s) { - context.partition.objective = objectiveFromString(s); - }), - "Objective: \n" - " - cut : cut-net metric (FM only supports km1 metric) \n" - " - km1 : (lambda-1) metric\n" - " - soed: sum-of-external-degree metric\n" - " - steiner_tree: maps a (hyper)graph onto a graph and optimizes the Steiner tree metric"); - - po::options_description preset_options("Preset Options", num_columns); - preset_options.add_options() - ("preset,p", po::value(&context.partition.preset_file)->value_name(""), - "Context Presets (see config directory):\n" - " - "); - - po::options_description general_options = createGeneralOptionsDescription(context, num_columns); - - po::options_description preprocessing_options = - createPreprocessingOptionsDescription(context, num_columns); - po::options_description coarsening_options = - createCoarseningOptionsDescription(context, num_columns); - po::options_description initial_paritioning_options = - createInitialPartitioningOptionsDescription(context, num_columns); - po::options_description refinement_options = - createRefinementOptionsDescription(context, num_columns, false); - po::options_description flow_options = - createFlowRefinementOptionsDescription(context, num_columns, false); - po::options_description mapping_options = - createMappingOptionsDescription(context, num_columns); - po::options_description shared_memory_options = - createSharedMemoryOptionsDescription(context, num_columns); - - po::options_description cmd_line_options; - cmd_line_options - .add(required_options) - .add(preset_options) - .add(general_options) - .add(preprocessing_options) - .add(coarsening_options) - .add(initial_paritioning_options) - .add(refinement_options) - .add(flow_options) - .add(mapping_options) - .add(shared_memory_options); - - po::variables_map cmd_vm; - po::store(po::parse_command_line(argc, argv, cmd_line_options), cmd_vm); - - // placing vm.count("help") here prevents required attributes raising an - // error if only help was supplied - if (cmd_vm.count("help") != 0 || argc == 1) { - LOG << cmd_line_options; - exit(0); - } + po::options_description required_options("Required 
Options", num_columns); + required_options.add_options()("hypergraph,h", + po::value(&context.partition.graph_filename) + ->value_name("") + ->required(), + "Hypergraph filename")( + "blocks,k", + po::value(&context.partition.k)->value_name("")->required(), + "Number of blocks")( + "epsilon,e", + po::value(&context.partition.epsilon)->value_name("")->required(), + "Imbalance parameter epsilon")( + "objective,o", + po::value() + ->value_name("") + ->required() + ->notifier([&](const std::string &s) { + context.partition.objective = objectiveFromString(s); + }), + "Objective: \n" + " - cut : cut-net metric (FM only supports km1 metric) \n" + " - km1 : (lambda-1) metric\n" + " - soed: sum-of-external-degree metric\n" + " - steiner_tree: maps a (hyper)graph onto a graph and optimizes the Steiner tree " + "metric"); - po::notify(cmd_vm); + po::options_description preset_options("Preset Options", num_columns); + preset_options.add_options()( + "preset,p", + po::value(&context.partition.preset_file)->value_name(""), + "Context Presets (see config directory):\n" + " - "); - if ( context.partition.preset_file != "" ) { - std::ifstream file(context.partition.preset_file.c_str()); - if (!file) { - throw InvalidInputException( - "Could not load context file at: " + context.partition.preset_file); - } - - po::options_description ini_line_options; - ini_line_options.add(general_options) - .add(preprocessing_options) - .add(coarsening_options) - .add(initial_paritioning_options) - .add(refinement_options) - .add(flow_options) - .add(mapping_options) - .add(shared_memory_options); - - po::store(po::parse_config_file(file, ini_line_options, true), cmd_vm); - po::notify(cmd_vm); - } + po::options_description general_options = + createGeneralOptionsDescription(context, num_columns); - std::string epsilon_str = std::to_string(context.partition.epsilon); - epsilon_str.erase(epsilon_str.find_last_not_of('0') + 1, std::string::npos); - - if (context.partition.graph_partition_output_folder != "") { - std::string graph_base_name = context.partition.graph_filename.substr( - context.partition.graph_filename.find_last_of("/") + 1); - context.partition.graph_partition_filename = - context.partition.graph_partition_output_folder + "/" + graph_base_name; - } else { - context.partition.graph_partition_filename = - context.partition.graph_filename; - } - context.partition.graph_partition_filename = - context.partition.graph_partition_filename - + ".part" - + std::to_string(context.partition.k) - + ".epsilon" - + epsilon_str - + ".seed" - + std::to_string(context.partition.seed) - + ".KaHyPar"; - context.partition.graph_community_filename = - context.partition.graph_filename + ".community"; - - if (context.partition.deterministic) { - context.preprocessing.stable_construction_of_incident_edges = true; - } + po::options_description preprocessing_options = + createPreprocessingOptionsDescription(context, num_columns); + po::options_description coarsening_options = + createCoarseningOptionsDescription(context, num_columns); + po::options_description initial_paritioning_options = + createInitialPartitioningOptionsDescription(context, num_columns); + po::options_description refinement_options = + createRefinementOptionsDescription(context, num_columns, false); + po::options_description flow_options = + createFlowRefinementOptionsDescription(context, num_columns, false); + po::options_description mapping_options = + createMappingOptionsDescription(context, num_columns); + po::options_description shared_memory_options = + 
createSharedMemoryOptionsDescription(context, num_columns); + + po::options_description cmd_line_options; + cmd_line_options.add(required_options) + .add(preset_options) + .add(general_options) + .add(preprocessing_options) + .add(coarsening_options) + .add(initial_paritioning_options) + .add(refinement_options) + .add(flow_options) + .add(mapping_options) + .add(shared_memory_options); + + po::variables_map cmd_vm; + po::store(po::parse_command_line(argc, argv, cmd_line_options), cmd_vm); + + // placing vm.count("help") here prevents required attributes raising an + // error if only help was supplied + if(cmd_vm.count("help") != 0 || argc == 1) + { + LOG << cmd_line_options; + exit(0); } + po::notify(cmd_vm); - void parseIniToContext(Context& context, const std::string& ini_filename) { - std::ifstream file(ini_filename.c_str()); - if (!file) { - throw InvalidInputException( - "Could not load context file at: " + ini_filename); + if(context.partition.preset_file != "") + { + std::ifstream file(context.partition.preset_file.c_str()); + if(!file) + { + throw InvalidInputException("Could not load context file at: " + + context.partition.preset_file); } - const int num_columns = 80; - - po::options_description general_options = - createGeneralOptionsDescription(context, num_columns); - po::options_description preprocessing_options = - createPreprocessingOptionsDescription(context, num_columns); - po::options_description coarsening_options = - createCoarseningOptionsDescription(context, num_columns); - po::options_description initial_paritioning_options = - createInitialPartitioningOptionsDescription(context, num_columns); - po::options_description refinement_options = - createRefinementOptionsDescription(context, num_columns, false); - po::options_description flow_options = - createFlowRefinementOptionsDescription(context, num_columns, false); - po::options_description mapping_options = - createMappingOptionsDescription(context, num_columns); - po::options_description shared_memory_options = - createSharedMemoryOptionsDescription(context, num_columns); - - po::variables_map cmd_vm; + po::options_description ini_line_options; ini_line_options.add(general_options) - .add(preprocessing_options) - .add(coarsening_options) - .add(initial_paritioning_options) - .add(refinement_options) - .add(flow_options) - .add(mapping_options) - .add(shared_memory_options); + .add(preprocessing_options) + .add(coarsening_options) + .add(initial_paritioning_options) + .add(refinement_options) + .add(flow_options) + .add(mapping_options) + .add(shared_memory_options); po::store(po::parse_config_file(file, ini_line_options, true), cmd_vm); po::notify(cmd_vm); + } - if (context.partition.deterministic) { - context.preprocessing.stable_construction_of_incident_edges = true; - } + std::string epsilon_str = std::to_string(context.partition.epsilon); + epsilon_str.erase(epsilon_str.find_last_not_of('0') + 1, std::string::npos); + + if(context.partition.graph_partition_output_folder != "") + { + std::string graph_base_name = context.partition.graph_filename.substr( + context.partition.graph_filename.find_last_of("/") + 1); + context.partition.graph_partition_filename = + context.partition.graph_partition_output_folder + "/" + graph_base_name; + } + else + { + context.partition.graph_partition_filename = context.partition.graph_filename; + } + context.partition.graph_partition_filename = + context.partition.graph_partition_filename + ".part" + + std::to_string(context.partition.k) + ".epsilon" + epsilon_str + ".seed" + + 
std::to_string(context.partition.seed) + ".KaHyPar"; + context.partition.graph_community_filename = + context.partition.graph_filename + ".community"; + + if(context.partition.deterministic) + { + context.preprocessing.stable_construction_of_incident_edges = true; + } +} + +void parseIniToContext(Context &context, const std::string &ini_filename) +{ + std::ifstream file(ini_filename.c_str()); + if(!file) + { + throw InvalidInputException("Could not load context file at: " + ini_filename); } + const int num_columns = 80; + + po::options_description general_options = + createGeneralOptionsDescription(context, num_columns); + po::options_description preprocessing_options = + createPreprocessingOptionsDescription(context, num_columns); + po::options_description coarsening_options = + createCoarseningOptionsDescription(context, num_columns); + po::options_description initial_paritioning_options = + createInitialPartitioningOptionsDescription(context, num_columns); + po::options_description refinement_options = + createRefinementOptionsDescription(context, num_columns, false); + po::options_description flow_options = + createFlowRefinementOptionsDescription(context, num_columns, false); + po::options_description mapping_options = + createMappingOptionsDescription(context, num_columns); + po::options_description shared_memory_options = + createSharedMemoryOptionsDescription(context, num_columns); + + po::variables_map cmd_vm; + po::options_description ini_line_options; + ini_line_options.add(general_options) + .add(preprocessing_options) + .add(coarsening_options) + .add(initial_paritioning_options) + .add(refinement_options) + .add(flow_options) + .add(mapping_options) + .add(shared_memory_options); + + po::store(po::parse_config_file(file, ini_line_options, true), cmd_vm); + po::notify(cmd_vm); + + if(context.partition.deterministic) + { + context.preprocessing.stable_construction_of_incident_edges = true; + } +} } diff --git a/mt-kahypar/io/command_line_options.h b/mt-kahypar/io/command_line_options.h index 867732029..719c5f6bd 100644 --- a/mt-kahypar/io/command_line_options.h +++ b/mt-kahypar/io/command_line_options.h @@ -32,6 +32,6 @@ namespace mt_kahypar { -void processCommandLineInput(Context& context, int argc, char *argv[]); -void parseIniToContext(Context& context, const std::string& ini_filename); +void processCommandLineInput(Context &context, int argc, char *argv[]); +void parseIniToContext(Context &context, const std::string &ini_filename); } // namespace mt_kahypar \ No newline at end of file diff --git a/mt-kahypar/io/csv_output.cpp b/mt-kahypar/io/csv_output.cpp index 6014d42c6..de7b2b20a 100644 --- a/mt-kahypar/io/csv_output.cpp +++ b/mt-kahypar/io/csv_output.cpp @@ -37,59 +37,67 @@ namespace mt_kahypar::io::csv { - std::string header() { - return "algorithm,threads,graph,k,seed,epsilon,imbalance," - "objective,km1,cut,initial_km1,partitionTime,fmTime,lpTime,coarseningTime,ipTime,preprocessingTime" - "\n"; - } +std::string header() +{ + return "algorithm,threads,graph,k,seed,epsilon,imbalance," + "objective,km1,cut,initial_km1,partitionTime,fmTime,lpTime,coarseningTime," + "ipTime,preprocessingTime" + "\n"; +} - template - std::string serialize(const PartitionedHypergraph& phg, - const Context& context, - const std::chrono::duration& elapsed_seconds) { - const char sep = ','; - std::stringstream s; +template +std::string serialize(const PartitionedHypergraph &phg, const Context &context, + const std::chrono::duration &elapsed_seconds) +{ + const char sep = ','; + 
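// The row below is assembled in exactly the column order produced by header():
// algorithm, threads, graph, k, seed, epsilon, imbalance, objective, km1, cut,
// initial_km1, partitionTime, fmTime, lpTime, coarseningTime, ipTime,
// preprocessingTime.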
std::stringstream s; - s << context.algorithm_name; - if (context.algorithm_name == "MT-KaHyPar") { - if (context.partition.preset_file.find("fast") != std::string::npos) { - s << "-Fast"; - } else if (context.partition.preset_file.find("quality") != std::string::npos) { - s << "-Eco"; - } + s << context.algorithm_name; + if(context.algorithm_name == "MT-KaHyPar") + { + if(context.partition.preset_file.find("fast") != std::string::npos) + { + s << "-Fast"; + } + else if(context.partition.preset_file.find("quality") != std::string::npos) + { + s << "-Eco"; } - s << sep; + } + s << sep; - s << context.shared_memory.num_threads << sep; - s << context.partition.graph_filename.substr(context.partition.graph_filename.find_last_of('/') + 1) << sep; - s << context.partition.k << sep; - s << context.partition.seed << sep; + s << context.shared_memory.num_threads << sep; + s << context.partition.graph_filename.substr( + context.partition.graph_filename.find_last_of('/') + 1) + << sep; + s << context.partition.k << sep; + s << context.partition.seed << sep; - s << context.partition.epsilon << sep; - s << metrics::imbalance(phg, context) << sep; + s << context.partition.epsilon << sep; + s << metrics::imbalance(phg, context) << sep; - s << context.partition.objective << sep; - s << metrics::quality(phg, Objective::km1) << sep; - s << metrics::quality(phg, Objective::cut) << sep; - s << context.initial_km1 << sep; - s << elapsed_seconds.count() << sep; + s << context.partition.objective << sep; + s << metrics::quality(phg, Objective::km1) << sep; + s << metrics::quality(phg, Objective::cut) << sep; + s << context.initial_km1 << sep; + s << elapsed_seconds.count() << sep; - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - timer.showDetailedTimings(context.partition.show_detailed_timings); - s << (timer.get("fm") + timer.get("initialize_fm_refiner"))<< sep; - s << (timer.get("label_propagation") + timer.get("initialize_lp_refiner")) << sep; - s << timer.get("coarsening") << sep; - s << timer.get("initial_partitioning") << sep; - s << timer.get("preprocessing"); + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); + timer.showDetailedTimings(context.partition.show_detailed_timings); + s << (timer.get("fm") + timer.get("initialize_fm_refiner")) << sep; + s << (timer.get("label_propagation") + timer.get("initialize_lp_refiner")) << sep; + s << timer.get("coarsening") << sep; + s << timer.get("initial_partitioning") << sep; + s << timer.get("preprocessing"); - return s.str(); - } + return s.str(); +} - namespace { - #define SERIALIZE(X) std::string serialize(const X& phg, \ - const Context& context, \ - const std::chrono::duration& elapsed_seconds) - } +namespace { +#define SERIALIZE(X) \ + std::string serialize(const X &phg, const Context &context, \ + const std::chrono::duration &elapsed_seconds) +} - INSTANTIATE_FUNC_WITH_PARTITIONED_HG(SERIALIZE) +INSTANTIATE_FUNC_WITH_PARTITIONED_HG(SERIALIZE) } \ No newline at end of file diff --git a/mt-kahypar/io/csv_output.h b/mt-kahypar/io/csv_output.h index 74076e1e3..7ee444ea6 100644 --- a/mt-kahypar/io/csv_output.h +++ b/mt-kahypar/io/csv_output.h @@ -33,10 +33,9 @@ #include "mt-kahypar/partition/context.h" namespace mt_kahypar::io::csv { - std::string header(); +std::string header(); - template - std::string serialize(const PartitionedHypergraph& phg, - const Context& context, - const std::chrono::duration& elapsed_seconds); +template +std::string serialize(const PartitionedHypergraph &phg, const 
Context &context, + const std::chrono::duration &elapsed_seconds); } \ No newline at end of file diff --git a/mt-kahypar/io/hypergraph_factory.cpp b/mt-kahypar/io/hypergraph_factory.cpp index b96b38e3f..f6bd83dd5 100644 --- a/mt-kahypar/io/hypergraph_factory.cpp +++ b/mt-kahypar/io/hypergraph_factory.cpp @@ -26,10 +26,10 @@ #include "hypergraph_factory.h" -#include "mt-kahypar/macros.h" +#include "mt-kahypar/datastructures/fixed_vertex_support.h" #include "mt-kahypar/definitions.h" #include "mt-kahypar/io/hypergraph_io.h" -#include "mt-kahypar/datastructures/fixed_vertex_support.h" +#include "mt-kahypar/macros.h" #include "mt-kahypar/partition/conversion.h" #include "mt-kahypar/utils/exception.h" @@ -38,26 +38,29 @@ namespace io { namespace { -template -mt_kahypar_hypergraph_t constructHypergraph(const HypernodeID& num_hypernodes, - const HyperedgeID& num_hyperedges, - const HyperedgeVector& hyperedges, - const HyperedgeWeight* hyperedge_weight, - const HypernodeWeight* hypernode_weight, +template +mt_kahypar_hypergraph_t constructHypergraph(const HypernodeID &num_hypernodes, + const HyperedgeID &num_hyperedges, + const HyperedgeVector &hyperedges, + const HyperedgeWeight *hyperedge_weight, + const HypernodeWeight *hypernode_weight, const HypernodeID num_removed_single_pin_hes, - const bool stable_construction) { - Hypergraph* hypergraph = new Hypergraph(); + const bool stable_construction) +{ + Hypergraph *hypergraph = new Hypergraph(); *hypergraph = Hypergraph::Factory::construct(num_hypernodes, num_hyperedges, hyperedges, - hyperedge_weight, hypernode_weight, stable_construction); + hyperedge_weight, hypernode_weight, + stable_construction); hypergraph->setNumRemovedHyperedges(num_removed_single_pin_hes); - return mt_kahypar_hypergraph_t { - reinterpret_cast(hypergraph), Hypergraph::TYPE }; + return mt_kahypar_hypergraph_t{ reinterpret_cast(hypergraph), + Hypergraph::TYPE }; } -mt_kahypar_hypergraph_t readHMetisFile(const std::string& filename, - const mt_kahypar_hypergraph_type_t& type, - const bool stable_construction, - const bool remove_single_pin_hes) { +mt_kahypar_hypergraph_t readHMetisFile(const std::string &filename, + const mt_kahypar_hypergraph_type_t &type, + const bool stable_construction, + const bool remove_single_pin_hes) +{ HyperedgeID num_hyperedges = 0; HypernodeID num_hypernodes = 0; HyperedgeID num_removed_single_pin_hyperedges = 0; @@ -65,40 +68,38 @@ mt_kahypar_hypergraph_t readHMetisFile(const std::string& filename, vec hyperedges_weight; vec hypernodes_weight; readHypergraphFile(filename, num_hyperedges, num_hypernodes, - num_removed_single_pin_hyperedges, hyperedges, - hyperedges_weight, hypernodes_weight, remove_single_pin_hes); + num_removed_single_pin_hyperedges, hyperedges, hyperedges_weight, + hypernodes_weight, remove_single_pin_hes); - switch ( type ) { - case STATIC_GRAPH: - return constructHypergraph( - num_hypernodes, num_hyperedges, hyperedges, - hyperedges_weight.data(), hypernodes_weight.data(), - num_removed_single_pin_hyperedges, stable_construction); - case DYNAMIC_GRAPH: - return constructHypergraph( - num_hypernodes, num_hyperedges, hyperedges, - hyperedges_weight.data(), hypernodes_weight.data(), - num_removed_single_pin_hyperedges, stable_construction); - case STATIC_HYPERGRAPH: - return constructHypergraph( - num_hypernodes, num_hyperedges, hyperedges, - hyperedges_weight.data(), hypernodes_weight.data(), - num_removed_single_pin_hyperedges, stable_construction); - case DYNAMIC_HYPERGRAPH: - return constructHypergraph( - num_hypernodes, 
num_hyperedges, hyperedges, - hyperedges_weight.data(), hypernodes_weight.data(), - num_removed_single_pin_hyperedges, stable_construction); - case NULLPTR_HYPERGRAPH: - return mt_kahypar_hypergraph_t { nullptr, NULLPTR_HYPERGRAPH }; + switch(type) + { + case STATIC_GRAPH: + return constructHypergraph( + num_hypernodes, num_hyperedges, hyperedges, hyperedges_weight.data(), + hypernodes_weight.data(), num_removed_single_pin_hyperedges, stable_construction); + case DYNAMIC_GRAPH: + return constructHypergraph( + num_hypernodes, num_hyperedges, hyperedges, hyperedges_weight.data(), + hypernodes_weight.data(), num_removed_single_pin_hyperedges, stable_construction); + case STATIC_HYPERGRAPH: + return constructHypergraph( + num_hypernodes, num_hyperedges, hyperedges, hyperedges_weight.data(), + hypernodes_weight.data(), num_removed_single_pin_hyperedges, stable_construction); + case DYNAMIC_HYPERGRAPH: + return constructHypergraph( + num_hypernodes, num_hyperedges, hyperedges, hyperedges_weight.data(), + hypernodes_weight.data(), num_removed_single_pin_hyperedges, stable_construction); + case NULLPTR_HYPERGRAPH: + return mt_kahypar_hypergraph_t{ nullptr, NULLPTR_HYPERGRAPH }; } - return mt_kahypar_hypergraph_t { nullptr, NULLPTR_HYPERGRAPH }; + return mt_kahypar_hypergraph_t{ nullptr, NULLPTR_HYPERGRAPH }; } -mt_kahypar_hypergraph_t readMetisFile(const std::string& filename, - const mt_kahypar_hypergraph_type_t& type, - const bool stable_construction) { +mt_kahypar_hypergraph_t readMetisFile(const std::string &filename, + const mt_kahypar_hypergraph_type_t &type, + const bool stable_construction) +{ HyperedgeID num_edges = 0; HypernodeID num_vertices = 0; HyperedgeVector edges; @@ -106,96 +107,109 @@ mt_kahypar_hypergraph_t readMetisFile(const std::string& filename, vec nodes_weight; readGraphFile(filename, num_edges, num_vertices, edges, edges_weight, nodes_weight); - switch ( type ) { - case STATIC_GRAPH: - return constructHypergraph( - num_vertices, num_edges, edges, - edges_weight.data(), nodes_weight.data(), 0, stable_construction); - case DYNAMIC_GRAPH: - return constructHypergraph( - num_vertices, num_edges, edges, - edges_weight.data(), nodes_weight.data(), 0, stable_construction); - case STATIC_HYPERGRAPH: - return constructHypergraph( - num_vertices, num_edges, edges, - edges_weight.data(), nodes_weight.data(), 0, stable_construction); - case DYNAMIC_HYPERGRAPH: - return constructHypergraph( - num_vertices, num_edges, edges, - edges_weight.data(), nodes_weight.data(), 0, stable_construction); - case NULLPTR_HYPERGRAPH: - return mt_kahypar_hypergraph_t { nullptr, NULLPTR_HYPERGRAPH }; + switch(type) + { + case STATIC_GRAPH: + return constructHypergraph(num_vertices, num_edges, edges, + edges_weight.data(), nodes_weight.data(), + 0, stable_construction); + case DYNAMIC_GRAPH: + return constructHypergraph(num_vertices, num_edges, edges, + edges_weight.data(), nodes_weight.data(), + 0, stable_construction); + case STATIC_HYPERGRAPH: + return constructHypergraph( + num_vertices, num_edges, edges, edges_weight.data(), nodes_weight.data(), 0, + stable_construction); + case DYNAMIC_HYPERGRAPH: + return constructHypergraph( + num_vertices, num_edges, edges, edges_weight.data(), nodes_weight.data(), 0, + stable_construction); + case NULLPTR_HYPERGRAPH: + return mt_kahypar_hypergraph_t{ nullptr, NULLPTR_HYPERGRAPH }; } - return mt_kahypar_hypergraph_t { nullptr, NULLPTR_HYPERGRAPH }; + return mt_kahypar_hypergraph_t{ nullptr, NULLPTR_HYPERGRAPH }; } } // namespace -mt_kahypar_hypergraph_t 
readInputFile(const std::string& filename, - const PresetType& preset, - const InstanceType& instance, - const FileFormat& format, - const bool stable_construction, - const bool remove_single_pin_hes) { +mt_kahypar_hypergraph_t +readInputFile(const std::string &filename, const PresetType &preset, + const InstanceType &instance, const FileFormat &format, + const bool stable_construction, const bool remove_single_pin_hes) +{ mt_kahypar_hypergraph_type_t type = to_hypergraph_c_type(preset, instance); - switch ( format ) { - case FileFormat::hMetis: return readHMetisFile( - filename, type, stable_construction, remove_single_pin_hes); - case FileFormat::Metis: return readMetisFile( - filename, type, stable_construction); + switch(format) + { + case FileFormat::hMetis: + return readHMetisFile(filename, type, stable_construction, remove_single_pin_hes); + case FileFormat::Metis: + return readMetisFile(filename, type, stable_construction); } - return mt_kahypar_hypergraph_t { nullptr, NULLPTR_HYPERGRAPH }; + return mt_kahypar_hypergraph_t{ nullptr, NULLPTR_HYPERGRAPH }; } -template -Hypergraph readInputFile(const std::string& filename, - const FileFormat& format, - const bool stable_construction, - const bool remove_single_pin_hes) { - mt_kahypar_hypergraph_t hypergraph { nullptr, NULLPTR_HYPERGRAPH }; - switch ( format ) { - case FileFormat::hMetis: hypergraph = readHMetisFile( - filename, Hypergraph::TYPE, stable_construction, remove_single_pin_hes); - break; - case FileFormat::Metis: hypergraph = readMetisFile( - filename, Hypergraph::TYPE, stable_construction); +template +Hypergraph readInputFile(const std::string &filename, const FileFormat &format, + const bool stable_construction, const bool remove_single_pin_hes) +{ + mt_kahypar_hypergraph_t hypergraph{ nullptr, NULLPTR_HYPERGRAPH }; + switch(format) + { + case FileFormat::hMetis: + hypergraph = readHMetisFile(filename, Hypergraph::TYPE, stable_construction, + remove_single_pin_hes); + break; + case FileFormat::Metis: + hypergraph = readMetisFile(filename, Hypergraph::TYPE, stable_construction); } return std::move(utils::cast(hypergraph)); } namespace { -HypernodeID numberOfNodes(mt_kahypar_hypergraph_t hypergraph) { - switch ( hypergraph.type ) { - case STATIC_HYPERGRAPH: return utils::cast(hypergraph).initialNumNodes(); - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case STATIC_GRAPH: return utils::cast(hypergraph).initialNumNodes(); - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - case DYNAMIC_GRAPH: return utils::cast(hypergraph).initialNumNodes(); - #endif - #endif - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - case DYNAMIC_HYPERGRAPH: return utils::cast(hypergraph).initialNumNodes(); - #endif - case NULLPTR_HYPERGRAPH: return 0; - default: return 0; +HypernodeID numberOfNodes(mt_kahypar_hypergraph_t hypergraph) +{ + switch(hypergraph.type) + { + case STATIC_HYPERGRAPH: + return utils::cast(hypergraph).initialNumNodes(); +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case STATIC_GRAPH: + return utils::cast(hypergraph).initialNumNodes(); +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES + case DYNAMIC_GRAPH: + return utils::cast(hypergraph).initialNumNodes(); +#endif +#endif +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES + case DYNAMIC_HYPERGRAPH: + return utils::cast(hypergraph).initialNumNodes(); +#endif + case NULLPTR_HYPERGRAPH: + return 0; + default: + return 0; } } -template -void addFixedVertices(Hypergraph& hypergraph, - const mt_kahypar_partition_id_t* fixed_vertices, - const PartitionID k) { - 
ds::FixedVertexSupport fixed_vertex_support( - hypergraph.initialNumNodes(), k); +template +void addFixedVertices(Hypergraph &hypergraph, + const mt_kahypar_partition_id_t *fixed_vertices, + const PartitionID k) +{ + ds::FixedVertexSupport fixed_vertex_support(hypergraph.initialNumNodes(), + k); fixed_vertex_support.setHypergraph(&hypergraph); - hypergraph.doParallelForAllNodes([&](const HypernodeID& hn) { - if ( fixed_vertices[hn] != -1 ) { - if ( fixed_vertices[hn] < 0 || fixed_vertices[hn] >= k ) { - throw InvalidInputException( - "Try to partition hypergraph into " + STR(k) + " blocks, but node " + - STR(hn) + " is fixed to block " + STR(fixed_vertices[hn])); + hypergraph.doParallelForAllNodes([&](const HypernodeID &hn) { + if(fixed_vertices[hn] != -1) + { + if(fixed_vertices[hn] < 0 || fixed_vertices[hn] >= k) + { + throw InvalidInputException("Try to partition hypergraph into " + STR(k) + + " blocks, but node " + STR(hn) + + " is fixed to block " + STR(fixed_vertices[hn])); } fixed_vertex_support.fixToBlock(hn, fixed_vertices[hn]); } @@ -203,8 +217,9 @@ void addFixedVertices(Hypergraph& hypergraph, hypergraph.addFixedVertexSupport(std::move(fixed_vertex_support)); } -template -void removeFixedVertices(Hypergraph& hypergraph) { +template +void removeFixedVertices(Hypergraph &hypergraph) +{ ds::FixedVertexSupport fixed_vertex_support; hypergraph.addFixedVertexSupport(std::move(fixed_vertex_support)); } @@ -212,76 +227,90 @@ void removeFixedVertices(Hypergraph& hypergraph) { } // namespace void addFixedVertices(mt_kahypar_hypergraph_t hypergraph, - const mt_kahypar_partition_id_t* fixed_vertices, - const PartitionID k) { - switch ( hypergraph.type ) { - case STATIC_HYPERGRAPH: - addFixedVertices(utils::cast(hypergraph), fixed_vertices, k); break; - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case STATIC_GRAPH: - addFixedVertices(utils::cast(hypergraph), fixed_vertices, k); break; - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - case DYNAMIC_GRAPH: - addFixedVertices(utils::cast(hypergraph), fixed_vertices, k); break; - #endif - #endif - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - case DYNAMIC_HYPERGRAPH: - addFixedVertices(utils::cast(hypergraph), fixed_vertices, k); break; - #endif - case NULLPTR_HYPERGRAPH: - default: break; + const mt_kahypar_partition_id_t *fixed_vertices, + const PartitionID k) +{ + switch(hypergraph.type) + { + case STATIC_HYPERGRAPH: + addFixedVertices(utils::cast(hypergraph), fixed_vertices, k); + break; +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case STATIC_GRAPH: + addFixedVertices(utils::cast(hypergraph), fixed_vertices, k); + break; +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES + case DYNAMIC_GRAPH: + addFixedVertices(utils::cast(hypergraph), fixed_vertices, k); + break; +#endif +#endif +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES + case DYNAMIC_HYPERGRAPH: + addFixedVertices(utils::cast(hypergraph), fixed_vertices, k); + break; +#endif + case NULLPTR_HYPERGRAPH: + default: + break; } } void addFixedVerticesFromFile(mt_kahypar_hypergraph_t hypergraph, - const std::string& filename, - const PartitionID k) { + const std::string &filename, const PartitionID k) +{ std::vector fixed_vertices; io::readPartitionFile(filename, fixed_vertices); - if ( ID(fixed_vertices.size()) != numberOfNodes(hypergraph) ) { + if(ID(fixed_vertices.size()) != numberOfNodes(hypergraph)) + { throw InvalidInputException( - "Fixed vertex file has more lines than the number of nodes!"); + "Fixed vertex file has more lines than the number of nodes!"); } 
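// The parsed block ids are handed to the pointer-based addFixedVertices()
// overload above, which dispatches on hypergraph.type and lets the templated
// helper reject any id outside [0, k) (with -1 meaning "not fixed") before
// recording it in the FixedVertexSupport of the concrete (hyper)graph.
// Illustrative call site (assumed driver code, not taken from this patch):
//   mt_kahypar_hypergraph_t hg = readInputFile(graph_file, preset, instance, format);
//   addFixedVerticesFromFile(hg, fixed_vertex_file, k);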
addFixedVertices(hypergraph, fixed_vertices.data(), k); } -void removeFixedVertices(mt_kahypar_hypergraph_t hypergraph) { - switch ( hypergraph.type ) { - case STATIC_HYPERGRAPH: - removeFixedVertices(utils::cast(hypergraph)); break; - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case STATIC_GRAPH: - removeFixedVertices(utils::cast(hypergraph)); break; - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - case DYNAMIC_GRAPH: - removeFixedVertices(utils::cast(hypergraph)); break; - #endif - #endif - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - case DYNAMIC_HYPERGRAPH: - removeFixedVertices(utils::cast(hypergraph)); break; - #endif - case NULLPTR_HYPERGRAPH: - default: break; +void removeFixedVertices(mt_kahypar_hypergraph_t hypergraph) +{ + switch(hypergraph.type) + { + case STATIC_HYPERGRAPH: + removeFixedVertices(utils::cast(hypergraph)); + break; +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case STATIC_GRAPH: + removeFixedVertices(utils::cast(hypergraph)); + break; +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES + case DYNAMIC_GRAPH: + removeFixedVertices(utils::cast(hypergraph)); + break; +#endif +#endif +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES + case DYNAMIC_HYPERGRAPH: + removeFixedVertices(utils::cast(hypergraph)); + break; +#endif + case NULLPTR_HYPERGRAPH: + default: + break; } } namespace { - #define READ_INPUT_FILE(X) X readInputFile(const std::string& filename, \ - const FileFormat& format, \ - const bool stable_construction, \ - const bool remove_single_pin_hes) +#define READ_INPUT_FILE(X) \ + X readInputFile(const std::string &filename, const FileFormat &format, \ + const bool stable_construction, const bool remove_single_pin_hes) } INSTANTIATE_FUNC_WITH_HYPERGRAPHS(READ_INPUT_FILE) #ifndef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES -template ds::StaticGraph readInputFile(const std::string& filename, - const FileFormat& format, +template ds::StaticGraph readInputFile(const std::string &filename, + const FileFormat &format, const bool stable_construction, const bool remove_single_pin_hes); #endif -} // namespace io -} // namespace mt_kahypar +} // namespace io +} // namespace mt_kahypar diff --git a/mt-kahypar/io/hypergraph_factory.h b/mt-kahypar/io/hypergraph_factory.h index 3535d3224..cb5f3381b 100644 --- a/mt-kahypar/io/hypergraph_factory.h +++ b/mt-kahypar/io/hypergraph_factory.h @@ -37,28 +37,26 @@ namespace mt_kahypar { namespace io { -mt_kahypar_hypergraph_t readInputFile(const std::string& filename, - const PresetType& preset, - const InstanceType& instance, - const FileFormat& format, +mt_kahypar_hypergraph_t readInputFile(const std::string &filename, + const PresetType &preset, + const InstanceType &instance, + const FileFormat &format, const bool stable_construction = false, const bool remove_single_pin_hes = true); -template -Hypergraph readInputFile(const std::string& filename, - const FileFormat& format, +template +Hypergraph readInputFile(const std::string &filename, const FileFormat &format, const bool stable_construction = false, const bool remove_single_pin_hes = true); void addFixedVertices(mt_kahypar_hypergraph_t hypergraph, - const mt_kahypar_partition_id_t* fixed_vertices, + const mt_kahypar_partition_id_t *fixed_vertices, const PartitionID k); void addFixedVerticesFromFile(mt_kahypar_hypergraph_t hypergraph, - const std::string& filename, - const PartitionID k); + const std::string &filename, const PartitionID k); void removeFixedVertices(mt_kahypar_hypergraph_t hypergraph); -} // namespace io -} // namespace mt_kahypar +} // 
namespace io +} // namespace mt_kahypar diff --git a/mt-kahypar/io/hypergraph_io.cpp b/mt-kahypar/io/hypergraph_io.cpp index 1c7705b4b..32489d290 100644 --- a/mt-kahypar/io/hypergraph_io.cpp +++ b/mt-kahypar/io/hypergraph_io.cpp @@ -13,8 +13,8 @@ * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -28,678 +28,782 @@ #include "hypergraph_io.h" #include +#include #include #include -#include #include -#include -#include #include +#include +#include #if defined(__linux__) or defined(__APPLE__) #include #include #elif _WIN32 -#include -#include #include +#include +#include #endif - #include "tbb/parallel_for.h" #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/context_enum_classes.h" -#include "mt-kahypar/utils/timer.h" #include "mt-kahypar/utils/exception.h" +#include "mt-kahypar/utils/timer.h" namespace mt_kahypar::io { - #if defined(__linux__) or defined(__APPLE__) - struct FileHandle { - int fd; - char* mapped_file; - size_t length; - - void closeHandle() { - close(fd); - } - }; - #elif _WIN32 - struct FileHandle { - HANDLE hFile; - HANDLE hMem; - char* mapped_file; - size_t length; - - void closeHandle() { - CloseHandle(hFile); - CloseHandle(hMem); - } - }; - #endif - - size_t file_size(const std::string& filename) { - struct stat stat_buf; - const int res = stat( filename.c_str(), &stat_buf); - if (res < 0) { - throw InvalidInputException("Could not open:" + filename); - } - return static_cast(stat_buf.st_size); +#if defined(__linux__) or defined(__APPLE__) +struct FileHandle +{ + int fd; + char *mapped_file; + size_t length; + + void closeHandle() { close(fd); } +}; +#elif _WIN32 +struct FileHandle +{ + HANDLE hFile; + HANDLE hMem; + char *mapped_file; + size_t length; + + void closeHandle() + { + CloseHandle(hFile); + CloseHandle(hMem); } +}; +#endif - FileHandle mmap_file(const std::string& filename) { - FileHandle handle; - handle.length = file_size(filename); - - #ifdef _WIN32 - PSECURITY_DESCRIPTOR pSD; - SECURITY_ATTRIBUTES sa; - - /* create security descriptor (needed for Windows NT) */ - pSD = (PSECURITY_DESCRIPTOR) malloc( SECURITY_DESCRIPTOR_MIN_LENGTH ); - if( pSD == NULL ) { - throw SystemException("Error while creating security descriptor!"); - } - - InitializeSecurityDescriptor(pSD, SECURITY_DESCRIPTOR_REVISION); - SetSecurityDescriptorDacl(pSD, TRUE, (PACL) NULL, FALSE); - - sa.nLength = sizeof(sa); - sa.lpSecurityDescriptor = pSD; - sa.bInheritHandle = TRUE; - - // open file - handle.hFile = CreateFile ( filename.c_str(), GENERIC_READ, FILE_SHARE_READ, - &sa, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); +size_t file_size(const std::string &filename) +{ + struct stat stat_buf; + const int res = stat(filename.c_str(), &stat_buf); + if(res < 0) + { + throw InvalidInputException("Could not open:" + filename); + } + return static_cast(stat_buf.st_size); +} + +FileHandle mmap_file(const std::string &filename) +{ + FileHandle handle; + handle.length = file_size(filename); + +#ifdef _WIN32 + PSECURITY_DESCRIPTOR pSD; + SECURITY_ATTRIBUTES sa; + + /* create security 
descriptor (needed for Windows NT) */ + pSD = (PSECURITY_DESCRIPTOR)malloc(SECURITY_DESCRIPTOR_MIN_LENGTH); + if(pSD == NULL) + { + throw SystemException("Error while creating security descriptor!"); + } - if (handle.hFile == INVALID_HANDLE_VALUE) { - free( pSD); - throw InvalidInputException("Invalid file handle when opening: " + filename); - } + InitializeSecurityDescriptor(pSD, SECURITY_DESCRIPTOR_REVISION); + SetSecurityDescriptorDacl(pSD, TRUE, (PACL)NULL, FALSE); - // Create file mapping - handle.hMem = CreateFileMapping( handle.hFile, &sa, PAGE_READONLY, 0, handle.length, NULL); - free(pSD); - if (handle.hMem == NULL) { - throw InvalidInputException("Invalid file mapping when opening: " + filename); - } + sa.nLength = sizeof(sa); + sa.lpSecurityDescriptor = pSD; + sa.bInheritHandle = TRUE; - // map file to memory - handle.mapped_file = (char*) MapViewOfFile(handle.hMem, FILE_MAP_READ, 0, 0, 0); - if ( handle.mapped_file == NULL ) { - throw SystemException("Failed to map file to main memory:" + filename); - } - #elif defined(__linux__) or defined(__APPLE__) - handle.fd = open(filename.c_str(), O_RDONLY); - if ( handle.fd < -1 ) { - throw InvalidInputException("Could not open: " + filename); - } - handle.mapped_file = (char*) mmap(0, handle.length, PROT_READ, MAP_SHARED, handle.fd, 0); - if ( handle.mapped_file == MAP_FAILED ) { - close(handle.fd); - throw SystemException("Error while mapping file to memory"); - } - #endif + // open file + handle.hFile = CreateFile(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, &sa, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - return handle; + if(handle.hFile == INVALID_HANDLE_VALUE) + { + free(pSD); + throw InvalidInputException("Invalid file handle when opening: " + filename); } - void munmap_file(FileHandle& handle) { - #ifdef _WIN32 - UnmapViewOfFile(handle.mapped_file); - #elif defined(__linux__) or defined(__APPLE__) - munmap(handle.mapped_file, handle.length); - #endif - handle.closeHandle(); + // Create file mapping + handle.hMem = + CreateFileMapping(handle.hFile, &sa, PAGE_READONLY, 0, handle.length, NULL); + free(pSD); + if(handle.hMem == NULL) + { + throw InvalidInputException("Invalid file mapping when opening: " + filename); } - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - bool is_line_ending(char* mapped_file, size_t pos) { - return mapped_file[pos] == '\r' || mapped_file[pos] == '\n' || mapped_file[pos] == '\0'; + // map file to memory + handle.mapped_file = (char *)MapViewOfFile(handle.hMem, FILE_MAP_READ, 0, 0, 0); + if(handle.mapped_file == NULL) + { + throw SystemException("Failed to map file to main memory:" + filename); + } +#elif defined(__linux__) or defined(__APPLE__) + handle.fd = open(filename.c_str(), O_RDONLY); + if(handle.fd < -1) + { + throw InvalidInputException("Could not open: " + filename); + } + handle.mapped_file = + (char *)mmap(0, handle.length, PROT_READ, MAP_SHARED, handle.fd, 0); + if(handle.mapped_file == MAP_FAILED) + { + close(handle.fd); + throw SystemException("Error while mapping file to memory"); } +#endif - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void do_line_ending(char* mapped_file, size_t& pos) { - ASSERT(is_line_ending(mapped_file, pos)); - if (mapped_file[pos] != '\0') { - if (mapped_file[pos] == '\r') { // windows line ending - ++pos; - ASSERT(mapped_file[pos] == '\n'); - } + return handle; +} + +void munmap_file(FileHandle &handle) +{ +#ifdef _WIN32 + UnmapViewOfFile(handle.mapped_file); +#elif defined(__linux__) or defined(__APPLE__) + munmap(handle.mapped_file, handle.length); +#endif + 
handle.closeHandle(); +} + +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE +bool is_line_ending(char *mapped_file, size_t pos) +{ + return mapped_file[pos] == '\r' || mapped_file[pos] == '\n' || mapped_file[pos] == '\0'; +} + +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE +void do_line_ending(char *mapped_file, size_t &pos) +{ + ASSERT(is_line_ending(mapped_file, pos)); + if(mapped_file[pos] != '\0') + { + if(mapped_file[pos] == '\r') + { // windows line ending ++pos; + ASSERT(mapped_file[pos] == '\n'); } + ++pos; } - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void goto_next_line(char* mapped_file, size_t& pos, const size_t length) { - for ( ; ; ++pos ) { - if ( pos == length || is_line_ending(mapped_file, pos) ) { - do_line_ending(mapped_file, pos); - break; - } +} + +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE +void goto_next_line(char *mapped_file, size_t &pos, const size_t length) +{ + for(;; ++pos) + { + if(pos == length || is_line_ending(mapped_file, pos)) + { + do_line_ending(mapped_file, pos); + break; } } - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - int64_t read_number(char* mapped_file, size_t& pos, const size_t length) { - int64_t number = 0; - while ( mapped_file[pos] == ' ' ) { - ++pos; - } - for ( ; pos < length; ++pos ) { - if ( mapped_file[pos] == ' ' || is_line_ending(mapped_file, pos) ) { - while ( mapped_file[pos] == ' ' ) { - ++pos; - } - break; +} + +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE +int64_t read_number(char *mapped_file, size_t &pos, const size_t length) +{ + int64_t number = 0; + while(mapped_file[pos] == ' ') + { + ++pos; + } + for(; pos < length; ++pos) + { + if(mapped_file[pos] == ' ' || is_line_ending(mapped_file, pos)) + { + while(mapped_file[pos] == ' ') + { + ++pos; } - ASSERT(mapped_file[pos] >= '0' && mapped_file[pos] <= '9'); - number = number * 10 + (mapped_file[pos] - '0'); + break; } - return number; + ASSERT(mapped_file[pos] >= '0' && mapped_file[pos] <= '9'); + number = number * 10 + (mapped_file[pos] - '0'); + } + return number; +} + +void readHGRHeader(char *mapped_file, size_t &pos, const size_t length, + HyperedgeID &num_hyperedges, HypernodeID &num_hypernodes, + mt_kahypar::Type &type) +{ + // Skip comments + while(mapped_file[pos] == '%') + { + goto_next_line(mapped_file, pos, length); } - void readHGRHeader(char* mapped_file, - size_t& pos, - const size_t length, - HyperedgeID& num_hyperedges, - HypernodeID& num_hypernodes, - mt_kahypar::Type& type) { - // Skip comments - while ( mapped_file[pos] == '%' ) { - goto_next_line(mapped_file, pos, length); + num_hyperedges = read_number(mapped_file, pos, length); + num_hypernodes = read_number(mapped_file, pos, length); + if(!is_line_ending(mapped_file, pos)) + { + type = static_cast(read_number(mapped_file, pos, length)); + } + do_line_ending(mapped_file, pos); +} + +struct HyperedgeRange +{ + const size_t start; + const size_t end; + const HyperedgeID start_id; + const HyperedgeID num_hyperedges; +}; + +inline bool isSinglePinHyperedge(char *mapped_file, size_t pos, const size_t length, + const bool has_hyperedge_weights) +{ + size_t num_spaces = 0; + for(; pos < length; ++pos) + { + if(is_line_ending(mapped_file, pos)) + { + break; } - - num_hyperedges = read_number(mapped_file, pos, length); - num_hypernodes = read_number(mapped_file, pos, length); - if (!is_line_ending(mapped_file, pos)) { - type = static_cast(read_number(mapped_file, pos, length)); + else if(mapped_file[pos] == ' ') + { + ++num_spaces; } - do_line_ending(mapped_file, pos); - } - - struct HyperedgeRange { - const size_t start; - const size_t end; - const HyperedgeID 
start_id; - const HyperedgeID num_hyperedges; - }; - - inline bool isSinglePinHyperedge(char* mapped_file, - size_t pos, - const size_t length, - const bool has_hyperedge_weights) { - size_t num_spaces = 0; - for ( ; pos < length; ++pos ) { - if (is_line_ending(mapped_file, pos)) { - break; - } else if ( mapped_file[pos] == ' ' ) { - ++num_spaces; - } - if ( num_spaces == 2 ) { - break; - } + if(num_spaces == 2) + { + break; } - return has_hyperedge_weights ? num_spaces == 1 : num_spaces == 0; + } + return has_hyperedge_weights ? num_spaces == 1 : num_spaces == 0; +} + +struct HyperedgeReadResult +{ + HyperedgeReadResult() : + num_removed_single_pin_hyperedges(0), num_duplicated_pins(0), + num_hes_with_duplicated_pins(0) + { } - struct HyperedgeReadResult { - HyperedgeReadResult() : - num_removed_single_pin_hyperedges(0), - num_duplicated_pins(0), - num_hes_with_duplicated_pins(0) { } - - size_t num_removed_single_pin_hyperedges; - size_t num_duplicated_pins; - size_t num_hes_with_duplicated_pins; - }; - - HyperedgeReadResult readHyperedges(char* mapped_file, - size_t& pos, - const size_t length, - const HyperedgeID num_hyperedges, - const mt_kahypar::Type type, - HyperedgeVector& hyperedges, - vec& hyperedges_weight, - const bool remove_single_pin_hes) { - HyperedgeReadResult res; - const bool has_hyperedge_weights = type == mt_kahypar::Type::EdgeWeights || - type == mt_kahypar::Type::EdgeAndNodeWeights ? - true : false; - - vec hyperedge_ranges; - tbb::parallel_invoke([&] { - // Sequential pass over all hyperedges to determine ranges in the - // input file that are read in parallel. - size_t current_range_start = pos; - HyperedgeID current_range_start_id = 0; - HyperedgeID current_range_num_hyperedges = 0; - HyperedgeID current_num_hyperedges = 0; - const HyperedgeID num_hyperedges_per_range = std::max( - (num_hyperedges / ( 2 * std::thread::hardware_concurrency())), ID(1)); - while ( current_num_hyperedges < num_hyperedges ) { - // Skip Comments - ASSERT(pos < length); - while ( mapped_file[pos] == '%' ) { - goto_next_line(mapped_file, pos, length); + size_t num_removed_single_pin_hyperedges; + size_t num_duplicated_pins; + size_t num_hes_with_duplicated_pins; +}; + +HyperedgeReadResult readHyperedges(char *mapped_file, size_t &pos, const size_t length, + const HyperedgeID num_hyperedges, + const mt_kahypar::Type type, + HyperedgeVector &hyperedges, + vec &hyperedges_weight, + const bool remove_single_pin_hes) +{ + HyperedgeReadResult res; + const bool has_hyperedge_weights = + type == mt_kahypar::Type::EdgeWeights || + type == mt_kahypar::Type::EdgeAndNodeWeights ? + true : + false; + + vec hyperedge_ranges; + tbb::parallel_invoke( + [&] { + // Sequential pass over all hyperedges to determine ranges in the + // input file that are read in parallel. + size_t current_range_start = pos; + HyperedgeID current_range_start_id = 0; + HyperedgeID current_range_num_hyperedges = 0; + HyperedgeID current_num_hyperedges = 0; + const HyperedgeID num_hyperedges_per_range = + std::max((num_hyperedges / (2 * std::thread::hardware_concurrency())), ID(1)); + while(current_num_hyperedges < num_hyperedges) + { + // Skip Comments ASSERT(pos < length); - } + while(mapped_file[pos] == '%') + { + goto_next_line(mapped_file, pos, length); + ASSERT(pos < length); + } + + // This check is fine even with windows line endings! 
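// (do_line_ending() consumes a preceding '\r' before the '\n' and only then
// advances past the '\n', so after every completed line pos - 1 points at
// '\n' for both Unix and Windows line endings.)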
+ ASSERT(mapped_file[pos - 1] == '\n'); + if(!remove_single_pin_hes || + !isSinglePinHyperedge(mapped_file, pos, length, has_hyperedge_weights)) + { + ++current_range_num_hyperedges; + } + else + { + ++res.num_removed_single_pin_hyperedges; + } + ++current_num_hyperedges; + goto_next_line(mapped_file, pos, length); - // This check is fine even with windows line endings! - ASSERT(mapped_file[pos - 1] == '\n'); - if ( !remove_single_pin_hes || !isSinglePinHyperedge(mapped_file, pos, length, has_hyperedge_weights) ) { - ++current_range_num_hyperedges; - } else { - ++res.num_removed_single_pin_hyperedges; + // If there are enough hyperedges in the current scanned range + // we store that range, which will be later processed in parallel + if(current_range_num_hyperedges == num_hyperedges_per_range) + { + hyperedge_ranges.push_back(HyperedgeRange{ current_range_start, pos, + current_range_start_id, + current_range_num_hyperedges }); + current_range_start = pos; + current_range_start_id += current_range_num_hyperedges; + current_range_num_hyperedges = 0; + } } - ++current_num_hyperedges; - goto_next_line(mapped_file, pos, length); - - // If there are enough hyperedges in the current scanned range - // we store that range, which will be later processed in parallel - if ( current_range_num_hyperedges == num_hyperedges_per_range ) { - hyperedge_ranges.push_back(HyperedgeRange { - current_range_start, pos, current_range_start_id, current_range_num_hyperedges}); - current_range_start = pos; - current_range_start_id += current_range_num_hyperedges; - current_range_num_hyperedges = 0; + if(current_range_num_hyperedges > 0) + { + hyperedge_ranges.push_back(HyperedgeRange{ current_range_start, pos, + current_range_start_id, + current_range_num_hyperedges }); } - } - if ( current_range_num_hyperedges > 0 ) { - hyperedge_ranges.push_back(HyperedgeRange { - current_range_start, pos, current_range_start_id, current_range_num_hyperedges}); - } - }, [&] { - hyperedges.resize(num_hyperedges); - }, [&] { - if ( has_hyperedge_weights ) { - hyperedges_weight.resize(num_hyperedges); - } - }); - - const HyperedgeID tmp_num_hyperedges = num_hyperedges - res.num_removed_single_pin_hyperedges; - hyperedges.resize(tmp_num_hyperedges); - if ( has_hyperedge_weights ) { - hyperedges_weight.resize(tmp_num_hyperedges); - } - - // Process all ranges in parallel and build hyperedge vector - tbb::parallel_for(UL(0), hyperedge_ranges.size(), [&](const size_t i) { - HyperedgeRange& range = hyperedge_ranges[i]; - size_t current_pos = range.start; - const size_t current_end = range.end; - HyperedgeID current_id = range.start_id; - const HyperedgeID last_id = current_id + range.num_hyperedges; + }, + [&] { hyperedges.resize(num_hyperedges); }, + [&] { + if(has_hyperedge_weights) + { + hyperedges_weight.resize(num_hyperedges); + } + }); + + const HyperedgeID tmp_num_hyperedges = + num_hyperedges - res.num_removed_single_pin_hyperedges; + hyperedges.resize(tmp_num_hyperedges); + if(has_hyperedge_weights) + { + hyperedges_weight.resize(tmp_num_hyperedges); + } - while ( current_id < last_id ) { - // Skip Comments + // Process all ranges in parallel and build hyperedge vector + tbb::parallel_for(UL(0), hyperedge_ranges.size(), [&](const size_t i) { + HyperedgeRange &range = hyperedge_ranges[i]; + size_t current_pos = range.start; + const size_t current_end = range.end; + HyperedgeID current_id = range.start_id; + const HyperedgeID last_id = current_id + range.num_hyperedges; + + while(current_id < last_id) + { + // Skip Comments + 
ASSERT(current_pos < current_end); + while(mapped_file[current_pos] == '%') + { + goto_next_line(mapped_file, current_pos, current_end); ASSERT(current_pos < current_end); - while ( mapped_file[current_pos] == '%' ) { - goto_next_line(mapped_file, current_pos, current_end); - ASSERT(current_pos < current_end); - } + } - if ( !remove_single_pin_hes || !isSinglePinHyperedge(mapped_file, current_pos, current_end, has_hyperedge_weights) ) { - ASSERT(current_id < hyperedges.size()); - if ( has_hyperedge_weights ) { - hyperedges_weight[current_id] = read_number(mapped_file, current_pos, current_end); - } + if(!remove_single_pin_hes || + !isSinglePinHyperedge(mapped_file, current_pos, current_end, + has_hyperedge_weights)) + { + ASSERT(current_id < hyperedges.size()); + if(has_hyperedge_weights) + { + hyperedges_weight[current_id] = + read_number(mapped_file, current_pos, current_end); + } - Hyperedge& hyperedge = hyperedges[current_id]; - // Note, a hyperedge line must contain at least one pin - HypernodeID pin = read_number(mapped_file, current_pos, current_end); + Hyperedge &hyperedge = hyperedges[current_id]; + // Note, a hyperedge line must contain at least one pin + HypernodeID pin = read_number(mapped_file, current_pos, current_end); + ASSERT(pin > 0, V(current_id)); + hyperedge.push_back(pin - 1); + while(!is_line_ending(mapped_file, current_pos)) + { + pin = read_number(mapped_file, current_pos, current_end); ASSERT(pin > 0, V(current_id)); hyperedge.push_back(pin - 1); - while ( !is_line_ending(mapped_file, current_pos) ) { - pin = read_number(mapped_file, current_pos, current_end); - ASSERT(pin > 0, V(current_id)); - hyperedge.push_back(pin - 1); - } - do_line_ending(mapped_file, current_pos); - - // Detect duplicated pins - std::sort(hyperedge.begin(), hyperedge.end()); - size_t j = 1; - for ( size_t i = 1; i < hyperedge.size(); ++i ) { - if ( hyperedge[j - 1] != hyperedge[i] ) { - std::swap(hyperedge[i], hyperedge[j++]); - } + } + do_line_ending(mapped_file, current_pos); + + // Detect duplicated pins + std::sort(hyperedge.begin(), hyperedge.end()); + size_t j = 1; + for(size_t i = 1; i < hyperedge.size(); ++i) + { + if(hyperedge[j - 1] != hyperedge[i]) + { + std::swap(hyperedge[i], hyperedge[j++]); } - if ( j < hyperedge.size() ) { - // Remove duplicated pins - __atomic_fetch_add(&res.num_hes_with_duplicated_pins, 1, __ATOMIC_RELAXED); - __atomic_fetch_add(&res.num_duplicated_pins, hyperedge.size() - j, __ATOMIC_RELAXED); - for ( size_t i = j; i < hyperedge.size(); ++i ) { - hyperedge.pop_back(); - } + } + if(j < hyperedge.size()) + { + // Remove duplicated pins + __atomic_fetch_add(&res.num_hes_with_duplicated_pins, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&res.num_duplicated_pins, hyperedge.size() - j, + __ATOMIC_RELAXED); + for(size_t i = j; i < hyperedge.size(); ++i) + { + hyperedge.pop_back(); } - - ASSERT(hyperedge.size() >= 2); - ++current_id; - } else { - goto_next_line(mapped_file, current_pos, current_end); } - } - }); - return res; - } - void readHypernodeWeights(char* mapped_file, - size_t& pos, - const size_t length, - const HypernodeID num_hypernodes, - const mt_kahypar::Type type, - vec& hypernodes_weight) { - bool has_hypernode_weights = type == mt_kahypar::Type::NodeWeights || - type == mt_kahypar::Type::EdgeAndNodeWeights ? 
- true : false; - if ( has_hypernode_weights ) { - hypernodes_weight.resize(num_hypernodes); - for ( HypernodeID hn = 0; hn < num_hypernodes; ++hn ) { - ASSERT(pos > 0 && pos < length); - ASSERT(mapped_file[pos - 1] == '\n'); - hypernodes_weight[hn] = read_number(mapped_file, pos, length); - do_line_ending(mapped_file, pos); + ASSERT(hyperedge.size() >= 2); + ++current_id; + } + else + { + goto_next_line(mapped_file, current_pos, current_end); } } - } - - - void readHypergraphFile(const std::string& filename, - HyperedgeID& num_hyperedges, - HypernodeID& num_hypernodes, - HyperedgeID& num_removed_single_pin_hyperedges, - HyperedgeVector& hyperedges, - vec& hyperedges_weight, - vec& hypernodes_weight, - const bool remove_single_pin_hes) { - ASSERT(!filename.empty(), "No filename for hypergraph file specified"); - FileHandle handle = mmap_file(filename); - size_t pos = 0; - - // Read Hypergraph Header - mt_kahypar::Type type = mt_kahypar::Type::Unweighted; - readHGRHeader(handle.mapped_file, pos, handle.length, num_hyperedges, num_hypernodes, type); - - // Read Hyperedges - HyperedgeReadResult res = - readHyperedges(handle.mapped_file, pos, handle.length, num_hyperedges, - type, hyperedges, hyperedges_weight, remove_single_pin_hes); - num_hyperedges -= res.num_removed_single_pin_hyperedges; - num_removed_single_pin_hyperedges = res.num_removed_single_pin_hyperedges; - - if ( res.num_hes_with_duplicated_pins > 0 ) { - WARNING("Removed" << res.num_duplicated_pins << "duplicated pins in" - << res.num_hes_with_duplicated_pins << "hyperedges!"); + }); + return res; +} + +void readHypernodeWeights(char *mapped_file, size_t &pos, const size_t length, + const HypernodeID num_hypernodes, const mt_kahypar::Type type, + vec &hypernodes_weight) +{ + bool has_hypernode_weights = type == mt_kahypar::Type::NodeWeights || + type == mt_kahypar::Type::EdgeAndNodeWeights ? 
+ true : + false; + if(has_hypernode_weights) + { + hypernodes_weight.resize(num_hypernodes); + for(HypernodeID hn = 0; hn < num_hypernodes; ++hn) + { + ASSERT(pos > 0 && pos < length); + ASSERT(mapped_file[pos - 1] == '\n'); + hypernodes_weight[hn] = read_number(mapped_file, pos, length); + do_line_ending(mapped_file, pos); } - - // Read Hypernode Weights - readHypernodeWeights(handle.mapped_file, pos, handle.length, num_hypernodes, type, hypernodes_weight); - ASSERT(pos == handle.length); - - munmap_file(handle); + } +} + +void readHypergraphFile(const std::string &filename, HyperedgeID &num_hyperedges, + HypernodeID &num_hypernodes, + HyperedgeID &num_removed_single_pin_hyperedges, + HyperedgeVector &hyperedges, + vec &hyperedges_weight, + vec &hypernodes_weight, + const bool remove_single_pin_hes) +{ + ASSERT(!filename.empty(), "No filename for hypergraph file specified"); + FileHandle handle = mmap_file(filename); + size_t pos = 0; + + // Read Hypergraph Header + mt_kahypar::Type type = mt_kahypar::Type::Unweighted; + readHGRHeader(handle.mapped_file, pos, handle.length, num_hyperedges, num_hypernodes, + type); + + // Read Hyperedges + HyperedgeReadResult res = + readHyperedges(handle.mapped_file, pos, handle.length, num_hyperedges, type, + hyperedges, hyperedges_weight, remove_single_pin_hes); + num_hyperedges -= res.num_removed_single_pin_hyperedges; + num_removed_single_pin_hyperedges = res.num_removed_single_pin_hyperedges; + + if(res.num_hes_with_duplicated_pins > 0) + { + WARNING("Removed" << res.num_duplicated_pins << "duplicated pins in" + << res.num_hes_with_duplicated_pins << "hyperedges!"); } - void readMetisHeader(char* mapped_file, - size_t& pos, - const size_t length, - HyperedgeID& num_edges, - HypernodeID& num_vertices, - bool& has_edge_weights, - bool& has_vertex_weights) { - // Skip comments - while ( mapped_file[pos] == '%' ) { - goto_next_line(mapped_file, pos, length); - } - - num_vertices = read_number(mapped_file, pos, length); - num_edges = read_number(mapped_file, pos, length); - - if (!is_line_ending(mapped_file, pos)) { - // read the (up to) three 0/1 format digits - uint32_t format_num = read_number(mapped_file, pos, length); - ASSERT(format_num < 100, "Vertex sizes in input file are not supported."); - ASSERT(format_num / 10 == 0 || format_num / 10 == 1); - has_vertex_weights = (format_num / 10 == 1); - ASSERT(format_num % 10 == 0 || format_num % 10 == 1); - has_edge_weights = (format_num % 10 == 1); - } - do_line_ending(mapped_file, pos); + // Read Hypernode Weights + readHypernodeWeights(handle.mapped_file, pos, handle.length, num_hypernodes, type, + hypernodes_weight); + ASSERT(pos == handle.length); + + munmap_file(handle); +} + +void readMetisHeader(char *mapped_file, size_t &pos, const size_t length, + HyperedgeID &num_edges, HypernodeID &num_vertices, + bool &has_edge_weights, bool &has_vertex_weights) +{ + // Skip comments + while(mapped_file[pos] == '%') + { + goto_next_line(mapped_file, pos, length); } - struct VertexRange { - const size_t start; - const size_t end; - const HypernodeID vertex_start_id; - const HypernodeID num_vertices; - const HyperedgeID edge_start_id; - }; - - void readVertices(char* mapped_file, - size_t& pos, - const size_t length, - const HyperedgeID num_edges, - const HypernodeID num_vertices, - const bool has_edge_weights, - const bool has_vertex_weights, - HyperedgeVector& edges, - vec& edges_weight, - vec& vertices_weight) { - vec vertex_ranges; - tbb::parallel_invoke([&] { - // Sequential pass over all vertices to 
determine ranges in the - // input file that are read in parallel. - // Additionally, we need to sum the vertex degrees to determine edge indices. - size_t current_range_start = pos; - HypernodeID current_range_vertex_id = 0; - HypernodeID current_range_num_vertices = 0; - HyperedgeID current_range_edge_id = 0; - HyperedgeID current_range_num_edges = 0; - const HypernodeID num_vertices_per_range = std::max( - (num_vertices / ( 2 * std::thread::hardware_concurrency())), ID(1)); - while ( current_range_vertex_id + current_range_num_vertices < num_vertices ) { - // Skip Comments - ASSERT(pos < length); - while ( mapped_file[pos] == '%' ) { - goto_next_line(mapped_file, pos, length); + num_vertices = read_number(mapped_file, pos, length); + num_edges = read_number(mapped_file, pos, length); + + if(!is_line_ending(mapped_file, pos)) + { + // read the (up to) three 0/1 format digits + uint32_t format_num = read_number(mapped_file, pos, length); + ASSERT(format_num < 100, "Vertex sizes in input file are not supported."); + ASSERT(format_num / 10 == 0 || format_num / 10 == 1); + has_vertex_weights = (format_num / 10 == 1); + ASSERT(format_num % 10 == 0 || format_num % 10 == 1); + has_edge_weights = (format_num % 10 == 1); + } + do_line_ending(mapped_file, pos); +} + +struct VertexRange +{ + const size_t start; + const size_t end; + const HypernodeID vertex_start_id; + const HypernodeID num_vertices; + const HyperedgeID edge_start_id; +}; + +void readVertices(char *mapped_file, size_t &pos, const size_t length, + const HyperedgeID num_edges, const HypernodeID num_vertices, + const bool has_edge_weights, const bool has_vertex_weights, + HyperedgeVector &edges, vec &edges_weight, + vec &vertices_weight) +{ + vec vertex_ranges; + tbb::parallel_invoke( + [&] { + // Sequential pass over all vertices to determine ranges in the + // input file that are read in parallel. + // Additionally, we need to sum the vertex degrees to determine edge + // indices. + size_t current_range_start = pos; + HypernodeID current_range_vertex_id = 0; + HypernodeID current_range_num_vertices = 0; + HyperedgeID current_range_edge_id = 0; + HyperedgeID current_range_num_edges = 0; + const HypernodeID num_vertices_per_range = + std::max((num_vertices / (2 * std::thread::hardware_concurrency())), ID(1)); + while(current_range_vertex_id + current_range_num_vertices < num_vertices) + { + // Skip Comments ASSERT(pos < length); - } + while(mapped_file[pos] == '%') + { + goto_next_line(mapped_file, pos, length); + ASSERT(pos < length); + } - ASSERT(mapped_file[pos - 1] == '\n'); - ++current_range_num_vertices; + ASSERT(mapped_file[pos - 1] == '\n'); + ++current_range_num_vertices; - // Count the forward edges, ignore backward edges. - // This is necessary because we can only calculate unique edge ids - // efficiently if the edges are deduplicated. - if ( has_vertex_weights ) { - read_number(mapped_file, pos, length); - } - HyperedgeID vertex_degree = 0; - while (!is_line_ending(mapped_file, pos) && pos < length) { - const HypernodeID source = current_range_vertex_id + current_range_num_vertices; - const HypernodeID target = read_number(mapped_file, pos, length); - ASSERT(source != target); - if ( source < target ) { - ++vertex_degree; - } - if ( has_edge_weights ) { + // Count the forward edges, ignore backward edges. + // This is necessary because we can only calculate unique edge ids + // efficiently if the edges are deduplicated. 
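// In the Metis format an undirected edge {u, v} appears twice, once in the
// adjacency list of u and once in that of v. Counting an edge only when
// source < target therefore sees it exactly once, which is what makes the
// consecutive edge ids assigned per range unique.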
+ if(has_vertex_weights) + { read_number(mapped_file, pos, length); } + HyperedgeID vertex_degree = 0; + while(!is_line_ending(mapped_file, pos) && pos < length) + { + const HypernodeID source = + current_range_vertex_id + current_range_num_vertices; + const HypernodeID target = read_number(mapped_file, pos, length); + ASSERT(source != target); + if(source < target) + { + ++vertex_degree; + } + if(has_edge_weights) + { + read_number(mapped_file, pos, length); + } + } + do_line_ending(mapped_file, pos); + current_range_num_edges += vertex_degree; + + // If there are enough vertices in the current scanned range + // we store that range, which will be processed in parallel later + if(current_range_num_vertices == num_vertices_per_range) + { + vertex_ranges.push_back( + VertexRange{ current_range_start, pos, current_range_vertex_id, + current_range_num_vertices, current_range_edge_id }); + current_range_start = pos; + current_range_vertex_id += current_range_num_vertices; + current_range_num_vertices = 0; + current_range_edge_id += current_range_num_edges; + current_range_num_edges = 0; + } } - do_line_ending(mapped_file, pos); - current_range_num_edges += vertex_degree; - - // If there are enough vertices in the current scanned range - // we store that range, which will be processed in parallel later - if ( current_range_num_vertices == num_vertices_per_range ) { - vertex_ranges.push_back(VertexRange { - current_range_start, pos, current_range_vertex_id, current_range_num_vertices, current_range_edge_id}); - current_range_start = pos; + if(current_range_num_vertices > 0) + { + vertex_ranges.push_back( + VertexRange{ current_range_start, pos, current_range_vertex_id, + current_range_num_vertices, current_range_edge_id }); current_range_vertex_id += current_range_num_vertices; - current_range_num_vertices = 0; current_range_edge_id += current_range_num_edges; - current_range_num_edges = 0; } + ASSERT(current_range_vertex_id == num_vertices); + ASSERT(current_range_edge_id == num_edges); + }, + [&] { edges.resize(num_edges); }, + [&] { + if(has_edge_weights) + { + edges_weight.resize(num_edges); + } + }, + [&] { + if(has_vertex_weights) + { + vertices_weight.resize(num_vertices); + } + }); + + ASSERT([&]() { + HyperedgeID last_end = 0; + for(const auto &range : vertex_ranges) + { + if(last_end > range.start) + { + return false; } - if ( current_range_num_vertices > 0 ) { - vertex_ranges.push_back(VertexRange { - current_range_start, pos, current_range_vertex_id, current_range_num_vertices, current_range_edge_id}); - current_range_vertex_id += current_range_num_vertices; - current_range_edge_id += current_range_num_edges; - } - ASSERT(current_range_vertex_id == num_vertices); - ASSERT(current_range_edge_id == num_edges); - }, [&] { - edges.resize(num_edges); - }, [&] { - if ( has_edge_weights ) { - edges_weight.resize(num_edges); + last_end = range.end; + } + return true; + }()); + + // Process all ranges in parallel, build edge vector and assign weights + tbb::parallel_for(UL(0), vertex_ranges.size(), [&](const size_t i) { + const VertexRange &range = vertex_ranges[i]; + size_t current_pos = range.start; + const size_t current_end = range.end; + HypernodeID current_vertex_id = range.vertex_start_id; + const HypernodeID last_vertex_id = current_vertex_id + range.num_vertices; + HyperedgeID current_edge_id = range.edge_start_id; + + while(current_vertex_id < last_vertex_id) + { + // Skip Comments + ASSERT(current_pos < current_end); + while(mapped_file[pos] == '%') + { + 
goto_next_line(mapped_file, current_pos, current_end); + ASSERT(current_pos < current_end); } - }, [&] { - if ( has_vertex_weights ) { - vertices_weight.resize(num_vertices); + + if(has_vertex_weights) + { + ASSERT(current_vertex_id < vertices_weight.size()); + vertices_weight[current_vertex_id] = + read_number(mapped_file, current_pos, current_end); } - }); - ASSERT([&]() { - HyperedgeID last_end = 0; - for(const auto& range: vertex_ranges) { - if (last_end > range.start) { - return false; + while(!is_line_ending(mapped_file, current_pos)) + { + const HypernodeID target = read_number(mapped_file, current_pos, current_end); + ASSERT(target > 0 && (target - 1) < num_vertices, V(target)); + + // process forward edges, ignore backward edges + if(current_vertex_id < (target - 1)) + { + ASSERT(current_edge_id < edges.size()); + // At this point, some magic is involved: + // In case of the graph partitioner, the right handed expression is + // considered a pair. In case of the hypergraph partitioner, the right + // handed expression is considered a vector. + edges[current_edge_id] = { current_vertex_id, target - 1 }; + + if(has_edge_weights) + { + edges_weight[current_edge_id] = + read_number(mapped_file, current_pos, current_end); } - last_end = range.end; - } - return true; - }() - ); - - // Process all ranges in parallel, build edge vector and assign weights - tbb::parallel_for(UL(0), vertex_ranges.size(), [&](const size_t i) { - const VertexRange& range = vertex_ranges[i]; - size_t current_pos = range.start; - const size_t current_end = range.end; - HypernodeID current_vertex_id = range.vertex_start_id; - const HypernodeID last_vertex_id = current_vertex_id + range.num_vertices; - HyperedgeID current_edge_id = range.edge_start_id; - - while ( current_vertex_id < last_vertex_id ) { - // Skip Comments - ASSERT(current_pos < current_end); - while ( mapped_file[pos] == '%' ) { - goto_next_line(mapped_file, current_pos, current_end); - ASSERT(current_pos < current_end); - } - - if ( has_vertex_weights ) { - ASSERT(current_vertex_id < vertices_weight.size()); - vertices_weight[current_vertex_id] = read_number(mapped_file, current_pos, current_end); + ++current_edge_id; } - - while ( !is_line_ending(mapped_file, current_pos) ) { - const HypernodeID target = read_number(mapped_file, current_pos, current_end); - ASSERT(target > 0 && (target - 1) < num_vertices, V(target)); - - // process forward edges, ignore backward edges - if ( current_vertex_id < (target - 1) ) { - ASSERT(current_edge_id < edges.size()); - // At this point, some magic is involved: - // In case of the graph partitioner, the right handed expression is considered a pair. - // In case of the hypergraph partitioner, the right handed expression is considered a vector. 
- edges[current_edge_id] = {current_vertex_id, target - 1}; - - if ( has_edge_weights ) { - edges_weight[current_edge_id] = read_number(mapped_file, current_pos, current_end); - } - ++current_edge_id; - } else if ( has_edge_weights ) { - read_number(mapped_file, current_pos, current_end); - } + else if(has_edge_weights) + { + read_number(mapped_file, current_pos, current_end); } - do_line_ending(mapped_file, current_pos); - ++current_vertex_id; } - }); + do_line_ending(mapped_file, current_pos); + ++current_vertex_id; + } + }); +} + +void readGraphFile(const std::string &filename, HyperedgeID &num_edges, + HypernodeID &num_vertices, HyperedgeVector &edges, + vec &edges_weight, + vec &vertices_weight) +{ + ASSERT(!filename.empty(), "No filename for metis file specified"); + FileHandle handle = mmap_file(filename); + size_t pos = 0; + + // Read Metis Header + bool has_edge_weights = false; + bool has_vertex_weights = false; + readMetisHeader(handle.mapped_file, pos, handle.length, num_edges, num_vertices, + has_edge_weights, has_vertex_weights); + + // Read Vertices + readVertices(handle.mapped_file, pos, handle.length, num_edges, num_vertices, + has_edge_weights, has_vertex_weights, edges, edges_weight, + vertices_weight); + ASSERT(pos == handle.length); + + munmap_file(handle); +} + +void readPartitionFile(const std::string &filename, std::vector &partition) +{ + ASSERT(!filename.empty(), "No filename for partition file specified"); + ASSERT(partition.empty(), "Partition vector is not empty"); + std::ifstream file(filename); + if(file) + { + int part; + while(file >> part) + { + partition.push_back(part); + } + file.close(); } - - void readGraphFile(const std::string& filename, - HyperedgeID& num_edges, - HypernodeID& num_vertices, - HyperedgeVector& edges, - vec& edges_weight, - vec& vertices_weight) { - ASSERT(!filename.empty(), "No filename for metis file specified"); - FileHandle handle = mmap_file(filename); - size_t pos = 0; - - // Read Metis Header - bool has_edge_weights = false; - bool has_vertex_weights = false; - readMetisHeader(handle.mapped_file, pos, handle.length, num_edges, - num_vertices, has_edge_weights, has_vertex_weights); - - // Read Vertices - readVertices(handle.mapped_file, pos, handle.length, num_edges, num_vertices, - has_edge_weights, has_vertex_weights, edges, edges_weight, vertices_weight); - ASSERT(pos == handle.length); - - munmap_file(handle); + else + { + std::cerr << "Error: File not found: " << std::endl; } - - void readPartitionFile(const std::string& filename, std::vector& partition) { - ASSERT(!filename.empty(), "No filename for partition file specified"); - ASSERT(partition.empty(), "Partition vector is not empty"); - std::ifstream file(filename); - if (file) { - int part; - while (file >> part) { - partition.push_back(part); - } - file.close(); - } else { - std::cerr << "Error: File not found: " << std::endl; +} + +void readPartitionFile(const std::string &filename, PartitionID *partition) +{ + ASSERT(!filename.empty(), "No filename for partition file specified"); + std::ifstream file(filename); + if(file) + { + int part; + HypernodeID hn = 0; + while(file >> part) + { + partition[hn++] = part; } + file.close(); } - - void readPartitionFile(const std::string& filename, PartitionID* partition) { - ASSERT(!filename.empty(), "No filename for partition file specified"); - std::ifstream file(filename); - if (file) { - int part; - HypernodeID hn = 0; - while (file >> part) { - partition[hn++] = part; - } - file.close(); - } else { - std::cerr << "Error: 
File not found: " << std::endl; - } + else + { + std::cerr << "Error: File not found: " << std::endl; } - - template - void writePartitionFile(const PartitionedHypergraph& phg, const std::string& filename) { - if (filename.empty()) { - LOG << "No filename for partition file specified"; - } else { - std::ofstream out_stream(filename.c_str()); - std::vector partition(phg.initialNumNodes(), -1); - for (const HypernodeID& hn : phg.nodes()) { - ASSERT(hn < partition.size()); - partition[hn] = phg.partID(hn); - } - for (const PartitionID& part : partition) { - out_stream << part << std::endl; - } - out_stream.close(); +} + +template +void writePartitionFile(const PartitionedHypergraph &phg, const std::string &filename) +{ + if(filename.empty()) + { + LOG << "No filename for partition file specified"; + } + else + { + std::ofstream out_stream(filename.c_str()); + std::vector partition(phg.initialNumNodes(), -1); + for(const HypernodeID &hn : phg.nodes()) + { + ASSERT(hn < partition.size()); + partition[hn] = phg.partID(hn); } + for(const PartitionID &part : partition) + { + out_stream << part << std::endl; + } + out_stream.close(); } +} - namespace { - #define WRITE_PARTITION_FILE(X) void writePartitionFile(const X& phg, const std::string& filename) - } +namespace { +#define WRITE_PARTITION_FILE(X) \ + void writePartitionFile(const X &phg, const std::string &filename) +} // namespace - INSTANTIATE_FUNC_WITH_PARTITIONED_HG(WRITE_PARTITION_FILE) +INSTANTIATE_FUNC_WITH_PARTITIONED_HG(WRITE_PARTITION_FILE) -} // namespace +} // namespace mt_kahypar::io diff --git a/mt-kahypar/io/hypergraph_io.h b/mt-kahypar/io/hypergraph_io.h index 751821ae9..67164677b 100644 --- a/mt-kahypar/io/hypergraph_io.h +++ b/mt-kahypar/io/hypergraph_io.h @@ -34,30 +34,27 @@ namespace mt_kahypar { namespace io { - using Hyperedge = vec; - using HyperedgeVector = vec; - - void readHypergraphFile(const std::string& filename, - HyperedgeID& num_hyperedges, - HypernodeID& num_hypernodes, - HyperedgeID& num_removed_single_pin_hyperedges, - HyperedgeVector& hyperedges, - vec& hyperedges_weight, - vec& hypernodes_weight, - const bool remove_single_pin_hes = true); - - void readGraphFile(const std::string& filename, - HyperedgeID& num_hyperedges, - HypernodeID& num_hypernodes, - HyperedgeVector& hyperedges, - vec& hyperedges_weight, - vec& hypernodes_weight); - - void readPartitionFile(const std::string& filename, std::vector& partition); - void readPartitionFile(const std::string& filename, PartitionID* partition); - - template - void writePartitionFile(const PartitionedHypergraph& phg, const std::string& filename); - -} // namespace io -} // namespace mt_kahypar +using Hyperedge = vec; +using HyperedgeVector = vec; + +void readHypergraphFile(const std::string &filename, HyperedgeID &num_hyperedges, + HypernodeID &num_hypernodes, + HyperedgeID &num_removed_single_pin_hyperedges, + HyperedgeVector &hyperedges, + vec &hyperedges_weight, + vec &hypernodes_weight, + const bool remove_single_pin_hes = true); + +void readGraphFile(const std::string &filename, HyperedgeID &num_hyperedges, + HypernodeID &num_hypernodes, HyperedgeVector &hyperedges, + vec &hyperedges_weight, + vec &hypernodes_weight); + +void readPartitionFile(const std::string &filename, std::vector &partition); +void readPartitionFile(const std::string &filename, PartitionID *partition); + +template +void writePartitionFile(const PartitionedHypergraph &phg, const std::string &filename); + +} // namespace io +} // namespace mt_kahypar diff --git 
a/mt-kahypar/io/partitioning_output.cpp b/mt-kahypar/io/partitioning_output.cpp index 13f1f0160..546484ca6 100644 --- a/mt-kahypar/io/partitioning_output.cpp +++ b/mt-kahypar/io/partitioning_output.cpp @@ -13,8 +13,8 @@ * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -30,788 +30,918 @@ #include #include "tbb/blocked_range.h" +#include "tbb/enumerable_thread_specific.h" #include "tbb/parallel_invoke.h" -#include "tbb/parallel_sort.h" #include "tbb/parallel_reduce.h" -#include "tbb/enumerable_thread_specific.h" +#include "tbb/parallel_sort.h" #include "mt-kahypar/definitions.h" -#include "mt-kahypar/parallel/memory_pool.h" #include "mt-kahypar/parallel/atomic_wrapper.h" -#include "mt-kahypar/partition/metrics.h" +#include "mt-kahypar/parallel/memory_pool.h" #include "mt-kahypar/partition/mapping/target_graph.h" +#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/utils/hypergraph_statistics.h" #include "mt-kahypar/utils/memory_tree.h" #include "mt-kahypar/utils/timer.h" #include "kahypar-resources/utils/math.h" - namespace mt_kahypar::io { - namespace internal { - struct Statistic { - uint64_t min = 0; - uint64_t q1 = 0; - uint64_t med = 0; - uint64_t q3 = 0; - uint64_t max = 0; - double avg = 0.0; - double sd = 0.0; - }; - - template - Statistic createStats(const std::vector& vec, const double avg, const double stdev) { - internal::Statistic stats; - if (!vec.empty()) { - const auto quartiles = kahypar::math::firstAndThirdQuartile(vec); - stats.min = vec.front(); - stats.q1 = quartiles.first; - stats.med = kahypar::math::median(vec); - stats.q3 = quartiles.second; - stats.max = vec.back(); - stats.avg = avg; - stats.sd = stdev; - } - return stats; - } - - void printHypergraphStats(const Statistic& he_size_stats, - const Statistic& he_weight_stats, - const Statistic& hn_deg_stats, - const Statistic& hn_weight_stats) { - // default double precision is 7 - const uint8_t double_width = 7; - const uint8_t he_size_width = std::max(kahypar::math::digits(he_size_stats.max), double_width) + 4; - const uint8_t he_weight_width = std::max(kahypar::math::digits(he_weight_stats.max), double_width) + 4; - const uint8_t hn_deg_width = std::max(kahypar::math::digits(hn_deg_stats.max), double_width) + 4; - const uint8_t hn_weight_width = std::max(kahypar::math::digits(hn_weight_stats.max), double_width) + 4; - - LOG << "HE size" << std::right << std::setw(he_size_width + 10) - << "HE weight" << std::right << std::setw(he_weight_width + 8) - << "HN degree" << std::right << std::setw(hn_deg_width + 8) - << "HN weight"; - LOG << "| min=" << std::left << std::setw(he_size_width) << he_size_stats.min - << " | min=" << std::left << std::setw(he_weight_width) << he_weight_stats.min - << " | min=" << std::left << std::setw(hn_deg_width) << hn_deg_stats.min - << " | min=" << std::left << std::setw(hn_weight_width) << hn_weight_stats.min; - LOG << "| Q1 =" << std::left << std::setw(he_size_width) << he_size_stats.q1 - << " | Q1 =" << std::left << std::setw(he_weight_width) << he_weight_stats.q1 - << " | Q1 
=" << std::left << std::setw(hn_deg_width) << hn_deg_stats.q1 - << " | Q1 =" << std::left << std::setw(hn_weight_width) << hn_weight_stats.q1; - LOG << "| med=" << std::left << std::setw(he_size_width) << he_size_stats.med - << " | med=" << std::left << std::setw(he_weight_width) << he_weight_stats.med - << " | med=" << std::left << std::setw(hn_deg_width) << hn_deg_stats.med - << " | med=" << std::left << std::setw(hn_weight_width) << hn_weight_stats.med; - LOG << "| Q3 =" << std::left << std::setw(he_size_width) << he_size_stats.q3 - << " | Q3 =" << std::left << std::setw(he_weight_width) << he_weight_stats.q3 - << " | Q3 =" << std::left << std::setw(hn_deg_width) << hn_deg_stats.q3 - << " | Q3 =" << std::left << std::setw(hn_weight_width) << hn_weight_stats.q3; - LOG << "| max=" << std::left << std::setw(he_size_width) << he_size_stats.max - << " | max=" << std::left << std::setw(he_weight_width) << he_weight_stats.max - << " | max=" << std::left << std::setw(hn_deg_width) << hn_deg_stats.max - << " | max=" << std::left << std::setw(hn_weight_width) << hn_weight_stats.max; - LOG << "| avg=" << std::left << std::setw(he_size_width) << he_size_stats.avg - << " | avg=" << std::left << std::setw(he_weight_width) << he_weight_stats.avg - << " | avg=" << std::left << std::setw(hn_deg_width) << hn_deg_stats.avg - << " | avg=" << std::left << std::setw(hn_weight_width) << hn_weight_stats.avg; - LOG << "| sd =" << std::left << std::setw(he_size_width) << he_size_stats.sd - << " | sd =" << std::left << std::setw(he_weight_width) << he_weight_stats.sd - << " | sd =" << std::left << std::setw(hn_deg_width) << hn_deg_stats.sd - << " | sd =" << std::left << std::setw(hn_weight_width) << hn_weight_stats.sd; - } - - } // namespace internal - - template - void printHypergraphInfo(const Hypergraph& hypergraph, - const Context& context, - const std::string& name, - const bool show_memory_consumption) { - std::vector he_sizes; - std::vector he_weights; - std::vector hn_degrees; - std::vector hn_weights; - - tbb::parallel_invoke([&] { - he_sizes.resize(hypergraph.initialNumEdges()); - }, [&] { - he_weights.resize(hypergraph.initialNumEdges()); - }, [&] { - hn_degrees.resize(hypergraph.initialNumNodes()); - }, [&] { - hn_weights.resize(hypergraph.initialNumNodes()); - }); +namespace internal { +struct Statistic { + uint64_t min = 0; + uint64_t q1 = 0; + uint64_t med = 0; + uint64_t q3 = 0; + uint64_t max = 0; + double avg = 0.0; + double sd = 0.0; +}; + +template +Statistic createStats(const std::vector &vec, const double avg, + const double stdev) { + internal::Statistic stats; + if (!vec.empty()) { + const auto quartiles = kahypar::math::firstAndThirdQuartile(vec); + stats.min = vec.front(); + stats.q1 = quartiles.first; + stats.med = kahypar::math::median(vec); + stats.q3 = quartiles.second; + stats.max = vec.back(); + stats.avg = avg; + stats.sd = stdev; + } + return stats; +} + +void printHypergraphStats(const Statistic &he_size_stats, + const Statistic &he_weight_stats, + const Statistic &hn_deg_stats, + const Statistic &hn_weight_stats) { + // default double precision is 7 + const uint8_t double_width = 7; + const uint8_t he_size_width = + std::max(kahypar::math::digits(he_size_stats.max), double_width) + 4; + const uint8_t he_weight_width = + std::max(kahypar::math::digits(he_weight_stats.max), double_width) + 4; + const uint8_t hn_deg_width = + std::max(kahypar::math::digits(hn_deg_stats.max), double_width) + 4; + const uint8_t hn_weight_width = + std::max(kahypar::math::digits(hn_weight_stats.max), 
double_width) + 4; + + LOG << "HE size" << std::right << std::setw(he_size_width + 10) << "HE weight" + << std::right << std::setw(he_weight_width + 8) << "HN degree" + << std::right << std::setw(hn_deg_width + 8) << "HN weight"; + LOG << "| min=" << std::left << std::setw(he_size_width) << he_size_stats.min + << " | min=" << std::left << std::setw(he_weight_width) + << he_weight_stats.min << " | min=" << std::left + << std::setw(hn_deg_width) << hn_deg_stats.min << " | min=" << std::left + << std::setw(hn_weight_width) << hn_weight_stats.min; + LOG << "| Q1 =" << std::left << std::setw(he_size_width) << he_size_stats.q1 + << " | Q1 =" << std::left << std::setw(he_weight_width) + << he_weight_stats.q1 << " | Q1 =" << std::left << std::setw(hn_deg_width) + << hn_deg_stats.q1 << " | Q1 =" << std::left << std::setw(hn_weight_width) + << hn_weight_stats.q1; + LOG << "| med=" << std::left << std::setw(he_size_width) << he_size_stats.med + << " | med=" << std::left << std::setw(he_weight_width) + << he_weight_stats.med << " | med=" << std::left + << std::setw(hn_deg_width) << hn_deg_stats.med << " | med=" << std::left + << std::setw(hn_weight_width) << hn_weight_stats.med; + LOG << "| Q3 =" << std::left << std::setw(he_size_width) << he_size_stats.q3 + << " | Q3 =" << std::left << std::setw(he_weight_width) + << he_weight_stats.q3 << " | Q3 =" << std::left << std::setw(hn_deg_width) + << hn_deg_stats.q3 << " | Q3 =" << std::left << std::setw(hn_weight_width) + << hn_weight_stats.q3; + LOG << "| max=" << std::left << std::setw(he_size_width) << he_size_stats.max + << " | max=" << std::left << std::setw(he_weight_width) + << he_weight_stats.max << " | max=" << std::left + << std::setw(hn_deg_width) << hn_deg_stats.max << " | max=" << std::left + << std::setw(hn_weight_width) << hn_weight_stats.max; + LOG << "| avg=" << std::left << std::setw(he_size_width) << he_size_stats.avg + << " | avg=" << std::left << std::setw(he_weight_width) + << he_weight_stats.avg << " | avg=" << std::left + << std::setw(hn_deg_width) << hn_deg_stats.avg << " | avg=" << std::left + << std::setw(hn_weight_width) << hn_weight_stats.avg; + LOG << "| sd =" << std::left << std::setw(he_size_width) << he_size_stats.sd + << " | sd =" << std::left << std::setw(he_weight_width) + << he_weight_stats.sd << " | sd =" << std::left << std::setw(hn_deg_width) + << hn_deg_stats.sd << " | sd =" << std::left << std::setw(hn_weight_width) + << hn_weight_stats.sd; +} + +} // namespace internal + +template +void printHypergraphInfo(const Hypergraph &hypergraph, const Context &context, + const std::string &name, + const bool show_memory_consumption) { + std::vector he_sizes; + std::vector he_weights; + std::vector hn_degrees; + std::vector hn_weights; + + tbb::parallel_invoke( + [&] { he_sizes.resize(hypergraph.initialNumEdges()); }, + [&] { he_weights.resize(hypergraph.initialNumEdges()); }, + [&] { hn_degrees.resize(hypergraph.initialNumNodes()); }, + [&] { hn_weights.resize(hypergraph.initialNumNodes()); }); + + HypernodeID num_hypernodes = hypergraph.initialNumNodes(); + const double avg_hn_degree = utils::avgHypernodeDegree(hypergraph); + hypergraph.doParallelForAllNodes([&](const HypernodeID &hn) { + hn_degrees[hn] = hypergraph.nodeDegree(hn); + hn_weights[hn] = hypergraph.nodeWeight(hn); + }); + const double avg_hn_weight = utils::parallel_avg(hn_weights, num_hypernodes); + const double stdev_hn_degree = + utils::parallel_stdev(hn_degrees, avg_hn_degree, num_hypernodes); + const double stdev_hn_weight = + 
utils::parallel_stdev(hn_weights, avg_hn_weight, num_hypernodes); + + HyperedgeID num_hyperedges = hypergraph.initialNumEdges(); + HypernodeID num_pins = hypergraph.initialNumPins(); + const double avg_he_size = utils::avgHyperedgeDegree(hypergraph); + hypergraph.doParallelForAllEdges([&](const HyperedgeID &he) { + he_sizes[he] = hypergraph.edgeSize(he); + he_weights[he] = hypergraph.edgeWeight(he); + }); + const double avg_he_weight = utils::parallel_avg(he_weights, num_hyperedges); + const double stdev_he_size = + utils::parallel_stdev(he_sizes, avg_he_size, num_hyperedges); + const double stdev_he_weight = + utils::parallel_stdev(he_weights, avg_he_weight, num_hyperedges); + + tbb::enumerable_thread_specific graph_edge_count(0); + hypergraph.doParallelForAllEdges([&](const HyperedgeID &he) { + if (hypergraph.edgeSize(he) == 2) { + graph_edge_count.local() += 1; + } + }); + + tbb::parallel_invoke( + [&] { tbb::parallel_sort(he_sizes.begin(), he_sizes.end()); }, + [&] { tbb::parallel_sort(he_weights.begin(), he_weights.end()); }, + [&] { tbb::parallel_sort(hn_degrees.begin(), hn_degrees.end()); }, + [&] { tbb::parallel_sort(hn_weights.begin(), hn_weights.end()); }); + + LOG << "Hypergraph Information"; + LOG << "Name :" << name; + LOG << "# HNs :" << num_hypernodes << "# HEs :" + << (Hypergraph::is_graph ? num_hyperedges / 2 : num_hyperedges) + << "# pins:" << num_pins << "# graph edges:" + << (Hypergraph::is_graph ? num_hyperedges / 2 + : graph_edge_count.combine(std::plus<>())); + + internal::printHypergraphStats( + internal::createStats(he_sizes, avg_he_size, stdev_he_size), + internal::createStats(he_weights, avg_he_weight, stdev_he_weight), + internal::createStats(hn_degrees, avg_hn_degree, stdev_hn_degree), + internal::createStats(hn_weights, avg_hn_weight, stdev_hn_weight)); + + if (hypergraph.hasFixedVertices()) { + printFixedVertexPartWeights(hypergraph, context); + } - HypernodeID num_hypernodes = hypergraph.initialNumNodes(); - const double avg_hn_degree = utils::avgHypernodeDegree(hypergraph); - hypergraph.doParallelForAllNodes([&](const HypernodeID& hn) { - hn_degrees[hn] = hypergraph.nodeDegree(hn); - hn_weights[hn] = hypergraph.nodeWeight(hn); - }); - const double avg_hn_weight = utils::parallel_avg(hn_weights, num_hypernodes); - const double stdev_hn_degree = utils::parallel_stdev(hn_degrees, avg_hn_degree, num_hypernodes); - const double stdev_hn_weight = utils::parallel_stdev(hn_weights, avg_hn_weight, num_hypernodes); - - HyperedgeID num_hyperedges = hypergraph.initialNumEdges(); - HypernodeID num_pins = hypergraph.initialNumPins(); - const double avg_he_size = utils::avgHyperedgeDegree(hypergraph); - hypergraph.doParallelForAllEdges([&](const HyperedgeID& he) { - he_sizes[he] = hypergraph.edgeSize(he); - he_weights[he] = hypergraph.edgeWeight(he); - }); - const double avg_he_weight = utils::parallel_avg(he_weights, num_hyperedges); - const double stdev_he_size = utils::parallel_stdev(he_sizes, avg_he_size, num_hyperedges); - const double stdev_he_weight = utils::parallel_stdev(he_weights, avg_he_weight, num_hyperedges); - - tbb::enumerable_thread_specific graph_edge_count(0); - hypergraph.doParallelForAllEdges([&](const HyperedgeID& he) { - if (hypergraph.edgeSize(he) == 2) { - graph_edge_count.local() += 1; + if (show_memory_consumption) { + // Print Memory Consumption + utils::MemoryTreeNode hypergraph_memory_consumption( + "Hypergraph", utils::OutputType::MEGABYTE); + hypergraph.memoryConsumption(&hypergraph_memory_consumption); + 
hypergraph_memory_consumption.finalize(); + LOG << "\nHypergraph Memory Consumption"; + LOG << hypergraph_memory_consumption; + } +} + +template +void printPartWeightsAndSizes(const PartitionedHypergraph &hypergraph, + const Context &context) { + vec part_sizes(context.partition.k, 0); + for (HypernodeID u : hypergraph.nodes()) { + part_sizes[hypergraph.partID(u)]++; + } + PartitionID min_block = kInvalidPartition; + HypernodeWeight min_part_weight = std::numeric_limits::max(); + HypernodeWeight avg_part_weight = 0; + PartitionID max_block = kInvalidPartition; + HypernodeWeight max_part_weight = 0; + HypernodeID max_part_size = 0; + size_t num_imbalanced_blocks = 0; + for (PartitionID i = 0; i < context.partition.k; ++i) { + avg_part_weight += hypergraph.partWeight(i); + if (hypergraph.partWeight(i) < min_part_weight) { + min_block = i; + min_part_weight = hypergraph.partWeight(i); + } + if (hypergraph.partWeight(i) > max_part_weight) { + max_block = i; + max_part_weight = hypergraph.partWeight(i); + } + max_part_size = std::max(max_part_size, part_sizes[i]); + num_imbalanced_blocks += + (hypergraph.partWeight(i) > context.partition.max_part_weights[i] || + (context.partition.preset_type != PresetType::large_k && + hypergraph.partWeight(i) == 0)); + } + avg_part_weight /= context.partition.k; + + const uint8_t part_digits = kahypar::math::digits(max_part_weight); + const uint8_t k_digits = kahypar::math::digits(context.partition.k); + if (context.partition.k <= 32) { + for (PartitionID i = 0; i != context.partition.k; ++i) { + bool is_imbalanced = + hypergraph.partWeight(i) > context.partition.max_part_weights[i] || + (context.partition.preset_type != PresetType::large_k && + hypergraph.partWeight(i) == 0); + if (is_imbalanced) + std::cout << RED; + std::cout << "|block " << std::left << std::setw(k_digits) << i + << std::setw(1) << "| = " << std::right + << std::setw(part_digits) << part_sizes[i] << std::setw(1) + << " w( " << std::right << std::setw(k_digits) << i + << std::setw(1) << " ) = " << std::right + << std::setw(part_digits) << hypergraph.partWeight(i) + << std::setw(1) << " max( " << std::right + << std::setw(k_digits) << i << std::setw(1) + << " ) = " << std::right << std::setw(part_digits) + << context.partition.max_part_weights[i] << std::endl; + if (is_imbalanced) + std::cout << END; + } + } else { + std::cout << "Avg Block Weight = " << avg_part_weight << std::endl; + std::cout << "Min Block Weight = " << min_part_weight + << (min_part_weight <= + context.partition.max_part_weights[min_block] + ? " <= " + : " > ") + << context.partition.max_part_weights[min_block] << " (Block " + << min_block << ")" << std::endl; + std::cout << "Max Block Weight = " << max_part_weight + << (max_part_weight <= + context.partition.max_part_weights[max_block] + ? 
" <= " + : " > ") + << context.partition.max_part_weights[max_block] << " (Block " + << max_block << ")" << std::endl; + if (num_imbalanced_blocks > 0) { + LOG << RED << "Number of Imbalanced Blocks =" << num_imbalanced_blocks + << END; + for (PartitionID i = 0; i != context.partition.k; ++i) { + const bool is_imbalanced = + hypergraph.partWeight(i) > context.partition.max_part_weights[i] || + (context.partition.preset_type != PresetType::large_k && + hypergraph.partWeight(i) == 0); + if (is_imbalanced) { + std::cout << RED << "|block " << std::left << std::setw(k_digits) << i + << std::setw(1) << "| = " << std::right + << std::setw(part_digits) << part_sizes[i] << std::setw(1) + << " w( " << std::right << std::setw(k_digits) << i + << std::setw(1) << " ) = " << std::right + << std::setw(part_digits) << hypergraph.partWeight(i) + << std::setw(1) << " max( " << std::right + << std::setw(k_digits) << i << std::setw(1) + << " ) = " << std::right << std::setw(part_digits) + << context.partition.max_part_weights[i] << END + << std::endl; + } } - }); - - tbb::parallel_invoke([&] { - tbb::parallel_sort(he_sizes.begin(), he_sizes.end()); - }, [&] { - tbb::parallel_sort(he_weights.begin(), he_weights.end()); - }, [&] { - tbb::parallel_sort(hn_degrees.begin(), hn_degrees.end()); - }, [&] { - tbb::parallel_sort(hn_weights.begin(), hn_weights.end()); - }); - - LOG << "Hypergraph Information"; - LOG << "Name :" << name; - LOG << "# HNs :" << num_hypernodes - << "# HEs :" << (Hypergraph::is_graph ? num_hyperedges / 2 : num_hyperedges) - << "# pins:" << num_pins - << "# graph edges:" << (Hypergraph::is_graph ? num_hyperedges / 2 : graph_edge_count.combine(std::plus<>())); - - internal::printHypergraphStats( - internal::createStats(he_sizes, avg_he_size, stdev_he_size), - internal::createStats(he_weights, avg_he_weight, stdev_he_weight), - internal::createStats(hn_degrees, avg_hn_degree, stdev_hn_degree), - internal::createStats(hn_weights, avg_hn_weight, stdev_hn_weight)); - - if ( hypergraph.hasFixedVertices() ) { - printFixedVertexPartWeights(hypergraph, context); - } - - if ( show_memory_consumption ) { - // Print Memory Consumption - utils::MemoryTreeNode hypergraph_memory_consumption("Hypergraph", utils::OutputType::MEGABYTE); - hypergraph.memoryConsumption(&hypergraph_memory_consumption); - hypergraph_memory_consumption.finalize(); - LOG << "\nHypergraph Memory Consumption"; - LOG << hypergraph_memory_consumption; } } +} - template - void printPartWeightsAndSizes(const PartitionedHypergraph& hypergraph, const Context& context) { - vec part_sizes(context.partition.k, 0); - for (HypernodeID u : hypergraph.nodes()) { - part_sizes[hypergraph.partID(u)]++; - } - PartitionID min_block = kInvalidPartition; - HypernodeWeight min_part_weight = std::numeric_limits::max(); - HypernodeWeight avg_part_weight = 0; - PartitionID max_block = kInvalidPartition; +template +void printFixedVertexPartWeights(const Hypergraph &hypergraph, + const Context &context) { + if (context.partition.verbose_output && hypergraph.hasFixedVertices()) { HypernodeWeight max_part_weight = 0; - HypernodeID max_part_size = 0; - size_t num_imbalanced_blocks = 0; for (PartitionID i = 0; i < context.partition.k; ++i) { - avg_part_weight += hypergraph.partWeight(i); - if ( hypergraph.partWeight(i) < min_part_weight ) { - min_block = i; - min_part_weight = hypergraph.partWeight(i); + if (hypergraph.fixedVertexBlockWeight(i) > max_part_weight) { + max_part_weight = hypergraph.fixedVertexBlockWeight(i); } - if ( hypergraph.partWeight(i) > 
max_part_weight ) { - max_block = i; - max_part_weight = hypergraph.partWeight(i); + if (context.partition.max_part_weights[i] > max_part_weight) { + max_part_weight = context.partition.max_part_weights[i]; } - max_part_size = std::max(max_part_size, part_sizes[i]); - num_imbalanced_blocks += - (hypergraph.partWeight(i) > context.partition.max_part_weights[i] || - ( context.partition.preset_type != PresetType::large_k && hypergraph.partWeight(i) == 0 )); } - avg_part_weight /= context.partition.k; const uint8_t part_digits = kahypar::math::digits(max_part_weight); const uint8_t k_digits = kahypar::math::digits(context.partition.k); - if ( context.partition.k <= 32 ) { - for (PartitionID i = 0; i != context.partition.k; ++i) { - bool is_imbalanced = - hypergraph.partWeight(i) > context.partition.max_part_weights[i] || - ( context.partition.preset_type != PresetType::large_k && hypergraph.partWeight(i) == 0 ); - if ( is_imbalanced ) std::cout << RED; - std::cout << "|block " << std::left << std::setw(k_digits) << i - << std::setw(1) << "| = " << std::right << std::setw(part_digits) << part_sizes[i] - << std::setw(1) << " w( " << std::right << std::setw(k_digits) << i - << std::setw(1) << " ) = " << std::right << std::setw(part_digits) << hypergraph.partWeight(i) - << std::setw(1) << " max( " << std::right << std::setw(k_digits) << i - << std::setw(1) << " ) = " << std::right << std::setw(part_digits) << context.partition.max_part_weights[i] - << std::endl; - if ( is_imbalanced ) std::cout << END; - } - } else { - std::cout << "Avg Block Weight = " << avg_part_weight << std::endl; - std::cout << "Min Block Weight = " << min_part_weight - << (min_part_weight <= context.partition.max_part_weights[min_block] ? " <= " : " > ") - << context.partition.max_part_weights[min_block] << " (Block " << min_block << ")" << std::endl; - std::cout << "Max Block Weight = " << max_part_weight - << (max_part_weight <= context.partition.max_part_weights[max_block] ? 
" <= " : " > ") - << context.partition.max_part_weights[max_block] << " (Block " << max_block << ")" << std::endl; - if ( num_imbalanced_blocks > 0 ) { - LOG << RED << "Number of Imbalanced Blocks =" << num_imbalanced_blocks << END; - for (PartitionID i = 0; i != context.partition.k; ++i) { - const bool is_imbalanced = - hypergraph.partWeight(i) > context.partition.max_part_weights[i] || - ( context.partition.preset_type != PresetType::large_k && hypergraph.partWeight(i) == 0 ); - if ( is_imbalanced ) { - std::cout << RED << "|block " << std::left << std::setw(k_digits) << i - << std::setw(1) << "| = " << std::right << std::setw(part_digits) << part_sizes[i] - << std::setw(1) << " w( " << std::right << std::setw(k_digits) << i - << std::setw(1) << " ) = " << std::right << std::setw(part_digits) << hypergraph.partWeight(i) - << std::setw(1) << " max( " << std::right << std::setw(k_digits) << i - << std::setw(1) << " ) = " << std::right << std::setw(part_digits) << context.partition.max_part_weights[i] - << END << std::endl; - } - } - } + LOG << BOLD << "\nHypergraph contains fixed vertices" << END; + for (PartitionID i = 0; i != context.partition.k; ++i) { + std::cout << "Fixed vertex weight of block " << std::left + << std::setw(k_digits) << i << std::setw(1) << ": " + << std::setw(1) << " w( " << std::right << std::setw(k_digits) + << i << std::setw(1) << " ) = " << std::right + << std::setw(part_digits) + << hypergraph.fixedVertexBlockWeight(i) << std::setw(1) + << " max( " << std::right << std::setw(k_digits) << i + << std::setw(1) << " ) = " << std::right + << std::setw(part_digits) + << context.partition.max_part_weights[i] << std::endl; } } - - template - void printFixedVertexPartWeights(const Hypergraph& hypergraph, const Context& context) { - if ( context.partition.verbose_output && hypergraph.hasFixedVertices() ) { - HypernodeWeight max_part_weight = 0; - for (PartitionID i = 0; i < context.partition.k; ++i) { - if ( hypergraph.fixedVertexBlockWeight(i) > max_part_weight ) { - max_part_weight = hypergraph.fixedVertexBlockWeight(i); - } - if ( context.partition.max_part_weights[i] > max_part_weight ) { - max_part_weight = context.partition.max_part_weights[i]; - } - } - - const uint8_t part_digits = kahypar::math::digits(max_part_weight); - const uint8_t k_digits = kahypar::math::digits(context.partition.k); - LOG << BOLD << "\nHypergraph contains fixed vertices" << END; - for (PartitionID i = 0; i != context.partition.k; ++i) { - std::cout << "Fixed vertex weight of block " << std::left << std::setw(k_digits) << i - << std::setw(1) << ": " - << std::setw(1) << " w( " << std::right << std::setw(k_digits) << i - << std::setw(1) << " ) = " << std::right << std::setw(part_digits) << hypergraph.fixedVertexBlockWeight(i) - << std::setw(1) << " max( " << std::right << std::setw(k_digits) << i - << std::setw(1) << " ) = " << std::right << std::setw(part_digits) << context.partition.max_part_weights[i] - << std::endl; - } - } +} + +template +void printPartitioningResults(const PartitionedHypergraph &hypergraph, + const Context &context, + const std::string &description) { + if (context.partition.verbose_output) { + LOG << description; + LOG << context.partition.objective << " =" + << metrics::quality(hypergraph, context); + LOG << "imbalance =" << metrics::imbalance(hypergraph, context); + LOG << "Part sizes and weights:"; + io::printPartWeightsAndSizes(hypergraph, context); + LOG << ""; } +} - template - void printPartitioningResults(const PartitionedHypergraph& hypergraph, - const 
Context& context, - const std::string& description) { - if (context.partition.verbose_output) { - LOG << description; - LOG << context.partition.objective << " =" - << metrics::quality(hypergraph, context); - LOG << "imbalance =" << metrics::imbalance(hypergraph, context); - LOG << "Part sizes and weights:"; - io::printPartWeightsAndSizes(hypergraph, context); - LOG << ""; - } +void printContext(const Context &context) { + if (context.partition.verbose_output) { + LOG << context; } - - void printContext(const Context& context) { - if (context.partition.verbose_output) { - LOG << context; - } +} + +void printMemoryPoolConsumption(const Context &context) { + if (context.partition.verbose_output && + context.partition.show_memory_consumption) { + utils::MemoryTreeNode memory_pool_consumption("Memory Pool", + utils::OutputType::MEGABYTE); + parallel::MemoryPool::instance().memory_consumption( + &memory_pool_consumption); + memory_pool_consumption.finalize(); + LOG << "\n Memory Pool Consumption:"; + LOG << memory_pool_consumption << "\n"; + parallel::MemoryPool::instance().explain_optimizations(); } - - void printMemoryPoolConsumption(const Context& context) { - if ( context.partition.verbose_output && context.partition.show_memory_consumption ) { - utils::MemoryTreeNode memory_pool_consumption("Memory Pool", utils::OutputType::MEGABYTE); - parallel::MemoryPool::instance().memory_consumption(&memory_pool_consumption); - memory_pool_consumption.finalize(); - LOG << "\n Memory Pool Consumption:"; - LOG << memory_pool_consumption << "\n"; - parallel::MemoryPool::instance().explain_optimizations(); - } +} + +template +void printInputInformation(const Context &context, + const Hypergraph &hypergraph) { + if (context.partition.verbose_output) { + LOG << "\n*****************************************************************" + "**********" + "*****"; + LOG << "* Input " + " " + " *"; + LOG << "*******************************************************************" + "**********" + "***"; + io::printHypergraphInfo( + hypergraph, context, + context.partition.graph_filename.substr( + context.partition.graph_filename.find_last_of('/') + 1), + context.partition.show_memory_consumption); } - - template - void printInputInformation(const Context& context, const Hypergraph& hypergraph) { - if (context.partition.verbose_output) { - LOG << "\n********************************************************************************"; - LOG << "* Input *"; - LOG << "********************************************************************************"; - io::printHypergraphInfo(hypergraph, context, context.partition.graph_filename.substr( - context.partition.graph_filename.find_last_of('/') + 1), - context.partition.show_memory_consumption); - } +} + +void printTopLevelPreprocessingBanner(const Context &context) { + if (context.partition.verbose_output) { + LOG << "\n*****************************************************************" + "**********" + "*****"; + LOG << "* Preprocessing... " + " " + " *"; + LOG << "*******************************************************************" + "**********" + "***"; } - - void printTopLevelPreprocessingBanner(const Context& context) { - if (context.partition.verbose_output) { - LOG << "\n********************************************************************************"; - LOG << "* Preprocessing... 
*"; - LOG << "********************************************************************************"; - } +} + +void printCoarseningBanner(const Context &context) { + if (context.partition.verbose_output) { + LOG << "*******************************************************************" + "**********" + "***"; + LOG << "* Coarsening... " + " " + " *"; + LOG << "*******************************************************************" + "**********" + "***"; } - - void printCoarseningBanner(const Context& context) { - if (context.partition.verbose_output) { - LOG << "********************************************************************************"; - LOG << "* Coarsening... *"; - LOG << "********************************************************************************"; - } +} + +void printInitialPartitioningBanner(const Context &context) { + if (context.partition.verbose_output) { + LOG << "\n*****************************************************************" + "**********" + "*****"; + LOG << "* Initial Partitioning... " + " " + " *"; + LOG << "*******************************************************************" + "**********" + "***"; } - - void printInitialPartitioningBanner(const Context& context) { - if (context.partition.verbose_output) { - LOG << "\n********************************************************************************"; - LOG << "* Initial Partitioning... *"; - LOG << "********************************************************************************"; - } +} + +void printLocalSearchBanner(const Context &context) { + if (context.partition.verbose_output) { + LOG << "\n*****************************************************************" + "**********" + "*****"; + LOG << "* Local Search... " + " " + " *"; + LOG << "*******************************************************************" + "**********" + "***"; } - - void printLocalSearchBanner(const Context& context) { - if (context.partition.verbose_output) { - LOG << "\n********************************************************************************"; - LOG << "* Local Search... *"; - LOG << "********************************************************************************"; +} + +void printVCycleBanner(const Context &context, const size_t vcycle_num) { + if (context.partition.verbose_output) { + LOG << "\n*****************************************************************" + "**********" + "*****"; + std::cout << "* V-Cycle " << vcycle_num; + if (vcycle_num < 10) { + std::cout << " *\n"; + } else { + std::cout << " *\n"; } + LOG << "*******************************************************************" + "**********" + "***"; } - - void printVCycleBanner(const Context& context, const size_t vcycle_num) { - if (context.partition.verbose_output) { - LOG << "\n********************************************************************************"; - std::cout << "* V-Cycle " << vcycle_num; - if ( vcycle_num < 10 ) { - std::cout << " *\n"; - } else { - std::cout << " *\n"; - } - LOG << "********************************************************************************"; - } +} +void printDeepMultilevelBanner(const Context &context) { + if (context.partition.verbose_output) { + LOG << "\n*****************************************************************" + "**********" + "*****"; + LOG << "* Deep Multilevel Partitioning... 
" + " " + " *"; + LOG << "*******************************************************************" + "**********" + "***"; } - void printDeepMultilevelBanner(const Context& context) { - if (context.partition.verbose_output) { - LOG << "\n********************************************************************************"; - LOG << "* Deep Multilevel Partitioning... *"; - LOG << "********************************************************************************"; - } +} + +namespace { + +template +void printKeyValue(const T &key, const V &value, + const std::string &details = "") { + LOG << " " << std::left << std::setw(20) << key << "=" << value << details; +} +} // namespace + +template +void printObjectives(const PartitionedHypergraph &hypergraph, + const Context &context, + const std::chrono::duration &elapsed_seconds) { + LOG << "Objectives:"; + printKeyValue(context.partition.objective, + metrics::quality(hypergraph, context), + "(primary objective function)"); + if (context.partition.objective == Objective::steiner_tree) { + printKeyValue( + "Approximation Factor", + metrics::approximationFactorForProcessMapping(hypergraph, context)); } - - namespace { - - template - void printKeyValue(const T& key, const V& value, const std::string& details = "") { - LOG << " " << std::left << std::setw(20) << key << "=" << value << details; + if (context.partition.objective != Objective::cut) { + printKeyValue(Objective::cut, metrics::quality(hypergraph, Objective::cut)); } + if (context.partition.objective != Objective::km1 && + !PartitionedHypergraph::is_graph) { + printKeyValue(Objective::km1, metrics::quality(hypergraph, Objective::km1)); } - - template - void printObjectives(const PartitionedHypergraph& hypergraph, - const Context& context, - const std::chrono::duration& elapsed_seconds) { - LOG << "Objectives:"; - printKeyValue(context.partition.objective, metrics::quality(hypergraph, - context), "(primary objective function)"); - if ( context.partition.objective == Objective::steiner_tree ) { - printKeyValue("Approximation Factor", - metrics::approximationFactorForProcessMapping(hypergraph, context)); - } - if ( context.partition.objective != Objective::cut ) { - printKeyValue(Objective::cut, metrics::quality(hypergraph, Objective::cut)); - } - if ( context.partition.objective != Objective::km1 && !PartitionedHypergraph::is_graph ) { - printKeyValue(Objective::km1, metrics::quality(hypergraph, Objective::km1)); - } - if ( context.partition.objective != Objective::soed && !PartitionedHypergraph::is_graph ) { - printKeyValue(Objective::soed, metrics::quality(hypergraph, Objective::soed)); - } - printKeyValue("Imbalance", metrics::imbalance(hypergraph, context)); - printKeyValue("Partitioning Time", std::to_string(elapsed_seconds.count()) + " s"); + if (context.partition.objective != Objective::soed && + !PartitionedHypergraph::is_graph) { + printKeyValue(Objective::soed, + metrics::quality(hypergraph, Objective::soed)); } - - template - void printCutMatrix(const PartitionedHypergraph& hypergraph) { - const PartitionID k = hypergraph.k(); - - using MCell = parallel::IntegralAtomicWrapper; - using MCol = std::vector; - std::vector cut_matrix(k, MCol(k, MCell(0))); - - hypergraph.doParallelForAllEdges([&](const HyperedgeID& he) { - if ( hypergraph.connectivity(he) > 1 ) { - const HyperedgeWeight edge_weight = hypergraph.edgeWeight(he); - for ( const PartitionID& block_1 : hypergraph.connectivitySet(he) ) { - for ( const PartitionID& block_2 : hypergraph.connectivitySet(he) ) { - if ( block_1 < block_2 ) { 
- cut_matrix[block_1][block_2] += edge_weight; - } + printKeyValue("Imbalance", metrics::imbalance(hypergraph, context)); + printKeyValue("Partitioning Time", + std::to_string(elapsed_seconds.count()) + " s"); +} + +template +void printCutMatrix(const PartitionedHypergraph &hypergraph) { + const PartitionID k = hypergraph.k(); + + using MCell = parallel::IntegralAtomicWrapper; + using MCol = std::vector; + std::vector cut_matrix(k, MCol(k, MCell(0))); + + hypergraph.doParallelForAllEdges([&](const HyperedgeID &he) { + if (hypergraph.connectivity(he) > 1) { + const HyperedgeWeight edge_weight = hypergraph.edgeWeight(he); + for (const PartitionID &block_1 : hypergraph.connectivitySet(he)) { + for (const PartitionID &block_2 : hypergraph.connectivitySet(he)) { + if (block_1 < block_2) { + cut_matrix[block_1][block_2] += edge_weight; } } } - }); - - HyperedgeWeight max_cut = 0; - for ( PartitionID block_1 = 0; block_1 < k; ++block_1 ) { - for ( PartitionID block_2 = block_1 + 1; block_2 < k; ++block_2 ) { - max_cut = std::max(max_cut, cut_matrix[block_1][block_2].load()); - } } + }); - // HEADER - const uint8_t column_width = std::max(kahypar::math::digits(max_cut) + 2, 5); - std::cout << std::right << std::setw(column_width) << "Block"; - for ( PartitionID block = 0; block < k; ++block ) { - std::cout << std::right << std::setw(column_width) << block; - } - std::cout << std::endl; - - // CUT MATRIX - for ( PartitionID block_1 = 0; block_1 < k; ++block_1 ) { - std::cout << std::right << std::setw(column_width) << block_1; - for ( PartitionID block_2 = 0; block_2 < k; ++block_2 ) { - std::cout << std::right << std::setw(column_width) - << (PartitionedHypergraph::is_graph ? cut_matrix[block_1][block_2].load() / 2 : cut_matrix[block_1][block_2].load()); - } - std::cout << std::endl; + HyperedgeWeight max_cut = 0; + for (PartitionID block_1 = 0; block_1 < k; ++block_1) { + for (PartitionID block_2 = block_1 + 1; block_2 < k; ++block_2) { + max_cut = std::max(max_cut, cut_matrix[block_1][block_2].load()); } } - template - void printPotentialPositiveGainMoveMatrix(const PartitionedHypergraph& hypergraph) { - const PartitionID k = hypergraph.k(); - - using MCell = parallel::IntegralAtomicWrapper; - using MCol = std::vector; - std::vector positive_gains(k, MCol(k, MCell(0))); - - tbb::enumerable_thread_specific> local_gain(k, 0); - hypergraph.doParallelForAllNodes([&](const HypernodeID hn) { - // Calculate gain to all blocks of the partition - std::vector& tmp_scores = local_gain.local(); - const PartitionID from = hypergraph.partID(hn); - Gain internal_weight = 0; - for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) { - HypernodeID pin_count_in_from_part = hypergraph.pinCountInPart(he, from); - HyperedgeWeight he_weight = hypergraph.edgeWeight(he); - - if ( pin_count_in_from_part > 1 ) { - internal_weight += he_weight; - } + // HEADER + const uint8_t column_width = std::max(kahypar::math::digits(max_cut) + 2, 5); + std::cout << std::right << std::setw(column_width) << "Block"; + for (PartitionID block = 0; block < k; ++block) { + std::cout << std::right << std::setw(column_width) << block; + } + std::cout << std::endl; - for (const PartitionID& to : hypergraph.connectivitySet(he)) { - if (from != to) { - tmp_scores[to] -= he_weight; - } - } + // CUT MATRIX + for (PartitionID block_1 = 0; block_1 < k; ++block_1) { + std::cout << std::right << std::setw(column_width) << block_1; + for (PartitionID block_2 = 0; block_2 < k; ++block_2) { + std::cout << std::right << std::setw(column_width) + << 
(PartitionedHypergraph::is_graph + ? cut_matrix[block_1][block_2].load() / 2 + : cut_matrix[block_1][block_2].load()); + } + std::cout << std::endl; + } +} + +template +void printPotentialPositiveGainMoveMatrix( + const PartitionedHypergraph &hypergraph) { + const PartitionID k = hypergraph.k(); + + using MCell = parallel::IntegralAtomicWrapper; + using MCol = std::vector; + std::vector positive_gains(k, MCol(k, MCell(0))); + + tbb::enumerable_thread_specific> local_gain(k, 0); + hypergraph.doParallelForAllNodes([&](const HypernodeID hn) { + // Calculate gain to all blocks of the partition + std::vector &tmp_scores = local_gain.local(); + const PartitionID from = hypergraph.partID(hn); + Gain internal_weight = 0; + for (const HyperedgeID &he : hypergraph.incidentEdges(hn)) { + HypernodeID pin_count_in_from_part = hypergraph.pinCountInPart(he, from); + HyperedgeWeight he_weight = hypergraph.edgeWeight(he); + + if (pin_count_in_from_part > 1) { + internal_weight += he_weight; } - for (PartitionID to = 0; to < k; ++to) { + for (const PartitionID &to : hypergraph.connectivitySet(he)) { if (from != to) { - Gain score = tmp_scores[to] + internal_weight; - if ( score < 0 ) { - positive_gains[from][to] += std::abs(score); - } + tmp_scores[to] -= he_weight; } - tmp_scores[to] = 0; } - }); - + } - HyperedgeWeight max_gain = 0; - for ( PartitionID block_1 = 0; block_1 < k; ++block_1 ) { - for ( PartitionID block_2 = block_1 + 1; block_2 < k; ++block_2 ) { - max_gain = std::max(max_gain, positive_gains[block_1][block_2].load()); + for (PartitionID to = 0; to < k; ++to) { + if (from != to) { + Gain score = tmp_scores[to] + internal_weight; + if (score < 0) { + positive_gains[from][to] += std::abs(score); + } } + tmp_scores[to] = 0; } + }); - // HEADER - const uint8_t column_width = std::max(kahypar::math::digits(max_gain) + 2, 5); - std::cout << std::right << std::setw(column_width) << "Block"; - for ( PartitionID block = 0; block < k; ++block ) { - std::cout << std::right << std::setw(column_width) << block; + HyperedgeWeight max_gain = 0; + for (PartitionID block_1 = 0; block_1 < k; ++block_1) { + for (PartitionID block_2 = block_1 + 1; block_2 < k; ++block_2) { + max_gain = std::max(max_gain, positive_gains[block_1][block_2].load()); } - std::cout << std::endl; + } - // CUT MATRIX - for ( PartitionID block_1 = 0; block_1 < k; ++block_1 ) { - std::cout << std::right << std::setw(column_width) << block_1; - for ( PartitionID block_2 = 0; block_2 < k; ++block_2 ) { - std::cout << std::right << std::setw(column_width) << positive_gains[block_1][block_2].load(); - } - std::cout << std::endl; - } + // HEADER + const uint8_t column_width = std::max(kahypar::math::digits(max_gain) + 2, 5); + std::cout << std::right << std::setw(column_width) << "Block"; + for (PartitionID block = 0; block < k; ++block) { + std::cout << std::right << std::setw(column_width) << block; } + std::cout << std::endl; - template - void printConnectedCutHyperedgeAnalysis(const PartitionedHypergraph& hypergraph) { - std::vector visited_he(hypergraph.initialNumEdges(), false); - std::vector connected_cut_hyperedges; - - auto analyse_component = [&](const HyperedgeID he) { - HyperedgeWeight component_weight = 0; - std::vector s; - s.push_back(he); - visited_he[hypergraph.uniqueEdgeID(he)] = true; - - while ( !s.empty() ) { - const HyperedgeID e = s.back(); - s.pop_back(); - component_weight += hypergraph.edgeWeight(e); - - for ( const HypernodeID& pin : hypergraph.pins(e) ) { - for ( const HyperedgeID& tmp_e : 
hypergraph.incidentEdges(pin) ) { - if ( !visited_he[hypergraph.uniqueEdgeID(tmp_e)] && hypergraph.connectivity(tmp_e) > 1 ) { - s.push_back(tmp_e); - visited_he[hypergraph.uniqueEdgeID(tmp_e)] = true; - } + // CUT MATRIX + for (PartitionID block_1 = 0; block_1 < k; ++block_1) { + std::cout << std::right << std::setw(column_width) << block_1; + for (PartitionID block_2 = 0; block_2 < k; ++block_2) { + std::cout << std::right << std::setw(column_width) + << positive_gains[block_1][block_2].load(); + } + std::cout << std::endl; + } +} + +template +void printConnectedCutHyperedgeAnalysis( + const PartitionedHypergraph &hypergraph) { + std::vector visited_he(hypergraph.initialNumEdges(), false); + std::vector connected_cut_hyperedges; + + auto analyse_component = [&](const HyperedgeID he) { + HyperedgeWeight component_weight = 0; + std::vector s; + s.push_back(he); + visited_he[hypergraph.uniqueEdgeID(he)] = true; + + while (!s.empty()) { + const HyperedgeID e = s.back(); + s.pop_back(); + component_weight += hypergraph.edgeWeight(e); + + for (const HypernodeID &pin : hypergraph.pins(e)) { + for (const HyperedgeID &tmp_e : hypergraph.incidentEdges(pin)) { + if (!visited_he[hypergraph.uniqueEdgeID(tmp_e)] && + hypergraph.connectivity(tmp_e) > 1) { + s.push_back(tmp_e); + visited_he[hypergraph.uniqueEdgeID(tmp_e)] = true; } } } + } - return component_weight; - }; + return component_weight; + }; - for ( const HyperedgeID& he : hypergraph.edges() ) { - if ( hypergraph.connectivity(he) > 1 && !visited_he[hypergraph.uniqueEdgeID(he)] ) { - connected_cut_hyperedges.push_back(analyse_component(he)); - } - } - std::sort(connected_cut_hyperedges.begin(), connected_cut_hyperedges.end()); - - LOG << "Num Connected Cut Hyperedges =" << connected_cut_hyperedges.size(); - LOG << "Min Component Weight =" << connected_cut_hyperedges[0]; - LOG << "Median Component Weight =" << connected_cut_hyperedges[connected_cut_hyperedges.size() / 2]; - LOG << "Max Component Weight =" << connected_cut_hyperedges.back(); - LOG << "Component Weight Vector:"; - std::cout << "("; - for ( const HyperedgeWeight& weight : connected_cut_hyperedges ) { - std::cout << weight << ","; + for (const HyperedgeID &he : hypergraph.edges()) { + if (hypergraph.connectivity(he) > 1 && + !visited_he[hypergraph.uniqueEdgeID(he)]) { + connected_cut_hyperedges.push_back(analyse_component(he)); } - std::cout << "\b)" << std::endl; } + std::sort(connected_cut_hyperedges.begin(), connected_cut_hyperedges.end()); + + LOG << "Num Connected Cut Hyperedges =" << connected_cut_hyperedges.size(); + LOG << "Min Component Weight =" << connected_cut_hyperedges[0]; + LOG << "Median Component Weight =" + << connected_cut_hyperedges[connected_cut_hyperedges.size() / 2]; + LOG << "Max Component Weight =" << connected_cut_hyperedges.back(); + LOG << "Component Weight Vector:"; + std::cout << "("; + for (const HyperedgeWeight &weight : connected_cut_hyperedges) { + std::cout << weight << ","; + } + std::cout << "\b)" << std::endl; +} - template - void printPartitioningResults(const PartitionedHypergraph& hypergraph, - const Context& context, - const std::chrono::duration& elapsed_seconds) { - if (context.partition.verbose_output) { - LOG << "\n********************************************************************************"; - LOG << "* Partitioning Result *"; - LOG << "********************************************************************************"; +template +void printPartitioningResults( + const PartitionedHypergraph &hypergraph, const Context &context, + 
const std::chrono::duration &elapsed_seconds) { + if (context.partition.verbose_output) { + LOG << "\n*****************************************************************" + "**********" + "*****"; + LOG << "* Partitioning Result " + " " + " *"; + LOG << "*******************************************************************" + "**********" + "***"; - if ( context.partition.show_advanced_cut_analysis ) { - LOG << "\nCut Matrix: "; - printCutMatrix(hypergraph); + if (context.partition.show_advanced_cut_analysis) { + LOG << "\nCut Matrix: "; + printCutMatrix(hypergraph); - LOG << "\nPotential Positive Gain Move Matrix: "; - printPotentialPositiveGainMoveMatrix(hypergraph); + LOG << "\nPotential Positive Gain Move Matrix: "; + printPotentialPositiveGainMoveMatrix(hypergraph); - LOG << "\nConnected Cut Hyperedge Analysis: "; - printConnectedCutHyperedgeAnalysis(hypergraph); - } + LOG << "\nConnected Cut Hyperedge Analysis: "; + printConnectedCutHyperedgeAnalysis(hypergraph); + } - printObjectives(hypergraph, context, elapsed_seconds); + printObjectives(hypergraph, context, elapsed_seconds); - LOG << "\nPartition sizes and weights: "; - printPartWeightsAndSizes(hypergraph, context); + LOG << "\nPartition sizes and weights: "; + printPartWeightsAndSizes(hypergraph, context); - if ( context.partition.show_memory_consumption ) { - // Print Memory Consumption - utils::MemoryTreeNode hypergraph_memory_consumption( + if (context.partition.show_memory_consumption) { + // Print Memory Consumption + utils::MemoryTreeNode hypergraph_memory_consumption( "Partitioned Hypergraph", utils::OutputType::MEGABYTE); - hypergraph.memoryConsumption(&hypergraph_memory_consumption); - hypergraph_memory_consumption.finalize(); - LOG << "\nPartitioned Hypergraph Memory Consumption"; - LOG << hypergraph_memory_consumption; - } - - if ( hypergraph.hasTargetGraph() && TargetGraph::TRACK_STATS ) { - hypergraph.targetGraph()->printStats(); - } - - LOG << "\nTimings:"; - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - timer.showDetailedTimings(context.partition.show_detailed_timings); - timer.setMaximumOutputDepth(context.partition.timings_output_depth); - LOG << timer; + hypergraph.memoryConsumption(&hypergraph_memory_consumption); + hypergraph_memory_consumption.finalize(); + LOG << "\nPartitioned Hypergraph Memory Consumption"; + LOG << hypergraph_memory_consumption; } - } - - void printStripe() { - LOG << "--------------------------------------------------------------------------------"; - } - void printBanner() { - LOG << R"(+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++)"; - LOG << R"(+ __ __ _______ _ __ _ _ _____ +)"; - LOG << R"(+ | \/ |__ __| | |/ / | | | | | __ \ +)"; - LOG << R"(+ | \ / | | | ____ | ' / __ _| |__| |_ _| |__) |_ _ _ __ +)"; - LOG << R"(+ | |\/| | | | |____| | < / _` | __ | | | | ___/ _` | '__| +)"; - LOG << R"(+ | | | | | | | . 
\ (_| | | | | |_| | | | (_| | | +)"; - LOG << R"(+ |_| |_| |_| |_|\_\__,_|_| |_|\__, |_| \__,_|_| +)"; - LOG << R"(+ __/ | +)"; - LOG << R"(+ |___/ +)"; - LOG << R"(+ Karlsruhe Shared Memory Hypergraph Partitioning Framework +)"; - LOG << R"(+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++)"; - } - - namespace internal { - void printCommunityStats(const Statistic& community_size_stats, - const Statistic& community_pins_stats, - const Statistic& community_degree_stats) { - // default double precision is 7 - const uint8_t double_width = 7; - const uint8_t community_size_width = std::max(kahypar::math::digits(community_size_stats.max), double_width) + 4; - const uint8_t community_pins_width = std::max(kahypar::math::digits(community_pins_stats.max), double_width) + 4; - const uint8_t community_degree_width = std::max(kahypar::math::digits(community_degree_stats.max), double_width) + 4; - - LOG << "# HNs Per Community" << std::right << std::setw(community_size_width + 2) - << "# Internal Pins" << std::right << std::setw(community_pins_width + 8 + 4) - << "Internal Degree Sum" << std::right << std::setw(community_degree_width + 8); - LOG << "| min=" << std::left << std::setw(community_size_width) << community_size_stats.min - << " | min=" << std::left << std::setw(community_pins_width) << community_pins_stats.min - << " | min=" << std::left << std::setw(community_degree_width) << community_degree_stats.min; - LOG << "| Q1 =" << std::left << std::setw(community_size_width) << community_size_stats.q1 - << " | Q1 =" << std::left << std::setw(community_pins_width) << community_pins_stats.q1 - << " | Q1 =" << std::left << std::setw(community_degree_width) << community_degree_stats.q1; - LOG << "| med=" << std::left << std::setw(community_size_width) << community_size_stats.med - << " | med=" << std::left << std::setw(community_pins_width) << community_pins_stats.med - << " | med=" << std::left << std::setw(community_degree_width) << community_degree_stats.med; - LOG << "| Q3 =" << std::left << std::setw(community_size_width) << community_size_stats.q3 - << " | Q3 =" << std::left << std::setw(community_pins_width) << community_pins_stats.q3 - << " | Q3 =" << std::left << std::setw(community_degree_width) << community_degree_stats.q3; - LOG << "| max=" << std::left << std::setw(community_size_width) << community_size_stats.max - << " | max=" << std::left << std::setw(community_pins_width) << community_pins_stats.max - << " | max=" << std::left << std::setw(community_degree_width) << community_degree_stats.max; - LOG << "| avg=" << std::left << std::setw(community_size_width) << community_size_stats.avg - << " | avg=" << std::left << std::setw(community_pins_width) << community_pins_stats.avg - << " | avg=" << std::left << std::setw(community_degree_width) << community_degree_stats.avg; - LOG << "| sd =" << std::left << std::setw(community_size_width) << community_size_stats.sd - << " | sd =" << std::left << std::setw(community_pins_width) << community_pins_stats.sd - << " | sd =" << std::left << std::setw(community_degree_width) << community_degree_stats.sd; + if (hypergraph.hasTargetGraph() && TargetGraph::TRACK_STATS) { + hypergraph.targetGraph()->printStats(); } - } - - template - void printCommunityInformation(const Hypergraph& hypergraph) { - - PartitionID num_communities = - tbb::parallel_reduce( - tbb::blocked_range(ID(0), hypergraph.initialNumNodes()), - 0, [&](const tbb::blocked_range& range, PartitionID init) { - PartitionID my_range_num_communities = init; - 
for (HypernodeID hn = range.begin(); hn < range.end(); ++hn) { - if ( hypergraph.nodeIsEnabled(hn) ) { - my_range_num_communities = std::max(my_range_num_communities, hypergraph.communityID(hn) + 1); - } - } - return my_range_num_communities; - }, - [](const PartitionID lhs, const PartitionID rhs) { - return std::max(lhs, rhs); - }); - num_communities = std::max(num_communities, 1); - - std::vector nodes_per_community(num_communities, 0); - std::vector internal_pins(num_communities, 0); - std::vector internal_degree(num_communities, 0); - - auto reduce_nodes = [&] { - tbb::enumerable_thread_specific< vec< std::pair > > ets_nodes(num_communities, std::make_pair(UL(0), UL(0))); - hypergraph.doParallelForAllNodes([&](const HypernodeID u) { - const PartitionID cu = hypergraph.communityID(u); - ets_nodes.local()[cu].first++; - ets_nodes.local()[cu].second += hypergraph.nodeDegree(u); - }); - for (const auto& x : ets_nodes) { - for (PartitionID i = 0; i < num_communities; ++i) { - nodes_per_community[i] += x[i].first; - internal_degree[i] += x[i].second; - } - } - }; - - auto reduce_hyperedges = [&] { - tbb::enumerable_thread_specific< vec > ets_pins(num_communities, 0); - hypergraph.doParallelForAllEdges([&](const HyperedgeID he) { - auto& pin_counter = ets_pins.local(); - for (const HypernodeID pin : hypergraph.pins(he)) { - pin_counter[ hypergraph.communityID(pin) ]++; + LOG << "\nTimings:"; + utils::Timer &timer = + utils::Utilities::instance().getTimer(context.utility_id); + timer.showDetailedTimings(context.partition.show_detailed_timings); + timer.setMaximumOutputDepth(context.partition.timings_output_depth); + LOG << timer; + } +} + +void printStripe() { + LOG << "---------------------------------------------------------------------" + "----------" + "-"; +} + +void printBanner() { + LOG << R"(+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++)"; + LOG << R"(+ __ __ _______ _ __ _ _ _____ +)"; + LOG << R"(+ | \/ |__ __| | |/ / | | | | | __ \ +)"; + LOG << R"(+ | \ / | | | ____ | ' / __ _| |__| |_ _| |__) |_ _ _ __ +)"; + LOG << R"(+ | |\/| | | | |____| | < / _` | __ | | | | ___/ _` | '__| +)"; + LOG << R"(+ | | | | | | | . 
\ (_| | | | | |_| | | | (_| | | +)"; + LOG << R"(+ |_| |_| |_| |_|\_\__,_|_| |_|\__, |_| \__,_|_| +)"; + LOG << R"(+ __/ | +)"; + LOG << R"(+ |___/ +)"; + LOG << R"(+ Karlsruhe Shared Memory Hypergraph Partitioning Framework +)"; + LOG << R"(+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++)"; +} + +namespace internal { +void printCommunityStats(const Statistic &community_size_stats, + const Statistic &community_pins_stats, + const Statistic &community_degree_stats) { + // default double precision is 7 + const uint8_t double_width = 7; + const uint8_t community_size_width = + std::max(kahypar::math::digits(community_size_stats.max), double_width) + + 4; + const uint8_t community_pins_width = + std::max(kahypar::math::digits(community_pins_stats.max), double_width) + + 4; + const uint8_t community_degree_width = + std::max(kahypar::math::digits(community_degree_stats.max), + double_width) + + 4; + + LOG << "# HNs Per Community" << std::right + << std::setw(community_size_width + 2) << "# Internal Pins" << std::right + << std::setw(community_pins_width + 8 + 4) << "Internal Degree Sum" + << std::right << std::setw(community_degree_width + 8); + LOG << "| min=" << std::left << std::setw(community_size_width) + << community_size_stats.min << " | min=" << std::left + << std::setw(community_pins_width) << community_pins_stats.min + << " | min=" << std::left << std::setw(community_degree_width) + << community_degree_stats.min; + LOG << "| Q1 =" << std::left << std::setw(community_size_width) + << community_size_stats.q1 << " | Q1 =" << std::left + << std::setw(community_pins_width) << community_pins_stats.q1 + << " | Q1 =" << std::left << std::setw(community_degree_width) + << community_degree_stats.q1; + LOG << "| med=" << std::left << std::setw(community_size_width) + << community_size_stats.med << " | med=" << std::left + << std::setw(community_pins_width) << community_pins_stats.med + << " | med=" << std::left << std::setw(community_degree_width) + << community_degree_stats.med; + LOG << "| Q3 =" << std::left << std::setw(community_size_width) + << community_size_stats.q3 << " | Q3 =" << std::left + << std::setw(community_pins_width) << community_pins_stats.q3 + << " | Q3 =" << std::left << std::setw(community_degree_width) + << community_degree_stats.q3; + LOG << "| max=" << std::left << std::setw(community_size_width) + << community_size_stats.max << " | max=" << std::left + << std::setw(community_pins_width) << community_pins_stats.max + << " | max=" << std::left << std::setw(community_degree_width) + << community_degree_stats.max; + LOG << "| avg=" << std::left << std::setw(community_size_width) + << community_size_stats.avg << " | avg=" << std::left + << std::setw(community_pins_width) << community_pins_stats.avg + << " | avg=" << std::left << std::setw(community_degree_width) + << community_degree_stats.avg; + LOG << "| sd =" << std::left << std::setw(community_size_width) + << community_size_stats.sd << " | sd =" << std::left + << std::setw(community_pins_width) << community_pins_stats.sd + << " | sd =" << std::left << std::setw(community_degree_width) + << community_degree_stats.sd; +} +} // namespace internal + +template +void printCommunityInformation(const Hypergraph &hypergraph) { + + PartitionID num_communities = tbb::parallel_reduce( + tbb::blocked_range(ID(0), hypergraph.initialNumNodes()), 0, + [&](const tbb::blocked_range &range, PartitionID init) { + PartitionID my_range_num_communities = init; + for (HypernodeID hn = range.begin(); hn < 
range.end(); ++hn) { + if (hypergraph.nodeIsEnabled(hn)) { + my_range_num_communities = std::max(my_range_num_communities, + hypergraph.communityID(hn) + 1); + } } + return my_range_num_communities; + }, + [](const PartitionID lhs, const PartitionID rhs) { + return std::max(lhs, rhs); }); + num_communities = std::max(num_communities, 1); + + std::vector nodes_per_community(num_communities, 0); + std::vector internal_pins(num_communities, 0); + std::vector internal_degree(num_communities, 0); + + auto reduce_nodes = [&] { + tbb::enumerable_thread_specific>> ets_nodes( + num_communities, std::make_pair(UL(0), UL(0))); + hypergraph.doParallelForAllNodes([&](const HypernodeID u) { + const PartitionID cu = hypergraph.communityID(u); + ets_nodes.local()[cu].first++; + ets_nodes.local()[cu].second += hypergraph.nodeDegree(u); + }); - for (const auto& x : ets_pins) { - for (PartitionID i = 0; i < num_communities; ++i) { - internal_pins[i] += x[i]; - } + for (const auto &x : ets_nodes) { + for (PartitionID i = 0; i < num_communities; ++i) { + nodes_per_community[i] += x[i].first; + internal_degree[i] += x[i].second; } - }; - - tbb::parallel_invoke(reduce_nodes, reduce_hyperedges); - - std::sort(nodes_per_community.begin(), nodes_per_community.end()); - std::sort(internal_pins.begin(), internal_pins.end()); - std::sort(internal_degree.begin(), internal_degree.end()); - - auto square = [&](size_t x) { return x * x; }; + } + }; - auto avg_and_std_dev = [&](const std::vector& v) { - const double avg = std::accumulate(v.begin(), v.end(), 0.0) / static_cast(v.size()); - double std_dev = 0.0; - for (size_t x : v) { - std_dev += square(x - avg); + auto reduce_hyperedges = [&] { + tbb::enumerable_thread_specific> ets_pins(num_communities, 0); + hypergraph.doParallelForAllEdges([&](const HyperedgeID he) { + auto &pin_counter = ets_pins.local(); + for (const HypernodeID pin : hypergraph.pins(he)) { + pin_counter[hypergraph.communityID(pin)]++; } - std_dev = std::sqrt(std_dev / static_cast(v.size() - 1)); - return std::make_pair(avg, std_dev); - }; - - auto [avg_nodes, std_dev_nodes] = avg_and_std_dev(nodes_per_community); - auto [avg_pins, std_dev_pins] = avg_and_std_dev(internal_pins); - auto [avg_deg, std_dev_deg] = avg_and_std_dev(internal_degree); - - LOG << "# Communities :" << num_communities; - - internal::printCommunityStats( - internal::createStats(nodes_per_community, avg_nodes, std_dev_nodes), - internal::createStats(internal_pins, avg_pins, std_dev_pins), - internal::createStats(internal_degree, avg_deg, std_dev_deg) - ); - } + }); - namespace { - #define PRINT_CUT_MATRIX(X) void printCutMatrix(const X& hypergraph) - #define PRINT_HYPERGRAPH_INFO(X) void printHypergraphInfo(const X& hypergraph, \ - const Context& context, \ - const std::string& name, \ - const bool show_memory_consumption) - #define PRINT_PARTITIONING_RESULTS(X) void printPartitioningResults(const X& hypergraph, \ - const Context& context, \ - const std::string& description) - #define PRINT_PARTITIONING_RESULTS_2(X) void printPartitioningResults(const X& hypergraph, \ - const Context& context, \ - const std::chrono::duration& elapsed_seconds) - #define PRINT_PART_WEIGHT_AND_SIZES(X) void printPartWeightsAndSizes(const X& hypergraph, const Context& context) - #define PRINT_FIXED_VERTEX_PART_WEIGHTS(X) void printFixedVertexPartWeights(const X& hypergraph, const Context& context) - #define PRINT_INPUT_INFORMATION(X) void printInputInformation(const Context& context, const X& hypergraph) - #define PRINT_COMMUNITY_INFORMATION(X) void 
printCommunityInformation(const X& hypergraph) - } // namespace - - INSTANTIATE_FUNC_WITH_HYPERGRAPHS(PRINT_HYPERGRAPH_INFO) - INSTANTIATE_FUNC_WITH_HYPERGRAPHS(PRINT_INPUT_INFORMATION) - INSTANTIATE_FUNC_WITH_HYPERGRAPHS(PRINT_COMMUNITY_INFORMATION) - INSTANTIATE_FUNC_WITH_HYPERGRAPHS(PRINT_FIXED_VERTEX_PART_WEIGHTS) - INSTANTIATE_FUNC_WITH_PARTITIONED_HG(PRINT_CUT_MATRIX) - INSTANTIATE_FUNC_WITH_PARTITIONED_HG(PRINT_PARTITIONING_RESULTS) - INSTANTIATE_FUNC_WITH_PARTITIONED_HG(PRINT_PARTITIONING_RESULTS_2) - INSTANTIATE_FUNC_WITH_PARTITIONED_HG(PRINT_PART_WEIGHT_AND_SIZES) + for (const auto &x : ets_pins) { + for (PartitionID i = 0; i < num_communities; ++i) { + internal_pins[i] += x[i]; + } + } + }; + + tbb::parallel_invoke(reduce_nodes, reduce_hyperedges); + + std::sort(nodes_per_community.begin(), nodes_per_community.end()); + std::sort(internal_pins.begin(), internal_pins.end()); + std::sort(internal_degree.begin(), internal_degree.end()); + + auto square = [&](size_t x) { return x * x; }; + + auto avg_and_std_dev = [&](const std::vector &v) { + const double avg = std::accumulate(v.begin(), v.end(), 0.0) / + static_cast(v.size()); + double std_dev = 0.0; + for (size_t x : v) { + std_dev += square(x - avg); + } + std_dev = std::sqrt(std_dev / static_cast(v.size() - 1)); + return std::make_pair(avg, std_dev); + }; + + auto [avg_nodes, std_dev_nodes] = avg_and_std_dev(nodes_per_community); + auto [avg_pins, std_dev_pins] = avg_and_std_dev(internal_pins); + auto [avg_deg, std_dev_deg] = avg_and_std_dev(internal_degree); + + LOG << "# Communities :" << num_communities; + + internal::printCommunityStats( + internal::createStats(nodes_per_community, avg_nodes, std_dev_nodes), + internal::createStats(internal_pins, avg_pins, std_dev_pins), + internal::createStats(internal_degree, avg_deg, std_dev_deg)); +} + +namespace { +#define PRINT_CUT_MATRIX(X) void printCutMatrix(const X &hypergraph) +#define PRINT_HYPERGRAPH_INFO(X) \ + void printHypergraphInfo(const X &hypergraph, const Context &context, \ + const std::string &name, \ + const bool show_memory_consumption) +#define PRINT_PARTITIONING_RESULTS(X) \ + void printPartitioningResults(const X &hypergraph, const Context &context, \ + const std::string &description) +#define PRINT_PARTITIONING_RESULTS_2(X) \ + void printPartitioningResults( \ + const X &hypergraph, const Context &context, \ + const std::chrono::duration &elapsed_seconds) +#define PRINT_PART_WEIGHT_AND_SIZES(X) \ + void printPartWeightsAndSizes(const X &hypergraph, const Context &context) +#define PRINT_FIXED_VERTEX_PART_WEIGHTS(X) \ + void printFixedVertexPartWeights(const X &hypergraph, const Context &context) +#define PRINT_INPUT_INFORMATION(X) \ + void printInputInformation(const Context &context, const X &hypergraph) +#define PRINT_COMMUNITY_INFORMATION(X) \ + void printCommunityInformation(const X &hypergraph) +} // namespace + +INSTANTIATE_FUNC_WITH_HYPERGRAPHS(PRINT_HYPERGRAPH_INFO) +INSTANTIATE_FUNC_WITH_HYPERGRAPHS(PRINT_INPUT_INFORMATION) +INSTANTIATE_FUNC_WITH_HYPERGRAPHS(PRINT_COMMUNITY_INFORMATION) +INSTANTIATE_FUNC_WITH_HYPERGRAPHS(PRINT_FIXED_VERTEX_PART_WEIGHTS) +INSTANTIATE_FUNC_WITH_PARTITIONED_HG(PRINT_CUT_MATRIX) +INSTANTIATE_FUNC_WITH_PARTITIONED_HG(PRINT_PARTITIONING_RESULTS) +INSTANTIATE_FUNC_WITH_PARTITIONED_HG(PRINT_PARTITIONING_RESULTS_2) +INSTANTIATE_FUNC_WITH_PARTITIONED_HG(PRINT_PART_WEIGHT_AND_SIZES) } // namespace mt_kahypar::io \ No newline at end of file diff --git a/mt-kahypar/io/partitioning_output.h b/mt-kahypar/io/partitioning_output.h index 
393cd3c67..af049927d 100644 --- a/mt-kahypar/io/partitioning_output.h +++ b/mt-kahypar/io/partitioning_output.h @@ -30,38 +30,36 @@ #include "mt-kahypar/partition/context.h" namespace mt_kahypar::io { - void printStripe(); - void printBanner(); - void printContext(const Context& context); - void printMemoryPoolConsumption(const Context& context); - void printCoarseningBanner(const Context& context); - void printInitialPartitioningBanner(const Context& context); - void printLocalSearchBanner(const Context& context); - void printVCycleBanner(const Context& context, const size_t vcycle_num); - void printDeepMultilevelBanner(const Context& context); - void printTopLevelPreprocessingBanner(const Context& context); +void printStripe(); +void printBanner(); +void printContext(const Context &context); +void printMemoryPoolConsumption(const Context &context); +void printCoarseningBanner(const Context &context); +void printInitialPartitioningBanner(const Context &context); +void printLocalSearchBanner(const Context &context); +void printVCycleBanner(const Context &context, const size_t vcycle_num); +void printDeepMultilevelBanner(const Context &context); +void printTopLevelPreprocessingBanner(const Context &context); - template - void printCutMatrix(const PartitionedHypergraph& hypergraph); - template - void printHypergraphInfo(const Hypergraph& hypergraph, - const Context& context, - const std::string& name, - const bool show_memory_consumption); - template - void printPartitioningResults(const PartitionedHypergraph& hypergraph, - const Context& context, - const std::string& description); - template - void printPartitioningResults(const PartitionedHypergraph& hypergraph, - const Context& context, - const std::chrono::duration& elapsed_seconds); - template - void printPartWeightsAndSizes(const PartitionedHypergraph& hypergraph, const Context& context); - template - void printFixedVertexPartWeights(const Hypergraph& hypergraph, const Context& context); - template - void printInputInformation(const Context& context, const Hypergraph& hypergraph); - template - void printCommunityInformation(const Hypergraph& hypergraph); -} // namespace mt_kahypar::io +template +void printCutMatrix(const PartitionedHypergraph &hypergraph); +template +void printHypergraphInfo(const Hypergraph &hypergraph, const Context &context, + const std::string &name, const bool show_memory_consumption); +template +void printPartitioningResults(const PartitionedHypergraph &hypergraph, + const Context &context, const std::string &description); +template +void printPartitioningResults(const PartitionedHypergraph &hypergraph, + const Context &context, + const std::chrono::duration &elapsed_seconds); +template +void printPartWeightsAndSizes(const PartitionedHypergraph &hypergraph, + const Context &context); +template +void printFixedVertexPartWeights(const Hypergraph &hypergraph, const Context &context); +template +void printInputInformation(const Context &context, const Hypergraph &hypergraph); +template +void printCommunityInformation(const Hypergraph &hypergraph); +} // namespace mt_kahypar::io diff --git a/mt-kahypar/io/sql_plottools_serializer.cpp b/mt-kahypar/io/sql_plottools_serializer.cpp index 07d4f6e04..06c0edeee 100644 --- a/mt-kahypar/io/sql_plottools_serializer.cpp +++ b/mt-kahypar/io/sql_plottools_serializer.cpp @@ -30,166 +30,249 @@ #include #include "mt-kahypar/definitions.h" -#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/mapping/target_graph.h" -#include "mt-kahypar/utils/utilities.h" 
+#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/utils/timer.h" +#include "mt-kahypar/utils/utilities.h" namespace mt_kahypar::io::serializer { -template -std::string serialize(const PartitionedHypergraph& hypergraph, - const Context& context, - const std::chrono::duration& elapsed_seconds) { - if (context.partition.sp_process_output) { +template +std::string serialize(const PartitionedHypergraph &hypergraph, const Context &context, + const std::chrono::duration &elapsed_seconds) +{ + if(context.partition.sp_process_output) + { std::stringstream oss; oss << "RESULT" - << " algorithm=" << context.algorithm_name - << " graph=" << context.partition.graph_filename.substr( - context.partition.graph_filename.find_last_of('/') + 1); - if ( context.partition.fixed_vertex_filename != "" ) { - oss << " fixed_vertex_filename=" << context.partition.fixed_vertex_filename.substr( - context.partition.fixed_vertex_filename.find_last_of('/') + 1); + << " algorithm=" << context.algorithm_name << " graph=" + << context.partition.graph_filename.substr( + context.partition.graph_filename.find_last_of('/') + 1); + if(context.partition.fixed_vertex_filename != "") + { + oss << " fixed_vertex_filename=" + << context.partition.fixed_vertex_filename.substr( + context.partition.fixed_vertex_filename.find_last_of('/') + 1); } - oss << " numHNs=" << hypergraph.initialNumNodes() - << " numHEs=" << (PartitionedHypergraph::is_graph ? hypergraph.initialNumEdges() / 2 : hypergraph.initialNumEdges()) + oss << " numHNs=" << hypergraph.initialNumNodes() << " numHEs=" + << (PartitionedHypergraph::is_graph ? hypergraph.initialNumEdges() / 2 : + hypergraph.initialNumEdges()) << " mode=" << context.partition.mode << " objective=" << context.partition.objective << " gain_policy=" << context.partition.gain_policy << " file_format=" << context.partition.file_format << " partition_type=" << context.partition.partition_type - << " k=" << context.partition.k - << " epsilon=" << context.partition.epsilon + << " k=" << context.partition.k << " epsilon=" << context.partition.epsilon << " seed=" << context.partition.seed << " num_vcycles=" << context.partition.num_vcycles << " deterministic=" << context.partition.deterministic - << " perform_parallel_recursion_in_deep_multilevel=" << context.partition.perform_parallel_recursion_in_deep_multilevel; - oss << " large_hyperedge_size_threshold_factor=" << context.partition.large_hyperedge_size_threshold_factor - << " smallest_large_he_size_threshold=" << context.partition.smallest_large_he_size_threshold - << " large_hyperedge_size_threshold=" << context.partition.large_hyperedge_size_threshold - << " ignore_hyperedge_size_threshold=" << context.partition.ignore_hyperedge_size_threshold + << " perform_parallel_recursion_in_deep_multilevel=" + << context.partition.perform_parallel_recursion_in_deep_multilevel; + oss << " large_hyperedge_size_threshold_factor=" + << context.partition.large_hyperedge_size_threshold_factor + << " smallest_large_he_size_threshold=" + << context.partition.smallest_large_he_size_threshold + << " large_hyperedge_size_threshold=" + << context.partition.large_hyperedge_size_threshold + << " ignore_hyperedge_size_threshold=" + << context.partition.ignore_hyperedge_size_threshold << " time_limit=" << context.partition.time_limit - << " use_individual_part_weights=" << context.partition.use_individual_part_weights - << " perfect_balance_part_weight=" << context.partition.perfect_balance_part_weights[0] + << " use_individual_part_weights=" + << 
context.partition.use_individual_part_weights + << " perfect_balance_part_weight=" + << context.partition.perfect_balance_part_weights[0] << " max_part_weight=" << context.partition.max_part_weights[0] << " total_graph_weight=" << hypergraph.totalWeight(); - oss << " use_community_detection=" << std::boolalpha << context.preprocessing.use_community_detection - << " disable_community_detection_for_mesh_graphs=" << std::boolalpha << context.preprocessing.disable_community_detection_for_mesh_graphs - << " community_edge_weight_function=" << context.preprocessing.community_detection.edge_weight_function - << " community_max_pass_iterations=" << context.preprocessing.community_detection.max_pass_iterations - << " community_min_vertex_move_fraction=" << context.preprocessing.community_detection.min_vertex_move_fraction - << " community_vertex_degree_sampling_threshold=" << context.preprocessing.community_detection.vertex_degree_sampling_threshold - << " community_num_sub_rounds_deterministic=" << context.preprocessing.community_detection.num_sub_rounds_deterministic - << " community_low_memory_contraction=" << context.preprocessing.community_detection.low_memory_contraction; + oss << " use_community_detection=" << std::boolalpha + << context.preprocessing.use_community_detection + << " disable_community_detection_for_mesh_graphs=" << std::boolalpha + << context.preprocessing.disable_community_detection_for_mesh_graphs + << " community_edge_weight_function=" + << context.preprocessing.community_detection.edge_weight_function + << " community_max_pass_iterations=" + << context.preprocessing.community_detection.max_pass_iterations + << " community_min_vertex_move_fraction=" + << context.preprocessing.community_detection.min_vertex_move_fraction + << " community_vertex_degree_sampling_threshold=" + << context.preprocessing.community_detection.vertex_degree_sampling_threshold + << " community_num_sub_rounds_deterministic=" + << context.preprocessing.community_detection.num_sub_rounds_deterministic + << " community_low_memory_contraction=" + << context.preprocessing.community_detection.low_memory_contraction; oss << " coarsening_algorithm=" << context.coarsening.algorithm - << " coarsening_contraction_limit_multiplier=" << context.coarsening.contraction_limit_multiplier - << " coarsening_deep_ml_contraction_limit_multiplier=" << context.coarsening.deep_ml_contraction_limit_multiplier - << " coarsening_use_adaptive_edge_size=" << std::boolalpha << context.coarsening.use_adaptive_edge_size - << " coarsening_max_allowed_weight_multiplier=" << context.coarsening.max_allowed_weight_multiplier - << " coarsening_minimum_shrink_factor=" << context.coarsening.minimum_shrink_factor - << " coarsening_maximum_shrink_factor=" << context.coarsening.maximum_shrink_factor - << " coarsening_max_allowed_node_weight=" << context.coarsening.max_allowed_node_weight - << " coarsening_vertex_degree_sampling_threshold=" << context.coarsening.vertex_degree_sampling_threshold - << " coarsening_num_sub_rounds_deterministic=" << context.coarsening.num_sub_rounds_deterministic + << " coarsening_contraction_limit_multiplier=" + << context.coarsening.contraction_limit_multiplier + << " coarsening_deep_ml_contraction_limit_multiplier=" + << context.coarsening.deep_ml_contraction_limit_multiplier + << " coarsening_use_adaptive_edge_size=" << std::boolalpha + << context.coarsening.use_adaptive_edge_size + << " coarsening_max_allowed_weight_multiplier=" + << context.coarsening.max_allowed_weight_multiplier + << " 
coarsening_minimum_shrink_factor=" + << context.coarsening.minimum_shrink_factor + << " coarsening_maximum_shrink_factor=" + << context.coarsening.maximum_shrink_factor + << " coarsening_max_allowed_node_weight=" + << context.coarsening.max_allowed_node_weight + << " coarsening_vertex_degree_sampling_threshold=" + << context.coarsening.vertex_degree_sampling_threshold + << " coarsening_num_sub_rounds_deterministic=" + << context.coarsening.num_sub_rounds_deterministic << " coarsening_contraction_limit=" << context.coarsening.contraction_limit << " rating_function=" << context.coarsening.rating.rating_function - << " rating_heavy_node_penalty_policy=" << context.coarsening.rating.heavy_node_penalty_policy + << " rating_heavy_node_penalty_policy=" + << context.coarsening.rating.heavy_node_penalty_policy << " rating_acceptance_policy=" << context.coarsening.rating.acceptance_policy; oss << " initial_partitioning_mode=" << context.initial_partitioning.mode << " initial_partitioning_runs=" << context.initial_partitioning.runs - << " initial_partitioning_use_adaptive_ip_runs=" << std::boolalpha << context.initial_partitioning.use_adaptive_ip_runs - << " initial_partitioning_min_adaptive_ip_runs=" << context.initial_partitioning.min_adaptive_ip_runs - << " initial_partitioning_perform_refinement_on_best_partitions=" << std::boolalpha << context.initial_partitioning.perform_refinement_on_best_partitions - << " initial_partitioning_fm_refinment_rounds=" << std::boolalpha << context.initial_partitioning.fm_refinment_rounds - << " initial_partitioning_remove_degree_zero_hns_before_ip=" << std::boolalpha << context.initial_partitioning.remove_degree_zero_hns_before_ip - << " initial_partitioning_lp_maximum_iterations=" << context.initial_partitioning.lp_maximum_iterations - << " initial_partitioning_lp_initial_block_size=" << context.initial_partitioning.lp_initial_block_size - << " initial_partitioning_population_size=" << context.initial_partitioning.population_size; + << " initial_partitioning_use_adaptive_ip_runs=" << std::boolalpha + << context.initial_partitioning.use_adaptive_ip_runs + << " initial_partitioning_min_adaptive_ip_runs=" + << context.initial_partitioning.min_adaptive_ip_runs + << " initial_partitioning_perform_refinement_on_best_partitions=" + << std::boolalpha + << context.initial_partitioning.perform_refinement_on_best_partitions + << " initial_partitioning_fm_refinment_rounds=" << std::boolalpha + << context.initial_partitioning.fm_refinment_rounds + << " initial_partitioning_remove_degree_zero_hns_before_ip=" << std::boolalpha + << context.initial_partitioning.remove_degree_zero_hns_before_ip + << " initial_partitioning_lp_maximum_iterations=" + << context.initial_partitioning.lp_maximum_iterations + << " initial_partitioning_lp_initial_block_size=" + << context.initial_partitioning.lp_initial_block_size + << " initial_partitioning_population_size=" + << context.initial_partitioning.population_size; oss << " rebalancer=" << std::boolalpha << context.refinement.rebalancer - << " refine_until_no_improvement=" << std::boolalpha << context.refinement.refine_until_no_improvement - << " relative_improvement_threshold=" << context.refinement.relative_improvement_threshold + << " refine_until_no_improvement=" << std::boolalpha + << context.refinement.refine_until_no_improvement + << " relative_improvement_threshold=" + << context.refinement.relative_improvement_threshold << " max_batch_size=" << context.refinement.max_batch_size - << " min_border_vertices_per_thread=" << 
context.refinement.min_border_vertices_per_thread + << " min_border_vertices_per_thread=" + << context.refinement.min_border_vertices_per_thread << " lp_algorithm=" << context.refinement.label_propagation.algorithm - << " lp_maximum_iterations=" << context.refinement.label_propagation.maximum_iterations - << " lp_rebalancing=" << std::boolalpha << context.refinement.label_propagation.rebalancing - << " lp_unconstrained=" << std::boolalpha << context.refinement.label_propagation.unconstrained - << " lp_relative_improvement_threshold=" << context.refinement.label_propagation.relative_improvement_threshold - << " lp_hyperedge_size_activation_threshold=" << context.refinement.label_propagation.hyperedge_size_activation_threshold - << " sync_lp_num_sub_rounds_sync_lp=" << context.refinement.deterministic_refinement.num_sub_rounds_sync_lp - << " sync_lp_use_active_node_set=" << context.refinement.deterministic_refinement.use_active_node_set; + << " lp_maximum_iterations=" + << context.refinement.label_propagation.maximum_iterations + << " lp_rebalancing=" << std::boolalpha + << context.refinement.label_propagation.rebalancing + << " lp_unconstrained=" << std::boolalpha + << context.refinement.label_propagation.unconstrained + << " lp_relative_improvement_threshold=" + << context.refinement.label_propagation.relative_improvement_threshold + << " lp_hyperedge_size_activation_threshold=" + << context.refinement.label_propagation.hyperedge_size_activation_threshold + << " sync_lp_num_sub_rounds_sync_lp=" + << context.refinement.deterministic_refinement.num_sub_rounds_sync_lp + << " sync_lp_use_active_node_set=" + << context.refinement.deterministic_refinement.use_active_node_set; oss << " fm_algorithm=" << context.refinement.fm.algorithm << " fm_multitry_rounds=" << context.refinement.fm.multitry_rounds - << " fm_rollback_parallel=" << std::boolalpha << context.refinement.fm.rollback_parallel - << " fm_rollback_sensitive_to_num_moves=" << std::boolalpha << context.refinement.fm.iter_moves_on_recalc - << " fm_rollback_balance_violation_factor=" << context.refinement.fm.rollback_balance_violation_factor + << " fm_rollback_parallel=" << std::boolalpha + << context.refinement.fm.rollback_parallel + << " fm_rollback_sensitive_to_num_moves=" << std::boolalpha + << context.refinement.fm.iter_moves_on_recalc + << " fm_rollback_balance_violation_factor=" + << context.refinement.fm.rollback_balance_violation_factor << " fm_min_improvement=" << context.refinement.fm.min_improvement << " fm_release_nodes=" << context.refinement.fm.release_nodes << " fm_iter_moves_on_recalc=" << context.refinement.fm.iter_moves_on_recalc << " fm_num_seed_nodes=" << context.refinement.fm.num_seed_nodes << " fm_time_limit_factor=" << context.refinement.fm.time_limit_factor - << " fm_obey_minimal_parallelism=" << std::boolalpha << context.refinement.fm.obey_minimal_parallelism + << " fm_obey_minimal_parallelism=" << std::boolalpha + << context.refinement.fm.obey_minimal_parallelism << " fm_shuffle=" << std::boolalpha << context.refinement.fm.shuffle << " fm_unconstrained_rounds=" << context.refinement.fm.unconstrained_rounds - << " fm_treshold_border_node_inclusion=" << context.refinement.fm.treshold_border_node_inclusion - << " fm_unconstrained_min_improvement=" << context.refinement.fm.unconstrained_min_improvement - << " fm_unconstrained_upper_bound=" << context.refinement.fm.unconstrained_upper_bound - << " fm_unconstrained_upper_bound_min=" << context.refinement.fm.unconstrained_upper_bound_min + << " 
fm_treshold_border_node_inclusion=" + << context.refinement.fm.treshold_border_node_inclusion + << " fm_unconstrained_min_improvement=" + << context.refinement.fm.unconstrained_min_improvement + << " fm_unconstrained_upper_bound=" + << context.refinement.fm.unconstrained_upper_bound + << " fm_unconstrained_upper_bound_min=" + << context.refinement.fm.unconstrained_upper_bound_min << " fm_imbalance_penalty_min=" << context.refinement.fm.imbalance_penalty_min << " fm_imbalance_penalty_max=" << context.refinement.fm.imbalance_penalty_max - << " fm_activate_unconstrained_dynamically=" << std::boolalpha << context.refinement.fm.activate_unconstrained_dynamically - << " fm_penalty_for_activation_test=" << context.refinement.fm.penalty_for_activation_test - << " global_fm_use_global_fm=" << std::boolalpha << context.refinement.global_fm.use_global_fm - << " global_fm_refine_until_no_improvement=" << std::boolalpha << context.refinement.global_fm.refine_until_no_improvement + << " fm_activate_unconstrained_dynamically=" << std::boolalpha + << context.refinement.fm.activate_unconstrained_dynamically + << " fm_penalty_for_activation_test=" + << context.refinement.fm.penalty_for_activation_test + << " global_fm_use_global_fm=" << std::boolalpha + << context.refinement.global_fm.use_global_fm + << " global_fm_refine_until_no_improvement=" << std::boolalpha + << context.refinement.global_fm.refine_until_no_improvement << " global_fm_num_seed_nodes=" << context.refinement.global_fm.num_seed_nodes - << " global_fm_obey_minimal_parallelism=" << std::boolalpha << context.refinement.global_fm.obey_minimal_parallelism; + << " global_fm_obey_minimal_parallelism=" << std::boolalpha + << context.refinement.global_fm.obey_minimal_parallelism; oss << " flow_algorithm=" << context.refinement.flows.algorithm - << " flow_parallel_searches_multiplier=" << context.refinement.flows.parallel_searches_multiplier - << " flow_num_parallel_searches=" << context.refinement.flows.num_parallel_searches + << " flow_parallel_searches_multiplier=" + << context.refinement.flows.parallel_searches_multiplier + << " flow_num_parallel_searches=" + << context.refinement.flows.num_parallel_searches << " flow_max_bfs_distance=" << context.refinement.flows.max_bfs_distance - << " flow_min_relative_improvement_per_round=" << context.refinement.flows.min_relative_improvement_per_round + << " flow_min_relative_improvement_per_round=" + << context.refinement.flows.min_relative_improvement_per_round << " flow_time_limit_factor=" << context.refinement.flows.time_limit_factor - << " flow_skip_small_cuts=" << std::boolalpha << context.refinement.flows.skip_small_cuts - << " flow_skip_unpromising_blocks=" << std::boolalpha << context.refinement.flows.skip_unpromising_blocks - << " flow_pierce_in_bulk=" << std::boolalpha << context.refinement.flows.pierce_in_bulk + << " flow_skip_small_cuts=" << std::boolalpha + << context.refinement.flows.skip_small_cuts + << " flow_skip_unpromising_blocks=" << std::boolalpha + << context.refinement.flows.skip_unpromising_blocks + << " flow_pierce_in_bulk=" << std::boolalpha + << context.refinement.flows.pierce_in_bulk << " flow_alpha=" << context.refinement.flows.alpha << " flow_max_num_pins=" << context.refinement.flows.max_num_pins - << " flow_find_most_balanced_cut=" << std::boolalpha << context.refinement.flows.find_most_balanced_cut - << " flow_determine_distance_from_cut=" << std::boolalpha << context.refinement.flows.determine_distance_from_cut + << " flow_find_most_balanced_cut=" << std::boolalpha + << 
context.refinement.flows.find_most_balanced_cut + << " flow_determine_distance_from_cut=" << std::boolalpha + << context.refinement.flows.determine_distance_from_cut << " flow_steiner_tree_policy=" << context.refinement.flows.steiner_tree_policy; oss << " num_threads=" << context.shared_memory.num_threads - << " use_localized_random_shuffle=" << std::boolalpha << context.shared_memory.use_localized_random_shuffle + << " use_localized_random_shuffle=" << std::boolalpha + << context.shared_memory.use_localized_random_shuffle << " shuffle_block_size=" << context.shared_memory.shuffle_block_size - << " static_balancing_work_packages=" << context.shared_memory.static_balancing_work_packages; + << " static_balancing_work_packages=" + << context.shared_memory.static_balancing_work_packages; - if ( context.partition.objective == Objective::steiner_tree ) { - oss << " target_graph_file=" << context.mapping.target_graph_file.substr( - context.mapping.target_graph_file.find_last_of('/') + 1) + if(context.partition.objective == Objective::steiner_tree) + { + oss << " target_graph_file=" + << context.mapping.target_graph_file.substr( + context.mapping.target_graph_file.find_last_of('/') + 1) << " mapping_strategy=" << context.mapping.strategy - << " mapping_use_local_search=" << std::boolalpha << context.mapping.use_local_search - << " mapping_use_two_phase_approach=" << std::boolalpha << context.mapping.use_two_phase_approach + << " mapping_use_local_search=" << std::boolalpha + << context.mapping.use_local_search + << " mapping_use_two_phase_approach=" << std::boolalpha + << context.mapping.use_two_phase_approach << " mapping_max_steiner_tree_size=" << context.mapping.max_steiner_tree_size << " mapping_largest_he_fraction=" << context.mapping.largest_he_fraction - << " mapping_min_pin_coverage_of_largest_hes=" << context.mapping.min_pin_coverage_of_largest_hes + << " mapping_min_pin_coverage_of_largest_hes=" + << context.mapping.min_pin_coverage_of_largest_hes << " mapping_large_he_threshold=" << context.mapping.large_he_threshold; - if ( TargetGraph::TRACK_STATS ) { + if(TargetGraph::TRACK_STATS) + { hypergraph.targetGraph()->printStats(oss); } } // Metrics - if ( hypergraph.initialNumEdges() > 0 ) { - oss << " " << context.partition.objective << "=" << metrics::quality(hypergraph, context); - if ( context.partition.objective == Objective::steiner_tree ) { - oss << " approximation_factor=" << metrics::approximationFactorForProcessMapping(hypergraph, context); + if(hypergraph.initialNumEdges() > 0) + { + oss << " " << context.partition.objective << "=" + << metrics::quality(hypergraph, context); + if(context.partition.objective == Objective::steiner_tree) + { + oss << " approximation_factor=" + << metrics::approximationFactorForProcessMapping(hypergraph, context); } - if ( context.partition.objective != Objective::cut ) { + if(context.partition.objective != Objective::cut) + { oss << " cut=" << metrics::quality(hypergraph, Objective::cut); } - if ( context.partition.objective != Objective::km1 ) { + if(context.partition.objective != Objective::km1) + { oss << " km1=" << metrics::quality(hypergraph, Objective::km1); } - if ( context.partition.objective != Objective::soed ) { + if(context.partition.objective != Objective::soed) + { oss << " soed=" << metrics::quality(hypergraph, Objective::soed); } oss << " imbalance=" << metrics::imbalance(hypergraph, context); @@ -197,7 +280,7 @@ std::string serialize(const PartitionedHypergraph& hypergraph, oss << " totalPartitionTime=" << elapsed_seconds.count(); 
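// For illustration only (hypothetical values, not part of this change): the record
// assembled above is a single space-separated key=value line intended for SQL/plot tooling,
// for example
//
//   RESULT algorithm=mt_kahypar graph=example.hgr numHNs=1000 numHEs=800 mode=direct
//     objective=km1 k=4 epsilon=0.03 seed=0 ... km1=42 imbalance=0.021 totalPartitionTime=1.37
//
// Only the keys visible in the surrounding code are real; the example values are made up.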
// Timings - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); timer.showDetailedTimings(context.partition.show_detailed_timings); timer.serialize(oss); @@ -208,17 +291,19 @@ std::string serialize(const PartitionedHypergraph& hypergraph, oss << utils::Utilities::instance().getInitialPartitioningStats(context.utility_id); return oss.str(); - } else { + } + else + { return ""; } } namespace { -#define SERIALIZE(X) std::string serialize(const X& hypergraph, \ - const Context& context, \ - const std::chrono::duration& elapsed_seconds) +#define SERIALIZE(X) \ + std::string serialize(const X &hypergraph, const Context &context, \ + const std::chrono::duration &elapsed_seconds) } // namespace INSTANTIATE_FUNC_WITH_PARTITIONED_HG(SERIALIZE) -} // namespace +} // namespace diff --git a/mt-kahypar/io/sql_plottools_serializer.h b/mt-kahypar/io/sql_plottools_serializer.h index 5845e2da2..204e03d71 100644 --- a/mt-kahypar/io/sql_plottools_serializer.h +++ b/mt-kahypar/io/sql_plottools_serializer.h @@ -27,15 +27,14 @@ #pragma once -#include #include +#include #include "mt-kahypar/partition/context.h" namespace mt_kahypar::io::serializer { - template - std::string serialize(const PartitionedHypergraph& hypergraph, - const Context& context, - const std::chrono::duration& elapsed_seconds); +template +std::string serialize(const PartitionedHypergraph &hypergraph, const Context &context, + const std::chrono::duration &elapsed_seconds); } \ No newline at end of file diff --git a/mt-kahypar/macros.h b/mt-kahypar/macros.h index 0249d5a16..679859668 100644 --- a/mt-kahypar/macros.h +++ b/mt-kahypar/macros.h @@ -6,10 +6,10 @@ * Copyright (C) 2019 Tobias Heuer * * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is + * of this software and associated documentation files (the "Software"), to + *deal in the Software without restriction, including without limitation the + *rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + *sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in @@ -19,22 +19,22 @@ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + *FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + *IN THE SOFTWARE. 
******************************************************************************/ #pragma once #include -#if defined(MT_KAHYPAR_LIBRARY_MODE) || \ - !defined(KAHYPAR_ENABLE_THREAD_PINNING) || defined(__APPLE__) +#if defined(MT_KAHYPAR_LIBRARY_MODE) || !defined(KAHYPAR_ENABLE_THREAD_PINNING) || \ + defined(__APPLE__) #include "tbb/task_arena.h" -// If we use the C or Python interface or thread pinning is disabled, the cpu ID -// to which the current thread is assigned to is not unique. We therefore use -// the slot index of the current task arena as unique thread ID. Note that the -// ID can be negative if the task scheduler is not initialized. +// If we use the C or Python interface or thread pinning is disabled, the cpu +// ID to which the current thread is assigned to is not unique. We therefore +// use the slot index of the current task arena as unique thread ID. Note that +// the ID can be negative if the task scheduler is not initialized. #define THREAD_ID std::max(0, tbb::this_task_arena::current_thread_index()) #else #ifdef __linux__ @@ -48,24 +48,26 @@ #include "kahypar-resources/macros.h" -#define SPECIALIZATION(EXPR, TYPE) \ - template std::enable_if_t +#define SPECIALIZATION(EXPR, TYPE) \ + template \ + std::enable_if_t -#define TRUE_SPECIALIZATION(EXPR, TYPE) \ - template std::enable_if_t +#define TRUE_SPECIALIZATION(EXPR, TYPE) \ + template \ + std::enable_if_t -#define FALSE_SPECIALIZATION(EXPR, TYPE) \ - template std::enable_if_t +#define FALSE_SPECIALIZATION(EXPR, TYPE) \ + template \ + std::enable_if_t -#if (defined(__GNUC__) || defined(__clang__)) && defined(NDEBUG) -#define MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline)) inline +#if(defined(__GNUC__) || defined(__clang__)) && defined(NDEBUG) +#define MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) inline #else #define MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE #endif -#define HEAVY_ASSERT0(cond) \ - !(enable_heavy_assert) ? (void)0 : [&]() { ASSERT(cond); }() -#define HEAVY_ASSERT1(cond, msg) \ +#define HEAVY_ASSERT0(cond) !(enable_heavy_assert) ? (void)0 : [&]() { ASSERT(cond); }() +#define HEAVY_ASSERT1(cond, msg) \ !(enable_heavy_assert) ? (void)0 : [&]() { ASSERT(cond, msg); }() #ifdef KAHYPAR_ENABLE_HEAVY_PREPROCESSING_ASSERTIONS @@ -106,21 +108,19 @@ // Heavy assertions are assertions which increase the complexity of the scope // which they are executed in by an polynomial factor. In debug mode you are // often only interested in certain phase of the multilevel paradigm. However, -// when enabling all assertions it can take a while to reach the point which you -// are really interested in, because heavy assertions radicaly downgrade the -// performance of the application. Therefore such assertions should be packed in -// a heavy assertion macro. Heavy assertions can be enabled via cmake flag for -// specific phase or for specific scope by adding static constexpr bool -// enable_heavy_assert = false; to the corresponding scope. -#define HEAVY_PREPROCESSING_ASSERT(...) \ - EXPAND(HEAVY_ASSERT_EVAL(PREPROCESSING, \ - EXPAND(NARG(__VA_ARGS__)))(__VA_ARGS__)) -#define HEAVY_COARSENING_ASSERT(...) \ +// when enabling all assertions it can take a while to reach the point which +// you are really interested in, because heavy assertions radicaly downgrade +// the performance of the application. Therefore such assertions should be +// packed in a heavy assertion macro. 
Heavy assertions can be enabled via cmake +// flag for specific phase or for specific scope by adding static constexpr +// bool enable_heavy_assert = false; to the corresponding scope. +#define HEAVY_PREPROCESSING_ASSERT(...) \ + EXPAND(HEAVY_ASSERT_EVAL(PREPROCESSING, EXPAND(NARG(__VA_ARGS__)))(__VA_ARGS__)) +#define HEAVY_COARSENING_ASSERT(...) \ EXPAND(HEAVY_ASSERT_EVAL(COARSENING, EXPAND(NARG(__VA_ARGS__)))(__VA_ARGS__)) -#define HEAVY_INITIAL_PARTITIONING_ASSERT(...) \ - EXPAND(HEAVY_ASSERT_EVAL(INITIAL_PARTITIONING, \ - EXPAND(NARG(__VA_ARGS__)))(__VA_ARGS__)) -#define HEAVY_REFINEMENT_ASSERT(...) \ +#define HEAVY_INITIAL_PARTITIONING_ASSERT(...) \ + EXPAND(HEAVY_ASSERT_EVAL(INITIAL_PARTITIONING, EXPAND(NARG(__VA_ARGS__)))(__VA_ARGS__)) +#define HEAVY_REFINEMENT_ASSERT(...) \ EXPAND(HEAVY_ASSERT_EVAL(REFINEMENT, EXPAND(NARG(__VA_ARGS__)))(__VA_ARGS__)) // In windows unisgned long != size_t @@ -144,25 +144,26 @@ #undef WARNING #endif #define WARNING(msg) LOG << YELLOW << "[WARNING]" << END << msg -#define ERR(msg) \ - LOG << RED << "[ERROR]" << END << msg; \ +#define ERR(msg) \ + LOG << RED << "[ERROR]" << END << msg; \ std::exit(-1) #ifdef MT_KAHYPAR_LIBRARY_MODE -#define ALGO_SWITCH(warning_msg, error_msg, context_variable, \ - alternative_value) \ +#define ALGO_SWITCH(warning_msg, error_msg, context_variable, alternative_value) \ ERR(error_msg); #else -#define ALGO_SWITCH(warning_msg, error_msg, context_variable, \ - alternative_value) \ - WARNING(warning_msg); \ - char answer = 'N'; \ - std::cin >> answer; \ - answer = std::toupper(answer); \ - if (answer == 'Y') { \ - context_variable = alternative_value; \ - } else { \ - ERR(error_msg); \ +#define ALGO_SWITCH(warning_msg, error_msg, context_variable, alternative_value) \ + WARNING(warning_msg); \ + char answer = 'N'; \ + std::cin >> answer; \ + answer = std::toupper(answer); \ + if(answer == 'Y') \ + { \ + context_variable = alternative_value; \ + } \ + else \ + { \ + ERR(error_msg); \ } #endif diff --git a/mt-kahypar/parallel/atomic_wrapper.h b/mt-kahypar/parallel/atomic_wrapper.h index e6d14a5a6..7f85d24a6 100644 --- a/mt-kahypar/parallel/atomic_wrapper.h +++ b/mt-kahypar/parallel/atomic_wrapper.h @@ -33,222 +33,227 @@ #include #include -template -class CAtomic : public std::__atomic_base { +template +class CAtomic : public std::__atomic_base +{ public: using Base = std::__atomic_base; - explicit CAtomic(const T value = T()) : Base(value) { } + explicit CAtomic(const T value = T()) : Base(value) {} - CAtomic(const CAtomic& other) : Base(other.load(std::memory_order_relaxed)) { } + CAtomic(const CAtomic &other) : Base(other.load(std::memory_order_relaxed)) {} - CAtomic& operator=(const CAtomic& other) { + CAtomic &operator=(const CAtomic &other) + { Base::store(other.load(std::memory_order_relaxed), std::memory_order_relaxed); return *this; } - CAtomic(CAtomic&& other) : Base(other.load(std::memory_order_relaxed)) { } + CAtomic(CAtomic &&other) : Base(other.load(std::memory_order_relaxed)) {} - CAtomic& operator=(CAtomic&& other) { + CAtomic &operator=(CAtomic &&other) + { Base::store(other.load(std::memory_order_relaxed), std::memory_order_relaxed); return *this; } - // unfortunately the internal value M_i is private, so we cannot issue __atomic_add_fetch( &M_i, i, int(m) ) ourselves - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE T add_fetch(T i, std::memory_order m = std::memory_order_seq_cst) { + // unfortunately the internal value M_i is private, so we cannot issue + // __atomic_add_fetch( &M_i, i, int(m) ) ourselves + 
MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE T + add_fetch(T i, std::memory_order m = std::memory_order_seq_cst) + { return Base::fetch_add(i, m) + i; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE T sub_fetch(T i, std::memory_order m = std::memory_order_seq_cst) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE T + sub_fetch(T i, std::memory_order m = std::memory_order_seq_cst) + { return Base::fetch_sub(i, m) - i; } }; -class SpinLock { +class SpinLock +{ public: - // boilerplate to make it 'copyable'. but we just clear the spinlock. there is never a use case to copy a locked spinlock - SpinLock() { } - SpinLock(const SpinLock&) { } - SpinLock& operator=(const SpinLock&) { spinner.clear(std::memory_order_relaxed); return *this; } - - bool tryLock() { - return !spinner.test_and_set(std::memory_order_acquire); + // boilerplate to make it 'copyable'. but we just clear the spinlock. there is never a + // use case to copy a locked spinlock + SpinLock() {} + SpinLock(const SpinLock &) {} + SpinLock &operator=(const SpinLock &) + { + spinner.clear(std::memory_order_relaxed); + return *this; } - void lock() { - while (spinner.test_and_set(std::memory_order_acquire)) { + bool tryLock() { return !spinner.test_and_set(std::memory_order_acquire); } + + void lock() + { + while(spinner.test_and_set(std::memory_order_acquire)) + { // spin // stack overflow says adding 'cpu_relax' instruction may improve performance } } - void unlock() { - spinner.clear(std::memory_order_release); - } + void unlock() { spinner.clear(std::memory_order_release); } private: std::atomic_flag spinner = ATOMIC_FLAG_INIT; }; - namespace mt_kahypar { namespace parallel { // For non-integral types, e.g. floating point. used in community detecion template -class AtomicWrapper : public std::atomic { - public: - explicit AtomicWrapper(const T value = T()) : - std::atomic(value) { } +class AtomicWrapper : public std::atomic +{ +public: + explicit AtomicWrapper(const T value = T()) : std::atomic(value) {} - AtomicWrapper(const AtomicWrapper& other) : - std::atomic(other.load()) { } + AtomicWrapper(const AtomicWrapper &other) : std::atomic(other.load()) {} - AtomicWrapper & operator= (const AtomicWrapper& other) { + AtomicWrapper &operator=(const AtomicWrapper &other) + { this->store(other.load()); return *this; } - AtomicWrapper(AtomicWrapper&& other) { - this->store(other.load()); - } + AtomicWrapper(AtomicWrapper &&other) { this->store(other.load()); } - void operator+= (T other) { + void operator+=(T other) + { T cur = this->load(std::memory_order_relaxed); - while (!this->compare_exchange_weak(cur, cur + other, std::memory_order_relaxed)) { + while(!this->compare_exchange_weak(cur, cur + other, std::memory_order_relaxed)) + { cur = this->load(std::memory_order_relaxed); } } - void operator-= (T other) { + void operator-=(T other) + { T cur = this->load(std::memory_order_relaxed); - while (!this->compare_exchange_weak(cur, cur - other, std::memory_order_relaxed)) { + while(!this->compare_exchange_weak(cur, cur - other, std::memory_order_relaxed)) + { cur = this->load(std::memory_order_relaxed); } } }; -//template using IntegralAtomicWrapper = CAtomic; - +// template using IntegralAtomicWrapper = CAtomic; template -class IntegralAtomicWrapper { +class IntegralAtomicWrapper +{ static_assert(std::is_integral::value, "Value must be of integral type"); // static_assert( std::atomic::is_always_lock_free, "Atomic must be lock free" ); - public: - explicit IntegralAtomicWrapper(const T value = T()) : - _value(value) { } +public: + explicit IntegralAtomicWrapper(const 
T value = T()) : _value(value) {} - IntegralAtomicWrapper(const IntegralAtomicWrapper& other) : - _value(other._value.load()) { } + IntegralAtomicWrapper(const IntegralAtomicWrapper &other) : _value(other._value.load()) + { + } - IntegralAtomicWrapper & operator= (const IntegralAtomicWrapper& other) { + IntegralAtomicWrapper &operator=(const IntegralAtomicWrapper &other) + { _value = other._value.load(); return *this; } - IntegralAtomicWrapper(IntegralAtomicWrapper&& other) : - _value(other._value.load()) { } + IntegralAtomicWrapper(IntegralAtomicWrapper &&other) : _value(other._value.load()) {} - IntegralAtomicWrapper & operator= (IntegralAtomicWrapper&& other) { + IntegralAtomicWrapper &operator=(IntegralAtomicWrapper &&other) + { _value = other._value.load(); return *this; } - IntegralAtomicWrapper & operator= (T desired) noexcept { + IntegralAtomicWrapper &operator=(T desired) noexcept + { _value = desired; return *this; } - void store(T desired, std::memory_order order = std::memory_order_seq_cst) noexcept { + void store(T desired, std::memory_order order = std::memory_order_seq_cst) noexcept + { _value.store(desired, order); } - T load(std::memory_order order = std::memory_order_seq_cst) const noexcept { + T load(std::memory_order order = std::memory_order_seq_cst) const noexcept + { return _value.load(order); } - operator T () const noexcept { - return _value.load(); - } + operator T() const noexcept { return _value.load(); } - T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) noexcept { + T exchange(T desired, std::memory_order order = std::memory_order_seq_cst) noexcept + { return _value.exchange(desired, order); } - bool compare_exchange_weak(T &expected, T desired, std::memory_order order = std::memory_order_seq_cst) noexcept { + bool compare_exchange_weak(T &expected, T desired, + std::memory_order order = std::memory_order_seq_cst) noexcept + { return _value.compare_exchange_weak(expected, desired, order); } - bool compare_exchange_strong(T &expected, T desired, std::memory_order order = std::memory_order_seq_cst) noexcept { + bool + compare_exchange_strong(T &expected, T desired, + std::memory_order order = std::memory_order_seq_cst) noexcept + { return _value.compare_exchange_strong(expected, desired, order); } - T fetch_add(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept { + T fetch_add(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept + { return _value.fetch_add(arg, order); } - T fetch_sub(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept { + T fetch_sub(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept + { return _value.fetch_sub(arg, order); } - T fetch_and(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept { + T fetch_and(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept + { return _value.fetch_and(arg, order); } - T fetch_or(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept { + T fetch_or(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept + { return _value.fetch_or(arg, order); } - T fetch_xor(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept { + T fetch_xor(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept + { return _value.fetch_xor(arg, order); } - T operator++ () noexcept { - return ++_value; - } + T operator++() noexcept { return ++_value; } - T operator++ (int) noexcept { - return _value++; - } + T operator++(int) noexcept { return _value++; } - T 
operator-- () noexcept { - return --_value; - } + T operator--() noexcept { return --_value; } - T operator-- (int) noexcept { - return _value++; - } + T operator--(int) noexcept { return _value++; } - T operator+= (T arg) noexcept { - return _value.operator+=(arg); - } + T operator+=(T arg) noexcept { return _value.operator+=(arg); } - T operator-= (T arg) noexcept { - return _value.operator-=(arg); - } + T operator-=(T arg) noexcept { return _value.operator-=(arg); } - T operator&= (T arg) noexcept { - return _value.operator&=(arg); - } + T operator&=(T arg) noexcept { return _value.operator&=(arg); } - T operator|= (T arg) noexcept { - return _value.operator|=(arg); - } + T operator|=(T arg) noexcept { return _value.operator|=(arg); } - T operator^= (T arg) noexcept { - return _value.operator^=(arg); - } + T operator^=(T arg) noexcept { return _value.operator^=(arg); } - private: +private: std::atomic _value; }; - - - #pragma GCC diagnostic pop -} // namespace parallel -} // namespace mt_kahypar +} // namespace parallel +} // namespace mt_kahypar diff --git a/mt-kahypar/parallel/chunking.h b/mt-kahypar/parallel/chunking.h index 20e75d128..f3975a778 100644 --- a/mt-kahypar/parallel/chunking.h +++ b/mt-kahypar/parallel/chunking.h @@ -27,16 +27,18 @@ #pragma once -#include #include +#include namespace mt_kahypar::parallel::chunking { - template - inline auto idiv_ceil(T1 a, T2 b) { - return static_cast((static_cast(a)+b-1) / b); - } +template +inline auto idiv_ceil(T1 a, T2 b) +{ + return static_cast((static_cast(a) + b - 1) / b); +} - inline std::pair bounds(size_t i, size_t n, size_t chunk_size) { - return std::make_pair(std::min(n, i * chunk_size), std::min(n, (i+1) * chunk_size)); - } +inline std::pair bounds(size_t i, size_t n, size_t chunk_size) +{ + return std::make_pair(std::min(n, i * chunk_size), std::min(n, (i + 1) * chunk_size)); +} } \ No newline at end of file diff --git a/mt-kahypar/parallel/hardware_topology.h b/mt-kahypar/parallel/hardware_topology.h index 7e14a968d..f7b5cd539 100644 --- a/mt-kahypar/parallel/hardware_topology.h +++ b/mt-kahypar/parallel/hardware_topology.h @@ -27,12 +27,12 @@ #pragma once +#include #include #include +#include #include #include -#include -#include #include "mt-kahypar/macros.h" @@ -50,75 +50,72 @@ namespace parallel { * Template parameters can be replaced in order to mock hardware topology and * simulate a NUMA on a UMA system. 
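The CAS-based operator+= of AtomicWrapper and the chunking helpers reformatted above are typically used together when several tasks accumulate floating-point values. A minimal sketch, assuming the atomic_wrapper.h include path and the accumulation kernel (neither is part of this diff):

#include <cstddef>
#include <vector>

#include <tbb/parallel_for.h>

#include "mt-kahypar/parallel/atomic_wrapper.h" // assumed path of the header reformatted above
#include "mt-kahypar/parallel/chunking.h"

// Sums a vector in parallel: each task reduces its chunk locally, then publishes the
// partial result through the CAS loop of AtomicWrapper<double>::operator+=.
double parallel_sum(const std::vector<double> &values, const size_t num_tasks)
{
  using namespace mt_kahypar::parallel;
  AtomicWrapper<double> sum(0.0);
  const size_t n = values.size();
  const size_t chunk_size = chunking::idiv_ceil(n, num_tasks);
  tbb::parallel_for(size_t(0), num_tasks, [&](const size_t task) {
    double local = 0.0;
    // bounds() yields this task's half-open chunk [first, last), clamped to n
    for(auto [first, last] = chunking::bounds(task, n, chunk_size); first < last; ++first)
    {
      local += values[first];
    }
    sum += local; // retried via compare_exchange_weak under contention
  });
  return sum.load();
}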
*/ -template -class HardwareTopology { - private: +class HardwareTopology +{ +private: static constexpr bool debug = false; using Self = HardwareTopology; - struct Cpu { + struct Cpu + { int cpu_id; bool is_hyperthread; }; - class NumaNode { - public: + class NumaNode + { + public: NumaNode(Node node) : - _node_id(node->os_index), - _num_cores(0), - _cpuset(node->cpuset), - _cpus(), - _mutex() { - for (const int cpu_id : HwTopology::get_cpus_of_numa_node_without_hyperthreads(node)) { - _cpus.emplace_back(Cpu { cpu_id, false }); + _node_id(node->os_index), _num_cores(0), _cpuset(node->cpuset), _cpus(), _mutex() + { + for(const int cpu_id : HwTopology::get_cpus_of_numa_node_without_hyperthreads(node)) + { + _cpus.emplace_back(Cpu{ cpu_id, false }); _num_cores++; } - for (const int cpu_id : HwTopology::get_cpus_of_numa_node_only_hyperthreads(node)) { - _cpus.emplace_back(Cpu { cpu_id, true }); + for(const int cpu_id : HwTopology::get_cpus_of_numa_node_only_hyperthreads(node)) + { + _cpus.emplace_back(Cpu{ cpu_id, true }); } } - NumaNode(const NumaNode&) = delete; - NumaNode & operator= (const NumaNode &) = delete; + NumaNode(const NumaNode &) = delete; + NumaNode &operator=(const NumaNode &) = delete; - NumaNode(NumaNode&& other) : - _node_id(other._node_id), - _num_cores(other._num_cores), - _cpuset(std::move(other._cpuset)), - _cpus(std::move(other._cpus)), - _mutex() { } - - int get_id() const { - return _node_id; + NumaNode(NumaNode &&other) : + _node_id(other._node_id), _num_cores(other._num_cores), + _cpuset(std::move(other._cpuset)), _cpus(std::move(other._cpus)), _mutex() + { } - hwloc_cpuset_t get_cpuset() const { - return _cpuset; - } + int get_id() const { return _node_id; } + + hwloc_cpuset_t get_cpuset() const { return _cpuset; } - std::vector cpus() { + std::vector cpus() + { std::vector cpus; - for (const Cpu& cpu : _cpus) { + for(const Cpu &cpu : _cpus) + { cpus.push_back(cpu.cpu_id); } return cpus; } - size_t num_cores_on_numa_node() const { - return _num_cores; - } + size_t num_cores_on_numa_node() const { return _num_cores; } - size_t num_cpus_on_numa_node() const { - return _cpus.size(); - } + size_t num_cpus_on_numa_node() const { return _cpus.size(); } - bool is_hyperthread(const int cpu_id) { + bool is_hyperthread(const int cpu_id) + { size_t pos = 0; - for ( ; pos < _cpus.size(); ++pos) { - if (_cpus[pos].cpu_id == cpu_id) { + for(; pos < _cpus.size(); ++pos) + { + if(_cpus[pos].cpu_id == cpu_id) + { break; } } @@ -126,22 +123,27 @@ class HardwareTopology { return _cpus[pos].is_hyperthread; } - int get_backup_cpu(const int except_cpu) { + int get_backup_cpu(const int except_cpu) + { std::lock_guard lock(_mutex); int cpu_id = -1; - if ( _cpus.size() > 1 ) { + if(_cpus.size() > 1) + { std::mt19937 rng(420); std::shuffle(_cpus.begin(), _cpus.end(), rng); - if ( _cpus[0].cpu_id != except_cpu ) { + if(_cpus[0].cpu_id != except_cpu) + { cpu_id = _cpus[0].cpu_id; - } else { + } + else + { cpu_id = _cpus[1].cpu_id; } } return cpu_id; } - private: + private: int _node_id; size_t _num_cores; hwloc_cpuset_t _cpuset; @@ -149,74 +151,78 @@ class HardwareTopology { std::mutex _mutex; }; - public: - HardwareTopology(const HardwareTopology&) = delete; - HardwareTopology & operator= (const HardwareTopology &) = delete; +public: + HardwareTopology(const HardwareTopology &) = delete; + HardwareTopology &operator=(const HardwareTopology &) = delete; - HardwareTopology(HardwareTopology&&) = delete; - HardwareTopology & operator= (HardwareTopology &&) = delete; + 
HardwareTopology(HardwareTopology &&) = delete; + HardwareTopology &operator=(HardwareTopology &&) = delete; - ~HardwareTopology() { - HwTopology::destroy_topology(_topology); - } + ~HardwareTopology() { HwTopology::destroy_topology(_topology); } - static HardwareTopology& instance() { + static HardwareTopology &instance() + { static HardwareTopology instance; return instance; } - size_t num_numa_nodes() const { - return _numa_nodes.size(); - } + size_t num_numa_nodes() const { return _numa_nodes.size(); } - size_t num_cpus() const { - return _num_cpus; - } + size_t num_cpus() const { return _num_cpus; } - int numa_node_of_cpu(const int cpu_id) const { + int numa_node_of_cpu(const int cpu_id) const + { ASSERT(cpu_id < (int)_cpu_to_numa_node.size()); ASSERT(_cpu_to_numa_node[cpu_id] != std::numeric_limits::max()); return _cpu_to_numa_node[cpu_id]; } - bool is_hyperthread(const int cpu_id) { + bool is_hyperthread(const int cpu_id) + { int node = numa_node_of_cpu(cpu_id); return _numa_nodes[node].is_hyperthread(cpu_id); } // ! Number of Cores on NUMA node - int num_cores_on_numa_node(const int node) const { + int num_cores_on_numa_node(const int node) const + { ASSERT(node < (int)_numa_nodes.size()); ASSERT(_numa_nodes[node].get_id() == node); return _numa_nodes[node].num_cores_on_numa_node(); } // ! Number of CPUs on NUMA node - int num_cpus_on_numa_node(const int node) const { + int num_cpus_on_numa_node(const int node) const + { ASSERT(node < (int)_numa_nodes.size()); ASSERT(_numa_nodes[node].get_id() == node); return _numa_nodes[node].num_cpus_on_numa_node(); } // ! CPU bitmap of NUMA node - hwloc_cpuset_t get_cpuset_of_numa_node(int node) const { + hwloc_cpuset_t get_cpuset_of_numa_node(int node) const + { ASSERT(node < (int)_numa_nodes.size()); ASSERT(_numa_nodes[node].get_id() == node); return _numa_nodes[node].get_cpuset(); } // ! List of CPUs of NUMA node - std::vector get_cpus_of_numa_node(int node) { + std::vector get_cpus_of_numa_node(int node) + { ASSERT(node < (int)_numa_nodes.size()); ASSERT(_numa_nodes[node].get_id() == node); return _numa_nodes[node].cpus(); } // ! List of all available CPUs - std::vector get_all_cpus() { + std::vector get_all_cpus() + { std::vector cpus; - for ( size_t node = 0; node < num_numa_nodes(); ++node ) { - for ( const int cpu_id : _numa_nodes[node].cpus() ) { + for(size_t node = 0; node < num_numa_nodes(); ++node) + { + for(const int cpu_id : _numa_nodes[node].cpus()) + { cpus.push_back(cpu_id); } } @@ -224,46 +230,52 @@ class HardwareTopology { } // ! Returns a CPU on a NUMA node that differs from CPU except_cpu - int get_backup_cpu(const int node, const int except_cpu) { + int get_backup_cpu(const int node, const int except_cpu) + { ASSERT(node < (int)_numa_nodes.size()); int cpu_id = _numa_nodes[node].get_backup_cpu(except_cpu); - if ( cpu_id == -1 ) { - #ifndef KAHYPAR_TRAVIS_BUILD - throw SystemException("Your system has not enough cpus to execute MT-KaHyPar (> 1)"); - #else + if(cpu_id == -1) + { +#ifndef KAHYPAR_TRAVIS_BUILD + throw SystemException( + "Your system has not enough cpus to execute MT-KaHyPar (> 1)"); +#else // Handling special case: // Travis CI has only two cpus, when mocking a numa architecture // of two nodes, we have to search for a backup node on a different // numa node. Note, this is only enabled in DEBUG mode. cpu_id = except_cpu; - #endif +#endif } return cpu_id; } // ! Set membind policy to interleaved allocations on used NUMA nodes // ! 
covered by cpuset - void activate_interleaved_membind_policy(hwloc_cpuset_t cpuset) const { + void activate_interleaved_membind_policy(hwloc_cpuset_t cpuset) const + { hwloc_set_membind(_topology, cpuset, HWLOC_MEMBIND_INTERLEAVE, HWLOC_MEMBIND_MIGRATE); } - private: +private: HardwareTopology() : - _num_cpus(0), - _topology(), - _numa_nodes(), - _cpu_to_numa_node(std::thread::hardware_concurrency(), - std::numeric_limits::max()) { + _num_cpus(0), _topology(), _numa_nodes(), + _cpu_to_numa_node(std::thread::hardware_concurrency(), + std::numeric_limits::max()) + { HwTopology::initialize(_topology); init_numa_nodes(); } - void init_numa_nodes() { + void init_numa_nodes() + { Node node = HwTopology::get_first_numa_node(_topology); - while (node != nullptr) { + while(node != nullptr) + { _numa_nodes.emplace_back(node); node = node->next_cousin; - for (const int cpu_id : _numa_nodes.back().cpus()) { + for(const int cpu_id : _numa_nodes.back().cpus()) + { ASSERT(cpu_id < (int)_cpu_to_numa_node.size()); _cpu_to_numa_node[cpu_id] = _numa_nodes.back().get_id(); ++_num_cpus; @@ -271,12 +283,11 @@ class HardwareTopology { } } - size_t _num_cpus; Topology _topology; std::vector _numa_nodes; std::vector _cpu_to_numa_node; }; -} // namespace parallel -} // namespace mt_kahypar +} // namespace parallel +} // namespace mt_kahypar diff --git a/mt-kahypar/parallel/hwloc_topology.h b/mt-kahypar/parallel/hwloc_topology.h index 03a241771..67cad8898 100644 --- a/mt-kahypar/parallel/hwloc_topology.h +++ b/mt-kahypar/parallel/hwloc_topology.h @@ -36,14 +36,17 @@ namespace parallel { * calling hwloc library. Calls to hwloc library are outsourced * to this class such that hardware topology can be mocked. */ -class HwlocTopology { - public: - static void initialize(hwloc_topology_t& topology) { +class HwlocTopology +{ +public: + static void initialize(hwloc_topology_t &topology) + { hwloc_topology_init(&topology); hwloc_topology_load(topology); } - static hwloc_obj_t get_first_numa_node(hwloc_topology_t topology) { + static hwloc_obj_t get_first_numa_node(hwloc_topology_t topology) + { int numa_depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_NUMANODE); hwloc_obj_t node = hwloc_get_obj_by_depth(topology, numa_depth, 0); @@ -53,10 +56,13 @@ class HwlocTopology { // node are then stored under another node (e.g. L3 Cache Node) on the same level // in the topology. Since, we rely on the assumption that we find all core nodes under // a numa node, we have to fix this here. 
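HardwareTopology above is a singleton whose template parameters select either the real hwloc backend or a mocked one; the queries below show how it is typically consulted. This is a sketch only, with the concrete instantiation left as a template parameter because its arguments are not visible in this hunk:

#include <iostream>

// Topo stands for whatever HardwareTopology<...> instantiation the project defines.
template <typename Topo>
void print_topology()
{
  Topo &topo = Topo::instance();
  std::cout << "NUMA nodes: " << topo.num_numa_nodes() << ", CPUs: " << topo.num_cpus()
            << '\n';
  for(const int cpu : topo.get_all_cpus())
  {
    std::cout << "  cpu " << cpu << " -> node " << topo.numa_node_of_cpu(cpu)
              << (topo.is_hyperthread(cpu) ? " (hyperthread)" : "") << '\n';
  }
}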
- if ( numa_depth < 0 ) { + if(numa_depth < 0) + { ASSERT(node->parent); - for ( size_t i = 0; i < node->parent->arity; ++i ) { - if ( node->parent->children[i]->type != HWLOC_OBJ_NUMANODE ) { + for(size_t i = 0; i < node->parent->arity; ++i) + { + if(node->parent->children[i]->type != HWLOC_OBJ_NUMANODE) + { node = node->parent->children[i]; break; } @@ -64,7 +70,8 @@ class HwlocTopology { int current_index = 0; hwloc_obj_t current_node = node; - while(current_node) { + while(current_node) + { current_node->os_index = current_index++; current_node = current_node->next_cousin; } @@ -73,65 +80,68 @@ class HwlocTopology { return node; } - static std::vector get_cpus_of_numa_node_without_hyperthreads(hwloc_obj_t node) { + static std::vector get_cpus_of_numa_node_without_hyperthreads(hwloc_obj_t node) + { std::vector cpus; auto add_cpu_of_core = [&](hwloc_obj_t node) { - ASSERT(node->type == HWLOC_OBJ_CORE); - std::vector core_cpus; - int cpu_id; - hwloc_bitmap_foreach_begin(cpu_id, node->cpuset) { - core_cpus.emplace_back(cpu_id); - } - hwloc_bitmap_foreach_end(); - // Assume that core consists of two processing units (hyperthreads) - ASSERT(!core_cpus.empty()); - cpus.push_back(core_cpus[0]); - }; + ASSERT(node->type == HWLOC_OBJ_CORE); + std::vector core_cpus; + int cpu_id; + hwloc_bitmap_foreach_begin(cpu_id, node->cpuset) { core_cpus.emplace_back(cpu_id); } + hwloc_bitmap_foreach_end(); + // Assume that core consists of two processing units (hyperthreads) + ASSERT(!core_cpus.empty()); + cpus.push_back(core_cpus[0]); + }; enumerate_all_core_units(node, add_cpu_of_core); return cpus; } - static std::vector get_cpus_of_numa_node_only_hyperthreads(hwloc_obj_t node) { + static std::vector get_cpus_of_numa_node_only_hyperthreads(hwloc_obj_t node) + { std::vector cpus; auto add_cpu_of_core = [&](hwloc_obj_t node) { - ASSERT(node->type == HWLOC_OBJ_CORE); - std::vector core_cpus; - int cpu_id; - hwloc_bitmap_foreach_begin(cpu_id, node->cpuset) { - core_cpus.emplace_back(cpu_id); - } - hwloc_bitmap_foreach_end(); - // Assume that core consists of two processing units (hyperthreads) - if ( core_cpus.size() >= 2 ) { - cpus.push_back(core_cpus[1]); - } - }; + ASSERT(node->type == HWLOC_OBJ_CORE); + std::vector core_cpus; + int cpu_id; + hwloc_bitmap_foreach_begin(cpu_id, node->cpuset) { core_cpus.emplace_back(cpu_id); } + hwloc_bitmap_foreach_end(); + // Assume that core consists of two processing units (hyperthreads) + if(core_cpus.size() >= 2) + { + cpus.push_back(core_cpus[1]); + } + }; enumerate_all_core_units(node, add_cpu_of_core); return cpus; } - static void destroy_topology(hwloc_topology_t topology) { + static void destroy_topology(hwloc_topology_t topology) + { hwloc_topology_destroy(topology); } - private: +private: template - static void enumerate_all_core_units(hwloc_obj_t node, F& func) { - if (node->type == HWLOC_OBJ_CORE) { + static void enumerate_all_core_units(hwloc_obj_t node, F &func) + { + if(node->type == HWLOC_OBJ_CORE) + { func(node); return; } - for (size_t i = 0; i < node->arity; ++i) { + for(size_t i = 0; i < node->arity; ++i) + { enumerate_all_core_units(node->children[i], func); } } - HwlocTopology() { } + HwlocTopology() {} }; -} // namespace parallel -} // namespace mt_kahypar +} // namespace parallel +} // namespace mt_kahypar diff --git a/mt-kahypar/parallel/memory_pool.h b/mt-kahypar/parallel/memory_pool.h index 798d0b118..2d153c7a8 100644 --- a/mt-kahypar/parallel/memory_pool.h +++ b/mt-kahypar/parallel/memory_pool.h @@ -27,13 +27,13 @@ #pragma once +#include 
+#include +#include #include #include -#include #include -#include #include -#include #if defined(__linux__) or defined(__APPLE__) #include #elif _WIN32 @@ -55,7 +55,8 @@ namespace parallel { * Memory chunks can be registered with a key and all memory * chunks can be collectively allocated in parallel. */ -class MemoryPoolT { +class MemoryPoolT +{ static constexpr bool debug = false; static constexpr size_t kInvalidMemoryChunk = std::numeric_limits::max(); @@ -63,21 +64,20 @@ class MemoryPoolT { static constexpr size_t MINIMUM_ALLOCATION_SIZE = 10000000; // 10 MB // ! Represents a memory group. - struct MemoryGroup { + struct MemoryGroup + { - explicit MemoryGroup(const size_t stage) : - _stage(stage), - _key_to_memory_id() { } + explicit MemoryGroup(const size_t stage) : _stage(stage), _key_to_memory_id() {} - void insert(const std::string& key, const size_t memory_id) { + void insert(const std::string &key, const size_t memory_id) + { _key_to_memory_id.insert(std::make_pair(key, memory_id)); } - size_t getKey(const std::string& key) const { - return _key_to_memory_id.at(key); - } + size_t getKey(const std::string &key) const { return _key_to_memory_id.at(key); } - bool containsKey(const std::string& key) const { + bool containsKey(const std::string &key) const + { return _key_to_memory_id.find(key) != _key_to_memory_id.end(); } @@ -86,32 +86,25 @@ class MemoryPoolT { }; // ! Represents a memory chunk. - struct MemoryChunk { - - explicit MemoryChunk(const size_t num_elements, - const size_t size) : - _chunk_mutex(), - _num_elements(num_elements), - _size(size), - _initial_size(size * num_elements), - _used_size(size * num_elements), - _total_size(size * num_elements), - _data(nullptr), - _next_memory_chunk_id(kInvalidMemoryChunk), - _defer_allocation(false), - _is_assigned(false) { } - - MemoryChunk(MemoryChunk&& other) : - _chunk_mutex(), - _num_elements(other._num_elements), - _size(other._size), - _initial_size(other._initial_size), - _used_size(other._used_size), - _total_size(other._total_size), - _data(std::move(other._data)), - _next_memory_chunk_id(other._next_memory_chunk_id), - _defer_allocation(other._defer_allocation), - _is_assigned(other._is_assigned) { + struct MemoryChunk + { + + explicit MemoryChunk(const size_t num_elements, const size_t size) : + _chunk_mutex(), _num_elements(num_elements), _size(size), + _initial_size(size * num_elements), _used_size(size * num_elements), + _total_size(size * num_elements), _data(nullptr), + _next_memory_chunk_id(kInvalidMemoryChunk), _defer_allocation(false), + _is_assigned(false) + { + } + + MemoryChunk(MemoryChunk &&other) : + _chunk_mutex(), _num_elements(other._num_elements), _size(other._size), + _initial_size(other._initial_size), _used_size(other._used_size), + _total_size(other._total_size), _data(std::move(other._data)), + _next_memory_chunk_id(other._next_memory_chunk_id), + _defer_allocation(other._defer_allocation), _is_assigned(other._is_assigned) + { other._data = nullptr; other._next_memory_chunk_id = kInvalidMemoryChunk; other._defer_allocation = true; @@ -121,26 +114,33 @@ class MemoryPoolT { // ! Requests the memory chunk. // ! Note, successive calls to this method will return // ! nullptr until release_chunk() is called. 
- char* request_chunk() { + char *request_chunk() + { std::lock_guard lock(_chunk_mutex); - if ( _data && !_is_assigned ) { + if(_data && !_is_assigned) + { _is_assigned = true; return _data; - } else { + } + else + { return nullptr; } } - char* request_unused_chunk(const size_t size, const size_t page_size) { + char *request_unused_chunk(const size_t size, const size_t page_size) + { size_t aligned_used_size = align_with_page_size(_used_size, page_size); - if ( _data && aligned_used_size < _total_size && - size <= _total_size - aligned_used_size ) { + if(_data && aligned_used_size < _total_size && + size <= _total_size - aligned_used_size) + { std::lock_guard lock(_chunk_mutex); // Double check aligned_used_size = align_with_page_size(_used_size, page_size); - if ( _data && aligned_used_size < _total_size && - size <= _total_size - aligned_used_size ) { - char* data = _data + aligned_used_size; + if(_data && aligned_used_size < _total_size && + size <= _total_size - aligned_used_size) + { + char *data = _data + aligned_used_size; _used_size = aligned_used_size + size; return data; } @@ -149,45 +149,57 @@ class MemoryPoolT { } // ! Releases the memory chunks - void release_chunk() { + void release_chunk() + { std::lock_guard lock(_chunk_mutex); _is_assigned = false; } // ! Allocates the memory chunk // ! Note, the memory chunk is zero initialized. - bool allocate() { - if ( !_data && !_defer_allocation ) { - _data = (char*) scalable_calloc(_num_elements, _size); + bool allocate() + { + if(!_data && !_defer_allocation) + { + _data = (char *)scalable_calloc(_num_elements, _size); return true; - } else { + } + else + { return false; } } // ! Frees the memory chunk - void free() { - if ( _data ) { + void free() + { + if(_data) + { scalable_free(_data); _data = nullptr; } } // ! Returns the size in bytes of the memory chunk - size_t size_in_bytes() const { + size_t size_in_bytes() const + { size_t size = 0; - if ( _data ) { + if(_data) + { size = _num_elements * _size; } return size; } // Align with page size to minimize cache effects - size_t align_with_page_size(const size_t size, const size_t page_size) { - if ( page_size > 1 ) { - return 2 * page_size * ( size / ( 2 * page_size ) + - ( ( size % ( 2 * page_size ) ) != 0 ) ); - } else { + size_t align_with_page_size(const size_t size, const size_t page_size) + { + if(page_size > 1) + { + return 2 * page_size * (size / (2 * page_size) + ((size % (2 * page_size)) != 0)); + } + else + { return size; } } @@ -204,7 +216,7 @@ class MemoryPoolT { // ! Total size in bytes of the memory chunk size_t _total_size; // ! Memory chunk - char* _data; + char *_data; // ! Memory chunk id where this memory chunk is transfered // ! to if memory is not needed any more size_t _next_memory_chunk_id; @@ -216,37 +228,34 @@ class MemoryPoolT { bool _is_assigned; }; +public: + MemoryPoolT(const MemoryPoolT &) = delete; + MemoryPoolT &operator=(const MemoryPoolT &) = delete; - public: - MemoryPoolT(const MemoryPoolT&) = delete; - MemoryPoolT & operator= (const MemoryPoolT &) = delete; - - MemoryPoolT(MemoryPoolT&&) = delete; - MemoryPoolT & operator= (MemoryPoolT &&) = delete; + MemoryPoolT(MemoryPoolT &&) = delete; + MemoryPoolT &operator=(MemoryPoolT &&) = delete; - ~MemoryPoolT() { - free_memory_chunks(); - } + ~MemoryPoolT() { free_memory_chunks(); } - static MemoryPoolT& instance() { + static MemoryPoolT &instance() + { static MemoryPoolT instance; return instance; } // ! 
Returns wheater memory pool is already initialized - bool isInitialized() const { - return _is_initialized; - } + bool isInitialized() const { return _is_initialized; } // ! Registers a memory group in the memory pool. A memory // ! group is associated with a stage. Assumption is that, if // ! a stage is completed, than memory is not needed any more // ! and can be reused in a consecutive stage. - void register_memory_group(const std::string& group, - const size_t stage) { - if ( _memory_groups.find(group) == _memory_groups.end() ) { - _memory_groups.emplace(std::piecewise_construct, - std::forward_as_tuple(group), std::forward_as_tuple(stage)); + void register_memory_group(const std::string &group, const size_t stage) + { + if(_memory_groups.find(group) == _memory_groups.end()) + { + _memory_groups.emplace(std::piecewise_construct, std::forward_as_tuple(group), + std::forward_as_tuple(stage)); } } @@ -254,33 +263,37 @@ class MemoryPoolT { // ! associated with a memory group and a unique key within that group. // ! Note, that the memory chunk is not immediatly allocated. One has to call // ! allocate_memory_chunks() to collectively allocate all memory chunks. - void register_memory_chunk(const std::string& group, - const std::string& key, - const size_t num_elements, - const size_t size) { + void register_memory_chunk(const std::string &group, const std::string &key, + const size_t num_elements, const size_t size) + { std::unique_lock lock(_memory_mutex); - if ( _memory_groups.find(group) != _memory_groups.end() ) { - MemoryGroup& mem_group = _memory_groups.at(group); + if(_memory_groups.find(group) != _memory_groups.end()) + { + MemoryGroup &mem_group = _memory_groups.at(group); const size_t memory_id = _memory_chunks.size(); - if ( !mem_group.containsKey(key) ) { + if(!mem_group.containsKey(key)) + { mem_group.insert(key, memory_id); _memory_chunks.emplace_back(num_elements, size); DBG << "Registers memory chunk (" << group << "," << key << ")" - << "of" << size_in_megabyte(num_elements * size) << "MB" + << "of" << size_in_megabyte(num_elements * size) << "MB" << "in memory pool"; } } } // ! Allocates all registered memory chunks in parallel - void allocate_memory_chunks(const bool optimize_allocations = true) { + void allocate_memory_chunks(const bool optimize_allocations = true) + { std::unique_lock lock(_memory_mutex); - if ( optimize_allocations ) { + if(optimize_allocations) + { optimize_memory_allocations(); } const size_t num_memory_segments = _memory_chunks.size(); tbb::parallel_for(UL(0), num_memory_segments, [&](const size_t i) { - if (_memory_chunks[i].allocate()) { + if(_memory_chunks[i].allocate()) + { DBG << "Allocate memory chunk of size" << size_in_megabyte(_memory_chunks[i].size_in_bytes()) << "MB"; } @@ -294,21 +307,23 @@ class MemoryPoolT { // ! requested, the size of the memory chunk is smaller than the // ! requested size or the requested memory chunk does not exist, // ! than nullptr is returned. 
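The register/allocate interface above is meant to be driven once per partitioning run: groups are declared with their stage, chunks are registered under a (group, key) pair, and only then is everything allocated collectively. A minimal sketch, assuming the group and key names and the chunk sizes (all illustrative, not taken from the partitioner):

#include <cstddef>

#include "mt-kahypar/parallel/memory_pool.h"

void setup_pool(const size_t num_nodes, const size_t num_edges)
{
  using mt_kahypar::parallel::MemoryPoolT;
  auto &pool = MemoryPoolT::instance();
  // A group is tied to a stage; once a stage's group is released, its chunks can be
  // handed over to the next stage by the allocation optimizer.
  pool.register_memory_group("preprocessing", 0);
  pool.register_memory_group("coarsening", 1);
  pool.register_memory_chunk("preprocessing", "node_buffer", num_nodes, sizeof(int));
  pool.register_memory_chunk("coarsening", "edge_buffer", num_edges, sizeof(int));
  // Nothing is allocated until this point; allocation runs in parallel and, by
  // default, optimizes chunk reuse across stages.
  pool.allocate_memory_chunks();
}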
- char* request_mem_chunk(const std::string& group, - const std::string& key, - const size_t num_elements, - const size_t size) { + char *request_mem_chunk(const std::string &group, const std::string &key, + const size_t num_elements, const size_t size) + { const size_t size_in_bytes = num_elements * size; DBG << "Requests memory chunk (" << group << "," << key << ")" - << "of" << size_in_megabyte(size_in_bytes) << "MB" + << "of" << size_in_megabyte(size_in_bytes) << "MB" << "in memory pool"; - if ( !_use_minimum_allocation_size || size_in_bytes > MINIMUM_ALLOCATION_SIZE ) { + if(!_use_minimum_allocation_size || size_in_bytes > MINIMUM_ALLOCATION_SIZE) + { std::shared_lock lock(_memory_mutex); - MemoryChunk* chunk = find_memory_chunk(group, key); + MemoryChunk *chunk = find_memory_chunk(group, key); - if ( chunk && size_in_bytes <= chunk->size_in_bytes() ) { - char* data = chunk->request_chunk(); - if ( data ) { + if(chunk && size_in_bytes <= chunk->size_in_bytes()) + { + char *data = chunk->request_chunk(); + if(data) + { DBG << "Memory chunk request (" << group << "," << key << ")" << "was successful"; return data; @@ -322,33 +337,40 @@ class MemoryPoolT { // ! Requests an unused memory chunk. If memory usage optimization are // ! activated some memory chunks have unused memory segments due to // ! overallocations. - char* request_unused_mem_chunk(const size_t num_elements, - const size_t size, - const bool align_with_page_size = true) { - if ( _is_initialized ) { - DBG << "Request unused memory chunk of" - << size_in_megabyte(num_elements * size) << "MB"; + char *request_unused_mem_chunk(const size_t num_elements, const size_t size, + const bool align_with_page_size = true) + { + if(_is_initialized) + { + DBG << "Request unused memory chunk of" << size_in_megabyte(num_elements * size) + << "MB"; const size_t size_in_bytes = num_elements * size; - if (_use_unused_memory_chunks && - (!_use_minimum_allocation_size || size_in_bytes > MINIMUM_ALLOCATION_SIZE)) { + if(_use_unused_memory_chunks && + (!_use_minimum_allocation_size || size_in_bytes > MINIMUM_ALLOCATION_SIZE)) + { std::shared_lock lock(_memory_mutex); const size_t n = _active_memory_chunks.size(); - if (n > 0) { + if(n > 0) + { const size_t end = _next_active_memory_chunk.load() % n; const size_t start = (end + 1) % n; - for (size_t i = start;; i = (i + 1) % n) { + for(size_t i = start;; i = (i + 1) % n) + { size_t memory_id = _active_memory_chunks[i]; ASSERT(memory_id < _memory_chunks.size()); char *data = _memory_chunks[memory_id].request_unused_chunk( - size_in_bytes, align_with_page_size ? _page_size : UL(1)); - if (data) { + size_in_bytes, align_with_page_size ? _page_size : UL(1)); + if(data) + { DBG << "Memory chunk request for an unsed memory chunk was successful"; - if (_use_round_robin_assignment) { + if(_use_round_robin_assignment) + { ++_next_active_memory_chunk; } return data; } - if (i == end) { + if(i == end) + { break; } } @@ -362,13 +384,16 @@ class MemoryPoolT { // ! Returns the memory chunk under the corresponding group with // ! the specified key. In contrast to assign_mem_chunk, no explicit // ! checks are performed, if chunk is already assigned. 
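Because request_mem_chunk returns nullptr when the chunk is already assigned, too small, or unknown (as described above), callers need a fallback allocation. One hedged way to write that pattern, with the group/key names again illustrative:

#include <cstddef>

#include <tbb/scalable_allocator.h>

#include "mt-kahypar/parallel/memory_pool.h"

// Tries the pool first and falls back to scalable_calloc, mirroring the zero-initialized
// memory the pool itself hands out. The caller must remember which path was taken in
// order to release (pool) or free (fallback) the buffer correctly later.
int *acquire_node_buffer(const size_t num_nodes)
{
  using mt_kahypar::parallel::MemoryPoolT;
  char *raw = MemoryPoolT::instance().request_mem_chunk("preprocessing", "node_buffer",
                                                        num_nodes, sizeof(int));
  if(raw == nullptr)
  {
    raw = static_cast<char *>(scalable_calloc(num_nodes, sizeof(int)));
  }
  return reinterpret_cast<int *>(raw);
}
// Later: MemoryPoolT::instance().release_mem_chunk("preprocessing", "node_buffer");
// and, once the whole stage completes: release_mem_group("preprocessing").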
- char* mem_chunk(const std::string& group, - const std::string& key) { + char *mem_chunk(const std::string &group, const std::string &key) + { std::shared_lock lock(_memory_mutex); - MemoryChunk* chunk = find_memory_chunk(group, key); - if ( chunk ) { + MemoryChunk *chunk = find_memory_chunk(group, key); + if(chunk) + { return chunk->_data; - } else { + } + else + { return nullptr; } } @@ -376,11 +401,12 @@ class MemoryPoolT { // ! Releases the memory chunk under the corresponding group with // ! the specified key. Afterwards, memory chunk is available for // ! further requests. - void release_mem_chunk(const std::string& group, - const std::string& key) { + void release_mem_chunk(const std::string &group, const std::string &key) + { std::shared_lock lock(_memory_mutex); - MemoryChunk* chunk = find_memory_chunk(group, key); - if ( chunk ) { + MemoryChunk *chunk = find_memory_chunk(group, key); + if(chunk) + { DBG << "Release memory chunk (" << group << "," << key << ")"; chunk->release_chunk(); } @@ -389,33 +415,43 @@ class MemoryPoolT { // ! Signals that the memory of the corresponding group is not // ! required any more. If an optimized memory allocation strategy // ! was calculated before, the memory is passed to next group. - void release_mem_group(const std::string& group) { + void release_mem_group(const std::string &group) + { std::unique_lock lock(_memory_mutex); - if ( _memory_groups.find(group) != _memory_groups.end() ) { - ASSERT([&] { - for ( const auto& key : _memory_groups.at(group)._key_to_memory_id ) { - const size_t memory_id = key.second; - if ( _memory_chunks[memory_id]._is_assigned ) { - LOG << "(" << group << "," << key.first << ")" - << "is assigned"; - return false; - } - } - return true; - }(), "Some memory chunks of group '" << group << "' are still assigned"); + if(_memory_groups.find(group) != _memory_groups.end()) + { + ASSERT( + [&] { + for(const auto &key : _memory_groups.at(group)._key_to_memory_id) + { + const size_t memory_id = key.second; + if(_memory_chunks[memory_id]._is_assigned) + { + LOG << "(" << group << "," << key.first << ")" + << "is assigned"; + return false; + } + } + return true; + }(), + "Some memory chunks of group '" << group << "' are still assigned"); DBG << "Release memory of group '" << group << "'"; - for ( const auto& key : _memory_groups.at(group)._key_to_memory_id ) { + for(const auto &key : _memory_groups.at(group)._key_to_memory_id) + { const size_t memory_id = key.second; - MemoryChunk& lhs = _memory_chunks[memory_id]; + MemoryChunk &lhs = _memory_chunks[memory_id]; ASSERT(lhs._data); - if ( lhs._next_memory_chunk_id != kInvalidMemoryChunk ) { + if(lhs._next_memory_chunk_id != kInvalidMemoryChunk) + { ASSERT(lhs._next_memory_chunk_id < _memory_chunks.size()); - MemoryChunk& rhs = _memory_chunks[lhs._next_memory_chunk_id]; + MemoryChunk &rhs = _memory_chunks[lhs._next_memory_chunk_id]; rhs._data = lhs._data; lhs._data = nullptr; - } else { + } + else + { // Memory chunk is not required any more // => make it available for unused memory requests lhs._used_size = 0; @@ -427,23 +463,30 @@ class MemoryPoolT { } // Resets the memory pool to the state after all memory chunks are allocated - void reset() { + void reset() + { std::unique_lock lock(_memory_mutex); // Find all root memory chunks of an optimization path std::vector in_degree(_memory_chunks.size(), 0); - for ( const MemoryChunk& chunk : _memory_chunks ) { - if ( chunk._next_memory_chunk_id != kInvalidMemoryChunk ) { + for(const MemoryChunk &chunk : _memory_chunks) + { + 
if(chunk._next_memory_chunk_id != kInvalidMemoryChunk) + { ++in_degree[chunk._next_memory_chunk_id]; } } // Move memory chunks back to root memory chunks - for ( size_t i = 0; i < _memory_chunks.size(); ++i ) { - if ( in_degree[i] == 0 && !_memory_chunks[i]._data ) { + for(size_t i = 0; i < _memory_chunks.size(); ++i) + { + if(in_degree[i] == 0 && !_memory_chunks[i]._data) + { size_t current_mem_chunk = i; - while ( !_memory_chunks[current_mem_chunk]._data ) { - ASSERT(_memory_chunks[current_mem_chunk]._next_memory_chunk_id != kInvalidMemoryChunk); + while(!_memory_chunks[current_mem_chunk]._data) + { + ASSERT(_memory_chunks[current_mem_chunk]._next_memory_chunk_id != + kInvalidMemoryChunk); current_mem_chunk = _memory_chunks[current_mem_chunk]._next_memory_chunk_id; } ASSERT(_memory_chunks[current_mem_chunk]._data); @@ -461,12 +504,12 @@ class MemoryPoolT { } // ! Frees all memory chunks in parallel - void free_memory_chunks() { + void free_memory_chunks() + { std::unique_lock lock(_memory_mutex); const size_t num_memory_segments = _memory_chunks.size(); - tbb::parallel_for(UL(0), num_memory_segments, [&](const size_t i) { - _memory_chunks[i].free(); - }); + tbb::parallel_for(UL(0), num_memory_segments, + [&](const size_t i) { _memory_chunks[i].free(); }); _memory_chunks.clear(); _memory_groups.clear(); _active_memory_chunks.clear(); @@ -474,60 +517,60 @@ class MemoryPoolT { } // ! Only for testing - void deactivate_round_robin_assignment() { - _use_round_robin_assignment = false; - } + void deactivate_round_robin_assignment() { _use_round_robin_assignment = false; } // ! Only for testing - void deactivate_minimum_allocation_size() { - _use_minimum_allocation_size = false; - } + void deactivate_minimum_allocation_size() { _use_minimum_allocation_size = false; } - bool is_unused_memory_allocations_activated() const { + bool is_unused_memory_allocations_activated() const + { return _use_unused_memory_chunks; } - void activate_unused_memory_allocations() { - _use_unused_memory_chunks = true; - } + void activate_unused_memory_allocations() { _use_unused_memory_chunks = true; } - void deactivate_unused_memory_allocations() { - _use_unused_memory_chunks = false; - } + void deactivate_unused_memory_allocations() { _use_unused_memory_chunks = false; } // ! Returns the size in bytes of the memory chunk under the // ! corresponding group with the specified key. - size_t size_in_bytes(const std::string& group, - const std::string& key) { + size_t size_in_bytes(const std::string &group, const std::string &key) + { std::shared_lock lock(_memory_mutex); - MemoryChunk* chunk = find_memory_chunk(group, key); - if ( chunk ) { + MemoryChunk *chunk = find_memory_chunk(group, key); + if(chunk) + { return chunk->size_in_bytes(); - } else { + } + else + { return 0; } } // ! Builds a memory tree that reflects the memory // ! 
consumption of the memory pool - void memory_consumption(utils::MemoryTreeNode* parent) const { + void memory_consumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); std::shared_lock lock(_memory_mutex); - for ( const auto& group_element : _memory_groups ) { - const std::string& group = group_element.first; - const auto& key_to_memory_id = group_element.second._key_to_memory_id; - utils::MemoryTreeNode* group_node = parent->addChild(group); - for ( const auto& element : key_to_memory_id ) { - const std::string& key = element.first; + for(const auto &group_element : _memory_groups) + { + const std::string &group = group_element.first; + const auto &key_to_memory_id = group_element.second._key_to_memory_id; + utils::MemoryTreeNode *group_node = parent->addChild(group); + for(const auto &element : key_to_memory_id) + { + const std::string &key = element.first; const size_t memory_id = element.second; ASSERT(memory_id < _memory_chunks.size()); group_node->addChild(key, - std::max(_memory_chunks[memory_id].size_in_bytes(), UL(1))); + std::max(_memory_chunks[memory_id].size_in_bytes(), UL(1))); } } } - void explain_optimizations() const { + void explain_optimizations() const + { std::unique_lock lock(_memory_mutex); using GroupKey = std::pair; size_t total_size = 0; @@ -537,77 +580,85 @@ class MemoryPoolT { << "\n if corresponding memory group is released.\n"; std::unordered_map memory_id_to_group_key; - for ( const auto& mem_group : _memory_groups ) { - const std::string& group = mem_group.first; - for ( const auto& mem_key : mem_group.second._key_to_memory_id ) { - const std::string& key = mem_key.first; + for(const auto &mem_group : _memory_groups) + { + const std::string &group = mem_group.first; + for(const auto &mem_key : mem_group.second._key_to_memory_id) + { + const std::string &key = mem_key.first; const size_t memory_id = mem_key.second; memory_id_to_group_key.emplace(std::piecewise_construct, - std::forward_as_tuple(memory_id), std::forward_as_tuple(group, key)); + std::forward_as_tuple(memory_id), + std::forward_as_tuple(group, key)); } } - for ( size_t memory_id = 0; memory_id < _memory_chunks.size(); ++memory_id ) { - if ( !_memory_chunks[memory_id]._defer_allocation ) { + for(size_t memory_id = 0; memory_id < _memory_chunks.size(); ++memory_id) + { + if(!_memory_chunks[memory_id]._defer_allocation) + { size_t current_memory_id = memory_id; std::string memory_path_desc = " "; size_t path_total_size = 0; const size_t path_allocation_size = _memory_chunks[memory_id].size_in_bytes(); - while ( current_memory_id != kInvalidMemoryChunk ) { - const MemoryChunk& mem_chunk = _memory_chunks[current_memory_id]; + while(current_memory_id != kInvalidMemoryChunk) + { + const MemoryChunk &mem_chunk = _memory_chunks[current_memory_id]; path_total_size += mem_chunk._initial_size; - const std::string& group = memory_id_to_group_key[current_memory_id].first; - const std::string& key = memory_id_to_group_key[current_memory_id].second; - memory_path_desc += "(" + group + "," + key + ") = " - + std::to_string(size_in_megabyte(mem_chunk._initial_size)) - + " MB"; + const std::string &group = memory_id_to_group_key[current_memory_id].first; + const std::string &key = memory_id_to_group_key[current_memory_id].second; + memory_path_desc += + "(" + group + "," + key + + ") = " + std::to_string(size_in_megabyte(mem_chunk._initial_size)) + " MB"; current_memory_id = mem_chunk._next_memory_chunk_id; - if ( current_memory_id != kInvalidMemoryChunk ) { + if(current_memory_id != kInvalidMemoryChunk) 
+ { memory_path_desc += " -> "; } } total_size += path_total_size; allocated_size += path_allocation_size; - LOG << " Allocated" << size_in_megabyte(path_allocation_size) << "MB for the following memory path" - << "and saved" << size_in_megabyte(path_total_size - path_allocation_size) << "MB:"; + LOG << " Allocated" << size_in_megabyte(path_allocation_size) + << "MB for the following memory path" + << "and saved" << size_in_megabyte(path_total_size - path_allocation_size) + << "MB:"; LOG << memory_path_desc << "\n"; } } LOG << BOLD << "Summary:" << END; - LOG << " Size of registered memory chunks =" << size_in_megabyte(total_size) << "MB"; - LOG << " Initial allocated size of memory chunks =" << size_in_megabyte(allocated_size) << "MB"; - LOG << " Saved memory due to memory optimizations =" << size_in_megabyte(total_size - allocated_size) << "MB"; + LOG << " Size of registered memory chunks =" << size_in_megabyte(total_size) + << "MB"; + LOG << " Initial allocated size of memory chunks =" + << size_in_megabyte(allocated_size) << "MB"; + LOG << " Saved memory due to memory optimizations =" + << size_in_megabyte(total_size - allocated_size) << "MB"; } - private: +private: explicit MemoryPoolT() : - _memory_mutex(), - _is_initialized(false), - _page_size(0), - _memory_groups(), - _memory_chunks(), - _next_active_memory_chunk(0), - _active_memory_chunks(), - _use_round_robin_assignment(true), - _use_minimum_allocation_size(true), - _use_unused_memory_chunks(true) { - #if defined(__linux__) or defined(__APPLE__) - _page_size = sysconf(_SC_PAGE_SIZE); - #elif _WIN32 - SYSTEM_INFO sysInfo; - GetSystemInfo(&sysInfo); - _page_size = sysInfo.dwPageSize; - #endif + _memory_mutex(), _is_initialized(false), _page_size(0), _memory_groups(), + _memory_chunks(), _next_active_memory_chunk(0), _active_memory_chunks(), + _use_round_robin_assignment(true), _use_minimum_allocation_size(true), + _use_unused_memory_chunks(true) + { +#if defined(__linux__) or defined(__APPLE__) + _page_size = sysconf(_SC_PAGE_SIZE); +#elif _WIN32 + SYSTEM_INFO sysInfo; + GetSystemInfo(&sysInfo); + _page_size = sysInfo.dwPageSize; +#endif } // ! Returns a pointer to memory chunk under the corresponding group with // ! the specified key. - MemoryChunk* find_memory_chunk(const std::string& group, - const std::string& key) { + MemoryChunk *find_memory_chunk(const std::string &group, const std::string &key) + { - if ( _memory_groups.find(group) != _memory_groups.end() && - _memory_groups.at(group).containsKey(key) ) { + if(_memory_groups.find(group) != _memory_groups.end() && + _memory_groups.at(group).containsKey(key)) + { const size_t memory_id = _memory_groups.at(group).getKey(key); ASSERT(memory_id < _memory_chunks.size()); return &_memory_chunks[memory_id]; @@ -615,17 +666,21 @@ class MemoryPoolT { return nullptr; } - void update_active_memory_chunks() { + void update_active_memory_chunks() + { _active_memory_chunks.clear(); - for ( size_t memory_id = 0; memory_id < _memory_chunks.size(); ++memory_id ) { - if ( _memory_chunks[memory_id]._data ) { + for(size_t memory_id = 0; memory_id < _memory_chunks.size(); ++memory_id) + { + if(_memory_chunks[memory_id]._data) + { _active_memory_chunks.push_back(memory_id); } } _next_active_memory_chunk = 0; } - static double size_in_megabyte(const size_t size_in_bytes) { + static double size_in_megabyte(const size_t size_in_bytes) + { return static_cast(size_in_bytes) / 1000000.0; } @@ -636,38 +691,44 @@ class MemoryPoolT { // ! the memory chunks of a group are not required any more // ! 
(release_memory_group), than the memory chunks are transfered // ! to next group. - void optimize_memory_allocations() { + void optimize_memory_allocations() + { using MemGroup = std::pair; // - using MemChunk = std::pair; // + using MemChunk = std::pair; // // Sort memory groups according to stage std::vector mem_groups; - for ( const auto& mem_group : _memory_groups ) { + for(const auto &mem_group : _memory_groups) + { mem_groups.push_back(std::make_pair(mem_group.first, mem_group.second._stage)); } std::sort(mem_groups.begin(), mem_groups.end(), - [&](const MemGroup& lhs, const MemGroup& rhs) { - return lhs.second < rhs.second; - }); - - auto fill_mem_chunks = [&](std::vector& mem_chunks, - const std::string group) { - for ( const auto& key : _memory_groups.at(group)._key_to_memory_id ) { + [&](const MemGroup &lhs, const MemGroup &rhs) { + return lhs.second < rhs.second; + }); + + auto fill_mem_chunks = [&](std::vector &mem_chunks, + const std::string group) { + for(const auto &key : _memory_groups.at(group)._key_to_memory_id) + { const size_t memory_id = key.second; - const MemoryChunk& memory_chunk = _memory_chunks[memory_id]; + const MemoryChunk &memory_chunk = _memory_chunks[memory_id]; const size_t size_in_bytes = memory_chunk._num_elements * memory_chunk._size; mem_chunks.push_back(std::make_pair(memory_id, size_in_bytes)); } std::sort(mem_chunks.begin(), mem_chunks.end(), - [&](const MemChunk& lhs, const MemChunk& rhs) { - return lhs.second < rhs.second || (lhs.second == rhs.second && lhs.first < rhs.first ); - }); + [&](const MemChunk &lhs, const MemChunk &rhs) { + return lhs.second < rhs.second || + (lhs.second == rhs.second && lhs.first < rhs.first); + }); }; - if ( !mem_groups.empty() ) { + if(!mem_groups.empty()) + { std::vector lhs_mem_chunks; fill_mem_chunks(lhs_mem_chunks, mem_groups[0].first); - for ( size_t i = 1; i < mem_groups.size(); ++i /* i = stage */ ) { + for(size_t i = 1; i < mem_groups.size(); ++i /* i = stage */) + { // lhs_mem_chunks contains all memory chunks corresponding // to a stage j with j < i that are not matched (in increasing // order of its size in bytes). rhs_mem_chunks contains all memory @@ -677,7 +738,8 @@ class MemoryPoolT { // matched. 
std::vector rhs_mem_chunks; fill_mem_chunks(rhs_mem_chunks, mem_groups[i].first); - while ( !lhs_mem_chunks.empty() && !rhs_mem_chunks.empty() ) { + while(!lhs_mem_chunks.empty() && !rhs_mem_chunks.empty()) + { const size_t lhs_mem_id = lhs_mem_chunks.back().first; const size_t rhs_mem_id = rhs_mem_chunks.back().first; ASSERT(lhs_mem_id != rhs_mem_id); @@ -694,26 +756,30 @@ class MemoryPoolT { auto augment_memory = [&](const size_t memory_id) { ASSERT(memory_id < _memory_chunks.size()); - MemoryChunk& memory_chunk = _memory_chunks[memory_id]; - if ( !memory_chunk._defer_allocation ) { - std::vector s; + MemoryChunk &memory_chunk = _memory_chunks[memory_id]; + if(!memory_chunk._defer_allocation) + { + std::vector s; s.push_back(&memory_chunk); size_t max_num_elements = memory_chunk._num_elements; size_t max_size = memory_chunk._size; - while ( s.back()->_next_memory_chunk_id != kInvalidMemoryChunk ) { + while(s.back()->_next_memory_chunk_id != kInvalidMemoryChunk) + { const size_t next_memory_id = s.back()->_next_memory_chunk_id; ASSERT(next_memory_id < _memory_chunks.size()); - MemoryChunk& next_memory_chunk = _memory_chunks[next_memory_id]; + MemoryChunk &next_memory_chunk = _memory_chunks[next_memory_id]; const size_t num_elements = next_memory_chunk._num_elements; const size_t size = next_memory_chunk._size; - if ( num_elements * size > max_num_elements * max_size ) { + if(num_elements * size > max_num_elements * max_size) + { max_num_elements = num_elements; max_size = size; } s.push_back(&next_memory_chunk); } - while ( !s.empty() ) { + while(!s.empty()) + { s.back()->_num_elements = max_num_elements; s.back()->_size = max_size; s.back()->_total_size = max_size * max_num_elements; @@ -724,7 +790,8 @@ class MemoryPoolT { // Adapts the allocation sizes along path of matched memory chunks. // Allocation size must be the maximum size along that path. - for ( size_t memory_id = 0; memory_id < _memory_chunks.size(); ++memory_id ) { + for(size_t memory_id = 0; memory_id < _memory_chunks.size(); ++memory_id) + { augment_memory(memory_id); } } @@ -756,86 +823,73 @@ class MemoryPoolT { * instances are partitioned simultanously. Therefore, we disable the memory * pool in case we compile the library interface. 
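A note on the alias selected at the end of this header: in library mode every operation on DoNothingMemoryPool is a no-op and every request yields nullptr, so code written against the MemoryPool alias works unchanged in both builds. A small sketch (group/key names illustrative):

#include <cstddef>
#include <string>

#include "mt-kahypar/parallel/memory_pool.h"

// Returns the registered size of a pooled chunk; with DoNothingMemoryPool this is
// always 0, which callers can treat the same way as a failed request.
size_t pooled_bytes(const std::string &group, const std::string &key)
{
  using mt_kahypar::parallel::MemoryPool;
  return MemoryPool::instance().size_in_bytes(group, key);
}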
*/ -class DoNothingMemoryPool { +class DoNothingMemoryPool +{ - public: - DoNothingMemoryPool(const DoNothingMemoryPool&) = delete; - DoNothingMemoryPool & operator= (const DoNothingMemoryPool &) = delete; +public: + DoNothingMemoryPool(const DoNothingMemoryPool &) = delete; + DoNothingMemoryPool &operator=(const DoNothingMemoryPool &) = delete; - DoNothingMemoryPool(DoNothingMemoryPool&&) = delete; - DoNothingMemoryPool & operator= (DoNothingMemoryPool &&) = delete; + DoNothingMemoryPool(DoNothingMemoryPool &&) = delete; + DoNothingMemoryPool &operator=(DoNothingMemoryPool &&) = delete; - static DoNothingMemoryPool& instance() { + static DoNothingMemoryPool &instance() + { static DoNothingMemoryPool instance; return instance; } - bool isInitialized() const { - return true; - } + bool isInitialized() const { return true; } - void register_memory_group(const std::string&, - const size_t) { } + void register_memory_group(const std::string &, const size_t) {} - void register_memory_chunk(const std::string&, - const std::string&, - const size_t, - const size_t) { } + void register_memory_chunk(const std::string &, const std::string &, const size_t, + const size_t) + { + } - void allocate_memory_chunks() { } - void allocate_memory_chunks(const bool) { } + void allocate_memory_chunks() {} + void allocate_memory_chunks(const bool) {} - char* request_mem_chunk(const std::string&, - const std::string&, - const size_t, - const size_t) { + char *request_mem_chunk(const std::string &, const std::string &, const size_t, + const size_t) + { return nullptr; } - char* request_unused_mem_chunk(const size_t, - const size_t) { - return nullptr; - } - char* request_unused_mem_chunk(const size_t, - const size_t, - const bool) { + char *request_unused_mem_chunk(const size_t, const size_t) { return nullptr; } + char *request_unused_mem_chunk(const size_t, const size_t, const bool) + { return nullptr; } - char* mem_chunk(const std::string&, - const std::string&) { - return nullptr; - } - void release_mem_chunk(const std::string&, - const std::string&) { } + char *mem_chunk(const std::string &, const std::string &) { return nullptr; } + void release_mem_chunk(const std::string &, const std::string &) {} - void release_mem_group(const std::string&) { } + void release_mem_group(const std::string &) {} - void reset() { } + void reset() {} void free_memory_chunks() {} // ! Only for testing - void deactivate_round_robin_assignment() { } + void deactivate_round_robin_assignment() {} // ! 
Only for testing - void deactivate_minimum_allocation_size() { } + void deactivate_minimum_allocation_size() {} - void activate_unused_memory_allocations() { } + void activate_unused_memory_allocations() {} - void deactivate_unused_memory_allocations() { } + void deactivate_unused_memory_allocations() {} - size_t size_in_bytes(const std::string&, - const std::string&) { - return 0; - } + size_t size_in_bytes(const std::string &, const std::string &) { return 0; } - void memory_consumption(utils::MemoryTreeNode*) const { } + void memory_consumption(utils::MemoryTreeNode *) const {} - void explain_optimizations() const { } + void explain_optimizations() const {} - private: - DoNothingMemoryPool() { } +private: + DoNothingMemoryPool() {} }; #ifdef MT_KAHYPAR_LIBRARY_MODE @@ -844,5 +898,5 @@ using MemoryPool = DoNothingMemoryPool; using MemoryPool = MemoryPoolT; #endif -} // namespace parallel -} // namespace mt_kahypar +} // namespace parallel +} // namespace mt_kahypar diff --git a/mt-kahypar/parallel/parallel_counting_sort.h b/mt-kahypar/parallel/parallel_counting_sort.h index 573dff6c3..6c930b8c1 100644 --- a/mt-kahypar/parallel/parallel_counting_sort.h +++ b/mt-kahypar/parallel/parallel_counting_sort.h @@ -27,12 +27,12 @@ #pragma once +#include #include #include #include #include #include -#include #include "mt-kahypar/parallel/chunking.h" #include "mt-kahypar/parallel/parallel_prefix_sum.h" @@ -42,65 +42,83 @@ namespace mt_kahypar::parallel { // KeyFunc must be thread safe // returns the bucket bounds template -vec counting_sort(const InputRange& input, OutputRange& output, - size_t max_num_buckets, KeyFunc& get_bucket, size_t num_tasks) { +vec counting_sort(const InputRange &input, OutputRange &output, + size_t max_num_buckets, KeyFunc &get_bucket, size_t num_tasks) +{ vec global_bucket_begins(max_num_buckets + 2, 0); const size_t n = input.size(); - if (num_tasks > 1 && n > (1 << 17)) { + if(num_tasks > 1 && n > (1 << 17)) + { const size_t chunk_size = chunking::idiv_ceil(n, num_tasks); // thread local counting - vec> thread_local_bucket_ends(num_tasks); // use vector of vector to avoid false sharing. maybe even task-local vector and then copy? + vec > thread_local_bucket_ends( + num_tasks); // use vector of vector to avoid false sharing. maybe even task-local + // vector and then copy? tbb::parallel_for(size_t(0), num_tasks, [&](const size_t taskID) { - vec& bucket_ends = thread_local_bucket_ends[taskID]; + vec &bucket_ends = thread_local_bucket_ends[taskID]; bucket_ends.resize(max_num_buckets, 0); - for (auto[i,last] = chunking::bounds(taskID, n, chunk_size); i < last; ++i) { + for(auto [i, last] = chunking::bounds(taskID, n, chunk_size); i < last; ++i) + { bucket_ends[get_bucket(input[i])]++; } }); // prefix sum local bucket sizes for local offsets - if (max_num_buckets > 1 << 10) { + if(max_num_buckets > 1 << 10) + { tbb::parallel_for(UL(0), max_num_buckets, [&](size_t bucket) { - for (size_t i = 1; i < num_tasks; ++i) { - thread_local_bucket_ends[i][bucket] += thread_local_bucket_ends[i - 1][bucket]; // EVIL for locality! + for(size_t i = 1; i < num_tasks; ++i) + { + thread_local_bucket_ends[i][bucket] += + thread_local_bucket_ends[i - 1][bucket]; // EVIL for locality! } }); - } else { - for (size_t bucket = 0; bucket < max_num_buckets; ++bucket) { - for (size_t i = 1; i < num_tasks; ++i) { - thread_local_bucket_ends[i][bucket] += thread_local_bucket_ends[i - 1][bucket]; // EVIL for locality! 
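counting_sort returns the prefix-summed bucket boundaries, so callers can slice the sorted output by bucket afterwards. A usage sketch in which the element type and key function are invented for illustration (the real call sites are not part of this diff):

#include <cstddef>
#include <cstdint>
#include <vector>

#include "mt-kahypar/parallel/parallel_counting_sort.h"

void bucket_by_degree(const std::vector<uint32_t> &degrees, const size_t max_degree,
                      const size_t num_tasks)
{
  std::vector<uint32_t> sorted(degrees.size());
  // KeyFunc must be thread safe, as noted above; a stateless lambda satisfies that.
  auto get_bucket = [](const uint32_t degree) { return static_cast<size_t>(degree); };
  auto bucket_bounds = mt_kahypar::parallel::counting_sort(degrees, sorted, max_degree + 1,
                                                           get_bucket, num_tasks);
  // Elements of bucket b now occupy sorted[bucket_bounds[b] .. bucket_bounds[b + 1]).
}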
+ } + else + { + for(size_t bucket = 0; bucket < max_num_buckets; ++bucket) + { + for(size_t i = 1; i < num_tasks; ++i) + { + thread_local_bucket_ends[i][bucket] += + thread_local_bucket_ends[i - 1][bucket]; // EVIL for locality! } } } // prefix sum over bucket - assert(global_bucket_begins.size() >= thread_local_bucket_ends.back().size() + 1); - parallel_prefix_sum(thread_local_bucket_ends.back().cbegin(), thread_local_bucket_ends.back().cend(), - global_bucket_begins.begin() + 1, - std::plus<>(), 0); + assert(global_bucket_begins.size() >= thread_local_bucket_ends.back().size() + 1); + parallel_prefix_sum(thread_local_bucket_ends.back().cbegin(), + thread_local_bucket_ends.back().cend(), + global_bucket_begins.begin() + 1, std::plus<>(), 0); // element assignment tbb::parallel_for(size_t(0), num_tasks, [&](const size_t taskID) { - vec& bucketEnds = thread_local_bucket_ends[taskID]; + vec &bucketEnds = thread_local_bucket_ends[taskID]; // reverse iteration makes the algorithm stable - for (auto [first,i] = chunking::bounds(taskID, n, chunk_size); i > first; --i) { - size_t bucket = get_bucket(input[i-1]); - output[global_bucket_begins[bucket] + (--bucketEnds[bucket])] = input[i-1]; + for(auto [first, i] = chunking::bounds(taskID, n, chunk_size); i > first; --i) + { + size_t bucket = get_bucket(input[i - 1]); + output[global_bucket_begins[bucket] + (--bucketEnds[bucket])] = input[i - 1]; } }); - - } else { - for (size_t i = 0; i < input.size(); ++i) global_bucket_begins[get_bucket(input[i]) + 2]++; - std::partial_sum(global_bucket_begins.begin(), global_bucket_begins.end(), global_bucket_begins.begin()); - for (size_t i = 0; i < input.size(); ++i) output[global_bucket_begins[get_bucket(input[i]) + 1]++] = input[i] ; + } + else + { + for(size_t i = 0; i < input.size(); ++i) + global_bucket_begins[get_bucket(input[i]) + 2]++; + std::partial_sum(global_bucket_begins.begin(), global_bucket_begins.end(), + global_bucket_begins.begin()); + for(size_t i = 0; i < input.size(); ++i) + output[global_bucket_begins[get_bucket(input[i]) + 1]++] = input[i]; } - global_bucket_begins.pop_back(); // did the +2 trick + global_bucket_begins.pop_back(); // did the +2 trick return global_bucket_begins; } -} // namespace mt_kahypar::parallel +} // namespace mt_kahypar::parallel diff --git a/mt-kahypar/parallel/parallel_prefix_sum.h b/mt-kahypar/parallel/parallel_prefix_sum.h index f1f59dc90..2112ff672 100644 --- a/mt-kahypar/parallel/parallel_prefix_sum.h +++ b/mt-kahypar/parallel/parallel_prefix_sum.h @@ -36,142 +36,145 @@ namespace mt_kahypar { - template - struct ParallelPrefixSumBody { - using T = typename ::std::iterator_traits::value_type; - - InIt first; - OutIt out; - T sum, neutral_element; - BinOp& f; - - ParallelPrefixSumBody(InIt first, OutIt out, T neutral_element, BinOp& f): - first(first), - out(out), - sum(neutral_element), - neutral_element(neutral_element), - f(f) { } - - ParallelPrefixSumBody(ParallelPrefixSumBody& other, tbb::split) : - first(other.first), - out(other.out), - sum(other.neutral_element), - neutral_element(other.neutral_element), - f(other.f) { } - - void operator()(const tbb::blocked_range& r, tbb::pre_scan_tag ) { - for (size_t i = r.begin(); i < r.end(); ++i) { - sum = f(sum, *(first + i)); - } - } +template +struct ParallelPrefixSumBody +{ + using T = typename ::std::iterator_traits::value_type; + + InIt first; + OutIt out; + T sum, neutral_element; + BinOp &f; + + ParallelPrefixSumBody(InIt first, OutIt out, T neutral_element, BinOp &f) : + first(first), out(out), 
sum(neutral_element), neutral_element(neutral_element), f(f) + { + } - void operator()(const tbb::blocked_range& r, tbb::final_scan_tag ) { - for (size_t i = r.begin(); i < r.end(); ++i) { - sum = f(sum, *(first + i)); - *(out + i) = sum; - } - } + ParallelPrefixSumBody(ParallelPrefixSumBody &other, tbb::split) : + first(other.first), out(other.out), sum(other.neutral_element), + neutral_element(other.neutral_element), f(other.f) + { + } - void reverse_join(ParallelPrefixSumBody& other) { - sum = f(sum, other.sum); + void operator()(const tbb::blocked_range &r, tbb::pre_scan_tag) + { + for(size_t i = r.begin(); i < r.end(); ++i) + { + sum = f(sum, *(first + i)); } + } - void assign(ParallelPrefixSumBody& other) { - sum = other.sum; + void operator()(const tbb::blocked_range &r, tbb::final_scan_tag) + { + for(size_t i = r.begin(); i < r.end(); ++i) + { + sum = f(sum, *(first + i)); + *(out + i) = sum; } + } - }; + void reverse_join(ParallelPrefixSumBody &other) { sum = f(sum, other.sum); } - template - static void sequential_prefix_sum(InIt first, InIt last, OutIt d, typename std::iterator_traits::value_type init, BinOp f) { - while (first != last) { - init = f(init, *first); - *d = init; - ++d; - ++first; - } - } + void assign(ParallelPrefixSumBody &other) { sum = other.sum; } +}; - template - static void parallel_prefix_sum(InIt first, InIt last, OutIt d, BinOp f, - typename std::iterator_traits::value_type neutral_element) { +template +static void sequential_prefix_sum(InIt first, InIt last, OutIt d, + typename std::iterator_traits::value_type init, + BinOp f) +{ + while(first != last) + { + init = f(init, *first); + *d = init; + ++d; + ++first; + } +} - typename std::iterator_traits::difference_type n = last - first; +template +static void +parallel_prefix_sum(InIt first, InIt last, OutIt d, BinOp f, + typename std::iterator_traits::value_type neutral_element) +{ - if (n < (1 << 16)) { - return sequential_prefix_sum(first, last, d, neutral_element, f); - } + typename std::iterator_traits::difference_type n = last - first; - ParallelPrefixSumBody body(first, d, neutral_element, f); - tbb::parallel_scan(tbb::blocked_range(0, static_cast(n)), body); + if(n < (1 << 16)) + { + return sequential_prefix_sum(first, last, d, neutral_element, f); } + ParallelPrefixSumBody body(first, d, neutral_element, f); + tbb::parallel_scan(tbb::blocked_range(0, static_cast(n)), body); +} + } namespace mt_kahypar::parallel { -template class V = parallel::scalable_vector> -class TBBPrefixSum { +template class V = parallel::scalable_vector> +class TBBPrefixSum +{ - public: - TBBPrefixSum(V& data) : - _sum(0), - _data(data) { } +public: + TBBPrefixSum(V &data) : _sum(0), _data(data) {} - TBBPrefixSum(TBBPrefixSum& prefix_sum, tbb::split) : - _sum(0), - _data(prefix_sum._data) { } + TBBPrefixSum(TBBPrefixSum &prefix_sum, tbb::split) : _sum(0), _data(prefix_sum._data) {} - T total_sum() const { - return _sum; - } + T total_sum() const { return _sum; } - size_t size() const { - return _data.size() + 1; - } + size_t size() const { return _data.size() + 1; } - T operator[] (const size_t i) const { + T operator[](const size_t i) const + { ASSERT(i <= _data.size()); - if ( i > 0 ) { + if(i > 0) + { return _data[i - 1]; - } else { + } + else + { return static_cast(0); } } - T value(const size_t i) const { + T value(const size_t i) const + { ASSERT(i < _data.size(), V(i) << V(_data.size())); - if ( i > 0 ) { + if(i > 0) + { return _data[i] - _data[i - 1]; - } else { + } + else + { return _data[0]; } } - template - void 
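
// Illustrative aside: the two-phase protocol of the scan bodies above
// (a pre-scan pass accumulates, the final pass writes) is what
// tbb::parallel_scan drives. A minimal standalone running sum using the
// lambda overload (assumed to be available in the TBB version in use):
#include <tbb/blocked_range.h>
#include <tbb/parallel_scan.h>
#include <cstddef>
#include <vector>

int running_sum(const std::vector<int>& in, std::vector<int>& out) {
  return tbb::parallel_scan(
      tbb::blocked_range<size_t>(0, in.size()), 0,
      [&](const tbb::blocked_range<size_t>& r, int sum, bool is_final) {
        for (size_t i = r.begin(); i < r.end(); ++i) {
          sum += in[i];
          if (is_final) out[i] = sum;   // only the final pass writes results
        }
        return sum;
      },
      [](int left, int right) { return left + right; });  // reverse join of partial sums
}
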
operator()(const tbb::blocked_range& range, Tag) { - T temp = _sum; - for( size_t i = range.begin(); i < range.end(); ++i ) { - temp = temp + _data[i]; - if( Tag::is_final_scan() ) { - _data[i] = temp; - } + template + void operator()(const tbb::blocked_range &range, Tag) + { + T temp = _sum; + for(size_t i = range.begin(); i < range.end(); ++i) + { + temp = temp + _data[i]; + if(Tag::is_final_scan()) + { + _data[i] = temp; } - _sum = temp; + } + _sum = temp; } - void reverse_join(TBBPrefixSum& prefix_sum) { - _sum += prefix_sum._sum; - } + void reverse_join(TBBPrefixSum &prefix_sum) { _sum += prefix_sum._sum; } - void assign(TBBPrefixSum& prefix_sum) { - _sum = prefix_sum._sum; - } + void assign(TBBPrefixSum &prefix_sum) { _sum = prefix_sum._sum; } - private: +private: T _sum; - V& _data; + V &_data; }; -} // namespace mt_kahypar::parallel +} // namespace mt_kahypar::parallel diff --git a/mt-kahypar/parallel/stl/scalable_queue.h b/mt-kahypar/parallel/stl/scalable_queue.h index 27d9f6c80..43ae22ccb 100644 --- a/mt-kahypar/parallel/stl/scalable_queue.h +++ b/mt-kahypar/parallel/stl/scalable_queue.h @@ -34,6 +34,6 @@ namespace mt_kahypar { namespace parallel { template -using scalable_queue = std::queue> >; -} // namespace parallel -} // namespace mt_kahypar +using scalable_queue = std::queue > >; +} // namespace parallel +} // namespace mt_kahypar diff --git a/mt-kahypar/parallel/stl/scalable_unique_ptr.h b/mt-kahypar/parallel/stl/scalable_unique_ptr.h index dedec0565..d2f6de82e 100644 --- a/mt-kahypar/parallel/stl/scalable_unique_ptr.h +++ b/mt-kahypar/parallel/stl/scalable_unique_ptr.h @@ -34,21 +34,21 @@ namespace mt_kahypar { namespace parallel { -template -struct tbb_deleter { - void operator()(T *p) { - scalable_free(p); - } +template +struct tbb_deleter +{ + void operator()(T *p) { scalable_free(p); } }; -template -using tbb_unique_ptr = std::unique_ptr>; +template +using tbb_unique_ptr = std::unique_ptr >; -template -static tbb_unique_ptr make_unique(const size_t size) { - T* ptr = (T*) scalable_malloc(sizeof(T) * size); +template +static tbb_unique_ptr make_unique(const size_t size) +{ + T *ptr = (T *)scalable_malloc(sizeof(T) * size); return tbb_unique_ptr(ptr, parallel::tbb_deleter()); } -} // namespace parallel -} // namespace mt_kahypar +} // namespace parallel +} // namespace mt_kahypar diff --git a/mt-kahypar/parallel/stl/scalable_vector.h b/mt-kahypar/parallel/stl/scalable_vector.h index b74807692..f438b8494 100644 --- a/mt-kahypar/parallel/stl/scalable_vector.h +++ b/mt-kahypar/parallel/stl/scalable_vector.h @@ -37,123 +37,71 @@ namespace mt_kahypar { -template -using vec = std::vector >; // shorter name +template +using vec = std::vector >; // shorter name namespace parallel { template using scalable_vector = std::vector >; -template -static inline void free(scalable_vector& vec) { +template +static inline void free(scalable_vector &vec) +{ scalable_vector tmp_vec; vec = std::move(tmp_vec); } -template -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(scalable_vector>& vec) { - tbb::parallel_for(UL(0), vec.size(), [&](const size_t i) { - free(vec[i]); - }); +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void +parallel_free(scalable_vector > &vec) +{ + tbb::parallel_for(UL(0), vec.size(), [&](const size_t i) { free(vec[i]); }); } -template -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(scalable_vector& vec1, - scalable_vector& vec2) { - tbb::parallel_invoke([&] { - free(vec1); - }, [&] { - free(vec2); - }); +template 
+MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(scalable_vector &vec1, + scalable_vector &vec2) +{ + tbb::parallel_invoke([&] { free(vec1); }, [&] { free(vec2); }); } -template -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(scalable_vector& vec1, - scalable_vector& vec2, - scalable_vector& vec3) { - tbb::parallel_invoke([&] { - free(vec1); - }, [&] { - free(vec2); - }, [&] { - free(vec3); - }); +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(scalable_vector &vec1, + scalable_vector &vec2, + scalable_vector &vec3) +{ + tbb::parallel_invoke([&] { free(vec1); }, [&] { free(vec2); }, [&] { free(vec3); }); } -template -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(scalable_vector& vec1, - scalable_vector& vec2, - scalable_vector& vec3, - scalable_vector& vec4) { - tbb::parallel_invoke([&] { - free(vec1); - }, [&] { - free(vec2); - }, [&] { - free(vec3); - }, [&] { - free(vec4); - }); +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void +parallel_free(scalable_vector &vec1, scalable_vector &vec2, + scalable_vector &vec3, scalable_vector &vec4) +{ + tbb::parallel_invoke([&] { free(vec1); }, [&] { free(vec2); }, [&] { free(vec3); }, + [&] { free(vec4); }); } - -template -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(scalable_vector& vec1, - scalable_vector& vec2, - scalable_vector& vec3, - scalable_vector& vec4, - scalable_vector& vec5) { - tbb::parallel_invoke([&] { - free(vec1); - }, [&] { - free(vec2); - }, [&] { - free(vec3); - }, [&] { - free(vec4); - }, [&] { - free(vec5); - }); +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void +parallel_free(scalable_vector &vec1, scalable_vector &vec2, + scalable_vector &vec3, scalable_vector &vec4, + scalable_vector &vec5) +{ + tbb::parallel_invoke([&] { free(vec1); }, [&] { free(vec2); }, [&] { free(vec3); }, + [&] { free(vec4); }, [&] { free(vec5); }); } -template -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void parallel_free(scalable_vector& vec1, - scalable_vector& vec2, - scalable_vector& vec3, - scalable_vector& vec4, - scalable_vector& vec5, - scalable_vector& vec6) { - tbb::parallel_invoke([&] { - free(vec1); - }, [&] { - free(vec2); - }, [&] { - free(vec3); - }, [&] { - free(vec4); - }, [&] { - free(vec5); - }, [&] { - free(vec6); - }); +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static void +parallel_free(scalable_vector &vec1, scalable_vector &vec2, + scalable_vector &vec3, scalable_vector &vec4, + scalable_vector &vec5, scalable_vector &vec6) +{ + tbb::parallel_invoke([&] { free(vec1); }, [&] { free(vec2); }, [&] { free(vec3); }, + [&] { free(vec4); }, [&] { free(vec5); }, [&] { free(vec6); }); } -} // namespace parallel - +} // namespace parallel -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/parallel/stl/thread_locals.h b/mt-kahypar/parallel/stl/thread_locals.h index 5518196f4..c5ad9787b 100644 --- a/mt-kahypar/parallel/stl/thread_locals.h +++ b/mt-kahypar/parallel/stl/thread_locals.h @@ -32,38 +32,43 @@ namespace mt_kahypar { - namespace internals { - template - using ThreadLocal = tbb::enumerable_thread_specific; +namespace internals { +template +using ThreadLocal = tbb::enumerable_thread_specific; - template - struct ThreadLocalFree { - using RangeType = typename ThreadLocal::range_type; - using Iterator = typename ThreadLocal::iterator; +template +struct ThreadLocalFree +{ + using RangeType = typename ThreadLocal::range_type; + using Iterator = typename ThreadLocal::iterator; - explicit ThreadLocalFree(F&& 
free_func) : - _free_func(free_func) { } + explicit ThreadLocalFree(F &&free_func) : _free_func(free_func) {} - void operator()(RangeType& range) const { - for ( Iterator it = range.begin(); it < range.end(); ++it ) { - _free_func(*it); - } - } - - F _free_func; - }; - } // namespace - - namespace parallel { - template - static void parallel_free_thread_local_internal_data(internals::ThreadLocal& local, - F&& free_func) { - internals::ThreadLocalFree thread_local_free(std::move(free_func)); - tbb::parallel_for(local.range(), thread_local_free); + void operator()(RangeType &range) const + { + for(Iterator it = range.begin(); it < range.end(); ++it) + { + _free_func(*it); } } - template - using tls_enumerable_thread_specific = tbb::enumerable_thread_specific, tbb::ets_key_per_instance>; + F _free_func; +}; +} // namespace + +namespace parallel { +template +static void parallel_free_thread_local_internal_data(internals::ThreadLocal &local, + F &&free_func) +{ + internals::ThreadLocalFree thread_local_free(std::move(free_func)); + tbb::parallel_for(local.range(), thread_local_free); +} +} + +template +using tls_enumerable_thread_specific = + tbb::enumerable_thread_specific, + tbb::ets_key_per_instance>; } \ No newline at end of file diff --git a/mt-kahypar/parallel/stl/zero_allocator.h b/mt-kahypar/parallel/stl/zero_allocator.h index 56cfa40fe..1878f27ea 100644 --- a/mt-kahypar/parallel/stl/zero_allocator.h +++ b/mt-kahypar/parallel/stl/zero_allocator.h @@ -32,22 +32,26 @@ namespace mt_kahypar { namespace parallel { template -class zero_allocator : public tbb::tbb_allocator { - public: +class zero_allocator : public tbb::tbb_allocator +{ +public: using value_type = T; using propagate_on_container_move_assignment = std::true_type; using is_always_equal = std::true_type; zero_allocator() = default; template - explicit zero_allocator(const U&) noexcept {} + explicit zero_allocator(const U &) noexcept + { + } - T* allocate(std::size_t n) { - T* ptr = tbb::tbb_allocator::allocate(n); - std::memset(static_cast(ptr), 0, n * sizeof(value_type)); + T *allocate(std::size_t n) + { + T *ptr = tbb::tbb_allocator::allocate(n); + std::memset(static_cast(ptr), 0, n * sizeof(value_type)); return ptr; } }; -} // namespace parallel -} // namespace mt_kahypar +} // namespace parallel +} // namespace mt_kahypar diff --git a/mt-kahypar/parallel/tbb_initializer.h b/mt-kahypar/parallel/tbb_initializer.h index d0e50d484..72f485533 100644 --- a/mt-kahypar/parallel/tbb_initializer.h +++ b/mt-kahypar/parallel/tbb_initializer.h @@ -27,15 +27,15 @@ #pragma once +#include #include -#include #include +#include #include -#include +#include "tbb/global_control.h" #include "tbb/task_arena.h" #include "tbb/task_group.h" -#include "tbb/global_control.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/thread_pinning_observer.h" @@ -48,62 +48,66 @@ namespace parallel { * on specific NUMA node. 
*/ template -class TBBInitializer { +class TBBInitializer +{ static constexpr bool debug = false; using Self = TBBInitializer; using ThreadPinningObserver = mt_kahypar::parallel::ThreadPinningObserver; - public: - TBBInitializer(const TBBInitializer&) = delete; - TBBInitializer & operator= (const TBBInitializer &) = delete; +public: + TBBInitializer(const TBBInitializer &) = delete; + TBBInitializer &operator=(const TBBInitializer &) = delete; - TBBInitializer(TBBInitializer&&) = delete; - TBBInitializer & operator= (TBBInitializer &&) = delete; + TBBInitializer(TBBInitializer &&) = delete; + TBBInitializer &operator=(TBBInitializer &&) = delete; - static TBBInitializer& instance(const size_t num_threads = std::thread::hardware_concurrency()) { + static TBBInitializer & + instance(const size_t num_threads = std::thread::hardware_concurrency()) + { static TBBInitializer instance(num_threads); return instance; } - int total_number_of_threads() const { - return _num_threads; - } + int total_number_of_threads() const { return _num_threads; } - int number_of_used_cpus_on_numa_node(const int node) const { + int number_of_used_cpus_on_numa_node(const int node) const + { ASSERT(static_cast(node) < _numa_node_to_cpu_id.size()); return _numa_node_to_cpu_id[node].size(); } - int num_used_numa_nodes() const { - return _numa_node_to_cpu_id.size(); - } + int num_used_numa_nodes() const { return _numa_node_to_cpu_id.size(); } - hwloc_cpuset_t used_cpuset() const { + hwloc_cpuset_t used_cpuset() const + { hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); - for ( const auto& numa_node : _numa_node_to_cpu_id ) { - for ( const int cpu_id : numa_node ) { + for(const auto &numa_node : _numa_node_to_cpu_id) + { + for(const int cpu_id : numa_node) + { hwloc_bitmap_set(cpuset, cpu_id); } } return cpuset; } - void terminate() { - if ( _global_observer ) { + void terminate() + { + if(_global_observer) + { _global_observer->observe(false); } } - private: +private: explicit TBBInitializer(const int num_threads) : - _num_threads(num_threads), - _gc(tbb::global_control::max_allowed_parallelism, num_threads), - _global_observer(nullptr), - _cpus(), - _numa_node_to_cpu_id() { - HwTopology& topology = HwTopology::instance(); + _num_threads(num_threads), + _gc(tbb::global_control::max_allowed_parallelism, num_threads), + _global_observer(nullptr), _cpus(), _numa_node_to_cpu_id() + { + HwTopology &topology = HwTopology::instance(); int num_numa_nodes = topology.num_numa_nodes(); DBG << "Initialize TBB with" << num_threads << "threads"; _cpus = topology.get_all_cpus(); @@ -112,29 +116,32 @@ class TBBInitializer { // 2.) Increasing order of numa node // 3.) Increasing order of cpu id // ... 
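
// Illustrative aside: the three sort criteria listed above amount to a
// lexicographic order on (is_hyperthread, numa_node, cpu_id), which std::tie
// expresses directly (standalone sketch, not the HwTopology interface):
#include <algorithm>
#include <tuple>
#include <vector>

struct CpuSlot { bool is_hyperthread; int numa_node; int cpu_id; };

void order_cpus(std::vector<CpuSlot>& cpus) {
  std::sort(cpus.begin(), cpus.end(), [](const CpuSlot& a, const CpuSlot& b) {
    // physical cores first, then grouped by NUMA node, then by CPU id
    return std::tie(a.is_hyperthread, a.numa_node, a.cpu_id) <
           std::tie(b.is_hyperthread, b.numa_node, b.cpu_id);
  });
}
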
- std::sort(_cpus.begin(), _cpus.end(), - [&](const int& lhs, const int& rhs) { - int node_lhs = topology.numa_node_of_cpu(lhs); - int node_rhs = topology.numa_node_of_cpu(rhs); - bool is_hyperthread_lhs = topology.is_hyperthread(lhs); - bool is_hyperthread_rhs = topology.is_hyperthread(rhs); - return is_hyperthread_lhs < is_hyperthread_rhs || - (is_hyperthread_lhs == is_hyperthread_rhs && node_lhs < node_rhs) || - (is_hyperthread_lhs == is_hyperthread_rhs && node_lhs == node_rhs && lhs < rhs); - }); + std::sort(_cpus.begin(), _cpus.end(), [&](const int &lhs, const int &rhs) { + int node_lhs = topology.numa_node_of_cpu(lhs); + int node_rhs = topology.numa_node_of_cpu(rhs); + bool is_hyperthread_lhs = topology.is_hyperthread(lhs); + bool is_hyperthread_rhs = topology.is_hyperthread(rhs); + return is_hyperthread_lhs < is_hyperthread_rhs || + (is_hyperthread_lhs == is_hyperthread_rhs && node_lhs < node_rhs) || + (is_hyperthread_lhs == is_hyperthread_rhs && node_lhs == node_rhs && + lhs < rhs); + }); // ... this ensure that we first pop nodes in hyperthreading - while (static_cast(_cpus.size()) > _num_threads) { + while(static_cast(_cpus.size()) > _num_threads) + { _cpus.pop_back(); } _global_observer = std::make_unique(_cpus); _numa_node_to_cpu_id.resize(num_numa_nodes); - for ( const int cpu_id : _cpus ) { + for(const int cpu_id : _cpus) + { int node = topology.numa_node_of_cpu(cpu_id); ASSERT(node < static_cast(_numa_node_to_cpu_id.size())); _numa_node_to_cpu_id[node].push_back(cpu_id); } - while( !_numa_node_to_cpu_id.empty() && _numa_node_to_cpu_id.back().empty() ) { + while(!_numa_node_to_cpu_id.empty() && _numa_node_to_cpu_id.back().empty()) + { _numa_node_to_cpu_id.pop_back(); } } @@ -143,7 +150,7 @@ class TBBInitializer { tbb::global_control _gc; std::unique_ptr _global_observer; std::vector _cpus; - std::vector> _numa_node_to_cpu_id; + std::vector > _numa_node_to_cpu_id; }; -} // namespace parallel -} // namespace mt_kahypar +} // namespace parallel +} // namespace mt_kahypar diff --git a/mt-kahypar/parallel/thread_pinning_observer.h b/mt-kahypar/parallel/thread_pinning_observer.h index ce30572b1..3da3a72e8 100644 --- a/mt-kahypar/parallel/thread_pinning_observer.h +++ b/mt-kahypar/parallel/thread_pinning_observer.h @@ -27,10 +27,10 @@ #pragma once -#include -#include #include #include +#include +#include #ifdef __linux__ #include @@ -44,182 +44,195 @@ #undef __TBB_ARENA_OBSERVER #include "mt-kahypar/macros.h" -#include "mt-kahypar/utils/randomize.h" #include "mt-kahypar/utils/exception.h" +#include "mt-kahypar/utils/randomize.h" namespace mt_kahypar { namespace parallel { template -class ThreadPinningObserver : public tbb::task_scheduler_observer { +class ThreadPinningObserver : public tbb::task_scheduler_observer +{ using Base = tbb::task_scheduler_observer; static constexpr bool debug = false; - public: - +public: // Observer is pinned to a task arena and is responsible for pinning // joining threads to the corresponding numa node. 
- explicit ThreadPinningObserver(tbb::task_arena& arena, - const int numa_node, - const std::vector& cpus) : - Base(arena), - _num_cpus(HwTopology::instance().num_cpus()), - _numa_node(numa_node), - _is_global_thread_pool(false), - _cpus(cpus), - _mutex(), - _cpu_before() { - ASSERT(cpus.size() > 0 && cpus.size() == static_cast(arena.max_concurrency()), - V(cpus.size()) << V(arena.max_concurrency())); - - // In case we have a task arena of size one, it can sometimes happen that master threads - // joins the arena regardless if there are an other worker thread already inside. - // In order to have enough CPU available for this special case, we request a backup - // CPU from the hardware topology here. - if ( _cpus.size() == 1 ) { - _cpus.push_back(HwTopology::instance().get_backup_cpu(_numa_node, _cpus[0])); + explicit ThreadPinningObserver (tbb::task_arena &arena, const int numa_node, + const std::vector &cpus) : + Base (arena), + _num_cpus (HwTopology::instance ().num_cpus ()), _numa_node (numa_node), + _is_global_thread_pool (false), _cpus (cpus), _mutex (), _cpu_before () + { + ASSERT (cpus.size () > 0 && + cpus.size () == static_cast (arena.max_concurrency ()), + V (cpus.size ()) << V (arena.max_concurrency ())); + + // In case we have a task arena of size one, it can sometimes happen that master + // threads joins the arena regardless if there are an other worker thread already + // inside. In order to have enough CPU available for this special case, we request a + // backup CPU from the hardware topology here. + if (_cpus.size () == 1) + { + _cpus.push_back (HwTopology::instance ().get_backup_cpu (_numa_node, _cpus[0])); } - #if defined(KAHYPAR_ENABLE_THREAD_PINNING) and not defined(__APPLE__) - #ifndef MT_KAHYPAR_LIBRARY_MODE - observe(true); // Enable thread pinning - #endif - #endif +#if defined(KAHYPAR_ENABLE_THREAD_PINNING) and not defined(__APPLE__) +#ifndef MT_KAHYPAR_LIBRARY_MODE + observe (true); // Enable thread pinning +#endif +#endif } // Observer is pinned to the global task arena and is reponsible for // pinning threads to unique CPU id. 
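
// Illustrative aside: on Linux, the pinning these observers perform boils
// down to a sched_setaffinity call for the current thread; a minimal
// standalone sketch without the hwloc/TBB glue (Linux only):
#ifdef __linux__
#include <sched.h>

bool pin_current_thread(int cpu_id) {
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(cpu_id, &mask);
  // pid 0 addresses the calling thread; a zero return value means success
  return sched_setaffinity(0, sizeof(mask), &mask) == 0;
}
#endif
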
- explicit ThreadPinningObserver(const std::vector& cpus) : - Base(), - _num_cpus(HwTopology::instance().num_cpus()), - _numa_node(-1), - _is_global_thread_pool(true), - _cpus(cpus), - _mutex(), - _cpu_before() { - if ( _cpus.size() == 1 ) { - _cpus.push_back(HwTopology::instance().get_backup_cpu(0, _cpus[0])); + explicit ThreadPinningObserver (const std::vector &cpus) : + Base (), _num_cpus (HwTopology::instance ().num_cpus ()), _numa_node (-1), + _is_global_thread_pool (true), _cpus (cpus), _mutex (), _cpu_before () + { + if (_cpus.size () == 1) + { + _cpus.push_back (HwTopology::instance ().get_backup_cpu (0, _cpus[0])); } - #ifdef KAHYPAR_ENABLE_THREAD_PINNING - #ifndef MT_KAHYPAR_LIBRARY_MODE - observe(true); // Enable thread pinning - #endif - #endif +#ifdef KAHYPAR_ENABLE_THREAD_PINNING +#ifndef MT_KAHYPAR_LIBRARY_MODE + observe (true); // Enable thread pinning +#endif +#endif } + ThreadPinningObserver (const ThreadPinningObserver &) = delete; + ThreadPinningObserver &operator= (const ThreadPinningObserver &) = delete; - ThreadPinningObserver(const ThreadPinningObserver&) = delete; - ThreadPinningObserver & operator= (const ThreadPinningObserver &) = delete; - - ThreadPinningObserver(ThreadPinningObserver&& other) : - _num_cpus(other._num_cpus), - _numa_node(other._numa_node), - _is_global_thread_pool(other._is_global_thread_pool), - _cpus(other._cpus), - _mutex(), - _cpu_before(std::move(other._cpu_before)) { } + ThreadPinningObserver (ThreadPinningObserver &&other) : + _num_cpus (other._num_cpus), _numa_node (other._numa_node), + _is_global_thread_pool (other._is_global_thread_pool), _cpus (other._cpus), + _mutex (), _cpu_before (std::move (other._cpu_before)) + { + } - ThreadPinningObserver & operator= (ThreadPinningObserver &&) = delete; + ThreadPinningObserver &operator= (ThreadPinningObserver &&) = delete; - void on_scheduler_entry(bool) override { - const int slot = tbb::this_task_arena::current_thread_index(); - ASSERT(static_cast(slot) < _cpus.size(), V(slot) << V(_cpus.size())); + void on_scheduler_entry (bool) override + { + const int slot = tbb::this_task_arena::current_thread_index (); + ASSERT (static_cast (slot) < _cpus.size (), V (slot) << V (_cpus.size ())); - if ( slot >= static_cast(_cpus.size()) ) { + if (slot >= static_cast (_cpus.size ())) + { std::stringstream thread_id; - thread_id << std::this_thread::get_id(); - throw SystemException( - "Thread " + thread_id.str() + " entered the global task arena " - "in a slot that should not exist (Slot = " + std::to_string(slot) + ", Max Slots = " + std::to_string(_cpus.size()) + - ", slots are 0-indexed). This bug only occurs in older versions of TBB. " - "We recommend upgrading TBB to the newest version."); + thread_id << std::this_thread::get_id (); + throw SystemException ( + "Thread " + thread_id.str () + + " entered the global task arena " + "in a slot that should not exist (Slot = " + + std::to_string (slot) + ", Max Slots = " + std::to_string (_cpus.size ()) + + ", slots are 0-indexed). This bug only occurs in older versions of TBB. 
" + "We recommend upgrading TBB to the newest version."); } - DBG << pin_thread_message(_cpus[slot]); - if(!_is_global_thread_pool) { - std::thread::id thread_id = std::this_thread::get_id(); + DBG << pin_thread_message (_cpus[slot]); + if (!_is_global_thread_pool) + { + std::thread::id thread_id = std::this_thread::get_id (); int current_cpu = THREAD_ID; - std::lock_guard lock(_mutex); + std::lock_guard lock (_mutex); _cpu_before[thread_id] = current_cpu; } - pin_thread_to_cpu(_cpus[slot]); + pin_thread_to_cpu (_cpus[slot]); } - void on_scheduler_exit(bool) override { - if (!_is_global_thread_pool) { - std::thread::id thread_id = std::this_thread::get_id(); + void on_scheduler_exit (bool) override + { + if (!_is_global_thread_pool) + { + std::thread::id thread_id = std::this_thread::get_id (); int cpu_before = -1; { - std::lock_guard lock(_mutex); - DBG << unpin_thread_message(); - auto it = _cpu_before.find(thread_id); - if ( it != _cpu_before.end() ) { + std::lock_guard lock (_mutex); + DBG << unpin_thread_message (); + auto it = _cpu_before.find (thread_id); + if (it != _cpu_before.end ()) + { cpu_before = it->second; - _cpu_before.erase(thread_id); + _cpu_before.erase (thread_id); } } - if ( cpu_before != -1 ) { - pin_thread_to_cpu(cpu_before); + if (cpu_before != -1) + { + pin_thread_to_cpu (cpu_before); } - } else { - DBG << "Thread with PID" << std::this_thread::get_id() - << "leaves GLOBAL task arena"; + } + else + { + DBG << "Thread with PID" << std::this_thread::get_id () + << "leaves GLOBAL task arena"; } } - private: - - void pin_thread_to_cpu(const int cpu_id) { - #ifndef __APPLE__ - #if __linux__ - const size_t size = CPU_ALLOC_SIZE(_num_cpus); +private: + void pin_thread_to_cpu (const int cpu_id) + { +#ifndef __APPLE__ +#if __linux__ + const size_t size = CPU_ALLOC_SIZE (_num_cpus); cpu_set_t mask; - CPU_ZERO(&mask); - CPU_SET(cpu_id, &mask); - const int err = sched_setaffinity(0, size, &mask); - #elif _WIN32 - auto mask = (static_cast(1) << cpu_id); - const int err = SetThreadAffinityMask(GetCurrentThread(), mask) == 0; - #endif - - if (err) { + CPU_ZERO (&mask); + CPU_SET (cpu_id, &mask); + const int err = sched_setaffinity (0, size, &mask); +#elif _WIN32 + auto mask = (static_cast (1) << cpu_id); + const int err = SetThreadAffinityMask (GetCurrentThread (), mask) == 0; +#endif + + if (err) + { const int error = errno; - throw SystemException( - "Failed to set thread affinity to cpu" + std::to_string(cpu_id) + "." + strerror(error)); + throw SystemException ("Failed to set thread affinity to cpu" + + std::to_string (cpu_id) + "." 
+ strerror (error)); } - ASSERT(THREAD_ID == cpu_id); - DBG << "Thread with PID" << std::this_thread::get_id() + ASSERT (THREAD_ID == cpu_id); + DBG << "Thread with PID" << std::this_thread::get_id () << "successfully pinned to CPU" << cpu_id; - #endif +#endif } - std::string pin_thread_message(const int cpu_id) { + std::string pin_thread_message (const int cpu_id) + { std::stringstream ss; - ss << "Assign thread with PID " << std::this_thread::get_id() - << " to CPU " << cpu_id; - if ( _numa_node != -1 ) { + ss << "Assign thread with PID " << std::this_thread::get_id () << " to CPU " + << cpu_id; + if (_numa_node != -1) + { ss << " on NUMA node " << _numa_node; - } else { + } + else + { ss << " in GLOBAL task arena"; } - return ss.str(); + return ss.str (); } - std::string unpin_thread_message() { + std::string unpin_thread_message () + { std::stringstream ss; - ss << "Unassign thread with PID " << std::this_thread::get_id() - << " on CPU " << THREAD_ID; - if ( _numa_node != -1 ) { + ss << "Unassign thread with PID " << std::this_thread::get_id () << " on CPU " + << THREAD_ID; + if (_numa_node != -1) + { ss << " from NUMA node " << _numa_node; - } else { + } + else + { ss << " from GLOBAL task arena"; } - ss << " (Assign thread to its last CPU: " - << _cpu_before[std::this_thread::get_id()] << ")"; - return ss.str(); + ss << " (Assign thread to its last CPU: " << _cpu_before[std::this_thread::get_id ()] + << ")"; + return ss.str (); } const int _num_cpus; @@ -230,5 +243,5 @@ class ThreadPinningObserver : public tbb::task_scheduler_observer { std::mutex _mutex; std::unordered_map _cpu_before; }; -} // namespace parallel -} // namespace mt_kahypar +} // namespace parallel +} // namespace mt_kahypar diff --git a/mt-kahypar/parallel/work_stack.h b/mt-kahypar/parallel/work_stack.h index 7c5ea04d8..8c1cf5539 100644 --- a/mt-kahypar/parallel/work_stack.h +++ b/mt-kahypar/parallel/work_stack.h @@ -36,24 +36,29 @@ namespace mt_kahypar { -template -struct ThreadQueue { +template +struct ThreadQueue +{ vec elements; CAtomic front; - ThreadQueue() { + ThreadQueue() + { elements.reserve(1 << 13); front.store(0); } - void clear() { + void clear() + { elements.clear(); front.store(0); } - bool try_pop(T& dest) { + bool try_pop(T &dest) + { size_t slot = front.fetch_add(1, std::memory_order_acq_rel); - if (slot < elements.size()) { + if(slot < elements.size()) + { dest = elements[slot]; return true; } @@ -61,72 +66,87 @@ struct ThreadQueue { } }; -template -struct WorkContainer { +template +struct WorkContainer +{ - WorkContainer(size_t maxNumThreads = 0) : - tls_queues(maxNumThreads) { } + WorkContainer(size_t maxNumThreads = 0) : tls_queues(maxNumThreads) {} - size_t unsafe_size() const { + size_t unsafe_size() const + { size_t sz = 0; - for (const ThreadQueue& q : tls_queues) { + for(const ThreadQueue &q : tls_queues) + { sz += q.elements.size() - q.front.load(std::memory_order_relaxed); } return sz; } // assumes that no thread is currently calling try_pop - void safe_push(const T el, size_t thread_id) { + void safe_push(const T el, size_t thread_id) + { ASSERT(thread_id < tls_queues.size()); tls_queues[thread_id].elements.push_back(el); ASSERT(tls_queues[thread_id].front.load() == 0); } - bool try_pop(T& dest, size_t thread_id) { + bool try_pop(T &dest, size_t thread_id) + { ASSERT(thread_id < tls_queues.size()); return tls_queues[thread_id].try_pop(dest) || steal_work(dest); } - bool steal_work(T& dest) { - for (ThreadQueue& q : tls_queues) { - if (q.try_pop(dest)) { + bool steal_work(T &dest) + { + 
for(ThreadQueue &q : tls_queues) + { + if(q.try_pop(dest)) + { return true; } } return false; } - void shuffle() { - tbb::parallel_for_each(tls_queues, [&](ThreadQueue& q) { + void shuffle() + { + tbb::parallel_for_each(tls_queues, [&](ThreadQueue &q) { utils::Randomize::instance().shuffleVector(q.elements); }); } - void clear() { - for (ThreadQueue& q : tls_queues) { + void clear() + { + for(ThreadQueue &q : tls_queues) + { q.clear(); } } - vec> tls_queues; + vec > tls_queues; - using SubRange = IteratorRange< typename vec::const_iterator >; + using SubRange = IteratorRange::const_iterator>; using Range = ConcatenatedRange; - Range safely_inserted_range() const { + Range safely_inserted_range() const + { Range r; - for (const ThreadQueue& q : tls_queues) { - r.concat( SubRange(q.elements.cbegin(), q.elements.cend()) ); + for(const ThreadQueue &q : tls_queues) + { + r.concat(SubRange(q.elements.cbegin(), q.elements.cend())); } return r; } - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); - utils::MemoryTreeNode* work_container_node = parent->addChild("Work Container"); - utils::MemoryTreeNode* local_work_queue_node = work_container_node->addChild("Local Work Queue"); - for (const ThreadQueue& q : tls_queues) { + utils::MemoryTreeNode *work_container_node = parent->addChild("Work Container"); + utils::MemoryTreeNode *local_work_queue_node = + work_container_node->addChild("Local Work Queue"); + for(const ThreadQueue &q : tls_queues) + { local_work_queue_node->updateSize(q.elements.capacity() * sizeof(T)); } } diff --git a/mt-kahypar/partition/coarsening/coarsening_commons.h b/mt-kahypar/partition/coarsening/coarsening_commons.h index 26b6e4d16..8935487c2 100644 --- a/mt-kahypar/partition/coarsening/coarsening_commons.h +++ b/mt-kahypar/partition/coarsening/coarsening_commons.h @@ -33,44 +33,39 @@ namespace mt_kahypar { -template -class Level { +template +class Level +{ using Hypergraph = typename TypeTraits::Hypergraph; public: - explicit Level(Hypergraph&& contracted_hypergraph, - parallel::scalable_vector&& communities, + explicit Level(Hypergraph &&contracted_hypergraph, + parallel::scalable_vector &&communities, double coarsening_time) : - _contracted_hypergraph(std::move(contracted_hypergraph)), - _communities(std::move(communities)), - _coarsening_time(coarsening_time) { } - - Hypergraph& contractedHypergraph() { - return _contracted_hypergraph; + _contracted_hypergraph(std::move(contracted_hypergraph)), + _communities(std::move(communities)), _coarsening_time(coarsening_time) + { } - const Hypergraph& contractedHypergraph() const { - return _contracted_hypergraph; - } + Hypergraph &contractedHypergraph() { return _contracted_hypergraph; } + + const Hypergraph &contractedHypergraph() const { return _contracted_hypergraph; } // ! Maps a global vertex id of the representative hypergraph // ! 
to its global vertex id in the contracted hypergraph - HypernodeID mapToContractedHypergraph(const HypernodeID hn) const { + HypernodeID mapToContractedHypergraph(const HypernodeID hn) const + { ASSERT(hn < _communities.size()); return _communities[hn]; } - double coarseningTime() const { - return _coarsening_time; - } + double coarseningTime() const { return _coarsening_time; } - void freeInternalData() { - tbb::parallel_invoke([&] { - _contracted_hypergraph.freeInternalData(); - }, [&] { - parallel::free(_communities); - }); + void freeInternalData() + { + tbb::parallel_invoke([&] { _contracted_hypergraph.freeInternalData(); }, + [&] { parallel::free(_communities); }); } private: @@ -84,8 +79,9 @@ class Level { double _coarsening_time; }; -template -class UncoarseningData { +template +class UncoarseningData +{ using Hypergraph = typename TypeTraits::Hypergraph; using HypergraphFactory = typename Hypergraph::Factory; @@ -93,68 +89,88 @@ class UncoarseningData { using ParallelHyperedge = typename Hypergraph::ParallelHyperedge; public: - explicit UncoarseningData(bool n_level, Hypergraph& hg, const Context& context) : - nlevel(n_level), - _hg(hg), - _context(context) { - if (n_level) { - compactified_hg = std::make_unique(); - compactified_phg = std::make_unique(); - } else { - size_t estimated_number_of_levels = UL(1); - if ( hg.initialNumNodes() > context.coarsening.contraction_limit ) { - estimated_number_of_levels = std::ceil( std::log2( - static_cast(hg.initialNumNodes()) / - static_cast(context.coarsening.contraction_limit)) / - std::log2(context.coarsening.maximum_shrink_factor) ) + UL(1); - } - hierarchy.reserve(estimated_number_of_levels); + explicit UncoarseningData(bool n_level, Hypergraph &hg, const Context &context) : + nlevel(n_level), _hg(hg), _context(context) + { + if(n_level) + { + compactified_hg = std::make_unique(); + compactified_phg = std::make_unique(); + } + else + { + size_t estimated_number_of_levels = UL(1); + if(hg.initialNumNodes() > context.coarsening.contraction_limit) + { + estimated_number_of_levels = + std::ceil( + std::log2(static_cast(hg.initialNumNodes()) / + static_cast(context.coarsening.contraction_limit)) / + std::log2(context.coarsening.maximum_shrink_factor)) + + UL(1); } - is_phg_initialized = false; - partitioned_hg = std::make_unique(); + hierarchy.reserve(estimated_number_of_levels); } + is_phg_initialized = false; + partitioned_hg = std::make_unique(); + } - ~UncoarseningData() noexcept { - tbb::parallel_for(UL(0), hierarchy.size(), [&](const size_t i) { - (hierarchy)[i].freeInternalData(); - }, tbb::static_partitioner()); + ~UncoarseningData() noexcept + { + tbb::parallel_for( + UL(0), hierarchy.size(), + [&](const size_t i) { (hierarchy)[i].freeInternalData(); }, + tbb::static_partitioner()); } - void setPartitionedHypergraph(PartitionedHypergraph&& phg) { + void setPartitionedHypergraph(PartitionedHypergraph &&phg) + { ASSERT(!is_phg_initialized); partitioned_hg = std::make_unique(std::move(phg)); is_phg_initialized = true; } - void finalizeCoarsening() { - utils::Timer& timer = utils::Utilities::instance().getTimer(_context.utility_id); - if (nlevel) { + void finalizeCoarsening() + { + utils::Timer &timer = utils::Utilities::instance().getTimer(_context.utility_id); + if(nlevel) + { // Create compactified hypergraph containing only enabled vertices and hyperedges // with consecutive IDs => Less complexity in initial partitioning. 
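
// Illustrative aside: the level estimate reserved in the constructor above is
// ceil( log2(initialNumNodes / contraction_limit) / log2(maximum_shrink_factor) ) + 1.
// With made-up numbers n = 1'000'000, contraction_limit = 50'000 and
// maximum_shrink_factor = 2.5 this reserves ceil(4.32 / 1.32) + 1 = 5 levels.
#include <cmath>
#include <cstddef>

size_t estimated_levels(double n, double limit, double shrink) {
  return static_cast<size_t>(std::ceil(std::log2(n / limit) / std::log2(shrink))) + 1;
}
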
timer.start_timer("compactify_hypergraph", "Compactify Hypergraph"); auto compactification = HypergraphFactory::compactify(_hg); *compactified_hg = std::move(compactification.first); compactified_hn_mapping = std::move(compactification.second); - *compactified_phg = PartitionedHypergraph(_context.partition.k, *compactified_hg, parallel_tag_t()); + *compactified_phg = + PartitionedHypergraph(_context.partition.k, *compactified_hg, parallel_tag_t()); timer.stop_timer("compactify_hypergraph"); - } else { + } + else + { timer.start_timer("finalize_multilevel_hierarchy", "Finalize Multilevel Hierarchy"); // Free memory of temporary contraction buffer and // release coarsening memory in memory pool - if (!hierarchy.empty()) { + if(!hierarchy.empty()) + { hierarchy.back().contractedHypergraph().freeTmpContractionBuffer(); - } else { + } + else + { _hg.freeTmpContractionBuffer(); } - if (_context.type == ContextType::main) { + if(_context.type == ContextType::main) + { parallel::MemoryPool::instance().release_mem_group("Coarsening"); } // Construct partitioned hypergraph for initial partitioning - if ( !is_phg_initialized ) { - *partitioned_hg = PartitionedHypergraph(_context.partition.k, _hg, parallel_tag_t()); + if(!is_phg_initialized) + { + *partitioned_hg = + PartitionedHypergraph(_context.partition.k, _hg, parallel_tag_t()); } - if (!hierarchy.empty()) { + if(!hierarchy.empty()) + { partitioned_hg->setHypergraph(hierarchy.back().contractedHypergraph()); } is_phg_initialized = true; @@ -163,28 +179,36 @@ class UncoarseningData { is_finalized = true; } - void performMultilevelContraction( - parallel::scalable_vector&& communities, bool deterministic, - const HighResClockTimepoint& round_start) { + void performMultilevelContraction(parallel::scalable_vector &&communities, + bool deterministic, + const HighResClockTimepoint &round_start) + { ASSERT(!is_finalized); - Hypergraph& current_hg = hierarchy.empty() ? _hg : hierarchy.back().contractedHypergraph(); + Hypergraph ¤t_hg = + hierarchy.empty() ? _hg : hierarchy.back().contractedHypergraph(); ASSERT(current_hg.initialNumNodes() == communities.size()); Hypergraph contracted_hg = current_hg.contract(communities, deterministic); const HighResClockTimepoint round_end = std::chrono::high_resolution_clock::now(); - const double elapsed_time = std::chrono::duration(round_end - round_start).count(); - hierarchy.emplace_back(std::move(contracted_hg), std::move(communities), elapsed_time); + const double elapsed_time = + std::chrono::duration(round_end - round_start).count(); + hierarchy.emplace_back(std::move(contracted_hg), std::move(communities), + elapsed_time); } - PartitionedHypergraph& coarsestPartitionedHypergraph() { - if (nlevel) { + PartitionedHypergraph &coarsestPartitionedHypergraph() + { + if(nlevel) + { return *compactified_phg; - } else { + } + else + { return *partitioned_hg; } } // Multilevel Data - vec> hierarchy; + vec > hierarchy; // NLevel Data // ! Once coarsening terminates we generate a compactified hypergraph @@ -197,7 +221,7 @@ class UncoarseningData { // ! Compactified partitioned hypergraph std::unique_ptr compactified_phg; // ! Contains timings how long a coarsening pass takes for each round - vec> removed_hyperedges_batches; + vec > removed_hyperedges_batches; // ! Removed single-pin and parallel nets. // ! All hyperedges that are contained in one vector must be restored once // ! we completly processed a vector of batches. 
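
// Illustrative aside: projecting a node of the input hypergraph down the
// multilevel hierarchy just chains the per-level community maps that
// Level::mapToContractedHypergraph looks up (hypothetical standalone helper,
// with plain vectors standing in for the Level objects):
#include <cstdint>
#include <vector>

uint64_t map_to_coarsest(uint64_t hn,
                         const std::vector<std::vector<uint64_t>>& communities_per_level) {
  for (const auto& communities : communities_per_level)
    hn = communities[hn];   // one lookup per coarsening level
  return hn;
}
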
@@ -210,22 +234,24 @@ class UncoarseningData { bool nlevel; private: - Hypergraph& _hg; - const Context& _context; + Hypergraph &_hg; + const Context &_context; }; typedef struct uncoarsening_data_s uncoarsening_data_t; namespace uncoarsening { - template - uncoarsening_data_t* to_pointer(UncoarseningData& ip_data) { - return reinterpret_cast(&ip_data); - } +template +uncoarsening_data_t *to_pointer(UncoarseningData &ip_data) +{ + return reinterpret_cast(&ip_data); +} - template - UncoarseningData& to_reference(uncoarsening_data_t* ptr) { - return *reinterpret_cast*>(ptr); - } +template +UncoarseningData &to_reference(uncoarsening_data_t *ptr) +{ + return *reinterpret_cast *>(ptr); +} } } diff --git a/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.cpp b/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.cpp index a627db6ca..f9f0a0240 100644 --- a/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.cpp +++ b/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.cpp @@ -32,13 +32,14 @@ namespace mt_kahypar { -template -bool DeterministicMultilevelCoarsener::coarseningPassImpl() { - auto& timer = utils::Utilities::instance().getTimer(_context.utility_id); +template +bool DeterministicMultilevelCoarsener::coarseningPassImpl() +{ + auto &timer = utils::Utilities::instance().getTimer(_context.utility_id); const auto pass_start_time = std::chrono::high_resolution_clock::now(); timer.start_timer("coarsening_pass", "Clustering"); - const Hypergraph& hg = Base::currentHypergraph(); + const Hypergraph &hg = Base::currentHypergraph(); size_t num_nodes = Base::currentNumNodes(); const double num_nodes_before_pass = num_nodes; vec clusters(num_nodes, kInvalidHypernode); @@ -49,36 +50,49 @@ bool DeterministicMultilevelCoarsener::coarseningPassImpl() { clusters[u] = u; }); - permutation.random_grouping(num_nodes, _context.shared_memory.static_balancing_work_packages, config.prng()); - for (size_t sub_round = 0; sub_round < config.num_sub_rounds && num_nodes > currentLevelContractionLimit(); ++sub_round) { + permutation.random_grouping( + num_nodes, _context.shared_memory.static_balancing_work_packages, config.prng()); + for(size_t sub_round = 0; + sub_round < config.num_sub_rounds && num_nodes > currentLevelContractionLimit(); + ++sub_round) + { auto [first_bucket, last_bucket] = parallel::chunking::bounds( - sub_round, config.num_buckets, config.num_buckets_per_sub_round); - size_t first = permutation.bucket_bounds[first_bucket], last = permutation.bucket_bounds[last_bucket]; + sub_round, config.num_buckets, config.num_buckets_per_sub_round); + size_t first = permutation.bucket_bounds[first_bucket], + last = permutation.bucket_bounds[last_bucket]; // each vertex finds a cluster it wants to join tbb::parallel_for(first, last, [&](size_t pos) { const HypernodeID u = permutation.at(pos); - if (cluster_weight[u] == hg.nodeWeight(u) && hg.nodeIsEnabled(u)) { + if(cluster_weight[u] == hg.nodeWeight(u) && hg.nodeIsEnabled(u)) + { calculatePreferredTargetCluster(u, clusters); } }); - tbb::enumerable_thread_specific num_contracted_nodes { 0 }; + tbb::enumerable_thread_specific num_contracted_nodes{ 0 }; // already approve if we can grant all requests for proposed cluster // otherwise insert to shared vector so that we can group vertices by cluster tbb::parallel_for(first, last, [&](size_t pos) { HypernodeID u = permutation.at(pos); HypernodeID target = propositions[u]; - if (target != u) { - if (opportunistic_cluster_weight[target] <= 
_context.coarsening.max_allowed_node_weight) { - // if other nodes joined cluster u but u itself leaves for a different cluster, it doesn't count - if (opportunistic_cluster_weight[u] == hg.nodeWeight(u)) { + if(target != u) + { + if(opportunistic_cluster_weight[target] <= + _context.coarsening.max_allowed_node_weight) + { + // if other nodes joined cluster u but u itself leaves for a different cluster, + // it doesn't count + if(opportunistic_cluster_weight[u] == hg.nodeWeight(u)) + { num_contracted_nodes.local() += 1; } clusters[u] = target; cluster_weight[target] = opportunistic_cluster_weight[target]; - } else { + } + else + { nodes_in_too_heavy_clusters.push_back_buffered(u); } } @@ -87,7 +101,8 @@ bool DeterministicMultilevelCoarsener::coarseningPassImpl() { num_nodes -= num_contracted_nodes.combine(std::plus<>()); nodes_in_too_heavy_clusters.finalize(); - if (nodes_in_too_heavy_clusters.size() > 0) { + if(nodes_in_too_heavy_clusters.size() > 0) + { num_nodes -= approveVerticesInTooHeavyClusters(clusters); } @@ -96,27 +111,34 @@ bool DeterministicMultilevelCoarsener::coarseningPassImpl() { timer.stop_timer("coarsening_pass"); ++pass; - if (num_nodes_before_pass / num_nodes <= _context.coarsening.minimum_shrink_factor) { + if(num_nodes_before_pass / num_nodes <= _context.coarsening.minimum_shrink_factor) + { return false; } _timer.start_timer("contraction", "Contraction"); - _uncoarseningData.performMultilevelContraction(std::move(clusters), true /* deterministic */, pass_start_time); + _uncoarseningData.performMultilevelContraction( + std::move(clusters), true /* deterministic */, pass_start_time); _timer.stop_timer("contraction"); return true; } -template -void DeterministicMultilevelCoarsener::calculatePreferredTargetCluster(HypernodeID u, const vec& clusters) { - const Hypergraph& hg = Base::currentHypergraph(); - auto& ratings = default_rating_maps.local(); +template +void DeterministicMultilevelCoarsener::calculatePreferredTargetCluster( + HypernodeID u, const vec &clusters) +{ + const Hypergraph &hg = Base::currentHypergraph(); + auto &ratings = default_rating_maps.local(); ratings.clear(); // calculate ratings - for (HyperedgeID he : hg.incidentEdges(u)) { + for(HyperedgeID he : hg.incidentEdges(u)) + { HypernodeID he_size = hg.edgeSize(he); - if (he_size < _context.partition.ignore_hyperedge_size_threshold) { + if(he_size < _context.partition.ignore_hyperedge_size_threshold) + { double he_score = static_cast(hg.edgeWeight(he)) / he_size; - for (HypernodeID v : hg.pins(he)) { + for(HypernodeID v : hg.pins(he)) + { ratings[clusters[v]] += he_score; } } @@ -125,15 +147,20 @@ void DeterministicMultilevelCoarsener::calculatePreferredTargetClust // find highest rated, feasible cluster const PartitionID comm_u = hg.communityID(u); const HypernodeWeight weight_u = hg.nodeWeight(u); - vec& best_targets = ties.local(); + vec &best_targets = ties.local(); double best_score = 0.0; - for (const auto& entry : ratings) { + for(const auto &entry : ratings) + { HypernodeID target_cluster = entry.key; double target_score = entry.value; - if (target_score >= best_score && target_cluster != u && hg.communityID(target_cluster) == comm_u - && cluster_weight[target_cluster] + weight_u <= _context.coarsening.max_allowed_node_weight) { - if (target_score > best_score) { + if(target_score >= best_score && target_cluster != u && + hg.communityID(target_cluster) == comm_u && + cluster_weight[target_cluster] + weight_u <= + _context.coarsening.max_allowed_node_weight) + { + if(target_score > best_score) 
+ { best_targets.clear(); best_score = target_score; } @@ -142,56 +169,73 @@ void DeterministicMultilevelCoarsener::calculatePreferredTargetClust } HypernodeID best_target; - if (best_targets.size() == 1) { + if(best_targets.size() == 1) + { best_target = best_targets[0]; - } else if (best_targets.empty()) { + } + else if(best_targets.empty()) + { best_target = u; - } else { + } + else + { hashing::SimpleIntHash sih; hashing::HashRNG hash_prng(sih, u); - size_t pos = std::uniform_int_distribution(0, best_targets.size() - 1)(hash_prng); + size_t pos = + std::uniform_int_distribution(0, best_targets.size() - 1)(hash_prng); assert(pos < best_targets.size()); best_target = best_targets[pos]; } best_targets.clear(); - if (best_target != u) { + if(best_target != u) + { propositions[u] = best_target; - __atomic_fetch_add(&opportunistic_cluster_weight[best_target], hg.nodeWeight(u), __ATOMIC_RELAXED); + __atomic_fetch_add(&opportunistic_cluster_weight[best_target], hg.nodeWeight(u), + __ATOMIC_RELAXED); } } -template -size_t DeterministicMultilevelCoarsener::approveVerticesInTooHeavyClusters(vec& clusters) { - const Hypergraph& hg = Base::currentHypergraph(); - tbb::enumerable_thread_specific num_contracted_nodes { 0 }; +template +size_t DeterministicMultilevelCoarsener::approveVerticesInTooHeavyClusters( + vec &clusters) +{ + const Hypergraph &hg = Base::currentHypergraph(); + tbb::enumerable_thread_specific num_contracted_nodes{ 0 }; - // group vertices by desired cluster, if their cluster is too heavy. approve the lower weight nodes first + // group vertices by desired cluster, if their cluster is too heavy. approve the lower + // weight nodes first auto comp = [&](HypernodeID lhs, HypernodeID rhs) { HypernodeWeight wl = hg.nodeWeight(lhs), wr = hg.nodeWeight(rhs); return std::tie(propositions[lhs], wl, lhs) < std::tie(propositions[rhs], wr, rhs); }; - tbb::parallel_sort(nodes_in_too_heavy_clusters.begin(), nodes_in_too_heavy_clusters.end(), comp); + tbb::parallel_sort(nodes_in_too_heavy_clusters.begin(), + nodes_in_too_heavy_clusters.end(), comp); tbb::parallel_for(UL(0), nodes_in_too_heavy_clusters.size(), [&](size_t pos) { HypernodeID target = propositions[nodes_in_too_heavy_clusters[pos]]; // the first vertex for this cluster handles the approval size_t num_contracted_local = 0; - if (pos == 0 || propositions[nodes_in_too_heavy_clusters[pos - 1]] != target) { + if(pos == 0 || propositions[nodes_in_too_heavy_clusters[pos - 1]] != target) + { HypernodeWeight target_weight = cluster_weight[target]; size_t first_rejected = pos; - // could be parallelized without extra memory but factor 2 work overhead and log(n) depth via binary search - for (; ; ++first_rejected) { + // could be parallelized without extra memory but factor 2 work overhead and log(n) + // depth via binary search + for(;; ++first_rejected) + { // we know that this cluster is too heavy, so the loop will terminate before assert(first_rejected < nodes_in_too_heavy_clusters.size()); assert(propositions[nodes_in_too_heavy_clusters[first_rejected]] == target); HypernodeID v = nodes_in_too_heavy_clusters[first_rejected]; - if (target_weight + hg.nodeWeight(v) > _context.coarsening.max_allowed_node_weight) { + if(target_weight + hg.nodeWeight(v) > _context.coarsening.max_allowed_node_weight) + { break; } clusters[v] = target; target_weight += hg.nodeWeight(v); - if (opportunistic_cluster_weight[v] == hg.nodeWeight(v)) { + if(opportunistic_cluster_weight[v] == hg.nodeWeight(v)) + { num_contracted_local += 1; } } diff --git 
a/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.h b/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.h index 3796ba940..7fe7920b0 100644 --- a/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.h +++ b/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.h @@ -26,33 +26,37 @@ #pragma once -#include "multilevel_coarsener_base.h" #include "i_coarsener.h" +#include "multilevel_coarsener_base.h" #include "include/libmtkahypartypes.h" -#include "mt-kahypar/utils/reproducible_random.h" -#include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/datastructures/buffered_vector.h" -#include "mt-kahypar/utils/utilities.h" -#include "mt-kahypar/utils/progress_bar.h" +#include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/utils/progress_bar.h" +#include "mt-kahypar/utils/reproducible_random.h" +#include "mt-kahypar/utils/utilities.h" #include namespace mt_kahypar { -template -class DeterministicMultilevelCoarsener : public ICoarsener, - private MultilevelCoarsenerBase { - - struct DeterministicCoarseningConfig { - explicit DeterministicCoarseningConfig(const Context& context) : - prng(context.partition.seed), - num_buckets(utils::ParallelPermutation::num_buckets), - num_sub_rounds(context.coarsening.num_sub_rounds_deterministic), - num_buckets_per_sub_round(0) { - num_buckets_per_sub_round = parallel::chunking::idiv_ceil(num_buckets, num_sub_rounds); +template +class DeterministicMultilevelCoarsener : public ICoarsener, + private MultilevelCoarsenerBase +{ + + struct DeterministicCoarseningConfig + { + explicit DeterministicCoarseningConfig(const Context &context) : + prng(context.partition.seed), + num_buckets(utils::ParallelPermutation::num_buckets), + num_sub_rounds(context.coarsening.num_sub_rounds_deterministic), + num_buckets_per_sub_round(0) + { + num_buckets_per_sub_round = + parallel::chunking::idiv_ceil(num_buckets, num_sub_rounds); } std::mt19937 prng; @@ -66,83 +70,92 @@ class DeterministicMultilevelCoarsener : public ICoarsener, public: DeterministicMultilevelCoarsener(mt_kahypar_hypergraph_t hypergraph, - const Context& context, - uncoarsening_data_t* uncoarseningData) : - Base(utils::cast(hypergraph), - context, - uncoarsening::to_reference(uncoarseningData)), - config(context), - initial_num_nodes(utils::cast(hypergraph).initialNumNodes()), - propositions(utils::cast(hypergraph).initialNumNodes()), - cluster_weight(utils::cast(hypergraph).initialNumNodes(), 0), - opportunistic_cluster_weight(utils::cast(hypergraph).initialNumNodes(), 0), - nodes_in_too_heavy_clusters(utils::cast(hypergraph).initialNumNodes()), - default_rating_maps(utils::cast(hypergraph).initialNumNodes()), - pass(0), - progress_bar(utils::cast(hypergraph).initialNumNodes(), 0, false) + const Context &context, + uncoarsening_data_t *uncoarseningData) : + Base(utils::cast(hypergraph), context, + uncoarsening::to_reference(uncoarseningData)), + config(context), + initial_num_nodes(utils::cast(hypergraph).initialNumNodes()), + propositions(utils::cast(hypergraph).initialNumNodes()), + cluster_weight(utils::cast(hypergraph).initialNumNodes(), 0), + opportunistic_cluster_weight(utils::cast(hypergraph).initialNumNodes(), + 0), + nodes_in_too_heavy_clusters(utils::cast(hypergraph).initialNumNodes()), + default_rating_maps(utils::cast(hypergraph).initialNumNodes()), pass(0), + progress_bar(utils::cast(hypergraph).initialNumNodes(), 0, false) { } - ~DeterministicMultilevelCoarsener() { - - } + 
~DeterministicMultilevelCoarsener() {} private: - struct Proposition { + struct Proposition + { HypernodeID node = kInvalidHypernode, cluster = kInvalidHypernode; HypernodeWeight weight = 0; }; static constexpr bool debug = false; - void initializeImpl() override { - if ( _context.partition.verbose_output && _context.partition.enable_progress_bar ) { + void initializeImpl() override + { + if(_context.partition.verbose_output && _context.partition.enable_progress_bar) + { progress_bar.enable(); } } bool coarseningPassImpl() override; - bool shouldNotTerminateImpl() const override { + bool shouldNotTerminateImpl() const override + { return Base::currentNumNodes() > _context.coarsening.contraction_limit; } - void terminateImpl() override { - progress_bar += (initial_num_nodes - progress_bar.count()); // fill to 100% + void terminateImpl() override + { + progress_bar += (initial_num_nodes - progress_bar.count()); // fill to 100% progress_bar.disable(); _uncoarseningData.finalizeCoarsening(); } - HypernodeID currentLevelContractionLimit() { - const auto& hg = Base::currentHypergraph(); - return std::max( _context.coarsening.contraction_limit, - static_cast( - (hg.initialNumNodes() - hg.numRemovedHypernodes()) / _context.coarsening.maximum_shrink_factor) ); + HypernodeID currentLevelContractionLimit() + { + const auto &hg = Base::currentHypergraph(); + return std::max( + _context.coarsening.contraction_limit, + static_cast((hg.initialNumNodes() - hg.numRemovedHypernodes()) / + _context.coarsening.maximum_shrink_factor)); } - void calculatePreferredTargetCluster(HypernodeID u, const vec& clusters); + void calculatePreferredTargetCluster(HypernodeID u, const vec &clusters); - size_t approveVerticesInTooHeavyClusters(vec& clusters); + size_t approveVerticesInTooHeavyClusters(vec &clusters); - HypernodeID currentNumberOfNodesImpl() const override { + HypernodeID currentNumberOfNodesImpl() const override + { return Base::currentNumNodes(); } - mt_kahypar_hypergraph_t coarsestHypergraphImpl() override { - return mt_kahypar_hypergraph_t { - reinterpret_cast( - &Base::currentHypergraph()), Hypergraph::TYPE }; + mt_kahypar_hypergraph_t coarsestHypergraphImpl() override + { + return mt_kahypar_hypergraph_t{ reinterpret_cast( + &Base::currentHypergraph()), + Hypergraph::TYPE }; } - mt_kahypar_partitioned_hypergraph_t coarsestPartitionedHypergraphImpl() override { - return mt_kahypar_partitioned_hypergraph_t { - reinterpret_cast( - &Base::currentPartitionedHypergraph()), PartitionedHypergraph::TYPE }; + mt_kahypar_partitioned_hypergraph_t coarsestPartitionedHypergraphImpl() override + { + return mt_kahypar_partitioned_hypergraph_t{ + reinterpret_cast( + &Base::currentPartitionedHypergraph()), + PartitionedHypergraph::TYPE + }; } using Base = MultilevelCoarsenerBase; - using Base::_hg; using Base::_context; + using Base::_hg; using Base::_timer; using Base::_uncoarseningData; @@ -152,10 +165,10 @@ class DeterministicMultilevelCoarsener : public ICoarsener, vec propositions; vec cluster_weight, opportunistic_cluster_weight; ds::BufferedVector nodes_in_too_heavy_clusters; - tbb::enumerable_thread_specific> default_rating_maps; - tbb::enumerable_thread_specific> ties; + tbb::enumerable_thread_specific > + default_rating_maps; + tbb::enumerable_thread_specific > ties; size_t pass; utils::ProgressBar progress_bar; - }; } diff --git a/mt-kahypar/partition/coarsening/i_coarsener.h b/mt-kahypar/partition/coarsening/i_coarsener.h index eead947c6..ed1549a81 100644 --- a/mt-kahypar/partition/coarsening/i_coarsener.h 
+++ b/mt-kahypar/partition/coarsening/i_coarsener.h @@ -31,20 +31,22 @@ #include "include/libmtkahypartypes.h" #include "mt-kahypar/macros.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/partition/coarsening/coarsening_commons.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" namespace mt_kahypar { -class ICoarsener { +class ICoarsener +{ - public: - ICoarsener(const ICoarsener&) = delete; - ICoarsener(ICoarsener&&) = delete; - ICoarsener & operator= (const ICoarsener &) = delete; - ICoarsener & operator= (ICoarsener &&) = delete; +public: + ICoarsener(const ICoarsener &) = delete; + ICoarsener(ICoarsener &&) = delete; + ICoarsener &operator=(const ICoarsener &) = delete; + ICoarsener &operator=(ICoarsener &&) = delete; - void coarsen() { + void coarsen() + { initialize(); bool should_continue = true; // Coarsening algorithms proceed in passes where each pass computes a clustering @@ -52,46 +54,36 @@ class ICoarsener { // hierarchy. The coarsening algorithms proceeds until the number of nodes equals // a predefined contraction limit (!shouldNotTerminate) or the number of nodes could // not be significantly reduced within one coarsening pass (should_continue). - while ( shouldNotTerminate() && should_continue ) { + while(shouldNotTerminate() && should_continue) + { should_continue = coarseningPass(); } terminate(); } - void initialize() { - initializeImpl(); - } + void initialize() { initializeImpl(); } - bool shouldNotTerminate() const { - return shouldNotTerminateImpl(); - } + bool shouldNotTerminate() const { return shouldNotTerminateImpl(); } - bool coarseningPass() { - return coarseningPassImpl(); - } + bool coarseningPass() { return coarseningPassImpl(); } - void terminate() { - terminateImpl(); - } + void terminate() { terminateImpl(); } - HypernodeID currentNumberOfNodes() const { - return currentNumberOfNodesImpl(); - } + HypernodeID currentNumberOfNodes() const { return currentNumberOfNodesImpl(); } - mt_kahypar_hypergraph_t coarsestHypergraph() { - return coarsestHypergraphImpl(); - } + mt_kahypar_hypergraph_t coarsestHypergraph() { return coarsestHypergraphImpl(); } - mt_kahypar_partitioned_hypergraph_t coarsestPartitionedHypergraph() { + mt_kahypar_partitioned_hypergraph_t coarsestPartitionedHypergraph() + { return coarsestPartitionedHypergraphImpl(); } virtual ~ICoarsener() = default; - protected: +protected: ICoarsener() = default; - private: +private: virtual void initializeImpl() = 0; virtual bool shouldNotTerminateImpl() const = 0; virtual bool coarseningPassImpl() = 0; @@ -101,4 +93,4 @@ class ICoarsener { virtual mt_kahypar_partitioned_hypergraph_t coarsestPartitionedHypergraphImpl() = 0; }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/coarsening/i_uncoarsener.h b/mt-kahypar/partition/coarsening/i_uncoarsener.h index 7e86d5af2..7087ee8ca 100644 --- a/mt-kahypar/partition/coarsening/i_uncoarsener.h +++ b/mt-kahypar/partition/coarsening/i_uncoarsener.h @@ -29,95 +29,82 @@ #pragma once #include "mt-kahypar/macros.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" namespace mt_kahypar { -template -class IUncoarsener { +template +class IUncoarsener +{ using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - IUncoarsener(const IUncoarsener&) = delete; - IUncoarsener(IUncoarsener&&) = delete; - 
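ICoarsener::coarsen() above is a non-virtual-interface driver: the public method fixes the pass loop while derived coarseners only supply the private *Impl hooks. A reduced sketch of the same control flow with a toy coarsener (DemoCoarsener and its node counts are invented for illustration):

#include <iostream>

class CoarsenerInterface {
public:
  // Runs passes until the contraction limit is reached or a pass no longer
  // shrinks the instance.
  void coarsen() {
    initializeImpl();
    bool should_continue = true;
    while (shouldNotTerminateImpl() && should_continue) {
      should_continue = coarseningPassImpl();
    }
    terminateImpl();
  }
  virtual ~CoarsenerInterface() = default;

private:
  virtual void initializeImpl() = 0;
  virtual bool shouldNotTerminateImpl() const = 0;
  virtual bool coarseningPassImpl() = 0;
  virtual void terminateImpl() = 0;
};

// Toy coarsener: halves the node count each pass until the limit is reached.
class DemoCoarsener final : public CoarsenerInterface {
  int nodes_ = 1000;
  int limit_ = 100;
  void initializeImpl() override {}
  bool shouldNotTerminateImpl() const override { return nodes_ > limit_; }
  bool coarseningPassImpl() override { nodes_ /= 2; return true; }
  void terminateImpl() override { std::cout << "final nodes: " << nodes_ << "\n"; }
};

int main() { DemoCoarsener{}.coarsen(); }   // prints "final nodes: 62"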
IUncoarsener & operator= (const IUncoarsener &) = delete; - IUncoarsener & operator= (IUncoarsener &&) = delete; +public: + IUncoarsener(const IUncoarsener &) = delete; + IUncoarsener(IUncoarsener &&) = delete; + IUncoarsener &operator=(const IUncoarsener &) = delete; + IUncoarsener &operator=(IUncoarsener &&) = delete; - PartitionedHypergraph&& uncoarsen() { - initialize(); + PartitionedHypergraph &&uncoarsen() + { + initialize(); - while ( !isTopLevel() ) { - projectToNextLevelAndRefine(); - } + while(!isTopLevel()) + { + projectToNextLevelAndRefine(); + } - rebalancing(); + rebalancing(); - return movePartitionedHypergraph(); - } + return movePartitionedHypergraph(); + } - void initialize() { - initializeImpl(); - } + void initialize() { initializeImpl(); } - bool isTopLevel() const { - return isTopLevelImpl(); - } + bool isTopLevel() const { return isTopLevelImpl(); } - void projectToNextLevelAndRefine() { - projectToNextLevelAndRefineImpl(); - } + void projectToNextLevelAndRefine() { projectToNextLevelAndRefineImpl(); } - void refine() { - refineImpl(); - } + void refine() { refineImpl(); } - void rebalancing() { - rebalancingImpl(); - } + void rebalancing() { rebalancingImpl(); } - gain_cache_t getGainCache() { - return getGainCacheImpl(); - } + gain_cache_t getGainCache() { return getGainCacheImpl(); } - HyperedgeWeight getObjective() const { - return getObjectiveImpl(); - } + HyperedgeWeight getObjective() const { return getObjectiveImpl(); } - void updateMetrics() { - updateMetricsImpl(); - } + void updateMetrics() { updateMetricsImpl(); } - PartitionedHypergraph& currentPartitionedHypergraph() { - return currentPartitionedHypergraphImpl(); - } + PartitionedHypergraph ¤tPartitionedHypergraph() + { + return currentPartitionedHypergraphImpl(); + } - HypernodeID currentNumberOfNodes() const { - return currentNumberOfNodesImpl(); - } + HypernodeID currentNumberOfNodes() const { return currentNumberOfNodesImpl(); } - PartitionedHypergraph&& movePartitionedHypergraph() { - return movePartitionedHypergraphImpl(); - } + PartitionedHypergraph &&movePartitionedHypergraph() + { + return movePartitionedHypergraphImpl(); + } + + virtual ~IUncoarsener() = default; + +protected: + IUncoarsener() = default; - virtual ~IUncoarsener() = default; - - protected: - IUncoarsener() = default; - - private: - virtual void initializeImpl() = 0; - virtual bool isTopLevelImpl() const = 0; - virtual void projectToNextLevelAndRefineImpl() = 0; - virtual void refineImpl() = 0; - virtual void rebalancingImpl() = 0; - virtual gain_cache_t getGainCacheImpl() = 0; - virtual HyperedgeWeight getObjectiveImpl() const = 0; - virtual void updateMetricsImpl() = 0; - virtual PartitionedHypergraph& currentPartitionedHypergraphImpl() = 0; - virtual HypernodeID currentNumberOfNodesImpl() const = 0; - virtual PartitionedHypergraph&& movePartitionedHypergraphImpl() = 0; - }; +private: + virtual void initializeImpl() = 0; + virtual bool isTopLevelImpl() const = 0; + virtual void projectToNextLevelAndRefineImpl() = 0; + virtual void refineImpl() = 0; + virtual void rebalancingImpl() = 0; + virtual gain_cache_t getGainCacheImpl() = 0; + virtual HyperedgeWeight getObjectiveImpl() const = 0; + virtual void updateMetricsImpl() = 0; + virtual PartitionedHypergraph ¤tPartitionedHypergraphImpl() = 0; + virtual HypernodeID currentNumberOfNodesImpl() const = 0; + virtual PartitionedHypergraph &&movePartitionedHypergraphImpl() = 0; +}; } diff --git a/mt-kahypar/partition/coarsening/multilevel_coarsener.h 
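IUncoarsener::uncoarsen() above mirrors coarsen() in reverse: refine the coarsest partition first, then repeatedly project it one level down and refine again, and finish with a rebalancing step on the input hypergraph. A compact sketch of that level bookkeeping, following the convention used by MultilevelUncoarsener further below (level counter starts at the hierarchy size, the first call only refines, and the top level is reached once the counter drops below zero):

#include <iostream>

struct UncoarsenSketch {
  int current_level;
  int num_levels;
  explicit UncoarsenSketch(int levels) : current_level(levels), num_levels(levels) {}

  bool isTopLevel() const { return current_level < 0; }

  void projectToNextLevelAndRefine() {
    if (current_level == num_levels) {
      std::cout << "refine coarsest hypergraph\n";   // no projection on the first call
    } else {
      std::cout << "project partition one level down (level " << current_level
                << ") and refine\n";
    }
    --current_level;
  }

  void uncoarsen() {
    while (!isTopLevel()) {
      projectToNextLevelAndRefine();
    }
    std::cout << "rebalance on the input hypergraph\n";
  }
};

int main() { UncoarsenSketch(2).uncoarsen(); }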
b/mt-kahypar/partition/coarsening/multilevel_coarsener.h index a89948998..60c7cf3f6 100644 --- a/mt-kahypar/partition/coarsening/multilevel_coarsener.h +++ b/mt-kahypar/partition/coarsening/multilevel_coarsener.h @@ -30,21 +30,21 @@ #include #include "tbb/concurrent_queue.h" -#include "tbb/task_group.h" #include "tbb/parallel_for.h" #include "tbb/parallel_reduce.h" +#include "tbb/task_group.h" #include "kahypar-resources/meta/mandatory.h" #include "include/libmtkahypartypes.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/partition/coarsening/i_coarsener.h" #include "mt-kahypar/partition/coarsening/multilevel_coarsener_base.h" #include "mt-kahypar/partition/coarsening/multilevel_vertex_pair_rater.h" -#include "mt-kahypar/partition/coarsening/i_coarsener.h" #include "mt-kahypar/partition/coarsening/policies/rating_acceptance_policy.h" #include "mt-kahypar/partition/coarsening/policies/rating_heavy_node_penalty_policy.h" #include "mt-kahypar/partition/coarsening/policies/rating_score_policy.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/utils/cast.h" #include "mt-kahypar/utils/progress_bar.h" #include "mt-kahypar/utils/randomize.h" @@ -52,100 +52,90 @@ #include "mt-kahypar/utils/timer.h" namespace mt_kahypar { -template -class MultilevelCoarsener : public ICoarsener, - private MultilevelCoarsenerBase { - private: - +class MultilevelCoarsener : public ICoarsener, private MultilevelCoarsenerBase +{ +private: using Base = MultilevelCoarsenerBase; - using Rater = MultilevelVertexPairRater; + using Rater = + MultilevelVertexPairRater; using Rating = typename Rater::Rating; using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - enum class MatchingState : uint8_t { + enum class MatchingState : uint8_t + { UNMATCHED = 0, MATCHING_IN_PROGRESS = 1, MATCHED = 2 }; - #define STATE(X) static_cast(X) +#define STATE(X) static_cast(X) using AtomicMatchingState = parallel::IntegralAtomicWrapper; using AtomicWeight = parallel::IntegralAtomicWrapper; static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - static constexpr HypernodeID kInvalidHypernode = std::numeric_limits::max(); - - public: - MultilevelCoarsener(mt_kahypar_hypergraph_t hypergraph, - const Context& context, - uncoarsening_data_t* uncoarseningData) : - Base(utils::cast(hypergraph), - context, - uncoarsening::to_reference(uncoarseningData)), - _rater(utils::cast(hypergraph).initialNumNodes(), - utils::cast(hypergraph).maxEdgeSize(), context), - _initial_num_nodes(utils::cast(hypergraph).initialNumNodes()), - _current_vertices(), - _matching_state(), - _cluster_weight(), - _matching_partner(), - _pass_nr(0), - _progress_bar(utils::cast(hypergraph).initialNumNodes(), 0, false), - _enable_randomization(true) { + static constexpr HypernodeID kInvalidHypernode = + std::numeric_limits::max(); + +public: + MultilevelCoarsener(mt_kahypar_hypergraph_t hypergraph, const Context &context, + uncoarsening_data_t *uncoarseningData) : + Base(utils::cast(hypergraph), context, + uncoarsening::to_reference(uncoarseningData)), + _rater(utils::cast(hypergraph).initialNumNodes(), + utils::cast(hypergraph).maxEdgeSize(), context), + _initial_num_nodes(utils::cast(hypergraph).initialNumNodes()), + _current_vertices(), _matching_state(), _cluster_weight(), _matching_partner(), + _pass_nr(0), + _progress_bar(utils::cast(hypergraph).initialNumNodes(), 0, false), + _enable_randomization(true) + { _progress_bar += 
_hg.numRemovedHypernodes(); // Initialize internal data structures parallel - tbb::parallel_invoke([&] { - _current_vertices.resize(_hg.initialNumNodes()); - }, [&] { - _matching_state.resize(_hg.initialNumNodes()); - }, [&] { - _cluster_weight.resize(_hg.initialNumNodes()); - }, [&] { - _matching_partner.resize(_hg.initialNumNodes()); - }); + tbb::parallel_invoke([&] { _current_vertices.resize(_hg.initialNumNodes()); }, + [&] { _matching_state.resize(_hg.initialNumNodes()); }, + [&] { _cluster_weight.resize(_hg.initialNumNodes()); }, + [&] { _matching_partner.resize(_hg.initialNumNodes()); }); } - MultilevelCoarsener(const MultilevelCoarsener&) = delete; - MultilevelCoarsener(MultilevelCoarsener&&) = delete; - MultilevelCoarsener & operator= (const MultilevelCoarsener &) = delete; - MultilevelCoarsener & operator= (MultilevelCoarsener &&) = delete; + MultilevelCoarsener(const MultilevelCoarsener &) = delete; + MultilevelCoarsener(MultilevelCoarsener &&) = delete; + MultilevelCoarsener &operator=(const MultilevelCoarsener &) = delete; + MultilevelCoarsener &operator=(MultilevelCoarsener &&) = delete; - ~MultilevelCoarsener() { - parallel::parallel_free( - _current_vertices, _matching_state, - _cluster_weight, _matching_partner); + ~MultilevelCoarsener() + { + parallel::parallel_free(_current_vertices, _matching_state, _cluster_weight, + _matching_partner); } - void disableRandomization() { - _enable_randomization = false; - } + void disableRandomization() { _enable_randomization = false; } - private: - void initializeImpl() override { - if ( _context.partition.verbose_output && _context.partition.enable_progress_bar ) { +private: + void initializeImpl() override + { + if(_context.partition.verbose_output && _context.partition.enable_progress_bar) + { _progress_bar.enable(); } } - bool shouldNotTerminateImpl() const override { + bool shouldNotTerminateImpl() const override + { return Base::currentNumNodes() > _context.coarsening.contraction_limit; } - bool coarseningPassImpl() override { + bool coarseningPassImpl() override + { HighResClockTimepoint round_start = std::chrono::high_resolution_clock::now(); - Hypergraph& current_hg = Base::currentHypergraph(); - DBG << V(_pass_nr) - << V(current_hg.initialNumNodes()) - << V(current_hg.initialNumEdges()) - << V(current_hg.initialNumPins()); + Hypergraph ¤t_hg = Base::currentHypergraph(); + DBG << V(_pass_nr) << V(current_hg.initialNumNodes()) + << V(current_hg.initialNumEdges()) << V(current_hg.initialNumPins()); // Random shuffle vertices of current hypergraph _current_vertices.resize(current_hg.initialNumNodes()); @@ -157,183 +147,231 @@ class MultilevelCoarsener : public ICoarsener, _matching_state[hn] = STATE(MatchingState::UNMATCHED); _matching_partner[hn] = hn; cluster_ids[hn] = hn; - if ( current_hg.nodeIsEnabled(hn) ) { + if(current_hg.nodeIsEnabled(hn)) + { _cluster_weight[hn] = current_hg.nodeWeight(hn); } }); - if ( _enable_randomization ) { - utils::Randomize::instance().parallelShuffleVector( _current_vertices, UL(0), _current_vertices.size()); + if(_enable_randomization) + { + utils::Randomize::instance().parallelShuffleVector(_current_vertices, UL(0), + _current_vertices.size()); } const HypernodeID num_hns_before_pass = - current_hg.initialNumNodes() - current_hg.numRemovedHypernodes(); + current_hg.initialNumNodes() - current_hg.numRemovedHypernodes(); HypernodeID current_num_nodes = 0; - if ( current_hg.hasFixedVertices() ) { + if(current_hg.hasFixedVertices()) + { current_num_nodes = performClustering(current_hg, 
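The constructor above allocates its four auxiliary arrays concurrently with tbb::parallel_invoke instead of one after another. A self-contained sketch of that idiom (array names and the node count are placeholders):

#include <cstdint>
#include <iostream>
#include <vector>
#include <tbb/parallel_invoke.h>

int main() {
  const std::size_t num_nodes = 1 << 20;
  std::vector<std::uint32_t> current_vertices, matching_partner;
  std::vector<std::uint8_t> matching_state;
  std::vector<std::int64_t> cluster_weight;

  // Each lambda allocates one array; parallel_invoke may run them concurrently,
  // so the allocation and zeroing costs overlap.
  tbb::parallel_invoke([&] { current_vertices.resize(num_nodes); },
                       [&] { matching_state.resize(num_nodes); },
                       [&] { cluster_weight.resize(num_nodes); },
                       [&] { matching_partner.resize(num_nodes); });

  std::cout << current_vertices.size() << " slots initialized\n";
}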
cluster_ids); - } else { + } + else + { current_num_nodes = performClustering(current_hg, cluster_ids); } DBG << V(current_num_nodes); - HEAVY_COARSENING_ASSERT([&] { - parallel::scalable_vector expected_weights(current_hg.initialNumNodes()); - // Verify that clustering is correct - for ( const HypernodeID& hn : current_hg.nodes() ) { - const HypernodeID u = hn; - const HypernodeID root_u = cluster_ids[u]; - if ( root_u != cluster_ids[root_u] ) { - LOG << "Hypernode" << u << "is part of cluster" << root_u << ", but cluster" - << root_u << "is also part of cluster" << cluster_ids[root_u]; - return false; - } - expected_weights[root_u] += current_hg.nodeWeight(hn); - } + HEAVY_COARSENING_ASSERT( + [&] { + parallel::scalable_vector expected_weights( + current_hg.initialNumNodes()); + // Verify that clustering is correct + for(const HypernodeID &hn : current_hg.nodes()) + { + const HypernodeID u = hn; + const HypernodeID root_u = cluster_ids[u]; + if(root_u != cluster_ids[root_u]) + { + LOG << "Hypernode" << u << "is part of cluster" << root_u << ", but cluster" + << root_u << "is also part of cluster" << cluster_ids[root_u]; + return false; + } + expected_weights[root_u] += current_hg.nodeWeight(hn); + } - // Verify that cluster weights are aggregated correct - for ( const HypernodeID& hn : current_hg.nodes() ) { - const HypernodeID u = hn; - const HypernodeID root_u = cluster_ids[u]; - if ( root_u == u && expected_weights[u] != _cluster_weight[u] ) { - LOG << "The expected weight of cluster" << u << "is" << expected_weights[u] - << ", but currently it is" << _cluster_weight[u]; - return false; - } - } - return true; - }(), "Parallel clustering computed invalid cluster ids and weights"); + // Verify that cluster weights are aggregated correct + for(const HypernodeID &hn : current_hg.nodes()) + { + const HypernodeID u = hn; + const HypernodeID root_u = cluster_ids[u]; + if(root_u == u && expected_weights[u] != _cluster_weight[u]) + { + LOG << "The expected weight of cluster" << u << "is" << expected_weights[u] + << ", but currently it is" << _cluster_weight[u]; + return false; + } + } + return true; + }(), + "Parallel clustering computed invalid cluster ids and weights"); const double reduction_vertices_percentage = - static_cast(num_hns_before_pass) / - static_cast(current_num_nodes); - if ( reduction_vertices_percentage <= _context.coarsening.minimum_shrink_factor ) { + static_cast(num_hns_before_pass) / static_cast(current_num_nodes); + if(reduction_vertices_percentage <= _context.coarsening.minimum_shrink_factor) + { return false; } _progress_bar += (num_hns_before_pass - current_num_nodes); _timer.start_timer("contraction", "Contraction"); // Perform parallel contraction - _uncoarseningData.performMultilevelContraction(std::move(cluster_ids), false /* deterministic */, round_start); + _uncoarseningData.performMultilevelContraction( + std::move(cluster_ids), false /* deterministic */, round_start); _timer.stop_timer("contraction"); ++_pass_nr; return true; } - template - HypernodeID performClustering(const Hypergraph& current_hg, - vec& cluster_ids) { - // We iterate in parallel over all vertices of the hypergraph and compute its contraction partner. - // Matched vertices are linked in a concurrent union find data structure, that also aggregates - // weights of the resulting clusters and keep track of the number of nodes left, if we would - // contract all matched vertices. 
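A coarsening pass is only counted as productive if it shrank the hypergraph by more than minimum_shrink_factor; otherwise coarseningPassImpl returns false and the outer coarsen() loop stops. A worked example of that acceptance check (the shrink factor and node counts are illustrative):

#include <iostream>

// True if the pass reduced the node count enough to justify another pass.
bool pass_was_productive(double nodes_before, double nodes_after,
                         double minimum_shrink_factor) {
  const double reduction = nodes_before / nodes_after;
  return reduction > minimum_shrink_factor;
}

int main() {
  // With minimum_shrink_factor = 1.01, shrinking 100000 -> 99500 nodes
  // (factor ~1.005) is not enough, while 100000 -> 90000 (factor ~1.11) is.
  std::cout << pass_was_productive(100000, 99500, 1.01) << "\n";   // 0
  std::cout << pass_was_productive(100000, 90000, 1.01) << "\n";   // 1
}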
+ template + HypernodeID performClustering(const Hypergraph ¤t_hg, + vec &cluster_ids) + { + // We iterate in parallel over all vertices of the hypergraph and compute its + // contraction partner. Matched vertices are linked in a concurrent union find data + // structure, that also aggregates weights of the resulting clusters and keep track of + // the number of nodes left, if we would contract all matched vertices. _timer.start_timer("clustering", "Clustering"); - if ( _context.partition.show_detailed_clustering_timings ) { - _timer.start_timer("clustering_level_" + std::to_string(_pass_nr), "Level " + std::to_string(_pass_nr)); + if(_context.partition.show_detailed_clustering_timings) + { + _timer.start_timer("clustering_level_" + std::to_string(_pass_nr), + "Level " + std::to_string(_pass_nr)); } _rater.resetMatches(); _rater.setCurrentNumberOfNodes(current_hg.initialNumNodes()); - const HypernodeID num_hns_before_pass = current_hg.initialNumNodes() - current_hg.numRemovedHypernodes(); + const HypernodeID num_hns_before_pass = + current_hg.initialNumNodes() - current_hg.numRemovedHypernodes(); const HypernodeID hierarchy_contraction_limit = hierarchyContractionLimit(current_hg); DBG << V(current_hg.initialNumNodes()) << V(hierarchy_contraction_limit); HypernodeID current_num_nodes = num_hns_before_pass; tbb::enumerable_thread_specific contracted_nodes(0); tbb::enumerable_thread_specific num_nodes_update_threshold(0); - ds::FixedVertexSupport fixed_vertices = current_hg.copyOfFixedVertexSupport(); + ds::FixedVertexSupport fixed_vertices = + current_hg.copyOfFixedVertexSupport(); fixed_vertices.setMaxBlockWeight(_context.partition.max_part_weights); tbb::parallel_for(0U, current_hg.initialNumNodes(), [&](const HypernodeID id) { ASSERT(id < _current_vertices.size()); const HypernodeID hn = _current_vertices[id]; - if (current_hg.nodeIsEnabled(hn)) { + if(current_hg.nodeIsEnabled(hn)) + { // We perform rating if ... // 1.) The contraction limit of the current level is not reached // 2.) Vertex hn is not matched before const HypernodeID u = hn; - if (_matching_state[u] == STATE(MatchingState::UNMATCHED)) { - if (current_num_nodes > hierarchy_contraction_limit) { + if(_matching_state[u] == STATE(MatchingState::UNMATCHED)) + { + if(current_num_nodes > hierarchy_contraction_limit) + { ASSERT(current_hg.nodeIsEnabled(hn)); - const Rating rating = _rater.template rate(current_hg, hn, - cluster_ids, _cluster_weight, fixed_vertices, _context.coarsening.max_allowed_node_weight); - if (rating.target != kInvalidHypernode) { + const Rating rating = _rater.template rate( + current_hg, hn, cluster_ids, _cluster_weight, fixed_vertices, + _context.coarsening.max_allowed_node_weight); + if(rating.target != kInvalidHypernode) + { const HypernodeID v = rating.target; - HypernodeID& local_contracted_nodes = contracted_nodes.local(); - matchVertices(current_hg, u, v, - cluster_ids, local_contracted_nodes, fixed_vertices); - - // To maintain the current number of nodes of the hypergraph each PE sums up - // its number of contracted nodes locally. To compute the current number of - // nodes, we have to sum up the number of contracted nodes of each PE. This - // operation becomes more expensive the more PEs are participating in coarsening. - // In order to prevent expensive updates of the current number of nodes, we - // define a threshold which the local number of contracted nodes have to exceed - // before the current PE updates the current number of nodes. 
This threshold is defined - // by the distance to the current contraction limit divided by the number of PEs. - // Once one PE exceeds this bound the first time it is not possible that the - // contraction limit is reached, because otherwise an other PE would update - // the global current number of nodes before. After update the threshold is - // increased by the new difference (in number of nodes) to the contraction limit - // divided by the number of PEs. - if (local_contracted_nodes >= num_nodes_update_threshold.local()) { + HypernodeID &local_contracted_nodes = contracted_nodes.local(); + matchVertices(current_hg, u, v, cluster_ids, + local_contracted_nodes, fixed_vertices); + + // To maintain the current number of nodes of the hypergraph each PE + // sums up its number of contracted nodes locally. To compute the + // current number of nodes, we have to sum up the number of + // contracted nodes of each PE. This operation becomes more + // expensive the more PEs are participating in coarsening. In order + // to prevent expensive updates of the current number of nodes, we + // define a threshold which the local number of contracted nodes + // have to exceed before the current PE updates the current number + // of nodes. This threshold is defined by the distance to the + // current contraction limit divided by the number of PEs. Once one + // PE exceeds this bound the first time it is not possible that the + // contraction limit is reached, because otherwise an other PE would + // update the global current number of nodes before. After update + // the threshold is increased by the new difference (in number of + // nodes) to the contraction limit divided by the number of PEs. + if(local_contracted_nodes >= num_nodes_update_threshold.local()) + { current_num_nodes = num_hns_before_pass - contracted_nodes.combine(std::plus()); const HypernodeID dist_to_contraction_limit = - current_num_nodes > hierarchy_contraction_limit ? - current_num_nodes - hierarchy_contraction_limit : 0; + current_num_nodes > hierarchy_contraction_limit ? 
+ current_num_nodes - hierarchy_contraction_limit : + 0; num_nodes_update_threshold.local() += - dist_to_contraction_limit / _context.shared_memory.original_num_threads; + dist_to_contraction_limit / + _context.shared_memory.original_num_threads; } } } } } }); - if ( _context.partition.show_detailed_clustering_timings ) { + if(_context.partition.show_detailed_clustering_timings) + { _timer.stop_timer("clustering_level_" + std::to_string(_pass_nr)); } _timer.stop_timer("clustering"); - if constexpr ( has_fixed_vertices ) { + if constexpr(has_fixed_vertices) + { // Verify fixed vertices - ASSERT([&] { - vec fixed_vertex_blocks(current_hg.initialNumNodes(), kInvalidPartition); - for ( const HypernodeID& hn : current_hg.nodes() ) { - if ( current_hg.isFixed(hn) ) { - if ( fixed_vertex_blocks[cluster_ids[hn]] != kInvalidPartition && - fixed_vertex_blocks[cluster_ids[hn]] != current_hg.fixedVertexBlock(hn)) { - LOG << "There are two nodes assigned to same cluster that belong to different fixed vertex blocks"; - return false; + ASSERT( + [&] { + vec fixed_vertex_blocks(current_hg.initialNumNodes(), + kInvalidPartition); + for(const HypernodeID &hn : current_hg.nodes()) + { + if(current_hg.isFixed(hn)) + { + if(fixed_vertex_blocks[cluster_ids[hn]] != kInvalidPartition && + fixed_vertex_blocks[cluster_ids[hn]] != + current_hg.fixedVertexBlock(hn)) + { + LOG << "There are two nodes assigned to same cluster that " + "belong to different fixed vertex blocks"; + return false; + } + fixed_vertex_blocks[cluster_ids[hn]] = current_hg.fixedVertexBlock(hn); + } } - fixed_vertex_blocks[cluster_ids[hn]] = current_hg.fixedVertexBlock(hn); - } - } - vec expected_block_weights(_context.partition.k, 0); - for ( const HypernodeID& hn : current_hg.nodes() ) { - if ( fixed_vertex_blocks[cluster_ids[hn]] != kInvalidPartition ) { - if ( !fixed_vertices.isFixed(cluster_ids[hn]) ) { - LOG << "Cluster" << cluster_ids[hn] << "should be fixed to block" - << fixed_vertex_blocks[cluster_ids[hn]]; - return false; + vec expected_block_weights(_context.partition.k, 0); + for(const HypernodeID &hn : current_hg.nodes()) + { + if(fixed_vertex_blocks[cluster_ids[hn]] != kInvalidPartition) + { + if(!fixed_vertices.isFixed(cluster_ids[hn])) + { + LOG << "Cluster" << cluster_ids[hn] << "should be fixed to block" + << fixed_vertex_blocks[cluster_ids[hn]]; + return false; + } + expected_block_weights[fixed_vertex_blocks[cluster_ids[hn]]] += + current_hg.nodeWeight(hn); + } } - expected_block_weights[fixed_vertex_blocks[cluster_ids[hn]]] += current_hg.nodeWeight(hn); - } - } - for ( PartitionID block = 0; block < _context.partition.k; ++block ) { - if ( fixed_vertices.fixedVertexBlockWeight(block) != expected_block_weights[block] ) { - LOG << "Fixed vertex block" << block << "should have weight" << expected_block_weights[block] - << ", but it is" << fixed_vertices.fixedVertexBlockWeight(block); - return false; - } - } - return true; - }(), "Fixed vertex support is corrupted"); + for(PartitionID block = 0; block < _context.partition.k; ++block) + { + if(fixed_vertices.fixedVertexBlockWeight(block) != + expected_block_weights[block]) + { + LOG << "Fixed vertex block" << block << "should have weight" + << expected_block_weights[block] << ", but it is" + << fixed_vertices.fixedVertexBlockWeight(block); + return false; + } + } + return true; + }(), + "Fixed vertex support is corrupted"); } return num_hns_before_pass - contracted_nodes.combine(std::plus<>()); } - void terminateImpl() override { + void terminateImpl() override + { _progress_bar 
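The long comment above describes how threads avoid recomputing the global node count after every contraction: each thread counts contractions locally and only recombines the thread-local counters once its count exceeds a threshold, which is then pushed out by the remaining distance to the contraction limit divided by the number of threads. A serial sketch of that bookkeeping for a single simulated thread (all numbers are invented):

#include <cstdint>
#include <iostream>

int main() {
  const std::uint32_t nodes_before_pass = 100000;
  const std::uint32_t contraction_limit = 10000;
  const std::uint32_t num_threads = 8;

  std::uint32_t local_contracted = 0;                   // per-thread counter
  std::uint32_t update_threshold = 0;                   // per-thread threshold
  std::uint32_t current_num_nodes = nodes_before_pass;  // possibly stale global estimate

  for (std::uint32_t i = 0; i < 50000; ++i) {           // simulate 50000 contractions
    ++local_contracted;
    if (local_contracted >= update_threshold) {
      // The expensive step in the parallel code: combine all thread-local counters.
      current_num_nodes = nodes_before_pass - local_contracted;
      const std::uint32_t dist = current_num_nodes > contraction_limit ?
                                 current_num_nodes - contraction_limit : 0;
      // Grow the threshold so the combine step happens only O(num_threads) more
      // times before the contraction limit can possibly be reached.
      update_threshold += dist / num_threads;
    }
  }
  std::cout << "exact: " << nodes_before_pass - local_contracted
            << ", estimate: " << current_num_nodes << "\n";
}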
+= (_initial_num_nodes - _progress_bar.count()); _progress_bar.disable(); _uncoarseningData.finalizeCoarsening(); @@ -355,13 +393,13 @@ class MultilevelCoarsener : public ICoarsener, * The following functions guarantees that our invariant is fullfilled, if * vertices are matched concurrently. */ - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool matchVertices(const Hypergraph& hypergraph, - const HypernodeID u, - const HypernodeID v, - parallel::scalable_vector& cluster_ids, - HypernodeID& contracted_nodes, - ds::FixedVertexSupport& fixed_vertices) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool + matchVertices(const Hypergraph &hypergraph, const HypernodeID u, const HypernodeID v, + parallel::scalable_vector &cluster_ids, + HypernodeID &contracted_nodes, + ds::FixedVertexSupport &fixed_vertices) + { ASSERT(u < hypergraph.initialNumNodes()); ASSERT(v < hypergraph.initialNumNodes()); uint8_t unmatched = STATE(MatchingState::UNMATCHED); @@ -372,39 +410,50 @@ class MultilevelCoarsener : public ICoarsener, bool success = false; const HypernodeWeight weight_u = hypergraph.nodeWeight(u); HypernodeWeight weight_v = _cluster_weight[v]; - if ( weight_u + weight_v <= _context.coarsening.max_allowed_node_weight ) { + if(weight_u + weight_v <= _context.coarsening.max_allowed_node_weight) + { - if ( _matching_state[u].compare_exchange_strong(unmatched, match_in_progress) ) { + if(_matching_state[u].compare_exchange_strong(unmatched, match_in_progress)) + { _matching_partner[u] = v; // Current thread gets "ownership" for vertex u. Only threads with "ownership" // can change the cluster id of a vertex. uint8_t matching_state_v = _matching_state[v].load(); - if ( matching_state_v == STATE(MatchingState::MATCHED) ) { + if(matching_state_v == STATE(MatchingState::MATCHED)) + { // Vertex v is already matched and will not change it cluster id any more. - // In that case, it is safe to set the cluster id of u to the cluster id of v. + // In that case, it is safe to set the cluster id of u to the cluster id + // of v. const HypernodeID rep = cluster_ids[v]; ASSERT(_matching_state[rep] == STATE(MatchingState::MATCHED)); - success = joinCluster(hypergraph, - u, rep, cluster_ids, contracted_nodes, fixed_vertices); - } else if ( _matching_state[v].compare_exchange_strong(unmatched, match_in_progress) ) { - // Current thread has the "ownership" for u and v and can change the cluster id - // of both vertices thread-safe. - success = joinCluster(hypergraph, - u, v, cluster_ids, contracted_nodes, fixed_vertices); + success = joinCluster(hypergraph, u, rep, cluster_ids, + contracted_nodes, fixed_vertices); + } + else if(_matching_state[v].compare_exchange_strong(unmatched, match_in_progress)) + { + // Current thread has the "ownership" for u and v and can change the + // cluster id of both vertices thread-safe. + success = joinCluster(hypergraph, u, v, cluster_ids, + contracted_nodes, fixed_vertices); _matching_state[v] = STATE(MatchingState::MATCHED); - } else { - // State of v must be either MATCHING_IN_PROGRESS or an other thread changed the state - // in the meantime to MATCHED. We have to wait until the state of v changed to - // MATCHED or resolve the conflict if u is matched within a cyclic matching dependency + } + else + { + // State of v must be either MATCHING_IN_PROGRESS or an other thread + // changed the state in the meantime to MATCHED. 
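matchVertices above serializes concurrent cluster joins through a small per-node state machine (UNMATCHED, MATCHING_IN_PROGRESS, MATCHED) driven by compare_exchange_strong: a thread may only write cluster_ids[u] after winning the CAS on u, and what happens to v depends on whether v is already MATCHED or can itself be acquired. A reduced sketch of that protocol with std::atomic, run single-threaded here and without the weight checks and conflict resolution of the real code:

#include <atomic>
#include <cstdint>
#include <iostream>
#include <vector>

enum : std::uint8_t { UNMATCHED = 0, IN_PROGRESS = 1, MATCHED = 2 };

int main() {
  const std::uint32_t n = 4;
  std::vector<std::atomic<std::uint8_t>> state(n);
  std::vector<std::uint32_t> cluster(n);
  for (std::uint32_t i = 0; i < n; ++i) { state[i] = UNMATCHED; cluster[i] = i; }

  auto try_match = [&](std::uint32_t u, std::uint32_t v) {
    std::uint8_t expected = UNMATCHED;
    // Acquire "ownership" of u: only the winner of this CAS may set cluster[u].
    if (!state[u].compare_exchange_strong(expected, IN_PROGRESS)) return false;
    if (state[v].load() == MATCHED) {
      cluster[u] = cluster[v];      // v's cluster id is final, safe to copy
    } else {
      expected = UNMATCHED;
      if (state[v].compare_exchange_strong(expected, IN_PROGRESS)) {
        cluster[u] = v;             // this thread owns both endpoints
        state[v] = MATCHED;
      }
      // (the real code additionally resolves cyclic matching dependencies here)
    }
    state[u] = MATCHED;
    return true;
  };

  try_match(0, 1);   // forms cluster {0, 1} with representative 1
  try_match(2, 1);   // 1 is already MATCHED, so 2 joins 1's cluster
  std::cout << cluster[0] << " " << cluster[2] << "\n";   // 1 1
}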
We have to wait until the + // state of v changed to MATCHED or resolve the conflict if u is matched + // within a cyclic matching dependency // Conflict Resolution - while ( _matching_state[v] == STATE(MatchingState::MATCHING_IN_PROGRESS) ) { + while(_matching_state[v] == STATE(MatchingState::MATCHING_IN_PROGRESS)) + { // Check if current vertex is in a cyclic matching dependency HypernodeID cur_u = u; HypernodeID smallest_node_id_in_cycle = cur_u; - while ( _matching_partner[cur_u] != u && _matching_partner[cur_u] != cur_u ) { + while(_matching_partner[cur_u] != u && _matching_partner[cur_u] != cur_u) + { cur_u = _matching_partner[cur_u]; smallest_node_id_in_cycle = std::min(smallest_node_id_in_cycle, cur_u); } @@ -412,9 +461,10 @@ class MultilevelCoarsener : public ICoarsener, // Resolve cyclic matching dependency // Vertex with smallest id starts to resolve conflict const bool is_in_cyclic_dependency = _matching_partner[cur_u] == u; - if ( is_in_cyclic_dependency && u == smallest_node_id_in_cycle) { - success = joinCluster(hypergraph, - u, v, cluster_ids, contracted_nodes, fixed_vertices); + if(is_in_cyclic_dependency && u == smallest_node_id_in_cycle) + { + success = joinCluster(hypergraph, u, v, cluster_ids, + contracted_nodes, fixed_vertices); _matching_state[v] = STATE(MatchingState::MATCHED); } } @@ -422,11 +472,12 @@ class MultilevelCoarsener : public ICoarsener, // If u is still in state MATCHING_IN_PROGRESS its matching partner v // must be matched in the meantime with an other vertex. Therefore, // we try to match u with the representative v's cluster. - if ( _matching_state[u] == STATE(MatchingState::MATCHING_IN_PROGRESS) ) { - ASSERT( _matching_state[v] == STATE(MatchingState::MATCHED) ); + if(_matching_state[u] == STATE(MatchingState::MATCHING_IN_PROGRESS)) + { + ASSERT(_matching_state[v] == STATE(MatchingState::MATCHED)); const HypernodeID rep = cluster_ids[v]; - success = joinCluster(hypergraph, - u, rep, cluster_ids, contracted_nodes, fixed_vertices); + success = joinCluster(hypergraph, u, rep, cluster_ids, + contracted_nodes, fixed_vertices); } } _rater.markAsMatched(u); @@ -438,25 +489,27 @@ class MultilevelCoarsener : public ICoarsener, return success; } - template - bool joinCluster(const Hypergraph& hypergraph, - const HypernodeID u, - const HypernodeID rep, - vec& cluster_ids, - HypernodeID& contracted_nodes, - ds::FixedVertexSupport& fixed_vertices) { + template + bool joinCluster(const Hypergraph &hypergraph, const HypernodeID u, + const HypernodeID rep, vec &cluster_ids, + HypernodeID &contracted_nodes, + ds::FixedVertexSupport &fixed_vertices) + { ASSERT(rep == cluster_ids[rep]); bool success = false; const HypernodeWeight weight_of_u = hypergraph.nodeWeight(u); const HypernodeWeight weight_of_rep = _cluster_weight[rep]; bool cluster_join_operation_allowed = - weight_of_u + weight_of_rep <= _context.coarsening.max_allowed_node_weight; - if constexpr ( has_fixed_vertices ) { - if ( cluster_join_operation_allowed ) { + weight_of_u + weight_of_rep <= _context.coarsening.max_allowed_node_weight; + if constexpr(has_fixed_vertices) + { + if(cluster_join_operation_allowed) + { cluster_join_operation_allowed = fixed_vertices.contract(rep, u); } } - if ( cluster_join_operation_allowed ) { + if(cluster_join_operation_allowed) + { cluster_ids[u] = rep; _cluster_weight[rep] += weight_of_u; ++contracted_nodes; @@ -467,30 +520,38 @@ class MultilevelCoarsener : public ICoarsener, return success; } - HypernodeID currentNumberOfNodesImpl() const override { + HypernodeID 
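When both endpoints are MATCHING_IN_PROGRESS, the threads may be waiting on each other along a cycle of matching partners (u wants v, v wants w, w wants u). The code above breaks the tie by walking the _matching_partner chain and letting only the smallest node id in the cycle perform the join. A standalone sketch of that cycle walk (the partner array is a made-up example):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Follows the matching-partner chain starting at u. Returns whether the chain
// leads back to u (a cyclic dependency) and the smallest node id seen on it.
std::pair<bool, std::uint32_t> cycle_info(std::uint32_t u,
                                          const std::vector<std::uint32_t>& partner) {
  std::uint32_t cur = u;
  std::uint32_t smallest = cur;
  while (partner[cur] != u && partner[cur] != cur) {
    cur = partner[cur];
    smallest = std::min(smallest, cur);
  }
  return { partner[cur] == u, smallest };
}

int main() {
  // 0 -> 2 -> 1 -> 0 is a cycle; node 3 points to itself (no dependency).
  const std::vector<std::uint32_t> partner = { 2, 0, 1, 3 };
  const auto [cyclic, smallest] = cycle_info(0, partner);
  // Only the smallest node id in the cycle (here node 0) resolves the conflict.
  std::cout << cyclic << " " << smallest << "\n";   // 1 0
}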
currentNumberOfNodesImpl() const override + { return Base::currentNumNodes(); } - mt_kahypar_hypergraph_t coarsestHypergraphImpl() override { - return mt_kahypar_hypergraph_t { - reinterpret_cast( - &Base::currentHypergraph()), Hypergraph::TYPE }; + mt_kahypar_hypergraph_t coarsestHypergraphImpl() override + { + return mt_kahypar_hypergraph_t{ reinterpret_cast( + &Base::currentHypergraph()), + Hypergraph::TYPE }; } - mt_kahypar_partitioned_hypergraph_t coarsestPartitionedHypergraphImpl() override { - return mt_kahypar_partitioned_hypergraph_t { - reinterpret_cast( - &Base::currentPartitionedHypergraph()), PartitionedHypergraph::TYPE }; + mt_kahypar_partitioned_hypergraph_t coarsestPartitionedHypergraphImpl() override + { + return mt_kahypar_partitioned_hypergraph_t{ + reinterpret_cast( + &Base::currentPartitionedHypergraph()), + PartitionedHypergraph::TYPE + }; } - HypernodeID hierarchyContractionLimit(const Hypergraph& hypergraph) const { - return std::max( static_cast( static_cast(hypergraph.initialNumNodes() - - hypergraph.numRemovedHypernodes()) / _context.coarsening.maximum_shrink_factor ), - _context.coarsening.contraction_limit ); + HypernodeID hierarchyContractionLimit(const Hypergraph &hypergraph) const + { + return std::max( + static_cast(static_cast(hypergraph.initialNumNodes() - + hypergraph.numRemovedHypernodes()) / + _context.coarsening.maximum_shrink_factor), + _context.coarsening.contraction_limit); } - using Base::_hg; using Base::_context; + using Base::_hg; using Base::_timer; using Base::_uncoarseningData; Rater _rater; @@ -504,4 +565,4 @@ class MultilevelCoarsener : public ICoarsener, bool _enable_randomization; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/multilevel_coarsener_base.h b/mt-kahypar/partition/coarsening/multilevel_coarsener_base.h index 7f950f350..b2675e3a0 100644 --- a/mt-kahypar/partition/coarsening/multilevel_coarsener_base.h +++ b/mt-kahypar/partition/coarsening/multilevel_coarsener_base.h @@ -27,66 +27,74 @@ #pragma once -#include "mt-kahypar/partition/context.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/coarsening/coarsening_commons.h" +#include "mt-kahypar/partition/context.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/utils/timer.h" - namespace mt_kahypar { -template -class MultilevelCoarsenerBase { - private: - +template +class MultilevelCoarsenerBase +{ +private: static constexpr bool debug = false; using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - MultilevelCoarsenerBase(Hypergraph& hypergraph, - const Context& context, - UncoarseningData& uncoarseningData) : - _hg(hypergraph), - _context(context), - _timer(utils::Utilities::instance().getTimer(context.utility_id)), - _uncoarseningData(uncoarseningData) {} +public: + MultilevelCoarsenerBase(Hypergraph &hypergraph, const Context &context, + UncoarseningData &uncoarseningData) : + _hg(hypergraph), + _context(context), + _timer(utils::Utilities::instance().getTimer(context.utility_id)), + _uncoarseningData(uncoarseningData) + { + } - MultilevelCoarsenerBase(const MultilevelCoarsenerBase&) = delete; - MultilevelCoarsenerBase(MultilevelCoarsenerBase&&) = delete; - MultilevelCoarsenerBase & operator= (const MultilevelCoarsenerBase &) = delete; - MultilevelCoarsenerBase & operator= (MultilevelCoarsenerBase &&) = delete; + 
MultilevelCoarsenerBase(const MultilevelCoarsenerBase &) = delete; + MultilevelCoarsenerBase(MultilevelCoarsenerBase &&) = delete; + MultilevelCoarsenerBase &operator=(const MultilevelCoarsenerBase &) = delete; + MultilevelCoarsenerBase &operator=(MultilevelCoarsenerBase &&) = delete; virtual ~MultilevelCoarsenerBase() = default; - protected: - - HypernodeID currentNumNodes() const { - if ( _uncoarseningData.hierarchy.empty() ) { +protected: + HypernodeID currentNumNodes() const + { + if(_uncoarseningData.hierarchy.empty()) + { return _hg.initialNumNodes(); - } else { + } + else + { return _uncoarseningData.hierarchy.back().contractedHypergraph().initialNumNodes(); } } - Hypergraph& currentHypergraph() { - if ( _uncoarseningData.hierarchy.empty() ) { + Hypergraph ¤tHypergraph() + { + if(_uncoarseningData.hierarchy.empty()) + { return _hg; - } else { + } + else + { return _uncoarseningData.hierarchy.back().contractedHypergraph(); } } - PartitionedHypergraph& currentPartitionedHypergraph() { + PartitionedHypergraph ¤tPartitionedHypergraph() + { ASSERT(_uncoarseningData.is_finalized); return *_uncoarseningData.partitioned_hg; - } + } - protected: - Hypergraph& _hg; - const Context& _context; - utils::Timer& _timer; - UncoarseningData& _uncoarseningData; +protected: + Hypergraph &_hg; + const Context &_context; + utils::Timer &_timer; + UncoarseningData &_uncoarseningData; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/multilevel_uncoarsener.cpp b/mt-kahypar/partition/coarsening/multilevel_uncoarsener.cpp index 39197062f..5c16eaaa4 100644 --- a/mt-kahypar/partition/coarsening/multilevel_uncoarsener.cpp +++ b/mt-kahypar/partition/coarsening/multilevel_uncoarsener.cpp @@ -30,241 +30,300 @@ #include "mt-kahypar/definitions.h" #include "mt-kahypar/io/partitioning_output.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/partition/metrics.h" -#include "mt-kahypar/utils/stats.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/utils/stats.h" namespace mt_kahypar { - template - void MultilevelUncoarsener::initializeImpl() { - PartitionedHypergraph& partitioned_hg = *_uncoarseningData.partitioned_hg; - _current_metrics = Base::initializeMetrics(partitioned_hg); - Base::initializeRefinementAlgorithms(); +template +void MultilevelUncoarsener::initializeImpl() +{ + PartitionedHypergraph &partitioned_hg = *_uncoarseningData.partitioned_hg; + _current_metrics = Base::initializeMetrics(partitioned_hg); + Base::initializeRefinementAlgorithms(); - if (_context.type == ContextType::main) { - _context.initial_km1 = _current_metrics.quality; - } + if(_context.type == ContextType::main) + { + _context.initial_km1 = _current_metrics.quality; + } - // Enable progress bar if verbose output is enabled - if ( _context.partition.verbose_output && _context.partition.enable_progress_bar && !debug ) { - _progress.enable(); - _progress.setObjective(_current_metrics.quality); - } + // Enable progress bar if verbose output is enabled + if(_context.partition.verbose_output && _context.partition.enable_progress_bar && + !debug) + { + _progress.enable(); + _progress.setObjective(_current_metrics.quality); + } - // Pass target graph to partitioned hypergraph - partitioned_hg.setTargetGraph(_target_graph); + // Pass target graph to partitioned hypergraph + partitioned_hg.setTargetGraph(_target_graph); - _current_level = _uncoarseningData.hierarchy.size(); - _num_levels = 
_current_level; - } + _current_level = _uncoarseningData.hierarchy.size(); + _num_levels = _current_level; +} - template - bool MultilevelUncoarsener::isTopLevelImpl() const { - return _current_level < 0; - } +template +bool MultilevelUncoarsener::isTopLevelImpl() const +{ + return _current_level < 0; +} - template - void MultilevelUncoarsener::projectToNextLevelAndRefineImpl() { - PartitionedHypergraph& partitioned_hg = *_uncoarseningData.partitioned_hg; - if ( _current_level == _num_levels ) { - // We always start with a refinement pass on the smallest hypergraph. - // The next calls to this function will then project the partition to the next level - // and perform refinement until we reach the input hypergraph. - IUncoarsener::refine(); - _progress.setObjective(_current_metrics.quality); - _progress += partitioned_hg.initialNumNodes(); - } else { - ASSERT(_current_level >= 0); - // Project partition to the hypergraph on the next level of the hierarchy - _timer.start_timer("projecting_partition", "Projecting Partition"); - const size_t num_nodes_on_previous_level = partitioned_hg.initialNumNodes(); - if (_current_level == 0) { - partitioned_hg.setHypergraph(_hg); - } else { - partitioned_hg.setHypergraph((_uncoarseningData.hierarchy)[_current_level-1].contractedHypergraph()); - } - // Hypergraph stores partition from previous level. - // We now extract the block ids and reset the partition to reuse the - // data structure for the current level. - partitioned_hg.extractPartIDs(_block_ids); - partitioned_hg.resetData(); - GainCachePtr::resetGainCache(_gain_cache); - - // Assign nodes of current level to their corresponding representative of the previous level - partitioned_hg.doParallelForAllNodes([&](const HypernodeID hn) { - const HypernodeID coarse_hn = (_uncoarseningData.hierarchy)[_current_level].mapToContractedHypergraph(hn); - const PartitionID block = _block_ids[coarse_hn]; - ASSERT(block != kInvalidPartition && block < partitioned_hg.k()); - partitioned_hg.setOnlyNodePart(hn, block); - }); - partitioned_hg.initializePartition(); - _timer.stop_timer("projecting_partition"); - - // Improve partition - IUncoarsener::refine(); - - // Update Progress Bar - _progress.setObjective(_current_metrics.quality); - _progress += partitioned_hg.initialNumNodes() - num_nodes_on_previous_level; +template +void MultilevelUncoarsener::projectToNextLevelAndRefineImpl() +{ + PartitionedHypergraph &partitioned_hg = *_uncoarseningData.partitioned_hg; + if(_current_level == _num_levels) + { + // We always start with a refinement pass on the smallest hypergraph. + // The next calls to this function will then project the partition to the next level + // and perform refinement until we reach the input hypergraph. + IUncoarsener::refine(); + _progress.setObjective(_current_metrics.quality); + _progress += partitioned_hg.initialNumNodes(); + } + else + { + ASSERT(_current_level >= 0); + // Project partition to the hypergraph on the next level of the hierarchy + _timer.start_timer("projecting_partition", "Projecting Partition"); + const size_t num_nodes_on_previous_level = partitioned_hg.initialNumNodes(); + if(_current_level == 0) + { + partitioned_hg.setHypergraph(_hg); + } + else + { + partitioned_hg.setHypergraph( + (_uncoarseningData.hierarchy)[_current_level - 1].contractedHypergraph()); } + // Hypergraph stores partition from previous level. + // We now extract the block ids and reset the partition to reuse the + // data structure for the current level. 
+ partitioned_hg.extractPartIDs(_block_ids); + partitioned_hg.resetData(); + GainCachePtr::resetGainCache(_gain_cache); + + // Assign nodes of current level to their corresponding representative of the previous + // level + partitioned_hg.doParallelForAllNodes([&](const HypernodeID hn) { + const HypernodeID coarse_hn = + (_uncoarseningData.hierarchy)[_current_level].mapToContractedHypergraph(hn); + const PartitionID block = _block_ids[coarse_hn]; + ASSERT(block != kInvalidPartition && block < partitioned_hg.k()); + partitioned_hg.setOnlyNodePart(hn, block); + }); + partitioned_hg.initializePartition(); + _timer.stop_timer("projecting_partition"); + + // Improve partition + IUncoarsener::refine(); + + // Update Progress Bar + _progress.setObjective(_current_metrics.quality); + _progress += partitioned_hg.initialNumNodes() - num_nodes_on_previous_level; + } - ASSERT(metrics::quality(*_uncoarseningData.partitioned_hg, _context) == _current_metrics.quality, - V(_current_metrics.quality) << V(metrics::quality(*_uncoarseningData.partitioned_hg, _context))); + ASSERT(metrics::quality(*_uncoarseningData.partitioned_hg, _context) == + _current_metrics.quality, + V(_current_metrics.quality) + << V(metrics::quality(*_uncoarseningData.partitioned_hg, _context))); - --_current_level; - } + --_current_level; +} - template - void MultilevelUncoarsener::rebalancingImpl() { - // If we reach the top-level hypergraph and the partition is still imbalanced, - // we use a rebalancing algorithm to restore balance. - if (_context.type == ContextType::main && !metrics::isBalanced(*_uncoarseningData.partitioned_hg, _context)) { - const HyperedgeWeight quality_before = _current_metrics.quality; - if (_context.partition.verbose_output) { - LOG << RED << "Partition is imbalanced (Current Imbalance:" - << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ")" << END; - - LOG << "Part weights: (violations in red)"; - io::printPartWeightsAndSizes(*_uncoarseningData.partitioned_hg, _context); - } +template +void MultilevelUncoarsener::rebalancingImpl() +{ + // If we reach the top-level hypergraph and the partition is still imbalanced, + // we use a rebalancing algorithm to restore balance. + if(_context.type == ContextType::main && + !metrics::isBalanced(*_uncoarseningData.partitioned_hg, _context)) + { + const HyperedgeWeight quality_before = _current_metrics.quality; + if(_context.partition.verbose_output) + { + LOG << RED << "Partition is imbalanced (Current Imbalance:" + << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ")" + << END; + + LOG << "Part weights: (violations in red)"; + io::printPartWeightsAndSizes(*_uncoarseningData.partitioned_hg, _context); + } - if ( !_context.partition.deterministic ) { - if (_context.partition.verbose_output) { - LOG << RED << "Start rebalancing!" << END; - } + if(!_context.partition.deterministic) + { + if(_context.partition.verbose_output) + { + LOG << RED << "Start rebalancing!" 
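Projecting the partition one level down, as done in the parallel loop above, is an indirection through the contraction mapping: every fine node inherits the block of the coarse node it was contracted into. A minimal sketch with plain vectors (the real code does this inside doParallelForAllNodes; the mapping and block ids below are toy data):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // map_to_coarse[fine_node] = coarse node it was contracted into
  const std::vector<std::uint32_t> map_to_coarse = { 0, 0, 1, 1, 2 };
  // block id of every coarse node, extracted from the previous level's partition
  const std::vector<std::int32_t> coarse_block = { 0, 1, 1 };

  // Assign every fine node the block of its coarse representative.
  std::vector<std::int32_t> fine_block(map_to_coarse.size());
  for (std::size_t hn = 0; hn < map_to_coarse.size(); ++hn) {
    fine_block[hn] = coarse_block[map_to_coarse[hn]];
  }

  for (const std::int32_t b : fine_block) std::cout << b << ' ';   // 0 0 1 1 1
  std::cout << '\n';
}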
<< END; + } - // Preform rebalancing - _timer.start_timer("rebalance", "Rebalance"); - mt_kahypar_partitioned_hypergraph_t phg = + // Preform rebalancing + _timer.start_timer("rebalance", "Rebalance"); + mt_kahypar_partitioned_hypergraph_t phg = utils::partitioned_hg_cast(*_uncoarseningData.partitioned_hg); - _rebalancer->refine(phg, {}, _current_metrics, 0.0); - _timer.stop_timer("rebalance"); - - const HyperedgeWeight quality_after = _current_metrics.quality; - if (_context.partition.verbose_output) { - const HyperedgeWeight quality_delta = quality_after - quality_before; - if (quality_delta > 0) { - LOG << RED << "Rebalancer decreased solution quality by" << quality_delta - << "(Current Imbalance:" << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ")" << END; - } else { - LOG << GREEN << "Rebalancer improves solution quality by" << abs(quality_delta) - << "(Current Imbalance:" << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ")" << END; - } + _rebalancer->refine(phg, {}, _current_metrics, 0.0); + _timer.stop_timer("rebalance"); + + const HyperedgeWeight quality_after = _current_metrics.quality; + if(_context.partition.verbose_output) + { + const HyperedgeWeight quality_delta = quality_after - quality_before; + if(quality_delta > 0) + { + LOG << RED << "Rebalancer decreased solution quality by" << quality_delta + << "(Current Imbalance:" + << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ")" + << END; } - } else { - if (_context.partition.verbose_output) { - LOG << RED << "Skip rebalancing since deterministic mode is activated" << END; + else + { + LOG << GREEN << "Rebalancer improves solution quality by" << abs(quality_delta) + << "(Current Imbalance:" + << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ")" + << END; } } - - - ASSERT(metrics::quality(*_uncoarseningData.partitioned_hg, _context) == _current_metrics.quality, - V(_current_metrics.quality) << V(metrics::quality(*_uncoarseningData.partitioned_hg, _context))); } - } - - template - HyperedgeWeight MultilevelUncoarsener::getObjectiveImpl() const { - return _current_metrics.quality; - } - - template - void MultilevelUncoarsener::updateMetricsImpl() { - _current_metrics = Base::initializeMetrics(*_uncoarseningData.partitioned_hg); - _progress.setObjective(_current_metrics.quality); - } - - template - typename TypeTraits::PartitionedHypergraph& MultilevelUncoarsener::currentPartitionedHypergraphImpl() { - return *_uncoarseningData.partitioned_hg; - } + else + { + if(_context.partition.verbose_output) + { + LOG << RED << "Skip rebalancing since deterministic mode is activated" << END; + } + } - template - HypernodeID MultilevelUncoarsener::currentNumberOfNodesImpl() const { - return _uncoarseningData.partitioned_hg->initialNumNodes(); + ASSERT(metrics::quality(*_uncoarseningData.partitioned_hg, _context) == + _current_metrics.quality, + V(_current_metrics.quality) + << V(metrics::quality(*_uncoarseningData.partitioned_hg, _context))); } +} - template - typename TypeTraits::PartitionedHypergraph&& MultilevelUncoarsener::movePartitionedHypergraphImpl() { - ASSERT(isTopLevelImpl()); - return std::move(*_uncoarseningData.partitioned_hg); - } +template +HyperedgeWeight MultilevelUncoarsener::getObjectiveImpl() const +{ + return _current_metrics.quality; +} - template - void MultilevelUncoarsener::refineImpl() { - PartitionedHypergraph& partitioned_hypergraph = *_uncoarseningData.partitioned_hg; - const double time_limit = 
Base::refinementTimeLimit(_context, (_uncoarseningData.hierarchy)[_current_level].coarseningTime()); +template +void MultilevelUncoarsener::updateMetricsImpl() +{ + _current_metrics = Base::initializeMetrics(*_uncoarseningData.partitioned_hg); + _progress.setObjective(_current_metrics.quality); +} - if ( debug && _context.type == ContextType::main ) { - io::printHypergraphInfo(partitioned_hypergraph.hypergraph(), - _context, "Refinement Hypergraph", false); - DBG << "Start Refinement -" << _context.partition.objective << "=" << _current_metrics.quality - << ", imbalance = " << _current_metrics.imbalance; - } +template +typename TypeTraits::PartitionedHypergraph & +MultilevelUncoarsener::currentPartitionedHypergraphImpl() +{ + return *_uncoarseningData.partitioned_hg; +} - parallel::scalable_vector dummy; - bool improvement_found = true; - mt_kahypar_partitioned_hypergraph_t phg = utils::partitioned_hg_cast(partitioned_hypergraph); - while( improvement_found ) { - improvement_found = false; - const HyperedgeWeight metric_before = _current_metrics.quality; +template +HypernodeID MultilevelUncoarsener::currentNumberOfNodesImpl() const +{ + return _uncoarseningData.partitioned_hg->initialNumNodes(); +} - if ( _rebalancer && _context.refinement.rebalancer != RebalancingAlgorithm::do_nothing ) { - _rebalancer->initialize(phg); - } +template +typename TypeTraits::PartitionedHypergraph && +MultilevelUncoarsener::movePartitionedHypergraphImpl() +{ + ASSERT(isTopLevelImpl()); + return std::move(*_uncoarseningData.partitioned_hg); +} - if ( _label_propagation && _context.refinement.label_propagation.algorithm != LabelPropagationAlgorithm::do_nothing ) { - _timer.start_timer("initialize_lp_refiner", "Initialize LP Refiner"); - _label_propagation->initialize(phg); - _timer.stop_timer("initialize_lp_refiner"); +template +void MultilevelUncoarsener::refineImpl() +{ + PartitionedHypergraph &partitioned_hypergraph = *_uncoarseningData.partitioned_hg; + const double time_limit = Base::refinementTimeLimit( + _context, (_uncoarseningData.hierarchy)[_current_level].coarseningTime()); + + if(debug && _context.type == ContextType::main) + { + io::printHypergraphInfo(partitioned_hypergraph.hypergraph(), _context, + "Refinement Hypergraph", false); + DBG << "Start Refinement -" << _context.partition.objective << "=" + << _current_metrics.quality << ", imbalance = " << _current_metrics.imbalance; + } - _timer.start_timer("label_propagation", "Label Propagation"); - improvement_found |= _label_propagation->refine(phg, dummy, _current_metrics, time_limit); - _timer.stop_timer("label_propagation"); - } + parallel::scalable_vector dummy; + bool improvement_found = true; + mt_kahypar_partitioned_hypergraph_t phg = + utils::partitioned_hg_cast(partitioned_hypergraph); + while(improvement_found) + { + improvement_found = false; + const HyperedgeWeight metric_before = _current_metrics.quality; + + if(_rebalancer && _context.refinement.rebalancer != RebalancingAlgorithm::do_nothing) + { + _rebalancer->initialize(phg); + } - if ( _fm && _context.refinement.fm.algorithm != FMAlgorithm::do_nothing ) { - _timer.start_timer("initialize_fm_refiner", "Initialize FM Refiner"); - _fm->initialize(phg); - _timer.stop_timer("initialize_fm_refiner"); + if(_label_propagation && _context.refinement.label_propagation.algorithm != + LabelPropagationAlgorithm::do_nothing) + { + _timer.start_timer("initialize_lp_refiner", "Initialize LP Refiner"); + _label_propagation->initialize(phg); + _timer.stop_timer("initialize_lp_refiner"); + + 
_timer.start_timer("label_propagation", "Label Propagation"); + improvement_found |= + _label_propagation->refine(phg, dummy, _current_metrics, time_limit); + _timer.stop_timer("label_propagation"); + } - _timer.start_timer("fm", "FM"); - improvement_found |= _fm->refine(phg, dummy, _current_metrics, time_limit); - _timer.stop_timer("fm"); - } + if(_fm && _context.refinement.fm.algorithm != FMAlgorithm::do_nothing) + { + _timer.start_timer("initialize_fm_refiner", "Initialize FM Refiner"); + _fm->initialize(phg); + _timer.stop_timer("initialize_fm_refiner"); - if ( _flows && _context.refinement.flows.algorithm != FlowAlgorithm::do_nothing ) { - _timer.start_timer("initialize_flow_scheduler", "Initialize Flow Scheduler"); - _flows->initialize(phg); - _timer.stop_timer("initialize_flow_scheduler"); + _timer.start_timer("fm", "FM"); + improvement_found |= _fm->refine(phg, dummy, _current_metrics, time_limit); + _timer.stop_timer("fm"); + } - _timer.start_timer("flow_refinement_scheduler", "Flow Refinement Scheduler"); - improvement_found |= _flows->refine(phg, dummy, _current_metrics, time_limit); - _timer.stop_timer("flow_refinement_scheduler"); - } + if(_flows && _context.refinement.flows.algorithm != FlowAlgorithm::do_nothing) + { + _timer.start_timer("initialize_flow_scheduler", "Initialize Flow Scheduler"); + _flows->initialize(phg); + _timer.stop_timer("initialize_flow_scheduler"); - if ( _context.type == ContextType::main ) { - ASSERT(_current_metrics.quality == metrics::quality(partitioned_hypergraph, _context), - "Actual metric" << V(metrics::quality(partitioned_hypergraph, _context)) << - "does not match the metric updated by the refiners" << V(_current_metrics.quality)); - } + _timer.start_timer("flow_refinement_scheduler", "Flow Refinement Scheduler"); + improvement_found |= _flows->refine(phg, dummy, _current_metrics, time_limit); + _timer.stop_timer("flow_refinement_scheduler"); + } - const HyperedgeWeight metric_after = _current_metrics.quality; - const double relative_improvement = 1.0 - - static_cast(metric_after) / metric_before; - if ( !_context.refinement.refine_until_no_improvement || - relative_improvement <= _context.refinement.relative_improvement_threshold ) { - break; - } + if(_context.type == ContextType::main) + { + ASSERT(_current_metrics.quality == + metrics::quality(partitioned_hypergraph, _context), + "Actual metric" << V(metrics::quality(partitioned_hypergraph, _context)) + << "does not match the metric updated by the refiners" + << V(_current_metrics.quality)); } - if ( _context.type == ContextType::main) { - DBG << "--------------------------------------------------\n"; + const HyperedgeWeight metric_after = _current_metrics.quality; + const double relative_improvement = + 1.0 - static_cast(metric_after) / metric_before; + if(!_context.refinement.refine_until_no_improvement || + relative_improvement <= _context.refinement.relative_improvement_threshold) + { + break; } } - INSTANTIATE_CLASS_WITH_TYPE_TRAITS(MultilevelUncoarsener) + if(_context.type == ContextType::main) + { + DBG << "--------------------------------------------------\n"; + } +} + +INSTANTIATE_CLASS_WITH_TYPE_TRAITS(MultilevelUncoarsener) } diff --git a/mt-kahypar/partition/coarsening/multilevel_uncoarsener.h b/mt-kahypar/partition/coarsening/multilevel_uncoarsener.h index 9ee6275d0..83c6426ef 100644 --- a/mt-kahypar/partition/coarsening/multilevel_uncoarsener.h +++ b/mt-kahypar/partition/coarsening/multilevel_uncoarsener.h @@ -28,10 +28,10 @@ #pragma once -#include 
"mt-kahypar/partition/context.h" #include "mt-kahypar/partition/coarsening/coarsening_commons.h" #include "mt-kahypar/partition/coarsening/i_uncoarsener.h" #include "mt-kahypar/partition/coarsening/uncoarsener_base.h" +#include "mt-kahypar/partition/context.h" #include "mt-kahypar/utils/progress_bar.h" namespace mt_kahypar { @@ -39,9 +39,10 @@ namespace mt_kahypar { // Forward Declaration class TargetGraph; -template +template class MultilevelUncoarsener : public IUncoarsener, - private UncoarsenerBase { + private UncoarsenerBase +{ using Base = UncoarsenerBase; using Hypergraph = typename TypeTraits::Hypergraph; @@ -50,25 +51,23 @@ class MultilevelUncoarsener : public IUncoarsener, static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - public: - MultilevelUncoarsener(Hypergraph& hypergraph, - const Context& context, - UncoarseningData& uncoarseningData, - const TargetGraph* target_graph) : +public: + MultilevelUncoarsener(Hypergraph &hypergraph, const Context &context, + UncoarseningData &uncoarseningData, + const TargetGraph *target_graph) : Base(hypergraph, context, uncoarseningData), - _target_graph(target_graph), - _current_level(0), - _num_levels(0), - _block_ids(hypergraph.initialNumNodes(), kInvalidPartition), - _current_metrics(), - _progress(hypergraph.initialNumNodes(), 0, false) { } - - MultilevelUncoarsener(const MultilevelUncoarsener&) = delete; - MultilevelUncoarsener(MultilevelUncoarsener&&) = delete; - MultilevelUncoarsener & operator= (const MultilevelUncoarsener &) = delete; - MultilevelUncoarsener & operator= (MultilevelUncoarsener &&) = delete; - - private: + _target_graph(target_graph), _current_level(0), _num_levels(0), + _block_ids(hypergraph.initialNumNodes(), kInvalidPartition), _current_metrics(), + _progress(hypergraph.initialNumNodes(), 0, false) + { + } + + MultilevelUncoarsener(const MultilevelUncoarsener &) = delete; + MultilevelUncoarsener(MultilevelUncoarsener &&) = delete; + MultilevelUncoarsener &operator=(const MultilevelUncoarsener &) = delete; + MultilevelUncoarsener &operator=(MultilevelUncoarsener &&) = delete; + +private: void initializeImpl() override; bool isTopLevelImpl() const override; @@ -79,31 +78,29 @@ class MultilevelUncoarsener : public IUncoarsener, void rebalancingImpl() override; - gain_cache_t getGainCacheImpl() override { - return _gain_cache; - } + gain_cache_t getGainCacheImpl() override { return _gain_cache; } HyperedgeWeight getObjectiveImpl() const override; void updateMetricsImpl() override; - PartitionedHypergraph& currentPartitionedHypergraphImpl() override; + PartitionedHypergraph ¤tPartitionedHypergraphImpl() override; HypernodeID currentNumberOfNodesImpl() const override; - PartitionedHypergraph&& movePartitionedHypergraphImpl() override; + PartitionedHypergraph &&movePartitionedHypergraphImpl() override; - using Base::_hg; using Base::_context; - using Base::_uncoarseningData; + using Base::_flows; + using Base::_fm; using Base::_gain_cache; + using Base::_hg; using Base::_label_propagation; - using Base::_fm; - using Base::_flows; using Base::_rebalancer; using Base::_timer; + using Base::_uncoarseningData; - const TargetGraph* _target_graph; + const TargetGraph *_target_graph; int _current_level; int _num_levels; ds::Array _block_ids; diff --git a/mt-kahypar/partition/coarsening/multilevel_vertex_pair_rater.h b/mt-kahypar/partition/coarsening/multilevel_vertex_pair_rater.h index b8a11223f..9f18cc4c5 100644 --- a/mt-kahypar/partition/coarsening/multilevel_vertex_pair_rater.h +++ 
b/mt-kahypar/partition/coarsening/multilevel_vertex_pair_rater.h @@ -38,49 +38,54 @@ #include "kahypar-resources/meta/mandatory.h" #include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/coarsening/policies/rating_fixed_vertex_acceptance_policy.h" - +#include "mt-kahypar/partition/context.h" namespace mt_kahypar { -template -class MultilevelVertexPairRater { +class MultilevelVertexPairRater +{ using LargeTmpRatingMap = ds::SparseMap; using CacheEfficientRatingMap = ds::FixedSizeSparseMap; - using ThreadLocalCacheEfficientRatingMap = tbb::enumerable_thread_specific; - using ThreadLocalVertexDegreeBoundedRatingMap = tbb::enumerable_thread_specific; + using ThreadLocalCacheEfficientRatingMap = + tbb::enumerable_thread_specific; + using ThreadLocalVertexDegreeBoundedRatingMap = + tbb::enumerable_thread_specific; using ThreadLocalLargeTmpRatingMap = tbb::enumerable_thread_specific; - using ThreadLocalFastResetFlagArray = tbb::enumerable_thread_specific >; + using ThreadLocalFastResetFlagArray = + tbb::enumerable_thread_specific >; - private: +private: static constexpr bool debug = false; - class VertexPairRating { - public: + class VertexPairRating + { + public: VertexPairRating(HypernodeID trgt, RatingType val, bool is_valid) : - target(trgt), - value(val), - valid(is_valid) { } + target(trgt), value(val), valid(is_valid) + { + } VertexPairRating() : - target(std::numeric_limits::max()), - value(std::numeric_limits::min()), - valid(false) { } + target(std::numeric_limits::max()), + value(std::numeric_limits::min()), valid(false) + { + } - VertexPairRating(const VertexPairRating&) = delete; - VertexPairRating & operator= (const VertexPairRating &) = delete; + VertexPairRating(const VertexPairRating &) = delete; + VertexPairRating &operator=(const VertexPairRating &) = delete; - VertexPairRating(VertexPairRating&&) = default; - VertexPairRating & operator= (VertexPairRating &&) = delete; + VertexPairRating(VertexPairRating &&) = default; + VertexPairRating &operator=(VertexPairRating &&) = delete; HypernodeID target; RatingType value; bool valid; }; - enum class RatingMapType { + enum class RatingMapType + { CACHE_EFFICIENT_RATING_MAP, VERTEX_DEGREE_BOUNDED_RATING_MAP, LARGE_RATING_MAP @@ -88,85 +93,98 @@ class MultilevelVertexPairRater { using AtomicWeight = parallel::IntegralAtomicWrapper; - public: +public: using Rating = VertexPairRating; MultilevelVertexPairRater(const HypernodeID num_hypernodes, - const HypernodeID max_edge_size, - const Context& context) : - _context(context), - _current_num_nodes(num_hypernodes), - _vertex_degree_sampling_threshold(context.coarsening.vertex_degree_sampling_threshold), - _local_cache_efficient_rating_map(0.0), - _local_vertex_degree_bounded_rating_map(3UL * _vertex_degree_sampling_threshold, 0.0), - _local_large_rating_map([&] { - return construct_large_tmp_rating_map(); - }), - // Should give a false positive rate < 1% - _bloom_filter_mask(align_to_next_power_of_two( - std::min(ID(10) * max_edge_size, _current_num_nodes)) - 1), - _local_bloom_filter(_bloom_filter_mask + 1), - _already_matched(num_hypernodes) { } - - MultilevelVertexPairRater(const MultilevelVertexPairRater&) = delete; - MultilevelVertexPairRater & operator= (const MultilevelVertexPairRater &) = delete; - - MultilevelVertexPairRater(MultilevelVertexPairRater&&) = delete; - MultilevelVertexPairRater & operator= (MultilevelVertexPairRater &&) = delete; - - template - VertexPairRating rate(const Hypergraph& hypergraph, - 
const HypernodeID u, - const parallel::scalable_vector& cluster_ids, - const parallel::scalable_vector& cluster_weight, - const ds::FixedVertexSupport& fixed_vertices, - const HypernodeWeight max_allowed_node_weight) { - - const RatingMapType rating_map_type = getRatingMapTypeForRatingOfHypernode(hypergraph, u); - if ( rating_map_type == RatingMapType::CACHE_EFFICIENT_RATING_MAP ) { - return rate(hypergraph, u, _local_cache_efficient_rating_map.local(), - cluster_ids, cluster_weight, fixed_vertices, max_allowed_node_weight, false); - } else if ( rating_map_type == RatingMapType::VERTEX_DEGREE_BOUNDED_RATING_MAP ) { - return rate(hypergraph, u, _local_vertex_degree_bounded_rating_map.local(), - cluster_ids, cluster_weight, fixed_vertices, max_allowed_node_weight, true); - } else { - LargeTmpRatingMap& large_tmp_rating_map = _local_large_rating_map.local(); + const HypernodeID max_edge_size, const Context &context) : + _context(context), + _current_num_nodes(num_hypernodes), + _vertex_degree_sampling_threshold( + context.coarsening.vertex_degree_sampling_threshold), + _local_cache_efficient_rating_map(0.0), + _local_vertex_degree_bounded_rating_map(3UL * _vertex_degree_sampling_threshold, + 0.0), + _local_large_rating_map([&] { return construct_large_tmp_rating_map(); }), + // Should give a false positive rate < 1% + _bloom_filter_mask(align_to_next_power_of_two( + std::min(ID(10) * max_edge_size, _current_num_nodes)) - + 1), + _local_bloom_filter(_bloom_filter_mask + 1), _already_matched(num_hypernodes) + { + } + + MultilevelVertexPairRater(const MultilevelVertexPairRater &) = delete; + MultilevelVertexPairRater &operator=(const MultilevelVertexPairRater &) = delete; + + MultilevelVertexPairRater(MultilevelVertexPairRater &&) = delete; + MultilevelVertexPairRater &operator=(MultilevelVertexPairRater &&) = delete; + + template + VertexPairRating rate(const Hypergraph &hypergraph, const HypernodeID u, + const parallel::scalable_vector &cluster_ids, + const parallel::scalable_vector &cluster_weight, + const ds::FixedVertexSupport &fixed_vertices, + const HypernodeWeight max_allowed_node_weight) + { + + const RatingMapType rating_map_type = + getRatingMapTypeForRatingOfHypernode(hypergraph, u); + if(rating_map_type == RatingMapType::CACHE_EFFICIENT_RATING_MAP) + { + return rate( + hypergraph, u, _local_cache_efficient_rating_map.local(), cluster_ids, + cluster_weight, fixed_vertices, max_allowed_node_weight, false); + } + else if(rating_map_type == RatingMapType::VERTEX_DEGREE_BOUNDED_RATING_MAP) + { + return rate( + hypergraph, u, _local_vertex_degree_bounded_rating_map.local(), cluster_ids, + cluster_weight, fixed_vertices, max_allowed_node_weight, true); + } + else + { + LargeTmpRatingMap &large_tmp_rating_map = _local_large_rating_map.local(); large_tmp_rating_map.setMaxSize(_current_num_nodes); - return rate(hypergraph, u, large_tmp_rating_map, - cluster_ids, cluster_weight, fixed_vertices, max_allowed_node_weight, false); + return rate(hypergraph, u, large_tmp_rating_map, cluster_ids, + cluster_weight, fixed_vertices, + max_allowed_node_weight, false); } } // ! Several threads will mark matches in parallel. However, since // ! we only set the corresponding value to true this function is // ! thread-safe. - void markAsMatched(const HypernodeID original_id) { + void markAsMatched(const HypernodeID original_id) + { _already_matched.set(original_id, true); } // ! 
Note, this function is not thread safe - void resetMatches() { - _already_matched.reset(); - } + void resetMatches() { _already_matched.reset(); } - void setCurrentNumberOfNodes(const HypernodeID current_num_nodes) { + void setCurrentNumberOfNodes(const HypernodeID current_num_nodes) + { _current_num_nodes = current_num_nodes; } - private: - template - VertexPairRating rate(const Hypergraph& hypergraph, - const HypernodeID u, - RatingMap& tmp_ratings, - const parallel::scalable_vector& cluster_ids, - const parallel::scalable_vector& cluster_weight, - const ds::FixedVertexSupport& fixed_vertices, +private: + template + VertexPairRating rate(const Hypergraph &hypergraph, const HypernodeID u, + RatingMap &tmp_ratings, + const parallel::scalable_vector &cluster_ids, + const parallel::scalable_vector &cluster_weight, + const ds::FixedVertexSupport &fixed_vertices, const HypernodeWeight max_allowed_node_weight, - const bool use_vertex_degree_sampling) { + const bool use_vertex_degree_sampling) + { - if ( use_vertex_degree_sampling ) { + if(use_vertex_degree_sampling) + { fillRatingMapWithSampling(hypergraph, u, tmp_ratings, cluster_ids); - } else { + } + else + { fillRatingMap(hypergraph, u, tmp_ratings, cluster_ids); } @@ -176,28 +194,33 @@ class MultilevelVertexPairRater { RatingType max_rating = std::numeric_limits::min(); HypernodeID target = std::numeric_limits::max(); HypernodeID target_id = std::numeric_limits::max(); - for (auto it = tmp_ratings.end() - 1; it >= tmp_ratings.begin(); --it) { + for(auto it = tmp_ratings.end() - 1; it >= tmp_ratings.begin(); --it) + { const HypernodeID tmp_target_id = it->key; const HypernodeID tmp_target = tmp_target_id; const HypernodeWeight target_weight = cluster_weight[tmp_target_id]; - if ( tmp_target != u && weight_u + target_weight <= max_allowed_node_weight ) { - HypernodeWeight penalty = HeavyNodePenaltyPolicy::penalty(weight_u, target_weight); + if(tmp_target != u && weight_u + target_weight <= max_allowed_node_weight) + { + HypernodeWeight penalty = + HeavyNodePenaltyPolicy::penalty(weight_u, target_weight); penalty = penalty == 0 ? 
std::max(std::max(weight_u, target_weight), 1) : penalty; const RatingType tmp_rating = it->value / static_cast(penalty); bool accept_fixed_vertex_contraction = true; - if constexpr ( has_fixed_vertices ) { + if constexpr(has_fixed_vertices) + { accept_fixed_vertex_contraction = - FixedVertexAcceptancePolicy::acceptContraction( - hypergraph, fixed_vertices, _context, tmp_target, u); + FixedVertexAcceptancePolicy::acceptContraction(hypergraph, fixed_vertices, + _context, tmp_target, u); } DBG << "r(" << u << "," << tmp_target << ")=" << tmp_rating; - if ( accept_fixed_vertex_contraction && - community_u_id == hypergraph.communityID(tmp_target) && - AcceptancePolicy::acceptRating( tmp_rating, max_rating, - target_id, tmp_target_id, cpu_id, _already_matched) ) { + if(accept_fixed_vertex_contraction && + community_u_id == hypergraph.communityID(tmp_target) && + AcceptancePolicy::acceptRating(tmp_rating, max_rating, target_id, + tmp_target_id, cpu_id, _already_matched)) + { max_rating = tmp_rating; target_id = tmp_target_id; target = tmp_target; @@ -206,8 +229,10 @@ class MultilevelVertexPairRater { } VertexPairRating ret; - if (max_rating != std::numeric_limits::min()) { - ASSERT(target != std::numeric_limits::max(), "invalid contraction target"); + if(max_rating != std::numeric_limits::min()) + { + ASSERT(target != std::numeric_limits::max(), + "invalid contraction target"); ret.value = max_rating; ret.target = target; ret.valid = true; @@ -216,33 +241,45 @@ class MultilevelVertexPairRater { return ret; } - template - void fillRatingMap(const Hypergraph& hypergraph, - const HypernodeID u, - RatingMap& tmp_ratings, - const parallel::scalable_vector& cluster_ids) { - if constexpr (Hypergraph::is_graph) { - for ( const HyperedgeID& he : hypergraph.incidentEdges(u) ) { - const RatingType score = ScorePolicy::score(hypergraph.edgeWeight(he), hypergraph.edgeSize(he)); + template + void fillRatingMap(const Hypergraph &hypergraph, const HypernodeID u, + RatingMap &tmp_ratings, + const parallel::scalable_vector &cluster_ids) + { + if constexpr(Hypergraph::is_graph) + { + for(const HyperedgeID &he : hypergraph.incidentEdges(u)) + { + const RatingType score = + ScorePolicy::score(hypergraph.edgeWeight(he), hypergraph.edgeSize(he)); const HypernodeID representative = cluster_ids[hypergraph.edgeTarget(he)]; ASSERT(representative < hypergraph.initialNumNodes()); tmp_ratings[representative] += score; } - } else { - kahypar::ds::FastResetFlagArray<>& bloom_filter = _local_bloom_filter.local(); - for ( const HyperedgeID& he : hypergraph.incidentEdges(u) ) { + } + else + { + kahypar::ds::FastResetFlagArray<> &bloom_filter = _local_bloom_filter.local(); + for(const HyperedgeID &he : hypergraph.incidentEdges(u)) + { HypernodeID edge_size = hypergraph.edgeSize(he); ASSERT(edge_size > 1, V(he)); - if ( edge_size < _context.partition.ignore_hyperedge_size_threshold ) { - edge_size = _context.coarsening.use_adaptive_edge_size ? - std::max(adaptiveEdgeSize(hypergraph, he, bloom_filter, cluster_ids), ID(2)) : edge_size; - const RatingType score = ScorePolicy::score( - hypergraph.edgeWeight(he), edge_size); - for ( const HypernodeID& v : hypergraph.pins(he) ) { + if(edge_size < _context.partition.ignore_hyperedge_size_threshold) + { + edge_size = + _context.coarsening.use_adaptive_edge_size ? 
+ std::max(adaptiveEdgeSize(hypergraph, he, bloom_filter, cluster_ids), + ID(2)) : + edge_size; + const RatingType score = + ScorePolicy::score(hypergraph.edgeWeight(he), edge_size); + for(const HypernodeID &v : hypergraph.pins(he)) + { const HypernodeID representative = cluster_ids[v]; ASSERT(representative < hypergraph.initialNumNodes()); const HypernodeID bloom_filter_rep = representative & _bloom_filter_mask; - if ( !bloom_filter[bloom_filter_rep] ) { + if(!bloom_filter[bloom_filter_rep]) + { tmp_ratings[representative] += score; bloom_filter.set(bloom_filter_rep, true); } @@ -253,44 +290,59 @@ class MultilevelVertexPairRater { } } - template - void fillRatingMapWithSampling(const Hypergraph& hypergraph, - const HypernodeID u, - RatingMap& tmp_ratings, - const parallel::scalable_vector& cluster_ids) { + template + void + fillRatingMapWithSampling(const Hypergraph &hypergraph, const HypernodeID u, + RatingMap &tmp_ratings, + const parallel::scalable_vector &cluster_ids) + { size_t num_tmp_rating_map_accesses = 0; - if constexpr (Hypergraph::is_graph) { - for ( const HyperedgeID& he : hypergraph.incidentEdges(u) ) { + if constexpr(Hypergraph::is_graph) + { + for(const HyperedgeID &he : hypergraph.incidentEdges(u)) + { // Break if number of accesses to the tmp rating map would exceed // vertex degree sampling threshold - if ( num_tmp_rating_map_accesses >= _vertex_degree_sampling_threshold ) { + if(num_tmp_rating_map_accesses >= _vertex_degree_sampling_threshold) + { break; } - const RatingType score = ScorePolicy::score(hypergraph.edgeWeight(he), hypergraph.edgeSize(he)); + const RatingType score = + ScorePolicy::score(hypergraph.edgeWeight(he), hypergraph.edgeSize(he)); const HypernodeID representative = cluster_ids[hypergraph.edgeTarget(he)]; ASSERT(representative < hypergraph.initialNumNodes()); tmp_ratings[representative] += score; ++num_tmp_rating_map_accesses; } - } else { - kahypar::ds::FastResetFlagArray<>& bloom_filter = _local_bloom_filter.local(); - for ( const HyperedgeID& he : hypergraph.incidentEdges(u) ) { + } + else + { + kahypar::ds::FastResetFlagArray<> &bloom_filter = _local_bloom_filter.local(); + for(const HyperedgeID &he : hypergraph.incidentEdges(u)) + { HypernodeID edge_size = hypergraph.edgeSize(he); - if ( edge_size < _context.partition.ignore_hyperedge_size_threshold ) { - edge_size = _context.coarsening.use_adaptive_edge_size ? - std::max(adaptiveEdgeSize(hypergraph, he, bloom_filter, cluster_ids), ID(2)) : edge_size; + if(edge_size < _context.partition.ignore_hyperedge_size_threshold) + { + edge_size = + _context.coarsening.use_adaptive_edge_size ? 
+ std::max(adaptiveEdgeSize(hypergraph, he, bloom_filter, cluster_ids), + ID(2)) : + edge_size; // Break if number of accesses to the tmp rating map would exceed // vertex degree sampling threshold - if ( num_tmp_rating_map_accesses + edge_size > _vertex_degree_sampling_threshold ) { + if(num_tmp_rating_map_accesses + edge_size > _vertex_degree_sampling_threshold) + { break; } - const RatingType score = ScorePolicy::score( - hypergraph.edgeWeight(he), edge_size); - for ( const HypernodeID& v : hypergraph.pins(he) ) { + const RatingType score = + ScorePolicy::score(hypergraph.edgeWeight(he), edge_size); + for(const HypernodeID &v : hypergraph.pins(he)) + { const HypernodeID representative = cluster_ids[v]; ASSERT(representative < hypergraph.initialNumNodes()); const HypernodeID bloom_filter_rep = representative & _bloom_filter_mask; - if ( !bloom_filter[bloom_filter_rep] ) { + if(!bloom_filter[bloom_filter_rep]) + { tmp_ratings[representative] += score; bloom_filter.set(bloom_filter_rep, true); ++num_tmp_rating_map_accesses; @@ -302,17 +354,20 @@ class MultilevelVertexPairRater { } } - template - inline HypernodeID adaptiveEdgeSize(const Hypergraph& hypergraph, - const HyperedgeID he, - kahypar::ds::FastResetFlagArray<>& bloom_filter, - const parallel::scalable_vector& cluster_ids) { + template + inline HypernodeID + adaptiveEdgeSize(const Hypergraph &hypergraph, const HyperedgeID he, + kahypar::ds::FastResetFlagArray<> &bloom_filter, + const parallel::scalable_vector &cluster_ids) + { HypernodeID edge_size = 0; - for ( const HypernodeID& v : hypergraph.pins(he) ) { + for(const HypernodeID &v : hypergraph.pins(he)) + { const HypernodeID representative = cluster_ids[v]; ASSERT(representative < hypergraph.initialNumNodes()); const HypernodeID bloom_filter_rep = representative & _bloom_filter_mask; - if ( !bloom_filter[bloom_filter_rep] ) { + if(!bloom_filter[bloom_filter_rep]) + { ++edge_size; bloom_filter.set(bloom_filter_rep, true); } @@ -321,47 +376,64 @@ class MultilevelVertexPairRater { return edge_size; } - template - inline RatingMapType getRatingMapTypeForRatingOfHypernode(const Hypergraph& hypergraph, - const HypernodeID u) { + template + inline RatingMapType getRatingMapTypeForRatingOfHypernode(const Hypergraph &hypergraph, + const HypernodeID u) + { const bool use_vertex_degree_sampling = - _vertex_degree_sampling_threshold != std::numeric_limits::max(); - const size_t vertex_degree_bounded_rating_map_size = use_vertex_degree_sampling ? - 3UL * _vertex_degree_sampling_threshold : std::numeric_limits::max(); + _vertex_degree_sampling_threshold != std::numeric_limits::max(); + const size_t vertex_degree_bounded_rating_map_size = + use_vertex_degree_sampling ? 
3UL * _vertex_degree_sampling_threshold : + std::numeric_limits::max(); const size_t cache_efficient_rating_map_size = CacheEfficientRatingMap::MAP_SIZE; - const size_t size_of_smaller_rating_map = std::min( - vertex_degree_bounded_rating_map_size, cache_efficient_rating_map_size); + const size_t size_of_smaller_rating_map = + std::min(vertex_degree_bounded_rating_map_size, cache_efficient_rating_map_size); // In case the current number of nodes is smaller than size // of the cache-efficient sparse map, the large tmp rating map // consumes less memory - if ( _current_num_nodes < size_of_smaller_rating_map ) { + if(_current_num_nodes < size_of_smaller_rating_map) + { return RatingMapType::LARGE_RATING_MAP; } // Compute estimation for the upper bound of neighbors of u - if constexpr (Hypergraph::is_graph) { - if ( hypergraph.nodeDegree(u) > cache_efficient_rating_map_size / 3UL ) { - if ( vertex_degree_bounded_rating_map_size < _current_num_nodes ) { + if constexpr(Hypergraph::is_graph) + { + if(hypergraph.nodeDegree(u) > cache_efficient_rating_map_size / 3UL) + { + if(vertex_degree_bounded_rating_map_size < _current_num_nodes) + { return RatingMapType::VERTEX_DEGREE_BOUNDED_RATING_MAP; - } else { + } + else + { return RatingMapType::LARGE_RATING_MAP; } } - } else { + } + else + { HypernodeID ub_neighbors_u = 0; - for ( const HyperedgeID& he : hypergraph.incidentEdges(u) ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(u)) + { const HypernodeID edge_size = hypergraph.edgeSize(he); // Ignore large hyperedges - ub_neighbors_u += edge_size < _context.partition.ignore_hyperedge_size_threshold ? edge_size : 0; - // If the number of estimated neighbors is greater than the size of the cache efficient rating map / 3, we - // use the large sparse map. The division by 3 also ensures that the fill grade - // of the cache efficient sparse map would be small enough such that linear probing - // is fast. - if ( ub_neighbors_u > cache_efficient_rating_map_size / 3UL ) { - if ( vertex_degree_bounded_rating_map_size < _current_num_nodes ) { + ub_neighbors_u += edge_size < _context.partition.ignore_hyperedge_size_threshold ? + edge_size : + 0; + // If the number of estimated neighbors is greater than the size of the cache + // efficient rating map / 3, we use the large sparse map. The division by 3 also + // ensures that the fill grade of the cache efficient sparse map would be small + // enough such that linear probing is fast. + if(ub_neighbors_u > cache_efficient_rating_map_size / 3UL) + { + if(vertex_degree_bounded_rating_map_size < _current_num_nodes) + { return RatingMapType::VERTEX_DEGREE_BOUNDED_RATING_MAP; - } else { + } + else + { return RatingMapType::LARGE_RATING_MAP; } } @@ -371,15 +443,17 @@ class MultilevelVertexPairRater { return RatingMapType::CACHE_EFFICIENT_RATING_MAP; } - LargeTmpRatingMap construct_large_tmp_rating_map() { + LargeTmpRatingMap construct_large_tmp_rating_map() + { return LargeTmpRatingMap(_current_num_nodes); } - size_t align_to_next_power_of_two(const size_t size) const { + size_t align_to_next_power_of_two(const size_t size) const + { return std::pow(2.0, std::ceil(std::log2(static_cast(size)))); } - const Context& _context; + const Context &_context; // ! Number of nodes of the current hypergraph HypernodeID _current_num_nodes; // ! Maximum number of neighbors that are considered for rating @@ -405,4 +479,4 @@ class MultilevelVertexPairRater { // ! 
Marks all matched vertices kahypar::ds::FastResetFlagArray<> _already_matched; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/nlevel_coarsener.h b/mt-kahypar/partition/coarsening/nlevel_coarsener.h index ad11f2b66..6ec3ea86b 100644 --- a/mt-kahypar/partition/coarsening/nlevel_coarsener.h +++ b/mt-kahypar/partition/coarsening/nlevel_coarsener.h @@ -29,97 +29,98 @@ #include #include "tbb/parallel_for.h" -#include "tbb/parallel_sort.h" #include "tbb/parallel_scan.h" +#include "tbb/parallel_sort.h" #include "include/libmtkahypartypes.h" -#include "kahypar-resources/meta/mandatory.h" +#include "mt-kahypar/parallel/parallel_prefix_sum.h" +#include "mt-kahypar/partition/coarsening/i_coarsener.h" #include "mt-kahypar/partition/coarsening/nlevel_coarsener_base.h" #include "mt-kahypar/partition/coarsening/nlevel_vertex_pair_rater.h" -#include "mt-kahypar/partition/coarsening/i_coarsener.h" #include "mt-kahypar/partition/coarsening/policies/rating_acceptance_policy.h" #include "mt-kahypar/partition/coarsening/policies/rating_heavy_node_penalty_policy.h" #include "mt-kahypar/partition/coarsening/policies/rating_score_policy.h" -#include "mt-kahypar/parallel/parallel_prefix_sum.h" #include "mt-kahypar/utils/cast.h" #include "mt-kahypar/utils/progress_bar.h" #include "mt-kahypar/utils/randomize.h" #include "mt-kahypar/utils/stats.h" +#include "kahypar-resources/meta/mandatory.h" namespace mt_kahypar { -template -class NLevelCoarsener : public ICoarsener, - private NLevelCoarsenerBase { - private: - - #define HIGH_DEGREE_VERTEX_THRESHOLD ID(200000) +class NLevelCoarsener : public ICoarsener, private NLevelCoarsenerBase +{ +private: +#define HIGH_DEGREE_VERTEX_THRESHOLD ID(200000) using Base = NLevelCoarsenerBase; - using Rater = NLevelVertexPairRater; + using Rater = + NLevelVertexPairRater; using Rating = typename Rater::Rating; using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - class ContractionLimitTracker { + class ContractionLimitTracker + { - public: - explicit ContractionLimitTracker(const Context& context) : - _context(context), - _initial_num_nodes(0), - _current_num_nodes(0), - _contracted_nodes(0), - _num_nodes_update_threshold(0) { } + public: + explicit ContractionLimitTracker(const Context &context) : + _context(context), _initial_num_nodes(0), _current_num_nodes(0), + _contracted_nodes(0), _num_nodes_update_threshold(0) + { + } - void initialize(const HypernodeID initial_num_nodes) { + void initialize(const HypernodeID initial_num_nodes) + { _initial_num_nodes = initial_num_nodes; _current_num_nodes = initial_num_nodes; } - HypernodeID currentNumNodes() const { - return _current_num_nodes; - } + HypernodeID currentNumNodes() const { return _current_num_nodes; } - void update(const HypernodeID num_contractions, const HypernodeID contraction_limit) { - if ( num_contractions > 0 ) { + void update(const HypernodeID num_contractions, const HypernodeID contraction_limit) + { + if(num_contractions > 0) + { // To maintain the current number of nodes of the hypergraph each PE sums up // its number of contracted nodes locally. To compute the current number of // nodes, we have to sum up the number of contracted nodes of each PE. This // operation becomes more expensive the more PEs are participating in coarsening. 
// In order to prevent expensive updates of the current number of nodes, we // define a threshold which the local number of contracted nodes have to exceed - // before the current PE updates the current number of nodes. This threshold is defined - // by the distance to the current contraction limit divided by the number of PEs. - // Once one PE exceeds this bound the first time it is not possible that the - // contraction limit is reached, because otherwise an other PE would update + // before the current PE updates the current number of nodes. This threshold is + // defined by the distance to the current contraction limit divided by the number + // of PEs. Once one PE exceeds this bound the first time it is not possible that + // the contraction limit is reached, because otherwise an other PE would update // the global current number of nodes before. After update the threshold is // increased by the new difference (in number of nodes) to the contraction limit // divided by the number of PEs. - HypernodeID& local_contracted_nodes = _contracted_nodes.local(); + HypernodeID &local_contracted_nodes = _contracted_nodes.local(); local_contracted_nodes += num_contractions; - if ( local_contracted_nodes >= _num_nodes_update_threshold.local() ) { - _current_num_nodes = _initial_num_nodes - - _contracted_nodes.combine(std::plus()); + if(local_contracted_nodes >= _num_nodes_update_threshold.local()) + { + _current_num_nodes = + _initial_num_nodes - _contracted_nodes.combine(std::plus()); const HypernodeID dist_to_cl = _current_num_nodes > contraction_limit ? - _current_num_nodes - contraction_limit : 0; + _current_num_nodes - contraction_limit : + 0; _num_nodes_update_threshold.local() += - dist_to_cl / _context.shared_memory.original_num_threads; + dist_to_cl / _context.shared_memory.original_num_threads; } } } - void updateCurrentNumNodes() { - _current_num_nodes = _initial_num_nodes - - _contracted_nodes.combine(std::plus()); + void updateCurrentNumNodes() + { + _current_num_nodes = + _initial_num_nodes - _contracted_nodes.combine(std::plus()); } - private: - const Context& _context; + private: + const Context &_context; HypernodeID _initial_num_nodes; HypernodeID _current_num_nodes; tbb::enumerable_thread_specific _contracted_nodes; @@ -129,75 +130,76 @@ class NLevelCoarsener : public ICoarsener, static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - public: - NLevelCoarsener(mt_kahypar_hypergraph_t hypergraph, - const Context& context, - uncoarsening_data_t* uncoarseningData) : - Base(utils::cast(hypergraph), - context, - uncoarsening::to_reference(uncoarseningData)), - _rater(utils::cast(hypergraph).initialNumNodes(), context), - _initial_num_nodes(utils::cast(hypergraph).initialNumNodes() - - utils::cast(hypergraph).numRemovedHypernodes()), - _current_vertices(), - _tmp_current_vertices(), - _enabled_vertex_flag_array(), - _cl_tracker(context), - _pass_nr(0), - _progress_bar(utils::cast(hypergraph).initialNumNodes(), 0, false), - _enable_randomization(true) { +public: + NLevelCoarsener(mt_kahypar_hypergraph_t hypergraph, const Context &context, + uncoarsening_data_t *uncoarseningData) : + Base(utils::cast(hypergraph), context, + uncoarsening::to_reference(uncoarseningData)), + _rater(utils::cast(hypergraph).initialNumNodes(), context), + _initial_num_nodes(utils::cast(hypergraph).initialNumNodes() - + utils::cast(hypergraph).numRemovedHypernodes()), + _current_vertices(), _tmp_current_vertices(), _enabled_vertex_flag_array(), + _cl_tracker(context), 
_pass_nr(0), + _progress_bar(utils::cast(hypergraph).initialNumNodes(), 0, false), + _enable_randomization(true) + { _progress_bar += _hg.numRemovedHypernodes(); - tbb::parallel_invoke([&] { - _current_vertices.resize(_hg.initialNumNodes()); - tbb::parallel_for(ID(0), _hg.initialNumNodes(), [&](const HypernodeID hn) { - _current_vertices[hn] = hn; - }); - utils::Randomize::instance().parallelShuffleVector(_current_vertices, UL(0), _current_vertices.size()); - }, [&] { - _tmp_current_vertices.resize(_hg.initialNumNodes()); - }, [&] { - _enabled_vertex_flag_array.resize(_hg.initialNumNodes()); - }); + tbb::parallel_invoke( + [&] { + _current_vertices.resize(_hg.initialNumNodes()); + tbb::parallel_for(ID(0), _hg.initialNumNodes(), + [&](const HypernodeID hn) { _current_vertices[hn] = hn; }); + utils::Randomize::instance().parallelShuffleVector(_current_vertices, UL(0), + _current_vertices.size()); + }, + [&] { _tmp_current_vertices.resize(_hg.initialNumNodes()); }, + [&] { _enabled_vertex_flag_array.resize(_hg.initialNumNodes()); }); } - NLevelCoarsener(const NLevelCoarsener&) = delete; - NLevelCoarsener(NLevelCoarsener&&) = delete; - NLevelCoarsener & operator= (const NLevelCoarsener &) = delete; - NLevelCoarsener & operator= (NLevelCoarsener &&) = delete; + NLevelCoarsener(const NLevelCoarsener &) = delete; + NLevelCoarsener(NLevelCoarsener &&) = delete; + NLevelCoarsener &operator=(const NLevelCoarsener &) = delete; + NLevelCoarsener &operator=(NLevelCoarsener &&) = delete; ~NLevelCoarsener() = default; - void disableRandomization() { - _enable_randomization = false; - } + void disableRandomization() { _enable_randomization = false; } - private: - void initializeImpl() override { - if ( _context.partition.verbose_output && _context.partition.enable_progress_bar ) { +private: + void initializeImpl() override + { + if(_context.partition.verbose_output && _context.partition.enable_progress_bar) + { _progress_bar.enable(); } _cl_tracker.initialize(_initial_num_nodes); } - bool coarseningPassImpl() override { + bool coarseningPassImpl() override + { DBG << V(_pass_nr) << V(_cl_tracker.currentNumNodes()); const HypernodeID num_hns_before_pass = _cl_tracker.currentNumNodes(); - // Coarsening Pass - _rater.resetMatches(); + // Coarsening Pass + _rater.resetMatches(); double contraction_limit = - std::max(static_cast(_cl_tracker.currentNumNodes() / - _context.coarsening.maximum_shrink_factor), _context.coarsening.contraction_limit); - if ( _context.coarsening.maximum_shrink_factor > 99.0 ) { + std::max(static_cast(_cl_tracker.currentNumNodes() / + _context.coarsening.maximum_shrink_factor), + _context.coarsening.contraction_limit); + if(_context.coarsening.maximum_shrink_factor > 99.0) + { contraction_limit = _context.coarsening.contraction_limit; } HighResClockTimepoint round_start = std::chrono::high_resolution_clock::now(); _timer.start_timer("clustering", "Clustering"); - if ( _hg.hasFixedVertices() ) { + if(_hg.hasFixedVertices()) + { _hg.setMaxFixedVertexBlockWeight(_context.partition.max_part_weights); performClustering(contraction_limit); - } else { + } + else + { performClustering(contraction_limit); } _timer.stop_timer("clustering"); @@ -209,15 +211,16 @@ class NLevelCoarsener : public ICoarsener, // Writes all enabled vertices to _current_vertices _cl_tracker.updateCurrentNumNodes(); compactifyVertices(); - utils::Randomize::instance().parallelShuffleVector( - _current_vertices, UL(0), _current_vertices.size()); + utils::Randomize::instance().parallelShuffleVector(_current_vertices, 
UL(0), + _current_vertices.size()); // Terminate contraction if the number of contracted vertices in this round // is smaller than a certain fraction. const double reduction_vertices_percentage = - static_cast(num_hns_before_pass) / - static_cast(_cl_tracker.currentNumNodes()); - if ( reduction_vertices_percentage <= _context.coarsening.minimum_shrink_factor ) { + static_cast(num_hns_before_pass) / + static_cast(_cl_tracker.currentNumNodes()); + if(reduction_vertices_percentage <= _context.coarsening.minimum_shrink_factor) + { return false; } @@ -225,43 +228,54 @@ class NLevelCoarsener : public ICoarsener, return true; } - template - void performClustering(const HypernodeID contraction_limit) { + template + void performClustering(const HypernodeID contraction_limit) + { tbb::parallel_for(UL(0), _current_vertices.size(), [&](const size_t i) { - if ( _cl_tracker.currentNumNodes() > contraction_limit ) { - const HypernodeID& hn = _current_vertices[i]; + if(_cl_tracker.currentNumNodes() > contraction_limit) + { + const HypernodeID &hn = _current_vertices[i]; const HypernodeID num_contractions = contract(hn); _cl_tracker.update(num_contractions, contraction_limit); } }); } - bool shouldNotTerminateImpl() const override { + bool shouldNotTerminateImpl() const override + { return _cl_tracker.currentNumNodes() > _context.coarsening.contraction_limit; } - void terminateImpl() override { + void terminateImpl() override + { _progress_bar += (_initial_num_nodes - _progress_bar.count()); _progress_bar.disable(); _uncoarseningData.finalizeCoarsening(); } - template - HypernodeID contract(const HypernodeID hn) { + template + HypernodeID contract(const HypernodeID hn) + { HypernodeID num_contractions = 0; - if ( _hg.nodeIsEnabled(hn) ) { + if(_hg.nodeIsEnabled(hn)) + { const Rating rating = _rater.template rate( - _hg, hn, _context.coarsening.max_allowed_node_weight); - if ( rating.target != kInvalidHypernode ) { + _hg, hn, _context.coarsening.max_allowed_node_weight); + if(rating.target != kInvalidHypernode) + { HypernodeID u = hn; HypernodeID v = rating.target; - // In case v is a high degree vertex, we reverse contraction order to improve performance - if ( _hg.nodeDegree(u) < _hg.nodeDegree(v) && _hg.nodeDegree(v) > HIGH_DEGREE_VERTEX_THRESHOLD ) { + // In case v is a high degree vertex, we reverse contraction order to improve + // performance + if(_hg.nodeDegree(u) < _hg.nodeDegree(v) && + _hg.nodeDegree(v) > HIGH_DEGREE_VERTEX_THRESHOLD) + { u = rating.target; v = hn; } - if ( _hg.registerContraction(u, v) ) { + if(_hg.registerContraction(u, v)) + { _rater.markAsMatched(u); _rater.markAsMatched(v); num_contractions = _hg.contract(v, _context.coarsening.max_allowed_node_weight); @@ -272,23 +286,29 @@ class NLevelCoarsener : public ICoarsener, return num_contractions; } - HypernodeID currentNumberOfNodesImpl() const override { + HypernodeID currentNumberOfNodesImpl() const override + { return _cl_tracker.currentNumNodes(); } - mt_kahypar_hypergraph_t coarsestHypergraphImpl() override { - return mt_kahypar_hypergraph_t { - reinterpret_cast( - &Base::compactifiedHypergraph()), Hypergraph::TYPE }; + mt_kahypar_hypergraph_t coarsestHypergraphImpl() override + { + return mt_kahypar_hypergraph_t{ reinterpret_cast( + &Base::compactifiedHypergraph()), + Hypergraph::TYPE }; } - mt_kahypar_partitioned_hypergraph_t coarsestPartitionedHypergraphImpl() override { - return mt_kahypar_partitioned_hypergraph_t { - reinterpret_cast( - &Base::compactifiedPartitionedHypergraph()), PartitionedHypergraph::TYPE }; + 
mt_kahypar_partitioned_hypergraph_t coarsestPartitionedHypergraphImpl() override + { + return mt_kahypar_partitioned_hypergraph_t{ + reinterpret_cast( + &Base::compactifiedPartitionedHypergraph()), + PartitionedHypergraph::TYPE + }; } - void compactifyVertices() { + void compactifyVertices() + { // Mark all vertices that are still enabled const HypernodeID current_num_nodes = _cl_tracker.currentNumNodes(); tbb::parallel_for(UL(0), _current_vertices.size(), [&](const size_t i) { @@ -299,15 +319,18 @@ class NLevelCoarsener : public ICoarsener, // Calculate prefix sum over all enabled vertices to determine their new position // in _current_vertices parallel::TBBPrefixSum active_vertex_prefix_sum(_enabled_vertex_flag_array); - tbb::parallel_scan(tbb::blocked_range( - UL(0), _enabled_vertex_flag_array.size()), active_vertex_prefix_sum); - ASSERT(active_vertex_prefix_sum.total_sum() == static_cast(current_num_nodes)); + tbb::parallel_scan( + tbb::blocked_range(UL(0), _enabled_vertex_flag_array.size()), + active_vertex_prefix_sum); + ASSERT(active_vertex_prefix_sum.total_sum() == + static_cast(current_num_nodes)); // Write all enabled vertices to _tmp_current_vertices _tmp_current_vertices.resize(current_num_nodes); tbb::parallel_for(UL(0), _current_vertices.size(), [&](const size_t i) { const HypernodeID hn = _current_vertices[i]; - if ( _hg.nodeIsEnabled(hn) ) { + if(_hg.nodeIsEnabled(hn)) + { const size_t pos = active_vertex_prefix_sum[i]; ASSERT(pos < _tmp_current_vertices.size()); _tmp_current_vertices[pos] = hn; @@ -318,8 +341,8 @@ class NLevelCoarsener : public ICoarsener, ASSERT(_current_vertices.size() == static_cast(current_num_nodes)); } - using Base::_hg; using Base::_context; + using Base::_hg; using Base::_timer; using Base::_uncoarseningData; Rater _rater; @@ -333,4 +356,4 @@ class NLevelCoarsener : public ICoarsener, bool _enable_randomization; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/nlevel_coarsener_base.h b/mt-kahypar/partition/coarsening/nlevel_coarsener_base.h index 9543b2616..9b983a80a 100644 --- a/mt-kahypar/partition/coarsening/nlevel_coarsener_base.h +++ b/mt-kahypar/partition/coarsening/nlevel_coarsener_base.h @@ -28,70 +28,76 @@ #include "tbb/task_group.h" +#include +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/metrics.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/utils/utilities.h" -#include namespace mt_kahypar { -template -class NLevelCoarsenerBase { - private: - +template +class NLevelCoarsenerBase +{ +private: static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; using ParallelHyperedge = typename Hypergraph::ParallelHyperedge; - using ParallelHyperedgeVector = vec>; + using ParallelHyperedgeVector = vec >; - public: - NLevelCoarsenerBase(Hypergraph& hypergraph, - const Context& context, - UncoarseningData& uncoarseningData) : - _hg(hypergraph), - _context(context), - _timer(utils::Utilities::instance().getTimer(context.utility_id)), - _uncoarseningData(uncoarseningData) { } +public: + NLevelCoarsenerBase(Hypergraph &hypergraph, const Context &context, + 
UncoarseningData &uncoarseningData) : + _hg(hypergraph), + _context(context), + _timer(utils::Utilities::instance().getTimer(context.utility_id)), + _uncoarseningData(uncoarseningData) + { + } - NLevelCoarsenerBase(const NLevelCoarsenerBase&) = delete; - NLevelCoarsenerBase(NLevelCoarsenerBase&&) = delete; - NLevelCoarsenerBase & operator= (const NLevelCoarsenerBase &) = delete; - NLevelCoarsenerBase & operator= (NLevelCoarsenerBase &&) = delete; + NLevelCoarsenerBase(const NLevelCoarsenerBase &) = delete; + NLevelCoarsenerBase(NLevelCoarsenerBase &&) = delete; + NLevelCoarsenerBase &operator=(const NLevelCoarsenerBase &) = delete; + NLevelCoarsenerBase &operator=(NLevelCoarsenerBase &&) = delete; virtual ~NLevelCoarsenerBase() = default; - protected: - - Hypergraph& compactifiedHypergraph() { +protected: + Hypergraph &compactifiedHypergraph() + { ASSERT(_uncoarseningData.is_finalized); return *_uncoarseningData.compactified_hg; } - PartitionedHypergraph& compactifiedPartitionedHypergraph() { + PartitionedHypergraph &compactifiedPartitionedHypergraph() + { ASSERT(_uncoarseningData.is_finalized); return *_uncoarseningData.compactified_phg; } - void removeSinglePinAndParallelNets(const HighResClockTimepoint& round_start) { - _timer.start_timer("remove_single_pin_and_parallel_nets", "Remove Single Pin and Parallel Nets"); - _uncoarseningData.removed_hyperedges_batches.emplace_back(_hg.removeSinglePinAndParallelHyperedges()); + void removeSinglePinAndParallelNets(const HighResClockTimepoint &round_start) + { + _timer.start_timer("remove_single_pin_and_parallel_nets", + "Remove Single Pin and Parallel Nets"); + _uncoarseningData.removed_hyperedges_batches.emplace_back( + _hg.removeSinglePinAndParallelHyperedges()); const HighResClockTimepoint round_end = std::chrono::high_resolution_clock::now(); - const double elapsed_time = std::chrono::duration(round_end - round_start).count(); + const double elapsed_time = + std::chrono::duration(round_end - round_start).count(); _uncoarseningData.round_coarsening_times.push_back(elapsed_time); _timer.stop_timer("remove_single_pin_and_parallel_nets"); } - protected: +protected: // ! 
Original hypergraph - Hypergraph& _hg; - const Context& _context; - utils::Timer& _timer; - UncoarseningData& _uncoarseningData; + Hypergraph &_hg; + const Context &_context; + utils::Timer &_timer; + UncoarseningData &_uncoarseningData; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/nlevel_uncoarsener.cpp b/mt-kahypar/partition/coarsening/nlevel_uncoarsener.cpp index 1cca04fbe..06d34fcee 100644 --- a/mt-kahypar/partition/coarsening/nlevel_uncoarsener.cpp +++ b/mt-kahypar/partition/coarsening/nlevel_uncoarsener.cpp @@ -29,401 +29,491 @@ #include "mt-kahypar/partition/coarsening/nlevel_uncoarsener.h" #include "mt-kahypar/definitions.h" -#include "mt-kahypar/utils/progress_bar.h" #include "mt-kahypar/io/partitioning_output.h" -#include "mt-kahypar/utils/utilities.h" #include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/utils/progress_bar.h" +#include "mt-kahypar/utils/utilities.h" namespace mt_kahypar { - template - void NLevelUncoarsener::initializeImpl() { - // Initialize n-level batch uncontraction hierarchy - _timer.start_timer("create_batch_uncontraction_hierarchy", "Create n-Level Hierarchy"); - _hierarchy = _hg.createBatchUncontractionHierarchy(_context.refinement.max_batch_size); - ASSERT(_uncoarseningData.removed_hyperedges_batches.size() == _hierarchy.size() - 1); - _timer.stop_timer("create_batch_uncontraction_hierarchy"); - - ASSERT(_uncoarseningData.is_finalized); - _uncoarseningData.compactified_phg->setTargetGraph(_target_graph); - _current_metrics = Base::initializeMetrics(*_uncoarseningData.compactified_phg); - _stats.current_number_of_nodes = _uncoarseningData.compactified_hg->initialNumNodes(); - Base::initializeRefinementAlgorithms(); - - if (_context.type == ContextType::main) { - _context.initial_km1 = _current_metrics.quality; - } +template +void NLevelUncoarsener::initializeImpl() +{ + // Initialize n-level batch uncontraction hierarchy + _timer.start_timer("create_batch_uncontraction_hierarchy", "Create n-Level Hierarchy"); + _hierarchy = _hg.createBatchUncontractionHierarchy(_context.refinement.max_batch_size); + ASSERT(_uncoarseningData.removed_hyperedges_batches.size() == _hierarchy.size() - 1); + _timer.stop_timer("create_batch_uncontraction_hierarchy"); + + ASSERT(_uncoarseningData.is_finalized); + _uncoarseningData.compactified_phg->setTargetGraph(_target_graph); + _current_metrics = Base::initializeMetrics(*_uncoarseningData.compactified_phg); + _stats.current_number_of_nodes = _uncoarseningData.compactified_hg->initialNumNodes(); + Base::initializeRefinementAlgorithms(); + + if(_context.type == ContextType::main) + { + _context.initial_km1 = _current_metrics.quality; + } - // For initial partitioning, we compactify the node IDs of smallest hypergraph to - // a consecutive range. This step project the partition from the compactified hypergraph - // to the node IDs of the input hypergraph. 
- _timer.start_timer("initialize_partition", "Initialize Partition"); - *_uncoarseningData.partitioned_hg = PartitionedHypergraph(_context.partition.k, _hg, parallel_tag_t()); - _uncoarseningData.partitioned_hg->doParallelForAllNodes([&](const HypernodeID hn) { - ASSERT(static_cast(hn) < _uncoarseningData.compactified_hn_mapping.size()); - const HypernodeID compactified_hn = _uncoarseningData.compactified_hn_mapping[hn]; - const PartitionID block_id = _uncoarseningData.compactified_phg->partID(compactified_hn); - ASSERT(block_id != kInvalidPartition && block_id < _context.partition.k); - _uncoarseningData.partitioned_hg->setOnlyNodePart(hn, block_id); - }); - _uncoarseningData.partitioned_hg->initializePartition(); - _uncoarseningData.partitioned_hg->setTargetGraph(_target_graph); - - // Initialize Gain Cache - if ( _context.refinement.fm.algorithm == FMAlgorithm::kway_fm ) { - GainCachePtr::initializeGainCache( - *_uncoarseningData.partitioned_hg, _gain_cache); - } + // For initial partitioning, we compactify the node IDs of smallest hypergraph to + // a consecutive range. This step project the partition from the compactified hypergraph + // to the node IDs of the input hypergraph. + _timer.start_timer("initialize_partition", "Initialize Partition"); + *_uncoarseningData.partitioned_hg = + PartitionedHypergraph(_context.partition.k, _hg, parallel_tag_t()); + _uncoarseningData.partitioned_hg->doParallelForAllNodes([&](const HypernodeID hn) { + ASSERT(static_cast(hn) < _uncoarseningData.compactified_hn_mapping.size()); + const HypernodeID compactified_hn = _uncoarseningData.compactified_hn_mapping[hn]; + const PartitionID block_id = + _uncoarseningData.compactified_phg->partID(compactified_hn); + ASSERT(block_id != kInvalidPartition && block_id < _context.partition.k); + _uncoarseningData.partitioned_hg->setOnlyNodePart(hn, block_id); + }); + _uncoarseningData.partitioned_hg->initializePartition(); + _uncoarseningData.partitioned_hg->setTargetGraph(_target_graph); + + // Initialize Gain Cache + if(_context.refinement.fm.algorithm == FMAlgorithm::kway_fm) + { + GainCachePtr::initializeGainCache(*_uncoarseningData.partitioned_hg, _gain_cache); + } - ASSERT(metrics::quality(*_uncoarseningData.compactified_phg, _context) == - metrics::quality(*_uncoarseningData.partitioned_hg, _context), - V(metrics::quality(*_uncoarseningData.compactified_phg, _context)) << - V(metrics::quality(*_uncoarseningData.partitioned_hg, _context))); - ASSERT(metrics::imbalance(*_uncoarseningData.compactified_phg, _context) == - metrics::imbalance(*_uncoarseningData.partitioned_hg, _context), - V(metrics::imbalance(*_uncoarseningData.compactified_phg, _context)) << - V(metrics::imbalance(*_uncoarseningData.partitioned_hg, _context))); - _timer.stop_timer("initialize_partition"); - - // Enable progress bar if verbose output is enabled - if ( _context.partition.verbose_output && _context.partition.enable_progress_bar && !debug ) { - _progress.enable(); - _progress.setObjective(_current_metrics.quality); - _progress += _uncoarseningData.compactified_hg->initialNumNodes(); - } + ASSERT(metrics::quality(*_uncoarseningData.compactified_phg, _context) == + metrics::quality(*_uncoarseningData.partitioned_hg, _context), + V(metrics::quality(*_uncoarseningData.compactified_phg, _context)) + << V(metrics::quality(*_uncoarseningData.partitioned_hg, _context))); + ASSERT(metrics::imbalance(*_uncoarseningData.compactified_phg, _context) == + metrics::imbalance(*_uncoarseningData.partitioned_hg, _context), + 
V(metrics::imbalance(*_uncoarseningData.compactified_phg, _context)) + << V(metrics::imbalance(*_uncoarseningData.partitioned_hg, _context))); + _timer.stop_timer("initialize_partition"); + + // Enable progress bar if verbose output is enabled + if(_context.partition.verbose_output && _context.partition.enable_progress_bar && + !debug) + { + _progress.enable(); + _progress.setObjective(_current_metrics.quality); + _progress += _uncoarseningData.compactified_hg->initialNumNodes(); + } - // Initialize Refiner - mt_kahypar_partitioned_hypergraph_t phg = + // Initialize Refiner + mt_kahypar_partitioned_hypergraph_t phg = utils::partitioned_hg_cast(*_uncoarseningData.partitioned_hg); - if ( _rebalancer ) { - _rebalancer->initialize(phg); - } - if ( _label_propagation ) { - _label_propagation->initialize(phg); - } - if ( _fm ) { - _fm->initialize(phg); - } + if(_rebalancer) + { + _rebalancer->initialize(phg); + } + if(_label_propagation) + { + _label_propagation->initialize(phg); + } + if(_fm) + { + _fm->initialize(phg); + } - ASSERT(_uncoarseningData.round_coarsening_times.size() == _uncoarseningData.removed_hyperedges_batches.size()); - _uncoarseningData.round_coarsening_times.push_back(_uncoarseningData.round_coarsening_times.size() > 0 ? - _uncoarseningData.round_coarsening_times.back() : std::numeric_limits::max()); // Sentinel + ASSERT(_uncoarseningData.round_coarsening_times.size() == + _uncoarseningData.removed_hyperedges_batches.size()); + _uncoarseningData.round_coarsening_times.push_back( + _uncoarseningData.round_coarsening_times.size() > 0 ? + _uncoarseningData.round_coarsening_times.back() : + std::numeric_limits::max()); // Sentinel + + if(_timer.isEnabled()) + { + _timer.disable(); + _is_timer_disabled = true; + } +} - if ( _timer.isEnabled() ) { - _timer.disable(); - _is_timer_disabled = true; +template +bool NLevelUncoarsener::isTopLevelImpl() const +{ + return _hierarchy.empty(); +} + +template +void NLevelUncoarsener::projectToNextLevelAndRefineImpl() +{ + BatchVector &batches = _hierarchy.back(); + + // Uncontracts all batches from one coarsening pass. One coarsening pass iterates over + // all nodes and contracts each node onto another node. Afterwards, we remove all + // single-pin and identical nets. The following loop reverts all contractions and + // restores single-pin and identical nets. 
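+  // Simplified sketch of the remainder of this function (timers, assertions and
+  // progress-bar updates omitted):
+  //
+  //   while (!batches.empty()) {
+  //     uncontract(batches.back());                // revert one batch of contractions
+  //     collect the batch's border vertices;       // candidates for local search
+  //     if (enough border vertices were collected)
+  //       localizedRefine(...);                    // refine around the uncontracted nodes
+  //     batches.pop_back();
+  //   }
+  //   localizedRefine(any remaining border vertices);
+  //   restore the single-pin and parallel nets removed in this coarsening pass;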
+ while(!batches.empty()) + { + const Batch &batch = batches.back(); + if(batch.size() > 0) + { + HEAVY_REFINEMENT_ASSERT(metrics::quality(*_uncoarseningData.partitioned_hg, + _context) == _current_metrics.quality, + V(_current_metrics.quality) << V(metrics::quality( + *_uncoarseningData.partitioned_hg, _context))); + + // Performs batch uncontraction operation + _timer.start_timer("batch_uncontractions", "Batch Uncontractions", false, + _force_measure_timings); + GainCachePtr::uncontract(*_uncoarseningData.partitioned_hg, batch, _gain_cache); + _timer.stop_timer("batch_uncontractions", _force_measure_timings); + + HEAVY_REFINEMENT_ASSERT(_hg.verifyIncidenceArrayAndIncidentNets()); + HEAVY_REFINEMENT_ASSERT(GainCachePtr::checkTrackedPartitionInformation( + *_uncoarseningData.partitioned_hg, _gain_cache)); + HEAVY_REFINEMENT_ASSERT(metrics::quality(*_uncoarseningData.partitioned_hg, + _context) == _current_metrics.quality, + V(_current_metrics.quality) << V(metrics::quality( + *_uncoarseningData.partitioned_hg, _context))); + + // Extracts all border vertices of the current batch + _timer.start_timer("collect_border_vertices", "Collect Border Vertices", false, + _force_measure_timings); + tbb::parallel_for(UL(0), batch.size(), [&](const size_t i) { + const Memento &memento = batch[i]; + if(!_border_vertices_of_batch[memento.u] && + _uncoarseningData.partitioned_hg->isBorderNode(memento.u)) + { + _border_vertices_of_batch.set(memento.u, true); + _tmp_refinement_nodes.stream(memento.u); + } + if(!_border_vertices_of_batch[memento.v] && + _uncoarseningData.partitioned_hg->isBorderNode(memento.v)) + { + _border_vertices_of_batch.set(memento.v, true); + _tmp_refinement_nodes.stream(memento.v); + } + }); + _timer.stop_timer("collect_border_vertices", _force_measure_timings); + + // We perform localized refinement around the uncontracted nodes if the current + // number of border nodes is greater than a predefined threshold. + if(_tmp_refinement_nodes.size() >= _stats.min_num_border_vertices) + { + localizedRefine(*_uncoarseningData.partitioned_hg); + } + + ++_stats.num_batches; + _stats.total_batch_sizes += batch.size(); + // Update Progress Bar + _progress.setObjective(_current_metrics.quality); + _progress += batch.size(); + _stats.current_number_of_nodes += batch.size(); } + batches.pop_back(); } - template - bool NLevelUncoarsener::isTopLevelImpl() const { - return _hierarchy.empty(); + // Perform localized refinement on the remaining nodes such that we do + // not miss any improvement. + if(_tmp_refinement_nodes.size() > 0) + { + localizedRefine(*_uncoarseningData.partitioned_hg); } - template - void NLevelUncoarsener::projectToNextLevelAndRefineImpl() { - BatchVector& batches = _hierarchy.back(); - - // Uncontracts all batches from one coarsening pass. One coarsening pass iterates over all - // nodes and contracts each node onto another node. Afterwards, we remove all single-pin and - // identical nets. The following loop reverts all contractions and restores single-pin and - // identical nets. 
- while ( !batches.empty() ) { - const Batch& batch = batches.back(); - if ( batch.size() > 0 ) { - HEAVY_REFINEMENT_ASSERT(metrics::quality(*_uncoarseningData.partitioned_hg, _context) == _current_metrics.quality, - V(_current_metrics.quality) << V(metrics::quality(*_uncoarseningData.partitioned_hg, _context))); - - // Performs batch uncontraction operation - _timer.start_timer("batch_uncontractions", "Batch Uncontractions", false, _force_measure_timings); - GainCachePtr::uncontract(*_uncoarseningData.partitioned_hg, batch, _gain_cache); - _timer.stop_timer("batch_uncontractions", _force_measure_timings); - - HEAVY_REFINEMENT_ASSERT(_hg.verifyIncidenceArrayAndIncidentNets()); - HEAVY_REFINEMENT_ASSERT(GainCachePtr::checkTrackedPartitionInformation(*_uncoarseningData.partitioned_hg, _gain_cache)); - HEAVY_REFINEMENT_ASSERT(metrics::quality(*_uncoarseningData.partitioned_hg, _context) == _current_metrics.quality, - V(_current_metrics.quality) << V(metrics::quality(*_uncoarseningData.partitioned_hg, _context))); - - // Extracts all border vertices of the current batch - _timer.start_timer("collect_border_vertices", "Collect Border Vertices", false, _force_measure_timings); - tbb::parallel_for(UL(0), batch.size(), [&](const size_t i) { - const Memento& memento = batch[i]; - if ( !_border_vertices_of_batch[memento.u] && _uncoarseningData.partitioned_hg->isBorderNode(memento.u) ) { - _border_vertices_of_batch.set(memento.u, true); - _tmp_refinement_nodes.stream(memento.u); - } - if ( !_border_vertices_of_batch[memento.v] && _uncoarseningData.partitioned_hg->isBorderNode(memento.v) ) { - _border_vertices_of_batch.set(memento.v, true); - _tmp_refinement_nodes.stream(memento.v); - } - }); - _timer.stop_timer("collect_border_vertices", _force_measure_timings); - - // We perform localized refinement around the uncontracted nodes if the current number - // of border nodes is greater than a predefined threshold. - if ( _tmp_refinement_nodes.size() >= _stats.min_num_border_vertices ) { - localizedRefine(*_uncoarseningData.partitioned_hg); - } + // Restore single-pin and identical nets + if(!_uncoarseningData.removed_hyperedges_batches.empty()) + { + _timer.start_timer("restore_single_pin_and_parallel_nets", + "Restore Single Pin and Parallel Nets", false, + _force_measure_timings); + GainCachePtr::restoreSinglePinAndParallelNets( + *_uncoarseningData.partitioned_hg, + _uncoarseningData.removed_hyperedges_batches.back(), _gain_cache); + _uncoarseningData.removed_hyperedges_batches.pop_back(); + _timer.stop_timer("restore_single_pin_and_parallel_nets", _force_measure_timings); + HEAVY_REFINEMENT_ASSERT(_hg.verifyIncidenceArrayAndIncidentNets()); + HEAVY_REFINEMENT_ASSERT(GainCachePtr::checkTrackedPartitionInformation( + *_uncoarseningData.partitioned_hg, _gain_cache)); + + // After restoring all single-pin and identical-nets, we perform an additional + // refinement step on all border nodes. + IUncoarsener::refine(); + _progress.setObjective(_current_metrics.quality); + _uncoarseningData.round_coarsening_times.pop_back(); + } - ++_stats.num_batches; - _stats.total_batch_sizes += batch.size(); - // Update Progress Bar - _progress.setObjective(_current_metrics.quality); - _progress += batch.size(); - _stats.current_number_of_nodes += batch.size(); - } - batches.pop_back(); + _hierarchy.pop_back(); + + if(_hierarchy.empty()) + { + // After we reach the top-level hypergraph, we perform an additional + // refinement step on all border nodes. 
+ const HyperedgeWeight objective_before = _current_metrics.quality; + const double time_limit = Base::refinementTimeLimit( + _context, _uncoarseningData.round_coarsening_times.back()); + globalRefine(*_uncoarseningData.partitioned_hg, time_limit); + _uncoarseningData.round_coarsening_times.pop_back(); + ASSERT(_uncoarseningData.round_coarsening_times.size() == 0); + const HyperedgeWeight objective_after = _current_metrics.quality; + if(_context.partition.verbose_output && objective_after < objective_before) + { + LOG << GREEN << "Top-Level Refinement improved objective from" << objective_before + << "to" << objective_after << END; } - // Perform localized refinement on the remaining nodes such that we do - // not miss any improvement. - if ( _tmp_refinement_nodes.size() > 0 ) { - localizedRefine(*_uncoarseningData.partitioned_hg); + if(_is_timer_disabled) + { + _timer.enable(); } + } +} - // Restore single-pin and identical nets - if ( !_uncoarseningData.removed_hyperedges_batches.empty() ) { - _timer.start_timer("restore_single_pin_and_parallel_nets", "Restore Single Pin and Parallel Nets", false, _force_measure_timings); - GainCachePtr::restoreSinglePinAndParallelNets(*_uncoarseningData.partitioned_hg, - _uncoarseningData.removed_hyperedges_batches.back(), _gain_cache); - _uncoarseningData.removed_hyperedges_batches.pop_back(); - _timer.stop_timer("restore_single_pin_and_parallel_nets", _force_measure_timings); - HEAVY_REFINEMENT_ASSERT(_hg.verifyIncidenceArrayAndIncidentNets()); - HEAVY_REFINEMENT_ASSERT(GainCachePtr::checkTrackedPartitionInformation(*_uncoarseningData.partitioned_hg, _gain_cache)); +template +void NLevelUncoarsener::refineImpl() +{ + const double time_limit = Base::refinementTimeLimit( + _context, _uncoarseningData.round_coarsening_times.back()); + globalRefine(*_uncoarseningData.partitioned_hg, time_limit); +} - // After restoring all single-pin and identical-nets, we perform an additional - // refinement step on all border nodes. - IUncoarsener::refine(); - _progress.setObjective(_current_metrics.quality); - _uncoarseningData.round_coarsening_times.pop_back(); +template +void NLevelUncoarsener::rebalancingImpl() +{ + // If we reach the top-level hypergraph and the partition is still imbalanced, + // we use a rebalancing algorithm to restore balance. + if(_context.type == ContextType::main && + !metrics::isBalanced(*_uncoarseningData.partitioned_hg, _context)) + { + const HyperedgeWeight quality_before = _current_metrics.quality; + if(_context.partition.verbose_output) + { + LOG << RED << "Partition is imbalanced (Current Imbalance:" + << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ") ->" + << "Rebalancer is activated" << END; + + LOG << "Part weights: (violations in red)"; + io::printPartWeightsAndSizes(*_uncoarseningData.partitioned_hg, _context); } - _hierarchy.pop_back(); - - if ( _hierarchy.empty() ) { - // After we reach the top-level hypergraph, we perform an additional - // refinement step on all border nodes.
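The time limit passed to globalRefine above comes from Base::refinementTimeLimit, defined in uncoarsener_base.h later in this diff. A small sketch with invented inputs (0.25 and the other numbers are illustrative, not configured defaults):

#include <algorithm>
#include <limits>

// Mirrors UncoarsenerBase::refinementTimeLimit with example inputs.
double refinementTimeLimitSketch(const double fm_time_limit_factor, const int k,
                                 const double coarsening_time_of_level) {
  if (fm_time_limit_factor != std::numeric_limits<double>::max()) {
    const double time_limit_factor = std::max(1.0, fm_time_limit_factor * k);
    return std::max(5.0, time_limit_factor * coarsening_time_of_level);
  }
  return std::numeric_limits<double>::max(); // no limit configured
}
// Example: factor 0.25, k = 8, 10 s spent coarsening this level
// -> max(1.0, 0.25 * 8) * 10 s = 20 s, but never below the 5 s floor.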
- const HyperedgeWeight objective_before = _current_metrics.quality; - const double time_limit = Base::refinementTimeLimit(_context, _uncoarseningData.round_coarsening_times.back()); - globalRefine(*_uncoarseningData.partitioned_hg, time_limit); - _uncoarseningData.round_coarsening_times.pop_back(); - ASSERT(_uncoarseningData.round_coarsening_times.size() == 0); - const HyperedgeWeight objective_after = _current_metrics.quality; - if ( _context.partition.verbose_output && objective_after < objective_before ) { - LOG << GREEN << "Top-Level Refinment improved objective from" - << objective_before << "to" << objective_after << END; + // Perform rebalancing + _timer.start_timer("rebalance", "Rebalance"); + mt_kahypar_partitioned_hypergraph_t phg = + utils::partitioned_hg_cast(*_uncoarseningData.partitioned_hg); + _rebalancer->refine(phg, {}, _current_metrics, 0.0); + _timer.stop_timer("rebalance"); + + const HyperedgeWeight quality_after = _current_metrics.quality; + if(_context.partition.verbose_output) + { + const HyperedgeWeight quality_delta = quality_after - quality_before; + if(quality_delta > 0) + { + LOG << RED << "Rebalancer worsens solution quality by" << quality_delta + << "(Current Imbalance:" + << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ")" + << END; } - - if ( _is_timer_disabled ) { - _timer.enable(); + else + { + LOG << GREEN << "Rebalancer improves solution quality by" << abs(quality_delta) + << "(Current Imbalance:" + << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ")" + << END; } } } - template - void NLevelUncoarsener::refineImpl() { - const double time_limit = Base::refinementTimeLimit(_context, _uncoarseningData.round_coarsening_times.back()); - globalRefine(*_uncoarseningData.partitioned_hg, time_limit); - } + ASSERT(metrics::quality(*_uncoarseningData.partitioned_hg, _context) == + _current_metrics.quality, + V(_current_metrics.quality) + << V(metrics::quality(*_uncoarseningData.partitioned_hg, _context))); +} - template - void NLevelUncoarsener::rebalancingImpl() { - // If we reach the top-level hypergraph and the partition is still imbalanced, - // we use a rebalancing algorithm to restore balance.
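To make the sign convention in the rebalancing log output above explicit: quality_delta = quality_after - quality_before, so a positive delta means the rebalancer traded objective quality for balance. A tiny sketch with invented numbers:

// Invented example values.
const int quality_before = 950;                            // objective before rebalancing
const int quality_after = 970;                             // objective after balance was restored
const int quality_delta = quality_after - quality_before;  // +20: quality got worse
// quality_delta > 0  -> reported in red as a worsening of the solution quality
// quality_delta <= 0 -> reported in green as an improvement of |quality_delta|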
- if ( _context.type == ContextType::main && !metrics::isBalanced(*_uncoarseningData.partitioned_hg, _context)) { - const HyperedgeWeight quality_before = _current_metrics.quality; - if ( _context.partition.verbose_output ) { - LOG << RED << "Partition is imbalanced (Current Imbalance:" - << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ") ->" - << "Rebalancer is activated" << END; - - LOG << "Part weights: (violations in red)"; - io::printPartWeightsAndSizes(*_uncoarseningData.partitioned_hg, _context); - } +template +HyperedgeWeight NLevelUncoarsener::getObjectiveImpl() const +{ + return _current_metrics.quality; +} - // Preform rebalancing - _timer.start_timer("rebalance", "Rebalance"); - mt_kahypar_partitioned_hypergraph_t phg = - utils::partitioned_hg_cast(*_uncoarseningData.partitioned_hg); - _rebalancer->refine(phg, {}, _current_metrics, 0.0); - _timer.stop_timer("rebalance"); - - const HyperedgeWeight quality_after = _current_metrics.quality; - if ( _context.partition.verbose_output ) { - const HyperedgeWeight quality_delta = quality_after - quality_before; - if ( quality_delta > 0 ) { - LOG << RED << "Rebalancer worsen solution quality by" << quality_delta - << "(Current Imbalance:" << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ")" << END; - } else { - LOG << GREEN << "Rebalancer improves solution quality by" << abs(quality_delta) - << "(Current Imbalance:" << metrics::imbalance(*_uncoarseningData.partitioned_hg, _context) << ")" << END; - } - } - } +template +void NLevelUncoarsener::updateMetricsImpl() +{ + _current_metrics = Base::initializeMetrics(*_uncoarseningData.partitioned_hg); + _progress.setObjective(_current_metrics.quality); +} - ASSERT(metrics::quality(*_uncoarseningData.partitioned_hg, _context) == _current_metrics.quality, - V(_current_metrics.quality) << V(metrics::quality(*_uncoarseningData.partitioned_hg, _context))); - } +template +typename TypeTraits::PartitionedHypergraph & +NLevelUncoarsener::currentPartitionedHypergraphImpl() +{ + return *_uncoarseningData.partitioned_hg; +} - template - HyperedgeWeight NLevelUncoarsener::getObjectiveImpl() const { - return _current_metrics.quality; - } +template +HypernodeID NLevelUncoarsener::currentNumberOfNodesImpl() const +{ + return _stats.current_number_of_nodes; +} - template - void NLevelUncoarsener::updateMetricsImpl() { - _current_metrics = Base::initializeMetrics(*_uncoarseningData.partitioned_hg); - _progress.setObjective(_current_metrics.quality); - } +template +typename TypeTraits::PartitionedHypergraph && +NLevelUncoarsener::movePartitionedHypergraphImpl() +{ + ASSERT(isTopLevelImpl()); + return std::move(*_uncoarseningData.partitioned_hg); +} - template - typename TypeTraits::PartitionedHypergraph& NLevelUncoarsener::currentPartitionedHypergraphImpl() { - return *_uncoarseningData.partitioned_hg; +template +void NLevelUncoarsener::localizedRefine( + PartitionedHypergraph &partitioned_hypergraph) +{ + // Copy all border nodes into one vector + vec refinement_nodes = _tmp_refinement_nodes.copy_parallel(); + _tmp_refinement_nodes.clear_parallel(); + _border_vertices_of_batch.reset(); + + if(debug && _context.type == ContextType::main) + { + io::printHypergraphInfo(partitioned_hypergraph.hypergraph(), _context, + "Refinement Hypergraph", false); + DBG << "Start Refinement - objective = " << _current_metrics.quality + << ", imbalance = " << _current_metrics.imbalance; } - template - HypernodeID NLevelUncoarsener::currentNumberOfNodesImpl() const { - return 
_stats.current_number_of_nodes; - } + bool improvement_found = true; + mt_kahypar_partitioned_hypergraph_t phg = + utils::partitioned_hg_cast(partitioned_hypergraph); + while(improvement_found) + { + improvement_found = false; + + if(_label_propagation && _context.refinement.label_propagation.algorithm != + LabelPropagationAlgorithm::do_nothing) + { + _timer.start_timer("label_propagation", "Label Propagation", false, + _force_measure_timings); + improvement_found |= _label_propagation->refine( + phg, refinement_nodes, _current_metrics, std::numeric_limits::max()); + _timer.stop_timer("label_propagation", _force_measure_timings); + } + + if(_fm && _context.refinement.fm.algorithm != FMAlgorithm::do_nothing) + { + _timer.start_timer("fm", "FM", false, _force_measure_timings); + improvement_found |= _fm->refine(phg, refinement_nodes, _current_metrics, + std::numeric_limits::max()); + _timer.stop_timer("fm", _force_measure_timings); + } + + if(_context.type == ContextType::main) + { + ASSERT(_current_metrics.quality == + metrics::quality(partitioned_hypergraph, _context.partition.objective), + "Actual metric" << V(metrics::quality(partitioned_hypergraph, _context)) + << "does not match the metric updated by the refiners" + << V(_current_metrics.quality)); + } - template - typename TypeTraits::PartitionedHypergraph&& NLevelUncoarsener::movePartitionedHypergraphImpl() { - ASSERT(isTopLevelImpl()); - return std::move(*_uncoarseningData.partitioned_hg); + if(!_context.refinement.refine_until_no_improvement) + { + break; + } } - template - void NLevelUncoarsener::localizedRefine(PartitionedHypergraph& partitioned_hypergraph) { - // Copy all border nodes into one vector - vec refinement_nodes = _tmp_refinement_nodes.copy_parallel(); - _tmp_refinement_nodes.clear_parallel(); - _border_vertices_of_batch.reset(); + if(_context.type == ContextType::main) + { + DBG << "--------------------------------------------------\n"; + } +} - if ( debug && _context.type == ContextType::main ) { - io::printHypergraphInfo(partitioned_hypergraph.hypergraph(), - _context, "Refinement Hypergraph", false); +template +void NLevelUncoarsener::globalRefine( + PartitionedHypergraph &partitioned_hypergraph, const double time_limit) +{ + + auto applyGlobalFMParameters = [&](const FMParameters &fm, + const NLevelGlobalFMParameters global_fm) { + NLevelGlobalFMParameters tmp_global_fm; + tmp_global_fm.num_seed_nodes = fm.num_seed_nodes; + tmp_global_fm.obey_minimal_parallelism = fm.obey_minimal_parallelism; + fm.num_seed_nodes = global_fm.num_seed_nodes; + fm.obey_minimal_parallelism = global_fm.obey_minimal_parallelism; + return tmp_global_fm; + }; + + if(_context.refinement.global_fm.use_global_fm) + { + if(debug && _context.type == ContextType::main) + { + io::printHypergraphInfo(partitioned_hypergraph.hypergraph(), _context, + "Refinement Hypergraph", false); DBG << "Start Refinement - objective = " << _current_metrics.quality << ", imbalance = " << _current_metrics.imbalance; } + // Enable Timings + bool was_enabled = false; + if(!_timer.isEnabled() && _context.type == ContextType::main) + { + _timer.enable(); + was_enabled = true; + } + + // Apply global FM parameters to FM context and temporary store old fm context + _timer.start_timer("global_refinement", "Global Refinement"); + NLevelGlobalFMParameters tmp_global_fm = + applyGlobalFMParameters(_context.refinement.fm, _context.refinement.global_fm); bool improvement_found = true; - mt_kahypar_partitioned_hypergraph_t phg = 
utils::partitioned_hg_cast(partitioned_hypergraph); - while( improvement_found ) { + mt_kahypar_partitioned_hypergraph_t phg = + utils::partitioned_hg_cast(partitioned_hypergraph); + while(improvement_found) + { improvement_found = false; + const HyperedgeWeight metric_before = _current_metrics.quality; - if ( _label_propagation && _context.refinement.label_propagation.algorithm != LabelPropagationAlgorithm::do_nothing ) { - _timer.start_timer("label_propagation", "Label Propagation", false, _force_measure_timings); - improvement_found |= _label_propagation->refine(phg, - refinement_nodes, _current_metrics, std::numeric_limits::max()); - _timer.stop_timer("label_propagation", _force_measure_timings); + if(_fm && _context.refinement.fm.algorithm != FMAlgorithm::do_nothing) + { + _timer.start_timer("fm", "FM"); + improvement_found |= _fm->refine(phg, {}, _current_metrics, time_limit); + _timer.stop_timer("fm"); } - if ( _fm && _context.refinement.fm.algorithm != FMAlgorithm::do_nothing ) { - _timer.start_timer("fm", "FM", false, _force_measure_timings); - improvement_found |= _fm->refine(phg, - refinement_nodes, _current_metrics, std::numeric_limits::max()); - _timer.stop_timer("fm", _force_measure_timings); + if(_flows && _context.refinement.flows.algorithm != FlowAlgorithm::do_nothing) + { + _timer.start_timer("initialize_flow_scheduler", "Initialize Flow Scheduler"); + _flows->initialize(phg); + _timer.stop_timer("initialize_flow_scheduler"); + + _timer.start_timer("flow_refinement_scheduler", "Flow Refinement Scheduler"); + improvement_found |= _flows->refine(phg, {}, _current_metrics, time_limit); + _timer.stop_timer("flow_refinement_scheduler"); } - if ( _context.type == ContextType::main ) { - ASSERT(_current_metrics.quality == metrics::quality(partitioned_hypergraph, _context.partition.objective), - "Actual metric" << V(metrics::quality(partitioned_hypergraph, _context)) << - "does not match the metric updated by the refiners" << V(_current_metrics.quality)); + if(_context.type == ContextType::main) + { + ASSERT(_current_metrics.quality == + metrics::quality(partitioned_hypergraph, _context.partition.objective), + "Actual metric" << V(metrics::quality(partitioned_hypergraph, _context)) + << "does not match the metric updated by the refiners" + << V(_current_metrics.quality)); } - if ( !_context.refinement.refine_until_no_improvement ) { + const HyperedgeWeight metric_after = _current_metrics.quality; + const double relative_improvement = + 1.0 - static_cast(metric_after) / metric_before; + if(!_context.refinement.global_fm.refine_until_no_improvement || + relative_improvement <= _context.refinement.relative_improvement_threshold) + { break; } } + // Reset FM context + applyGlobalFMParameters(_context.refinement.fm, tmp_global_fm); + _timer.stop_timer("global_refinement"); - if ( _context.type == ContextType::main) { - DBG << "--------------------------------------------------\n"; + if(was_enabled) + { + _timer.disable(); } - } - - template - void NLevelUncoarsener::globalRefine(PartitionedHypergraph& partitioned_hypergraph, - const double time_limit) { - - auto applyGlobalFMParameters = [&](const FMParameters& fm, const NLevelGlobalFMParameters global_fm){ - NLevelGlobalFMParameters tmp_global_fm; - tmp_global_fm.num_seed_nodes = fm.num_seed_nodes; - tmp_global_fm.obey_minimal_parallelism = fm.obey_minimal_parallelism; - fm.num_seed_nodes = global_fm.num_seed_nodes; - fm.obey_minimal_parallelism = global_fm.obey_minimal_parallelism; - return tmp_global_fm; - }; - - if ( 
_context.refinement.global_fm.use_global_fm ) { - if ( debug && _context.type == ContextType::main ) { - io::printHypergraphInfo(partitioned_hypergraph.hypergraph(), - _context, "Refinement Hypergraph", false); - DBG << "Start Refinement - objective = " << _current_metrics.quality - << ", imbalance = " << _current_metrics.imbalance; - } - - // Enable Timings - bool was_enabled = false; - if ( !_timer.isEnabled() && - _context.type == ContextType::main ) { - _timer.enable(); - was_enabled = true; - } - - // Apply global FM parameters to FM context and temporary store old fm context - _timer.start_timer("global_refinement", "Global Refinement"); - NLevelGlobalFMParameters tmp_global_fm = applyGlobalFMParameters( - _context.refinement.fm, _context.refinement.global_fm); - bool improvement_found = true; - mt_kahypar_partitioned_hypergraph_t phg = utils::partitioned_hg_cast(partitioned_hypergraph); - while( improvement_found ) { - improvement_found = false; - const HyperedgeWeight metric_before = _current_metrics.quality; - - if ( _fm && _context.refinement.fm.algorithm != FMAlgorithm::do_nothing ) { - _timer.start_timer("fm", "FM"); - improvement_found |= _fm->refine(phg, {}, _current_metrics, time_limit); - _timer.stop_timer("fm"); - } - - if ( _flows && _context.refinement.flows.algorithm != FlowAlgorithm::do_nothing ) { - _timer.start_timer("initialize_flow_scheduler", "Initialize Flow Scheduler"); - _flows->initialize(phg); - _timer.stop_timer("initialize_flow_scheduler"); - _timer.start_timer("flow_refinement_scheduler", "Flow Refinement Scheduler"); - improvement_found |= _flows->refine(phg, {}, _current_metrics, time_limit); - _timer.stop_timer("flow_refinement_scheduler"); - } - - if ( _context.type == ContextType::main ) { - ASSERT(_current_metrics.quality == metrics::quality(partitioned_hypergraph, _context.partition.objective), - "Actual metric" << V(metrics::quality(partitioned_hypergraph, _context)) << - "does not match the metric updated by the refiners" << V(_current_metrics.quality)); - } - - const HyperedgeWeight metric_after = _current_metrics.quality; - const double relative_improvement = 1.0 - - static_cast(metric_after) / metric_before; - if ( !_context.refinement.global_fm.refine_until_no_improvement || - relative_improvement <= _context.refinement.relative_improvement_threshold ) { - break; - } - } - // Reset FM context - applyGlobalFMParameters(_context.refinement.fm, tmp_global_fm); - _timer.stop_timer("global_refinement"); - - if ( was_enabled ) { - _timer.disable(); - } - - if ( _context.type == ContextType::main) { - DBG << "--------------------------------------------------\n"; - } + if(_context.type == ContextType::main) + { + DBG << "--------------------------------------------------\n"; } } +} - INSTANTIATE_CLASS_WITH_TYPE_TRAITS(NLevelUncoarsener) +INSTANTIATE_CLASS_WITH_TYPE_TRAITS(NLevelUncoarsener) } diff --git a/mt-kahypar/partition/coarsening/nlevel_uncoarsener.h b/mt-kahypar/partition/coarsening/nlevel_uncoarsener.h index 2aee0dad8..674328754 100644 --- a/mt-kahypar/partition/coarsening/nlevel_uncoarsener.h +++ b/mt-kahypar/partition/coarsening/nlevel_uncoarsener.h @@ -30,12 +30,12 @@ #include "kahypar-resources/datastructure/fast_reset_flag_array.h" -#include "mt-kahypar/partition/context.h" +#include "mt-kahypar/datastructures/streaming_vector.h" +#include "mt-kahypar/partition/coarsening/coarsening_commons.h" #include "mt-kahypar/partition/coarsening/i_uncoarsener.h" #include "mt-kahypar/partition/coarsening/uncoarsener_base.h" +#include 
"mt-kahypar/partition/context.h" #include "mt-kahypar/partition/refinement/i_refiner.h" -#include "mt-kahypar/partition/coarsening/coarsening_commons.h" -#include "mt-kahypar/datastructures/streaming_vector.h" #include "mt-kahypar/utils/progress_bar.h" namespace mt_kahypar { @@ -43,38 +43,42 @@ namespace mt_kahypar { // Forward Declaration class TargetGraph; -template +template class NLevelUncoarsener : public IUncoarsener, - private UncoarsenerBase { + private UncoarsenerBase +{ using Base = UncoarsenerBase; using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; using ParallelHyperedge = typename Hypergraph::ParallelHyperedge; - using ParallelHyperedgeVector = vec>; + using ParallelHyperedgeVector = vec >; - private: +private: static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - - struct NLevelStats { - explicit NLevelStats(const Context& context) : - utility_id(context.utility_id), - num_batches(0), - total_batch_sizes(0), - current_number_of_nodes(0), - min_num_border_vertices(0) { - min_num_border_vertices = std::max(context.refinement.max_batch_size, - context.shared_memory.num_threads * context.refinement.min_border_vertices_per_thread); + struct NLevelStats + { + explicit NLevelStats(const Context &context) : + utility_id(context.utility_id), num_batches(0), total_batch_sizes(0), + current_number_of_nodes(0), min_num_border_vertices(0) + { + min_num_border_vertices = + std::max(context.refinement.max_batch_size, + context.shared_memory.num_threads * + context.refinement.min_border_vertices_per_thread); } - ~NLevelStats() { + ~NLevelStats() + { double avg_batch_size = static_cast(total_batch_sizes) / num_batches; - utils::Utilities::instance().getStats(utility_id).add_stat( - "num_batches", static_cast(num_batches)); - utils::Utilities::instance().getStats(utility_id).add_stat( - "avg_batch_size", avg_batch_size); + utils::Utilities::instance() + .getStats(utility_id) + .add_stat("num_batches", static_cast(num_batches)); + utils::Utilities::instance() + .getStats(utility_id) + .add_stat("avg_batch_size", avg_batch_size); DBG << V(num_batches) << V(avg_batch_size); } @@ -85,28 +89,26 @@ class NLevelUncoarsener : public IUncoarsener, size_t min_num_border_vertices; }; - public: - NLevelUncoarsener(Hypergraph& hypergraph, - const Context& context, - UncoarseningData& uncoarseningData, - const TargetGraph* target_graph) : - Base(hypergraph, context, uncoarseningData), - _target_graph(target_graph), - _hierarchy(), - _tmp_refinement_nodes(), - _border_vertices_of_batch(hypergraph.initialNumNodes()), - _stats(context), - _current_metrics(), - _progress(hypergraph.initialNumNodes(), 0, false), - _is_timer_disabled(false), - _force_measure_timings(context.partition.measure_detailed_uncontraction_timings && context.type == ContextType::main) { } - - NLevelUncoarsener(const NLevelUncoarsener&) = delete; - NLevelUncoarsener(NLevelUncoarsener&&) = delete; - NLevelUncoarsener & operator= (const NLevelUncoarsener &) = delete; - NLevelUncoarsener & operator= (NLevelUncoarsener &&) = delete; - - private: +public: + NLevelUncoarsener(Hypergraph &hypergraph, const Context &context, + UncoarseningData &uncoarseningData, + const TargetGraph *target_graph) : + Base(hypergraph, context, uncoarseningData), + _target_graph(target_graph), _hierarchy(), _tmp_refinement_nodes(), + _border_vertices_of_batch(hypergraph.initialNumNodes()), _stats(context), + _current_metrics(), 
_progress(hypergraph.initialNumNodes(), 0, false), + _is_timer_disabled(false), + _force_measure_timings(context.partition.measure_detailed_uncontraction_timings && + context.type == ContextType::main) + { + } + + NLevelUncoarsener(const NLevelUncoarsener &) = delete; + NLevelUncoarsener(NLevelUncoarsener &&) = delete; + NLevelUncoarsener &operator=(const NLevelUncoarsener &) = delete; + NLevelUncoarsener &operator=(NLevelUncoarsener &&) = delete; + +private: void initializeImpl() override; bool isTopLevelImpl() const override; @@ -117,43 +119,42 @@ class NLevelUncoarsener : public IUncoarsener, void rebalancingImpl() override; - gain_cache_t getGainCacheImpl() override { - return _gain_cache; - } + gain_cache_t getGainCacheImpl() override { return _gain_cache; } HyperedgeWeight getObjectiveImpl() const override; void updateMetricsImpl() override; - PartitionedHypergraph& currentPartitionedHypergraphImpl() override; + PartitionedHypergraph &currentPartitionedHypergraphImpl() override; HypernodeID currentNumberOfNodesImpl() const override; - PartitionedHypergraph&& movePartitionedHypergraphImpl() override; + PartitionedHypergraph &&movePartitionedHypergraphImpl() override; - void localizedRefine(PartitionedHypergraph& partitioned_hypergraph); + void localizedRefine(PartitionedHypergraph &partitioned_hypergraph); - void globalRefine(PartitionedHypergraph& partitioned_hypergraph, + void globalRefine(PartitionedHypergraph &partitioned_hypergraph, const double time_limit); - using Base::_hg; using Base::_context; - using Base::_uncoarseningData; + using Base::_flows; + using Base::_fm; using Base::_gain_cache; + using Base::_hg; using Base::_label_propagation; - using Base::_fm; - using Base::_flows; using Base::_rebalancer; using Base::_timer; + using Base::_uncoarseningData; - const TargetGraph* _target_graph; + const TargetGraph *_target_graph; // ! Represents the n-level hierarchy // ! A batch is vector of uncontractions/mementos that can be uncontracted in parallel - // ! without conflicts. All batches of a specific version of the hypergraph are assembled - // ! in a batch vector. Each time we perform single-pin and parallel net detection we create - // ! a new version (simply increment a counter) of the hypergraph. Once a batch vector is - // ! completly processed single-pin and parallel nets have to be restored. + // ! without conflicts. All batches of a specific version of the hypergraph are assembled + // ! in a batch vector. Each time we perform single-pin and parallel net detection, we + // ! create a new version (simply increment a counter) of the hypergraph. Once a batch + // ! vector is completely processed, single-pin and parallel nets have to be restored.
VersionedBatchVector _hierarchy; ds::StreamingVector _tmp_refinement_nodes; diff --git a/mt-kahypar/partition/coarsening/nlevel_vertex_pair_rater.h b/mt-kahypar/partition/coarsening/nlevel_vertex_pair_rater.h index 0dc381fd9..1cc1c6b65 100644 --- a/mt-kahypar/partition/coarsening/nlevel_vertex_pair_rater.h +++ b/mt-kahypar/partition/coarsening/nlevel_vertex_pair_rater.h @@ -37,117 +37,136 @@ #include "kahypar-resources/meta/mandatory.h" #include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/coarsening/policies/rating_fixed_vertex_acceptance_policy.h" +#include "mt-kahypar/partition/context.h" namespace mt_kahypar { -template -class NLevelVertexPairRater { +class NLevelVertexPairRater +{ using LargeTmpRatingMap = ds::SparseMap; using CacheEfficientRatingMap = ds::FixedSizeSparseMap; - using ThreadLocalCacheEfficientRatingMap = tbb::enumerable_thread_specific; - using ThreadLocalVertexDegreeBoundedRatingMap = tbb::enumerable_thread_specific; + using ThreadLocalCacheEfficientRatingMap = + tbb::enumerable_thread_specific; + using ThreadLocalVertexDegreeBoundedRatingMap = + tbb::enumerable_thread_specific; using ThreadLocalLargeTmpRatingMap = tbb::enumerable_thread_specific; - private: +private: static constexpr bool debug = false; - class VertexPairRating { - public: + class VertexPairRating + { + public: VertexPairRating(HypernodeID trgt, RatingType val, bool is_valid) : - target(trgt), - value(val), - valid(is_valid) { } + target(trgt), value(val), valid(is_valid) + { + } VertexPairRating() : - target(std::numeric_limits::max()), - value(std::numeric_limits::min()), - valid(false) { } + target(std::numeric_limits::max()), + value(std::numeric_limits::min()), valid(false) + { + } - VertexPairRating(const VertexPairRating&) = delete; - VertexPairRating & operator= (const VertexPairRating &) = delete; + VertexPairRating(const VertexPairRating &) = delete; + VertexPairRating &operator=(const VertexPairRating &) = delete; - VertexPairRating(VertexPairRating&&) = default; - VertexPairRating & operator= (VertexPairRating &&) = delete; + VertexPairRating(VertexPairRating &&) = default; + VertexPairRating &operator=(VertexPairRating &&) = delete; HypernodeID target; RatingType value; bool valid; }; - enum class RatingMapType { + enum class RatingMapType + { CACHE_EFFICIENT_RATING_MAP, VERTEX_DEGREE_BOUNDED_RATING_MAP, LARGE_RATING_MAP }; - public: +public: using Rating = VertexPairRating; - NLevelVertexPairRater(const HypernodeID num_hypernodes, - const Context& context) : - _context(context), - _current_num_nodes(num_hypernodes), - _vertex_degree_sampling_threshold(context.coarsening.vertex_degree_sampling_threshold), - _local_cache_efficient_rating_map(0.0), - _local_vertex_degree_bounded_rating_map(3UL * _vertex_degree_sampling_threshold, 0.0), - _local_large_rating_map([&] { - return construct_large_tmp_rating_map(); - }), - _already_matched(num_hypernodes) { } - - NLevelVertexPairRater(const NLevelVertexPairRater&) = delete; - NLevelVertexPairRater & operator= (const NLevelVertexPairRater &) = delete; - - NLevelVertexPairRater(NLevelVertexPairRater&&) = delete; - NLevelVertexPairRater & operator= (NLevelVertexPairRater &&) = delete; - - template - VertexPairRating rate(const Hypergraph& hypergraph, - const HypernodeID u, - const HypernodeWeight max_allowed_node_weight) { - - const RatingMapType rating_map_type = getRatingMapTypeForRatingOfHypernode(hypergraph, u); - if ( rating_map_type == 
RatingMapType::CACHE_EFFICIENT_RATING_MAP ) { - return rate(hypergraph, u, _local_cache_efficient_rating_map.local(), max_allowed_node_weight, false); - } else if ( rating_map_type == RatingMapType::VERTEX_DEGREE_BOUNDED_RATING_MAP ) { - return rate(hypergraph, u, _local_vertex_degree_bounded_rating_map.local(), max_allowed_node_weight, true); - } else { - LargeTmpRatingMap& large_tmp_rating_map = _local_large_rating_map.local(); + NLevelVertexPairRater(const HypernodeID num_hypernodes, const Context &context) : + _context(context), _current_num_nodes(num_hypernodes), + _vertex_degree_sampling_threshold( + context.coarsening.vertex_degree_sampling_threshold), + _local_cache_efficient_rating_map(0.0), + _local_vertex_degree_bounded_rating_map(3UL * _vertex_degree_sampling_threshold, + 0.0), + _local_large_rating_map([&] { return construct_large_tmp_rating_map(); }), + _already_matched(num_hypernodes) + { + } + + NLevelVertexPairRater(const NLevelVertexPairRater &) = delete; + NLevelVertexPairRater &operator=(const NLevelVertexPairRater &) = delete; + + NLevelVertexPairRater(NLevelVertexPairRater &&) = delete; + NLevelVertexPairRater &operator=(NLevelVertexPairRater &&) = delete; + + template + VertexPairRating rate(const Hypergraph &hypergraph, const HypernodeID u, + const HypernodeWeight max_allowed_node_weight) + { + + const RatingMapType rating_map_type = + getRatingMapTypeForRatingOfHypernode(hypergraph, u); + if(rating_map_type == RatingMapType::CACHE_EFFICIENT_RATING_MAP) + { + return rate(hypergraph, u, + _local_cache_efficient_rating_map.local(), + max_allowed_node_weight, false); + } + else if(rating_map_type == RatingMapType::VERTEX_DEGREE_BOUNDED_RATING_MAP) + { + return rate(hypergraph, u, + _local_vertex_degree_bounded_rating_map.local(), + max_allowed_node_weight, true); + } + else + { + LargeTmpRatingMap &large_tmp_rating_map = _local_large_rating_map.local(); large_tmp_rating_map.setMaxSize(_current_num_nodes); - return rate(hypergraph, u, large_tmp_rating_map, max_allowed_node_weight, false); + return rate(hypergraph, u, large_tmp_rating_map, + max_allowed_node_weight, false); } } // ! Several threads will mark matches in parallel. However, since // ! we only set the corresponding value to true this function is // ! thread-safe. - void markAsMatched(const HypernodeID original_id) { + void markAsMatched(const HypernodeID original_id) + { _already_matched.set(original_id, true); } // ! 
Note, this function is not thread safe - void resetMatches() { - _already_matched.reset(); - } + void resetMatches() { _already_matched.reset(); } - void setCurrentNumberOfNodes(const HypernodeID current_num_nodes) { + void setCurrentNumberOfNodes(const HypernodeID current_num_nodes) + { _current_num_nodes = current_num_nodes; } - private: - template - VertexPairRating rate(const Hypergraph& hypergraph, - const HypernodeID u, - RatingMap& tmp_ratings, +private: + template + VertexPairRating rate(const Hypergraph &hypergraph, const HypernodeID u, + RatingMap &tmp_ratings, const HypernodeWeight max_allowed_node_weight, - const bool use_vertex_degree_sampling) { + const bool use_vertex_degree_sampling) + { - if ( use_vertex_degree_sampling ) { + if(use_vertex_degree_sampling) + { fillRatingMapWithSampling(hypergraph, u, tmp_ratings); - } else { + } + else + { fillRatingMap(hypergraph, u, tmp_ratings); } @@ -156,28 +175,32 @@ class NLevelVertexPairRater { const PartitionID community_u_id = hypergraph.communityID(u); RatingType max_rating = std::numeric_limits::min(); HypernodeID target = kInvalidHypernode; - for (auto it = tmp_ratings.end() - 1; it >= tmp_ratings.begin(); --it) { + for(auto it = tmp_ratings.end() - 1; it >= tmp_ratings.begin(); --it) + { const HypernodeID tmp_target = it->key; const HypernodeWeight target_weight = hypergraph.nodeWeight(tmp_target); - if ( tmp_target != u && weight_u + target_weight <= max_allowed_node_weight ) { - HypernodeWeight penalty = HeavyNodePenaltyPolicy::penalty(weight_u, target_weight); + if(tmp_target != u && weight_u + target_weight <= max_allowed_node_weight) + { + HypernodeWeight penalty = + HeavyNodePenaltyPolicy::penalty(weight_u, target_weight); penalty = penalty == 0 ? std::max(std::max(weight_u, target_weight), 1) : penalty; const RatingType tmp_rating = it->value / static_cast(penalty); bool accept_fixed_vertex_contraction = true; - if constexpr ( has_fixed_vertices ) { + if constexpr(has_fixed_vertices) + { accept_fixed_vertex_contraction = - FixedVertexAcceptancePolicy::acceptContraction( - hypergraph, hypergraph.fixedVertexSupport(), _context, tmp_target, u); + FixedVertexAcceptancePolicy::acceptContraction( + hypergraph, hypergraph.fixedVertexSupport(), _context, tmp_target, u); } DBG << "r(" << u << "," << tmp_target << ")=" << tmp_rating; - if ( accept_fixed_vertex_contraction && - community_u_id == hypergraph.communityID(tmp_target) && - AcceptancePolicy::acceptRating(tmp_rating, max_rating, - target, tmp_target, - cpu_id, _already_matched) ) { + if(accept_fixed_vertex_contraction && + community_u_id == hypergraph.communityID(tmp_target) && + AcceptancePolicy::acceptRating(tmp_rating, max_rating, target, tmp_target, + cpu_id, _already_matched)) + { max_rating = tmp_rating; target = tmp_target; } @@ -185,8 +208,10 @@ class NLevelVertexPairRater { } VertexPairRating ret; - if (max_rating != std::numeric_limits::min()) { - ASSERT(target != std::numeric_limits::max(), "invalid contraction target"); + if(max_rating != std::numeric_limits::min()) + { + ASSERT(target != std::numeric_limits::max(), + "invalid contraction target"); ret.value = max_rating; ret.target = target; ret.valid = true; @@ -195,36 +220,43 @@ class NLevelVertexPairRater { return ret; } - template - void fillRatingMap(const Hypergraph& hypergraph, - const HypernodeID u, - RatingMap& tmp_ratings) { - for ( const HyperedgeID& he : hypergraph.incidentEdges(u) ) { + template + void fillRatingMap(const Hypergraph &hypergraph, const HypernodeID u, + RatingMap &tmp_ratings) + { 
+ for(const HyperedgeID &he : hypergraph.incidentEdges(u)) + { HypernodeID edge_size = hypergraph.edgeSize(he); - if ( edge_size > 1 && edge_size < _context.partition.ignore_hyperedge_size_threshold ) { + if(edge_size > 1 && edge_size < _context.partition.ignore_hyperedge_size_threshold) + { const RatingType score = ScorePolicy::score(hypergraph.edgeWeight(he), edge_size); - for ( const HypernodeID& v : hypergraph.pins(he) ) { + for(const HypernodeID &v : hypergraph.pins(he)) + { tmp_ratings[v] += score; } } } } - template - void fillRatingMapWithSampling(const Hypergraph& hypergraph, - const HypernodeID u, - RatingMap& tmp_ratings) { + template + void fillRatingMapWithSampling(const Hypergraph &hypergraph, const HypernodeID u, + RatingMap &tmp_ratings) + { size_t num_tmp_rating_map_accesses = 0; - for ( const HyperedgeID& he : hypergraph.incidentEdges(u) ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(u)) + { HypernodeID edge_size = hypergraph.edgeSize(he); - if ( edge_size > 1 && edge_size < _context.partition.ignore_hyperedge_size_threshold ) { + if(edge_size > 1 && edge_size < _context.partition.ignore_hyperedge_size_threshold) + { // Break if number of accesses to the tmp rating map would exceed // vertex degree sampling threshold - if ( num_tmp_rating_map_accesses + edge_size > _vertex_degree_sampling_threshold ) { + if(num_tmp_rating_map_accesses + edge_size > _vertex_degree_sampling_threshold) + { break; } const RatingType score = ScorePolicy::score(hypergraph.edgeWeight(he), edge_size); - for ( const HypernodeID& v : hypergraph.pins(he) ) { + for(const HypernodeID &v : hypergraph.pins(he)) + { tmp_ratings[v] += score; ++num_tmp_rating_map_accesses; } @@ -232,38 +264,47 @@ class NLevelVertexPairRater { } } - template - inline RatingMapType getRatingMapTypeForRatingOfHypernode(const Hypergraph& hypergraph, - const HypernodeID u) { + template + inline RatingMapType getRatingMapTypeForRatingOfHypernode(const Hypergraph &hypergraph, + const HypernodeID u) + { const bool use_vertex_degree_sampling = - _vertex_degree_sampling_threshold != std::numeric_limits::max(); - const size_t vertex_degree_bounded_rating_map_size = use_vertex_degree_sampling ? - 3UL * _vertex_degree_sampling_threshold : std::numeric_limits::max(); + _vertex_degree_sampling_threshold != std::numeric_limits::max(); + const size_t vertex_degree_bounded_rating_map_size = + use_vertex_degree_sampling ? 3UL * _vertex_degree_sampling_threshold : + std::numeric_limits::max(); const size_t cache_efficient_rating_map_size = CacheEfficientRatingMap::MAP_SIZE; - const size_t size_of_smaller_rating_map = std::min( - vertex_degree_bounded_rating_map_size, cache_efficient_rating_map_size); + const size_t size_of_smaller_rating_map = + std::min(vertex_degree_bounded_rating_map_size, cache_efficient_rating_map_size); // In case the current number of nodes is smaller than size // of the cache-efficient sparse map, the large tmp rating map // consumes less memory - if ( _current_num_nodes < size_of_smaller_rating_map ) { + if(_current_num_nodes < size_of_smaller_rating_map) + { return RatingMapType::LARGE_RATING_MAP; } // Compute estimation for the upper bound of neighbors of u HypernodeID ub_neighbors_u = 0; - for ( const HyperedgeID& he : hypergraph.incidentEdges(u) ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(u)) + { const HypernodeID edge_size = hypergraph.edgeSize(he); // Ignore large hyperedges - ub_neighbors_u += edge_size < _context.partition.ignore_hyperedge_size_threshold ? 
edge_size : 0; - // If the number of estimated neighbors is greater than the size of the cache efficient rating map / 3, we - // use the large sparse map. The division by 3 also ensures that the fill grade - // of the cache efficient sparse map would be small enough such that linear probing - // is fast. - if ( ub_neighbors_u > cache_efficient_rating_map_size / 3UL ) { - if ( vertex_degree_bounded_rating_map_size < _current_num_nodes ) { + ub_neighbors_u += + edge_size < _context.partition.ignore_hyperedge_size_threshold ? edge_size : 0; + // If the number of estimated neighbors is greater than the size of the cache + // efficient rating map / 3, we use the large sparse map. The division by 3 also + // ensures that the fill grade of the cache efficient sparse map would be small + // enough such that linear probing is fast. + if(ub_neighbors_u > cache_efficient_rating_map_size / 3UL) + { + if(vertex_degree_bounded_rating_map_size < _current_num_nodes) + { return RatingMapType::VERTEX_DEGREE_BOUNDED_RATING_MAP; - } else { + } + else + { return RatingMapType::LARGE_RATING_MAP; } } @@ -272,11 +313,12 @@ class NLevelVertexPairRater { return RatingMapType::CACHE_EFFICIENT_RATING_MAP; } - LargeTmpRatingMap construct_large_tmp_rating_map() { + LargeTmpRatingMap construct_large_tmp_rating_map() + { return LargeTmpRatingMap(_current_num_nodes); } - const Context& _context; + const Context &_context; // ! Number of nodes of the current hypergraph HypernodeID _current_num_nodes; // ! Maximum number of neighbors that are considered for rating @@ -296,4 +338,4 @@ class NLevelVertexPairRater { // ! Marks all matched vertices kahypar::ds::FastResetFlagArray<> _already_matched; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/policies/rating_acceptance_policy.h b/mt-kahypar/partition/coarsening/policies/rating_acceptance_policy.h index 978ebfe05..70d5387cb 100644 --- a/mt-kahypar/partition/coarsening/policies/rating_acceptance_policy.h +++ b/mt-kahypar/partition/coarsening/policies/rating_acceptance_policy.h @@ -24,27 +24,26 @@ * SOFTWARE. 
******************************************************************************/ - #pragma once #include "kahypar-resources/datastructure/fast_reset_flag_array.h" #include "kahypar-resources/meta/policy_registry.h" #include "kahypar-resources/meta/typelist.h" -#include "mt-kahypar/partition/coarsening/policies/rating_tie_breaking_policy.h" #include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/partition/coarsening/policies/rating_tie_breaking_policy.h" namespace mt_kahypar { -class BestRatingPreferringUnmatched final : public kahypar::meta::PolicyBase { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptRating(const RatingType tmp, - const RatingType max_rating, - const HypernodeID old_target, - const HypernodeID new_target, - const int cpu_id, - const kahypar::ds::FastResetFlagArray<>& already_matched) { +class BestRatingPreferringUnmatched final : public kahypar::meta::PolicyBase +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool + acceptRating(const RatingType tmp, const RatingType max_rating, + const HypernodeID old_target, const HypernodeID new_target, + const int cpu_id, const kahypar::ds::FastResetFlagArray<> &already_matched) + { return max_rating < tmp || ((max_rating == tmp) && ((already_matched[old_target] && !already_matched[new_target]) || @@ -56,32 +55,33 @@ class BestRatingPreferringUnmatched final : public kahypar::meta::PolicyBase { }; #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES -class BestRatingWithoutTieBreaking final : public kahypar::meta::PolicyBase { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptRating(const RatingType tmp, - const RatingType max_rating, - const HypernodeID u, - const HypernodeID v, - const int, - const kahypar::ds::FastResetFlagArray<> &) { - return max_rating < tmp || ( max_rating == tmp && u < v ); +class BestRatingWithoutTieBreaking final : public kahypar::meta::PolicyBase +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool + acceptRating(const RatingType tmp, const RatingType max_rating, const HypernodeID u, + const HypernodeID v, const int, const kahypar::ds::FastResetFlagArray<> &) + { + return max_rating < tmp || (max_rating == tmp && u < v); } }; -class BestRatingWithTieBreaking final : public kahypar::meta::PolicyBase { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptRating(const RatingType tmp, - const RatingType max_rating, - const HypernodeID, - const HypernodeID, - const int cpu_id, - const kahypar::ds::FastResetFlagArray<> &) { - return max_rating < tmp || (max_rating == tmp && RandomRatingWins::acceptEqual(cpu_id)); +class BestRatingWithTieBreaking final : public kahypar::meta::PolicyBase +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool + acceptRating(const RatingType tmp, const RatingType max_rating, const HypernodeID, + const HypernodeID, const int cpu_id, + const kahypar::ds::FastResetFlagArray<> &) + { + return max_rating < tmp || + (max_rating == tmp && RandomRatingWins::acceptEqual(cpu_id)); } }; -using AcceptancePolicies = kahypar::meta::Typelist; +using AcceptancePolicies = + kahypar::meta::Typelist; #else using AcceptancePolicies = kahypar::meta::Typelist; #endif -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/policies/rating_fixed_vertex_acceptance_policy.h b/mt-kahypar/partition/coarsening/policies/rating_fixed_vertex_acceptance_policy.h index db7ab4603..be6e6a187 100644 --- 
a/mt-kahypar/partition/coarsening/policies/rating_fixed_vertex_acceptance_policy.h +++ b/mt-kahypar/partition/coarsening/policies/rating_fixed_vertex_acceptance_policy.h @@ -24,75 +24,84 @@ * SOFTWARE. ******************************************************************************/ - #pragma once #include "kahypar-resources/meta/policy_registry.h" #include "kahypar-resources/meta/typelist.h" -#include "mt-kahypar/partition/context.h" #include "mt-kahypar/datastructures/fixed_vertex_support.h" #include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/partition/context.h" namespace mt_kahypar { -class FixedVertexAcceptancePolicy final : public kahypar::meta::PolicyBase { - public: - // This function decides if contracting v onto u is allowed if the hypergraph contains fixed vertices. - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptContraction(const Hypergraph& hypergraph, - const ds::FixedVertexSupport& fixed_vertices, - const Context& context, - const HypernodeID u, - const HypernodeID v) { +class FixedVertexAcceptancePolicy final : public kahypar::meta::PolicyBase +{ +public: + // This function decides if contracting v onto u is allowed if the hypergraph contains + // fixed vertices. + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool + acceptContraction(const Hypergraph &hypergraph, + const ds::FixedVertexSupport &fixed_vertices, + const Context &context, const HypernodeID u, const HypernodeID v) + { // We allow the following contractions: // 1.) u = Fixed Vertex <- Free Vertex = v // 2.) u = Free Vertex <- Free Vertex = v - // 3.) u = Fixed Vertex <- Fixed Vertex = v, but u and v must be assigned to the same fixed vertex block - // Note that we do not allow contractions that contract fixed vertex onto a free vertex. - // This policy is the same as used in KaHyPar. - const bool accept_contraction = fixed_vertices.isFixed(u) || !fixed_vertices.isFixed(v); + // 3.) u = Fixed Vertex <- Fixed Vertex = v, but u and v must be assigned to the same + // fixed vertex block Note that we do not allow contractions that contract fixed + // vertex onto a free vertex. This policy is the same as used in KaHyPar. + const bool accept_contraction = + fixed_vertices.isFixed(u) || !fixed_vertices.isFixed(v); // If both are fixed, both vertices must be in the same block const bool accept_fixed_vertex_contraction = - !( fixed_vertices.isFixed(u) && fixed_vertices.isFixed(v) ) || - ( fixed_vertices.fixedVertexBlock(u) == fixed_vertices.fixedVertexBlock(v) ); + !(fixed_vertices.isFixed(u) && fixed_vertices.isFixed(v)) || + (fixed_vertices.fixedVertexBlock(u) == fixed_vertices.fixedVertexBlock(v)); return accept_contraction && accept_fixed_vertex_contraction && - acceptImbalance(hypergraph, fixed_vertices, context, u, v); + acceptImbalance(hypergraph, fixed_vertices, context, u, v); } - private: - // During coarsening, we try to keep the partition induced by the fixed vertices balanced. - // This gives our optimization algorithm that we run after initial partitioning more leeway to - // improve the solution. - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptImbalance(const Hypergraph& hypergraph, - const ds::FixedVertexSupport& fixed_vertices, - const Context& context, - const HypernodeID u, - const HypernodeID v) { +private: + // During coarsening, we try to keep the partition induced by the fixed vertices + // balanced. 
This gives our optimization algorithm that we run after initial + // partitioning more leeway to improve the solution. + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool + acceptImbalance(const Hypergraph &hypergraph, + const ds::FixedVertexSupport &fixed_vertices, + const Context &context, const HypernodeID u, const HypernodeID v) + { const bool is_fixed_u = fixed_vertices.isFixed(u); const bool is_fixed_v = fixed_vertices.isFixed(v); - if ( ( is_fixed_u && is_fixed_v ) || (!is_fixed_u && !is_fixed_v) ) { + if((is_fixed_u && is_fixed_v) || (!is_fixed_u && !is_fixed_v)) + { // Contracting a fixed onto a fixed vertex, or an free onto a free vertex does // not increase the fixed vertex block weight. return true; } const HypernodeWeight max_allowed_fixed_vertex_block_weight = - (1.0 + context.partition.epsilon) * std::ceil( - static_cast(fixed_vertices.totalFixedVertexWeight()) / context.partition.k ); + (1.0 + context.partition.epsilon) * + std::ceil(static_cast(fixed_vertices.totalFixedVertexWeight()) / + context.partition.k); const PartitionID block_of_u = fixed_vertices.fixedVertexBlock(u); const PartitionID block_of_v = fixed_vertices.fixedVertexBlock(v); - const PartitionID fixed_block = block_of_u == kInvalidPartition ? block_of_v : block_of_u; + const PartitionID fixed_block = + block_of_u == kInvalidPartition ? block_of_v : block_of_u; ASSERT(fixed_block != kInvalidPartition); const HypernodeWeight fixed_vertex_block_weight_after = - ( block_of_u == kInvalidPartition ? hypergraph.nodeWeight(u) : fixed_vertices.fixedVertexBlockWeight(fixed_block) ) + - ( block_of_u == kInvalidPartition ? fixed_vertices.fixedVertexBlockWeight(fixed_block) : hypergraph.nodeWeight(v) ); + (block_of_u == kInvalidPartition ? + hypergraph.nodeWeight(u) : + fixed_vertices.fixedVertexBlockWeight(fixed_block)) + + (block_of_u == kInvalidPartition ? 
+ fixed_vertices.fixedVertexBlockWeight(fixed_block) : + hypergraph.nodeWeight(v)); return fixed_vertex_block_weight_after <= - std::min(max_allowed_fixed_vertex_block_weight, - context.partition.max_part_weights[fixed_block]); + std::min(max_allowed_fixed_vertex_block_weight, + context.partition.max_part_weights[fixed_block]); } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/policies/rating_heavy_node_penalty_policy.h b/mt-kahypar/partition/coarsening/policies/rating_heavy_node_penalty_policy.h index 4e3ee8174..e8b1ecfe9 100644 --- a/mt-kahypar/partition/coarsening/policies/rating_heavy_node_penalty_policy.h +++ b/mt-kahypar/partition/coarsening/policies/rating_heavy_node_penalty_policy.h @@ -34,32 +34,40 @@ namespace mt_kahypar { -class NoWeightPenalty final : public kahypar::meta::PolicyBase { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static HypernodeWeight penalty(const HypernodeWeight, const HypernodeWeight) { +class NoWeightPenalty final : public kahypar::meta::PolicyBase +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static HypernodeWeight penalty(const HypernodeWeight, + const HypernodeWeight) + { return 1; } }; #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES -class MultiplicativePenalty final : public kahypar::meta::PolicyBase { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static HypernodeWeight penalty(const HypernodeWeight weight_u, const HypernodeWeight weight_v) { +class MultiplicativePenalty final : public kahypar::meta::PolicyBase +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static HypernodeWeight + penalty(const HypernodeWeight weight_u, const HypernodeWeight weight_v) + { return weight_u * weight_v; } }; -class AdditivePenalty final : public kahypar::meta::PolicyBase { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static HypernodeWeight penalty(const HypernodeWeight weight_u, const HypernodeWeight weight_v) { +class AdditivePenalty final : public kahypar::meta::PolicyBase +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static HypernodeWeight + penalty(const HypernodeWeight weight_u, const HypernodeWeight weight_v) + { return weight_u + weight_v; } }; -using HeavyNodePenaltyPolicies = kahypar::meta::Typelist; +using HeavyNodePenaltyPolicies = + kahypar::meta::Typelist; #else using HeavyNodePenaltyPolicies = kahypar::meta::Typelist; #endif -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/policies/rating_score_policy.h b/mt-kahypar/partition/coarsening/policies/rating_score_policy.h index 7cdb2dd5a..0a36ab456 100644 --- a/mt-kahypar/partition/coarsening/policies/rating_score_policy.h +++ b/mt-kahypar/partition/coarsening/policies/rating_score_policy.h @@ -33,19 +33,23 @@ #include "mt-kahypar/macros.h" namespace mt_kahypar { -class HeavyEdgeScore final : public kahypar::meta::PolicyBase { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static RatingType score(const HyperedgeWeight edge_weight, - const HypernodeID edge_size) { +class HeavyEdgeScore final : public kahypar::meta::PolicyBase +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static RatingType + score(const HyperedgeWeight edge_weight, const HypernodeID edge_size) + { return static_cast(edge_weight) / (edge_size - 1); } }; #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES -class SamenessScore final : public kahypar::meta::PolicyBase { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static RatingType score(const HyperedgeWeight edge_weight, - const HypernodeID) { +class SamenessScore final : public 
kahypar::meta::PolicyBase +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static RatingType + score(const HyperedgeWeight edge_weight, const HypernodeID) + { return static_cast(edge_weight); } }; @@ -55,4 +59,4 @@ using RatingScorePolicies = kahypar::meta::Typelist; #endif -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/policies/rating_tie_breaking_policy.h b/mt-kahypar/partition/coarsening/policies/rating_tie_breaking_policy.h index fffdac517..4bb94fe2c 100644 --- a/mt-kahypar/partition/coarsening/policies/rating_tie_breaking_policy.h +++ b/mt-kahypar/partition/coarsening/policies/rating_tie_breaking_policy.h @@ -29,51 +29,51 @@ #include "mt-kahypar/utils/randomize.h" namespace mt_kahypar { -class LastRatingWins { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptEqual(const int) { - return true; - } +class LastRatingWins +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptEqual(const int) { return true; } - LastRatingWins(const LastRatingWins&) = delete; - LastRatingWins & operator= (const LastRatingWins &) = delete; + LastRatingWins(const LastRatingWins &) = delete; + LastRatingWins &operator=(const LastRatingWins &) = delete; - LastRatingWins(LastRatingWins&&) = delete; - LastRatingWins & operator= (LastRatingWins &&) = delete; + LastRatingWins(LastRatingWins &&) = delete; + LastRatingWins &operator=(LastRatingWins &&) = delete; - protected: +protected: ~LastRatingWins() = default; }; -class FirstRatingWins { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptEqual(const int) { - return false; - } +class FirstRatingWins +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptEqual(const int) { return false; } - FirstRatingWins(const FirstRatingWins&) = delete; - FirstRatingWins & operator= (const FirstRatingWins &) = delete; + FirstRatingWins(const FirstRatingWins &) = delete; + FirstRatingWins &operator=(const FirstRatingWins &) = delete; - FirstRatingWins(FirstRatingWins&&) = delete; - FirstRatingWins & operator= (FirstRatingWins &&) = delete; + FirstRatingWins(FirstRatingWins &&) = delete; + FirstRatingWins &operator=(FirstRatingWins &&) = delete; - protected: +protected: ~FirstRatingWins() = default; }; -class RandomRatingWins { - public: - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptEqual(const int cpu_id) { +class RandomRatingWins +{ +public: + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE static bool acceptEqual(const int cpu_id) + { return utils::Randomize::instance().flipCoin(cpu_id); } - RandomRatingWins(const RandomRatingWins&) = delete; - RandomRatingWins & operator= (const RandomRatingWins &) = delete; + RandomRatingWins(const RandomRatingWins &) = delete; + RandomRatingWins &operator=(const RandomRatingWins &) = delete; - RandomRatingWins(RandomRatingWins&&) = delete; - RandomRatingWins & operator= (RandomRatingWins &&) = delete; + RandomRatingWins(RandomRatingWins &&) = delete; + RandomRatingWins &operator=(RandomRatingWins &&) = delete; - protected: +protected: ~RandomRatingWins() = default; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/coarsening/uncoarsener_base.h b/mt-kahypar/partition/coarsening/uncoarsener_base.h index d1b282b35..9e9dd07af 100644 --- a/mt-kahypar/partition/coarsening/uncoarsener_base.h +++ b/mt-kahypar/partition/coarsening/uncoarsener_base.h @@ -30,107 +30,113 @@ #include -#include "mt-kahypar/partition/context.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" #include 
"mt-kahypar/partition/coarsening/coarsening_commons.h" +#include "mt-kahypar/partition/context.h" +#include "mt-kahypar/partition/factories.h" +#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/refinement/flows/scheduler.h" #include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" -#include "mt-kahypar/utils/utilities.h" -#include "mt-kahypar/partition/metrics.h" -#include "mt-kahypar/partition/factories.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/utils/utilities.h" namespace mt_kahypar { -template -class UncoarsenerBase { +template +class UncoarsenerBase +{ - protected: +protected: static constexpr bool debug = false; using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - UncoarsenerBase(Hypergraph& hypergraph, - const Context& context, - UncoarseningData& uncoarseningData) : - _hg(hypergraph), - _context(context), - _timer(utils::Utilities::instance().getTimer(context.utility_id)), - _uncoarseningData(uncoarseningData), - _gain_cache(gain_cache_t {nullptr, GainPolicy::none}), - _label_propagation(nullptr), - _fm(nullptr), - _flows(nullptr), - _rebalancer(nullptr) {} +public: + UncoarsenerBase(Hypergraph &hypergraph, const Context &context, + UncoarseningData &uncoarseningData) : + _hg(hypergraph), + _context(context), + _timer(utils::Utilities::instance().getTimer(context.utility_id)), + _uncoarseningData(uncoarseningData), + _gain_cache(gain_cache_t{ nullptr, GainPolicy::none }), _label_propagation(nullptr), + _fm(nullptr), _flows(nullptr), _rebalancer(nullptr) + { + } - UncoarsenerBase(const UncoarsenerBase&) = delete; - UncoarsenerBase(UncoarsenerBase&&) = delete; - UncoarsenerBase & operator= (const UncoarsenerBase &) = delete; - UncoarsenerBase & operator= (UncoarsenerBase &&) = delete; + UncoarsenerBase(const UncoarsenerBase &) = delete; + UncoarsenerBase(UncoarsenerBase &&) = delete; + UncoarsenerBase &operator=(const UncoarsenerBase &) = delete; + UncoarsenerBase &operator=(UncoarsenerBase &&) = delete; - virtual ~UncoarsenerBase() { - GainCachePtr::deleteGainCache(_gain_cache); - }; + virtual ~UncoarsenerBase() { GainCachePtr::deleteGainCache(_gain_cache); }; - protected: - Hypergraph& _hg; - const Context& _context; - utils::Timer& _timer; - UncoarseningData& _uncoarseningData; +protected: + Hypergraph &_hg; + const Context &_context; + utils::Timer &_timer; + UncoarseningData &_uncoarseningData; gain_cache_t _gain_cache; std::unique_ptr _label_propagation; std::unique_ptr _fm; std::unique_ptr _flows; std::unique_ptr _rebalancer; - protected: - - double refinementTimeLimit(const Context& context, const double time) { - if ( context.refinement.fm.time_limit_factor != std::numeric_limits::max() ) { - const double time_limit_factor = std::max(1.0, context.refinement.fm.time_limit_factor * context.partition.k); +protected: + double refinementTimeLimit(const Context &context, const double time) + { + if(context.refinement.fm.time_limit_factor != std::numeric_limits::max()) + { + const double time_limit_factor = + std::max(1.0, context.refinement.fm.time_limit_factor * context.partition.k); return std::max(5.0, time_limit_factor * time); - } else { + } + else + { return std::numeric_limits::max(); } } - Metrics initializeMetrics(PartitionedHypergraph& phg) { - Metrics m = { metrics::quality(phg, _context), metrics::imbalance(phg, _context) }; + Metrics initializeMetrics(PartitionedHypergraph &phg) + { 
+ Metrics m = { metrics::quality(phg, _context), metrics::imbalance(phg, _context) }; int64_t num_nodes = phg.initialNumNodes(); - int64_t num_edges = Hypergraph::is_graph ? phg.initialNumEdges() / 2 : phg.initialNumEdges(); - utils::Stats& stats = utils::Utilities::instance().getStats(_context.utility_id); + int64_t num_edges = + Hypergraph::is_graph ? phg.initialNumEdges() / 2 : phg.initialNumEdges(); + utils::Stats &stats = utils::Utilities::instance().getStats(_context.utility_id); stats.add_stat("initial_num_nodes", num_nodes); stats.add_stat("initial_num_edges", num_edges); std::stringstream ss; ss << "initial_" << _context.partition.objective; stats.add_stat(ss.str(), metrics::quality(phg, _context)); - if ( _context.partition.objective != Objective::cut ) { + if(_context.partition.objective != Objective::cut) + { stats.add_stat("initial_cut", metrics::quality(phg, Objective::cut)); } - if ( _context.partition.objective != Objective::km1 ) { + if(_context.partition.objective != Objective::km1) + { stats.add_stat("initial_km1", metrics::quality(phg, Objective::km1)); } stats.add_stat("initial_imbalance", m.imbalance); return m; } - void initializeRefinementAlgorithms() { + void initializeRefinementAlgorithms() + { _gain_cache = GainCachePtr::constructGainCache(_context); // refinement algorithms require access to the rebalancer _rebalancer = RebalancerFactory::getInstance().createObject( - _context.refinement.rebalancer, _hg.initialNumNodes(), _context, _gain_cache); + _context.refinement.rebalancer, _hg.initialNumNodes(), _context, _gain_cache); _label_propagation = LabelPropagationFactory::getInstance().createObject( - _context.refinement.label_propagation.algorithm, - _hg.initialNumNodes(), _hg.initialNumEdges(), _context, _gain_cache, *_rebalancer); + _context.refinement.label_propagation.algorithm, _hg.initialNumNodes(), + _hg.initialNumEdges(), _context, _gain_cache, *_rebalancer); _fm = FMFactory::getInstance().createObject( - _context.refinement.fm.algorithm, - _hg.initialNumNodes(), _hg.initialNumEdges(), _context, _gain_cache, *_rebalancer); + _context.refinement.fm.algorithm, _hg.initialNumNodes(), _hg.initialNumEdges(), + _context, _gain_cache, *_rebalancer); _flows = FlowSchedulerFactory::getInstance().createObject( - _context.refinement.flows.algorithm, - _hg.initialNumNodes(), _hg.initialNumEdges(), _context, _gain_cache); + _context.refinement.flows.algorithm, _hg.initialNumNodes(), _hg.initialNumEdges(), + _context, _gain_cache); } }; } diff --git a/mt-kahypar/partition/context.cpp b/mt-kahypar/partition/context.cpp index 93cf1b187..5c7a425fb 100644 --- a/mt-kahypar/partition/context.cpp +++ b/mt-kahypar/partition/context.cpp @@ -29,929 +29,1131 @@ #include -#include "mt-kahypar/utils/exception.h" #include "mt-kahypar/partition/conversion.h" +#include "mt-kahypar/utils/exception.h" namespace mt_kahypar { - std::ostream & operator<< (std::ostream& str, const PartitioningParameters& params) { - str << "Partitioning Parameters:" << std::endl; - str << " Hypergraph: " << params.graph_filename << std::endl; - if ( params.fixed_vertex_filename != "" ) { - str << " Fixed Vertex File: " << params.fixed_vertex_filename << std::endl; - } - if ( params.write_partition_file ) { - str << " Partition File: " << params.graph_partition_filename << std::endl; - } - str << " Mode: " << params.mode << std::endl; - str << " Objective: " << params.objective << std::endl; - str << " Gain Policy: " << params.gain_policy << std::endl; - str << " Input File Format: " << params.file_format << 
std::endl; - if ( params.instance_type != InstanceType::UNDEFINED ) { - str << " Instance Type: " << params.instance_type << std::endl; - } - if ( params.preset_type != PresetType::UNDEFINED ) { - str << " Preset Type: " << params.preset_type << std::endl; - } - str << " Partition Type: " << params.partition_type << std::endl; - str << " k: " << params.k << std::endl; - str << " epsilon: " << params.epsilon << std::endl; - str << " seed: " << params.seed << std::endl; - str << " Number of V-Cycles: " << params.num_vcycles << std::endl; - str << " Ignore HE Size Threshold: " << params.ignore_hyperedge_size_threshold << std::endl; - str << " Large HE Size Threshold: " << params.large_hyperedge_size_threshold << std::endl; - if ( params.use_individual_part_weights ) { - str << " Individual Part Weights: "; - for ( const HypernodeWeight& w : params.max_part_weights ) { - str << w << " "; - } - str << std::endl; - } - if ( params.mode == Mode::deep_multilevel ) { - str << " Perform Parallel Recursion: " << std::boolalpha - << params.perform_parallel_recursion_in_deep_multilevel << std::endl; +std::ostream &operator<<(std::ostream &str, const PartitioningParameters ¶ms) +{ + str << "Partitioning Parameters:" << std::endl; + str << " Hypergraph: " << params.graph_filename << std::endl; + if(params.fixed_vertex_filename != "") + { + str << " Fixed Vertex File: " << params.fixed_vertex_filename + << std::endl; + } + if(params.write_partition_file) + { + str << " Partition File: " << params.graph_partition_filename + << std::endl; + } + str << " Mode: " << params.mode << std::endl; + str << " Objective: " << params.objective << std::endl; + str << " Gain Policy: " << params.gain_policy << std::endl; + str << " Input File Format: " << params.file_format << std::endl; + if(params.instance_type != InstanceType::UNDEFINED) + { + str << " Instance Type: " << params.instance_type << std::endl; + } + if(params.preset_type != PresetType::UNDEFINED) + { + str << " Preset Type: " << params.preset_type << std::endl; + } + str << " Partition Type: " << params.partition_type << std::endl; + str << " k: " << params.k << std::endl; + str << " epsilon: " << params.epsilon << std::endl; + str << " seed: " << params.seed << std::endl; + str << " Number of V-Cycles: " << params.num_vcycles << std::endl; + str << " Ignore HE Size Threshold: " + << params.ignore_hyperedge_size_threshold << std::endl; + str << " Large HE Size Threshold: " << params.large_hyperedge_size_threshold + << std::endl; + if(params.use_individual_part_weights) + { + str << " Individual Part Weights: "; + for(const HypernodeWeight &w : params.max_part_weights) + { + str << w << " "; } - return str; + str << std::endl; + } + if(params.mode == Mode::deep_multilevel) + { + str << " Perform Parallel Recursion: " << std::boolalpha + << params.perform_parallel_recursion_in_deep_multilevel << std::endl; + } + return str; +} + +std::ostream &operator<<(std::ostream &str, const CommunityDetectionParameters ¶ms) +{ + str << " Community Detection Parameters:" << std::endl; + str << " Edge Weight Function: " << params.edge_weight_function + << std::endl; + str << " Maximum Louvain-Pass Iterations: " << params.max_pass_iterations + << std::endl; + str << " Minimum Vertex Move Fraction: " << params.min_vertex_move_fraction + << std::endl; + str << " Vertex Degree Sampling Threshold: " + << params.vertex_degree_sampling_threshold << std::endl; + str << " Number of subrounds (deterministic): " + << params.num_sub_rounds_deterministic << std::endl; + return str; 
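[Annotation, not part of the patch] The parameter printers reformatted in this file follow a simple composition pattern: each nested parameter struct defines its own operator<<, and the enclosing struct's printer streams it, sometimes conditionally (e.g. the preprocessing printer below only emits its community_detection block when use_community_detection is set). A small sketch of that pattern with hypothetical structs Outer/Inner, not taken from the codebase:

#include <iostream>

// Hypothetical structs illustrating the nested operator<< pattern.
struct Inner { int max_pass_iterations = 5; };
struct Outer { bool use_inner = true; Inner inner; };

std::ostream &operator<<(std::ostream &str, const Inner &p) {
  str << "  Inner Parameters:" << std::endl;
  str << "    Max Pass Iterations: " << p.max_pass_iterations << std::endl;
  return str;
}

std::ostream &operator<<(std::ostream &str, const Outer &p) {
  str << "Outer Parameters:" << std::endl;
  str << "  Use Inner: " << std::boolalpha << p.use_inner << std::endl;
  if (p.use_inner) {
    str << std::endl << p.inner; // delegate to the nested printer
  }
  return str;
}

int main() { std::cout << Outer{}; }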
+} + +std::ostream &operator<<(std::ostream &str, const PreprocessingParameters ¶ms) +{ + str << "Preprocessing Parameters:" << std::endl; + str << " Use Community Detection: " << std::boolalpha + << params.use_community_detection << std::endl; + str << " Disable C. D. for Mesh Graphs: " << std::boolalpha + << params.disable_community_detection_for_mesh_graphs << std::endl; + if(params.use_community_detection) + { + str << std::endl << params.community_detection; } + return str; +} + +std::ostream &operator<<(std::ostream &str, const RatingParameters ¶ms) +{ + str << " Rating Parameters:" << std::endl; + str << " Rating Function: " << params.rating_function << std::endl; + str << " Heavy Node Penalty: " << params.heavy_node_penalty_policy + << std::endl; + str << " Acceptance Policy: " << params.acceptance_policy + << std::endl; + return str; +} + +std::ostream &operator<<(std::ostream &str, const CoarseningParameters ¶ms) +{ + str << "Coarsening Parameters:" << std::endl; + str << " Algorithm: " << params.algorithm << std::endl; + str << " Use Adaptive Edge Size: " << std::boolalpha + << params.use_adaptive_edge_size << std::endl; + str << " Max Allowed Weight Multiplier: " << params.max_allowed_weight_multiplier + << std::endl; + str << " Maximum Allowed Hypernode Weight: " << params.max_allowed_node_weight + << std::endl; + str << " Contraction Limit Multiplier: " << params.contraction_limit_multiplier + << std::endl; + str << " Deep ML Contraction Limit Multi.: " + << params.deep_ml_contraction_limit_multiplier << std::endl; + str << " Contraction Limit: " << params.contraction_limit + << std::endl; + str << " Minimum Shrink Factor: " << params.minimum_shrink_factor + << std::endl; + str << " Maximum Shrink Factor: " << params.maximum_shrink_factor + << std::endl; + str << " Vertex Degree Sampling Threshold: " + << params.vertex_degree_sampling_threshold << std::endl; + str << " Number of subrounds (deterministic):" << params.num_sub_rounds_deterministic + << std::endl; + str << std::endl << params.rating; + return str; +} - std::ostream & operator<< (std::ostream& str, const CommunityDetectionParameters& params) { - str << " Community Detection Parameters:" << std::endl; - str << " Edge Weight Function: " << params.edge_weight_function << std::endl; - str << " Maximum Louvain-Pass Iterations: " << params.max_pass_iterations << std::endl; - str << " Minimum Vertex Move Fraction: " << params.min_vertex_move_fraction << std::endl; - str << " Vertex Degree Sampling Threshold: " << params.vertex_degree_sampling_threshold << std::endl; - str << " Number of subrounds (deterministic): " << params.num_sub_rounds_deterministic << std::endl; - return str; +std::ostream &operator<<(std::ostream &str, const LabelPropagationParameters ¶ms) +{ + str << " Label Propagation Parameters:" << std::endl; + str << " Algorithm: " << params.algorithm << std::endl; + if(params.algorithm != LabelPropagationAlgorithm::do_nothing) + { + str << " Maximum Iterations: " << params.maximum_iterations + << std::endl; + str << " Unconstrained: " << std::boolalpha + << params.unconstrained << std::endl; + str << " Rebalancing: " << std::boolalpha + << params.rebalancing << std::endl; + str << " HE Size Activation Threshold: " << std::boolalpha + << params.hyperedge_size_activation_threshold << std::endl; + str << " Relative Improvement Threshold: " + << params.relative_improvement_threshold << std::endl; } + return str; +} - std::ostream & operator<< (std::ostream& str, const PreprocessingParameters& params) { - str << 
"Preprocessing Parameters:" << std::endl; - str << " Use Community Detection: " << std::boolalpha << params.use_community_detection << std::endl; - str << " Disable C. D. for Mesh Graphs: " << std::boolalpha << params.disable_community_detection_for_mesh_graphs << std::endl; - if (params.use_community_detection) { - str << std::endl << params.community_detection; - } - return str; - } - - std::ostream & operator<< (std::ostream& str, const RatingParameters& params) { - str << " Rating Parameters:" << std::endl; - str << " Rating Function: " << params.rating_function << std::endl; - str << " Heavy Node Penalty: " << params.heavy_node_penalty_policy << std::endl; - str << " Acceptance Policy: " << params.acceptance_policy << std::endl; - return str; - } - - std::ostream & operator<< (std::ostream& str, const CoarseningParameters& params) { - str << "Coarsening Parameters:" << std::endl; - str << " Algorithm: " << params.algorithm << std::endl; - str << " Use Adaptive Edge Size: " << std::boolalpha << params.use_adaptive_edge_size << std::endl; - str << " Max Allowed Weight Multiplier: " << params.max_allowed_weight_multiplier << std::endl; - str << " Maximum Allowed Hypernode Weight: " << params.max_allowed_node_weight << std::endl; - str << " Contraction Limit Multiplier: " << params.contraction_limit_multiplier << std::endl; - str << " Deep ML Contraction Limit Multi.: " << params.deep_ml_contraction_limit_multiplier << std::endl; - str << " Contraction Limit: " << params.contraction_limit << std::endl; - str << " Minimum Shrink Factor: " << params.minimum_shrink_factor << std::endl; - str << " Maximum Shrink Factor: " << params.maximum_shrink_factor << std::endl; - str << " Vertex Degree Sampling Threshold: " << params.vertex_degree_sampling_threshold << std::endl; - str << " Number of subrounds (deterministic):" << params.num_sub_rounds_deterministic << std::endl; - str << std::endl << params.rating; - return str; - } - - std::ostream & operator<< (std::ostream& str, const LabelPropagationParameters& params) { - str << " Label Propagation Parameters:" << std::endl; - str << " Algorithm: " << params.algorithm << std::endl; - if ( params.algorithm != LabelPropagationAlgorithm::do_nothing ) { - str << " Maximum Iterations: " << params.maximum_iterations << std::endl; - str << " Unconstrained: " << std::boolalpha << params.unconstrained << std::endl; - str << " Rebalancing: " << std::boolalpha << params.rebalancing << std::endl; - str << " HE Size Activation Threshold: " << std::boolalpha << params.hyperedge_size_activation_threshold << std::endl; - str << " Relative Improvement Threshold: " << params.relative_improvement_threshold << std::endl; - } - return str; - } - - std::ostream& operator<<(std::ostream& out, const FMParameters& params) { - out << " FM Parameters: \n"; - out << " Algorithm: " << params.algorithm << std::endl; - if ( params.algorithm != FMAlgorithm::do_nothing ) { - out << " Multitry Rounds: " << params.multitry_rounds << std::endl; - out << " Parallel Global Rollbacks: " << std::boolalpha << params.rollback_parallel << std::endl; - out << " Rollback Bal. 
Violation Factor: " << params.rollback_balance_violation_factor << std::endl; - out << " Num Seed Nodes: " << params.num_seed_nodes << std::endl; - out << " Enable Random Shuffle: " << std::boolalpha << params.shuffle << std::endl; - out << " Obey Minimal Parallelism: " << std::boolalpha << params.obey_minimal_parallelism << std::endl; - out << " Minimum Improvement Factor: " << params.min_improvement << std::endl; - out << " Release Nodes: " << std::boolalpha << params.release_nodes << std::endl; - out << " Time Limit Factor: " << params.time_limit_factor << std::endl; - } - if ( params.algorithm == FMAlgorithm::unconstrained_fm ) { - out << " Unconstrained Rounds: " << params.unconstrained_rounds << std::endl; - out << " Threshold Border Node Inclusion: " << params.treshold_border_node_inclusion << std::endl; - out << " Minimum Imbalance Penalty Factor: " << params.imbalance_penalty_min << std::endl; - out << " Maximum Imbalance Penalty Factor: " << params.imbalance_penalty_max << std::endl; - out << " Start Upper Bound for Unc.: " << params.unconstrained_upper_bound << std::endl; - out << " Final Upper Bound for Unc.: " << params.unconstrained_upper_bound_min << std::endl; - out << " Unc. Minimum Improvement Factor: " << params.unconstrained_min_improvement << std::endl; - out << " Activate Unc. Dynamically: " << std::boolalpha << params.activate_unconstrained_dynamically << std::endl; - if ( params.activate_unconstrained_dynamically ) { - out << " Penalty for Activation Test: " << params.penalty_for_activation_test << std::endl; - } +std::ostream &operator<<(std::ostream &out, const FMParameters ¶ms) +{ + out << " FM Parameters: \n"; + out << " Algorithm: " << params.algorithm << std::endl; + if(params.algorithm != FMAlgorithm::do_nothing) + { + out << " Multitry Rounds: " << params.multitry_rounds + << std::endl; + out << " Parallel Global Rollbacks: " << std::boolalpha + << params.rollback_parallel << std::endl; + out << " Rollback Bal. Violation Factor: " + << params.rollback_balance_violation_factor << std::endl; + out << " Num Seed Nodes: " << params.num_seed_nodes << std::endl; + out << " Enable Random Shuffle: " << std::boolalpha << params.shuffle + << std::endl; + out << " Obey Minimal Parallelism: " << std::boolalpha + << params.obey_minimal_parallelism << std::endl; + out << " Minimum Improvement Factor: " << params.min_improvement + << std::endl; + out << " Release Nodes: " << std::boolalpha + << params.release_nodes << std::endl; + out << " Time Limit Factor: " << params.time_limit_factor + << std::endl; + } + if(params.algorithm == FMAlgorithm::unconstrained_fm) + { + out << " Unconstrained Rounds: " << params.unconstrained_rounds + << std::endl; + out << " Threshold Border Node Inclusion: " + << params.treshold_border_node_inclusion << std::endl; + out << " Minimum Imbalance Penalty Factor: " << params.imbalance_penalty_min + << std::endl; + out << " Maximum Imbalance Penalty Factor: " << params.imbalance_penalty_max + << std::endl; + out << " Start Upper Bound for Unc.: " << params.unconstrained_upper_bound + << std::endl; + out << " Final Upper Bound for Unc.: " + << params.unconstrained_upper_bound_min << std::endl; + out << " Unc. Minimum Improvement Factor: " + << params.unconstrained_min_improvement << std::endl; + out << " Activate Unc. 
Dynamically: " << std::boolalpha + << params.activate_unconstrained_dynamically << std::endl; + if(params.activate_unconstrained_dynamically) + { + out << " Penalty for Activation Test: " + << params.penalty_for_activation_test << std::endl; } + } + out << std::flush; + return out; +} + +std::ostream &operator<<(std::ostream &out, const NLevelGlobalFMParameters ¶ms) +{ + if(params.use_global_fm) + { + out << " Boundary FM Parameters: \n"; + out << " Refine Until No Improvement: " << std::boolalpha + << params.refine_until_no_improvement << std::endl; + out << " Num Seed Nodes: " << params.num_seed_nodes << std::endl; + out << " Obey Minimal Parallelism: " << std::boolalpha + << params.obey_minimal_parallelism << std::endl; + } + return out; +} + +std::ostream &operator<<(std::ostream &out, const FlowParameters ¶ms) +{ + out << " Flow Parameters: \n"; + out << " Algorithm: " << params.algorithm << std::endl; + if(params.algorithm != FlowAlgorithm::do_nothing) + { + out << " Flow Scaling: " << params.alpha << std::endl; + out << " Maximum Number of Pins: " << params.max_num_pins << std::endl; + out << " Find Most Balanced Cut: " << std::boolalpha + << params.find_most_balanced_cut << std::endl; + out << " Determine Distance From Cut: " << std::boolalpha + << params.determine_distance_from_cut << std::endl; + out << " Parallel Searches Multiplier: " << params.parallel_searches_multiplier + << std::endl; + out << " Number of Parallel Searches: " << params.num_parallel_searches + << std::endl; + out << " Maximum BFS Distance: " << params.max_bfs_distance + << std::endl; + out << " Min Rel. Improvement Per Round: " + << params.min_relative_improvement_per_round << std::endl; + out << " Time Limit Factor: " << params.time_limit_factor + << std::endl; + out << " Skip Small Cuts: " << std::boolalpha + << params.skip_small_cuts << std::endl; + out << " Skip Unpromising Blocks: " << std::boolalpha + << params.skip_unpromising_blocks << std::endl; + out << " Pierce in Bulk: " << std::boolalpha + << params.pierce_in_bulk << std::endl; + out << " Steiner Tree Policy: " << params.steiner_tree_policy + << std::endl; out << std::flush; - return out; } + return out; +} - std::ostream& operator<<(std::ostream& out, const NLevelGlobalFMParameters& params) { - if ( params.use_global_fm ) { - out << " Boundary FM Parameters: \n"; - out << " Refine Until No Improvement: " << std::boolalpha << params.refine_until_no_improvement << std::endl; - out << " Num Seed Nodes: " << params.num_seed_nodes << std::endl; - out << " Obey Minimal Parallelism: " << std::boolalpha << params.obey_minimal_parallelism << std::endl; - } - return out; - } - - std::ostream& operator<<(std::ostream& out, const FlowParameters& params) { - out << " Flow Parameters: \n"; - out << " Algorithm: " << params.algorithm << std::endl; - if ( params.algorithm != FlowAlgorithm::do_nothing ) { - out << " Flow Scaling: " << params.alpha << std::endl; - out << " Maximum Number of Pins: " << params.max_num_pins << std::endl; - out << " Find Most Balanced Cut: " << std::boolalpha << params.find_most_balanced_cut << std::endl; - out << " Determine Distance From Cut: " << std::boolalpha << params.determine_distance_from_cut << std::endl; - out << " Parallel Searches Multiplier: " << params.parallel_searches_multiplier << std::endl; - out << " Number of Parallel Searches: " << params.num_parallel_searches << std::endl; - out << " Maximum BFS Distance: " << params.max_bfs_distance << std::endl; - out << " Min Rel. 
Improvement Per Round: " << params.min_relative_improvement_per_round << std::endl; - out << " Time Limit Factor: " << params.time_limit_factor << std::endl; - out << " Skip Small Cuts: " << std::boolalpha << params.skip_small_cuts << std::endl; - out << " Skip Unpromising Blocks: " << std::boolalpha << params.skip_unpromising_blocks << std::endl; - out << " Pierce in Bulk: " << std::boolalpha << params.pierce_in_bulk << std::endl; - out << " Steiner Tree Policy: " << params.steiner_tree_policy << std::endl; - out << std::flush; - } - return out; - } - - std::ostream& operator<<(std::ostream& out, const DeterministicRefinementParameters& params) { - out << " Number of sub-rounds for Sync LP: " << params.num_sub_rounds_sync_lp << std::endl; - out << " Use active node set: " << std::boolalpha << params.use_active_node_set << std::endl; - return out; - } - - std::ostream & operator<< (std::ostream& str, const RefinementParameters& params) { - str << "Refinement Parameters:" << std::endl; - str << " Rebalancing Algorithm: " << params.rebalancer << std::endl; - str << " Refine Until No Improvement: " << std::boolalpha << params.refine_until_no_improvement << std::endl; - str << " Relative Improvement Threshold: " << params.relative_improvement_threshold << std::endl; - str << " Maximum Batch Size: " << params.max_batch_size << std::endl; - str << " Min Border Vertices Per Thread: " << params.min_border_vertices_per_thread << std::endl; - str << "\n" << params.label_propagation; - str << "\n" << params.fm; - if ( params.global_fm.use_global_fm ) { - str << "\n" << params.global_fm; - } - str << "\n" << params.flows; - return str; +std::ostream &operator<<(std::ostream &out, + const DeterministicRefinementParameters ¶ms) +{ + out << " Number of sub-rounds for Sync LP: " << params.num_sub_rounds_sync_lp + << std::endl; + out << " Use active node set: " << std::boolalpha + << params.use_active_node_set << std::endl; + return out; +} + +std::ostream &operator<<(std::ostream &str, const RefinementParameters ¶ms) +{ + str << "Refinement Parameters:" << std::endl; + str << " Rebalancing Algorithm: " << params.rebalancer << std::endl; + str << " Refine Until No Improvement: " << std::boolalpha + << params.refine_until_no_improvement << std::endl; + str << " Relative Improvement Threshold: " << params.relative_improvement_threshold + << std::endl; + str << " Maximum Batch Size: " << params.max_batch_size << std::endl; + str << " Min Border Vertices Per Thread: " << params.min_border_vertices_per_thread + << std::endl; + str << "\n" << params.label_propagation; + str << "\n" << params.fm; + if(params.global_fm.use_global_fm) + { + str << "\n" << params.global_fm; + } + str << "\n" << params.flows; + return str; +} + +std::ostream &operator<<(std::ostream &str, const InitialPartitioningParameters ¶ms) +{ + str << "Initial Partitioning Parameters:" << std::endl; + str << " Initial Partitioning Mode: " << params.mode << std::endl; + str << " Number of Runs: " << params.runs << std::endl; + str << " Use Adaptive IP Runs: " << std::boolalpha + << params.use_adaptive_ip_runs << std::endl; + if(params.use_adaptive_ip_runs) + { + str << " Min Adaptive IP Runs: " << params.min_adaptive_ip_runs + << std::endl; } + str << " Perform Refinement On Best: " << std::boolalpha + << params.perform_refinement_on_best_partitions << std::endl; + str << " Fm Refinement Rounds: " << params.fm_refinment_rounds + << std::endl; + str << " Remove Degree-Zero HNs Before IP: " << std::boolalpha + << 
params.remove_degree_zero_hns_before_ip << std::endl; + str << " Maximum Iterations of LP IP: " << params.lp_maximum_iterations + << std::endl; + str << " Initial Block Size of LP IP: " << params.lp_initial_block_size + << std::endl; + str << "\nInitial Partitioning "; + str << params.refinement << std::endl; + return str; +} + +std::ostream &operator<<(std::ostream &str, const MappingParameters ¶ms) +{ + str << "Mapping Parameters: " << std::endl; + str << " Target Graph File: " << params.target_graph_file + << std::endl; + str << " One-To-One Mapping Strategy: " << params.strategy << std::endl; + str << " Use Local Search: " << std::boolalpha + << params.use_local_search << std::endl; + str << " Use Two-Phase Approach: " << std::boolalpha + << params.use_two_phase_approach << std::endl; + str << " Max Precomputed Steiner Tree Size: " << params.max_steiner_tree_size + << std::endl; + str << " Large HE Size Threshold: " << params.large_he_threshold + << std::endl; + return str; +} - std::ostream & operator<< (std::ostream& str, const InitialPartitioningParameters& params) { - str << "Initial Partitioning Parameters:" << std::endl; - str << " Initial Partitioning Mode: " << params.mode << std::endl; - str << " Number of Runs: " << params.runs << std::endl; - str << " Use Adaptive IP Runs: " << std::boolalpha << params.use_adaptive_ip_runs << std::endl; - if ( params.use_adaptive_ip_runs ) { - str << " Min Adaptive IP Runs: " << params.min_adaptive_ip_runs << std::endl; +std::ostream &operator<<(std::ostream &str, const SharedMemoryParameters ¶ms) +{ + str << "Shared Memory Parameters: " << std::endl; + str << " Number of Threads: " << params.num_threads << std::endl; + str << " Number of used NUMA nodes: " + << TBBInitializer::instance().num_used_numa_nodes() << std::endl; + str << " Use Localized Random Shuffle: " << std::boolalpha + << params.use_localized_random_shuffle << std::endl; + str << " Random Shuffle Block Size: " << params.shuffle_block_size + << std::endl; + return str; +} + +bool Context::isNLevelPartitioning() const +{ + return partition.partition_type == N_LEVEL_GRAPH_PARTITIONING || + partition.partition_type == N_LEVEL_HYPERGRAPH_PARTITIONING; +} + +bool Context::forceGainCacheUpdates() const +{ + return isNLevelPartitioning() || partition.mode == Mode::deep_multilevel || + refinement.refine_until_no_improvement; +} + +void Context::setupPartWeights(const HypernodeWeight total_hypergraph_weight) +{ + if(partition.use_individual_part_weights) + { + ASSERT(static_cast(partition.k) == partition.max_part_weights.size()); + const HypernodeWeight max_part_weights_sum = std::accumulate( + partition.max_part_weights.cbegin(), partition.max_part_weights.cend(), 0); + double weight_fraction = + total_hypergraph_weight / static_cast(max_part_weights_sum); + HypernodeWeight perfect_part_weights_sum = 0; + partition.perfect_balance_part_weights.clear(); + for(const HyperedgeWeight &part_weight : partition.max_part_weights) + { + const HypernodeWeight perfect_weight = ceil(weight_fraction * part_weight); + partition.perfect_balance_part_weights.push_back(perfect_weight); + perfect_part_weights_sum += perfect_weight; } - str << " Perform Refinement On Best: " << std::boolalpha << params.perform_refinement_on_best_partitions << std::endl; - str << " Fm Refinement Rounds: " << params.fm_refinment_rounds << std::endl; - str << " Remove Degree-Zero HNs Before IP: " << std::boolalpha << params.remove_degree_zero_hns_before_ip << std::endl; - str << " Maximum Iterations of LP IP: " << 
params.lp_maximum_iterations << std::endl; - str << " Initial Block Size of LP IP: " << params.lp_initial_block_size << std::endl; - str << "\nInitial Partitioning "; - str << params.refinement << std::endl; - return str; - } - - std::ostream & operator<< (std::ostream& str, const MappingParameters& params) { - str << "Mapping Parameters: " << std::endl; - str << " Target Graph File: " << params.target_graph_file << std::endl; - str << " One-To-One Mapping Strategy: " << params.strategy << std::endl; - str << " Use Local Search: " << std::boolalpha << params.use_local_search << std::endl; - str << " Use Two-Phase Approach: " << std::boolalpha << params.use_two_phase_approach << std::endl; - str << " Max Precomputed Steiner Tree Size: " << params.max_steiner_tree_size << std::endl; - str << " Large HE Size Threshold: " << params.large_he_threshold << std::endl; - return str; - } - - std::ostream & operator<< (std::ostream& str, const SharedMemoryParameters& params) { - str << "Shared Memory Parameters: " << std::endl; - str << " Number of Threads: " << params.num_threads << std::endl; - str << " Number of used NUMA nodes: " << TBBInitializer::instance().num_used_numa_nodes() << std::endl; - str << " Use Localized Random Shuffle: " << std::boolalpha << params.use_localized_random_shuffle << std::endl; - str << " Random Shuffle Block Size: " << params.shuffle_block_size << std::endl; - return str; - } - - bool Context::isNLevelPartitioning() const { - return partition.partition_type == N_LEVEL_GRAPH_PARTITIONING || - partition.partition_type == N_LEVEL_HYPERGRAPH_PARTITIONING; - } - - bool Context::forceGainCacheUpdates() const { - return isNLevelPartitioning() || - partition.mode == Mode::deep_multilevel || - refinement.refine_until_no_improvement; - } - - void Context::setupPartWeights(const HypernodeWeight total_hypergraph_weight) { - if (partition.use_individual_part_weights) { - ASSERT(static_cast(partition.k) == partition.max_part_weights.size()); - const HypernodeWeight max_part_weights_sum = std::accumulate(partition.max_part_weights.cbegin(), - partition.max_part_weights.cend(), 0); - double weight_fraction = total_hypergraph_weight / static_cast(max_part_weights_sum); - HypernodeWeight perfect_part_weights_sum = 0; - partition.perfect_balance_part_weights.clear(); - for (const HyperedgeWeight& part_weight : partition.max_part_weights) { - const HypernodeWeight perfect_weight = ceil(weight_fraction * part_weight); - partition.perfect_balance_part_weights.push_back(perfect_weight); - perfect_part_weights_sum += perfect_weight; - } - if (max_part_weights_sum < total_hypergraph_weight) { - throw InvalidInputException( + if(max_part_weights_sum < total_hypergraph_weight) + { + throw InvalidInputException( "Sum of individual part weights is less than the total hypergraph weight. " "Finding a valid partition is not possible.\n" - "Total hypergraph weight: " + std::to_string(total_hypergraph_weight) + "\n" - "Sum of part weights: " + std::to_string(max_part_weights_sum)); - } else { - // To avoid rounding issues, epsilon should be calculated using the sum of the perfect part weights instead of - // the total hypergraph weight. 
See also recursive_bipartitioning_initial_partitioner - partition.epsilon = std::min(0.99, max_part_weights_sum / static_cast(std::max(perfect_part_weights_sum, 1)) - 1); - } - } else { - partition.perfect_balance_part_weights.clear(); - partition.perfect_balance_part_weights.push_back(ceil( - total_hypergraph_weight - / static_cast(partition.k))); - for (PartitionID part = 1; part != partition.k; ++part) { - partition.perfect_balance_part_weights.push_back( - partition.perfect_balance_part_weights[0]); - } - partition.max_part_weights.clear(); - partition.max_part_weights.push_back((1 + partition.epsilon) - * partition.perfect_balance_part_weights[0]); - for (PartitionID part = 1; part != partition.k; ++part) { - partition.max_part_weights.push_back(partition.max_part_weights[0]); - } + "Total hypergraph weight: " + + std::to_string(total_hypergraph_weight) + + "\n" + "Sum of part weights: " + + std::to_string(max_part_weights_sum)); + } + else + { + // To avoid rounding issues, epsilon should be calculated using the sum of the + // perfect part weights instead of the total hypergraph weight. See also + // recursive_bipartitioning_initial_partitioner + partition.epsilon = std::min( + 0.99, max_part_weights_sum / + static_cast(std::max(perfect_part_weights_sum, 1)) - + 1); } } - - void Context::setupContractionLimit(const HypernodeWeight total_hypergraph_weight) { - // Setup contraction limit - if (initial_partitioning.mode == Mode::deep_multilevel) { - coarsening.contraction_limit = - std::max(2 * shared_memory.num_threads, static_cast(partition.k)) * - coarsening.contraction_limit_multiplier; - } else { - coarsening.contraction_limit = - coarsening.contraction_limit_multiplier * partition.k; + else + { + partition.perfect_balance_part_weights.clear(); + partition.perfect_balance_part_weights.push_back( + ceil(total_hypergraph_weight / static_cast(partition.k))); + for(PartitionID part = 1; part != partition.k; ++part) + { + partition.perfect_balance_part_weights.push_back( + partition.perfect_balance_part_weights[0]); } + partition.max_part_weights.clear(); + partition.max_part_weights.push_back((1 + partition.epsilon) * + partition.perfect_balance_part_weights[0]); + for(PartitionID part = 1; part != partition.k; ++part) + { + partition.max_part_weights.push_back(partition.max_part_weights[0]); + } + } +} - // Setup maximum allowed vertex and high-degree vertex weight - setupMaximumAllowedNodeWeight(total_hypergraph_weight); +void Context::setupContractionLimit(const HypernodeWeight total_hypergraph_weight) +{ + // Setup contraction limit + if(initial_partitioning.mode == Mode::deep_multilevel) + { + coarsening.contraction_limit = + std::max(2 * shared_memory.num_threads, static_cast(partition.k)) * + coarsening.contraction_limit_multiplier; + } + else + { + coarsening.contraction_limit = coarsening.contraction_limit_multiplier * partition.k; } - void Context::setupMaximumAllowedNodeWeight(const HypernodeWeight total_hypergraph_weight) { - HypernodeWeight min_block_weight = std::numeric_limits::max(); - for ( PartitionID part_id = 0; part_id < partition.k; ++part_id ) { - min_block_weight = std::min(min_block_weight, partition.max_part_weights[part_id]); - } + // Setup maximum allowed vertex and high-degree vertex weight + setupMaximumAllowedNodeWeight(total_hypergraph_weight); +} - double hypernode_weight_fraction = - coarsening.max_allowed_weight_multiplier - / coarsening.contraction_limit; - coarsening.max_allowed_node_weight = - std::ceil(hypernode_weight_fraction * 
total_hypergraph_weight); - coarsening.max_allowed_node_weight = - std::min(coarsening.max_allowed_node_weight, min_block_weight); - } - - void Context::sanityCheck(const TargetGraph* target_graph) { - if ( isNLevelPartitioning() && coarsening.algorithm == CoarseningAlgorithm::multilevel_coarsener ) { - ALGO_SWITCH("Coarsening algorithm" << coarsening.algorithm << "is only supported in multilevel mode." - << "Do you want to use the n-level version instead (Y/N)?", - "Partitioning with" << coarsening.algorithm - << "coarsener in n-level mode is not supported!", - coarsening.algorithm, - CoarseningAlgorithm::nlevel_coarsener); - } else if ( !isNLevelPartitioning() && coarsening.algorithm == CoarseningAlgorithm::nlevel_coarsener ) { - ALGO_SWITCH("Coarsening algorithm" << coarsening.algorithm << "is only supported in n-Level mode." - << "Do you want to use the multilevel version instead (Y/N)?", - "Partitioning with" << coarsening.algorithm - << "coarsener in multilevel mode is not supported!", - coarsening.algorithm, - CoarseningAlgorithm::multilevel_coarsener); - } +void Context::setupMaximumAllowedNodeWeight(const HypernodeWeight total_hypergraph_weight) +{ + HypernodeWeight min_block_weight = std::numeric_limits::max(); + for(PartitionID part_id = 0; part_id < partition.k; ++part_id) + { + min_block_weight = std::min(min_block_weight, partition.max_part_weights[part_id]); + } - ASSERT(partition.use_individual_part_weights != partition.max_part_weights.empty()); - if (partition.use_individual_part_weights && static_cast(partition.k) != partition.max_part_weights.size()) { - ALGO_SWITCH("Individual part weights specified, but number of parts doesn't match k." - << "Do you want to use k =" << partition.max_part_weights.size() << "instead (Y/N)?", - "Number of parts is not equal to k!", - partition.k, - partition.max_part_weights.size()); - } + double hypernode_weight_fraction = + coarsening.max_allowed_weight_multiplier / coarsening.contraction_limit; + coarsening.max_allowed_node_weight = + std::ceil(hypernode_weight_fraction * total_hypergraph_weight); + coarsening.max_allowed_node_weight = + std::min(coarsening.max_allowed_node_weight, min_block_weight); +} - shared_memory.static_balancing_work_packages = std::clamp(shared_memory.static_balancing_work_packages, size_t(4), size_t(256)); - - if ( partition.objective == Objective::steiner_tree ) { - if ( !target_graph ) { - partition.objective = Objective::km1; - INFO("No target graph provided for steiner tree metric. Switching to km1 metric."); - } else { - if ( partition.mode == Mode::deep_multilevel ) { - ALGO_SWITCH("Partitioning mode" << partition.mode << "is not supported for steiner tree metric." - << "Do you want to use the multilevel mode instead (Y/N)?", - "Partitioning mode" << partition.mode - << "is not supported for steiner tree metric!", - partition.mode, Mode::direct); - } - if ( initial_partitioning.mode == Mode::deep_multilevel ) { - ALGO_SWITCH("Initial partitioning mode" << partition.mode << "is not supported for steiner tree metric." - << "Do you want to use the multilevel mode instead (Y/N)?", - "Initial partitioning mode" << partition.mode - << "is not supported for steiner tree metric!", - partition.mode, Mode::direct); - } - } - } +void Context::sanityCheck(const TargetGraph *target_graph) +{ + if(isNLevelPartitioning() && + coarsening.algorithm == CoarseningAlgorithm::multilevel_coarsener) + { + ALGO_SWITCH("Coarsening algorithm" + << coarsening.algorithm << "is only supported in multilevel mode." 
+ << "Do you want to use the n-level version instead (Y/N)?", + "Partitioning with" << coarsening.algorithm + << "coarsener in n-level mode is not supported!", + coarsening.algorithm, CoarseningAlgorithm::nlevel_coarsener); + } + else if(!isNLevelPartitioning() && + coarsening.algorithm == CoarseningAlgorithm::nlevel_coarsener) + { + ALGO_SWITCH("Coarsening algorithm" + << coarsening.algorithm << "is only supported in n-Level mode." + << "Do you want to use the multilevel version instead (Y/N)?", + "Partitioning with" << coarsening.algorithm + << "coarsener in multilevel mode is not supported!", + coarsening.algorithm, CoarseningAlgorithm::multilevel_coarsener); + } + ASSERT(partition.use_individual_part_weights != partition.max_part_weights.empty()); + if(partition.use_individual_part_weights && + static_cast(partition.k) != partition.max_part_weights.size()) + { + ALGO_SWITCH("Individual part weights specified, but number of parts doesn't match k." + << "Do you want to use k =" << partition.max_part_weights.size() + << "instead (Y/N)?", + "Number of parts is not equal to k!", partition.k, + partition.max_part_weights.size()); + } - shared_memory.static_balancing_work_packages = std::clamp(shared_memory.static_balancing_work_packages, UL(4), UL(256)); + shared_memory.static_balancing_work_packages = + std::clamp(shared_memory.static_balancing_work_packages, size_t(4), size_t(256)); - if ( partition.deterministic ) { - coarsening.algorithm = CoarseningAlgorithm::deterministic_multilevel_coarsener; + if(partition.objective == Objective::steiner_tree) + { + if(!target_graph) + { + partition.objective = Objective::km1; + INFO("No target graph provided for steiner tree metric. Switching to km1 metric."); + } + else + { + if(partition.mode == Mode::deep_multilevel) + { + ALGO_SWITCH("Partitioning mode" + << partition.mode << "is not supported for steiner tree metric." + << "Do you want to use the multilevel mode instead (Y/N)?", + "Partitioning mode" << partition.mode + << "is not supported for steiner tree metric!", + partition.mode, Mode::direct); + } + if(initial_partitioning.mode == Mode::deep_multilevel) + { + ALGO_SWITCH("Initial partitioning mode" + << partition.mode << "is not supported for steiner tree metric." 
+ << "Do you want to use the multilevel mode instead (Y/N)?", + "Initial partitioning mode" + << partition.mode << "is not supported for steiner tree metric!", + partition.mode, Mode::direct); + } + } + } - // disable FM until we have a deterministic version - refinement.fm.algorithm = FMAlgorithm::do_nothing; - initial_partitioning.refinement.fm.algorithm = FMAlgorithm::do_nothing; + shared_memory.static_balancing_work_packages = + std::clamp(shared_memory.static_balancing_work_packages, UL(4), UL(256)); - // disable adaptive IP - initial_partitioning.use_adaptive_ip_runs = false; + if(partition.deterministic) + { + coarsening.algorithm = CoarseningAlgorithm::deterministic_multilevel_coarsener; + // disable FM until we have a deterministic version + refinement.fm.algorithm = FMAlgorithm::do_nothing; + initial_partitioning.refinement.fm.algorithm = FMAlgorithm::do_nothing; - // switch silently - auto lp_algo = refinement.label_propagation.algorithm; - if ( lp_algo != LabelPropagationAlgorithm::do_nothing && lp_algo != LabelPropagationAlgorithm::deterministic ) { - refinement.label_propagation.algorithm = LabelPropagationAlgorithm::deterministic; - } + // disable adaptive IP + initial_partitioning.use_adaptive_ip_runs = false; - lp_algo = initial_partitioning.refinement.label_propagation.algorithm; - if ( lp_algo != LabelPropagationAlgorithm::do_nothing && lp_algo != LabelPropagationAlgorithm::deterministic ) { - initial_partitioning.refinement.label_propagation.algorithm = LabelPropagationAlgorithm::deterministic; - } + // switch silently + auto lp_algo = refinement.label_propagation.algorithm; + if(lp_algo != LabelPropagationAlgorithm::do_nothing && + lp_algo != LabelPropagationAlgorithm::deterministic) + { + refinement.label_propagation.algorithm = LabelPropagationAlgorithm::deterministic; } - if ( partition.instance_type == InstanceType::UNDEFINED ) { - partition.instance_type = to_instance_type(partition.file_format); + lp_algo = initial_partitioning.refinement.label_propagation.algorithm; + if(lp_algo != LabelPropagationAlgorithm::do_nothing && + lp_algo != LabelPropagationAlgorithm::deterministic) + { + initial_partitioning.refinement.label_propagation.algorithm = + LabelPropagationAlgorithm::deterministic; } + } + + if(partition.instance_type == InstanceType::UNDEFINED) + { + partition.instance_type = to_instance_type(partition.file_format); + } - // Set correct gain policy type - setupGainPolicy(); + // Set correct gain policy type + setupGainPolicy(); - if ( partition.preset_type == PresetType::large_k ) { - // Silently switch to deep multilevel scheme for large k partitioning - partition.mode = Mode::deep_multilevel; - } + if(partition.preset_type == PresetType::large_k) + { + // Silently switch to deep multilevel scheme for large k partitioning + partition.mode = Mode::deep_multilevel; } +} - void Context::setupThreadsPerFlowSearch() { - if ( refinement.flows.algorithm == FlowAlgorithm::flow_cutter ) { - // = min(t, min(tau * k, k * (k - 1) / 2)) - // t = number of threads - // k * (k - 1) / 2 = maximum number of edges in the quotient graph - refinement.flows.num_parallel_searches = partition.k == 2 ? 
1 : - std::min(shared_memory.num_threads, std::min(std::max(UL(1), static_cast( - refinement.flows.parallel_searches_multiplier * partition.k)), - static_cast((partition.k * (partition.k - 1)) / 2) )); - } +void Context::setupThreadsPerFlowSearch() +{ + if(refinement.flows.algorithm == FlowAlgorithm::flow_cutter) + { + // = min(t, min(tau * k, k * (k - 1) / 2)) + // t = number of threads + // k * (k - 1) / 2 = maximum number of edges in the quotient graph + refinement.flows.num_parallel_searches = + partition.k == 2 ? + 1 : + std::min( + shared_memory.num_threads, + std::min( + std::max(UL(1), static_cast( + refinement.flows.parallel_searches_multiplier * + partition.k)), + static_cast((partition.k * (partition.k - 1)) / 2))); } +} - void Context::setupGainPolicy() { - #ifndef KAHYPAR_ENABLE_SOED_METRIC - if ( partition.objective == Objective::soed ) { - throw InvalidParameterException( +void Context::setupGainPolicy() +{ +#ifndef KAHYPAR_ENABLE_SOED_METRIC + if(partition.objective == Objective::soed) + { + throw InvalidParameterException( "SOED metric is deactivated. Add -DKAHYPAR_ENABLE_SOED_METRIC=ON to the " "cmake command and rebuild Mt-KaHyPar."); - } - #endif + } +#endif - #ifndef KAHYPAR_ENABLE_STEINER_TREE_METRIC - if ( partition.objective == Objective::steiner_tree ) { - throw InvalidParameterException( +#ifndef KAHYPAR_ENABLE_STEINER_TREE_METRIC + if(partition.objective == Objective::steiner_tree) + { + throw InvalidParameterException( "Steiner tree metric is deactivated. Add -DKAHYPAR_ENABLE_STEINER_TREE_METRIC=ON " "to the cmake command and rebuild Mt-KaHyPar."); + } +#endif + + if(partition.instance_type == InstanceType::hypergraph) + { + switch(partition.objective) + { + case Objective::km1: + partition.gain_policy = GainPolicy::km1; + break; + case Objective::cut: + partition.gain_policy = GainPolicy::cut; + break; + case Objective::soed: + partition.gain_policy = GainPolicy::soed; + break; + case Objective::steiner_tree: + partition.gain_policy = GainPolicy::steiner_tree; + break; + case Objective::UNDEFINED: + partition.gain_policy = GainPolicy::none; + break; } - #endif - - if ( partition.instance_type == InstanceType::hypergraph ) { - switch ( partition.objective ) { - case Objective::km1: partition.gain_policy = GainPolicy::km1; break; - case Objective::cut: partition.gain_policy = GainPolicy::cut; break; - case Objective::soed: partition.gain_policy = GainPolicy::soed; break; - case Objective::steiner_tree: partition.gain_policy = GainPolicy::steiner_tree; break; - case Objective::UNDEFINED: partition.gain_policy = GainPolicy::none; break; - } - } else if ( partition.instance_type == InstanceType::graph ) { - if ( partition.objective != Objective::cut && partition.objective != Objective::steiner_tree ) { - partition.objective = Objective::cut; - INFO("Current objective function is equivalent to the edge cut metric for graphs. Objective function is set to edge cut metric."); - } - if ( partition.objective == Objective::cut ) { - partition.gain_policy = GainPolicy::cut_for_graphs; - } else { - partition.gain_policy = GainPolicy::steiner_tree_for_graphs; - } + } + else if(partition.instance_type == InstanceType::graph) + { + if(partition.objective != Objective::cut && + partition.objective != Objective::steiner_tree) + { + partition.objective = Objective::cut; + INFO( + "Current objective function is equivalent to the edge cut metric for graphs. 
Objective function is set to edge cut metric."); + } + if(partition.objective == Objective::cut) + { + partition.gain_policy = GainPolicy::cut_for_graphs; + } + else + { + partition.gain_policy = GainPolicy::steiner_tree_for_graphs; } } +} - void Context::load_default_preset() { - // General - partition.preset_type = PresetType::default_preset; - partition.mode = Mode::direct; - partition.large_hyperedge_size_threshold_factor = 0.01; - partition.smallest_large_he_size_threshold = 50000; - partition.ignore_hyperedge_size_threshold = 1000; - partition.num_vcycles = 0; - - // shared_memory - shared_memory.use_localized_random_shuffle = false; - shared_memory.static_balancing_work_packages = 128; - - // mapping - mapping.strategy = OneToOneMappingStrategy::greedy_mapping; - mapping.use_local_search = true; - mapping.use_two_phase_approach = false; - mapping.max_steiner_tree_size = 4; - mapping.largest_he_fraction = 0.0; - mapping.min_pin_coverage_of_largest_hes = 0.05; - - // preprocessing - preprocessing.use_community_detection = true; - preprocessing.disable_community_detection_for_mesh_graphs = true; - preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::hybrid; - preprocessing.community_detection.max_pass_iterations = 5; - preprocessing.community_detection.min_vertex_move_fraction = 0.01; - preprocessing.community_detection.vertex_degree_sampling_threshold = 200000; - - // coarsening - coarsening.algorithm = CoarseningAlgorithm::multilevel_coarsener; - coarsening.use_adaptive_edge_size= true; - coarsening.minimum_shrink_factor = 1.01; - coarsening.maximum_shrink_factor = 2.5; - coarsening.max_allowed_weight_multiplier = 1.0; - coarsening.contraction_limit_multiplier = 160; - coarsening.vertex_degree_sampling_threshold = 200000; - - // coarsening -> rating - coarsening.rating.rating_function = RatingFunction::heavy_edge; - coarsening.rating.heavy_node_penalty_policy = HeavyNodePenaltyPolicy::no_penalty; - coarsening.rating.acceptance_policy = AcceptancePolicy::best_prefer_unmatched; - - // initial partitioning - initial_partitioning.mode = Mode::recursive_bipartitioning; - initial_partitioning.runs = 20; - initial_partitioning.use_adaptive_ip_runs = true; - initial_partitioning.min_adaptive_ip_runs = 5; - initial_partitioning.perform_refinement_on_best_partitions = true; - initial_partitioning.fm_refinment_rounds = 1; - initial_partitioning.lp_maximum_iterations = 20; - initial_partitioning.lp_initial_block_size = 5; - initial_partitioning.remove_degree_zero_hns_before_ip = true; - - // initial partitioning -> refinement - initial_partitioning.refinement.refine_until_no_improvement = false; - - // initial partitioning -> refinement -> label propagation - initial_partitioning.refinement.label_propagation.algorithm = LabelPropagationAlgorithm::label_propagation; - initial_partitioning.refinement.label_propagation.maximum_iterations = 5; - initial_partitioning.refinement.label_propagation.rebalancing = true; - initial_partitioning.refinement.label_propagation.hyperedge_size_activation_threshold = 100; - - // initial partitioning -> refinement -> fm - initial_partitioning.refinement.fm.algorithm = FMAlgorithm::kway_fm; - initial_partitioning.refinement.fm.multitry_rounds = 5; - initial_partitioning.refinement.fm.rollback_parallel = true; - initial_partitioning.refinement.fm.rollback_balance_violation_factor = 1; - initial_partitioning.refinement.fm.num_seed_nodes = 25; - initial_partitioning.refinement.fm.obey_minimal_parallelism = false; - 
initial_partitioning.refinement.fm.release_nodes = true; - initial_partitioning.refinement.fm.time_limit_factor = 0.25; - initial_partitioning.refinement.fm.iter_moves_on_recalc = true; - - // initial partitioning -> refinement -> flows - initial_partitioning.refinement.flows.algorithm = FlowAlgorithm::do_nothing; - - // refinement - refinement.rebalancer = RebalancingAlgorithm::advanced_rebalancer; - refinement.refine_until_no_improvement = false; - - // refinement -> label propagation - refinement.label_propagation.algorithm = LabelPropagationAlgorithm::label_propagation; - refinement.label_propagation.unconstrained = true; - refinement.label_propagation.maximum_iterations = 5; - refinement.label_propagation.rebalancing = false; - refinement.label_propagation.hyperedge_size_activation_threshold = 100; - refinement.label_propagation.relative_improvement_threshold = 0.001; - - // refinement -> fm - refinement.fm.algorithm = FMAlgorithm::unconstrained_fm; - refinement.fm.multitry_rounds = 10; - refinement.fm.unconstrained_rounds = 8; - refinement.fm.rollback_parallel = true; - refinement.fm.rollback_balance_violation_factor = 1.0; - refinement.fm.treshold_border_node_inclusion = 0.7; - refinement.fm.imbalance_penalty_min = 0.2; - refinement.fm.imbalance_penalty_max = 1.0; - refinement.fm.num_seed_nodes = 25; - refinement.fm.obey_minimal_parallelism = true; - refinement.fm.release_nodes = true; - refinement.fm.time_limit_factor = 0.25; - refinement.fm.min_improvement = -1; - refinement.fm.unconstrained_min_improvement = 0.002; - refinement.fm.iter_moves_on_recalc = true; - - // refinement -> flows - refinement.flows.algorithm = FlowAlgorithm::do_nothing; - } - - void Context::load_quality_preset() { - load_default_preset(); - - // General - partition.preset_type = PresetType::quality; - - // refinement - refinement.refine_until_no_improvement = true; - refinement.relative_improvement_threshold = 0.0025; - - // refinement -> label propagation - refinement.label_propagation.rebalancing = true; - - // refinement -> flows; - refinement.flows.algorithm = FlowAlgorithm::flow_cutter; - refinement.flows.alpha = 16; - refinement.flows.max_num_pins = 4294967295; - refinement.flows.find_most_balanced_cut = true; - refinement.flows.determine_distance_from_cut = true; - refinement.flows.parallel_searches_multiplier = 1.0; - refinement.flows.max_bfs_distance = 2; - refinement.flows.time_limit_factor = 8; - refinement.flows.skip_small_cuts = true; - refinement.flows.skip_unpromising_blocks = true; - refinement.flows.pierce_in_bulk = true; - refinement.flows.min_relative_improvement_per_round = 0.001; - refinement.flows.steiner_tree_policy = SteinerTreeFlowValuePolicy::lower_bound; - } - - void Context::load_deterministic_preset() { - // General - partition.preset_type = PresetType::deterministic; - partition.mode = Mode::direct; - partition.deterministic = true; - partition.large_hyperedge_size_threshold_factor = 0.01; - partition.smallest_large_he_size_threshold = 50000; - partition.ignore_hyperedge_size_threshold = 1000; - partition.num_vcycles = 0; - - // shared_memory - shared_memory.use_localized_random_shuffle = false; - shared_memory.static_balancing_work_packages = 128; - - // preprocessing - preprocessing.use_community_detection = true; - preprocessing.disable_community_detection_for_mesh_graphs = true; - preprocessing.stable_construction_of_incident_edges = true; - preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::hybrid; - 
preprocessing.community_detection.max_pass_iterations = 5; - preprocessing.community_detection.min_vertex_move_fraction = 0.01; - preprocessing.community_detection.vertex_degree_sampling_threshold = 200000; - preprocessing.community_detection.low_memory_contraction = true; - preprocessing.community_detection.num_sub_rounds_deterministic = 16; - - // coarsening - coarsening.algorithm = CoarseningAlgorithm::deterministic_multilevel_coarsener; - coarsening.use_adaptive_edge_size= true; - coarsening.minimum_shrink_factor = 1.01; - coarsening.maximum_shrink_factor = 2.5; - coarsening.max_allowed_weight_multiplier = 1.0; - coarsening.contraction_limit_multiplier = 160; - coarsening.vertex_degree_sampling_threshold = 200000; - coarsening.num_sub_rounds_deterministic = 3; - - // coarsening -> rating - coarsening.rating.rating_function = RatingFunction::heavy_edge; - coarsening.rating.heavy_node_penalty_policy = HeavyNodePenaltyPolicy::no_penalty; - coarsening.rating.acceptance_policy = AcceptancePolicy::best_prefer_unmatched; - - // initial partitioning - initial_partitioning.mode = Mode::recursive_bipartitioning; - initial_partitioning.runs = 20; - initial_partitioning.use_adaptive_ip_runs = false; - initial_partitioning.perform_refinement_on_best_partitions = false; - initial_partitioning.fm_refinment_rounds = 3; - initial_partitioning.lp_maximum_iterations = 20; - initial_partitioning.lp_initial_block_size = 5; - initial_partitioning.population_size = 64; - initial_partitioning.remove_degree_zero_hns_before_ip = true; - - // initial partitioning -> refinement - initial_partitioning.refinement.refine_until_no_improvement = false; - - // initial partitioning -> refinement -> label propagation - initial_partitioning.refinement.label_propagation.algorithm = LabelPropagationAlgorithm::deterministic; - initial_partitioning.refinement.label_propagation.maximum_iterations = 5; - initial_partitioning.refinement.label_propagation.hyperedge_size_activation_threshold = 100; - - // initial partitioning -> refinement -> deterministic - initial_partitioning.refinement.deterministic_refinement.num_sub_rounds_sync_lp = 1; - initial_partitioning.refinement.deterministic_refinement.use_active_node_set = true; - - // initial partitioning -> refinement -> fm - initial_partitioning.refinement.fm.algorithm = FMAlgorithm::do_nothing; - - // initial partitioning -> refinement -> flows - initial_partitioning.refinement.flows.algorithm = FlowAlgorithm::do_nothing; - - // refinement - refinement.rebalancer = RebalancingAlgorithm::advanced_rebalancer; - refinement.refine_until_no_improvement = false; - - // refinement -> label propagation - refinement.label_propagation.algorithm = LabelPropagationAlgorithm::deterministic; - refinement.label_propagation.maximum_iterations = 5; - refinement.label_propagation.hyperedge_size_activation_threshold = 100; - - // refinement -> deterministic - refinement.deterministic_refinement.num_sub_rounds_sync_lp = 1; - refinement.deterministic_refinement.use_active_node_set = true; - - // refinement -> fm - refinement.fm.algorithm = FMAlgorithm::do_nothing; +void Context::load_default_preset() +{ + // General + partition.preset_type = PresetType::default_preset; + partition.mode = Mode::direct; + partition.large_hyperedge_size_threshold_factor = 0.01; + partition.smallest_large_he_size_threshold = 50000; + partition.ignore_hyperedge_size_threshold = 1000; + partition.num_vcycles = 0; + + // shared_memory + shared_memory.use_localized_random_shuffle = false; + 
shared_memory.static_balancing_work_packages = 128; + + // mapping + mapping.strategy = OneToOneMappingStrategy::greedy_mapping; + mapping.use_local_search = true; + mapping.use_two_phase_approach = false; + mapping.max_steiner_tree_size = 4; + mapping.largest_he_fraction = 0.0; + mapping.min_pin_coverage_of_largest_hes = 0.05; + + // preprocessing + preprocessing.use_community_detection = true; + preprocessing.disable_community_detection_for_mesh_graphs = true; + preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::hybrid; + preprocessing.community_detection.max_pass_iterations = 5; + preprocessing.community_detection.min_vertex_move_fraction = 0.01; + preprocessing.community_detection.vertex_degree_sampling_threshold = 200000; + + // coarsening + coarsening.algorithm = CoarseningAlgorithm::multilevel_coarsener; + coarsening.use_adaptive_edge_size = true; + coarsening.minimum_shrink_factor = 1.01; + coarsening.maximum_shrink_factor = 2.5; + coarsening.max_allowed_weight_multiplier = 1.0; + coarsening.contraction_limit_multiplier = 160; + coarsening.vertex_degree_sampling_threshold = 200000; + + // coarsening -> rating + coarsening.rating.rating_function = RatingFunction::heavy_edge; + coarsening.rating.heavy_node_penalty_policy = HeavyNodePenaltyPolicy::no_penalty; + coarsening.rating.acceptance_policy = AcceptancePolicy::best_prefer_unmatched; + + // initial partitioning + initial_partitioning.mode = Mode::recursive_bipartitioning; + initial_partitioning.runs = 20; + initial_partitioning.use_adaptive_ip_runs = true; + initial_partitioning.min_adaptive_ip_runs = 5; + initial_partitioning.perform_refinement_on_best_partitions = true; + initial_partitioning.fm_refinment_rounds = 1; + initial_partitioning.lp_maximum_iterations = 20; + initial_partitioning.lp_initial_block_size = 5; + initial_partitioning.remove_degree_zero_hns_before_ip = true; + + // initial partitioning -> refinement + initial_partitioning.refinement.refine_until_no_improvement = false; + + // initial partitioning -> refinement -> label propagation + initial_partitioning.refinement.label_propagation.algorithm = + LabelPropagationAlgorithm::label_propagation; + initial_partitioning.refinement.label_propagation.maximum_iterations = 5; + initial_partitioning.refinement.label_propagation.rebalancing = true; + initial_partitioning.refinement.label_propagation.hyperedge_size_activation_threshold = + 100; + + // initial partitioning -> refinement -> fm + initial_partitioning.refinement.fm.algorithm = FMAlgorithm::kway_fm; + initial_partitioning.refinement.fm.multitry_rounds = 5; + initial_partitioning.refinement.fm.rollback_parallel = true; + initial_partitioning.refinement.fm.rollback_balance_violation_factor = 1; + initial_partitioning.refinement.fm.num_seed_nodes = 25; + initial_partitioning.refinement.fm.obey_minimal_parallelism = false; + initial_partitioning.refinement.fm.release_nodes = true; + initial_partitioning.refinement.fm.time_limit_factor = 0.25; + initial_partitioning.refinement.fm.iter_moves_on_recalc = true; + + // initial partitioning -> refinement -> flows + initial_partitioning.refinement.flows.algorithm = FlowAlgorithm::do_nothing; + + // refinement + refinement.rebalancer = RebalancingAlgorithm::advanced_rebalancer; + refinement.refine_until_no_improvement = false; + + // refinement -> label propagation + refinement.label_propagation.algorithm = LabelPropagationAlgorithm::label_propagation; + refinement.label_propagation.unconstrained = true; + 
refinement.label_propagation.maximum_iterations = 5; + refinement.label_propagation.rebalancing = false; + refinement.label_propagation.hyperedge_size_activation_threshold = 100; + refinement.label_propagation.relative_improvement_threshold = 0.001; + + // refinement -> fm + refinement.fm.algorithm = FMAlgorithm::unconstrained_fm; + refinement.fm.multitry_rounds = 10; + refinement.fm.unconstrained_rounds = 8; + refinement.fm.rollback_parallel = true; + refinement.fm.rollback_balance_violation_factor = 1.0; + refinement.fm.treshold_border_node_inclusion = 0.7; + refinement.fm.imbalance_penalty_min = 0.2; + refinement.fm.imbalance_penalty_max = 1.0; + refinement.fm.num_seed_nodes = 25; + refinement.fm.obey_minimal_parallelism = true; + refinement.fm.release_nodes = true; + refinement.fm.time_limit_factor = 0.25; + refinement.fm.min_improvement = -1; + refinement.fm.unconstrained_min_improvement = 0.002; + refinement.fm.iter_moves_on_recalc = true; + + // refinement -> flows + refinement.flows.algorithm = FlowAlgorithm::do_nothing; +} - // refinement -> flows - refinement.flows.algorithm = FlowAlgorithm::do_nothing; - } - - void Context::load_n_level_preset() { - // General - partition.mode = Mode::direct; - partition.large_hyperedge_size_threshold_factor = 0.01; - partition.smallest_large_he_size_threshold = 50000; - partition.ignore_hyperedge_size_threshold = 1000; - partition.num_vcycles = 0; - - // shared_memory - shared_memory.use_localized_random_shuffle = false; - shared_memory.static_balancing_work_packages = 128; - - // mapping - mapping.strategy = OneToOneMappingStrategy::greedy_mapping; - mapping.use_local_search = true; - mapping.use_two_phase_approach = false; - mapping.max_steiner_tree_size = 4; - mapping.largest_he_fraction = 0.0; - mapping.min_pin_coverage_of_largest_hes = 0.05; - - // preprocessing - preprocessing.use_community_detection = true; - preprocessing.disable_community_detection_for_mesh_graphs = true; - preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::hybrid; - preprocessing.community_detection.max_pass_iterations = 5; - preprocessing.community_detection.min_vertex_move_fraction = 0.01; - preprocessing.community_detection.vertex_degree_sampling_threshold = 200000; - - // coarsening - coarsening.algorithm = CoarseningAlgorithm::nlevel_coarsener; - coarsening.use_adaptive_edge_size = true; - coarsening.minimum_shrink_factor = 1.01; - coarsening.maximum_shrink_factor = 100.0; - coarsening.max_allowed_weight_multiplier = 1.0; - coarsening.contraction_limit_multiplier = 160; - coarsening.vertex_degree_sampling_threshold = 200000; - - // coarsening -> rating - coarsening.rating.rating_function = RatingFunction::heavy_edge; - coarsening.rating.heavy_node_penalty_policy = HeavyNodePenaltyPolicy::no_penalty; - coarsening.rating.acceptance_policy = AcceptancePolicy::best_prefer_unmatched; - - // initial partitioning - initial_partitioning.mode = Mode::recursive_bipartitioning; - initial_partitioning.runs = 20; - initial_partitioning.use_adaptive_ip_runs = true; - initial_partitioning.min_adaptive_ip_runs = 5; - initial_partitioning.perform_refinement_on_best_partitions = true; - initial_partitioning.fm_refinment_rounds = 2147483647; - initial_partitioning.lp_maximum_iterations = 20; - initial_partitioning.lp_initial_block_size = 5; - initial_partitioning.remove_degree_zero_hns_before_ip = true; - - // initial partitioning -> refinement - initial_partitioning.refinement.refine_until_no_improvement = true; - 
initial_partitioning.refinement.max_batch_size = 1000; - initial_partitioning.refinement.min_border_vertices_per_thread = 0; - - // initial partitioning -> refinement -> label propagation - initial_partitioning.refinement.label_propagation.algorithm = LabelPropagationAlgorithm::label_propagation; - initial_partitioning.refinement.label_propagation.maximum_iterations = 5; - initial_partitioning.refinement.label_propagation.rebalancing = true; - initial_partitioning.refinement.label_propagation.hyperedge_size_activation_threshold = 100; - - // initial partitioning -> refinement -> fm - initial_partitioning.refinement.fm.algorithm = FMAlgorithm::kway_fm; - initial_partitioning.refinement.fm.multitry_rounds = 5; - initial_partitioning.refinement.fm.rollback_parallel = false; - initial_partitioning.refinement.fm.rollback_balance_violation_factor = 1; - initial_partitioning.refinement.fm.num_seed_nodes = 5; - initial_partitioning.refinement.fm.obey_minimal_parallelism = false; - initial_partitioning.refinement.fm.release_nodes = true; - initial_partitioning.refinement.fm.time_limit_factor = 0.25; - initial_partitioning.refinement.fm.iter_moves_on_recalc = false; - - // initial partitioning -> refinement -> flows - initial_partitioning.refinement.flows.algorithm = FlowAlgorithm::do_nothing; - - // initial partitioning -> refinement -> global fm - initial_partitioning.refinement.global_fm.use_global_fm = false; - - // refinement - refinement.rebalancer = RebalancingAlgorithm::advanced_rebalancer; - refinement.refine_until_no_improvement = true; - refinement.max_batch_size = 1000; - refinement.min_border_vertices_per_thread = 50; - - // refinement -> label propagation - refinement.label_propagation.algorithm = LabelPropagationAlgorithm::label_propagation; - refinement.label_propagation.maximum_iterations = 5; - refinement.label_propagation.rebalancing = true; - refinement.label_propagation.hyperedge_size_activation_threshold = 100; - - // refinement -> fm - refinement.fm.algorithm = FMAlgorithm::kway_fm; - refinement.fm.multitry_rounds = 10; - refinement.fm.rollback_parallel = false; - refinement.fm.rollback_balance_violation_factor = 1.25; - refinement.fm.num_seed_nodes = 5; - refinement.fm.obey_minimal_parallelism = false; - refinement.fm.release_nodes = true; - refinement.fm.time_limit_factor = 0.25; - refinement.fm.min_improvement = -1; - refinement.fm.iter_moves_on_recalc = true; - - // refinement -> flows - refinement.flows.algorithm = FlowAlgorithm::do_nothing; - - // refinement -> global fm - refinement.global_fm.use_global_fm = true; - refinement.global_fm.refine_until_no_improvement = false; - refinement.global_fm.num_seed_nodes = 5; - refinement.global_fm.obey_minimal_parallelism = true; - } - - void Context::load_highest_quality_preset() { - load_n_level_preset(); - - // General - partition.preset_type = PresetType::highest_quality; - - // refinement - refinement.relative_improvement_threshold = 0.0025; - - // refinement -> fm - refinement.fm.iter_moves_on_recalc = false; - - // refinement -> flows; - refinement.flows.algorithm = FlowAlgorithm::flow_cutter; - refinement.flows.alpha = 16; - refinement.flows.max_num_pins = 4294967295; - refinement.flows.find_most_balanced_cut = true; - refinement.flows.determine_distance_from_cut = true; - refinement.flows.parallel_searches_multiplier = 1.0; - refinement.flows.max_bfs_distance = 2; - refinement.flows.time_limit_factor = 8; - refinement.flows.skip_small_cuts = true; - refinement.flows.skip_unpromising_blocks = true; - 
refinement.flows.pierce_in_bulk = true; - refinement.flows.min_relative_improvement_per_round = 0.001; - refinement.flows.steiner_tree_policy = SteinerTreeFlowValuePolicy::lower_bound; - - // refinement -> global fm - refinement.global_fm.refine_until_no_improvement = true; - } - - void Context::load_large_k_preset() { - // General - partition.preset_type = PresetType::large_k; - partition.mode = Mode::deep_multilevel; - partition.large_hyperedge_size_threshold_factor = 0.01; - partition.smallest_large_he_size_threshold = 50000; - partition.ignore_hyperedge_size_threshold = 1000; - partition.num_vcycles = 0; - - // shared_memory - shared_memory.use_localized_random_shuffle = false; - shared_memory.static_balancing_work_packages = 128; - - // preprocessing - preprocessing.use_community_detection = true; - preprocessing.disable_community_detection_for_mesh_graphs = true; - preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::hybrid; - preprocessing.community_detection.max_pass_iterations = 5; - preprocessing.community_detection.min_vertex_move_fraction = 0.01; - preprocessing.community_detection.vertex_degree_sampling_threshold = 200000; - - // coarsening - coarsening.algorithm = CoarseningAlgorithm::multilevel_coarsener; - coarsening.use_adaptive_edge_size= true; - coarsening.minimum_shrink_factor = 1.01; - coarsening.maximum_shrink_factor = 2.5; - coarsening.max_allowed_weight_multiplier = 1.0; - coarsening.contraction_limit_multiplier = 500; - coarsening.deep_ml_contraction_limit_multiplier = 160; - coarsening.vertex_degree_sampling_threshold = 200000; - - // coarsening -> rating - coarsening.rating.rating_function = RatingFunction::heavy_edge; - coarsening.rating.heavy_node_penalty_policy = HeavyNodePenaltyPolicy::no_penalty; - coarsening.rating.acceptance_policy = AcceptancePolicy::best_prefer_unmatched; - - // initial partitioning - initial_partitioning.mode = Mode::direct; - initial_partitioning.runs = 5; - initial_partitioning.use_adaptive_ip_runs = true; - initial_partitioning.min_adaptive_ip_runs = 3; - initial_partitioning.perform_refinement_on_best_partitions = true; - initial_partitioning.fm_refinment_rounds = 1; - initial_partitioning.lp_maximum_iterations = 20; - initial_partitioning.lp_initial_block_size = 5; - initial_partitioning.enabled_ip_algos = {1, 1, 0, 1, 1, 0, 1, 0, 1}; - initial_partitioning.remove_degree_zero_hns_before_ip = true; - - // initial partitioning -> refinement - initial_partitioning.refinement.refine_until_no_improvement = false; - - // initial partitioning -> refinement -> label propagation - initial_partitioning.refinement.label_propagation.algorithm = LabelPropagationAlgorithm::label_propagation; - initial_partitioning.refinement.label_propagation.maximum_iterations = 5; - initial_partitioning.refinement.label_propagation.rebalancing = true; - initial_partitioning.refinement.label_propagation.hyperedge_size_activation_threshold = 100; - - // initial partitioning -> refinement -> fm - initial_partitioning.refinement.fm.algorithm = FMAlgorithm::do_nothing; +void Context::load_quality_preset() +{ + load_default_preset(); + + // General + partition.preset_type = PresetType::quality; + + // refinement + refinement.refine_until_no_improvement = true; + refinement.relative_improvement_threshold = 0.0025; + + // refinement -> label propagation + refinement.label_propagation.rebalancing = true; + + // refinement -> flows; + refinement.flows.algorithm = FlowAlgorithm::flow_cutter; + refinement.flows.alpha = 16; + 
refinement.flows.max_num_pins = 4294967295; + refinement.flows.find_most_balanced_cut = true; + refinement.flows.determine_distance_from_cut = true; + refinement.flows.parallel_searches_multiplier = 1.0; + refinement.flows.max_bfs_distance = 2; + refinement.flows.time_limit_factor = 8; + refinement.flows.skip_small_cuts = true; + refinement.flows.skip_unpromising_blocks = true; + refinement.flows.pierce_in_bulk = true; + refinement.flows.min_relative_improvement_per_round = 0.001; + refinement.flows.steiner_tree_policy = SteinerTreeFlowValuePolicy::lower_bound; +} - // initial partitioning -> refinement -> flows - initial_partitioning.refinement.flows.algorithm = FlowAlgorithm::do_nothing; +void Context::load_deterministic_preset() +{ + // General + partition.preset_type = PresetType::deterministic; + partition.mode = Mode::direct; + partition.deterministic = true; + partition.large_hyperedge_size_threshold_factor = 0.01; + partition.smallest_large_he_size_threshold = 50000; + partition.ignore_hyperedge_size_threshold = 1000; + partition.num_vcycles = 0; + + // shared_memory + shared_memory.use_localized_random_shuffle = false; + shared_memory.static_balancing_work_packages = 128; + + // preprocessing + preprocessing.use_community_detection = true; + preprocessing.disable_community_detection_for_mesh_graphs = true; + preprocessing.stable_construction_of_incident_edges = true; + preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::hybrid; + preprocessing.community_detection.max_pass_iterations = 5; + preprocessing.community_detection.min_vertex_move_fraction = 0.01; + preprocessing.community_detection.vertex_degree_sampling_threshold = 200000; + preprocessing.community_detection.low_memory_contraction = true; + preprocessing.community_detection.num_sub_rounds_deterministic = 16; + + // coarsening + coarsening.algorithm = CoarseningAlgorithm::deterministic_multilevel_coarsener; + coarsening.use_adaptive_edge_size = true; + coarsening.minimum_shrink_factor = 1.01; + coarsening.maximum_shrink_factor = 2.5; + coarsening.max_allowed_weight_multiplier = 1.0; + coarsening.contraction_limit_multiplier = 160; + coarsening.vertex_degree_sampling_threshold = 200000; + coarsening.num_sub_rounds_deterministic = 3; + + // coarsening -> rating + coarsening.rating.rating_function = RatingFunction::heavy_edge; + coarsening.rating.heavy_node_penalty_policy = HeavyNodePenaltyPolicy::no_penalty; + coarsening.rating.acceptance_policy = AcceptancePolicy::best_prefer_unmatched; + + // initial partitioning + initial_partitioning.mode = Mode::recursive_bipartitioning; + initial_partitioning.runs = 20; + initial_partitioning.use_adaptive_ip_runs = false; + initial_partitioning.perform_refinement_on_best_partitions = false; + initial_partitioning.fm_refinment_rounds = 3; + initial_partitioning.lp_maximum_iterations = 20; + initial_partitioning.lp_initial_block_size = 5; + initial_partitioning.population_size = 64; + initial_partitioning.remove_degree_zero_hns_before_ip = true; + + // initial partitioning -> refinement + initial_partitioning.refinement.refine_until_no_improvement = false; + + // initial partitioning -> refinement -> label propagation + initial_partitioning.refinement.label_propagation.algorithm = + LabelPropagationAlgorithm::deterministic; + initial_partitioning.refinement.label_propagation.maximum_iterations = 5; + initial_partitioning.refinement.label_propagation.hyperedge_size_activation_threshold = + 100; + + // initial partitioning -> refinement -> deterministic + 
initial_partitioning.refinement.deterministic_refinement.num_sub_rounds_sync_lp = 1; + initial_partitioning.refinement.deterministic_refinement.use_active_node_set = true; + + // initial partitioning -> refinement -> fm + initial_partitioning.refinement.fm.algorithm = FMAlgorithm::do_nothing; + + // initial partitioning -> refinement -> flows + initial_partitioning.refinement.flows.algorithm = FlowAlgorithm::do_nothing; + + // refinement + refinement.rebalancer = RebalancingAlgorithm::advanced_rebalancer; + refinement.refine_until_no_improvement = false; + + // refinement -> label propagation + refinement.label_propagation.algorithm = LabelPropagationAlgorithm::deterministic; + refinement.label_propagation.maximum_iterations = 5; + refinement.label_propagation.hyperedge_size_activation_threshold = 100; + + // refinement -> deterministic + refinement.deterministic_refinement.num_sub_rounds_sync_lp = 1; + refinement.deterministic_refinement.use_active_node_set = true; + + // refinement -> fm + refinement.fm.algorithm = FMAlgorithm::do_nothing; + + // refinement -> flows + refinement.flows.algorithm = FlowAlgorithm::do_nothing; +} - // refinement - refinement.rebalancer = RebalancingAlgorithm::advanced_rebalancer; - refinement.refine_until_no_improvement = false; +void Context::load_n_level_preset() +{ + // General + partition.mode = Mode::direct; + partition.large_hyperedge_size_threshold_factor = 0.01; + partition.smallest_large_he_size_threshold = 50000; + partition.ignore_hyperedge_size_threshold = 1000; + partition.num_vcycles = 0; + + // shared_memory + shared_memory.use_localized_random_shuffle = false; + shared_memory.static_balancing_work_packages = 128; + + // mapping + mapping.strategy = OneToOneMappingStrategy::greedy_mapping; + mapping.use_local_search = true; + mapping.use_two_phase_approach = false; + mapping.max_steiner_tree_size = 4; + mapping.largest_he_fraction = 0.0; + mapping.min_pin_coverage_of_largest_hes = 0.05; + + // preprocessing + preprocessing.use_community_detection = true; + preprocessing.disable_community_detection_for_mesh_graphs = true; + preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::hybrid; + preprocessing.community_detection.max_pass_iterations = 5; + preprocessing.community_detection.min_vertex_move_fraction = 0.01; + preprocessing.community_detection.vertex_degree_sampling_threshold = 200000; + + // coarsening + coarsening.algorithm = CoarseningAlgorithm::nlevel_coarsener; + coarsening.use_adaptive_edge_size = true; + coarsening.minimum_shrink_factor = 1.01; + coarsening.maximum_shrink_factor = 100.0; + coarsening.max_allowed_weight_multiplier = 1.0; + coarsening.contraction_limit_multiplier = 160; + coarsening.vertex_degree_sampling_threshold = 200000; + + // coarsening -> rating + coarsening.rating.rating_function = RatingFunction::heavy_edge; + coarsening.rating.heavy_node_penalty_policy = HeavyNodePenaltyPolicy::no_penalty; + coarsening.rating.acceptance_policy = AcceptancePolicy::best_prefer_unmatched; + + // initial partitioning + initial_partitioning.mode = Mode::recursive_bipartitioning; + initial_partitioning.runs = 20; + initial_partitioning.use_adaptive_ip_runs = true; + initial_partitioning.min_adaptive_ip_runs = 5; + initial_partitioning.perform_refinement_on_best_partitions = true; + initial_partitioning.fm_refinment_rounds = 2147483647; + initial_partitioning.lp_maximum_iterations = 20; + initial_partitioning.lp_initial_block_size = 5; + initial_partitioning.remove_degree_zero_hns_before_ip = true; + + // 
initial partitioning -> refinement + initial_partitioning.refinement.refine_until_no_improvement = true; + initial_partitioning.refinement.max_batch_size = 1000; + initial_partitioning.refinement.min_border_vertices_per_thread = 0; + + // initial partitioning -> refinement -> label propagation + initial_partitioning.refinement.label_propagation.algorithm = + LabelPropagationAlgorithm::label_propagation; + initial_partitioning.refinement.label_propagation.maximum_iterations = 5; + initial_partitioning.refinement.label_propagation.rebalancing = true; + initial_partitioning.refinement.label_propagation.hyperedge_size_activation_threshold = + 100; + + // initial partitioning -> refinement -> fm + initial_partitioning.refinement.fm.algorithm = FMAlgorithm::kway_fm; + initial_partitioning.refinement.fm.multitry_rounds = 5; + initial_partitioning.refinement.fm.rollback_parallel = false; + initial_partitioning.refinement.fm.rollback_balance_violation_factor = 1; + initial_partitioning.refinement.fm.num_seed_nodes = 5; + initial_partitioning.refinement.fm.obey_minimal_parallelism = false; + initial_partitioning.refinement.fm.release_nodes = true; + initial_partitioning.refinement.fm.time_limit_factor = 0.25; + initial_partitioning.refinement.fm.iter_moves_on_recalc = false; + + // initial partitioning -> refinement -> flows + initial_partitioning.refinement.flows.algorithm = FlowAlgorithm::do_nothing; + + // initial partitioning -> refinement -> global fm + initial_partitioning.refinement.global_fm.use_global_fm = false; + + // refinement + refinement.rebalancer = RebalancingAlgorithm::advanced_rebalancer; + refinement.refine_until_no_improvement = true; + refinement.max_batch_size = 1000; + refinement.min_border_vertices_per_thread = 50; + + // refinement -> label propagation + refinement.label_propagation.algorithm = LabelPropagationAlgorithm::label_propagation; + refinement.label_propagation.maximum_iterations = 5; + refinement.label_propagation.rebalancing = true; + refinement.label_propagation.hyperedge_size_activation_threshold = 100; + + // refinement -> fm + refinement.fm.algorithm = FMAlgorithm::kway_fm; + refinement.fm.multitry_rounds = 10; + refinement.fm.rollback_parallel = false; + refinement.fm.rollback_balance_violation_factor = 1.25; + refinement.fm.num_seed_nodes = 5; + refinement.fm.obey_minimal_parallelism = false; + refinement.fm.release_nodes = true; + refinement.fm.time_limit_factor = 0.25; + refinement.fm.min_improvement = -1; + refinement.fm.iter_moves_on_recalc = true; + + // refinement -> flows + refinement.flows.algorithm = FlowAlgorithm::do_nothing; + + // refinement -> global fm + refinement.global_fm.use_global_fm = true; + refinement.global_fm.refine_until_no_improvement = false; + refinement.global_fm.num_seed_nodes = 5; + refinement.global_fm.obey_minimal_parallelism = true; +} - // refinement -> label propagation - refinement.label_propagation.algorithm = LabelPropagationAlgorithm::label_propagation; - refinement.label_propagation.maximum_iterations = 5; - refinement.label_propagation.rebalancing = true; - refinement.label_propagation.hyperedge_size_activation_threshold = 100; +void Context::load_highest_quality_preset() +{ + load_n_level_preset(); + + // General + partition.preset_type = PresetType::highest_quality; + + // refinement + refinement.relative_improvement_threshold = 0.0025; + + // refinement -> fm + refinement.fm.iter_moves_on_recalc = false; + + // refinement -> flows; + refinement.flows.algorithm = FlowAlgorithm::flow_cutter; + 
refinement.flows.alpha = 16; + refinement.flows.max_num_pins = 4294967295; + refinement.flows.find_most_balanced_cut = true; + refinement.flows.determine_distance_from_cut = true; + refinement.flows.parallel_searches_multiplier = 1.0; + refinement.flows.max_bfs_distance = 2; + refinement.flows.time_limit_factor = 8; + refinement.flows.skip_small_cuts = true; + refinement.flows.skip_unpromising_blocks = true; + refinement.flows.pierce_in_bulk = true; + refinement.flows.min_relative_improvement_per_round = 0.001; + refinement.flows.steiner_tree_policy = SteinerTreeFlowValuePolicy::lower_bound; + + // refinement -> global fm + refinement.global_fm.refine_until_no_improvement = true; +} - // refinement -> fm - refinement.fm.algorithm = FMAlgorithm::do_nothing; +void Context::load_large_k_preset() +{ + // General + partition.preset_type = PresetType::large_k; + partition.mode = Mode::deep_multilevel; + partition.large_hyperedge_size_threshold_factor = 0.01; + partition.smallest_large_he_size_threshold = 50000; + partition.ignore_hyperedge_size_threshold = 1000; + partition.num_vcycles = 0; + + // shared_memory + shared_memory.use_localized_random_shuffle = false; + shared_memory.static_balancing_work_packages = 128; + + // preprocessing + preprocessing.use_community_detection = true; + preprocessing.disable_community_detection_for_mesh_graphs = true; + preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::hybrid; + preprocessing.community_detection.max_pass_iterations = 5; + preprocessing.community_detection.min_vertex_move_fraction = 0.01; + preprocessing.community_detection.vertex_degree_sampling_threshold = 200000; + + // coarsening + coarsening.algorithm = CoarseningAlgorithm::multilevel_coarsener; + coarsening.use_adaptive_edge_size = true; + coarsening.minimum_shrink_factor = 1.01; + coarsening.maximum_shrink_factor = 2.5; + coarsening.max_allowed_weight_multiplier = 1.0; + coarsening.contraction_limit_multiplier = 500; + coarsening.deep_ml_contraction_limit_multiplier = 160; + coarsening.vertex_degree_sampling_threshold = 200000; + + // coarsening -> rating + coarsening.rating.rating_function = RatingFunction::heavy_edge; + coarsening.rating.heavy_node_penalty_policy = HeavyNodePenaltyPolicy::no_penalty; + coarsening.rating.acceptance_policy = AcceptancePolicy::best_prefer_unmatched; + + // initial partitioning + initial_partitioning.mode = Mode::direct; + initial_partitioning.runs = 5; + initial_partitioning.use_adaptive_ip_runs = true; + initial_partitioning.min_adaptive_ip_runs = 3; + initial_partitioning.perform_refinement_on_best_partitions = true; + initial_partitioning.fm_refinment_rounds = 1; + initial_partitioning.lp_maximum_iterations = 20; + initial_partitioning.lp_initial_block_size = 5; + initial_partitioning.enabled_ip_algos = { 1, 1, 0, 1, 1, 0, 1, 0, 1 }; + initial_partitioning.remove_degree_zero_hns_before_ip = true; + + // initial partitioning -> refinement + initial_partitioning.refinement.refine_until_no_improvement = false; + + // initial partitioning -> refinement -> label propagation + initial_partitioning.refinement.label_propagation.algorithm = + LabelPropagationAlgorithm::label_propagation; + initial_partitioning.refinement.label_propagation.maximum_iterations = 5; + initial_partitioning.refinement.label_propagation.rebalancing = true; + initial_partitioning.refinement.label_propagation.hyperedge_size_activation_threshold = + 100; + + // initial partitioning -> refinement -> fm + initial_partitioning.refinement.fm.algorithm = 
FMAlgorithm::do_nothing; + + // initial partitioning -> refinement -> flows + initial_partitioning.refinement.flows.algorithm = FlowAlgorithm::do_nothing; + + // refinement + refinement.rebalancer = RebalancingAlgorithm::advanced_rebalancer; + refinement.refine_until_no_improvement = false; + + // refinement -> label propagation + refinement.label_propagation.algorithm = LabelPropagationAlgorithm::label_propagation; + refinement.label_propagation.maximum_iterations = 5; + refinement.label_propagation.rebalancing = true; + refinement.label_propagation.hyperedge_size_activation_threshold = 100; + + // refinement -> fm + refinement.fm.algorithm = FMAlgorithm::do_nothing; + + // refinement -> flows + refinement.flows.algorithm = FlowAlgorithm::do_nothing; +} - // refinement -> flows - refinement.flows.algorithm = FlowAlgorithm::do_nothing; - } - - std::ostream & operator<< (std::ostream& str, const Context& context) { - str << "*******************************************************************************\n" - << "* Partitioning Context *\n" - << "*******************************************************************************\n" - << context.partition - << "-------------------------------------------------------------------------------\n" - << context.preprocessing - << "-------------------------------------------------------------------------------\n" - << context.coarsening - << "-------------------------------------------------------------------------------\n" - << context.initial_partitioning - << "-------------------------------------------------------------------------------\n" - << context.refinement +std::ostream &operator<<(std::ostream &str, const Context &context) +{ + str << "*******************************************************************************\n" + << "* Partitioning Context *\n" + << "*******************************************************************************\n" + << context.partition + << "-------------------------------------------------------------------------------\n" + << context.preprocessing + << "-------------------------------------------------------------------------------\n" + << context.coarsening + << "-------------------------------------------------------------------------------\n" + << context.initial_partitioning + << "-------------------------------------------------------------------------------\n" + << context.refinement + << "-------------------------------------------------------------------------------\n"; + if(context.partition.objective == Objective::steiner_tree) + { + str << context.mapping << "-------------------------------------------------------------------------------\n"; - if ( context.partition.objective == Objective::steiner_tree ) { - str << context.mapping - << "-------------------------------------------------------------------------------\n"; - } - str << context.shared_memory - << "-------------------------------------------------------------------------------"; - return str; } + str << context.shared_memory + << "-------------------------------------------------------------------------------"; + return str; +} } diff --git a/mt-kahypar/partition/context_enum_classes.cpp b/mt-kahypar/partition/context_enum_classes.cpp index 913dab8b7..ed3fb5c0b 100644 --- a/mt-kahypar/partition/context_enum_classes.cpp +++ b/mt-kahypar/partition/context_enum_classes.cpp @@ -33,468 +33,705 @@ namespace mt_kahypar { - std::ostream & operator<< (std::ostream& os, const Type& type) { - switch (type) { - case Type::Unweighted: return os << 
"unweighted"; - case Type::EdgeWeights: return os << "edge_weights"; - case Type::NodeWeights: return os << "node_weights"; - case Type::EdgeAndNodeWeights: return os << "edge_and_node_weights"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(type); - } - - std::ostream & operator<< (std::ostream& os, const FileFormat& format) { - switch (format) { - case FileFormat::hMetis: return os << "hMetis"; - case FileFormat::Metis: return os << "Metis"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(format); - } - - std::ostream & operator<< (std::ostream& os, const InstanceType& type) { - switch (type) { - case InstanceType::graph: return os << "graph"; - case InstanceType::hypergraph: return os << "hypergraph"; - case InstanceType::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(type); - } - - std::ostream & operator<< (std::ostream& os, const PresetType& type) { - switch (type) { - case PresetType::deterministic: return os << "deterministic"; - case PresetType::large_k: return os << "large_k"; - case PresetType::default_preset: return os << "default"; - case PresetType::quality: return os << "quality"; - case PresetType::highest_quality: return os << "highest_quality"; - case PresetType::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(type); - } - - std::ostream & operator<< (std::ostream& os, const mt_kahypar_partition_type_t& type) { - switch (type) { - case MULTILEVEL_GRAPH_PARTITIONING: return os << "multilevel_graph_partitioning"; - case N_LEVEL_GRAPH_PARTITIONING: return os << "n_level_graph_partitioning"; - case MULTILEVEL_HYPERGRAPH_PARTITIONING: return os << "multilevel_hypergraph_partitioning"; - case LARGE_K_PARTITIONING: return os << "large_k_partitioning"; - case N_LEVEL_HYPERGRAPH_PARTITIONING: return os << "n_level_hypergraph_partitioning"; - case NULLPTR_PARTITION: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(type); - } - - std::ostream& operator<< (std::ostream& os, const ContextType& type) { - if (type == ContextType::main) { - return os << "main"; - } else { - return os << "ip"; - } - return os << static_cast(type); - } - - std::ostream & operator<< (std::ostream& os, const Mode& mode) { - switch (mode) { - case Mode::recursive_bipartitioning: return os << "recursive_bipartitioning"; - case Mode::direct: return os << "direct_kway"; - case Mode::deep_multilevel: return os << "deep_multilevel"; - case Mode::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(mode); - } - - std::ostream& operator<< (std::ostream& os, const Objective& objective) { - switch (objective) { - case Objective::cut: return os << "cut"; - case Objective::km1: return os << "km1"; - case Objective::soed: return os << "soed"; - case Objective::steiner_tree: return os << "steiner_tree"; - case Objective::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(objective); - } - - std::ostream & operator<< (std::ostream& os, const GainPolicy& type) { - switch (type) { - case GainPolicy::km1: return os << "km1"; - case GainPolicy::cut: return os << "cut"; - case GainPolicy::soed: return os << 
"soed"; - case GainPolicy::steiner_tree: return os << "steiner_tree"; - case GainPolicy::cut_for_graphs: return os << "cut_for_graphs"; - case GainPolicy::steiner_tree_for_graphs: return os << "steiner_tree_for_graphs"; - case GainPolicy::none: return os << "none"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(type); - } - - std::ostream & operator<< (std::ostream& os, const LouvainEdgeWeight& type) { - switch (type) { - case LouvainEdgeWeight::hybrid: return os << "hybrid"; - case LouvainEdgeWeight::uniform: return os << "uniform"; - case LouvainEdgeWeight::non_uniform: return os << "non_uniform"; - case LouvainEdgeWeight::degree: return os << "degree"; - case LouvainEdgeWeight::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(type); - } - - std::ostream & operator<< (std::ostream& os, const SimiliarNetCombinerStrategy& strategy) { - switch (strategy) { - case SimiliarNetCombinerStrategy::union_nets: return os << "union"; - case SimiliarNetCombinerStrategy::max_size: return os << "max_size"; - case SimiliarNetCombinerStrategy::importance: return os << "importance"; - case SimiliarNetCombinerStrategy::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(strategy); - } - - std::ostream & operator<< (std::ostream& os, const CoarseningAlgorithm& algo) { - switch (algo) { - case CoarseningAlgorithm::multilevel_coarsener: return os << "multilevel_coarsener"; - case CoarseningAlgorithm::deterministic_multilevel_coarsener: return os << "deterministic_multilevel_coarsener"; - case CoarseningAlgorithm::nlevel_coarsener: return os << "nlevel_coarsener"; - case CoarseningAlgorithm::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(algo); - } - - std::ostream & operator<< (std::ostream& os, const HeavyNodePenaltyPolicy& heavy_hn_policy) { - switch (heavy_hn_policy) { - case HeavyNodePenaltyPolicy::no_penalty: return os << "no_penalty"; - ENABLE_EXPERIMENTAL_FEATURES(case HeavyNodePenaltyPolicy::additive: return os << "additive";) - ENABLE_EXPERIMENTAL_FEATURES(case HeavyNodePenaltyPolicy::multiplicative_penalty: return os << "multiplicative";) - case HeavyNodePenaltyPolicy::UNDEFINED: return os << "UNDEFINED"; - } - return os << static_cast(heavy_hn_policy); - } - - std::ostream & operator<< (std::ostream& os, const AcceptancePolicy& acceptance_policy) { - switch (acceptance_policy) { - ENABLE_EXPERIMENTAL_FEATURES(case AcceptancePolicy::best: return os << "best";) - case AcceptancePolicy::best_prefer_unmatched: return os << "best_prefer_unmatched"; - case AcceptancePolicy::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(acceptance_policy); - } - - std::ostream & operator<< (std::ostream& os, const RatingFunction& func) { - switch (func) { - case RatingFunction::heavy_edge: return os << "heavy_edge"; - ENABLE_EXPERIMENTAL_FEATURES(case RatingFunction::sameness: return os << "sameness";) - case RatingFunction::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(func); - } - - std::ostream & operator<< (std::ostream& os, const InitialPartitioningAlgorithm& algo) { - switch (algo) { - case InitialPartitioningAlgorithm::random: 
return os << "random"; - case InitialPartitioningAlgorithm::bfs: return os << "bfs"; - case InitialPartitioningAlgorithm::greedy_round_robin_fm: return os << "greedy_round_robin_fm"; - case InitialPartitioningAlgorithm::greedy_global_fm: return os << "greedy_global_fm"; - case InitialPartitioningAlgorithm::greedy_sequential_fm: return os << "greedy_sequential_fm"; - case InitialPartitioningAlgorithm::greedy_round_robin_max_net: return os << "greedy_round_robin_max_net"; - case InitialPartitioningAlgorithm::greedy_global_max_net: return os << "greedy_global_max_net"; - case InitialPartitioningAlgorithm::greedy_sequential_max_net: return os << "greedy_sequential_max_net"; - case InitialPartitioningAlgorithm::label_propagation: return os << "label_propagation"; - case InitialPartitioningAlgorithm::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(algo); - } - - std::ostream & operator<< (std::ostream& os, const LabelPropagationAlgorithm& algo) { - switch (algo) { - case LabelPropagationAlgorithm::label_propagation: return os << "label_propagation"; - case LabelPropagationAlgorithm::deterministic: return os << "deterministic"; - case LabelPropagationAlgorithm::do_nothing: return os << "lp_do_nothing"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(algo); - } - - std::ostream & operator<< (std::ostream& os, const FMAlgorithm& algo) { - switch (algo) { - case FMAlgorithm::kway_fm: return os << "kway_fm"; - case FMAlgorithm::unconstrained_fm: return os << "unconstrained_fm"; - case FMAlgorithm::do_nothing: return os << "fm_do_nothing"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(algo); - } - - std::ostream & operator<< (std::ostream& os, const FlowAlgorithm& algo) { - switch (algo) { - case FlowAlgorithm::flow_cutter: return os << "flow_cutter"; - case FlowAlgorithm::mock: return os << "mock"; - case FlowAlgorithm::do_nothing: return os << "do_nothing"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(algo); - } - - - std::ostream & operator<< (std::ostream& os, const RebalancingAlgorithm& algo) { - switch (algo) { - case RebalancingAlgorithm::simple_rebalancer: return os << "simple_rebalancer"; - case RebalancingAlgorithm::advanced_rebalancer: return os << "advanced_rebalancer"; - case RebalancingAlgorithm::do_nothing: return os << "do_nothing"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(algo); - } - - std::ostream & operator<< (std::ostream& os, const OneToOneMappingStrategy& algo) { - switch (algo) { - case OneToOneMappingStrategy::greedy_mapping: return os << "greedy_mapping"; - case OneToOneMappingStrategy::identity: return os << "identity"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(algo); - } - - std::ostream & operator<< (std::ostream& os, const SteinerTreeFlowValuePolicy& policy) { - switch (policy) { - case SteinerTreeFlowValuePolicy::lower_bound: return os << "lower_bound"; - case SteinerTreeFlowValuePolicy::upper_bound: return os << "upper_bound"; - case SteinerTreeFlowValuePolicy::UNDEFINED: return os << "UNDEFINED"; - // omit default case to trigger compiler warning for missing cases - } - return os << static_cast(policy); - } - - Mode modeFromString(const std::string& mode) { - if (mode == "rb") { - return 
Mode::recursive_bipartitioning; - } else if (mode == "direct") { - return Mode::direct; - } else if (mode == "deep") { - return Mode::deep_multilevel; - } - throw InvalidParameterException("Illegal option: " + mode); - return Mode::UNDEFINED; - } - - InstanceType instanceTypeFromString(const std::string& type) { - if (type == "graph") { - return InstanceType::graph; - } else if (type == "hypergraph") { - return InstanceType::hypergraph; - } - throw InvalidParameterException("Illegal option: " + type); - return InstanceType::UNDEFINED; - } - - PresetType presetTypeFromString(const std::string& type) { - if (type == "deterministic") { - return PresetType::deterministic; - } else if (type == "large_k") { - return PresetType::large_k; - } else if (type == "default") { - return PresetType::default_preset; - } else if (type == "quality") { - return PresetType::quality; - } else if (type == "highest_quality") { - return PresetType::highest_quality; - } - throw InvalidParameterException("Illegal option: " + type); - return PresetType::UNDEFINED; - } - - - Objective objectiveFromString(const std::string& obj) { - if (obj == "cut") { - return Objective::cut; - } else if (obj == "km1") { - return Objective::km1; - } else if (obj == "soed") { - return Objective::soed; - } else if (obj == "steiner_tree") { - return Objective::steiner_tree; - } - throw InvalidParameterException("No valid objective function."); - return Objective::UNDEFINED; - } - - LouvainEdgeWeight louvainEdgeWeightFromString(const std::string& type) { - if (type == "hybrid") { - return LouvainEdgeWeight::hybrid; - } else if (type == "uniform") { - return LouvainEdgeWeight::uniform; - } else if (type == "non_uniform") { - return LouvainEdgeWeight::non_uniform; - } else if (type == "degree") { - return LouvainEdgeWeight::degree; - } - throw InvalidParameterException("No valid louvain edge weight."); - return LouvainEdgeWeight::UNDEFINED; - } - - SimiliarNetCombinerStrategy similiarNetCombinerStrategyFromString(const std::string& type) { - if (type == "union") { - return SimiliarNetCombinerStrategy::union_nets; - } else if (type == "max_size") { - return SimiliarNetCombinerStrategy::max_size; - } else if (type == "importance") { - return SimiliarNetCombinerStrategy::importance; - } - throw InvalidParameterException("No valid similiar net unifier strategy."); - return SimiliarNetCombinerStrategy::UNDEFINED; - } - - CoarseningAlgorithm coarseningAlgorithmFromString(const std::string& type) { - if (type == "multilevel_coarsener") { - return CoarseningAlgorithm::multilevel_coarsener; - } else if (type == "nlevel_coarsener") { - return CoarseningAlgorithm::nlevel_coarsener; - } else if (type == "deterministic_multilevel_coarsener") { - return CoarseningAlgorithm::deterministic_multilevel_coarsener; - } - throw InvalidParameterException("Illegal option: " + type); - return CoarseningAlgorithm::UNDEFINED; - } - - HeavyNodePenaltyPolicy heavyNodePenaltyFromString(const std::string& penalty) { - if (penalty == "no_penalty") { - return HeavyNodePenaltyPolicy::no_penalty; - } - #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES - else if (penalty == "multiplicative") { - return HeavyNodePenaltyPolicy::multiplicative_penalty; - } else if (penalty == "additive") { - return HeavyNodePenaltyPolicy::additive; - // omit default case to trigger compiler warning for missing cases - } - #endif - throw InvalidParameterException("No valid edge penalty policy for rating."); - return HeavyNodePenaltyPolicy::UNDEFINED; - } - - AcceptancePolicy 
acceptanceCriterionFromString(const std::string& crit) { - if (crit == "best_prefer_unmatched") { - return AcceptancePolicy::best_prefer_unmatched; - } - #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES - else if (crit == "best") { - return AcceptancePolicy::best; - } - #endif - throw InvalidParameterException("No valid acceptance criterion for rating."); - } - - RatingFunction ratingFunctionFromString(const std::string& function) { - if (function == "heavy_edge") { - return RatingFunction::heavy_edge; - } - #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES - else if (function == "sameness") { - return RatingFunction::sameness; - } - #endif - throw InvalidParameterException("No valid rating function for rating."); - return RatingFunction::UNDEFINED; - } - - InitialPartitioningAlgorithm initialPartitioningAlgorithmFromString(const std::string& algo) { - if (algo == "random") { - return InitialPartitioningAlgorithm::random; - } else if (algo == "bfs") { - return InitialPartitioningAlgorithm::bfs; - } else if (algo == "greedy_round_robin_fm") { - return InitialPartitioningAlgorithm::greedy_round_robin_fm; - } else if (algo == "greedy_global_fm") { - return InitialPartitioningAlgorithm::greedy_global_fm; - } else if (algo == "greedy_sequential_fm") { - return InitialPartitioningAlgorithm::greedy_sequential_fm; - } else if (algo == "greedy_round_robin_max_net") { - return InitialPartitioningAlgorithm::greedy_round_robin_max_net; - } else if (algo == "greedy_global_max_net") { - return InitialPartitioningAlgorithm::greedy_global_max_net; - } else if (algo == "greedy_sequential_max_net") { - return InitialPartitioningAlgorithm::greedy_sequential_max_net; - } else if (algo == "label_propagation") { - return InitialPartitioningAlgorithm::label_propagation; - } - throw InvalidParameterException("Illegal option: " + algo); - return InitialPartitioningAlgorithm::UNDEFINED; - } - - LabelPropagationAlgorithm labelPropagationAlgorithmFromString(const std::string& type) { - if (type == "label_propagation") { - return LabelPropagationAlgorithm::label_propagation; - } else if (type == "deterministic") { - return LabelPropagationAlgorithm::deterministic; - } else if (type == "do_nothing") { - return LabelPropagationAlgorithm::do_nothing; - } - throw InvalidParameterException("Illegal option: " + type); +std::ostream &operator<<(std::ostream &os, const Type &type) +{ + switch(type) + { + case Type::Unweighted: + return os << "unweighted"; + case Type::EdgeWeights: + return os << "edge_weights"; + case Type::NodeWeights: + return os << "node_weights"; + case Type::EdgeAndNodeWeights: + return os << "edge_and_node_weights"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(type); +} + +std::ostream &operator<<(std::ostream &os, const FileFormat &format) +{ + switch(format) + { + case FileFormat::hMetis: + return os << "hMetis"; + case FileFormat::Metis: + return os << "Metis"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(format); +} + +std::ostream &operator<<(std::ostream &os, const InstanceType &type) +{ + switch(type) + { + case InstanceType::graph: + return os << "graph"; + case InstanceType::hypergraph: + return os << "hypergraph"; + case InstanceType::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(type); +} + +std::ostream &operator<<(std::ostream &os, const PresetType &type) +{ + switch(type) + { + case
PresetType::deterministic: + return os << "deterministic"; + case PresetType::large_k: + return os << "large_k"; + case PresetType::default_preset: + return os << "default"; + case PresetType::quality: + return os << "quality"; + case PresetType::highest_quality: + return os << "highest_quality"; + case PresetType::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(type); +} + +std::ostream &operator<<(std::ostream &os, const mt_kahypar_partition_type_t &type) +{ + switch(type) + { + case MULTILEVEL_GRAPH_PARTITIONING: + return os << "multilevel_graph_partitioning"; + case N_LEVEL_GRAPH_PARTITIONING: + return os << "n_level_graph_partitioning"; + case MULTILEVEL_HYPERGRAPH_PARTITIONING: + return os << "multilevel_hypergraph_partitioning"; + case LARGE_K_PARTITIONING: + return os << "large_k_partitioning"; + case N_LEVEL_HYPERGRAPH_PARTITIONING: + return os << "n_level_hypergraph_partitioning"; + case NULLPTR_PARTITION: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(type); +} + +std::ostream &operator<<(std::ostream &os, const ContextType &type) +{ + if(type == ContextType::main) + { + return os << "main"; + } + else + { + return os << "ip"; + } + return os << static_cast<uint8_t>(type); +} + +std::ostream &operator<<(std::ostream &os, const Mode &mode) +{ + switch(mode) + { + case Mode::recursive_bipartitioning: + return os << "recursive_bipartitioning"; + case Mode::direct: + return os << "direct_kway"; + case Mode::deep_multilevel: + return os << "deep_multilevel"; + case Mode::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(mode); +} + +std::ostream &operator<<(std::ostream &os, const Objective &objective) +{ + switch(objective) + { + case Objective::cut: + return os << "cut"; + case Objective::km1: + return os << "km1"; + case Objective::soed: + return os << "soed"; + case Objective::steiner_tree: + return os << "steiner_tree"; + case Objective::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(objective); +} + +std::ostream &operator<<(std::ostream &os, const GainPolicy &type) +{ + switch(type) + { + case GainPolicy::km1: + return os << "km1"; + case GainPolicy::cut: + return os << "cut"; + case GainPolicy::soed: + return os << "soed"; + case GainPolicy::steiner_tree: + return os << "steiner_tree"; + case GainPolicy::cut_for_graphs: + return os << "cut_for_graphs"; + case GainPolicy::steiner_tree_for_graphs: + return os << "steiner_tree_for_graphs"; + case GainPolicy::none: + return os << "none"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(type); +} + +std::ostream &operator<<(std::ostream &os, const LouvainEdgeWeight &type) +{ + switch(type) + { + case LouvainEdgeWeight::hybrid: + return os << "hybrid"; + case LouvainEdgeWeight::uniform: + return os << "uniform"; + case LouvainEdgeWeight::non_uniform: + return os << "non_uniform"; + case LouvainEdgeWeight::degree: + return os << "degree"; + case LouvainEdgeWeight::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(type); +} + +std::ostream &operator<<(std::ostream &os, const SimiliarNetCombinerStrategy &strategy) +{ + switch(strategy) + { + case
SimiliarNetCombinerStrategy::union_nets: + return os << "union"; + case SimiliarNetCombinerStrategy::max_size: + return os << "max_size"; + case SimiliarNetCombinerStrategy::importance: + return os << "importance"; + case SimiliarNetCombinerStrategy::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(strategy); +} + +std::ostream &operator<<(std::ostream &os, const CoarseningAlgorithm &algo) +{ + switch(algo) + { + case CoarseningAlgorithm::multilevel_coarsener: + return os << "multilevel_coarsener"; + case CoarseningAlgorithm::deterministic_multilevel_coarsener: + return os << "deterministic_multilevel_coarsener"; + case CoarseningAlgorithm::nlevel_coarsener: + return os << "nlevel_coarsener"; + case CoarseningAlgorithm::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(algo); +} + +std::ostream &operator<<(std::ostream &os, const HeavyNodePenaltyPolicy &heavy_hn_policy) +{ + switch(heavy_hn_policy) + { + case HeavyNodePenaltyPolicy::no_penalty: + return os << "no_penalty"; + ENABLE_EXPERIMENTAL_FEATURES(case HeavyNodePenaltyPolicy::additive + : return os << "additive";) + ENABLE_EXPERIMENTAL_FEATURES(case HeavyNodePenaltyPolicy::multiplicative_penalty + : return os << "multiplicative";) + case HeavyNodePenaltyPolicy::UNDEFINED: + return os << "UNDEFINED"; + } + return os << static_cast<uint8_t>(heavy_hn_policy); +} + +std::ostream &operator<<(std::ostream &os, const AcceptancePolicy &acceptance_policy) +{ + switch(acceptance_policy) + { + ENABLE_EXPERIMENTAL_FEATURES(case AcceptancePolicy::best : return os << "best";) + case AcceptancePolicy::best_prefer_unmatched: + return os << "best_prefer_unmatched"; + case AcceptancePolicy::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(acceptance_policy); +} + +std::ostream &operator<<(std::ostream &os, const RatingFunction &func) +{ + switch(func) + { + case RatingFunction::heavy_edge: + return os << "heavy_edge"; + ENABLE_EXPERIMENTAL_FEATURES(case RatingFunction::sameness : return os << "sameness";) + case RatingFunction::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(func); +} + +std::ostream &operator<<(std::ostream &os, const InitialPartitioningAlgorithm &algo) +{ + switch(algo) + { + case InitialPartitioningAlgorithm::random: + return os << "random"; + case InitialPartitioningAlgorithm::bfs: + return os << "bfs"; + case InitialPartitioningAlgorithm::greedy_round_robin_fm: + return os << "greedy_round_robin_fm"; + case InitialPartitioningAlgorithm::greedy_global_fm: + return os << "greedy_global_fm"; + case InitialPartitioningAlgorithm::greedy_sequential_fm: + return os << "greedy_sequential_fm"; + case InitialPartitioningAlgorithm::greedy_round_robin_max_net: + return os << "greedy_round_robin_max_net"; + case InitialPartitioningAlgorithm::greedy_global_max_net: + return os << "greedy_global_max_net"; + case InitialPartitioningAlgorithm::greedy_sequential_max_net: + return os << "greedy_sequential_max_net"; + case InitialPartitioningAlgorithm::label_propagation: + return os << "label_propagation"; + case InitialPartitioningAlgorithm::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(algo); +} + +std::ostream
&operator<<(std::ostream &os, const LabelPropagationAlgorithm &algo) +{ + switch(algo) + { + case LabelPropagationAlgorithm::label_propagation: + return os << "label_propagation"; + case LabelPropagationAlgorithm::deterministic: + return os << "deterministic"; + case LabelPropagationAlgorithm::do_nothing: + return os << "lp_do_nothing"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(algo); +} + +std::ostream &operator<<(std::ostream &os, const FMAlgorithm &algo) +{ + switch(algo) + { + case FMAlgorithm::kway_fm: + return os << "kway_fm"; + case FMAlgorithm::unconstrained_fm: + return os << "unconstrained_fm"; + case FMAlgorithm::do_nothing: + return os << "fm_do_nothing"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(algo); +} + +std::ostream &operator<<(std::ostream &os, const FlowAlgorithm &algo) +{ + switch(algo) + { + case FlowAlgorithm::flow_cutter: + return os << "flow_cutter"; + case FlowAlgorithm::mock: + return os << "mock"; + case FlowAlgorithm::do_nothing: + return os << "do_nothing"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(algo); +} + +std::ostream &operator<<(std::ostream &os, const RebalancingAlgorithm &algo) +{ + switch(algo) + { + case RebalancingAlgorithm::simple_rebalancer: + return os << "simple_rebalancer"; + case RebalancingAlgorithm::advanced_rebalancer: + return os << "advanced_rebalancer"; + case RebalancingAlgorithm::do_nothing: + return os << "do_nothing"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(algo); +} + +std::ostream &operator<<(std::ostream &os, const OneToOneMappingStrategy &algo) +{ + switch(algo) + { + case OneToOneMappingStrategy::greedy_mapping: + return os << "greedy_mapping"; + case OneToOneMappingStrategy::identity: + return os << "identity"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(algo); +} + +std::ostream &operator<<(std::ostream &os, const SteinerTreeFlowValuePolicy &policy) +{ + switch(policy) + { + case SteinerTreeFlowValuePolicy::lower_bound: + return os << "lower_bound"; + case SteinerTreeFlowValuePolicy::upper_bound: + return os << "upper_bound"; + case SteinerTreeFlowValuePolicy::UNDEFINED: + return os << "UNDEFINED"; + // omit default case to trigger compiler warning for missing cases + } + return os << static_cast<uint8_t>(policy); +} + +Mode modeFromString(const std::string &mode) +{ + if(mode == "rb") + { + return Mode::recursive_bipartitioning; + } + else if(mode == "direct") + { + return Mode::direct; + } + else if(mode == "deep") + { + return Mode::deep_multilevel; + } + throw InvalidParameterException("Illegal option: " + mode); + return Mode::UNDEFINED; +} + +InstanceType instanceTypeFromString(const std::string &type) +{ + if(type == "graph") + { + return InstanceType::graph; + } + else if(type == "hypergraph") + { + return InstanceType::hypergraph; + } + throw InvalidParameterException("Illegal option: " + type); + return InstanceType::UNDEFINED; +} + +PresetType presetTypeFromString(const std::string &type) +{ + if(type == "deterministic") + { + return PresetType::deterministic; + } + else if(type == "large_k") + { + return PresetType::large_k; + } + else if(type == "default") + { + return PresetType::default_preset; + } + else if(type == "quality") + { + return PresetType::quality; + } + else if(type == "highest_quality") + { + return
PresetType::highest_quality; + } + throw InvalidParameterException("Illegal option: " + type); + return PresetType::UNDEFINED; +} + +Objective objectiveFromString(const std::string &obj) +{ + if(obj == "cut") + { + return Objective::cut; + } + else if(obj == "km1") + { + return Objective::km1; + } + else if(obj == "soed") + { + return Objective::soed; + } + else if(obj == "steiner_tree") + { + return Objective::steiner_tree; + } + throw InvalidParameterException("No valid objective function."); + return Objective::UNDEFINED; +} + +LouvainEdgeWeight louvainEdgeWeightFromString(const std::string &type) +{ + if(type == "hybrid") + { + return LouvainEdgeWeight::hybrid; + } + else if(type == "uniform") + { + return LouvainEdgeWeight::uniform; + } + else if(type == "non_uniform") + { + return LouvainEdgeWeight::non_uniform; + } + else if(type == "degree") + { + return LouvainEdgeWeight::degree; + } + throw InvalidParameterException("No valid louvain edge weight."); + return LouvainEdgeWeight::UNDEFINED; +} + +SimiliarNetCombinerStrategy similiarNetCombinerStrategyFromString(const std::string &type) +{ + if(type == "union") + { + return SimiliarNetCombinerStrategy::union_nets; + } + else if(type == "max_size") + { + return SimiliarNetCombinerStrategy::max_size; + } + else if(type == "importance") + { + return SimiliarNetCombinerStrategy::importance; + } + throw InvalidParameterException("No valid similiar net unifier strategy."); + return SimiliarNetCombinerStrategy::UNDEFINED; +} + +CoarseningAlgorithm coarseningAlgorithmFromString(const std::string &type) +{ + if(type == "multilevel_coarsener") + { + return CoarseningAlgorithm::multilevel_coarsener; + } + else if(type == "nlevel_coarsener") + { + return CoarseningAlgorithm::nlevel_coarsener; + } + else if(type == "deterministic_multilevel_coarsener") + { + return CoarseningAlgorithm::deterministic_multilevel_coarsener; + } + throw InvalidParameterException("Illegal option: " + type); + return CoarseningAlgorithm::UNDEFINED; +} + +HeavyNodePenaltyPolicy heavyNodePenaltyFromString(const std::string &penalty) +{ + if(penalty == "no_penalty") + { + return HeavyNodePenaltyPolicy::no_penalty; + } +#ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES + else if(penalty == "multiplicative") + { + return HeavyNodePenaltyPolicy::multiplicative_penalty; + } + else if(penalty == "additive") + { + return HeavyNodePenaltyPolicy::additive; + // omit default case to trigger compiler warning for missing cases + } +#endif + throw InvalidParameterException("No valid edge penalty policy for rating."); + return HeavyNodePenaltyPolicy::UNDEFINED; +} + +AcceptancePolicy acceptanceCriterionFromString(const std::string &crit) +{ + if(crit == "best_prefer_unmatched") + { + return AcceptancePolicy::best_prefer_unmatched; + } +#ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES + else if(crit == "best") + { + return AcceptancePolicy::best; + } +#endif + throw InvalidParameterException("No valid acceptance criterion for rating."); +} + +RatingFunction ratingFunctionFromString(const std::string &function) +{ + if(function == "heavy_edge") + { + return RatingFunction::heavy_edge; + } +#ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES + else if(function == "sameness") + { + return RatingFunction::sameness; + } +#endif + throw InvalidParameterException("No valid rating function for rating."); + return RatingFunction::UNDEFINED; +} + +InitialPartitioningAlgorithm +initialPartitioningAlgorithmFromString(const std::string &algo) +{ + if(algo == "random") + { + return 
InitialPartitioningAlgorithm::random; + } + else if(algo == "bfs") + { + return InitialPartitioningAlgorithm::bfs; + } + else if(algo == "greedy_round_robin_fm") + { + return InitialPartitioningAlgorithm::greedy_round_robin_fm; + } + else if(algo == "greedy_global_fm") + { + return InitialPartitioningAlgorithm::greedy_global_fm; + } + else if(algo == "greedy_sequential_fm") + { + return InitialPartitioningAlgorithm::greedy_sequential_fm; + } + else if(algo == "greedy_round_robin_max_net") + { + return InitialPartitioningAlgorithm::greedy_round_robin_max_net; + } + else if(algo == "greedy_global_max_net") + { + return InitialPartitioningAlgorithm::greedy_global_max_net; + } + else if(algo == "greedy_sequential_max_net") + { + return InitialPartitioningAlgorithm::greedy_sequential_max_net; + } + else if(algo == "label_propagation") + { + return InitialPartitioningAlgorithm::label_propagation; + } + throw InvalidParameterException("Illegal option: " + algo); + return InitialPartitioningAlgorithm::UNDEFINED; +} + +LabelPropagationAlgorithm labelPropagationAlgorithmFromString(const std::string &type) +{ + if(type == "label_propagation") + { + return LabelPropagationAlgorithm::label_propagation; + } + else if(type == "deterministic") + { + return LabelPropagationAlgorithm::deterministic; + } + else if(type == "do_nothing") + { return LabelPropagationAlgorithm::do_nothing; } + throw InvalidParameterException("Illegal option: " + type); + return LabelPropagationAlgorithm::do_nothing; +} - FMAlgorithm fmAlgorithmFromString(const std::string& type) { - if (type == "kway_fm") { - return FMAlgorithm::kway_fm; - } else if (type == "unconstrained_fm") { - return FMAlgorithm::unconstrained_fm; - } else if (type == "do_nothing") { - return FMAlgorithm::do_nothing; - } - throw InvalidParameterException("Illegal option: " + type); +FMAlgorithm fmAlgorithmFromString(const std::string &type) +{ + if(type == "kway_fm") + { + return FMAlgorithm::kway_fm; + } + else if(type == "unconstrained_fm") + { + return FMAlgorithm::unconstrained_fm; + } + else if(type == "do_nothing") + { return FMAlgorithm::do_nothing; } + throw InvalidParameterException("Illegal option: " + type); + return FMAlgorithm::do_nothing; +} - FlowAlgorithm flowAlgorithmFromString(const std::string& type) { - if (type == "flow_cutter") { - return FlowAlgorithm::flow_cutter; - } else if (type == "do_nothing") { - return FlowAlgorithm::do_nothing; - } - throw InvalidParameterException("Illegal option: " + type); +FlowAlgorithm flowAlgorithmFromString(const std::string &type) +{ + if(type == "flow_cutter") + { + return FlowAlgorithm::flow_cutter; + } + else if(type == "do_nothing") + { return FlowAlgorithm::do_nothing; } + throw InvalidParameterException("Illegal option: " + type); + return FlowAlgorithm::do_nothing; +} - RebalancingAlgorithm rebalancingAlgorithmFromString(const std::string& type) { - if (type == "simple_rebalancer") { - return RebalancingAlgorithm::simple_rebalancer; - } else if (type == "advanced_rebalancer") { - return RebalancingAlgorithm::advanced_rebalancer; - } else if (type == "do_nothing") { - return RebalancingAlgorithm::do_nothing; - } - throw InvalidParameterException("Illegal option: " + type); +RebalancingAlgorithm rebalancingAlgorithmFromString(const std::string &type) +{ + if(type == "simple_rebalancer") + { + return RebalancingAlgorithm::simple_rebalancer; + } + else if(type == "advanced_rebalancer") + { + return RebalancingAlgorithm::advanced_rebalancer; + } + else if(type == "do_nothing") + { return 
RebalancingAlgorithm::do_nothing; } + throw InvalidParameterException("Illegal option: " + type); + return RebalancingAlgorithm::do_nothing; +} - OneToOneMappingStrategy oneToOneMappingStrategyFromString(const std::string& type) { - if (type == "greedy_mapping") { - return OneToOneMappingStrategy::greedy_mapping; - } else if (type == "identity") { - return OneToOneMappingStrategy::identity; - } - throw InvalidParameterException("Illegal option: " + type); +OneToOneMappingStrategy oneToOneMappingStrategyFromString(const std::string &type) +{ + if(type == "greedy_mapping") + { + return OneToOneMappingStrategy::greedy_mapping; + } + else if(type == "identity") + { return OneToOneMappingStrategy::identity; } + throw InvalidParameterException("Illegal option: " + type); + return OneToOneMappingStrategy::identity; +} - SteinerTreeFlowValuePolicy steinerTreeFlowValuePolicyFromString(const std::string& policy) { - if (policy == "lower_bound") { - return SteinerTreeFlowValuePolicy::lower_bound; - } else if (policy == "upper_bound") { - return SteinerTreeFlowValuePolicy::upper_bound; - } - throw InvalidParameterException("Illegal option: " + policy); - return SteinerTreeFlowValuePolicy::UNDEFINED; +SteinerTreeFlowValuePolicy steinerTreeFlowValuePolicyFromString(const std::string &policy) +{ + if(policy == "lower_bound") + { + return SteinerTreeFlowValuePolicy::lower_bound; + } + else if(policy == "upper_bound") + { + return SteinerTreeFlowValuePolicy::upper_bound; } + throw InvalidParameterException("Illegal option: " + policy); + return SteinerTreeFlowValuePolicy::UNDEFINED; +} } diff --git a/mt-kahypar/partition/context_enum_classes.h b/mt-kahypar/partition/context_enum_classes.h index 3d4aefdda..3b7acaa7e 100644 --- a/mt-kahypar/partition/context_enum_classes.h +++ b/mt-kahypar/partition/context_enum_classes.h @@ -27,34 +27,38 @@ #pragma once +#include #include #include -#include #include "include/libmtkahypartypes.h" #include "mt-kahypar/macros.h" namespace mt_kahypar { -enum class Type : int8_t { +enum class Type : int8_t +{ Unweighted = 0, EdgeWeights = 1, NodeWeights = 10, EdgeAndNodeWeights = 11, }; -enum class FileFormat : int8_t { +enum class FileFormat : int8_t +{ hMetis = 0, Metis = 1, }; -enum class InstanceType : int8_t { +enum class InstanceType : int8_t +{ graph = 0, hypergraph = 1, UNDEFINED = 2 }; -enum class PresetType : int8_t { +enum class PresetType : int8_t +{ deterministic, large_k, default_preset, @@ -63,19 +67,22 @@ enum class PresetType : int8_t { UNDEFINED }; -enum class ContextType : bool { +enum class ContextType : bool +{ main, initial_partitioning }; -enum class Mode : uint8_t { +enum class Mode : uint8_t +{ recursive_bipartitioning, direct, deep_multilevel, UNDEFINED }; -enum class Objective : uint8_t { +enum class Objective : uint8_t +{ cut, km1, soed, @@ -83,7 +90,8 @@ enum class Objective : uint8_t { UNDEFINED }; -enum class GainPolicy : uint8_t { +enum class GainPolicy : uint8_t +{ km1, cut, soed, @@ -93,7 +101,8 @@ enum class GainPolicy : uint8_t { none }; -enum class LouvainEdgeWeight : uint8_t { +enum class LouvainEdgeWeight : uint8_t +{ hybrid, uniform, non_uniform, @@ -101,40 +110,43 @@ enum class LouvainEdgeWeight : uint8_t { UNDEFINED }; -enum class SimiliarNetCombinerStrategy : uint8_t { +enum class SimiliarNetCombinerStrategy : uint8_t +{ union_nets, max_size, importance, UNDEFINED }; -enum class CoarseningAlgorithm : uint8_t { +enum class CoarseningAlgorithm : uint8_t +{ multilevel_coarsener, deterministic_multilevel_coarsener, nlevel_coarsener, 
UNDEFINED }; -enum class RatingFunction : uint8_t { +enum class RatingFunction : uint8_t +{ heavy_edge, - ENABLE_EXPERIMENTAL_FEATURES(sameness COMMA) - UNDEFINED + ENABLE_EXPERIMENTAL_FEATURES(sameness COMMA) UNDEFINED }; -enum class HeavyNodePenaltyPolicy : uint8_t { +enum class HeavyNodePenaltyPolicy : uint8_t +{ no_penalty, ENABLE_EXPERIMENTAL_FEATURES(multiplicative_penalty COMMA) - ENABLE_EXPERIMENTAL_FEATURES(additive COMMA) - UNDEFINED + ENABLE_EXPERIMENTAL_FEATURES(additive COMMA) UNDEFINED }; -enum class AcceptancePolicy : uint8_t { - ENABLE_EXPERIMENTAL_FEATURES(best COMMA) - best_prefer_unmatched, +enum class AcceptancePolicy : uint8_t +{ + ENABLE_EXPERIMENTAL_FEATURES(best COMMA) best_prefer_unmatched, UNDEFINED }; -enum class InitialPartitioningAlgorithm : uint8_t { +enum class InitialPartitioningAlgorithm : uint8_t +{ greedy_round_robin_fm = 0, greedy_global_fm = 1, greedy_sequential_fm = 2, @@ -147,117 +159,126 @@ enum class InitialPartitioningAlgorithm : uint8_t { UNDEFINED = 9 }; -enum class LabelPropagationAlgorithm : uint8_t { +enum class LabelPropagationAlgorithm : uint8_t +{ label_propagation, deterministic, do_nothing }; -enum class FMAlgorithm : uint8_t { +enum class FMAlgorithm : uint8_t +{ kway_fm, unconstrained_fm, do_nothing }; -enum class FlowAlgorithm : uint8_t { +enum class FlowAlgorithm : uint8_t +{ flow_cutter, mock, do_nothing }; -enum class RebalancingAlgorithm : uint8_t { +enum class RebalancingAlgorithm : uint8_t +{ simple_rebalancer, advanced_rebalancer, do_nothing }; -enum class OneToOneMappingStrategy : uint8_t { +enum class OneToOneMappingStrategy : uint8_t +{ greedy_mapping, identity }; -enum class SteinerTreeFlowValuePolicy : uint8_t { +enum class SteinerTreeFlowValuePolicy : uint8_t +{ lower_bound, upper_bound, UNDEFINED }; -std::ostream & operator<< (std::ostream& os, const Type& type); +std::ostream &operator<<(std::ostream &os, const Type &type); -std::ostream & operator<< (std::ostream& os, const FileFormat& type); +std::ostream &operator<<(std::ostream &os, const FileFormat &type); -std::ostream & operator<< (std::ostream& os, const InstanceType& type); +std::ostream &operator<<(std::ostream &os, const InstanceType &type); -std::ostream & operator<< (std::ostream& os, const PresetType& type); +std::ostream &operator<<(std::ostream &os, const PresetType &type); -std::ostream & operator<< (std::ostream& os, const mt_kahypar_partition_type_t& type); +std::ostream &operator<<(std::ostream &os, const mt_kahypar_partition_type_t &type); -std::ostream & operator<< (std::ostream& os, const ContextType& type); +std::ostream &operator<<(std::ostream &os, const ContextType &type); -std::ostream & operator<< (std::ostream& os, const Mode& mode); +std::ostream &operator<<(std::ostream &os, const Mode &mode); -std::ostream & operator<< (std::ostream& os, const Objective& objective); +std::ostream &operator<<(std::ostream &os, const Objective &objective); -std::ostream & operator<< (std::ostream& os, const GainPolicy& type); +std::ostream &operator<<(std::ostream &os, const GainPolicy &type); -std::ostream & operator<< (std::ostream& os, const LouvainEdgeWeight& type); +std::ostream &operator<<(std::ostream &os, const LouvainEdgeWeight &type); -std::ostream & operator<< (std::ostream& os, const SimiliarNetCombinerStrategy& strategy); +std::ostream &operator<<(std::ostream &os, const SimiliarNetCombinerStrategy &strategy); -std::ostream & operator<< (std::ostream& os, const CoarseningAlgorithm& algo); +std::ostream &operator<<(std::ostream &os, const 
CoarseningAlgorithm &algo); -std::ostream & operator<< (std::ostream& os, const HeavyNodePenaltyPolicy& heavy_hn_policy); +std::ostream &operator<<(std::ostream &os, const HeavyNodePenaltyPolicy &heavy_hn_policy); -std::ostream & operator<< (std::ostream& os, const AcceptancePolicy& acceptance_policy); +std::ostream &operator<<(std::ostream &os, const AcceptancePolicy &acceptance_policy); -std::ostream & operator<< (std::ostream& os, const RatingFunction& func); +std::ostream &operator<<(std::ostream &os, const RatingFunction &func); -std::ostream & operator<< (std::ostream& os, const InitialPartitioningAlgorithm& algo); +std::ostream &operator<<(std::ostream &os, const InitialPartitioningAlgorithm &algo); -std::ostream & operator<< (std::ostream& os, const LabelPropagationAlgorithm& algo); +std::ostream &operator<<(std::ostream &os, const LabelPropagationAlgorithm &algo); -std::ostream & operator<< (std::ostream& os, const FMAlgorithm& algo); +std::ostream &operator<<(std::ostream &os, const FMAlgorithm &algo); -std::ostream & operator<< (std::ostream& os, const FlowAlgorithm& algo); +std::ostream &operator<<(std::ostream &os, const FlowAlgorithm &algo); -std::ostream & operator<< (std::ostream& os, const RebalancingAlgorithm& algo); +std::ostream &operator<<(std::ostream &os, const RebalancingAlgorithm &algo); -std::ostream & operator<< (std::ostream& os, const OneToOneMappingStrategy& algo); +std::ostream &operator<<(std::ostream &os, const OneToOneMappingStrategy &algo); -std::ostream & operator<< (std::ostream& os, const SteinerTreeFlowValuePolicy& policy); +std::ostream &operator<<(std::ostream &os, const SteinerTreeFlowValuePolicy &policy); -Mode modeFromString(const std::string& mode); +Mode modeFromString(const std::string &mode); -InstanceType instanceTypeFromString(const std::string& type); +InstanceType instanceTypeFromString(const std::string &type); -PresetType presetTypeFromString(const std::string& type); +PresetType presetTypeFromString(const std::string &type); -Objective objectiveFromString(const std::string& obj); +Objective objectiveFromString(const std::string &obj); -LouvainEdgeWeight louvainEdgeWeightFromString(const std::string& type); +LouvainEdgeWeight louvainEdgeWeightFromString(const std::string &type); -SimiliarNetCombinerStrategy similiarNetCombinerStrategyFromString(const std::string& type); +SimiliarNetCombinerStrategy +similiarNetCombinerStrategyFromString(const std::string &type); -CoarseningAlgorithm coarseningAlgorithmFromString(const std::string& type); +CoarseningAlgorithm coarseningAlgorithmFromString(const std::string &type); -HeavyNodePenaltyPolicy heavyNodePenaltyFromString(const std::string& penalty); +HeavyNodePenaltyPolicy heavyNodePenaltyFromString(const std::string &penalty); -AcceptancePolicy acceptanceCriterionFromString(const std::string& crit); +AcceptancePolicy acceptanceCriterionFromString(const std::string &crit); -RatingFunction ratingFunctionFromString(const std::string& function); +RatingFunction ratingFunctionFromString(const std::string &function); -InitialPartitioningAlgorithm initialPartitioningAlgorithmFromString(const std::string& algo); +InitialPartitioningAlgorithm +initialPartitioningAlgorithmFromString(const std::string &algo); -LabelPropagationAlgorithm labelPropagationAlgorithmFromString(const std::string& type); +LabelPropagationAlgorithm labelPropagationAlgorithmFromString(const std::string &type); -FMAlgorithm fmAlgorithmFromString(const std::string& type); +FMAlgorithm fmAlgorithmFromString(const std::string &type); 
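+// Minimal usage sketch of the string <-> enum helpers declared in this header
+// (the "unknown" input below is only an example of an unsupported option
+// string; values and exception type are taken from the definitions above):
+//   FMAlgorithm algo = fmAlgorithmFromString("kway_fm"); // FMAlgorithm::kway_fm
+//   std::cout << algo;                                    // prints "kway_fm"
+//   fmAlgorithmFromString("unknown");                     // throws InvalidParameterException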
-FlowAlgorithm flowAlgorithmFromString(const std::string& type); +FlowAlgorithm flowAlgorithmFromString(const std::string &type); -RebalancingAlgorithm rebalancingAlgorithmFromString(const std::string& type); +RebalancingAlgorithm rebalancingAlgorithmFromString(const std::string &type); -OneToOneMappingStrategy oneToOneMappingStrategyFromString(const std::string& type); +OneToOneMappingStrategy oneToOneMappingStrategyFromString(const std::string &type); -SteinerTreeFlowValuePolicy steinerTreeFlowValuePolicyFromString(const std::string& policy); +SteinerTreeFlowValuePolicy +steinerTreeFlowValuePolicyFromString(const std::string &policy); -} // namesapce mt_kahypar +} // namesapce mt_kahypar diff --git a/mt-kahypar/partition/conversion.cpp b/mt-kahypar/partition/conversion.cpp index 81362bfd0..9b874e404 100644 --- a/mt-kahypar/partition/conversion.cpp +++ b/mt-kahypar/partition/conversion.cpp @@ -24,7 +24,6 @@ * SOFTWARE. ******************************************************************************/ - #include "mt-kahypar/partition/conversion.h" #include "mt-kahypar/macros.h" @@ -33,85 +32,121 @@ namespace mt_kahypar { mt_kahypar_hypergraph_type_t to_hypergraph_c_type(const PresetType preset, - const InstanceType instance) { - if ( instance == InstanceType::hypergraph ) { - switch ( preset ) { - case PresetType::deterministic: - case PresetType::large_k: - case PresetType::default_preset: - case PresetType::quality: return STATIC_HYPERGRAPH; - case PresetType::highest_quality: return DYNAMIC_HYPERGRAPH; - case PresetType::UNDEFINED: throw InvalidParameterException("Unknown preset type!"); + const InstanceType instance) +{ + if(instance == InstanceType::hypergraph) + { + switch(preset) + { + case PresetType::deterministic: + case PresetType::large_k: + case PresetType::default_preset: + case PresetType::quality: + return STATIC_HYPERGRAPH; + case PresetType::highest_quality: + return DYNAMIC_HYPERGRAPH; + case PresetType::UNDEFINED: + throw InvalidParameterException("Unknown preset type!"); } } - else if ( instance == InstanceType::graph ) { - switch ( preset ) { - case PresetType::deterministic: - case PresetType::large_k: - case PresetType::default_preset: - case PresetType::quality: return STATIC_GRAPH; - case PresetType::highest_quality: return DYNAMIC_GRAPH; - case PresetType::UNDEFINED: throw InvalidParameterException("Unknown preset type!"); + else if(instance == InstanceType::graph) + { + switch(preset) + { + case PresetType::deterministic: + case PresetType::large_k: + case PresetType::default_preset: + case PresetType::quality: + return STATIC_GRAPH; + case PresetType::highest_quality: + return DYNAMIC_GRAPH; + case PresetType::UNDEFINED: + throw InvalidParameterException("Unknown preset type!"); } } - else { - throw InvalidParameterException("Unknown instance type. Should be either graph or hypergraph"); + else + { + throw InvalidParameterException( + "Unknown instance type. 
Should be either graph or hypergraph"); } return NULLPTR_HYPERGRAPH; } mt_kahypar_partition_type_t to_partition_c_type(const PresetType preset, - const InstanceType instance) { - if ( instance == InstanceType::graph ) { - if ( preset == PresetType::default_preset || - preset == PresetType::quality || - preset == PresetType::large_k || - preset == PresetType::deterministic ) { + const InstanceType instance) +{ + if(instance == InstanceType::graph) + { + if(preset == PresetType::default_preset || preset == PresetType::quality || + preset == PresetType::large_k || preset == PresetType::deterministic) + { return MULTILEVEL_GRAPH_PARTITIONING; - } else if ( preset == PresetType::highest_quality ) { + } + else if(preset == PresetType::highest_quality) + { return N_LEVEL_GRAPH_PARTITIONING; } - } else if ( instance == InstanceType::hypergraph ) { - if ( preset == PresetType::default_preset || - preset == PresetType::quality || - preset == PresetType::deterministic ) { + } + else if(instance == InstanceType::hypergraph) + { + if(preset == PresetType::default_preset || preset == PresetType::quality || + preset == PresetType::deterministic) + { return MULTILEVEL_HYPERGRAPH_PARTITIONING; - } else if ( preset == PresetType::highest_quality ) { + } + else if(preset == PresetType::highest_quality) + { return N_LEVEL_HYPERGRAPH_PARTITIONING; - } else if ( preset == PresetType::large_k ) { + } + else if(preset == PresetType::large_k) + { return LARGE_K_PARTITIONING; } } return NULLPTR_PARTITION; } -PresetType to_preset_type(const Mode mode, - const PartitionID k, +PresetType to_preset_type(const Mode mode, const PartitionID k, const CoarseningAlgorithm coarsening_algo, - const FlowAlgorithm flow_algo) { - if ( coarsening_algo == CoarseningAlgorithm::deterministic_multilevel_coarsener ) { + const FlowAlgorithm flow_algo) +{ + if(coarsening_algo == CoarseningAlgorithm::deterministic_multilevel_coarsener) + { return PresetType::deterministic; - } else if ( mode == Mode::deep_multilevel && k >= 1024 ) { + } + else if(mode == Mode::deep_multilevel && k >= 1024) + { return PresetType::large_k; - } else if ( coarsening_algo == CoarseningAlgorithm::multilevel_coarsener ) { - if ( flow_algo == FlowAlgorithm::flow_cutter ) { + } + else if(coarsening_algo == CoarseningAlgorithm::multilevel_coarsener) + { + if(flow_algo == FlowAlgorithm::flow_cutter) + { return PresetType::quality; - } else { + } + else + { return PresetType::default_preset; } - } else if ( coarsening_algo == CoarseningAlgorithm::nlevel_coarsener ) { + } + else if(coarsening_algo == CoarseningAlgorithm::nlevel_coarsener) + { return PresetType::highest_quality; } return PresetType::UNDEFINED; } -InstanceType to_instance_type(const FileFormat format) { - if ( format == FileFormat::Metis ) { +InstanceType to_instance_type(const FileFormat format) +{ + if(format == FileFormat::Metis) + { return InstanceType::graph; - } else if ( format == FileFormat::hMetis ) { + } + else if(format == FileFormat::hMetis) + { return InstanceType::hypergraph; } return InstanceType::UNDEFINED; } -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/conversion.h b/mt-kahypar/partition/conversion.h index 19b683da7..bc62f7cc2 100644 --- a/mt-kahypar/partition/conversion.h +++ b/mt-kahypar/partition/conversion.h @@ -28,8 +28,8 @@ #include "include/libmtkahypartypes.h" -#include "mt-kahypar/partition/context_enum_classes.h" #include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/partition/context_enum_classes.h" namespace 
mt_kahypar { @@ -39,11 +39,10 @@ mt_kahypar_hypergraph_type_t to_hypergraph_c_type(const PresetType preset, mt_kahypar_partition_type_t to_partition_c_type(const PresetType preset, const InstanceType instance); -PresetType to_preset_type(const Mode mode, - const PartitionID k, +PresetType to_preset_type(const Mode mode, const PartitionID k, const CoarseningAlgorithm coarsening_algo, const FlowAlgorithm flow_algo); InstanceType to_instance_type(const FileFormat format); -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/deep_multilevel.cpp b/mt-kahypar/partition/deep_multilevel.cpp index d200a496b..620c621e5 100644 --- a/mt-kahypar/partition/deep_multilevel.cpp +++ b/mt-kahypar/partition/deep_multilevel.cpp @@ -34,18 +34,18 @@ #include "tbb/parallel_for.h" #include "mt-kahypar/definitions.h" +#include "mt-kahypar/io/partitioning_output.h" #include "mt-kahypar/macros.h" -#include "mt-kahypar/partition/metrics.h" -#include "mt-kahypar/partition/multilevel.h" #include "mt-kahypar/partition/coarsening/coarsening_commons.h" #include "mt-kahypar/partition/coarsening/multilevel_uncoarsener.h" #include "mt-kahypar/partition/coarsening/nlevel_uncoarsener.h" -#include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" +#include "mt-kahypar/partition/metrics.h" +#include "mt-kahypar/partition/multilevel.h" #include "mt-kahypar/partition/refinement/gains/bipartitioning_policy.h" -#include "mt-kahypar/utils/utilities.h" -#include "mt-kahypar/utils/timer.h" +#include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" #include "mt-kahypar/utils/progress_bar.h" -#include "mt-kahypar/io/partitioning_output.h" +#include "mt-kahypar/utils/timer.h" +#include "mt-kahypar/utils/utilities.h" namespace mt_kahypar { @@ -54,8 +54,9 @@ namespace { static constexpr bool enable_heavy_assert = false; static constexpr bool debug = false; -template -struct DeepPartitioningResult { +template +struct DeepPartitioningResult +{ using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; @@ -65,25 +66,32 @@ struct DeepPartitioningResult { bool valid = false; }; -struct OriginalHypergraphInfo { +struct OriginalHypergraphInfo +{ - // The initial allowed imbalance cannot be used for each bipartition as this could result in an - // imbalanced k-way partition when performing recursive bipartitioning. We therefore adaptively - // adjust the allowed imbalance for each bipartition individually based on the adaptive imbalance - // definition described in our papers. + // The initial allowed imbalance cannot be used for each bipartition as this could + // result in an imbalanced k-way partition when performing recursive bipartitioning. We + // therefore adaptively adjust the allowed imbalance for each bipartition individually + // based on the adaptive imbalance definition described in our papers. double computeAdaptiveEpsilon(const HypernodeWeight current_hypergraph_weight, - const PartitionID current_k) const { - if ( current_hypergraph_weight == 0 ) { + const PartitionID current_k) const + { + if(current_hypergraph_weight == 0) + { // In recursive bipartitioning, it can happen that a block becomes too light that // all nodes of the block fit into one block in a subsequent bipartitioning step. // This will create an empty block, which we fix later in a rebalancing step. 
return 0.0; - } else { - double base = ceil(static_cast(original_hypergraph_weight) / original_k) - / ceil(static_cast(current_hypergraph_weight) / current_k) - * (1.0 + original_epsilon); - double adaptive_epsilon = std::min(0.99, std::max(std::pow(base, 1.0 / - ceil(log2(static_cast(current_k)))) - 1.0,0.0)); + } + else + { + double base = ceil(static_cast(original_hypergraph_weight) / original_k) / + ceil(static_cast(current_hypergraph_weight) / current_k) * + (1.0 + original_epsilon); + double adaptive_epsilon = std::min( + 0.99, + std::max(std::pow(base, 1.0 / ceil(log2(static_cast(current_k)))) - 1.0, + 0.0)); return adaptive_epsilon; } } @@ -93,21 +101,21 @@ struct OriginalHypergraphInfo { const double original_epsilon; }; -// During uncoarsening in the deep multilevel scheme, we recursively bipartition each block of the -// partition until we reach the desired number of blocks. The recursive bipartitioning tree (RBTree) -// contains for each partition information in how many blocks we have to further bipartition each block, -// the range of block IDs in the final partition of each block, and the perfectly balanced and maximum -// allowed block weight for each block. -class RBTree { - - public: - explicit RBTree(const Context& context) : - _contraction_limit_multiplier(context.coarsening.contraction_limit_multiplier), - _desired_blocks(), - _target_blocks(), - _perfectly_balanced_weights(), - _max_part_weights(), - _partition_to_level() { +// During uncoarsening in the deep multilevel scheme, we recursively bipartition each +// block of the partition until we reach the desired number of blocks. The recursive +// bipartitioning tree (RBTree) contains for each partition information in how many blocks +// we have to further bipartition each block, the range of block IDs in the final +// partition of each block, and the perfectly balanced and maximum allowed block weight +// for each block. 
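+// Illustrative example (assuming k = 5 target blocks): level 0 holds (5),
+// level 1 holds (3, 2) because a block that must still be split into k parts is
+// bipartitioned into ceil(k/2) and floor(k/2) parts, level 2 holds (2, 1, 1, 1),
+// and level 3 holds (1, 1, 1, 1, 1); the target block ranges and the weight
+// bounds are accumulated per level in the same fashion.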
+class RBTree +{ + +public: + explicit RBTree(const Context &context) : + _contraction_limit_multiplier(context.coarsening.contraction_limit_multiplier), + _desired_blocks(), _target_blocks(), _perfectly_balanced_weights(), + _max_part_weights(), _partition_to_level() + { _desired_blocks.emplace_back(); _desired_blocks[0].push_back(context.partition.k); _target_blocks.emplace_back(); @@ -115,106 +123,128 @@ class RBTree { _target_blocks[0].push_back(context.partition.k); _perfectly_balanced_weights.emplace_back(); _perfectly_balanced_weights[0].push_back( - std::accumulate(context.partition.perfect_balance_part_weights.cbegin(), - context.partition.perfect_balance_part_weights.cend(), 0)); + std::accumulate(context.partition.perfect_balance_part_weights.cbegin(), + context.partition.perfect_balance_part_weights.cend(), 0)); _max_part_weights.emplace_back(); _max_part_weights[0].push_back( - std::accumulate(context.partition.max_part_weights.cbegin(), - context.partition.max_part_weights.cend(), 0)); + std::accumulate(context.partition.max_part_weights.cbegin(), + context.partition.max_part_weights.cend(), 0)); precomputeRBTree(context); } - PartitionID nextK(const PartitionID k) const { + PartitionID nextK(const PartitionID k) const + { const PartitionID original_k = _desired_blocks[0][0]; - if ( k < original_k && k != kInvalidPartition ) { + if(k < original_k && k != kInvalidPartition) + { ASSERT(_partition_to_level.count(k) > 0); const size_t level = _partition_to_level.at(k); - if ( level + 1 < _desired_blocks.size() ) { + if(level + 1 < _desired_blocks.size()) + { return _desired_blocks[level + 1].size(); - } else { + } + else + { return original_k; } - } else { + } + else + { return kInvalidPartition; } } PartitionID desiredNumberOfBlocks(const PartitionID current_k, - const PartitionID block) const { + const PartitionID block) const + { ASSERT(_partition_to_level.count(current_k) > 0); ASSERT(block < current_k); return _desired_blocks[_partition_to_level.at(current_k)][block]; } - std::pair targetBlocksInFinalPartition(const PartitionID current_k, - const PartitionID block) const { + std::pair + targetBlocksInFinalPartition(const PartitionID current_k, const PartitionID block) const + { ASSERT(_partition_to_level.count(current_k) > 0); ASSERT(block < current_k); - const vec& target_blocks = - _target_blocks[_partition_to_level.at(current_k)]; + const vec &target_blocks = + _target_blocks[_partition_to_level.at(current_k)]; return std::make_pair(target_blocks[block], target_blocks[block + 1]); } HypernodeWeight perfectlyBalancedWeight(const PartitionID current_k, - const PartitionID block) const { + const PartitionID block) const + { ASSERT(_partition_to_level.count(current_k) > 0); ASSERT(block < current_k); return _perfectly_balanced_weights[_partition_to_level.at(current_k)][block]; } - const std::vector& perfectlyBalancedWeightVector(const PartitionID current_k) const { + const std::vector & + perfectlyBalancedWeightVector(const PartitionID current_k) const + { ASSERT(_partition_to_level.count(current_k) > 0); return _perfectly_balanced_weights[_partition_to_level.at(current_k)]; } HypernodeWeight maxPartWeight(const PartitionID current_k, - const PartitionID block) const { + const PartitionID block) const + { ASSERT(_partition_to_level.count(current_k) > 0); ASSERT(block < current_k); return _max_part_weights[_partition_to_level.at(current_k)][block]; } - const std::vector& maxPartWeightVector(const PartitionID current_k) const { + const std::vector & + maxPartWeightVector(const 
PartitionID current_k) const + { ASSERT(_partition_to_level.count(current_k) > 0); return _max_part_weights[_partition_to_level.at(current_k)]; } - PartitionID get_maximum_number_of_blocks(const HypernodeID current_num_nodes) const { + PartitionID get_maximum_number_of_blocks(const HypernodeID current_num_nodes) const + { const int num_levels = _desired_blocks.size(); - for ( int i = num_levels - 1; i >= 0; --i ) { + for(int i = num_levels - 1; i >= 0; --i) + { const PartitionID k = _desired_blocks[i].size(); - if ( current_num_nodes >= k * _contraction_limit_multiplier ) { + if(current_num_nodes >= k * _contraction_limit_multiplier) + { return k; } } return _desired_blocks.back().size(); } - void printRBTree() const { - for ( size_t level = 0; level < _desired_blocks.size(); ++level ) { + void printRBTree() const + { + for(size_t level = 0; level < _desired_blocks.size(); ++level) + { std::cout << "Level " << (level + 1) << std::endl; - for ( size_t i = 0; i < _desired_blocks[level].size(); ++i) { - std::cout << "(" << _desired_blocks[level][i] - << ", [" << _target_blocks[level][i] << "," << _target_blocks[level][i + 1] << "]" - << ", " << _perfectly_balanced_weights[level][i] - << ", " << _max_part_weights[level][i] << ") "; + for(size_t i = 0; i < _desired_blocks[level].size(); ++i) + { + std::cout << "(" << _desired_blocks[level][i] << ", [" << _target_blocks[level][i] + << "," << _target_blocks[level][i + 1] << "]" + << ", " << _perfectly_balanced_weights[level][i] << ", " + << _max_part_weights[level][i] << ") "; } std::cout << std::endl; } } - private: - void precomputeRBTree(const Context& context) { +private: + void precomputeRBTree(const Context &context) + { auto add_block = [&](const PartitionID k) { const PartitionID start = _target_blocks.back().back(); _desired_blocks.back().push_back(k); _target_blocks.back().push_back(start + k); const HypernodeWeight perfect_part_weight = std::accumulate( - context.partition.perfect_balance_part_weights.cbegin() + start, - context.partition.perfect_balance_part_weights.cbegin() + start + k, 0); - const HypernodeWeight max_part_weight = std::accumulate( - context.partition.max_part_weights.cbegin() + start, - context.partition.max_part_weights.cbegin() + start + k, 0); + context.partition.perfect_balance_part_weights.cbegin() + start, + context.partition.perfect_balance_part_weights.cbegin() + start + k, 0); + const HypernodeWeight max_part_weight = + std::accumulate(context.partition.max_part_weights.cbegin() + start, + context.partition.max_part_weights.cbegin() + start + k, 0); _perfectly_balanced_weights.back().push_back(perfect_part_weight); _max_part_weights.back().push_back(max_part_weight); }; @@ -222,46 +252,54 @@ class RBTree { int cur_level = 0; bool should_continue = true; // Simulates recursive bipartitioning - while ( should_continue ) { + while(should_continue) + { should_continue = false; _desired_blocks.emplace_back(); _target_blocks.emplace_back(); _target_blocks.back().push_back(0); _perfectly_balanced_weights.emplace_back(); _max_part_weights.emplace_back(); - for ( size_t i = 0; i < _desired_blocks[cur_level].size(); ++i ) { + for(size_t i = 0; i < _desired_blocks[cur_level].size(); ++i) + { const PartitionID k = _desired_blocks[cur_level][i]; - if ( k > 1 ) { + if(k > 1) + { const PartitionID k0 = k / 2 + (k % 2); const PartitionID k1 = k / 2; add_block(k0); add_block(k1); - should_continue |= ( k0 > 1 || k1 > 1 ); - } else { + should_continue |= (k0 > 1 || k1 > 1); + } + else + { add_block(1); } } ++cur_level; } - 
for ( size_t i = 0; i < _desired_blocks.size(); ++i ) { + for(size_t i = 0; i < _desired_blocks.size(); ++i) + { _partition_to_level[_desired_blocks[i].size()] = i; } } const HypernodeID _contraction_limit_multiplier; - vec> _desired_blocks; - vec> _target_blocks; - vec> _perfectly_balanced_weights; - vec> _max_part_weights; + vec > _desired_blocks; + vec > _target_blocks; + vec > _perfectly_balanced_weights; + vec > _max_part_weights; std::unordered_map _partition_to_level; }; -bool disableTimerAndStats(const Context& context) { +bool disableTimerAndStats(const Context &context) +{ const bool was_enabled_before = - utils::Utilities::instance().getTimer(context.utility_id).isEnabled(); - if ( context.type == ContextType::main ) { - utils::Utilities& utils = utils::Utilities::instance(); + utils::Utilities::instance().getTimer(context.utility_id).isEnabled(); + if(context.type == ContextType::main) + { + utils::Utilities &utils = utils::Utilities::instance(); parallel::MemoryPool::instance().deactivate_unused_memory_allocations(); utils.getTimer(context.utility_id).disable(); utils.getStats(context.utility_id).disable(); @@ -269,38 +307,42 @@ bool disableTimerAndStats(const Context& context) { return was_enabled_before; } -void enableTimerAndStats(const Context& context, const bool was_enabled_before) { - if ( context.type == ContextType::main && was_enabled_before ) { - utils::Utilities& utils = utils::Utilities::instance(); +void enableTimerAndStats(const Context &context, const bool was_enabled_before) +{ + if(context.type == ContextType::main && was_enabled_before) + { + utils::Utilities &utils = utils::Utilities::instance(); parallel::MemoryPool::instance().activate_unused_memory_allocations(); utils.getTimer(context.utility_id).enable(); utils.getStats(context.utility_id).enable(); } } -Context setupBipartitioningContext(const Context& context, - const OriginalHypergraphInfo& info, - const PartitionID start_k, - const PartitionID end_k, +Context setupBipartitioningContext(const Context &context, + const OriginalHypergraphInfo &info, + const PartitionID start_k, const PartitionID end_k, const HypernodeWeight total_weight, - const bool is_graph) { + const bool is_graph) +{ ASSERT(end_k - start_k >= 2); Context b_context(context); b_context.partition.k = 2; b_context.partition.objective = Objective::cut; - b_context.partition.gain_policy = is_graph ? GainPolicy::cut_for_graphs : GainPolicy::cut; + b_context.partition.gain_policy = + is_graph ? GainPolicy::cut_for_graphs : GainPolicy::cut; b_context.partition.verbose_output = false; b_context.initial_partitioning.mode = Mode::direct; b_context.type = ContextType::initial_partitioning; - if ( b_context.coarsening.deep_ml_contraction_limit_multiplier == - std::numeric_limits::max() ) { + if(b_context.coarsening.deep_ml_contraction_limit_multiplier == + std::numeric_limits::max()) + { b_context.coarsening.deep_ml_contraction_limit_multiplier = - b_context.coarsening.contraction_limit_multiplier; + b_context.coarsening.contraction_limit_multiplier; } b_context.coarsening.contraction_limit_multiplier = - b_context.coarsening.deep_ml_contraction_limit_multiplier; + b_context.coarsening.deep_ml_contraction_limit_multiplier; b_context.refinement = b_context.initial_partitioning.refinement; // Setup Part Weights @@ -308,51 +350,67 @@ Context setupBipartitioningContext(const Context& context, const PartitionID k0 = k / 2 + (k % 2 != 0 ? 
1 : 0); const PartitionID k1 = k / 2; ASSERT(k0 + k1 == k); - if ( context.partition.use_individual_part_weights ) { - const HypernodeWeight max_part_weights_sum = std::accumulate( - context.partition.max_part_weights.cbegin() + start_k, context.partition.max_part_weights.cbegin() + end_k, 0); - const double weight_fraction = total_weight / static_cast(max_part_weights_sum); + if(context.partition.use_individual_part_weights) + { + const HypernodeWeight max_part_weights_sum = + std::accumulate(context.partition.max_part_weights.cbegin() + start_k, + context.partition.max_part_weights.cbegin() + end_k, 0); + const double weight_fraction = + total_weight / static_cast(max_part_weights_sum); ASSERT(weight_fraction <= 1.0); b_context.partition.perfect_balance_part_weights.clear(); b_context.partition.max_part_weights.clear(); HypernodeWeight perfect_weight_p0 = 0; - for ( PartitionID i = start_k; i < start_k + k0; ++i ) { + for(PartitionID i = start_k; i < start_k + k0; ++i) + { perfect_weight_p0 += ceil(weight_fraction * context.partition.max_part_weights[i]); } HypernodeWeight perfect_weight_p1 = 0; - for ( PartitionID i = start_k + k0; i < end_k; ++i ) { + for(PartitionID i = start_k + k0; i < end_k; ++i) + { perfect_weight_p1 += ceil(weight_fraction * context.partition.max_part_weights[i]); } - // In the case of individual part weights, the usual adaptive epsilon formula is not applicable because it - // assumes equal part weights. However, by observing that ceil(current_weight / current_k) is the current - // perfect part weight and (1 + epsilon)ceil(original_weight / original_k) is the maximum part weight, - // we can derive an equivalent formula using the sum of the perfect part weights and the sum of the - // maximum part weights. - // Note that the sum of the perfect part weights might be unequal to the hypergraph weight due to rounding. - // Thus, we need to use the former instead of using the hypergraph weight directly, as otherwise it could - // happen that (1 + epsilon)perfect_part_weight > max_part_weight because of rounding issues. - const double base = max_part_weights_sum / static_cast(perfect_weight_p0 + perfect_weight_p1); - b_context.partition.epsilon = total_weight == 0 ? 0 : - std::min(0.99, std::max(std::pow(base, 1.0 / ceil(log2(static_cast(k)))) - 1.0,0.0)); + // In the case of individual part weights, the usual adaptive epsilon formula is not + // applicable because it assumes equal part weights. However, by observing that + // ceil(current_weight / current_k) is the current perfect part weight and (1 + + // epsilon)ceil(original_weight / original_k) is the maximum part weight, we can + // derive an equivalent formula using the sum of the perfect part weights and the sum + // of the maximum part weights. Note that the sum of the perfect part weights might be + // unequal to the hypergraph weight due to rounding. Thus, we need to use the former + // instead of using the hypergraph weight directly, as otherwise it could happen that + // (1 + epsilon)perfect_part_weight > max_part_weight because of rounding issues. + const double base = + max_part_weights_sum / static_cast(perfect_weight_p0 + perfect_weight_p1); + b_context.partition.epsilon = + total_weight == 0 ? 
+ 0 : + std::min( + 0.99, + std::max(std::pow(base, 1.0 / ceil(log2(static_cast(k)))) - 1.0, + 0.0)); b_context.partition.perfect_balance_part_weights.push_back(perfect_weight_p0); b_context.partition.perfect_balance_part_weights.push_back(perfect_weight_p1); b_context.partition.max_part_weights.push_back( - round((1 + b_context.partition.epsilon) * perfect_weight_p0)); + round((1 + b_context.partition.epsilon) * perfect_weight_p0)); b_context.partition.max_part_weights.push_back( - round((1 + b_context.partition.epsilon) * perfect_weight_p1)); - } else { + round((1 + b_context.partition.epsilon) * perfect_weight_p1)); + } + else + { b_context.partition.epsilon = info.computeAdaptiveEpsilon(total_weight, k); b_context.partition.perfect_balance_part_weights.clear(); b_context.partition.max_part_weights.clear(); b_context.partition.perfect_balance_part_weights.push_back( - std::ceil(k0 / static_cast(k) * static_cast(total_weight))); + std::ceil(k0 / static_cast(k) * static_cast(total_weight))); b_context.partition.perfect_balance_part_weights.push_back( - std::ceil(k1 / static_cast(k) * static_cast(total_weight))); + std::ceil(k1 / static_cast(k) * static_cast(total_weight))); b_context.partition.max_part_weights.push_back( - (1 + b_context.partition.epsilon) * b_context.partition.perfect_balance_part_weights[0]); + (1 + b_context.partition.epsilon) * + b_context.partition.perfect_balance_part_weights[0]); b_context.partition.max_part_weights.push_back( - (1 + b_context.partition.epsilon) * b_context.partition.perfect_balance_part_weights[1]); + (1 + b_context.partition.epsilon) * + b_context.partition.perfect_balance_part_weights[1]); } b_context.setupContractionLimit(total_weight); b_context.setupThreadsPerFlowSearch(); @@ -360,82 +418,94 @@ Context setupBipartitioningContext(const Context& context, return b_context; } -Context setupDeepMultilevelRecursionContext(const Context& context, - const size_t num_threads) { +Context setupDeepMultilevelRecursionContext(const Context &context, + const size_t num_threads) +{ Context r_context(context); r_context.type = ContextType::initial_partitioning; r_context.partition.verbose_output = false; - const double thread_reduction_factor = static_cast(num_threads) / context.shared_memory.num_threads; + const double thread_reduction_factor = + static_cast(num_threads) / context.shared_memory.num_threads; r_context.shared_memory.num_threads = num_threads; r_context.shared_memory.degree_of_parallelism *= thread_reduction_factor; - r_context.initial_partitioning.runs = std::max( - std::ceil(static_cast(context.initial_partitioning.runs) * - thread_reduction_factor), 1.0); + r_context.initial_partitioning.runs = + std::max(std::ceil(static_cast(context.initial_partitioning.runs) * + thread_reduction_factor), + 1.0); return r_context; } -bool usesAdaptiveWeightOfNonCutEdges(const Context& context) { +bool usesAdaptiveWeightOfNonCutEdges(const Context &context) +{ return BipartitioningPolicy::nonCutEdgeMultiplier(context.partition.gain_policy) != 1; } -template -void adaptWeightsOfNonCutEdges(Hypergraph& hg, - const vec& already_cut, - const GainPolicy gain_policy) { - const HyperedgeWeight multiplier = BipartitioningPolicy::nonCutEdgeMultiplier(gain_policy); - if ( multiplier != 1 ) { +template +void adaptWeightsOfNonCutEdges(Hypergraph &hg, const vec &already_cut, + const GainPolicy gain_policy) +{ + const HyperedgeWeight multiplier = + BipartitioningPolicy::nonCutEdgeMultiplier(gain_policy); + if(multiplier != 1) + { ASSERT(static_cast(hg.initialNumEdges()) 
<= already_cut.size()); - hg.doParallelForAllEdges([&](const HyperedgeID& he) { - if ( !already_cut[he] ) { + hg.doParallelForAllEdges([&](const HyperedgeID &he) { + if(!already_cut[he]) + { hg.setEdgeWeight(he, multiplier * hg.edgeWeight(he)); } }); } } -template -void printInitialPartitioningResult(const PartitionedHypergraph& partitioned_hg, - const Context& context, - const PartitionID k, - const RBTree& rb_tree) { - if ( context.partition.verbose_output ) { +template +void printInitialPartitioningResult(const PartitionedHypergraph &partitioned_hg, + const Context &context, const PartitionID k, + const RBTree &rb_tree) +{ + if(context.partition.verbose_output) + { Context m_context(context); m_context.partition.k = k; - m_context.partition.perfect_balance_part_weights = rb_tree.perfectlyBalancedWeightVector(m_context.partition.k); - m_context.partition.max_part_weights = rb_tree.maxPartWeightVector(m_context.partition.k); - io::printPartitioningResults(partitioned_hg, m_context, "Initial Partitioning Results:"); + m_context.partition.perfect_balance_part_weights = + rb_tree.perfectlyBalancedWeightVector(m_context.partition.k); + m_context.partition.max_part_weights = + rb_tree.maxPartWeightVector(m_context.partition.k); + io::printPartitioningResults(partitioned_hg, m_context, + "Initial Partitioning Results:"); } } -template -bool is_balanced(const PartitionedHypergraph& partitioned_hg, - const PartitionID k, - const RBTree& rb_tree) { +template +bool is_balanced(const PartitionedHypergraph &partitioned_hg, const PartitionID k, + const RBTree &rb_tree) +{ bool isBalanced = true; - for ( PartitionID i = 0; i < k; ++i ) { - isBalanced = isBalanced && partitioned_hg.partWeight(i) <= rb_tree.maxPartWeight(k, i); + for(PartitionID i = 0; i < k; ++i) + { + isBalanced = + isBalanced && partitioned_hg.partWeight(i) <= rb_tree.maxPartWeight(k, i); } return isBalanced; } -template -const DeepPartitioningResult& select_best_partition( - const vec>& partitions, - const Context& context, - const PartitionID k, - const RBTree& rb_tree) { +template +const DeepPartitioningResult & +select_best_partition(const vec > &partitions, + const Context &context, const PartitionID k, const RBTree &rb_tree) +{ vec objectives(partitions.size(), 0); vec isBalanced(partitions.size(), false); // Compute objective value and perform balance check for each partition tbb::task_group tg; - for ( size_t i = 0; i < partitions.size(); ++i ) { + for(size_t i = 0; i < partitions.size(); ++i) + { tg.run([&, i] { - objectives[i] = metrics::quality( - partitions[i].partitioned_hg, context); + objectives[i] = metrics::quality(partitions[i].partitioned_hg, context); isBalanced[i] = is_balanced(partitions[i].partitioned_hg, k, rb_tree); }); } @@ -443,11 +513,13 @@ const DeepPartitioningResult& select_best_partition( // We try to choose a balanced partition with the best objective value size_t best_idx = 0; - for ( size_t i = 1; i < partitions.size(); ++i ) { - if ( ( isBalanced[i] && !isBalanced[best_idx] ) || - ( ( ( !isBalanced[i] && !isBalanced[best_idx] ) || - ( isBalanced[i] && isBalanced[best_idx] ) ) && - objectives[i] < objectives[best_idx] ) ) { + for(size_t i = 1; i < partitions.size(); ++i) + { + if((isBalanced[i] && !isBalanced[best_idx]) || + (((!isBalanced[i] && !isBalanced[best_idx]) || + (isBalanced[i] && isBalanced[best_idx])) && + objectives[i] < objectives[best_idx])) + { best_idx = i; } } @@ -455,62 +527,76 @@ const DeepPartitioningResult& select_best_partition( return partitions[best_idx]; } -template 
-DeepPartitioningResult bipartition_block(typename TypeTraits::Hypergraph&& hg, - const Context& context, - const OriginalHypergraphInfo& info, - const PartitionID start_k, - const PartitionID end_k) { +template +DeepPartitioningResult +bipartition_block(typename TypeTraits::Hypergraph &&hg, const Context &context, + const OriginalHypergraphInfo &info, const PartitionID start_k, + const PartitionID end_k) +{ using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; DeepPartitioningResult bipartition; bipartition.hypergraph = std::move(hg); bipartition.valid = true; - if ( bipartition.hypergraph.initialNumNodes() > 0 ) { + if(bipartition.hypergraph.initialNumNodes() > 0) + { // Bipartition block - Context b_context = setupBipartitioningContext( - context, info, start_k, end_k, bipartition.hypergraph.totalWeight(), PartitionedHypergraph::is_graph); - bipartition.partitioned_hg = Multilevel::partition( - bipartition.hypergraph, b_context); - } else { - bipartition.partitioned_hg = PartitionedHypergraph(2, bipartition.hypergraph, parallel_tag_t()); + Context b_context = setupBipartitioningContext(context, info, start_k, end_k, + bipartition.hypergraph.totalWeight(), + PartitionedHypergraph::is_graph); + bipartition.partitioned_hg = + Multilevel::partition(bipartition.hypergraph, b_context); + } + else + { + bipartition.partitioned_hg = + PartitionedHypergraph(2, bipartition.hypergraph, parallel_tag_t()); } return bipartition; } -template -void apply_bipartitions_to_hypergraph(typename TypeTraits::PartitionedHypergraph& partitioned_hg, - GainCache& gain_cache, - const vec& mapping, - const vec>& bipartitions, - const vec& block_ranges) { - partitioned_hg.doParallelForAllNodes([&](const HypernodeID& hn) { +template +void apply_bipartitions_to_hypergraph( + typename TypeTraits::PartitionedHypergraph &partitioned_hg, GainCache &gain_cache, + const vec &mapping, + const vec > &bipartitions, + const vec &block_ranges) +{ + partitioned_hg.doParallelForAllNodes([&](const HypernodeID &hn) { const PartitionID from = partitioned_hg.partID(hn); ASSERT(static_cast(from) < bipartitions.size()); PartitionID to = kInvalidPartition; - const DeepPartitioningResult& bipartition = bipartitions[from]; - if ( bipartition.valid ) { + const DeepPartitioningResult &bipartition = bipartitions[from]; + if(bipartition.valid) + { ASSERT(static_cast(hn) < mapping.size()); const HypernodeID mapped_hn = mapping[hn]; - to = bipartition.partitioned_hg.partID(mapped_hn) == 0 ? - block_ranges[from] : block_ranges[from] + 1; - } else { + to = bipartition.partitioned_hg.partID(mapped_hn) == 0 ? 
block_ranges[from] : + block_ranges[from] + 1; + } + else + { to = block_ranges[from]; } ASSERT(to > kInvalidPartition && to < block_ranges.back()); - if ( from != to ) { - if ( gain_cache.isInitialized() ) { + if(from != to) + { + if(gain_cache.isInitialized()) + { partitioned_hg.changeNodePart(gain_cache, hn, from, to); - } else { + } + else + { partitioned_hg.changeNodePart(hn, from, to); } } }); - if ( GainCache::invalidates_entries && gain_cache.isInitialized() ) { - partitioned_hg.doParallelForAllNodes([&](const HypernodeID& hn) { + if(GainCache::invalidates_entries && gain_cache.isInitialized()) + { + partitioned_hg.doParallelForAllNodes([&](const HypernodeID &hn) { gain_cache.recomputeInvalidTerms(partitioned_hg, hn); }); } @@ -518,77 +604,90 @@ void apply_bipartitions_to_hypergraph(typename TypeTraits::PartitionedHypergraph HEAVY_REFINEMENT_ASSERT(partitioned_hg.checkTrackedPartitionInformation(gain_cache)); } -template -void apply_bipartitions_to_hypergraph(typename TypeTraits::PartitionedHypergraph& partitioned_hg, - gain_cache_t gain_cache, - const vec& mapping, - const vec>& bipartitions, - const vec& block_ranges) { +template +void apply_bipartitions_to_hypergraph( + typename TypeTraits::PartitionedHypergraph &partitioned_hg, gain_cache_t gain_cache, + const vec &mapping, + const vec > &bipartitions, + const vec &block_ranges) +{ using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - GainCachePtr::applyWithConcreteGainCacheForHG([&](auto& gain_cache) { - apply_bipartitions_to_hypergraph(partitioned_hg,gain_cache, mapping, bipartitions, block_ranges); - }, gain_cache); + GainCachePtr::applyWithConcreteGainCacheForHG( + [&](auto &gain_cache) { + apply_bipartitions_to_hypergraph(partitioned_hg, gain_cache, mapping, + bipartitions, block_ranges); + }, + gain_cache); } -template -void bipartition_each_block(typename TypeTraits::PartitionedHypergraph& partitioned_hg, - const Context& context, - gain_cache_t gain_cache, - const OriginalHypergraphInfo& info, - const RBTree& rb_tree, - vec& already_cut, - const PartitionID current_k, +template +void bipartition_each_block(typename TypeTraits::PartitionedHypergraph &partitioned_hg, + const Context &context, gain_cache_t gain_cache, + const OriginalHypergraphInfo &info, const RBTree &rb_tree, + vec &already_cut, const PartitionID current_k, const HyperedgeWeight current_objective, - const bool progress_bar_enabled) { + const bool progress_bar_enabled) +{ using Hypergraph = typename TypeTraits::Hypergraph; - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); // Extract all blocks of hypergraph timer.start_timer("extract_blocks", "Extract Blocks"); const bool cut_net_splitting = - BipartitioningPolicy::useCutNetSplitting(context.partition.gain_policy); - if ( !already_cut.empty() ) { + BipartitioningPolicy::useCutNetSplitting(context.partition.gain_policy); + if(!already_cut.empty()) + { ASSERT(static_cast(partitioned_hg.initialNumEdges()) <= already_cut.size()); partitioned_hg.doParallelForAllEdges([&](const HyperedgeID he) { already_cut[he] = partitioned_hg.connectivity(he) > 1; }); } - auto extracted_blocks = partitioned_hg.extractAllBlocks(current_k, !already_cut.empty() ? - &already_cut : nullptr, cut_net_splitting, context.preprocessing.stable_construction_of_incident_edges); + auto extracted_blocks = partitioned_hg.extractAllBlocks( + current_k, !already_cut.empty() ? 
&already_cut : nullptr, cut_net_splitting, + context.preprocessing.stable_construction_of_incident_edges); vec hypergraphs(current_k); - for ( PartitionID block = 0; block < current_k; ++block ) { + for(PartitionID block = 0; block < current_k; ++block) + { hypergraphs[block] = std::move(extracted_blocks.first[block].hg); } - const vec& mapping = extracted_blocks.second; + const vec &mapping = extracted_blocks.second; timer.stop_timer("extract_blocks"); timer.start_timer("bipartition_blocks", "Bipartition Blocks"); const bool was_enabled_before = disableTimerAndStats(context); // n-level disables timer utils::ProgressBar progress(current_k, current_objective, progress_bar_enabled); - vec> bipartitions(current_k); + vec > bipartitions(current_k); vec block_ranges(1, 0); tbb::task_group tg; - for ( PartitionID block = 0; block < current_k; ++block ) { + for(PartitionID block = 0; block < current_k; ++block) + { // The recursive bipartitioning tree stores for each block of the current partition - // the number of blocks in which we have to further bipartition the corresponding block - // recursively. This is important for computing the adjusted imbalance factor to ensure - // that the final k-way partition is balanced. + // the number of blocks in which we have to further bipartition the corresponding + // block recursively. This is important for computing the adjusted imbalance factor to + // ensure that the final k-way partition is balanced. const PartitionID desired_blocks = rb_tree.desiredNumberOfBlocks(current_k, block); - if ( desired_blocks > 1 ) { + if(desired_blocks > 1) + { // Spawn a task that bipartitions the corresponding block tg.run([&, block] { const auto target_blocks = rb_tree.targetBlocksInFinalPartition(current_k, block); adaptWeightsOfNonCutEdges(hypergraphs[block], - extracted_blocks.first[block].already_cut, context.partition.gain_policy); - bipartitions[block] = bipartition_block(std::move(hypergraphs[block]), context, - info, target_blocks.first, target_blocks.second); + extracted_blocks.first[block].already_cut, + context.partition.gain_policy); + bipartitions[block] = + bipartition_block(std::move(hypergraphs[block]), context, info, + target_blocks.first, target_blocks.second); bipartitions[block].partitioned_hg.setHypergraph(bipartitions[block].hypergraph); - progress.addToObjective(progress_bar_enabled ? - metrics::quality(bipartitions[block].partitioned_hg, Objective::cut) : 0 ); + progress.addToObjective( + progress_bar_enabled ? 
+ metrics::quality(bipartitions[block].partitioned_hg, Objective::cut) : + 0); progress += 1; }); block_ranges.push_back(block_ranges.back() + 2); - } else { + } + else + { // No further bipartitions required for the corresponding block bipartitions[block].valid = false; block_ranges.push_back(block_ranges.back() + 1); @@ -600,24 +699,31 @@ void bipartition_each_block(typename TypeTraits::PartitionedHypergraph& partitio timer.stop_timer("bipartition_blocks"); timer.start_timer("apply_bipartitions", "Apply Bipartition"); - apply_bipartitions_to_hypergraph(partitioned_hg, gain_cache, mapping, bipartitions, block_ranges); + apply_bipartitions_to_hypergraph(partitioned_hg, gain_cache, mapping, bipartitions, + block_ranges); timer.stop_timer("apply_bipartitions"); - ASSERT([&] { - HyperedgeWeight expected_objective = current_objective; - for ( PartitionID block = 0; block < current_k; ++block ) { - const PartitionID desired_blocks = rb_tree.desiredNumberOfBlocks(current_k, block); - if ( desired_blocks > 1 ) { - expected_objective += metrics::quality( - bipartitions[block].partitioned_hg, Objective::cut); - } - } - if ( expected_objective != metrics::quality(partitioned_hg, context) ) { - LOG << V(expected_objective) << V(metrics::quality(partitioned_hg, context)); - return false; - } - return true; - }(), "Cut of extracted blocks does not sum up to current objective"); + ASSERT( + [&] { + HyperedgeWeight expected_objective = current_objective; + for(PartitionID block = 0; block < current_k; ++block) + { + const PartitionID desired_blocks = + rb_tree.desiredNumberOfBlocks(current_k, block); + if(desired_blocks > 1) + { + expected_objective += + metrics::quality(bipartitions[block].partitioned_hg, Objective::cut); + } + } + if(expected_objective != metrics::quality(partitioned_hg, context)) + { + LOG << V(expected_objective) << V(metrics::quality(partitioned_hg, context)); + return false; + } + return true; + }(), + "Cut of extracted blocks does not sum up to current objective"); timer.start_timer("free_hypergraphs", "Free Hypergraphs"); tbb::parallel_for(UL(0), bipartitions.size(), [&](const size_t i) { @@ -627,43 +733,54 @@ void bipartition_each_block(typename TypeTraits::PartitionedHypergraph& partitio timer.stop_timer("free_hypergraphs"); } -template -DeepPartitioningResult deep_multilevel_recursion(const typename TypeTraits::Hypergraph& hypergraph, - const Context& context, - const OriginalHypergraphInfo& info, - const RBTree& rb_tree, - const size_t num_threads); - -template -PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergraph& partitioned_hg, - const Context& c, - const OriginalHypergraphInfo& info, - const RBTree& rb_tree) { +template +DeepPartitioningResult +deep_multilevel_recursion(const typename TypeTraits::Hypergraph &hypergraph, + const Context &context, const OriginalHypergraphInfo &info, + const RBTree &rb_tree, const size_t num_threads); + +template +PartitionID +deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergraph &partitioned_hg, + const Context &c, const OriginalHypergraphInfo &info, + const RBTree &rb_tree) +{ using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - Hypergraph& hypergraph = partitioned_hg.hypergraph(); + Hypergraph &hypergraph = partitioned_hg.hypergraph(); Context context(c); // ################## COARSENING ################## mt_kahypar::io::printCoarseningBanner(context); - // We change the contraction limit to 2C nodes which is the 
contraction limit where traditional - // multilevel partitioning bipartitions the smallest hypergraph into two blocks. - const HypernodeID contraction_limit_for_bipartitioning = 2 * context.coarsening.contraction_limit_multiplier; + // We change the contraction limit to 2C nodes which is the contraction limit where + // traditional multilevel partitioning bipartitions the smallest hypergraph into two + // blocks. + const HypernodeID contraction_limit_for_bipartitioning = + 2 * context.coarsening.contraction_limit_multiplier; context.coarsening.contraction_limit = contraction_limit_for_bipartitioning; - PartitionID actual_k = std::max(std::min(static_cast(context.partition.k), - partitioned_hg.initialNumNodes() / context.coarsening.contraction_limit_multiplier), ID(2)); - auto adapt_max_allowed_node_weight = [&](const HypernodeID current_num_nodes, bool& should_continue) { + PartitionID actual_k = + std::max(std::min(static_cast(context.partition.k), + partitioned_hg.initialNumNodes() / + context.coarsening.contraction_limit_multiplier), + ID(2)); + auto adapt_max_allowed_node_weight = [&](const HypernodeID current_num_nodes, + bool &should_continue) { // In case our actual k is not two, we check if the current number of nodes is smaller // than k * contraction_limit. If so, we increase the maximum allowed node weight. - while ( ( current_num_nodes <= actual_k * context.coarsening.contraction_limit || - !should_continue ) && actual_k > 2 ) { + while((current_num_nodes <= actual_k * context.coarsening.contraction_limit || + !should_continue) && + actual_k > 2) + { actual_k = std::max(actual_k / 2, 2); - const double hypernode_weight_fraction = context.coarsening.max_allowed_weight_multiplier / + const double hypernode_weight_fraction = + context.coarsening.max_allowed_weight_multiplier / static_cast(actual_k * context.coarsening.contraction_limit_multiplier); - context.coarsening.max_allowed_node_weight = std::ceil(hypernode_weight_fraction * hypergraph.totalWeight()); + context.coarsening.max_allowed_node_weight = + std::ceil(hypernode_weight_fraction * hypergraph.totalWeight()); should_continue = true; - DBG << "Set max allowed node weight to" << context.coarsening.max_allowed_node_weight + DBG << "Set max allowed node weight to" + << context.coarsening.max_allowed_node_weight << "( Current Number of Nodes =" << current_num_nodes << ")"; } }; @@ -672,36 +789,44 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr UncoarseningData uncoarseningData(nlevel, hypergraph, context); uncoarseningData.setPartitionedHypergraph(std::move(partitioned_hg)); - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); bool no_further_contractions_possible = true; bool should_continue = true; adapt_max_allowed_node_weight(hypergraph.initialNumNodes(), should_continue); timer.start_timer("coarsening", "Coarsening"); { std::unique_ptr coarsener = CoarsenerFactory::getInstance().createObject( - context.coarsening.algorithm, utils::hypergraph_cast(hypergraph), - context, uncoarsening::to_pointer(uncoarseningData)); + context.coarsening.algorithm, utils::hypergraph_cast(hypergraph), context, + uncoarsening::to_pointer(uncoarseningData)); // Perform coarsening coarsener->initialize(); int pass_nr = 1; // Coarsening proceeds until we reach the contraction limit (!shouldNotTerminate()) or // no further contractions are possible (should_continue) - while ( 
coarsener->shouldNotTerminate() && should_continue ) { + while(coarsener->shouldNotTerminate() && should_continue) + { DBG << "Coarsening Pass" << pass_nr << "- Number of Nodes =" << coarsener->currentNumberOfNodes() - << "- Number of HEs =" << (nlevel ? 0 : - utils::cast(coarsener->coarsestHypergraph()).initialNumEdges()) - << "- Number of Pins =" << (nlevel ? 0 : - utils::cast(coarsener->coarsestHypergraph()).initialNumPins()); - - // In the coarsening phase, we maintain the invariant that t threads process a hypergraph with - // at least t * C nodes (C = contraction_limit_for_bipartitioning). If this invariant is violated, - // we terminate coarsening and call the deep multilevel scheme recursively in parallel with the - // appropriate number of threads to restore the invariant. + << "- Number of HEs =" + << (nlevel ? 0 : + utils::cast(coarsener->coarsestHypergraph()) + .initialNumEdges()) + << "- Number of Pins =" + << (nlevel ? 0 : + utils::cast(coarsener->coarsestHypergraph()) + .initialNumPins()); + + // In the coarsening phase, we maintain the invariant that t threads process a + // hypergraph with at least t * C nodes (C = contraction_limit_for_bipartitioning). + // If this invariant is violated, we terminate coarsening and call the deep + // multilevel scheme recursively in parallel with the appropriate number of threads + // to restore the invariant. const HypernodeID current_num_nodes = coarsener->currentNumberOfNodes(); - if ( context.partition.perform_parallel_recursion_in_deep_multilevel && - current_num_nodes < context.shared_memory.num_threads * contraction_limit_for_bipartitioning ) { + if(context.partition.perform_parallel_recursion_in_deep_multilevel && + current_num_nodes < + context.shared_memory.num_threads * contraction_limit_for_bipartitioning) + { no_further_contractions_possible = false; break; } @@ -712,12 +837,12 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr } coarsener->terminate(); - - if (context.partition.verbose_output) { + if(context.partition.verbose_output) + { mt_kahypar_hypergraph_t coarsestHypergraph = coarsener->coarsestHypergraph(); - mt_kahypar::io::printHypergraphInfo( - utils::cast(coarsestHypergraph), context, - "Coarsened Hypergraph", context.partition.show_memory_consumption); + mt_kahypar::io::printHypergraphInfo(utils::cast(coarsestHypergraph), + context, "Coarsened Hypergraph", + context.partition.show_memory_consumption); } } timer.stop_timer("coarsening"); @@ -726,9 +851,10 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr io::printInitialPartitioningBanner(context); timer.start_timer("initial_partitioning", "Initial Partitioning"); const bool was_enabled_before = disableTimerAndStats(context); - PartitionedHypergraph& coarsest_phg = uncoarseningData.coarsestPartitionedHypergraph(); + PartitionedHypergraph &coarsest_phg = uncoarseningData.coarsestPartitionedHypergraph(); PartitionID current_k = kInvalidPartition; - if ( no_further_contractions_possible ) { + if(no_further_contractions_possible) + { DBG << "Smallest Hypergraph" << "- Number of Nodes =" << coarsest_phg.initialNumNodes() << "- Number of HEs =" << coarsest_phg.initialNumEdges() @@ -738,8 +864,8 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr // and continue with uncoarsening. 
const auto target_blocks = rb_tree.targetBlocksInFinalPartition(1, 0); Context b_context = setupBipartitioningContext( - context, info, target_blocks.first, target_blocks.second, - hypergraph.totalWeight(), Hypergraph::is_graph); + context, info, target_blocks.first, target_blocks.second, + hypergraph.totalWeight(), Hypergraph::is_graph); Multilevel::partition(coarsest_phg, b_context); current_k = 2; @@ -747,7 +873,9 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr << "- Objective =" << metrics::quality(coarsest_phg, b_context) << "- Imbalance =" << metrics::imbalance(coarsest_phg, b_context) << "- Epsilon =" << b_context.partition.epsilon; - } else { + } + else + { // If we do not reach the contraction limit, then the invariant that t threads // work on a hypergraph with at least t * C nodes is violated. To restore the // invariant, we call the deep multilevel scheme recursively in parallel. Each @@ -757,15 +885,17 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr // Determine the number of parallel recursive calls and the number of threads // used for each recursive call. - const Hypergraph& coarsest_hg = coarsest_phg.hypergraph(); + const Hypergraph &coarsest_hg = coarsest_phg.hypergraph(); const HypernodeID current_num_nodes = coarsest_hg.initialNumNodes(); - size_t num_threads_per_recursion = std::max(current_num_nodes, - contraction_limit_for_bipartitioning ) / contraction_limit_for_bipartitioning; - const size_t num_parallel_calls = context.shared_memory.num_threads / num_threads_per_recursion + - (context.shared_memory.num_threads % num_threads_per_recursion != 0); - num_threads_per_recursion = context.shared_memory.num_threads / num_parallel_calls + - (context.shared_memory.num_threads % num_parallel_calls != 0); - + size_t num_threads_per_recursion = + std::max(current_num_nodes, contraction_limit_for_bipartitioning) / + contraction_limit_for_bipartitioning; + const size_t num_parallel_calls = + context.shared_memory.num_threads / num_threads_per_recursion + + (context.shared_memory.num_threads % num_threads_per_recursion != 0); + num_threads_per_recursion = + context.shared_memory.num_threads / num_parallel_calls + + (context.shared_memory.num_threads % num_parallel_calls != 0); DBG << BOLD << "Perform Parallel Recursion" << END << "- Num. 
Nodes =" << current_num_nodes @@ -775,30 +905,38 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr // Call deep multilevel scheme recursively tbb::task_group tg; - vec> results(num_parallel_calls); - for ( size_t i = 0; i < num_parallel_calls; ++i ) { + vec > results(num_parallel_calls); + for(size_t i = 0; i < num_parallel_calls; ++i) + { tg.run([&, i] { - const size_t num_threads = std::min(num_threads_per_recursion, - context.shared_memory.num_threads - i * num_threads_per_recursion); - results[i] = deep_multilevel_recursion(coarsest_hg, context, info, rb_tree, num_threads); + const size_t num_threads = + std::min(num_threads_per_recursion, + context.shared_memory.num_threads - i * num_threads_per_recursion); + results[i] = deep_multilevel_recursion(coarsest_hg, context, info, + rb_tree, num_threads); results[i].partitioned_hg.setHypergraph(results[i].hypergraph); }); } tg.wait(); - ASSERT([&] { - const PartitionID expected_k = results[0].k; - for ( size_t i = 1; i < num_parallel_calls; ++i ) { - if ( expected_k != results[i].k ) return false; - } - return true; - }(), "Not all hypergraphs from recursion are partitioned into the same number of blocks!"); + ASSERT( + [&] { + const PartitionID expected_k = results[0].k; + for(size_t i = 1; i < num_parallel_calls; ++i) + { + if(expected_k != results[i].k) + return false; + } + return true; + }(), + "Not all hypergraphs from recursion are partitioned into the same number of blocks!"); current_k = results[0].k; // Apply best bipartition from the recursive calls to the current hypergraph - const DeepPartitioningResult& best = select_best_partition(results, context, current_k, rb_tree); - const PartitionedHypergraph& best_phg = best.partitioned_hg; - coarsest_phg.doParallelForAllNodes([&](const HypernodeID& hn) { + const DeepPartitioningResult &best = + select_best_partition(results, context, current_k, rb_tree); + const PartitionedHypergraph &best_phg = best.partitioned_hg; + coarsest_phg.doParallelForAllNodes([&](const HypernodeID &hn) { const PartitionID block = best_phg.partID(hn); coarsest_phg.setOnlyNodePart(hn, block); }); @@ -806,14 +944,17 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr DBG << BOLD << "Best Partition from Recursive Calls" << END << "- Objective =" << metrics::quality(coarsest_phg, context) - << "- isBalanced =" << std::boolalpha << is_balanced(coarsest_phg, current_k, rb_tree); + << "- isBalanced =" << std::boolalpha + << is_balanced(coarsest_phg, current_k, rb_tree); } ASSERT(current_k != kInvalidPartition); printInitialPartitioningResult(coarsest_phg, context, current_k, rb_tree); - if ( context.partition.verbose_output ) { - utils::Utilities::instance().getInitialPartitioningStats( - context.utility_id).printInitialPartitioningStats(); + if(context.partition.verbose_output) + { + utils::Utilities::instance() + .getInitialPartitioningStats(context.utility_id) + .printInitialPartitioningStats(); } enableTimerAndStats(context, was_enabled_before); timer.stop_timer("initial_partitioning"); @@ -821,16 +962,19 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr // ################## UNCOARSENING ################## io::printLocalSearchBanner(context); timer.start_timer("refinement", "Refinement"); - const bool progress_bar_enabled = context.partition.verbose_output && - context.partition.enable_progress_bar && !debug; + const bool progress_bar_enabled = + context.partition.verbose_output && 
context.partition.enable_progress_bar && !debug; context.partition.enable_progress_bar = false; - std::unique_ptr> uncoarsener(nullptr); - if (uncoarseningData.nlevel) { - uncoarsener = std::make_unique>( - hypergraph, context, uncoarseningData, nullptr); - } else { - uncoarsener = std::make_unique>( - hypergraph, context, uncoarseningData, nullptr); + std::unique_ptr > uncoarsener(nullptr); + if(uncoarseningData.nlevel) + { + uncoarsener = std::make_unique >( + hypergraph, context, uncoarseningData, nullptr); + } + else + { + uncoarsener = std::make_unique >( + hypergraph, context, uncoarseningData, nullptr); } uncoarsener->initialize(); @@ -843,11 +987,13 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr auto adapt_contraction_limit_for_recursive_bipartitioning = [&](const PartitionID k) { current_k = k; next_k = rb_tree.nextK(current_k); - contraction_limit_for_rb = next_k != kInvalidPartition ? - next_k * context.coarsening.contraction_limit_multiplier : - std::numeric_limits::max(); + contraction_limit_for_rb = + next_k != kInvalidPartition ? + next_k * context.coarsening.contraction_limit_multiplier : + std::numeric_limits::max(); context.partition.k = current_k; - context.partition.perfect_balance_part_weights = rb_tree.perfectlyBalancedWeightVector(current_k); + context.partition.perfect_balance_part_weights = + rb_tree.perfectlyBalancedWeightVector(current_k); context.partition.max_part_weights = rb_tree.maxPartWeightVector(current_k); context.setupThreadsPerFlowSearch(); uncoarsener->updateMetrics(); @@ -855,36 +1001,43 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr adapt_contraction_limit_for_recursive_bipartitioning(current_k); // Start uncoarsening - vec already_cut(usesAdaptiveWeightOfNonCutEdges(context) ? - partitioned_hg.initialNumEdges() : 0, 0); - while ( !uncoarsener->isTopLevel() ) { + vec already_cut( + usesAdaptiveWeightOfNonCutEdges(context) ? partitioned_hg.initialNumEdges() : 0, 0); + while(!uncoarsener->isTopLevel()) + { // In the uncoarsening phase, we recursively bipartition each block when // the number of nodes gets larger than k' * C. 
- while ( uncoarsener->currentNumberOfNodes() >= contraction_limit_for_rb ) { - PartitionedHypergraph& current_phg = uncoarsener->currentPartitionedHypergraph(); - if ( context.partition.verbose_output && context.type == ContextType::main ) { + while(uncoarsener->currentNumberOfNodes() >= contraction_limit_for_rb) + { + PartitionedHypergraph &current_phg = uncoarsener->currentPartitionedHypergraph(); + if(context.partition.verbose_output && context.type == ContextType::main) + { LOG << "Extend number of blocks from" << current_k << "to" << next_k << "( Current Number of Nodes =" << current_phg.initialNumNodes() << ")"; } timer.start_timer("bipartitioning", "Bipartitioning"); - bipartition_each_block(current_phg, context, uncoarsener->getGainCache(), - info, rb_tree, already_cut, current_k, uncoarsener->getObjective(), progress_bar_enabled); + bipartition_each_block( + current_phg, context, uncoarsener->getGainCache(), info, rb_tree, already_cut, + current_k, uncoarsener->getObjective(), progress_bar_enabled); timer.stop_timer("bipartitioning"); DBG << "Increase number of blocks from" << current_k << "to" << next_k << "( Number of Nodes =" << current_phg.initialNumNodes() << "- Objective =" << metrics::quality(current_phg, context) - << "- isBalanced =" << std::boolalpha << is_balanced(current_phg, next_k, rb_tree); + << "- isBalanced =" << std::boolalpha + << is_balanced(current_phg, next_k, rb_tree); adapt_contraction_limit_for_recursive_bipartitioning(next_k); // Improve partition const HyperedgeWeight obj_before = uncoarsener->getObjective(); uncoarsener->refine(); const HyperedgeWeight obj_after = uncoarsener->getObjective(); - if ( context.partition.verbose_output && context.type == ContextType::main ) { - LOG << "Refinement improved" << context.partition.objective - << "from" << obj_before << "to" << obj_after - << "( Improvement =" << ((double(obj_before) / obj_after - 1.0) * 100.0) << "% )\n"; + if(context.partition.verbose_output && context.type == ContextType::main) + { + LOG << "Refinement improved" << context.partition.objective << "from" + << obj_before << "to" << obj_after + << "( Improvement =" << ((double(obj_before) / obj_after - 1.0) * 100.0) + << "% )\n"; } } @@ -892,10 +1045,12 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr const HyperedgeWeight obj_before = uncoarsener->getObjective(); uncoarsener->projectToNextLevelAndRefine(); const HyperedgeWeight obj_after = uncoarsener->getObjective(); - if ( context.partition.verbose_output && context.type == ContextType::main ) { + if(context.partition.verbose_output && context.type == ContextType::main) + { LOG << "Refinement after projecting partition to next level improved" << context.partition.objective << "from" << obj_before << "to" << obj_after - << "( Improvement =" << ((double(obj_before) / obj_after - 1.0) * 100.0) << "% )\n"; + << "( Improvement =" << ((double(obj_before) / obj_after - 1.0) * 100.0) + << "% )\n"; } } @@ -903,39 +1058,47 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr // Note that in case we reach the input hypergraph (ContextType::main) and // we still did not reach the desired number of blocks, we recursively bipartition // each block until the number of blocks equals the desired number of blocks. 
- while ( uncoarsener->currentNumberOfNodes() >= contraction_limit_for_rb || - ( context.type == ContextType::main && current_k != final_k ) ) { - PartitionedHypergraph& current_phg = uncoarsener->currentPartitionedHypergraph(); - if ( context.partition.verbose_output && context.type == ContextType::main ) { + while(uncoarsener->currentNumberOfNodes() >= contraction_limit_for_rb || + (context.type == ContextType::main && current_k != final_k)) + { + PartitionedHypergraph &current_phg = uncoarsener->currentPartitionedHypergraph(); + if(context.partition.verbose_output && context.type == ContextType::main) + { LOG << "Extend number of blocks from" << current_k << "to" << next_k << "( Current Number of Nodes =" << current_phg.initialNumNodes() << ")"; } timer.start_timer("bipartitioning", "Bipartitioning"); bipartition_each_block(current_phg, context, uncoarsener->getGainCache(), - info, rb_tree, already_cut, current_k, uncoarsener->getObjective(), progress_bar_enabled); + info, rb_tree, already_cut, current_k, + uncoarsener->getObjective(), progress_bar_enabled); timer.stop_timer("bipartitioning"); DBG << "Increase number of blocks from" << current_k << "to" << next_k << "( Num Nodes =" << current_phg.initialNumNodes() << "- Objective =" << metrics::quality(current_phg, context) - << "- isBalanced =" << std::boolalpha << is_balanced(current_phg, next_k, rb_tree); + << "- isBalanced =" << std::boolalpha + << is_balanced(current_phg, next_k, rb_tree); adapt_contraction_limit_for_recursive_bipartitioning(next_k); // Improve partition const HyperedgeWeight obj_before = uncoarsener->getObjective(); uncoarsener->refine(); const HyperedgeWeight obj_after = uncoarsener->getObjective(); - if ( context.partition.verbose_output && context.type == ContextType::main ) { - LOG << "Refinement improved" << context.partition.objective - << "from" << obj_before << "to" << obj_after - << "( Improvement =" << ((double(obj_before) / obj_after - 1.0) * 100.0) << "% )\n"; + if(context.partition.verbose_output && context.type == ContextType::main) + { + LOG << "Refinement improved" << context.partition.objective << "from" << obj_before + << "to" << obj_after + << "( Improvement =" << ((double(obj_before) / obj_after - 1.0) * 100.0) + << "% )\n"; } } - if ( context.type == ContextType::main ) { + if(context.type == ContextType::main) + { // The choice of the maximum allowed node weight and adaptive imbalance ratio should // ensure that we find on each level a balanced partition for unweighted inputs. Thus, - // we do not use rebalancing on each level as in the original deep multilevel algorithm. + // we do not use rebalancing on each level as in the original deep multilevel + // algorithm. 
uncoarsener->rebalancing(); } @@ -947,48 +1110,57 @@ PartitionID deep_multilevel_partitioning(typename TypeTraits::PartitionedHypergr return current_k; } -template -DeepPartitioningResult deep_multilevel_recursion(const typename TypeTraits::Hypergraph& hypergraph, - const Context& context, - const OriginalHypergraphInfo& info, - const RBTree& rb_tree, - const size_t num_threads) { +template +DeepPartitioningResult +deep_multilevel_recursion(const typename TypeTraits::Hypergraph &hypergraph, + const Context &context, const OriginalHypergraphInfo &info, + const RBTree &rb_tree, const size_t num_threads) +{ using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; DeepPartitioningResult result; Context r_context = setupDeepMultilevelRecursionContext(context, num_threads); - r_context.partition.k = rb_tree.get_maximum_number_of_blocks(hypergraph.initialNumNodes()); - r_context.partition.perfect_balance_part_weights = rb_tree.perfectlyBalancedWeightVector(r_context.partition.k); - r_context.partition.max_part_weights = rb_tree.maxPartWeightVector(r_context.partition.k); + r_context.partition.k = + rb_tree.get_maximum_number_of_blocks(hypergraph.initialNumNodes()); + r_context.partition.perfect_balance_part_weights = + rb_tree.perfectlyBalancedWeightVector(r_context.partition.k); + r_context.partition.max_part_weights = + rb_tree.maxPartWeightVector(r_context.partition.k); // Copy hypergraph result.hypergraph = hypergraph.copy(parallel_tag_t()); - result.partitioned_hg = PartitionedHypergraph( - r_context.partition.k, result.hypergraph, parallel_tag_t()); + result.partitioned_hg = + PartitionedHypergraph(r_context.partition.k, result.hypergraph, parallel_tag_t()); result.valid = true; // Recursively call deep multilevel partitioning - result.k = deep_multilevel_partitioning(result.partitioned_hg, r_context, info, rb_tree); + result.k = deep_multilevel_partitioning(result.partitioned_hg, r_context, + info, rb_tree); return result; } } -template -typename TypeTraits::PartitionedHypergraph DeepMultilevel::partition( - Hypergraph& hypergraph, const Context& context) { +template +typename TypeTraits::PartitionedHypergraph +DeepMultilevel::partition(Hypergraph &hypergraph, const Context &context) +{ // TODO: Memory for partitioned hypergraph is not available at this point - PartitionedHypergraph partitioned_hypergraph( - context.partition.k, hypergraph, parallel_tag_t()); + PartitionedHypergraph partitioned_hypergraph(context.partition.k, hypergraph, + parallel_tag_t()); partition(partitioned_hypergraph, context); return partitioned_hypergraph; } -template -void DeepMultilevel::partition(PartitionedHypergraph& hypergraph, const Context& context) { +template +void DeepMultilevel::partition(PartitionedHypergraph &hypergraph, + const Context &context) +{ RBTree rb_tree(context); - deep_multilevel_partitioning(hypergraph, context, - OriginalHypergraphInfo { hypergraph.totalWeight(), - context.partition.k, context.partition.epsilon }, rb_tree); + deep_multilevel_partitioning( + hypergraph, context, + OriginalHypergraphInfo{ hypergraph.totalWeight(), context.partition.k, + context.partition.epsilon }, + rb_tree); } INSTANTIATE_CLASS_WITH_TYPE_TRAITS(DeepMultilevel) diff --git a/mt-kahypar/partition/deep_multilevel.h b/mt-kahypar/partition/deep_multilevel.h index 6e431b2ea..1f741fba1 100644 --- a/mt-kahypar/partition/deep_multilevel.h +++ b/mt-kahypar/partition/deep_multilevel.h @@ -31,15 +31,16 @@ namespace mt_kahypar { -template -class DeepMultilevel { +template +class DeepMultilevel +{ 
using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - static PartitionedHypergraph partition(Hypergraph& hypergraph, const Context& context); - static void partition(PartitionedHypergraph& hypergraph, const Context& context); +public: + static PartitionedHypergraph partition(Hypergraph &hypergraph, const Context &context); + static void partition(PartitionedHypergraph &hypergraph, const Context &context); }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/factories.h b/mt-kahypar/partition/factories.h index cc57ddc6f..b5208cbb2 100644 --- a/mt-kahypar/partition/factories.h +++ b/mt-kahypar/partition/factories.h @@ -33,35 +33,50 @@ #include "mt-kahypar/partition/coarsening/i_coarsener.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/initial_partitioning/i_initial_partitioner.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" -#include "mt-kahypar/partition/refinement/i_rebalancer.h" #include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" #include "mt-kahypar/partition/refinement/fm/fm_commons.h" #include "mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h" #include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" +#include "mt-kahypar/partition/refinement/i_rebalancer.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" namespace mt_kahypar { typedef struct ip_data_container_s ip_data_container_t; -using CoarsenerFactory = kahypar::meta::Factory; -using InitialPartitionerFactory = kahypar::meta::Factory; +using CoarsenerFactory = + kahypar::meta::Factory; +using InitialPartitionerFactory = kahypar::meta::Factory< + InitialPartitioningAlgorithm, + IInitialPartitioner *(*)(const InitialPartitioningAlgorithm, ip_data_container_t *, + const Context &, const int, const int)>; -using LabelPropagationFactory = kahypar::meta::Factory; +using LabelPropagationFactory = + kahypar::meta::Factory; -using FMFactory = kahypar::meta::Factory; +using FMFactory = + kahypar::meta::Factory; -using FMStrategyFactory = kahypar::meta::Factory; +using FMStrategyFactory = + kahypar::meta::Factory; -using FlowSchedulerFactory = kahypar::meta::Factory; +using FlowSchedulerFactory = + kahypar::meta::Factory; -using RebalancerFactory = kahypar::meta::Factory; +using RebalancerFactory = + kahypar::meta::Factory; -using FlowRefinementFactory = kahypar::meta::Factory; -} // namespace mt_kahypar +using FlowRefinementFactory = + kahypar::meta::Factory; +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/initial_partitioning/bfs_initial_partitioner.cpp b/mt-kahypar/partition/initial_partitioning/bfs_initial_partitioner.cpp index f39b368c6..5cce55308 100644 --- a/mt-kahypar/partition/initial_partitioning/bfs_initial_partitioner.cpp +++ b/mt-kahypar/partition/initial_partitioning/bfs_initial_partitioner.cpp @@ -32,28 +32,33 @@ namespace mt_kahypar { -template -void BFSInitialPartitioner::partitionImpl() { - if ( _ip_data.should_initial_partitioner_run(InitialPartitioningAlgorithm::bfs) ) { +template +void BFSInitialPartitioner::partitionImpl() +{ + if(_ip_data.should_initial_partitioner_run(InitialPartitioningAlgorithm::bfs)) + { HighResClockTimepoint start = std::chrono::high_resolution_clock::now(); - PartitionedHypergraph& hypergraph = _ip_data.local_partitioned_hypergraph(); - kahypar::ds::FastResetFlagArray<>& hypernodes_in_queue = - _ip_data.local_hypernode_fast_reset_flag_array(); - kahypar::ds::FastResetFlagArray<>& 
hyperedges_in_queue = - _ip_data.local_hyperedge_fast_reset_flag_array(); + PartitionedHypergraph &hypergraph = _ip_data.local_partitioned_hypergraph(); + kahypar::ds::FastResetFlagArray<> &hypernodes_in_queue = + _ip_data.local_hypernode_fast_reset_flag_array(); + kahypar::ds::FastResetFlagArray<> &hyperedges_in_queue = + _ip_data.local_hyperedge_fast_reset_flag_array(); _ip_data.reset_unassigned_hypernodes(_rng); _ip_data.preassignFixedVertices(hypergraph); - vec> start_nodes = - PseudoPeripheralStartNodes::computeStartNodes(_ip_data, _context, kInvalidPartition, _rng); + vec > start_nodes = + PseudoPeripheralStartNodes::computeStartNodes( + _ip_data, _context, kInvalidPartition, _rng); // Insert each start node for each block into its corresponding queue hypernodes_in_queue.reset(); hyperedges_in_queue.reset(); parallel::scalable_vector queues(_context.partition.k); - for (PartitionID block = 0; block < _context.partition.k; ++block) { - for ( const HypernodeID& hn : start_nodes[block] ) { + for(PartitionID block = 0; block < _context.partition.k; ++block) + { + for(const HypernodeID &hn : start_nodes[block]) + { queues[block].push(hn); markHypernodeAsInQueue(hypergraph, hypernodes_in_queue, hn, block); } @@ -65,18 +70,22 @@ void BFSInitialPartitioner::partitionImpl() { // all adjacent vertices into its queue. HypernodeID num_assigned_hypernodes = _ip_data.numFixedVertices(); const HypernodeID current_num_nodes = - hypergraph.initialNumNodes() - hypergraph.numRemovedHypernodes(); - while (num_assigned_hypernodes < current_num_nodes) { - for (PartitionID block = 0; block < _context.partition.k; ++block) { + hypergraph.initialNumNodes() - hypergraph.numRemovedHypernodes(); + while(num_assigned_hypernodes < current_num_nodes) + { + for(PartitionID block = 0; block < _context.partition.k; ++block) + { HypernodeID hn = kInvalidHypernode; bool fits_into_block = false; - while (!queues[block].empty()) { + while(!queues[block].empty()) + { const HypernodeID next_hn = queues[block].front(); ASSERT(!hypergraph.isFixed(next_hn)); queues[block].pop(); - if (hypergraph.partID(next_hn) == kInvalidPartition) { + if(hypergraph.partID(next_hn) == kInvalidPartition) + { // Hypernode is assigned to the current block, if it is not // assigned to an other block and if the assignment does not // violate the balanced constraint. @@ -84,28 +93,35 @@ void BFSInitialPartitioner::partitionImpl() { // we take the last unassigned hypernode popped from the queue. // Note, in that case the balanced constraint will be violated. hn = next_hn; - if (fitsIntoBlock(hypergraph, hn, block)) { + if(fitsIntoBlock(hypergraph, hn, block)) + { fits_into_block = true; break; } } } - if (hn == kInvalidHypernode) { + if(hn == kInvalidHypernode) + { // Special case, in case all hypernodes in the queue are already // assigned to an other block or the hypergraph is unconnected, we // choose an new unassigned hypernode (if one exists) hn = _ip_data.get_unassigned_hypernode(); - if ( hn != kInvalidHypernode && fitsIntoBlock(hypergraph, hn, block) ) { + if(hn != kInvalidHypernode && fitsIntoBlock(hypergraph, hn, block)) + { fits_into_block = true; } } - if ( hn != kInvalidHypernode && !fits_into_block ) { + if(hn != kInvalidHypernode && !fits_into_block) + { // The node does not fit into the block. 
Thus, we quickly // check if there is another block to which we can assign the node - for ( PartitionID other_block = 0; other_block < _context.partition.k; ++other_block ) { - if ( other_block != block && fitsIntoBlock(hypergraph, hn, other_block) ) { + for(PartitionID other_block = 0; other_block < _context.partition.k; + ++other_block) + { + if(other_block != block && fitsIntoBlock(hypergraph, hn, other_block)) + { // There is another block to which we can assign the node // => ignore the node for now hn = kInvalidHypernode; @@ -114,13 +130,18 @@ void BFSInitialPartitioner::partitionImpl() { } } - if (hn != kInvalidHypernode) { - ASSERT(hypergraph.partID(hn) == kInvalidPartition, V(block) << V(hypergraph.partID(hn))); + if(hn != kInvalidHypernode) + { + ASSERT(hypergraph.partID(hn) == kInvalidPartition, + V(block) << V(hypergraph.partID(hn))); hypergraph.setNodePart(hn, block); ++num_assigned_hypernodes; pushIncidentHypernodesIntoQueue(hypergraph, _context, queues[block], - hypernodes_in_queue, hyperedges_in_queue, hn, block); - } else { + hypernodes_in_queue, hyperedges_in_queue, hn, + block); + } + else + { ASSERT(queues[block].empty()); } } @@ -134,21 +155,25 @@ void BFSInitialPartitioner::partitionImpl() { // ! Pushes all adjacent hypernodes (not visited before) of hypernode hn // ! into the BFS queue of the corresponding block. -template -inline void BFSInitialPartitioner::pushIncidentHypernodesIntoQueue(const PartitionedHypergraph& hypergraph, - const Context& context, - Queue& queue, - kahypar::ds::FastResetFlagArray<>& hypernodes_in_queue, - kahypar::ds::FastResetFlagArray<>& hyperedges_in_queue, - const HypernodeID hn, - const PartitionID block) { +template +inline void BFSInitialPartitioner::pushIncidentHypernodesIntoQueue( + const PartitionedHypergraph &hypergraph, const Context &context, Queue &queue, + kahypar::ds::FastResetFlagArray<> &hypernodes_in_queue, + kahypar::ds::FastResetFlagArray<> &hyperedges_in_queue, const HypernodeID hn, + const PartitionID block) +{ ASSERT(hn != kInvalidHypernode && block != kInvalidPartition); - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn) ) { - if ( !hyperedges_in_queue[block * hypergraph.initialNumEdges() + he] ) { - if ( hypergraph.edgeSize(he) <= context.partition.ignore_hyperedge_size_threshold ) { - for ( const HypernodeID& pin : hypergraph.pins(he) ) { - if ( !hypernodes_in_queue[block * hypergraph.initialNumNodes() + pin] && - hypergraph.partID(pin) == kInvalidPartition ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + if(!hyperedges_in_queue[block * hypergraph.initialNumEdges() + he]) + { + if(hypergraph.edgeSize(he) <= context.partition.ignore_hyperedge_size_threshold) + { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + if(!hypernodes_in_queue[block * hypergraph.initialNumNodes() + pin] && + hypergraph.partID(pin) == kInvalidPartition) + { queue.push(pin); markHypernodeAsInQueue(hypergraph, hypernodes_in_queue, pin, block); } diff --git a/mt-kahypar/partition/initial_partitioning/bfs_initial_partitioner.h b/mt-kahypar/partition/initial_partitioning/bfs_initial_partitioner.h index a68080088..03c944d2e 100644 --- a/mt-kahypar/partition/initial_partitioning/bfs_initial_partitioner.h +++ b/mt-kahypar/partition/initial_partitioning/bfs_initial_partitioner.h @@ -26,70 +26,69 @@ #pragma once +#include "mt-kahypar/parallel/stl/scalable_queue.h" #include "mt-kahypar/partition/initial_partitioning/i_initial_partitioner.h" #include 
"mt-kahypar/partition/initial_partitioning/initial_partitioning_data_container.h" -#include "mt-kahypar/parallel/stl/scalable_queue.h" namespace mt_kahypar { -template -class BFSInitialPartitioner : public IInitialPartitioner { +template +class BFSInitialPartitioner : public IInitialPartitioner +{ using Queue = parallel::scalable_queue; static constexpr bool debug = false; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - BFSInitialPartitioner(const InitialPartitioningAlgorithm, - ip_data_container_t* ip_data, - const Context& context, - const int seed, const int tag) : - _ip_data(ip::to_reference(ip_data)), - _context(context), - _rng(seed), - _tag(tag) { } +public: + BFSInitialPartitioner(const InitialPartitioningAlgorithm, ip_data_container_t *ip_data, + const Context &context, const int seed, const int tag) : + _ip_data(ip::to_reference(ip_data)), + _context(context), _rng(seed), _tag(tag) + { + } - private: +private: void partitionImpl() final; - bool fitsIntoBlock(PartitionedHypergraph& hypergraph, - const HypernodeID hn, - const PartitionID block) const { + bool fitsIntoBlock(PartitionedHypergraph &hypergraph, const HypernodeID hn, + const PartitionID block) const + { ASSERT(block != kInvalidPartition && block < _context.partition.k); return hypergraph.partWeight(block) + hypergraph.nodeWeight(hn) <= - _context.partition.perfect_balance_part_weights[block]; + _context.partition.perfect_balance_part_weights[block]; } // ! Pushes all adjacent hypernodes (not visited before) of hypernode hn // ! into the BFS queue of the corresponding block. - inline void pushIncidentHypernodesIntoQueue(const PartitionedHypergraph& hypergraph, - const Context& context, - Queue& queue, - kahypar::ds::FastResetFlagArray<>& hypernodes_in_queue, - kahypar::ds::FastResetFlagArray<>& hyperedges_in_queue, - const HypernodeID hn, - const PartitionID block); + inline void + pushIncidentHypernodesIntoQueue(const PartitionedHypergraph &hypergraph, + const Context &context, Queue &queue, + kahypar::ds::FastResetFlagArray<> &hypernodes_in_queue, + kahypar::ds::FastResetFlagArray<> &hyperedges_in_queue, + const HypernodeID hn, const PartitionID block); - inline void markHypernodeAsInQueue(const PartitionedHypergraph& hypergraph, - kahypar::ds::FastResetFlagArray<>& hypernodes_in_queue, - const HypernodeID hn, - const PartitionID block) { + inline void + markHypernodeAsInQueue(const PartitionedHypergraph &hypergraph, + kahypar::ds::FastResetFlagArray<> &hypernodes_in_queue, + const HypernodeID hn, const PartitionID block) + { hypernodes_in_queue.set(block * hypergraph.initialNumNodes() + hn, true); } - inline void markHyperedgeAsInQueue(const PartitionedHypergraph& hypergraph, - kahypar::ds::FastResetFlagArray<>& hyperedges_in_queue, - const HyperedgeID he, - const PartitionID block) { + inline void + markHyperedgeAsInQueue(const PartitionedHypergraph &hypergraph, + kahypar::ds::FastResetFlagArray<> &hyperedges_in_queue, + const HyperedgeID he, const PartitionID block) + { hyperedges_in_queue.set(block * hypergraph.initialNumEdges() + he, true); } - InitialPartitioningDataContainer& _ip_data; - const Context& _context; + InitialPartitioningDataContainer &_ip_data; + const Context &_context; std::mt19937 _rng; const int _tag; }; - } // namespace mt_kahypar diff --git a/mt-kahypar/partition/initial_partitioning/greedy_initial_partitioner.h b/mt-kahypar/partition/initial_partitioning/greedy_initial_partitioner.h index 4ae7c1655..493b893a8 100644 --- 
a/mt-kahypar/partition/initial_partitioning/greedy_initial_partitioner.h +++ b/mt-kahypar/partition/initial_partitioning/greedy_initial_partitioner.h @@ -31,8 +31,9 @@ #include "mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h" namespace mt_kahypar { -template typename GainPolicyT> -class GreedyInitialPartitionerBase { +template typename GainPolicyT> +class GreedyInitialPartitionerBase +{ using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; using GainComputationPolicy = GainPolicyT; @@ -40,62 +41,71 @@ class GreedyInitialPartitionerBase { static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - public: +public: GreedyInitialPartitionerBase(const InitialPartitioningAlgorithm algorithm, - ip_data_container_t* ip_data, - const Context& context, - const PartitionID default_block, - const int seed, const int tag) : - _algorithm(algorithm), - _ip_data(ip::to_reference(ip_data)), - _context(context), - _default_block(default_block), - _rng(seed), - _tag(tag) - { } - - template - void partitionWithSelectionPolicy() { - if ( _ip_data.should_initial_partitioner_run(_algorithm) ) { + ip_data_container_t *ip_data, const Context &context, + const PartitionID default_block, const int seed, + const int tag) : + _algorithm(algorithm), + _ip_data(ip::to_reference(ip_data)), _context(context), + _default_block(default_block), _rng(seed), _tag(tag) + { + } + + template + void partitionWithSelectionPolicy() + { + if(_ip_data.should_initial_partitioner_run(_algorithm)) + { HighResClockTimepoint start = std::chrono::high_resolution_clock::now(); - PartitionedHypergraph& hg = _ip_data.local_partitioned_hypergraph(); - KWayPriorityQueue& kway_pq = _ip_data.local_kway_priority_queue(); - kahypar::ds::FastResetFlagArray<>& hyperedges_in_queue = - _ip_data.local_hyperedge_fast_reset_flag_array(); + PartitionedHypergraph &hg = _ip_data.local_partitioned_hypergraph(); + KWayPriorityQueue &kway_pq = _ip_data.local_kway_priority_queue(); + kahypar::ds::FastResetFlagArray<> &hyperedges_in_queue = + _ip_data.local_hyperedge_fast_reset_flag_array(); initializeVertices(); PartitionID to = kInvalidPartition; bool use_perfect_balanced_as_upper_bound = true; bool allow_overfitting = false; - while (true) { + while(true) + { // If our default block has a weight less than the perfect balanced block weight - // we terminate greedy initial partitioner in order to prevent that the default block - // becomes underloaded. - if ( _default_block != kInvalidPartition && - hg.partWeight(_default_block) < - _context.partition.perfect_balance_part_weights[_default_block] ) { + // we terminate greedy initial partitioner in order to prevent that the default + // block becomes underloaded. + if(_default_block != kInvalidPartition && + hg.partWeight(_default_block) < + _context.partition.perfect_balance_part_weights[_default_block]) + { break; } HypernodeID hn = kInvalidHypernode; Gain gain = kInvalidGain; - // The greedy initial partitioner has 3 different stages. In the first, we use the perfect - // balanced part weight as upper bound for the block weights. Once we reach the block weight - // limit, we release the upper bound and use the maximum allowed block weight as new upper bound. - // Once we are not able to assign any vertex to a block, we allow overfitting, which effectively - // allows to violate the balance constraint. 
- if ( !PQSelectionPolicy::pop(hg, kway_pq, hn, to, gain, use_perfect_balanced_as_upper_bound) ) { - if ( use_perfect_balanced_as_upper_bound ) { + // The greedy initial partitioner has 3 different stages. In the first, we use the + // perfect balanced part weight as upper bound for the block weights. Once we + // reach the block weight limit, we release the upper bound and use the maximum + // allowed block weight as new upper bound. Once we are not able to assign any + // vertex to a block, we allow overfitting, which effectively allows to violate + // the balance constraint. + if(!PQSelectionPolicy::pop(hg, kway_pq, hn, to, gain, + use_perfect_balanced_as_upper_bound)) + { + if(use_perfect_balanced_as_upper_bound) + { enableAllPQs(_context.partition.k, kway_pq); use_perfect_balanced_as_upper_bound = false; continue; - } else if ( !allow_overfitting ) { + } + else if(!allow_overfitting) + { enableAllPQs(_context.partition.k, kway_pq); allow_overfitting = true; continue; - } else { + } + else + { break; } } @@ -105,14 +115,22 @@ class GreedyInitialPartitionerBase { ASSERT(to != _default_block); ASSERT(hg.partID(hn) == _default_block); - if ( allow_overfitting || fitsIntoBlock(hg, hn, to, use_perfect_balanced_as_upper_bound) ) { - if ( _default_block != kInvalidPartition ) { + if(allow_overfitting || + fitsIntoBlock(hg, hn, to, use_perfect_balanced_as_upper_bound)) + { + if(_default_block != kInvalidPartition) + { hg.changeNodePart(hn, _default_block, to); - } else { + } + else + { hg.setNodePart(hn, to); } - insertAndUpdateVerticesAfterMove(hg, kway_pq, hyperedges_in_queue, hn, _default_block, to); - } else { + insertAndUpdateVerticesAfterMove(hg, kway_pq, hyperedges_in_queue, hn, + _default_block, to); + } + else + { kway_pq.insert(hn, to, gain); kway_pq.disablePart(to); } @@ -124,10 +142,11 @@ class GreedyInitialPartitionerBase { } } - private: - void initializeVertices() { - PartitionedHypergraph& hg = _ip_data.local_partitioned_hypergraph(); - KWayPriorityQueue& kway_pq = _ip_data.local_kway_priority_queue(); +private: + void initializeVertices() + { + PartitionedHypergraph &hg = _ip_data.local_partitioned_hypergraph(); + KWayPriorityQueue &kway_pq = _ip_data.local_kway_priority_queue(); // Experiments have shown that some pq selection policies work better // if we preassign all vertices to a block and than execute the greedy @@ -135,11 +154,14 @@ class GreedyInitialPartitionerBase { // unassigned, but the global and sequential strategy both preassign // all vertices to block 1 before initial partitioning. 
_ip_data.preassignFixedVertices(hg); - if ( _default_block != kInvalidPartition ) { + if(_default_block != kInvalidPartition) + { ASSERT(_default_block < _context.partition.k); kway_pq.disablePart(_default_block); - for ( const HypernodeID& hn : hg.nodes() ) { - if ( !hg.isFixed(hn) ) { + for(const HypernodeID &hn : hg.nodes()) + { + if(!hg.isFixed(hn)) + { hg.setNodePart(hn, _default_block); } } @@ -147,13 +169,17 @@ class GreedyInitialPartitionerBase { // Insert start vertices into its corresponding PQs _ip_data.reset_unassigned_hypernodes(_rng); - vec> start_nodes = - PseudoPeripheralStartNodes::computeStartNodes(_ip_data, _context, _default_block, _rng); + vec > start_nodes = + PseudoPeripheralStartNodes::computeStartNodes(_ip_data, _context, + _default_block, _rng); ASSERT(static_cast(_context.partition.k) == start_nodes.size()); kway_pq.clear(); - for ( PartitionID block = 0; block < _context.partition.k; ++block ) { - if ( block != _default_block ) { - for ( const HypernodeID& hn : start_nodes[block] ) { + for(PartitionID block = 0; block < _context.partition.k; ++block) + { + if(block != _default_block) + { + for(const HypernodeID &hn : start_nodes[block]) + { insertVertexIntoPQ(hg, kway_pq, hn, block); } } @@ -162,28 +188,30 @@ class GreedyInitialPartitionerBase { _ip_data.local_hyperedge_fast_reset_flag_array().reset(); } - bool fitsIntoBlock(PartitionedHypergraph& hypergraph, - const HypernodeID hn, + bool fitsIntoBlock(PartitionedHypergraph &hypergraph, const HypernodeID hn, const PartitionID block, - const bool use_perfect_balanced_as_upper_bound) const { + const bool use_perfect_balanced_as_upper_bound) const + { ASSERT(block != kInvalidPartition && block < _context.partition.k); - const HyperedgeWeight upper_bound = use_perfect_balanced_as_upper_bound ? - _context.partition.perfect_balance_part_weights[block] : _context.partition.max_part_weights[block]; - return hypergraph.partWeight(block) + hypergraph.nodeWeight(hn) <= - upper_bound; + const HyperedgeWeight upper_bound = + use_perfect_balanced_as_upper_bound ? 
+ _context.partition.perfect_balance_part_weights[block] : + _context.partition.max_part_weights[block]; + return hypergraph.partWeight(block) + hypergraph.nodeWeight(hn) <= upper_bound; } - void insertVertexIntoPQ(const PartitionedHypergraph& hypergraph, - KWayPriorityQueue& pq, - const HypernodeID hn, - const PartitionID to) { + void insertVertexIntoPQ(const PartitionedHypergraph &hypergraph, KWayPriorityQueue &pq, + const HypernodeID hn, const PartitionID to) + { ASSERT(to != kInvalidPartition && to < _context.partition.k); - ASSERT(hypergraph.partID(hn) == _default_block, V(hypergraph.partID(hn)) << V(_default_block)); + ASSERT(hypergraph.partID(hn) == _default_block, V(hypergraph.partID(hn)) + << V(_default_block)); ASSERT(!pq.contains(hn, to)); const Gain gain = GainComputationPolicy::calculateGain(hypergraph, hn, to); pq.insert(hn, to, gain); - if ( !pq.isEnabled(to) ) { + if(!pq.isEnabled(to)) + { pq.enablePart(to); } @@ -191,22 +219,22 @@ class GreedyInitialPartitionerBase { ASSERT(pq.isEnabled(to)); } - void insertUnassignedVertexIntoPQ(const PartitionedHypergraph& hypergraph, - KWayPriorityQueue& pq, - const PartitionID to) { + void insertUnassignedVertexIntoPQ(const PartitionedHypergraph &hypergraph, + KWayPriorityQueue &pq, const PartitionID to) + { ASSERT(to != _default_block); const HypernodeID unassigned_hn = _ip_data.get_unassigned_hypernode(_default_block); - if ( unassigned_hn != kInvalidHypernode ) { + if(unassigned_hn != kInvalidHypernode) + { insertVertexIntoPQ(hypergraph, pq, unassigned_hn, to); } } - void insertAndUpdateVerticesAfterMove(const PartitionedHypergraph& hypergraph, - KWayPriorityQueue& pq, - kahypar::ds::FastResetFlagArray<>& hyperedges_in_queue, - const HypernodeID hn, - const PartitionID from, - const PartitionID to) { + void insertAndUpdateVerticesAfterMove( + const PartitionedHypergraph &hypergraph, KWayPriorityQueue &pq, + kahypar::ds::FastResetFlagArray<> &hyperedges_in_queue, const HypernodeID hn, + const PartitionID from, const PartitionID to) + { ASSERT(to != kInvalidPartition && to < _context.partition.k); ASSERT(hypergraph.partID(hn) == to); @@ -214,11 +242,14 @@ class GreedyInitialPartitionerBase { GainComputationPolicy::deltaGainUpdate(hypergraph, pq, hn, from, to); // Remove moved hypernode hn from all PQs - for ( PartitionID block = 0; block < hypergraph.k(); ++block ) { - if ( pq.contains(hn, block) ) { + for(PartitionID block = 0; block < hypergraph.k(); ++block) + { + if(pq.contains(hn, block)) + { // Prevent that PQ becomes empty - if ( to != block && pq.size(block) == 1 ) { + if(to != block && pq.size(block) == 1) + { insertUnassignedVertexIntoPQ(hypergraph, pq, block); } @@ -227,11 +258,15 @@ class GreedyInitialPartitionerBase { } // Insert all adjacent hypernodes of the moved vertex into PQ of block to - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn)) { - if ( !hyperedges_in_queue[to * hypergraph.initialNumEdges() + he] ) { - for ( const HypernodeID& pin : hypergraph.pins(he) ) { - if ( hypergraph.partID(pin) == _default_block && - !pq.contains(pin, to) && !hypergraph.isFixed(pin) ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + if(!hyperedges_in_queue[to * hypergraph.initialNumEdges() + he]) + { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + if(hypergraph.partID(pin) == _default_block && !pq.contains(pin, to) && + !hypergraph.isFixed(pin)) + { insertVertexIntoPQ(hypergraph, pq, pin, to); } } @@ -240,48 +275,54 @@ class GreedyInitialPartitionerBase { } // Prevent that PQ becomes empty - 
if ( pq.size(to) == 0 ) { + if(pq.size(to) == 0) + { insertUnassignedVertexIntoPQ(hypergraph, pq, to); } } - void enableAllPQs(const PartitionID k, KWayPriorityQueue& pq) { - for ( PartitionID block = 0; block < k; ++block ) { - if ( block != _default_block ) { + void enableAllPQs(const PartitionID k, KWayPriorityQueue &pq) + { + for(PartitionID block = 0; block < k; ++block) + { + if(block != _default_block) + { pq.enablePart(block); } } } const InitialPartitioningAlgorithm _algorithm; - InitialPartitioningDataContainer& _ip_data; - const Context& _context; + InitialPartitioningDataContainer &_ip_data; + const Context &_context; const PartitionID _default_block; std::mt19937 _rng; const int _tag; }; - -// the split into base and subclass serves to reduce the compile time, since the base class -// is only instantiated once for all PQ selection policies -template typename GainPolicyT, - template typename PQSelectionPolicyT> -class GreedyInitialPartitioner : public IInitialPartitioner, GreedyInitialPartitionerBase { +// the split into base and subclass serves to reduce the compile time, since the base +// class is only instantiated once for all PQ selection policies +template typename GainPolicyT, + template typename PQSelectionPolicyT> +class GreedyInitialPartitioner : public IInitialPartitioner, + GreedyInitialPartitionerBase +{ using Base = GreedyInitialPartitionerBase; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; using PQSelectionPolicy = PQSelectionPolicyT; - public: +public: GreedyInitialPartitioner(const InitialPartitioningAlgorithm algorithm, - ip_data_container_t* ip_data, - const Context& context, + ip_data_container_t *ip_data, const Context &context, const int seed, const int tag) : - Base(algorithm, ip_data, context, PQSelectionPolicy::getDefaultBlock(), seed, tag) { } + Base(algorithm, ip_data, context, PQSelectionPolicy::getDefaultBlock(), seed, tag) + { + } - private: - void partitionImpl() final { +private: + void partitionImpl() final + { Base::template partitionWithSelectionPolicy(); } }; diff --git a/mt-kahypar/partition/initial_partitioning/i_initial_partitioner.h b/mt-kahypar/partition/initial_partitioning/i_initial_partitioner.h index a8e9f887d..f75c2ddee 100644 --- a/mt-kahypar/partition/initial_partitioning/i_initial_partitioner.h +++ b/mt-kahypar/partition/initial_partitioning/i_initial_partitioner.h @@ -27,23 +27,22 @@ #pragma once namespace mt_kahypar { -class IInitialPartitioner { - public: - IInitialPartitioner(const IInitialPartitioner&) = delete; - IInitialPartitioner(IInitialPartitioner&&) = delete; - IInitialPartitioner & operator= (const IInitialPartitioner &) = delete; - IInitialPartitioner & operator= (IInitialPartitioner &&) = delete; +class IInitialPartitioner +{ +public: + IInitialPartitioner(const IInitialPartitioner &) = delete; + IInitialPartitioner(IInitialPartitioner &&) = delete; + IInitialPartitioner &operator=(const IInitialPartitioner &) = delete; + IInitialPartitioner &operator=(IInitialPartitioner &&) = delete; - void partition() { - partitionImpl(); - } + void partition() { partitionImpl(); } virtual ~IInitialPartitioner() = default; - protected: +protected: IInitialPartitioner() = default; - private: +private: virtual void partitionImpl() = 0; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h b/mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h index 7fe8d5978..247b97860 100644 --- 
a/mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h +++ b/mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h @@ -39,9 +39,11 @@ namespace mt_kahypar { -using KWayPriorityQueue = kahypar::ds::KWayPriorityQueue, false>; +using KWayPriorityQueue = + kahypar::ds::KWayPriorityQueue, false>; using ThreadLocalKWayPriorityQueue = tbb::enumerable_thread_specific; -using ThreadLocalFastResetFlagArray = tbb::enumerable_thread_specific >; +using ThreadLocalFastResetFlagArray = + tbb::enumerable_thread_specific >; } \ No newline at end of file diff --git a/mt-kahypar/partition/initial_partitioning/initial_partitioning_data_container.h b/mt-kahypar/partition/initial_partitioning/initial_partitioning_data_container.h index 7aa4b1294..52b807d87 100644 --- a/mt-kahypar/partition/initial_partitioning/initial_partitioning_data_container.h +++ b/mt-kahypar/partition/initial_partitioning/initial_partitioning_data_container.h @@ -26,28 +26,28 @@ #pragma once -#include #include +#include #include "tbb/enumerable_thread_specific.h" #include "mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/context.h" -#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/factories.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" -#include "mt-kahypar/utils/cast.h" -#include "mt-kahypar/utils/utilities.h" -#include "mt-kahypar/utils/range.h" +#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.h" #include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" - +#include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/utils/range.h" +#include "mt-kahypar/utils/utilities.h" namespace mt_kahypar { -template -class InitialPartitioningDataContainer { +template +class InitialPartitioningDataContainer +{ static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; @@ -56,33 +56,37 @@ class InitialPartitioningDataContainer { using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; // ! 
Contains information about the best thread local partition - struct PartitioningResult { + struct PartitioningResult + { PartitioningResult() = default; PartitioningResult(InitialPartitioningAlgorithm algorithm, - HyperedgeWeight objective_ip, - HyperedgeWeight objective, + HyperedgeWeight objective_ip, HyperedgeWeight objective, double imbalance) : - _algorithm(algorithm), - _objective_ip(objective_ip), - _objective(objective), - _imbalance(imbalance) { } + _algorithm(algorithm), + _objective_ip(objective_ip), _objective(objective), _imbalance(imbalance) + { + } - bool is_other_better(const PartitioningResult& other, const double epsilon) const { + bool is_other_better(const PartitioningResult &other, const double epsilon) const + { bool equal_metric = other._objective == _objective; bool improved_metric = other._objective < _objective; bool improved_imbalance = other._imbalance < _imbalance; bool is_feasible = _imbalance <= epsilon; bool is_other_feasible = other._imbalance <= epsilon; - return ( improved_metric && (is_other_feasible || improved_imbalance) ) || - ( equal_metric && improved_imbalance ) || - ( is_other_feasible && !is_feasible ) || - ( improved_imbalance && !is_other_feasible && !is_feasible ) || - ( equal_metric && _imbalance == other._imbalance // tie breaking for deterministic mode - && std::tie(other._random_tag, other._deterministic_tag) < std::tie(_random_tag, _deterministic_tag) ); + return (improved_metric && (is_other_feasible || improved_imbalance)) || + (equal_metric && improved_imbalance) || + (is_other_feasible && !is_feasible) || + (improved_imbalance && !is_other_feasible && !is_feasible) || + (equal_metric && + _imbalance == other._imbalance // tie breaking for deterministic mode + && std::tie(other._random_tag, other._deterministic_tag) < + std::tie(_random_tag, _deterministic_tag)); } - std::string str() const { + std::string str() const + { std::stringstream ss; ss << "Algorithm = " << _algorithm << ", " << "Objective IP = " << _objective_ip << ", " @@ -101,31 +105,30 @@ class InitialPartitioningDataContainer { // ! Aggregates global stats about the partitions produced by an specific // ! initial partitioning algorithm. - struct InitialPartitioningRunStats { + struct InitialPartitioningRunStats + { explicit InitialPartitioningRunStats(InitialPartitioningAlgorithm algo) : - algorithm(algo), - average_quality(0.0), - sum_of_squares(0.0), - n(0), - best_quality(std::numeric_limits<HyperedgeWeight>::max()) { } + algorithm(algo), average_quality(0.0), sum_of_squares(0.0), n(0), + best_quality(std::numeric_limits<HyperedgeWeight>::max()) + { + } - void add_run(const HyperedgeWeight quality) { + void add_run(const HyperedgeWeight quality) + { ++n; // Incremental update standard deviation // Incremental update average quality const double old_average_quality = average_quality; average_quality += static_cast<double>(quality - average_quality) / n; - sum_of_squares += - static_cast<double>(quality - old_average_quality) * - static_cast<double>(quality - average_quality); + sum_of_squares += static_cast<double>(quality - old_average_quality) * + static_cast<double>(quality - average_quality); - if ( quality < best_quality ) { + if(quality < best_quality) + { best_quality = quality; } } - double stddev() const { - return n == 1 ? 0 : std::sqrt(sum_of_squares / ( n - 1 )); - } + double stddev() const { return n == 1 ? 0 : std::sqrt(sum_of_squares / (n - 1)); } InitialPartitioningAlgorithm algorithm; double average_quality; @@ -139,26 +142,29 @@ class InitialPartitioningDataContainer { // !
beneficial to perform additional runs of a specific initial // ! partitioning algorithm based on its previous runs (see // ! should_initial_partitioner_run(...)). - struct GlobalInitialPartitioningStats { - - explicit GlobalInitialPartitioningStats(const Context& context) : - _stat_mutex(), - _context(context), - _stats(), - _best_quality(std::numeric_limits<HyperedgeWeight>::max()) { - const uint8_t num_initial_partitioner = static_cast<uint8_t>(InitialPartitioningAlgorithm::UNDEFINED); - for ( uint8_t algo = 0; algo < num_initial_partitioner; ++algo ) { + struct GlobalInitialPartitioningStats + { + + explicit GlobalInitialPartitioningStats(const Context &context) : + _stat_mutex(), _context(context), _stats(), + _best_quality(std::numeric_limits<HyperedgeWeight>::max()) + { + const uint8_t num_initial_partitioner = + static_cast<uint8_t>(InitialPartitioningAlgorithm::UNDEFINED); + for(uint8_t algo = 0; algo < num_initial_partitioner; ++algo) + { _stats.emplace_back(static_cast<InitialPartitioningAlgorithm>(algo)); } } void add_run(const InitialPartitioningAlgorithm algorithm, - const HyperedgeWeight quality, - const bool is_feasible) { + const HyperedgeWeight quality, const bool is_feasible) + { std::lock_guard<std::mutex> _lock(_stat_mutex); const uint8_t algo_idx = static_cast<uint8_t>(algorithm); _stats[algo_idx].add_run(quality); - if ( is_feasible && quality < _best_quality ) { + if(is_feasible && quality < _best_quality) + { _best_quality = quality; } } @@ -172,70 +178,86 @@ class InitialPartitioningDataContainer { // ! so far then we say that the probability that the corresponding initial // ! partitioner produce a new global best partition is too low and prohibit further // ! runs of that partitioner. - bool should_initial_partitioner_run(const InitialPartitioningAlgorithm algorithm) const { - return _context.partition.deterministic || should_initial_partitioner_run_ignoring_deterministic(algorithm); + bool + should_initial_partitioner_run(const InitialPartitioningAlgorithm algorithm) const + { + return _context.partition.deterministic || + should_initial_partitioner_run_ignoring_deterministic(algorithm); } - bool should_initial_partitioner_run_ignoring_deterministic(const InitialPartitioningAlgorithm algorithm) const { + bool should_initial_partitioner_run_ignoring_deterministic( + const InitialPartitioningAlgorithm algorithm) const + { const uint8_t algo_idx = static_cast<uint8_t>(algorithm); return !_context.initial_partitioning.use_adaptive_ip_runs || _stats[algo_idx].n < _context.initial_partitioning.min_adaptive_ip_runs || - _stats[algo_idx].average_quality - 2.0 * _stats[algo_idx].stddev() <= _best_quality; + _stats[algo_idx].average_quality - 2.0 * _stats[algo_idx].stddev() <= + _best_quality; } std::mutex _stat_mutex; - const Context& _context; + const Context &_context; parallel::scalable_vector<InitialPartitioningRunStats> _stats; HyperedgeWeight _best_quality; }; - struct LocalInitialPartitioningHypergraph { + struct LocalInitialPartitioningHypergraph + { - LocalInitialPartitioningHypergraph(Hypergraph& hypergraph, - const Context& context, - GlobalInitialPartitioningStats& global_stats, + LocalInitialPartitioningHypergraph(Hypergraph &hypergraph, const Context &context, + GlobalInitialPartitioningStats &global_stats, const bool disable_fm) : - _partitioned_hypergraph(context.partition.k, hypergraph), - _context(context), - _global_stats(global_stats), - _partition(hypergraph.initialNumNodes(), kInvalidPartition), - _result(InitialPartitioningAlgorithm::UNDEFINED, - std::numeric_limits<HyperedgeWeight>::max(), - std::numeric_limits<HyperedgeWeight>::max(), - std::numeric_limits<double>::max()), -
_gain_cache(GainCachePtr::constructGainCache(context)), - _rebalancer(nullptr), - _label_propagation(nullptr), - _twoway_fm(nullptr), - _stats() { - - for ( uint8_t algo = 0; algo < static_cast(InitialPartitioningAlgorithm::UNDEFINED); ++algo ) { + _partitioned_hypergraph(context.partition.k, hypergraph), + _context(context), _global_stats(global_stats), + _partition(hypergraph.initialNumNodes(), kInvalidPartition), + _result(InitialPartitioningAlgorithm::UNDEFINED, + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max()), + _gain_cache(GainCachePtr::constructGainCache(context)), _rebalancer(nullptr), + _label_propagation(nullptr), _twoway_fm(nullptr), _stats() + { + + for(uint8_t algo = 0; + algo < static_cast(InitialPartitioningAlgorithm::UNDEFINED); ++algo) + { _stats.emplace_back(static_cast(algo)); } - if ( _context.partition.k == 2 && !disable_fm ) { + if(_context.partition.k == 2 && !disable_fm) + { // In case of a bisection we instantiate the 2-way FM refiner - _twoway_fm = std::make_unique>(_partitioned_hypergraph, _context); - } else if ( _context.refinement.label_propagation.algorithm != LabelPropagationAlgorithm::do_nothing ) { + _twoway_fm = std::make_unique >( + _partitioned_hypergraph, _context); + } + else if(_context.refinement.label_propagation.algorithm != + LabelPropagationAlgorithm::do_nothing) + { // In case of a direct-kway initial partition we instantiate the LP refiner _rebalancer = RebalancerFactory::getInstance().createObject( - _context.refinement.rebalancer, hypergraph.initialNumNodes(), _context, _gain_cache); + _context.refinement.rebalancer, hypergraph.initialNumNodes(), _context, + _gain_cache); _label_propagation = LabelPropagationFactory::getInstance().createObject( - _context.refinement.label_propagation.algorithm, - hypergraph.initialNumNodes(), hypergraph.initialNumEdges(), _context, _gain_cache, *_rebalancer); + _context.refinement.label_propagation.algorithm, hypergraph.initialNumNodes(), + hypergraph.initialNumEdges(), _context, _gain_cache, *_rebalancer); } } - PartitioningResult refineAndUpdateStats(const InitialPartitioningAlgorithm algorithm, std::mt19937& prng, - const double time = 0.0) { - ASSERT([&]() { - for (const HypernodeID& hn : _partitioned_hypergraph.nodes()) { - if (_partitioned_hypergraph.partID(hn) == kInvalidPartition) { - return false; + PartitioningResult refineAndUpdateStats(const InitialPartitioningAlgorithm algorithm, + std::mt19937 &prng, const double time = 0.0) + { + ASSERT( + [&]() { + for(const HypernodeID &hn : _partitioned_hypergraph.nodes()) + { + if(_partitioned_hypergraph.partID(hn) == kInvalidPartition) + { + return false; + } } - } - return true; - } (), "There are unassigned hypernodes!"); + return true; + }(), + "There are unassigned hypernodes!"); Metrics current_metric; current_metric.quality = metrics::quality(_partitioned_hypergraph, _context, false); @@ -246,7 +268,7 @@ class InitialPartitioningDataContainer { refineCurrentPartition(current_metric, prng); PartitioningResult result(algorithm, quality_before_refinement, - current_metric.quality, current_metric.imbalance); + current_metric.quality, current_metric.imbalance); // Aggregate Stats auto algorithm_index = static_cast(algorithm); @@ -255,43 +277,49 @@ class InitialPartitioningDataContainer { ++_stats[algorithm_index].total_calls; _global_stats.add_run(algorithm, current_metric.quality, - current_metric.imbalance <= _context.partition.epsilon); + current_metric.imbalance <= _context.partition.epsilon); return result; } 
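// Illustrative, self-contained sketch (not part of this patch and not mt-kahypar code):
// it mirrors the bookkeeping that InitialPartitioningRunStats::add_run() and
// should_initial_partitioner_run_ignoring_deterministic() above implement, i.e. a
// Welford-style incremental mean/variance update plus the "keep scheduling an algorithm
// while mean - 2 * stddev still reaches the best quality" cutoff. All names below
// (RunningQualityStats, worth_another_run) and the integer types are assumptions made
// for this sketch only; the use_adaptive_ip_runs config check is omitted.
#include <cmath>
#include <cstdint>
#include <limits>

struct RunningQualityStats
{
  double mean = 0.0;   // running average of the objective (lower is better)
  double sum_sq = 0.0; // running sum of squared deviations from the mean
  std::uint64_t n = 0; // number of runs observed so far
  std::int64_t best = std::numeric_limits<std::int64_t>::max();

  void add_run(const std::int64_t quality)
  {
    ++n;
    const double old_mean = mean;
    mean += (static_cast<double>(quality) - mean) / n; // incremental mean
    sum_sq += (quality - old_mean) * (quality - mean); // Welford variance term
    if(quality < best)
    {
      best = quality;
    }
  }

  double stddev() const { return n <= 1 ? 0.0 : std::sqrt(sum_sq / (n - 1)); }

  // Schedule another run only while the algorithm's optimistic estimate
  // (mean minus two standard deviations) can still reach the globally best
  // objective, or while too few runs were made to judge it reliably.
  bool worth_another_run(const std::int64_t global_best, const std::uint64_t min_runs) const
  {
    return n < min_runs || mean - 2.0 * stddev() <= static_cast<double>(global_best);
  }
};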
- PartitioningResult performRefinementOnPartition(vec& partition, - PartitioningResult& input, std::mt19937& prng) { + PartitioningResult performRefinementOnPartition(vec &partition, + PartitioningResult &input, + std::mt19937 &prng) + { Metrics current_metric = { input._objective, input._imbalance }; _partitioned_hypergraph.resetPartition(); // Apply input partition to hypergraph - for ( const HypernodeID& hn : _partitioned_hypergraph.nodes() ) { + for(const HypernodeID &hn : _partitioned_hypergraph.nodes()) + { ASSERT(hn < partition.size()); ASSERT(_partitioned_hypergraph.partID(hn) == kInvalidPartition); _partitioned_hypergraph.setNodePart(hn, partition[hn]); } HEAVY_INITIAL_PARTITIONING_ASSERT( - current_metric.quality == metrics::quality(_partitioned_hypergraph, _context, false)); + current_metric.quality == + metrics::quality(_partitioned_hypergraph, _context, false)); refineCurrentPartition(current_metric, prng); - PartitioningResult result(_result._algorithm, - current_metric.quality, current_metric.quality, - current_metric.imbalance); + PartitioningResult result(_result._algorithm, current_metric.quality, + current_metric.quality, current_metric.imbalance); return result; } - void performRefinementOnBestPartition(int seed) { + void performRefinementOnBestPartition(int seed) + { std::mt19937 prng(seed); auto refined = performRefinementOnPartition(_partition, _result, prng); // Compare current best partition with refined partition - if ( _result.is_other_better(refined, _context.partition.epsilon) ) { - for ( const HypernodeID& hn : _partitioned_hypergraph.nodes() ) { + if(_result.is_other_better(refined, _context.partition.epsilon)) + { + for(const HypernodeID &hn : _partitioned_hypergraph.nodes()) + { const PartitionID part_id = _partitioned_hypergraph.partID(hn); ASSERT(hn < _partition.size()); ASSERT(part_id != kInvalidPartition); @@ -301,155 +329,185 @@ class InitialPartitioningDataContainer { } } - void copyPartition(vec& partition_store) const { - for (HypernodeID node : _partitioned_hypergraph.nodes()) { + void copyPartition(vec &partition_store) const + { + for(HypernodeID node : _partitioned_hypergraph.nodes()) + { ASSERT(_partitioned_hypergraph.partID(node) != kInvalidPartition); partition_store[node] = _partitioned_hypergraph.partID(node); } } - void refineCurrentPartition(Metrics& current_metric, std::mt19937& prng) { - if ( _context.partition.k == 2 && _twoway_fm ) { + void refineCurrentPartition(Metrics ¤t_metric, std::mt19937 &prng) + { + if(_context.partition.k == 2 && _twoway_fm) + { bool improvement = true; - for ( size_t i = 0; i < _context.initial_partitioning.fm_refinment_rounds && improvement; ++i ) { + for(size_t i = 0; + i < _context.initial_partitioning.fm_refinment_rounds && improvement; ++i) + { improvement = _twoway_fm->refine(current_metric, prng); } - } else if ( _label_propagation ) { + } + else if(_label_propagation) + { mt_kahypar_partitioned_hypergraph_t phg = - utils::partitioned_hg_cast(_partitioned_hypergraph); + utils::partitioned_hg_cast(_partitioned_hypergraph); _label_propagation->initialize(phg); _label_propagation->refine(phg, {}, current_metric, - std::numeric_limits::max()); + std::numeric_limits::max()); } HEAVY_INITIAL_PARTITIONING_ASSERT( - current_metric.quality == metrics::quality(_partitioned_hypergraph, _context, false)); + current_metric.quality == + metrics::quality(_partitioned_hypergraph, _context, false)); } - void aggregate_stats(parallel::scalable_vector& main_stats) const { + void aggregate_stats( + 
parallel::scalable_vector &main_stats) const + { ASSERT(main_stats.size() == _stats.size()); - for ( size_t i = 0; i < _stats.size(); ++i ) { + for(size_t i = 0; i < _stats.size(); ++i) + { main_stats[i].add(_stats[i]); } } - void freeInternalData() { - tbb::parallel_invoke([&] { - _partitioned_hypergraph.freeInternalData(); - }, [&] { - parallel::free(_partition); - }); + void freeInternalData() + { + tbb::parallel_invoke([&] { _partitioned_hypergraph.freeInternalData(); }, + [&] { parallel::free(_partition); }); } PartitionedHypergraph _partitioned_hypergraph; - const Context& _context; - GlobalInitialPartitioningStats& _global_stats; + const Context &_context; + GlobalInitialPartitioningStats &_global_stats; parallel::scalable_vector _partition; PartitioningResult _result; gain_cache_t _gain_cache; std::unique_ptr _rebalancer; std::unique_ptr _label_propagation; - std::unique_ptr> _twoway_fm; + std::unique_ptr > _twoway_fm; parallel::scalable_vector _stats; }; - using ThreadLocalHypergraph = tbb::enumerable_thread_specific; - using ThreadLocalUnassignedHypernodes = tbb::enumerable_thread_specific>; + using ThreadLocalHypergraph = + tbb::enumerable_thread_specific; + using ThreadLocalUnassignedHypernodes = + tbb::enumerable_thread_specific >; using FixedVertexIterator = typename vec::const_iterator; - public: - InitialPartitioningDataContainer(PartitionedHypergraph& hypergraph, - const Context& context, - const bool disable_fm = false) : - _partitioned_hg(hypergraph), - _context(context), - _disable_fm(disable_fm), - _global_stats(context), - _local_hg([&] { - return construct_local_partitioned_hypergraph(); - }), - _local_kway_pq(_context.partition.k), - _is_local_pq_initialized(false), - _local_hn_visited(_context.partition.k * hypergraph.initialNumNodes()), - _local_he_visited(_context.partition.k * hypergraph.initialNumEdges()), - _local_unassigned_hypernodes(), - _local_unassigned_hypernode_pointer(std::numeric_limits::max()), - _fixed_vertices(), - _max_pop_size(_context.initial_partitioning.population_size) { +public: + InitialPartitioningDataContainer(PartitionedHypergraph &hypergraph, + const Context &context, + const bool disable_fm = false) : + _partitioned_hg(hypergraph), + _context(context), _disable_fm(disable_fm), _global_stats(context), + _local_hg([&] { return construct_local_partitioned_hypergraph(); }), + _local_kway_pq(_context.partition.k), _is_local_pq_initialized(false), + _local_hn_visited(_context.partition.k * hypergraph.initialNumNodes()), + _local_he_visited(_context.partition.k * hypergraph.initialNumEdges()), + _local_unassigned_hypernodes(), + _local_unassigned_hypernode_pointer(std::numeric_limits::max()), + _fixed_vertices(), _max_pop_size(_context.initial_partitioning.population_size) + { // Setup Label Propagation IRefiner Config for Initial Partitioning _context.refinement = _context.initial_partitioning.refinement; _context.refinement.label_propagation.execute_sequential = true; - if (_context.partition.deterministic) { + if(_context.partition.deterministic) + { _best_partitions.resize(_max_pop_size); - for (size_t i = 0; i < _max_pop_size; ++i) { - _best_partitions[i].second.resize(hypergraph.initialNumNodes(), kInvalidPartition); + for(size_t i = 0; i < _max_pop_size; ++i) + { + _best_partitions[i].second.resize(hypergraph.initialNumNodes(), + kInvalidPartition); } } - if ( _partitioned_hg.hasFixedVertices() ) { - for ( const HypernodeID& hn : _partitioned_hg.nodes() ) { - if ( _partitioned_hg.isFixed(hn) ) { + if(_partitioned_hg.hasFixedVertices()) 
+ { + for(const HypernodeID &hn : _partitioned_hg.nodes()) + { + if(_partitioned_hg.isFixed(hn)) + { _fixed_vertices.push_back(hn); } } } } - InitialPartitioningDataContainer(const InitialPartitioningDataContainer&) = delete; - InitialPartitioningDataContainer & operator= (const InitialPartitioningDataContainer &) = delete; - - InitialPartitioningDataContainer(InitialPartitioningDataContainer&&) = delete; - InitialPartitioningDataContainer & operator= (InitialPartitioningDataContainer &&) = delete; - - ~InitialPartitioningDataContainer() { - tbb::parallel_invoke([&] { - parallel::parallel_free_thread_local_internal_data( - _local_hg, [&](LocalInitialPartitioningHypergraph& local_hg) { - local_hg.freeInternalData(); + InitialPartitioningDataContainer(const InitialPartitioningDataContainer &) = delete; + InitialPartitioningDataContainer & + operator=(const InitialPartitioningDataContainer &) = delete; + + InitialPartitioningDataContainer(InitialPartitioningDataContainer &&) = delete; + InitialPartitioningDataContainer & + operator=(InitialPartitioningDataContainer &&) = delete; + + ~InitialPartitioningDataContainer() + { + tbb::parallel_invoke( + [&] { + parallel::parallel_free_thread_local_internal_data( + _local_hg, [&](LocalInitialPartitioningHypergraph &local_hg) { + local_hg.freeInternalData(); + }); + }, + [&] { + parallel::parallel_free_thread_local_internal_data( + _local_unassigned_hypernodes, + [&](parallel::scalable_vector &array) { + parallel::free(array); + }); }); - }, [&] { - parallel::parallel_free_thread_local_internal_data( - _local_unassigned_hypernodes, [&](parallel::scalable_vector& array) { - parallel::free(array); - }); - }); } - PartitionedHypergraph& local_partitioned_hypergraph() { + PartitionedHypergraph &local_partitioned_hypergraph() + { return _local_hg.local()._partitioned_hypergraph; } - KWayPriorityQueue& local_kway_priority_queue() { - bool& is_local_pq_initialized = _is_local_pq_initialized.local(); - KWayPriorityQueue& local_kway_pq = _local_kway_pq.local(); - if ( !is_local_pq_initialized ) { + KWayPriorityQueue &local_kway_priority_queue() + { + bool &is_local_pq_initialized = _is_local_pq_initialized.local(); + KWayPriorityQueue &local_kway_pq = _local_kway_pq.local(); + if(!is_local_pq_initialized) + { local_kway_pq.initialize(local_partitioned_hypergraph().initialNumNodes()); is_local_pq_initialized = true; } return local_kway_pq; } - kahypar::ds::FastResetFlagArray<>& local_hypernode_fast_reset_flag_array() { + kahypar::ds::FastResetFlagArray<> &local_hypernode_fast_reset_flag_array() + { return _local_hn_visited.local(); } - kahypar::ds::FastResetFlagArray<>& local_hyperedge_fast_reset_flag_array() { + kahypar::ds::FastResetFlagArray<> &local_hyperedge_fast_reset_flag_array() + { return _local_he_visited.local(); } - void reset_unassigned_hypernodes(std::mt19937& prng) { - vec& unassigned_hypernodes = _local_unassigned_hypernodes.local(); - size_t& unassigned_hypernode_pointer = _local_unassigned_hypernode_pointer.local(); - if ( unassigned_hypernode_pointer == std::numeric_limits::max() || _context.partition.deterministic ) { - if ( _context.partition.deterministic ) { + void reset_unassigned_hypernodes(std::mt19937 &prng) + { + vec &unassigned_hypernodes = _local_unassigned_hypernodes.local(); + size_t &unassigned_hypernode_pointer = _local_unassigned_hypernode_pointer.local(); + if(unassigned_hypernode_pointer == std::numeric_limits::max() || + _context.partition.deterministic) + { + if(_context.partition.deterministic) + { 
unassigned_hypernodes.clear(); } // In case the local unassigned hypernode vector was not initialized before // we initialize it here - const PartitionedHypergraph& hypergraph = local_partitioned_hypergraph(); - for ( const HypernodeID& hn : hypergraph.nodes() ) { - if ( !hypergraph.isFixed(hn) ) { + const PartitionedHypergraph &hypergraph = local_partitioned_hypergraph(); + for(const HypernodeID &hn : hypergraph.nodes()) + { + if(!hypergraph.isFixed(hn)) + { unassigned_hypernodes.push_back(hn); } } @@ -458,30 +516,36 @@ class InitialPartitioningDataContainer { unassigned_hypernode_pointer = unassigned_hypernodes.size(); } - HypernodeID get_unassigned_hypernode(const PartitionID unassigned_block = kInvalidPartition) { - const PartitionedHypergraph& hypergraph = local_partitioned_hypergraph(); - parallel::scalable_vector& unassigned_hypernodes = - _local_unassigned_hypernodes.local(); - size_t& unassigned_hypernode_pointer = _local_unassigned_hypernode_pointer.local(); + HypernodeID + get_unassigned_hypernode(const PartitionID unassigned_block = kInvalidPartition) + { + const PartitionedHypergraph &hypergraph = local_partitioned_hypergraph(); + parallel::scalable_vector &unassigned_hypernodes = + _local_unassigned_hypernodes.local(); + size_t &unassigned_hypernode_pointer = _local_unassigned_hypernode_pointer.local(); ASSERT(!unassigned_hypernodes.empty()); ASSERT(unassigned_hypernode_pointer <= unassigned_hypernodes.size()); - while ( unassigned_hypernode_pointer > 0 ) { + while(unassigned_hypernode_pointer > 0) + { const HypernodeID current_hn = unassigned_hypernodes[0]; // In case the current hypernode is unassigned we return it - if ( hypergraph.partID(current_hn) == unassigned_block && - !hypergraph.isFixed(current_hn) ) { + if(hypergraph.partID(current_hn) == unassigned_block && + !hypergraph.isFixed(current_hn)) + { return current_hn; } // In case the hypernode on the first position is already assigned, // we swap it to end of the unassigned hypernode vector and decrement // the pointer such that we will not visit it again - std::swap(unassigned_hypernodes[0], unassigned_hypernodes[--unassigned_hypernode_pointer]); + std::swap(unassigned_hypernodes[0], + unassigned_hypernodes[--unassigned_hypernode_pointer]); } return kInvalidHypernode; } - bool should_initial_partitioner_run(const InitialPartitioningAlgorithm algorithm) { + bool should_initial_partitioner_run(const InitialPartitioningAlgorithm algorithm) + { return _global_stats.should_initial_partitioner_run(algorithm); } @@ -490,25 +554,32 @@ class InitialPartitioningDataContainer { * the best local partition, if it has a better quality (or better imbalance). * Partition on the local hypergraph is resetted afterwards. 
*/ - void commit(const InitialPartitioningAlgorithm algorithm, std::mt19937& prng, size_t deterministic_tag, - const double time = 0.0) { + void commit(const InitialPartitioningAlgorithm algorithm, std::mt19937 &prng, + size_t deterministic_tag, const double time = 0.0) + { // already commits the result if non-deterministic - auto& my_ip_data = _local_hg.local(); + auto &my_ip_data = _local_hg.local(); auto my_result = my_ip_data.refineAndUpdateStats(algorithm, prng, time); const double eps = _context.partition.epsilon; - if ( _context.partition.deterministic ) { + if(_context.partition.deterministic) + { // apply result to shared pool - my_result._random_tag = prng(); // this is deterministic since we call the prng owned exclusively by the flat IP algo object + my_result._random_tag = prng(); // this is deterministic since we call the prng + // owned exclusively by the flat IP algo object my_result._deterministic_tag = deterministic_tag; PartitioningResult worst_in_population = _best_partitions[0].first; - if (worst_in_population.is_other_better(my_result, eps)) { + if(worst_in_population.is_other_better(my_result, eps)) + { _pop_lock.lock(); worst_in_population = _best_partitions[0].first; - if (worst_in_population.is_other_better(my_result, eps)) { + if(worst_in_population.is_other_better(my_result, eps)) + { // remove current worst and replace with my result my_ip_data.copyPartition(_best_partitions[0].second); - auto comp = [&](const auto& l, const auto& r) { return r.first.is_other_better(l.first, eps); }; + auto comp = [&](const auto &l, const auto &r) { + return r.first.is_other_better(l.first, eps); + }; assert(std::is_heap(_best_partitions.begin(), _best_partitions.end(), comp)); _best_partitions[0].first = my_result; std::pop_heap(_best_partitions.begin(), _best_partitions.end(), comp); @@ -516,8 +587,11 @@ class InitialPartitioningDataContainer { } _pop_lock.unlock(); } - } else { - if (my_ip_data._result.is_other_better(my_result, eps)) { + } + else + { + if(my_ip_data._result.is_other_better(my_result, eps)) + { my_ip_data._result = my_result; my_ip_data.copyPartition(my_ip_data._partition); } @@ -525,7 +599,8 @@ class InitialPartitioningDataContainer { my_ip_data._partitioned_hypergraph.resetPartition(); } - void commit(InitialPartitioningAlgorithm algorithm) { + void commit(InitialPartitioningAlgorithm algorithm) + { // dummy values for tests std::mt19937 prng(420); commit(algorithm, prng, 420); @@ -536,40 +611,51 @@ class InitialPartitioningDataContainer { * the hypergraph. Note, this function is not thread-safe and should be called * if no other thread using that object operates on it. 
*/ - void apply() { + void apply() + { // Initialize Stats parallel::scalable_vector stats; size_t number_of_threads = 0; - for ( uint8_t algo = 0; algo < static_cast(InitialPartitioningAlgorithm::UNDEFINED); ++algo ) { + for(uint8_t algo = 0; + algo < static_cast(InitialPartitioningAlgorithm::UNDEFINED); ++algo) + { stats.emplace_back(static_cast(algo)); } InitialPartitioningAlgorithm best_flat_algo = InitialPartitioningAlgorithm::UNDEFINED; - HyperedgeWeight best_feasible_objective = std::numeric_limits::max(); unused(best_feasible_objective); + HyperedgeWeight best_feasible_objective = std::numeric_limits::max(); + unused(best_feasible_objective); - if ( _context.partition.deterministic ) { - for (auto& p : _local_hg) { + if(_context.partition.deterministic) + { + for(auto &p : _local_hg) + { ++number_of_threads; p.aggregate_stats(stats); } // bring them in a deterministic order - std::sort(_best_partitions.begin(), _best_partitions.end(), [&](const auto& l, const auto& r) { - return r.first.is_other_better(l.first, _context.partition.epsilon); - }); + std::sort(_best_partitions.begin(), _best_partitions.end(), + [&](const auto &l, const auto &r) { + return r.first.is_other_better(l.first, _context.partition.epsilon); + }); - if ( _context.initial_partitioning.perform_refinement_on_best_partitions ) { + if(_context.initial_partitioning.perform_refinement_on_best_partitions) + { auto refinement_task = [&](size_t i) { - auto& my_data = _local_hg.local(); - auto& my_phg = my_data._partitioned_hypergraph; - vec& my_partition = _best_partitions[i].second; - PartitioningResult& my_objectives = _best_partitions[i].first; + auto &my_data = _local_hg.local(); + auto &my_phg = my_data._partitioned_hypergraph; + vec &my_partition = _best_partitions[i].second; + PartitioningResult &my_objectives = _best_partitions[i].first; std::mt19937 prng(_context.partition.seed + 420 + my_phg.initialNumPins() + i); - auto refined = my_data.performRefinementOnPartition(my_partition, my_objectives, prng); + auto refined = + my_data.performRefinementOnPartition(my_partition, my_objectives, prng); refined._deterministic_tag = my_objectives._deterministic_tag; refined._random_tag = my_objectives._random_tag; - if (my_objectives.is_other_better(refined, _context.partition.epsilon)) { - for (HypernodeID node : my_phg.nodes()) { + if(my_objectives.is_other_better(refined, _context.partition.epsilon)) + { + for(HypernodeID node : my_phg.nodes()) + { my_partition[node] = my_phg.partID(node); } my_objectives = refined; @@ -577,36 +663,44 @@ class InitialPartitioningDataContainer { }; tbb::task_group fm_refinement_group; - for (size_t i = 0; i < _best_partitions.size(); ++i) { + for(size_t i = 0; i < _best_partitions.size(); ++i) + { fm_refinement_group.run(std::bind(refinement_task, i)); } fm_refinement_group.wait(); } size_t best_index = 0; - for (size_t i = 1; i < _best_partitions.size(); ++i) { - if (_best_partitions[best_index].first.is_other_better(_best_partitions[i].first, _context.partition.epsilon) ) { + for(size_t i = 1; i < _best_partitions.size(); ++i) + { + if(_best_partitions[best_index].first.is_other_better(_best_partitions[i].first, + _context.partition.epsilon)) + { best_index = i; } } best_flat_algo = _best_partitions[best_index].first._algorithm; best_feasible_objective = _best_partitions[best_index].first._objective; - const vec& best_partition = _best_partitions[best_index].second; + const vec &best_partition = _best_partitions[best_index].second; _partitioned_hg.doParallelForAllNodes([&](HypernodeID 
node) { ASSERT(node < best_partition.size() && best_partition[node] != kInvalidPartition); _partitioned_hg.setOnlyNodePart(node, best_partition[node]); }); - - } else { + } + else + { // Perform FM refinement on the best partition of each thread int thread_counter = 0; - if ( _context.initial_partitioning.perform_refinement_on_best_partitions ) { + if(_context.initial_partitioning.perform_refinement_on_best_partitions) + { tbb::task_group fm_refinement_group; - for ( LocalInitialPartitioningHypergraph& partition : _local_hg ) { + for(LocalInitialPartitioningHypergraph &partition : _local_hg) + { fm_refinement_group.run([&, thread_counter] { - partition.performRefinementOnBestPartition(_partitioned_hg.initialNumPins() + thread_counter); + partition.performRefinementOnBestPartition(_partitioned_hg.initialNumPins() + + thread_counter); }); thread_counter++; } @@ -614,25 +708,34 @@ class InitialPartitioningDataContainer { } // Determine best partition - LocalInitialPartitioningHypergraph* best = nullptr; - LocalInitialPartitioningHypergraph* worst = nullptr; - LocalInitialPartitioningHypergraph* best_imbalance = nullptr; - LocalInitialPartitioningHypergraph* best_objective = nullptr; - for ( LocalInitialPartitioningHypergraph& partition : _local_hg ) { + LocalInitialPartitioningHypergraph *best = nullptr; + LocalInitialPartitioningHypergraph *worst = nullptr; + LocalInitialPartitioningHypergraph *best_imbalance = nullptr; + LocalInitialPartitioningHypergraph *best_objective = nullptr; + for(LocalInitialPartitioningHypergraph &partition : _local_hg) + { ++number_of_threads; partition.aggregate_stats(stats); - if ( !best || best->_result.is_other_better(partition._result, _context.partition.epsilon) ) { + if(!best || + best->_result.is_other_better(partition._result, _context.partition.epsilon)) + { best = &partition; } - if ( !worst || !worst->_result.is_other_better(partition._result, _context.partition.epsilon) ) { + if(!worst || + !worst->_result.is_other_better(partition._result, _context.partition.epsilon)) + { worst = &partition; } - if ( !best_imbalance || best_imbalance->_result._imbalance > partition._result._imbalance || - (best_imbalance->_result._imbalance == partition._result._imbalance && - best_objective->_result._objective > partition._result._objective)) { + if(!best_imbalance || + best_imbalance->_result._imbalance > partition._result._imbalance || + (best_imbalance->_result._imbalance == partition._result._imbalance && + best_objective->_result._objective > partition._result._objective)) + { best_imbalance = &partition; } - if ( !best_objective || best_objective->_result._objective > partition._result._objective ) { + if(!best_objective || + best_objective->_result._objective > partition._result._objective) + { best_objective = &partition; } } @@ -643,7 +746,8 @@ class InitialPartitioningDataContainer { ASSERT(best_objective); DBG << "Num Vertices =" << _partitioned_hg.initialNumNodes() << ", Num Edges =" << _partitioned_hg.initialNumEdges() - << ", k =" << _context.partition.k << ", epsilon =" << _context.partition.epsilon; + << ", k =" << _context.partition.k + << ", epsilon =" << _context.partition.epsilon; DBG << "Best Partition [" << best->_result.str() << "]"; DBG << "Worst Partition [" << worst->_result.str() << "]"; DBG << "Best Balanced Partition [" << best_imbalance->_result.str() << "]"; @@ -664,36 +768,41 @@ class InitialPartitioningDataContainer { _partitioned_hg.initializePartition(); ASSERT(best_feasible_objective == metrics::quality(_partitioned_hg, _context, 
false), - V(best_feasible_objective) << V(metrics::quality(_partitioned_hg, _context, false))); - utils::Utilities::instance().getInitialPartitioningStats( - _context.utility_id).add_initial_partitioning_result(best_flat_algo, number_of_threads, stats); + V(best_feasible_objective) + << V(metrics::quality(_partitioned_hg, _context, false))); + utils::Utilities::instance() + .getInitialPartitioningStats(_context.utility_id) + .add_initial_partitioning_result(best_flat_algo, number_of_threads, stats); } - IteratorRange fixedVertices() const { - return IteratorRange( - _fixed_vertices.cbegin(), _fixed_vertices.cend()); + IteratorRange fixedVertices() const + { + return IteratorRange(_fixed_vertices.cbegin(), + _fixed_vertices.cend()); } - HypernodeID numFixedVertices() const { - return _fixed_vertices.size(); - } + HypernodeID numFixedVertices() const { return _fixed_vertices.size(); } - void preassignFixedVertices(PartitionedHypergraph& hypergraph) { - if ( hypergraph.hasFixedVertices() ) { - for ( const HypernodeID& hn : fixedVertices() ) { + void preassignFixedVertices(PartitionedHypergraph &hypergraph) + { + if(hypergraph.hasFixedVertices()) + { + for(const HypernodeID &hn : fixedVertices()) + { ASSERT(hypergraph.isFixed(hn)); hypergraph.setNodePart(hn, hypergraph.fixedVertexBlock(hn)); } } } - private: - LocalInitialPartitioningHypergraph construct_local_partitioned_hypergraph() { - return LocalInitialPartitioningHypergraph( - _partitioned_hg.hypergraph(), _context, _global_stats, _disable_fm); +private: + LocalInitialPartitioningHypergraph construct_local_partitioned_hypergraph() + { + return LocalInitialPartitioningHypergraph(_partitioned_hg.hypergraph(), _context, + _global_stats, _disable_fm); } - PartitionedHypergraph& _partitioned_hg; + PartitionedHypergraph &_partitioned_hg; Context _context; const bool _disable_fm; @@ -711,19 +820,21 @@ class InitialPartitioningDataContainer { size_t _max_pop_size; SpinLock _pop_lock; - vec< std::pair> > _best_partitions; + vec > > _best_partitions; }; namespace ip { - template - ip_data_container_t* to_pointer(InitialPartitioningDataContainer& ip_data) { - return reinterpret_cast(&ip_data); - } +template +ip_data_container_t *to_pointer(InitialPartitioningDataContainer &ip_data) +{ + return reinterpret_cast(&ip_data); +} - template - InitialPartitioningDataContainer& to_reference(ip_data_container_t* ptr) { - return *reinterpret_cast*>(ptr); - } +template +InitialPartitioningDataContainer &to_reference(ip_data_container_t *ptr) +{ + return *reinterpret_cast *>(ptr); +} } } // namespace mt_kahypar diff --git a/mt-kahypar/partition/initial_partitioning/label_propagation_initial_partitioner.cpp b/mt-kahypar/partition/initial_partitioning/label_propagation_initial_partitioner.cpp index e6863c6fb..5beeb46ae 100644 --- a/mt-kahypar/partition/initial_partitioning/label_propagation_initial_partitioner.cpp +++ b/mt-kahypar/partition/initial_partitioning/label_propagation_initial_partitioner.cpp @@ -27,47 +27,60 @@ #include "mt-kahypar/partition/initial_partitioning/label_propagation_initial_partitioner.h" #include "mt-kahypar/definitions.h" -#include "mt-kahypar/partition/refinement/gains/cut/cut_attributed_gains.h" -#include "mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h" #include "mt-kahypar/partition/initial_partitioning/policies/gain_computation_policy.h" +#include "mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h" +#include 
"mt-kahypar/partition/refinement/gains/cut/cut_attributed_gains.h" #include "mt-kahypar/utils/randomize.h" namespace mt_kahypar { -template -void LabelPropagationInitialPartitioner::partitionImpl() { - if ( _ip_data.should_initial_partitioner_run(InitialPartitioningAlgorithm::label_propagation) ) { +template +void LabelPropagationInitialPartitioner::partitionImpl() +{ + if(_ip_data.should_initial_partitioner_run( + InitialPartitioningAlgorithm::label_propagation)) + { HighResClockTimepoint start = std::chrono::high_resolution_clock::now(); - PartitionedHypergraph& hg = _ip_data.local_partitioned_hypergraph(); - + PartitionedHypergraph &hg = _ip_data.local_partitioned_hypergraph(); _ip_data.reset_unassigned_hypernodes(_rng); _ip_data.preassignFixedVertices(hg); - vec> start_nodes = - PseudoPeripheralStartNodes::computeStartNodes(_ip_data, _context, kInvalidPartition, _rng); - for ( PartitionID block = 0; block < _context.partition.k; ++block ) { + vec > start_nodes = + PseudoPeripheralStartNodes::computeStartNodes( + _ip_data, _context, kInvalidPartition, _rng); + for(PartitionID block = 0; block < _context.partition.k; ++block) + { size_t i = 0; - for ( ; i < std::min(start_nodes[block].size(), - _context.initial_partitioning.lp_initial_block_size); ++i ) { + for(; i < std::min(start_nodes[block].size(), + _context.initial_partitioning.lp_initial_block_size); + ++i) + { const HypernodeID hn = start_nodes[block][i]; - if ( hg.partID(hn) == kInvalidPartition && fitsIntoBlock(hg, hn, block) ) { + if(hg.partID(hn) == kInvalidPartition && fitsIntoBlock(hg, hn, block)) + { hg.setNodePart(hn, block); - } else { - std::swap(start_nodes[block][i--], start_nodes[block][start_nodes[block].size() - 1]); + } + else + { + std::swap(start_nodes[block][i--], + start_nodes[block][start_nodes[block].size() - 1]); start_nodes[block].pop_back(); } } // Remove remaining unassigned seed nodes - for ( ; i < start_nodes[block].size(); ++i ) { + for(; i < start_nodes[block].size(); ++i) + { start_nodes[block].pop_back(); } - if ( start_nodes[block].size() == 0 ) { + if(start_nodes[block].size() == 0) + { // There has been no seed node assigned to the block // => find an unassigned node and assign it to the block const HypernodeID hn = _ip_data.get_unassigned_hypernode(); - if ( hn != kInvalidHypernode ) { + if(hn != kInvalidHypernode) + { hg.setNodePart(hn, block); start_nodes[block].push_back(hn); } @@ -77,85 +90,101 @@ void LabelPropagationInitialPartitioner::partitionImpl() { // Each block is extended with 5 additional vertices which are adjacent // to their corresponding seed vertices. This should prevent that block // becomes empty after several label propagation rounds. 
- for ( PartitionID block = 0; block < _context.partition.k; ++block ) { - if ( !start_nodes[block].empty() && start_nodes[block].size() < - _context.initial_partitioning.lp_initial_block_size ) { + for(PartitionID block = 0; block < _context.partition.k; ++block) + { + if(!start_nodes[block].empty() && + start_nodes[block].size() < _context.initial_partitioning.lp_initial_block_size) + { extendBlockToInitialBlockSize(hg, start_nodes[block], block); } } bool converged = false; - for ( size_t i = 0; i < _context.initial_partitioning.lp_maximum_iterations && !converged; ++i ) { + for(size_t i = 0; + i < _context.initial_partitioning.lp_maximum_iterations && !converged; ++i) + { converged = true; - for ( const HypernodeID& hn : hg.nodes() ) { - if (hg.nodeDegree(hn) > 0 && !hg.isFixed(hn)) { + for(const HypernodeID &hn : hg.nodes()) + { + if(hg.nodeDegree(hn) > 0 && !hg.isFixed(hn)) + { // Assign vertex to the block where FM gain is maximized MaxGainMove max_gain_move = computeMaxGainMove(hg, hn); const PartitionID to = max_gain_move.block; - if ( to != kInvalidPartition ) { + if(to != kInvalidPartition) + { const PartitionID from = hg.partID(hn); - if ( from == kInvalidPartition ) { + if(from == kInvalidPartition) + { ASSERT(fitsIntoBlock(hg, hn, to)); - HEAVY_INITIAL_PARTITIONING_ASSERT([&] { - Gain expected_gain = CutGainPolicy::calculateGain(hg, hn, to); - if ( expected_gain != max_gain_move.gain ) { - LOG << V(hn); - LOG << V(from); - LOG << V(to); - LOG << V(max_gain_move.gain); - LOG << V(expected_gain); - } - return true; - }(), "Gain calculation failed"); + HEAVY_INITIAL_PARTITIONING_ASSERT( + [&] { + Gain expected_gain = + CutGainPolicy::calculateGain(hg, hn, to); + if(expected_gain != max_gain_move.gain) + { + LOG << V(hn); + LOG << V(from); + LOG << V(to); + LOG << V(max_gain_move.gain); + LOG << V(expected_gain); + } + return true; + }(), + "Gain calculation failed"); converged = false; hg.setNodePart(hn, to); - } else if ( from != to ) { + } + else if(from != to) + { ASSERT(fitsIntoBlock(hg, hn, to)); converged = false; - #ifndef KAHYPAR_ENABLE_HEAVY_INITIAL_PARTITIONING_ASSERTIONS +#ifndef KAHYPAR_ENABLE_HEAVY_INITIAL_PARTITIONING_ASSERTIONS hg.changeNodePart(hn, from, to); - #else +#else Gain expected_gain = 0; auto cut_delta = [&](const HyperedgeID he, - const HyperedgeWeight edge_weight, - const HypernodeID, - const HypernodeID pin_count_in_from_part_after, - const HypernodeID pin_count_in_to_part_after) { + const HyperedgeWeight edge_weight, const HypernodeID, + const HypernodeID pin_count_in_from_part_after, + const HypernodeID pin_count_in_to_part_after) { HypernodeID adjusted_edge_size = 0; - for ( const HypernodeID& pin : hg.pins(he) ) { - if ( hg.partID(pin) != kInvalidPartition ) { + for(const HypernodeID &pin : hg.pins(he)) + { + if(hg.partID(pin) != kInvalidPartition) + { ++adjusted_edge_size; } } expected_gain -= CutAttributedGains::gain( - he, edge_weight, adjusted_edge_size, - pin_count_in_from_part_after, pin_count_in_to_part_after); + he, edge_weight, adjusted_edge_size, pin_count_in_from_part_after, + pin_count_in_to_part_after); }; hg.changeNodePart(hn, from, to, cut_delta); ASSERT(expected_gain == max_gain_move.gain, "Gain calculation failed" - << V(expected_gain) << V(max_gain_move.gain)); - #endif + << V(expected_gain) + << V(max_gain_move.gain)); +#endif } } - - } else if ( hg.partID(hn) == kInvalidPartition ) { + } + else if(hg.partID(hn) == kInvalidPartition) + { // In case vertex hn is a degree zero vertex we assign it // to the block with minimum 
weight assignVertexToBlockWithMinimumWeight(hg, hn); } - } - } // If there are still unassigned vertices left, we assign them to the // block with minimum weight. - while ( _ip_data.get_unassigned_hypernode() != kInvalidHypernode ) { + while(_ip_data.get_unassigned_hypernode() != kInvalidHypernode) + { const HypernodeID unassigned_hn = _ip_data.get_unassigned_hypernode(); assignVertexToBlockWithMinimumWeight(hg, unassigned_hn); } @@ -166,18 +195,23 @@ void LabelPropagationInitialPartitioner::partitionImpl() { } } -template -MaxGainMove LabelPropagationInitialPartitioner::computeMaxGainMoveForUnassignedVertex(PartitionedHypergraph& hypergraph, - const HypernodeID hn) { +template +MaxGainMove +LabelPropagationInitialPartitioner::computeMaxGainMoveForUnassignedVertex( + PartitionedHypergraph &hypergraph, const HypernodeID hn) +{ ASSERT(hypergraph.partID(hn) == kInvalidPartition); - ASSERT(std::all_of(_tmp_scores.begin(), _tmp_scores.end(), [](Gain i) { return i == 0; }), - "Temp gain array not initialized properly"); + ASSERT( + std::all_of(_tmp_scores.begin(), _tmp_scores.end(), [](Gain i) { return i == 0; }), + "Temp gain array not initialized properly"); _valid_blocks.reset(); HyperedgeWeight internal_weight = 0; - for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { const HyperedgeWeight he_weight = hypergraph.edgeWeight(he); - if (hypergraph.connectivity(he) == 1) { + if(hypergraph.connectivity(he) == 1) + { // In case, connectivity is one we would make the hyperedge cut if would // assign the vertex to an different block than the one already contained // in the hyperedge @@ -185,10 +219,13 @@ MaxGainMove LabelPropagationInitialPartitioner::computeMaxGainMoveFo _valid_blocks.set(connected_block, true); internal_weight += he_weight; _tmp_scores[connected_block] += he_weight; - } else { + } + else + { // Otherwise we can assign the vertex to a block already contained // in the hyperedge without affecting cut - for (const PartitionID& target_part : hypergraph.connectivitySet(he)) { + for(const PartitionID &target_part : hypergraph.connectivitySet(he)) + { _valid_blocks.set(target_part, true); } } @@ -197,39 +234,51 @@ MaxGainMove LabelPropagationInitialPartitioner::computeMaxGainMoveFo return findMaxGainMove(hypergraph, hn, internal_weight); } -template -MaxGainMove LabelPropagationInitialPartitioner::computeMaxGainMoveForAssignedVertex(PartitionedHypergraph& hypergraph, - const HypernodeID hn) { +template +MaxGainMove +LabelPropagationInitialPartitioner::computeMaxGainMoveForAssignedVertex( + PartitionedHypergraph &hypergraph, const HypernodeID hn) +{ ASSERT(hypergraph.partID(hn) != kInvalidPartition); - ASSERT(std::all_of(_tmp_scores.begin(), _tmp_scores.end(), [](Gain i) { return i == 0; }), - "Temp gain array not initialized properly"); + ASSERT( + std::all_of(_tmp_scores.begin(), _tmp_scores.end(), [](Gain i) { return i == 0; }), + "Temp gain array not initialized properly"); _valid_blocks.reset(); const PartitionID from = hypergraph.partID(hn); HyperedgeWeight internal_weight = 0; - for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { const HyperedgeWeight he_weight = hypergraph.edgeWeight(he); const PartitionID connectivity = hypergraph.connectivity(he); const HypernodeID pins_in_from_part = hypergraph.pinCountInPart(he, from); - if ( connectivity == 1 && pins_in_from_part > 1 ) { + if(connectivity == 1 && pins_in_from_part > 1) + { // If 
connectivity is one and there is more than one vertex in block // of hypernode hn, we would make the hyperedge cut, if we would assign // hn to an different block. internal_weight += he_weight; - } else if ( connectivity == 2 ) { - for (const PartitionID& to : hypergraph.connectivitySet(he)) { + } + else if(connectivity == 2) + { + for(const PartitionID &to : hypergraph.connectivitySet(he)) + { _valid_blocks.set(to, true); // In case connectivity is two and hn is the last vertex in hyperedge // he of block from, we would make that hyperedge a non-cut hyperedge. - if ( pins_in_from_part == 1 && hypergraph.pinCountInPart(he, to) > 0 ) { + if(pins_in_from_part == 1 && hypergraph.pinCountInPart(he, to) > 0) + { _tmp_scores[to] += he_weight; } } - } else { + } + else + { // Otherwise we can assign the vertex to a block already contained // in the hyperedge without affecting cut - for (const PartitionID& to : hypergraph.connectivitySet(he)) { + for(const PartitionID &to : hypergraph.connectivitySet(he)) + { _valid_blocks.set(to, true); } } @@ -238,77 +287,95 @@ MaxGainMove LabelPropagationInitialPartitioner::computeMaxGainMoveFo return findMaxGainMove(hypergraph, hn, internal_weight); } -template -MaxGainMove LabelPropagationInitialPartitioner::findMaxGainMove(PartitionedHypergraph& hypergraph, - const HypernodeID hn, - const HypernodeWeight internal_weight) { +template +MaxGainMove LabelPropagationInitialPartitioner::findMaxGainMove( + PartitionedHypergraph &hypergraph, const HypernodeID hn, + const HypernodeWeight internal_weight) +{ const PartitionID from = hypergraph.partID(hn); PartitionID best_block = from; Gain best_score = from == kInvalidPartition ? std::numeric_limits::min() : 0; - for (PartitionID block = 0; block < _context.partition.k; ++block) { - if (from != block && _valid_blocks[block]) { + for(PartitionID block = 0; block < _context.partition.k; ++block) + { + if(from != block && _valid_blocks[block]) + { _tmp_scores[block] -= internal_weight; // Since we perform size-constraint label propagation, the move to the // corresponding block is only valid, if it fullfils the balanced constraint. 
- if (fitsIntoBlock(hypergraph, hn, block) && _tmp_scores[block] > best_score) { + if(fitsIntoBlock(hypergraph, hn, block) && _tmp_scores[block] > best_score) + { best_score = _tmp_scores[block]; best_block = block; } } _tmp_scores[block] = 0; } - return MaxGainMove { best_block, best_score }; + return MaxGainMove{ best_block, best_score }; } -template -void LabelPropagationInitialPartitioner::extendBlockToInitialBlockSize(PartitionedHypergraph& hypergraph, - const vec& seed_vertices, - const PartitionID block) { +template +void LabelPropagationInitialPartitioner::extendBlockToInitialBlockSize( + PartitionedHypergraph &hypergraph, const vec &seed_vertices, + const PartitionID block) +{ ASSERT(seed_vertices.size() > 0); size_t block_size = seed_vertices.size(); // We search for _context.initial_partitioning.lp_initial_block_size vertices // around the seed vertex to extend the corresponding block - for ( const HypernodeID& seed_vertex : seed_vertices ) { - for ( const HyperedgeID& he : hypergraph.incidentEdges(seed_vertex) ) { - for ( const HypernodeID& pin : hypergraph.pins(he) ) { - if ( hypergraph.partID(pin) == kInvalidPartition && - fitsIntoBlock(hypergraph, pin, block) ) { + for(const HypernodeID &seed_vertex : seed_vertices) + { + for(const HyperedgeID &he : hypergraph.incidentEdges(seed_vertex)) + { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + if(hypergraph.partID(pin) == kInvalidPartition && + fitsIntoBlock(hypergraph, pin, block)) + { hypergraph.setNodePart(pin, block); block_size++; - if ( block_size >= _context.initial_partitioning.lp_initial_block_size ) break; + if(block_size >= _context.initial_partitioning.lp_initial_block_size) + break; } } - if ( block_size >= _context.initial_partitioning.lp_initial_block_size ) break; + if(block_size >= _context.initial_partitioning.lp_initial_block_size) + break; } - if ( block_size >= _context.initial_partitioning.lp_initial_block_size ) break; + if(block_size >= _context.initial_partitioning.lp_initial_block_size) + break; } - // If there are less than _context.initial_partitioning.lp_initial_block_size // adjacent vertices to the seed vertex, we find a new seed vertex and call // this function recursive - while ( block_size < _context.initial_partitioning.lp_initial_block_size ) { + while(block_size < _context.initial_partitioning.lp_initial_block_size) + { const HypernodeID seed_vertex = _ip_data.get_unassigned_hypernode(); - if ( seed_vertex != kInvalidHypernode && fitsIntoBlock(hypergraph, seed_vertex, block) ) { + if(seed_vertex != kInvalidHypernode && fitsIntoBlock(hypergraph, seed_vertex, block)) + { hypergraph.setNodePart(seed_vertex, block); block_size++; - } else { + } + else + { break; } } } -template -void LabelPropagationInitialPartitioner::assignVertexToBlockWithMinimumWeight(PartitionedHypergraph& hypergraph, - const HypernodeID hn) { +template +void LabelPropagationInitialPartitioner::assignVertexToBlockWithMinimumWeight( + PartitionedHypergraph &hypergraph, const HypernodeID hn) +{ ASSERT(hypergraph.partID(hn) == kInvalidPartition); PartitionID minimum_weight_block = kInvalidPartition; HypernodeWeight minimum_weight = std::numeric_limits::max(); - for ( PartitionID block = 0; block < _context.partition.k; ++block ) { + for(PartitionID block = 0; block < _context.partition.k; ++block) + { const HypernodeWeight block_weight = hypergraph.partWeight(block); - if ( block_weight < minimum_weight ) { + if(block_weight < minimum_weight) + { minimum_weight = block_weight; minimum_weight_block = block; } diff --git 
a/mt-kahypar/partition/initial_partitioning/label_propagation_initial_partitioner.h b/mt-kahypar/partition/initial_partitioning/label_propagation_initial_partitioner.h index 3dc05acf1..ff9af16a8 100644 --- a/mt-kahypar/partition/initial_partitioning/label_propagation_initial_partitioner.h +++ b/mt-kahypar/partition/initial_partitioning/label_propagation_initial_partitioner.h @@ -32,79 +32,80 @@ namespace mt_kahypar { namespace { - struct MaxGainMove { - const PartitionID block; - const Gain gain; - }; +struct MaxGainMove +{ + const PartitionID block; + const Gain gain; +}; } -template -class LabelPropagationInitialPartitioner : public IInitialPartitioner { +template +class LabelPropagationInitialPartitioner : public IInitialPartitioner +{ using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - using DeltaFunction = std::function; - #define NOOP_FUNC [] (const SynchronizedEdgeUpdate&) { } + using DeltaFunction = std::function; +#define NOOP_FUNC [](const SynchronizedEdgeUpdate &) {} static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - public: +public: LabelPropagationInitialPartitioner(const InitialPartitioningAlgorithm, - ip_data_container_t* ip_data, - const Context& context, + ip_data_container_t *ip_data, const Context &context, const int seed, const int tag) : - _ip_data(ip::to_reference(ip_data)), - _context(context), - _valid_blocks(context.partition.k), - _tmp_scores(context.partition.k), - _rng(seed), - _tag(tag) { } - - private: + _ip_data(ip::to_reference(ip_data)), + _context(context), _valid_blocks(context.partition.k), + _tmp_scores(context.partition.k), _rng(seed), _tag(tag) + { + } + +private: void partitionImpl() final; - bool fitsIntoBlock(PartitionedHypergraph& hypergraph, - const HypernodeID hn, - const PartitionID block) const { + bool fitsIntoBlock(PartitionedHypergraph &hypergraph, const HypernodeID hn, + const PartitionID block) const + { ASSERT(block != kInvalidPartition && block < _context.partition.k); return hypergraph.partWeight(block) + hypergraph.nodeWeight(hn) <= - _context.partition.perfect_balance_part_weights[block] * - std::min(1.005, 1 + _context.partition.epsilon); + _context.partition.perfect_balance_part_weights[block] * + std::min(1.005, 1 + _context.partition.epsilon); } - MaxGainMove computeMaxGainMove(PartitionedHypergraph& hypergraph, - const HypernodeID hn) { - if ( hypergraph.partID(hn) == kInvalidPartition ) { + MaxGainMove computeMaxGainMove(PartitionedHypergraph &hypergraph, const HypernodeID hn) + { + if(hypergraph.partID(hn) == kInvalidPartition) + { return computeMaxGainMoveForUnassignedVertex(hypergraph, hn); - } else { + } + else + { return computeMaxGainMoveForAssignedVertex(hypergraph, hn); } } - MaxGainMove computeMaxGainMoveForUnassignedVertex(PartitionedHypergraph& hypergraph, + MaxGainMove computeMaxGainMoveForUnassignedVertex(PartitionedHypergraph &hypergraph, const HypernodeID hn); - MaxGainMove computeMaxGainMoveForAssignedVertex(PartitionedHypergraph& hypergraph, + MaxGainMove computeMaxGainMoveForAssignedVertex(PartitionedHypergraph &hypergraph, const HypernodeID hn); - MaxGainMove findMaxGainMove(PartitionedHypergraph& hypergraph, - const HypernodeID hn, + MaxGainMove findMaxGainMove(PartitionedHypergraph &hypergraph, const HypernodeID hn, const HypernodeWeight internal_weight); - void extendBlockToInitialBlockSize(PartitionedHypergraph& hypergraph, - const vec& seed_vertices, + void extendBlockToInitialBlockSize(PartitionedHypergraph &hypergraph, + const vec 
&seed_vertices, const PartitionID block); - void assignVertexToBlockWithMinimumWeight(PartitionedHypergraph& hypergraph, + void assignVertexToBlockWithMinimumWeight(PartitionedHypergraph &hypergraph, const HypernodeID hn); - InitialPartitioningDataContainer& _ip_data; - const Context& _context; + InitialPartitioningDataContainer &_ip_data; + const Context &_context; kahypar::ds::FastResetFlagArray<> _valid_blocks; parallel::scalable_vector _tmp_scores; std::mt19937 _rng; const int _tag; }; - } // namespace mt_kahypar diff --git a/mt-kahypar/partition/initial_partitioning/policies/gain_computation_policy.h b/mt-kahypar/partition/initial_partitioning/policies/gain_computation_policy.h index 488f0b554..bd581a363 100644 --- a/mt-kahypar/partition/initial_partitioning/policies/gain_computation_policy.h +++ b/mt-kahypar/partition/initial_partitioning/policies/gain_computation_policy.h @@ -32,62 +32,73 @@ namespace mt_kahypar { -template -class CutGainPolicy { +template +class CutGainPolicy +{ - static constexpr bool enable_heavy_assert = false; + static constexpr bool enable_heavy_assert = false; - using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; + using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - static inline Gain calculateGain(const PartitionedHypergraph& hypergraph, - const HypernodeID hn, - const PartitionID to) { - if ( hypergraph.partID(hn) == kInvalidPartition ) { +public: + static inline Gain calculateGain(const PartitionedHypergraph &hypergraph, + const HypernodeID hn, const PartitionID to) + { + if(hypergraph.partID(hn) == kInvalidPartition) + { return calculateGainForInvalidBlock(hypergraph, hn, to); - } else { + } + else + { return calculateGainForValidBlock(hypergraph, hn, to); } } - static inline Gain calculateGainForInvalidBlock(const PartitionedHypergraph& hypergraph, + static inline Gain calculateGainForInvalidBlock(const PartitionedHypergraph &hypergraph, const HypernodeID hn, - const PartitionID to) { + const PartitionID to) + { ASSERT(hypergraph.partID(hn) == kInvalidPartition); ASSERT(to != kInvalidPartition); Gain gain = 0; - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn) ) { - if ( hypergraph.connectivity(he) == 1 && hypergraph.pinCountInPart(he, to) == 0 ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + if(hypergraph.connectivity(he) == 1 && hypergraph.pinCountInPart(he, to) == 0) + { gain -= hypergraph.edgeWeight(he); } } return gain; } - static inline Gain calculateGainForValidBlock(const PartitionedHypergraph& hypergraph, + static inline Gain calculateGainForValidBlock(const PartitionedHypergraph &hypergraph, const HypernodeID hn, - const PartitionID to) { + const PartitionID to) + { ASSERT(hypergraph.partID(hn) != kInvalidPartition); ASSERT(hypergraph.partID(hn) != to); ASSERT(to != kInvalidPartition); Gain gain = 0; const PartitionID from = hypergraph.partID(hn); - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn) ) { - if ( hypergraph.edgeSize(he) > 1 ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + if(hypergraph.edgeSize(he) > 1) + { const PartitionID connectivity = hypergraph.connectivity(he); const HypernodeID pin_count_in_from_part = hypergraph.pinCountInPart(he, from); const HypernodeID pin_count_in_to_part = hypergraph.pinCountInPart(he, to); - if ( connectivity == 1 && - pin_count_in_from_part > 1 ) { + if(connectivity == 1 && pin_count_in_from_part > 1) + { // In case connectivity is one and there is more than one pin left // in from 
part, we would make the hyperedge cut if move hn to // block to gain -= hypergraph.edgeWeight(he); - } else if ( connectivity == 2 && - pin_count_in_from_part == 1 && - pin_count_in_to_part > 0 ) { + } + else if(connectivity == 2 && pin_count_in_from_part == 1 && + pin_count_in_to_part > 0) + { // In case, the connectivity is two and hn is the last pin left // of block from in hyperedge he, we would make the hyperedge a // non-cut hyperedge by moving hn to block to. @@ -98,72 +109,94 @@ class CutGainPolicy { return gain; } - static inline void deltaGainUpdate(const PartitionedHypergraph& hypergraph, - KWayPriorityQueue& pq, - const HypernodeID hn, - const PartitionID from, - const PartitionID to) { - if ( from == kInvalidPartition ) { + static inline void deltaGainUpdate(const PartitionedHypergraph &hypergraph, + KWayPriorityQueue &pq, const HypernodeID hn, + const PartitionID from, const PartitionID to) + { + if(from == kInvalidPartition) + { deltaGainUpdateForInvalidBlock(hypergraph, pq, hn, from, to); - } else { + } + else + { deltaGainUpdateForValidBlock(hypergraph, pq, hn, from, to); } - HEAVY_INITIAL_PARTITIONING_ASSERT([&]() { - for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) { - for (const HypernodeID& pin : hypergraph.pins(he)) { - if (pin != hn) { - for (PartitionID block = 0; block < hypergraph.k(); ++block) { - if (pq.contains(pin, block)) { - const Gain gain = calculateGain(hypergraph, pin, block); - if (pq.key(pin, block) != gain) { - LOG << V(hn); - LOG << V(to); - LOG << V(he); - LOG << V(pin); - LOG << V(block); - LOG << V(gain); - LOG << V(pq.key(pin, block)); - return false; + HEAVY_INITIAL_PARTITIONING_ASSERT( + [&]() { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + if(pin != hn) + { + for(PartitionID block = 0; block < hypergraph.k(); ++block) + { + if(pq.contains(pin, block)) + { + const Gain gain = calculateGain(hypergraph, pin, block); + if(pq.key(pin, block) != gain) + { + LOG << V(hn); + LOG << V(to); + LOG << V(he); + LOG << V(pin); + LOG << V(block); + LOG << V(gain); + LOG << V(pq.key(pin, block)); + return false; + } } } } } } - } - return true; - } (), "Delta Gain Update failed!"); + return true; + }(), + "Delta Gain Update failed!"); } - static inline void deltaGainUpdateForInvalidBlock(const PartitionedHypergraph& hypergraph, - KWayPriorityQueue& pq, - const HypernodeID hn, - const PartitionID, - const PartitionID to) { + static inline void + deltaGainUpdateForInvalidBlock(const PartitionedHypergraph &hypergraph, + KWayPriorityQueue &pq, const HypernodeID hn, + const PartitionID, const PartitionID to) + { ASSERT(hypergraph.partID(hn) == to); - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn) ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { const HypernodeID pin_count_in_to_part_after = hypergraph.pinCountInPart(he, to); const PartitionID connectivity = hypergraph.connectivity(he); const HyperedgeWeight he_weight = hypergraph.edgeWeight(he); - if ( pin_count_in_to_part_after == 1 ) { - if ( connectivity == 1 ) { + if(pin_count_in_to_part_after == 1) + { + if(connectivity == 1) + { // Connectivity changed from 0 to 1 => Each move to an other block (except to) // would make hyperedge he cut - for ( const HypernodeID& pin : hypergraph.pins(he) ) { - for ( PartitionID block = 0; block < hypergraph.k(); ++block ) { - if ( pin != hn && block != to && pq.contains(pin, block) ) { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + for(PartitionID 
block = 0; block < hypergraph.k(); ++block) + { + if(pin != hn && block != to && pq.contains(pin, block)) + { pq.updateKeyBy(pin, block, -he_weight); } } } - } else if ( connectivity == 2 ) { + } + else if(connectivity == 2) + { // Connectivity changed from 1 to 2 => Each move to a block that is not part // of the connectivity set of the hyperedge does not increase the cut any more. - for ( PartitionID block = 0; block < hypergraph.k(); ++block ) { - if ( block == to || hypergraph.pinCountInPart(he, block) == 0 ) { - for ( const HypernodeID& pin : hypergraph.pins(he) ) { - if ( pin != hn && pq.contains(pin, block) ) { + for(PartitionID block = 0; block < hypergraph.k(); ++block) + { + if(block == to || hypergraph.pinCountInPart(he, block) == 0) + { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + if(pin != hn && pq.contains(pin, block)) + { pq.updateKeyBy(pin, block, he_weight); } } @@ -174,22 +207,27 @@ class CutGainPolicy { } } - static inline void deltaGainUpdateForValidBlock(const PartitionedHypergraph& hypergraph, - KWayPriorityQueue& pq, + static inline void deltaGainUpdateForValidBlock(const PartitionedHypergraph &hypergraph, + KWayPriorityQueue &pq, const HypernodeID hn, const PartitionID from, - const PartitionID to) { + const PartitionID to) + { ASSERT(hypergraph.partID(hn) == to); ASSERT(from != kInvalidPartition); - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn) ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { const HypernodeID he_size = hypergraph.edgeSize(he); - if ( he_size > 1 ) { - const HypernodeID pin_count_in_from_part_before = hypergraph.pinCountInPart(he, from) + 1; + if(he_size > 1) + { + const HypernodeID pin_count_in_from_part_before = + hypergraph.pinCountInPart(he, from) + 1; const HypernodeID pin_count_in_to_part_after = hypergraph.pinCountInPart(he, to); const HyperedgeWeight he_weight = hypergraph.edgeWeight(he); - if ( pin_count_in_from_part_before == he_size ) { + if(pin_count_in_from_part_before == he_size) + { ASSERT(hypergraph.connectivity(he) == 2); ASSERT(pin_count_in_to_part_after == 1); // In case, the pin count in hyperedge he of block from was equal to the @@ -197,22 +235,28 @@ class CutGainPolicy { // All moves to a different block than from do not increase the cut any // more. Therefore, we increase the gain of all pins that are contained // in PQs different from block from. - for ( const HypernodeID& pin : hypergraph.pins(he) ) { - for ( PartitionID block = 0; block < hypergraph.k(); ++block ) { - if ( pin != hn && block != from && pq.contains(pin, block) ) { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + for(PartitionID block = 0; block < hypergraph.k(); ++block) + { + if(pin != hn && block != from && pq.contains(pin, block)) + { pq.updateKeyBy(pin, block, he_weight); } } } } - if ( pin_count_in_to_part_after == he_size - 1 ) { + if(pin_count_in_to_part_after == he_size - 1) + { // In case, the pin count in hyperedge he of block to is equal to the // hyperedge size minus one, we could make the hyperedge a non-cut // hyperedge by moving the only pin left in the from block to block to. // Therefore, we increase the gain of that pin. 
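
The delta updates above keep the priority-queue keys consistent with the plain cut gain of a move; as a minimal standalone sketch of that gain, using a toy pin-count representation and hypothetical names rather than the PartitionedHypergraph API, the two connectivity cases of calculateGainForValidBlock look like this:

#include <vector>

struct ToyHyperedge {
  int weight;
  std::vector<int> pins_in_block; // pin count of this hyperedge per block
};

// Number of blocks that contain at least one pin of the hyperedge.
int connectivity(const ToyHyperedge &e) {
  int c = 0;
  for (int cnt : e.pins_in_block)
    c += (cnt > 0);
  return c;
}

// Cut gain of moving one pin of every incident hyperedge from block `from`
// to block `to` (higher gain = better move, as in CutGainPolicy).
int cutGain(const std::vector<ToyHyperedge> &incident, int from, int to) {
  int gain = 0;
  for (const ToyHyperedge &e : incident) {
    const int c = connectivity(e);
    if (c == 1 && e.pins_in_block[from] > 1)
      gain -= e.weight; // edge becomes cut: the move leaves pins behind in `from`
    else if (c == 2 && e.pins_in_block[from] == 1 && e.pins_in_block[to] > 0)
      gain += e.weight; // edge becomes uncut: the moved pin was the last one in `from`
  }
  return gain;
}
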
- for ( const HypernodeID& pin : hypergraph.pins(he) ) { - if ( pin != hn && hypergraph.partID(pin) != to && pq.contains(pin, to) ) { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + if(pin != hn && hypergraph.partID(pin) != to && pq.contains(pin, to)) + { pq.updateKeyBy(pin, to, he_weight); } } @@ -222,74 +266,87 @@ class CutGainPolicy { } }; -template -class MaxNetGainPolicy { +template +class MaxNetGainPolicy +{ - static constexpr bool enable_heavy_assert = false; + static constexpr bool enable_heavy_assert = false; - using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; + using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - static inline Gain calculateGain(const PartitionedHypergraph& hypergraph, - const HypernodeID hn, - const PartitionID to) { +public: + static inline Gain calculateGain(const PartitionedHypergraph &hypergraph, + const HypernodeID hn, const PartitionID to) + { ASSERT(to != kInvalidPartition); Gain gain = 0; - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn) ) { - if (hypergraph.pinCountInPart(he, to) > 0) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + if(hypergraph.pinCountInPart(he, to) > 0) + { gain += hypergraph.edgeWeight(he); } } return gain; } - static inline void deltaGainUpdate(const PartitionedHypergraph& hypergraph, - KWayPriorityQueue& pq, - const HypernodeID hn, - const PartitionID, - const PartitionID to) { + static inline void deltaGainUpdate(const PartitionedHypergraph &hypergraph, + KWayPriorityQueue &pq, const HypernodeID hn, + const PartitionID, const PartitionID to) + { ASSERT(hypergraph.partID(hn) == to); - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn) ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { const HyperedgeWeight he_weight = hypergraph.edgeWeight(he); const HypernodeID pins_in_to_part = hypergraph.pinCountInPart(he, to); - if ( pins_in_to_part == 1 ) { + if(pins_in_to_part == 1) + { // Block to was not part of hyperedge he before // => Update gain of all pins in hyperedge to block to - for ( const HypernodeID pin : hypergraph.pins(he) ) { - if ( pq.contains(pin, to) ) { + for(const HypernodeID pin : hypergraph.pins(he)) + { + if(pq.contains(pin, to)) + { pq.updateKeyBy(pin, to, he_weight); } } } } - HEAVY_INITIAL_PARTITIONING_ASSERT([&]() { - for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) { - for (const HypernodeID& pin : hypergraph.pins(he)) { - if (pin != hn) { - for (PartitionID block = 0; block < hypergraph.k(); ++block) { - if (pq.contains(pin, block)) { - const Gain gain = calculateGain(hypergraph, pin, block); - if (pq.key(pin, block) != gain) { - LOG << V(hn); - LOG << V(to); - LOG << V(he); - LOG << V(pin); - LOG << V(block); - LOG << V(gain); - LOG << V(pq.key(pin, block)); - return false; + HEAVY_INITIAL_PARTITIONING_ASSERT( + [&]() { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + if(pin != hn) + { + for(PartitionID block = 0; block < hypergraph.k(); ++block) + { + if(pq.contains(pin, block)) + { + const Gain gain = calculateGain(hypergraph, pin, block); + if(pq.key(pin, block) != gain) + { + LOG << V(hn); + LOG << V(to); + LOG << V(he); + LOG << V(pin); + LOG << V(block); + LOG << V(gain); + LOG << V(pq.key(pin, block)); + return false; + } } } } } } - } - return true; - } (), "Delta Gain Update failed!"); + return true; + }(), + "Delta Gain Update failed!"); } }; - } // namespace mt_kahypar diff --git 
a/mt-kahypar/partition/initial_partitioning/policies/pq_selection_policy.h b/mt-kahypar/partition/initial_partitioning/policies/pq_selection_policy.h index 9ca48638f..03ac0a618 100644 --- a/mt-kahypar/partition/initial_partitioning/policies/pq_selection_policy.h +++ b/mt-kahypar/partition/initial_partitioning/policies/pq_selection_policy.h @@ -33,27 +33,27 @@ namespace mt_kahypar { // ! Selects the PQs in a round-robin fashion. -template -class RoundRobinPQSelectionPolicy { +template +class RoundRobinPQSelectionPolicy +{ using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - static inline bool pop(const PartitionedHypergraph& hypergraph, - KWayPriorityQueue& pq, - HypernodeID& hn, - PartitionID& to, - Gain& gain, - const bool) { +public: + static inline bool pop(const PartitionedHypergraph &hypergraph, KWayPriorityQueue &pq, + HypernodeID &hn, PartitionID &to, Gain &gain, const bool) + { ASSERT(to >= kInvalidPartition && to < hypergraph.k()); hn = kInvalidHypernode; gain = kInvalidGain; to = (to + 1) % hypergraph.k(); const PartitionID start_block = to; - while ( !pq.isEnabled(to) ) { + while(!pq.isEnabled(to)) + { to = (to + 1) % hypergraph.k(); - if ( start_block == to ) { + if(start_block == to) + { to = kInvalidPartition; return false; } @@ -70,34 +70,32 @@ class RoundRobinPQSelectionPolicy { // before greedy initial partitioning. Experiments have shown that the greedy // round robin variant performs best if we leave all vertices unassigned before // greedy initial partitioning. - static inline PartitionID getDefaultBlock() { - return kInvalidPartition; - } + static inline PartitionID getDefaultBlock() { return kInvalidPartition; } }; - // ! Selects the PQ which contains the maximum gain move -template -class GlobalPQSelectionPolicy { +template +class GlobalPQSelectionPolicy +{ using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - static inline bool pop(const PartitionedHypergraph&, - KWayPriorityQueue& pq, - HypernodeID& hn, - PartitionID& to, - Gain& gain, - const bool) { +public: + static inline bool pop(const PartitionedHypergraph &, KWayPriorityQueue &pq, + HypernodeID &hn, PartitionID &to, Gain &gain, const bool) + { hn = kInvalidHypernode; to = kInvalidPartition; gain = kInvalidGain; - if ( pq.numNonEmptyParts() > 0 && pq.numEnabledParts() > 0 ) { + if(pq.numNonEmptyParts() > 0 && pq.numEnabledParts() > 0) + { pq.deleteMax(hn, gain, to); ASSERT(hn != kInvalidHypernode); return true; - } else { + } + else + { return false; } } @@ -106,48 +104,52 @@ class GlobalPQSelectionPolicy { // before greedy initial partitioning. Experiments have shown that the greedy // global variant performs best if we assign all vertices to block 1 before // greedy initial partitioning. - static inline PartitionID getDefaultBlock() { - return 1; - } + static inline PartitionID getDefaultBlock() { return 1; } }; - // ! 
Selects the PQs one by one until they are disabled -template -class SequentialPQSelectionPolicy { +template +class SequentialPQSelectionPolicy +{ using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - static inline bool pop(const PartitionedHypergraph& hypergraph, - KWayPriorityQueue& pq, - HypernodeID& hn, - PartitionID& to, - Gain& gain, - const bool use_perfect_balanced_as_upper_bound) { +public: + static inline bool pop(const PartitionedHypergraph &hypergraph, KWayPriorityQueue &pq, + HypernodeID &hn, PartitionID &to, Gain &gain, + const bool use_perfect_balanced_as_upper_bound) + { hn = kInvalidHypernode; gain = kInvalidGain; - if ( use_perfect_balanced_as_upper_bound ) { - if ( to == kInvalidPartition ) { + if(use_perfect_balanced_as_upper_bound) + { + if(to == kInvalidPartition) + { to = 0; } - while ( to < hypergraph.k() && !pq.isEnabled(to) ) { + while(to < hypergraph.k() && !pq.isEnabled(to)) + { ++to; } - if ( to < hypergraph.k() ) { + if(to < hypergraph.k()) + { ASSERT(pq.size(to) > 0); pq.deleteMaxFromPartition(hn, gain, to); ASSERT(hn != kInvalidHypernode); return true; - } else { + } + else + { return false; } - } else { - return GlobalPQSelectionPolicy::pop(hypergraph, - pq, hn, to, gain, use_perfect_balanced_as_upper_bound); + } + else + { + return GlobalPQSelectionPolicy::pop( + hypergraph, pq, hn, to, gain, use_perfect_balanced_as_upper_bound); } } @@ -155,9 +157,7 @@ class SequentialPQSelectionPolicy { // before greedy initial partitioning. Experiments have shown that the greedy // sequential variant performs best if we assign all vertices to block 1 before // greedy initial partitioning. - static inline PartitionID getDefaultBlock() { - return 1; - } + static inline PartitionID getDefaultBlock() { return 1; } }; } // namespace mt_kahypar diff --git a/mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h b/mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h index c8f5784c4..c09bcd1c9 100644 --- a/mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h +++ b/mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h @@ -28,49 +28,57 @@ #include "tbb/task.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/parallel/stl/scalable_queue.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/initial_partitioning/initial_partitioning_data_container.h" #include "mt-kahypar/utils/randomize.h" namespace mt_kahypar { -template -class PseudoPeripheralStartNodes { +template +class PseudoPeripheralStartNodes +{ static constexpr bool debug = false; - using StartNodes = vec>; + using StartNodes = vec >; using Queue = parallel::scalable_queue; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - static inline StartNodes computeStartNodes(InitialPartitioningDataContainer& ip_data, - const Context& context, - const PartitionID default_block, - std::mt19937& rng) { - PartitionedHypergraph& hypergraph = ip_data.local_partitioned_hypergraph(); - kahypar::ds::FastResetFlagArray<>& hypernodes_in_queue = - ip_data.local_hypernode_fast_reset_flag_array(); - kahypar::ds::FastResetFlagArray<>& hyperedges_in_queue = - ip_data.local_hyperedge_fast_reset_flag_array(); +public: + static inline StartNodes + computeStartNodes(InitialPartitioningDataContainer &ip_data, + const Context &context, const PartitionID default_block, + std::mt19937 &rng) + { + PartitionedHypergraph 
&hypergraph = ip_data.local_partitioned_hypergraph(); + kahypar::ds::FastResetFlagArray<> &hypernodes_in_queue = + ip_data.local_hypernode_fast_reset_flag_array(); + kahypar::ds::FastResetFlagArray<> &hyperedges_in_queue = + ip_data.local_hyperedge_fast_reset_flag_array(); StartNodes start_nodes(context.partition.k); vec empty_blocks(context.partition.k); std::iota(empty_blocks.begin(), empty_blocks.end(), 0); bool contains_seed_node = false; - if ( hypergraph.hasFixedVertices() ) { + if(hypergraph.hasFixedVertices()) + { hypernodes_in_queue.reset(); hyperedges_in_queue.reset(); // Use all neighbors of fixed vertices as seed nodes - for ( const HypernodeID& hn : ip_data.fixedVertices() ) { + for(const HypernodeID &hn : ip_data.fixedVertices()) + { ASSERT(hypergraph.isFixed(hn)); const PartitionID block = hypergraph.fixedVertexBlock(hn); - for ( const HyperedgeID& he : hypergraph.incidentEdges(hn) ) { - if ( !hyperedges_in_queue[block * hypergraph.initialNumEdges() + he] ) { - for ( const HypernodeID& pin : hypergraph.pins(he) ) { - if ( !hypergraph.isFixed(pin) && - !hypernodes_in_queue[block * hypergraph.initialNumNodes() + pin] ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + if(!hyperedges_in_queue[block * hypergraph.initialNumEdges() + he]) + { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + if(!hypergraph.isFixed(pin) && + !hypernodes_in_queue[block * hypergraph.initialNumNodes() + pin]) + { start_nodes[block].push_back(pin); contains_seed_node = true; hypernodes_in_queue.set(block * hypergraph.initialNumNodes() + pin, true); @@ -81,10 +89,12 @@ class PseudoPeripheralStartNodes { } } - for ( size_t i = 0; i < empty_blocks.size(); ++i ) { + for(size_t i = 0; i < empty_blocks.size(); ++i) + { // Remove blocks that contain seed nodes from empty blocks const PartitionID block = empty_blocks[i]; - if ( !start_nodes[block].empty() ) { + if(!start_nodes[block].empty()) + { std::swap(empty_blocks[i--], empty_blocks[empty_blocks.size() - 1]); empty_blocks.pop_back(); } @@ -92,14 +102,17 @@ class PseudoPeripheralStartNodes { } } - if ( !contains_seed_node ) { - HypernodeID start_hn = - std::uniform_int_distribution(0, hypergraph.initialNumNodes() -1 )(rng); - if ( !hypergraph.nodeIsEnabled(start_hn) || hypergraph.isFixed(start_hn) ) { + if(!contains_seed_node) + { + HypernodeID start_hn = std::uniform_int_distribution( + 0, hypergraph.initialNumNodes() - 1)(rng); + if(!hypergraph.nodeIsEnabled(start_hn) || hypergraph.isFixed(start_hn)) + { start_hn = ip_data.get_unassigned_hypernode(default_block); } - if ( start_hn != kInvalidHypernode ) { + if(start_hn != kInvalidHypernode) + { ASSERT(hypergraph.nodeIsEnabled(start_hn)); start_nodes[empty_blocks[0]].push_back(start_hn); std::swap(empty_blocks[0], empty_blocks[empty_blocks.size() - 1]); @@ -108,17 +121,19 @@ class PseudoPeripheralStartNodes { } } - if ( !empty_blocks.empty() && contains_seed_node ) { + if(!empty_blocks.empty() && contains_seed_node) + { // We perform k - 1 BFS on the hypergraph to find k vertices that // are "far" away from each other. Each BFS adds a new hypernode to // list of start nodes. Each entry in start_nodes represents a start // node for a specific block of the partition. The new vertex added to // the list of start nodes is the one last touched by the current BFS. 
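
The BFS-based seed selection described in the comment above can be summarized in a minimal standalone sketch, assuming a plain adjacency-list graph and hypothetical names instead of the mt-kahypar hypergraph API (fixed vertices and the hyperedge-size threshold are ignored): starting from all seeds chosen so far, one breadth-first search is run and the vertex dequeued last, i.e. the one farthest from every current seed, becomes the start node of the next empty block.

#include <queue>
#include <vector>

// Returns the vertex visited last by a BFS started from all current seeds.
// Assumes `seeds` is non-empty and all indices are valid for `adj`.
int farthestFromSeeds(const std::vector<std::vector<int>> &adj,
                      const std::vector<int> &seeds) {
  std::vector<bool> visited(adj.size(), false);
  std::queue<int> queue;
  for (int s : seeds) { // all current seeds start the BFS together
    visited[s] = true;
    queue.push(s);
  }
  int last_touched = seeds.front();
  while (!queue.empty()) {
    last_touched = queue.front();
    queue.pop();
    for (int v : adj[last_touched]) {
      if (!visited[v]) {
        visited[v] = true;
        queue.push(v);
      }
    }
  }
  return last_touched; // seed vertex for the next (empty) block
}
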
- const HypernodeID current_num_nodes = - hypergraph.initialNumNodes() - hypergraph.numRemovedHypernodes() - - ip_data.numFixedVertices(); + const HypernodeID current_num_nodes = hypergraph.initialNumNodes() - + hypergraph.numRemovedHypernodes() - + ip_data.numFixedVertices(); parallel::scalable_vector non_touched_hypernodes; - for ( const PartitionID block : empty_blocks ) { + for(const PartitionID block : empty_blocks) + { Queue queue; hypernodes_in_queue.reset(); hyperedges_in_queue.reset(); @@ -127,18 +142,25 @@ class PseudoPeripheralStartNodes { HypernodeID last_hypernode_touched = kInvalidHypernode; HypernodeID num_touched_hypernodes = 0; ASSERT(queue.size() > 0); - while ( !queue.empty() ) { + while(!queue.empty()) + { last_hypernode_touched = queue.front(); queue.pop(); ++num_touched_hypernodes; // Add all adjacent non-visited vertices of the current visited hypernode // to queue. - for ( const HyperedgeID& he : hypergraph.incidentEdges(last_hypernode_touched) ) { - if ( !hyperedges_in_queue[he] ) { - if ( hypergraph.edgeSize(he) <= context.partition.ignore_hyperedge_size_threshold ) { - for ( const HypernodeID& pin : hypergraph.pins(he) ) { - if ( !hypernodes_in_queue[pin] ) { + for(const HyperedgeID &he : hypergraph.incidentEdges(last_hypernode_touched)) + { + if(!hyperedges_in_queue[he]) + { + if(hypergraph.edgeSize(he) <= + context.partition.ignore_hyperedge_size_threshold) + { + for(const HypernodeID &pin : hypergraph.pins(he)) + { + if(!hypernodes_in_queue[pin]) + { queue.push(pin); hypernodes_in_queue.set(pin, true); } @@ -150,14 +172,18 @@ class PseudoPeripheralStartNodes { // In case the queue is empty and we have not visited all hypernodes. // Therefore, we choose one unvisited vertex at random. - if ( queue.empty() && num_touched_hypernodes < current_num_nodes ) { - for ( const HypernodeID& hn : hypergraph.nodes() ) { - if ( !hypernodes_in_queue[hn] ) { + if(queue.empty() && num_touched_hypernodes < current_num_nodes) + { + for(const HypernodeID &hn : hypergraph.nodes()) + { + if(!hypernodes_in_queue[hn]) + { non_touched_hypernodes.push_back(hn); hypernodes_in_queue.set(hn, true); } } - const int rand_idx = std::uniform_int_distribution<>(0, non_touched_hypernodes.size() - 1)(rng); + const int rand_idx = std::uniform_int_distribution<>( + 0, non_touched_hypernodes.size() - 1)(rng); last_hypernode_touched = non_touched_hypernodes[rand_idx]; } } @@ -172,17 +198,22 @@ class PseudoPeripheralStartNodes { return start_nodes; } - private: - static inline void initializeQueue(Queue& queue, - StartNodes& start_nodes, - InitialPartitioningDataContainer& ip_data, - kahypar::ds::FastResetFlagArray<>& hypernodes_in_queue) { - for ( const HypernodeID& hn : ip_data.fixedVertices() ) { +private: + static inline void + initializeQueue(Queue &queue, StartNodes &start_nodes, + InitialPartitioningDataContainer &ip_data, + kahypar::ds::FastResetFlagArray<> &hypernodes_in_queue) + { + for(const HypernodeID &hn : ip_data.fixedVertices()) + { hypernodes_in_queue.set(hn, true); } - for ( const vec& nodes_of_block : start_nodes ) { - for ( const HypernodeID& hn : nodes_of_block ) { - if ( !hypernodes_in_queue[hn] ) { + for(const vec &nodes_of_block : start_nodes) + { + for(const HypernodeID &hn : nodes_of_block) + { + if(!hypernodes_in_queue[hn]) + { queue.push(hn); hypernodes_in_queue.set(hn, true); } @@ -191,5 +222,4 @@ class PseudoPeripheralStartNodes { } }; - } // namespace mt_kahypar diff --git a/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.cpp 
b/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.cpp index 8f0c235a3..a9c37020f 100644 --- a/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.cpp +++ b/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.cpp @@ -42,25 +42,30 @@ namespace { using IPTask = std::tuple; } -template -void Pool::bipartition(PartitionedHypergraph& hypergraph, - const Context& context, - const bool run_parallel) { +template +void Pool::bipartition(PartitionedHypergraph &hypergraph, + const Context &context, const bool run_parallel) +{ ASSERT(context.shared_memory.num_threads > 0); - if ( context.initial_partitioning.enabled_ip_algos.size() < - static_cast(InitialPartitioningAlgorithm::UNDEFINED) ) { + if(context.initial_partitioning.enabled_ip_algos.size() < + static_cast(InitialPartitioningAlgorithm::UNDEFINED)) + { throw InvalidParameterException( - "Size of enabled IP algorithms vector is smaller than number of IP algorithms!"); + "Size of enabled IP algorithms vector is smaller than number of IP algorithms!"); } int tag = 0; std::mt19937 rng(context.partition.seed); vec _ip_task_lists; // Push the runs of the different initial partitioning algorithms into a task list - for ( uint8_t i = 0; i < static_cast(InitialPartitioningAlgorithm::UNDEFINED); ++i ) { - if ( context.initial_partitioning.enabled_ip_algos[i] ) { + for(uint8_t i = 0; i < static_cast(InitialPartitioningAlgorithm::UNDEFINED); + ++i) + { + if(context.initial_partitioning.enabled_ip_algos[i]) + { auto algorithm = static_cast(i); - for ( size_t j = 0; j < context.initial_partitioning.runs; ++j ) { + for(size_t j = 0; j < context.initial_partitioning.runs; ++j) + { // Each initial partitioning algorithm is assigned a seed and a tag // for deterministic behavior when partitioning in deterministic mode. 
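
As a minimal sketch of this seeding scheme, with hypothetical names rather than the actual Pool interface: every run of every enabled algorithm receives one seed drawn from a single master RNG plus a unique tag, so the whole task list, and hence the randomness of each run, is fixed before any task is executed in parallel.

#include <random>
#include <tuple>
#include <vector>

using IPTaskSketch = std::tuple<int /*algorithm*/, int /*seed*/, int /*tag*/>;

// Builds the (algorithm, seed, tag) task list up front; the same master seed
// always yields the same list, which keeps deterministic mode reproducible
// even though the tasks may later run in arbitrary order.
std::vector<IPTaskSketch> buildTaskList(int num_algorithms, int runs_per_algorithm,
                                        int master_seed) {
  std::mt19937 rng(master_seed);
  std::vector<IPTaskSketch> tasks;
  int tag = 0;
  for (int algo = 0; algo < num_algorithms; ++algo) {
    for (int run = 0; run < runs_per_algorithm; ++run) {
      tasks.emplace_back(algo, static_cast<int>(rng()), tag++);
    }
  }
  return tasks;
}
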
_ip_task_lists.emplace_back(algorithm, rng(), tag++); @@ -73,22 +78,26 @@ void Pool::bipartition(PartitionedHypergraph& hypergraph, tbb::task_group tg; InitialPartitioningDataContainer ip_data(hypergraph, context); - ip_data_container_t* ip_data_ptr = ip::to_pointer(ip_data); - for ( const auto& ip_task : _ip_task_lists ) { + ip_data_container_t *ip_data_ptr = ip::to_pointer(ip_data); + for(const auto &ip_task : _ip_task_lists) + { const InitialPartitioningAlgorithm algorithm = std::get<0>(ip_task); const int seed = std::get<1>(ip_task); const int tag = std::get<2>(ip_task); - if ( run_parallel ) { + if(run_parallel) + { tg.run([&, algorithm, seed, tag] { std::unique_ptr initial_partitioner = - InitialPartitionerFactory::getInstance().createObject( - algorithm, algorithm, ip_data_ptr, context, seed, tag); + InitialPartitionerFactory::getInstance().createObject( + algorithm, algorithm, ip_data_ptr, context, seed, tag); initial_partitioner->partition(); }); - } else { + } + else + { std::unique_ptr initial_partitioner = - InitialPartitionerFactory::getInstance().createObject( - algorithm, algorithm, ip_data_ptr, context, seed, tag); + InitialPartitionerFactory::getInstance().createObject( + algorithm, algorithm, ip_data_ptr, context, seed, tag); initial_partitioner->partition(); } } diff --git a/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.h b/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.h index 8de09914a..816131383 100644 --- a/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.h +++ b/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.h @@ -30,16 +30,15 @@ #include "include/libmtkahypartypes.h" - namespace mt_kahypar { -template -class Pool { - using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; +template +class Pool +{ + using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - static void bipartition(PartitionedHypergraph& hypergraph, - const Context& context, +public: + static void bipartition(PartitionedHypergraph &hypergraph, const Context &context, const bool run_parallel = true); }; diff --git a/mt-kahypar/partition/initial_partitioning/random_initial_partitioner.cpp b/mt-kahypar/partition/initial_partitioning/random_initial_partitioner.cpp index bc8749b8e..a7e232e9c 100644 --- a/mt-kahypar/partition/initial_partitioning/random_initial_partitioner.cpp +++ b/mt-kahypar/partition/initial_partitioning/random_initial_partitioner.cpp @@ -31,25 +31,32 @@ namespace mt_kahypar { -template -void RandomInitialPartitioner::partitionImpl() { - if ( _ip_data.should_initial_partitioner_run(InitialPartitioningAlgorithm::random) ) { +template +void RandomInitialPartitioner::partitionImpl() +{ + if(_ip_data.should_initial_partitioner_run(InitialPartitioningAlgorithm::random)) + { HighResClockTimepoint start = std::chrono::high_resolution_clock::now(); - PartitionedHypergraph& hg = _ip_data.local_partitioned_hypergraph(); - std::uniform_int_distribution select_random_block(0, _context.partition.k - 1); + PartitionedHypergraph &hg = _ip_data.local_partitioned_hypergraph(); + std::uniform_int_distribution select_random_block( + 0, _context.partition.k - 1); _ip_data.preassignFixedVertices(hg); - for ( const HypernodeID& hn : hg.nodes() ) { - if ( !hg.isFixed(hn) ) { + for(const HypernodeID &hn : hg.nodes()) + { + if(!hg.isFixed(hn)) + { // Randomly select a block to assign the hypernode PartitionID block = select_random_block(_rng); PartitionID current_block = block; - while ( 
!fitsIntoBlock(hg, hn, current_block) ) { + while(!fitsIntoBlock(hg, hn, current_block)) + { // If the hypernode does not fit into the random selected block // (because it would violate the balance constraint), we try to // assign it to the next block. - current_block = ( current_block + 1 ) % _context.partition.k; - if ( current_block == block ) { + current_block = (current_block + 1) % _context.partition.k; + if(current_block == block) + { // In case, we find no valid block to assign the current hypernode // to, we assign it to random selected block break; diff --git a/mt-kahypar/partition/initial_partitioning/random_initial_partitioner.h b/mt-kahypar/partition/initial_partitioning/random_initial_partitioner.h index a2f88b83e..2501e8604 100644 --- a/mt-kahypar/partition/initial_partitioning/random_initial_partitioner.h +++ b/mt-kahypar/partition/initial_partitioning/random_initial_partitioner.h @@ -31,36 +31,36 @@ namespace mt_kahypar { -template -class RandomInitialPartitioner : public IInitialPartitioner { +template +class RandomInitialPartitioner : public IInitialPartitioner +{ static constexpr bool debug = false; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: +public: RandomInitialPartitioner(const InitialPartitioningAlgorithm, - ip_data_container_t* ip_data, - const Context& context, + ip_data_container_t *ip_data, const Context &context, const int seed, const int tag) : - _ip_data(ip::to_reference(ip_data)), - _context(context), - _rng(seed), - _tag(tag) { } + _ip_data(ip::to_reference(ip_data)), + _context(context), _rng(seed), _tag(tag) + { + } - private: +private: void partitionImpl() final; - bool fitsIntoBlock(PartitionedHypergraph& hypergraph, - const HypernodeID hn, - const PartitionID block) const { + bool fitsIntoBlock(PartitionedHypergraph &hypergraph, const HypernodeID hn, + const PartitionID block) const + { ASSERT(block != kInvalidPartition && block < _context.partition.k); return hypergraph.partWeight(block) + hypergraph.nodeWeight(hn) <= - _context.partition.perfect_balance_part_weights[block]; + _context.partition.perfect_balance_part_weights[block]; } - InitialPartitioningDataContainer& _ip_data; - const Context& _context; + InitialPartitioningDataContainer &_ip_data; + const Context &_context; std::mt19937 _rng; const int _tag; }; diff --git a/mt-kahypar/partition/mapping/all_pair_shortest_path.cpp b/mt-kahypar/partition/mapping/all_pair_shortest_path.cpp index 6962c5354..f76dce422 100644 --- a/mt-kahypar/partition/mapping/all_pair_shortest_path.cpp +++ b/mt-kahypar/partition/mapping/all_pair_shortest_path.cpp @@ -29,37 +29,45 @@ namespace mt_kahypar { namespace { -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t index( - const HypernodeID u, const HypernodeID v, const HypernodeID n) { +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t index(const HypernodeID u, const HypernodeID v, + const HypernodeID n) +{ ASSERT(u < n && v < n); return u + v * n; } } // namespace -void AllPairShortestPath::compute(const ds::StaticGraph& graph, - vec& distances) { +void AllPairShortestPath::compute(const ds::StaticGraph &graph, + vec &distances) +{ const HypernodeID n = graph.initialNumNodes(); ASSERT(static_cast(n * n) <= distances.size()); // Initialize Distance Matrix - for ( const HypernodeID& u : graph.nodes() ) { + for(const HypernodeID &u : graph.nodes()) + { distances[index(u, u, n)] = 0; } - for ( const HyperedgeID& e : graph.edges() ) { + for(const HyperedgeID &e : graph.edges()) + { const HypernodeID u = graph.edgeSource(e); const HypernodeID v = 
graph.edgeTarget(e); distances[index(u, v, n)] = graph.edgeWeight(e); } // Floyd Algorithm to compute all shortest paths (O(n^3)) - for ( HypernodeID k = 0; k < n; ++k) { - for ( HypernodeID u = 0; u < n; ++u ) { - for ( HypernodeID v = 0; v < n; ++v ) { - distances[index(u, v, n)] = std::min(distances[index(u, v, n)], - distances[index(u, k, n)] + distances[index(k, v, n)]); + for(HypernodeID k = 0; k < n; ++k) + { + for(HypernodeID u = 0; u < n; ++u) + { + for(HypernodeID v = 0; v < n; ++v) + { + distances[index(u, v, n)] = + std::min(distances[index(u, v, n)], + distances[index(u, k, n)] + distances[index(k, v, n)]); } } } } -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/all_pair_shortest_path.h b/mt-kahypar/partition/mapping/all_pair_shortest_path.h index 4cf19ae00..f22bf0143 100644 --- a/mt-kahypar/partition/mapping/all_pair_shortest_path.h +++ b/mt-kahypar/partition/mapping/all_pair_shortest_path.h @@ -26,19 +26,19 @@ #pragma once -#include "mt-kahypar/macros.h" #include "mt-kahypar/datastructures/static_graph.h" +#include "mt-kahypar/macros.h" namespace mt_kahypar { -class AllPairShortestPath { +class AllPairShortestPath +{ - public: - static void compute(const ds::StaticGraph& graph, - vec& distances); +public: + static void compute(const ds::StaticGraph &graph, vec &distances); - private: - AllPairShortestPath() { } +private: + AllPairShortestPath() {} }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/greedy_mapping.cpp b/mt-kahypar/partition/mapping/greedy_mapping.cpp index 68ea614fb..944d14393 100644 --- a/mt-kahypar/partition/mapping/greedy_mapping.cpp +++ b/mt-kahypar/partition/mapping/greedy_mapping.cpp @@ -29,12 +29,12 @@ #include #include -#include "mt-kahypar/definitions.h" -#include "mt-kahypar/partition/metrics.h" -#include "mt-kahypar/partition/mapping/kerninghan_lin.h" -#include "mt-kahypar/datastructures/static_graph.h" #include "mt-kahypar/datastructures/static_bitset.h" +#include "mt-kahypar/datastructures/static_graph.h" +#include "mt-kahypar/definitions.h" #include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/partition/mapping/kerninghan_lin.h" +#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/utils/randomize.h" #include "mt-kahypar/utils/utilities.h" @@ -44,49 +44,58 @@ namespace { static constexpr bool debug = false; -struct PQElement { +struct PQElement +{ HyperedgeWeight rating; HypernodeID u; }; -bool operator<(const PQElement& lhs, const PQElement& rhs) { +bool operator<(const PQElement &lhs, const PQElement &rhs) +{ return lhs.rating < rhs.rating || (lhs.rating == rhs.rating && lhs.u < rhs.u); } -bool operator>(const PQElement& lhs, const PQElement& rhs) { +bool operator>(const PQElement &lhs, const PQElement &rhs) +{ return lhs.rating > rhs.rating || (lhs.rating == rhs.rating && lhs.u > rhs.u); } using PQ = std::priority_queue; - -HypernodeID get_node_with_minimum_weighted_degree(const ds::StaticGraph& graph) { +HypernodeID get_node_with_minimum_weighted_degree(const ds::StaticGraph &graph) +{ vec min_nodes; HyperedgeWeight min_weighted_degree = std::numeric_limits::max(); - for ( const HypernodeID& hn : graph.nodes() ) { + for(const HypernodeID &hn : graph.nodes()) + { HyperedgeWeight weighted_degree = 0; - for ( const HyperedgeID he : graph.incidentEdges(hn) ) { + for(const HyperedgeID he : graph.incidentEdges(hn)) + { weighted_degree += graph.edgeWeight(he); } - if ( weighted_degree < min_weighted_degree ) { + if(weighted_degree < 
min_weighted_degree) + { min_nodes.clear(); min_nodes.push_back(hn); min_weighted_degree = weighted_degree; - } else if ( weighted_degree == min_weighted_degree ) { + } + else if(weighted_degree == min_weighted_degree) + { min_nodes.push_back(hn); } } ASSERT(min_nodes.size() > 0); - return min_nodes.size() == 1 ? min_nodes[0] : - min_nodes[utils::Randomize::instance().getRandomInt( - 0, static_cast(min_nodes.size() - 1), THREAD_ID)]; + return min_nodes.size() == 1 ? + min_nodes[0] : + min_nodes[utils::Randomize::instance().getRandomInt( + 0, static_cast(min_nodes.size() - 1), THREAD_ID)]; } -template -void compute_greedy_mapping(CommunicationHypergraph& communication_hg, - const TargetGraph& target_graph, - const Context&, - const HypernodeID seed_node) { +template +void compute_greedy_mapping(CommunicationHypergraph &communication_hg, + const TargetGraph &target_graph, const Context &, + const HypernodeID seed_node) +{ // For each node u, the ratings store weight of all incident hyperedges // that connect u to partial assignment vec rating(communication_hg.initialNumNodes(), 0); @@ -95,42 +104,48 @@ void compute_greedy_mapping(CommunicationHypergraph& communication_hg, vec nodes_to_update; // Marks unassigned processors ds::Bitset unassigned_processors(target_graph.numBlocks()); - ds::StaticBitset unassigned_processors_view( - unassigned_processors.numBlocks(), unassigned_processors.data()); + ds::StaticBitset unassigned_processors_view(unassigned_processors.numBlocks(), + unassigned_processors.data()); PQ pq; auto check_if_all_nodes_are_assigned = [&]() { - if ( pq.empty() ) { + if(pq.empty()) + { // Check if there are still unassigned nodes. // This can happen if the communication hypergraph is not connected - for ( const HypernodeID& hn : communication_hg.nodes() ) { - if ( communication_hg.partID(hn) == kInvalidPartition ) { + for(const HypernodeID &hn : communication_hg.nodes()) + { + if(communication_hg.partID(hn) == kInvalidPartition) + { ASSERT(up_to_date_ratings[hn]); - pq.push( PQElement { rating[hn], hn } ); + pq.push(PQElement{ rating[hn], hn }); break; } } } }; - auto assign = [&](const HypernodeID u, - const PartitionID process) { + auto assign = [&](const HypernodeID u, const PartitionID process) { ASSERT(process != kInvalidPartition && process < communication_hg.k()); ASSERT(unassigned_processors.isSet(process)); communication_hg.setNodePart(u, process); - up_to_date_ratings[u] = false; // This marks u as assigned + up_to_date_ratings[u] = false; // This marks u as assigned unassigned_processors.unset(process); // This marks the process as assigned DBG << "Assign node" << u << "to process" << process; // Update ratings nodes_to_update.clear(); - for ( const HyperedgeID& he : communication_hg.incidentEdges(u) ) { - if ( !visited_hes[he] ) { + for(const HyperedgeID &he : communication_hg.incidentEdges(u)) + { + if(!visited_hes[he]) + { const HyperedgeWeight edge_weight = communication_hg.edgeWeight(he); - for ( const HypernodeID& pin : communication_hg.pins(he) ) { + for(const HypernodeID &pin : communication_hg.pins(he)) + { rating[pin] += edge_weight; - if ( up_to_date_ratings[pin] ) { + if(up_to_date_ratings[pin]) + { nodes_to_update.push_back(pin); up_to_date_ratings[pin] = false; } @@ -140,8 +155,9 @@ void compute_greedy_mapping(CommunicationHypergraph& communication_hg, } // Update PQ - for ( const HypernodeID& hn : nodes_to_update ) { - pq.push(PQElement { rating[hn], hn }); + for(const HypernodeID &hn : nodes_to_update) + { + pq.push(PQElement{ rating[hn], hn }); 
up_to_date_ratings[hn] = true; } check_if_all_nodes_are_assigned(); @@ -149,7 +165,8 @@ void compute_greedy_mapping(CommunicationHypergraph& communication_hg, communication_hg.resetPartition(); // Initialize unassigned processors - for ( PartitionID block = 0; block < target_graph.numBlocks(); ++block ) { + for(PartitionID block = 0; block < target_graph.numBlocks(); ++block) + { unassigned_processors.set(block); } // Assign seed node to process with minimum weighted degree @@ -158,12 +175,14 @@ void compute_greedy_mapping(CommunicationHypergraph& communication_hg, HyperedgeWeight actual_objective = 0; vec tie_breaking; vec tmp_ratings(communication_hg.initialNumNodes(), 0); - while ( !pq.empty() ) { + while(!pq.empty()) + { const PQElement best = pq.top(); const HypernodeID u = best.u; pq.pop(); - if ( !up_to_date_ratings[u] ) { + if(!up_to_date_ratings[u]) + { check_if_all_nodes_are_assigned(); continue; } @@ -171,14 +190,18 @@ void compute_greedy_mapping(CommunicationHypergraph& communication_hg, ASSERT(communication_hg.partID(u) == kInvalidPartition); // Assign node with the strongest connection to the partial assignment // to the process that minimizes the steiner tree metric. - for ( const HyperedgeID& he : communication_hg.incidentEdges(u) ) { - ds::Bitset& connectivity_set = communication_hg.deepCopyOfConnectivitySet(he); + for(const HyperedgeID &he : communication_hg.incidentEdges(u)) + { + ds::Bitset &connectivity_set = communication_hg.deepCopyOfConnectivitySet(he); const HyperedgeWeight edge_weight = communication_hg.edgeWeight(he); - const HyperedgeWeight distance_before = communication_hg.connectivity(he) > 0 ? - target_graph.distance(connectivity_set) : 0; - for ( const PartitionID process : unassigned_processors_view ) { + const HyperedgeWeight distance_before = + communication_hg.connectivity(he) > 0 ? + target_graph.distance(connectivity_set) : + 0; + for(const PartitionID process : unassigned_processors_view) + { const HyperedgeWeight distance_after = - target_graph.distanceWithBlock(connectivity_set, process); + target_graph.distanceWithBlock(connectivity_set, process); tmp_ratings[process] += (distance_after - distance_before) * edge_weight; } } @@ -186,69 +209,87 @@ void compute_greedy_mapping(CommunicationHypergraph& communication_hg, // Determine processor that would result in the least increase of the // steiner tree metric. HyperedgeWeight best_rating = std::numeric_limits::max(); - for ( const PartitionID process : unassigned_processors_view ) { - if ( tmp_ratings[process] < best_rating ) { + for(const PartitionID process : unassigned_processors_view) + { + if(tmp_ratings[process] < best_rating) + { tie_breaking.clear(); tie_breaking.push_back(process); best_rating = tmp_ratings[process]; - } else if ( tmp_ratings[process] == best_rating ) { + } + else if(tmp_ratings[process] == best_rating) + { tie_breaking.push_back(process); } tmp_ratings[process] = 0; } - // Assign node to processor that results in the least increase of the objective function + // Assign node to processor that results in the least increase of the objective + // function ASSERT(tie_breaking.size() > 0); - const PartitionID best_process = tie_breaking.size() == 1 ? tie_breaking[0] : - tie_breaking[utils::Randomize::instance().getRandomInt( - 0, static_cast(tie_breaking.size() - 1), THREAD_ID)]; + const PartitionID best_process = + tie_breaking.size() == 1 ? 
+ tie_breaking[0] : + tie_breaking[utils::Randomize::instance().getRandomInt( + 0, static_cast(tie_breaking.size() - 1), THREAD_ID)]; actual_objective += best_rating; assign(u, best_process); } ASSERT(actual_objective == metrics::quality(communication_hg, Objective::steiner_tree)); - ASSERT([&] { - for ( const HypernodeID hn : communication_hg.nodes() ) { - if ( communication_hg.partID(hn) == kInvalidPartition ) { - return false; - } - } - return true; - }(), "There are unassigned nodes"); + ASSERT( + [&] { + for(const HypernodeID hn : communication_hg.nodes()) + { + if(communication_hg.partID(hn) == kInvalidPartition) + { + return false; + } + } + return true; + }(), + "There are unassigned nodes"); DBG << "Greedy mapping algorithm with seed node" << seed_node << "produced an mapping with solution quality" << actual_objective; } } // namespace -template -void GreedyMapping::mapToTargetGraph(CommunicationHypergraph& communication_hg, - const TargetGraph& target_graph, - const Context& context) { +template +void GreedyMapping::mapToTargetGraph( + CommunicationHypergraph &communication_hg, const TargetGraph &target_graph, + const Context &context) +{ ASSERT(communication_hg.initialNumNodes() == target_graph.graph().initialNumNodes()); - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); SpinLock best_lock; - HyperedgeWeight best_objective = metrics::quality(communication_hg, Objective::steiner_tree); + HyperedgeWeight best_objective = + metrics::quality(communication_hg, Objective::steiner_tree); vec best_mapping(communication_hg.initialNumNodes(), 0); std::iota(best_mapping.begin(), best_mapping.end(), 0); timer.start_timer("initial_mapping", "Initial Mapping"); - communication_hg.doParallelForAllNodes([&](const HypernodeID& hn) { + communication_hg.doParallelForAllNodes([&](const HypernodeID &hn) { // Compute greedy mapping with the current node as seed node - CommunicationHypergraph tmp_communication_phg( - target_graph.numBlocks(), communication_hg.hypergraph()); + CommunicationHypergraph tmp_communication_phg(target_graph.numBlocks(), + communication_hg.hypergraph()); tmp_communication_phg.setTargetGraph(&target_graph); compute_greedy_mapping(tmp_communication_phg, target_graph, context, hn); - if ( context.mapping.use_local_search ) { - KerninghanLin::improve(tmp_communication_phg, target_graph); + if(context.mapping.use_local_search) + { + KerninghanLin::improve(tmp_communication_phg, + target_graph); } // Check if new mapping is better than the currently best mapping - const HyperedgeWeight objective = metrics::quality(tmp_communication_phg, Objective::steiner_tree); + const HyperedgeWeight objective = + metrics::quality(tmp_communication_phg, Objective::steiner_tree); best_lock.lock(); - if ( objective < best_objective ) { + if(objective < best_objective) + { best_objective = objective; - for ( const HypernodeID& u : tmp_communication_phg.nodes() ) { + for(const HypernodeID &u : tmp_communication_phg.nodes()) + { best_mapping[u] = tmp_communication_phg.partID(u); } } @@ -258,7 +299,8 @@ void GreedyMapping::mapToTargetGraph(CommunicationHyper // Apply best mapping communication_hg.resetPartition(); - for ( const HypernodeID& hn : communication_hg.nodes() ) { + for(const HypernodeID &hn : communication_hg.nodes()) + { communication_hg.setOnlyNodePart(hn, best_mapping[hn]); } communication_hg.initializePartition(); @@ -266,4 +308,4 @@ void 
GreedyMapping::mapToTargetGraph(CommunicationHyper INSTANTIATE_CLASS_WITH_PARTITIONED_HG(GreedyMapping) -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/greedy_mapping.h b/mt-kahypar/partition/mapping/greedy_mapping.h index aa761b3db..33c8640a6 100644 --- a/mt-kahypar/partition/mapping/greedy_mapping.h +++ b/mt-kahypar/partition/mapping/greedy_mapping.h @@ -26,35 +26,36 @@ #pragma once -#include "mt-kahypar/macros.h" #include "mt-kahypar/datastructures/partitioned_graph.h" -#include "mt-kahypar/partition/mapping/target_graph.h" +#include "mt-kahypar/macros.h" #include "mt-kahypar/partition/context.h" +#include "mt-kahypar/partition/mapping/target_graph.h" namespace mt_kahypar { -template -class GreedyMapping { +template +class GreedyMapping +{ using PartitionedGraph = ds::PartitionedGraph; - public: +public: /** This function implements the greedy mapping algorithm of Glantz et. al.: - * Glantz, Roland, Hening Meyerhenke, and Alexander Noe. - * "Algorithms for mapping parallel processes onto grid and torus architectures." - * 2015 23rd Euromicro International Conference on Parallel, Distributed, and Network-Based Processing. IEEE, 2015. - * - * The algorithm chooses a seed node and assigns it to processor with the lowest communication - * volume. In each step, the algorithm assigns the node of the communication hypergraph with - * the strongest connection to the partial assignment to the processor that results in the - * least increasing of the steiner tree metric. - */ - static void mapToTargetGraph(CommunicationHypergraph& communication_hg, - const TargetGraph& target_graph, - const Context& context); - - private: - GreedyMapping() { } + * Glantz, Roland, Hening Meyerhenke, and Alexander Noe. + * "Algorithms for mapping parallel processes onto grid and torus architectures." + * 2015 23rd Euromicro International Conference on Parallel, Distributed, and + * Network-Based Processing. IEEE, 2015. + * + * The algorithm chooses a seed node and assigns it to processor with the lowest + * communication volume. In each step, the algorithm assigns the node of the + * communication hypergraph with the strongest connection to the partial assignment to + * the processor that results in the least increasing of the steiner tree metric. 
+ */ + static void mapToTargetGraph(CommunicationHypergraph &communication_hg, + const TargetGraph &target_graph, const Context &context); + +private: + GreedyMapping() {} }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/initial_mapping.cpp b/mt-kahypar/partition/mapping/initial_mapping.cpp index eda22ffdb..35071ba9e 100644 --- a/mt-kahypar/partition/mapping/initial_mapping.cpp +++ b/mt-kahypar/partition/mapping/initial_mapping.cpp @@ -29,113 +29,124 @@ #include "tbb/parallel_invoke.h" #include "mt-kahypar/definitions.h" -#include "mt-kahypar/partition/mapping/target_graph.h" +#include "mt-kahypar/parallel/memory_pool.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/mapping/greedy_mapping.h" +#include "mt-kahypar/partition/mapping/target_graph.h" #include "mt-kahypar/partition/metrics.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/utils/utilities.h" -#include "mt-kahypar/parallel/memory_pool.h" namespace mt_kahypar { namespace { -using HyperedgeVector = vec>; +using HyperedgeVector = vec >; -template -std::pair convert_to_static_hypergraph(const PartitionedHypergraph& phg) { +template +std::pair +convert_to_static_hypergraph(const PartitionedHypergraph &phg) +{ using Hypergraph = ds::StaticHypergraph; using TargetPartitionedHypergraph = StaticPartitionedHypergraph; using Factory = typename Hypergraph::Factory; const HypernodeID num_hypernodes = phg.initialNumNodes(); - const HyperedgeID num_hyperedges = phg.initialNumEdges() / (PartitionedHypergraph::is_graph ? 2 : 1); + const HyperedgeID num_hyperedges = + phg.initialNumEdges() / (PartitionedHypergraph::is_graph ? 2 : 1); HyperedgeVector edge_vector; vec hyperedge_weight; vec hypernode_weight; // Allocate data structure - tbb::parallel_invoke([&] { - edge_vector.assign(num_hyperedges, vec()); - }, [&] { - hyperedge_weight.assign(num_hyperedges, 0); - }, [&] { - hypernode_weight.assign(num_hypernodes, 0); - }); + tbb::parallel_invoke([&] { edge_vector.assign(num_hyperedges, vec()); }, + [&] { hyperedge_weight.assign(num_hyperedges, 0); }, + [&] { hypernode_weight.assign(num_hypernodes, 0); }); // Write hypergraph into temporary data structure - tbb::parallel_invoke([&] { - if constexpr ( PartitionedHypergraph::is_graph ) { - CAtomic cnt(0); - phg.doParallelForAllEdges([&](const HyperedgeID& he) { - const HypernodeID u = phg.edgeSource(he); - const HypernodeID v = phg.edgeTarget(he); - if ( u < v ) { - // insert each edge only once - const size_t id = cnt.fetch_add(1, std::memory_order_relaxed); - hyperedge_weight[id] = phg.edgeWeight(he); - edge_vector[id].push_back(u); - edge_vector[id].push_back(v); + tbb::parallel_invoke( + [&] { + if constexpr(PartitionedHypergraph::is_graph) + { + CAtomic cnt(0); + phg.doParallelForAllEdges([&](const HyperedgeID &he) { + const HypernodeID u = phg.edgeSource(he); + const HypernodeID v = phg.edgeTarget(he); + if(u < v) + { + // insert each edge only once + const size_t id = cnt.fetch_add(1, std::memory_order_relaxed); + hyperedge_weight[id] = phg.edgeWeight(he); + edge_vector[id].push_back(u); + edge_vector[id].push_back(v); + } + }); } - }); - } else { - phg.doParallelForAllEdges([&](const HyperedgeID& he) { - hyperedge_weight[he] = phg.edgeWeight(he); - for ( const HypernodeID& pin : phg.pins(he) ) { - edge_vector[he].push_back(pin); + else + { + phg.doParallelForAllEdges([&](const HyperedgeID &he) { + hyperedge_weight[he] = phg.edgeWeight(he); + for(const HypernodeID &pin : phg.pins(he)) 
+ { + edge_vector[he].push_back(pin); + } + }); } + }, + [&] { + phg.doParallelForAllNodes( + [&](const HypernodeID &hn) { hypernode_weight[hn] = phg.nodeWeight(hn); }); }); - } - }, [&] { - phg.doParallelForAllNodes([&](const HypernodeID& hn) { - hypernode_weight[hn] = phg.nodeWeight(hn); - }); - }); // Construct new hypergraph and apply partition - Hypergraph converted_hg = Factory::construct(num_hypernodes, num_hyperedges, - edge_vector, hyperedge_weight.data(), hypernode_weight.data()); - converted_hg.doParallelForAllNodes([&](const HypernodeID& hn) { - if ( !phg.nodeIsEnabled(hn) ) { + Hypergraph converted_hg = + Factory::construct(num_hypernodes, num_hyperedges, edge_vector, + hyperedge_weight.data(), hypernode_weight.data()); + converted_hg.doParallelForAllNodes([&](const HypernodeID &hn) { + if(!phg.nodeIsEnabled(hn)) + { ASSERT(converted_hg.nodeDegree(hn) == 0); converted_hg.disableHypernode(hn); } }); - TargetPartitionedHypergraph converted_phg(phg.k(), converted_hg, parallel_tag_t { }); - phg.doParallelForAllNodes([&](const HypernodeID& hn) { - converted_phg.setOnlyNodePart(hn, phg.partID(hn)); - }); + TargetPartitionedHypergraph converted_phg(phg.k(), converted_hg, parallel_tag_t{}); + phg.doParallelForAllNodes( + [&](const HypernodeID &hn) { converted_phg.setOnlyNodePart(hn, phg.partID(hn)); }); converted_phg.initializePartition(); - ASSERT(metrics::quality(phg, Objective::cut) == metrics::quality(converted_phg, Objective::cut)); + ASSERT(metrics::quality(phg, Objective::cut) == + metrics::quality(converted_phg, Objective::cut)); return std::make_pair( - std::move(converted_hg), std::move(converted_phg)); + std::move(converted_hg), std::move(converted_phg)); } -template -void applyPartition(PartitionedHypergraph& phg, - TargetPartitionedHypergraph& target_phg) { - target_phg.doParallelForAllNodes([&](const HypernodeID& hn) { +template +void applyPartition(PartitionedHypergraph &phg, TargetPartitionedHypergraph &target_phg) +{ + target_phg.doParallelForAllNodes([&](const HypernodeID &hn) { const PartitionID from = target_phg.partID(hn); const PartitionID to = phg.partID(hn); - if ( from != to ) { + if(from != to) + { target_phg.changeNodePart(hn, from, to); } }); } -template -Hypergraph repairEmptyBlocks(const Hypergraph& contracted_hg, - const PartitionedHypergraph& communication_hg, - vec& mapping) { +template +Hypergraph repairEmptyBlocks(const Hypergraph &contracted_hg, + const PartitionedHypergraph &communication_hg, + vec &mapping) +{ using Factory = typename Hypergraph::Factory; const PartitionID k = communication_hg.k(); vec block_mapping(contracted_hg.initialNumNodes(), kInvalidHypernode); HypernodeID cur_id = 0; vec hypernode_weight(k, 0); - for ( PartitionID block = 0; block < k; ++block ) { - if ( communication_hg.partWeight(block) > 0 ) { + for(PartitionID block = 0; block < k; ++block) + { + if(communication_hg.partWeight(block) > 0) + { ASSERT(UL(cur_id) < block_mapping.size()); block_mapping[cur_id++] = block; } @@ -146,116 +157,132 @@ Hypergraph repairEmptyBlocks(const Hypergraph& contracted_hg, const HyperedgeID num_hyperedges = contracted_hg.initialNumEdges(); HyperedgeVector edge_vector(num_hyperedges, vec()); vec hyperedge_weight(num_hyperedges, 0); - contracted_hg.doParallelForAllEdges([&](const HyperedgeID& he) { - for ( const HypernodeID& pin : contracted_hg.pins(he) ) { + contracted_hg.doParallelForAllEdges([&](const HyperedgeID &he) { + for(const HypernodeID &pin : contracted_hg.pins(he)) + { edge_vector[he].push_back(block_mapping[pin]); } 
hyperedge_weight[he] = contracted_hg.edgeWeight(he); }); // Adapt mapping - communication_hg.doParallelForAllNodes([&](const HypernodeID& hn) { - mapping[hn] = block_mapping[mapping[hn]]; - }); + communication_hg.doParallelForAllNodes( + [&](const HypernodeID &hn) { mapping[hn] = block_mapping[mapping[hn]]; }); - return Factory::construct(num_hypernodes, num_hyperedges, - edge_vector, hyperedge_weight.data(), hypernode_weight.data()); + return Factory::construct(num_hypernodes, num_hyperedges, edge_vector, + hyperedge_weight.data(), hypernode_weight.data()); } -template -void map_to_target_graph(PartitionedHypergraph& communication_hg, - const TargetGraph& target_graph, - const Context& context) { +template +void map_to_target_graph(PartitionedHypergraph &communication_hg, + const TargetGraph &target_graph, const Context &context) +{ using Hypergraph = typename PartitionedHypergraph::UnderlyingHypergraph; - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); const bool was_unused_memory_allocations_enabled = - parallel::MemoryPoolT::instance().is_unused_memory_allocations_activated(); + parallel::MemoryPoolT::instance().is_unused_memory_allocations_activated(); parallel::MemoryPoolT::instance().deactivate_unused_memory_allocations(); // We contract all blocks of the partition to create an one-to-one mapping problem timer.start_timer("contract_partition", "Contract Partition"); vec mapping(communication_hg.initialNumNodes(), kInvalidHypernode); communication_hg.setTargetGraph(&target_graph); - communication_hg.doParallelForAllNodes([&](const HypernodeID hn) { - mapping[hn] = communication_hg.partID(hn); - }); + communication_hg.doParallelForAllNodes( + [&](const HypernodeID hn) { mapping[hn] = communication_hg.partID(hn); }); // Here, we collapse each block of the communication hypergraph partition into // a single node. The contracted hypergraph has exactly k nodes. In the // contracted hypergraph node i corresponds to block i of the input // communication hypergraph. 
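A minimal, self-contained sketch of this contraction step (plain C++, independent of the mt-kahypar types; all names are illustrative): every edge whose endpoints lie in different blocks contributes its weight to the corresponding block pair, which is exactly the k-node instance on which the one-to-one mapping is solved afterwards.

#include <algorithm>
#include <map>
#include <utility>
#include <vector>

struct Edge { int u, v, weight; };

// Collapse each block of `partition` into a single node: edges inside a block
// vanish, edges between two blocks accumulate their weight on that block pair.
std::map<std::pair<int, int>, int>
contractByPartition(const std::vector<Edge> &edges, const std::vector<int> &partition) {
  std::map<std::pair<int, int>, int> quotient;
  for (const Edge &e : edges) {
    const int bu = partition[e.u];
    const int bv = partition[e.v];
    if (bu != bv) {
      quotient[{ std::min(bu, bv), std::max(bu, bv) }] += e.weight;
    }
  }
  return quotient;
}

int main() {
  // Four nodes, two blocks: nodes 0 and 1 form block 0, nodes 2 and 3 form block 1.
  const std::vector<Edge> edges{ { 0, 1, 5 }, { 1, 2, 3 }, { 2, 3, 4 }, { 0, 3, 2 } };
  const std::vector<int> partition{ 0, 0, 1, 1 };
  auto quotient = contractByPartition(edges, partition);
  // Only the two cut edges (1,2) and (0,3) survive: total weight 3 + 2 = 5.
  return quotient[{ 0, 1 }] == 5 ? 0 : 1;
}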
Hypergraph contracted_hg = communication_hg.hypergraph().contract(mapping); - if ( contracted_hg.initialNumNodes() < static_cast(communication_hg.k()) ) { + if(contracted_hg.initialNumNodes() < static_cast(communication_hg.k())) + { // If the contracted hypergraph has fewer than k nodes then there must be some empty // blocks which we have to fix in the following contracted_hg = repairEmptyBlocks(contracted_hg, communication_hg, mapping); } PartitionedHypergraph contracted_phg(communication_hg.k(), contracted_hg); - for ( const HypernodeID& hn : contracted_phg.nodes() ) { + for(const HypernodeID &hn : contracted_phg.nodes()) + { contracted_phg.setOnlyNodePart(hn, hn); } contracted_phg.initializePartition(); contracted_phg.setTargetGraph(&target_graph); timer.stop_timer("contract_partition"); - const HyperedgeWeight objective_before = metrics::quality(contracted_phg, Objective::steiner_tree); + const HyperedgeWeight objective_before = + metrics::quality(contracted_phg, Objective::steiner_tree); ASSERT(metrics::quality(communication_hg, Objective::steiner_tree) == objective_before); // Solve one-to-one mapping problem - if ( context.mapping.strategy == OneToOneMappingStrategy::greedy_mapping ) { - GreedyMapping::mapToTargetGraph(contracted_phg, target_graph, context); + if(context.mapping.strategy == OneToOneMappingStrategy::greedy_mapping) + { + GreedyMapping::mapToTargetGraph(contracted_phg, target_graph, + context); } - const HyperedgeWeight objective_after = metrics::quality(contracted_phg, Objective::steiner_tree); - if ( objective_after < objective_before ) { - if ( context.partition.verbose_output ) { + const HyperedgeWeight objective_after = + metrics::quality(contracted_phg, Objective::steiner_tree); + if(objective_after < objective_before) + { + if(context.partition.verbose_output) + { LOG << GREEN << "Initial one-to-one mapping algorithm has improved objective by" - << (objective_before - objective_after) - << "( Before =" << objective_before << ", After =" << objective_after << ")" << END; + << (objective_before - objective_after) << "( Before =" << objective_before + << ", After =" << objective_after << ")" << END; } // Initial mapping algorithm has improved solution quality // => apply improved mapping to input communication hypergraph - communication_hg.doParallelForAllNodes([&](const HypernodeID& hn) { + communication_hg.doParallelForAllNodes([&](const HypernodeID &hn) { const PartitionID from = communication_hg.partID(hn); const PartitionID to = contracted_phg.partID(mapping[hn]); - if ( from != to ) { + if(from != to) + { communication_hg.changeNodePart(hn, from, to); } }); - } else if ( context.partition.verbose_output && objective_before < objective_after ) { + } + else if(context.partition.verbose_output && objective_before < objective_after) + { // Initial mapping algorithm has worsened solution quality // => use input partition of communication hypergraph LOG << RED << "Initial one-to-one mapping algorithm has worsened objective by" - << (objective_after - objective_before) - << "( Before =" << objective_before << ", After =" << objective_after << ")." - << "Use mapping from initial partitiong!"<< END; + << (objective_after - objective_before) << "( Before =" << objective_before + << ", After =" << objective_after << ")." + << "Use mapping from initial partitioning!"
<< END; } - if ( was_unused_memory_allocations_enabled ) { + if(was_unused_memory_allocations_enabled) + { parallel::MemoryPoolT::instance().activate_unused_memory_allocations(); } } } -template -void InitialMapping::mapToTargetGraph(PartitionedHypergraph& communication_hg, - const TargetGraph& target_graph, - const Context& context) { - if constexpr ( !PartitionedHypergraph::is_static_hypergraph ) { - // The mapping algorithm collapses each block of the communication hypergraph partition into - // a single node. Thereby, it uses the contract(...) function of the hypergraph data structure - // which is only implemented for static graphs and hypergraphs (implemented for multilevel partitioing, - // but not for n-level). In case the communication hypergraph uses an dynamic graph or hypergraph - // data structure, we convert it to static data structure and then compute the initial mapping. +template +void InitialMapping::mapToTargetGraph(PartitionedHypergraph &communication_hg, + const TargetGraph &target_graph, + const Context &context) +{ + if constexpr(!PartitionedHypergraph::is_static_hypergraph) + { + // The mapping algorithm collapses each block of the communication hypergraph + // partition into a single node. Thereby, it uses the contract(...) function of the + // hypergraph data structure which is only implemented for static graphs and + // hypergraphs (implemented for multilevel partitioning, but not for n-level). In case + // the communication hypergraph uses a dynamic graph or hypergraph data structure, we + // convert it to a static data structure and then compute the initial mapping. auto static_hypergraph = convert_to_static_hypergraph(communication_hg); - StaticPartitionedHypergraph& tmp_communication_hg = static_hypergraph.second; + StaticPartitionedHypergraph &tmp_communication_hg = static_hypergraph.second; tmp_communication_hg.setHypergraph(static_hypergraph.first); map_to_target_graph(tmp_communication_hg, target_graph, context); applyPartition(tmp_communication_hg, communication_hg); - } else { + } + else + { map_to_target_graph(communication_hg, target_graph, context); } } INSTANTIATE_CLASS_WITH_TYPE_TRAITS(InitialMapping) -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/initial_mapping.h b/mt-kahypar/partition/mapping/initial_mapping.h index be50eed71..0b320aa7c 100644 --- a/mt-kahypar/partition/mapping/initial_mapping.h +++ b/mt-kahypar/partition/mapping/initial_mapping.h @@ -34,13 +34,14 @@ namespace mt_kahypar { // Forward Declaration class TargetGraph; -template -class InitialMapping { +template +class InitialMapping +{ using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: +public: // ! This function takes an already partitioned communication hypergraph and // ! maps the blocks of the partition to the target graph such that the steiner // ! tree metric is minimized. The internal implementation @@ -48,12 +49,11 @@ class InitialMapping { // ! then solves a one-to-one mapping problem. The function is called after initial // ! partitioning via recursive bipartitioning (RB) since RB cannot optimize // ! the steiner tree metric.
- static void mapToTargetGraph(PartitionedHypergraph& communication_hg, - const TargetGraph& target_graph, - const Context& context); + static void mapToTargetGraph(PartitionedHypergraph &communication_hg, + const TargetGraph &target_graph, const Context &context); - private: - InitialMapping() { } +private: + InitialMapping() {} }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/kerninghan_lin.cpp b/mt-kahypar/partition/mapping/kerninghan_lin.cpp index 3ad14553f..9767ffe5c 100644 --- a/mt-kahypar/partition/mapping/kerninghan_lin.cpp +++ b/mt-kahypar/partition/mapping/kerninghan_lin.cpp @@ -26,19 +26,19 @@ #include "mt-kahypar/partition/mapping/kerninghan_lin.h" +#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/definitions.h" -#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/mapping/target_graph.h" -#include "mt-kahypar/datastructures/static_bitset.h" +#include "mt-kahypar/partition/metrics.h" namespace mt_kahypar { namespace { -template -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void swap(CommunicationHypergraph& communication_hg, - const HypernodeID u, - const HypernodeID v) { +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void swap(CommunicationHypergraph &communication_hg, + const HypernodeID u, const HypernodeID v) +{ const PartitionID block_of_u = communication_hg.partID(u); const PartitionID block_of_v = communication_hg.partID(v); ASSERT(block_of_u != block_of_v); @@ -46,29 +46,28 @@ MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void swap(CommunicationHypergraph& communicat communication_hg.changeNodePart(v, block_of_v, block_of_u); } -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight swap_gain(const TargetGraph& target_graph, - ds::Bitset& connectivity_set, - const HyperedgeWeight edge_weight, - const PartitionID removed_block, - const PartitionID new_block) { +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight +swap_gain(const TargetGraph &target_graph, ds::Bitset &connectivity_set, + const HyperedgeWeight edge_weight, const PartitionID removed_block, + const PartitionID new_block) +{ ASSERT(connectivity_set.isSet(removed_block)); ASSERT(!connectivity_set.isSet(new_block)); // Current distance between all nodes in the connectivity set const HyperedgeWeight distance_before = target_graph.distance(connectivity_set); // Distance between all nodes in the connectivity set after the swap operation - const HyperedgeWeight distance_after = - target_graph.distanceAfterExchangingBlocks(connectivity_set, removed_block, new_block); + const HyperedgeWeight distance_after = target_graph.distanceAfterExchangingBlocks( + connectivity_set, removed_block, new_block); return (distance_before - distance_after) * edge_weight; } // This function computes the gain of swapping the blocks of node u and v // in the communication hypergraph for the steiner tree metric. 
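A minimal, self-contained sketch of this gain computation (plain C++, independent of the mt-kahypar types; the path-shaped target graph and all names are illustrative). It assumes the contracted one-to-one setting, where every node of the communication hypergraph occupies its own block, so a swap simply exchanges two blocks in the connectivity set of every hyperedge that contains exactly one of the two nodes.

#include <set>
#include <vector>

// Stand-in for the precomputed Steiner tree distances: on a path-shaped target
// graph 0 - 1 - ... - (k-1) with unit edge weights, the optimal Steiner tree for
// a block set S is the interval [min(S), max(S)], so its weight is max(S) - min(S).
int steinerDistance(const std::set<int> &blocks) {
  return blocks.empty() ? 0 : *blocks.rbegin() - *blocks.begin();
}

struct Hyperedge {
  std::set<int> pins;   // node ids of the communication hypergraph
  std::set<int> blocks; // connectivity set under the current one-to-one mapping
  int weight = 1;
};

// Gain of swapping the blocks of nodes u and v (mapped to block_u and block_v):
// only hyperedges containing exactly one of the two nodes change their
// connectivity set; all other hyperedges keep their distance.
int swapGain(const std::vector<Hyperedge> &edges, int u, int v, int block_u, int block_v) {
  int gain = 0;
  for (const Hyperedge &e : edges) {
    const bool has_u = e.pins.count(u) > 0;
    const bool has_v = e.pins.count(v) > 0;
    if (has_u == has_v) continue; // contains both or neither -> unaffected
    std::set<int> after = e.blocks;
    if (has_u) { after.erase(block_u); after.insert(block_v); }
    else       { after.erase(block_v); after.insert(block_u); }
    gain += e.weight * (steinerDistance(e.blocks) - steinerDistance(after));
  }
  return gain;
}

int main() {
  // Path target graph 0 - 1 - 2 - 3, node i is mapped to block i.
  const std::vector<Hyperedge> edges{
    { { 0, 3 }, { 0, 3 }, 2 }, // spans blocks 0 and 3 -> distance 3
    { { 1, 2 }, { 1, 2 }, 1 }  // spans blocks 1 and 2 -> distance 1
  };
  // Swapping nodes 1 and 3 pulls the heavy edge together: gain = 2 * (3 - 1) = 4.
  return swapGain(edges, 1, 3, 1, 3) == 4 ? 0 : 1;
}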
-template -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight swap_gain(CommunicationHypergraph& communication_hg, - const TargetGraph& target_graph, - const HypernodeID u, - const HypernodeID v, - vec& marked_hes) { +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight +swap_gain(CommunicationHypergraph &communication_hg, const TargetGraph &target_graph, + const HypernodeID u, const HypernodeID v, vec &marked_hes) +{ HyperedgeWeight gain = 0; const PartitionID block_of_u = communication_hg.partID(u); const PartitionID block_of_v = communication_hg.partID(v); @@ -77,30 +76,37 @@ MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight swap_gain(CommunicationHyperg // connectivity set does not change due to the swap operation. // We therefore mark all incident hyperedges of u and only compute the // gain for hyperedges that are not in the intersection of u and v. - for ( const HyperedgeID& he : communication_hg.incidentEdges(u) ) { + for(const HyperedgeID &he : communication_hg.incidentEdges(u)) + { marked_hes[communication_hg.uniqueEdgeID(he)] = true; } - for ( const HyperedgeID& he : communication_hg.incidentEdges(v) ) { + for(const HyperedgeID &he : communication_hg.incidentEdges(v)) + { const HyperedgeID unique_id = communication_hg.uniqueEdgeID(he); - if ( !marked_hes[unique_id] ) { + if(!marked_hes[unique_id]) + { // Hyperedge only contains v => compute swap gain - ds::Bitset& connectivity_set = communication_hg.deepCopyOfConnectivitySet(he); - gain += swap_gain(target_graph, connectivity_set, - communication_hg.edgeWeight(he), block_of_v, block_of_u); - } else { + ds::Bitset &connectivity_set = communication_hg.deepCopyOfConnectivitySet(he); + gain += swap_gain(target_graph, connectivity_set, communication_hg.edgeWeight(he), + block_of_v, block_of_u); + } + else + { // Hyperedge contains u and v => unmark hyperedge marked_hes[unique_id] = false; } } - for ( const HyperedgeID& he : communication_hg.incidentEdges(u) ) { + for(const HyperedgeID &he : communication_hg.incidentEdges(u)) + { const HyperedgeID unique_id = communication_hg.uniqueEdgeID(he); - if ( marked_hes[unique_id] ) { + if(marked_hes[unique_id]) + { // Hyperedge only contains u => compute swap gain - ds::Bitset& connectivity_set = communication_hg.deepCopyOfConnectivitySet(he); - gain += swap_gain(target_graph, connectivity_set, - communication_hg.edgeWeight(he), block_of_u, block_of_v); + ds::Bitset &connectivity_set = communication_hg.deepCopyOfConnectivitySet(he); + gain += swap_gain(target_graph, connectivity_set, communication_hg.edgeWeight(he), + block_of_u, block_of_v); marked_hes[unique_id] = false; } } @@ -110,16 +116,19 @@ MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight swap_gain(CommunicationHyperg using Swap = std::pair; -struct PQElement { +struct PQElement +{ HyperedgeWeight gain; Swap swap; }; -bool operator<(const PQElement& lhs, const PQElement& rhs) { +bool operator<(const PQElement &lhs, const PQElement &rhs) +{ return lhs.gain < rhs.gain || (lhs.gain == rhs.gain && lhs.swap < rhs.swap); } -bool operator>(const PQElement& lhs, const PQElement& rhs) { +bool operator>(const PQElement &lhs, const PQElement &rhs) +{ return lhs.gain > rhs.gain || (lhs.gain == rhs.gain && lhs.swap > rhs.swap); } @@ -127,28 +136,36 @@ using PQ = std::priority_queue; } -template -void KerninghanLin::improve(CommunicationHypergraph& communication_hg, - const TargetGraph& target_graph) { +template +void KerninghanLin::improve( + CommunicationHypergraph &communication_hg, const TargetGraph &target_graph) +{ 
ASSERT(communication_hg.initialNumNodes() == target_graph.graph().initialNumNodes()); - HyperedgeWeight current_objective = metrics::quality(communication_hg, Objective::steiner_tree); + HyperedgeWeight current_objective = + metrics::quality(communication_hg, Objective::steiner_tree); vec marked_hes(communication_hg.initialNumEdges(), false); bool found_improvement = true; size_t fruitless_rounds = 0; size_t pass_nr = 1; - while ( found_improvement ) { - DBG << "Start of pass" << pass_nr << "( Current Objective =" << current_objective << ")"; + while(found_improvement) + { + DBG << "Start of pass" << pass_nr << "( Current Objective =" << current_objective + << ")"; found_improvement = false; HyperedgeWeight objective_before = current_objective; // Initialize priority queue PQ pq; - for ( const HypernodeID& u : communication_hg.nodes() ) { - for ( const HypernodeID& v : communication_hg.nodes() ) { - if ( u < v ) { - const HyperedgeWeight gain = swap_gain(communication_hg, target_graph, u, v, marked_hes); - pq.push(PQElement { gain, std::make_pair(u, v) }); + for(const HypernodeID &u : communication_hg.nodes()) + { + for(const HypernodeID &v : communication_hg.nodes()) + { + if(u < v) + { + const HyperedgeWeight gain = + swap_gain(communication_hg, target_graph, u, v, marked_hes); + pq.push(PQElement{ gain, std::make_pair(u, v) }); } } } @@ -158,26 +175,30 @@ void KerninghanLin::improve(CommunicationHypergraph& co HyperedgeWeight best_objective = current_objective; vec performed_swaps; vec already_moved(communication_hg.initialNumNodes(), false); - while ( !pq.empty() ) { + while(!pq.empty()) + { const PQElement elem = pq.top(); pq.pop(); const HyperedgeWeight gain = elem.gain; const HypernodeID u = elem.swap.first; const HypernodeID v = elem.swap.second; - if ( already_moved[u] || already_moved[v] ) { + if(already_moved[u] || already_moved[v]) + { // Each node can move at most once in each round continue; } // Recompute gain - const HyperedgeWeight recomputed_gain = swap_gain(communication_hg, target_graph, u, v, marked_hes); - if ( gain != recomputed_gain ) { + const HyperedgeWeight recomputed_gain = + swap_gain(communication_hg, target_graph, u, v, marked_hes); + if(gain != recomputed_gain) + { // Lazy update of PQ - // Note that since we do not immediately update the PQ after a swap operation, we may not be - // able do perform the node swap with highest gain. However, the lazy update strategy ensures - // that the gains are accurate and give a good estimate. - pq.push(PQElement { recomputed_gain, elem.swap }); + // Note that since we do not immediately update the PQ after a swap operation, we + // may not be able do perform the node swap with highest gain. However, the lazy + // update strategy ensures that the gains are accurate and give a good estimate. 
+ pq.push(PQElement{ recomputed_gain, elem.swap }); continue; } @@ -189,29 +210,37 @@ void KerninghanLin::improve(CommunicationHypergraph& co already_moved[v] = true; DBG << "Swap block ID of nodes" << u << "and" << v << "with gain" << gain << "( New Objective =" << current_objective << ")"; - if ( current_objective <= best_objective ) { + if(current_objective <= best_objective) + { best_idx = performed_swaps.size(); best_objective = current_objective; } - ASSERT(current_objective == metrics::quality(communication_hg, Objective::steiner_tree)); + ASSERT(current_objective == + metrics::quality(communication_hg, Objective::steiner_tree)); } // Rollback to best seen solution - for ( int i = performed_swaps.size() - 1; i >= best_idx; --i ) { - const PQElement& elem = performed_swaps[i]; + for(int i = performed_swaps.size() - 1; i >= best_idx; --i) + { + const PQElement &elem = performed_swaps[i]; swap(communication_hg, elem.swap.first, elem.swap.second); current_objective += elem.gain; } - ASSERT(current_objective == metrics::quality(communication_hg, Objective::steiner_tree)); + ASSERT(current_objective == + metrics::quality(communication_hg, Objective::steiner_tree)); ASSERT(current_objective == best_objective); - if ( current_objective == objective_before ) { + if(current_objective == objective_before) + { ++fruitless_rounds; - } else { + } + else + { fruitless_rounds = 0; } - found_improvement = best_idx > 0 && fruitless_rounds <= MAX_NUMBER_OF_FRUITLESS_ROUNDS; + found_improvement = + best_idx > 0 && fruitless_rounds <= MAX_NUMBER_OF_FRUITLESS_ROUNDS; ++pass_nr; } DBG << "Local Search Result =" << current_objective << "\n"; @@ -219,4 +248,4 @@ void KerninghanLin::improve(CommunicationHypergraph& co INSTANTIATE_CLASS_WITH_PARTITIONED_HG(KerninghanLin) -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/kerninghan_lin.h b/mt-kahypar/partition/mapping/kerninghan_lin.h index 761ac167c..27a738744 100644 --- a/mt-kahypar/partition/mapping/kerninghan_lin.h +++ b/mt-kahypar/partition/mapping/kerninghan_lin.h @@ -34,8 +34,9 @@ namespace mt_kahypar { // Forward Declaration class TargetGraph; -template -class KerninghanLin { +template +class KerninghanLin +{ static constexpr bool debug = false; @@ -43,7 +44,7 @@ class KerninghanLin { // terminate the search (prevents oscillation). static constexpr size_t MAX_NUMBER_OF_FRUITLESS_ROUNDS = 2; - public: +public: // ! This function implements the Kerninghan-Lin algorithm to // ! improve a given mapping onto a target graph. The algorithm // ! performs in each step a swap operation of two nodes that results @@ -51,11 +52,11 @@ class KerninghanLin { // ! node is swapped at most once, the algorithm rolls back to the // ! best seen solution. This is repeated several times until no // ! further improvements are possible. 
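A minimal, self-contained sketch of this pass-and-rollback scheme (plain C++; the toy objective over a path-shaped target graph and all names are illustrative, and the actual implementation additionally maintains a gain priority queue with lazy updates):

#include <cstdlib>
#include <limits>
#include <utility>
#include <vector>

// Toy objective: pos[i] is the block assigned to node i on a path-shaped target
// graph, and each communicating pair (a, b) costs |pos[a] - pos[b]|.
int cost(const std::vector<int> &pos, const std::vector<std::pair<int, int>> &pairs) {
  int c = 0;
  for (const auto &p : pairs) c += std::abs(pos[p.first] - pos[p.second]);
  return c;
}

// One pass: repeatedly apply the best swap among nodes that have not moved yet
// (even if it worsens the objective), remember the best prefix of the swap
// sequence, and roll everything after that prefix back.
int klPass(std::vector<int> &pos, const std::vector<std::pair<int, int>> &pairs) {
  const int n = static_cast<int>(pos.size());
  std::vector<bool> moved(n, false);
  std::vector<std::pair<int, int>> swaps;
  int current = cost(pos, pairs);
  int best = current;
  size_t best_prefix = 0;
  while (true) {
    int best_gain = std::numeric_limits<int>::min();
    std::pair<int, int> best_swap{ -1, -1 };
    for (int u = 0; u < n; ++u) {
      for (int v = u + 1; v < n; ++v) {
        if (moved[u] || moved[v]) continue;
        std::swap(pos[u], pos[v]); // evaluate the swap ...
        const int gain = current - cost(pos, pairs);
        std::swap(pos[u], pos[v]); // ... and undo it again
        if (gain > best_gain) { best_gain = gain; best_swap = { u, v }; }
      }
    }
    if (best_swap.first < 0) break; // fewer than two unmoved nodes left
    std::swap(pos[best_swap.first], pos[best_swap.second]);
    moved[best_swap.first] = moved[best_swap.second] = true;
    current -= best_gain;
    swaps.push_back(best_swap);
    if (current <= best) { best = current; best_prefix = swaps.size(); }
  }
  while (swaps.size() > best_prefix) { // rollback to the best seen solution
    std::swap(pos[swaps.back().first], pos[swaps.back().second]);
    swaps.pop_back();
  }
  return best;
}

int main() {
  std::vector<int> pos{ 0, 1, 2, 3 }; // initial one-to-one mapping: node i -> block i
  const std::vector<std::pair<int, int>> pairs{ { 0, 3 }, { 1, 2 }, { 0, 1 } };
  return klPass(pos, pairs) >= 0 ? 0 : 1;
}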
- static void improve(CommunicationHypergraph& communication_hg, - const TargetGraph& target_graph); + static void improve(CommunicationHypergraph &communication_hg, + const TargetGraph &target_graph); - private: - KerninghanLin() { } +private: + KerninghanLin() {} }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/set_enumerator.h b/mt-kahypar/partition/mapping/set_enumerator.h index 313169307..d092ac0bd 100644 --- a/mt-kahypar/partition/mapping/set_enumerator.h +++ b/mt-kahypar/partition/mapping/set_enumerator.h @@ -26,9 +26,9 @@ #pragma once -#include "mt-kahypar/macros.h" #include "mt-kahypar/datastructures/bitset.h" #include "mt-kahypar/datastructures/static_bitset.h" +#include "mt-kahypar/macros.h" namespace mt_kahypar { @@ -36,129 +36,133 @@ namespace mt_kahypar { * This class implements an iterator to enumerate all subsets of * the set {0, ..., n-1} of size m. */ -class SetEnumerator { +class SetEnumerator +{ using Block = typename ds::StaticBitset::Block; static constexpr size_t BITS_PER_BLOCK = ds::StaticBitset::BITS_PER_BLOCK; - class SetIterator { - public: + class SetIterator + { + public: using iterator_category = std::forward_iterator_tag; using value_type = const ds::StaticBitset; - using reference = const ds::StaticBitset&; - using pointer = const ds::StaticBitset*; + using reference = const ds::StaticBitset &; + using pointer = const ds::StaticBitset *; using difference_type = std::ptrdiff_t; - SetIterator(const size_t n, - const size_t m, - ds::Bitset& bitset, - const bool end) : - _bitset(bitset), - _cur_bitset(bitset.numBlocks(), bitset.data()), - _cur_set(m + 1, 0) { + SetIterator(const size_t n, const size_t m, ds::Bitset &bitset, const bool end) : + _bitset(bitset), _cur_bitset(bitset.numBlocks(), bitset.data()), + _cur_set(m + 1, 0) + { _cur_set[0] = n; // Sentinel - if ( !end ) { - for ( size_t i = 0; i < m; ++i ) { + if(!end) + { + for(size_t i = 0; i < m; ++i) + { _cur_set[i + 1] = m - 1 - i; _bitset.set(i); } - } else { - for ( size_t i = 1; i <= m; ++i ) { + } + else + { + for(size_t i = 1; i <= m; ++i) + { _cur_set[i] = n - i; } ++_cur_set[0]; } } - const ds::StaticBitset& operator*() const { - return _cur_bitset; - } + const ds::StaticBitset &operator*() const { return _cur_bitset; } - SetIterator& operator++() { + SetIterator &operator++() + { nextSet(); return *this; } - SetIterator operator++(int ) { + SetIterator operator++(int) + { const SetIterator res = *this; nextSet(); return res; } - bool operator==(const SetIterator& o) const { - for ( size_t i = 0; i < std::min(_cur_set.size(), o._cur_set.size()); ++i) { - if ( _cur_set[i] != o._cur_set[i] ) { + bool operator==(const SetIterator &o) const + { + for(size_t i = 0; i < std::min(_cur_set.size(), o._cur_set.size()); ++i) + { + if(_cur_set[i] != o._cur_set[i]) + { return false; } } return _cur_set.size() == o._cur_set.size(); } - bool operator!=(const SetIterator& o) const { - return !operator==(o); - } + bool operator!=(const SetIterator &o) const { return !operator==(o); } - private: - void nextSet() { + private: + void nextSet() + { size_t i = 1; - for ( ; i < _cur_set.size(); ++i ) { + for(; i < _cur_set.size(); ++i) + { _bitset.unset(_cur_set[i]); ++_cur_set[i]; - if ( _cur_set[i] < _cur_set[i - 1] ) { + if(_cur_set[i] < _cur_set[i - 1]) + { _bitset.set(_cur_set[i]); break; - } else { + } + else + { --_cur_set[i]; } } - if ( i < _cur_set.size() ) { - for ( size_t j = i - 1 ; j > 0; --j ) { + if(i < _cur_set.size()) + { + for(size_t j = i - 1; j > 0; --j) + 
{ _cur_set[j] = _cur_set[j + 1] + 1; _bitset.set(_cur_set[j]); } - } else { + } + else + { ++_cur_set[0]; } } - ds::Bitset& _bitset; + ds::Bitset &_bitset; ds::StaticBitset _cur_bitset; vec _cur_set; }; - public: +public: using iterator = SetIterator; using const_iterator = const SetIterator; - explicit SetEnumerator(const size_t n, const size_t m) : - _n(n), - _m(m), - _bitset(n) { + explicit SetEnumerator(const size_t n, const size_t m) : _n(n), _m(m), _bitset(n) + { ASSERT(_m <= _n); } - SetEnumerator(const SetEnumerator&) = delete; - SetEnumerator & operator= (const SetEnumerator &) = delete; - SetEnumerator(SetEnumerator&&) = delete; - SetEnumerator & operator= (SetEnumerator &&) = delete; + SetEnumerator(const SetEnumerator &) = delete; + SetEnumerator &operator=(const SetEnumerator &) = delete; + SetEnumerator(SetEnumerator &&) = delete; + SetEnumerator &operator=(SetEnumerator &&) = delete; - iterator begin() { - return iterator(_n, _m, _bitset, false); - } + iterator begin() { return iterator(_n, _m, _bitset, false); } - iterator end() { - return iterator(_n, _m, _bitset, true); - } + iterator end() { return iterator(_n, _m, _bitset, true); } - const_iterator cbegin() { - return const_iterator(_n, _m, _bitset, false); - } + const_iterator cbegin() { return const_iterator(_n, _m, _bitset, false); } - const_iterator cend() { - return const_iterator(_n, _m, _bitset, true); - } + const_iterator cend() { return const_iterator(_n, _m, _bitset, true); } - private: +private: const size_t _n; const size_t _m; ds::Bitset _bitset; @@ -169,112 +173,107 @@ class SetEnumerator { * of a set. The set is represented as a bitset and position of the one bits * marks elements contained in the set. The set contains elements from 0 to n-1. */ -class SubsetEnumerator { +class SubsetEnumerator +{ using Block = typename ds::StaticBitset::Block; - class SubsetIterator { + class SubsetIterator + { using iterator_category = std::forward_iterator_tag; using value_type = const ds::StaticBitset; - using reference = const ds::StaticBitset&; - using pointer = const ds::StaticBitset*; + using reference = const ds::StaticBitset &; + using pointer = const ds::StaticBitset *; using difference_type = std::ptrdiff_t; - public: - SubsetIterator(const vec& set, - ds::Bitset& bitset, - const bool end) : - _set(set), - _bitset(bitset), - _cur_mask(0), - _cur_subset(bitset.numBlocks(), bitset.data()) { - if ( !end ) { + public: + SubsetIterator(const vec &set, ds::Bitset &bitset, const bool end) : + _set(set), _bitset(bitset), _cur_mask(0), + _cur_subset(bitset.numBlocks(), bitset.data()) + { + if(!end) + { applyNextMask(); - } else { + } + else + { _cur_mask = (static_cast(1) << _set.size()) - 1; // pow(2, set.size()) - 1 } } - const ds::StaticBitset& operator*() const { - return _cur_subset; - } + const ds::StaticBitset &operator*() const { return _cur_subset; } - SubsetIterator& operator++() { + SubsetIterator &operator++() + { applyNextMask(); return *this; } - SubsetIterator operator++(int ) { + SubsetIterator operator++(int) + { const SubsetIterator res = *this; applyNextMask(); return res; } - bool operator==(const SubsetIterator& o) const { - return _cur_mask == o._cur_mask; - } + bool operator==(const SubsetIterator &o) const { return _cur_mask == o._cur_mask; } - bool operator!=(const SubsetIterator& o) const { - return !operator==(o); - } + bool operator!=(const SubsetIterator &o) const { return !operator==(o); } - private: - void applyNextMask() { + private: + void applyNextMask() + { ++_cur_mask; - for ( size_t 
i = 0; i < _set.size(); ++i ) { - if ( _cur_mask & ( 1 << i ) ) { + for(size_t i = 0; i < _set.size(); ++i) + { + if(_cur_mask & (1 << i)) + { _bitset.set(_set[i]); - } else { + } + else + { _bitset.unset(_set[i]); } } } - const vec& _set; - ds::Bitset& _bitset; + const vec &_set; + ds::Bitset &_bitset; Block _cur_mask; ds::StaticBitset _cur_subset; }; - public: +public: using iterator = SubsetIterator; using const_iterator = const SubsetIterator; - explicit SubsetEnumerator(const size_t n, - const ds::StaticBitset& set) : - _set(set.popcount(), 0), - _subset(n) { + explicit SubsetEnumerator(const size_t n, const ds::StaticBitset &set) : + _set(set.popcount(), 0), _subset(n) + { size_t i = 0; - for ( const PartitionID& block : set ) { + for(const PartitionID &block : set) + { ASSERT(i < _set.size()); _set[i++] = block; } } - SubsetEnumerator(const SubsetEnumerator&) = delete; - SubsetEnumerator & operator= (const SubsetEnumerator &) = delete; - SubsetEnumerator(SubsetEnumerator&&) = delete; - SubsetEnumerator & operator= (SubsetEnumerator &&) = delete; + SubsetEnumerator(const SubsetEnumerator &) = delete; + SubsetEnumerator &operator=(const SubsetEnumerator &) = delete; + SubsetEnumerator(SubsetEnumerator &&) = delete; + SubsetEnumerator &operator=(SubsetEnumerator &&) = delete; - iterator begin() { - return iterator(_set, _subset, false); - } + iterator begin() { return iterator(_set, _subset, false); } - iterator end() { - return iterator(_set, _subset, true); - } + iterator end() { return iterator(_set, _subset, true); } - const_iterator cbegin() { - return const_iterator(_set, _subset, false); - } + const_iterator cbegin() { return const_iterator(_set, _subset, false); } - const_iterator cend() { - return const_iterator(_set, _subset, true); - } + const_iterator cend() { return const_iterator(_set, _subset, true); } - private: +private: vec _set; ds::Bitset _subset; }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/steiner_tree.cpp b/mt-kahypar/partition/mapping/steiner_tree.cpp index ba54f5212..83eb35821 100644 --- a/mt-kahypar/partition/mapping/steiner_tree.cpp +++ b/mt-kahypar/partition/mapping/steiner_tree.cpp @@ -24,20 +24,23 @@ * SOFTWARE. ******************************************************************************/ -#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/partition/mapping/steiner_tree.h" +#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/partition/mapping/all_pair_shortest_path.h" #include "mt-kahypar/partition/mapping/set_enumerator.h" namespace mt_kahypar { namespace { -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t index(const ds::StaticBitset& set, const PartitionID k) { +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t index(const ds::StaticBitset &set, + const PartitionID k) +{ ASSERT(set.popcount() > 0); size_t index = 0; PartitionID multiplier = 1; PartitionID last_block = kInvalidPartition; - for ( const PartitionID block : set ) { + for(const PartitionID block : set) + { index += multiplier * block; multiplier *= k; last_block = block; @@ -45,8 +48,9 @@ MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t index(const ds::StaticBitset& set, con return index + (multiplier == k ? 
last_block * k : 0); } -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t index( - const HypernodeID u, const HypernodeID v, const HypernodeID n) { +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t index(const HypernodeID u, const HypernodeID v, + const HypernodeID n) +{ ASSERT(u < n && v < n); return u + v * n; } @@ -74,24 +78,25 @@ MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t index( * must be a shortest path. Otherwise, the steiner tree has not minimal weight. * Morover, paths connecting {p, s, t} and {p, r} in the steiner tree must be also * optimal steiner trees for the terminal set T' = {p, s, t} and T'' = {p, t}. Otherwise, - * the given steiner tree would be not optimal. Thus, we can construct optimal solution for steiner - * trees out of optimal solution of smaller steiner trees. + * the given steiner tree would be not optimal. Thus, we can construct optimal solution + * for steiner trees out of optimal solution of smaller steiner trees. * - * The following algorithm uses dynamic programming to compute all steiner trees up to a given - * size of the terminal set. It first computes the shortest path between all nodes and then - * iterates over all subsets of the node set of certain size. For each subset D, we then enumerate - * all subsets E c D and compute S_u(E) := OPT(E u { u }) + OPT(D \ E u {u}) for all nodes u \in V, where - * OPT(E) is the optimal solution of the steiner tree problem for the terminal set E (precomputed in previous - * step). The optimal solution for a terminal set D u { v } for an abritrary node v \in V is then - * OPT(D u { v }) := min_{u \in V} min_{E c D} S_u(E) + shortest_path(u, v). + * The following algorithm uses dynamic programming to compute all steiner trees up to a + * given size of the terminal set. It first computes the shortest path between all nodes + * and then iterates over all subsets of the node set of certain size. For each subset D, + * we then enumerate all subsets E c D and compute S_u(E) := OPT(E u { u }) + OPT(D \ E u + * {u}) for all nodes u \in V, where OPT(E) is the optimal solution of the steiner tree + * problem for the terminal set E (precomputed in previous step). The optimal solution for + * a terminal set D u { v } for an abritrary node v \in V is then OPT(D u { v }) := min_{u + * \in V} min_{E c D} S_u(E) + shortest_path(u, v). * * The complexity of the algorithm is * O( n^3 + \sum_{k = 2 to m - 1} binomial(n, k) * n * ( 2^k * k + n * k ) ) * where m is the maximum size of the precomputed terminal set */ -void SteinerTree::compute(const ds::StaticGraph& graph, - const size_t max_set_size, - vec& distances) { +void SteinerTree::compute(const ds::StaticGraph &graph, const size_t max_set_size, + vec &distances) +{ const PartitionID n = graph.initialNumNodes(); // Floyds All-Pair Shortest Path Algorithm -> O(n^3) AllPairShortestPath::compute(graph, distances); @@ -111,17 +116,21 @@ void SteinerTree::compute(const ds::StaticGraph& graph, * for each v in V do * S[ D u { v } ] = min( S[ D u { v } ], S[ {u, v} ] + min_dist ) */ - for ( size_t m = 2; m < max_set_size; ++m ) { // k - 2 steps -> k := max_set_size + for(size_t m = 2; m < max_set_size; ++m) + { // k - 2 steps -> k := max_set_size SetEnumerator subsets_of_size_m(n, m); // We compute for each subset D c V of size m the optimal steiner tree here - for ( const ds::StaticBitset& d_tmp : subsets_of_size_m ) { // O(binom(n,k)) = O(n! / (k!*(n - k)!)) steps + for(const ds::StaticBitset &d_tmp : subsets_of_size_m) + { // O(binom(n,k)) = O(n! 
/ (k!*(n - k)!)) steps ds::Bitset d_set = d_tmp.copy(); ds::StaticBitset d(d_set.numBlocks(), d_set.data()); ASSERT(static_cast(d.popcount()) == m); - for ( const HypernodeID& u : graph.nodes() ) { // O(n) steps + for(const HypernodeID &u : graph.nodes()) + { // O(n) steps HyperedgeWeight min_dist = std::numeric_limits::max(); SubsetEnumerator proper_subsets_of_d(n, d); - for ( const ds::StaticBitset& e_tmp : proper_subsets_of_d ) { // O(2^k) steps + for(const ds::StaticBitset &e_tmp : proper_subsets_of_d) + { // O(2^k) steps // Here, we iterate over all subsets E c D and compute the optimal steiner tree // for D with the assumption that u is the junction node of the steiner tree. ds::Bitset e_set = e_tmp.copy(); @@ -132,7 +141,8 @@ void SteinerTree::compute(const ds::StaticGraph& graph, f_set.set(u); // Add u to F -> F u { u } min_dist = std::min(min_dist, distances[index(e, n)] + distances[index(f, n)]); } - for ( const HypernodeID& v : graph.nodes() ) { // O(n) steps + for(const HypernodeID &v : graph.nodes()) + { // O(n) steps // Compute optimal steiner tree for D u { v } with the assumption that // u is the junction node of the optimal steiner tree. Since the outer // loop iterates over all u \in V, this will compute the optimal steiner @@ -140,16 +150,16 @@ void SteinerTree::compute(const ds::StaticGraph& graph, const bool was_set = d_set.isSet(v); d_set.set(v); // Add v to set D -> D u { v } const size_t idx_d = index(d, n); - distances[idx_d] = std::min(distances[idx_d], - distances[index(u, v, n)] + min_dist); - if ( !was_set ) { + distances[idx_d] = + std::min(distances[idx_d], distances[index(u, v, n)] + min_dist); + if(!was_set) + { d_set.unset(v); } } } } } - } -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/steiner_tree.h b/mt-kahypar/partition/mapping/steiner_tree.h index 26092b28c..e74e19023 100644 --- a/mt-kahypar/partition/mapping/steiner_tree.h +++ b/mt-kahypar/partition/mapping/steiner_tree.h @@ -26,20 +26,20 @@ #pragma once -#include "mt-kahypar/macros.h" #include "mt-kahypar/datastructures/static_graph.h" +#include "mt-kahypar/macros.h" namespace mt_kahypar { -class SteinerTree { +class SteinerTree +{ - public: - static void compute(const ds::StaticGraph& graph, - const size_t max_set_size, - vec& distances); +public: + static void compute(const ds::StaticGraph &graph, const size_t max_set_size, + vec &distances); - private: - SteinerTree() { } +private: + SteinerTree() {} }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/target_graph.cpp b/mt-kahypar/partition/mapping/target_graph.cpp index 3523171d3..7a4d6264d 100644 --- a/mt-kahypar/partition/mapping/target_graph.cpp +++ b/mt-kahypar/partition/mapping/target_graph.cpp @@ -36,12 +36,13 @@ namespace mt_kahypar { #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC -void TargetGraph::precomputeDistances(const size_t max_connectivity) { +void TargetGraph::precomputeDistances(const size_t max_connectivity) +{ const size_t num_entries = std::pow(_k, max_connectivity); - if ( num_entries > MEMORY_LIMIT ) { - throw SystemException( - "Too much memory requested for precomputing steiner trees " - "of connectivity sets in the target graph."); + if(num_entries > MEMORY_LIMIT) + { + throw SystemException("Too much memory requested for precomputing steiner trees " + "of connectivity sets in the target graph."); } _distances.assign(num_entries, std::numeric_limits::max() / 3); SteinerTree::compute(_graph, max_connectivity, _distances); @@ -50,71 +51,89 @@ 
void TargetGraph::precomputeDistances(const size_t max_connectivity) { _is_initialized = true; } -HyperedgeWeight TargetGraph::distance(const ds::StaticBitset& connectivity_set) const { +HyperedgeWeight TargetGraph::distance(const ds::StaticBitset &connectivity_set) const +{ const PartitionID connectivity = connectivity_set.popcount(); const size_t idx = index(connectivity_set); - if ( likely(connectivity <= _max_precomputed_connectitivty) ) { + if(likely(connectivity <= _max_precomputed_connectitivty)) + { ASSERT(idx < _distances.size()); - if constexpr ( TRACK_STATS ) ++_stats.precomputed; + if constexpr(TRACK_STATS) + ++_stats.precomputed; return _distances[idx]; - } else { - // We have not precomputed the optimal steiner tree for the connectivity set. - #ifdef __linux__ - HashTableHandle& handle = _handles.local(); + } + else + { +// We have not precomputed the optimal steiner tree for the connectivity set. +#ifdef __linux__ + HashTableHandle &handle = _handles.local(); auto res = handle.find(idx); - if ( likely( res != handle.end() ) ) { - if constexpr ( TRACK_STATS ) ++_stats.cache_hits; + if(likely(res != handle.end())) + { + if constexpr(TRACK_STATS) + ++_stats.cache_hits; return (*res).second; - } else { - if constexpr ( TRACK_STATS ) ++_stats.cache_misses; + } + else + { + if constexpr(TRACK_STATS) + ++_stats.cache_misses; // Entry is not cached => Compute 2-approximation of optimal steiner tree const HyperedgeWeight mst_weight = - computeWeightOfMSTOnMetricCompletion(connectivity_set); + computeWeightOfMSTOnMetricCompletion(connectivity_set); handle.insert(idx, mst_weight); return mst_weight; } - #elif defined(_WIN32) or defined(__APPLE__) +#elif defined(_WIN32) or defined(__APPLE__) auto res = _cache.find(idx); - if ( likely ( res != _cache.end() ) ) { - if constexpr ( TRACK_STATS ) ++_stats.cache_hits; + if(likely(res != _cache.end())) + { + if constexpr(TRACK_STATS) + ++_stats.cache_hits; return res->second; - } else { - if constexpr ( TRACK_STATS ) ++_stats.cache_misses; + } + else + { + if constexpr(TRACK_STATS) + ++_stats.cache_misses; // Entry is not cached => Compute 2-approximation of optimal steiner tree const HyperedgeWeight mst_weight = - computeWeightOfMSTOnMetricCompletion(connectivity_set); + computeWeightOfMSTOnMetricCompletion(connectivity_set); _cache.insert(std::make_pair(idx, mst_weight)); return mst_weight; } - #endif +#endif } } /** - * This function computes an MST on the metric completion of the target graph restricted to - * the blocks in the connectivity set. To compute the MST, we use Jarnik-Prim algorithm which - * has time complexity of |E| + |V| * log(|V|) = |V|^2 + |V| * log(|V|) (since we work on a - * complete graph). However, we restrict the computation only to nodes and edges contained in - * the connectivity set. + * This function computes an MST on the metric completion of the target graph restricted + * to the blocks in the connectivity set. To compute the MST, we use Jarnik-Prim algorithm + * which has time complexity of |E| + |V| * log(|V|) = |V|^2 + |V| * log(|V|) (since we + * work on a complete graph). However, we restrict the computation only to nodes and edges + * contained in the connectivity set. 
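 *
 * A self-contained sketch of this 2-approximation (plain C++, independent of the
 * mt-kahypar types; the distance matrix and all names are illustrative, and the
 * lazily-deleted priority queue replaces the lightest_edge bookkeeping used below):
 *
 *   #include <functional>
 *   #include <queue>
 *   #include <utility>
 *   #include <vector>
 *
 *   // dist[i][j]: shortest-path distance between blocks i and j (metric completion).
 *   int mstOnMetricCompletion(const std::vector<std::vector<int>> &dist,
 *                             const std::vector<int> &blocks) {
 *     if (blocks.empty()) return 0;
 *     using QElem = std::pair<int, int>; // (edge weight, block)
 *     std::priority_queue<QElem, std::vector<QElem>, std::greater<QElem>> pq;
 *     std::vector<bool> in_mst(dist.size(), false);
 *     int weight = 0;
 *     size_t added = 0;
 *     pq.push({ 0, blocks.front() });
 *     while (!pq.empty() && added < blocks.size()) {
 *       const QElem top = pq.top();
 *       pq.pop();
 *       if (in_mst[top.second]) continue; // stale entry, reached via a lighter edge
 *       in_mst[top.second] = true;
 *       weight += top.first;
 *       ++added;
 *       for (int v : blocks)
 *         if (!in_mst[v]) pq.push({ dist[top.second][v], v });
 *     }
 *     return weight; // MST weight, a 2-approximation of the Steiner tree weight
 *   }
 *
 *   // Example: path target graph 0 - 1 - 2 - 3 with unit edges; for the blocks
 *   // {0, 2, 3} the MST uses the edges (0,2) and (2,3) and has weight 2 + 1 = 3.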
*/ -HyperedgeWeight TargetGraph::computeWeightOfMSTOnMetricCompletion(const ds::StaticBitset& connectivity_set) const { +HyperedgeWeight TargetGraph::computeWeightOfMSTOnMetricCompletion( + const ds::StaticBitset &connectivity_set) const +{ ASSERT(_is_initialized); ASSERT(connectivity_set.popcount() > 0); - MSTData& mst_data = _local_mst_data.local(); - ds::Bitset& remaining_nodes = mst_data.bitset; - ds::StaticBitset cur_blocks( - remaining_nodes.numBlocks(), remaining_nodes.data()); - vec& lightest_edge = mst_data.lightest_edge; - PQ& pq = mst_data.pq; + MSTData &mst_data = _local_mst_data.local(); + ds::Bitset &remaining_nodes = mst_data.bitset; + ds::StaticBitset cur_blocks(remaining_nodes.numBlocks(), remaining_nodes.data()); + vec &lightest_edge = mst_data.lightest_edge; + PQ &pq = mst_data.pq; ASSERT(pq.empty()); auto push = [&](const PartitionID u) { - for ( const PartitionID& v : cur_blocks ) { + for(const PartitionID &v : cur_blocks) + { ASSERT(u != v); - const HyperedgeWeight dist = _distances[index(u,v)]; + const HyperedgeWeight dist = _distances[index(u, v)]; // If there is a lighter edge connecting v to the MST, // we push v with the new weight into the PQ. - if ( dist < lightest_edge[v] ) { + if(dist < lightest_edge[v]) + { pq.push(std::make_pair(dist, v)); lightest_edge[v] = dist; } @@ -123,7 +142,8 @@ HyperedgeWeight TargetGraph::computeWeightOfMSTOnMetricCompletion(const ds::Stat // Initialize data structure and PQ PartitionID root = kInvalidPartition; - for ( const PartitionID& block : connectivity_set ) { + for(const PartitionID &block : connectivity_set) + { remaining_nodes.set(block); lightest_edge[block] = std::numeric_limits::max(); root = block; @@ -132,10 +152,13 @@ HyperedgeWeight TargetGraph::computeWeightOfMSTOnMetricCompletion(const ds::Stat push(root); HyperedgeWeight res = 0; - while ( !pq.empty() ) { - PQElement best = pq.top(); pq.pop(); + while(!pq.empty()) + { + PQElement best = pq.top(); + pq.pop(); const PartitionID u = best.second; - if ( !remaining_nodes.isSet(u) ) { + if(!remaining_nodes.isSet(u)) + { // u is already contained in the MST -> skip continue; } @@ -152,4 +175,4 @@ HyperedgeWeight TargetGraph::computeWeightOfMSTOnMetricCompletion(const ds::Stat } #endif -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/mapping/target_graph.h b/mt-kahypar/partition/mapping/target_graph.h index c02f9213a..66b006f55 100644 --- a/mt-kahypar/partition/mapping/target_graph.h +++ b/mt-kahypar/partition/mapping/target_graph.h @@ -26,9 +26,9 @@ #pragma once -#include -#include #include +#include +#include #include "tbb/enumerable_thread_specific.h" @@ -43,87 +43,77 @@ #include "tbb/concurrent_unordered_map.h" #endif -#include "mt-kahypar/macros.h" -#include "mt-kahypar/datastructures/static_graph.h" #include "mt-kahypar/datastructures/static_bitset.h" +#include "mt-kahypar/datastructures/static_graph.h" +#include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/atomic_wrapper.h" namespace mt_kahypar { #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC -class TargetGraph { +class TargetGraph +{ static constexpr size_t INITIAL_HASH_TABLE_CAPACITY = 100000; static constexpr size_t MEMORY_LIMIT = 100000000; using PQElement = std::pair; - using PQ = std::priority_queue, std::greater>; + using PQ = std::priority_queue, std::greater >; - #ifdef __linux__ - using hasher_type = utils_tm::hash_tm::murmur2_hash; +#ifdef __linux__ + using hasher_type = utils_tm::hash_tm::murmur2_hash; using allocator_type = growt::AlignedAllocator<>; - using 
ConcurrentHashTable = typename growt::table_config< - size_t, size_t, hasher_type, allocator_type, hmod::growable, hmod::sync>::table_type; + using ConcurrentHashTable = + typename growt::table_config::table_type; using HashTableHandle = typename ConcurrentHashTable::handle_type; - #elif defined(_WIN32) or defined(__APPLE__) +#elif defined(_WIN32) or defined(__APPLE__) using ConcurrentHashTable = tbb::concurrent_unordered_map; - #endif +#endif - struct MSTData { - MSTData(const size_t n) : - bitset(n), - lightest_edge(n), - pq() { } + struct MSTData + { + MSTData(const size_t n) : bitset(n), lightest_edge(n), pq() {} ds::Bitset bitset; vec lightest_edge; PQ pq; }; - struct Stats { - Stats() : - precomputed(0), - cache_misses(0), - cache_hits(0) { } + struct Stats + { + Stats() : precomputed(0), cache_misses(0), cache_hits(0) {} CAtomic precomputed; CAtomic cache_misses; CAtomic cache_hits; }; - public: +public: static constexpr bool TRACK_STATS = false; - explicit TargetGraph(ds::StaticGraph&& graph) : - _is_initialized(false), - _k(graph.initialNumNodes()), - _graph(std::move(graph)), - _max_precomputed_connectitivty(0), - _distances(), - _local_mst_data(graph.initialNumNodes()), - _cache(INITIAL_HASH_TABLE_CAPACITY), - #ifdef __linux__ - _handles([&]() { return getHandle(); }), - #endif - _stats() { } - - TargetGraph(const TargetGraph&) = delete; - TargetGraph & operator= (const TargetGraph &) = delete; - - TargetGraph(TargetGraph&&) = default; - TargetGraph & operator= (TargetGraph &&) = default; - - PartitionID numBlocks() const { - return _k; + explicit TargetGraph(ds::StaticGraph &&graph) : + _is_initialized(false), _k(graph.initialNumNodes()), _graph(std::move(graph)), + _max_precomputed_connectitivty(0), _distances(), + _local_mst_data(graph.initialNumNodes()), _cache(INITIAL_HASH_TABLE_CAPACITY), +#ifdef __linux__ + _handles([&]() { return getHandle(); }), +#endif + _stats() + { } - bool isInitialized() const { - return _is_initialized; - } + TargetGraph(const TargetGraph &) = delete; + TargetGraph &operator=(const TargetGraph &) = delete; - const ds::StaticGraph& graph() const { - return _graph; - } + TargetGraph(TargetGraph &&) = default; + TargetGraph &operator=(TargetGraph &&) = default; + + PartitionID numBlocks() const { return _k; } + + bool isInitialized() const { return _is_initialized; } + + const ds::StaticGraph &graph() const { return _graph; } // ! This function computes the weight of all steiner trees for all // ! connectivity sets with connectivity at most m (:= max_connectivity), @@ -133,43 +123,51 @@ class TargetGraph { // ! in the connectivity set if precomputed. Otherwise, we compute // ! a 2-approximation of the optimal steiner tree // ! (see computeWeightOfMSTOnMetricCompletion(...)) - HyperedgeWeight distance(const ds::StaticBitset& connectivity_set) const; + HyperedgeWeight distance(const ds::StaticBitset &connectivity_set) const; - HyperedgeWeight distance(const ds::Bitset& connectivity_set) const { + HyperedgeWeight distance(const ds::Bitset &connectivity_set) const + { ds::StaticBitset view(connectivity_set.numBlocks(), connectivity_set.data()); return distance(view); } // ! Computes the optimal steiner tree between the blocks in the connectivity // ! set if we would add an additional block. 
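A minimal, self-contained sketch of how such with/without-block queries typically enter a gain computation (plain C++; the path-shaped distance oracle and all names are illustrative stand-ins for the precomputed target-graph distances):

#include <set>

// Stand-in for TargetGraph::distance: on a path-shaped target graph with unit
// edges, the Steiner tree weight of a block set S is max(S) - min(S).
int steinerDistance(const std::set<int> &blocks) {
  return blocks.empty() ? 0 : *blocks.rbegin() - *blocks.begin();
}

// Gain of moving a node from block `from` to block `to` for one incident
// hyperedge: query the distance of a temporarily modified copy of the
// connectivity set, mirroring distanceWithBlock / distanceWithoutBlock
// (which set or unset the bit, query, and then restore the bitset).
int moveGainForEdge(const std::set<int> &connectivity_set, int edge_weight,
                    int from, int to, bool node_is_last_pin_in_from) {
  std::set<int> modified = connectivity_set;
  if (node_is_last_pin_in_from) modified.erase(from);
  modified.insert(to);
  return edge_weight * (steinerDistance(connectivity_set) - steinerDistance(modified));
}

int main() {
  // Edge spanning blocks {0, 3} on a path target graph; moving its only pin in
  // block 0 to block 2 shrinks the tree from weight 3 to weight 1: gain 2 * 2 = 4.
  return moveGainForEdge({ 0, 3 }, 2, 0, 2, true) == 4 ? 0 : 1;
}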
- HyperedgeWeight distanceWithBlock(ds::Bitset& connectivity_set, const PartitionID block) const { + HyperedgeWeight distanceWithBlock(ds::Bitset &connectivity_set, + const PartitionID block) const + { ASSERT(block < _k); const bool was_set = connectivity_set.isSet(block); connectivity_set.set(block); ds::StaticBitset view(connectivity_set.numBlocks(), connectivity_set.data()); const HyperedgeWeight dist = distance(view); - if ( !was_set ) connectivity_set.unset(block); + if(!was_set) + connectivity_set.unset(block); return dist; } // ! Computes the optimal steiner tree between the blocks in the connectivity // ! set if we would remove an block. - HyperedgeWeight distanceWithoutBlock(ds::Bitset& connectivity_set, const PartitionID block) const { + HyperedgeWeight distanceWithoutBlock(ds::Bitset &connectivity_set, + const PartitionID block) const + { ASSERT(block < _k); const bool was_set = connectivity_set.isSet(block); connectivity_set.unset(block); ds::StaticBitset view(connectivity_set.numBlocks(), connectivity_set.data()); const HyperedgeWeight dist = distance(view); - if ( was_set ) connectivity_set.set(block); + if(was_set) + connectivity_set.set(block); return dist; } // ! Computes the optimal steiner tree between the blocks in the connectivity // ! set if we would remove block `removed_block` and block `added_block` to // ! the connectivity set. - HyperedgeWeight distanceAfterExchangingBlocks(ds::Bitset& connectivity_set, + HyperedgeWeight distanceAfterExchangingBlocks(ds::Bitset &connectivity_set, const PartitionID removed_block, - const PartitionID added_block) const { + const PartitionID added_block) const + { ASSERT(removed_block < _k && added_block < _k); const bool was_removed_set = connectivity_set.isSet(removed_block); const bool was_added_set = connectivity_set.isSet(added_block); @@ -177,70 +175,80 @@ class TargetGraph { connectivity_set.set(added_block); ds::StaticBitset view(connectivity_set.numBlocks(), connectivity_set.data()); const HyperedgeWeight dist = distance(view); - if ( was_removed_set ) connectivity_set.set(removed_block); - if ( !was_added_set ) connectivity_set.unset(added_block); + if(was_removed_set) + connectivity_set.set(removed_block); + if(!was_added_set) + connectivity_set.unset(added_block); return dist; } // ! Returns the shortest path between two blocks in the target graph - HyperedgeWeight distance(const PartitionID i, const PartitionID j) const { + HyperedgeWeight distance(const PartitionID i, const PartitionID j) const + { ASSERT(_is_initialized); return _distances[index(i, j)]; } // ! 
Print statistics - void printStats() const { - const size_t total_requests = _stats.precomputed + _stats.cache_hits + _stats.cache_misses; + void printStats() const + { + const size_t total_requests = + _stats.precomputed + _stats.cache_hits + _stats.cache_misses; LOG << "\nTarget Graph Distance Computation Stats:"; std::cout << "Accessed Precomputed Distance = " << std::setprecision(2) << (static_cast(_stats.precomputed) / total_requests) * 100 << "% (" << _stats.precomputed << ")" << std::endl; std::cout << " Computed MST = " << std::setprecision(2) - << (static_cast(_stats.cache_misses) / total_requests) * 100 << "% (" - << _stats.cache_misses << ")" << std::endl; + << (static_cast(_stats.cache_misses) / total_requests) * 100 + << "% (" << _stats.cache_misses << ")" << std::endl; std::cout << " Used Cached MST = " << std::setprecision(2) << (static_cast(_stats.cache_hits) / total_requests) * 100 << "% (" << _stats.cache_hits << ")" << std::endl; } - void printStats(std::stringstream& oss) const { + void printStats(std::stringstream &oss) const + { oss << " used_precomputed_distance=" << _stats.precomputed << " used_mst=" << _stats.cache_misses << " used_cached_mst=" << _stats.cache_hits; } - private: +private: MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t index(const PartitionID i, - const PartitionID j) const { + const PartitionID j) const + { ASSERT(i < _k && j < _k); return i + j * _k; } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t index(const ds::StaticBitset& connectivity_set) const { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t + index(const ds::StaticBitset &connectivity_set) const + { size_t index = 0; size_t multiplier = 1; PartitionID last_block = kInvalidPartition; - for ( const PartitionID block : connectivity_set ) { + for(const PartitionID block : connectivity_set) + { ASSERT(block != kInvalidPartition && block < _k); index += multiplier * UL(block); multiplier *= UL(_k); last_block = block; } - return last_block != kInvalidPartition ? index + - (multiplier == UL(_k) ? last_block * _k : 0) : 0; + return last_block != kInvalidPartition ? + index + (multiplier == UL(_k) ? last_block * _k : 0) : + 0; } // ! This function computes an MST on the metric completion of the target graph // ! restricted to the blocks in the connectivity set. The metric completion is // ! complete graph where each edge {u,v} has a weight equals the shortest path // ! connecting u and v. This gives a 2-approximation for steiner tree problem. - HyperedgeWeight computeWeightOfMSTOnMetricCompletion(const ds::StaticBitset& connectivity_set) const; + HyperedgeWeight + computeWeightOfMSTOnMetricCompletion(const ds::StaticBitset &connectivity_set) const; - #ifdef __linux__ - HashTableHandle getHandle() const { - return _cache.get_handle(); - } - #endif +#ifdef __linux__ + HashTableHandle getHandle() const { return _cache.get_handle(); } +#endif bool _is_initialized; @@ -263,46 +271,41 @@ class TargetGraph { // ! Cache stores the weight of MST computations mutable ConcurrentHashTable _cache; - #ifdef __linux__ +#ifdef __linux__ // ! Handle to access concurrent hash table mutable tbb::enumerable_thread_specific _handles; - #endif +#endif // ! 
Stats mutable Stats _stats; }; #else -class TargetGraph { - public: +class TargetGraph +{ +public: static constexpr bool TRACK_STATS = false; - explicit TargetGraph(ds::StaticGraph&&) { } + explicit TargetGraph(ds::StaticGraph &&) {} - TargetGraph(const TargetGraph&) = delete; - TargetGraph & operator= (const TargetGraph &) = delete; + TargetGraph(const TargetGraph &) = delete; + TargetGraph &operator=(const TargetGraph &) = delete; - TargetGraph(TargetGraph&&) = default; - TargetGraph & operator= (TargetGraph &&) = default; + TargetGraph(TargetGraph &&) = default; + TargetGraph &operator=(TargetGraph &&) = default; - PartitionID numBlocks() const { - return 0; - } + PartitionID numBlocks() const { return 0; } - bool isInitialized() const { - return false; - } + bool isInitialized() const { return false; } - void precomputeDistances(const size_t) { } + void precomputeDistances(const size_t) {} - HyperedgeWeight distance(const ds::StaticBitset&) const { - return 0; - } + HyperedgeWeight distance(const ds::StaticBitset &) const { return 0; } - void printStats() const { } + void printStats() const {} - void printStats(std::stringstream&) const { } + void printStats(std::stringstream &) const {} }; #endif -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/metrics.cpp b/mt-kahypar/partition/metrics.cpp index 5223a88a4..601343729 100644 --- a/mt-kahypar/partition/metrics.cpp +++ b/mt-kahypar/partition/metrics.cpp @@ -27,8 +27,8 @@ #include "mt-kahypar/partition/metrics.h" -#include #include +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/mapping/target_graph.h" @@ -38,165 +38,210 @@ namespace mt_kahypar::metrics { namespace { -template -struct ObjectiveFunction { }; +template +struct ObjectiveFunction +{ +}; -template -struct ObjectiveFunction { - HyperedgeWeight operator()(const PartitionedHypergraph& phg, const HyperedgeID& he) const { +template +struct ObjectiveFunction +{ + HyperedgeWeight operator()(const PartitionedHypergraph &phg, + const HyperedgeID &he) const + { return phg.connectivity(he) > 1 ? phg.edgeWeight(he) : 0; } }; -template -struct ObjectiveFunction { - HyperedgeWeight operator()(const PartitionedHypergraph& phg, const HyperedgeID& he) const { +template +struct ObjectiveFunction +{ + HyperedgeWeight operator()(const PartitionedHypergraph &phg, + const HyperedgeID &he) const + { return std::max(phg.connectivity(he) - 1, 0) * phg.edgeWeight(he); } }; -template -struct ObjectiveFunction { - HyperedgeWeight operator()(const PartitionedHypergraph& phg, const HyperedgeID& he) const { +template +struct ObjectiveFunction +{ + HyperedgeWeight operator()(const PartitionedHypergraph &phg, + const HyperedgeID &he) const + { const PartitionID connectivity = phg.connectivity(he); return connectivity > 1 ? 
connectivity * phg.edgeWeight(he) : 0; } }; -template -struct ObjectiveFunction { - HyperedgeWeight operator()(const PartitionedHypergraph& phg, const HyperedgeID& he) const { +template +struct ObjectiveFunction +{ + HyperedgeWeight operator()(const PartitionedHypergraph &phg, + const HyperedgeID &he) const + { ASSERT(phg.hasTargetGraph()); - const TargetGraph* target_graph = phg.targetGraph(); - const HyperedgeWeight distance = target_graph->distance(phg.shallowCopyOfConnectivitySet(he)); + const TargetGraph *target_graph = phg.targetGraph(); + const HyperedgeWeight distance = + target_graph->distance(phg.shallowCopyOfConnectivitySet(he)); return distance * phg.edgeWeight(he); } }; -template -HyperedgeWeight compute_objective_parallel(const PartitionedHypergraph& phg) { +template +HyperedgeWeight compute_objective_parallel(const PartitionedHypergraph &phg) +{ ObjectiveFunction func; tbb::enumerable_thread_specific obj(0); - phg.doParallelForAllEdges([&](const HyperedgeID he) { - obj.local() += func(phg, he); - }); + phg.doParallelForAllEdges([&](const HyperedgeID he) { obj.local() += func(phg, he); }); return obj.combine(std::plus<>()) / (PartitionedHypergraph::is_graph ? 2 : 1); } -template -HyperedgeWeight compute_objective_sequentially(const PartitionedHypergraph& phg) { +template +HyperedgeWeight compute_objective_sequentially(const PartitionedHypergraph &phg) +{ ObjectiveFunction func; HyperedgeWeight obj = 0; - for (const HyperedgeID& he : phg.edges()) { + for(const HyperedgeID &he : phg.edges()) + { obj += func(phg, he); } return obj / (PartitionedHypergraph::is_graph ? 2 : 1); } -template -HyperedgeWeight contribution(const PartitionedHypergraph& phg, const HyperedgeID he) { +template +HyperedgeWeight contribution(const PartitionedHypergraph &phg, const HyperedgeID he) +{ ObjectiveFunction func; return func(phg, he); } } -template -HyperedgeWeight quality(const PartitionedHypergraph& hg, - const Context& context, - const bool parallel) { +template +HyperedgeWeight quality(const PartitionedHypergraph &hg, const Context &context, + const bool parallel) +{ return quality(hg, context.partition.objective, parallel); } -template -HyperedgeWeight quality(const PartitionedHypergraph& hg, - const Objective objective, - const bool parallel) { - switch (objective) { - case Objective::cut: - return parallel ? compute_objective_parallel(hg) : - compute_objective_sequentially(hg); - case Objective::km1: - return parallel ? compute_objective_parallel(hg) : - compute_objective_sequentially(hg); - case Objective::soed: - return parallel ? compute_objective_parallel(hg) : - compute_objective_sequentially(hg); - case Objective::steiner_tree: - return parallel ? compute_objective_parallel(hg) : - compute_objective_sequentially(hg); - default: throw InvalidParameterException("Unknown Objective"); +template +HyperedgeWeight quality(const PartitionedHypergraph &hg, const Objective objective, + const bool parallel) +{ + switch(objective) + { + case Objective::cut: + return parallel ? compute_objective_parallel(hg) : + compute_objective_sequentially(hg); + case Objective::km1: + return parallel ? compute_objective_parallel(hg) : + compute_objective_sequentially(hg); + case Objective::soed: + return parallel ? compute_objective_parallel(hg) : + compute_objective_sequentially(hg); + case Objective::steiner_tree: + return parallel ? 
compute_objective_parallel(hg) : + compute_objective_sequentially(hg); + default: + throw InvalidParameterException("Unknown Objective"); } return 0; } -template -HyperedgeWeight contribution(const PartitionedHypergraph& hg, - const HyperedgeID he, - const Objective objective) { - switch (objective) { - case Objective::cut: return contribution(hg, he); - case Objective::km1: return contribution(hg, he); - case Objective::soed: return contribution(hg, he); - case Objective::steiner_tree: return contribution(hg, he); - default: throw InvalidParameterException("Unknown Objective"); +template +HyperedgeWeight contribution(const PartitionedHypergraph &hg, const HyperedgeID he, + const Objective objective) +{ + switch(objective) + { + case Objective::cut: + return contribution(hg, he); + case Objective::km1: + return contribution(hg, he); + case Objective::soed: + return contribution(hg, he); + case Objective::steiner_tree: + return contribution(hg, he); + default: + throw InvalidParameterException("Unknown Objective"); } return 0; } -template -bool isBalanced(const PartitionedHypergraph& phg, const Context& context) { +template +bool isBalanced(const PartitionedHypergraph &phg, const Context &context) +{ size_t num_empty_parts = 0; - for (PartitionID i = 0; i < context.partition.k; ++i) { - if (phg.partWeight(i) > context.partition.max_part_weights[i]) { + for(PartitionID i = 0; i < context.partition.k; ++i) + { + if(phg.partWeight(i) > context.partition.max_part_weights[i]) + { return false; } - if (phg.partWeight(i) == 0) { + if(phg.partWeight(i) == 0) + { num_empty_parts++; } } return context.partition.preset_type == PresetType::large_k || - num_empty_parts <= phg.numRemovedHypernodes(); + num_empty_parts <= phg.numRemovedHypernodes(); } -template -double imbalance(const PartitionedHypergraph& hypergraph, const Context& context) { - ASSERT(context.partition.perfect_balance_part_weights.size() == (size_t)context.partition.k); +template +double imbalance(const PartitionedHypergraph &hypergraph, const Context &context) +{ + ASSERT(context.partition.perfect_balance_part_weights.size() == + (size_t)context.partition.k); - double max_balance = (hypergraph.partWeight(0) / - static_cast(context.partition.perfect_balance_part_weights[0])); + double max_balance = + (hypergraph.partWeight(0) / + static_cast(context.partition.perfect_balance_part_weights[0])); - for (PartitionID i = 1; i < context.partition.k; ++i) { + for(PartitionID i = 1; i < context.partition.k; ++i) + { const double balance_i = - (hypergraph.partWeight(i) / - static_cast(context.partition.perfect_balance_part_weights[i])); + (hypergraph.partWeight(i) / + static_cast(context.partition.perfect_balance_part_weights[i])); max_balance = std::max(max_balance, balance_i); } return max_balance - 1.0; } -template -double approximationFactorForProcessMapping(const PartitionedHypergraph& hypergraph, const Context& context) { - if ( !PartitionedHypergraph::is_graph ) { +template +double approximationFactorForProcessMapping(const PartitionedHypergraph &hypergraph, + const Context &context) +{ + if(!PartitionedHypergraph::is_graph) + { tbb::enumerable_thread_specific approx_factor(0); - hypergraph.doParallelForAllEdges([&](const HyperedgeID& he) { + hypergraph.doParallelForAllEdges([&](const HyperedgeID &he) { const size_t connectivity = hypergraph.connectivity(he); - approx_factor.local() += connectivity <= context.mapping.max_steiner_tree_size ? 1 : 2; + approx_factor.local() += + connectivity <= context.mapping.max_steiner_tree_size ? 
1 : 2; }); - return static_cast(approx_factor.combine(std::plus<>())) / hypergraph.initialNumEdges(); - } else { + return static_cast(approx_factor.combine(std::plus<>())) / + hypergraph.initialNumEdges(); + } + else + { return 1.0; } } namespace { -#define OBJECTIVE_1(X) HyperedgeWeight quality(const X& hg, const Context& context, const bool parallel) -#define OBJECTIVE_2(X) HyperedgeWeight quality(const X& hg, const Objective objective, const bool parallel) -#define CONTRIBUTION(X) HyperedgeWeight contribution(const X& hg, const HyperedgeID he, const Objective objective) -#define IS_BALANCED(X) bool isBalanced(const X& phg, const Context& context) -#define IMBALANCE(X) double imbalance(const X& hypergraph, const Context& context) -#define APPROX_FACTOR(X) double approximationFactorForProcessMapping(const X& hypergraph, const Context& context) +#define OBJECTIVE_1(X) \ + HyperedgeWeight quality(const X &hg, const Context &context, const bool parallel) +#define OBJECTIVE_2(X) \ + HyperedgeWeight quality(const X &hg, const Objective objective, const bool parallel) +#define CONTRIBUTION(X) \ + HyperedgeWeight contribution(const X &hg, const HyperedgeID he, \ + const Objective objective) +#define IS_BALANCED(X) bool isBalanced(const X &phg, const Context &context) +#define IMBALANCE(X) double imbalance(const X &hypergraph, const Context &context) +#define APPROX_FACTOR(X) \ + double approximationFactorForProcessMapping(const X &hypergraph, const Context &context) } INSTANTIATE_FUNC_WITH_PARTITIONED_HG(OBJECTIVE_1) diff --git a/mt-kahypar/partition/metrics.h b/mt-kahypar/partition/metrics.h index 307c5b9b5..54818da60 100644 --- a/mt-kahypar/partition/metrics.h +++ b/mt-kahypar/partition/metrics.h @@ -31,7 +31,8 @@ namespace mt_kahypar { -struct Metrics { +struct Metrics +{ HyperedgeWeight quality; double imbalance; }; @@ -39,29 +40,27 @@ struct Metrics { namespace metrics { // ! Computes for the given partitioned hypergraph the corresponding objective function -template -HyperedgeWeight quality(const PartitionedHypergraph& hg, - const Context& context, +template +HyperedgeWeight quality(const PartitionedHypergraph &hg, const Context &context, const bool parallel = true); -template -HyperedgeWeight quality(const PartitionedHypergraph& hg, - const Objective objective, +template +HyperedgeWeight quality(const PartitionedHypergraph &hg, const Objective objective, const bool parallel = true); // ! 
Computes for a hyperedge the contribution to the corresponding objective function -template -HyperedgeWeight contribution(const PartitionedHypergraph& hg, - const HyperedgeID he, +template +HyperedgeWeight contribution(const PartitionedHypergraph &hg, const HyperedgeID he, const Objective objective); -template -bool isBalanced(const PartitionedHypergraph& phg, const Context& context); +template +bool isBalanced(const PartitionedHypergraph &phg, const Context &context); -template -double imbalance(const PartitionedHypergraph& hypergraph, const Context& context); +template +double imbalance(const PartitionedHypergraph &hypergraph, const Context &context); -template -double approximationFactorForProcessMapping(const PartitionedHypergraph& hypergraph, const Context& context); +template +double approximationFactorForProcessMapping(const PartitionedHypergraph &hypergraph, + const Context &context); -} // namespace metrics -} // namespace mt_kahypar +} // namespace metrics +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/multilevel.cpp b/mt-kahypar/partition/multilevel.cpp index 124d1ffea..fbc7d86ec 100644 --- a/mt-kahypar/partition/multilevel.cpp +++ b/mt-kahypar/partition/multilevel.cpp @@ -34,218 +34,253 @@ #include "include/libmtkahypartypes.h" #include "mt-kahypar/definitions.h" +#include "mt-kahypar/partition/deep_multilevel.h" #include "mt-kahypar/partition/factories.h" +#include "mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.h" #include "mt-kahypar/partition/preprocessing/sparsification/degree_zero_hn_remover.h" #include "mt-kahypar/partition/preprocessing/sparsification/large_he_remover.h" -#include "mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.h" #include "mt-kahypar/partition/recursive_bipartitioning.h" -#include "mt-kahypar/partition/deep_multilevel.h" #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC #include "mt-kahypar/partition/mapping/initial_mapping.h" #endif -#include "mt-kahypar/parallel/memory_pool.h" #include "mt-kahypar/io/partitioning_output.h" +#include "mt-kahypar/parallel/memory_pool.h" #include "mt-kahypar/partition/coarsening/multilevel_uncoarsener.h" #include "mt-kahypar/partition/coarsening/nlevel_uncoarsener.h" #include "mt-kahypar/utils/cast.h" -#include "mt-kahypar/utils/utilities.h" #include "mt-kahypar/utils/exception.h" +#include "mt-kahypar/utils/utilities.h" namespace mt_kahypar { namespace { - void disableTimerAndStats(const Context& context) { - if ( context.type == ContextType::main && context.partition.mode == Mode::direct ) { - utils::Utilities& utils = utils::Utilities::instance(); - parallel::MemoryPool::instance().deactivate_unused_memory_allocations(); - utils.getTimer(context.utility_id).disable(); - utils.getStats(context.utility_id).disable(); - } +void disableTimerAndStats(const Context &context) +{ + if(context.type == ContextType::main && context.partition.mode == Mode::direct) + { + utils::Utilities &utils = utils::Utilities::instance(); + parallel::MemoryPool::instance().deactivate_unused_memory_allocations(); + utils.getTimer(context.utility_id).disable(); + utils.getStats(context.utility_id).disable(); } +} - void enableTimerAndStats(const Context& context) { - if ( context.type == ContextType::main && context.partition.mode == Mode::direct ) { - utils::Utilities& utils = utils::Utilities::instance(); - parallel::MemoryPool::instance().activate_unused_memory_allocations(); - utils.getTimer(context.utility_id).enable(); - utils.getStats(context.utility_id).enable(); - } +void 
enableTimerAndStats(const Context &context) +{ + if(context.type == ContextType::main && context.partition.mode == Mode::direct) + { + utils::Utilities &utils = utils::Utilities::instance(); + parallel::MemoryPool::instance().activate_unused_memory_allocations(); + utils.getTimer(context.utility_id).enable(); + utils.getStats(context.utility_id).enable(); } +} - template - typename TypeTraits::PartitionedHypergraph multilevel_partitioning( - typename TypeTraits::Hypergraph& hypergraph, - const Context& context, - const TargetGraph* target_graph, - const bool is_vcycle) { - using Hypergraph = typename TypeTraits::Hypergraph; - using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - PartitionedHypergraph partitioned_hg; +template +typename TypeTraits::PartitionedHypergraph +multilevel_partitioning(typename TypeTraits::Hypergraph &hypergraph, + const Context &context, const TargetGraph *target_graph, + const bool is_vcycle) +{ + using Hypergraph = typename TypeTraits::Hypergraph; + using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; + PartitionedHypergraph partitioned_hg; + + // ################## COARSENING ################## + mt_kahypar::io::printCoarseningBanner(context); + + const bool nlevel = context.isNLevelPartitioning(); + UncoarseningData uncoarseningData(nlevel, hypergraph, context); + + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); + timer.start_timer("coarsening", "Coarsening"); + { + std::unique_ptr coarsener = CoarsenerFactory::getInstance().createObject( + context.coarsening.algorithm, utils::hypergraph_cast(hypergraph), context, + uncoarsening::to_pointer(uncoarseningData)); + coarsener->coarsen(); + + if(context.partition.verbose_output) + { + mt_kahypar_hypergraph_t coarsestHypergraph = coarsener->coarsestHypergraph(); + mt_kahypar::io::printHypergraphInfo(utils::cast(coarsestHypergraph), + context, "Coarsened Hypergraph", + context.partition.show_memory_consumption); + } + } + timer.stop_timer("coarsening"); - // ################## COARSENING ################## - mt_kahypar::io::printCoarseningBanner(context); + // ################## INITIAL PARTITIONING ################## + io::printInitialPartitioningBanner(context); + timer.start_timer("initial_partitioning", "Initial Partitioning"); + PartitionedHypergraph &phg = uncoarseningData.coarsestPartitionedHypergraph(); - const bool nlevel = context.isNLevelPartitioning(); - UncoarseningData uncoarseningData(nlevel, hypergraph, context); + if(!is_vcycle) + { + DegreeZeroHypernodeRemover degree_zero_hn_remover(context); + if(context.initial_partitioning.remove_degree_zero_hns_before_ip) + { + degree_zero_hn_remover.removeDegreeZeroHypernodes(phg.hypergraph()); + } - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - timer.start_timer("coarsening", "Coarsening"); + Context ip_context(context); + ip_context.type = ContextType::initial_partitioning; + ip_context.refinement = context.initial_partitioning.refinement; + disableTimerAndStats(context); + if(context.initial_partitioning.mode == Mode::direct) { - std::unique_ptr coarsener = CoarsenerFactory::getInstance().createObject( - context.coarsening.algorithm, utils::hypergraph_cast(hypergraph), - context, uncoarsening::to_pointer(uncoarseningData)); - coarsener->coarsen(); - - if (context.partition.verbose_output) { - mt_kahypar_hypergraph_t coarsestHypergraph = coarsener->coarsestHypergraph(); - mt_kahypar::io::printHypergraphInfo( - utils::cast(coarsestHypergraph), 
context, - "Coarsened Hypergraph", context.partition.show_memory_consumption); - } + // The pool initial partitioner consist of several flat bipartitioning + // techniques. This case runs as a base case (k = 2) within recursive bipartitioning + // or the deep multilevel scheme. + ip_context.partition.verbose_output = false; + Pool::bipartition(phg, ip_context); } - timer.stop_timer("coarsening"); - - // ################## INITIAL PARTITIONING ################## - io::printInitialPartitioningBanner(context); - timer.start_timer("initial_partitioning", "Initial Partitioning"); - PartitionedHypergraph& phg = uncoarseningData.coarsestPartitionedHypergraph(); - - if ( !is_vcycle ) { - DegreeZeroHypernodeRemover degree_zero_hn_remover(context); - if ( context.initial_partitioning.remove_degree_zero_hns_before_ip ) { - degree_zero_hn_remover.removeDegreeZeroHypernodes(phg.hypergraph()); - } - - Context ip_context(context); - ip_context.type = ContextType::initial_partitioning; - ip_context.refinement = context.initial_partitioning.refinement; - disableTimerAndStats(context); - if ( context.initial_partitioning.mode == Mode::direct ) { - // The pool initial partitioner consist of several flat bipartitioning - // techniques. This case runs as a base case (k = 2) within recursive bipartitioning - // or the deep multilevel scheme. - ip_context.partition.verbose_output = false; - Pool::bipartition(phg, ip_context); - } else if ( context.initial_partitioning.mode == Mode::recursive_bipartitioning ) { - RecursiveBipartitioning::partition(phg, ip_context, target_graph); - } else if ( context.initial_partitioning.mode == Mode::deep_multilevel ) { - ASSERT(ip_context.partition.objective != Objective::steiner_tree); - ip_context.partition.verbose_output = false; - DeepMultilevel::partition(phg, ip_context); - } else { - throw InvalidParameterException("Undefined initial partitioning algorithm"); - } - enableTimerAndStats(context); - degree_zero_hn_remover.restoreDegreeZeroHypernodes(phg); - } else { - // When performing a V-cycle, we store the block IDs - // of the input hypergraph as community IDs - const Hypergraph& hypergraph = phg.hypergraph(); - phg.doParallelForAllNodes([&](const HypernodeID hn) { - const PartitionID part_id = hypergraph.communityID(hn); - ASSERT(part_id != kInvalidPartition && part_id < context.partition.k); - ASSERT(phg.partID(hn) == kInvalidPartition); - phg.setOnlyNodePart(hn, part_id); - }); - phg.initializePartition(); - - #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC - if ( context.partition.objective == Objective::steiner_tree ) { - phg.setTargetGraph(target_graph); - timer.start_timer("one_to_one_mapping", "One-To-One Mapping"); - // Try to improve current mapping - InitialMapping::mapToTargetGraph( - phg, *target_graph, context); - timer.stop_timer("one_to_one_mapping"); - } - #endif + else if(context.initial_partitioning.mode == Mode::recursive_bipartitioning) + { + RecursiveBipartitioning::partition(phg, ip_context, target_graph); + } + else if(context.initial_partitioning.mode == Mode::deep_multilevel) + { + ASSERT(ip_context.partition.objective != Objective::steiner_tree); + ip_context.partition.verbose_output = false; + DeepMultilevel::partition(phg, ip_context); + } + else + { + throw InvalidParameterException("Undefined initial partitioning algorithm"); } + enableTimerAndStats(context); + degree_zero_hn_remover.restoreDegreeZeroHypernodes(phg); + } + else + { + // When performing a V-cycle, we store the block IDs + // of the input hypergraph as community IDs + const 
Hypergraph &hypergraph = phg.hypergraph(); + phg.doParallelForAllNodes([&](const HypernodeID hn) { + const PartitionID part_id = hypergraph.communityID(hn); + ASSERT(part_id != kInvalidPartition && part_id < context.partition.k); + ASSERT(phg.partID(hn) == kInvalidPartition); + phg.setOnlyNodePart(hn, part_id); + }); + phg.initializePartition(); - ASSERT([&] { - bool success = true; - if ( phg.hasFixedVertices() ) { - for ( const HypernodeID& hn : phg.nodes() ) { - if ( phg.isFixed(hn) && phg.fixedVertexBlock(hn) != phg.partID(hn) ) { - LOG << "Node" << hn << "is fixed to block" << phg.fixedVertexBlock(hn) - << ", but is assigned to block" << phg.partID(hn); - success = false; +#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC + if(context.partition.objective == Objective::steiner_tree) + { + phg.setTargetGraph(target_graph); + timer.start_timer("one_to_one_mapping", "One-To-One Mapping"); + // Try to improve current mapping + InitialMapping::mapToTargetGraph(phg, *target_graph, context); + timer.stop_timer("one_to_one_mapping"); + } +#endif + } + + ASSERT( + [&] { + bool success = true; + if(phg.hasFixedVertices()) + { + for(const HypernodeID &hn : phg.nodes()) + { + if(phg.isFixed(hn) && phg.fixedVertexBlock(hn) != phg.partID(hn)) + { + LOG << "Node" << hn << "is fixed to block" << phg.fixedVertexBlock(hn) + << ", but is assigned to block" << phg.partID(hn); + success = false; + } } } - } - return success; - }(), "Some fixed vertices are not assigned to their corresponding block"); + return success; + }(), + "Some fixed vertices are not assigned to their corresponding block"); - if ( context.partition.objective == Objective::steiner_tree ) { - phg.setTargetGraph(target_graph); - } - io::printPartitioningResults(phg, context, "Initial Partitioning Results:"); - if ( context.partition.verbose_output && !is_vcycle ) { - utils::Utilities::instance().getInitialPartitioningStats( - context.utility_id).printInitialPartitioningStats(); - } - timer.stop_timer("initial_partitioning"); - - // ################## UNCOARSENING ################## - io::printLocalSearchBanner(context); - timer.start_timer("refinement", "Refinement"); - std::unique_ptr> uncoarsener(nullptr); - if (uncoarseningData.nlevel) { - uncoarsener = std::make_unique>( + if(context.partition.objective == Objective::steiner_tree) + { + phg.setTargetGraph(target_graph); + } + io::printPartitioningResults(phg, context, "Initial Partitioning Results:"); + if(context.partition.verbose_output && !is_vcycle) + { + utils::Utilities::instance() + .getInitialPartitioningStats(context.utility_id) + .printInitialPartitioningStats(); + } + timer.stop_timer("initial_partitioning"); + + // ################## UNCOARSENING ################## + io::printLocalSearchBanner(context); + timer.start_timer("refinement", "Refinement"); + std::unique_ptr > uncoarsener(nullptr); + if(uncoarseningData.nlevel) + { + uncoarsener = std::make_unique >( hypergraph, context, uncoarseningData, target_graph); - } else { - uncoarsener = std::make_unique>( + } + else + { + uncoarsener = std::make_unique >( hypergraph, context, uncoarseningData, target_graph); - } - partitioned_hg = uncoarsener->uncoarsen(); + } + partitioned_hg = uncoarsener->uncoarsen(); - io::printPartitioningResults(partitioned_hg, context, "Local Search Results:"); - timer.stop_timer("refinement"); + io::printPartitioningResults(partitioned_hg, context, "Local Search Results:"); + timer.stop_timer("refinement"); - return partitioned_hg; - } + return partitioned_hg; +} } -template -typename 
Multilevel::PartitionedHypergraph Multilevel::partition( - Hypergraph& hypergraph, const Context& context, const TargetGraph* target_graph) { +template +typename Multilevel::PartitionedHypergraph +Multilevel::partition(Hypergraph &hypergraph, const Context &context, + const TargetGraph *target_graph) +{ PartitionedHypergraph partitioned_hg = - multilevel_partitioning(hypergraph, context, target_graph, false); + multilevel_partitioning(hypergraph, context, target_graph, false); // ################## V-CYCLES ################## - if ( context.partition.num_vcycles > 0 && context.type == ContextType::main ) { + if(context.partition.num_vcycles > 0 && context.type == ContextType::main) + { partitionVCycle(hypergraph, partitioned_hg, context, target_graph); } return partitioned_hg; } -template -void Multilevel::partition(PartitionedHypergraph& partitioned_hg, - const Context& context, - const TargetGraph* target_graph) { - PartitionedHypergraph tmp_phg = partition( - partitioned_hg.hypergraph(), context, target_graph); - tmp_phg.doParallelForAllNodes([&](const HypernodeID& hn) { +template +void Multilevel::partition(PartitionedHypergraph &partitioned_hg, + const Context &context, + const TargetGraph *target_graph) +{ + PartitionedHypergraph tmp_phg = + partition(partitioned_hg.hypergraph(), context, target_graph); + tmp_phg.doParallelForAllNodes([&](const HypernodeID &hn) { partitioned_hg.setOnlyNodePart(hn, tmp_phg.partID(hn)); }); partitioned_hg.initializePartition(); } -template -void Multilevel::partitionVCycle(Hypergraph& hypergraph, - PartitionedHypergraph& partitioned_hg, - const Context& context, - const TargetGraph* target_graph) { +template +void Multilevel::partitionVCycle(Hypergraph &hypergraph, + PartitionedHypergraph &partitioned_hg, + const Context &context, + const TargetGraph *target_graph) +{ ASSERT(context.partition.num_vcycles > 0); - for ( size_t i = 0; i < context.partition.num_vcycles; ++i ) { + for(size_t i = 0; i < context.partition.num_vcycles; ++i) + { // Reset memory pool hypergraph.reset(); parallel::MemoryPool::instance().reset(); parallel::MemoryPool::instance().release_mem_group("Preprocessing"); - if ( context.isNLevelPartitioning() ) { - // Workaround: reset() function of hypergraph reinserts all removed hyperedges again. + if(context.isNLevelPartitioning()) + { + // Workaround: reset() function of hypergraph reinserts all removed hyperedges + // again. LargeHyperedgeRemover large_he_remover(context); large_he_remover.removeLargeHyperedgesInNLevelVCycle(hypergraph); } @@ -254,14 +289,14 @@ void Multilevel::partitionVCycle(Hypergraph& hypergraph, // This way coarsening does not contract nodes that do not belong to same block // of the input partition. For initial partitioning, we use the community IDs of // smallest hypergraph as initial partition. 
- hypergraph.doParallelForAllNodes([&](const HypernodeID& hn) { + hypergraph.doParallelForAllNodes([&](const HypernodeID &hn) { hypergraph.setCommunityID(hn, partitioned_hg.partID(hn)); }); // Perform V-cycle io::printVCycleBanner(context, i + 1); partitioned_hg = multilevel_partitioning( - hypergraph, context, target_graph, true /* V-cycle flag */ ); + hypergraph, context, target_graph, true /* V-cycle flag */); } } diff --git a/mt-kahypar/partition/multilevel.h b/mt-kahypar/partition/multilevel.h index 9b97659bd..93cb29daf 100644 --- a/mt-kahypar/partition/multilevel.h +++ b/mt-kahypar/partition/multilevel.h @@ -34,29 +34,28 @@ namespace mt_kahypar { // Forward Declaration class TargetGraph; -template -class Multilevel { +template +class Multilevel +{ using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: +public: // ! Partitions a hypergraph using the multilevel paradigm. - static PartitionedHypergraph partition(Hypergraph& hypergraph, - const Context& context, - const TargetGraph* target_graph = nullptr); + static PartitionedHypergraph partition(Hypergraph &hypergraph, const Context &context, + const TargetGraph *target_graph = nullptr); // ! Partitions a hypergraph using the multilevel paradigm. - static void partition(PartitionedHypergraph& partitioned_hg, - const Context& context, - const TargetGraph* target_graph = nullptr); + static void partition(PartitionedHypergraph &partitioned_hg, const Context &context, + const TargetGraph *target_graph = nullptr); // ! Improves an existing partition using the iterated multilevel cycle technique // ! (also called V-cycle). - static void partitionVCycle(Hypergraph& hypergraph, - PartitionedHypergraph& partitioned_hg, - const Context& context, - const TargetGraph* target_graph = nullptr); + static void partitionVCycle(Hypergraph &hypergraph, + PartitionedHypergraph &partitioned_hg, + const Context &context, + const TargetGraph *target_graph = nullptr); }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/partitioner.cpp b/mt-kahypar/partition/partitioner.cpp index 66200838a..604300b5a 100644 --- a/mt-kahypar/partition/partitioner.cpp +++ b/mt-kahypar/partition/partitioner.cpp @@ -27,442 +27,532 @@ #include "partitioner.h" -#include "tbb/parallel_sort.h" #include "tbb/parallel_reduce.h" +#include "tbb/parallel_sort.h" #include "mt-kahypar/definitions.h" #include "mt-kahypar/io/partitioning_output.h" +#include "mt-kahypar/partition/deep_multilevel.h" +#include "mt-kahypar/partition/mapping/target_graph.h" #include "mt-kahypar/partition/multilevel.h" +#include "mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.h" #include "mt-kahypar/partition/preprocessing/sparsification/degree_zero_hn_remover.h" #include "mt-kahypar/partition/preprocessing/sparsification/large_he_remover.h" -#include "mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.h" #include "mt-kahypar/partition/recursive_bipartitioning.h" -#include "mt-kahypar/partition/deep_multilevel.h" -#include "mt-kahypar/partition/mapping/target_graph.h" #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC #include "mt-kahypar/partition/mapping/initial_mapping.h" #endif +#include "mt-kahypar/utils/exception.h" #include "mt-kahypar/utils/hypergraph_statistics.h" #include "mt-kahypar/utils/stats.h" #include "mt-kahypar/utils/timer.h" -#include "mt-kahypar/utils/exception.h" - namespace mt_kahypar { - template - void setupContext(Hypergraph& hypergraph, 
Context& context, TargetGraph* target_graph) { - if ( target_graph ) { - context.partition.k = target_graph->numBlocks(); - } +template +void setupContext(Hypergraph &hypergraph, Context &context, TargetGraph *target_graph) +{ + if(target_graph) + { + context.partition.k = target_graph->numBlocks(); + } - context.partition.large_hyperedge_size_threshold = std::max(hypergraph.initialNumNodes() * - context.partition.large_hyperedge_size_threshold_factor, 100.0); - context.sanityCheck(target_graph); - context.setupPartWeights(hypergraph.totalWeight()); - context.setupContractionLimit(hypergraph.totalWeight()); - context.setupThreadsPerFlowSearch(); - - if ( context.partition.gain_policy == GainPolicy::steiner_tree ) { - const PartitionID k = target_graph ? target_graph->numBlocks() : 1; - const PartitionID max_k = Hypergraph::is_graph ? 256 : 64; - if ( k > max_k ) { - const std::string type = Hypergraph::is_graph ? "graphs" : "hypergraphs"; - throw InvalidInputException( - "We currently only support mappings of " + type + " onto target graphs with at " - "most " + STR(max_k) + "nodes!"); - } + context.partition.large_hyperedge_size_threshold = + std::max(hypergraph.initialNumNodes() * + context.partition.large_hyperedge_size_threshold_factor, + 100.0); + context.sanityCheck(target_graph); + context.setupPartWeights(hypergraph.totalWeight()); + context.setupContractionLimit(hypergraph.totalWeight()); + context.setupThreadsPerFlowSearch(); + + if(context.partition.gain_policy == GainPolicy::steiner_tree) + { + const PartitionID k = target_graph ? target_graph->numBlocks() : 1; + const PartitionID max_k = Hypergraph::is_graph ? 256 : 64; + if(k > max_k) + { + const std::string type = Hypergraph::is_graph ? "graphs" : "hypergraphs"; + throw InvalidInputException("We currently only support mappings of " + type + + " onto target graphs with at " + "most " + + STR(max_k) + "nodes!"); + } - if ( context.mapping.largest_he_fraction > 0.0 ) { - // Determine a threshold of what we consider a large hyperedge in - // the steiner tree gain cache - vec he_sizes(hypergraph.initialNumEdges(), 0); - hypergraph.doParallelForAllEdges([&](const HyperedgeID& he) { - he_sizes[he] = hypergraph.edgeSize(he); - }); - // Sort hyperedges in decreasing order of their sizes - tbb::parallel_sort(he_sizes.begin(), he_sizes.end(), - [&](const HypernodeID& lhs, const HypernodeID& rhs) { - return lhs > rhs; - }); - const size_t percentile = context.mapping.largest_he_fraction * hypergraph.initialNumEdges(); - // Compute the percentage of pins covered by the largest hyperedges - const double covered_pins_percentage = + if(context.mapping.largest_he_fraction > 0.0) + { + // Determine a threshold of what we consider a large hyperedge in + // the steiner tree gain cache + vec he_sizes(hypergraph.initialNumEdges(), 0); + hypergraph.doParallelForAllEdges( + [&](const HyperedgeID &he) { he_sizes[he] = hypergraph.edgeSize(he); }); + // Sort hyperedges in decreasing order of their sizes + tbb::parallel_sort( + he_sizes.begin(), he_sizes.end(), + [&](const HypernodeID &lhs, const HypernodeID &rhs) { return lhs > rhs; }); + const size_t percentile = + context.mapping.largest_he_fraction * hypergraph.initialNumEdges(); + // Compute the percentage of pins covered by the largest hyperedges + const double covered_pins_percentage = static_cast(tbb::parallel_reduce( - tbb::blocked_range(UL(0), percentile), - 0, [&](const tbb::blocked_range& range, int init) { - for ( size_t i = range.begin(); i < range.end(); ++i ) { - init += he_sizes[i]; - 
} - return init; - }, [&](const int lhs, const int rhs) { - return lhs + rhs; - })) / hypergraph.initialNumPins(); - if ( covered_pins_percentage >= context.mapping.min_pin_coverage_of_largest_hes ) { - // If the largest hyperedge covers a large portion of the hypergraph, we assume that - // the hyperedge sizes follow a power law distribution and ignore hyperedges larger than - // the following threshold when calculating and maintaining the adjacent blocks of node - // in the steiner tree gain cache. - context.mapping.large_he_threshold = he_sizes[percentile]; - } + tbb::blocked_range(UL(0), percentile), 0, + [&](const tbb::blocked_range &range, int init) { + for(size_t i = range.begin(); i < range.end(); ++i) + { + init += he_sizes[i]; + } + return init; + }, + [&](const int lhs, const int rhs) { return lhs + rhs; })) / + hypergraph.initialNumPins(); + if(covered_pins_percentage >= context.mapping.min_pin_coverage_of_largest_hes) + { + // If the largest hyperedge covers a large portion of the hypergraph, we assume + // that the hyperedge sizes follow a power law distribution and ignore hyperedges + // larger than the following threshold when calculating and maintaining the + // adjacent blocks of node in the steiner tree gain cache. + context.mapping.large_he_threshold = he_sizes[percentile]; } } + } - // Setup enabled IP algorithms - if ( context.initial_partitioning.enabled_ip_algos.size() > 0 && - context.initial_partitioning.enabled_ip_algos.size() < - static_cast(InitialPartitioningAlgorithm::UNDEFINED) ) { - throw InvalidParameterException( + // Setup enabled IP algorithms + if(context.initial_partitioning.enabled_ip_algos.size() > 0 && + context.initial_partitioning.enabled_ip_algos.size() < + static_cast(InitialPartitioningAlgorithm::UNDEFINED)) + { + throw InvalidParameterException( "Size of enabled IP algorithms vector is smaller than number of IP algorithms!"); - } else if ( context.initial_partitioning.enabled_ip_algos.size() == 0 ) { - context.initial_partitioning.enabled_ip_algos.assign( + } + else if(context.initial_partitioning.enabled_ip_algos.size() == 0) + { + context.initial_partitioning.enabled_ip_algos.assign( static_cast(InitialPartitioningAlgorithm::UNDEFINED), true); - } else { - bool is_one_ip_algo_enabled = false; - for ( size_t i = 0; i < context.initial_partitioning.enabled_ip_algos.size(); ++i ) { - is_one_ip_algo_enabled |= context.initial_partitioning.enabled_ip_algos[i]; - } - if ( !is_one_ip_algo_enabled ) { - throw InvalidParameterException( + } + else + { + bool is_one_ip_algo_enabled = false; + for(size_t i = 0; i < context.initial_partitioning.enabled_ip_algos.size(); ++i) + { + is_one_ip_algo_enabled |= context.initial_partitioning.enabled_ip_algos[i]; + } + if(!is_one_ip_algo_enabled) + { + throw InvalidParameterException( "At least one initial partitioning algorithm must be enabled!"); - } } + } - // Check fixed vertex support compatibility - if ( hypergraph.hasFixedVertices() ) { - if ( context.partition.mode == Mode::deep_multilevel || - context.initial_partitioning.mode == Mode::deep_multilevel ) { - throw NonSupportedOperationException( + // Check fixed vertex support compatibility + if(hypergraph.hasFixedVertices()) + { + if(context.partition.mode == Mode::deep_multilevel || + context.initial_partitioning.mode == Mode::deep_multilevel) + { + throw NonSupportedOperationException( "Deep multilevel partitioning scheme does not support fixed vertices!"); - } } } +} - template - void configurePreprocessing(const Hypergraph& hypergraph, Context& 
context) { - const double density = static_cast(Hypergraph::is_graph ? hypergraph.initialNumEdges() / 2 : hypergraph.initialNumEdges()) / - static_cast(hypergraph.initialNumNodes()); - if (context.preprocessing.community_detection.edge_weight_function == LouvainEdgeWeight::hybrid) { - if (density < 0.75) { - context.preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::degree; - } else if ( density < 2 && hypergraph.maxEdgeSize() > context.partition.ignore_hyperedge_size_threshold ) { - context.preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::non_uniform; - } else { - context.preprocessing.community_detection.edge_weight_function = LouvainEdgeWeight::uniform; - } +template +void configurePreprocessing(const Hypergraph &hypergraph, Context &context) +{ + const double density = + static_cast(Hypergraph::is_graph ? hypergraph.initialNumEdges() / 2 : + hypergraph.initialNumEdges()) / + static_cast(hypergraph.initialNumNodes()); + if(context.preprocessing.community_detection.edge_weight_function == + LouvainEdgeWeight::hybrid) + { + if(density < 0.75) + { + context.preprocessing.community_detection.edge_weight_function = + LouvainEdgeWeight::degree; + } + else if(density < 2 && + hypergraph.maxEdgeSize() > context.partition.ignore_hyperedge_size_threshold) + { + context.preprocessing.community_detection.edge_weight_function = + LouvainEdgeWeight::non_uniform; + } + else + { + context.preprocessing.community_detection.edge_weight_function = + LouvainEdgeWeight::uniform; } } +} - template - void sanitize(typename TypeTraits::Hypergraph& hypergraph, - Context& context, - DegreeZeroHypernodeRemover& degree_zero_hn_remover, - LargeHyperedgeRemover& large_he_remover) { - - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - timer.start_timer("degree_zero_hypernode_removal", "Degree Zero Hypernode Removal"); - const HypernodeID num_removed_degree_zero_hypernodes = - degree_zero_hn_remover.removeDegreeZeroHypernodes(hypergraph); - timer.stop_timer("degree_zero_hypernode_removal"); - - timer.start_timer("large_hyperedge_removal", "Large Hyperedge Removal"); - const HypernodeID num_removed_large_hyperedges = - large_he_remover.removeLargeHyperedges(hypergraph); - timer.stop_timer("large_hyperedge_removal"); - - const HyperedgeID num_removed_single_node_hes = hypergraph.numRemovedHyperedges(); - if (context.partition.verbose_output && - ( num_removed_single_node_hes > 0 || - num_removed_degree_zero_hypernodes > 0 || - num_removed_large_hyperedges > 0 )) { - LOG << "Performed single-node/large HE removal and degree-zero HN contractions:"; - LOG << "\033[1m\033[31m" << " # removed" - << num_removed_single_node_hes << "single-pin hyperedges during hypergraph file parsing" - << "\033[0m"; - LOG << "\033[1m\033[31m" << " # removed" - << num_removed_large_hyperedges << "large hyperedges with |e| >" << large_he_remover.largeHyperedgeThreshold() << "\033[0m"; - LOG << "\033[1m\033[31m" << " # contracted" - << num_removed_degree_zero_hypernodes << "hypernodes with d(v) = 0 and w(v) = 1" - << "\033[0m"; - io::printStripe(); - } +template +void sanitize(typename TypeTraits::Hypergraph &hypergraph, Context &context, + DegreeZeroHypernodeRemover °ree_zero_hn_remover, + LargeHyperedgeRemover &large_he_remover) +{ + + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); + timer.start_timer("degree_zero_hypernode_removal", "Degree Zero Hypernode Removal"); + const HypernodeID num_removed_degree_zero_hypernodes = + 
degree_zero_hn_remover.removeDegreeZeroHypernodes(hypergraph); + timer.stop_timer("degree_zero_hypernode_removal"); + + timer.start_timer("large_hyperedge_removal", "Large Hyperedge Removal"); + const HypernodeID num_removed_large_hyperedges = + large_he_remover.removeLargeHyperedges(hypergraph); + timer.stop_timer("large_hyperedge_removal"); + + const HyperedgeID num_removed_single_node_hes = hypergraph.numRemovedHyperedges(); + if(context.partition.verbose_output && + (num_removed_single_node_hes > 0 || num_removed_degree_zero_hypernodes > 0 || + num_removed_large_hyperedges > 0)) + { + LOG << "Performed single-node/large HE removal and degree-zero HN contractions:"; + LOG << "\033[1m\033[31m" + << " # removed" << num_removed_single_node_hes + << "single-pin hyperedges during hypergraph file parsing" + << "\033[0m"; + LOG << "\033[1m\033[31m" + << " # removed" << num_removed_large_hyperedges << "large hyperedges with |e| >" + << large_he_remover.largeHyperedgeThreshold() << "\033[0m"; + LOG << "\033[1m\033[31m" + << " # contracted" << num_removed_degree_zero_hypernodes + << "hypernodes with d(v) = 0 and w(v) = 1" + << "\033[0m"; + io::printStripe(); } +} - template - bool isGraph(const Hypergraph& hypergraph) { - if (Hypergraph::is_graph) { - return true; - } - return tbb::parallel_reduce(tbb::blocked_range( - ID(0), hypergraph.initialNumEdges()), true, [&](const tbb::blocked_range& range, bool isGraph) { - if ( isGraph ) { - bool tmp_is_graph = isGraph; - for (HyperedgeID he = range.begin(); he < range.end(); ++he) { - if ( hypergraph.edgeIsEnabled(he) ) { - tmp_is_graph &= (hypergraph.edgeSize(he) == 2); +template +bool isGraph(const Hypergraph &hypergraph) +{ + if(Hypergraph::is_graph) + { + return true; + } + return tbb::parallel_reduce( + tbb::blocked_range(ID(0), hypergraph.initialNumEdges()), true, + [&](const tbb::blocked_range &range, bool isGraph) { + if(isGraph) + { + bool tmp_is_graph = isGraph; + for(HyperedgeID he = range.begin(); he < range.end(); ++he) + { + if(hypergraph.edgeIsEnabled(he)) + { + tmp_is_graph &= (hypergraph.edgeSize(he) == 2); + } } + return tmp_is_graph; } - return tmp_is_graph; - } - return false; - }, [&](const bool lhs, const bool rhs) { - return lhs && rhs; - }); + return false; + }, + [&](const bool lhs, const bool rhs) { return lhs && rhs; }); +} + +template +bool isMeshGraph(const Hypergraph &graph) +{ + const HypernodeID num_nodes = graph.initialNumNodes(); + const double avg_hn_degree = utils::avgHypernodeDegree(graph); + std::vector hn_degrees; + hn_degrees.resize(graph.initialNumNodes()); + graph.doParallelForAllNodes( + [&](const HypernodeID &hn) { hn_degrees[hn] = graph.nodeDegree(hn); }); + const double stdev_hn_degree = + utils::parallel_stdev(hn_degrees, avg_hn_degree, num_nodes); + if(stdev_hn_degree > avg_hn_degree / 2) + { + return false; } - template - bool isMeshGraph(const Hypergraph& graph) { - const HypernodeID num_nodes = graph.initialNumNodes(); - const double avg_hn_degree = utils::avgHypernodeDegree(graph); - std::vector hn_degrees; - hn_degrees.resize(graph.initialNumNodes()); - graph.doParallelForAllNodes([&](const HypernodeID& hn) { - hn_degrees[hn] = graph.nodeDegree(hn); - }); - const double stdev_hn_degree = utils::parallel_stdev(hn_degrees, avg_hn_degree, num_nodes); - if (stdev_hn_degree > avg_hn_degree / 2) { - return false; + // test whether 99.9th percentile hypernode degree is at most 4 times the average degree + tbb::enumerable_thread_specific num_high_degree_nodes(0); + graph.doParallelForAllNodes([&](const 
HypernodeID &node) { + if(graph.nodeDegree(node) > 4 * avg_hn_degree) + { + num_high_degree_nodes.local() += 1; } + }); + return num_high_degree_nodes.combine(std::plus<>()) <= num_nodes / 1000; +} - // test whether 99.9th percentile hypernode degree is at most 4 times the average degree - tbb::enumerable_thread_specific num_high_degree_nodes(0); - graph.doParallelForAllNodes([&](const HypernodeID& node) { - if (graph.nodeDegree(node) > 4 * avg_hn_degree) { - num_high_degree_nodes.local() += 1; - } - }); - return num_high_degree_nodes.combine(std::plus<>()) <= num_nodes / 1000; +template +void precomputeSteinerTrees(Hypergraph &hypergraph, TargetGraph *target_graph, + Context &context) +{ + if(target_graph && !target_graph->isInitialized()) + { + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); + timer.start_timer("precompute_steiner_trees", "Precompute Steiner Trees"); + const size_t max_steiner_tree_size = + std::min(std::min(context.mapping.max_steiner_tree_size, UL(context.partition.k)), + static_cast(hypergraph.maxEdgeSize())); + target_graph->precomputeDistances(max_steiner_tree_size); + timer.stop_timer("precompute_steiner_trees"); } +} - template - void precomputeSteinerTrees(Hypergraph& hypergraph, TargetGraph* target_graph, Context& context) { - if ( target_graph && !target_graph->isInitialized() ) { - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - timer.start_timer("precompute_steiner_trees", "Precompute Steiner Trees"); - const size_t max_steiner_tree_size = std::min( - std::min(context.mapping.max_steiner_tree_size, UL(context.partition.k)), - static_cast(hypergraph.maxEdgeSize())); - target_graph->precomputeDistances(max_steiner_tree_size); - timer.stop_timer("precompute_steiner_trees"); +template +void preprocess(Hypergraph &hypergraph, Context &context, TargetGraph *target_graph) +{ + bool use_community_detection = context.preprocessing.use_community_detection; + bool is_graph = false; + + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); + if(context.preprocessing.use_community_detection) + { + timer.start_timer("detect_graph_structure", "Detect Graph Structure"); + is_graph = isGraph(hypergraph); + if(is_graph && context.preprocessing.disable_community_detection_for_mesh_graphs) + { + use_community_detection = !isMeshGraph(hypergraph); } + timer.stop_timer("detect_graph_structure"); } - template - void preprocess(Hypergraph& hypergraph, Context& context, TargetGraph* target_graph) { - bool use_community_detection = context.preprocessing.use_community_detection; - bool is_graph = false; - - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - if ( context.preprocessing.use_community_detection ) { - timer.start_timer("detect_graph_structure", "Detect Graph Structure"); - is_graph = isGraph(hypergraph); - if ( is_graph && context.preprocessing.disable_community_detection_for_mesh_graphs ) { - use_community_detection = !isMeshGraph(hypergraph); - } - timer.stop_timer("detect_graph_structure"); + if(use_community_detection) + { + io::printTopLevelPreprocessingBanner(context); + + timer.start_timer("community_detection", "Community Detection"); + timer.start_timer("construct_graph", "Construct Graph"); + Graph graph( + hypergraph, context.preprocessing.community_detection.edge_weight_function, + is_graph); + if(!context.preprocessing.community_detection.low_memory_contraction) + { + graph.allocateContractionBuffers(); } - - if ( 
use_community_detection ) { - io::printTopLevelPreprocessingBanner(context); - - timer.start_timer("community_detection", "Community Detection"); - timer.start_timer("construct_graph", "Construct Graph"); - Graph graph(hypergraph, - context.preprocessing.community_detection.edge_weight_function, is_graph); - if ( !context.preprocessing.community_detection.low_memory_contraction ) { - graph.allocateContractionBuffers(); - } - timer.stop_timer("construct_graph"); - timer.start_timer("perform_community_detection", "Perform Community Detection"); - ds::Clustering communities = community_detection::run_parallel_louvain(graph, context); - graph.restrictClusteringToHypernodes(hypergraph, communities); - hypergraph.setCommunityIDs(std::move(communities)); - timer.stop_timer("perform_community_detection"); - timer.stop_timer("community_detection"); - - if (context.partition.verbose_output) { - io::printCommunityInformation(hypergraph); - } + timer.stop_timer("construct_graph"); + timer.start_timer("perform_community_detection", "Perform Community Detection"); + ds::Clustering communities = + community_detection::run_parallel_louvain(graph, context); + graph.restrictClusteringToHypernodes(hypergraph, communities); + hypergraph.setCommunityIDs(std::move(communities)); + timer.stop_timer("perform_community_detection"); + timer.stop_timer("community_detection"); + + if(context.partition.verbose_output) + { + io::printCommunityInformation(hypergraph); } + } - precomputeSteinerTrees(hypergraph, target_graph, context); + precomputeSteinerTrees(hypergraph, target_graph, context); - parallel::MemoryPool::instance().release_mem_group("Preprocessing"); - } + parallel::MemoryPool::instance().release_mem_group("Preprocessing"); +} - template - void forceFixedVertexAssignment(PartitionedHypergraph& partitioned_hg, - const Context& context) { - if ( partitioned_hg.hasFixedVertices() ) { - // This is a sanity check verifying that all fixed vertices are assigned - // to their corresponding blocks. If one fixed vertex is assigned to a different - // block, we move it to its fixed vertex block. Note that a wrong fixed vertex - // block assignment will fail in debug mode. Thus, this loop should not move any node, but - // we keep it in case anything goes wrong during partitioning. - partitioned_hg.doParallelForAllNodes([&](const HypernodeID& hn) { - if ( partitioned_hg.isFixed(hn) ) { - const PartitionID from = partitioned_hg.partID(hn); - const PartitionID to = partitioned_hg.fixedVertexBlock(hn); - if ( from != to ) { - if ( context.partition.verbose_output ) { - LOG << RED << "Node" << hn << "is fixed to block" << to - << ", but it is assigned to block" << from << "!" - << "It is now moved to its fixed vertex block." << END; - } - partitioned_hg.changeNodePart(hn, from, to, NOOP_FUNC, true); +template +void forceFixedVertexAssignment(PartitionedHypergraph &partitioned_hg, + const Context &context) +{ + if(partitioned_hg.hasFixedVertices()) + { + // This is a sanity check verifying that all fixed vertices are assigned + // to their corresponding blocks. If one fixed vertex is assigned to a different + // block, we move it to its fixed vertex block. Note that a wrong fixed vertex + // block assignment will fail in debug mode. Thus, this loop should not move any node, + // but we keep it in case anything goes wrong during partitioning. 
+ partitioned_hg.doParallelForAllNodes([&](const HypernodeID &hn) { + if(partitioned_hg.isFixed(hn)) + { + const PartitionID from = partitioned_hg.partID(hn); + const PartitionID to = partitioned_hg.fixedVertexBlock(hn); + if(from != to) + { + if(context.partition.verbose_output) + { + LOG << RED << "Node" << hn << "is fixed to block" << to + << ", but it is assigned to block" << from << "!" + << "It is now moved to its fixed vertex block." << END; } + partitioned_hg.changeNodePart(hn, from, to, NOOP_FUNC, true); } - }); - } + } + }); } +} - template - typename Partitioner::PartitionedHypergraph Partitioner::partition( - Hypergraph& hypergraph, Context& context, TargetGraph* target_graph) { - configurePreprocessing(hypergraph, context); - setupContext(hypergraph, context, target_graph); - - io::printContext(context); - io::printMemoryPoolConsumption(context); - io::printInputInformation(context, hypergraph); - - #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC - bool map_partition_to_target_graph_at_the_end = false; - if ( context.partition.objective == Objective::steiner_tree && - context.mapping.use_two_phase_approach ) { - map_partition_to_target_graph_at_the_end = true; - context.partition.objective = Objective::km1; - context.setupGainPolicy(); - } - #endif - - // ################## PREPROCESSING ################## - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - timer.start_timer("preprocessing", "Preprocessing"); - DegreeZeroHypernodeRemover degree_zero_hn_remover(context); - LargeHyperedgeRemover large_he_remover(context); - preprocess(hypergraph, context, target_graph); - sanitize(hypergraph, context, degree_zero_hn_remover, large_he_remover); - timer.stop_timer("preprocessing"); - - // ################## MULTILEVEL & VCYCLE ################## - PartitionedHypergraph partitioned_hypergraph; - if (context.partition.mode == Mode::direct) { - partitioned_hypergraph = Multilevel::partition(hypergraph, context, target_graph); - } else if (context.partition.mode == Mode::recursive_bipartitioning) { - partitioned_hypergraph = RecursiveBipartitioning::partition(hypergraph, context, target_graph); - } else if (context.partition.mode == Mode::deep_multilevel) { - ASSERT(context.partition.objective != Objective::steiner_tree); - partitioned_hypergraph = DeepMultilevel::partition(hypergraph, context); - } else { - throw InvalidParameterException("Invalid partitioning mode!"); - } +template +typename Partitioner::PartitionedHypergraph +Partitioner::partition(Hypergraph &hypergraph, Context &context, + TargetGraph *target_graph) +{ + configurePreprocessing(hypergraph, context); + setupContext(hypergraph, context, target_graph); + + io::printContext(context); + io::printMemoryPoolConsumption(context); + io::printInputInformation(context, hypergraph); + +#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC + bool map_partition_to_target_graph_at_the_end = false; + if(context.partition.objective == Objective::steiner_tree && + context.mapping.use_two_phase_approach) + { + map_partition_to_target_graph_at_the_end = true; + context.partition.objective = Objective::km1; + context.setupGainPolicy(); + } +#endif + + // ################## PREPROCESSING ################## + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); + timer.start_timer("preprocessing", "Preprocessing"); + DegreeZeroHypernodeRemover degree_zero_hn_remover(context); + LargeHyperedgeRemover large_he_remover(context); + preprocess(hypergraph, context, target_graph); + 
sanitize(hypergraph, context, degree_zero_hn_remover, large_he_remover); + timer.stop_timer("preprocessing"); + + // ################## MULTILEVEL & VCYCLE ################## + PartitionedHypergraph partitioned_hypergraph; + if(context.partition.mode == Mode::direct) + { + partitioned_hypergraph = + Multilevel::partition(hypergraph, context, target_graph); + } + else if(context.partition.mode == Mode::recursive_bipartitioning) + { + partitioned_hypergraph = + RecursiveBipartitioning::partition(hypergraph, context, target_graph); + } + else if(context.partition.mode == Mode::deep_multilevel) + { + ASSERT(context.partition.objective != Objective::steiner_tree); + partitioned_hypergraph = DeepMultilevel::partition(hypergraph, context); + } + else + { + throw InvalidParameterException("Invalid partitioning mode!"); + } - ASSERT([&] { - bool success = true; - if ( partitioned_hypergraph.hasFixedVertices() ) { - for ( const HypernodeID& hn : partitioned_hypergraph.nodes() ) { - if ( partitioned_hypergraph.isFixed(hn) && - partitioned_hypergraph.fixedVertexBlock(hn) != partitioned_hypergraph.partID(hn) ) { - LOG << "Node" << hn << "is fixed to block" << partitioned_hypergraph.fixedVertexBlock(hn) - << ", but is assigned to block" << partitioned_hypergraph.partID(hn); - success = false; + ASSERT( + [&] { + bool success = true; + if(partitioned_hypergraph.hasFixedVertices()) + { + for(const HypernodeID &hn : partitioned_hypergraph.nodes()) + { + if(partitioned_hypergraph.isFixed(hn) && + partitioned_hypergraph.fixedVertexBlock(hn) != + partitioned_hypergraph.partID(hn)) + { + LOG << "Node" << hn << "is fixed to block" + << partitioned_hypergraph.fixedVertexBlock(hn) + << ", but is assigned to block" << partitioned_hypergraph.partID(hn); + success = false; + } } } - } - return success; - }(), "Some fixed vertices are not assigned to their corresponding block"); - - // ################## POSTPROCESSING ################## - timer.start_timer("postprocessing", "Postprocessing"); - large_he_remover.restoreLargeHyperedges(partitioned_hypergraph); - degree_zero_hn_remover.restoreDegreeZeroHypernodes(partitioned_hypergraph); - forceFixedVertexAssignment(partitioned_hypergraph, context); - timer.stop_timer("postprocessing"); - - #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC - if ( map_partition_to_target_graph_at_the_end ) { - ASSERT(target_graph); - context.partition.objective = Objective::steiner_tree; - timer.start_timer("one_to_one_mapping", "One-To-One Mapping"); - InitialMapping::mapToTargetGraph( - partitioned_hypergraph, *target_graph, context); - timer.stop_timer("one_to_one_mapping"); - } - #endif + return success; + }(), + "Some fixed vertices are not assigned to their corresponding block"); - if (context.partition.verbose_output) { - io::printHypergraphInfo(partitioned_hypergraph.hypergraph(), context, - "Uncoarsened Hypergraph", context.partition.show_memory_consumption); - io::printStripe(); - } + // ################## POSTPROCESSING ################## + timer.start_timer("postprocessing", "Postprocessing"); + large_he_remover.restoreLargeHyperedges(partitioned_hypergraph); + degree_zero_hn_remover.restoreDegreeZeroHypernodes(partitioned_hypergraph); + forceFixedVertexAssignment(partitioned_hypergraph, context); + timer.stop_timer("postprocessing"); + +#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC + if(map_partition_to_target_graph_at_the_end) + { + ASSERT(target_graph); + context.partition.objective = Objective::steiner_tree; + timer.start_timer("one_to_one_mapping", "One-To-One Mapping"); + 
InitialMapping::mapToTargetGraph(partitioned_hypergraph, *target_graph, + context); + timer.stop_timer("one_to_one_mapping"); + } +#endif - return partitioned_hypergraph; + if(context.partition.verbose_output) + { + io::printHypergraphInfo(partitioned_hypergraph.hypergraph(), context, + "Uncoarsened Hypergraph", + context.partition.show_memory_consumption); + io::printStripe(); } + return partitioned_hypergraph; +} - template - void Partitioner::partitionVCycle(PartitionedHypergraph& partitioned_hg, - Context& context, - TargetGraph* target_graph) { - Hypergraph& hypergraph = partitioned_hg.hypergraph(); - configurePreprocessing(hypergraph, context); - setupContext(hypergraph, context, target_graph); - - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - timer.start_timer("preprocessing", "Preprocessing"); - precomputeSteinerTrees(hypergraph, target_graph, context); - partitioned_hg.setTargetGraph(target_graph); - timer.stop_timer("preprocessing"); - - io::printContext(context); - io::printMemoryPoolConsumption(context); - io::printInputInformation(context, hypergraph); - io::printPartitioningResults(partitioned_hg, context, "\nInput Partition:"); - - // ################## PREPROCESSING ################## - timer.start_timer("preprocessing", "Preprocessing"); - DegreeZeroHypernodeRemover degree_zero_hn_remover(context); - LargeHyperedgeRemover large_he_remover(context); - sanitize(hypergraph, context, degree_zero_hn_remover, large_he_remover); - timer.stop_timer("preprocessing"); - - // ################## MULTILEVEL & VCYCLE ################## - if (context.partition.mode == Mode::direct) { - Multilevel::partitionVCycle( - hypergraph, partitioned_hg, context, target_graph); - } else { - throw InvalidParameterException("Invalid V-cycle partitioning mode!"); - } +template +void Partitioner::partitionVCycle(PartitionedHypergraph &partitioned_hg, + Context &context, TargetGraph *target_graph) +{ + Hypergraph &hypergraph = partitioned_hg.hypergraph(); + configurePreprocessing(hypergraph, context); + setupContext(hypergraph, context, target_graph); + + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); + timer.start_timer("preprocessing", "Preprocessing"); + precomputeSteinerTrees(hypergraph, target_graph, context); + partitioned_hg.setTargetGraph(target_graph); + timer.stop_timer("preprocessing"); + + io::printContext(context); + io::printMemoryPoolConsumption(context); + io::printInputInformation(context, hypergraph); + io::printPartitioningResults(partitioned_hg, context, "\nInput Partition:"); + + // ################## PREPROCESSING ################## + timer.start_timer("preprocessing", "Preprocessing"); + DegreeZeroHypernodeRemover degree_zero_hn_remover(context); + LargeHyperedgeRemover large_he_remover(context); + sanitize(hypergraph, context, degree_zero_hn_remover, large_he_remover); + timer.stop_timer("preprocessing"); + + // ################## MULTILEVEL & VCYCLE ################## + if(context.partition.mode == Mode::direct) + { + Multilevel::partitionVCycle(hypergraph, partitioned_hg, context, + target_graph); + } + else + { + throw InvalidParameterException("Invalid V-cycle partitioning mode!"); + } - // ################## POSTPROCESSING ################## - timer.start_timer("postprocessing", "Postprocessing"); - large_he_remover.restoreLargeHyperedges(partitioned_hg); - degree_zero_hn_remover.restoreDegreeZeroHypernodes(partitioned_hg); - forceFixedVertexAssignment(partitioned_hg, context); - 
timer.stop_timer("postprocessing"); - - if (context.partition.verbose_output) { - io::printHypergraphInfo(partitioned_hg.hypergraph(), context, - "Uncoarsened Hypergraph", context.partition.show_memory_consumption); - io::printStripe(); - } + // ################## POSTPROCESSING ################## + timer.start_timer("postprocessing", "Postprocessing"); + large_he_remover.restoreLargeHyperedges(partitioned_hg); + degree_zero_hn_remover.restoreDegreeZeroHypernodes(partitioned_hg); + forceFixedVertexAssignment(partitioned_hg, context); + timer.stop_timer("postprocessing"); + + if(context.partition.verbose_output) + { + io::printHypergraphInfo(partitioned_hg.hypergraph(), context, + "Uncoarsened Hypergraph", + context.partition.show_memory_consumption); + io::printStripe(); + } - ASSERT([&] { - bool success = true; - if ( partitioned_hg.hasFixedVertices() ) { - for ( const HypernodeID& hn : partitioned_hg.nodes() ) { - if ( partitioned_hg.isFixed(hn) && - partitioned_hg.fixedVertexBlock(hn) != partitioned_hg.partID(hn) ) { - LOG << "Node" << hn << "is fixed to block" << partitioned_hg.fixedVertexBlock(hn) - << ", but is assigned to block" << partitioned_hg.partID(hn); - success = false; + ASSERT( + [&] { + bool success = true; + if(partitioned_hg.hasFixedVertices()) + { + for(const HypernodeID &hn : partitioned_hg.nodes()) + { + if(partitioned_hg.isFixed(hn) && + partitioned_hg.fixedVertexBlock(hn) != partitioned_hg.partID(hn)) + { + LOG << "Node" << hn << "is fixed to block" + << partitioned_hg.fixedVertexBlock(hn) << ", but is assigned to block" + << partitioned_hg.partID(hn); + success = false; + } } } - } - return success; - }(), "Some fixed vertices are not assigned to their corresponding block"); - } + return success; + }(), + "Some fixed vertices are not assigned to their corresponding block"); +} - INSTANTIATE_CLASS_WITH_TYPE_TRAITS(Partitioner) +INSTANTIATE_CLASS_WITH_TYPE_TRAITS(Partitioner) } diff --git a/mt-kahypar/partition/partitioner.h b/mt-kahypar/partition/partitioner.h index e117a4430..7e094d4cf 100644 --- a/mt-kahypar/partition/partitioner.h +++ b/mt-kahypar/partition/partitioner.h @@ -34,20 +34,19 @@ namespace mt_kahypar { // Forward Declaration class TargetGraph; -template -class Partitioner { +template +class Partitioner +{ using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - static PartitionedHypergraph partition(Hypergraph& hypergraph, - Context& context, - TargetGraph* target_graph = nullptr); +public: + static PartitionedHypergraph partition(Hypergraph &hypergraph, Context &context, + TargetGraph *target_graph = nullptr); - static void partitionVCycle(PartitionedHypergraph& partitioned_hg, - Context& context, - TargetGraph* target_graph = nullptr); + static void partitionVCycle(PartitionedHypergraph &partitioned_hg, Context &context, + TargetGraph *target_graph = nullptr); }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/partitioner_facade.cpp b/mt-kahypar/partition/partitioner_facade.cpp index 1677367f7..e74646527 100644 --- a/mt-kahypar/partition/partitioner_facade.cpp +++ b/mt-kahypar/partition/partitioner_facade.cpp @@ -28,254 +28,303 @@ #include "mt-kahypar/partition/partitioner_facade.h" #include "mt-kahypar/definitions.h" -#include "mt-kahypar/partition/partitioner.h" -#include "mt-kahypar/io/partitioning_output.h" -#include "mt-kahypar/io/hypergraph_io.h" #include "mt-kahypar/io/csv_output.h" +#include "mt-kahypar/io/hypergraph_io.h" 
+#include "mt-kahypar/io/partitioning_output.h" #include "mt-kahypar/io/sql_plottools_serializer.h" -#include "mt-kahypar/utils/cast.h" -#include "mt-kahypar/utils/randomize.h" #include "mt-kahypar/partition/conversion.h" +#include "mt-kahypar/partition/partitioner.h" +#include "mt-kahypar/utils/cast.h" #include "mt-kahypar/utils/exception.h" +#include "mt-kahypar/utils/randomize.h" namespace mt_kahypar { namespace internal { - template - mt_kahypar_partitioned_hypergraph_t partition(mt_kahypar_hypergraph_t hypergraph, - Context& context, - TargetGraph* target_graph) { - using Hypergraph = typename TypeTraits::Hypergraph; - using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - Hypergraph& hg = utils::cast(hypergraph); +template +mt_kahypar_partitioned_hypergraph_t partition(mt_kahypar_hypergraph_t hypergraph, + Context &context, TargetGraph *target_graph) +{ + using Hypergraph = typename TypeTraits::Hypergraph; + using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; + Hypergraph &hg = utils::cast(hypergraph); - // Partition Hypergraph - PartitionedHypergraph partitioned_hg = + // Partition Hypergraph + PartitionedHypergraph partitioned_hg = Partitioner::partition(hg, context, target_graph); - return mt_kahypar_partitioned_hypergraph_t { - reinterpret_cast( - new PartitionedHypergraph(std::move(partitioned_hg))), PartitionedHypergraph::TYPE }; - } + return mt_kahypar_partitioned_hypergraph_t{ + reinterpret_cast( + new PartitionedHypergraph(std::move(partitioned_hg))), + PartitionedHypergraph::TYPE + }; +} - template - void improve(mt_kahypar_partitioned_hypergraph_t partitioned_hg, - Context& context, - TargetGraph* target_graph) { - using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - PartitionedHypergraph& phg = utils::cast(partitioned_hg); +template +void improve(mt_kahypar_partitioned_hypergraph_t partitioned_hg, Context &context, + TargetGraph *target_graph) +{ + using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; + PartitionedHypergraph &phg = utils::cast(partitioned_hg); - // Improve partition - Partitioner::partitionVCycle(phg, context, target_graph); - } + // Improve partition + Partitioner::partitionVCycle(phg, context, target_graph); +} - void check_if_feature_is_enabled(const mt_kahypar_partition_type_t type) { - unused(type); - #ifndef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - if ( type == MULTILEVEL_GRAPH_PARTITIONING || type == N_LEVEL_GRAPH_PARTITIONING ) { - throw InvalidParameterException( +void check_if_feature_is_enabled(const mt_kahypar_partition_type_t type) +{ + unused(type); +#ifndef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + if(type == MULTILEVEL_GRAPH_PARTITIONING || type == N_LEVEL_GRAPH_PARTITIONING) + { + throw InvalidParameterException( "Graph partitioning features are deactivated. Add -DKAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES=ON " "to the cmake command and rebuild Mt-KaHyPar."); - } - #endif - #ifndef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - if ( type == N_LEVEL_HYPERGRAPH_PARTITIONING || type == N_LEVEL_GRAPH_PARTITIONING ) { - throw InvalidParameterException( + } +#endif +#ifndef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES + if(type == N_LEVEL_HYPERGRAPH_PARTITIONING || type == N_LEVEL_GRAPH_PARTITIONING) + { + throw InvalidParameterException( "Quality preset features are deactivated. 
Add -KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES=ON " "to the cmake command and rebuild Mt-KaHyPar."); - } - #endif - #ifndef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES - if ( type == LARGE_K_PARTITIONING ) { - throw InvalidParameterException( + } +#endif +#ifndef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES + if(type == LARGE_K_PARTITIONING) + { + throw InvalidParameterException( "Large-k partitioning features are deactivated. Add -DKAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES=ON " "to the cmake command and rebuild Mt-KaHyPar."); - } - #endif } +#endif +} } // namespace internal - mt_kahypar_partitioned_hypergraph_t PartitionerFacade::partition(mt_kahypar_hypergraph_t hypergraph, - Context& context, - TargetGraph* target_graph) { - const mt_kahypar_partition_type_t type = to_partition_c_type( - context.partition.preset_type, context.partition.instance_type); - internal::check_if_feature_is_enabled(type); - switch ( type ) { - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case MULTILEVEL_GRAPH_PARTITIONING: - return internal::partition(hypergraph, context, target_graph); - #endif - case MULTILEVEL_HYPERGRAPH_PARTITIONING: - return internal::partition(hypergraph, context, target_graph); - #ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES - case LARGE_K_PARTITIONING: - return internal::partition(hypergraph, context, target_graph); - #endif - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case N_LEVEL_GRAPH_PARTITIONING: - return internal::partition(hypergraph, context, target_graph); - #endif - case N_LEVEL_HYPERGRAPH_PARTITIONING: - return internal::partition(hypergraph, context, target_graph); - #endif - default: - return mt_kahypar_partitioned_hypergraph_t { nullptr, NULLPTR_PARTITION }; - } - return mt_kahypar_partitioned_hypergraph_t { nullptr, NULLPTR_PARTITION }; +mt_kahypar_partitioned_hypergraph_t +PartitionerFacade::partition(mt_kahypar_hypergraph_t hypergraph, Context &context, + TargetGraph *target_graph) +{ + const mt_kahypar_partition_type_t type = + to_partition_c_type(context.partition.preset_type, context.partition.instance_type); + internal::check_if_feature_is_enabled(type); + switch(type) + { +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case MULTILEVEL_GRAPH_PARTITIONING: + return internal::partition(hypergraph, context, target_graph); +#endif + case MULTILEVEL_HYPERGRAPH_PARTITIONING: + return internal::partition(hypergraph, context, + target_graph); +#ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES + case LARGE_K_PARTITIONING: + return internal::partition(hypergraph, context, + target_graph); +#endif +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case N_LEVEL_GRAPH_PARTITIONING: + return internal::partition(hypergraph, context, target_graph); +#endif + case N_LEVEL_HYPERGRAPH_PARTITIONING: + return internal::partition(hypergraph, context, + target_graph); +#endif + default: + return mt_kahypar_partitioned_hypergraph_t{ nullptr, NULLPTR_PARTITION }; } + return mt_kahypar_partitioned_hypergraph_t{ nullptr, NULLPTR_PARTITION }; +} - - void PartitionerFacade::improve(mt_kahypar_partitioned_hypergraph_t partitioned_hg, - Context& context, - TargetGraph* target_graph) { - const mt_kahypar_partition_type_t type = to_partition_c_type( - context.partition.preset_type, context.partition.instance_type); - internal::check_if_feature_is_enabled(type); - switch ( type ) { - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case MULTILEVEL_GRAPH_PARTITIONING: 
- internal::improve(partitioned_hg, context, target_graph); break; - #endif - case MULTILEVEL_HYPERGRAPH_PARTITIONING: - internal::improve(partitioned_hg, context, target_graph); break; - #ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES - case LARGE_K_PARTITIONING: - internal::improve(partitioned_hg, context, target_graph); break; - #endif - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case N_LEVEL_GRAPH_PARTITIONING: - internal::improve(partitioned_hg, context, target_graph); break; - #endif - case N_LEVEL_HYPERGRAPH_PARTITIONING: - internal::improve(partitioned_hg, context, target_graph); break; - #endif - default: break; - } +void PartitionerFacade::improve(mt_kahypar_partitioned_hypergraph_t partitioned_hg, + Context &context, TargetGraph *target_graph) +{ + const mt_kahypar_partition_type_t type = + to_partition_c_type(context.partition.preset_type, context.partition.instance_type); + internal::check_if_feature_is_enabled(type); + switch(type) + { +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case MULTILEVEL_GRAPH_PARTITIONING: + internal::improve(partitioned_hg, context, target_graph); + break; +#endif + case MULTILEVEL_HYPERGRAPH_PARTITIONING: + internal::improve(partitioned_hg, context, target_graph); + break; +#ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES + case LARGE_K_PARTITIONING: + internal::improve(partitioned_hg, context, target_graph); + break; +#endif +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case N_LEVEL_GRAPH_PARTITIONING: + internal::improve(partitioned_hg, context, target_graph); + break; +#endif + case N_LEVEL_HYPERGRAPH_PARTITIONING: + internal::improve(partitioned_hg, context, target_graph); + break; +#endif + default: + break; } +} - void PartitionerFacade::printPartitioningResults(const mt_kahypar_partitioned_hypergraph_t phg, - const Context& context, - const std::chrono::duration& elapsed_seconds) { - const mt_kahypar_partition_type_t type = phg.type; - switch ( type ) { - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case MULTILEVEL_GRAPH_PARTITIONING: - io::printPartitioningResults(utils::cast_const(phg), context, elapsed_seconds); - break; - #endif - case MULTILEVEL_HYPERGRAPH_PARTITIONING: - io::printPartitioningResults(utils::cast_const(phg), context, elapsed_seconds); - break; - #ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES - case LARGE_K_PARTITIONING: - io::printPartitioningResults(utils::cast_const(phg), context, elapsed_seconds); - break; - #endif - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case N_LEVEL_GRAPH_PARTITIONING: - io::printPartitioningResults(utils::cast_const(phg), context, elapsed_seconds); - break; - #endif - case N_LEVEL_HYPERGRAPH_PARTITIONING: - io::printPartitioningResults(utils::cast_const(phg), context, elapsed_seconds); - break; - #endif - default: break; - } +void PartitionerFacade::printPartitioningResults( + const mt_kahypar_partitioned_hypergraph_t phg, const Context &context, + const std::chrono::duration &elapsed_seconds) +{ + const mt_kahypar_partition_type_t type = phg.type; + switch(type) + { +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case MULTILEVEL_GRAPH_PARTITIONING: + io::printPartitioningResults(utils::cast_const(phg), context, + elapsed_seconds); + break; +#endif + case MULTILEVEL_HYPERGRAPH_PARTITIONING: + io::printPartitioningResults(utils::cast_const(phg), + context, elapsed_seconds); + break; +#ifdef 
KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES + case LARGE_K_PARTITIONING: + io::printPartitioningResults( + utils::cast_const(phg), context, + elapsed_seconds); + break; +#endif +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case N_LEVEL_GRAPH_PARTITIONING: + io::printPartitioningResults(utils::cast_const(phg), context, + elapsed_seconds); + break; +#endif + case N_LEVEL_HYPERGRAPH_PARTITIONING: + io::printPartitioningResults(utils::cast_const(phg), + context, elapsed_seconds); + break; +#endif + default: + break; } +} - std::string PartitionerFacade::serializeCSV(const mt_kahypar_partitioned_hypergraph_t phg, - const Context& context, - const std::chrono::duration& elapsed_seconds) { - const mt_kahypar_partition_type_t type = phg.type; - switch ( type ) { - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case MULTILEVEL_GRAPH_PARTITIONING: - return io::csv::serialize(utils::cast_const(phg), context, elapsed_seconds); - break; - #endif - case MULTILEVEL_HYPERGRAPH_PARTITIONING: - return io::csv::serialize(utils::cast_const(phg), context, elapsed_seconds); - #ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES - case LARGE_K_PARTITIONING: - return io::csv::serialize(utils::cast_const(phg), context, elapsed_seconds); - #endif - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case N_LEVEL_GRAPH_PARTITIONING: - return io::csv::serialize(utils::cast_const(phg), context, elapsed_seconds); - #endif - case N_LEVEL_HYPERGRAPH_PARTITIONING: - return io::csv::serialize(utils::cast_const(phg), context, elapsed_seconds); - #endif - default: return ""; - } +std::string +PartitionerFacade::serializeCSV(const mt_kahypar_partitioned_hypergraph_t phg, + const Context &context, + const std::chrono::duration &elapsed_seconds) +{ + const mt_kahypar_partition_type_t type = phg.type; + switch(type) + { +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case MULTILEVEL_GRAPH_PARTITIONING: + return io::csv::serialize(utils::cast_const(phg), context, + elapsed_seconds); + break; +#endif + case MULTILEVEL_HYPERGRAPH_PARTITIONING: + return io::csv::serialize(utils::cast_const(phg), + context, elapsed_seconds); +#ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES + case LARGE_K_PARTITIONING: + return io::csv::serialize(utils::cast_const(phg), + context, elapsed_seconds); +#endif +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case N_LEVEL_GRAPH_PARTITIONING: + return io::csv::serialize(utils::cast_const(phg), context, + elapsed_seconds); +#endif + case N_LEVEL_HYPERGRAPH_PARTITIONING: + return io::csv::serialize(utils::cast_const(phg), + context, elapsed_seconds); +#endif + default: return ""; } + return ""; +} - std::string PartitionerFacade::serializeResultLine(const mt_kahypar_partitioned_hypergraph_t phg, - const Context& context, - const std::chrono::duration& elapsed_seconds) { - const mt_kahypar_partition_type_t type = phg.type; - switch ( type ) { - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case MULTILEVEL_GRAPH_PARTITIONING: - return io::serializer::serialize(utils::cast_const(phg), context, elapsed_seconds); - #endif - case MULTILEVEL_HYPERGRAPH_PARTITIONING: - return io::serializer::serialize(utils::cast_const(phg), context, elapsed_seconds); - #ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES - case LARGE_K_PARTITIONING: - return io::serializer::serialize(utils::cast_const(phg), context, elapsed_seconds); - #endif - #ifdef 
KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case N_LEVEL_GRAPH_PARTITIONING: - return io::serializer::serialize(utils::cast_const(phg), context, elapsed_seconds); - #endif - case N_LEVEL_HYPERGRAPH_PARTITIONING: - return io::serializer::serialize(utils::cast_const(phg), context, elapsed_seconds); - #endif - default: return ""; - } +std::string PartitionerFacade::serializeResultLine( + const mt_kahypar_partitioned_hypergraph_t phg, const Context &context, + const std::chrono::duration &elapsed_seconds) +{ + const mt_kahypar_partition_type_t type = phg.type; + switch(type) + { +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case MULTILEVEL_GRAPH_PARTITIONING: + return io::serializer::serialize(utils::cast_const(phg), + context, elapsed_seconds); +#endif + case MULTILEVEL_HYPERGRAPH_PARTITIONING: + return io::serializer::serialize(utils::cast_const(phg), + context, elapsed_seconds); +#ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES + case LARGE_K_PARTITIONING: + return io::serializer::serialize( + utils::cast_const(phg), context, + elapsed_seconds); +#endif +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case N_LEVEL_GRAPH_PARTITIONING: + return io::serializer::serialize(utils::cast_const(phg), + context, elapsed_seconds); +#endif + case N_LEVEL_HYPERGRAPH_PARTITIONING: + return io::serializer::serialize(utils::cast_const(phg), + context, elapsed_seconds); +#endif + default: return ""; } + return ""; +} - void PartitionerFacade::writePartitionFile(const mt_kahypar_partitioned_hypergraph_t phg, - const std::string& filename) { - const mt_kahypar_partition_type_t type = phg.type; - switch ( type ) { - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case MULTILEVEL_GRAPH_PARTITIONING: - io::writePartitionFile(utils::cast_const(phg), filename); - break; - #endif - case MULTILEVEL_HYPERGRAPH_PARTITIONING: - io::writePartitionFile(utils::cast_const(phg), filename); - break; - #ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES - case LARGE_K_PARTITIONING: - io::writePartitionFile(utils::cast_const(phg), filename); - break; - #endif - #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case N_LEVEL_GRAPH_PARTITIONING: - io::writePartitionFile(utils::cast_const(phg), filename); - break; - #endif - case N_LEVEL_HYPERGRAPH_PARTITIONING: - io::writePartitionFile(utils::cast_const(phg), filename); - break; - #endif - default: break; - } +void PartitionerFacade::writePartitionFile(const mt_kahypar_partitioned_hypergraph_t phg, + const std::string &filename) +{ + const mt_kahypar_partition_type_t type = phg.type; + switch(type) + { +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case MULTILEVEL_GRAPH_PARTITIONING: + io::writePartitionFile(utils::cast_const(phg), filename); + break; +#endif + case MULTILEVEL_HYPERGRAPH_PARTITIONING: + io::writePartitionFile(utils::cast_const(phg), filename); + break; +#ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES + case LARGE_K_PARTITIONING: + io::writePartitionFile(utils::cast_const(phg), + filename); + break; +#endif +#ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case N_LEVEL_GRAPH_PARTITIONING: + io::writePartitionFile(utils::cast_const(phg), filename); + break; +#endif + case N_LEVEL_HYPERGRAPH_PARTITIONING: + io::writePartitionFile(utils::cast_const(phg), + filename); + break; +#endif + default: + break; } +} -} // namespace mt_kahypar +} // 
namespace mt_kahypar diff --git a/mt-kahypar/partition/partitioner_facade.h b/mt-kahypar/partition/partitioner_facade.h index 1e604f92d..269648e55 100644 --- a/mt-kahypar/partition/partitioner_facade.h +++ b/mt-kahypar/partition/partitioner_facade.h @@ -36,37 +36,39 @@ namespace mt_kahypar { // Forward Declaration class TargetGraph; -class PartitionerFacade { - public: +class PartitionerFacade +{ +public: // ! Partition the hypergraph into a predefined number of blocks - static mt_kahypar_partitioned_hypergraph_t partition(mt_kahypar_hypergraph_t hypergraph, - Context& context, - TargetGraph* target_graph = nullptr); + static mt_kahypar_partitioned_hypergraph_t + partition(mt_kahypar_hypergraph_t hypergraph, Context &context, + TargetGraph *target_graph = nullptr); // ! Improves a given partition static void improve(mt_kahypar_partitioned_hypergraph_t partitioned_hg, - Context& context, - TargetGraph* target_graph = nullptr); + Context &context, TargetGraph *target_graph = nullptr); // ! Prints timings and metrics to output - static void printPartitioningResults(const mt_kahypar_partitioned_hypergraph_t phg, - const Context& context, - const std::chrono::duration& elapsed_seconds); + static void + printPartitioningResults(const mt_kahypar_partitioned_hypergraph_t phg, + const Context &context, + const std::chrono::duration &elapsed_seconds); // ! Prints timings and metrics in CSV file format static std::string serializeCSV(const mt_kahypar_partitioned_hypergraph_t phg, - const Context& context, - const std::chrono::duration& elapsed_seconds); + const Context &context, + const std::chrono::duration &elapsed_seconds); // ! Prints timings and metrics as a RESULT line parsable by SQL Plot Tools // ! https://github.com/bingmann/sqlplot-tools - static std::string serializeResultLine(const mt_kahypar_partitioned_hypergraph_t phg, - const Context& context, - const std::chrono::duration& elapsed_seconds); + static std::string + serializeResultLine(const mt_kahypar_partitioned_hypergraph_t phg, + const Context &context, + const std::chrono::duration &elapsed_seconds); // ! Writes the partition to the corresponding file static void writePartitionFile(const mt_kahypar_partitioned_hypergraph_t phg, - const std::string& filename); + const std::string &filename); }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp b/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp index 42c67b6d5..dc56a69b4 100644 --- a/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp +++ b/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.cpp @@ -28,40 +28,44 @@ #include "local_moving_modularity.h" #include "mt-kahypar/definitions.h" -#include "mt-kahypar/utils/timer.h" -#include "mt-kahypar/utils/floating_point_comparisons.h" #include "mt-kahypar/parallel/stl/thread_locals.h" +#include "mt-kahypar/utils/floating_point_comparisons.h" +#include "mt-kahypar/utils/timer.h" #include #include namespace mt_kahypar::metrics { -template -double modularity(const Graph& graph, const ds::Clustering& communities) { +template +double modularity(const Graph &graph, const ds::Clustering &communities) +{ ASSERT(graph.canBeUsed()); ASSERT(graph.numNodes() == communities.size()); vec nodes(graph.numNodes()); vec cluster_mod(graph.numNodes(), 0.0); // make summation order deterministic! 
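// A short sketch of the quantity being computed, writing vol(V) for
// graph.totalVolume(), vol(C) for the volume of cluster C, and w_int(C) for
// the weight of arcs inside C (these symbols are used only in this comment):
//
//   Q = (1/vol(V)) * sum_C w_int(C)  -  sum_C ( vol(C) / vol(V) )^2
//
// i.e. the coverage of the clustering minus its expected coverage; the same
// two terms appear further below in verifyGain. Sorting the nodes by
// community fixes the order in which each cluster's floating-point
// contributions are accumulated, so the deterministic reduce at the end
// yields the same value for the same clustering on every run.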
- tbb::parallel_for(UL(0), graph.numNodes(), [&](size_t pos) { - nodes[pos] = pos; - }); + tbb::parallel_for(UL(0), graph.numNodes(), [&](size_t pos) { nodes[pos] = pos; }); tbb::parallel_sort(nodes.begin(), nodes.end(), [&](NodeID lhs, NodeID rhs) { return std::tie(communities[lhs], lhs) < std::tie(communities[rhs], rhs); }); - // deterministic reduce doesn't have dynamic load balancing --> precompute the contributions and then sum them + // deterministic reduce doesn't have dynamic load balancing --> precompute the + // contributions and then sum them tbb::parallel_for(UL(0), graph.numNodes(), [&](size_t pos) { NodeID x = nodes[pos]; PartitionID comm = communities[x]; double comm_vol = 0.0, internal = 0.0; - if (pos == 0 || communities[nodes[pos - 1]] != comm) { - for (size_t i = pos; i < nodes.size() && communities[nodes[i]] == comm; ++i) { + if(pos == 0 || communities[nodes[pos - 1]] != comm) + { + for(size_t i = pos; i < nodes.size() && communities[nodes[i]] == comm; ++i) + { NodeID u = nodes[i]; comm_vol += graph.nodeVolume(u); - for (const Arc& arc : graph.arcsOf(u)) { - if (communities[arc.head] != comm) { + for(const Arc &arc : graph.arcsOf(u)) + { + if(communities[arc.head] != comm) + { internal -= arc.weight; } } @@ -72,14 +76,16 @@ double modularity(const Graph& graph, const ds::Clustering& communit }); auto r = tbb::blocked_range(UL(0), graph.numNodes(), 1000); - auto combine_range = [&](const tbb::blocked_range& r, double partial) { - return std::accumulate(cluster_mod.begin() + r.begin(), cluster_mod.begin() + r.end(), partial); + auto combine_range = [&](const tbb::blocked_range &r, double partial) { + return std::accumulate(cluster_mod.begin() + r.begin(), cluster_mod.begin() + r.end(), + partial); }; - return tbb::parallel_deterministic_reduce(r, 0.0, combine_range, std::plus<>()) / graph.totalVolume(); + return tbb::parallel_deterministic_reduce(r, 0.0, combine_range, std::plus<>()) / + graph.totalVolume(); } namespace { -#define MODULARITY(X) double modularity(const Graph&, const ds::Clustering&) +#define MODULARITY(X) double modularity(const Graph &, const ds::Clustering &) } INSTANTIATE_FUNC_WITH_HYPERGRAPHS(MODULARITY) @@ -88,21 +94,26 @@ INSTANTIATE_FUNC_WITH_HYPERGRAPHS(MODULARITY) namespace mt_kahypar::community_detection { -template -bool ParallelLocalMovingModularity::localMoving(Graph& graph, ds::Clustering& communities) { +template +bool ParallelLocalMovingModularity::localMoving(Graph &graph, + ds::Clustering &communities) +{ ASSERT(graph.canBeUsed()); _max_degree = graph.max_degree(); _reciprocal_total_volume = 1.0 / graph.totalVolume(); _vol_multiplier_div_by_node_vol = _reciprocal_total_volume; // init - if (_context.partition.deterministic) { + if(_context.partition.deterministic) + { tbb::parallel_for(UL(0), graph.numNodes(), [&](NodeID u) { communities[u] = u; _cluster_volumes[u].store(graph.nodeVolume(u), std::memory_order_relaxed); }); - } else { - auto& nodes = permutation.permutation; + } + else + { + auto &nodes = permutation.permutation; nodes.resize(graph.numNodes()); tbb::parallel_for(0U, static_cast(graph.numNodes()), [&](const NodeID u) { nodes[u] = u; @@ -115,47 +126,67 @@ bool ParallelLocalMovingModularity::localMoving(Graph& g // local moving bool clustering_changed = false; - if ( graph.numArcs() > 0 ) { + if(graph.numArcs() > 0) + { size_t number_of_nodes_moved = graph.numNodes(); - for (size_t round = 0; - number_of_nodes_moved >= _context.preprocessing.community_detection.min_vertex_move_fraction * graph.numNodes() - && round < 
_context.preprocessing.community_detection.max_pass_iterations; round++) { - if (_context.partition.deterministic) { + for(size_t round = 0; + number_of_nodes_moved >= + _context.preprocessing.community_detection.min_vertex_move_fraction * + graph.numNodes() && + round < _context.preprocessing.community_detection.max_pass_iterations; + round++) + { + if(_context.partition.deterministic) + { number_of_nodes_moved = synchronousParallelRound(graph, communities); - } else { + } + else + { number_of_nodes_moved = parallelNonDeterministicRound(graph, communities); } clustering_changed |= number_of_nodes_moved > 0; - DBG << "Louvain-Pass #" << round << " - num moves " << number_of_nodes_moved << " - Modularity:" << metrics::modularity(graph, communities); + DBG << "Louvain-Pass #" << round << " - num moves " << number_of_nodes_moved + << " - Modularity:" << metrics::modularity(graph, communities); } } return clustering_changed; } -template -size_t ParallelLocalMovingModularity::synchronousParallelRound(const Graph& graph, ds::Clustering& communities) { - if (graph.numNodes() < 200) { +template +size_t ParallelLocalMovingModularity::synchronousParallelRound( + const Graph &graph, ds::Clustering &communities) +{ + if(graph.numNodes() < 200) + { return sequentialRound(graph, communities); } size_t seed = prng(); - permutation.random_grouping(graph.numNodes(), _context.shared_memory.static_balancing_work_packages, seed); + permutation.random_grouping( + graph.numNodes(), _context.shared_memory.static_balancing_work_packages, seed); size_t num_moved_nodes = 0; constexpr size_t num_buckets = utils::ParallelPermutation::num_buckets; - const size_t num_sub_rounds = _context.preprocessing.community_detection.num_sub_rounds_deterministic; - size_t num_buckets_per_sub_round = parallel::chunking::idiv_ceil(num_buckets, num_sub_rounds); + const size_t num_sub_rounds = + _context.preprocessing.community_detection.num_sub_rounds_deterministic; + size_t num_buckets_per_sub_round = + parallel::chunking::idiv_ceil(num_buckets, num_sub_rounds); size_t max_round_size = 0; - for (size_t sub_round = 0; sub_round < num_sub_rounds; ++sub_round) { - auto [first_bucket, last_bucket] = parallel::chunking::bounds(sub_round, num_buckets, num_buckets_per_sub_round); - max_round_size = std::max(max_round_size, - size_t(permutation.bucket_bounds[last_bucket] - permutation.bucket_bounds[first_bucket])); + for(size_t sub_round = 0; sub_round < num_sub_rounds; ++sub_round) + { + auto [first_bucket, last_bucket] = + parallel::chunking::bounds(sub_round, num_buckets, num_buckets_per_sub_round); + max_round_size = + std::max(max_round_size, size_t(permutation.bucket_bounds[last_bucket] - + permutation.bucket_bounds[first_bucket])); } volume_updates_to.adapt_capacity(max_round_size); volume_updates_from.adapt_capacity(max_round_size); - for (size_t sub_round = 0; sub_round < num_sub_rounds; ++sub_round) { - auto [first_bucket, last_bucket] = parallel::chunking::bounds(sub_round, num_buckets, num_buckets_per_sub_round); + for(size_t sub_round = 0; sub_round < num_sub_rounds; ++sub_round) + { + auto [first_bucket, last_bucket] = + parallel::chunking::bounds(sub_round, num_buckets, num_buckets_per_sub_round); assert(first_bucket < last_bucket && last_bucket < permutation.bucket_bounds.size()); size_t first = permutation.bucket_bounds[first_bucket]; size_t last = permutation.bucket_bounds[last_bucket]; @@ -164,9 +195,10 @@ size_t ParallelLocalMovingModularity::synchronousParallelRound(const tbb::parallel_for(first, last, [&](size_t pos) { 
HypernodeID u = permutation.at(pos); PartitionID best_cluster = computeMaxGainCluster(graph, communities, u); - if (best_cluster != communities[u]) { + if(best_cluster != communities[u]) + { volume_updates_from.push_back_buffered({ communities[u], u }); - volume_updates_to.push_back_buffered({best_cluster, u }); + volume_updates_to.push_back_buffered({ best_cluster, u }); num_moved_local.local() += 1; } }); @@ -174,26 +206,33 @@ size_t ParallelLocalMovingModularity::synchronousParallelRound(const size_t num_moved_sub_round = num_moved_local.combine(std::plus<>()); num_moved_nodes += num_moved_sub_round; - // We can't do atomic adds of the volumes since they're not commutative and thus lead to non-deterministic decisions - // Instead we sort the updates, and for each cluster let one thread sum up the updates. - tbb::parallel_invoke([&] { - volume_updates_to.finalize(); - tbb::parallel_sort(volume_updates_to.begin(), volume_updates_to.end()); - }, [&] { - volume_updates_from.finalize(); - tbb::parallel_sort(volume_updates_from.begin(), volume_updates_from.end()); - }); + // We can't do atomic adds of the volumes since they're not commutative and thus lead + // to non-deterministic decisions Instead we sort the updates, and for each cluster + // let one thread sum up the updates. + tbb::parallel_invoke( + [&] { + volume_updates_to.finalize(); + tbb::parallel_sort(volume_updates_to.begin(), volume_updates_to.end()); + }, + [&] { + volume_updates_from.finalize(); + tbb::parallel_sort(volume_updates_from.begin(), volume_updates_from.end()); + }); const size_t sz_to = volume_updates_to.size(); tbb::parallel_for(UL(0), sz_to, [&](size_t pos) { PartitionID c = volume_updates_to[pos].cluster; - if (pos == 0 || volume_updates_to[pos - 1].cluster != c) { + if(pos == 0 || volume_updates_to[pos - 1].cluster != c) + { ArcWeight vol_delta = 0.0; - for ( ; pos < sz_to && volume_updates_to[pos].cluster == c; ++pos) { + for(; pos < sz_to && volume_updates_to[pos].cluster == c; ++pos) + { vol_delta += graph.nodeVolume(volume_updates_to[pos].node); communities[volume_updates_to[pos].node] = c; } - _cluster_volumes[c].store(_cluster_volumes[c].load(std::memory_order_relaxed) + vol_delta, std::memory_order_relaxed); + _cluster_volumes[c].store(_cluster_volumes[c].load(std::memory_order_relaxed) + + vol_delta, + std::memory_order_relaxed); } }); volume_updates_to.clear(); @@ -201,12 +240,16 @@ size_t ParallelLocalMovingModularity::synchronousParallelRound(const const size_t sz_from = volume_updates_from.size(); tbb::parallel_for(UL(0), sz_from, [&](size_t pos) { PartitionID c = volume_updates_from[pos].cluster; - if (pos == 0 || volume_updates_from[pos - 1].cluster != c) { + if(pos == 0 || volume_updates_from[pos - 1].cluster != c) + { ArcWeight vol_delta = 0.0; - for ( ; pos < sz_from && volume_updates_from[pos].cluster == c; ++pos) { + for(; pos < sz_from && volume_updates_from[pos].cluster == c; ++pos) + { vol_delta -= graph.nodeVolume(volume_updates_from[pos].node); } - _cluster_volumes[c].store(_cluster_volumes[c].load(std::memory_order_relaxed) + vol_delta, std::memory_order_relaxed); + _cluster_volumes[c].store(_cluster_volumes[c].load(std::memory_order_relaxed) + + vol_delta, + std::memory_order_relaxed); } }); volume_updates_from.clear(); @@ -215,15 +258,20 @@ size_t ParallelLocalMovingModularity::synchronousParallelRound(const return num_moved_nodes; } -template -size_t ParallelLocalMovingModularity::sequentialRound(const Graph& graph, ds::Clustering& communities) { +template +size_t 
+ParallelLocalMovingModularity::sequentialRound(const Graph &graph, + ds::Clustering &communities) +{ size_t seed = prng(); permutation.sequential_fallback(graph.numNodes(), seed); size_t num_moved = 0; - for (size_t i = 0; i < graph.numNodes(); ++i) { + for(size_t i = 0; i < graph.numNodes(); ++i) + { NodeID u = permutation.at(i); PartitionID best_cluster = computeMaxGainCluster(graph, communities, u); - if (best_cluster != communities[u]) { + if(best_cluster != communities[u]) + { _cluster_volumes[best_cluster] += graph.nodeVolume(u); _cluster_volumes[communities[u]] -= graph.nodeVolume(u); communities[u] = best_cluster; @@ -233,10 +281,13 @@ size_t ParallelLocalMovingModularity::sequentialRound(const Graph -size_t ParallelLocalMovingModularity::parallelNonDeterministicRound(const Graph& graph, ds::Clustering& communities) { - auto& nodes = permutation.permutation; - if ( !_disable_randomization ) { +template +size_t ParallelLocalMovingModularity::parallelNonDeterministicRound( + const Graph &graph, ds::Clustering &communities) +{ + auto &nodes = permutation.permutation; + if(!_disable_randomization) + { utils::Randomize::instance().parallelShuffleVector(nodes, UL(0), nodes.size()); } @@ -245,7 +296,8 @@ size_t ParallelLocalMovingModularity::parallelNonDeterministicRound( const ArcWeight volU = graph.nodeVolume(u); const PartitionID from = communities[u]; PartitionID best_cluster = computeMaxGainCluster(graph, communities, u); - if (best_cluster != from) { + if(best_cluster != from) + { _cluster_volumes[best_cluster] += volU; _cluster_volumes[from] -= volU; communities[u] = best_cluster; @@ -262,33 +314,42 @@ size_t ParallelLocalMovingModularity::parallelNonDeterministicRound( return number_of_nodes_moved; } -template -bool ParallelLocalMovingModularity::verifyGain(const Graph& graph, const ds::Clustering& communities, const NodeID u, - const PartitionID to, double gain, double weight_from, double weight_to) { - if (_context.partition.deterministic) { +template +bool ParallelLocalMovingModularity::verifyGain( + const Graph &graph, const ds::Clustering &communities, const NodeID u, + const PartitionID to, double gain, double weight_from, double weight_to) +{ + if(_context.partition.deterministic) + { // the check is omitted, since changing the cluster volumes breaks determinism return true; } const PartitionID from = communities[u]; - long double adjustedGain = adjustAdvancedModGain(gain, weight_from, _cluster_volumes[from], graph.nodeVolume(u)); + long double adjustedGain = adjustAdvancedModGain( + gain, weight_from, _cluster_volumes[from], graph.nodeVolume(u)); const double volMultiplier = _vol_multiplier_div_by_node_vol * graph.nodeVolume(u); double modGain = modularityGain(weight_to, _cluster_volumes[to], volMultiplier); - long double adjustedGainRecomputed = adjustAdvancedModGain(modGain, weight_from, _cluster_volumes[from], graph.nodeVolume(u)); + long double adjustedGainRecomputed = adjustAdvancedModGain( + modGain, weight_from, _cluster_volumes[from], graph.nodeVolume(u)); unused(adjustedGainRecomputed); - if (from == to) { + if(from == to) + { adjustedGainRecomputed = 0.0L; adjustedGain = 0.0L; } ASSERT(adjustedGain == adjustedGainRecomputed); - long double dTotalVolumeSquared = static_cast(graph.totalVolume()) * static_cast(graph.totalVolume()); + long double dTotalVolumeSquared = static_cast(graph.totalVolume()) * + static_cast(graph.totalVolume()); - auto accBeforeMove = intraClusterWeightsAndSumOfSquaredClusterVolumes(graph, communities); - long double coverageBeforeMove = 
static_cast(accBeforeMove.first) / graph.totalVolume(); + auto accBeforeMove = + intraClusterWeightsAndSumOfSquaredClusterVolumes(graph, communities); + long double coverageBeforeMove = + static_cast(accBeforeMove.first) / graph.totalVolume(); long double expectedCoverageBeforeMove = accBeforeMove.second / dTotalVolumeSquared; long double modBeforeMove = coverageBeforeMove - expectedCoverageBeforeMove; @@ -298,16 +359,20 @@ bool ParallelLocalMovingModularity::verifyGain(const Graph(accAfterMove.first) / graph.totalVolume(); + auto accAfterMove = + intraClusterWeightsAndSumOfSquaredClusterVolumes(graph, communities_after_move); + long double coverageAfterMove = + static_cast(accAfterMove.first) / graph.totalVolume(); long double expectedCoverageAfterMove = accAfterMove.second / dTotalVolumeSquared; long double modAfterMove = coverageAfterMove - expectedCoverageAfterMove; - const bool result = math::are_almost_equal_ld(modBeforeMove + adjustedGain, modAfterMove, 1e-8); - ASSERT(result, - V(modBeforeMove + adjustedGain) << V(modAfterMove) << V(gain) << V(adjustedGain) - << V(coverageBeforeMove) << V(expectedCoverageBeforeMove) << V(modBeforeMove) - << V(coverageAfterMove) << V(expectedCoverageAfterMove) << V(modAfterMove)); + const bool result = + math::are_almost_equal_ld(modBeforeMove + adjustedGain, modAfterMove, 1e-8); + ASSERT(result, V(modBeforeMove + adjustedGain) + << V(modAfterMove) << V(gain) << V(adjustedGain) + << V(coverageBeforeMove) << V(expectedCoverageBeforeMove) + << V(modBeforeMove) << V(coverageAfterMove) + << V(expectedCoverageAfterMove) << V(modAfterMove)); _cluster_volumes[to] -= graph.nodeVolume(u); _cluster_volumes[from] += graph.nodeVolume(u); @@ -315,60 +380,69 @@ bool ParallelLocalMovingModularity::verifyGain(const Graph -std::pair ParallelLocalMovingModularity::intraClusterWeightsAndSumOfSquaredClusterVolumes( - const Graph& graph, const ds::Clustering& communities) { +template +std::pair ParallelLocalMovingModularity:: + intraClusterWeightsAndSumOfSquaredClusterVolumes(const Graph &graph, + const ds::Clustering &communities) +{ ArcWeight intraClusterWeights = 0; ArcWeight sumOfSquaredClusterVolumes = 0; vec cluster_volumes(graph.numNodes(), 0); - for (NodeID u : graph.nodes()) { + for(NodeID u : graph.nodes()) + { ArcWeight arcVol = 0; - for (const Arc& arc : graph.arcsOf(u)) { - if (communities[u] == communities[arc.head]) + for(const Arc &arc : graph.arcsOf(u)) + { + if(communities[u] == communities[arc.head]) intraClusterWeights += arc.weight; arcVol += arc.weight; } - ArcWeight selfLoopWeight = graph.nodeVolume(u) - arcVol; // already accounted for as twice! + ArcWeight selfLoopWeight = + graph.nodeVolume(u) - arcVol; // already accounted for as twice! 
ASSERT(selfLoopWeight >= 0.0); intraClusterWeights += selfLoopWeight; cluster_volumes[communities[u]] += graph.nodeVolume(u); } - for (NodeID cluster : graph.nodes()) { // unused cluster IDs have volume 0 + for(NodeID cluster : graph.nodes()) + { // unused cluster IDs have volume 0 sumOfSquaredClusterVolumes += cluster_volumes[cluster] * cluster_volumes[cluster]; } return std::make_pair(intraClusterWeights, sumOfSquaredClusterVolumes); } -template -void ParallelLocalMovingModularity::initializeClusterVolumes(const Graph& graph, ds::Clustering& communities) { +template +void ParallelLocalMovingModularity::initializeClusterVolumes( + const Graph &graph, ds::Clustering &communities) +{ _reciprocal_total_volume = 1.0 / graph.totalVolume(); - _vol_multiplier_div_by_node_vol = _reciprocal_total_volume; + _vol_multiplier_div_by_node_vol = _reciprocal_total_volume; tbb::parallel_for(0U, static_cast(graph.numNodes()), [&](const NodeID u) { const PartitionID community_id = communities[u]; _cluster_volumes[community_id] += graph.nodeVolume(u); }); } -template -ParallelLocalMovingModularity::~ParallelLocalMovingModularity() { -/* - tbb::parallel_invoke([&] { - parallel::parallel_free_thread_local_internal_data( - _local_small_incident_cluster_weight, [&](CacheEfficientIncidentClusterWeights& data) { - data.freeInternalData(); - }); - }, [&] { - parallel::parallel_free_thread_local_internal_data( - _local_large_incident_cluster_weight, [&](LargeIncidentClusterWeights& data) { - data.freeInternalData(); - }); - }, [&] { - parallel::free(_cluster_volumes); - }); -*/ +template +ParallelLocalMovingModularity::~ParallelLocalMovingModularity() +{ + /* + tbb::parallel_invoke([&] { + parallel::parallel_free_thread_local_internal_data( + _local_small_incident_cluster_weight, + [&](CacheEfficientIncidentClusterWeights& data) { data.freeInternalData(); + }); + }, [&] { + parallel::parallel_free_thread_local_internal_data( + _local_large_incident_cluster_weight, [&](LargeIncidentClusterWeights& data) + { data.freeInternalData(); + }); + }, [&] { + parallel::free(_cluster_volumes); + }); + */ } INSTANTIATE_CLASS_WITH_HYPERGRAPHS(ParallelLocalMovingModularity) diff --git a/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.h b/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.h index 6a05c3f14..63f503f38 100644 --- a/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.h +++ b/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.h @@ -25,102 +25,111 @@ * SOFTWARE. 
******************************************************************************/ - #pragma once - -#include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/datastructures/buffered_vector.h" +#include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/macros.h" #include "mt-kahypar/datastructures/graph.h" -#include "mt-kahypar/partition/context.h" +#include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/partition/context.h" #include "mt-kahypar/utils/randomize.h" #include "mt-kahypar/utils/reproducible_random.h" - #include "gtest/gtest_prod.h" namespace mt_kahypar::metrics { - template - double modularity(const Graph& graph, const ds::Clustering& communities); +template +double modularity(const Graph &graph, const ds::Clustering &communities); } namespace mt_kahypar::community_detection { -template -class ParallelLocalMovingModularity { - private: +template +class ParallelLocalMovingModularity +{ +private: using LargeIncidentClusterWeights = ds::FixedSizeSparseMap; - using CacheEfficientIncidentClusterWeights = ds::FixedSizeSparseMap; + using CacheEfficientIncidentClusterWeights = + ds::FixedSizeSparseMap; - public: +public: static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - ParallelLocalMovingModularity(const Context& context, - size_t numNodes, + ParallelLocalMovingModularity(const Context &context, size_t numNodes, const bool disable_randomization = false) : - _context(context), - _max_degree(numNodes), - _vertex_degree_sampling_threshold(context.preprocessing.community_detection.vertex_degree_sampling_threshold), - _cluster_volumes(numNodes), - non_sampling_incident_cluster_weights(numNodes), - _disable_randomization(disable_randomization), - prng(context.partition.seed), - volume_updates_to(0), - volume_updates_from(0) { } + _context(context), + _max_degree(numNodes), + _vertex_degree_sampling_threshold( + context.preprocessing.community_detection.vertex_degree_sampling_threshold), + _cluster_volumes(numNodes), non_sampling_incident_cluster_weights(numNodes), + _disable_randomization(disable_randomization), prng(context.partition.seed), + volume_updates_to(0), volume_updates_from(0) + { + } ~ParallelLocalMovingModularity(); - bool localMoving(Graph& graph, ds::Clustering& communities); + bool localMoving(Graph &graph, ds::Clustering &communities); - private: - size_t parallelNonDeterministicRound(const Graph& graph, ds::Clustering& communities); - size_t synchronousParallelRound(const Graph& graph, ds::Clustering& communities); - size_t sequentialRound(const Graph& graph, ds::Clustering& communities); +private: + size_t parallelNonDeterministicRound(const Graph &graph, + ds::Clustering &communities); + size_t synchronousParallelRound(const Graph &graph, + ds::Clustering &communities); + size_t sequentialRound(const Graph &graph, ds::Clustering &communities); - struct ClearList { + struct ClearList + { vec weights; vec used; - ClearList(size_t n) : weights(n) { } + ClearList(size_t n) : weights(n) {} }; - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool ratingsFitIntoSmallSparseMap(const Graph& graph, - const HypernodeID u) { - static constexpr size_t cache_efficient_map_size = CacheEfficientIncidentClusterWeights::MAP_SIZE / 3UL; - return std::min(_vertex_degree_sampling_threshold, _max_degree) > cache_efficient_map_size && + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool + ratingsFitIntoSmallSparseMap(const Graph &graph, const HypernodeID u) + { + static constexpr size_t 
cache_efficient_map_size = + CacheEfficientIncidentClusterWeights::MAP_SIZE / 3UL; + return std::min(_vertex_degree_sampling_threshold, _max_degree) > + cache_efficient_map_size && graph.degree(u) <= cache_efficient_map_size; } - LargeIncidentClusterWeights construct_large_incident_cluster_weight_map() { - return LargeIncidentClusterWeights(3UL * std::min(_max_degree, _vertex_degree_sampling_threshold), 0); + LargeIncidentClusterWeights construct_large_incident_cluster_weight_map() + { + return LargeIncidentClusterWeights( + 3UL * std::min(_max_degree, _vertex_degree_sampling_threshold), 0); } // ! Only for testing - void initializeClusterVolumes(const Graph& graph, ds::Clustering& communities); - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PartitionID computeMaxGainCluster(const Graph& graph, - const ds::Clustering& communities, - const NodeID u) { - return computeMaxGainCluster(graph, communities, u, non_sampling_incident_cluster_weights.local()); + void initializeClusterVolumes(const Graph &graph, + ds::Clustering &communities); + + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PartitionID computeMaxGainCluster( + const Graph &graph, const ds::Clustering &communities, const NodeID u) + { + return computeMaxGainCluster(graph, communities, u, + non_sampling_incident_cluster_weights.local()); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PartitionID computeMaxGainCluster(const Graph& graph, - const ds::Clustering& communities, - const NodeID u, - ClearList& incident_cluster_weights) { + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE PartitionID + computeMaxGainCluster(const Graph &graph, const ds::Clustering &communities, + const NodeID u, ClearList &incident_cluster_weights) + { const PartitionID from = communities[u]; PartitionID bestCluster = communities[u]; - auto& weights = incident_cluster_weights.weights; - auto& used = incident_cluster_weights.used; + auto &weights = incident_cluster_weights.weights; + auto &used = incident_cluster_weights.used; - for (const Arc& arc : graph.arcsOf(u, _vertex_degree_sampling_threshold)) { + for(const Arc &arc : graph.arcsOf(u, _vertex_degree_sampling_threshold)) + { const auto cv = communities[arc.head]; - if (weights[cv] == 0.0) used.push_back(cv); + if(weights[cv] == 0.0) + used.push_back(cv); weights[cv] += arc.weight; } @@ -131,12 +140,17 @@ class ParallelLocalMovingModularity { const double volMultiplier = _vol_multiplier_div_by_node_vol * volU; double bestGain = weight_from - volMultiplier * (volume_from - volU); double best_weight_to = weight_from; - for (const auto to : used) { + for(const auto to : used) + { // if from == to, we would have to remove volU from volume_to as well. // just skip it. it has (adjusted) gain zero. 
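// Written out (a sketch, with vol(V) = graph.totalVolume() and w(u,C) the
// weight of u's arcs into cluster C; symbols used only in this comment), the
// gain of moving u from cluster `from` to cluster `to` is
//
//   delta Q = (2 / vol(V)) * [ (w(u,to) - w(u,from))
//              - (vol(u) / vol(V)) * (vol(to) - (vol(from) - vol(u))) ]
//
// modularityGain() below evaluates only the part that depends on `to`,
// w(u,to) - (vol(u)/vol(V)) * vol(to); the remaining terms are identical for
// every candidate cluster, so comparing these raw values is enough to pick
// the best cluster. adjustAdvancedModGain() (used by verifyGain) adds the
// constant terms back to recover the true delta Q.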
- if (from != to) { - double gain = modularityGain(weights[to], _cluster_volumes[to].load(std::memory_order_relaxed), volMultiplier); - if (gain > bestGain) { + if(from != to) + { + double gain = modularityGain(weights[to], + _cluster_volumes[to].load(std::memory_order_relaxed), + volMultiplier); + if(gain > bestGain) + { bestCluster = to; bestGain = gain; best_weight_to = weights[to]; @@ -149,57 +163,59 @@ class ParallelLocalMovingModularity { // changing communities and volumes in parallel causes non-determinism in debug mode unused(best_weight_to); - HEAVY_PREPROCESSING_ASSERT(verifyGain(graph, communities, u, bestCluster, bestGain, weight_from, best_weight_to)); + HEAVY_PREPROCESSING_ASSERT(verifyGain(graph, communities, u, bestCluster, bestGain, + weight_from, best_weight_to)); return bestCluster; } - - inline double modularityGain(const ArcWeight weight_to, - const ArcWeight volume_to, - const double multiplier) { + inline double modularityGain(const ArcWeight weight_to, const ArcWeight volume_to, + const double multiplier) + { return weight_to - multiplier * volume_to; // missing term is - weight_from + multiplier * (volume_from - volume_node) } - inline long double adjustAdvancedModGain(double gain, - const ArcWeight weight_from, + inline long double adjustAdvancedModGain(double gain, const ArcWeight weight_from, const ArcWeight volume_from, - const ArcWeight volume_node) const { + const ArcWeight volume_node) const + { return 2.0L * _reciprocal_total_volume * - (gain - weight_from + _reciprocal_total_volume * - volume_node * (volume_from - volume_node)); + (gain - weight_from + + _reciprocal_total_volume * volume_node * (volume_from - volume_node)); } + bool verifyGain(const Graph &graph, const ds::Clustering &communities, + NodeID u, PartitionID to, double gain, double weight_from, + double weight_to); - bool verifyGain(const Graph& graph, const ds::Clustering& communities, NodeID u, PartitionID to, double gain, - double weight_from, double weight_to); - - static std::pair intraClusterWeightsAndSumOfSquaredClusterVolumes(const Graph& graph, const ds::Clustering& communities); + static std::pair + intraClusterWeightsAndSumOfSquaredClusterVolumes(const Graph &graph, + const ds::Clustering &communities); - const Context& _context; + const Context &_context; size_t _max_degree; const size_t _vertex_degree_sampling_threshold; double _reciprocal_total_volume = 0.0; double _vol_multiplier_div_by_node_vol = 0.0; - vec> _cluster_volumes; + vec > _cluster_volumes; tbb::enumerable_thread_specific non_sampling_incident_cluster_weights; const bool _disable_randomization; utils::ParallelPermutation permutation; std::mt19937 prng; - struct ClusterMove { + struct ClusterMove + { PartitionID cluster; NodeID node; - bool operator< (const ClusterMove& o) const { + bool operator<(const ClusterMove &o) const + { return std::tie(cluster, node) < std::tie(o.cluster, o.node); } }; ds::BufferedVector volume_updates_to, volume_updates_from; - - FRIEND_TEST(ALouvain, ComputesMaxGainMove1); FRIEND_TEST(ALouvain, ComputesMaxGainMove2); FRIEND_TEST(ALouvain, ComputesMaxGainMove3); diff --git a/mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.cpp b/mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.cpp index cafd63cdb..bd1674684 100644 --- a/mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.cpp +++ b/mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.cpp @@ -25,7 +25,6 @@ * SOFTWARE. 
******************************************************************************/ - #include "parallel_louvain.h" #include "mt-kahypar/definitions.h" @@ -33,52 +32,61 @@ namespace mt_kahypar::community_detection { - template - ds::Clustering local_moving_contract_recurse(Graph& fine_graph, - ParallelLocalMovingModularity& mlv, - const Context& context) { - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - timer.start_timer("local_moving", "Local Moving"); - ds::Clustering communities(fine_graph.numNodes()); - bool communities_changed = mlv.localMoving(fine_graph, communities); - timer.stop_timer("local_moving"); - - if (communities_changed) { - timer.start_timer("contraction_cd", "Contraction"); - // Contract Communities - Graph coarse_graph = fine_graph.contract(communities, context.preprocessing.community_detection.low_memory_contraction); - ASSERT(coarse_graph.totalVolume() == fine_graph.totalVolume()); - timer.stop_timer("contraction_cd"); +template +ds::Clustering +local_moving_contract_recurse(Graph &fine_graph, + ParallelLocalMovingModularity &mlv, + const Context &context) +{ + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); + timer.start_timer("local_moving", "Local Moving"); + ds::Clustering communities(fine_graph.numNodes()); + bool communities_changed = mlv.localMoving(fine_graph, communities); + timer.stop_timer("local_moving"); - // Recurse on contracted graph - ds::Clustering coarse_communities = local_moving_contract_recurse(coarse_graph, mlv, context); + if(communities_changed) + { + timer.start_timer("contraction_cd", "Contraction"); + // Contract Communities + Graph coarse_graph = fine_graph.contract( + communities, context.preprocessing.community_detection.low_memory_contraction); + ASSERT(coarse_graph.totalVolume() == fine_graph.totalVolume()); + timer.stop_timer("contraction_cd"); - timer.start_timer("project", "Project"); - // Prolong Clustering - tbb::parallel_for(UL(0), fine_graph.numNodes(), [&](const NodeID u) { - ASSERT(communities[u] < static_cast(coarse_communities.size())); - communities[u] = coarse_communities[communities[u]]; - }); - timer.stop_timer("project"); - } + // Recurse on contracted graph + ds::Clustering coarse_communities = + local_moving_contract_recurse(coarse_graph, mlv, context); - return communities; + timer.start_timer("project", "Project"); + // Prolong Clustering + tbb::parallel_for(UL(0), fine_graph.numNodes(), [&](const NodeID u) { + ASSERT(communities[u] < static_cast(coarse_communities.size())); + communities[u] = coarse_communities[communities[u]]; + }); + timer.stop_timer("project"); } - template - ds::Clustering run_parallel_louvain(Graph& graph, - const Context& context, - bool disable_randomization) { - ParallelLocalMovingModularity mlv(context, graph.numNodes(), disable_randomization); - ds::Clustering communities = local_moving_contract_recurse(graph, mlv, context); - return communities; - } + return communities; +} - namespace { - #define LOCAL_MOVING(X) ds::Clustering local_moving_contract_recurse(Graph&, ParallelLocalMovingModularity&, const Context&) - #define PARALLEL_LOUVAIN(X) ds::Clustering run_parallel_louvain(Graph&, const Context&, bool) - } +template +ds::Clustering run_parallel_louvain(Graph &graph, const Context &context, + bool disable_randomization) +{ + ParallelLocalMovingModularity mlv(context, graph.numNodes(), + disable_randomization); + ds::Clustering communities = local_moving_contract_recurse(graph, mlv, context); + return communities; +} + 
+namespace { +#define LOCAL_MOVING(X) \ + ds::Clustering local_moving_contract_recurse( \ + Graph &, ParallelLocalMovingModularity &, const Context &) +#define PARALLEL_LOUVAIN(X) \ + ds::Clustering run_parallel_louvain(Graph &, const Context &, bool) +} - INSTANTIATE_FUNC_WITH_HYPERGRAPHS(LOCAL_MOVING) - INSTANTIATE_FUNC_WITH_HYPERGRAPHS(PARALLEL_LOUVAIN) +INSTANTIATE_FUNC_WITH_HYPERGRAPHS(LOCAL_MOVING) +INSTANTIATE_FUNC_WITH_HYPERGRAPHS(PARALLEL_LOUVAIN) } \ No newline at end of file diff --git a/mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.h b/mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.h index 783f91810..fcf0a8992 100644 --- a/mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.h +++ b/mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.h @@ -25,18 +25,17 @@ * SOFTWARE. ******************************************************************************/ - #pragma once #include "mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.h" namespace mt_kahypar::community_detection { - template - ds::Clustering local_moving_contract_recurse(Graph& fine_graph, - ParallelLocalMovingModularity& mlv, - const Context& context); - template - ds::Clustering run_parallel_louvain(Graph& graph, - const Context& context, - bool disable_randomization = false); +template +ds::Clustering +local_moving_contract_recurse(Graph &fine_graph, + ParallelLocalMovingModularity &mlv, + const Context &context); +template +ds::Clustering run_parallel_louvain(Graph &graph, const Context &context, + bool disable_randomization = false); } diff --git a/mt-kahypar/partition/preprocessing/sparsification/degree_zero_hn_remover.h b/mt-kahypar/partition/preprocessing/sparsification/degree_zero_hn_remover.h index 44655ea88..3709647f0 100644 --- a/mt-kahypar/partition/preprocessing/sparsification/degree_zero_hn_remover.h +++ b/mt-kahypar/partition/preprocessing/sparsification/degree_zero_hn_remover.h @@ -24,43 +24,48 @@ * SOFTWARE. ******************************************************************************/ - #pragma once #include "tbb/parallel_sort.h" -#include "mt-kahypar/partition/context.h" #include "mt-kahypar/datastructures/streaming_vector.h" +#include "mt-kahypar/partition/context.h" namespace mt_kahypar { -template -class DegreeZeroHypernodeRemover { +template +class DegreeZeroHypernodeRemover +{ using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - DegreeZeroHypernodeRemover(const Context& context) : - _context(context), - _removed_hns() { } +public: + DegreeZeroHypernodeRemover(const Context &context) : _context(context), _removed_hns() + { + } - DegreeZeroHypernodeRemover(const DegreeZeroHypernodeRemover&) = delete; - DegreeZeroHypernodeRemover & operator= (const DegreeZeroHypernodeRemover &) = delete; + DegreeZeroHypernodeRemover(const DegreeZeroHypernodeRemover &) = delete; + DegreeZeroHypernodeRemover &operator=(const DegreeZeroHypernodeRemover &) = delete; - DegreeZeroHypernodeRemover(DegreeZeroHypernodeRemover&&) = delete; - DegreeZeroHypernodeRemover & operator= (DegreeZeroHypernodeRemover &&) = delete; + DegreeZeroHypernodeRemover(DegreeZeroHypernodeRemover &&) = delete; + DegreeZeroHypernodeRemover &operator=(DegreeZeroHypernodeRemover &&) = delete; // ! 
Remove all degree zero vertices - HypernodeID removeDegreeZeroHypernodes(Hypergraph& hypergraph) { + HypernodeID removeDegreeZeroHypernodes(Hypergraph &hypergraph) + { const HypernodeID current_num_nodes = - hypergraph.initialNumNodes() - hypergraph.numRemovedHypernodes(); + hypergraph.initialNumNodes() - hypergraph.numRemovedHypernodes(); HypernodeID num_removed_degree_zero_hypernodes = 0; - for ( const HypernodeID& hn : hypergraph.nodes() ) { - if ( current_num_nodes - num_removed_degree_zero_hypernodes <= _context.coarsening.contraction_limit) { + for(const HypernodeID &hn : hypergraph.nodes()) + { + if(current_num_nodes - num_removed_degree_zero_hypernodes <= + _context.coarsening.contraction_limit) + { break; } - if ( hypergraph.nodeDegree(hn) == 0 && !hypergraph.isFixed(hn) ) { + if(hypergraph.nodeDegree(hn) == 0 && !hypergraph.isFixed(hn)) + { hypergraph.removeDegreeZeroHypernode(hn); _removed_hns.push_back(hn); ++num_removed_degree_zero_hypernodes; @@ -70,13 +75,15 @@ class DegreeZeroHypernodeRemover { } // ! Restore degree-zero vertices - void restoreDegreeZeroHypernodes(PartitionedHypergraph& hypergraph) { + void restoreDegreeZeroHypernodes(PartitionedHypergraph &hypergraph) + { // Sort degree-zero vertices in decreasing order of their weight - tbb::parallel_sort(_removed_hns.begin(), _removed_hns.end(), - [&](const HypernodeID& lhs, const HypernodeID& rhs) { - return hypergraph.nodeWeight(lhs) > hypergraph.nodeWeight(rhs) - || (hypergraph.nodeWeight(lhs) == hypergraph.nodeWeight(rhs) && lhs > rhs); - }); + tbb::parallel_sort( + _removed_hns.begin(), _removed_hns.end(), + [&](const HypernodeID &lhs, const HypernodeID &rhs) { + return hypergraph.nodeWeight(lhs) > hypergraph.nodeWeight(rhs) || + (hypergraph.nodeWeight(lhs) == hypergraph.nodeWeight(rhs) && lhs > rhs); + }); // Sort blocks of partition in increasing order of their weight auto distance_to_max = [&](const PartitionID block) { return hypergraph.partWeight(block) - _context.partition.max_part_weights[block]; @@ -84,17 +91,19 @@ class DegreeZeroHypernodeRemover { parallel::scalable_vector blocks(_context.partition.k, 0); std::iota(blocks.begin(), blocks.end(), 0); std::sort(blocks.begin(), blocks.end(), - [&](const PartitionID& lhs, const PartitionID& rhs) { - return distance_to_max(lhs) < distance_to_max(rhs); - }); + [&](const PartitionID &lhs, const PartitionID &rhs) { + return distance_to_max(lhs) < distance_to_max(rhs); + }); // Perform Bin-Packing - for ( const HypernodeID& hn : _removed_hns ) { + for(const HypernodeID &hn : _removed_hns) + { PartitionID to = blocks.front(); hypergraph.restoreDegreeZeroHypernode(hn, to); PartitionID i = 0; - while ( i + 1 < _context.partition.k && - distance_to_max(blocks[i]) > distance_to_max(blocks[i + 1]) ) { + while(i + 1 < _context.partition.k && + distance_to_max(blocks[i]) > distance_to_max(blocks[i + 1])) + { std::swap(blocks[i], blocks[i + 1]); ++i; } @@ -102,9 +111,9 @@ class DegreeZeroHypernodeRemover { _removed_hns.clear(); } - private: - const Context& _context; +private: + const Context &_context; parallel::scalable_vector _removed_hns; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/preprocessing/sparsification/large_he_remover.h b/mt-kahypar/partition/preprocessing/sparsification/large_he_remover.h index dc56f399c..22b4598e2 100644 --- a/mt-kahypar/partition/preprocessing/sparsification/large_he_remover.h +++ b/mt-kahypar/partition/preprocessing/sparsification/large_he_remover.h @@ -12,8 +12,8 @@ * copies of the Software, 
and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -26,36 +26,39 @@ #pragma once +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/metrics.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" namespace mt_kahypar { -template -class LargeHyperedgeRemover { +template +class LargeHyperedgeRemover +{ using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: - LargeHyperedgeRemover(const Context& context) : - _context(context), - _removed_hes() { } +public: + LargeHyperedgeRemover(const Context &context) : _context(context), _removed_hes() {} - LargeHyperedgeRemover(const LargeHyperedgeRemover&) = delete; - LargeHyperedgeRemover & operator= (const LargeHyperedgeRemover &) = delete; + LargeHyperedgeRemover(const LargeHyperedgeRemover &) = delete; + LargeHyperedgeRemover &operator=(const LargeHyperedgeRemover &) = delete; - LargeHyperedgeRemover(LargeHyperedgeRemover&&) = delete; - LargeHyperedgeRemover & operator= (LargeHyperedgeRemover &&) = delete; + LargeHyperedgeRemover(LargeHyperedgeRemover &&) = delete; + LargeHyperedgeRemover &operator=(LargeHyperedgeRemover &&) = delete; // ! Removes large hyperedges from the hypergraph // ! Returns the number of removed large hyperedges. - HypernodeID removeLargeHyperedges(Hypergraph& hypergraph) { + HypernodeID removeLargeHyperedges(Hypergraph &hypergraph) + { HypernodeID num_removed_large_hyperedges = 0; - if constexpr ( !Hypergraph::is_graph ) { - for ( const HyperedgeID& he : hypergraph.edges() ) { - if ( hypergraph.edgeSize(he) > largeHyperedgeThreshold() ) { + if constexpr(!Hypergraph::is_graph) + { + for(const HyperedgeID &he : hypergraph.edges()) + { + if(hypergraph.edgeSize(he) > largeHyperedgeThreshold()) + { hypergraph.removeLargeEdge(he); _removed_hes.push_back(he); ++num_removed_large_hyperedges; @@ -70,41 +73,44 @@ class LargeHyperedgeRemover { // ! This causes that all removed hyperedges in the dynamic hypergraph are // ! reinserted to the incident nets of each vertex. By simply calling this // ! function, we remove all large hyperedges again. - void removeLargeHyperedgesInNLevelVCycle(Hypergraph& hypergraph) { - for ( const HyperedgeID& he : _removed_hes ) { + void removeLargeHyperedgesInNLevelVCycle(Hypergraph &hypergraph) + { + for(const HyperedgeID &he : _removed_hes) + { hypergraph.enableHyperedge(he); hypergraph.removeLargeEdge(he); } } // ! 
Restores all previously removed large hyperedges - void restoreLargeHyperedges(PartitionedHypergraph& hypergraph) { + void restoreLargeHyperedges(PartitionedHypergraph &hypergraph) + { HyperedgeWeight delta = 0; - for ( const HyperedgeID& he : _removed_hes ) { + for(const HyperedgeID &he : _removed_hes) + { hypergraph.restoreLargeEdge(he); delta += metrics::contribution(hypergraph, he, _context.partition.objective); } - if ( _context.partition.verbose_output && delta > 0 ) { + if(_context.partition.verbose_output && delta > 0) + { LOG << RED << "Restoring of" << _removed_hes.size() << "large hyperedges (|e| >" << largeHyperedgeThreshold() << ") increased" << _context.partition.objective << "by" << delta << END; } } - HypernodeID largeHyperedgeThreshold() const { - return std::max( - _context.partition.large_hyperedge_size_threshold, - _context.partition.smallest_large_he_size_threshold); + HypernodeID largeHyperedgeThreshold() const + { + return std::max(_context.partition.large_hyperedge_size_threshold, + _context.partition.smallest_large_he_size_threshold); } - void reset() { - _removed_hes.clear(); - } + void reset() { _removed_hes.clear(); } - private: - const Context& _context; +private: + const Context &_context; parallel::scalable_vector _removed_hes; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/recursive_bipartitioning.cpp b/mt-kahypar/partition/recursive_bipartitioning.cpp index 82fae1cfb..2a3568b38 100644 --- a/mt-kahypar/partition/recursive_bipartitioning.cpp +++ b/mt-kahypar/partition/recursive_bipartitioning.cpp @@ -32,10 +32,10 @@ #include #include +#include "mt-kahypar/datastructures/fixed_vertex_support.h" #include "mt-kahypar/definitions.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/partition/multilevel.h" -#include "mt-kahypar/datastructures/fixed_vertex_support.h" #include "mt-kahypar/partition/refinement/gains/bipartitioning_policy.h" #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC #include "mt-kahypar/partition/mapping/initial_mapping.h" @@ -43,30 +43,36 @@ #include "mt-kahypar/io/partitioning_output.h" #include "mt-kahypar/parallel/memory_pool.h" #include "mt-kahypar/utils/randomize.h" -#include "mt-kahypar/utils/utilities.h" #include "mt-kahypar/utils/timer.h" +#include "mt-kahypar/utils/utilities.h" #include "mt-kahypar/partition/metrics.h" namespace mt_kahypar { +struct OriginalHypergraphInfo +{ -struct OriginalHypergraphInfo { - - // The initial allowed imbalance cannot be used for each bipartition as this could result in an - // imbalanced k-way partition when performing recursive bipartitioning. We therefore adaptively - // adjust the allowed imbalance for each bipartition individually based on the adaptive imbalance - // definition described in our papers. + // The initial allowed imbalance cannot be used for each bipartition as this could + // result in an imbalanced k-way partition when performing recursive bipartitioning. We + // therefore adaptively adjust the allowed imbalance for each bipartition individually + // based on the adaptive imbalance definition described in our papers. 
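A worked instance of the adaptive definition implemented by computeAdaptiveEpsilon below (the numbers are illustrative, not taken from the code): with an original k = 8 and original epsilon = 0.03, the top-level bipartition sees current_hypergraph_weight equal to the original weight and current_k equal to the original k, so the base term reduces to 1 + 0.03 = 1.03 and the adaptive imbalance becomes min(0.99, max(1.03^(1 / ceil(log2(8))) - 1, 0)) = 1.03^(1/3) - 1 ≈ 0.0099. Each of the three bipartitioning levels is therefore allowed roughly 1% imbalance, which compounds to approximately the requested 3% for the final 8-way partition.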
double computeAdaptiveEpsilon(const HypernodeWeight current_hypergraph_weight, - const PartitionID current_k) const { - if ( current_hypergraph_weight == 0 ) { + const PartitionID current_k) const + { + if(current_hypergraph_weight == 0) + { return 0.0; - } else { - double base = ceil(static_cast(original_hypergraph_weight) / original_k) - / ceil(static_cast(current_hypergraph_weight) / current_k) - * (1.0 + original_epsilon); - double adaptive_epsilon = std::min(0.99, std::max(std::pow(base, 1.0 / - ceil(log2(static_cast(current_k)))) - 1.0,0.0)); + } + else + { + double base = ceil(static_cast(original_hypergraph_weight) / original_k) / + ceil(static_cast(current_hypergraph_weight) / current_k) * + (1.0 + original_epsilon); + double adaptive_epsilon = std::min( + 0.99, + std::max(std::pow(base, 1.0 / ceil(log2(static_cast(current_k)))) - 1.0, + 0.0)); return adaptive_epsilon; } } @@ -78,370 +84,438 @@ struct OriginalHypergraphInfo { namespace rb { - static constexpr bool debug = false; - - // Sets the appropriate parameters for the multilevel bipartitioning call - template - Context setupBipartitioningContext(const Hypergraph& hypergraph, - const Context& context, - const OriginalHypergraphInfo& info) { - Context b_context(context); - - b_context.partition.k = 2; - b_context.partition.objective = Objective::cut; - b_context.partition.gain_policy = Hypergraph::is_graph ? - GainPolicy::cut_for_graphs : GainPolicy::cut; - b_context.partition.verbose_output = false; - b_context.initial_partitioning.mode = Mode::direct; - if (context.partition.mode == Mode::direct) { - b_context.type = ContextType::initial_partitioning; - } - - // Setup Part Weights - const HypernodeWeight total_weight = hypergraph.totalWeight(); - const PartitionID k = context.partition.k; - const PartitionID k0 = k / 2 + (k % 2 != 0 ? 1 : 0); - const PartitionID k1 = k / 2; - ASSERT(k0 + k1 == context.partition.k); - if ( context.partition.use_individual_part_weights ) { - const HypernodeWeight max_part_weights_sum = std::accumulate(context.partition.max_part_weights.cbegin(), - context.partition.max_part_weights.cend(), 0); - const double weight_fraction = total_weight / static_cast(max_part_weights_sum); - ASSERT(weight_fraction <= 1.0); - b_context.partition.perfect_balance_part_weights.clear(); - b_context.partition.max_part_weights.clear(); - HypernodeWeight perfect_weight_p0 = 0; - for ( PartitionID i = 0; i < k0; ++i ) { - perfect_weight_p0 += ceil(weight_fraction * context.partition.max_part_weights[i]); - } - HypernodeWeight perfect_weight_p1 = 0; - for ( PartitionID i = k0; i < k; ++i ) { - perfect_weight_p1 += ceil(weight_fraction * context.partition.max_part_weights[i]); - } - // In the case of individual part weights, the usual adaptive epsilon formula is not applicable because it - // assumes equal part weights. However, by observing that ceil(current_weight / current_k) is the current - // perfect part weight and (1 + epsilon)ceil(original_weight / original_k) is the maximum part weight, - // we can derive an equivalent formula using the sum of the perfect part weights and the sum of the - // maximum part weights. - // Note that the sum of the perfect part weights might be unequal to the hypergraph weight due to rounding. - // Thus, we need to use the former instead of using the hypergraph weight directly, as otherwise it could - // happen that (1 + epsilon)perfect_part_weight > max_part_weight because of rounding issues. 
- const double base = max_part_weights_sum / static_cast(perfect_weight_p0 + perfect_weight_p1); - b_context.partition.epsilon = total_weight == 0 ? 0 : std::min(0.99, std::max(std::pow(base, 1.0 / - ceil(log2(static_cast(k)))) - 1.0,0.0)); - b_context.partition.perfect_balance_part_weights.push_back(perfect_weight_p0); - b_context.partition.perfect_balance_part_weights.push_back(perfect_weight_p1); - b_context.partition.max_part_weights.push_back( - round((1 + b_context.partition.epsilon) * perfect_weight_p0)); - b_context.partition.max_part_weights.push_back( - round((1 + b_context.partition.epsilon) * perfect_weight_p1)); - } else { - b_context.partition.epsilon = info.computeAdaptiveEpsilon(total_weight, k); - - b_context.partition.perfect_balance_part_weights.clear(); - b_context.partition.max_part_weights.clear(); - b_context.partition.perfect_balance_part_weights.push_back( - std::ceil(k0 / static_cast(k) * static_cast(total_weight))); - b_context.partition.perfect_balance_part_weights.push_back( - std::ceil(k1 / static_cast(k) * static_cast(total_weight))); - b_context.partition.max_part_weights.push_back( - (1 + b_context.partition.epsilon) * b_context.partition.perfect_balance_part_weights[0]); - b_context.partition.max_part_weights.push_back( - (1 + b_context.partition.epsilon) * b_context.partition.perfect_balance_part_weights[1]); - } - b_context.setupContractionLimit(total_weight); - b_context.setupThreadsPerFlowSearch(); - - return b_context; +static constexpr bool debug = false; + +// Sets the appropriate parameters for the multilevel bipartitioning call +template +Context setupBipartitioningContext(const Hypergraph &hypergraph, const Context &context, + const OriginalHypergraphInfo &info) +{ + Context b_context(context); + + b_context.partition.k = 2; + b_context.partition.objective = Objective::cut; + b_context.partition.gain_policy = + Hypergraph::is_graph ? GainPolicy::cut_for_graphs : GainPolicy::cut; + b_context.partition.verbose_output = false; + b_context.initial_partitioning.mode = Mode::direct; + if(context.partition.mode == Mode::direct) + { + b_context.type = ContextType::initial_partitioning; } - // Sets the appropriate parameters for the recursive bipartitioning call - Context setupRecursiveBipartitioningContext(const Context& context, - const PartitionID k0, const PartitionID k1, - const double degree_of_parallelism) { - ASSERT((k1 - k0) >= 2); - Context rb_context(context); - rb_context.partition.k = k1 - k0; - if (context.partition.mode == Mode::direct) { - rb_context.type = ContextType::initial_partitioning; + // Setup Part Weights + const HypernodeWeight total_weight = hypergraph.totalWeight(); + const PartitionID k = context.partition.k; + const PartitionID k0 = k / 2 + (k % 2 != 0 ? 
1 : 0); + const PartitionID k1 = k / 2; + ASSERT(k0 + k1 == context.partition.k); + if(context.partition.use_individual_part_weights) + { + const HypernodeWeight max_part_weights_sum = + std::accumulate(context.partition.max_part_weights.cbegin(), + context.partition.max_part_weights.cend(), 0); + const double weight_fraction = + total_weight / static_cast(max_part_weights_sum); + ASSERT(weight_fraction <= 1.0); + b_context.partition.perfect_balance_part_weights.clear(); + b_context.partition.max_part_weights.clear(); + HypernodeWeight perfect_weight_p0 = 0; + for(PartitionID i = 0; i < k0; ++i) + { + perfect_weight_p0 += ceil(weight_fraction * context.partition.max_part_weights[i]); } - - rb_context.partition.perfect_balance_part_weights.assign(rb_context.partition.k, 0); - rb_context.partition.max_part_weights.assign(rb_context.partition.k, 0); - for ( PartitionID part_id = k0; part_id < k1; ++part_id ) { - rb_context.partition.perfect_balance_part_weights[part_id - k0] = - context.partition.perfect_balance_part_weights[part_id]; - rb_context.partition.max_part_weights[part_id - k0] = - context.partition.max_part_weights[part_id]; + HypernodeWeight perfect_weight_p1 = 0; + for(PartitionID i = k0; i < k; ++i) + { + perfect_weight_p1 += ceil(weight_fraction * context.partition.max_part_weights[i]); } + // In the case of individual part weights, the usual adaptive epsilon formula is not + // applicable because it assumes equal part weights. However, by observing that + // ceil(current_weight / current_k) is the current perfect part weight and (1 + + // epsilon)ceil(original_weight / original_k) is the maximum part weight, we can + // derive an equivalent formula using the sum of the perfect part weights and the sum + // of the maximum part weights. Note that the sum of the perfect part weights might be + // unequal to the hypergraph weight due to rounding. Thus, we need to use the former + // instead of using the hypergraph weight directly, as otherwise it could happen that + // (1 + epsilon)perfect_part_weight > max_part_weight because of rounding issues. + const double base = + max_part_weights_sum / static_cast(perfect_weight_p0 + perfect_weight_p1); + b_context.partition.epsilon = + total_weight == 0 ? 
+ 0 : + std::min( + 0.99, + std::max(std::pow(base, 1.0 / ceil(log2(static_cast(k)))) - 1.0, + 0.0)); + b_context.partition.perfect_balance_part_weights.push_back(perfect_weight_p0); + b_context.partition.perfect_balance_part_weights.push_back(perfect_weight_p1); + b_context.partition.max_part_weights.push_back( + round((1 + b_context.partition.epsilon) * perfect_weight_p0)); + b_context.partition.max_part_weights.push_back( + round((1 + b_context.partition.epsilon) * perfect_weight_p1)); + } + else + { + b_context.partition.epsilon = info.computeAdaptiveEpsilon(total_weight, k); + + b_context.partition.perfect_balance_part_weights.clear(); + b_context.partition.max_part_weights.clear(); + b_context.partition.perfect_balance_part_weights.push_back( + std::ceil(k0 / static_cast(k) * static_cast(total_weight))); + b_context.partition.perfect_balance_part_weights.push_back( + std::ceil(k1 / static_cast(k) * static_cast(total_weight))); + b_context.partition.max_part_weights.push_back( + (1 + b_context.partition.epsilon) * + b_context.partition.perfect_balance_part_weights[0]); + b_context.partition.max_part_weights.push_back( + (1 + b_context.partition.epsilon) * + b_context.partition.perfect_balance_part_weights[1]); + } + b_context.setupContractionLimit(total_weight); + b_context.setupThreadsPerFlowSearch(); - rb_context.shared_memory.degree_of_parallelism *= degree_of_parallelism; + return b_context; +} - return rb_context; +// Sets the appropriate parameters for the recursive bipartitioning call +Context setupRecursiveBipartitioningContext(const Context &context, const PartitionID k0, + const PartitionID k1, + const double degree_of_parallelism) +{ + ASSERT((k1 - k0) >= 2); + Context rb_context(context); + rb_context.partition.k = k1 - k0; + if(context.partition.mode == Mode::direct) + { + rb_context.type = ContextType::initial_partitioning; } - template - void setupFixedVerticesForBipartitioning(Hypergraph& hg, - const PartitionID k) { - if ( hg.hasFixedVertices() ) { - const PartitionID m = k / 2 + (k % 2); - ds::FixedVertexSupport fixed_vertices(hg.initialNumNodes(), 2); - fixed_vertices.setHypergraph(&hg); - hg.doParallelForAllNodes([&](const HypernodeID& hn) { - if ( hg.isFixed(hn) ) { - if ( hg.fixedVertexBlock(hn) < m ) { - fixed_vertices.fixToBlock(hn, 0); - } else { - fixed_vertices.fixToBlock(hn, 1); - } - } - }); - hg.addFixedVertexSupport(std::move(fixed_vertices)); - } + rb_context.partition.perfect_balance_part_weights.assign(rb_context.partition.k, 0); + rb_context.partition.max_part_weights.assign(rb_context.partition.k, 0); + for(PartitionID part_id = k0; part_id < k1; ++part_id) + { + rb_context.partition.perfect_balance_part_weights[part_id - k0] = + context.partition.perfect_balance_part_weights[part_id]; + rb_context.partition.max_part_weights[part_id - k0] = + context.partition.max_part_weights[part_id]; } - template - void setupFixedVerticesForRecursion(const Hypergraph& input_hg, - Hypergraph& extracted_hg, - const vec& input2extracted, - const PartitionID k0, - const PartitionID k1) { - if ( input_hg.hasFixedVertices() ) { - ds::FixedVertexSupport fixed_vertices( - extracted_hg.initialNumNodes(), k1 - k0); - fixed_vertices.setHypergraph(&extracted_hg); - input_hg.doParallelForAllNodes([&](const HypernodeID& hn) { - if ( input_hg.isFixed(hn) ) { - const PartitionID block = input_hg.fixedVertexBlock(hn); - if ( block >= k0 && block < k1 ) { - fixed_vertices.fixToBlock(input2extracted[hn], block - k0); - } - } - }); - 
extracted_hg.addFixedVertexSupport(std::move(fixed_vertices)); - } - } + rb_context.shared_memory.degree_of_parallelism *= degree_of_parallelism; - bool usesAdaptiveWeightOfNonCutEdges(const Context& context) { - return BipartitioningPolicy::nonCutEdgeMultiplier(context.partition.gain_policy) != 1; - } + return rb_context; +} - template - void adaptWeightsOfNonCutEdges(Hypergraph& hg, - const vec& already_cut, - const GainPolicy gain_policy, - const bool revert) { - const HyperedgeWeight multiplier = BipartitioningPolicy::nonCutEdgeMultiplier(gain_policy); - if ( multiplier != 1 ) { - ASSERT(static_cast(hg.initialNumEdges()) == already_cut.size()); - hg.doParallelForAllEdges([&](const HyperedgeID& he) { - if ( !already_cut[he] ) { - hg.setEdgeWeight(he, static_cast(( revert ? 1.0 / multiplier : - static_cast(multiplier) ) * hg.edgeWeight(he))); +template +void setupFixedVerticesForBipartitioning(Hypergraph &hg, const PartitionID k) +{ + if(hg.hasFixedVertices()) + { + const PartitionID m = k / 2 + (k % 2); + ds::FixedVertexSupport fixed_vertices(hg.initialNumNodes(), 2); + fixed_vertices.setHypergraph(&hg); + hg.doParallelForAllNodes([&](const HypernodeID &hn) { + if(hg.isFixed(hn)) + { + if(hg.fixedVertexBlock(hn) < m) + { + fixed_vertices.fixToBlock(hn, 0); } - }); - } + else + { + fixed_vertices.fixToBlock(hn, 1); + } + } + }); + hg.addFixedVertexSupport(std::move(fixed_vertices)); } +} - // Takes a hypergraph partitioned into two blocks as input and then recursively - // partitions one block into (k1 - b0) blocks - template - void recursively_bipartition_block(typename TypeTraits::PartitionedHypergraph& phg, - const Context& context, - const PartitionID block, const PartitionID k0, const PartitionID k1, - const OriginalHypergraphInfo& info, - const vec& already_cut, - const double degree_of_parallism); - - // Uses multilevel recursive bipartitioning to partition the given hypergraph into (k1 - k0) blocks - template - void recursive_bipartitioning(typename TypeTraits::PartitionedHypergraph& phg, - const Context& context, - const PartitionID k0, const PartitionID k1, - const OriginalHypergraphInfo& info, - vec& already_cut) { - using Hypergraph = typename TypeTraits::Hypergraph; - using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - if ( phg.initialNumNodes() > 0 ) { - // Multilevel Bipartitioning - const PartitionID k = (k1 - k0); - Hypergraph& hg = phg.hypergraph(); - ds::FixedVertexSupport fixed_vertices = hg.copyOfFixedVertexSupport(); - Context b_context = setupBipartitioningContext(hg, context, info); - setupFixedVerticesForBipartitioning(hg, k); - adaptWeightsOfNonCutEdges(hg, already_cut, context.partition.gain_policy, false); - DBG << "Multilevel Bipartitioning - Range = (" << k0 << "," << k1 << "), Epsilon =" << b_context.partition.epsilon; - PartitionedHypergraph bipartitioned_hg = Multilevel::partition(hg, b_context); - DBG << "Bipartitioning Result -" - << "Objective =" << metrics::quality(bipartitioned_hg, b_context) - << "Imbalance =" << metrics::imbalance(bipartitioned_hg, b_context) - << "(Target Imbalance =" << b_context.partition.epsilon << ")"; - adaptWeightsOfNonCutEdges(hg, already_cut, context.partition.gain_policy, true); - hg.addFixedVertexSupport(std::move(fixed_vertices)); - - // Apply bipartition to the input hypergraph - const PartitionID block_0 = 0; - const PartitionID block_1 = k / 2 + (k % 2); - phg.doParallelForAllNodes([&](const HypernodeID& hn) { - PartitionID part_id = bipartitioned_hg.partID(hn); - ASSERT(part_id != 
kInvalidPartition && part_id < phg.k()); - ASSERT(phg.partID(hn) == kInvalidPartition); - if ( part_id == 0 ) { - phg.setOnlyNodePart(hn, block_0); - } else { - phg.setOnlyNodePart(hn, block_1); +template +void setupFixedVerticesForRecursion(const Hypergraph &input_hg, Hypergraph &extracted_hg, + const vec &input2extracted, + const PartitionID k0, const PartitionID k1) +{ + if(input_hg.hasFixedVertices()) + { + ds::FixedVertexSupport fixed_vertices(extracted_hg.initialNumNodes(), + k1 - k0); + fixed_vertices.setHypergraph(&extracted_hg); + input_hg.doParallelForAllNodes([&](const HypernodeID &hn) { + if(input_hg.isFixed(hn)) + { + const PartitionID block = input_hg.fixedVertexBlock(hn); + if(block >= k0 && block < k1) + { + fixed_vertices.fixToBlock(input2extracted[hn], block - k0); } - }); - phg.initializePartition(); + } + }); + extracted_hg.addFixedVertexSupport(std::move(fixed_vertices)); + } +} + +bool usesAdaptiveWeightOfNonCutEdges(const Context &context) +{ + return BipartitioningPolicy::nonCutEdgeMultiplier(context.partition.gain_policy) != 1; +} - if ( usesAdaptiveWeightOfNonCutEdges(context) ) { - // Update cut hyperedges - phg.doParallelForAllEdges([&](const HyperedgeID& he) { - already_cut[he] |= phg.connectivity(he) > 1; - }); +template +void adaptWeightsOfNonCutEdges(Hypergraph &hg, const vec &already_cut, + const GainPolicy gain_policy, const bool revert) +{ + const HyperedgeWeight multiplier = + BipartitioningPolicy::nonCutEdgeMultiplier(gain_policy); + if(multiplier != 1) + { + ASSERT(static_cast(hg.initialNumEdges()) == already_cut.size()); + hg.doParallelForAllEdges([&](const HyperedgeID &he) { + if(!already_cut[he]) + { + hg.setEdgeWeight( + he, static_cast( + (revert ? 1.0 / multiplier : static_cast(multiplier)) * + hg.edgeWeight(he))); } + }); + } +} - ASSERT(metrics::quality(bipartitioned_hg, context) == - metrics::quality(phg, context)); - - ASSERT(context.partition.k >= 2); - PartitionID rb_k0 = context.partition.k / 2 + context.partition.k % 2; - PartitionID rb_k1 = context.partition.k / 2; - if ( rb_k0 >= 2 && rb_k1 >= 2 ) { - // Both blocks of the bipartition must to be further partitioned into at least two blocks. - DBG << "Current k = " << context.partition.k << "\n" - << "Block" << block_0 << "is further partitioned into k =" << rb_k0 << "blocks\n" - << "Block" << block_1 << "is further partitioned into k =" << rb_k1 << "blocks\n"; - tbb::task_group tg; - tg.run([&] { recursively_bipartition_block(phg, context, block_0, 0, rb_k0, info, already_cut, 0.5); }); - tg.run([&] { recursively_bipartition_block(phg, context, block_1, rb_k0, rb_k0 + rb_k1, info, already_cut, 0.5); }); - tg.wait(); - } else if ( rb_k0 >= 2 ) { - ASSERT(rb_k1 < 2); - // Only the first block needs to be further partitioned into at least two blocks. 
- DBG << "Current k = " << context.partition.k << "\n" - << "Block" << block_0 << "is further partitioned into k =" << rb_k0 << "blocks\n"; - recursively_bipartition_block(phg, context, block_0, 0, rb_k0, info, already_cut, 1.0); +// Takes a hypergraph partitioned into two blocks as input and then recursively +// partitions one block into (k1 - b0) blocks +template +void recursively_bipartition_block(typename TypeTraits::PartitionedHypergraph &phg, + const Context &context, const PartitionID block, + const PartitionID k0, const PartitionID k1, + const OriginalHypergraphInfo &info, + const vec &already_cut, + const double degree_of_parallism); + +// Uses multilevel recursive bipartitioning to partition the given hypergraph into (k1 - +// k0) blocks +template +void recursive_bipartitioning(typename TypeTraits::PartitionedHypergraph &phg, + const Context &context, const PartitionID k0, + const PartitionID k1, const OriginalHypergraphInfo &info, + vec &already_cut) +{ + using Hypergraph = typename TypeTraits::Hypergraph; + using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; + if(phg.initialNumNodes() > 0) + { + // Multilevel Bipartitioning + const PartitionID k = (k1 - k0); + Hypergraph &hg = phg.hypergraph(); + ds::FixedVertexSupport fixed_vertices = hg.copyOfFixedVertexSupport(); + Context b_context = setupBipartitioningContext(hg, context, info); + setupFixedVerticesForBipartitioning(hg, k); + adaptWeightsOfNonCutEdges(hg, already_cut, context.partition.gain_policy, false); + DBG << "Multilevel Bipartitioning - Range = (" << k0 << "," << k1 + << "), Epsilon =" << b_context.partition.epsilon; + PartitionedHypergraph bipartitioned_hg = + Multilevel::partition(hg, b_context); + DBG << "Bipartitioning Result -" + << "Objective =" << metrics::quality(bipartitioned_hg, b_context) + << "Imbalance =" << metrics::imbalance(bipartitioned_hg, b_context) + << "(Target Imbalance =" << b_context.partition.epsilon << ")"; + adaptWeightsOfNonCutEdges(hg, already_cut, context.partition.gain_policy, true); + hg.addFixedVertexSupport(std::move(fixed_vertices)); + + // Apply bipartition to the input hypergraph + const PartitionID block_0 = 0; + const PartitionID block_1 = k / 2 + (k % 2); + phg.doParallelForAllNodes([&](const HypernodeID &hn) { + PartitionID part_id = bipartitioned_hg.partID(hn); + ASSERT(part_id != kInvalidPartition && part_id < phg.k()); + ASSERT(phg.partID(hn) == kInvalidPartition); + if(part_id == 0) + { + phg.setOnlyNodePart(hn, block_0); } + else + { + phg.setOnlyNodePart(hn, block_1); + } + }); + phg.initializePartition(); + + if(usesAdaptiveWeightOfNonCutEdges(context)) + { + // Update cut hyperedges + phg.doParallelForAllEdges( + [&](const HyperedgeID &he) { already_cut[he] |= phg.connectivity(he) > 1; }); + } + + ASSERT(metrics::quality(bipartitioned_hg, context) == metrics::quality(phg, context)); + + ASSERT(context.partition.k >= 2); + PartitionID rb_k0 = context.partition.k / 2 + context.partition.k % 2; + PartitionID rb_k1 = context.partition.k / 2; + if(rb_k0 >= 2 && rb_k1 >= 2) + { + // Both blocks of the bipartition must to be further partitioned into at least two + // blocks. 
+ DBG << "Current k = " << context.partition.k << "\n" + << "Block" << block_0 << "is further partitioned into k =" << rb_k0 + << "blocks\n" + << "Block" << block_1 << "is further partitioned into k =" << rb_k1 + << "blocks\n"; + tbb::task_group tg; + tg.run([&] { + recursively_bipartition_block(phg, context, block_0, 0, rb_k0, info, + already_cut, 0.5); + }); + tg.run([&] { + recursively_bipartition_block(phg, context, block_1, rb_k0, + rb_k0 + rb_k1, info, already_cut, 0.5); + }); + tg.wait(); + } + else if(rb_k0 >= 2) + { + ASSERT(rb_k1 < 2); + // Only the first block needs to be further partitioned into at least two blocks. + DBG << "Current k = " << context.partition.k << "\n" + << "Block" << block_0 << "is further partitioned into k =" << rb_k0 + << "blocks\n"; + recursively_bipartition_block(phg, context, block_0, 0, rb_k0, info, + already_cut, 1.0); } } } +} -template -void rb::recursively_bipartition_block(typename TypeTraits::PartitionedHypergraph& phg, - const Context& context, - const PartitionID block, const PartitionID k0, const PartitionID k1, - const OriginalHypergraphInfo& info, - const vec& already_cut, - const double degree_of_parallism) { +template +void rb::recursively_bipartition_block(typename TypeTraits::PartitionedHypergraph &phg, + const Context &context, const PartitionID block, + const PartitionID k0, const PartitionID k1, + const OriginalHypergraphInfo &info, + const vec &already_cut, + const double degree_of_parallism) +{ using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - Context rb_context = setupRecursiveBipartitioningContext(context, k0, k1, degree_of_parallism); + Context rb_context = + setupRecursiveBipartitioningContext(context, k0, k1, degree_of_parallism); // Extracts the block of the hypergraph which we recursively want to partition const bool cut_net_splitting = - BipartitioningPolicy::useCutNetSplitting(context.partition.gain_policy); - auto extracted_block = phg.extract(block, !already_cut.empty() ? &already_cut : nullptr, - cut_net_splitting, context.preprocessing.stable_construction_of_incident_edges); - Hypergraph& rb_hg = extracted_block.hg; - auto& mapping = extracted_block.hn_mapping; + BipartitioningPolicy::useCutNetSplitting(context.partition.gain_policy); + auto extracted_block = + phg.extract(block, !already_cut.empty() ? 
&already_cut : nullptr, cut_net_splitting, + context.preprocessing.stable_construction_of_incident_edges); + Hypergraph &rb_hg = extracted_block.hg; + auto &mapping = extracted_block.hn_mapping; setupFixedVerticesForRecursion(phg.hypergraph(), rb_hg, mapping, k0, k1); - if ( rb_hg.initialNumNodes() > 0 ) { + if(rb_hg.initialNumNodes() > 0) + { // Recursively partition the given block into (k1 - k0) blocks PartitionedHypergraph rb_phg(rb_context.partition.k, rb_hg, parallel_tag_t()); - recursive_bipartitioning(rb_phg, rb_context, - k0, k1, info, extracted_block.already_cut); + recursive_bipartitioning(rb_phg, rb_context, k0, k1, info, + extracted_block.already_cut); ASSERT(phg.initialNumNodes() == mapping.size()); // Apply k-way partition to the input hypergraph - phg.doParallelForAllNodes([&](const HypernodeID& hn) { - if ( phg.partID(hn) == block ) { + phg.doParallelForAllNodes([&](const HypernodeID &hn) { + if(phg.partID(hn) == block) + { ASSERT(hn < mapping.size()); PartitionID to = block + rb_phg.partID(mapping[hn]); ASSERT(to != kInvalidPartition && to < phg.k()); - if ( block != to ) { + if(block != to) + { phg.changeNodePart(hn, block, to, NOOP_FUNC, true); } } }); DBG << "Recursive Bipartitioning Result -" - << "k =" << (k1 - k0) - << "Objective =" << metrics::quality(phg, context) + << "k =" << (k1 - k0) << "Objective =" << metrics::quality(phg, context) << "Imbalance =" << metrics::imbalance(phg, rb_context) << "(Target Imbalance =" << rb_context.partition.epsilon << ")"; - } } -template +template typename RecursiveBipartitioning::PartitionedHypergraph -RecursiveBipartitioning::partition(Hypergraph& hypergraph, - const Context& context, - const TargetGraph* target_graph) { - PartitionedHypergraph partitioned_hypergraph(context.partition.k, hypergraph, parallel_tag_t()); +RecursiveBipartitioning::partition(Hypergraph &hypergraph, + const Context &context, + const TargetGraph *target_graph) +{ + PartitionedHypergraph partitioned_hypergraph(context.partition.k, hypergraph, + parallel_tag_t()); partition(partitioned_hypergraph, context, target_graph); return partitioned_hypergraph; } -template -void RecursiveBipartitioning::partition(PartitionedHypergraph& hypergraph, - const Context& context, - const TargetGraph* target_graph) { +template +void RecursiveBipartitioning::partition(PartitionedHypergraph &hypergraph, + const Context &context, + const TargetGraph *target_graph) +{ unused(target_graph); - utils::Utilities& utils = utils::Utilities::instance(); - if (context.partition.mode == Mode::recursive_bipartitioning) { + utils::Utilities &utils = utils::Utilities::instance(); + if(context.partition.mode == Mode::recursive_bipartitioning) + { utils.getTimer(context.utility_id).start_timer("rb", "Recursive Bipartitioning"); } - if (context.type == ContextType::main) { + if(context.type == ContextType::main) + { parallel::MemoryPool::instance().deactivate_unused_memory_allocations(); utils.getTimer(context.utility_id).disable(); utils.getStats(context.utility_id).disable(); } Context rb_context(context); - if ( rb_context.partition.objective == Objective::steiner_tree ) { + if(rb_context.partition.objective == Objective::steiner_tree) + { // In RB mode, we optimize the km1 metric for the steiner tree metric and // apply the permutation computed in the target graph to the partition. - rb_context.partition.objective = PartitionedHypergraph::is_graph ? - Objective::cut : Objective::km1; - rb_context.partition.gain_policy = PartitionedHypergraph::is_graph ? 
- GainPolicy::cut_for_graphs : GainPolicy::km1; + rb_context.partition.objective = + PartitionedHypergraph::is_graph ? Objective::cut : Objective::km1; + rb_context.partition.gain_policy = + PartitionedHypergraph::is_graph ? GainPolicy::cut_for_graphs : GainPolicy::km1; } - if ( context.type == ContextType::initial_partitioning ) { + if(context.type == ContextType::initial_partitioning) + { rb_context.partition.verbose_output = false; } - vec already_cut(rb::usesAdaptiveWeightOfNonCutEdges(context) ? - hypergraph.initialNumEdges() : 0, 0); - rb::recursive_bipartitioning(hypergraph, rb_context, 0, rb_context.partition.k, - OriginalHypergraphInfo { hypergraph.totalWeight(), rb_context.partition.k, - rb_context.partition.epsilon }, already_cut); + vec already_cut( + rb::usesAdaptiveWeightOfNonCutEdges(context) ? hypergraph.initialNumEdges() : 0, 0); + rb::recursive_bipartitioning( + hypergraph, rb_context, 0, rb_context.partition.k, + OriginalHypergraphInfo{ hypergraph.totalWeight(), rb_context.partition.k, + rb_context.partition.epsilon }, + already_cut); - if (context.type == ContextType::main) { + if(context.type == ContextType::main) + { parallel::MemoryPool::instance().activate_unused_memory_allocations(); utils.getTimer(context.utility_id).enable(); utils.getStats(context.utility_id).enable(); } - #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC - if ( context.partition.objective == Objective::steiner_tree ) { +#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC + if(context.partition.objective == Objective::steiner_tree) + { ASSERT(target_graph); - utils::Timer& timer = utils.getTimer(context.utility_id); + utils::Timer &timer = utils.getTimer(context.utility_id); const bool was_enabled = timer.isEnabled(); timer.enable(); timer.start_timer("one_to_one_mapping", "One-To-One Mapping"); // Map partition onto target graph - InitialMapping::mapToTargetGraph( - hypergraph, *target_graph, context); + InitialMapping::mapToTargetGraph(hypergraph, *target_graph, context); timer.stop_timer("one_to_one_mapping"); - if ( !was_enabled ) { + if(!was_enabled) + { timer.disable(); } } - #endif +#endif - if (context.partition.mode == Mode::recursive_bipartitioning) { + if(context.partition.mode == Mode::recursive_bipartitioning) + { utils.getTimer(context.utility_id).stop_timer("rb"); } } diff --git a/mt-kahypar/partition/recursive_bipartitioning.h b/mt-kahypar/partition/recursive_bipartitioning.h index 6690dee76..aad5b831b 100644 --- a/mt-kahypar/partition/recursive_bipartitioning.h +++ b/mt-kahypar/partition/recursive_bipartitioning.h @@ -34,21 +34,19 @@ namespace mt_kahypar { // Forward Declaration class TargetGraph; -template -class RecursiveBipartitioning { +template +class RecursiveBipartitioning +{ using Hypergraph = typename TypeTraits::Hypergraph; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - public: +public: // ! Partitions a hypergraph using multilevel recursive bipartitioning. 
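For orientation, a minimal call-site sketch of this interface (purely illustrative: it assumes the declarations from this header, an already-constructed hypergraph, and a fully set-up Context; the helper name partition_into_k is hypothetical):

template <typename TypeTraits>
typename TypeTraits::PartitionedHypergraph
partition_into_k(typename TypeTraits::Hypergraph &hypergraph, const Context &context)
{
  // Direct k-way partitioning via multilevel recursive bipartitioning; the optional
  // TargetGraph pointer stays at its default (nullptr), i.e. no mapping onto a
  // target architecture graph is requested.
  return RecursiveBipartitioning<TypeTraits>::partition(hypergraph, context);
}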
- static PartitionedHypergraph partition(Hypergraph& hypergraph, - const Context& context, - const TargetGraph* target_graph = nullptr); - static void partition(PartitionedHypergraph& hypergraph, - const Context& context, - const TargetGraph* target_graph = nullptr); - + static PartitionedHypergraph partition(Hypergraph &hypergraph, const Context &context, + const TargetGraph *target_graph = nullptr); + static void partition(PartitionedHypergraph &hypergraph, const Context &context, + const TargetGraph *target_graph = nullptr); }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/deterministic/deterministic_label_propagation.cpp b/mt-kahypar/partition/refinement/deterministic/deterministic_label_propagation.cpp index c12d13732..4dd92a3b7 100644 --- a/mt-kahypar/partition/refinement/deterministic/deterministic_label_propagation.cpp +++ b/mt-kahypar/partition/refinement/deterministic/deterministic_label_propagation.cpp @@ -27,488 +27,616 @@ #include "deterministic_label_propagation.h" #include "mt-kahypar/definitions.h" -#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/parallel/chunking.h" #include "mt-kahypar/parallel/parallel_counting_sort.h" +#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/utils/cast.h" -#include #include +#include namespace mt_kahypar { - template - bool DeterministicLabelPropagationRefiner::refineImpl( - mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec&, - Metrics& best_metrics, - const double) { - PartitionedHypergraph& phg = utils::cast(hypergraph); - Gain overall_improvement = 0; - - if (context.partition.k != current_k) { - current_k = context.partition.k; - gain_computation.changeNumberOfBlocks(current_k); - } +template +bool DeterministicLabelPropagationRefiner::refineImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph, const vec &, + Metrics &best_metrics, const double) +{ + PartitionedHypergraph &phg = utils::cast(hypergraph); + Gain overall_improvement = 0; - constexpr size_t num_buckets = utils::ParallelPermutation::num_buckets; - size_t num_sub_rounds = context.refinement.deterministic_refinement.num_sub_rounds_sync_lp; + if(context.partition.k != current_k) + { + current_k = context.partition.k; + gain_computation.changeNumberOfBlocks(current_k); + } - for (size_t iter = 0; iter < context.refinement.label_propagation.maximum_iterations; ++iter) { - if (context.refinement.deterministic_refinement.use_active_node_set && ++round == 0) { - std::fill(last_moved_in_round.begin(), last_moved_in_round.end(), CAtomic(0)); - } + constexpr size_t num_buckets = utils::ParallelPermutation::num_buckets; + size_t num_sub_rounds = + context.refinement.deterministic_refinement.num_sub_rounds_sync_lp; - // size == 0 means no node was moved last round, but there were positive gains --> try again with different permutation - if (!context.refinement.deterministic_refinement.use_active_node_set || iter == 0 || active_nodes.size() == 0) { - permutation.random_grouping(phg.initialNumNodes(), context.shared_memory.static_balancing_work_packages,prng()); - } else { - tbb::parallel_sort(active_nodes.begin(), active_nodes.end()); - permutation.sample_buckets_and_group_by(active_nodes.range(), - context.shared_memory.static_balancing_work_packages, prng()); - } - active_nodes.clear(); - - const size_t num_buckets_per_sub_round = parallel::chunking::idiv_ceil(num_buckets, num_sub_rounds); - size_t num_moves = 0; - Gain round_improvement = 0; - bool increase_sub_rounds = false; - for (size_t 
sub_round = 0; sub_round < num_sub_rounds; ++sub_round) { - auto[first_bucket, last_bucket] = parallel::chunking::bounds(sub_round, num_buckets, num_buckets_per_sub_round); - ASSERT(first_bucket < last_bucket && last_bucket < permutation.bucket_bounds.size()); - size_t first = permutation.bucket_bounds[first_bucket], last = permutation.bucket_bounds[last_bucket]; - moves.clear(); - - // calculate moves - tbb::parallel_for(HypernodeID(first), HypernodeID(last), [&](const HypernodeID position) { - ASSERT(position < permutation.permutation.size()); - const HypernodeID u = permutation.at(position); - ASSERT(u < phg.initialNumNodes()); - if (phg.isFixed(u) || !phg.nodeIsEnabled(u) || !phg.isBorderNode(u)) return; - Move move = gain_computation.computeMaxGainMove(phg, u, /*rebalance=*/false, /*consider_non_adjacent_blocks=*/false, /*allow_imbalance=*/true); - move.gain = -move.gain; - if (move.gain > 0 && move.to != phg.partID(u)) { - moves.push_back_buffered(move); - } - }); - - moves.finalize(); + for(size_t iter = 0; iter < context.refinement.label_propagation.maximum_iterations; + ++iter) + { + if(context.refinement.deterministic_refinement.use_active_node_set && ++round == 0) + { + std::fill(last_moved_in_round.begin(), last_moved_in_round.end(), + CAtomic(0)); + } - Gain sub_round_improvement = 0; - size_t num_moves_in_sub_round = moves.size(); - if (num_moves_in_sub_round > 0) { - bool reverted = false; - std::tie(sub_round_improvement, reverted) = applyMovesByMaximalPrefixesInBlockPairs(phg); - increase_sub_rounds |= reverted; - if (sub_round_improvement > 0 && moves.size() > 0) { - sub_round_improvement += applyMovesSortedByGainAndRevertUnbalanced(phg); - } + // size == 0 means no node was moved last round, but there were positive gains --> try + // again with different permutation + if(!context.refinement.deterministic_refinement.use_active_node_set || iter == 0 || + active_nodes.size() == 0) + { + permutation.random_grouping(phg.initialNumNodes(), + context.shared_memory.static_balancing_work_packages, + prng()); + } + else + { + tbb::parallel_sort(active_nodes.begin(), active_nodes.end()); + permutation.sample_buckets_and_group_by( + active_nodes.range(), context.shared_memory.static_balancing_work_packages, + prng()); + } + active_nodes.clear(); + + const size_t num_buckets_per_sub_round = + parallel::chunking::idiv_ceil(num_buckets, num_sub_rounds); + size_t num_moves = 0; + Gain round_improvement = 0; + bool increase_sub_rounds = false; + for(size_t sub_round = 0; sub_round < num_sub_rounds; ++sub_round) + { + auto [first_bucket, last_bucket] = + parallel::chunking::bounds(sub_round, num_buckets, num_buckets_per_sub_round); + ASSERT(first_bucket < last_bucket && + last_bucket < permutation.bucket_bounds.size()); + size_t first = permutation.bucket_bounds[first_bucket], + last = permutation.bucket_bounds[last_bucket]; + moves.clear(); + + // calculate moves + tbb::parallel_for( + HypernodeID(first), HypernodeID(last), [&](const HypernodeID position) { + ASSERT(position < permutation.permutation.size()); + const HypernodeID u = permutation.at(position); + ASSERT(u < phg.initialNumNodes()); + if(phg.isFixed(u) || !phg.nodeIsEnabled(u) || !phg.isBorderNode(u)) + return; + Move move = gain_computation.computeMaxGainMove( + phg, u, /*rebalance=*/false, /*consider_non_adjacent_blocks=*/false, + /*allow_imbalance=*/true); + move.gain = -move.gain; + if(move.gain > 0 && move.to != phg.partID(u)) + { + moves.push_back_buffered(move); + } + }); + + moves.finalize(); + + Gain 
sub_round_improvement = 0; + size_t num_moves_in_sub_round = moves.size(); + if(num_moves_in_sub_round > 0) + { + bool reverted = false; + std::tie(sub_round_improvement, reverted) = + applyMovesByMaximalPrefixesInBlockPairs(phg); + increase_sub_rounds |= reverted; + if(sub_round_improvement > 0 && moves.size() > 0) + { + sub_round_improvement += applyMovesSortedByGainAndRevertUnbalanced(phg); } - round_improvement += sub_round_improvement; - num_moves += num_moves_in_sub_round; - } - overall_improvement += round_improvement; - active_nodes.finalize(); - - if (increase_sub_rounds) { - num_sub_rounds = std::min(num_buckets, num_sub_rounds * 2); - } - if (num_moves == 0) { - break; // no vertices with positive gain --> stop } + round_improvement += sub_round_improvement; + num_moves += num_moves_in_sub_round; } + overall_improvement += round_improvement; + active_nodes.finalize(); - best_metrics.quality -= overall_improvement; - best_metrics.imbalance = metrics::imbalance(phg, context); - if (context.type == ContextType::main) { - DBG << V(best_metrics.quality) << V(best_metrics.imbalance); + if(increase_sub_rounds) + { + num_sub_rounds = std::min(num_buckets, num_sub_rounds * 2); } - return overall_improvement > 0; + if(num_moves == 0) + { + break; // no vertices with positive gain --> stop + } + } + + best_metrics.quality -= overall_improvement; + best_metrics.imbalance = metrics::imbalance(phg, context); + if(context.type == ContextType::main) + { + DBG << V(best_metrics.quality) << V(best_metrics.imbalance); } + return overall_improvement > 0; +} /* - * for configs where we don't know exact gains --> have to trace the overall improvement with attributed gains + * for configs where we don't know exact gains --> have to trace the overall improvement + * with attributed gains */ - template - Gain DeterministicLabelPropagationRefiner::performMoveWithAttributedGain( - PartitionedHypergraph& phg, const Move& m, bool activate_neighbors) { - Gain attributed_gain = 0; - auto objective_delta = [&](const SynchronizedEdgeUpdate& sync_update) { - attributed_gain -= AttributedGains::gain(sync_update); - }; - const bool was_moved = phg.changeNodePart(m.node, m.from, m.to, objective_delta); - if (context.refinement.deterministic_refinement.use_active_node_set && activate_neighbors && was_moved) { - // activate neighbors for next round - const HypernodeID n = phg.initialNumNodes(); - for (HyperedgeID he : phg.incidentEdges(m.node)) { - if (phg.edgeSize(he) <= context.refinement.label_propagation.hyperedge_size_activation_threshold) { - if (last_moved_in_round[he + n].load(std::memory_order_relaxed) != round) { - last_moved_in_round[he + n].store(round, std::memory_order_relaxed); // no need for atomic semantics - for (HypernodeID v : phg.pins(he)) { - uint32_t lrv = last_moved_in_round[v].load(std::memory_order_relaxed); - if (lrv != round && - last_moved_in_round[v].compare_exchange_strong(lrv, round, std::memory_order_acq_rel)) { - active_nodes.push_back_buffered(v); - } +template +Gain DeterministicLabelPropagationRefiner< + GraphAndGainTypes>::performMoveWithAttributedGain(PartitionedHypergraph &phg, + const Move &m, + bool activate_neighbors) +{ + Gain attributed_gain = 0; + auto objective_delta = [&](const SynchronizedEdgeUpdate &sync_update) { + attributed_gain -= AttributedGains::gain(sync_update); + }; + const bool was_moved = phg.changeNodePart(m.node, m.from, m.to, objective_delta); + if(context.refinement.deterministic_refinement.use_active_node_set && + activate_neighbors && was_moved) + { + 
// activate neighbors for next round + const HypernodeID n = phg.initialNumNodes(); + for(HyperedgeID he : phg.incidentEdges(m.node)) + { + if(phg.edgeSize(he) <= + context.refinement.label_propagation.hyperedge_size_activation_threshold) + { + if(last_moved_in_round[he + n].load(std::memory_order_relaxed) != round) + { + last_moved_in_round[he + n].store( + round, std::memory_order_relaxed); // no need for atomic semantics + for(HypernodeID v : phg.pins(he)) + { + uint32_t lrv = last_moved_in_round[v].load(std::memory_order_relaxed); + if(lrv != round && last_moved_in_round[v].compare_exchange_strong( + lrv, round, std::memory_order_acq_rel)) + { + active_nodes.push_back_buffered(v); } } } } } - return attributed_gain; } - - template - template - Gain DeterministicLabelPropagationRefiner::applyMovesIf( - PartitionedHypergraph& phg, const vec& my_moves, size_t end, Predicate&& predicate) { - auto range = tbb::blocked_range(UL(0), end); - auto accum = [&](const tbb::blocked_range& r, const Gain& init) -> Gain { - Gain my_gain = init; - for (size_t i = r.begin(); i < r.end(); ++i) { - if (predicate(i)) { - my_gain += performMoveWithAttributedGain(phg, my_moves[i], true); - } + return attributed_gain; +} + +template +template +Gain DeterministicLabelPropagationRefiner::applyMovesIf( + PartitionedHypergraph &phg, const vec &my_moves, size_t end, + Predicate &&predicate) +{ + auto range = tbb::blocked_range(UL(0), end); + auto accum = [&](const tbb::blocked_range &r, const Gain &init) -> Gain { + Gain my_gain = init; + for(size_t i = r.begin(); i < r.end(); ++i) + { + if(predicate(i)) + { + my_gain += performMoveWithAttributedGain(phg, my_moves[i], true); } - return my_gain; - }; - return tbb::parallel_reduce(range, 0, accum, std::plus<>()); - } + } + return my_gain; + }; + return tbb::parallel_reduce(range, 0, accum, std::plus<>()); +} + +template +vec aggregatePartWeightDeltas(PartitionedHypergraph &phg, + PartitionID current_k, + const vec &moves, size_t end) +{ + // parallel reduce makes way too many vector copies + tbb::enumerable_thread_specific > ets_part_weight_diffs(current_k, + 0); + auto accum = [&](const tbb::blocked_range &r) { + auto &part_weights = ets_part_weight_diffs.local(); + for(size_t i = r.begin(); i < r.end(); ++i) + { + part_weights[moves[i].from] -= phg.nodeWeight(moves[i].node); + part_weights[moves[i].to] += phg.nodeWeight(moves[i].node); + } + }; + tbb::parallel_for(tbb::blocked_range(UL(0), end), accum); + vec res(current_k, 0); + auto combine = [&](const vec &a) { + for(size_t i = 0; i < res.size(); ++i) + { + res[i] += a[i]; + } + }; + ets_part_weight_diffs.combine_each(combine); + return res; +} + +template +Gain DeterministicLabelPropagationRefiner:: + applyMovesSortedByGainAndRevertUnbalanced(PartitionedHypergraph &phg) +{ + const size_t num_moves = moves.size(); + tbb::parallel_sort( + moves.begin(), moves.begin() + num_moves, [](const Move &m1, const Move &m2) { + return m1.gain > m2.gain || (m1.gain == m2.gain && m1.node < m2.node); + }); - template - vec aggregatePartWeightDeltas(PartitionedHypergraph& phg, PartitionID current_k, const vec& moves, size_t end) { - // parallel reduce makes way too many vector copies - tbb::enumerable_thread_specific> - ets_part_weight_diffs(current_k, 0); - auto accum = [&](const tbb::blocked_range& r) { - auto& part_weights = ets_part_weight_diffs.local(); - for (size_t i = r.begin(); i < r.end(); ++i) { - part_weights[moves[i].from] -= phg.nodeWeight(moves[i].node); - part_weights[moves[i].to] += 
phg.nodeWeight(moves[i].node); - } - }; - tbb::parallel_for(tbb::blocked_range(UL(0), end), accum); - vec res(current_k, 0); - auto combine = [&](const vec& a) { - for (size_t i = 0; i < res.size(); ++i) { - res[i] += a[i]; - } - }; - ets_part_weight_diffs.combine_each(combine); - return res; + const auto &max_part_weights = context.partition.max_part_weights; + size_t num_overloaded_blocks = 0, num_overloaded_before_round = 0; + vec part_weights = + aggregatePartWeightDeltas(phg, current_k, moves.getData(), num_moves); + for(PartitionID i = 0; i < current_k; ++i) + { + part_weights[i] += phg.partWeight(i); + if(part_weights[i] > max_part_weights[i]) + { + num_overloaded_blocks++; + } + if(phg.partWeight(i) > max_part_weights[i]) + { + num_overloaded_before_round++; + } } - template - Gain DeterministicLabelPropagationRefiner::applyMovesSortedByGainAndRevertUnbalanced(PartitionedHypergraph& phg) { - const size_t num_moves = moves.size(); - tbb::parallel_sort(moves.begin(), moves.begin() + num_moves, [](const Move& m1, const Move& m2) { - return m1.gain > m2.gain || (m1.gain == m2.gain && m1.node < m2.node); - }); + size_t num_overloaded_before_first_pass = num_overloaded_blocks; + size_t num_reverted_moves = 0; + size_t j = num_moves; + + auto revert_move = [&](Move &m) { + part_weights[m.to] -= phg.nodeWeight(m.node); + part_weights[m.from] += phg.nodeWeight(m.node); + m.invalidate(); + num_reverted_moves++; + if(part_weights[m.to] <= max_part_weights[m.to]) + { + num_overloaded_blocks--; + } + }; - const auto& max_part_weights = context.partition.max_part_weights; - size_t num_overloaded_blocks = 0, num_overloaded_before_round = 0; - vec part_weights = aggregatePartWeightDeltas(phg, current_k, moves.getData(), num_moves); - for (PartitionID i = 0; i < current_k; ++i) { - part_weights[i] += phg.partWeight(i); - if (part_weights[i] > max_part_weights[i]) { - num_overloaded_blocks++; - } - if (phg.partWeight(i) > max_part_weights[i]) { - num_overloaded_before_round++; - } + while(num_overloaded_blocks > 0 && j > 0) + { + Move &m = moves[--j]; + if(part_weights[m.to] > max_part_weights[m.to] && + part_weights[m.from] + phg.nodeWeight(m.node) <= max_part_weights[m.from]) + { + revert_move(m); } + } - size_t num_overloaded_before_first_pass = num_overloaded_blocks; - size_t num_reverted_moves = 0; - size_t j = num_moves; - - auto revert_move = [&](Move& m) { - part_weights[m.to] -= phg.nodeWeight(m.node); - part_weights[m.from] += phg.nodeWeight(m.node); - m.invalidate(); - num_reverted_moves++; - if (part_weights[m.to] <= max_part_weights[m.to]) { - num_overloaded_blocks--; + if(num_overloaded_blocks > 0) + { + DBG << "still overloaded" << num_overloaded_blocks << V(num_moves) + << V(num_reverted_moves) << V(num_overloaded_before_round) + << V(num_overloaded_before_first_pass) << "trigger second run"; + + size_t num_extra_rounds = 1; + j = num_moves; + size_t last_valid_move = 0; + while(num_overloaded_blocks > 0) + { + if(j == 0) + { + j = last_valid_move; + last_valid_move = 0; + num_extra_rounds++; } - }; - - while (num_overloaded_blocks > 0 && j > 0) { - Move& m = moves[--j]; - if (part_weights[m.to] > max_part_weights[m.to] - && part_weights[m.from] + phg.nodeWeight(m.node) <= max_part_weights[m.from]) { + Move &m = moves[j - 1]; + if(m.isValid() && part_weights[m.to] > max_part_weights[m.to]) + { + if(part_weights[m.from] + phg.nodeWeight(m.node) > max_part_weights[m.from] && + part_weights[m.from] <= max_part_weights[m.from]) + { + num_overloaded_blocks++; + } revert_move(m); } - } - - 
if (num_overloaded_blocks > 0) { - DBG << "still overloaded" << num_overloaded_blocks << V(num_moves) << V(num_reverted_moves) - << V(num_overloaded_before_round) << V(num_overloaded_before_first_pass) << "trigger second run"; - - size_t num_extra_rounds = 1; - j = num_moves; - size_t last_valid_move = 0; - while (num_overloaded_blocks > 0) { - if (j == 0) { - j = last_valid_move; - last_valid_move = 0; - num_extra_rounds++; - } - Move& m = moves[j - 1]; - if (m.isValid() && part_weights[m.to] > max_part_weights[m.to]) { - if (part_weights[m.from] + phg.nodeWeight(m.node) > max_part_weights[m.from] - && part_weights[m.from] <= max_part_weights[m.from]) { - num_overloaded_blocks++; - } - revert_move(m); - } - if (last_valid_move == 0 && m.isValid()) { - last_valid_move = j; - } - --j; + if(last_valid_move == 0 && m.isValid()) + { + last_valid_move = j; } - - DBG << V(num_reverted_moves) << V(num_extra_rounds); + --j; } - // apply all moves that were not invalidated - Gain gain = applyMovesIf(phg, moves.getData(), num_moves, [&](size_t pos) { return moves[pos].isValid(); }); - - // if that decreased solution quality, revert it all - if (gain < 0) { - gain += applyMovesIf(phg, moves.getData(), num_moves, [&](size_t pos) { - if (moves[pos].isValid()) { - std::swap(moves[pos].from, moves[pos].to); - return true; - } else { - return false; - } - }); - assert(gain == 0); - } - return gain; + DBG << V(num_reverted_moves) << V(num_extra_rounds); } - template - std::pair DeterministicLabelPropagationRefiner::applyMovesByMaximalPrefixesInBlockPairs(PartitionedHypergraph& phg) { - PartitionID k = current_k; - PartitionID max_key = k * k; - auto index = [&](PartitionID b1, PartitionID b2) { return b1 * k + b2; }; - auto get_key = [&](const Move& m) { return index(m.from, m.to); }; - - const size_t num_moves = moves.size(); + // apply all moves that were not invalidated + Gain gain = applyMovesIf(phg, moves.getData(), num_moves, + [&](size_t pos) { return moves[pos].isValid(); }); - // aggregate moves by direction. not in-place because of counting sort. 
- // but it gives us the positions of the buckets right away - auto positions = parallel::counting_sort(moves, sorted_moves, max_key, get_key, - context.shared_memory.num_threads); - - auto has_moves = [&](PartitionID p1, PartitionID p2) { - size_t direction = index(p1, p2); - return positions[direction + 1] != positions[direction]; - }; - - vec> relevant_block_pairs; - vec involvements(k, 0); - for (PartitionID p1 = 0; p1 < k; ++p1) { - for (PartitionID p2 = p1 + 1; p2 < k; ++p2) { - if (has_moves(p1, p2) || has_moves(p2, p1)) { - relevant_block_pairs.emplace_back(p1, p2); - } - // more involvements reduce slack --> only increment involvements if vertices are moved into that block - if (has_moves(p1, p2)) { - involvements[p2]++; - } - if (has_moves(p2, p1)) { - involvements[p1]++; - } + // if that decreased solution quality, revert it all + if(gain < 0) + { + gain += applyMovesIf(phg, moves.getData(), num_moves, [&](size_t pos) { + if(moves[pos].isValid()) + { + std::swap(moves[pos].from, moves[pos].to); + return true; + } + else + { + return false; + } + }); + assert(gain == 0); + } + return gain; +} + +template +std::pair DeterministicLabelPropagationRefiner:: + applyMovesByMaximalPrefixesInBlockPairs(PartitionedHypergraph &phg) +{ + PartitionID k = current_k; + PartitionID max_key = k * k; + auto index = [&](PartitionID b1, PartitionID b2) { return b1 * k + b2; }; + auto get_key = [&](const Move &m) { return index(m.from, m.to); }; + + const size_t num_moves = moves.size(); + + // aggregate moves by direction. not in-place because of counting sort. + // but it gives us the positions of the buckets right away + auto positions = parallel::counting_sort(moves, sorted_moves, max_key, get_key, + context.shared_memory.num_threads); + + auto has_moves = [&](PartitionID p1, PartitionID p2) { + size_t direction = index(p1, p2); + return positions[direction + 1] != positions[direction]; + }; + + vec > relevant_block_pairs; + vec involvements(k, 0); + for(PartitionID p1 = 0; p1 < k; ++p1) + { + for(PartitionID p2 = p1 + 1; p2 < k; ++p2) + { + if(has_moves(p1, p2) || has_moves(p2, p1)) + { + relevant_block_pairs.emplace_back(p1, p2); + } + // more involvements reduce slack --> only increment involvements if vertices are + // moved into that block + if(has_moves(p1, p2)) + { + involvements[p2]++; + } + if(has_moves(p2, p1)) + { + involvements[p1]++; } } + } - // swap_prefix[index(p1,p2)] stores the first position of moves to revert out of the sequence of moves from p1 to p2 - vec swap_prefix(max_key, 0); - tbb::parallel_for(size_t(0), relevant_block_pairs.size(), [&](size_t bp_index) { - // sort both directions by gain (alternative: gain / weight?) 
- auto sort_by_gain_and_prefix_sum_node_weights = [&](PartitionID p1, PartitionID p2) { - size_t begin = positions[index(p1, p2)], end = positions[index(p1, p2) + 1]; - auto comp = [&](const Move& m1, const Move& m2) { - return m1.gain > m2.gain || (m1.gain == m2.gain && m1.node < m2.node); - }; - tbb::parallel_sort(sorted_moves.begin() + begin, sorted_moves.begin() + end, comp); - tbb::parallel_for(begin, end, [&](size_t pos) { - cumulative_node_weights[pos] = phg.nodeWeight(sorted_moves[pos].node); - }); - parallel_prefix_sum(cumulative_node_weights.begin() + begin, cumulative_node_weights.begin() + end, - cumulative_node_weights.begin() + begin, std::plus<>(), 0); + // swap_prefix[index(p1,p2)] stores the first position of moves to revert out of the + // sequence of moves from p1 to p2 + vec swap_prefix(max_key, 0); + tbb::parallel_for(size_t(0), relevant_block_pairs.size(), [&](size_t bp_index) { + // sort both directions by gain (alternative: gain / weight?) + auto sort_by_gain_and_prefix_sum_node_weights = [&](PartitionID p1, PartitionID p2) { + size_t begin = positions[index(p1, p2)], end = positions[index(p1, p2) + 1]; + auto comp = [&](const Move &m1, const Move &m2) { + return m1.gain > m2.gain || (m1.gain == m2.gain && m1.node < m2.node); }; - - PartitionID p1, p2; - std::tie(p1, p2) = relevant_block_pairs[bp_index]; - tbb::parallel_invoke([&] { - sort_by_gain_and_prefix_sum_node_weights(p1, p2); - }, [&] { - sort_by_gain_and_prefix_sum_node_weights(p2, p1); + tbb::parallel_sort(sorted_moves.begin() + begin, sorted_moves.begin() + end, comp); + tbb::parallel_for(begin, end, [&](size_t pos) { + cumulative_node_weights[pos] = phg.nodeWeight(sorted_moves[pos].node); }); + parallel_prefix_sum(cumulative_node_weights.begin() + begin, + cumulative_node_weights.begin() + end, + cumulative_node_weights.begin() + begin, std::plus<>(), 0); + }; - HypernodeWeight budget_p1 = context.partition.max_part_weights[p1] - phg.partWeight(p1), - budget_p2 = context.partition.max_part_weights[p2] - phg.partWeight(p2); - HypernodeWeight lb_p1 = -(budget_p1 /std::max(size_t(1), involvements[p1])), - ub_p2 = budget_p2 / std::max(size_t(1), involvements[p2]); - - size_t p1_begin = positions[index(p1, p2)], p1_end = positions[index(p1, p2) + 1], - p2_begin = positions[index(p2, p1)], p2_end = positions[index(p2, p1) + 1]; - - auto best_prefix = findBestPrefixesRecursive(p1_begin, p1_end, p2_begin, p2_end, - p1_begin - 1, p2_begin - 1, lb_p1, ub_p2); - - assert(best_prefix == findBestPrefixesSequentially(p1_begin, p1_end, p2_begin, p2_end, - p1_begin - 1, p2_begin - 1, lb_p1, ub_p2)); - if (best_prefix.first == invalid_pos) { - // represents no solution found (and recursive version didn't move all the way to the start of the range) - // --> replace with starts of ranges (represents no moves applied) - best_prefix = std::make_pair(p1_begin, p2_begin); - } - swap_prefix[index(p1, p2)] = best_prefix.first; - swap_prefix[index(p2, p1)] = best_prefix.second; - }); + PartitionID p1, p2; + std::tie(p1, p2) = relevant_block_pairs[bp_index]; + tbb::parallel_invoke([&] { sort_by_gain_and_prefix_sum_node_weights(p1, p2); }, + [&] { sort_by_gain_and_prefix_sum_node_weights(p2, p1); }); + + HypernodeWeight budget_p1 = + context.partition.max_part_weights[p1] - phg.partWeight(p1), + budget_p2 = + context.partition.max_part_weights[p2] - phg.partWeight(p2); + HypernodeWeight lb_p1 = -(budget_p1 / std::max(size_t(1), involvements[p1])), + ub_p2 = budget_p2 / std::max(size_t(1), involvements[p2]); + + size_t p1_begin = 
positions[index(p1, p2)], p1_end = positions[index(p1, p2) + 1], + p2_begin = positions[index(p2, p1)], p2_end = positions[index(p2, p1) + 1]; + + auto best_prefix = findBestPrefixesRecursive( + p1_begin, p1_end, p2_begin, p2_end, p1_begin - 1, p2_begin - 1, lb_p1, ub_p2); + + assert(best_prefix == findBestPrefixesSequentially(p1_begin, p1_end, p2_begin, p2_end, + p1_begin - 1, p2_begin - 1, lb_p1, + ub_p2)); + if(best_prefix.first == invalid_pos) + { + // represents no solution found (and recursive version didn't move all the way to + // the start of the range) + // --> replace with starts of ranges (represents no moves applied) + best_prefix = std::make_pair(p1_begin, p2_begin); + } + swap_prefix[index(p1, p2)] = best_prefix.first; + swap_prefix[index(p2, p1)] = best_prefix.second; + }); + + moves.clear(); + Gain actual_gain = applyMovesIf(phg, sorted_moves, num_moves, [&](size_t pos) { + if(pos < swap_prefix[index(sorted_moves[pos].from, sorted_moves[pos].to)]) + { + return true; + } + else + { + // save non-applied moves as backup, to try to apply them in a second step. + moves.push_back_buffered(sorted_moves[pos]); + return false; + } + }); + moves.finalize(); - moves.clear(); - Gain actual_gain = applyMovesIf(phg, sorted_moves, num_moves, [&](size_t pos) { - if (pos < swap_prefix[index(sorted_moves[pos].from, sorted_moves[pos].to)]) { + // revert everything if that decreased solution quality + bool revert_all = actual_gain < 0; + if(revert_all) + { + actual_gain += applyMovesIf(phg, sorted_moves, num_moves, [&](size_t pos) { + if(pos < swap_prefix[index(sorted_moves[pos].from, sorted_moves[pos].to)]) + { + std::swap(sorted_moves[pos].from, sorted_moves[pos].to); return true; - } else { - // save non-applied moves as backup, to try to apply them in a second step. - moves.push_back_buffered(sorted_moves[pos]); + } + else + { return false; } }); - moves.finalize(); - - // revert everything if that decreased solution quality - bool revert_all = actual_gain < 0; - if (revert_all) { - actual_gain += applyMovesIf(phg, sorted_moves, num_moves, [&](size_t pos) { - if (pos < swap_prefix[index(sorted_moves[pos].from, sorted_moves[pos].to)]) { - std::swap(sorted_moves[pos].from, sorted_moves[pos].to); - return true; - } else { - return false; - } - }); - } - - return std::make_pair(actual_gain, revert_all); } - template - std::pair DeterministicLabelPropagationRefiner::findBestPrefixesRecursive( - size_t p1_begin, size_t p1_end, size_t p2_begin, size_t p2_end, - size_t p1_invalid, size_t p2_invalid, - HypernodeWeight lb_p1, HypernodeWeight ub_p2) + return std::make_pair(actual_gain, revert_all); +} + +template +std::pair +DeterministicLabelPropagationRefiner::findBestPrefixesRecursive( + size_t p1_begin, size_t p1_end, size_t p2_begin, size_t p2_end, size_t p1_invalid, + size_t p2_invalid, HypernodeWeight lb_p1, HypernodeWeight ub_p2) +{ + auto balance = [&](size_t p1_ind, size_t p2_ind) { + ASSERT(p1_ind == p1_invalid || p1_ind < p1_end); + ASSERT(p1_ind >= p1_invalid || p1_invalid == (size_t(0) - 1)); + ASSERT(p2_ind == p2_invalid || p2_ind < p2_end); + ASSERT(p2_ind >= p2_invalid || p2_invalid == (size_t(0) - 1)); + ASSERT(p1_ind == p1_invalid || p1_ind < cumulative_node_weights.size()); + ASSERT(p2_ind == p2_invalid || p2_ind < cumulative_node_weights.size()); + const auto a = (p1_ind == p1_invalid) ? 0 : cumulative_node_weights[p1_ind]; + const auto b = (p2_ind == p2_invalid) ? 
0 : cumulative_node_weights[p2_ind]; + return a - b; + }; + + auto is_feasible = [&](size_t p1_ind, size_t p2_ind) { + const HypernodeWeight bal = balance(p1_ind, p2_ind); + return lb_p1 <= bal && bal <= ub_p2; + }; + + const size_t n_p1 = p1_end - p1_begin, n_p2 = p2_end - p2_begin; + + static constexpr size_t sequential_cutoff = 2000; + if(n_p1 < sequential_cutoff && n_p2 < sequential_cutoff) { - auto balance = [&](size_t p1_ind, size_t p2_ind) { - ASSERT(p1_ind == p1_invalid || p1_ind < p1_end); - ASSERT(p1_ind >= p1_invalid || p1_invalid == (size_t(0) - 1)); - ASSERT(p2_ind == p2_invalid || p2_ind < p2_end); - ASSERT(p2_ind >= p2_invalid || p2_invalid == (size_t(0) - 1)); - ASSERT(p1_ind == p1_invalid || p1_ind < cumulative_node_weights.size()); - ASSERT(p2_ind == p2_invalid || p2_ind < cumulative_node_weights.size()); - const auto a = (p1_ind == p1_invalid) ? 0 : cumulative_node_weights[p1_ind]; - const auto b = (p2_ind == p2_invalid) ? 0 : cumulative_node_weights[p2_ind]; - return a - b; - }; - - auto is_feasible = [&](size_t p1_ind, size_t p2_ind) { - const HypernodeWeight bal = balance(p1_ind, p2_ind); - return lb_p1 <= bal && bal <= ub_p2; - }; - - const size_t n_p1 = p1_end - p1_begin, n_p2 = p2_end - p2_begin; + return findBestPrefixesSequentially(p1_begin, p1_end, p2_begin, p2_end, p1_invalid, + p2_invalid, lb_p1, ub_p2); + } - static constexpr size_t sequential_cutoff = 2000; - if (n_p1 < sequential_cutoff && n_p2 < sequential_cutoff) { - return findBestPrefixesSequentially(p1_begin, p1_end, p2_begin, p2_end, p1_invalid, p2_invalid, lb_p1, ub_p2); + const auto c = cumulative_node_weights.begin(); + if(n_p1 > n_p2) + { + size_t p1_mid = p1_begin + n_p1 / 2; + auto p2_match_it = + std::lower_bound(c + p2_begin, c + p2_end, cumulative_node_weights[p1_mid]); + size_t p2_match = std::distance(cumulative_node_weights.begin(), p2_match_it); + + if(p2_match != p2_end && p1_mid != p1_end && is_feasible(p1_mid, p2_match)) + { + // no need to search left range + return findBestPrefixesRecursive(p1_mid + 1, p1_end, p2_match + 1, p2_end, + p1_invalid, p2_invalid, lb_p1, ub_p2); } - - const auto c = cumulative_node_weights.begin(); - if (n_p1 > n_p2) { - size_t p1_mid = p1_begin + n_p1 / 2; - auto p2_match_it = std::lower_bound(c + p2_begin, c + p2_end, cumulative_node_weights[p1_mid]); - size_t p2_match = std::distance(cumulative_node_weights.begin(), p2_match_it); - - if (p2_match != p2_end && p1_mid != p1_end && is_feasible(p1_mid, p2_match)) { - // no need to search left range - return findBestPrefixesRecursive(p1_mid + 1, p1_end, p2_match + 1, p2_end, p1_invalid, p2_invalid, lb_p1, ub_p2); - } - if (p2_match == p2_end && balance(p1_mid, p2_end - 1) > ub_p2) { - // p1_mid cannot be compensated --> no need to search right range - return findBestPrefixesRecursive(p1_begin, p1_mid, p2_begin, p2_match, p1_invalid, p2_invalid, lb_p1, ub_p2); - } - - std::pair left, right; - tbb::parallel_invoke([&] { - left = findBestPrefixesRecursive(p1_begin, p1_mid, p2_begin, p2_match, p1_invalid, p2_invalid, lb_p1, ub_p2); - }, [&] { - right = findBestPrefixesRecursive(p1_mid, p1_end, p2_match, p2_end, p1_invalid, p2_invalid, lb_p1, ub_p2); - }); - return right.first != invalid_pos ? 
right : left; - } else { - size_t p2_mid = p2_begin + n_p2 / 2; - auto p1_match_it = std::lower_bound(c + p1_begin, c + p1_end, cumulative_node_weights[p2_mid]); - size_t p1_match = std::distance(cumulative_node_weights.begin(), p1_match_it); - - if (p1_match != p1_end && p2_mid != p2_end && is_feasible(p1_match, p2_mid)) { - // no need to search left range - return findBestPrefixesRecursive(p1_match + 1, p1_end, p2_mid + 1, p2_end, p1_invalid, p2_invalid, lb_p1, ub_p2); - } - if (p1_match == p1_end && balance(p1_end - 1, p2_mid) < lb_p1) { - // p2_mid cannot be compensated --> no need to search right range - return findBestPrefixesRecursive(p1_begin, p1_match, p2_begin, p2_mid, p1_invalid, p2_invalid, lb_p1, ub_p2); - } - - std::pair left, right; - tbb::parallel_invoke([&] { - left = findBestPrefixesRecursive(p1_begin, p1_match, p2_begin, p2_mid, p1_invalid, p2_invalid, lb_p1, ub_p2); - }, [&] { - right = findBestPrefixesRecursive(p1_match, p1_end, p2_mid, p2_end, p1_invalid, p2_invalid, lb_p1, ub_p2); - }); - return right.first != invalid_pos ? right : left; + if(p2_match == p2_end && balance(p1_mid, p2_end - 1) > ub_p2) + { + // p1_mid cannot be compensated --> no need to search right range + return findBestPrefixesRecursive(p1_begin, p1_mid, p2_begin, p2_match, p1_invalid, + p2_invalid, lb_p1, ub_p2); } - } - template - std::pair DeterministicLabelPropagationRefiner::findBestPrefixesSequentially( - size_t p1_begin, size_t p1_end, size_t p2_begin, size_t p2_end, size_t p1_inv, size_t p2_inv, - HypernodeWeight lb_p1, HypernodeWeight ub_p2) + std::pair left, right; + tbb::parallel_invoke( + [&] { + left = findBestPrefixesRecursive(p1_begin, p1_mid, p2_begin, p2_match, + p1_invalid, p2_invalid, lb_p1, ub_p2); + }, + [&] { + right = findBestPrefixesRecursive(p1_mid, p1_end, p2_match, p2_end, p1_invalid, + p2_invalid, lb_p1, ub_p2); + }); + return right.first != invalid_pos ? right : left; + } + else { - auto balance = [&](size_t p1_ind, size_t p2_ind) { - const auto a = (p1_ind == p1_inv) ? 0 : cumulative_node_weights[p1_ind]; - const auto b = (p2_ind == p2_inv) ? 
0 : cumulative_node_weights[p2_ind]; - return a - b; - }; - - auto is_feasible = [&](size_t p1_ind, size_t p2_ind) { - const HypernodeWeight bal = balance(p1_ind, p2_ind); - return lb_p1 <= bal && bal <= ub_p2; - }; + size_t p2_mid = p2_begin + n_p2 / 2; + auto p1_match_it = + std::lower_bound(c + p1_begin, c + p1_end, cumulative_node_weights[p2_mid]); + size_t p1_match = std::distance(cumulative_node_weights.begin(), p1_match_it); + + if(p1_match != p1_end && p2_mid != p2_end && is_feasible(p1_match, p2_mid)) + { + // no need to search left range + return findBestPrefixesRecursive(p1_match + 1, p1_end, p2_mid + 1, p2_end, + p1_invalid, p2_invalid, lb_p1, ub_p2); + } + if(p1_match == p1_end && balance(p1_end - 1, p2_mid) < lb_p1) + { + // p2_mid cannot be compensated --> no need to search right range + return findBestPrefixesRecursive(p1_begin, p1_match, p2_begin, p2_mid, p1_invalid, + p2_invalid, lb_p1, ub_p2); + } - while (true) { - if (is_feasible(p1_end - 1, p2_end - 1)) { return std::make_pair(p1_end, p2_end); } - if (balance(p1_end - 1, p2_end - 1) < 0) { - if (p2_end == p2_begin) { break; } - p2_end--; - } else { - if (p1_end == p1_begin) { break; } - p1_end--; + std::pair left, right; + tbb::parallel_invoke( + [&] { + left = findBestPrefixesRecursive(p1_begin, p1_match, p2_begin, p2_mid, + p1_invalid, p2_invalid, lb_p1, ub_p2); + }, + [&] { + right = findBestPrefixesRecursive(p1_match, p1_end, p2_mid, p2_end, p1_invalid, + p2_invalid, lb_p1, ub_p2); + }); + return right.first != invalid_pos ? right : left; + } +} + +template +std::pair +DeterministicLabelPropagationRefiner::findBestPrefixesSequentially( + size_t p1_begin, size_t p1_end, size_t p2_begin, size_t p2_end, size_t p1_inv, + size_t p2_inv, HypernodeWeight lb_p1, HypernodeWeight ub_p2) +{ + auto balance = [&](size_t p1_ind, size_t p2_ind) { + const auto a = (p1_ind == p1_inv) ? 0 : cumulative_node_weights[p1_ind]; + const auto b = (p2_ind == p2_inv) ? 0 : cumulative_node_weights[p2_ind]; + return a - b; + }; + + auto is_feasible = [&](size_t p1_ind, size_t p2_ind) { + const HypernodeWeight bal = balance(p1_ind, p2_ind); + return lb_p1 <= bal && bal <= ub_p2; + }; + + while(true) + { + if(is_feasible(p1_end - 1, p2_end - 1)) + { + return std::make_pair(p1_end, p2_end); + } + if(balance(p1_end - 1, p2_end - 1) < 0) + { + if(p2_end == p2_begin) + { + break; } + p2_end--; + } + else + { + if(p1_end == p1_begin) + { + break; + } + p1_end--; } - return std::make_pair(invalid_pos, invalid_pos); - } - - namespace { - #define DETERMINISTIC_LABEL_PROPAGATION_REFINER(X) DeterministicLabelPropagationRefiner } + return std::make_pair(invalid_pos, invalid_pos); +} +namespace { +#define DETERMINISTIC_LABEL_PROPAGATION_REFINER(X) DeterministicLabelPropagationRefiner +} - INSTANTIATE_CLASS_WITH_VALID_TRAITS(DETERMINISTIC_LABEL_PROPAGATION_REFINER) +INSTANTIATE_CLASS_WITH_VALID_TRAITS(DETERMINISTIC_LABEL_PROPAGATION_REFINER) } // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/deterministic/deterministic_label_propagation.h b/mt-kahypar/partition/refinement/deterministic/deterministic_label_propagation.h index 7884c4a6b..89863f37b 100644 --- a/mt-kahypar/partition/refinement/deterministic/deterministic_label_propagation.h +++ b/mt-kahypar/partition/refinement/deterministic/deterministic_label_propagation.h @@ -24,47 +24,47 @@ * SOFTWARE. 
******************************************************************************/ - #pragma once #include "mt-kahypar/datastructures/buffered_vector.h" #include "mt-kahypar/partition/context.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/partition/refinement/i_rebalancer.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" -#include "mt-kahypar/partition/refinement/gains/gain_definitions.h" #include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" +#include "mt-kahypar/partition/refinement/gains/gain_definitions.h" #include "mt-kahypar/utils/reproducible_random.h" namespace mt_kahypar { -template -class DeterministicLabelPropagationRefiner final : public IRefiner { +template +class DeterministicLabelPropagationRefiner final : public IRefiner +{ using PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; using GainComputation = typename GraphAndGainTypes::GainComputation; using AttributedGains = typename GraphAndGainTypes::AttributedGains; public: - explicit DeterministicLabelPropagationRefiner(const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const Context& context, - gain_cache_t /* only relevant for other refiners */, - IRebalancer& /* only relevant for other refiners */) : - DeterministicLabelPropagationRefiner(num_hypernodes, num_hyperedges, context) { } + explicit DeterministicLabelPropagationRefiner( + const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, + const Context &context, gain_cache_t /* only relevant for other refiners */, + IRebalancer & /* only relevant for other refiners */) : + DeterministicLabelPropagationRefiner(num_hypernodes, num_hyperedges, context) + { + } explicit DeterministicLabelPropagationRefiner(const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, - const Context& context) : + const Context &context) : context(context), gain_computation(context, true /* disable_randomization */), - cumulative_node_weights(num_hypernodes), - moves(num_hypernodes), - sorted_moves(num_hypernodes), - current_k(context.partition.k), - prng(context.partition.seed), - active_nodes(0) { - if (context.refinement.deterministic_refinement.use_active_node_set) { + cumulative_node_weights(num_hypernodes), moves(num_hypernodes), + sorted_moves(num_hypernodes), current_k(context.partition.k), + prng(context.partition.seed), active_nodes(0) + { + if(context.refinement.deterministic_refinement.use_active_node_set) + { active_nodes.adapt_capacity(num_hypernodes); last_moved_in_round.resize(num_hypernodes + num_hyperedges, CAtomic(0)); } @@ -74,30 +74,38 @@ class DeterministicLabelPropagationRefiner final : public IRefiner { static constexpr bool debug = false; static constexpr size_t invalid_pos = std::numeric_limits::max() / 2; - bool refineImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec& refinement_nodes, - Metrics& best_metrics, double) final ; + bool refineImpl(mt_kahypar_partitioned_hypergraph_t &hypergraph, + const vec &refinement_nodes, Metrics &best_metrics, + double) final; - void initializeImpl(mt_kahypar_partitioned_hypergraph_t&) final { /* nothing to do */ } + void initializeImpl(mt_kahypar_partitioned_hypergraph_t &) final + { /* nothing to do */ + } // functions to apply moves from a sub-round - Gain applyMovesSortedByGainAndRevertUnbalanced(PartitionedHypergraph& phg); - std::pair applyMovesByMaximalPrefixesInBlockPairs(PartitionedHypergraph& phg); - Gain performMoveWithAttributedGain(PartitionedHypergraph& phg, const Move& m, bool 
activate_neighbors); - template - Gain applyMovesIf(PartitionedHypergraph& phg, const vec& moves, size_t end, Predicate&& predicate); - - - std::pair findBestPrefixesRecursive( - size_t p1_begin, size_t p1_end, size_t p2_begin, size_t p2_end, size_t p1_inv, size_t p2_inv, - HypernodeWeight lb_p1, HypernodeWeight ub_p2); + Gain applyMovesSortedByGainAndRevertUnbalanced(PartitionedHypergraph &phg); + std::pair + applyMovesByMaximalPrefixesInBlockPairs(PartitionedHypergraph &phg); + Gain performMoveWithAttributedGain(PartitionedHypergraph &phg, const Move &m, + bool activate_neighbors); + template + Gain applyMovesIf(PartitionedHypergraph &phg, const vec &moves, size_t end, + Predicate &&predicate); + + std::pair findBestPrefixesRecursive(size_t p1_begin, size_t p1_end, + size_t p2_begin, size_t p2_end, + size_t p1_inv, size_t p2_inv, + HypernodeWeight lb_p1, + HypernodeWeight ub_p2); // used for verification - std::pair findBestPrefixesSequentially( - size_t p1_begin, size_t p1_end, size_t p2_begin, size_t p2_end, size_t p1_inv, size_t p2_inv, - HypernodeWeight lb_p1, HypernodeWeight ub_p2); + std::pair findBestPrefixesSequentially(size_t p1_begin, size_t p1_end, + size_t p2_begin, size_t p2_end, + size_t p1_inv, size_t p2_inv, + HypernodeWeight lb_p1, + HypernodeWeight ub_p2); - const Context& context; + const Context &context; GainComputation gain_computation; vec cumulative_node_weights; ds::BufferedVector moves; @@ -107,7 +115,7 @@ class DeterministicLabelPropagationRefiner final : public IRefiner { std::mt19937 prng; utils::ParallelPermutation permutation; ds::BufferedVector active_nodes; - vec> last_moved_in_round; + vec > last_moved_in_round; uint32_t round = 0; }; diff --git a/mt-kahypar/partition/refinement/do_nothing_refiner.h b/mt-kahypar/partition/refinement/do_nothing_refiner.h index e86a77cf0..3e0192f3f 100644 --- a/mt-kahypar/partition/refinement/do_nothing_refiner.h +++ b/mt-kahypar/partition/refinement/do_nothing_refiner.h @@ -34,39 +34,44 @@ #include "mt-kahypar/partition/refinement/i_refiner.h" namespace mt_kahypar { -class DoNothingRefiner final : public IRebalancer { - public: - template - explicit DoNothingRefiner(Args&& ...) noexcept { } - DoNothingRefiner(const DoNothingRefiner&) = delete; - DoNothingRefiner(DoNothingRefiner&&) = delete; - DoNothingRefiner & operator= (const DoNothingRefiner &) = delete; - DoNothingRefiner & operator= (DoNothingRefiner &&) = delete; +class DoNothingRefiner final : public IRebalancer +{ +public: + template + explicit DoNothingRefiner(Args &&...) 
noexcept + { + } + DoNothingRefiner(const DoNothingRefiner &) = delete; + DoNothingRefiner(DoNothingRefiner &&) = delete; + DoNothingRefiner &operator=(const DoNothingRefiner &) = delete; + DoNothingRefiner &operator=(DoNothingRefiner &&) = delete; - private: - void initializeImpl(mt_kahypar_partitioned_hypergraph_t&) override final { } +private: + void initializeImpl(mt_kahypar_partitioned_hypergraph_t &) override final {} - bool refineImpl(mt_kahypar_partitioned_hypergraph_t&, - const parallel::scalable_vector&, - Metrics &, - const double) override final { + bool refineImpl(mt_kahypar_partitioned_hypergraph_t &, + const parallel::scalable_vector &, Metrics &, + const double) override final + { return false; } - virtual bool refineAndOutputMovesImpl(mt_kahypar_partitioned_hypergraph_t&, - const parallel::scalable_vector&, - parallel::scalable_vector>&, - Metrics&, - const double) override final { + virtual bool + refineAndOutputMovesImpl(mt_kahypar_partitioned_hypergraph_t &, + const parallel::scalable_vector &, + parallel::scalable_vector > &, + Metrics &, const double) override final + { return false; } - virtual bool refineAndOutputMovesLinearImpl(mt_kahypar_partitioned_hypergraph_t&, - const parallel::scalable_vector&, - parallel::scalable_vector&, - Metrics&, - const double) override final { + virtual bool + refineAndOutputMovesLinearImpl(mt_kahypar_partitioned_hypergraph_t &, + const parallel::scalable_vector &, + parallel::scalable_vector &, Metrics &, + const double) override final + { return false; } }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/refinement/flows/do_nothing_refiner.h b/mt-kahypar/partition/refinement/flows/do_nothing_refiner.h index 6078a860e..ce8b51322 100644 --- a/mt-kahypar/partition/refinement/flows/do_nothing_refiner.h +++ b/mt-kahypar/partition/refinement/flows/do_nothing_refiner.h @@ -34,28 +34,30 @@ #include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" namespace mt_kahypar { -class DoNothingFlowRefiner final : public IFlowRefiner { - public: - template - explicit DoNothingFlowRefiner(Args&& ...) noexcept { } - DoNothingFlowRefiner(const DoNothingFlowRefiner&) = delete; - DoNothingFlowRefiner(DoNothingFlowRefiner&&) = delete; - DoNothingFlowRefiner & operator= (const DoNothingFlowRefiner &) = delete; - DoNothingFlowRefiner & operator= (DoNothingFlowRefiner &&) = delete; - - private: - void initializeImpl(mt_kahypar_partitioned_hypergraph_const_t&) override final { } - - MoveSequence refineImpl(mt_kahypar_partitioned_hypergraph_const_t&, - const Subhypergraph&, - const HighResClockTimepoint&) override final { - return MoveSequence { {}, 0 }; +class DoNothingFlowRefiner final : public IFlowRefiner +{ +public: + template + explicit DoNothingFlowRefiner(Args &&...) 
noexcept + { } - - PartitionID maxNumberOfBlocksPerSearchImpl() const override { - return 2; + DoNothingFlowRefiner(const DoNothingFlowRefiner &) = delete; + DoNothingFlowRefiner(DoNothingFlowRefiner &&) = delete; + DoNothingFlowRefiner &operator=(const DoNothingFlowRefiner &) = delete; + DoNothingFlowRefiner &operator=(DoNothingFlowRefiner &&) = delete; + +private: + void initializeImpl(mt_kahypar_partitioned_hypergraph_const_t &) override final {} + + MoveSequence refineImpl(mt_kahypar_partitioned_hypergraph_const_t &, + const Subhypergraph &, + const HighResClockTimepoint &) override final + { + return MoveSequence{ {}, 0 }; } + PartitionID maxNumberOfBlocksPerSearchImpl() const override { return 2; } + void setNumThreadsForSearchImpl(const size_t) override {} }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/refinement/flows/flow_common.h b/mt-kahypar/partition/refinement/flows/flow_common.h index 7acabc927..0c2e94821 100644 --- a/mt-kahypar/partition/refinement/flows/flow_common.h +++ b/mt-kahypar/partition/refinement/flows/flow_common.h @@ -24,13 +24,14 @@ * SOFTWARE. ******************************************************************************/ -#include "datastructure/flow_hypergraph_builder.h" #include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "datastructure/flow_hypergraph_builder.h" namespace mt_kahypar { -enum class MoveSequenceState : uint8_t { +enum class MoveSequenceState : uint8_t +{ IN_PROGRESS = 0, SUCCESS = 1, VIOLATES_BALANCE_CONSTRAINT = 2, @@ -42,13 +43,15 @@ enum class MoveSequenceState : uint8_t { // Represents a sequence of vertex moves with an // expected improvement of the solution quality if we // apply the moves -struct MoveSequence { +struct MoveSequence +{ vec moves; Gain expected_improvement; // >= 0 MoveSequenceState state = MoveSequenceState::IN_PROGRESS; }; -struct FlowProblem { +struct FlowProblem +{ whfc::Node source; whfc::Node sink; HyperedgeWeight total_cut; @@ -57,7 +60,8 @@ struct FlowProblem { HypernodeWeight weight_of_block_1; }; -struct Subhypergraph { +struct Subhypergraph +{ PartitionID block_0; PartitionID block_1; vec nodes_of_block_0; @@ -67,17 +71,16 @@ struct Subhypergraph { vec hes; size_t num_pins; - size_t numNodes() const { - return nodes_of_block_0.size() + nodes_of_block_1.size(); - } + size_t numNodes() const { return nodes_of_block_0.size() + nodes_of_block_1.size(); } }; -inline std::ostream& operator<<(std::ostream& out, const Subhypergraph& sub_hg) { - out << "[Nodes=" << sub_hg.numNodes() - << ", Edges=" << sub_hg.hes.size() - << ", Pins=" << sub_hg.num_pins - << ", Blocks=(" << sub_hg.block_0 << "," << sub_hg.block_1 << ")" - << ", Weights=(" << sub_hg.weight_of_block_0 << "," << sub_hg.weight_of_block_1 << ")]"; +inline std::ostream &operator<<(std::ostream &out, const Subhypergraph &sub_hg) +{ + out << "[Nodes=" << sub_hg.numNodes() << ", Edges=" << sub_hg.hes.size() + << ", Pins=" << sub_hg.num_pins << ", Blocks=(" << sub_hg.block_0 << "," + << sub_hg.block_1 << ")" + << ", Weights=(" << sub_hg.weight_of_block_0 << "," << sub_hg.weight_of_block_1 + << ")]"; return out; } diff --git a/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.cpp b/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.cpp index 732b4816f..7433de9b3 100644 --- a/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.cpp +++ b/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.cpp @@ -28,55 +28,67 @@ #include 
"mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h" #include "tbb/blocked_range.h" +#include "tbb/parallel_for.h" #include "tbb/parallel_invoke.h" #include "tbb/parallel_reduce.h" #include "tbb/parallel_scan.h" -#include "tbb/parallel_for.h" namespace mt_kahypar { // ####################### Sequential Construction ####################### -void FlowHypergraphBuilder::finalize() { - if( !finishHyperedge() ) { //finish last open hyperedge +void FlowHypergraphBuilder::finalize() +{ + if(!finishHyperedge()) + { + // finish last open hyperedge // maybe the last started hyperedge has zero/one pins and thus we still use the // previous sentinel. was never a bug, since that capacity is never read hyperedges.back().capacity = 0; } total_node_weight = whfc::NodeWeight(0); - for (whfc::Node u : nodeIDs()) { - nodes[u+1].first_out += nodes[u].first_out; + for(whfc::Node u : nodeIDs()) + { + nodes[u + 1].first_out += nodes[u].first_out; total_node_weight += nodes[u].weight; } incident_hyperedges.resize(numPins()); - for (whfc::Hyperedge e : hyperedgeIDs()) { - for (auto pin_it = beginIndexPins(e); pin_it != endIndexPins(e); pin_it++) { - Pin& p = pins[pin_it]; - //destroy first_out temporarily and reset later + for(whfc::Hyperedge e : hyperedgeIDs()) + { + for(auto pin_it = beginIndexPins(e); pin_it != endIndexPins(e); pin_it++) + { + Pin &p = pins[pin_it]; + // destroy first_out temporarily and reset later whfc::InHeIndex ind_he = nodes[p.pin].first_out++; incident_hyperedges[ind_he] = { e, pin_it }; - //set iterator for incident hyperedge -> its position in incident_hyperedges of the node + // set iterator for incident hyperedge -> its position in incident_hyperedges of the + // node p.he_inc_iter = ind_he; } } - for (whfc::Node u(numNodes()-1); u > 0; u--) { - nodes[u].first_out = nodes[u-1].first_out; //reset temporarily destroyed first_out + for(whfc::Node u(numNodes() - 1); u > 0; u--) + { + nodes[u].first_out = nodes[u - 1].first_out; // reset temporarily destroyed first_out } nodes[0].first_out = whfc::InHeIndex(0); _finalized = true; } -bool FlowHypergraphBuilder::finishHyperedge() { - if (currentHyperedgeSize() == 1) { +bool FlowHypergraphBuilder::finishHyperedge() +{ + if(currentHyperedgeSize() == 1) + { removeLastPin(); } - if (currentHyperedgeSize() > 0) { - hyperedges.push_back({whfc::PinIndex::fromOtherValueType(numPins()), whfc::Flow(0)});//sentinel + if(currentHyperedgeSize() > 0) + { + hyperedges.push_back( + { whfc::PinIndex::fromOtherValueType(numPins()), whfc::Flow(0) }); // sentinel return true; } return false; @@ -85,21 +97,26 @@ bool FlowHypergraphBuilder::finishHyperedge() { // ####################### Parallel Construction ####################### void FlowHypergraphBuilder::allocateHyperedgesAndPins(const size_t num_hyperedges, - const size_t num_pins) { - tbb::parallel_invoke([&] { - hyperedges.assign(num_hyperedges + 1, HyperedgeData { - whfc::PinIndex::Invalid(), whfc::Flow(0) }); - }, [&] { - pins.assign(num_pins, Pin { whfc::Node::Invalid(), whfc::InHeIndex::Invalid() }); - }); + const size_t num_pins) +{ + tbb::parallel_invoke( + [&] { + hyperedges.assign(num_hyperedges + 1, + HyperedgeData{ whfc::PinIndex::Invalid(), whfc::Flow(0) }); + }, + [&] { + pins.assign(num_pins, Pin{ whfc::Node::Invalid(), whfc::InHeIndex::Invalid() }); + }); } -void FlowHypergraphBuilder::finalizeHyperedges() { - for ( size_t i = 1; i < _tmp_csr_buckets.size(); ++i ) { +void FlowHypergraphBuilder::finalizeHyperedges() +{ + for(size_t i = 1; i < _tmp_csr_buckets.size(); ++i) + { 
_tmp_csr_buckets[i]._global_start_he = - _tmp_csr_buckets[i - 1]._global_start_he + _tmp_csr_buckets[i - 1]._num_hes; + _tmp_csr_buckets[i - 1]._global_start_he + _tmp_csr_buckets[i - 1]._num_hes; _tmp_csr_buckets[i]._global_start_pin_idx = - _tmp_csr_buckets[i - 1]._global_start_pin_idx + _tmp_csr_buckets[i - 1]._num_pins; + _tmp_csr_buckets[i - 1]._global_start_pin_idx + _tmp_csr_buckets[i - 1]._num_pins; } tbb::parallel_for(UL(0), _tmp_csr_buckets.size(), [&](const size_t idx) { @@ -107,118 +124,140 @@ void FlowHypergraphBuilder::finalizeHyperedges() { }); const size_t num_hyperedges = - _tmp_csr_buckets.back()._global_start_he + _tmp_csr_buckets.back()._num_hes; + _tmp_csr_buckets.back()._global_start_he + _tmp_csr_buckets.back()._num_hes; const size_t num_pins = - _tmp_csr_buckets.back()._global_start_pin_idx + _tmp_csr_buckets.back()._num_pins; + _tmp_csr_buckets.back()._global_start_pin_idx + _tmp_csr_buckets.back()._num_pins; resizeHyperedgesAndPins(num_hyperedges, num_pins); - hyperedges.emplace_back( HyperedgeData { whfc::PinIndex(num_pins), whfc::Flow(0) } ); // sentinel + hyperedges.emplace_back( + HyperedgeData{ whfc::PinIndex(num_pins), whfc::Flow(0) }); // sentinel } -void FlowHypergraphBuilder::finalizeParallel() { +void FlowHypergraphBuilder::finalizeParallel() +{ ASSERT(verifyParallelConstructedHypergraph(), "Parallel construction failed!"); - tbb::parallel_invoke([&] { - // Determine maximum edge capacity - maxHyperedgeCapacity = tbb::parallel_reduce( - tbb::blocked_range(UL(0), hyperedges.size()), whfc::Flow(0), - [&](const tbb::blocked_range& range, whfc::Flow init) { - whfc::Flow max_capacity = init; - for (size_t i = range.begin(); i < range.end(); ++i) { - max_capacity = std::max(max_capacity, hyperedges[i].capacity); - } - return max_capacity; - }, [](const whfc::Flow& lhs, const whfc::Flow& rhs) { - return std::max(lhs, rhs); - }); - }, [&] { - // Determine total node weight - total_node_weight = tbb::parallel_reduce( - tbb::blocked_range(UL(0), static_cast(numNodes())), whfc::NodeWeight(0), - [&](const tbb::blocked_range& range, whfc::NodeWeight init) { - whfc::NodeWeight weight = init; - for (size_t i = range.begin(); i < range.end(); ++i) { - weight += nodes[i].weight; - } - return weight; - }, std::plus<>()); - }, [&] { - incident_hyperedges.resize(numPins()); - }, [&]() { - _inc_he_pos.assign(numNodes(), 0); - }); + tbb::parallel_invoke( + [&] { + // Determine maximum edge capacity + maxHyperedgeCapacity = tbb::parallel_reduce( + tbb::blocked_range(UL(0), hyperedges.size()), whfc::Flow(0), + [&](const tbb::blocked_range &range, whfc::Flow init) { + whfc::Flow max_capacity = init; + for(size_t i = range.begin(); i < range.end(); ++i) + { + max_capacity = std::max(max_capacity, hyperedges[i].capacity); + } + return max_capacity; + }, + [](const whfc::Flow &lhs, const whfc::Flow &rhs) { + return std::max(lhs, rhs); + }); + }, + [&] { + // Determine total node weight + total_node_weight = tbb::parallel_reduce( + tbb::blocked_range(UL(0), static_cast(numNodes())), + whfc::NodeWeight(0), + [&](const tbb::blocked_range &range, whfc::NodeWeight init) { + whfc::NodeWeight weight = init; + for(size_t i = range.begin(); i < range.end(); ++i) + { + weight += nodes[i].weight; + } + return weight; + }, + std::plus<>()); + }, + [&] { incident_hyperedges.resize(numPins()); }, + [&]() { _inc_he_pos.assign(numNodes(), 0); }); // Compute node degree prefix sum tbb::parallel_scan( - tbb::blocked_range(UL(0), numNodes() + 1), whfc::InHeIndex(0), - [&](const tbb::blocked_range& r, 
whfc::InHeIndex sum, bool is_final_scan) -> whfc::InHeIndex { - whfc::InHeIndex tmp = sum; - for ( size_t i = r.begin(); i < r.end(); ++i ) { - tmp += nodes[i].first_out; - if ( is_final_scan ) { - nodes[i].first_out = tmp; + tbb::blocked_range(UL(0), numNodes() + 1), whfc::InHeIndex(0), + [&](const tbb::blocked_range &r, whfc::InHeIndex sum, + bool is_final_scan) -> whfc::InHeIndex { + whfc::InHeIndex tmp = sum; + for(size_t i = r.begin(); i < r.end(); ++i) + { + tmp += nodes[i].first_out; + if(is_final_scan) + { + nodes[i].first_out = tmp; + } } - } - return tmp; - }, [&](const whfc::InHeIndex lhs, const whfc::InHeIndex rhs) { - return lhs + rhs; - } - ); + return tmp; + }, + [&](const whfc::InHeIndex lhs, const whfc::InHeIndex rhs) { return lhs + rhs; }); tbb::parallel_for(UL(0), numHyperedges(), [&](const size_t i) { const whfc::Hyperedge e(i); - for ( auto pin_it = beginIndexPins(e); pin_it != endIndexPins(e); pin_it++ ) { - Pin& p = pins[pin_it]; - const whfc::Node& u = p.pin; - //destroy first_out temporarily and reset later - whfc::InHeIndex::ValueType ind_he = nodes[u].first_out + - __atomic_fetch_add(&_inc_he_pos[u], 1, __ATOMIC_RELAXED); + for(auto pin_it = beginIndexPins(e); pin_it != endIndexPins(e); pin_it++) + { + Pin &p = pins[pin_it]; + const whfc::Node &u = p.pin; + // destroy first_out temporarily and reset later + whfc::InHeIndex::ValueType ind_he = + nodes[u].first_out + __atomic_fetch_add(&_inc_he_pos[u], 1, __ATOMIC_RELAXED); incident_hyperedges[ind_he] = { e, pin_it }; - //set iterator for incident hyperedge -> its position in incident_hyperedges of the node + // set iterator for incident hyperedge -> its position in incident_hyperedges of the + // node p.he_inc_iter = whfc::InHeIndex(ind_he); } }); - ASSERT([&]() { - size_t num_pins = 0; - for ( const whfc::Node& u : nodeIDs() ) { - for ( const InHe& in_e : hyperedgesOf(u) ) { - ++num_pins; - bool found = false; - for ( const Pin& p : pinsOf(in_e.e) ) { - if ( p.pin == u ) { - found = true; - break; + ASSERT( + [&]() { + size_t num_pins = 0; + for(const whfc::Node &u : nodeIDs()) + { + for(const InHe &in_e : hyperedgesOf(u)) + { + ++num_pins; + bool found = false; + for(const Pin &p : pinsOf(in_e.e)) + { + if(p.pin == u) + { + found = true; + break; + } + } + if(!found) + { + LOG << "Node" << u << "is not incident to hyperedge" << in_e.e << "!"; + return false; + } } } - if ( !found ) { - LOG << "Node" << u << "is not incident to hyperedge" << in_e.e << "!"; + if(num_pins != numPins()) + { + LOG << "Some incident hyperedges are missing (" << V(num_pins) << V(numPins()) + << ")"; return false; } - } - } - if ( num_pins != numPins() ) { - LOG << "Some incident hyperedges are missing (" << V(num_pins) << V(numPins()) << ")"; - return false; - } - return true; - }(), "Parallel incidence hyperedge construction failed!"); + return true; + }(), + "Parallel incidence hyperedge construction failed!"); _finalized = true; } - void FlowHypergraphBuilder::resizeHyperedgesAndPins(const size_t num_hyperedges, - const size_t num_pins) { + const size_t num_pins) +{ ASSERT(num_hyperedges <= hyperedges.size()); ASSERT(num_pins <= pins.size()); hyperedges.resize(num_hyperedges); pins.resize(num_pins); } -bool FlowHypergraphBuilder::verifyParallelConstructedHypergraph() { +bool FlowHypergraphBuilder::verifyParallelConstructedHypergraph() +{ size_t num_pins = 0; - for ( size_t i = 0; i < numNodes(); ++i ) { - if ( nodes[i].weight == 0 ) { + for(size_t i = 0; i < numNodes(); ++i) + { + if(nodes[i].weight == 0) + { LOG << "Node" << i << 
"has zero weight!"; return false; } @@ -226,13 +265,17 @@ bool FlowHypergraphBuilder::verifyParallelConstructedHypergraph() { } num_pins += nodes.back().first_out; // sentinel - if ( num_pins != numPins() ) { - LOG << "[Node Degrees] Expected number of pins =" << numPins() << ", Actual =" << num_pins; + if(num_pins != numPins()) + { + LOG << "[Node Degrees] Expected number of pins =" << numPins() + << ", Actual =" << num_pins; return false; } - for ( size_t i = 0; i < pins.size(); ++i ) { - if ( pins[i].pin == whfc::Node::Invalid() ) { + for(size_t i = 0; i < pins.size(); ++i) + { + if(pins[i].pin == whfc::Node::Invalid()) + { LOG << "Pin at index" << i << "not assigned"; return false; } @@ -240,24 +283,29 @@ bool FlowHypergraphBuilder::verifyParallelConstructedHypergraph() { size_t previous_end = 0; num_pins = 0; - for ( size_t i = 0; i < hyperedges.size() - 1; ++i ) { + for(size_t i = 0; i < hyperedges.size() - 1; ++i) + { size_t current_start = hyperedges[i].first_out; - size_t current_end = hyperedges[i+1].first_out; - if ( current_end - current_start <= 1 ) { + size_t current_end = hyperedges[i + 1].first_out; + if(current_end - current_start <= 1) + { LOG << "Hyperedge of size one contained"; return false; } - if ( current_start != previous_end ) { + if(current_start != previous_end) + { LOG << "Gap or intersection in hyperedge incidence array!"; return false; } - num_pins += ( current_end - current_start ); + num_pins += (current_end - current_start); previous_end = current_end; } - if ( num_pins != numPins() ) { - LOG << "[Edge Sizes] Expected number of pins =" << numPins() << ", Actual =" << num_pins; + if(num_pins != numPins()) + { + LOG << "[Edge Sizes] Expected number of pins =" << numPins() + << ", Actual =" << num_pins; return false; } @@ -266,7 +314,8 @@ bool FlowHypergraphBuilder::verifyParallelConstructedHypergraph() { // ####################### Common Functions ####################### -void FlowHypergraphBuilder::clear() { +void FlowHypergraphBuilder::clear() +{ _finalized = false; _numPinsAtHyperedgeStart = 0; maxHyperedgeCapacity = 0; @@ -277,9 +326,9 @@ void FlowHypergraphBuilder::clear() { incident_hyperedges.clear(); total_node_weight = whfc::NodeWeight(0); - //sentinels - nodes.push_back({whfc::InHeIndex(0), whfc::NodeWeight(0)}); - hyperedges.push_back({whfc::PinIndex(0), whfc::Flow(0)}); + // sentinels + nodes.push_back({ whfc::InHeIndex(0), whfc::NodeWeight(0) }); + hyperedges.push_back({ whfc::PinIndex(0), whfc::Flow(0) }); } } diff --git a/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h b/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h index 08daf5342..19a698d1f 100644 --- a/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h +++ b/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h @@ -33,262 +33,292 @@ namespace mt_kahypar { - class FlowHypergraphBuilder : public whfc::FlowHypergraph { - - using TmpPinRange = mutable_range>; - - struct TmpCSRBucket { - TmpCSRBucket() : - _hes(), - _pins(), - _num_hes(0), - _global_start_he(0), - _num_pins(0), - _global_start_pin_idx(0) { } - - void initialize(const size_t num_hes, const size_t num_pins) { - _hes.clear(); - _pins.clear(); - _hes.resize(num_hes + 1); - _pins.resize(num_pins); - _num_hes = whfc::Hyperedge(0); - _global_start_he = whfc::Hyperedge(0); - _num_pins = 0; - _global_start_pin_idx = 0; - } - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::PinIndex pinCount(const whfc::Hyperedge e) { - ASSERT(e < _num_hes); - return _hes[e + 1].first_out - 
_hes[e].first_out; - } - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::Flow& capacity(const whfc::Hyperedge e) { - ASSERT(e < _num_hes); - return _hes[e].capacity; - } - - TmpPinRange pinsOf(const whfc::Hyperedge e) { - ASSERT(e < _num_hes); - return TmpPinRange(_pins.begin() + _hes[e].first_out, - _pins.begin() + _hes[e + 1].first_out); - } - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::Hyperedge originalHyperedgeID(const whfc::Hyperedge& e) { - ASSERT(e < _num_hes); - return _global_start_he + e; - } - - void addPin(const whfc::Node u, const size_t pin_idx) { - ASSERT(pin_idx < _pins.size()); - ASSERT(pin_idx == _num_pins); - _pins[pin_idx].pin = u; - ++_num_pins; - } - - void finishHyperedge(const whfc::Hyperedge he, const whfc::Flow capacity, - const size_t pin_start_idx, const size_t pin_end_idx) { - ASSERT(he == _num_hes); - ASSERT(static_cast(he + 1) < _hes.size()); - ASSERT(pin_end_idx <= _pins.size()); - ASSERT(pin_end_idx == _num_pins); - _hes[he].capacity = capacity; - _hes[he].first_out = whfc::PinIndex(pin_start_idx); - _hes[he + 1].first_out = whfc::PinIndex(pin_end_idx); - ++_num_hes; - } +class FlowHypergraphBuilder : public whfc::FlowHypergraph +{ - void finalize() { - _hes.resize(_num_hes + 1); - _pins.resize(_num_pins); - } + using TmpPinRange = mutable_range >; - void copyDataToFlowHypergraph(std::vector& hyperedges, - std::vector& pins) { - if ( _num_hes > 0 ) { - const size_t num_hes = static_cast(_num_hes); - for ( size_t i = 0; i < num_hes; ++i ) { - _hes[i].first_out += _global_start_pin_idx; - } - const size_t he_start = static_cast(_global_start_he); - std::memcpy(hyperedges.data() + he_start, - _hes.data(), sizeof(FlowHypergraph::HyperedgeData) * num_hes); - } - if ( _num_pins > 0 ) { - std::memcpy(pins.data() + _global_start_pin_idx, - _pins.data(), sizeof(FlowHypergraph::Pin) * _num_pins); - } - } - - vec _hes; - vec _pins; - whfc::Hyperedge _num_hes; - whfc::Hyperedge _global_start_he; - size_t _num_pins; - size_t _global_start_pin_idx; - }; - - public: - using Base = whfc::FlowHypergraph; - - FlowHypergraphBuilder() : - Base(), - _finalized(false), - _numPinsAtHyperedgeStart(0), - _tmp_csr_buckets(), - _inc_he_pos() { - clear(); - } - - explicit FlowHypergraphBuilder(size_t num_nodes) : - Base(), - _finalized(false), - _numPinsAtHyperedgeStart(0), - _tmp_csr_buckets(), - _inc_he_pos() { - reinitialize(num_nodes); + struct TmpCSRBucket + { + TmpCSRBucket() : + _hes(), _pins(), _num_hes(0), _global_start_he(0), _num_pins(0), + _global_start_pin_idx(0) + { } - // ####################### Sequential Construction ####################### - - void addNode(const whfc::NodeWeight w) { - nodes.back().weight = w; - nodes.push_back({whfc::InHeIndex(0), whfc::NodeWeight(0)}); - } - - void startHyperedge(const whfc::Flow capacity) { - finishHyperedge(); //finish last hyperedge - hyperedges.back().capacity = capacity; //exploit sentinel - _numPinsAtHyperedgeStart = numPins(); - maxHyperedgeCapacity = std::max(maxHyperedgeCapacity, capacity); + void initialize(const size_t num_hes, const size_t num_pins) + { + _hes.clear(); + _pins.clear(); + _hes.resize(num_hes + 1); + _pins.resize(num_pins); + _num_hes = whfc::Hyperedge(0); + _global_start_he = whfc::Hyperedge(0); + _num_pins = 0; + _global_start_pin_idx = 0; } - void addPin(const whfc::Node u) { - assert(u < numNodes()); - pins.push_back({u, whfc::InHeIndex::Invalid()}); - nodes[u+1].first_out++; + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::PinIndex pinCount(const whfc::Hyperedge e) + { + ASSERT(e < _num_hes); + return _hes[e + 
1].first_out - _hes[e].first_out; } - size_t currentHyperedgeSize() const { - return numPins() - _numPinsAtHyperedgeStart; + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::Flow &capacity(const whfc::Hyperedge e) + { + ASSERT(e < _num_hes); + return _hes[e].capacity; } - void removeCurrentHyperedge() { - while (numPins() > _numPinsAtHyperedgeStart) { - removeLastPin(); - } + TmpPinRange pinsOf(const whfc::Hyperedge e) + { + ASSERT(e < _num_hes); + return TmpPinRange(_pins.begin() + _hes[e].first_out, + _pins.begin() + _hes[e + 1].first_out); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::Flow& capacity(const whfc::Hyperedge e) { - ASSERT(e < hyperedges.size()); - return hyperedges[e].capacity; + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::Hyperedge + originalHyperedgeID(const whfc::Hyperedge &e) + { + ASSERT(e < _num_hes); + return _global_start_he + e; } - void finalize(); - - // ####################### Parallel Construction ####################### - - void allocateNodes(const size_t num_nodes) { - nodes.assign(num_nodes + 1, NodeData { whfc::InHeIndex(0), whfc::NodeWeight(0) }); - } - - void resizeNodes(const size_t num_nodes) { - ASSERT(num_nodes <= numNodes()); - nodes.resize(num_nodes + 1); - } - - void allocateHyperedgesAndPins(const size_t num_hyperedges, - const size_t num_pins); - - void setNumCSRBuckets(const size_t num_buckets) { - _tmp_csr_buckets.resize(num_buckets); - } - - void initializeCSRBucket(const size_t bucket, const size_t num_hes, const size_t num_pins) { - ASSERT(bucket < _tmp_csr_buckets.size()); - _tmp_csr_buckets[bucket].initialize(num_hes, num_pins); - } - - void finalizeCSRBucket(const size_t bucket) { - ASSERT(bucket < _tmp_csr_buckets.size()); - _tmp_csr_buckets[bucket].finalize(); - } - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::PinIndex tmpPinCount(const size_t bucket, const whfc::Hyperedge e) { - ASSERT(bucket < _tmp_csr_buckets.size()); - return _tmp_csr_buckets[bucket].pinCount(e); - } - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::Flow& capacity(const size_t bucket, const whfc::Hyperedge e) { - ASSERT(bucket < _tmp_csr_buckets.size()); - return _tmp_csr_buckets[bucket].capacity(e); - } - - TmpPinRange tmpPinsOf(const size_t bucket, const whfc::Hyperedge e) { - ASSERT(bucket < _tmp_csr_buckets.size()); - return _tmp_csr_buckets[bucket].pinsOf(e); - } - - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::Hyperedge originalHyperedgeID(const size_t bucket, const whfc::Hyperedge& e) { - ASSERT(bucket < _tmp_csr_buckets.size()); - return _tmp_csr_buckets[bucket].originalHyperedgeID(e); - } - - void addPin(const whfc::Node u, const size_t bucket, const size_t pin_idx) { - ASSERT(bucket < _tmp_csr_buckets.size()); - ASSERT(static_cast(u) < numNodes()); - _tmp_csr_buckets[bucket].addPin(u, pin_idx); - __atomic_fetch_add(reinterpret_cast( - &nodes[u + 1].first_out), 1, __ATOMIC_RELAXED); + void addPin(const whfc::Node u, const size_t pin_idx) + { + ASSERT(pin_idx < _pins.size()); + ASSERT(pin_idx == _num_pins); + _pins[pin_idx].pin = u; + ++_num_pins; } void finishHyperedge(const whfc::Hyperedge he, const whfc::Flow capacity, - const size_t bucket, const size_t pin_start_idx, const size_t pin_end_idx) { - ASSERT(bucket < _tmp_csr_buckets.size()); - _tmp_csr_buckets[bucket].finishHyperedge(he, capacity, pin_start_idx, pin_end_idx); + const size_t pin_start_idx, const size_t pin_end_idx) + { + ASSERT(he == _num_hes); + ASSERT(static_cast(he + 1) < _hes.size()); + ASSERT(pin_end_idx <= _pins.size()); + ASSERT(pin_end_idx == _num_pins); + _hes[he].capacity = capacity; + _hes[he].first_out 
= whfc::PinIndex(pin_start_idx); + _hes[he + 1].first_out = whfc::PinIndex(pin_end_idx); + ++_num_hes; } - void finalizeHyperedges(); - - void finalizeParallel(); - - // ####################### Common Functions ####################### - - void clear(); - - void reinitialize(size_t num_nodes) { - clear(); - nodes.resize(num_nodes + 1); + void finalize() + { + _hes.resize(_num_hes + 1); + _pins.resize(_num_pins); } - void shrink_to_fit() { - nodes.shrink_to_fit(); - hyperedges.shrink_to_fit(); - pins.shrink_to_fit(); - incident_hyperedges.shrink_to_fit(); + void copyDataToFlowHypergraph(std::vector &hyperedges, + std::vector &pins) + { + if(_num_hes > 0) + { + const size_t num_hes = static_cast(_num_hes); + for(size_t i = 0; i < num_hes; ++i) + { + _hes[i].first_out += _global_start_pin_idx; + } + const size_t he_start = static_cast(_global_start_he); + std::memcpy(hyperedges.data() + he_start, _hes.data(), + sizeof(FlowHypergraph::HyperedgeData) * num_hes); + } + if(_num_pins > 0) + { + std::memcpy(pins.data() + _global_start_pin_idx, _pins.data(), + sizeof(FlowHypergraph::Pin) * _num_pins); + } } - private: - - // ####################### Sequential Construction ####################### + vec _hes; + vec _pins; + whfc::Hyperedge _num_hes; + whfc::Hyperedge _global_start_he; + size_t _num_pins; + size_t _global_start_pin_idx; + }; - void removeLastPin() { - nodes[ pins.back().pin + 1 ].first_out--; - pins.pop_back(); +public: + using Base = whfc::FlowHypergraph; + + FlowHypergraphBuilder() : + Base(), _finalized(false), _numPinsAtHyperedgeStart(0), _tmp_csr_buckets(), + _inc_he_pos() + { + clear(); + } + + explicit FlowHypergraphBuilder(size_t num_nodes) : + Base(), _finalized(false), _numPinsAtHyperedgeStart(0), _tmp_csr_buckets(), + _inc_he_pos() + { + reinitialize(num_nodes); + } + + // ####################### Sequential Construction ####################### + + void addNode(const whfc::NodeWeight w) + { + nodes.back().weight = w; + nodes.push_back({ whfc::InHeIndex(0), whfc::NodeWeight(0) }); + } + + void startHyperedge(const whfc::Flow capacity) + { + finishHyperedge(); // finish last hyperedge + hyperedges.back().capacity = capacity; // exploit sentinel + _numPinsAtHyperedgeStart = numPins(); + maxHyperedgeCapacity = std::max(maxHyperedgeCapacity, capacity); + } + + void addPin(const whfc::Node u) + { + assert(u < numNodes()); + pins.push_back({ u, whfc::InHeIndex::Invalid() }); + nodes[u + 1].first_out++; + } + + size_t currentHyperedgeSize() const { return numPins() - _numPinsAtHyperedgeStart; } + + void removeCurrentHyperedge() + { + while(numPins() > _numPinsAtHyperedgeStart) + { + removeLastPin(); } - - bool finishHyperedge(); - - // ####################### Parallel Construction ####################### - - void resizeHyperedgesAndPins(const size_t num_hyperedges, - const size_t num_pins); - - bool verifyParallelConstructedHypergraph(); - - bool _finalized; - size_t _numPinsAtHyperedgeStart; - - vec _tmp_csr_buckets; - vec _inc_he_pos; - }; + } + + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::Flow &capacity(const whfc::Hyperedge e) + { + ASSERT(e < hyperedges.size()); + return hyperedges[e].capacity; + } + + void finalize(); + + // ####################### Parallel Construction ####################### + + void allocateNodes(const size_t num_nodes) + { + nodes.assign(num_nodes + 1, NodeData{ whfc::InHeIndex(0), whfc::NodeWeight(0) }); + } + + void resizeNodes(const size_t num_nodes) + { + ASSERT(num_nodes <= numNodes()); + nodes.resize(num_nodes + 1); + } + + void 
allocateHyperedgesAndPins(const size_t num_hyperedges, const size_t num_pins); + + void setNumCSRBuckets(const size_t num_buckets) + { + _tmp_csr_buckets.resize(num_buckets); + } + + void initializeCSRBucket(const size_t bucket, const size_t num_hes, + const size_t num_pins) + { + ASSERT(bucket < _tmp_csr_buckets.size()); + _tmp_csr_buckets[bucket].initialize(num_hes, num_pins); + } + + void finalizeCSRBucket(const size_t bucket) + { + ASSERT(bucket < _tmp_csr_buckets.size()); + _tmp_csr_buckets[bucket].finalize(); + } + + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::PinIndex tmpPinCount(const size_t bucket, + const whfc::Hyperedge e) + { + ASSERT(bucket < _tmp_csr_buckets.size()); + return _tmp_csr_buckets[bucket].pinCount(e); + } + + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::Flow &capacity(const size_t bucket, + const whfc::Hyperedge e) + { + ASSERT(bucket < _tmp_csr_buckets.size()); + return _tmp_csr_buckets[bucket].capacity(e); + } + + TmpPinRange tmpPinsOf(const size_t bucket, const whfc::Hyperedge e) + { + ASSERT(bucket < _tmp_csr_buckets.size()); + return _tmp_csr_buckets[bucket].pinsOf(e); + } + + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE whfc::Hyperedge + originalHyperedgeID(const size_t bucket, const whfc::Hyperedge &e) + { + ASSERT(bucket < _tmp_csr_buckets.size()); + return _tmp_csr_buckets[bucket].originalHyperedgeID(e); + } + + void addPin(const whfc::Node u, const size_t bucket, const size_t pin_idx) + { + ASSERT(bucket < _tmp_csr_buckets.size()); + ASSERT(static_cast(u) < numNodes()); + _tmp_csr_buckets[bucket].addPin(u, pin_idx); + __atomic_fetch_add( + reinterpret_cast(&nodes[u + 1].first_out), 1, + __ATOMIC_RELAXED); + } + + void finishHyperedge(const whfc::Hyperedge he, const whfc::Flow capacity, + const size_t bucket, const size_t pin_start_idx, + const size_t pin_end_idx) + { + ASSERT(bucket < _tmp_csr_buckets.size()); + _tmp_csr_buckets[bucket].finishHyperedge(he, capacity, pin_start_idx, pin_end_idx); + } + + void finalizeHyperedges(); + + void finalizeParallel(); + + // ####################### Common Functions ####################### + + void clear(); + + void reinitialize(size_t num_nodes) + { + clear(); + nodes.resize(num_nodes + 1); + } + + void shrink_to_fit() + { + nodes.shrink_to_fit(); + hyperedges.shrink_to_fit(); + pins.shrink_to_fit(); + incident_hyperedges.shrink_to_fit(); + } + +private: + // ####################### Sequential Construction ####################### + + void removeLastPin() + { + nodes[pins.back().pin + 1].first_out--; + pins.pop_back(); + } + + bool finishHyperedge(); + + // ####################### Parallel Construction ####################### + + void resizeHyperedgesAndPins(const size_t num_hyperedges, const size_t num_pins); + + bool verifyParallelConstructedHypergraph(); + + bool _finalized; + size_t _numPinsAtHyperedgeStart; + + vec _tmp_csr_buckets; + vec _inc_he_pos; +}; } diff --git a/mt-kahypar/partition/refinement/flows/flow_refiner.cpp b/mt-kahypar/partition/refinement/flows/flow_refiner.cpp index 35b5b475d..bea816ff6 100644 --- a/mt-kahypar/partition/refinement/flows/flow_refiner.cpp +++ b/mt-kahypar/partition/refinement/flows/flow_refiner.cpp @@ -35,63 +35,84 @@ namespace mt_kahypar { -template -MoveSequence FlowRefiner::refineImpl(mt_kahypar_partitioned_hypergraph_const_t& hypergraph, - const Subhypergraph& sub_hg, - const HighResClockTimepoint& start) { - const PartitionedHypergraph& phg = utils::cast_const(hypergraph); - MoveSequence sequence { { }, 0 }; - utils::Timer& timer = 
utils::Utilities::instance().getTimer(_context.utility_id); +template +MoveSequence FlowRefiner::refineImpl( + mt_kahypar_partitioned_hypergraph_const_t &hypergraph, const Subhypergraph &sub_hg, + const HighResClockTimepoint &start) +{ + const PartitionedHypergraph &phg = utils::cast_const(hypergraph); + MoveSequence sequence{ {}, 0 }; + utils::Timer &timer = utils::Utilities::instance().getTimer(_context.utility_id); // Construct flow network that contains all vertices given in refinement nodes timer.start_timer("construct_flow_network", "Construct Flow Network", true); FlowProblem flow_problem = constructFlowHypergraph(phg, sub_hg); timer.stop_timer("construct_flow_network"); - if ( flow_problem.total_cut - flow_problem.non_removable_cut > 0 ) { + if(flow_problem.total_cut - flow_problem.non_removable_cut > 0) + { // Solve max-flow min-cut problem bool time_limit_reached = false; timer.start_timer("hyper_flow_cutter", "HyperFlowCutter", true); bool flowcutter_succeeded = runFlowCutter(flow_problem, start, time_limit_reached); timer.stop_timer("hyper_flow_cutter"); - if ( flowcutter_succeeded ) { + if(flowcutter_succeeded) + { // We apply the solution if it either improves the cut or the balance of // the bipartition induced by the two blocks HyperedgeWeight new_cut = flow_problem.non_removable_cut; HypernodeWeight max_part_weight; - const bool sequential = _context.shared_memory.num_threads == _context.refinement.flows.num_parallel_searches; - if (sequential) { + const bool sequential = _context.shared_memory.num_threads == + _context.refinement.flows.num_parallel_searches; + if(sequential) + { new_cut += _sequential_hfc.cs.flow_algo.flow_value; - max_part_weight = std::max(_sequential_hfc.cs.source_weight, _sequential_hfc.cs.target_weight); - } else { + max_part_weight = + std::max(_sequential_hfc.cs.source_weight, _sequential_hfc.cs.target_weight); + } + else + { new_cut += _parallel_hfc.cs.flow_algo.flow_value; - max_part_weight = std::max(_parallel_hfc.cs.source_weight, _parallel_hfc.cs.target_weight); + max_part_weight = + std::max(_parallel_hfc.cs.source_weight, _parallel_hfc.cs.target_weight); } - const bool improved_solution = new_cut < flow_problem.total_cut || - (new_cut == flow_problem.total_cut && max_part_weight < std::max(flow_problem.weight_of_block_0, flow_problem.weight_of_block_1)); + const bool improved_solution = + new_cut < flow_problem.total_cut || + (new_cut == flow_problem.total_cut && + max_part_weight < + std::max(flow_problem.weight_of_block_0, flow_problem.weight_of_block_1)); // Extract move sequence - if ( improved_solution ) { + if(improved_solution) + { sequence.expected_improvement = flow_problem.total_cut - new_cut; - for ( const whfc::Node& u : _flow_hg.nodeIDs() ) { + for(const whfc::Node &u : _flow_hg.nodeIDs()) + { const HypernodeID hn = _whfc_to_node[u]; - if ( hn != kInvalidHypernode ) { + if(hn != kInvalidHypernode) + { const PartitionID from = phg.partID(hn); PartitionID to; - if (sequential) { + if(sequential) + { to = _sequential_hfc.cs.flow_algo.isSource(u) ? _block_0 : _block_1; - } else { + } + else + { to = _parallel_hfc.cs.flow_algo.isSource(u) ? 
_block_0 : _block_1; } - if ( from != to ) { - sequence.moves.push_back(Move { from, to, hn, kInvalidGain }); + if(from != to) + { + sequence.moves.push_back(Move{ from, to, hn, kInvalidGain }); } } } } - } else if ( time_limit_reached ) { + } + else if(time_limit_reached) + { sequence.state = MoveSequenceState::TIME_LIMIT; } } @@ -101,20 +122,23 @@ MoveSequence FlowRefiner::refineImpl(mt_kahypar_partitioned_h #define NOW std::chrono::high_resolution_clock::now() #define RUNNING_TIME(X) std::chrono::duration(NOW - X).count(); -template -bool FlowRefiner::runFlowCutter(const FlowProblem& flow_problem, - const HighResClockTimepoint& start, - bool& time_limit_reached) { +template +bool FlowRefiner::runFlowCutter(const FlowProblem &flow_problem, + const HighResClockTimepoint &start, + bool &time_limit_reached) +{ whfc::Node s = flow_problem.source; whfc::Node t = flow_problem.sink; bool result = false; size_t iteration = 0; auto on_cut = [&] { - if (++iteration == 25) { + if(++iteration == 25) + { iteration = 0; double elapsed = RUNNING_TIME(start); - if (elapsed > _time_limit) { + if(elapsed > _time_limit) + { time_limit_reached = true; return false; } @@ -122,22 +146,29 @@ bool FlowRefiner::runFlowCutter(const FlowProblem& flow_probl return true; }; - - const bool sequential = _context.shared_memory.num_threads == _context.refinement.flows.num_parallel_searches; - if (sequential) { - _sequential_hfc.cs.setMaxBlockWeight(0, std::max( - flow_problem.weight_of_block_0, _context.partition.max_part_weights[_block_0])); - _sequential_hfc.cs.setMaxBlockWeight(1, std::max( - flow_problem.weight_of_block_1, _context.partition.max_part_weights[_block_1])); + const bool sequential = _context.shared_memory.num_threads == + _context.refinement.flows.num_parallel_searches; + if(sequential) + { + _sequential_hfc.cs.setMaxBlockWeight( + 0, std::max(flow_problem.weight_of_block_0, + _context.partition.max_part_weights[_block_0])); + _sequential_hfc.cs.setMaxBlockWeight( + 1, std::max(flow_problem.weight_of_block_1, + _context.partition.max_part_weights[_block_1])); _sequential_hfc.reset(); _sequential_hfc.setFlowBound(flow_problem.total_cut - flow_problem.non_removable_cut); result = _sequential_hfc.enumerateCutsUntilBalancedOrFlowBoundExceeded(s, t, on_cut); - } else { - _parallel_hfc.cs.setMaxBlockWeight(0, std::max( - flow_problem.weight_of_block_0, _context.partition.max_part_weights[_block_0])); - _parallel_hfc.cs.setMaxBlockWeight(1, std::max( - flow_problem.weight_of_block_1, _context.partition.max_part_weights[_block_1])); + } + else + { + _parallel_hfc.cs.setMaxBlockWeight( + 0, std::max(flow_problem.weight_of_block_0, + _context.partition.max_part_weights[_block_0])); + _parallel_hfc.cs.setMaxBlockWeight( + 1, std::max(flow_problem.weight_of_block_1, + _context.partition.max_part_weights[_block_1])); _parallel_hfc.reset(); _parallel_hfc.setFlowBound(flow_problem.total_cut - flow_problem.non_removable_cut); @@ -146,27 +177,31 @@ bool FlowRefiner::runFlowCutter(const FlowProblem& flow_probl return result; } -template -FlowProblem FlowRefiner::constructFlowHypergraph(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg) { +template +FlowProblem +FlowRefiner::constructFlowHypergraph(const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg) +{ _block_0 = sub_hg.block_0; _block_1 = sub_hg.block_1; ASSERT(_block_0 != kInvalidPartition && _block_1 != kInvalidPartition); FlowProblem flow_problem; - - const bool sequential = _context.shared_memory.num_threads == 
_context.refinement.flows.num_parallel_searches; - if ( sequential ) { + const bool sequential = _context.shared_memory.num_threads == + _context.refinement.flows.num_parallel_searches; + if(sequential) + { flow_problem = _sequential_construction.constructFlowHypergraph( - phg, sub_hg, _block_0, _block_1, _whfc_to_node); - } else { + phg, sub_hg, _block_0, _block_1, _whfc_to_node); + } + else + { flow_problem = _parallel_construction.constructFlowHypergraph( - phg, sub_hg, _block_0, _block_1, _whfc_to_node); + phg, sub_hg, _block_0, _block_1, _whfc_to_node); } DBG << "Flow Hypergraph [ Nodes =" << _flow_hg.numNodes() - << ", Edges =" << _flow_hg.numHyperedges() - << ", Pins =" << _flow_hg.numPins() + << ", Edges =" << _flow_hg.numHyperedges() << ", Pins =" << _flow_hg.numPins() << ", Blocks = (" << _block_0 << "," << _block_1 << ") ]"; return flow_problem; diff --git a/mt-kahypar/partition/refinement/flows/flow_refiner.h b/mt-kahypar/partition/refinement/flows/flow_refiner.h index a32a5dd14..bc4cd1a6a 100644 --- a/mt-kahypar/partition/refinement/flows/flow_refiner.h +++ b/mt-kahypar/partition/refinement/flows/flow_refiner.h @@ -30,65 +30,62 @@ #include #include "algorithm/hyperflowcutter.h" -#include "algorithm/sequential_push_relabel.h" #include "algorithm/parallel_push_relabel.h" +#include "algorithm/sequential_push_relabel.h" -#include "mt-kahypar/partition/context.h" -#include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" #include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h" #include "mt-kahypar/parallel/stl/scalable_queue.h" -#include "mt-kahypar/partition/refinement/flows/sequential_construction.h" -#include "mt-kahypar/partition/refinement/flows/parallel_construction.h" +#include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h" +#include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" +#include "mt-kahypar/partition/refinement/flows/parallel_construction.h" +#include "mt-kahypar/partition/refinement/flows/sequential_construction.h" #include "mt-kahypar/utils/cast.h" namespace mt_kahypar { -template -class FlowRefiner final : public IFlowRefiner { +template +class FlowRefiner final : public IFlowRefiner +{ static constexpr bool debug = false; using PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; - public: - explicit FlowRefiner(const HyperedgeID num_hyperedges, - const Context& context) : - _phg(nullptr), - _context(context), - _num_available_threads(0), - _block_0(kInvalidPartition), - _block_1(kInvalidPartition), - _flow_hg(), - _sequential_hfc(_flow_hg, context.partition.seed), - _parallel_hfc(_flow_hg, context.partition.seed), - _whfc_to_node(), - _sequential_construction(num_hyperedges, _flow_hg, _sequential_hfc, context), - _parallel_construction(num_hyperedges, _flow_hg, _parallel_hfc, context) { - _sequential_hfc.find_most_balanced = _context.refinement.flows.find_most_balanced_cut; - _sequential_hfc.timer.active = false; - _sequential_hfc.forceSequential(true); - _sequential_hfc.setBulkPiercing(context.refinement.flows.pierce_in_bulk); - - _parallel_hfc.find_most_balanced = _context.refinement.flows.find_most_balanced_cut; - _parallel_hfc.timer.active = false; - _parallel_hfc.forceSequential(false); - _sequential_hfc.setBulkPiercing(context.refinement.flows.pierce_in_bulk); +public: + explicit FlowRefiner(const HyperedgeID num_hyperedges, const Context &context) : + _phg(nullptr), _context(context), 
_num_available_threads(0), + _block_0(kInvalidPartition), _block_1(kInvalidPartition), _flow_hg(), + _sequential_hfc(_flow_hg, context.partition.seed), + _parallel_hfc(_flow_hg, context.partition.seed), _whfc_to_node(), + _sequential_construction(num_hyperedges, _flow_hg, _sequential_hfc, context), + _parallel_construction(num_hyperedges, _flow_hg, _parallel_hfc, context) + { + _sequential_hfc.find_most_balanced = _context.refinement.flows.find_most_balanced_cut; + _sequential_hfc.timer.active = false; + _sequential_hfc.forceSequential(true); + _sequential_hfc.setBulkPiercing(context.refinement.flows.pierce_in_bulk); + + _parallel_hfc.find_most_balanced = _context.refinement.flows.find_most_balanced_cut; + _parallel_hfc.timer.active = false; + _parallel_hfc.forceSequential(false); + _sequential_hfc.setBulkPiercing(context.refinement.flows.pierce_in_bulk); } - FlowRefiner(const FlowRefiner&) = delete; - FlowRefiner(FlowRefiner&&) = delete; - FlowRefiner & operator= (const FlowRefiner &) = delete; - FlowRefiner & operator= (FlowRefiner &&) = delete; + FlowRefiner(const FlowRefiner &) = delete; + FlowRefiner(FlowRefiner &&) = delete; + FlowRefiner &operator=(const FlowRefiner &) = delete; + FlowRefiner &operator=(FlowRefiner &&) = delete; virtual ~FlowRefiner() = default; - protected: - - private: - void initializeImpl(mt_kahypar_partitioned_hypergraph_const_t& hypergraph) override { - const PartitionedHypergraph& phg = utils::cast_const(hypergraph); +protected: +private: + void initializeImpl(mt_kahypar_partitioned_hypergraph_const_t &hypergraph) override + { + const PartitionedHypergraph &phg = + utils::cast_const(hypergraph); _phg = &phg; _time_limit = std::numeric_limits::max(); _block_0 = kInvalidPartition; @@ -97,27 +94,25 @@ class FlowRefiner final : public IFlowRefiner { _whfc_to_node.clear(); } - MoveSequence refineImpl(mt_kahypar_partitioned_hypergraph_const_t& hypergraph, - const Subhypergraph& sub_hg, - const HighResClockTimepoint& start) override; + MoveSequence refineImpl(mt_kahypar_partitioned_hypergraph_const_t &hypergraph, + const Subhypergraph &sub_hg, + const HighResClockTimepoint &start) override; - bool runFlowCutter(const FlowProblem& flow_problem, - const HighResClockTimepoint& start, - bool& time_limit_reached); + bool runFlowCutter(const FlowProblem &flow_problem, const HighResClockTimepoint &start, + bool &time_limit_reached); - FlowProblem constructFlowHypergraph(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg); + FlowProblem constructFlowHypergraph(const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg); - PartitionID maxNumberOfBlocksPerSearchImpl() const override { - return 2; - } + PartitionID maxNumberOfBlocksPerSearchImpl() const override { return 2; } - void setNumThreadsForSearchImpl(const size_t num_threads) override { + void setNumThreadsForSearchImpl(const size_t num_threads) override + { _num_available_threads = num_threads; } - const PartitionedHypergraph* _phg; - const Context& _context; + const PartitionedHypergraph *_phg; + const Context &_context; using IFlowRefiner::_time_limit; size_t _num_available_threads; @@ -131,4 +126,4 @@ class FlowRefiner final : public IFlowRefiner { SequentialConstruction _sequential_construction; ParallelConstruction _parallel_construction; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/flows/i_flow_refiner.h b/mt-kahypar/partition/refinement/flows/i_flow_refiner.h index a38b59c41..68bd5e4c1 100644 --- 
a/mt-kahypar/partition/refinement/flows/i_flow_refiner.h +++ b/mt-kahypar/partition/refinement/flows/i_flow_refiner.h @@ -26,69 +26,66 @@ #pragma once - #include "include/libmtkahypartypes.h" #include "mt-kahypar/definitions.h" #include "mt-kahypar/macros.h" -#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/refinement/flows/flow_common.h" namespace mt_kahypar { -class IFlowRefiner { +class IFlowRefiner +{ - public: - IFlowRefiner(const IFlowRefiner&) = delete; - IFlowRefiner(IFlowRefiner&&) = delete; - IFlowRefiner & operator= (const IFlowRefiner &) = delete; - IFlowRefiner & operator= (IFlowRefiner &&) = delete; +public: + IFlowRefiner(const IFlowRefiner &) = delete; + IFlowRefiner(IFlowRefiner &&) = delete; + IFlowRefiner &operator=(const IFlowRefiner &) = delete; + IFlowRefiner &operator=(IFlowRefiner &&) = delete; virtual ~IFlowRefiner() = default; - void initialize(mt_kahypar_partitioned_hypergraph_const_t& phg) { - initializeImpl(phg); - } + void initialize(mt_kahypar_partitioned_hypergraph_const_t &phg) { initializeImpl(phg); } - MoveSequence refine(mt_kahypar_partitioned_hypergraph_const_t& phg, - const Subhypergraph& sub_hg, - const HighResClockTimepoint& start) { + MoveSequence refine(mt_kahypar_partitioned_hypergraph_const_t &phg, + const Subhypergraph &sub_hg, const HighResClockTimepoint &start) + { return refineImpl(phg, sub_hg, start); } // ! Returns the maximum number of blocks that can be refined // ! per search with this refinement algorithm - PartitionID maxNumberOfBlocksPerSearch() const { + PartitionID maxNumberOfBlocksPerSearch() const + { return maxNumberOfBlocksPerSearchImpl(); } // ! Set the number of threads that is used for the next search - void setNumThreadsForSearch(const size_t num_threads) { + void setNumThreadsForSearch(const size_t num_threads) + { setNumThreadsForSearchImpl(num_threads); } // ! 
Updates the time limit (in seconds) - void updateTimeLimit(const double time_limit) { - _time_limit = time_limit; - } - + void updateTimeLimit(const double time_limit) { _time_limit = time_limit; } - protected: +protected: IFlowRefiner() = default; double _time_limit = 0; - private: - virtual void initializeImpl(mt_kahypar_partitioned_hypergraph_const_t& phg) = 0; +private: + virtual void initializeImpl(mt_kahypar_partitioned_hypergraph_const_t &phg) = 0; - virtual MoveSequence refineImpl(mt_kahypar_partitioned_hypergraph_const_t& phg, - const Subhypergraph& sub_hg, - const HighResClockTimepoint& start) = 0; + virtual MoveSequence refineImpl(mt_kahypar_partitioned_hypergraph_const_t &phg, + const Subhypergraph &sub_hg, + const HighResClockTimepoint &start) = 0; virtual PartitionID maxNumberOfBlocksPerSearchImpl() const = 0; virtual void setNumThreadsForSearchImpl(const size_t num_threads) = 0; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/flows/parallel_construction.cpp b/mt-kahypar/partition/refinement/flows/parallel_construction.cpp index 7f9cb7915..4462ce338 100644 --- a/mt-kahypar/partition/refinement/flows/parallel_construction.cpp +++ b/mt-kahypar/partition/refinement/flows/parallel_construction.cpp @@ -36,208 +36,242 @@ namespace mt_kahypar { -template +template typename ParallelConstruction::TmpHyperedge -ParallelConstruction::DynamicIdenticalNetDetection::get(const size_t he_hash, - const vec& pins) { +ParallelConstruction::DynamicIdenticalNetDetection::get( + const size_t he_hash, const vec &pins) +{ const size_t bucket_idx = he_hash % _hash_buckets.size(); - if ( __atomic_load_n(&_hash_buckets[bucket_idx].threshold, __ATOMIC_RELAXED) == _threshold ) { + if(__atomic_load_n(&_hash_buckets[bucket_idx].threshold, __ATOMIC_RELAXED) == + _threshold) + { // There exists already some hyperedges with the same hash - for ( const ThresholdHyperedge& tmp : _hash_buckets[bucket_idx].identical_nets ) { + for(const ThresholdHyperedge &tmp : _hash_buckets[bucket_idx].identical_nets) + { // Check if there is some hyperedge equal to he - const TmpHyperedge& tmp_e = tmp.e; - if ( tmp.threshold == _threshold && tmp_e.hash == he_hash && - _flow_hg.tmpPinCount(tmp_e.bucket, tmp_e.e) == pins.size() ) { + const TmpHyperedge &tmp_e = tmp.e; + if(tmp.threshold == _threshold && tmp_e.hash == he_hash && + _flow_hg.tmpPinCount(tmp_e.bucket, tmp_e.e) == pins.size()) + { bool is_identical = true; size_t idx = 0; - for ( const whfc::FlowHypergraph::Pin& u : _flow_hg.tmpPinsOf(tmp_e.bucket, tmp_e.e) ) { - if ( u.pin != pins[idx++] ) { + for(const whfc::FlowHypergraph::Pin &u : + _flow_hg.tmpPinsOf(tmp_e.bucket, tmp_e.e)) + { + if(u.pin != pins[idx++]) + { is_identical = false; break; } } - if ( is_identical ) { + if(is_identical) + { return tmp_e; } } } } - return TmpHyperedge { 0, std::numeric_limits::max(), whfc::invalidHyperedge }; + return TmpHyperedge{ 0, std::numeric_limits::max(), whfc::invalidHyperedge }; } -template -void ParallelConstruction::DynamicIdenticalNetDetection::add(const TmpHyperedge& tmp_he) { +template +void ParallelConstruction::DynamicIdenticalNetDetection::add( + const TmpHyperedge &tmp_he) +{ const size_t bucket_idx = tmp_he.hash % _hash_buckets.size(); - uint32_t expected = __atomic_load_n(&_hash_buckets[bucket_idx].threshold, __ATOMIC_RELAXED); + uint32_t expected = + __atomic_load_n(&_hash_buckets[bucket_idx].threshold, __ATOMIC_RELAXED); uint32_t desired = _threshold - 1; - while ( 
__atomic_load_n(&_hash_buckets[bucket_idx].threshold, __ATOMIC_RELAXED) < _threshold ) { - if ( expected < desired && - __atomic_compare_exchange(&_hash_buckets[bucket_idx].threshold, - &expected, &desired, false, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) ) { + while(__atomic_load_n(&_hash_buckets[bucket_idx].threshold, __ATOMIC_RELAXED) < + _threshold) + { + if(expected < desired && + __atomic_compare_exchange(&_hash_buckets[bucket_idx].threshold, &expected, + &desired, false, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)) + { _hash_buckets[bucket_idx].identical_nets.clear(); - __atomic_store_n(&_hash_buckets[bucket_idx].threshold, _threshold, __ATOMIC_RELAXED); + __atomic_store_n(&_hash_buckets[bucket_idx].threshold, _threshold, + __ATOMIC_RELAXED); } } - _hash_buckets[bucket_idx].identical_nets.push_back(ThresholdHyperedge { tmp_he, _threshold }); + _hash_buckets[bucket_idx].identical_nets.push_back( + ThresholdHyperedge{ tmp_he, _threshold }); } -template -FlowProblem ParallelConstruction::constructFlowHypergraph(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, - const PartitionID block_0, - const PartitionID block_1, - vec& whfc_to_node) { +template +FlowProblem ParallelConstruction::constructFlowHypergraph( + const PartitionedHypergraph &phg, const Subhypergraph &sub_hg, + const PartitionID block_0, const PartitionID block_1, vec &whfc_to_node) +{ FlowProblem flow_problem; - const double density = static_cast(phg.initialNumEdges()) / phg.initialNumNodes(); - const double avg_he_size = static_cast(phg.initialNumPins()) / phg.initialNumEdges(); - if ( density >= 0.5 && avg_he_size <= 100 ) { + const double density = + static_cast(phg.initialNumEdges()) / phg.initialNumNodes(); + const double avg_he_size = + static_cast(phg.initialNumPins()) / phg.initialNumEdges(); + if(density >= 0.5 && avg_he_size <= 100) + { // This algorithm iterates over all hyperedges and checks for all pins if // they are contained in the flow problem. Algorithm could have overheads, if // only a small portion of each hyperedge is contained in the flow hypergraph. flow_problem = constructDefault(phg, sub_hg, block_0, block_1, whfc_to_node); - } else { + } + else + { // This is a construction algorithm optimized for hypergraphs with large hyperedges. // Algorithm constructs a temporary pin list, therefore it could have overheads // for hypergraphs with small hyperedges. - flow_problem = constructOptimizedForLargeHEs(phg, sub_hg, block_0, block_1, whfc_to_node); + flow_problem = + constructOptimizedForLargeHEs(phg, sub_hg, block_0, block_1, whfc_to_node); } - if ( _flow_hg.nodeWeight(flow_problem.source) == 0 || - _flow_hg.nodeWeight(flow_problem.sink) == 0 ) { + if(_flow_hg.nodeWeight(flow_problem.source) == 0 || + _flow_hg.nodeWeight(flow_problem.sink) == 0) + { // Source or sink not connected to vertices in the flow problem flow_problem.non_removable_cut = 0; flow_problem.total_cut = 0; - } else { + } + else + { _flow_hg.finalizeParallel(); - if ( _context.refinement.flows.determine_distance_from_cut ) { + if(_context.refinement.flows.determine_distance_from_cut) + { // Determine the distance of each node contained in the flow network from the cut. // This technique improves piercing decision within the WHFC framework. 
- determineDistanceFromCut(phg, flow_problem.source, - flow_problem.sink, block_0, block_1, whfc_to_node); + determineDistanceFromCut(phg, flow_problem.source, flow_problem.sink, block_0, + block_1, whfc_to_node); } } DBG << "Flow Hypergraph [ Nodes =" << _flow_hg.numNodes() - << ", Edges =" << _flow_hg.numHyperedges() - << ", Pins =" << _flow_hg.numPins() + << ", Edges =" << _flow_hg.numHyperedges() << ", Pins =" << _flow_hg.numPins() << ", Blocks = (" << block_0 << "," << block_1 << ") ]"; return flow_problem; } -template -FlowProblem ParallelConstruction::constructFlowHypergraph(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, - const PartitionID block_0, - const PartitionID block_1, - vec& whfc_to_node, - const bool default_construction) { +template +FlowProblem ParallelConstruction::constructFlowHypergraph( + const PartitionedHypergraph &phg, const Subhypergraph &sub_hg, + const PartitionID block_0, const PartitionID block_1, vec &whfc_to_node, + const bool default_construction) +{ FlowProblem flow_problem; - if ( default_construction ) { + if(default_construction) + { // This algorithm iterates over all hyperedges and checks for all pins if // they are contained in the flow problem. Algorithm could have overheads, if // only a small portion of each hyperedge is contained in the flow hypergraph. flow_problem = constructDefault(phg, sub_hg, block_0, block_1, whfc_to_node); - } else { + } + else + { // This is a construction algorithm optimized for hypergraphs with large hyperedges. // Algorithm constructs a temporary pin list, therefore it could have overheads // for hypergraphs with small hyperedges. - flow_problem = constructOptimizedForLargeHEs(phg, sub_hg, block_0, block_1, whfc_to_node); + flow_problem = + constructOptimizedForLargeHEs(phg, sub_hg, block_0, block_1, whfc_to_node); } - if ( _flow_hg.nodeWeight(flow_problem.source) == 0 || - _flow_hg.nodeWeight(flow_problem.sink) == 0 ) { + if(_flow_hg.nodeWeight(flow_problem.source) == 0 || + _flow_hg.nodeWeight(flow_problem.sink) == 0) + { // Source or sink not connected to vertices in the flow problem flow_problem.non_removable_cut = 0; flow_problem.total_cut = 0; - } else { + } + else + { _flow_hg.finalizeParallel(); - if ( _context.refinement.flows.determine_distance_from_cut ) { + if(_context.refinement.flows.determine_distance_from_cut) + { // Determine the distance of each node contained in the flow network from the cut. // This technique improves piercing decision within the WHFC framework. 
- determineDistanceFromCut(phg, flow_problem.source, - flow_problem.sink, block_0, block_1, whfc_to_node); + determineDistanceFromCut(phg, flow_problem.source, flow_problem.sink, block_0, + block_1, whfc_to_node); } } DBG << "Flow Hypergraph [ Nodes =" << _flow_hg.numNodes() - << ", Edges =" << _flow_hg.numHyperedges() - << ", Pins =" << _flow_hg.numPins() + << ", Edges =" << _flow_hg.numHyperedges() << ", Pins =" << _flow_hg.numPins() << ", Blocks = (" << block_0 << "," << block_1 << ") ]"; return flow_problem; } -template -FlowProblem ParallelConstruction::constructDefault(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, - const PartitionID block_0, - const PartitionID block_1, - vec& whfc_to_node) { +template +FlowProblem ParallelConstruction::constructDefault( + const PartitionedHypergraph &phg, const Subhypergraph &sub_hg, + const PartitionID block_0, const PartitionID block_1, vec &whfc_to_node) +{ ASSERT(block_0 != kInvalidPartition && block_1 != kInvalidPartition); FlowProblem flow_problem; flow_problem.total_cut = 0; flow_problem.non_removable_cut = 0; _node_to_whfc.clear(); - tbb::parallel_invoke([&]() { - _node_to_whfc.clear(); - _node_to_whfc.setMaxSize(sub_hg.numNodes()); - }, [&] { - whfc_to_node.resize(sub_hg.numNodes() + 2); - }, [&] { - _flow_hg.allocateNodes(sub_hg.numNodes() + 2); - }, [&] { - _identical_nets.reset(); - }); - - if ( _context.refinement.flows.determine_distance_from_cut ) { + tbb::parallel_invoke( + [&]() { + _node_to_whfc.clear(); + _node_to_whfc.setMaxSize(sub_hg.numNodes()); + }, + [&] { whfc_to_node.resize(sub_hg.numNodes() + 2); }, + [&] { _flow_hg.allocateNodes(sub_hg.numNodes() + 2); }, + [&] { _identical_nets.reset(); }); + + if(_context.refinement.flows.determine_distance_from_cut) + { _cut_hes.clear(); } // Add refinement nodes to flow network - tbb::parallel_invoke([&] { - // Add source nodes - flow_problem.source = whfc::Node(0); - whfc_to_node[flow_problem.source] = kInvalidHypernode; - _flow_hg.nodeWeight(flow_problem.source) = whfc::NodeWeight( - std::max(0, phg.partWeight(block_0) - sub_hg.weight_of_block_0)); - tbb::parallel_for(UL(0), sub_hg.nodes_of_block_0.size(), [&](const size_t i) { - const HypernodeID hn = sub_hg.nodes_of_block_0[i]; - const whfc::Node u(1 + i); - whfc_to_node[u] = hn; - _node_to_whfc[hn] = u; - _flow_hg.nodeWeight(u) = whfc::NodeWeight(phg.nodeWeight(hn)); - }); - }, [&] { - // Add sink nodes - flow_problem.sink = whfc::Node(sub_hg.nodes_of_block_0.size() + 1); - whfc_to_node[flow_problem.sink] = kInvalidHypernode; - _flow_hg.nodeWeight(flow_problem.sink) = whfc::NodeWeight( - std::max(0, phg.partWeight(block_1) - sub_hg.weight_of_block_1)); - tbb::parallel_for(UL(0), sub_hg.nodes_of_block_1.size(), [&](const size_t i) { - const HypernodeID hn = sub_hg.nodes_of_block_1[i]; - const whfc::Node u(flow_problem.sink + 1 + i); - whfc_to_node[u] = hn; - _node_to_whfc[hn] = u; - _flow_hg.nodeWeight(u) = whfc::NodeWeight(phg.nodeWeight(hn)); - }); - }); - flow_problem.weight_of_block_0 = _flow_hg.nodeWeight(flow_problem.source) + sub_hg.weight_of_block_0; - flow_problem.weight_of_block_1 = _flow_hg.nodeWeight(flow_problem.sink) + sub_hg.weight_of_block_1; + tbb::parallel_invoke( + [&] { + // Add source nodes + flow_problem.source = whfc::Node(0); + whfc_to_node[flow_problem.source] = kInvalidHypernode; + _flow_hg.nodeWeight(flow_problem.source) = whfc::NodeWeight( + std::max(0, phg.partWeight(block_0) - sub_hg.weight_of_block_0)); + tbb::parallel_for(UL(0), sub_hg.nodes_of_block_0.size(), [&](const size_t 
i) { + const HypernodeID hn = sub_hg.nodes_of_block_0[i]; + const whfc::Node u(1 + i); + whfc_to_node[u] = hn; + _node_to_whfc[hn] = u; + _flow_hg.nodeWeight(u) = whfc::NodeWeight(phg.nodeWeight(hn)); + }); + }, + [&] { + // Add sink nodes + flow_problem.sink = whfc::Node(sub_hg.nodes_of_block_0.size() + 1); + whfc_to_node[flow_problem.sink] = kInvalidHypernode; + _flow_hg.nodeWeight(flow_problem.sink) = whfc::NodeWeight( + std::max(0, phg.partWeight(block_1) - sub_hg.weight_of_block_1)); + tbb::parallel_for(UL(0), sub_hg.nodes_of_block_1.size(), [&](const size_t i) { + const HypernodeID hn = sub_hg.nodes_of_block_1[i]; + const whfc::Node u(flow_problem.sink + 1 + i); + whfc_to_node[u] = hn; + _node_to_whfc[hn] = u; + _flow_hg.nodeWeight(u) = whfc::NodeWeight(phg.nodeWeight(hn)); + }); + }); + flow_problem.weight_of_block_0 = + _flow_hg.nodeWeight(flow_problem.source) + sub_hg.weight_of_block_0; + flow_problem.weight_of_block_1 = + _flow_hg.nodeWeight(flow_problem.sink) + sub_hg.weight_of_block_1; const HyperedgeID max_hyperedges = sub_hg.hes.size(); const HypernodeID max_pins = sub_hg.num_pins + max_hyperedges; _flow_hg.allocateHyperedgesAndPins(max_hyperedges, max_pins); // Add hyperedge to flow network and configure source and sink - auto push_into_tmp_pins = [&](vec& tmp_pins, const whfc::Node pin, - size_t& current_hash, const bool is_source_or_sink) { + auto push_into_tmp_pins = [&](vec &tmp_pins, const whfc::Node pin, + size_t ¤t_hash, const bool is_source_or_sink) { tmp_pins.push_back(pin); current_hash += kahypar::math::hash(pin); - if ( is_source_or_sink ) { + if(is_source_or_sink) + { // According to Lars: Adding to source or sink to the start of // each pin list improves running time std::swap(tmp_pins[0], tmp_pins.back()); @@ -245,13 +279,15 @@ FlowProblem ParallelConstruction::constructDefault(const Part }; _flow_hg.setNumCSRBuckets(NUM_CSR_BUCKETS); - const size_t step = max_hyperedges / NUM_CSR_BUCKETS + (max_hyperedges % NUM_CSR_BUCKETS != 0); + const size_t step = + max_hyperedges / NUM_CSR_BUCKETS + (max_hyperedges % NUM_CSR_BUCKETS != 0); tbb::parallel_for(UL(0), NUM_CSR_BUCKETS, [&](const size_t idx) { const size_t start = std::min(step * idx, static_cast(max_hyperedges)); const size_t end = std::min(step * (idx + 1), static_cast(max_hyperedges)); const size_t num_hes = end - start; size_t num_pins = 0; - for ( size_t i = start; i < end; ++i ) { + for(size_t i = start; i < end; ++i) + { const HyperedgeID he = sub_hg.hes[i]; num_pins += phg.edgeSize(he) + 1; } @@ -259,24 +295,34 @@ FlowProblem ParallelConstruction::constructDefault(const Part whfc::Hyperedge e(0); size_t pin_idx = 0; - vec& tmp_pins = _tmp_pins.local(); - for ( size_t i = start; i < end; ++i ) { + vec &tmp_pins = _tmp_pins.local(); + for(size_t i = start; i < end; ++i) + { const HyperedgeID he = sub_hg.hes[i]; - if ( !FlowNetworkConstruction::dropHyperedge(phg, he, block_0, block_1) ) { + if(!FlowNetworkConstruction::dropHyperedge(phg, he, block_0, block_1)) + { tmp_pins.clear(); size_t he_hash = 0; - bool connectToSource = FlowNetworkConstruction::connectToSource(phg, he, block_0, block_1); - bool connectToSink = FlowNetworkConstruction::connectToSink(phg, he, block_0, block_1); - const HyperedgeWeight he_weight = FlowNetworkConstruction::capacity(phg, _context, he, block_0, block_1); - if ( ( phg.pinCountInPart(he, block_0) > 0 && phg.pinCountInPart(he, block_1) > 0 ) || - FlowNetworkConstruction::isCut(phg, he, block_0, block_1) ) { + bool connectToSource = + 
FlowNetworkConstruction::connectToSource(phg, he, block_0, block_1); + bool connectToSink = + FlowNetworkConstruction::connectToSink(phg, he, block_0, block_1); + const HyperedgeWeight he_weight = + FlowNetworkConstruction::capacity(phg, _context, he, block_0, block_1); + if((phg.pinCountInPart(he, block_0) > 0 && phg.pinCountInPart(he, block_1) > 0) || + FlowNetworkConstruction::isCut(phg, he, block_0, block_1)) + { __atomic_fetch_add(&flow_problem.total_cut, he_weight, __ATOMIC_RELAXED); } - for ( const HypernodeID& pin : phg.pins(he) ) { - whfc::Node* whfc_pin = _node_to_whfc.get_if_contained(pin); - if ( whfc_pin ) { + for(const HypernodeID &pin : phg.pins(he)) + { + whfc::Node *whfc_pin = _node_to_whfc.get_if_contained(pin); + if(whfc_pin) + { push_into_tmp_pins(tmp_pins, *whfc_pin, he_hash, false); - } else { + } + else + { const PartitionID pin_block = phg.partID(pin); connectToSource |= pin_block == block_0; connectToSink |= pin_block == block_1; @@ -285,40 +331,56 @@ FlowProblem ParallelConstruction::constructDefault(const Part const bool empty_hyperedge = tmp_pins.size() == 0; const bool connected_to_source_and_sink = connectToSource && connectToSink; - if ( connected_to_source_and_sink ) { + if(connected_to_source_and_sink) + { // Hyperedge is connected to source and sink which means we can not remove it // from the cut with the current flow problem => remove he from flow problem - __atomic_fetch_add(&flow_problem.non_removable_cut, he_weight, __ATOMIC_RELAXED); - } else if ( !empty_hyperedge ) { - if ( connectToSource ) { + __atomic_fetch_add(&flow_problem.non_removable_cut, he_weight, + __ATOMIC_RELAXED); + } + else if(!empty_hyperedge) + { + if(connectToSource) + { push_into_tmp_pins(tmp_pins, flow_problem.source, he_hash, true); - } else if ( connectToSink ) { + } + else if(connectToSink) + { push_into_tmp_pins(tmp_pins, flow_problem.sink, he_hash, true); } // Sort pins for identical net detection - std::sort( tmp_pins.begin() + - ( tmp_pins[0] == flow_problem.source || - tmp_pins[0] == flow_problem.sink), tmp_pins.end()); + std::sort(tmp_pins.begin() + (tmp_pins[0] == flow_problem.source || + tmp_pins[0] == flow_problem.sink), + tmp_pins.end()); - if ( tmp_pins.size() > 1 ) { + if(tmp_pins.size() > 1) + { const TmpHyperedge identical_net = _identical_nets.get(he_hash, tmp_pins); - if ( identical_net.e == whfc::invalidHyperedge ) { + if(identical_net.e == whfc::invalidHyperedge) + { const size_t pin_start = pin_idx; const size_t pin_end = pin_start + tmp_pins.size(); - for ( size_t i = 0; i < tmp_pins.size(); ++i ) { + for(size_t i = 0; i < tmp_pins.size(); ++i) + { _flow_hg.addPin(tmp_pins[i], idx, pin_idx++); } - TmpHyperedge tmp_e { he_hash, idx, e++ }; - if ( _context.refinement.flows.determine_distance_from_cut && - phg.pinCountInPart(he, block_0) > 0 && phg.pinCountInPart(he, block_1) > 0 ) { + TmpHyperedge tmp_e{ he_hash, idx, e++ }; + if(_context.refinement.flows.determine_distance_from_cut && + phg.pinCountInPart(he, block_0) > 0 && + phg.pinCountInPart(he, block_1) > 0) + { _cut_hes.push_back(tmp_e); } _flow_hg.finishHyperedge(tmp_e.e, he_weight, idx, pin_start, pin_end); _identical_nets.add(tmp_e); - } else { + } + else + { // Current hyperedge is identical to an already added - __atomic_fetch_add(&_flow_hg.capacity(identical_net.bucket, identical_net.e), he_weight, __ATOMIC_RELAXED); + __atomic_fetch_add( + &_flow_hg.capacity(identical_net.bucket, identical_net.e), he_weight, + __ATOMIC_RELAXED); } } } @@ -326,104 +388,111 @@ FlowProblem 
ParallelConstruction::constructDefault(const Part } }); - tbb::parallel_for(UL(0), NUM_CSR_BUCKETS, [&](const size_t idx) { - _flow_hg.finalizeCSRBucket(idx); - }); + tbb::parallel_for(UL(0), NUM_CSR_BUCKETS, + [&](const size_t idx) { _flow_hg.finalizeCSRBucket(idx); }); _flow_hg.finalizeHyperedges(); return flow_problem; } -template -FlowProblem ParallelConstruction::constructOptimizedForLargeHEs(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, - const PartitionID block_0, - const PartitionID block_1, - vec& whfc_to_node) { +template +FlowProblem ParallelConstruction::constructOptimizedForLargeHEs( + const PartitionedHypergraph &phg, const Subhypergraph &sub_hg, + const PartitionID block_0, const PartitionID block_1, vec &whfc_to_node) +{ ASSERT(block_0 != kInvalidPartition && block_1 != kInvalidPartition); FlowProblem flow_problem; flow_problem.total_cut = 0; flow_problem.non_removable_cut = 0; _node_to_whfc.clear(); - tbb::parallel_invoke([&]() { - _he_to_whfc.clear(); - _he_to_whfc.setMaxSize(sub_hg.hes.size()); - tbb::parallel_for(UL(0), sub_hg.hes.size(), [&](const size_t i) { - const HyperedgeID he = sub_hg.hes[i]; - _he_to_whfc[he] = i; - }); - }, [&] { - whfc_to_node.resize(sub_hg.numNodes() + 2); - }, [&] { - _flow_hg.allocateNodes(sub_hg.numNodes() + 2); - }, [&] { - _identical_nets.reset(); - }); + tbb::parallel_invoke( + [&]() { + _he_to_whfc.clear(); + _he_to_whfc.setMaxSize(sub_hg.hes.size()); + tbb::parallel_for(UL(0), sub_hg.hes.size(), [&](const size_t i) { + const HyperedgeID he = sub_hg.hes[i]; + _he_to_whfc[he] = i; + }); + }, + [&] { whfc_to_node.resize(sub_hg.numNodes() + 2); }, + [&] { _flow_hg.allocateNodes(sub_hg.numNodes() + 2); }, + [&] { _identical_nets.reset(); }); - if ( _context.refinement.flows.determine_distance_from_cut ) { + if(_context.refinement.flows.determine_distance_from_cut) + { _cut_hes.clear(); } // Add refinement nodes to flow network const size_t num_buckets = _pins.numBuckets(); const HyperedgeID max_hyperedges = sub_hg.hes.size(); - const size_t hes_per_bucket = max_hyperedges / num_buckets + (max_hyperedges % num_buckets != 0); - tbb::parallel_invoke([&] { - // Add source nodes - flow_problem.source = whfc::Node(0); - whfc_to_node[flow_problem.source] = kInvalidHypernode; - _flow_hg.nodeWeight(flow_problem.source) = whfc::NodeWeight( - std::max(0, phg.partWeight(block_0) - sub_hg.weight_of_block_0)); - tbb::parallel_for(UL(0), sub_hg.nodes_of_block_0.size(), [&](const size_t i) { - const HypernodeID hn = sub_hg.nodes_of_block_0[i]; - const whfc::Node u(1 + i); - whfc_to_node[u] = hn; - _flow_hg.nodeWeight(u) = whfc::NodeWeight(phg.nodeWeight(hn)); - for ( const HyperedgeID& he : phg.incidentEdges(hn) ) { - ASSERT(_he_to_whfc.get_if_contained(he) != nullptr); - const HyperedgeID e = _he_to_whfc[he]; - _pins.insert(e / hes_per_bucket, TmpPin { e, u, block_0 }); - } - }); - }, [&] { - // Add sink nodes - flow_problem.sink = whfc::Node(sub_hg.nodes_of_block_0.size() + 1); - whfc_to_node[flow_problem.sink] = kInvalidHypernode; - _flow_hg.nodeWeight(flow_problem.sink) = whfc::NodeWeight( - std::max(0, phg.partWeight(block_1) - sub_hg.weight_of_block_1)); - tbb::parallel_for(UL(0), sub_hg.nodes_of_block_1.size(), [&](const size_t i) { - const HypernodeID hn = sub_hg.nodes_of_block_1[i]; - const whfc::Node u(flow_problem.sink + 1 + i); - whfc_to_node[u] = hn; - _flow_hg.nodeWeight(u) = whfc::NodeWeight(phg.nodeWeight(hn)); - for ( const HyperedgeID& he : phg.incidentEdges(hn) ) { - ASSERT(_he_to_whfc.get_if_contained(he) != 
nullptr); - const HyperedgeID e = _he_to_whfc[he]; - _pins.insert(e / hes_per_bucket, TmpPin { e, u, block_1 }); - } - }); - }); - flow_problem.weight_of_block_0 = _flow_hg.nodeWeight(flow_problem.source) + sub_hg.weight_of_block_0; - flow_problem.weight_of_block_1 = _flow_hg.nodeWeight(flow_problem.sink) + sub_hg.weight_of_block_1; + const size_t hes_per_bucket = + max_hyperedges / num_buckets + (max_hyperedges % num_buckets != 0); + tbb::parallel_invoke( + [&] { + // Add source nodes + flow_problem.source = whfc::Node(0); + whfc_to_node[flow_problem.source] = kInvalidHypernode; + _flow_hg.nodeWeight(flow_problem.source) = whfc::NodeWeight( + std::max(0, phg.partWeight(block_0) - sub_hg.weight_of_block_0)); + tbb::parallel_for(UL(0), sub_hg.nodes_of_block_0.size(), [&](const size_t i) { + const HypernodeID hn = sub_hg.nodes_of_block_0[i]; + const whfc::Node u(1 + i); + whfc_to_node[u] = hn; + _flow_hg.nodeWeight(u) = whfc::NodeWeight(phg.nodeWeight(hn)); + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { + ASSERT(_he_to_whfc.get_if_contained(he) != nullptr); + const HyperedgeID e = _he_to_whfc[he]; + _pins.insert(e / hes_per_bucket, TmpPin{ e, u, block_0 }); + } + }); + }, + [&] { + // Add sink nodes + flow_problem.sink = whfc::Node(sub_hg.nodes_of_block_0.size() + 1); + whfc_to_node[flow_problem.sink] = kInvalidHypernode; + _flow_hg.nodeWeight(flow_problem.sink) = whfc::NodeWeight( + std::max(0, phg.partWeight(block_1) - sub_hg.weight_of_block_1)); + tbb::parallel_for(UL(0), sub_hg.nodes_of_block_1.size(), [&](const size_t i) { + const HypernodeID hn = sub_hg.nodes_of_block_1[i]; + const whfc::Node u(flow_problem.sink + 1 + i); + whfc_to_node[u] = hn; + _flow_hg.nodeWeight(u) = whfc::NodeWeight(phg.nodeWeight(hn)); + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { + ASSERT(_he_to_whfc.get_if_contained(he) != nullptr); + const HyperedgeID e = _he_to_whfc[he]; + _pins.insert(e / hes_per_bucket, TmpPin{ e, u, block_1 }); + } + }); + }); + flow_problem.weight_of_block_0 = + _flow_hg.nodeWeight(flow_problem.source) + sub_hg.weight_of_block_0; + flow_problem.weight_of_block_1 = + _flow_hg.nodeWeight(flow_problem.sink) + sub_hg.weight_of_block_1; const HypernodeID max_pins = sub_hg.num_pins + max_hyperedges; _flow_hg.allocateHyperedgesAndPins(max_hyperedges, max_pins); _flow_hg.setNumCSRBuckets(num_buckets); _pins.doParallelForAllBuckets([&](const size_t idx) { - vec& pins_of_bucket = _pins.getBucket(idx); - if ( pins_of_bucket.size() > 0 ) { + vec &pins_of_bucket = _pins.getBucket(idx); + if(pins_of_bucket.size() > 0) + { std::sort(pins_of_bucket.begin(), pins_of_bucket.end(), - [&](const TmpPin& lhs, const TmpPin& rhs ) { - return lhs.e < rhs.e || (lhs.e == rhs.e && lhs.pin < rhs.pin); - }); + [&](const TmpPin &lhs, const TmpPin &rhs) { + return lhs.e < rhs.e || (lhs.e == rhs.e && lhs.pin < rhs.pin); + }); HyperedgeID last_he = kInvalidHyperedge; size_t num_hes = 1; size_t num_pins = 0; - for ( const TmpPin& pin : pins_of_bucket ) { - if ( pin.e != last_he ) { + for(const TmpPin &pin : pins_of_bucket) + { + if(pin.e != last_he) + { ++num_hes; last_he = pin.e; } @@ -434,7 +503,7 @@ FlowProblem ParallelConstruction::constructOptimizedForLargeH whfc::Hyperedge current_he(0); size_t pin_idx = 0; - vec& tmp_pins = _tmp_pins.local(); + vec &tmp_pins = _tmp_pins.local(); size_t start_idx = 0; last_he = pins_of_bucket[start_idx].e; HypernodeID pin_count_in_block_0 = 0; @@ -443,63 +512,86 @@ FlowProblem ParallelConstruction::constructOptimizedForLargeH ASSERT(start_idx < end_idx); 
tmp_pins.clear(); const HyperedgeID he = sub_hg.hes[last_he]; - if ( !FlowNetworkConstruction::dropHyperedge(phg, he, block_0, block_1) ) { - const HyperedgeWeight he_weight = FlowNetworkConstruction::capacity(phg, _context, he, block_0, block_1); + if(!FlowNetworkConstruction::dropHyperedge(phg, he, block_0, block_1)) + { + const HyperedgeWeight he_weight = + FlowNetworkConstruction::capacity(phg, _context, he, block_0, block_1); const HypernodeID actual_pin_count_block_0 = phg.pinCountInPart(he, block_0); const HypernodeID actual_pin_count_block_1 = phg.pinCountInPart(he, block_1); - bool connect_to_source = FlowNetworkConstruction::connectToSource(phg, he, block_0, block_1); - bool connect_to_sink = FlowNetworkConstruction::connectToSink(phg, he, block_0, block_1); + bool connect_to_source = + FlowNetworkConstruction::connectToSource(phg, he, block_0, block_1); + bool connect_to_sink = + FlowNetworkConstruction::connectToSink(phg, he, block_0, block_1); connect_to_source |= pin_count_in_block_0 < actual_pin_count_block_0; connect_to_sink |= pin_count_in_block_1 < actual_pin_count_block_1; - if ( ( actual_pin_count_block_0 > 0 && actual_pin_count_block_1 > 0 ) || - FlowNetworkConstruction::isCut(phg, he, block_0, block_1) ) { + if((actual_pin_count_block_0 > 0 && actual_pin_count_block_1 > 0) || + FlowNetworkConstruction::isCut(phg, he, block_0, block_1)) + { __atomic_fetch_add(&flow_problem.total_cut, he_weight, __ATOMIC_RELAXED); } - if ( connect_to_source && connect_to_sink ) { + if(connect_to_source && connect_to_sink) + { // Hyperedge is connected to source and sink which means we can not remove it // from the cut with the current flow problem => remove he from flow problem - __atomic_fetch_add(&flow_problem.non_removable_cut, he_weight, __ATOMIC_RELAXED); - } else { + __atomic_fetch_add(&flow_problem.non_removable_cut, he_weight, + __ATOMIC_RELAXED); + } + else + { // Add hyperedge to flow network and configure source and sink size_t hash = 0; - if ( connect_to_source ) { + if(connect_to_source) + { tmp_pins.push_back(flow_problem.source); hash += kahypar::math::hash(flow_problem.source); - } else if ( connect_to_sink ) { + } + else if(connect_to_sink) + { tmp_pins.push_back(flow_problem.sink); hash += kahypar::math::hash(flow_problem.sink); } - for ( size_t i = start_idx; i < end_idx; ++i ) { + for(size_t i = start_idx; i < end_idx; ++i) + { tmp_pins.push_back(pins_of_bucket[i].pin); hash += kahypar::math::hash(pins_of_bucket[i].pin); } - if ( tmp_pins.size() > 1 ) { + if(tmp_pins.size() > 1) + { const TmpHyperedge identical_net = _identical_nets.get(hash, tmp_pins); - if ( identical_net.e == whfc::invalidHyperedge ) { + if(identical_net.e == whfc::invalidHyperedge) + { const size_t pin_start = pin_idx; const size_t pin_end = pin_start + tmp_pins.size(); - for ( const whfc::Node& pin : tmp_pins ) { + for(const whfc::Node &pin : tmp_pins) + { _flow_hg.addPin(pin, idx, pin_idx++); } - TmpHyperedge tmp_e { hash, idx, current_he++ }; - if ( _context.refinement.flows.determine_distance_from_cut && - actual_pin_count_block_0 > 0 && actual_pin_count_block_1 > 0 ) { + TmpHyperedge tmp_e{ hash, idx, current_he++ }; + if(_context.refinement.flows.determine_distance_from_cut && + actual_pin_count_block_0 > 0 && actual_pin_count_block_1 > 0) + { _cut_hes.push_back(tmp_e); } _flow_hg.finishHyperedge(tmp_e.e, he_weight, idx, pin_start, pin_end); _identical_nets.add(tmp_e); - } else { + } + else + { // Current hyperedge is identical to an already added - 
__atomic_fetch_add(&_flow_hg.capacity(identical_net.bucket, identical_net.e), he_weight, __ATOMIC_RELAXED); + __atomic_fetch_add( + &_flow_hg.capacity(identical_net.bucket, identical_net.e), he_weight, + __ATOMIC_RELAXED); } } } } }; - for ( size_t i = 0; i < pins_of_bucket.size(); ++i ) { - if ( last_he != pins_of_bucket[i].e ) { + for(size_t i = 0; i < pins_of_bucket.size(); ++i) + { + if(last_he != pins_of_bucket[i].e) + { add_hyperedge(i); start_idx = i; last_he = pins_of_bucket[i].e; @@ -510,69 +602,75 @@ FlowProblem ParallelConstruction::constructOptimizedForLargeH pin_count_in_block_1 += pins_of_bucket[i].block == block_1; } add_hyperedge(pins_of_bucket.size()); - } else { + } + else + { _flow_hg.initializeCSRBucket(idx, 0, 0); } _pins.clear(idx); }); - tbb::parallel_for(UL(0), num_buckets, [&](const size_t idx) { - _flow_hg.finalizeCSRBucket(idx); - }); + tbb::parallel_for(UL(0), num_buckets, + [&](const size_t idx) { _flow_hg.finalizeCSRBucket(idx); }); _flow_hg.finalizeHyperedges(); return flow_problem; } namespace { -template -class BFSQueue { +template +class BFSQueue +{ - public: - explicit BFSQueue(const size_t num_threads) : - _q(num_threads) { } +public: + explicit BFSQueue(const size_t num_threads) : _q(num_threads) {} - bool empty() { + bool empty() + { bool is_empty = true; - for ( size_t i = 0; i < _q.size(); ++i ) { + for(size_t i = 0; i < _q.size(); ++i) + { is_empty &= _q[i].empty(); } return is_empty; } - bool empty(const size_t i) { + bool empty(const size_t i) + { ASSERT(i < _q.size()); return _q[i].empty(); } - void push(const T elem, const size_t i) { + void push(const T elem, const size_t i) + { ASSERT(i < _q.size()); return _q[i].push(elem); } - T front(const size_t i) { + T front(const size_t i) + { ASSERT(i < _q.size()); return _q[i].front(); } - void pop(const size_t i) { + void pop(const size_t i) + { ASSERT(i < _q.size()); return _q[i].pop(); } - private: - vec> _q; +private: + vec > _q; }; } -template -void ParallelConstruction::determineDistanceFromCut(const PartitionedHypergraph& phg, - const whfc::Node source, - const whfc::Node sink, - const PartitionID block_0, - const PartitionID block_1, - const vec& whfc_to_node) { - auto& distances = _hfc.cs.border_nodes.distance; +template +void ParallelConstruction::determineDistanceFromCut( + const PartitionedHypergraph &phg, const whfc::Node source, const whfc::Node sink, + const PartitionID block_0, const PartitionID block_1, + const vec &whfc_to_node) +{ + auto &distances = _hfc.cs.border_nodes.distance; distances.assign(_flow_hg.numNodes(), whfc::HopDistance(0)); _visited_hns.resize(_flow_hg.numNodes() + _flow_hg.numHyperedges()); _visited_hns.reset(); @@ -583,12 +681,15 @@ void ParallelConstruction::determineDistanceFromCut(const Par size_t q_idx = 0; const size_t num_threads = std::thread::hardware_concurrency(); - vec> q(2, BFSQueue(num_threads)); + vec > q(2, BFSQueue(num_threads)); tbb::parallel_for(UL(0), _cut_hes.size(), [&](const size_t i) { const int thread_idx = tbb::this_task_arena::current_thread_index(); - const whfc::Hyperedge he = _flow_hg.originalHyperedgeID(_cut_hes[i].bucket, _cut_hes[i].e); - for ( const whfc::FlowHypergraph::Pin& pin : _flow_hg.pinsOf(he) ) { - if ( _visited_hns.compare_and_set_to_true(pin.pin) ) { + const whfc::Hyperedge he = + _flow_hg.originalHyperedgeID(_cut_hes[i].bucket, _cut_hes[i].e); + for(const whfc::FlowHypergraph::Pin &pin : _flow_hg.pinsOf(he)) + { + if(_visited_hns.compare_and_set_to_true(pin.pin)) + { q[q_idx].push(pin.pin, thread_idx); } } @@ -599,27 
+700,36 @@ void ParallelConstruction::determineDistanceFromCut(const Par whfc::HopDistance dist(1); whfc::HopDistance max_dist_source(0); whfc::HopDistance max_dist_sink(0); - while ( !q[q_idx].empty() ) { + while(!q[q_idx].empty()) + { bool reached_source_side = false; bool reached_sink_side = false; tbb::parallel_for(UL(0), num_threads, [&](const size_t idx) { - while ( !q[q_idx].empty(idx) ) { + while(!q[q_idx].empty(idx)) + { whfc::Node u = q[q_idx].front(idx); q[q_idx].pop(idx); const PartitionID block_of_u = phg.partID(whfc_to_node[u]); - if ( block_of_u == block_0 ) { + if(block_of_u == block_0) + { distances[u] = -dist; reached_source_side = true; - } else if ( block_of_u == block_1 ) { + } + else if(block_of_u == block_1) + { distances[u] = dist; reached_sink_side = true; } - for ( const whfc::FlowHypergraph::InHe& in_he : _flow_hg.hyperedgesOf(u) ) { + for(const whfc::FlowHypergraph::InHe &in_he : _flow_hg.hyperedgesOf(u)) + { const whfc::Hyperedge he = in_he.e; - if ( _visited_hns.compare_and_set_to_true(_flow_hg.numNodes() + he) ) { - for ( const whfc::FlowHypergraph::Pin& pin : _flow_hg.pinsOf(he) ) { - if ( _visited_hns.compare_and_set_to_true(pin.pin) ) { + if(_visited_hns.compare_and_set_to_true(_flow_hg.numNodes() + he)) + { + for(const whfc::FlowHypergraph::Pin &pin : _flow_hg.pinsOf(he)) + { + if(_visited_hns.compare_and_set_to_true(pin.pin)) + { q[1 - q_idx].push(pin.pin, idx); } } @@ -628,8 +738,10 @@ void ParallelConstruction::determineDistanceFromCut(const Par } }); - if ( reached_source_side ) max_dist_source = dist; - if ( reached_sink_side ) max_dist_sink = dist; + if(reached_source_side) + max_dist_source = dist; + if(reached_sink_side) + max_dist_sink = dist; ASSERT(q[q_idx].empty()); q_idx = 1 - q_idx; diff --git a/mt-kahypar/partition/refinement/flows/parallel_construction.h b/mt-kahypar/partition/refinement/flows/parallel_construction.h index a49713673..662448c8d 100644 --- a/mt-kahypar/partition/refinement/flows/parallel_construction.h +++ b/mt-kahypar/partition/refinement/flows/parallel_construction.h @@ -32,21 +32,22 @@ #include "algorithm/hyperflowcutter.h" #include "algorithm/parallel_push_relabel.h" -#include "mt-kahypar/partition/context.h" -#include "mt-kahypar/datastructures/sparse_map.h" +#include "mt-kahypar/datastructures/concurrent_bucket_map.h" #include "mt-kahypar/datastructures/concurrent_flat_map.h" +#include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h" -#include "mt-kahypar/datastructures/concurrent_bucket_map.h" -#include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" -#include "mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h" #include "mt-kahypar/parallel/stl/zero_allocator.h" +#include "mt-kahypar/partition/context.h" +#include "mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h" +#include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" namespace mt_kahypar { struct FlowProblem; -template -class ParallelConstruction { +template +class ParallelConstruction +{ static constexpr bool debug = false; @@ -55,129 +56,121 @@ class ParallelConstruction { using PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; using FlowNetworkConstruction = typename GraphAndGainTypes::FlowNetworkConstruction; - struct TmpPin { + struct TmpPin + { HyperedgeID e; whfc::Node pin; PartitionID block; }; - struct TmpHyperedge { + struct TmpHyperedge + { const size_t hash; const size_t bucket; const whfc::Hyperedge e; }; - class 
DynamicIdenticalNetDetection { + class DynamicIdenticalNetDetection + { - struct ThresholdHyperedge { + struct ThresholdHyperedge + { const TmpHyperedge e; const uint32_t threshold; }; - using IdenticalNetVector = tbb::concurrent_vector< - ThresholdHyperedge, parallel::zero_allocator>; + using IdenticalNetVector = + tbb::concurrent_vector >; - struct HashBucket { - HashBucket() : - identical_nets(), - threshold(0) { } + struct HashBucket + { + HashBucket() : identical_nets(), threshold(0) {} IdenticalNetVector identical_nets; uint32_t threshold; }; - public: + public: explicit DynamicIdenticalNetDetection(const HyperedgeID num_hyperedges, - FlowHypergraphBuilder& flow_hg, - const Context& context) : - _flow_hg(flow_hg), - _hash_buckets(), - _threshold(2) { - _hash_buckets.resize(std::max(UL(1024), num_hyperedges / - context.refinement.flows.num_parallel_searches)); + FlowHypergraphBuilder &flow_hg, + const Context &context) : + _flow_hg(flow_hg), + _hash_buckets(), _threshold(2) + { + _hash_buckets.resize(std::max( + UL(1024), num_hyperedges / context.refinement.flows.num_parallel_searches)); } - TmpHyperedge get(const size_t he_hash, - const vec& pins); + TmpHyperedge get(const size_t he_hash, const vec &pins); - void add(const TmpHyperedge& tmp_he); + void add(const TmpHyperedge &tmp_he); - void reset() { - _threshold += 2; - } + void reset() { _threshold += 2; } - private: - FlowHypergraphBuilder& _flow_hg; + private: + FlowHypergraphBuilder &_flow_hg; vec _hash_buckets; uint32_t _threshold; }; - public: +public: explicit ParallelConstruction(const HyperedgeID num_hyperedges, - FlowHypergraphBuilder& flow_hg, - whfc::HyperFlowCutter& hfc, - const Context& context) : - _context(context), - _flow_hg(flow_hg), - _hfc(hfc), - _node_to_whfc(), - _visited_hns(), - _tmp_pins(), - _cut_hes(), - _pins(), - _he_to_whfc(), - _identical_nets(num_hyperedges, flow_hg, context) { } - - ParallelConstruction(const ParallelConstruction&) = delete; - ParallelConstruction(ParallelConstruction&&) = delete; - ParallelConstruction & operator= (const ParallelConstruction &) = delete; - ParallelConstruction & operator= (ParallelConstruction &&) = delete; + FlowHypergraphBuilder &flow_hg, + whfc::HyperFlowCutter &hfc, + const Context &context) : + _context(context), + _flow_hg(flow_hg), _hfc(hfc), _node_to_whfc(), _visited_hns(), _tmp_pins(), + _cut_hes(), _pins(), _he_to_whfc(), + _identical_nets(num_hyperedges, flow_hg, context) + { + } + + ParallelConstruction(const ParallelConstruction &) = delete; + ParallelConstruction(ParallelConstruction &&) = delete; + ParallelConstruction &operator=(const ParallelConstruction &) = delete; + ParallelConstruction &operator=(ParallelConstruction &&) = delete; virtual ~ParallelConstruction() = default; - - FlowProblem constructFlowHypergraph(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, + FlowProblem constructFlowHypergraph(const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg, const PartitionID block_0, const PartitionID block_1, - vec& whfc_to_node); + vec &whfc_to_node); // ! 
Only for testing - FlowProblem constructFlowHypergraph(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, + FlowProblem constructFlowHypergraph(const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg, const PartitionID block_0, const PartitionID block_1, - vec& whfc_to_node, + vec &whfc_to_node, const bool default_construction); - private: - FlowProblem constructDefault(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, - const PartitionID block_0, - const PartitionID block_1, - vec& whfc_to_node); +private: + FlowProblem constructDefault(const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg, const PartitionID block_0, + const PartitionID block_1, vec &whfc_to_node); - FlowProblem constructOptimizedForLargeHEs(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, + FlowProblem constructOptimizedForLargeHEs(const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg, const PartitionID block_0, const PartitionID block_1, - vec& whfc_to_node); + vec &whfc_to_node); - void determineDistanceFromCut(const PartitionedHypergraph& phg, - const whfc::Node source, - const whfc::Node sink, - const PartitionID block_0, + void determineDistanceFromCut(const PartitionedHypergraph &phg, const whfc::Node source, + const whfc::Node sink, const PartitionID block_0, const PartitionID block_1, - const vec& whfc_to_node); + const vec &whfc_to_node); - const Context& _context; + const Context &_context; - FlowHypergraphBuilder& _flow_hg; - whfc::HyperFlowCutter& _hfc; + FlowHypergraphBuilder &_flow_hg; + whfc::HyperFlowCutter &_hfc; ds::ConcurrentFlatMap _node_to_whfc; ds::ThreadSafeFastResetFlagArray<> _visited_hns; - tbb::enumerable_thread_specific> _tmp_pins; + tbb::enumerable_thread_specific > _tmp_pins; tbb::concurrent_vector _cut_hes; ds::ConcurrentBucketMap _pins; @@ -185,4 +178,4 @@ class ParallelConstruction { DynamicIdenticalNetDetection _identical_nets; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/flows/problem_construction.cpp b/mt-kahypar/partition/refinement/flows/problem_construction.cpp index c057a0eff..67f348739 100644 --- a/mt-kahypar/partition/refinement/flows/problem_construction.cpp +++ b/mt-kahypar/partition/refinement/flows/problem_construction.cpp @@ -24,7 +24,6 @@ * SOFTWARE. 
******************************************************************************/ - #include "mt-kahypar/partition/refinement/flows/problem_construction.h" #include @@ -36,14 +35,18 @@ namespace mt_kahypar { -template -void ProblemConstruction::BFSData::clearQueue() { - while ( !queue.empty() ) queue.pop(); - while ( !next_queue.empty() ) next_queue.pop(); +template +void ProblemConstruction::BFSData::clearQueue() +{ + while(!queue.empty()) + queue.pop(); + while(!next_queue.empty()) + next_queue.pop(); } -template -void ProblemConstruction::BFSData::reset() { +template +void ProblemConstruction::BFSData::reset() +{ current_distance = 0; queue_weight_block_0 = 0; queue_weight_block_1 = 0; @@ -55,29 +58,34 @@ void ProblemConstruction::BFSData::reset() { std::fill(locked_blocks.begin(), locked_blocks.end(), false); } -template -HypernodeID ProblemConstruction::BFSData::pop_hypernode() { +template +HypernodeID ProblemConstruction::BFSData::pop_hypernode() +{ ASSERT(!queue.empty()); const HypernodeID hn = queue.front(); queue.pop(); return hn; } -template +template void ProblemConstruction::BFSData::add_pins_of_hyperedge_to_queue( - const HyperedgeID& he, - const PartitionedHypergraph& phg, - const size_t max_bfs_distance, - const HypernodeWeight max_weight_block_0, - const HypernodeWeight max_weight_block_1) { - if ( current_distance <= max_bfs_distance && !lock_queue ) { - if ( !visited_he[he] ) { - for ( const HypernodeID& pin : phg.pins(he) ) { - if ( !visited_hn[pin] ) { + const HyperedgeID &he, const PartitionedHypergraph &phg, + const size_t max_bfs_distance, const HypernodeWeight max_weight_block_0, + const HypernodeWeight max_weight_block_1) +{ + if(current_distance <= max_bfs_distance && !lock_queue) + { + if(!visited_he[he]) + { + for(const HypernodeID &pin : phg.pins(he)) + { + if(!visited_hn[pin]) + { const PartitionID block = phg.partID(pin); const bool is_block_0 = blocks.i == block; const bool is_block_1 = blocks.j == block; - if ( (is_block_0 || is_block_1) && !locked_blocks[block] ) { + if((is_block_0 || is_block_1) && !locked_blocks[block]) + { next_queue.push(pin); queue_weight_block_0 += is_block_0 ? phg.nodeWeight(pin) : 0; queue_weight_block_1 += is_block_1 ? 
phg.nodeWeight(pin) : 0; @@ -89,22 +97,25 @@ void ProblemConstruction::BFSData::add_pins_of_hyperedge_to_queue( } } - if ( queue_weight_block_0 >= max_weight_block_0 && - queue_weight_block_1 >= max_weight_block_1 ) { + if(queue_weight_block_0 >= max_weight_block_0 && + queue_weight_block_1 >= max_weight_block_1) + { lock_queue = true; } } namespace { - using assert_map = std::unordered_map; +using assert_map = std::unordered_map; } -template -Subhypergraph ProblemConstruction::construct(const SearchID search_id, - QuotientGraph& quotient_graph, - const PartitionedHypergraph& phg) { +template +Subhypergraph +ProblemConstruction::construct(const SearchID search_id, + QuotientGraph "ient_graph, + const PartitionedHypergraph &phg) +{ Subhypergraph sub_hg; - BFSData& bfs = _local_bfs.local(); + BFSData &bfs = _local_bfs.local(); bfs.reset(); bfs.blocks = quotient_graph.getBlockPair(search_id); sub_hg.block_0 = bfs.blocks.i; @@ -113,37 +124,44 @@ Subhypergraph ProblemConstruction::construct(const SearchID search_i sub_hg.weight_of_block_1 = 0; sub_hg.num_pins = 0; const HypernodeWeight max_weight_block_0 = - _scaling * _context.partition.perfect_balance_part_weights[sub_hg.block_1] - phg.partWeight(sub_hg.block_1); + _scaling * _context.partition.perfect_balance_part_weights[sub_hg.block_1] - + phg.partWeight(sub_hg.block_1); const HypernodeWeight max_weight_block_1 = - _scaling * _context.partition.perfect_balance_part_weights[sub_hg.block_0] - phg.partWeight(sub_hg.block_0); + _scaling * _context.partition.perfect_balance_part_weights[sub_hg.block_0] - + phg.partWeight(sub_hg.block_0); const size_t max_bfs_distance = _context.refinement.flows.max_bfs_distance; - // We initialize the BFS with all cut hyperedges running // between the involved block associated with the search bfs.clearQueue(); - quotient_graph.doForAllCutHyperedgesOfSearch(search_id, [&](const HyperedgeID& he) { - bfs.add_pins_of_hyperedge_to_queue(he, phg, max_bfs_distance, - max_weight_block_0, max_weight_block_1); + quotient_graph.doForAllCutHyperedgesOfSearch(search_id, [&](const HyperedgeID &he) { + bfs.add_pins_of_hyperedge_to_queue(he, phg, max_bfs_distance, max_weight_block_0, + max_weight_block_1); }); bfs.swap_with_next_queue(); // BFS - while ( !bfs.is_empty() && - !isMaximumProblemSizeReached(sub_hg, - max_weight_block_0, max_weight_block_1, bfs.locked_blocks) ) { + while(!bfs.is_empty() && + !isMaximumProblemSizeReached(sub_hg, max_weight_block_0, max_weight_block_1, + bfs.locked_blocks)) + { HypernodeID hn = bfs.pop_hypernode(); PartitionID block = phg.partID(hn); const bool is_block_contained = block == sub_hg.block_0 || block == sub_hg.block_1; - if ( is_block_contained && !bfs.locked_blocks[block] ) { + if(is_block_contained && !bfs.locked_blocks[block]) + { const bool is_fixed = phg.isFixed(hn); // We do not add fixed vertices to the flow problem, but still // expand the BFS to its neighbors - if ( !is_fixed ) { - if ( sub_hg.block_0 == block ) { + if(!is_fixed) + { + if(sub_hg.block_0 == block) + { sub_hg.nodes_of_block_0.push_back(hn); sub_hg.weight_of_block_0 += phg.nodeWeight(hn); - } else { + } + else + { ASSERT(sub_hg.block_1 == block); sub_hg.nodes_of_block_1.push_back(hn); sub_hg.weight_of_block_1 += phg.nodeWeight(hn); @@ -152,93 +170,114 @@ Subhypergraph ProblemConstruction::construct(const SearchID search_i } // Push all neighbors of the added vertex into the queue - for ( const HyperedgeID& he : phg.incidentEdges(hn) ) { - bfs.add_pins_of_hyperedge_to_queue(he, phg, max_bfs_distance, - 
max_weight_block_0, max_weight_block_1); - if ( !is_fixed && !bfs.contained_hes[phg.uniqueEdgeID(he)] ) { + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { + bfs.add_pins_of_hyperedge_to_queue(he, phg, max_bfs_distance, max_weight_block_0, + max_weight_block_1); + if(!is_fixed && !bfs.contained_hes[phg.uniqueEdgeID(he)]) + { sub_hg.hes.push_back(he); bfs.contained_hes[phg.uniqueEdgeID(he)] = true; } } } - if ( bfs.is_empty() ) { + if(bfs.is_empty()) + { bfs.swap_with_next_queue(); } } DBG << "Search ID:" << search_id << "-" << sub_hg; // Check if all touched hyperedges are contained in subhypergraph - ASSERT([&]() { - assert_map expected_hes; - for ( const HyperedgeID& he : sub_hg.hes ) { - const HyperedgeID id = phg.uniqueEdgeID(he); - if ( expected_hes.count(id) > 0 ) { - LOG << "Hyperedge" << he << "is contained multiple times in subhypergraph!"; - return false; - } - expected_hes[id] = true; - } + ASSERT( + [&]() { + assert_map expected_hes; + for(const HyperedgeID &he : sub_hg.hes) + { + const HyperedgeID id = phg.uniqueEdgeID(he); + if(expected_hes.count(id) > 0) + { + LOG << "Hyperedge" << he << "is contained multiple times in subhypergraph!"; + return false; + } + expected_hes[id] = true; + } - for ( const HypernodeID& hn : sub_hg.nodes_of_block_0 ) { - for ( const HyperedgeID& he : phg.incidentEdges(hn) ) { - const HyperedgeID id = phg.uniqueEdgeID(he); - if ( expected_hes.count(id) == 0 ) { - LOG << "Hyperedge" << he << "not contained in subhypergraph!"; - return false; + for(const HypernodeID &hn : sub_hg.nodes_of_block_0) + { + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { + const HyperedgeID id = phg.uniqueEdgeID(he); + if(expected_hes.count(id) == 0) + { + LOG << "Hyperedge" << he << "not contained in subhypergraph!"; + return false; + } + expected_hes[id] = false; + } } - expected_hes[id] = false; - } - } - for ( const HypernodeID& hn : sub_hg.nodes_of_block_1 ) { - for ( const HyperedgeID& he : phg.incidentEdges(hn) ) { - const HyperedgeID id = phg.uniqueEdgeID(he); - if ( expected_hes.count(id) == 0 ) { - LOG << "Hyperedge" << he << "not contained in subhypergraph!"; - return false; + for(const HypernodeID &hn : sub_hg.nodes_of_block_1) + { + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { + const HyperedgeID id = phg.uniqueEdgeID(he); + if(expected_hes.count(id) == 0) + { + LOG << "Hyperedge" << he << "not contained in subhypergraph!"; + return false; + } + expected_hes[id] = false; + } } - expected_hes[id] = false; - } - } - for ( const auto& entry : expected_hes ) { - const HyperedgeID he = entry.first; - const bool visited = !entry.second; - if ( !visited ) { - LOG << "HyperedgeID" << he << "should be not part of subhypergraph!"; - return false; - } - } - return true; - }(), "Subhypergraph construction failed!"); + for(const auto &entry : expected_hes) + { + const HyperedgeID he = entry.first; + const bool visited = !entry.second; + if(!visited) + { + LOG << "HyperedgeID" << he << "should be not part of subhypergraph!"; + return false; + } + } + return true; + }(), + "Subhypergraph construction failed!"); return sub_hg; } -template -void ProblemConstruction::changeNumberOfBlocks(const PartitionID new_k) { +template +void ProblemConstruction::changeNumberOfBlocks(const PartitionID new_k) +{ ASSERT(new_k == _context.partition.k); - for ( BFSData& data : _local_bfs ) { - if ( static_cast(new_k) > data.locked_blocks.size() ) { + for(BFSData &data : _local_bfs) + { + if(static_cast(new_k) > data.locked_blocks.size()) + { 
data.locked_blocks.assign(new_k, false); } } } -template -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool ProblemConstruction::isMaximumProblemSizeReached( - const Subhypergraph& sub_hg, - const HypernodeWeight max_weight_block_0, - const HypernodeWeight max_weight_block_1, - vec& locked_blocks) const { - if ( sub_hg.weight_of_block_0 >= max_weight_block_0 ) { +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool +ProblemConstruction::isMaximumProblemSizeReached( + const Subhypergraph &sub_hg, const HypernodeWeight max_weight_block_0, + const HypernodeWeight max_weight_block_1, vec &locked_blocks) const +{ + if(sub_hg.weight_of_block_0 >= max_weight_block_0) + { locked_blocks[sub_hg.block_0] = true; } - if ( sub_hg.weight_of_block_1 >= max_weight_block_1 ) { + if(sub_hg.weight_of_block_1 >= max_weight_block_1) + { locked_blocks[sub_hg.block_1] = true; } - if ( sub_hg.num_pins >= _context.refinement.flows.max_num_pins ) { + if(sub_hg.num_pins >= _context.refinement.flows.max_num_pins) + { locked_blocks[sub_hg.block_0] = true; locked_blocks[sub_hg.block_1] = true; } diff --git a/mt-kahypar/partition/refinement/flows/problem_construction.h b/mt-kahypar/partition/refinement/flows/problem_construction.h index c36cdd80c..e7fb81af7 100644 --- a/mt-kahypar/partition/refinement/flows/problem_construction.h +++ b/mt-kahypar/partition/refinement/flows/problem_construction.h @@ -28,18 +28,19 @@ #include "tbb/enumerable_thread_specific.h" -#include "mt-kahypar/partition/context.h" #include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/partition/refinement/flows/refiner_adapter.h" -#include "mt-kahypar/partition/refinement/flows/quotient_graph.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" -#include "mt-kahypar/parallel/stl/scalable_queue.h" #include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/parallel/stl/scalable_queue.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/partition/context.h" +#include "mt-kahypar/partition/refinement/flows/quotient_graph.h" +#include "mt-kahypar/partition/refinement/flows/refiner_adapter.h" namespace mt_kahypar { -template -class ProblemConstruction { +template +class ProblemConstruction +{ static constexpr bool debug = false; @@ -49,20 +50,16 @@ class ProblemConstruction { * Contains data required to grow two region around * the cut of two blocks of the partition. 
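To make the region growing in construct() above easier to follow, here is a minimal, self-contained sketch on a plain graph rather than the partitioned hypergraph. Region, growRegion and all parameters are hypothetical names, and the real code's handling of fixed vertices and per-hyperedge bookkeeping is omitted.

#include <iostream>
#include <queue>
#include <vector>

// Hypothetical, simplified stand-in for the bounded BFS that grows the flow
// problem: plain graph, two blocks, per-block weight budgets and a maximum
// BFS distance. Not the mt-kahypar data structures.
struct Region {
  std::vector<int> nodes;
  int weight0 = 0;
  int weight1 = 0;
};

Region growRegion(const std::vector<std::vector<int>>& adj,
                  const std::vector<int>& weight,
                  const std::vector<int>& block,      // 0 or 1
                  const std::vector<int>& cut_seeds,  // nodes incident to cut edges
                  int max_weight0, int max_weight1, int max_distance) {
  Region region;
  std::vector<bool> visited(adj.size(), false);
  std::queue<int> current, next;
  for (int u : cut_seeds)
    if (!visited[u]) { visited[u] = true; current.push(u); }
  int distance = 0;
  while (!current.empty() && distance <= max_distance) {
    const int u = current.front();
    current.pop();
    // Only take the node if its block's weight budget still allows it.
    const bool fits = (block[u] == 0) ? region.weight0 + weight[u] <= max_weight0
                                      : region.weight1 + weight[u] <= max_weight1;
    if (fits) {
      region.nodes.push_back(u);
      (block[u] == 0 ? region.weight0 : region.weight1) += weight[u];
      for (int v : adj[u])
        if (!visited[v]) { visited[v] = true; next.push(v); }
    }
    // Advance one BFS layer once the current frontier is exhausted.
    if (current.empty()) { std::swap(current, next); ++distance; }
  }
  return region;
}

int main() {
  // Path 0-1-2-3 with the cut between nodes 1 and 2 (blocks 0 0 | 1 1).
  std::vector<std::vector<int>> adj = { { 1 }, { 0, 2 }, { 1, 3 }, { 2 } };
  std::vector<int> weight = { 1, 1, 1, 1 };
  std::vector<int> block = { 0, 0, 1, 1 };
  Region r = growRegion(adj, weight, block, { 1, 2 }, 2, 2, 2);
  std::cout << r.nodes.size() << " nodes, weights " << r.weight0 << "/" << r.weight1 << "\n";
}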
*/ - struct BFSData { - explicit BFSData(const HypernodeID num_nodes, - const HyperedgeID num_edges, + struct BFSData + { + explicit BFSData(const HypernodeID num_nodes, const HyperedgeID num_edges, const PartitionID k) : - current_distance(0), - queue(), - next_queue(), - visited_hn(num_nodes, false), - visited_he(num_edges, false), - contained_hes(num_edges, false), - locked_blocks(k, false), - queue_weight_block_0(0), - queue_weight_block_1(0), - lock_queue(false) { } + current_distance(0), + queue(), next_queue(), visited_hn(num_nodes, false), visited_he(num_edges, false), + contained_hes(num_edges, false), locked_blocks(k, false), queue_weight_block_0(0), + queue_weight_block_1(0), lock_queue(false) + { + } void clearQueue(); @@ -70,22 +67,20 @@ class ProblemConstruction { HypernodeID pop_hypernode(); - void add_pins_of_hyperedge_to_queue(const HyperedgeID& he, - const PartitionedHypergraph& phg, + void add_pins_of_hyperedge_to_queue(const HyperedgeID &he, + const PartitionedHypergraph &phg, const size_t max_bfs_distance, const HypernodeWeight max_weight_block_0, const HypernodeWeight max_weight_block_1); - bool is_empty() const { - return queue.empty(); - } + bool is_empty() const { return queue.empty(); } - bool is_next_empty() const { - return next_queue.empty(); - } + bool is_next_empty() const { return next_queue.empty(); } - void swap_with_next_queue() { - if ( !is_next_empty() ) { + void swap_with_next_queue() + { + if(!is_next_empty()) + { std::swap(queue, next_queue); ++current_distance; } @@ -104,47 +99,44 @@ class ProblemConstruction { bool lock_queue; }; - public: +public: explicit ProblemConstruction(const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const Context& context) : - _context(context), - _scaling(1.0 + _context.refinement.flows.alpha * - std::min(0.05, _context.partition.epsilon)), - _num_hypernodes(num_hypernodes), - _num_hyperedges(num_hyperedges), - _local_bfs([&] { + const HyperedgeID num_hyperedges, const Context &context) : + _context(context), + _scaling(1.0 + _context.refinement.flows.alpha * + std::min(0.05, _context.partition.epsilon)), + _num_hypernodes(num_hypernodes), _num_hyperedges(num_hyperedges), _local_bfs([&] { // If the number of blocks changes, BFSData needs to be initialized // differently. 
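The lambda-initialized thread-local state referred to here is the standard tbb::enumerable_thread_specific pattern. The sketch below is illustrative only; ScratchData is a hypothetical stand-in for BFSData, and k plays the role of the number of blocks read from the context.

#include <algorithm>
#include <iostream>
#include <vector>
#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>

// Illustrative only: per-thread scratch data built lazily by a lambda, so each
// thread's copy picks up the value of k that is current when the thread first
// calls local().
struct ScratchData {
  explicit ScratchData(int k) : locked_blocks(k, false) {}
  std::vector<bool> locked_blocks;
};

int main() {
  int k = 4;  // imagine this is read from a context object
  tbb::enumerable_thread_specific<ScratchData> local_scratch(
      [&] { return ScratchData(k); });  // evaluated once per thread, on first use

  tbb::parallel_for(0, 100, [&](int) {
    ScratchData& scratch = local_scratch.local();  // lazily constructed here
    std::fill(scratch.locked_blocks.begin(), scratch.locked_blocks.end(), false);
  });
  std::cout << "thread-local copies created: " << local_scratch.size() << "\n";
}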
Thus we use a lambda that reads the current number of // blocks from the context return constructBFSData(); - } - ) { } + }) + { + } - ProblemConstruction(const ProblemConstruction&) = delete; - ProblemConstruction(ProblemConstruction&&) = delete; + ProblemConstruction(const ProblemConstruction &) = delete; + ProblemConstruction(ProblemConstruction &&) = delete; - ProblemConstruction & operator= (const ProblemConstruction &) = delete; - ProblemConstruction & operator= (ProblemConstruction &&) = delete; + ProblemConstruction &operator=(const ProblemConstruction &) = delete; + ProblemConstruction &operator=(ProblemConstruction &&) = delete; Subhypergraph construct(const SearchID search_id, - QuotientGraph& quotient_graph, - const PartitionedHypergraph& phg); + QuotientGraph "ient_graph, + const PartitionedHypergraph &phg); void changeNumberOfBlocks(const PartitionID new_k); - private: - BFSData constructBFSData() const { +private: + BFSData constructBFSData() const + { return BFSData(_num_hypernodes, _num_hyperedges, _context.partition.k); } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool isMaximumProblemSizeReached( - const Subhypergraph& sub_hg, - const HypernodeWeight max_weight_block_0, - const HypernodeWeight max_weight_block_1, - vec& locked_blocks) const; + const Subhypergraph &sub_hg, const HypernodeWeight max_weight_block_0, + const HypernodeWeight max_weight_block_1, vec &locked_blocks) const; - const Context& _context; + const Context &_context; double _scaling; HypernodeID _num_hypernodes; HyperedgeID _num_hyperedges; @@ -153,4 +145,4 @@ class ProblemConstruction { tbb::enumerable_thread_specific _local_bfs; }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/refinement/flows/quotient_graph.cpp b/mt-kahypar/partition/refinement/flows/quotient_graph.cpp index be8833a3f..4eb00c862 100644 --- a/mt-kahypar/partition/refinement/flows/quotient_graph.cpp +++ b/mt-kahypar/partition/refinement/flows/quotient_graph.cpp @@ -24,29 +24,29 @@ * SOFTWARE. 
******************************************************************************/ - #include "mt-kahypar/partition/refinement/flows/quotient_graph.h" #include #include "tbb/parallel_sort.h" -#include "mt-kahypar/definitions.h" #include "mt-kahypar/datastructures/sparse_map.h" - +#include "mt-kahypar/definitions.h" namespace mt_kahypar { -template -void QuotientGraph::QuotientGraphEdge::add_hyperedge(const HyperedgeID he, - const HyperedgeWeight weight) { +template +void QuotientGraph::QuotientGraphEdge::add_hyperedge( + const HyperedgeID he, const HyperedgeWeight weight) +{ cut_hes.push_back(he); cut_he_weight += weight; ++num_cut_hes; } -template -void QuotientGraph::QuotientGraphEdge::reset() { +template +void QuotientGraph::QuotientGraphEdge::reset() +{ cut_hes.clear(); ownership.store(INVALID_SEARCH_ID, std::memory_order_relaxed); is_in_queue.store(false, std::memory_order_relaxed); @@ -54,24 +54,28 @@ void QuotientGraph::QuotientGraphEdge::reset() { cut_he_weight.store(0, std::memory_order_relaxed); } -template -bool QuotientGraph::ActiveBlockSchedulingRound::popBlockPairFromQueue(BlockPair& blocks) { +template +bool QuotientGraph::ActiveBlockSchedulingRound::popBlockPairFromQueue( + BlockPair &blocks) +{ blocks.i = kInvalidPartition; blocks.j = kInvalidPartition; - if ( _unscheduled_blocks.try_pop(blocks) ) { + if(_unscheduled_blocks.try_pop(blocks)) + { _quotient_graph[blocks.i][blocks.j].markAsNotInQueue(); } return blocks.i != kInvalidPartition && blocks.j != kInvalidPartition; } -template -void QuotientGraph::ActiveBlockSchedulingRound::finalizeSearch(const BlockPair& blocks, - const HyperedgeWeight improvement, - bool& block_0_becomes_active, - bool& block_1_becomes_active) { +template +void QuotientGraph::ActiveBlockSchedulingRound::finalizeSearch( + const BlockPair &blocks, const HyperedgeWeight improvement, + bool &block_0_becomes_active, bool &block_1_becomes_active) +{ _round_improvement += improvement; --_remaining_blocks; - if ( improvement > 0 ) { + if(improvement > 0) + { _active_blocks_lock.lock(); block_0_becomes_active = !_active_blocks[blocks.i]; block_1_becomes_active = !_active_blocks[blocks.j]; @@ -81,77 +85,98 @@ void QuotientGraph::ActiveBlockSchedulingRound::finalizeSearch(const } } -template -bool QuotientGraph::ActiveBlockSchedulingRound::pushBlockPairIntoQueue(const BlockPair& blocks) { - QuotientGraphEdge& qg_edge = _quotient_graph[blocks.i][blocks.j]; - if ( qg_edge.markAsInQueue() ) { +template +bool QuotientGraph::ActiveBlockSchedulingRound::pushBlockPairIntoQueue( + const BlockPair &blocks) +{ + QuotientGraphEdge &qg_edge = _quotient_graph[blocks.i][blocks.j]; + if(qg_edge.markAsInQueue()) + { _unscheduled_blocks.push(blocks); ++_remaining_blocks; return true; - } else { + } + else + { return false; } } -template -void QuotientGraph::ActiveBlockScheduler::initialize(const vec& active_blocks, - const bool is_input_hypergraph) { +template +void QuotientGraph::ActiveBlockScheduler::initialize( + const vec &active_blocks, const bool is_input_hypergraph) +{ reset(); _is_input_hypergraph = is_input_hypergraph; HyperedgeWeight best_total_improvement = 1; - for ( PartitionID i = 0; i < _context.partition.k; ++i ) { - for ( PartitionID j = i + 1; j < _context.partition.k; ++j ) { - best_total_improvement = std::max(best_total_improvement, - _quotient_graph[i][j].total_improvement.load(std::memory_order_relaxed)); + for(PartitionID i = 0; i < _context.partition.k; ++i) + { + for(PartitionID j = i + 1; j < _context.partition.k; ++j) + { + best_total_improvement = 
std::max( + best_total_improvement, + _quotient_graph[i][j].total_improvement.load(std::memory_order_relaxed)); } } vec active_block_pairs; - for ( PartitionID i = 0; i < _context.partition.k; ++i ) { - for ( PartitionID j = i + 1; j < _context.partition.k; ++j ) { - if ( isActiveBlockPair(i, j) && ( active_blocks[i] || active_blocks[j] ) ) { - active_block_pairs.push_back( BlockPair { i, j } ); + for(PartitionID i = 0; i < _context.partition.k; ++i) + { + for(PartitionID j = i + 1; j < _context.partition.k; ++j) + { + if(isActiveBlockPair(i, j) && (active_blocks[i] || active_blocks[j])) + { + active_block_pairs.push_back(BlockPair{ i, j }); } } } - if ( active_block_pairs.size() > 0 ) { + if(active_block_pairs.size() > 0) + { std::sort(active_block_pairs.begin(), active_block_pairs.end(), - [&](const BlockPair& lhs, const BlockPair& rhs) { - return _quotient_graph[lhs.i][lhs.j].total_improvement > - _quotient_graph[rhs.i][rhs.j].total_improvement || - ( _quotient_graph[lhs.i][lhs.j].total_improvement == - _quotient_graph[rhs.i][rhs.j].total_improvement && - _quotient_graph[lhs.i][lhs.j].cut_he_weight > - _quotient_graph[rhs.i][rhs.j].cut_he_weight ); - }); + [&](const BlockPair &lhs, const BlockPair &rhs) { + return _quotient_graph[lhs.i][lhs.j].total_improvement > + _quotient_graph[rhs.i][rhs.j].total_improvement || + (_quotient_graph[lhs.i][lhs.j].total_improvement == + _quotient_graph[rhs.i][rhs.j].total_improvement && + _quotient_graph[lhs.i][lhs.j].cut_he_weight > + _quotient_graph[rhs.i][rhs.j].cut_he_weight); + }); _rounds.emplace_back(_context, _quotient_graph); ++_num_rounds; - for ( const BlockPair& blocks : active_block_pairs ) { + for(const BlockPair &blocks : active_block_pairs) + { DBG << "Schedule blocks (" << blocks.i << "," << blocks.j << ") in round 1 (" - << "Total Improvement =" << _quotient_graph[blocks.i][blocks.j].total_improvement << "," + << "Total Improvement =" + << _quotient_graph[blocks.i][blocks.j].total_improvement << "," << "Cut Weight =" << _quotient_graph[blocks.i][blocks.j].cut_he_weight << ")"; _rounds.back().pushBlockPairIntoQueue(blocks); } } } -template -bool QuotientGraph::ActiveBlockScheduler::popBlockPairFromQueue(BlockPair& blocks, size_t& round) { +template +bool QuotientGraph::ActiveBlockScheduler::popBlockPairFromQueue( + BlockPair &blocks, size_t &round) +{ bool success = false; round = _first_active_round; - while ( !_terminate && round < _num_rounds ) { + while(!_terminate && round < _num_rounds) + { success = _rounds[round].popBlockPairFromQueue(blocks); - if ( success ) { + if(success) + { break; } ++round; } - if ( success && round == _num_rounds - 1 ) { + if(success && round == _num_rounds - 1) + { _round_lock.lock(); - if ( round == _num_rounds - 1 ) { + if(round == _num_rounds - 1) + { // There must always be a next round available such that we can // reschedule block pairs that become active. 
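Restated on a plain struct, the ordering used when seeding the first round above puts block pairs with larger total improvement first and breaks ties by the weight of their cut hyperedges. PairStats and the sample values below are hypothetical.

#include <algorithm>
#include <iostream>
#include <vector>

// Illustrative only: ordering of active block pairs when seeding the first
// round. Larger total improvement first, ties broken by cut hyperedge weight.
struct PairStats {
  int i, j;
  long long total_improvement;
  long long cut_he_weight;
};

int main() {
  std::vector<PairStats> pairs = { { 0, 1, 5, 10 }, { 0, 2, 8, 3 }, { 1, 2, 5, 20 } };
  std::sort(pairs.begin(), pairs.end(), [](const PairStats& a, const PairStats& b) {
    return a.total_improvement > b.total_improvement ||
           (a.total_improvement == b.total_improvement &&
            a.cut_he_weight > b.cut_he_weight);
  });
  for (const PairStats& p : pairs)
    std::cout << "(" << p.i << "," << p.j << ")"
              << " improvement=" << p.total_improvement
              << " cut_weight=" << p.cut_he_weight << "\n";
  // Prints (0,2), then (1,2), then (0,1).
}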
_rounds.emplace_back(_context, _quotient_graph); @@ -163,108 +188,135 @@ bool QuotientGraph::ActiveBlockScheduler::popBlockPairFromQueue(Bloc return success; } -template -void QuotientGraph::ActiveBlockScheduler::finalizeSearch(const BlockPair& blocks, - const size_t round, - const HyperedgeWeight improvement) { +template +void QuotientGraph::ActiveBlockScheduler::finalizeSearch( + const BlockPair &blocks, const size_t round, const HyperedgeWeight improvement) +{ ASSERT(round < _rounds.size()); bool block_0_becomes_active = false; bool block_1_becomes_active = false; - _rounds[round].finalizeSearch(blocks, improvement, - block_0_becomes_active, block_1_becomes_active); + _rounds[round].finalizeSearch(blocks, improvement, block_0_becomes_active, + block_1_becomes_active); - if ( block_0_becomes_active ) { - // If blocks.i becomes active, we push all adjacent blocks into the queue of the next round + if(block_0_becomes_active) + { + // If blocks.i becomes active, we push all adjacent blocks into the queue of the next + // round ASSERT(round + 1 < _rounds.size()); - for ( PartitionID other = 0; other < _context.partition.k; ++other ) { - if ( blocks.i != other ) { + for(PartitionID other = 0; other < _context.partition.k; ++other) + { + if(blocks.i != other) + { const PartitionID block_0 = std::min(blocks.i, other); const PartitionID block_1 = std::max(blocks.i, other); - if ( isActiveBlockPair(block_0, block_1) ) { - DBG << "Schedule blocks (" << block_0 << "," << block_1 << ") in round" << (round + 2) << " (" - << "Total Improvement =" << _quotient_graph[block_0][block_1].total_improvement << "," + if(isActiveBlockPair(block_0, block_1)) + { + DBG << "Schedule blocks (" << block_0 << "," << block_1 << ") in round" + << (round + 2) << " (" + << "Total Improvement =" + << _quotient_graph[block_0][block_1].total_improvement << "," << "Cut Weight =" << _quotient_graph[block_0][block_1].cut_he_weight << ")"; - _rounds[round + 1].pushBlockPairIntoQueue(BlockPair { block_0, block_1 }); + _rounds[round + 1].pushBlockPairIntoQueue(BlockPair{ block_0, block_1 }); } } } } - if ( block_1_becomes_active ) { - // If blocks.j becomes active, we push all adjacent blocks into the queue of the next round + if(block_1_becomes_active) + { + // If blocks.j becomes active, we push all adjacent blocks into the queue of the next + // round ASSERT(round + 1 < _rounds.size()); - for ( PartitionID other = 0; other < _context.partition.k; ++other ) { - if ( blocks.j != other ) { + for(PartitionID other = 0; other < _context.partition.k; ++other) + { + if(blocks.j != other) + { const PartitionID block_0 = std::min(blocks.j, other); const PartitionID block_1 = std::max(blocks.j, other); - if ( isActiveBlockPair(block_0, block_1) ) { - DBG << "Schedule blocks (" << block_0 << "," << block_1 << ") in round" << (round + 2) << " (" - << "Total Improvement =" << _quotient_graph[block_0][block_1].total_improvement << "," + if(isActiveBlockPair(block_0, block_1)) + { + DBG << "Schedule blocks (" << block_0 << "," << block_1 << ") in round" + << (round + 2) << " (" + << "Total Improvement =" + << _quotient_graph[block_0][block_1].total_improvement << "," << "Cut Weight =" << _quotient_graph[block_0][block_1].cut_he_weight << ")"; - _rounds[round + 1].pushBlockPairIntoQueue(BlockPair { block_0, block_1 }); + _rounds[round + 1].pushBlockPairIntoQueue(BlockPair{ block_0, block_1 }); } } } } // Special case - if ( improvement > 0 && !_quotient_graph[blocks.i][blocks.j].isInQueue() && isActiveBlockPair(blocks.i, blocks.j) && - ( 
_rounds[round].isActive(blocks.i) || _rounds[round].isActive(blocks.j) ) ) { - // The active block scheduling strategy works in multiple rounds and each contain a separate queue - // to store active block pairs. A block pair is only allowed to be contained in one queue. - // If a block becomes active, we schedule all quotient graph edges incident to the block in - // the next round. However, there could be some edges that are already contained in a queue of - // a previous round, which are then not scheduled in the next round. If this edge is scheduled and - // leads to an improvement, we schedule it in the next round here. - DBG << "Schedule blocks (" << blocks.i << "," << blocks.j << ") in round" << (round + 2) << " (" - << "Total Improvement =" << _quotient_graph[blocks.i][blocks.j].total_improvement << "," - << "Cut Weight =" << _quotient_graph[blocks.i][blocks.j].cut_he_weight << ")"; - _rounds[round + 1].pushBlockPairIntoQueue(BlockPair { blocks.i, blocks.j }); + if(improvement > 0 && !_quotient_graph[blocks.i][blocks.j].isInQueue() && + isActiveBlockPair(blocks.i, blocks.j) && + (_rounds[round].isActive(blocks.i) || _rounds[round].isActive(blocks.j))) + { + // The active block scheduling strategy works in multiple rounds and each contain a + // separate queue to store active block pairs. A block pair is only allowed to be + // contained in one queue. If a block becomes active, we schedule all quotient graph + // edges incident to the block in the next round. However, there could be some edges + // that are already contained in a queue of a previous round, which are then not + // scheduled in the next round. If this edge is scheduled and leads to an improvement, + // we schedule it in the next round here. + DBG << "Schedule blocks (" << blocks.i << "," << blocks.j << ") in round" + << (round + 2) << " (" + << "Total Improvement =" << _quotient_graph[blocks.i][blocks.j].total_improvement + << "," + << "Cut Weight =" << _quotient_graph[blocks.i][blocks.j].cut_he_weight << ")"; + _rounds[round + 1].pushBlockPairIntoQueue(BlockPair{ blocks.i, blocks.j }); } - if ( round == _first_active_round && _rounds[round].numRemainingBlocks() == 0 ) { + if(round == _first_active_round && _rounds[round].numRemainingBlocks() == 0) + { _round_lock.lock(); // We consider a round as finished, if the previous round is also finished and there // are no remaining blocks in the queue of that round. 
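A small worked example of the early-termination rule, using hypothetical numbers: the minimum required improvement per round is min_relative_improvement_per_round times the current objective (set via setObjective), and a finished round that improves less than that stops the scheduler.

#include <iostream>

// Illustrative arithmetic only, with assumed values.
int main() {
  const double min_relative_improvement_per_round = 0.001;  // assumed config value
  const long long objective = 250000;                        // current objective value
  const long long min_improvement_per_round =
      min_relative_improvement_per_round * objective;        // threshold = 250
  const long long round_improvement = 180;                   // improvement of the finished round
  const bool terminate = round_improvement < min_improvement_per_round;
  std::cout << "threshold=" << min_improvement_per_round
            << " terminate=" << std::boolalpha << terminate << "\n";
  // Prints: threshold=250 terminate=true
}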
- while ( _first_active_round < _rounds.size() && - _rounds[_first_active_round].numRemainingBlocks() == 0 ) { - DBG << GREEN << "Round" << (_first_active_round + 1) << "terminates with improvement" + while(_first_active_round < _rounds.size() && + _rounds[_first_active_round].numRemainingBlocks() == 0) + { + DBG << GREEN << "Round" << (_first_active_round + 1) + << "terminates with improvement" << _rounds[_first_active_round].roundImprovement() << "(" << "Minimum Required Improvement =" << _min_improvement_per_round << ")" << END; // We require that minimum improvement per round must be greater than a threshold, // otherwise we terminate early - _terminate = _rounds[_first_active_round].roundImprovement() < _min_improvement_per_round; + _terminate = + _rounds[_first_active_round].roundImprovement() < _min_improvement_per_round; ++_first_active_round; } _round_lock.unlock(); } } -template -bool QuotientGraph::ActiveBlockScheduler::isActiveBlockPair(const PartitionID i, - const PartitionID j) const { - const bool skip_small_cuts = !_is_input_hypergraph && - _context.refinement.flows.skip_small_cuts; +template +bool QuotientGraph::ActiveBlockScheduler::isActiveBlockPair( + const PartitionID i, const PartitionID j) const +{ + const bool skip_small_cuts = + !_is_input_hypergraph && _context.refinement.flows.skip_small_cuts; const bool contains_enough_cut_hes = - (skip_small_cuts && _quotient_graph[i][j].cut_he_weight > 10) || - (!skip_small_cuts && _quotient_graph[i][j].cut_he_weight > 0); + (skip_small_cuts && _quotient_graph[i][j].cut_he_weight > 10) || + (!skip_small_cuts && _quotient_graph[i][j].cut_he_weight > 0); const bool is_promising_blocks_pair = - !_context.refinement.flows.skip_unpromising_blocks || - ( _first_active_round == 0 || _quotient_graph[i][j].num_improvements_found > 0 ); + !_context.refinement.flows.skip_unpromising_blocks || + (_first_active_round == 0 || _quotient_graph[i][j].num_improvements_found > 0); return contains_enough_cut_hes && is_promising_blocks_pair; } -template -SearchID QuotientGraph::requestNewSearch(FlowRefinerAdapter& refiner) { +template +SearchID +QuotientGraph::requestNewSearch(FlowRefinerAdapter &refiner) +{ ASSERT(_phg); SearchID search_id = INVALID_SEARCH_ID; - BlockPair blocks { kInvalidPartition, kInvalidPartition }; + BlockPair blocks{ kInvalidPartition, kInvalidPartition }; size_t round = 0; bool success = _active_block_scheduler.popBlockPairFromQueue(blocks, round); _register_search_lock.lock(); const SearchID tmp_search_id = _searches.size(); - if ( success && _quotient_graph[blocks.i][blocks.j].acquire(tmp_search_id) ) { + if(success && _quotient_graph[blocks.i][blocks.j].acquire(tmp_search_id)) + { ++_num_active_searches; // Create new search search_id = tmp_search_id; @@ -273,48 +325,58 @@ SearchID QuotientGraph::requestNewSearch(FlowRefinerAdapter +template void QuotientGraph::addNewCutHyperedge(const HyperedgeID he, - const PartitionID block) { + const PartitionID block) +{ ASSERT(_phg); ASSERT(_phg->pinCountInPart(he, block) > 0); // Add hyperedge he as a cut hyperedge to each block pair that contains 'block' - for ( const PartitionID& other_block : _phg->connectivitySet(he) ) { - if ( other_block != block ) { + for(const PartitionID &other_block : _phg->connectivitySet(he)) + { + if(other_block != block) + { _quotient_graph[std::min(block, other_block)][std::max(block, other_block)] - .add_hyperedge(he, _phg->edgeWeight(he)); + .add_hyperedge(he, _phg->edgeWeight(he)); } } } -template -void QuotientGraph::finalizeConstruction(const 
SearchID search_id) { +template +void QuotientGraph::finalizeConstruction(const SearchID search_id) +{ ASSERT(search_id < _searches.size()); _searches[search_id].is_finalized = true; - const BlockPair& blocks = _searches[search_id].blocks; + const BlockPair &blocks = _searches[search_id].blocks; _quotient_graph[blocks.i][blocks.j].release(search_id); } -template +template void QuotientGraph::finalizeSearch(const SearchID search_id, - const HyperedgeWeight total_improvement) { + const HyperedgeWeight total_improvement) +{ ASSERT(_phg); ASSERT(search_id < _searches.size()); ASSERT(_searches[search_id].is_finalized); - const BlockPair& blocks = _searches[search_id].blocks; - QuotientGraphEdge& qg_edge = _quotient_graph[blocks.i][blocks.j]; - if ( total_improvement > 0 ) { + const BlockPair &blocks = _searches[search_id].blocks; + QuotientGraphEdge &qg_edge = _quotient_graph[blocks.i][blocks.j]; + if(total_improvement > 0) + { // If the search improves the quality of the partition, we reinsert // all hyperedges that were used by the search and are still cut. ++qg_edge.num_improvements_found; @@ -322,13 +384,14 @@ void QuotientGraph::finalizeSearch(const SearchID search_id, } // In case the block pair becomes active, // we reinsert it into the queue - _active_block_scheduler.finalizeSearch( - blocks, _searches[search_id].round, total_improvement); + _active_block_scheduler.finalizeSearch(blocks, _searches[search_id].round, + total_improvement); --_num_active_searches; } -template -void QuotientGraph::initialize(const PartitionedHypergraph& phg) { +template +void QuotientGraph::initialize(const PartitionedHypergraph &phg) +{ _phg = &phg; // Reset internal members @@ -341,9 +404,12 @@ void QuotientGraph::initialize(const PartitionedHypergraph& phg) { phg.doParallelForAllEdges([&](const HyperedgeID he) { ++local_num_hes.local(); const HyperedgeWeight edge_weight = phg.edgeWeight(he); - for ( const PartitionID i : phg.connectivitySet(he) ) { - for ( const PartitionID j : phg.connectivitySet(he) ) { - if ( i < j ) { + for(const PartitionID i : phg.connectivitySet(he)) + { + for(const PartitionID j : phg.connectivitySet(he)) + { + if(i < j) + { _quotient_graph[i][j].add_hyperedge(he, edge_weight); } } @@ -356,31 +422,39 @@ void QuotientGraph::initialize(const PartitionedHypergraph& phg) { _active_block_scheduler.initialize(active_blocks, isInputHypergraph()); } -template -void QuotientGraph::changeNumberOfBlocks(const PartitionID new_k) { +template +void QuotientGraph::changeNumberOfBlocks(const PartitionID new_k) +{ // Reset improvement history as the number of blocks had changed - for ( size_t i = 0; i < _quotient_graph.size(); ++i ) { - for ( size_t j = 0; j < _quotient_graph.size(); ++j ) { + for(size_t i = 0; i < _quotient_graph.size(); ++i) + { + for(size_t j = 0; j < _quotient_graph.size(); ++j) + { _quotient_graph[i][j].num_improvements_found.store(0, std::memory_order_relaxed); _quotient_graph[i][j].total_improvement.store(0, std::memory_order_relaxed); } } - if ( static_cast(new_k) > _quotient_graph.size() ) { + if(static_cast(new_k) > _quotient_graph.size()) + { _quotient_graph.clear(); _quotient_graph.assign(new_k, vec(new_k)); } } -template -size_t QuotientGraph::numActiveBlockPairs() const { +template +size_t QuotientGraph::numActiveBlockPairs() const +{ return _active_block_scheduler.numRemainingBlocks() + _num_active_searches; } -template -void QuotientGraph::resetQuotientGraphEdges() { - for ( PartitionID i = 0; i < _context.partition.k; ++i ) { - for ( PartitionID j = i + 1; j < 
_context.partition.k; ++j ) { +template +void QuotientGraph::resetQuotientGraphEdges() +{ + for(PartitionID i = 0; i < _context.partition.k; ++i) + { + for(PartitionID j = i + 1; j < _context.partition.k; ++j) + { _quotient_graph[i][j].reset(); } } diff --git a/mt-kahypar/partition/refinement/flows/quotient_graph.h b/mt-kahypar/partition/refinement/flows/quotient_graph.h index ebafd6b99..3f2cc8ce7 100644 --- a/mt-kahypar/partition/refinement/flows/quotient_graph.h +++ b/mt-kahypar/partition/refinement/flows/quotient_graph.h @@ -30,20 +30,22 @@ #include "tbb/concurrent_vector.h" #include "tbb/enumerable_thread_specific.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/refinement/flows/refiner_adapter.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/utils/randomize.h" namespace mt_kahypar { -struct BlockPair { +struct BlockPair +{ PartitionID i = kInvalidPartition; PartitionID j = kInvalidPartition; }; -template -class QuotientGraph { +template +class QuotientGraph +{ static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; @@ -51,56 +53,52 @@ class QuotientGraph { using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; // ! Represents an edge of the quotient graph - struct QuotientGraphEdge { + struct QuotientGraphEdge + { QuotientGraphEdge() : - blocks(), - ownership(INVALID_SEARCH_ID), - is_in_queue(false), - cut_hes(), - num_cut_hes(0), - cut_he_weight(0), - num_improvements_found(0), - total_improvement(0) { } + blocks(), ownership(INVALID_SEARCH_ID), is_in_queue(false), cut_hes(), + num_cut_hes(0), cut_he_weight(0), num_improvements_found(0), total_improvement(0) + { + } // ! Adds a cut hyperedge to this quotient graph edge - void add_hyperedge(const HyperedgeID he, - const HyperedgeWeight weight); + void add_hyperedge(const HyperedgeID he, const HyperedgeWeight weight); void reset(); // ! Returns true, if quotient graph edge is acquired by a search - bool isAcquired() const { - return ownership.load() != INVALID_SEARCH_ID; - } + bool isAcquired() const { return ownership.load() != INVALID_SEARCH_ID; } // ! Tries to acquire quotient graph edge with corresponding search id - bool acquire(const SearchID search_id) { + bool acquire(const SearchID search_id) + { SearchID expected = INVALID_SEARCH_ID; SearchID desired = search_id; return ownership.compare_exchange_strong(expected, desired); } // ! Releases quotient graph edge - void release(const SearchID search_id) { + void release(const SearchID search_id) + { unused(search_id); ASSERT(ownership.load() == search_id); ownership.store(INVALID_SEARCH_ID); } - bool isInQueue() const { - return is_in_queue.load(std::memory_order_relaxed); - } + bool isInQueue() const { return is_in_queue.load(std::memory_order_relaxed); } // ! Marks quotient graph edge as in queue. Queued edges are scheduled // ! for refinement. - bool markAsInQueue() { + bool markAsInQueue() + { bool expected = false; bool desired = true; return is_in_queue.compare_exchange_strong(expected, desired); } // ! 
Marks quotient graph edge as nnot in queue - bool markAsNotInQueue() { + bool markAsNotInQueue() + { bool expected = true; bool desired = false; return is_in_queue.compare_exchange_strong(expected, desired); @@ -128,52 +126,50 @@ class QuotientGraph { /** * Maintains the block pair of a round of the active block scheduling strategy */ - class ActiveBlockSchedulingRound { - - public: - explicit ActiveBlockSchedulingRound(const Context& context, - vec>& quotient_graph) : - _context(context), - _quotient_graph(quotient_graph), - _unscheduled_blocks(), - _round_improvement(0), - _active_blocks_lock(), - _active_blocks(context.partition.k, false), - _remaining_blocks(0) { } + class ActiveBlockSchedulingRound + { + + public: + explicit ActiveBlockSchedulingRound(const Context &context, + vec > "ient_graph) : + _context(context), + _quotient_graph(quotient_graph), _unscheduled_blocks(), _round_improvement(0), + _active_blocks_lock(), _active_blocks(context.partition.k, false), + _remaining_blocks(0) + { + } // ! Pops a block pair from the queue. // ! Returns true, if a block pair was successfully popped from the queue. // ! The corresponding block pair will be stored in blocks. - bool popBlockPairFromQueue(BlockPair& blocks); + bool popBlockPairFromQueue(BlockPair &blocks); // ! Pushes a block pair into the queue. // ! Return true, if the block pair was successfully pushed into the queue. // ! Note, that a block pair is only allowed to be contained in one queue // ! (there are multiple active rounds). - bool pushBlockPairIntoQueue(const BlockPair& blocks); + bool pushBlockPairIntoQueue(const BlockPair &blocks); // ! Signals that the search on the corresponding block pair terminated. - void finalizeSearch(const BlockPair& blocks, - const HyperedgeWeight improvement, - bool& block_0_becomes_active, - bool& block_1_becomes_active); + void finalizeSearch(const BlockPair &blocks, const HyperedgeWeight improvement, + bool &block_0_becomes_active, bool &block_1_becomes_active); - HyperedgeWeight roundImprovement() const { + HyperedgeWeight roundImprovement() const + { return _round_improvement.load(std::memory_order_relaxed); } - bool isActive(const PartitionID block) const { + bool isActive(const PartitionID block) const + { ASSERT(block < _context.partition.k); return _active_blocks[block]; } - size_t numRemainingBlocks() const { - return _remaining_blocks; - } + size_t numRemainingBlocks() const { return _remaining_blocks; } - const Context& _context; - // ! Quotient graph - vec>& _quotient_graph; + const Context &_context; + // ! Quotient graph + vec > &_quotient_graph; // ! Queue that contains all unscheduled block pairs of the current round tbb::concurrent_queue _unscheduled_blocks; // ! Current improvement made in this round @@ -198,66 +194,64 @@ class QuotientGraph { * rounds. However, block pairs from earlier rounds have an higher priority to be * scheduled. 
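The ownership and in-queue flags declared earlier in this header follow a compare-and-swap claiming pattern. Reduced to a single atomic owner field with hypothetical names (Edge, kInvalidSearch), it looks like the sketch below; exactly one competing search wins the edge.

#include <atomic>
#include <cstdint>
#include <iostream>
#include <limits>
#include <thread>
#include <vector>

// Illustrative only: lock-free claiming of a quotient graph edge, reduced to a
// single atomic owner field.
using SearchID = uint32_t;
constexpr SearchID kInvalidSearch = std::numeric_limits<SearchID>::max();

struct Edge {
  std::atomic<SearchID> ownership{ kInvalidSearch };

  bool acquire(SearchID id) {
    SearchID expected = kInvalidSearch;
    return ownership.compare_exchange_strong(expected, id);  // exactly one caller wins
  }
  void release(SearchID id) {
    if (ownership.load() == id)  // only the owner releases (the real code asserts this)
      ownership.store(kInvalidSearch);
  }
};

int main() {
  Edge edge;
  std::atomic<int> winners{ 0 };
  std::vector<std::thread> threads;
  for (SearchID id = 0; id < 8; ++id)
    threads.emplace_back([&edge, &winners, id] { if (edge.acquire(id)) ++winners; });
  for (std::thread& t : threads) t.join();
  std::cout << "winners = " << winners.load() << "\n";  // always 1
}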
*/ - class ActiveBlockScheduler { - - public: - explicit ActiveBlockScheduler(const Context& context, - vec>& quotient_graph) : - _context(context), - _quotient_graph(quotient_graph), - _num_rounds(0), - _rounds(), - _min_improvement_per_round(0), - _terminate(false), - _round_lock(), - _first_active_round(0), - _is_input_hypergraph(false) { } + class ActiveBlockScheduler + { + + public: + explicit ActiveBlockScheduler(const Context &context, + vec > "ient_graph) : + _context(context), + _quotient_graph(quotient_graph), _num_rounds(0), _rounds(), + _min_improvement_per_round(0), _terminate(false), _round_lock(), + _first_active_round(0), _is_input_hypergraph(false) + { + } // ! Initialize the first round of the active block scheduling strategy - void initialize(const vec& active_blocks, - const bool is_input_hypergraph); + void initialize(const vec &active_blocks, const bool is_input_hypergraph); // ! Pops a block pair from the queue. // ! Returns true, if a block pair was successfully popped from the queue. // ! The corresponding block pair and the round to which this blocks corresponds // ! to are stored in blocks and round. - bool popBlockPairFromQueue(BlockPair& blocks, size_t& round); + bool popBlockPairFromQueue(BlockPair &blocks, size_t &round); // ! Signals that the search on the corresponding block pair terminated. // ! If one the two blocks become active, we immediatly schedule all edges // ! adjacent in the quotient graph in the next round of active block scheduling - void finalizeSearch(const BlockPair& blocks, - const size_t round, + void finalizeSearch(const BlockPair &blocks, const size_t round, const HyperedgeWeight improvement); - size_t numRemainingBlocks() const { + size_t numRemainingBlocks() const + { size_t num_remaining_blocks = 0; - for ( size_t i = _first_active_round; i < _num_rounds; ++i ) { + for(size_t i = _first_active_round; i < _num_rounds; ++i) + { num_remaining_blocks += _rounds[i].numRemainingBlocks(); } return num_remaining_blocks; } - void setObjective(const HyperedgeWeight objective) { + void setObjective(const HyperedgeWeight objective) + { _min_improvement_per_round = - _context.refinement.flows.min_relative_improvement_per_round * objective; + _context.refinement.flows.min_relative_improvement_per_round * objective; } - private: - - void reset() { + private: + void reset() + { _num_rounds.store(0); _rounds.clear(); _first_active_round = 0; _terminate = false; } - bool isActiveBlockPair(const PartitionID i, - const PartitionID j) const; + bool isActiveBlockPair(const PartitionID i, const PartitionID j) const; - const Context& _context; + const Context &_context; // ! Quotient graph - vec>& _quotient_graph; + vec > &_quotient_graph; // Contains all active block scheduling rounds CAtomic _num_rounds; tbb::concurrent_vector _rounds; @@ -273,11 +267,12 @@ class QuotientGraph { }; // Contains information required by a local search - struct Search { - explicit Search(const BlockPair& blocks, const size_t round) : - blocks(blocks), - round(round), - is_finalized(false) { } + struct Search + { + explicit Search(const BlockPair &blocks, const size_t round) : + blocks(blocks), round(round), is_finalized(false) + { + } // ! 
Block pair on which this search operates on BlockPair blocks; @@ -291,31 +286,28 @@ class QuotientGraph { public: static constexpr SearchID INVALID_SEARCH_ID = std::numeric_limits::max(); - explicit QuotientGraph(const HyperedgeID num_hyperedges, - const Context& context) : - _phg(nullptr), - _context(context), - _initial_num_edges(num_hyperedges), - _current_num_edges(kInvalidHyperedge), - _quotient_graph(context.partition.k, - vec(context.partition.k)), - _register_search_lock(), - _active_block_scheduler(context, _quotient_graph), - _num_active_searches(0), - _searches() { - for ( PartitionID i = 0; i < _context.partition.k; ++i ) { - for ( PartitionID j = i + 1; j < _context.partition.k; ++j ) { + explicit QuotientGraph(const HyperedgeID num_hyperedges, const Context &context) : + _phg(nullptr), _context(context), _initial_num_edges(num_hyperedges), + _current_num_edges(kInvalidHyperedge), + _quotient_graph(context.partition.k, vec(context.partition.k)), + _register_search_lock(), _active_block_scheduler(context, _quotient_graph), + _num_active_searches(0), _searches() + { + for(PartitionID i = 0; i < _context.partition.k; ++i) + { + for(PartitionID j = i + 1; j < _context.partition.k; ++j) + { _quotient_graph[i][j].blocks.i = i; _quotient_graph[i][j].blocks.j = j; } } } - QuotientGraph(const QuotientGraph&) = delete; - QuotientGraph(QuotientGraph&&) = delete; + QuotientGraph(const QuotientGraph &) = delete; + QuotientGraph(QuotientGraph &&) = delete; - QuotientGraph & operator= (const QuotientGraph &) = delete; - QuotientGraph & operator= (QuotientGraph &&) = delete; + QuotientGraph &operator=(const QuotientGraph &) = delete; + QuotientGraph &operator=(QuotientGraph &&) = delete; /** * Returns a new search id which is associated with a certain number @@ -324,43 +316,43 @@ class QuotientGraph { * associated with the search. If there are currently no block pairs * available then INVALID_SEARCH_ID is returned. */ - SearchID requestNewSearch(FlowRefinerAdapter& refiner); + SearchID requestNewSearch(FlowRefinerAdapter &refiner); // ! Returns the block pair on which the corresponding search operates on - BlockPair getBlockPair(const SearchID search_id) const { + BlockPair getBlockPair(const SearchID search_id) const + { ASSERT(search_id < _searches.size()); return _searches[search_id].blocks; } // ! Number of block pairs used by the corresponding search - size_t numBlockPairs(const SearchID) const { - return 1; - } + size_t numBlockPairs(const SearchID) const { return 1; } - template - void doForAllCutHyperedgesOfSearch(const SearchID search_id, const F& f) { - const BlockPair& blocks = _searches[search_id].blocks; + template + void doForAllCutHyperedgesOfSearch(const SearchID search_id, const F &f) + { + const BlockPair &blocks = _searches[search_id].blocks; const size_t num_cut_hes = _quotient_graph[blocks.i][blocks.j].num_cut_hes.load(); std::shuffle(_quotient_graph[blocks.i][blocks.j].cut_hes.begin(), _quotient_graph[blocks.i][blocks.j].cut_hes.begin() + num_cut_hes, utils::Randomize::instance().getGenerator()); - for ( size_t i = 0; i < num_cut_hes; ++i ) { + for(size_t i = 0; i < num_cut_hes; ++i) + { const HyperedgeID he = _quotient_graph[blocks.i][blocks.j].cut_hes[i]; - if ( _phg->pinCountInPart(he, blocks.i) > 0 && _phg->pinCountInPart(he, blocks.j) > 0 ) { + if(_phg->pinCountInPart(he, blocks.i) > 0 && _phg->pinCountInPart(he, blocks.j) > 0) + { f(he); } } } - /** * Notifies the quotient graph that hyperedge he contains * a new block, which was previously not contained. 
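The loop in doForAllCutHyperedgesOfSearch above shuffles the cached cut hyperedges and re-checks each one against the current pin counts, since other searches may have moved nodes in the meantime. An illustrative reduction follows, where is_still_cut stands in for the two pinCountInPart checks.

#include <algorithm>
#include <iostream>
#include <random>
#include <vector>

// Illustrative only: cached cut hyperedges can go stale, so they are shuffled
// and re-validated before being handed to the callback.
template <typename F>
void forAllStillCutHyperedges(std::vector<int>& cached_cut_hes,
                              const std::vector<bool>& is_still_cut, F&& f) {
  std::mt19937 rng(42);  // the real code uses a shared randomize utility
  std::shuffle(cached_cut_hes.begin(), cached_cut_hes.end(), rng);
  for (int he : cached_cut_hes)
    if (is_still_cut[he]) f(he);
}

int main() {
  std::vector<int> cached = { 0, 1, 2, 3 };
  std::vector<bool> still_cut = { true, false, true, true };  // edge 1 is no longer cut
  forAllStillCutHyperedges(cached, still_cut,
                           [](int he) { std::cout << "process cut hyperedge " << he << "\n"; });
}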
The thread * that increases the pin count of hyperedge he in the corresponding * block to 1 is responsible to call this function. */ - void addNewCutHyperedge(const HyperedgeID he, - const PartitionID block); + void addNewCutHyperedge(const HyperedgeID he, const PartitionID block); /** * Notify the quotient graph that the construction of the corresponding @@ -375,21 +367,23 @@ class QuotientGraph { * we reinsert all hyperedges that were used throughout the construction * and are still cut between the corresponding block. */ - void finalizeSearch(const SearchID search_id, - const HyperedgeWeight total_improvement); + void finalizeSearch(const SearchID search_id, const HyperedgeWeight total_improvement); // ! Initializes the quotient graph. This includes to find // ! all cut hyperedges between all block pairs - void initialize(const PartitionedHypergraph& phg); + void initialize(const PartitionedHypergraph &phg); - void setObjective(const HyperedgeWeight objective) { + void setObjective(const HyperedgeWeight objective) + { _active_block_scheduler.setObjective(objective); } size_t numActiveBlockPairs() const; // ! Only for testing - HyperedgeWeight getCutHyperedgeWeightOfBlockPair(const PartitionID i, const PartitionID j) const { + HyperedgeWeight getCutHyperedgeWeightOfBlockPair(const PartitionID i, + const PartitionID j) const + { ASSERT(i < j); ASSERT(0 <= i && i < _context.partition.k); ASSERT(0 <= j && j < _context.partition.k); @@ -398,22 +392,19 @@ class QuotientGraph { void changeNumberOfBlocks(const PartitionID new_k); - private: - +private: void resetQuotientGraphEdges(); - bool isInputHypergraph() const { - return _current_num_edges == _initial_num_edges; - } + bool isInputHypergraph() const { return _current_num_edges == _initial_num_edges; } - const PartitionedHypergraph* _phg; - const Context& _context; + const PartitionedHypergraph *_phg; + const Context &_context; const HypernodeID _initial_num_edges; HypernodeID _current_num_edges; // ! Each edge contains stats and the cut hyperedges // ! of the block pair which its represents. - vec> _quotient_graph; + vec > _quotient_graph; SpinLock _register_search_lock; // ! Queue that contains all block pairs. @@ -425,4 +416,4 @@ class QuotientGraph { tbb::concurrent_vector _searches; }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/refinement/flows/refiner_adapter.cpp b/mt-kahypar/partition/refinement/flows/refiner_adapter.cpp index 216680388..b712ce9f0 100644 --- a/mt-kahypar/partition/refinement/flows/refiner_adapter.cpp +++ b/mt-kahypar/partition/refinement/flows/refiner_adapter.cpp @@ -33,25 +33,29 @@ namespace mt_kahypar { namespace { - #define NOW std::chrono::high_resolution_clock::now() - #define RUNNING_TIME(X) std::chrono::duration(NOW - X).count(); +#define NOW std::chrono::high_resolution_clock::now() +#define RUNNING_TIME(X) std::chrono::duration(NOW - X).count(); } -template +template bool FlowRefinerAdapter::registerNewSearch(const SearchID search_id, - const PartitionedHypergraph& phg) { + const PartitionedHypergraph &phg) +{ bool success = true; size_t refiner_idx = INVALID_REFINER_IDX; - if ( _unused_refiners.try_pop(refiner_idx) ) { + if(_unused_refiners.try_pop(refiner_idx)) + { // Note, search id are usually consecutive starting from 0. // However, this function is not called in increasing search id order. 
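registerNewSearch draws a refiner slot from a concurrent free list and constructs the refiner lazily on first use. The sketch below reduces this to a hypothetical RefinerPool with a DummyRefiner stand-in; indices return to the free list when a search finishes.

#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>
#include <tbb/concurrent_queue.h>

// Illustrative only: a pool of refiners managed through a concurrent free list
// of slot indices, with lazy construction on first use.
struct DummyRefiner {
  void initialize() {}
};

class RefinerPool {
public:
  explicit RefinerPool(size_t max_refiners) : _refiners(max_refiners) {
    for (size_t i = 0; i < max_refiners; ++i)
      _unused.push(i);
  }

  // Returns false if all refiners are currently in use.
  bool acquire(size_t& idx) {
    if (!_unused.try_pop(idx))
      return false;
    if (!_refiners[idx])
      _refiners[idx] = std::make_unique<DummyRefiner>();  // lazy construction
    _refiners[idx]->initialize();
    return true;
  }

  void release(size_t idx) { _unused.push(idx); }

private:
  std::vector<std::unique_ptr<DummyRefiner>> _refiners;
  tbb::concurrent_queue<size_t> _unused;
};

int main() {
  RefinerPool pool(2);
  size_t a = 0, b = 0, c = 0;
  const bool got_a = pool.acquire(a);
  const bool got_b = pool.acquire(b);
  const bool got_c = pool.acquire(c);  // fails, both slots are busy
  std::cout << got_a << got_b << got_c << "\n";  // prints 110
  pool.release(a);
  std::cout << pool.acquire(c) << "\n";  // prints 1, reuses the released slot
}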
_search_lock.lock(); - while ( static_cast(search_id) >= _active_searches.size() ) { - _active_searches.push_back(ActiveSearch { INVALID_REFINER_IDX, NOW, 0.0, false }); + while(static_cast(search_id) >= _active_searches.size()) + { + _active_searches.push_back(ActiveSearch{ INVALID_REFINER_IDX, NOW, 0.0, false }); } _search_lock.unlock(); - if ( !_refiner[refiner_idx] ) { + if(!_refiner[refiner_idx]) + { // Lazy initialization of refiner _refiner[refiner_idx] = initializeRefiner(); } @@ -59,61 +63,72 @@ bool FlowRefinerAdapter::registerNewSearch(const SearchID search_id, _active_searches[search_id].refiner_idx = refiner_idx; _active_searches[search_id].start = NOW; mt_kahypar_partitioned_hypergraph_const_t partitioned_hg = - utils::partitioned_hg_const_cast(phg); + utils::partitioned_hg_const_cast(phg); _refiner[refiner_idx]->initialize(partitioned_hg); _refiner[refiner_idx]->updateTimeLimit(timeLimit()); - } else { + } + else + { success = false; } return success; } -template +template MoveSequence FlowRefinerAdapter::refine(const SearchID search_id, - const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg) { + const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg) +{ ASSERT(static_cast(search_id) < _active_searches.size()); ASSERT(_active_searches[search_id].refiner_idx != INVALID_REFINER_IDX); // Perform refinement mt_kahypar_partitioned_hypergraph_const_t partitioned_hg = - utils::partitioned_hg_const_cast(phg); + utils::partitioned_hg_const_cast(phg); const size_t refiner_idx = _active_searches[search_id].refiner_idx; const size_t num_free_threads = _threads.acquireFreeThreads(); _refiner[refiner_idx]->setNumThreadsForSearch(num_free_threads); - MoveSequence moves = _refiner[refiner_idx]->refine(partitioned_hg, sub_hg, _active_searches[search_id].start); + MoveSequence moves = _refiner[refiner_idx]->refine(partitioned_hg, sub_hg, + _active_searches[search_id].start); _threads.releaseThreads(num_free_threads); - _active_searches[search_id].reaches_time_limit = moves.state == MoveSequenceState::TIME_LIMIT; + _active_searches[search_id].reaches_time_limit = + moves.state == MoveSequenceState::TIME_LIMIT; return moves; } -template -PartitionID FlowRefinerAdapter::maxNumberOfBlocks(const SearchID search_id) { +template +PartitionID FlowRefinerAdapter::maxNumberOfBlocks(const SearchID search_id) +{ ASSERT(static_cast(search_id) < _active_searches.size()); ASSERT(_active_searches[search_id].refiner_idx != INVALID_REFINER_IDX); const size_t refiner_idx = _active_searches[search_id].refiner_idx; return _refiner[refiner_idx]->maxNumberOfBlocksPerSearch(); } -template -void FlowRefinerAdapter::finalizeSearch(const SearchID search_id) { +template +void FlowRefinerAdapter::finalizeSearch(const SearchID search_id) +{ ASSERT(static_cast(search_id) < _active_searches.size()); const double running_time = RUNNING_TIME(_active_searches[search_id].start); _active_searches[search_id].running_time = running_time; - //Update average running time + // Update average running time _search_lock.lock(); - if ( !_active_searches[search_id].reaches_time_limit ) { - _average_running_time = (running_time + _num_refinements * - _average_running_time) / static_cast(_num_refinements + 1); + if(!_active_searches[search_id].reaches_time_limit) + { + _average_running_time = (running_time + _num_refinements * _average_running_time) / + static_cast(_num_refinements + 1); ++_num_refinements; } _search_lock.unlock(); // Search position of refiner associated with the search id - if ( shouldSetTimeLimit() ) 
{ - for ( size_t idx = 0; idx < _refiner.size(); ++idx ) { - if ( _refiner[idx] ) { + if(shouldSetTimeLimit()) + { + for(size_t idx = 0; idx < _refiner.size(); ++idx) + { + if(_refiner[idx]) + { _refiner[idx]->updateTimeLimit(timeLimit()); } } @@ -124,8 +139,9 @@ void FlowRefinerAdapter::finalizeSearch(const SearchID search_id) { _active_searches[search_id].refiner_idx = INVALID_REFINER_IDX; } -template -void FlowRefinerAdapter::initialize(const size_t max_parallelism) { +template +void FlowRefinerAdapter::initialize(const size_t max_parallelism) +{ _num_parallel_refiners = max_parallelism; _threads.num_threads = _context.shared_memory.num_threads; _threads.num_parallel_refiners = max_parallelism; @@ -133,7 +149,8 @@ void FlowRefinerAdapter::initialize(const size_t max_parallelism) { _threads.num_used_threads = 0; _unused_refiners.clear(); - for ( size_t i = 0; i < numAvailableRefiner(); ++i ) { + for(size_t i = 0; i < numAvailableRefiner(); ++i) + { _unused_refiners.push(i); } _active_searches.clear(); @@ -141,10 +158,11 @@ void FlowRefinerAdapter::initialize(const size_t max_parallelism) { _average_running_time = 0.0; } -template -std::unique_ptr FlowRefinerAdapter::initializeRefiner() { +template +std::unique_ptr FlowRefinerAdapter::initializeRefiner() +{ return FlowRefinementFactory::getInstance().createObject( - _context.refinement.flows.algorithm, _num_hyperedges, _context); + _context.refinement.flows.algorithm, _num_hyperedges, _context); } INSTANTIATE_CLASS_WITH_TYPE_TRAITS(FlowRefinerAdapter) diff --git a/mt-kahypar/partition/refinement/flows/refiner_adapter.h b/mt-kahypar/partition/refinement/flows/refiner_adapter.h index bd2a6dd4a..53b7440e6 100644 --- a/mt-kahypar/partition/refinement/flows/refiner_adapter.h +++ b/mt-kahypar/partition/refinement/flows/refiner_adapter.h @@ -25,18 +25,19 @@ ******************************************************************************/ #pragma once -#include "tbb/concurrent_vector.h" #include "tbb/concurrent_queue.h" +#include "tbb/concurrent_vector.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" -#include "mt-kahypar/parallel/stl/scalable_vector.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" namespace mt_kahypar { -template -class FlowRefinerAdapter { +template +class FlowRefinerAdapter +{ static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; @@ -44,36 +45,39 @@ class FlowRefinerAdapter { using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; - struct ActiveSearch { + struct ActiveSearch + { size_t refiner_idx; HighResClockTimepoint start; double running_time; bool reaches_time_limit; }; - struct ThreadOrganizer { + struct ThreadOrganizer + { ThreadOrganizer() : - lock(), - num_threads(0), - num_used_threads(0), - num_parallel_refiners(0), - num_active_refiners(0) { } + lock(), num_threads(0), num_used_threads(0), num_parallel_refiners(0), + num_active_refiners(0) + { + } - size_t acquireFreeThreads() { + size_t acquireFreeThreads() + { lock.lock(); const size_t num_threads_per_search = - std::max(UL(1), static_cast(std::ceil( - static_cast(num_threads - num_used_threads) / - ( num_parallel_refiners - num_active_refiners ) ))); - const size_t num_free_threads = std::min( - num_threads_per_search, num_threads - num_used_threads); + std::max(UL(1), static_cast(std::ceil( + static_cast(num_threads - num_used_threads) / + 
(num_parallel_refiners - num_active_refiners)))); + const size_t num_free_threads = + std::min(num_threads_per_search, num_threads - num_used_threads); ++num_active_refiners; num_used_threads += num_free_threads; lock.unlock(); return num_free_threads; } - void releaseThreads(const size_t num_threads) { + void releaseThreads(const size_t num_threads) + { lock.lock(); ASSERT(num_threads <= num_used_threads); ASSERT(num_active_refiners); @@ -82,7 +86,8 @@ class FlowRefinerAdapter { lock.unlock(); } - void terminateRefiner() { + void terminateRefiner() + { lock.lock(); --num_parallel_refiners; lock.unlock(); @@ -96,39 +101,31 @@ class FlowRefinerAdapter { }; public: - explicit FlowRefinerAdapter(const HyperedgeID num_hyperedges, - const Context& context) : - _num_hyperedges(num_hyperedges), - _context(context), - _unused_refiners(), - _refiner(), - _search_lock(), - _active_searches(), - _threads(), - _num_parallel_refiners(0), - _num_refinements(0), - _average_running_time(0.0) { - for ( size_t i = 0; i < _context.shared_memory.num_threads; ++i ) { + explicit FlowRefinerAdapter(const HyperedgeID num_hyperedges, const Context &context) : + _num_hyperedges(num_hyperedges), _context(context), _unused_refiners(), _refiner(), + _search_lock(), _active_searches(), _threads(), _num_parallel_refiners(0), + _num_refinements(0), _average_running_time(0.0) + { + for(size_t i = 0; i < _context.shared_memory.num_threads; ++i) + { _refiner.emplace_back(nullptr); } } - FlowRefinerAdapter(const FlowRefinerAdapter&) = delete; - FlowRefinerAdapter(FlowRefinerAdapter&&) = delete; + FlowRefinerAdapter(const FlowRefinerAdapter &) = delete; + FlowRefinerAdapter(FlowRefinerAdapter &&) = delete; - FlowRefinerAdapter & operator= (const FlowRefinerAdapter &) = delete; - FlowRefinerAdapter & operator= (FlowRefinerAdapter &&) = delete; + FlowRefinerAdapter &operator=(const FlowRefinerAdapter &) = delete; + FlowRefinerAdapter &operator=(FlowRefinerAdapter &&) = delete; void initialize(const size_t max_parallelism); // ! Associates a refiner with a search id. // ! Returns true, if there is an idle refiner left. - bool registerNewSearch(const SearchID search_id, - const PartitionedHypergraph& phg); + bool registerNewSearch(const SearchID search_id, const PartitionedHypergraph &phg); - MoveSequence refine(const SearchID search_id, - const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg); + MoveSequence refine(const SearchID search_id, const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg); // ! Returns the maximum number of blocks which is allowed to be // ! contained in the problem of the refiner associated with @@ -139,45 +136,43 @@ class FlowRefinerAdapter { // ! available again void finalizeSearch(const SearchID search_id); - void terminateRefiner() { - _threads.terminateRefiner(); - } + void terminateRefiner() { _threads.terminateRefiner(); } - size_t numAvailableRefiner() const { - return _num_parallel_refiners; - } + size_t numAvailableRefiner() const { return _num_parallel_refiners; } - double runningTime(const SearchID search_id) const { + double runningTime(const SearchID search_id) const + { ASSERT(static_cast(search_id) < _active_searches.size()); return _active_searches[search_id].running_time; } - double timeLimit() const { - return shouldSetTimeLimit() ? - std::max(_context.refinement.flows.time_limit_factor * - _average_running_time, 0.1) : std::numeric_limits::max(); + double timeLimit() const + { + return shouldSetTimeLimit() ? 
std::max(_context.refinement.flows.time_limit_factor * + _average_running_time, + 0.1) : + std::numeric_limits::max(); } // ! Only for testing - size_t numUsedThreads() const { - return _threads.num_used_threads; - } + size_t numUsedThreads() const { return _threads.num_used_threads; } private: std::unique_ptr initializeRefiner(); - bool shouldSetTimeLimit() const { + bool shouldSetTimeLimit() const + { return _num_refinements > static_cast(_context.partition.k) && - _context.refinement.flows.time_limit_factor > 1.0; + _context.refinement.flows.time_limit_factor > 1.0; } const HyperedgeID _num_hyperedges; - const Context& _context; + const Context &_context; // ! Indices of unused refiners tbb::concurrent_queue _unused_refiners; // ! Available refiners - vec> _refiner; + vec > _refiner; // ! Mapping from search id to refiner SpinLock _search_lock; tbb::concurrent_vector _active_searches; @@ -187,7 +182,6 @@ class FlowRefinerAdapter { size_t _num_refinements; double _average_running_time; - }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/refinement/flows/scheduler.cpp b/mt-kahypar/partition/refinement/flows/scheduler.cpp index a01774794..c665a1d02 100644 --- a/mt-kahypar/partition/refinement/flows/scheduler.cpp +++ b/mt-kahypar/partition/refinement/flows/scheduler.cpp @@ -27,88 +27,97 @@ #include "mt-kahypar/partition/refinement/flows/scheduler.h" #include "mt-kahypar/definitions.h" +#include "mt-kahypar/io/partitioning_output.h" #include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/refinement/gains/gain_definitions.h" -#include "mt-kahypar/io/partitioning_output.h" -#include "mt-kahypar/utils/utilities.h" #include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/utils/utilities.h" namespace mt_kahypar { -template -void FlowRefinementScheduler::RefinementStats::update_global_stats() { +template +void FlowRefinementScheduler::RefinementStats::update_global_stats() +{ _stats.update_stat("num_flow_refinements", - num_refinements.load(std::memory_order_relaxed)); + num_refinements.load(std::memory_order_relaxed)); _stats.update_stat("num_flow_improvement", - num_improvements.load(std::memory_order_relaxed)); - _stats.update_stat("num_time_limits", - num_time_limits.load(std::memory_order_relaxed)); + num_improvements.load(std::memory_order_relaxed)); + _stats.update_stat("num_time_limits", num_time_limits.load(std::memory_order_relaxed)); _stats.update_stat("correct_expected_improvement", - correct_expected_improvement.load(std::memory_order_relaxed)); + correct_expected_improvement.load(std::memory_order_relaxed)); _stats.update_stat("zero_gain_improvement", - zero_gain_improvement.load(std::memory_order_relaxed)); - _stats.update_stat("failed_updates_due_to_conflicting_moves", - failed_updates_due_to_conflicting_moves.load(std::memory_order_relaxed)); + zero_gain_improvement.load(std::memory_order_relaxed)); + _stats.update_stat( + "failed_updates_due_to_conflicting_moves", + failed_updates_due_to_conflicting_moves.load(std::memory_order_relaxed)); _stats.update_stat("failed_updates_due_to_conflicting_moves_without_rollback", - failed_updates_due_to_conflicting_moves_without_rollback.load(std::memory_order_relaxed)); - _stats.update_stat("failed_updates_due_to_balance_constraint", - failed_updates_due_to_balance_constraint.load(std::memory_order_relaxed)); + failed_updates_due_to_conflicting_moves_without_rollback.load( + std::memory_order_relaxed)); + _stats.update_stat( + "failed_updates_due_to_balance_constraint", + 
failed_updates_due_to_balance_constraint.load(std::memory_order_relaxed)); _stats.update_stat("total_flow_refinement_improvement", - total_improvement.load(std::memory_order_relaxed)); + total_improvement.load(std::memory_order_relaxed)); } -template +template bool FlowRefinementScheduler::refineImpl( - mt_kahypar_partitioned_hypergraph_t& hypergraph, - const parallel::scalable_vector&, - Metrics& best_metrics, - const double) { - PartitionedHypergraph& phg = utils::cast(hypergraph); + mt_kahypar_partitioned_hypergraph_t &hypergraph, + const parallel::scalable_vector &, Metrics &best_metrics, const double) +{ + PartitionedHypergraph &phg = utils::cast(hypergraph); ASSERT(_phg == &phg); _quotient_graph.setObjective(best_metrics.quality); std::atomic overall_delta(0); - utils::Timer& timer = utils::Utilities::instance().getTimer(_context.utility_id); + utils::Timer &timer = utils::Utilities::instance().getTimer(_context.utility_id); tbb::parallel_for(UL(0), _refiner.numAvailableRefiner(), [&](const size_t i) { - while ( i < std::max(UL(1), static_cast( - std::ceil(_context.refinement.flows.parallel_searches_multiplier * - _quotient_graph.numActiveBlockPairs()))) ) { + while(i < std::max(UL(1), static_cast(std::ceil( + _context.refinement.flows.parallel_searches_multiplier * + _quotient_graph.numActiveBlockPairs())))) + { SearchID search_id = _quotient_graph.requestNewSearch(_refiner); - if ( search_id != QuotientGraph::INVALID_SEARCH_ID ) { - DBG << "Start search" << search_id - << "( Blocks =" << blocksOfSearch(search_id) + if(search_id != QuotientGraph::INVALID_SEARCH_ID) + { + DBG << "Start search" << search_id << "( Blocks =" << blocksOfSearch(search_id) << ", Refiner =" << i << ")"; timer.start_timer("region_growing", "Grow Region", true); const Subhypergraph sub_hg = - _constructor.construct(search_id, _quotient_graph, phg); + _constructor.construct(search_id, _quotient_graph, phg); _quotient_graph.finalizeConstruction(search_id); timer.stop_timer("region_growing"); HyperedgeWeight delta = 0; bool improved_solution = false; - if ( sub_hg.numNodes() > 0 ) { + if(sub_hg.numNodes() > 0) + { ++_stats.num_refinements; MoveSequence sequence = _refiner.refine(search_id, phg, sub_hg); - if ( !sequence.moves.empty() ) { + if(!sequence.moves.empty()) + { timer.start_timer("apply_moves", "Apply Moves", true); delta = applyMoves(search_id, sequence); overall_delta -= delta; improved_solution = sequence.state == MoveSequenceState::SUCCESS && delta > 0; timer.stop_timer("apply_moves"); - } else if ( sequence.state == MoveSequenceState::TIME_LIMIT ) { + } + else if(sequence.state == MoveSequenceState::TIME_LIMIT) + { ++_stats.num_time_limits; - DBG << RED << "Search" << search_id << "reaches the time limit ( Time Limit =" - << _refiner.timeLimit() << "s )" << END; + DBG << RED << "Search" << search_id + << "reaches the time limit ( Time Limit =" << _refiner.timeLimit() + << "s )" << END; } } _quotient_graph.finalizeSearch(search_id, improved_solution ? 
delta : 0); _refiner.finalizeSearch(search_id); - DBG << "End search" << search_id - << "( Blocks =" << blocksOfSearch(search_id) - << ", Refiner =" << i - << ", Running Time =" << _refiner.runningTime(search_id) << ")"; - } else { + DBG << "End search" << search_id << "( Blocks =" << blocksOfSearch(search_id) + << ", Refiner =" << i << ", Running Time =" << _refiner.runningTime(search_id) + << ")"; + } + else + { break; } } @@ -118,27 +127,34 @@ bool FlowRefinementScheduler::refineImpl( DBG << _stats; - ASSERT([&]() { - for ( PartitionID i = 0; i < _context.partition.k; ++i ) { - if ( _part_weights[i] != phg.partWeight(i) ) { - LOG << V(_part_weights[i]) << V(phg.partWeight(i)); - return false; - } - } - return true; - }(), "Concurrent part weight updates failed!"); + ASSERT( + [&]() { + for(PartitionID i = 0; i < _context.partition.k; ++i) + { + if(_part_weights[i] != phg.partWeight(i)) + { + LOG << V(_part_weights[i]) << V(phg.partWeight(i)); + return false; + } + } + return true; + }(), + "Concurrent part weight updates failed!"); // Update metrics statistics - HEAVY_REFINEMENT_ASSERT(best_metrics.quality + overall_delta == metrics::quality(phg, _context), - V(best_metrics.quality) << V(overall_delta) << V(metrics::quality(phg, _context))); + HEAVY_REFINEMENT_ASSERT( + best_metrics.quality + overall_delta == metrics::quality(phg, _context), + V(best_metrics.quality) << V(overall_delta) << V(metrics::quality(phg, _context))); best_metrics.quality += overall_delta; best_metrics.imbalance = metrics::imbalance(phg, _context); _stats.update_global_stats(); // Update Gain Cache - if ( _context.forceGainCacheUpdates() && _gain_cache.isInitialized() ) { - phg.doParallelForAllNodes([&](const HypernodeID& hn) { - if ( _was_moved[hn] ) { + if(_context.forceGainCacheUpdates() && _gain_cache.isInitialized()) + { + phg.doParallelForAllNodes([&](const HypernodeID &hn) { + if(_was_moved[hn]) + { _gain_cache.recomputeInvalidTerms(phg, hn); _was_moved[hn] = uint8_t(false); } @@ -150,21 +166,24 @@ bool FlowRefinementScheduler::refineImpl( return overall_delta.load(std::memory_order_relaxed) < 0; } -template -void FlowRefinementScheduler::initializeImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph) { - PartitionedHypergraph& phg = utils::cast(hypergraph); +template +void FlowRefinementScheduler::initializeImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph) +{ + PartitionedHypergraph &phg = utils::cast(hypergraph); _phg = &phg; resizeDataStructuresForCurrentK(); // Initialize Part Weights - for ( PartitionID i = 0; i < _context.partition.k; ++i ) { + for(PartitionID i = 0; i < _context.partition.k; ++i) + { _part_weights[i] = phg.partWeight(i); - _max_part_weights[i] = std::max( - phg.partWeight(i), _context.partition.max_part_weights[i]); + _max_part_weights[i] = + std::max(phg.partWeight(i), _context.partition.max_part_weights[i]); } _stats.reset(); - utils::Timer& timer = utils::Utilities::instance().getTimer(_context.utility_id); + utils::Timer &timer = utils::Utilities::instance().getTimer(_context.utility_id); timer.start_timer("initialize_quotient_graph", "Initialize Quotient Graph"); _quotient_graph.initialize(phg); timer.stop_timer("initialize_quotient_graph"); @@ -175,14 +194,17 @@ void FlowRefinementScheduler::initializeImpl(mt_kahypar_parti _refiner.initialize(max_parallism); } -template -void FlowRefinementScheduler::resizeDataStructuresForCurrentK() { - if ( _current_k != _context.partition.k ) { +template +void FlowRefinementScheduler::resizeDataStructuresForCurrentK() +{ + 
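The time-limit handling of the adapter above is driven by a running average: only searches that finish within the limit update the average, and the limit max(time_limit_factor * average, 0.1s) is enforced once more than k refinements have completed and the factor is greater than 1. Below is a minimal standalone sketch of that arithmetic; TimeLimitTracker and the concrete numbers are made up for illustration and are not part of the code base.

```cpp
#include <algorithm>
#include <iostream>
#include <limits>

struct TimeLimitTracker {
  double time_limit_factor;
  int k;  // number of blocks of the partition
  int num_refinements = 0;
  double average_running_time = 0.0;

  bool shouldSetTimeLimit() const {
    return num_refinements > k && time_limit_factor > 1.0;
  }

  double timeLimit() const {
    return shouldSetTimeLimit()
               ? std::max(time_limit_factor * average_running_time, 0.1)
               : std::numeric_limits<double>::max();
  }

  // Incremental mean: avg' = (t + n * avg) / (n + 1); searches that hit the
  // time limit are excluded so they do not inflate the average.
  void reportFinishedSearch(double running_time, bool reached_time_limit) {
    if (!reached_time_limit) {
      average_running_time =
          (running_time + num_refinements * average_running_time) /
          static_cast<double>(num_refinements + 1);
      ++num_refinements;
    }
  }
};

int main() {
  TimeLimitTracker tracker{ /*time_limit_factor=*/8.0, /*k=*/2 };
  for (double t : { 0.5, 0.7, 0.6 }) tracker.reportFinishedSearch(t, false);
  // After three searches (> k = 2), the limit is 8 * 0.6 = 4.8 seconds.
  std::cout << tracker.timeLimit() << "\n";
}
```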
if(_current_k != _context.partition.k) + { _current_k = _context.partition.k; // Note that in general changing the number of blocks should not resize // any data structure as we initialize the scheduler with the final // number of blocks. This is just a fallback if someone changes this in the future. - if ( static_cast(_current_k) > _part_weights.size() ) { + if(static_cast(_current_k) > _part_weights.size()) + { _part_weights.resize(_current_k); _max_part_weights.resize(_current_k); } @@ -193,49 +215,53 @@ void FlowRefinementScheduler::resizeDataStructuresForCurrentK namespace { -struct NewCutHyperedge { +struct NewCutHyperedge +{ HyperedgeID he; PartitionID block; }; -template -bool changeNodePart(PartitionedHypergraph& phg, - GainCache& gain_cache, - const HypernodeID hn, - const PartitionID from, - const PartitionID to, - const F& objective_delta, - const bool gain_cache_update) { +template +bool changeNodePart(PartitionedHypergraph &phg, GainCache &gain_cache, + const HypernodeID hn, const PartitionID from, const PartitionID to, + const F &objective_delta, const bool gain_cache_update) +{ bool success = false; - if ( gain_cache_update && gain_cache.isInitialized()) { - success = phg.changeNodePart(gain_cache, hn, from, to, - std::numeric_limits::max(), []{}, objective_delta); - } else { - success = phg.changeNodePart(hn, from, to, - std::numeric_limits::max(), []{}, objective_delta); + if(gain_cache_update && gain_cache.isInitialized()) + { + success = phg.changeNodePart( + gain_cache, hn, from, to, std::numeric_limits::max(), [] {}, + objective_delta); + } + else + { + success = phg.changeNodePart( + hn, from, to, std::numeric_limits::max(), [] {}, + objective_delta); } ASSERT(success); return success; } -template -void applyMoveSequence(PartitionedHypergraph& phg, - GainCache& gain_cache, - const MoveSequence& sequence, - const F& objective_delta, - const bool gain_cache_update, - vec& was_moved, - vec& new_cut_hes) { - for ( const Move& move : sequence.moves ) { +template +void applyMoveSequence(PartitionedHypergraph &phg, GainCache &gain_cache, + const MoveSequence &sequence, const F &objective_delta, + const bool gain_cache_update, vec &was_moved, + vec &new_cut_hes) +{ + for(const Move &move : sequence.moves) + { ASSERT(move.from == phg.partID(move.node)); - if ( move.from != move.to ) { - changeNodePart(phg, gain_cache, move.node, move.from, - move.to, objective_delta, gain_cache_update); + if(move.from != move.to) + { + changeNodePart(phg, gain_cache, move.node, move.from, move.to, objective_delta, + gain_cache_update); was_moved[move.node] = uint8_t(true); // If move increases the pin count of some hyperedges in block 'move.to' to one 1 // we set the corresponding block here. 
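The comment above refers to a small back-fill trick in applyMoveSequence(): the gain delta callback appends hyperedges whose pin count in the target block just became 1 with a sentinel block, and after each node move the trailing sentinel entries are stamped with that move's target block. A standalone sketch of the idea follows; recordNewCutHyperedges and the plain int type aliases are illustrative only.

```cpp
#include <iostream>
#include <vector>

using HyperedgeID = int;
using PartitionID = int;
constexpr PartitionID kInvalidPartition = -1;

struct NewCutHyperedge {
  HyperedgeID he;
  PartitionID block;
};

// Called after one vertex move: `hes_with_new_block` are the hyperedges whose
// pin count in block `to` was raised from 0 to 1 by that move.
void recordNewCutHyperedges(const std::vector<HyperedgeID>& hes_with_new_block,
                            PartitionID to,
                            std::vector<NewCutHyperedge>& new_cut_hes) {
  for (HyperedgeID he : hes_with_new_block) {
    new_cut_hes.push_back(NewCutHyperedge{ he, kInvalidPartition });
  }
  // Back-fill: only the entries appended for this move still carry the
  // sentinel, so walking from the back stops at the first stamped entry.
  for (int i = static_cast<int>(new_cut_hes.size()) - 1;
       i >= 0 && new_cut_hes[i].block == kInvalidPartition; --i) {
    new_cut_hes[i].block = to;
  }
}

int main() {
  std::vector<NewCutHyperedge> new_cut_hes;
  recordNewCutHyperedges({ 4, 7 }, /*to=*/1, new_cut_hes);  // move into block 1
  recordNewCutHyperedges({ 9 }, /*to=*/0, new_cut_hes);     // move into block 0
  for (const NewCutHyperedge& e : new_cut_hes) {
    std::cout << "he " << e.he << " -> block " << e.block << "\n";
  }
  // Prints: he 4 -> block 1, he 7 -> block 1, he 9 -> block 0
}
```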
int i = new_cut_hes.size() - 1; - while ( i >= 0 && new_cut_hes[i].block == kInvalidPartition ) { + while(i >= 0 && new_cut_hes[i].block == kInvalidPartition) + { new_cut_hes[i].block = move.to; --i; } @@ -243,25 +269,28 @@ void applyMoveSequence(PartitionedHypergraph& phg, } } -template -void revertMoveSequence(PartitionedHypergraph& phg, - GainCache& gain_cache, - const MoveSequence& sequence, - const F& objective_delta, - const bool gain_cache_update) { - for ( const Move& move : sequence.moves ) { - if ( move.from != move.to ) { +template +void revertMoveSequence(PartitionedHypergraph &phg, GainCache &gain_cache, + const MoveSequence &sequence, const F &objective_delta, + const bool gain_cache_update) +{ + for(const Move &move : sequence.moves) + { + if(move.from != move.to) + { ASSERT(phg.partID(move.node) == move.to); - changeNodePart(phg, gain_cache, move.node, move.to, - move.from, objective_delta, gain_cache_update); + changeNodePart(phg, gain_cache, move.node, move.to, move.from, objective_delta, + gain_cache_update); } } } -template -void addCutHyperedgesToQuotientGraph(QuotientGraph& quotient_graph, - const vec& new_cut_hes) { - for ( const NewCutHyperedge& new_cut_he : new_cut_hes ) { +template +void addCutHyperedgesToQuotientGraph(QuotientGraph "ient_graph, + const vec &new_cut_hes) +{ + for(const NewCutHyperedge &new_cut_he : new_cut_hes) + { ASSERT(new_cut_he.block != kInvalidPartition); quotient_graph.addNewCutHyperedge(new_cut_he.he, new_cut_he.block); } @@ -269,8 +298,11 @@ void addCutHyperedgesToQuotientGraph(QuotientGraph& quotient_graph, } // namespace -template -HyperedgeWeight FlowRefinementScheduler::applyMoves(const SearchID search_id, MoveSequence& sequence) { +template +HyperedgeWeight +FlowRefinementScheduler::applyMoves(const SearchID search_id, + MoveSequence &sequence) +{ unused(search_id); ASSERT(_phg); @@ -280,9 +312,11 @@ HyperedgeWeight FlowRefinementScheduler::applyMoves(const Sea // Compute Part Weight Deltas vec part_weight_deltas(_context.partition.k, 0); - for ( Move& move : sequence.moves ) { + for(Move &move : sequence.moves) + { move.from = _phg->partID(move.node); - if ( move.from != move.to ) { + if(move.from != move.to) + { const HypernodeWeight node_weight = _phg->nodeWeight(move.node); part_weight_deltas[move.from] -= node_weight; part_weight_deltas[move.to] += node_weight; @@ -291,56 +325,69 @@ HyperedgeWeight FlowRefinementScheduler::applyMoves(const Sea HyperedgeWeight improvement = 0; vec new_cut_hes; - auto delta_func = [&](const SynchronizedEdgeUpdate& sync_update) { + auto delta_func = [&](const SynchronizedEdgeUpdate &sync_update) { improvement -= AttributedGains::gain(sync_update); // Collect hyperedges with new blocks in its connectivity set - if ( sync_update.pin_count_in_to_part_after == 1 ) { + if(sync_update.pin_count_in_to_part_after == 1) + { // the corresponding block will be set in applyMoveSequence(...) 
function - new_cut_hes.emplace_back(NewCutHyperedge { sync_update.he, kInvalidPartition }); + new_cut_hes.emplace_back(NewCutHyperedge{ sync_update.he, kInvalidPartition }); } }; // Update part weights atomically PartWeightUpdateResult update_res = partWeightUpdate(part_weight_deltas, false); - if ( update_res.is_balanced ) { + if(update_res.is_balanced) + { // Apply move sequence to partition applyMoveSequence(*_phg, _gain_cache, sequence, delta_func, - _context.forceGainCacheUpdates(), _was_moved, new_cut_hes); + _context.forceGainCacheUpdates(), _was_moved, new_cut_hes); - if ( improvement < 0 ) { + if(improvement < 0) + { update_res = partWeightUpdate(part_weight_deltas, true); - if ( update_res.is_balanced ) { + if(update_res.is_balanced) + { // Move sequence worsen solution quality => Rollback DBG << RED << "Move sequence worsen solution quality (" << "Expected Improvement =" << sequence.expected_improvement - << ", Real Improvement =" << improvement - << ", Search ID =" << search_id << ")" << END; - revertMoveSequence(*_phg, _gain_cache, sequence, delta_func, _context.forceGainCacheUpdates()); + << ", Real Improvement =" << improvement << ", Search ID =" << search_id + << ")" << END; + revertMoveSequence(*_phg, _gain_cache, sequence, delta_func, + _context.forceGainCacheUpdates()); ++_stats.failed_updates_due_to_conflicting_moves; sequence.state = MoveSequenceState::WORSEN_SOLUTION_QUALITY; - } else { + } + else + { // Rollback would violate balance constraint => Worst Case ++_stats.failed_updates_due_to_conflicting_moves_without_rollback; sequence.state = MoveSequenceState::WORSEN_SOLUTION_QUALITY_WITHOUT_ROLLBACK; - DBG << RED << "Rollback of move sequence violated balance constraint ( Moved Nodes =" + DBG << RED + << "Rollback of move sequence violated balance constraint ( Moved Nodes =" << sequence.moves.size() << ", Expected Improvement =" << sequence.expected_improvement - << ", Real Improvement =" << improvement - << ", Search ID =" << search_id << ")" << END; + << ", Real Improvement =" << improvement << ", Search ID =" << search_id + << ")" << END; } - } else { + } + else + { ++_stats.num_improvements; - _stats.correct_expected_improvement += (improvement == sequence.expected_improvement); + _stats.correct_expected_improvement += + (improvement == sequence.expected_improvement); _stats.zero_gain_improvement += (improvement == 0); sequence.state = MoveSequenceState::SUCCESS; - DBG << ( improvement > 0 ? GREEN : "" ) << "SUCCESS -" + DBG << (improvement > 0 ? GREEN : "") << "SUCCESS -" << "Moved Nodes =" << sequence.moves.size() << ", Expected Improvement =" << sequence.expected_improvement - << ", Real Improvement =" << improvement - << ", Search ID =" << search_id << ( improvement > 0 ? END : "" ); + << ", Real Improvement =" << improvement << ", Search ID =" << search_id + << (improvement > 0 ? 
END : ""); } - } else { + } + else + { ++_stats.failed_updates_due_to_balance_constraint; sequence.state = MoveSequenceState::VIOLATES_BALANCE_CONSTRAINT; DBG << RED << "Move sequence violated balance constraint ( Moved Nodes =" @@ -351,7 +398,8 @@ HyperedgeWeight FlowRefinementScheduler::applyMoves(const Sea _apply_moves_lock.unlock(); - if ( sequence.state == MoveSequenceState::SUCCESS && improvement > 0 ) { + if(sequence.state == MoveSequenceState::SUCCESS && improvement > 0) + { addCutHyperedgesToQuotientGraph(_quotient_graph, new_cut_hes); _stats.total_improvement += improvement; } @@ -359,26 +407,31 @@ HyperedgeWeight FlowRefinementScheduler::applyMoves(const Sea return improvement; } -template +template typename FlowRefinementScheduler::PartWeightUpdateResult -FlowRefinementScheduler::partWeightUpdate(const vec& part_weight_deltas, - const bool rollback) { +FlowRefinementScheduler::partWeightUpdate( + const vec &part_weight_deltas, const bool rollback) +{ const HypernodeWeight multiplier = rollback ? -1 : 1; PartWeightUpdateResult res; _part_weights_lock.lock(); PartitionID i = 0; - for ( ; i < _context.partition.k; ++i ) { - if ( _part_weights[i] + multiplier * part_weight_deltas[i] > _max_part_weights[i] ) { + for(; i < _context.partition.k; ++i) + { + if(_part_weights[i] + multiplier * part_weight_deltas[i] > _max_part_weights[i]) + { DBG << "Move sequence violated balance constraint of block" << i << "(Max =" << _max_part_weights[i] - << ", Actual =" << (_part_weights[i] + multiplier * part_weight_deltas[i]) << ")"; + << ", Actual =" << (_part_weights[i] + multiplier * part_weight_deltas[i]) + << ")"; res.is_balanced = false; res.overloaded_block = i; - res.overload_weight = ( _part_weights[i] + multiplier * - part_weight_deltas[i] ) - _max_part_weights[i]; + res.overload_weight = + (_part_weights[i] + multiplier * part_weight_deltas[i]) - _max_part_weights[i]; // Move Sequence Violates Balance Constraint => Rollback --i; - for ( ; i >= 0; --i ) { + for(; i >= 0; --i) + { _part_weights[i] -= multiplier * part_weight_deltas[i]; } break; diff --git a/mt-kahypar/partition/refinement/flows/scheduler.h b/mt-kahypar/partition/refinement/flows/scheduler.h index 738fa17cd..c20bc77d5 100644 --- a/mt-kahypar/partition/refinement/flows/scheduler.h +++ b/mt-kahypar/partition/refinement/flows/scheduler.h @@ -26,36 +26,37 @@ #pragma once +#include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/partition/context.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" +#include "mt-kahypar/partition/refinement/flows/problem_construction.h" #include "mt-kahypar/partition/refinement/flows/quotient_graph.h" #include "mt-kahypar/partition/refinement/flows/refiner_adapter.h" -#include "mt-kahypar/partition/refinement/flows/problem_construction.h" #include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/utils/utilities.h" namespace mt_kahypar { namespace { - static constexpr size_t PROGRESS_BAR_SIZE = 50; - - template - std::string progress_bar(const size_t value, const size_t max, const F& f) { - const double percentage = static_cast(value) / std::max(max,UL(1)); - const size_t ticks = PROGRESS_BAR_SIZE * percentage; - std::stringstream pbar_str; - pbar_str << "|" - << f(percentage) << std::string(ticks, '|') << END - << std::string(PROGRESS_BAR_SIZE - ticks, ' ') - << "| " << std::setprecision(2) << (100.0 * percentage) << "% (" << value << ")"; - 
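partWeightUpdate() above treats the block-weight changes of a move sequence as a transaction: deltas are applied block by block under a lock, and if any block would exceed its maximum weight the deltas applied so far are undone and the sequence is rejected. The following is a simplified standalone sketch of that pattern; PartWeights is an illustrative type, and the real function additionally supports a rollback direction and reports the overloaded block.

```cpp
#include <iostream>
#include <mutex>
#include <vector>

struct PartWeights {
  std::mutex lock;
  std::vector<int> weight;
  std::vector<int> max_weight;

  // Returns true and keeps the deltas if all blocks stay within their maximum
  // weight; otherwise restores the previous weights and returns false.
  bool tryApply(const std::vector<int>& delta) {
    std::lock_guard<std::mutex> guard(lock);
    for (std::size_t i = 0; i < weight.size(); ++i) {
      if (weight[i] + delta[i] > max_weight[i]) {
        // Violation: undo the blocks updated so far.
        for (std::size_t j = 0; j < i; ++j) weight[j] -= delta[j];
        return false;
      }
      weight[i] += delta[i];
    }
    return true;
  }
};

int main() {
  PartWeights pw{ {}, { 40, 60 }, { 50, 65 } };
  std::cout << pw.tryApply({ +8, -8 }) << "\n";  // 1: 48/52 is feasible
  std::cout << pw.tryApply({ +5, -5 }) << "\n";  // 0: block 0 would reach 53 > 50
  std::cout << pw.weight[0] << " " << pw.weight[1] << "\n";  // 48 52 (unchanged)
}
```

The lock makes the check-and-apply atomic with respect to concurrent searches, which is why every accepted or rejected move sequence is routed through this single function.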
return pbar_str.str(); - } +static constexpr size_t PROGRESS_BAR_SIZE = 50; + +template +std::string progress_bar(const size_t value, const size_t max, const F &f) +{ + const double percentage = static_cast(value) / std::max(max, UL(1)); + const size_t ticks = PROGRESS_BAR_SIZE * percentage; + std::stringstream pbar_str; + pbar_str << "|" << f(percentage) << std::string(ticks, '|') << END + << std::string(PROGRESS_BAR_SIZE - ticks, ' ') << "| " << std::setprecision(2) + << (100.0 * percentage) << "% (" << value << ")"; + return pbar_str.str(); +} } -template -class FlowRefinementScheduler final : public IRefiner { +template +class FlowRefinementScheduler final : public IRefiner +{ static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; @@ -65,20 +66,19 @@ class FlowRefinementScheduler final : public IRefiner { using GainCache = typename GraphAndGainTypes::GainCache; using AttributedGains = typename GraphAndGainTypes::AttributedGains; - struct RefinementStats { - RefinementStats(utils::Stats& stats) : - _stats(stats), - num_refinements(0), - num_improvements(0), - num_time_limits(0), - correct_expected_improvement(0), - zero_gain_improvement(0), - failed_updates_due_to_conflicting_moves(0), - failed_updates_due_to_conflicting_moves_without_rollback(0), - failed_updates_due_to_balance_constraint(0), - total_improvement(0) { } - - void reset() { + struct RefinementStats + { + RefinementStats(utils::Stats &stats) : + _stats(stats), num_refinements(0), num_improvements(0), num_time_limits(0), + correct_expected_improvement(0), zero_gain_improvement(0), + failed_updates_due_to_conflicting_moves(0), + failed_updates_due_to_conflicting_moves_without_rollback(0), + failed_updates_due_to_balance_constraint(0), total_improvement(0) + { + } + + void reset() + { num_refinements.store(0); num_improvements.store(0); num_time_limits.store(0); @@ -92,7 +92,7 @@ class FlowRefinementScheduler final : public IRefiner { void update_global_stats(); - utils::Stats& _stats; + utils::Stats &_stats; CAtomic num_refinements; CAtomic num_improvements; CAtomic num_time_limits; @@ -104,75 +104,104 @@ class FlowRefinementScheduler final : public IRefiner { CAtomic total_improvement; }; - struct PartWeightUpdateResult { + struct PartWeightUpdateResult + { bool is_balanced = true; PartitionID overloaded_block = kInvalidPartition; HypernodeWeight overload_weight = 0; }; - friend std::ostream & operator<< (std::ostream& str, const RefinementStats& stats) { + friend std::ostream &operator<<(std::ostream &str, const RefinementStats &stats) + { str << "\n"; str << "Total Improvement = " << stats.total_improvement << "\n"; str << "Number of Flow-Based Refinements = " << stats.num_refinements << "\n"; str << "+ No Improvements = " - << progress_bar(stats.num_refinements - stats.num_improvements, stats.num_refinements, - [&](const double percentage) { return percentage > 0.9 ? RED : percentage > 0.75 ? YELLOW : GREEN; }) << "\n"; + << progress_bar( + stats.num_refinements - stats.num_improvements, stats.num_refinements, + [&](const double percentage) { + return percentage > 0.9 ? RED : percentage > 0.75 ? YELLOW : GREEN; + }) + << "\n"; str << "+ Number of Improvements = " - << progress_bar(stats.num_improvements, stats.num_refinements, - [&](const double percentage) { return percentage < 0.05 ? RED : percentage < 0.15 ? YELLOW : GREEN; }) << "\n"; + << progress_bar( + stats.num_improvements, stats.num_refinements, + [&](const double percentage) { + return percentage < 0.05 ? 
RED : percentage < 0.15 ? YELLOW : GREEN; + }) + << "\n"; str << " + Correct Expected Improvements = " - << progress_bar(stats.correct_expected_improvement, stats.num_improvements, - [&](const double percentage) { return percentage > 0.9 ? GREEN : percentage > 0.75 ? YELLOW : RED; }) << "\n"; + << progress_bar( + stats.correct_expected_improvement, stats.num_improvements, + [&](const double percentage) { + return percentage > 0.9 ? GREEN : percentage > 0.75 ? YELLOW : RED; + }) + << "\n"; str << " + Incorrect Expected Improvements = " - << progress_bar(stats.num_improvements - stats.correct_expected_improvement, stats.num_improvements, - [&](const double percentage) { return percentage < 0.1 ? GREEN : percentage < 0.25 ? YELLOW : RED; }) << "\n"; + << progress_bar( + stats.num_improvements - stats.correct_expected_improvement, + stats.num_improvements, + [&](const double percentage) { + return percentage < 0.1 ? GREEN : percentage < 0.25 ? YELLOW : RED; + }) + << "\n"; str << " + Zero-Gain Improvements = " << progress_bar(stats.zero_gain_improvement, stats.num_improvements, - [&](const double) { return WHITE; }) << "\n"; + [&](const double) { return WHITE; }) + << "\n"; str << "+ Failed due to Balance Constraint = " - << progress_bar(stats.failed_updates_due_to_balance_constraint, stats.num_refinements, - [&](const double percentage) { return percentage < 0.01 ? GREEN : percentage < 0.05 ? YELLOW : RED; }) << "\n"; + << progress_bar( + stats.failed_updates_due_to_balance_constraint, stats.num_refinements, + [&](const double percentage) { + return percentage < 0.01 ? GREEN : percentage < 0.05 ? YELLOW : RED; + }) + << "\n"; str << "+ Failed due to Conflicting Moves = " - << progress_bar(stats.failed_updates_due_to_conflicting_moves, stats.num_refinements, - [&](const double percentage) { return percentage < 0.01 ? GREEN : percentage < 0.05 ? YELLOW : RED; }) << "\n"; + << progress_bar( + stats.failed_updates_due_to_conflicting_moves, stats.num_refinements, + [&](const double percentage) { + return percentage < 0.01 ? GREEN : percentage < 0.05 ? YELLOW : RED; + }) + << "\n"; str << "+ Time Limits = " - << progress_bar(stats.num_time_limits, stats.num_refinements, - [&](const double percentage) { return percentage < 0.0025 ? GREEN : percentage < 0.01 ? YELLOW : RED; }) << "\n"; + << progress_bar( + stats.num_time_limits, stats.num_refinements, + [&](const double percentage) { + return percentage < 0.0025 ? GREEN : percentage < 0.01 ? 
YELLOW : RED; + }) + << "\n"; str << "---------------------------------------------------------------"; return str; } public: FlowRefinementScheduler(const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const Context& context, - GainCache& gain_cache) : - _phg(nullptr), - _context(context), - _gain_cache(gain_cache), - _current_k(context.partition.k), - _quotient_graph(num_hyperedges, context), - _refiner(num_hyperedges, context), - _constructor(num_hypernodes, num_hyperedges, context), - _was_moved(num_hypernodes, uint8_t(false)), - _part_weights_lock(), - _part_weights(context.partition.k, 0), - _max_part_weights(context.partition.k, 0), - _stats(utils::Utilities::instance().getStats(context.utility_id)), - _apply_moves_lock() { } + const HyperedgeID num_hyperedges, const Context &context, + GainCache &gain_cache) : + _phg(nullptr), + _context(context), _gain_cache(gain_cache), _current_k(context.partition.k), + _quotient_graph(num_hyperedges, context), _refiner(num_hyperedges, context), + _constructor(num_hypernodes, num_hyperedges, context), + _was_moved(num_hypernodes, uint8_t(false)), _part_weights_lock(), + _part_weights(context.partition.k, 0), _max_part_weights(context.partition.k, 0), + _stats(utils::Utilities::instance().getStats(context.utility_id)), + _apply_moves_lock() + { + } FlowRefinementScheduler(const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const Context& context, + const HyperedgeID num_hyperedges, const Context &context, gain_cache_t gain_cache) : - FlowRefinementScheduler(num_hypernodes, num_hyperedges, context, - GainCachePtr::cast(gain_cache)) { } + FlowRefinementScheduler(num_hypernodes, num_hyperedges, context, + GainCachePtr::cast(gain_cache)) + { + } - FlowRefinementScheduler(const FlowRefinementScheduler&) = delete; - FlowRefinementScheduler(FlowRefinementScheduler&&) = delete; + FlowRefinementScheduler(const FlowRefinementScheduler &) = delete; + FlowRefinementScheduler(FlowRefinementScheduler &&) = delete; - FlowRefinementScheduler & operator= (const FlowRefinementScheduler &) = delete; - FlowRefinementScheduler & operator= (FlowRefinementScheduler &&) = delete; + FlowRefinementScheduler &operator=(const FlowRefinementScheduler &) = delete; + FlowRefinementScheduler &operator=(FlowRefinementScheduler &&) = delete; /** * Applies the sequence of vertex moves to the partitioned hypergraph. @@ -180,8 +209,7 @@ class FlowRefinementScheduler final : public IRefiner { * the balance constaint and not worsen solution quality. * Returns, improvement in solution quality. */ - HyperedgeWeight applyMoves(const SearchID search_id, - MoveSequence& sequence); + HyperedgeWeight applyMoves(const SearchID search_id, MoveSequence &sequence); /** * Returns the current weight of each block. @@ -190,7 +218,8 @@ class FlowRefinementScheduler final : public IRefiner { * part weight updates for a move sequence as a transaction, which * we protect with a spin lock. 
*/ - vec partWeights() { + vec partWeights() + { _part_weights_lock.lock(); vec _copy_part_weights(_part_weights); _part_weights_lock.unlock(); @@ -198,26 +227,26 @@ class FlowRefinementScheduler final : public IRefiner { } private: - bool refineImpl(mt_kahypar_partitioned_hypergraph_t& phg, - const vec& refinement_nodes, - Metrics& metrics, + bool refineImpl(mt_kahypar_partitioned_hypergraph_t &phg, + const vec &refinement_nodes, Metrics &metrics, double time_limit) final; - void initializeImpl(mt_kahypar_partitioned_hypergraph_t& phg) final; + void initializeImpl(mt_kahypar_partitioned_hypergraph_t &phg) final; void resizeDataStructuresForCurrentK(); - PartWeightUpdateResult partWeightUpdate(const vec& part_weight_deltas, + PartWeightUpdateResult partWeightUpdate(const vec &part_weight_deltas, const bool rollback); - std::string blocksOfSearch(const SearchID search_id) { + std::string blocksOfSearch(const SearchID search_id) + { const BlockPair blocks = _quotient_graph.getBlockPair(search_id); return "(" + std::to_string(blocks.i) + "," + std::to_string(blocks.j) + ")"; } - PartitionedHypergraph* _phg; - const Context& _context; - GainCache& _gain_cache; + PartitionedHypergraph *_phg; + const Context &_context; + GainCache &_gain_cache; PartitionID _current_k; // ! Contains information of all cut hyperedges between the @@ -245,4 +274,4 @@ class FlowRefinementScheduler final : public IRefiner { SpinLock _apply_moves_lock; }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/refinement/flows/sequential_construction.cpp b/mt-kahypar/partition/refinement/flows/sequential_construction.cpp index 731cbc56e..87bf07cbe 100644 --- a/mt-kahypar/partition/refinement/flows/sequential_construction.cpp +++ b/mt-kahypar/partition/refinement/flows/sequential_construction.cpp @@ -34,131 +34,154 @@ namespace mt_kahypar { -template -whfc::Hyperedge SequentialConstruction::DynamicIdenticalNetDetection::add_if_not_contained( - const whfc::Hyperedge he, const size_t he_hash, const vec& pins) { +template +whfc::Hyperedge SequentialConstruction::DynamicIdenticalNetDetection:: + add_if_not_contained(const whfc::Hyperedge he, const size_t he_hash, + const vec &pins) +{ const size_t bucket_idx = he_hash % _hash_buckets.size(); - if ( _hash_buckets[bucket_idx].threshold == _threshold ) { + if(_hash_buckets[bucket_idx].threshold == _threshold) + { // There exists already some hyperedges with the same hash - for ( const TmpHyperedge& tmp_e : _hash_buckets[bucket_idx].identical_nets ) { + for(const TmpHyperedge &tmp_e : _hash_buckets[bucket_idx].identical_nets) + { // Check if there is some hyperedge equal to he - if ( tmp_e.hash == he_hash && _flow_hg.pinCount(tmp_e.e) == pins.size() ) { + if(tmp_e.hash == he_hash && _flow_hg.pinCount(tmp_e.e) == pins.size()) + { bool is_identical = true; size_t idx = 0; - for ( const whfc::FlowHypergraph::Pin& u : _flow_hg.pinsOf(tmp_e.e) ) { - if ( u.pin != pins[idx++] ) { + for(const whfc::FlowHypergraph::Pin &u : _flow_hg.pinsOf(tmp_e.e)) + { + if(u.pin != pins[idx++]) + { is_identical = false; break; } } - if ( is_identical ) { + if(is_identical) + { return tmp_e.e; } } } - } else { + } + else + { _hash_buckets[bucket_idx].identical_nets.clear(); _hash_buckets[bucket_idx].threshold = _threshold; } - _hash_buckets[bucket_idx].identical_nets.push_back(TmpHyperedge { he_hash, he }); + _hash_buckets[bucket_idx].identical_nets.push_back(TmpHyperedge{ he_hash, he }); return whfc::invalidHyperedge; } -template -FlowProblem 
SequentialConstruction::constructFlowHypergraph(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, - const PartitionID block_0, - const PartitionID block_1, - vec& whfc_to_node) { +template +FlowProblem SequentialConstruction::constructFlowHypergraph( + const PartitionedHypergraph &phg, const Subhypergraph &sub_hg, + const PartitionID block_0, const PartitionID block_1, vec &whfc_to_node) +{ FlowProblem flow_problem; - const double density = static_cast(phg.initialNumEdges()) / phg.initialNumNodes(); - const double avg_he_size = static_cast(phg.initialNumPins()) / phg.initialNumEdges(); - if ( density >= 0.5 && avg_he_size <= 100 ) { + const double density = + static_cast(phg.initialNumEdges()) / phg.initialNumNodes(); + const double avg_he_size = + static_cast(phg.initialNumPins()) / phg.initialNumEdges(); + if(density >= 0.5 && avg_he_size <= 100) + { // This algorithm iterates over all hyperedges and checks for all pins if // they are contained in the flow problem. Algorithm could have overheads, if // only a small portion of each hyperedge is contained in the flow hypergraph. flow_problem = constructDefault(phg, sub_hg, block_0, block_1, whfc_to_node); - } else { + } + else + { // This is a construction algorithm optimized for hypergraphs with large hyperedges. // Algorithm constructs a temporary pin list, therefore it could have overheads // for hypergraphs with small hyperedges. - flow_problem = constructOptimizedForLargeHEs(phg, sub_hg, block_0, block_1, whfc_to_node); + flow_problem = + constructOptimizedForLargeHEs(phg, sub_hg, block_0, block_1, whfc_to_node); } - if ( _flow_hg.nodeWeight(flow_problem.source) == 0 || - _flow_hg.nodeWeight(flow_problem.sink) == 0 ) { + if(_flow_hg.nodeWeight(flow_problem.source) == 0 || + _flow_hg.nodeWeight(flow_problem.sink) == 0) + { // Source or sink not connected to vertices in the flow problem flow_problem.non_removable_cut = 0; flow_problem.total_cut = 0; - } else { + } + else + { _flow_hg.finalize(); - if ( _context.refinement.flows.determine_distance_from_cut ) { + if(_context.refinement.flows.determine_distance_from_cut) + { // Determine the distance of each node contained in the flow network from the cut. // This technique improves piercing decision within the WHFC framework. - determineDistanceFromCut(phg, flow_problem.source, - flow_problem.sink, block_0, block_1, whfc_to_node); + determineDistanceFromCut(phg, flow_problem.source, flow_problem.sink, block_0, + block_1, whfc_to_node); } } DBG << "Flow Hypergraph [ Nodes =" << _flow_hg.numNodes() - << ", Edges =" << _flow_hg.numHyperedges() - << ", Pins =" << _flow_hg.numPins() + << ", Edges =" << _flow_hg.numHyperedges() << ", Pins =" << _flow_hg.numPins() << ", Blocks = (" << block_0 << "," << block_1 << ") ]"; return flow_problem; } -template -FlowProblem SequentialConstruction::constructFlowHypergraph(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, - const PartitionID block_0, - const PartitionID block_1, - vec& whfc_to_node, - const bool default_construction) { +template +FlowProblem SequentialConstruction::constructFlowHypergraph( + const PartitionedHypergraph &phg, const Subhypergraph &sub_hg, + const PartitionID block_0, const PartitionID block_1, vec &whfc_to_node, + const bool default_construction) +{ FlowProblem flow_problem; - if ( default_construction ) { + if(default_construction) + { // This algorithm iterates over all hyperedges and checks for all pins if // they are contained in the flow problem. 
Algorithm could have overheads, if // only a small portion of each hyperedge is contained in the flow hypergraph. flow_problem = constructDefault(phg, sub_hg, block_0, block_1, whfc_to_node); - } else { + } + else + { // This is a construction algorithm optimized for hypergraphs with large hyperedges. // Algorithm constructs a temporary pin list, therefore it could have overheads // for hypergraphs with small hyperedges. - flow_problem = constructOptimizedForLargeHEs(phg, sub_hg, block_0, block_1, whfc_to_node); + flow_problem = + constructOptimizedForLargeHEs(phg, sub_hg, block_0, block_1, whfc_to_node); } - if ( _flow_hg.nodeWeight(flow_problem.source) == 0 || - _flow_hg.nodeWeight(flow_problem.sink) == 0 ) { + if(_flow_hg.nodeWeight(flow_problem.source) == 0 || + _flow_hg.nodeWeight(flow_problem.sink) == 0) + { // Source or sink not connected to vertices in the flow problem flow_problem.non_removable_cut = 0; flow_problem.total_cut = 0; - } else { + } + else + { _flow_hg.finalize(); - if ( _context.refinement.flows.determine_distance_from_cut ) { + if(_context.refinement.flows.determine_distance_from_cut) + { // Determine the distance of each node contained in the flow network from the cut. // This technique improves piercing decision within the WHFC framework. - determineDistanceFromCut(phg, flow_problem.source, - flow_problem.sink, block_0, block_1, whfc_to_node); + determineDistanceFromCut(phg, flow_problem.source, flow_problem.sink, block_0, + block_1, whfc_to_node); } } DBG << "Flow Hypergraph [ Nodes =" << _flow_hg.numNodes() - << ", Edges =" << _flow_hg.numHyperedges() - << ", Pins =" << _flow_hg.numPins() + << ", Edges =" << _flow_hg.numHyperedges() << ", Pins =" << _flow_hg.numPins() << ", Blocks = (" << block_0 << "," << block_1 << ") ]"; return flow_problem; } -template -FlowProblem SequentialConstruction::constructDefault(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, - const PartitionID block_0, - const PartitionID block_1, - vec& whfc_to_node) { +template +FlowProblem SequentialConstruction::constructDefault( + const PartitionedHypergraph &phg, const Subhypergraph &sub_hg, + const PartitionID block_0, const PartitionID block_1, vec &whfc_to_node) +{ ASSERT(block_0 != kInvalidPartition && block_1 != kInvalidPartition); FlowProblem flow_problem; flow_problem.total_cut = 0; @@ -167,14 +190,17 @@ FlowProblem SequentialConstruction::constructDefault(const Pa _node_to_whfc.clear(); whfc_to_node.resize(sub_hg.numNodes() + 2); - if ( _context.refinement.flows.determine_distance_from_cut ) { + if(_context.refinement.flows.determine_distance_from_cut) + { _cut_hes.clear(); } // Add refinement nodes to flow network - auto add_nodes = [&](const vec& nodes, const whfc::Node::ValueType start_u) { + auto add_nodes = [&](const vec &nodes, + const whfc::Node::ValueType start_u) { whfc::Node flow_hn(start_u); - for ( const HypernodeID& hn : nodes) { + for(const HypernodeID &hn : nodes) + { const HypernodeWeight hn_weight = phg.nodeWeight(hn); whfc_to_node[flow_hn] = hn; _node_to_whfc[hn] = flow_hn++; @@ -184,22 +210,26 @@ FlowProblem SequentialConstruction::constructDefault(const Pa // Add source nodes flow_problem.source = whfc::Node(0); whfc_to_node[flow_problem.source] = kInvalidHypernode; - _flow_hg.addNode(whfc::NodeWeight( - std::max(0, phg.partWeight(block_0) - sub_hg.weight_of_block_0))); + _flow_hg.addNode( + whfc::NodeWeight(std::max(0, phg.partWeight(block_0) - sub_hg.weight_of_block_0))); add_nodes(sub_hg.nodes_of_block_0, flow_problem.source + 1); // Add sink 
nodes flow_problem.sink = whfc::Node(sub_hg.nodes_of_block_0.size() + 1); whfc_to_node[flow_problem.sink] = kInvalidHypernode; - _flow_hg.addNode(whfc::NodeWeight( - std::max(0, phg.partWeight(block_1) - sub_hg.weight_of_block_1))); + _flow_hg.addNode( + whfc::NodeWeight(std::max(0, phg.partWeight(block_1) - sub_hg.weight_of_block_1))); add_nodes(sub_hg.nodes_of_block_1, flow_problem.sink + 1); - flow_problem.weight_of_block_0 = _flow_hg.nodeWeight(flow_problem.source) + sub_hg.weight_of_block_0; - flow_problem.weight_of_block_1 = _flow_hg.nodeWeight(flow_problem.sink) + sub_hg.weight_of_block_1; + flow_problem.weight_of_block_0 = + _flow_hg.nodeWeight(flow_problem.source) + sub_hg.weight_of_block_0; + flow_problem.weight_of_block_1 = + _flow_hg.nodeWeight(flow_problem.sink) + sub_hg.weight_of_block_1; - auto push_into_tmp_pins = [&](const whfc::Node pin, size_t& current_hash, const bool is_source_or_sink) { + auto push_into_tmp_pins = [&](const whfc::Node pin, size_t ¤t_hash, + const bool is_source_or_sink) { _tmp_pins.push_back(pin); current_hash += kahypar::math::hash(pin); - if ( is_source_or_sink ) { + if(is_source_or_sink) + { // According to Lars: Adding to source or sink to the start of // each pin list improves running time std::swap(_tmp_pins[0], _tmp_pins.back()); @@ -208,22 +238,32 @@ FlowProblem SequentialConstruction::constructDefault(const Pa // Add hyperedge to flow network and configure source and sink whfc::Hyperedge current_he(0); - for ( const HyperedgeID& he : sub_hg.hes ) { - if ( !FlowNetworkConstruction::dropHyperedge(phg, he, block_0, block_1) ) { + for(const HyperedgeID &he : sub_hg.hes) + { + if(!FlowNetworkConstruction::dropHyperedge(phg, he, block_0, block_1)) + { size_t he_hash = 0; _tmp_pins.clear(); - const HyperedgeWeight he_weight = FlowNetworkConstruction::capacity(phg, _context, he, block_0, block_1); + const HyperedgeWeight he_weight = + FlowNetworkConstruction::capacity(phg, _context, he, block_0, block_1); _flow_hg.startHyperedge(whfc::Flow(he_weight)); - bool connectToSource = FlowNetworkConstruction::connectToSource(phg, he, block_0, block_1); - bool connectToSink = FlowNetworkConstruction::connectToSink(phg, he, block_0, block_1); - if ( ( phg.pinCountInPart(he, block_0) > 0 && phg.pinCountInPart(he, block_1) > 0 ) || - FlowNetworkConstruction::isCut(phg, he, block_0, block_1) ) { + bool connectToSource = + FlowNetworkConstruction::connectToSource(phg, he, block_0, block_1); + bool connectToSink = + FlowNetworkConstruction::connectToSink(phg, he, block_0, block_1); + if((phg.pinCountInPart(he, block_0) > 0 && phg.pinCountInPart(he, block_1) > 0) || + FlowNetworkConstruction::isCut(phg, he, block_0, block_1)) + { flow_problem.total_cut += he_weight; } - for ( const HypernodeID& pin : phg.pins(he) ) { - if ( _node_to_whfc.contains(pin) ) { + for(const HypernodeID &pin : phg.pins(he)) + { + if(_node_to_whfc.contains(pin)) + { push_into_tmp_pins(_node_to_whfc[pin], he_hash, false); - } else { + } + else + { const PartitionID pin_block = phg.partID(pin); connectToSource |= pin_block == block_0; connectToSink |= pin_block == block_1; @@ -232,37 +272,49 @@ FlowProblem SequentialConstruction::constructDefault(const Pa const bool empty_hyperedge = _tmp_pins.size() == 0; const bool connected_to_source_and_sink = connectToSource && connectToSink; - if ( connected_to_source_and_sink || empty_hyperedge ) { + if(connected_to_source_and_sink || empty_hyperedge) + { // Hyperedge is connected to source and sink which means we can not remove it // from the cut with 
the current flow problem => remove he from flow problem _flow_hg.removeCurrentHyperedge(); flow_problem.non_removable_cut += connected_to_source_and_sink ? he_weight : 0; - } else { + } + else + { - if ( connectToSource ) { + if(connectToSource) + { push_into_tmp_pins(flow_problem.source, he_hash, true); - } else if ( connectToSink ) { + } + else if(connectToSink) + { push_into_tmp_pins(flow_problem.sink, he_hash, true); } // Sort pins for identical net detection - std::sort( _tmp_pins.begin() + - ( _tmp_pins[0] == flow_problem.source || - _tmp_pins[0] == flow_problem.sink), _tmp_pins.end()); + std::sort(_tmp_pins.begin() + (_tmp_pins[0] == flow_problem.source || + _tmp_pins[0] == flow_problem.sink), + _tmp_pins.end()); - if ( _tmp_pins.size() > 1 ) { + if(_tmp_pins.size() > 1) + { whfc::Hyperedge identical_net = - _identical_nets.add_if_not_contained(current_he, he_hash, _tmp_pins); - if ( identical_net == whfc::invalidHyperedge ) { - for ( const whfc::Node& pin : _tmp_pins ) { + _identical_nets.add_if_not_contained(current_he, he_hash, _tmp_pins); + if(identical_net == whfc::invalidHyperedge) + { + for(const whfc::Node &pin : _tmp_pins) + { _flow_hg.addPin(pin); } - if ( _context.refinement.flows.determine_distance_from_cut && - phg.pinCountInPart(he, block_0) > 0 && phg.pinCountInPart(he, block_1) > 0 ) { + if(_context.refinement.flows.determine_distance_from_cut && + phg.pinCountInPart(he, block_0) > 0 && phg.pinCountInPart(he, block_1) > 0) + { _cut_hes.push_back(current_he); } ++current_he; - } else { + } + else + { // Current hyperedge is identical to an already added _flow_hg.capacity(identical_net) += he_weight; } @@ -274,12 +326,11 @@ FlowProblem SequentialConstruction::constructDefault(const Pa return flow_problem; } -template -FlowProblem SequentialConstruction::constructOptimizedForLargeHEs(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, - const PartitionID block_0, - const PartitionID block_1, - vec& whfc_to_node) { +template +FlowProblem SequentialConstruction::constructOptimizedForLargeHEs( + const PartitionedHypergraph &phg, const Subhypergraph &sub_hg, + const PartitionID block_0, const PartitionID block_1, vec &whfc_to_node) +{ ASSERT(block_0 != kInvalidPartition && block_1 != kInvalidPartition); FlowProblem flow_problem; flow_problem.total_cut = 0; @@ -289,25 +340,30 @@ FlowProblem SequentialConstruction::constructOptimizedForLarg _he_to_whfc.clear(); whfc_to_node.resize(sub_hg.numNodes() + 2); - if ( _context.refinement.flows.determine_distance_from_cut ) { + if(_context.refinement.flows.determine_distance_from_cut) + { _cut_hes.clear(); } - for ( size_t i = 0; i < sub_hg.hes.size(); ++i ) { + for(size_t i = 0; i < sub_hg.hes.size(); ++i) + { const HyperedgeID he = sub_hg.hes[i]; _he_to_whfc[he] = i; } // Add refinement nodes to flow network - auto add_nodes = [&](const vec& nodes, const PartitionID block, const whfc::Node::ValueType start_u) { + auto add_nodes = [&](const vec &nodes, const PartitionID block, + const whfc::Node::ValueType start_u) { whfc::Node flow_hn(start_u); - for ( const HypernodeID& hn : nodes) { + for(const HypernodeID &hn : nodes) + { const HypernodeWeight hn_weight = phg.nodeWeight(hn); whfc_to_node[flow_hn] = hn; _flow_hg.addNode(whfc::NodeWeight(hn_weight)); - for ( const HyperedgeID& he : phg.incidentEdges(hn) ) { + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { ASSERT(_he_to_whfc.contains(he)); - _pins.push_back(TmpPin { _he_to_whfc[he], flow_hn, block }); + _pins.push_back(TmpPin{ _he_to_whfc[he], flow_hn, block 
}); } ++flow_hn; } @@ -315,24 +371,25 @@ FlowProblem SequentialConstruction::constructOptimizedForLarg // Add source nodes flow_problem.source = whfc::Node(0); whfc_to_node[flow_problem.source] = kInvalidHypernode; - _flow_hg.addNode(whfc::NodeWeight( - std::max(0, phg.partWeight(block_0) - sub_hg.weight_of_block_0))); + _flow_hg.addNode( + whfc::NodeWeight(std::max(0, phg.partWeight(block_0) - sub_hg.weight_of_block_0))); add_nodes(sub_hg.nodes_of_block_0, block_0, flow_problem.source + 1); // Add sink nodes flow_problem.sink = whfc::Node(sub_hg.nodes_of_block_0.size() + 1); whfc_to_node[flow_problem.sink] = kInvalidHypernode; - _flow_hg.addNode(whfc::NodeWeight( - std::max(0, phg.partWeight(block_1) - sub_hg.weight_of_block_1))); + _flow_hg.addNode( + whfc::NodeWeight(std::max(0, phg.partWeight(block_1) - sub_hg.weight_of_block_1))); add_nodes(sub_hg.nodes_of_block_1, block_1, flow_problem.sink + 1); - flow_problem.weight_of_block_0 = _flow_hg.nodeWeight(flow_problem.source) + sub_hg.weight_of_block_0; - flow_problem.weight_of_block_1 = _flow_hg.nodeWeight(flow_problem.sink) + sub_hg.weight_of_block_1; + flow_problem.weight_of_block_0 = + _flow_hg.nodeWeight(flow_problem.source) + sub_hg.weight_of_block_0; + flow_problem.weight_of_block_1 = + _flow_hg.nodeWeight(flow_problem.sink) + sub_hg.weight_of_block_1; - - if ( _pins.size() > 0 ) { - std::sort(_pins.begin(), _pins.end(), - [&](const TmpPin& lhs, const TmpPin& rhs ) { - return lhs.e < rhs.e || (lhs.e == rhs.e && lhs.pin < rhs.pin); - }); + if(_pins.size() > 0) + { + std::sort(_pins.begin(), _pins.end(), [&](const TmpPin &lhs, const TmpPin &rhs) { + return lhs.e < rhs.e || (lhs.e == rhs.e && lhs.pin < rhs.pin); + }); whfc::Hyperedge current_he(0); size_t start_idx = 0; @@ -343,53 +400,71 @@ FlowProblem SequentialConstruction::constructOptimizedForLarg ASSERT(start_idx < end_idx); _tmp_pins.clear(); const HyperedgeID he = sub_hg.hes[last_he]; - if ( !FlowNetworkConstruction::dropHyperedge(phg, he, block_0, block_1) ) { - const HyperedgeWeight he_weight = FlowNetworkConstruction::capacity(phg, _context, he, block_0, block_1); + if(!FlowNetworkConstruction::dropHyperedge(phg, he, block_0, block_1)) + { + const HyperedgeWeight he_weight = + FlowNetworkConstruction::capacity(phg, _context, he, block_0, block_1); const HypernodeID actual_pin_count_block_0 = phg.pinCountInPart(he, block_0); const HypernodeID actual_pin_count_block_1 = phg.pinCountInPart(he, block_1); - bool connect_to_source = FlowNetworkConstruction::connectToSource(phg, he, block_0, block_1); - bool connect_to_sink = FlowNetworkConstruction::connectToSink(phg, he, block_0, block_1); + bool connect_to_source = + FlowNetworkConstruction::connectToSource(phg, he, block_0, block_1); + bool connect_to_sink = + FlowNetworkConstruction::connectToSink(phg, he, block_0, block_1); connect_to_source |= pin_count_in_block_0 < actual_pin_count_block_0; connect_to_sink |= pin_count_in_block_1 < actual_pin_count_block_1; - if ( ( actual_pin_count_block_0 > 0 && actual_pin_count_block_1 > 0 ) || - FlowNetworkConstruction::isCut(phg, he, block_0, block_1) ) { + if((actual_pin_count_block_0 > 0 && actual_pin_count_block_1 > 0) || + FlowNetworkConstruction::isCut(phg, he, block_0, block_1)) + { flow_problem.total_cut += he_weight; } _flow_hg.startHyperedge(whfc::Flow(he_weight)); - if ( connect_to_source && connect_to_sink ) { + if(connect_to_source && connect_to_sink) + { // Hyperedge is connected to source and sink which means we can not remove it // from the cut with the current flow 
problem => remove he from flow problem flow_problem.non_removable_cut += he_weight; _flow_hg.removeCurrentHyperedge(); - } else { + } + else + { // Add hyperedge to flow network and configure source and sink size_t hash = 0; - if ( connect_to_source ) { + if(connect_to_source) + { _tmp_pins.push_back(flow_problem.source); hash += kahypar::math::hash(flow_problem.source); - } else if ( connect_to_sink ) { + } + else if(connect_to_sink) + { _tmp_pins.push_back(flow_problem.sink); hash += kahypar::math::hash(flow_problem.sink); } - for ( size_t i = start_idx; i < end_idx; ++i ) { + for(size_t i = start_idx; i < end_idx; ++i) + { _tmp_pins.push_back(_pins[i].pin); hash += kahypar::math::hash(_pins[i].pin); } - if ( _tmp_pins.size() > 1 ) { + if(_tmp_pins.size() > 1) + { whfc::Hyperedge identical_net = - _identical_nets.add_if_not_contained(current_he, hash, _tmp_pins); - if ( identical_net == whfc::invalidHyperedge ) { - for ( const whfc::Node& pin : _tmp_pins ) { + _identical_nets.add_if_not_contained(current_he, hash, _tmp_pins); + if(identical_net == whfc::invalidHyperedge) + { + for(const whfc::Node &pin : _tmp_pins) + { _flow_hg.addPin(pin); } - if ( _context.refinement.flows.determine_distance_from_cut && - actual_pin_count_block_0 > 0 && actual_pin_count_block_1 > 0 ) { + if(_context.refinement.flows.determine_distance_from_cut && + actual_pin_count_block_0 > 0 && actual_pin_count_block_1 > 0) + { _cut_hes.push_back(current_he); } ++current_he; - } else { + } + else + { // Current hyperedge is identical to an already added _flow_hg.capacity(identical_net) += he_weight; } @@ -397,8 +472,10 @@ FlowProblem SequentialConstruction::constructOptimizedForLarg } } }; - for ( size_t i = 0; i < _pins.size(); ++i ) { - if ( last_he != _pins[i].e ) { + for(size_t i = 0; i < _pins.size(); ++i) + { + if(last_he != _pins[i].e) + { add_hyperedge(i); start_idx = i; last_he = _pins[i].e; @@ -414,23 +491,25 @@ FlowProblem SequentialConstruction::constructOptimizedForLarg return flow_problem; } -template -void SequentialConstruction::determineDistanceFromCut(const PartitionedHypergraph& phg, - const whfc::Node source, - const whfc::Node sink, - const PartitionID block_0, - const PartitionID block_1, - const vec& whfc_to_node) { - auto& distances = _hfc.cs.border_nodes.distance; +template +void SequentialConstruction::determineDistanceFromCut( + const PartitionedHypergraph &phg, const whfc::Node source, const whfc::Node sink, + const PartitionID block_0, const PartitionID block_1, + const vec &whfc_to_node) +{ + auto &distances = _hfc.cs.border_nodes.distance; distances.assign(_flow_hg.numNodes(), whfc::HopDistance(0)); _visited_hns.resize(_flow_hg.numNodes() + _flow_hg.numHyperedges()); - _visited_hns.reset(); // Review Note + _visited_hns.reset(); // Review Note // Initialize bfs queue with vertices contained in cut hyperedges parallel::scalable_queue q, next_q; - for ( const whfc::Hyperedge& he : _cut_hes ) { - for ( const whfc::FlowHypergraph::Pin& pin : _flow_hg.pinsOf(he) ) { - if ( pin.pin != source && pin.pin != sink && !_visited_hns[pin.pin] ) { + for(const whfc::Hyperedge &he : _cut_hes) + { + for(const whfc::FlowHypergraph::Pin &pin : _flow_hg.pinsOf(he)) + { + if(pin.pin != source && pin.pin != sink && !_visited_hns[pin.pin]) + { q.push(pin.pin); _visited_hns.setUnsafe(pin.pin, true); } @@ -442,24 +521,32 @@ void SequentialConstruction::determineDistanceFromCut(const P whfc::HopDistance dist = 1; whfc::HopDistance max_dist_source(0); whfc::HopDistance max_dist_sink(0); - while ( !q.empty() ) { + 
while(!q.empty()) + { const whfc::Node u = q.front(); q.pop(); const PartitionID block_of_u = phg.partID(whfc_to_node[u]); - if ( block_of_u == block_0 ) { + if(block_of_u == block_0) + { distances[u] = -dist; max_dist_source = std::max(max_dist_source, dist); - } else if ( block_of_u == block_1 ) { + } + else if(block_of_u == block_1) + { distances[u] = dist; max_dist_sink = std::max(max_dist_sink, dist); } - for ( const whfc::FlowHypergraph::InHe& in_he : _flow_hg.hyperedgesOf(u) ) { + for(const whfc::FlowHypergraph::InHe &in_he : _flow_hg.hyperedgesOf(u)) + { const whfc::Hyperedge he = in_he.e; - if ( !_visited_hns[_flow_hg.numNodes() + he] ) { - for ( const whfc::FlowHypergraph::Pin& pin : _flow_hg.pinsOf(he) ) { - if ( pin.pin != source && pin.pin != sink && !_visited_hns[pin.pin] ) { + if(!_visited_hns[_flow_hg.numNodes() + he]) + { + for(const whfc::FlowHypergraph::Pin &pin : _flow_hg.pinsOf(he)) + { + if(pin.pin != source && pin.pin != sink && !_visited_hns[pin.pin]) + { next_q.push(pin.pin); _visited_hns.setUnsafe(pin.pin, true); } @@ -468,7 +555,8 @@ void SequentialConstruction::determineDistanceFromCut(const P } } - if ( q.empty() ) { + if(q.empty()) + { std::swap(q, next_q); ++dist; } diff --git a/mt-kahypar/partition/refinement/flows/sequential_construction.h b/mt-kahypar/partition/refinement/flows/sequential_construction.h index e2818ad0c..7c85ad24b 100644 --- a/mt-kahypar/partition/refinement/flows/sequential_construction.h +++ b/mt-kahypar/partition/refinement/flows/sequential_construction.h @@ -31,139 +31,130 @@ #include "algorithm/hyperflowcutter.h" #include "algorithm/sequential_push_relabel.h" -#include "mt-kahypar/partition/context.h" #include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h" -#include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" +#include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h" +#include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" namespace mt_kahypar { struct FlowProblem; -template -class SequentialConstruction { +template +class SequentialConstruction +{ static constexpr bool debug = false; using PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; using FlowNetworkConstruction = typename GraphAndGainTypes::FlowNetworkConstruction; - struct TmpPin { + struct TmpPin + { HyperedgeID e; whfc::Node pin; PartitionID block; }; - class DynamicIdenticalNetDetection { + class DynamicIdenticalNetDetection + { - struct TmpHyperedge { + struct TmpHyperedge + { const size_t hash; const whfc::Hyperedge e; }; using IdenticalNetVector = vec; - struct HashBucket { - HashBucket() : - identical_nets(), - threshold(0) { } + struct HashBucket + { + HashBucket() : identical_nets(), threshold(0) {} IdenticalNetVector identical_nets; uint32_t threshold; }; - public: + public: explicit DynamicIdenticalNetDetection(const HyperedgeID num_hyperedges, - FlowHypergraphBuilder& flow_hg, - const Context& context) : - _flow_hg(flow_hg), - _hash_buckets(), - _threshold(1) { - _hash_buckets.resize(std::max(UL(1024), num_hyperedges / - context.refinement.flows.num_parallel_searches)); + FlowHypergraphBuilder &flow_hg, + const Context &context) : + _flow_hg(flow_hg), + _hash_buckets(), _threshold(1) + { + _hash_buckets.resize(std::max( + UL(1024), num_hyperedges / context.refinement.flows.num_parallel_searches)); } /** * Returns an invalid hyperedge id, if the edge is not contained, otherwise * it returns the 
id of the hyperedge that is identical to he. */ - whfc::Hyperedge add_if_not_contained(const whfc::Hyperedge he, - const size_t he_hash, - const vec& pins); + whfc::Hyperedge add_if_not_contained(const whfc::Hyperedge he, const size_t he_hash, + const vec &pins); - void reset() { - ++_threshold; - } + void reset() { ++_threshold; } - private: - whfc::FlowHypergraph& _flow_hg; + private: + whfc::FlowHypergraph &_flow_hg; vec _hash_buckets; uint32_t _threshold; }; - public: +public: explicit SequentialConstruction(const HyperedgeID num_hyperedges, - FlowHypergraphBuilder& flow_hg, - whfc::HyperFlowCutter& hfc, - const Context& context) : - _context(context), - _flow_hg(flow_hg), - _hfc(hfc), - _node_to_whfc(), - _visited_hns(), - _tmp_pins(), - _cut_hes(), - _pins(), - _he_to_whfc(), - _identical_nets(num_hyperedges, flow_hg, context) { } - - SequentialConstruction(const SequentialConstruction&) = delete; - SequentialConstruction(SequentialConstruction&&) = delete; - SequentialConstruction & operator= (const SequentialConstruction &) = delete; - SequentialConstruction & operator= (SequentialConstruction &&) = delete; + FlowHypergraphBuilder &flow_hg, + whfc::HyperFlowCutter &hfc, + const Context &context) : + _context(context), + _flow_hg(flow_hg), _hfc(hfc), _node_to_whfc(), _visited_hns(), _tmp_pins(), + _cut_hes(), _pins(), _he_to_whfc(), + _identical_nets(num_hyperedges, flow_hg, context) + { + } + + SequentialConstruction(const SequentialConstruction &) = delete; + SequentialConstruction(SequentialConstruction &&) = delete; + SequentialConstruction &operator=(const SequentialConstruction &) = delete; + SequentialConstruction &operator=(SequentialConstruction &&) = delete; virtual ~SequentialConstruction() = default; - - FlowProblem constructFlowHypergraph(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, + FlowProblem constructFlowHypergraph(const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg, const PartitionID block_0, const PartitionID block_1, - vec& whfc_to_node); + vec &whfc_to_node); // ! 
Only for testing - FlowProblem constructFlowHypergraph(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, + FlowProblem constructFlowHypergraph(const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg, const PartitionID block_0, const PartitionID block_1, - vec& whfc_to_node, + vec &whfc_to_node, const bool default_construction); - private: - FlowProblem constructDefault(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, - const PartitionID block_0, - const PartitionID block_1, - vec& whfc_to_node); +private: + FlowProblem constructDefault(const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg, const PartitionID block_0, + const PartitionID block_1, vec &whfc_to_node); - FlowProblem constructOptimizedForLargeHEs(const PartitionedHypergraph& phg, - const Subhypergraph& sub_hg, + FlowProblem constructOptimizedForLargeHEs(const PartitionedHypergraph &phg, + const Subhypergraph &sub_hg, const PartitionID block_0, const PartitionID block_1, - vec& whfc_to_node); + vec &whfc_to_node); - void determineDistanceFromCut(const PartitionedHypergraph& phg, - const whfc::Node source, - const whfc::Node sink, - const PartitionID block_0, + void determineDistanceFromCut(const PartitionedHypergraph &phg, const whfc::Node source, + const whfc::Node sink, const PartitionID block_0, const PartitionID block_1, - const vec& whfc_to_node); + const vec &whfc_to_node); - const Context& _context; + const Context &_context; - FlowHypergraphBuilder& _flow_hg; - whfc::HyperFlowCutter& _hfc; + FlowHypergraphBuilder &_flow_hg; + whfc::HyperFlowCutter &_hfc; ds::DynamicSparseMap _node_to_whfc; ds::ThreadSafeFastResetFlagArray<> _visited_hns; @@ -175,4 +166,4 @@ class SequentialConstruction { DynamicIdenticalNetDetection _identical_nets; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/fm/fm_commons.cpp b/mt-kahypar/partition/refinement/fm/fm_commons.cpp index 3267d61d5..e4ec9a609 100644 --- a/mt-kahypar/partition/refinement/fm/fm_commons.cpp +++ b/mt-kahypar/partition/refinement/fm/fm_commons.cpp @@ -27,231 +27,288 @@ #include #include -#include "mt-kahypar/partition/refinement/gains/gain_definitions.h" #include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/partition/refinement/fm/fm_commons.h" - +#include "mt-kahypar/partition/refinement/gains/gain_definitions.h" namespace mt_kahypar { - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - uint64_t pairToKey(L left, R right) { - ASSERT(left >= 0 && static_cast(left) <= std::numeric_limits::max()); - ASSERT(right >= 0 && static_cast(right) <= std::numeric_limits::max()); - return (static_cast(left) << 32) + static_cast(right); +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE uint64_t pairToKey(L left, R right) +{ + ASSERT(left >= 0 && + static_cast(left) <= std::numeric_limits::max()); + ASSERT(right >= 0 && + static_cast(right) <= std::numeric_limits::max()); + return (static_cast(left) << 32) + static_cast(right); +} + +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE +std::pair keyToPair(uint64_t key) +{ + return { key >> 32, key & std::numeric_limits::max() }; +} + +Gain UnconstrainedFMData::estimatePenaltyForImbalancedMove( + PartitionID to, HypernodeWeight initial_imbalance, HypernodeWeight moved_weight) const +{ + ASSERT(initialized && to != kInvalidPartition); + // TODO test whether it is faster to save the previous position locally + BucketID bucketId = 0; + while(bucketId < NUM_BUCKETS && + initial_imbalance + moved_weight > bucket_weights[indexForBucket(to, bucketId)]) + 
{ + ++bucketId; + } + if(bucketId < NUM_BUCKETS) + { + return std::ceil(moved_weight * gainPerWeightForBucket(bucketId)); } - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - std::pair keyToPair(uint64_t key) { - return {key >> 32, key & std::numeric_limits::max()}; + // fallback case (it should be very unlikely that fallback_bucket_weights contains + // elements) + while(bucketId < NUM_BUCKETS + fallback_bucket_weights[to].size() && + initial_imbalance + moved_weight > + fallback_bucket_weights[to][bucketId - NUM_BUCKETS]) + { + ++bucketId; } + return (bucketId == NUM_BUCKETS + fallback_bucket_weights[to].size()) ? + std::numeric_limits::max() : + std::ceil(moved_weight * gainPerWeightForBucket(bucketId)); +} - Gain UnconstrainedFMData::estimatePenaltyForImbalancedMove(PartitionID to, - HypernodeWeight initial_imbalance, - HypernodeWeight moved_weight) const { - ASSERT(initialized && to != kInvalidPartition); - // TODO test whether it is faster to save the previous position locally - BucketID bucketId = 0; - while (bucketId < NUM_BUCKETS - && initial_imbalance + moved_weight > bucket_weights[indexForBucket(to, bucketId)]) { - ++bucketId; - } - if (bucketId < NUM_BUCKETS) { - return std::ceil(moved_weight * gainPerWeightForBucket(bucketId)); +template +void UnconstrainedFMData::InitializationHelper::initialize( + UnconstrainedFMData &data, const Context &context, + const typename GraphAndGainTypes::PartitionedHypergraph &phg, + const typename GraphAndGainTypes::GainCache &gain_cache) +{ + auto get_node_stats = [&](const HypernodeID hypernode) { + // TODO(maas): we might want to save the total incident weight in the hypergraph data + // structure at some point in the future + HyperedgeWeight total_incident_weight = 0; + for(const HyperedgeID &he : phg.incidentEdges(hypernode)) + { + total_incident_weight += phg.edgeWeight(he); } + HyperedgeWeight internal_weight = + gain_cache.penaltyTerm(hypernode, phg.partID(hypernode)); + ASSERT(internal_weight == gain_cache.recomputePenaltyTerm(phg, hypernode)); + return std::make_pair(internal_weight, total_incident_weight); + }; - // fallback case (it should be very unlikely that fallback_bucket_weights contains elements) - while (bucketId < NUM_BUCKETS + fallback_bucket_weights[to].size() - && initial_imbalance + moved_weight > fallback_bucket_weights[to][bucketId - NUM_BUCKETS]) { - ++bucketId; - } - return (bucketId == NUM_BUCKETS + fallback_bucket_weights[to].size()) ? 
- std::numeric_limits::max() : std::ceil(moved_weight * gainPerWeightForBucket(bucketId)); - } + const double bn_treshold = context.refinement.fm.treshold_border_node_inclusion; + tbb::enumerable_thread_specific local_considered_weight(0); + tbb::enumerable_thread_specific local_inserted_weight(0); + // collect nodes and fill buckets + phg.doParallelForAllNodes([&](const HypernodeID hn) { + const HypernodeWeight hn_weight = phg.nodeWeight(hn); + if(hn_weight == 0) + return; + auto [internal_weight, total_incident_weight] = get_node_stats(hn); + if(static_cast(internal_weight) >= bn_treshold * total_incident_weight) + { + local_considered_weight.local() += hn_weight; + const BucketID bucketId = + bucketForGainPerWeight(static_cast(internal_weight) / hn_weight); + if(bucketId < NUM_BUCKETS) + { + local_inserted_weight.local() += hn_weight; + auto &local_weights = data.local_bucket_weights.local(); + local_weights[data.indexForBucket(phg.partID(hn), bucketId)] += hn_weight; + data.rebalancing_nodes.set(hn, true); + } + } + }); - template - void UnconstrainedFMData::InitializationHelper::initialize( - UnconstrainedFMData& data, const Context& context, - const typename GraphAndGainTypes::PartitionedHypergraph& phg, - const typename GraphAndGainTypes::GainCache& gain_cache) { - auto get_node_stats = [&](const HypernodeID hypernode) { - // TODO(maas): we might want to save the total incident weight in the hypergraph data structure - // at some point in the future - HyperedgeWeight total_incident_weight = 0; - for (const HyperedgeID& he : phg.incidentEdges(hypernode)) { - total_incident_weight += phg.edgeWeight(he); + auto &bucket_weights = data.bucket_weights; + // for each block compute prefix sum of bucket weights, which is later used for + // estimating penalties + auto compute_prefix_sum_for_range = [&](size_t start, size_t end) { + for(const auto &local_weights : data.local_bucket_weights) + { + ASSERT(bucket_weights.size() == local_weights.size()); + for(size_t i = start; i < end; ++i) + { + ASSERT(i < local_weights.size()); + bucket_weights[i] += local_weights[i]; } - HyperedgeWeight internal_weight = gain_cache.penaltyTerm(hypernode, phg.partID(hypernode)); - ASSERT(internal_weight == gain_cache.recomputePenaltyTerm(phg, hypernode)); - return std::make_pair(internal_weight, total_incident_weight); - }; + } + for(size_t i = start; i + 1 < end; ++i) + { + bucket_weights[i + 1] += bucket_weights[i]; + } + }; + tbb::parallel_for( + static_cast(0), context.partition.k, + [&](const PartitionID block) { + compute_prefix_sum_for_range(block * NUM_BUCKETS, (block + 1) * NUM_BUCKETS); + }, + tbb::static_partitioner()); + + const HypernodeWeight considered_weight = + local_considered_weight.combine(std::plus<>()); + const HypernodeWeight inserted_weight = local_inserted_weight.combine(std::plus<>()); + if(static_cast(inserted_weight) / considered_weight < FALLBACK_TRESHOLD) + { + // Use fallback if fixed number of buckets per block is not sufficient: + // For unweighted instances or instances with reasonable weight distribution this + // should almost never be necessary. We use more expensive precomputations (hash maps + // instead of arrays) here in order to keep memory overhead low and still get fast + // queries for estimating imbalance penalties. 
+ using SparseMap = ds::DynamicSparseMap; - const double bn_treshold = context.refinement.fm.treshold_border_node_inclusion; - tbb::enumerable_thread_specific local_considered_weight(0); - tbb::enumerable_thread_specific local_inserted_weight(0); - // collect nodes and fill buckets + // collect nodes into local hashmaps + tbb::enumerable_thread_specific local_accumulator; phg.doParallelForAllNodes([&](const HypernodeID hn) { const HypernodeWeight hn_weight = phg.nodeWeight(hn); - if (hn_weight == 0) return; + if(hn_weight == 0) + return; auto [internal_weight, total_incident_weight] = get_node_stats(hn); - if (static_cast(internal_weight) >= bn_treshold * total_incident_weight) { - local_considered_weight.local() += hn_weight; - const BucketID bucketId = bucketForGainPerWeight(static_cast(internal_weight) / hn_weight); - if (bucketId < NUM_BUCKETS) { - local_inserted_weight.local() += hn_weight; - auto& local_weights = data.local_bucket_weights.local(); - local_weights[data.indexForBucket(phg.partID(hn), bucketId)] += hn_weight; - data.rebalancing_nodes.set(hn, true); + if(static_cast(internal_weight) >= bn_treshold * total_incident_weight) + { + const BucketID bucketId = + bucketForGainPerWeight(static_cast(internal_weight) / hn_weight); + if(bucketId >= NUM_BUCKETS) + { + auto &map = local_accumulator.local(); + // hash by block id and bucket id + map[pairToKey(phg.partID(hn), bucketId - NUM_BUCKETS)] += hn_weight; } } }); - auto& bucket_weights = data.bucket_weights; - // for each block compute prefix sum of bucket weights, which is later used for estimating penalties - auto compute_prefix_sum_for_range = [&](size_t start, size_t end) { - for (const auto& local_weights: data.local_bucket_weights) { - ASSERT(bucket_weights.size() == local_weights.size()); - for (size_t i = start; i < end; ++i) { - ASSERT(i < local_weights.size()); - bucket_weights[i] += local_weights[i]; - } - } - for (size_t i = start; i + 1 < end; ++i) { - bucket_weights[i + 1] += bucket_weights[i]; - } - }; - tbb::parallel_for(static_cast(0), context.partition.k, [&](const PartitionID block) { - compute_prefix_sum_for_range(block * NUM_BUCKETS, (block + 1) * NUM_BUCKETS); - }, tbb::static_partitioner()); - - const HypernodeWeight considered_weight = local_considered_weight.combine(std::plus<>()); - const HypernodeWeight inserted_weight = local_inserted_weight.combine(std::plus<>()); - if (static_cast(inserted_weight) / considered_weight < FALLBACK_TRESHOLD) { - // Use fallback if fixed number of buckets per block is not sufficient: - // For unweighted instances or instances with reasonable weight distribution this should almost never - // be necessary. We use more expensive precomputations (hash maps instead of arrays) here in order to - // keep memory overhead low and still get fast queries for estimating imbalance penalties. 
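// Aside (illustration only, not part of the patch): the fallback path keys its
// per-thread sparse maps by packing a (block, bucket) pair into one 64-bit value
// via pairToKey/keyToPair above, so no dense k * #buckets array is needed per
// thread. A minimal, self-contained round trip of that packing; the helper names
// are hypothetical and std::unordered_map merely stands in for ds::DynamicSparseMap.
#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <utility>

// High 32 bits hold the block id, low 32 bits the bucket id (equivalent to the
// shift-and-add in pairToKey, since the bucket id fits into 32 bits).
static inline uint64_t pair_to_key(uint32_t block, uint32_t bucket) {
  return (static_cast<uint64_t>(block) << 32) | static_cast<uint64_t>(bucket);
}
static inline std::pair<uint32_t, uint32_t> key_to_pair(uint64_t key) {
  return { static_cast<uint32_t>(key >> 32), static_cast<uint32_t>(key & 0xFFFFFFFFu) };
}

inline void pair_key_roundtrip_demo() {
  std::unordered_map<uint64_t, int> weight_per_block_and_bucket;
  weight_per_block_and_bucket[pair_to_key(/*block=*/3, /*bucket=*/17)] += 5;
  const auto [block, bucket] = key_to_pair(pair_to_key(3, 17));
  assert(block == 3 && bucket == 17);
  assert(weight_per_block_and_bucket[pair_to_key(block, bucket)] == 5);
}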
- using SparseMap = ds::DynamicSparseMap; + vec max_rank_per_block(context.partition.k, AtomicBucketID(0)); + vec weight_per_block(context.partition.k, AtomicWeight(0)); + // sort resulting values and determine ranks (so that larger values are ignored) + local_accumulator.combine_each([&](SparseMap &map) { + // we sort the values in the dense part (note that this invalidates the map) + std::sort(map.begin(), map.end(), + [](const auto &l, const auto &r) { return l.key < r.key; }); + // compute summed weight and rank/bucketID for each block + auto it = map.begin(); + for(PartitionID p = 0; p < context.partition.k; ++p) + { + const uint32_t block = static_cast(p); - // collect nodes into local hashmaps - tbb::enumerable_thread_specific local_accumulator; - phg.doParallelForAllNodes([&](const HypernodeID hn) { - const HypernodeWeight hn_weight = phg.nodeWeight(hn); - if (hn_weight == 0) return; - - auto [internal_weight, total_incident_weight] = get_node_stats(hn); - if (static_cast(internal_weight) >= bn_treshold * total_incident_weight) { - const BucketID bucketId = bucketForGainPerWeight(static_cast(internal_weight) / hn_weight); - if (bucketId >= NUM_BUCKETS) { - auto& map = local_accumulator.local(); - // hash by block id and bucket id - map[pairToKey(phg.partID(hn), bucketId - NUM_BUCKETS)] += hn_weight; + ASSERT(it == map.end() || keyToPair(it->key).first >= block); + HypernodeWeight total_weight = 0; + while(it < map.end() && keyToPair(it->key).first == block) + { + total_weight += it->value; + ++it; + } + // scan backwards to find (approximately) an element with rank according to the + // fallback treshold + HypernodeWeight remaining_upper_weight = + std::floor((1.0 - FALLBACK_TRESHOLD) * total_weight); + auto backwards_it = it; + while(total_weight > 0 && remaining_upper_weight >= (--backwards_it)->value) + { + ASSERT(keyToPair(backwards_it->key).first == block); + remaining_upper_weight -= backwards_it->value; + } + // write result to global arrays + weight_per_block[block].fetch_add(total_weight, std::memory_order_relaxed); + const auto [curr_block, new_rank] = keyToPair(backwards_it->key); + if(curr_block == block) + { + AtomicBucketID &global_rank = max_rank_per_block[block]; + BucketID current = global_rank.load(std::memory_order_relaxed); + while(current < new_rank && !global_rank.compare_exchange_strong( + current, new_rank, std::memory_order_relaxed)) + { /* try again */ } } - }); - - vec max_rank_per_block(context.partition.k, AtomicBucketID(0)); - vec weight_per_block(context.partition.k, AtomicWeight(0)); - // sort resulting values and determine ranks (so that larger values are ignored) - local_accumulator.combine_each([&](SparseMap& map) { - // we sort the values in the dense part (note that this invalidates the map) - std::sort(map.begin(), map.end(), [](const auto& l, const auto& r) { - return l.key < r.key; - }); - // compute summed weight and rank/bucketID for each block - auto it = map.begin(); - for (PartitionID p = 0; p < context.partition.k; ++p) { - const uint32_t block = static_cast(p); + else + { + ASSERT(total_weight == 0); + } + } + }); - ASSERT(it == map.end() || keyToPair(it->key).first >= block); - HypernodeWeight total_weight = 0; - while (it < map.end() && keyToPair(it->key).first == block) { - total_weight += it->value; - ++it; - } - // scan backwards to find (approximately) an element with rank according to the fallback treshold - HypernodeWeight remaining_upper_weight = std::floor((1.0 - FALLBACK_TRESHOLD) * total_weight); - auto backwards_it = it; - 
while (total_weight > 0 && remaining_upper_weight >= (--backwards_it)->value) { - ASSERT(keyToPair(backwards_it->key).first == block); - remaining_upper_weight -= backwards_it->value; + auto &fallback_bucket_weights = data.fallback_bucket_weights; + // resize vectors accordingly, set rank to zero if no fallback is required for this + // block + tbb::parallel_for( + static_cast(0), context.partition.k, + [&](const PartitionID block) { + const HypernodeWeight handled_weight = + bucket_weights[data.indexForBucket(block, NUM_BUCKETS - 1)]; + const HypernodeWeight fallback_weight = weight_per_block[block]; + if(static_cast(handled_weight) / (handled_weight + fallback_weight) >= + FALLBACK_TRESHOLD) + { + max_rank_per_block[block].store(0); } - // write result to global arrays - weight_per_block[block].fetch_add(total_weight, std::memory_order_relaxed); - const auto [curr_block, new_rank] = keyToPair(backwards_it->key); - if (curr_block == block) { - AtomicBucketID& global_rank = max_rank_per_block[block]; - BucketID current = global_rank.load(std::memory_order_relaxed); - while (current < new_rank - && !global_rank.compare_exchange_strong(current, new_rank, std::memory_order_relaxed)) { /* try again */ } - } else { - ASSERT(total_weight == 0); + else + { + fallback_bucket_weights[block].resize(max_rank_per_block[block] + 1, 0); } - } - }); - - auto& fallback_bucket_weights = data.fallback_bucket_weights; - // resize vectors accordingly, set rank to zero if no fallback is required for this block - tbb::parallel_for(static_cast(0), context.partition.k, [&](const PartitionID block) { - const HypernodeWeight handled_weight = bucket_weights[data.indexForBucket(block, NUM_BUCKETS - 1)]; - const HypernodeWeight fallback_weight = weight_per_block[block]; - if (static_cast(handled_weight) / (handled_weight + fallback_weight) >= FALLBACK_TRESHOLD) { - max_rank_per_block[block].store(0); - } else { - fallback_bucket_weights[block].resize(max_rank_per_block[block] + 1, 0); - } - }, tbb::static_partitioner()); + }, + tbb::static_partitioner()); - // accumulate results in fallback_bucket_weights - local_accumulator.combine_each([&](SparseMap& map) { - auto it = map.begin(); - for (PartitionID p = 0; p < context.partition.k; ++p) { - const uint32_t block = static_cast(p); - const size_t upper_limit = fallback_bucket_weights[block].size(); - ASSERT(upper_limit == 0 || upper_limit == max_rank_per_block[block] + 1); + // accumulate results in fallback_bucket_weights + local_accumulator.combine_each([&](SparseMap &map) { + auto it = map.begin(); + for(PartitionID p = 0; p < context.partition.k; ++p) + { + const uint32_t block = static_cast(p); + const size_t upper_limit = fallback_bucket_weights[block].size(); + ASSERT(upper_limit == 0 || upper_limit == max_rank_per_block[block] + 1); - ASSERT(it == map.end() || keyToPair(it->key).first >= block); - while (it < map.end() && keyToPair(it->key).first == block) { - BucketID current_rank = keyToPair(it->key).second; - if (current_rank < upper_limit) { - __atomic_fetch_add(&fallback_bucket_weights[block][current_rank], it->value, __ATOMIC_RELAXED); - } - ++it; + ASSERT(it == map.end() || keyToPair(it->key).first >= block); + while(it < map.end() && keyToPair(it->key).first == block) + { + BucketID current_rank = keyToPair(it->key).second; + if(current_rank < upper_limit) + { + __atomic_fetch_add(&fallback_bucket_weights[block][current_rank], it->value, + __ATOMIC_RELAXED); } + ++it; } - }); + } + }); - // compute prefix sums - tbb::parallel_for(static_cast(0), 
context.partition.k, [&](const PartitionID block) { - auto& weights = fallback_bucket_weights[block]; - if (!weights.empty()) { - weights[0] += bucket_weights[data.indexForBucket(block, NUM_BUCKETS - 1)]; - for (size_t i = 0; i + 1 < weights.size(); ++i) { - weights[i + 1] += weights[i]; + // compute prefix sums + tbb::parallel_for( + static_cast(0), context.partition.k, + [&](const PartitionID block) { + auto &weights = fallback_bucket_weights[block]; + if(!weights.empty()) + { + weights[0] += bucket_weights[data.indexForBucket(block, NUM_BUCKETS - 1)]; + for(size_t i = 0; i + 1 < weights.size(); ++i) + { + weights[i + 1] += weights[i]; + } } - } - }, tbb::static_partitioner()); - } - - data.initialized = true; + }, + tbb::static_partitioner()); } - void UnconstrainedFMData::reset() { - rebalancing_nodes.reset(); - bucket_weights.assign(current_k * NUM_BUCKETS, 0); - virtual_weight_delta.assign(current_k, AtomicWeight(0)); - for (auto& local_weights: local_bucket_weights) { - local_weights.assign(current_k * NUM_BUCKETS, 0); - } - fallback_bucket_weights.assign(current_k, {}); - initialized = false; - } + data.initialized = true; +} - namespace { - #define UNCONSTRAINED_FM_INITIALIZATION(X) UnconstrainedFMData::InitializationHelper +void UnconstrainedFMData::reset() +{ + rebalancing_nodes.reset(); + bucket_weights.assign(current_k * NUM_BUCKETS, 0); + virtual_weight_delta.assign(current_k, AtomicWeight(0)); + for(auto &local_weights : local_bucket_weights) + { + local_weights.assign(current_k * NUM_BUCKETS, 0); } + fallback_bucket_weights.assign(current_k, {}); + initialized = false; +} + +namespace { +#define UNCONSTRAINED_FM_INITIALIZATION(X) UnconstrainedFMData::InitializationHelper +} - INSTANTIATE_CLASS_WITH_VALID_TRAITS(UNCONSTRAINED_FM_INITIALIZATION) +INSTANTIATE_CLASS_WITH_VALID_TRAITS(UNCONSTRAINED_FM_INITIALIZATION) } diff --git a/mt-kahypar/partition/refinement/fm/fm_commons.h b/mt-kahypar/partition/refinement/fm/fm_commons.h index 97ee1f34b..2492a7489 100644 --- a/mt-kahypar/partition/refinement/fm/fm_commons.h +++ b/mt-kahypar/partition/refinement/fm/fm_commons.h @@ -30,8 +30,8 @@ #include #include -#include #include +#include #include "kahypar-resources/datastructure/fast_reset_flag_array.h" @@ -40,32 +40,39 @@ namespace mt_kahypar { - -struct GlobalMoveTracker { +struct GlobalMoveTracker +{ vec moveOrder; vec moveOfNode; CAtomic runningMoveID; MoveID firstMoveID = 1; explicit GlobalMoveTracker(size_t numNodes = 0) : - moveOrder(numNodes), - moveOfNode(numNodes, 0), - runningMoveID(1) { } + moveOrder(numNodes), moveOfNode(numNodes, 0), runningMoveID(1) + { + } // Returns true if stored move IDs should be reset - bool reset() { - if (runningMoveID.load() >= std::numeric_limits::max() - moveOrder.size() - 20) { - tbb::parallel_for(UL(0), moveOfNode.size(), [&](size_t i) { moveOfNode[i] = 0; }, tbb::static_partitioner()); + bool reset() + { + if(runningMoveID.load() >= std::numeric_limits::max() - moveOrder.size() - 20) + { + tbb::parallel_for( + UL(0), moveOfNode.size(), [&](size_t i) { moveOfNode[i] = 0; }, + tbb::static_partitioner()); firstMoveID = 1; runningMoveID.store(1); return true; - } else { + } + else + { firstMoveID = ++runningMoveID; return false; } } - MoveID insertMove(const Move &m) { + MoveID insertMove(const Move &m) + { const MoveID move_id = runningMoveID.fetch_add(1, std::memory_order_relaxed); assert(move_id - firstMoveID < moveOrder.size()); moveOrder[move_id - firstMoveID] = m; @@ -73,69 +80,84 @@ struct GlobalMoveTracker { return move_id; } - Move& 
getMove(MoveID move_id) { + Move &getMove(MoveID move_id) + { assert(move_id - firstMoveID < moveOrder.size()); return moveOrder[move_id - firstMoveID]; } - bool wasNodeMovedInThisRound(HypernodeID u) const { + bool wasNodeMovedInThisRound(HypernodeID u) const + { const MoveID m_id = moveOfNode[u]; - if (m_id >= firstMoveID && m_id < runningMoveID.load(std::memory_order_relaxed)) { // active move ID + if(m_id >= firstMoveID && m_id < runningMoveID.load(std::memory_order_relaxed)) + { // active move ID ASSERT(moveOrder[m_id - firstMoveID].node == u); - return moveOrder[m_id - firstMoveID].isValid(); // not reverted already + return moveOrder[m_id - firstMoveID].isValid(); // not reverted already } return false; } - MoveID numPerformedMoves() const { + MoveID numPerformedMoves() const + { return runningMoveID.load(std::memory_order_relaxed) - firstMoveID; } - bool isMoveStale(const MoveID move_id) const { - return move_id < firstMoveID; - } + bool isMoveStale(const MoveID move_id) const { return move_id < firstMoveID; } }; -struct NodeTracker { - vec> searchOfNode; +struct NodeTracker +{ + vec > searchOfNode; SearchID releasedMarker = 1; SearchID deactivatedNodeMarker = 2; - CAtomic highestActiveSearchID { 2 }; + CAtomic highestActiveSearchID{ 2 }; - explicit NodeTracker(size_t numNodes = 0) : searchOfNode(numNodes, CAtomic(0)) { } + explicit NodeTracker(size_t numNodes = 0) : searchOfNode(numNodes, CAtomic(0)) + { + } // only the search that owns u is allowed to call this - void deactivateNode(HypernodeID u, SearchID search_id) { + void deactivateNode(HypernodeID u, SearchID search_id) + { assert(searchOfNode[u].load() == search_id); unused(search_id); searchOfNode[u].store(deactivatedNodeMarker, std::memory_order_release); } - bool isLocked(HypernodeID u) { + bool isLocked(HypernodeID u) + { return searchOfNode[u].load(std::memory_order_relaxed) == deactivatedNodeMarker; } - void releaseNode(HypernodeID u) { + void releaseNode(HypernodeID u) + { searchOfNode[u].store(releasedMarker, std::memory_order_relaxed); } - bool isSearchInactive(SearchID search_id) const { + bool isSearchInactive(SearchID search_id) const + { return search_id < deactivatedNodeMarker; } - bool canNodeStartNewSearch(HypernodeID u) const { - return isSearchInactive( searchOfNode[u].load(std::memory_order_relaxed) ); + bool canNodeStartNewSearch(HypernodeID u) const + { + return isSearchInactive(searchOfNode[u].load(std::memory_order_relaxed)); } - bool tryAcquireNode(HypernodeID u, SearchID new_search) { + bool tryAcquireNode(HypernodeID u, SearchID new_search) + { SearchID current_search = searchOfNode[u].load(std::memory_order_relaxed); - return isSearchInactive(current_search) - && searchOfNode[u].compare_exchange_strong(current_search, new_search, std::memory_order_acq_rel); + return isSearchInactive(current_search) && + searchOfNode[u].compare_exchange_strong(current_search, new_search, + std::memory_order_acq_rel); } - void requestNewSearches(SearchID max_num_searches) { - if (highestActiveSearchID.load(std::memory_order_relaxed) >= std::numeric_limits::max() - max_num_searches - 20) { + void requestNewSearches(SearchID max_num_searches) + { + if(highestActiveSearchID.load(std::memory_order_relaxed) >= + std::numeric_limits::max() - max_num_searches - 20) + { tbb::parallel_for(UL(0), searchOfNode.size(), [&](const size_t i) { searchOfNode[i].store(0, std::memory_order_relaxed); }); @@ -146,97 +168,120 @@ struct NodeTracker { } }; - -// Contains data required for unconstrained FM: We group non-border nodes in 
buckets based on their -// incident weight to node weight ratio. This allows to give a (pessimistic) estimate of the effective -// gain for moves that violate the balance constraint -class UnconstrainedFMData { +// Contains data required for unconstrained FM: We group non-border nodes in buckets based +// on their incident weight to node weight ratio. This allows to give a (pessimistic) +// estimate of the effective gain for moves that violate the balance constraint +class UnconstrainedFMData +{ using AtomicWeight = parallel::IntegralAtomicWrapper; using BucketID = uint32_t; using AtomicBucketID = parallel::IntegralAtomicWrapper; - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wmismatched-tags" - template - struct InitializationHelper { - static void initialize(UnconstrainedFMData& data, const Context& context, - const typename GraphAndGainTypes::PartitionedHypergraph& phg, - const typename GraphAndGainTypes::GainCache& gain_cache); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmismatched-tags" + template + struct InitializationHelper + { + static void initialize(UnconstrainedFMData &data, const Context &context, + const typename GraphAndGainTypes::PartitionedHypergraph &phg, + const typename GraphAndGainTypes::GainCache &gain_cache); }; - #pragma GCC diagnostic pop +#pragma GCC diagnostic pop static constexpr BucketID NUM_BUCKETS = 16; static constexpr double BUCKET_FACTOR = 1.5; static constexpr double FALLBACK_TRESHOLD = 0.75; - public: - explicit UnconstrainedFMData(HypernodeID num_nodes): - initialized(false), - current_k(0), - bucket_weights(), - virtual_weight_delta(), - local_bucket_weights(), - rebalancing_nodes(num_nodes) { } - - template - void initialize(const Context& context, - const typename GraphAndGainTypes::PartitionedHypergraph& phg, - const typename GraphAndGainTypes::GainCache& gain_cache) { +public: + explicit UnconstrainedFMData(HypernodeID num_nodes) : + initialized(false), current_k(0), bucket_weights(), virtual_weight_delta(), + local_bucket_weights(), rebalancing_nodes(num_nodes) + { + } + + template + void initialize(const Context &context, + const typename GraphAndGainTypes::PartitionedHypergraph &phg, + const typename GraphAndGainTypes::GainCache &gain_cache) + { changeNumberOfBlocks(context.partition.k); reset(); InitializationHelper::initialize(*this, context, phg, gain_cache); } - Gain estimatePenaltyForImbalancedMove(PartitionID to, HypernodeWeight initial_imbalance, HypernodeWeight moved_weight) const; + Gain estimatePenaltyForImbalancedMove(PartitionID to, HypernodeWeight initial_imbalance, + HypernodeWeight moved_weight) const; - AtomicWeight& virtualWeightDelta(PartitionID block) { + AtomicWeight &virtualWeightDelta(PartitionID block) + { ASSERT(block >= 0 && static_cast(block) < virtual_weight_delta.size()); return virtual_weight_delta[block]; } - bool isRebalancingNode(HypernodeID hn) const { + bool isRebalancingNode(HypernodeID hn) const + { return initialized && rebalancing_nodes[hn]; } void reset(); - void changeNumberOfBlocks(PartitionID new_k) { - if (new_k != current_k) { + void changeNumberOfBlocks(PartitionID new_k) + { + if(new_k != current_k) + { current_k = new_k; - local_bucket_weights = tbb::enumerable_thread_specific>(new_k * NUM_BUCKETS); + local_bucket_weights = + tbb::enumerable_thread_specific >(new_k * NUM_BUCKETS); initialized = false; } } - private: - template +private: + template friend class InitializationHelper; - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t indexForBucket(PartitionID block, 
BucketID bucketId) const { - ASSERT(bucketId < NUM_BUCKETS && block * NUM_BUCKETS + bucketId < bucket_weights.size()); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE size_t indexForBucket(PartitionID block, + BucketID bucketId) const + { + ASSERT(bucketId < NUM_BUCKETS && + block * NUM_BUCKETS + bucketId < bucket_weights.size()); return block * NUM_BUCKETS + bucketId; } // upper bound of gain values in bucket - static double gainPerWeightForBucket(BucketID bucketId) { - if (bucketId > 1) { + static double gainPerWeightForBucket(BucketID bucketId) + { + if(bucketId > 1) + { return std::pow(BUCKET_FACTOR, bucketId - 2); - } else if (bucketId == 1) { + } + else if(bucketId == 1) + { return 0.5; - } else { + } + else + { return 0; } } - static BucketID bucketForGainPerWeight(double gainPerWeight) { - if (gainPerWeight >= 1) { + static BucketID bucketForGainPerWeight(double gainPerWeight) + { + if(gainPerWeight >= 1) + { return 2 + std::ceil(std::log(gainPerWeight) / std::log(BUCKET_FACTOR)); - } else if (gainPerWeight > 0.5) { + } + else if(gainPerWeight > 0.5) + { return 2; - } else if (gainPerWeight > 0) { + } + else if(gainPerWeight > 0) + { return 1; - } else { + } + else + { return 0; } } @@ -245,13 +290,15 @@ class UnconstrainedFMData { PartitionID current_k; parallel::scalable_vector bucket_weights; parallel::scalable_vector virtual_weight_delta; - tbb::enumerable_thread_specific> local_bucket_weights; - parallel::scalable_vector> fallback_bucket_weights; + tbb::enumerable_thread_specific > + local_bucket_weights; + parallel::scalable_vector > + fallback_bucket_weights; kahypar::ds::FastResetFlagArray<> rebalancing_nodes; }; - -struct FMSharedData { +struct FMSharedData +{ // ! Number of Nodes size_t numberOfNodes; @@ -261,68 +308,67 @@ struct FMSharedData { // ! PQ handles shared by all threads (each vertex is only held by one thread) vec vertexPQHandles; - // ! Stores the sequence of performed moves and assigns IDs to moves that can be used in the global rollback code + // ! Stores the sequence of performed moves and assigns IDs to moves that can be used in + // the global rollback code GlobalMoveTracker moveTracker; - // ! Tracks the current search of a node, and if a node can still be added to an active search + // ! Tracks the current search of a node, and if a node can still be added to an active + // search NodeTracker nodeTracker; - // ! Stores the designated target part of a vertex, i.e. the part with the highest gain to which moving is feasible + // ! Stores the designated target part of a vertex, i.e. the part with the highest gain + // to which moving is feasible vec targetPart; // ! Additional data for unconstrained FM algorithm UnconstrainedFMData unconstrained; - // ! Stop parallel refinement if finishedTasks > finishedTasksLimit to avoid long-running single searches + // ! 
Stop parallel refinement if finishedTasks > finishedTasksLimit to avoid + // long-running single searches CAtomic finishedTasks; size_t finishedTasksLimit = std::numeric_limits::max(); bool release_nodes = true; FMSharedData(size_t numNodes, size_t numThreads) : - numberOfNodes(numNodes), - refinementNodes(), //numNodes, numThreads), - vertexPQHandles(), //numPQHandles, invalid_position), - moveTracker(), //numNodes), - nodeTracker(), //numNodes), - targetPart(), - unconstrained(numNodes) { + numberOfNodes(numNodes), refinementNodes(), // numNodes, numThreads), + vertexPQHandles(), // numPQHandles, invalid_position), + moveTracker(), // numNodes), + nodeTracker(), // numNodes), + targetPart(), unconstrained(numNodes) + { finishedTasks.store(0, std::memory_order_relaxed); - tbb::parallel_invoke([&] { - moveTracker.moveOrder.resize(numNodes); - }, [&] { - moveTracker.moveOfNode.resize(numNodes); - }, [&] { - nodeTracker.searchOfNode.resize(numNodes, CAtomic(0)); - }, [&] { - vertexPQHandles.resize(numNodes, invalid_position); - }, [&] { - refinementNodes.tls_queues.resize(numThreads); - }, [&] { - targetPart.resize(numNodes, kInvalidPartition); - }); + tbb::parallel_invoke( + [&] { moveTracker.moveOrder.resize(numNodes); }, + [&] { moveTracker.moveOfNode.resize(numNodes); }, + [&] { nodeTracker.searchOfNode.resize(numNodes, CAtomic(0)); }, + [&] { vertexPQHandles.resize(numNodes, invalid_position); }, + [&] { refinementNodes.tls_queues.resize(numThreads); }, + [&] { targetPart.resize(numNodes, kInvalidPartition); }); } FMSharedData(size_t numNodes) : - FMSharedData( - numNodes, - TBBInitializer::instance().total_number_of_threads()) { } + FMSharedData(numNodes, TBBInitializer::instance().total_number_of_threads()) + { + } - FMSharedData() : - FMSharedData(0, 0) { } + FMSharedData() : FMSharedData(0, 0) {} - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); - utils::MemoryTreeNode* shared_fm_data_node = parent->addChild("Shared FM Data"); + utils::MemoryTreeNode *shared_fm_data_node = parent->addChild("Shared FM Data"); - utils::MemoryTreeNode* pq_handles_node = shared_fm_data_node->addChild("PQ Handles"); + utils::MemoryTreeNode *pq_handles_node = shared_fm_data_node->addChild("PQ Handles"); pq_handles_node->updateSize(vertexPQHandles.capacity() * sizeof(PosT)); - utils::MemoryTreeNode* move_tracker_node = shared_fm_data_node->addChild("Move Tracker"); + utils::MemoryTreeNode *move_tracker_node = + shared_fm_data_node->addChild("Move Tracker"); move_tracker_node->updateSize(moveTracker.moveOrder.capacity() * sizeof(Move) + moveTracker.moveOfNode.capacity() * sizeof(MoveID)); - utils::MemoryTreeNode* node_tracker_node = shared_fm_data_node->addChild("Node Tracker"); + utils::MemoryTreeNode *node_tracker_node = + shared_fm_data_node->addChild("Node Tracker"); node_tracker_node->updateSize(nodeTracker.searchOfNode.capacity() * sizeof(SearchID)); refinementNodes.memoryConsumption(shared_fm_data_node); } diff --git a/mt-kahypar/partition/refinement/fm/global_rollback.cpp b/mt-kahypar/partition/refinement/fm/global_rollback.cpp index 143b611ba..b73dd0f34 100644 --- a/mt-kahypar/partition/refinement/fm/global_rollback.cpp +++ b/mt-kahypar/partition/refinement/fm/global_rollback.cpp @@ -28,535 +28,623 @@ #include "tbb/parallel_scan.h" +#include "mt-kahypar/datastructures/bitset.h" +#include "mt-kahypar/datastructures/pin_count_snapshot.h" #include "mt-kahypar/definitions.h" #include 
"mt-kahypar/partition/metrics.h" +#include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" #include "mt-kahypar/partition/refinement/gains/gain_definitions.h" #include "mt-kahypar/utils/timer.h" -#include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" -#include "mt-kahypar/datastructures/bitset.h" -#include "mt-kahypar/datastructures/pin_count_snapshot.h" namespace mt_kahypar { - template - struct BalanceAndBestIndexScan { - const PartitionedHypergraph& phg; - const vec& moves; +template +struct BalanceAndBestIndexScan +{ + const PartitionedHypergraph &phg; + const vec &moves; - struct Prefix { - Gain gain = 0; /** gain when using valid moves up to best_index */ - MoveID best_index = 0; /** local ID of first move to revert */ - HypernodeWeight heaviest_weight = - std::numeric_limits::max(); /** weight of the heaviest part */ + struct Prefix + { + Gain gain = 0; /** gain when using valid moves up to best_index */ + MoveID best_index = 0; /** local ID of first move to revert */ + HypernodeWeight heaviest_weight = + std::numeric_limits::max(); /** weight of the heaviest part */ - bool operator<(const Prefix& o) const { - return gain > o.gain || - (gain == o.gain && std::tie(heaviest_weight, best_index) < std::tie(o.heaviest_weight, o.best_index)); - } - }; - std::shared_ptr< tbb::enumerable_thread_specific > local_best; - - Gain gain_sum = 0; - - vec part_weights; - const std::vector& max_part_weights; - - BalanceAndBestIndexScan(BalanceAndBestIndexScan& b, tbb::split) : - phg(b.phg), - moves(b.moves), - local_best(b.local_best), - gain_sum(0), - part_weights(b.part_weights.size(), 0), - max_part_weights(b.max_part_weights) { } - - - BalanceAndBestIndexScan(const PartitionedHypergraph& phg, - const vec& moves, - const vec& part_weights, - const std::vector& max_part_weights) : - phg(phg), - moves(moves), - local_best(std::make_shared< tbb::enumerable_thread_specific >()), - part_weights(part_weights), - max_part_weights(max_part_weights) + bool operator<(const Prefix &o) const { + return gain > o.gain || + (gain == o.gain && std::tie(heaviest_weight, best_index) < + std::tie(o.heaviest_weight, o.best_index)); } + }; + std::shared_ptr > local_best; + Gain gain_sum = 0; - void operator()(const tbb::blocked_range& r, tbb::pre_scan_tag ) { - for (MoveID i = r.begin(); i < r.end(); ++i) { - const Move& m = moves[i]; - if (m.isValid()) { // skip locally reverted moves - gain_sum += m.gain; - part_weights[m.from] -= phg.nodeWeight(m.node); - part_weights[m.to] += phg.nodeWeight(m.node); - } + vec part_weights; + const std::vector &max_part_weights; + + BalanceAndBestIndexScan(BalanceAndBestIndexScan &b, tbb::split) : + phg(b.phg), moves(b.moves), local_best(b.local_best), gain_sum(0), + part_weights(b.part_weights.size(), 0), max_part_weights(b.max_part_weights) + { + } + + BalanceAndBestIndexScan(const PartitionedHypergraph &phg, const vec &moves, + const vec &part_weights, + const std::vector &max_part_weights) : + phg(phg), + moves(moves), + local_best(std::make_shared >()), + part_weights(part_weights), max_part_weights(max_part_weights) + { + } + + void operator()(const tbb::blocked_range &r, tbb::pre_scan_tag) + { + for(MoveID i = r.begin(); i < r.end(); ++i) + { + const Move &m = moves[i]; + if(m.isValid()) + { // skip locally reverted moves + gain_sum += m.gain; + part_weights[m.from] -= phg.nodeWeight(m.node); + part_weights[m.to] += phg.nodeWeight(m.node); } } + } - // subranges a | b | c | d . 
assuming this ran pre_scan on c, - // then lhs ran pre_scan on b and final_scan of this will be on d - void reverse_join(BalanceAndBestIndexScan& lhs) { - for (size_t i = 0; i < part_weights.size(); ++i) { - part_weights[i] += lhs.part_weights[i]; - } - gain_sum += lhs.gain_sum; + // subranges a | b | c | d . assuming this ran pre_scan on c, + // then lhs ran pre_scan on b and final_scan of this will be on d + void reverse_join(BalanceAndBestIndexScan &lhs) + { + for(size_t i = 0; i < part_weights.size(); ++i) + { + part_weights[i] += lhs.part_weights[i]; } + gain_sum += lhs.gain_sum; + } - void operator()(const tbb::blocked_range& r, tbb::final_scan_tag ) { - size_t overloaded = 0; - for (size_t i = 0; i < part_weights.size(); ++i) { - if (part_weights[i] > max_part_weights[i]) { - overloaded++; - } + void operator()(const tbb::blocked_range &r, tbb::final_scan_tag) + { + size_t overloaded = 0; + for(size_t i = 0; i < part_weights.size(); ++i) + { + if(part_weights[i] > max_part_weights[i]) + { + overloaded++; } + } - Prefix current; - for (MoveID i = r.begin(); i < r.end(); ++i) { - const Move& m = moves[i]; - - if (m.isValid()) { // skip locally reverted moves - gain_sum += m.gain; + Prefix current; + for(MoveID i = r.begin(); i < r.end(); ++i) + { + const Move &m = moves[i]; - const bool from_overloaded = part_weights[m.from] > max_part_weights[m.from]; - part_weights[m.from] -= phg.nodeWeight(m.node); - if (from_overloaded && part_weights[m.from] <= max_part_weights[m.from]) { - overloaded--; - } - const bool to_overloaded = part_weights[m.to] > max_part_weights[m.to]; - part_weights[m.to] += phg.nodeWeight(m.node); - if (!to_overloaded && part_weights[m.to] > max_part_weights[m.to]) { - overloaded++; - } + if(m.isValid()) + { // skip locally reverted moves + gain_sum += m.gain; - if (overloaded == 0 && gain_sum >= current.gain) { - Prefix new_prefix = { gain_sum, i + 1, *std::max_element(part_weights.begin(), part_weights.end()) }; - current = std::min(current, new_prefix); - } + const bool from_overloaded = part_weights[m.from] > max_part_weights[m.from]; + part_weights[m.from] -= phg.nodeWeight(m.node); + if(from_overloaded && part_weights[m.from] <= max_part_weights[m.from]) + { + overloaded--; + } + const bool to_overloaded = part_weights[m.to] > max_part_weights[m.to]; + part_weights[m.to] += phg.nodeWeight(m.node); + if(!to_overloaded && part_weights[m.to] > max_part_weights[m.to]) + { + overloaded++; } - } - if (current.best_index != 0) { - Prefix& lb = local_best->local(); - lb = std::min(lb, current); + if(overloaded == 0 && gain_sum >= current.gain) + { + Prefix new_prefix = { + gain_sum, i + 1, *std::max_element(part_weights.begin(), part_weights.end()) + }; + current = std::min(current, new_prefix); + } } } - void assign(BalanceAndBestIndexScan& b) { - gain_sum = b.gain_sum; - } - - Prefix finalize(const vec& initial_part_weights) { - Prefix res { 0, 0, *std::max_element(initial_part_weights.begin(), initial_part_weights.end()) }; - for (const Prefix& x : *local_best) { - res = std::min(res, x); - } - return res; + if(current.best_index != 0) + { + Prefix &lb = local_best->local(); + lb = std::min(lb, current); } - }; - - template - HyperedgeWeight GlobalRollback::revertToBestPrefixParallel( - PartitionedHypergraph& phg, FMSharedData& sharedData, - const vec& partWeights, const std::vector& maxPartWeights) { - const MoveID numMoves = sharedData.moveTracker.numPerformedMoves(); - if (numMoves == 0) return 0; - - const vec& move_order = sharedData.moveTracker.moveOrder; + } 
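// Aside (illustration only, not part of the patch): BalanceAndBestIndexScan above
// follows TBB's parallel_scan body protocol: a splitting constructor, a pre_scan
// pass that only accumulates summaries, a final_scan pass that sees the correct
// prefix value, reverse_join to fold in the summary of the subrange to the left,
// and assign. A minimal body with the same shape computing a plain inclusive
// prefix sum; all names here are hypothetical and unrelated to Mt-KaHyPar types.
#include <cstddef>
#include <vector>
#include <tbb/blocked_range.h>
#include <tbb/parallel_scan.h>

struct PrefixSumBody {
  const std::vector<int>& in;
  std::vector<long long>& out;
  long long sum = 0; // summary: sum of all elements scanned by this body so far

  PrefixSumBody(const std::vector<int>& in_, std::vector<long long>& out_)
      : in(in_), out(out_) {}
  PrefixSumBody(PrefixSumBody& other, tbb::split) : in(other.in), out(other.out) {}

  // pre_scan: accumulate the subrange total only, no output is written yet
  void operator()(const tbb::blocked_range<size_t>& r, tbb::pre_scan_tag) {
    for (size_t i = r.begin(); i < r.end(); ++i) sum += in[i];
  }
  // final_scan: sum now starts from the true prefix value, so outputs are exact
  void operator()(const tbb::blocked_range<size_t>& r, tbb::final_scan_tag) {
    for (size_t i = r.begin(); i < r.end(); ++i) { sum += in[i]; out[i] = sum; }
  }
  void reverse_join(PrefixSumBody& lhs) { sum += lhs.sum; } // prepend left summary
  void assign(PrefixSumBody& other) { sum = other.sum; }
};

inline void prefix_sum_demo(const std::vector<int>& in, std::vector<long long>& out) {
  out.assign(in.size(), 0);
  PrefixSumBody body(in, out);
  tbb::parallel_scan(tbb::blocked_range<size_t>(0, in.size()), body);
}
// Per-subrange results that cannot travel through the summary alone (such as the
// best prefix found during final_scan) have to be collected via shared state,
// which is why the scan body above keeps a shared enumerable_thread_specific of
// local bests and combines them in finalize().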
- recalculateGains(phg, sharedData); - HEAVY_REFINEMENT_ASSERT(verifyGains(phg, sharedData)); + void assign(BalanceAndBestIndexScan &b) { gain_sum = b.gain_sum; } - BalanceAndBestIndexScan s(phg, move_order, partWeights, maxPartWeights); - // TODO set grain size in blocked_range? to avoid too many copies of part weights array. experiment with different values - tbb::parallel_scan(tbb::blocked_range(0, numMoves), s); - typename BalanceAndBestIndexScan::Prefix b = s.finalize(partWeights); + Prefix finalize(const vec &initial_part_weights) + { + Prefix res{ + 0, 0, *std::max_element(initial_part_weights.begin(), initial_part_weights.end()) + }; + for(const Prefix &x : *local_best) + { + res = std::min(res, x); + } + return res; + } +}; + +template +HyperedgeWeight GlobalRollback::revertToBestPrefixParallel( + PartitionedHypergraph &phg, FMSharedData &sharedData, + const vec &partWeights, + const std::vector &maxPartWeights) +{ + const MoveID numMoves = sharedData.moveTracker.numPerformedMoves(); + if(numMoves == 0) + return 0; + + const vec &move_order = sharedData.moveTracker.moveOrder; + + recalculateGains(phg, sharedData); + HEAVY_REFINEMENT_ASSERT(verifyGains(phg, sharedData)); + + BalanceAndBestIndexScan s(phg, move_order, partWeights, + maxPartWeights); + // TODO set grain size in blocked_range? to avoid too many copies of part weights array. + // experiment with different values + tbb::parallel_scan(tbb::blocked_range(0, numMoves), s); + typename BalanceAndBestIndexScan::Prefix b = + s.finalize(partWeights); + + tbb::parallel_for(b.best_index, numMoves, [&](const MoveID moveID) { + const Move &m = move_order[moveID]; + if(m.isValid()) + { + moveVertex(phg, m.node, m.to, m.from); + } + }); - tbb::parallel_for(b.best_index, numMoves, [&](const MoveID moveID) { - const Move& m = move_order[moveID]; - if (m.isValid()) { - moveVertex(phg, m.node, m.to, m.from); - } + // recompute penalty term values since they are potentially invalid + if constexpr(GainCache::invalidates_entries) + { + tbb::parallel_for(MoveID(0), numMoves, [&](const MoveID i) { + gain_cache.recomputeInvalidTerms(phg, move_order[i].node); }); + } - // recompute penalty term values since they are potentially invalid - if constexpr (GainCache::invalidates_entries) { - tbb::parallel_for(MoveID(0), numMoves, [&](const MoveID i) { - gain_cache.recomputeInvalidTerms(phg, move_order[i].node); - }); - } - - sharedData.moveTracker.reset(); + sharedData.moveTracker.reset(); - HEAVY_REFINEMENT_ASSERT(phg.checkTrackedPartitionInformation(gain_cache)); - return b.gain; - } + HEAVY_REFINEMENT_ASSERT(phg.checkTrackedPartitionInformation(gain_cache)); + return b.gain; +} - template - void GlobalRollback::recalculateGainForHyperedge(PartitionedHypergraph& phg, - FMSharedData& sharedData, - const HyperedgeID& e) { - GlobalMoveTracker& tracker = sharedData.moveTracker; - auto& r = ets_recalc_data.local(); - - // compute auxiliary data - for (HypernodeID v : phg.pins(e)) { - if (tracker.wasNodeMovedInThisRound(v)) { - const MoveID m_id = tracker.moveOfNode[v]; - const Move& m = tracker.getMove(m_id); - Rollback::updateMove(m_id, m, r); - // no change for remaining pins! 
- } else { - Rollback::updateNonMovedPinInBlock(phg.partID(v), r); - } +template +void GlobalRollback::recalculateGainForHyperedge( + PartitionedHypergraph &phg, FMSharedData &sharedData, const HyperedgeID &e) +{ + GlobalMoveTracker &tracker = sharedData.moveTracker; + auto &r = ets_recalc_data.local(); + + // compute auxiliary data + for(HypernodeID v : phg.pins(e)) + { + if(tracker.wasNodeMovedInThisRound(v)) + { + const MoveID m_id = tracker.moveOfNode[v]; + const Move &m = tracker.getMove(m_id); + Rollback::updateMove(m_id, m, r); + // no change for remaining pins! } + else + { + Rollback::updateNonMovedPinInBlock(phg.partID(v), r); + } + } - // distribute gains to pins - for (HypernodeID v : phg.pins(e)) { - if (tracker.wasNodeMovedInThisRound(v)) { - const MoveID m_id = tracker.moveOfNode[v]; - Move& m = tracker.getMove(m_id); + // distribute gains to pins + for(HypernodeID v : phg.pins(e)) + { + if(tracker.wasNodeMovedInThisRound(v)) + { + const MoveID m_id = tracker.moveOfNode[v]; + Move &m = tracker.getMove(m_id); - const HyperedgeWeight benefit = Rollback::benefit(phg, e, m_id, m, r);; - const HyperedgeWeight penalty = Rollback::penalty(phg, e, m_id, m, r); + const HyperedgeWeight benefit = Rollback::benefit(phg, e, m_id, m, r); + ; + const HyperedgeWeight penalty = Rollback::penalty(phg, e, m_id, m, r); - if ( benefit > 0 ) { - // increase gain of v by benefit - __atomic_fetch_add(&m.gain, benefit, __ATOMIC_RELAXED); - } + if(benefit > 0) + { + // increase gain of v by benefit + __atomic_fetch_add(&m.gain, benefit, __ATOMIC_RELAXED); + } - if ( penalty > 0 ) { - // decrease gain of v by penalty - __atomic_fetch_sub(&m.gain, penalty, __ATOMIC_RELAXED); - } + if(penalty > 0) + { + // decrease gain of v by penalty + __atomic_fetch_sub(&m.gain, penalty, __ATOMIC_RELAXED); } } + } - if (context.partition.k <= static_cast(2 * phg.edgeSize(e))) { - // this branch is an optimization. in case it is cheaper to iterate over the parts, do that - for (PartitionID i = 0; i < context.partition.k; ++i) { - r[i].reset(); + if(context.partition.k <= static_cast(2 * phg.edgeSize(e))) + { + // this branch is an optimization. 
in case it is cheaper to iterate over the parts, do + // that + for(PartitionID i = 0; i < context.partition.k; ++i) + { + r[i].reset(); + } + } + else + { + for(HypernodeID v : phg.pins(e)) + { + if(tracker.wasNodeMovedInThisRound(v)) + { + const Move &m = tracker.getMove(tracker.moveOfNode[v]); + r[m.from].reset(); + r[m.to].reset(); } - } else { - for (HypernodeID v : phg.pins(e)) { - if (tracker.wasNodeMovedInThisRound(v)) { - const Move& m = tracker.getMove(tracker.moveOfNode[v]); - r[m.from].reset(); - r[m.to].reset(); - } else { - r[phg.partID(v)].reset(); - } + else + { + r[phg.partID(v)].reset(); } } } +} + +template +void GlobalRollback::recalculateGainForHyperedgeViaAttributedGains( + PartitionedHypergraph &phg, FMSharedData &sharedData, const HyperedgeID &e) +{ + GlobalMoveTracker &tracker = sharedData.moveTracker; + ds::Bitset &connectivity_set = phg.deepCopyOfConnectivitySet(e); + ds::PinCountSnapshot pin_counts(phg.k(), phg.hypergraph().maxEdgeSize()); + for(const PartitionID &block : phg.connectivitySet(e)) + { + pin_counts.setPinCountInPart(block, phg.pinCountInPart(e, block)); + } + SynchronizedEdgeUpdate sync_update; + sync_update.he = e; + sync_update.edge_weight = phg.edgeWeight(e); + sync_update.edge_size = phg.edgeSize(e); + sync_update.target_graph = phg.targetGraph(); + sync_update.connectivity_set_after = &connectivity_set; + sync_update.pin_counts_after = &pin_counts; + + // Find all pins of hyperedge that were moved in this round + vec moved_pins; + for(const HypernodeID &pin : phg.pins(e)) + { + if(tracker.wasNodeMovedInThisRound(pin)) + { + moved_pins.push_back(pin); + } + } - template - void GlobalRollback::recalculateGainForHyperedgeViaAttributedGains(PartitionedHypergraph& phg, - FMSharedData& sharedData, - const HyperedgeID& e) { - GlobalMoveTracker& tracker = sharedData.moveTracker; - ds::Bitset& connectivity_set = phg.deepCopyOfConnectivitySet(e); - ds::PinCountSnapshot pin_counts(phg.k(), phg.hypergraph().maxEdgeSize()); - for ( const PartitionID& block : phg.connectivitySet(e) ) { - pin_counts.setPinCountInPart(block, phg.pinCountInPart(e, block)); + // Sort moves in decreasing order of execution + // => first entry is the node that was moved last in the hyperedge + std::sort(moved_pins.begin(), moved_pins.end(), + [&](const HypernodeID &lhs, const HypernodeID &rhs) { + return tracker.moveOfNode[lhs] > tracker.moveOfNode[rhs]; + }); + + // Revert moves and compute attributed gain + for(const HypernodeID &u : moved_pins) + { + const MoveID m_id = tracker.moveOfNode[u]; + Move &m = tracker.getMove(m_id); + sync_update.from = m.to; + sync_update.to = m.from; + sync_update.pin_count_in_from_part_after = + pin_counts.decrementPinCountInPart(sync_update.from); + sync_update.pin_count_in_to_part_after = + pin_counts.incrementPinCountInPart(sync_update.to); + if(sync_update.pin_count_in_from_part_after == 0) + { + ASSERT(connectivity_set.isSet(sync_update.from)); + connectivity_set.unset(sync_update.from); } + if(sync_update.pin_count_in_to_part_after == 1) + { + ASSERT(!connectivity_set.isSet(sync_update.to)); + connectivity_set.set(sync_update.to); + } + // This is the gain for reverting the move. + const HyperedgeWeight attributed_gain = AttributedGains::gain(sync_update); + // For recomputed gains, a postive gain means improvement. However, the opposite + // is the case for attributed gains. 
+ __atomic_fetch_add(&m.gain, attributed_gain, __ATOMIC_RELAXED); + } +} + +template +void GlobalRollback::recalculateGainForGraphEdgeViaAttributedGains( + PartitionedHypergraph &phg, FMSharedData &sharedData, const HyperedgeID &e) +{ + if(!phg.isSinglePin(e)) + { + GlobalMoveTracker &tracker = sharedData.moveTracker; SynchronizedEdgeUpdate sync_update; sync_update.he = e; sync_update.edge_weight = phg.edgeWeight(e); sync_update.edge_size = phg.edgeSize(e); sync_update.target_graph = phg.targetGraph(); - sync_update.connectivity_set_after = &connectivity_set; - sync_update.pin_counts_after = &pin_counts; - - // Find all pins of hyperedge that were moved in this round - vec moved_pins; - for ( const HypernodeID& pin : phg.pins(e) ) { - if ( tracker.wasNodeMovedInThisRound(pin) ) { - moved_pins.push_back(pin); - } - } - // Sort moves in decreasing order of execution - // => first entry is the node that was moved last in the hyperedge - std::sort(moved_pins.begin(), moved_pins.end(), - [&](const HypernodeID& lhs, const HypernodeID& rhs) { - return tracker.moveOfNode[lhs] > tracker.moveOfNode[rhs]; - }); - - // Revert moves and compute attributed gain - for ( const HypernodeID& u : moved_pins ) { - const MoveID m_id = tracker.moveOfNode[u]; - Move& m = tracker.getMove(m_id); - sync_update.from = m.to; - sync_update.to = m.from; - sync_update.pin_count_in_from_part_after = pin_counts.decrementPinCountInPart(sync_update.from); - sync_update.pin_count_in_to_part_after = pin_counts.incrementPinCountInPart(sync_update.to); - if ( sync_update.pin_count_in_from_part_after == 0 ) { - ASSERT(connectivity_set.isSet(sync_update.from)); - connectivity_set.unset(sync_update.from); - } - if ( sync_update.pin_count_in_to_part_after == 1 ) { - ASSERT(!connectivity_set.isSet(sync_update.to)); - connectivity_set.set(sync_update.to); - } - // This is the gain for reverting the move. - const HyperedgeWeight attributed_gain = AttributedGains::gain(sync_update); - // For recomputed gains, a postive gain means improvement. However, the opposite - // is the case for attributed gains. 
- __atomic_fetch_add(&m.gain, attributed_gain, __ATOMIC_RELAXED); + HypernodeID first_move = phg.edgeSource(e); + HypernodeID second_move = phg.edgeTarget(e); + if(!tracker.wasNodeMovedInThisRound(first_move) && + !tracker.wasNodeMovedInThisRound(second_move)) + { + // Both nodes were not moved in this round => nothing to do + return; } - } - - template - void GlobalRollback::recalculateGainForGraphEdgeViaAttributedGains(PartitionedHypergraph& phg, - FMSharedData& sharedData, - const HyperedgeID& e) { - if ( !phg.isSinglePin(e) ) { - GlobalMoveTracker& tracker = sharedData.moveTracker; - SynchronizedEdgeUpdate sync_update; - sync_update.he = e; - sync_update.edge_weight = phg.edgeWeight(e); - sync_update.edge_size = phg.edgeSize(e); - sync_update.target_graph = phg.targetGraph(); - - HypernodeID first_move = phg.edgeSource(e); - HypernodeID second_move = phg.edgeTarget(e); - if ( !tracker.wasNodeMovedInThisRound(first_move) && - !tracker.wasNodeMovedInThisRound(second_move) ) { - // Both nodes were not moved in this round => nothing to do - return; - } else if ( tracker.wasNodeMovedInThisRound(first_move) && - tracker.wasNodeMovedInThisRound(second_move) ) { - if ( tracker.moveOfNode[first_move] > tracker.moveOfNode[second_move] ) { - std::swap(first_move, second_move); - } - } else if ( !tracker.wasNodeMovedInThisRound(first_move) && - tracker.wasNodeMovedInThisRound(second_move) ) { + else if(tracker.wasNodeMovedInThisRound(first_move) && + tracker.wasNodeMovedInThisRound(second_move)) + { + if(tracker.moveOfNode[first_move] > tracker.moveOfNode[second_move]) + { std::swap(first_move, second_move); } + } + else if(!tracker.wasNodeMovedInThisRound(first_move) && + tracker.wasNodeMovedInThisRound(second_move)) + { + std::swap(first_move, second_move); + } - ASSERT(tracker.wasNodeMovedInThisRound(first_move)); - ASSERT(!tracker.wasNodeMovedInThisRound(second_move) || - (tracker.moveOfNode[first_move] < tracker.moveOfNode[second_move])); - Move& first_m = tracker.getMove(tracker.moveOfNode[first_move]); - // sentinel in case second node was not moved - Move tmp_second_m = Move { phg.partID(second_move), - phg.partID(second_move), second_move, 0 }; - Move& second_m = tracker.wasNodeMovedInThisRound(second_move) ? - tracker.getMove(tracker.moveOfNode[second_move]) : tmp_second_m; - - // Compute gain of first move - sync_update.from = first_m.from; - sync_update.to = first_m.to; - sync_update.pin_count_in_from_part_after = - first_m.from == second_m.from ? 1 : 0; - sync_update.pin_count_in_to_part_after = - first_m.to == second_m.from ? 2 : 1; - sync_update.block_of_other_node = second_m.from; + ASSERT(tracker.wasNodeMovedInThisRound(first_move)); + ASSERT(!tracker.wasNodeMovedInThisRound(second_move) || + (tracker.moveOfNode[first_move] < tracker.moveOfNode[second_move])); + Move &first_m = tracker.getMove(tracker.moveOfNode[first_move]); + // sentinel in case second node was not moved + Move tmp_second_m = + Move{ phg.partID(second_move), phg.partID(second_move), second_move, 0 }; + Move &second_m = tracker.wasNodeMovedInThisRound(second_move) ? + tracker.getMove(tracker.moveOfNode[second_move]) : + tmp_second_m; + + // Compute gain of first move + sync_update.from = first_m.from; + sync_update.to = first_m.to; + sync_update.pin_count_in_from_part_after = first_m.from == second_m.from ? 1 : 0; + sync_update.pin_count_in_to_part_after = first_m.to == second_m.from ? 
2 : 1; + sync_update.block_of_other_node = second_m.from; + const HyperedgeWeight attributed_gain = AttributedGains::gain(sync_update); + __atomic_fetch_add(&first_m.gain, -attributed_gain, __ATOMIC_RELAXED); + + if(tracker.wasNodeMovedInThisRound(second_move)) + { + // Compute gain of second move + sync_update.from = second_m.from; + sync_update.to = second_m.to; + sync_update.pin_count_in_from_part_after = first_m.to == second_m.from ? 1 : 0; + sync_update.pin_count_in_to_part_after = first_m.to == second_m.to ? 2 : 1; + sync_update.block_of_other_node = first_m.to; const HyperedgeWeight attributed_gain = AttributedGains::gain(sync_update); - __atomic_fetch_add(&first_m.gain, -attributed_gain, __ATOMIC_RELAXED); - - if ( tracker.wasNodeMovedInThisRound(second_move) ) { - // Compute gain of second move - sync_update.from = second_m.from; - sync_update.to = second_m.to; - sync_update.pin_count_in_from_part_after = - first_m.to == second_m.from ? 1 : 0; - sync_update.pin_count_in_to_part_after = - first_m.to == second_m.to ? 2 : 1; - sync_update.block_of_other_node = first_m.to; - const HyperedgeWeight attributed_gain = AttributedGains::gain(sync_update); - __atomic_fetch_add(&second_m.gain, -attributed_gain, __ATOMIC_RELAXED); - } + __atomic_fetch_add(&second_m.gain, -attributed_gain, __ATOMIC_RELAXED); } } +} - template - void GlobalRollback::recalculateGains(PartitionedHypergraph& phg, FMSharedData& sharedData) { - GlobalMoveTracker& tracker = sharedData.moveTracker; - - auto recalculate_and_distribute_for_hyperedge = [&](const HyperedgeID e) { - if constexpr ( Rollback::supports_parallel_rollback ) { - recalculateGainForHyperedge(phg, sharedData, e); - } else { - if constexpr ( PartitionedHypergraph::is_graph ) { - recalculateGainForGraphEdgeViaAttributedGains(phg, sharedData, e); - } else { - recalculateGainForHyperedgeViaAttributedGains(phg, sharedData, e); - } - } - }; +template +void GlobalRollback::recalculateGains(PartitionedHypergraph &phg, + FMSharedData &sharedData) +{ + GlobalMoveTracker &tracker = sharedData.moveTracker; - tbb::parallel_for(MoveID(0), tracker.numPerformedMoves(), [&](MoveID m_id) { - tracker.moveOrder[m_id].gain = 0; - }); + auto recalculate_and_distribute_for_hyperedge = [&](const HyperedgeID e) { + if constexpr(Rollback::supports_parallel_rollback) + { + recalculateGainForHyperedge(phg, sharedData, e); + } + else + { + if constexpr(PartitionedHypergraph::is_graph) + { + recalculateGainForGraphEdgeViaAttributedGains(phg, sharedData, e); + } + else + { + recalculateGainForHyperedgeViaAttributedGains(phg, sharedData, e); + } + } + }; - if (context.refinement.fm.iter_moves_on_recalc) { - tbb::parallel_for(0U, sharedData.moveTracker.numPerformedMoves(), [&](const MoveID local_move_id) { - const HypernodeID u = sharedData.moveTracker.moveOrder[local_move_id].node; - if (tracker.wasNodeMovedInThisRound(u)) { - for (HyperedgeID e : phg.incidentEdges(u)) { - // test-and-set whether this is the first time this hyperedge is encountered - uint32_t expected = last_recalc_round[phg.uniqueEdgeID(e)].load(std::memory_order_relaxed); - if (expected < round && last_recalc_round[phg.uniqueEdgeID(e)].exchange(round, std::memory_order_acquire) == expected) { - recalculate_and_distribute_for_hyperedge(e); + tbb::parallel_for(MoveID(0), tracker.numPerformedMoves(), + [&](MoveID m_id) { tracker.moveOrder[m_id].gain = 0; }); + + if(context.refinement.fm.iter_moves_on_recalc) + { + tbb::parallel_for( + 0U, sharedData.moveTracker.numPerformedMoves(), [&](const MoveID local_move_id) { 
+ const HypernodeID u = sharedData.moveTracker.moveOrder[local_move_id].node; + if(tracker.wasNodeMovedInThisRound(u)) + { + for(HyperedgeID e : phg.incidentEdges(u)) + { + // test-and-set whether this is the first time this hyperedge is encountered + uint32_t expected = + last_recalc_round[phg.uniqueEdgeID(e)].load(std::memory_order_relaxed); + if(expected < round && last_recalc_round[phg.uniqueEdgeID(e)].exchange( + round, std::memory_order_acquire) == expected) + { + recalculate_and_distribute_for_hyperedge(e); + } } } - } - }); + }); - // reset bits - if (++round == std::numeric_limits::max()) { - // should never happen on practical inputs. - last_recalc_round.assign(phg.initialNumEdges(), CAtomic(0)); - } - } else{ - tbb::parallel_for(0U, phg.initialNumEdges(), recalculate_and_distribute_for_hyperedge); + // reset bits + if(++round == std::numeric_limits::max()) + { + // should never happen on practical inputs. + last_recalc_round.assign(phg.initialNumEdges(), CAtomic(0)); } } + else + { + tbb::parallel_for(0U, phg.initialNumEdges(), + recalculate_and_distribute_for_hyperedge); + } +} - template - HyperedgeWeight GlobalRollback::revertToBestPrefixSequential( - PartitionedHypergraph& phg, - FMSharedData& sharedData, - const vec&, - const std::vector& maxPartWeights) { - - GlobalMoveTracker& tracker = sharedData.moveTracker; - const MoveID numMoves = tracker.numPerformedMoves(); - const vec& move_order = tracker.moveOrder; - - // revert all moves - tbb::parallel_for(0U, numMoves, [&](const MoveID localMoveID) { - const Move& m = move_order[localMoveID]; - if (m.isValid()) { - moveVertex(phg, m.node, m.to, m.from); - } - }); +template +HyperedgeWeight GlobalRollback::revertToBestPrefixSequential( + PartitionedHypergraph &phg, FMSharedData &sharedData, const vec &, + const std::vector &maxPartWeights) +{ + GlobalMoveTracker &tracker = sharedData.moveTracker; + const MoveID numMoves = tracker.numPerformedMoves(); + const vec &move_order = tracker.moveOrder; - size_t num_unbalanced_slots = 0; + // revert all moves + tbb::parallel_for(0U, numMoves, [&](const MoveID localMoveID) { + const Move &m = move_order[localMoveID]; + if(m.isValid()) + { + moveVertex(phg, m.node, m.to, m.from); + } + }); - size_t overloaded = 0; - for (PartitionID i = 0; i < context.partition.k; ++i) { - if (phg.partWeight(i) > maxPartWeights[i]) { - overloaded++; - } + size_t overloaded = 0; + for(PartitionID i = 0; i < context.partition.k; ++i) + { + if(phg.partWeight(i) > maxPartWeights[i]) + { + overloaded++; } + } - // roll forward sequentially - Gain best_gain = 0, gain_sum = 0; - MoveID best_index = 0; - auto attributed_gains = [&](const SynchronizedEdgeUpdate& sync_update) { - gain_sum -= AttributedGains::gain(sync_update); - }; - for (MoveID localMoveID = 0; localMoveID < numMoves; ++localMoveID) { - const Move& m = move_order[localMoveID]; - if (!m.isValid()) continue; - - const bool from_overloaded = phg.partWeight(m.from) > maxPartWeights[m.from]; - const bool to_overloaded = phg.partWeight(m.to) > maxPartWeights[m.to]; - phg.changeNodePart(gain_cache, m.node, m.from, m.to, - std::numeric_limits::max(), []{ }, attributed_gains); - if (from_overloaded && phg.partWeight(m.from) <= maxPartWeights[m.from]) { - overloaded--; - } - if (!to_overloaded && phg.partWeight(m.to) > maxPartWeights[m.to]) { - overloaded++; - } + // roll forward sequentially + Gain best_gain = 0, gain_sum = 0; + MoveID best_index = 0; + auto attributed_gains = [&](const SynchronizedEdgeUpdate &sync_update) { + gain_sum -= 
AttributedGains::gain(sync_update); + }; + for(MoveID localMoveID = 0; localMoveID < numMoves; ++localMoveID) + { + const Move &m = move_order[localMoveID]; + if(!m.isValid()) + continue; + + const bool from_overloaded = phg.partWeight(m.from) > maxPartWeights[m.from]; + const bool to_overloaded = phg.partWeight(m.to) > maxPartWeights[m.to]; + phg.changeNodePart( + gain_cache, m.node, m.from, m.to, std::numeric_limits::max(), + [] {}, attributed_gains); + if(from_overloaded && phg.partWeight(m.from) <= maxPartWeights[m.from]) + { + overloaded--; + } + if(!to_overloaded && phg.partWeight(m.to) > maxPartWeights[m.to]) + { + overloaded++; + } - if (overloaded > 0) { - num_unbalanced_slots++; - } + if(overloaded == 0 && gain_sum > best_gain) + { + best_index = localMoveID + 1; + best_gain = gain_sum; + } + } - if (overloaded == 0 && gain_sum > best_gain) { - best_index = localMoveID + 1; - best_gain = gain_sum; - } + // revert rejected moves again + tbb::parallel_for(best_index, numMoves, [&](const MoveID i) { + const Move &m = move_order[i]; + if(m.isValid()) + { + moveVertex(phg, m.node, m.to, m.from); } + }); - // revert rejected moves again - tbb::parallel_for(best_index, numMoves, [&](const MoveID i) { - const Move& m = move_order[i]; - if (m.isValid()) { - moveVertex(phg, m.node, m.to, m.from); - } + if constexpr(GainCache::invalidates_entries) + { + tbb::parallel_for(0U, numMoves, [&](const MoveID i) { + gain_cache.recomputeInvalidTerms(phg, move_order[i].node); }); + } - if constexpr (GainCache::invalidates_entries) { - tbb::parallel_for(0U, numMoves, [&](const MoveID i) { - gain_cache.recomputeInvalidTerms(phg, move_order[i].node); - }); - } + tracker.reset(); - tracker.reset(); + return best_gain; +} - return best_gain; - } +template +bool GlobalRollback::verifyGains(PartitionedHypergraph &phg, + FMSharedData &sharedData) +{ + vec &move_order = sharedData.moveTracker.moveOrder; + auto recompute_penalty_terms = [&] { + for(MoveID localMoveID = 0; localMoveID < sharedData.moveTracker.numPerformedMoves(); + ++localMoveID) + { + gain_cache.recomputeInvalidTerms(phg, move_order[localMoveID].node); + } + }; - template - bool GlobalRollback::verifyGains(PartitionedHypergraph& phg, FMSharedData& sharedData) { - vec& move_order = sharedData.moveTracker.moveOrder; + recompute_penalty_terms(); + phg.checkTrackedPartitionInformation(gain_cache); - auto recompute_penalty_terms = [&] { - for (MoveID localMoveID = 0; localMoveID < sharedData.moveTracker.numPerformedMoves(); ++localMoveID) { - gain_cache.recomputeInvalidTerms(phg, move_order[localMoveID].node); - } - }; + // revert all moves + for(MoveID localMoveID = 0; localMoveID < sharedData.moveTracker.numPerformedMoves(); + ++localMoveID) + { + const Move &m = sharedData.moveTracker.moveOrder[localMoveID]; + if(m.isValid()) + { + moveVertex(phg, m.node, m.to, m.from); + } + } - recompute_penalty_terms(); - phg.checkTrackedPartitionInformation(gain_cache); + recompute_penalty_terms(); - // revert all moves - for (MoveID localMoveID = 0; localMoveID < sharedData.moveTracker.numPerformedMoves(); ++localMoveID) { - const Move& m = sharedData.moveTracker.moveOrder[localMoveID]; - if (m.isValid()) { - moveVertex(phg, m.node, m.to, m.from); - } - } + // roll forward sequentially and check gains + for(MoveID localMoveID = 0; localMoveID < sharedData.moveTracker.numPerformedMoves(); + ++localMoveID) + { + const Move &m = sharedData.moveTracker.moveOrder[localMoveID]; + if(!m.isValid()) + continue; - recompute_penalty_terms(); - - // roll forward 
sequentially and check gains - for (MoveID localMoveID = 0; localMoveID < sharedData.moveTracker.numPerformedMoves(); ++localMoveID) { - const Move& m = sharedData.moveTracker.moveOrder[localMoveID]; - if (!m.isValid()) - continue; - - Gain gain = 0; - auto attributed_gains = [&](const SynchronizedEdgeUpdate& sync_update) { - gain -= AttributedGains::gain(sync_update); - }; - - ASSERT(gain_cache.penaltyTerm(m.node, phg.partID(m.node)) == gain_cache.recomputePenaltyTerm(phg, m.node)); - ASSERT(gain_cache.benefitTerm(m.node, m.to) == gain_cache.recomputeBenefitTerm(phg, m.node, m.to)); - const Gain gain_in_cache = gain_cache.gain(m.node, m.from, m.to); - unused(gain_in_cache); - - // const HyperedgeWeight objective_before_move = - // metrics::quality(phg, context, false); - phg.changeNodePart(gain_cache, m.node, m.from, m.to, - std::numeric_limits::max(), []{ }, attributed_gains); - // const HyperedgeWeight objective_after_move = - // metrics::quality(phg, context, false); - - // ASSERT(objective_after_move + gain == objective_before_move, - // V(gain) << V(m.gain) << V(objective_after_move) << V(objective_before_move)); - // ASSERT(objective_after_move + m.gain == objective_before_move, - // V(gain) << V(m.gain) << V(objective_after_move) << V(objective_before_move)); - ASSERT(gain == gain_in_cache); - ASSERT(gain == m.gain, V(gain) << V(m.gain)); - unused(gain); // unused(objective_before_move); unused(objective_after_move); // for release mode - } + Gain gain = 0; + auto attributed_gains = [&](const SynchronizedEdgeUpdate &sync_update) { + gain -= AttributedGains::gain(sync_update); + }; - recompute_penalty_terms(); - return true; + ASSERT(gain_cache.penaltyTerm(m.node, phg.partID(m.node)) == + gain_cache.recomputePenaltyTerm(phg, m.node)); + ASSERT(gain_cache.benefitTerm(m.node, m.to) == + gain_cache.recomputeBenefitTerm(phg, m.node, m.to)); + const Gain gain_in_cache = gain_cache.gain(m.node, m.from, m.to); + unused(gain_in_cache); + + // const HyperedgeWeight objective_before_move = + // metrics::quality(phg, context, false); + phg.changeNodePart( + gain_cache, m.node, m.from, m.to, std::numeric_limits::max(), + [] {}, attributed_gains); + // const HyperedgeWeight objective_after_move = + // metrics::quality(phg, context, false); + + // ASSERT(objective_after_move + gain == objective_before_move, + // V(gain) << V(m.gain) << V(objective_after_move) << V(objective_before_move)); + // ASSERT(objective_after_move + m.gain == objective_before_move, + // V(gain) << V(m.gain) << V(objective_after_move) << V(objective_before_move)); + ASSERT(gain == gain_in_cache); + ASSERT(gain == m.gain, V(gain) << V(m.gain)); + unused(gain); // unused(objective_before_move); unused(objective_after_move); // for + // release mode } - namespace { - #define GLOBAL_ROLLBACK(X) GlobalRollback - } + recompute_penalty_terms(); + return true; +} + +namespace { +#define GLOBAL_ROLLBACK(X) GlobalRollback +} - INSTANTIATE_CLASS_WITH_VALID_TRAITS(GLOBAL_ROLLBACK) +INSTANTIATE_CLASS_WITH_VALID_TRAITS(GLOBAL_ROLLBACK) } diff --git a/mt-kahypar/partition/refinement/fm/global_rollback.h b/mt-kahypar/partition/refinement/fm/global_rollback.h index 950412ad5..bd1b34970 100644 --- a/mt-kahypar/partition/refinement/fm/global_rollback.h +++ b/mt-kahypar/partition/refinement/fm/global_rollback.h @@ -30,11 +30,11 @@ #include "mt-kahypar/partition/refinement/fm/fm_commons.h" - namespace mt_kahypar { -template -class GlobalRollback { +template +class GlobalRollback +{ static constexpr bool enable_heavy_assert = false; using 
PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; @@ -44,77 +44,86 @@ class GlobalRollback { using RecalculationData = typename Rollback::RecalculationData; public: - explicit GlobalRollback(const HyperedgeID num_hyperedges, - const Context& context, - GainCache& gainCache) : - context(context), - gain_cache(gainCache), - max_part_weight_scaling(context.refinement.fm.rollback_balance_violation_factor), - ets_recalc_data([&] { return vec(context.partition.k); }), - last_recalc_round(), - round(1) { - if (context.refinement.fm.iter_moves_on_recalc && context.refinement.fm.rollback_parallel) { + explicit GlobalRollback(const HyperedgeID num_hyperedges, const Context &context, + GainCache &gainCache) : + context(context), + gain_cache(gainCache), + max_part_weight_scaling(context.refinement.fm.rollback_balance_violation_factor), + ets_recalc_data([&] { return vec(context.partition.k); }), + last_recalc_round(), round(1) + { + if(context.refinement.fm.iter_moves_on_recalc && + context.refinement.fm.rollback_parallel) + { last_recalc_round.resize(num_hyperedges, CAtomic(0)); } } - HyperedgeWeight revertToBestPrefix(PartitionedHypergraph& phg, - FMSharedData& sharedData, - const vec& partWeights, - const std::vector& maxPartWeights) { - if (context.refinement.fm.rollback_parallel) { + HyperedgeWeight revertToBestPrefix(PartitionedHypergraph &phg, FMSharedData &sharedData, + const vec &partWeights, + const std::vector &maxPartWeights) + { + if(context.refinement.fm.rollback_parallel) + { return revertToBestPrefixParallel(phg, sharedData, partWeights, maxPartWeights); - } else { + } + else + { return revertToBestPrefixSequential(phg, sharedData, partWeights, maxPartWeights); } } - HyperedgeWeight revertToBestPrefixParallel(PartitionedHypergraph& phg, - FMSharedData& sharedData, - const vec& partWeights, - const std::vector& maxPartWeights); - - void recalculateGainForHyperedge(PartitionedHypergraph& phg, - FMSharedData& sharedData, - const HyperedgeID& he); - void recalculateGainForHyperedgeViaAttributedGains(PartitionedHypergraph& phg, - FMSharedData& sharedData, - const HyperedgeID& he); - void recalculateGainForGraphEdgeViaAttributedGains(PartitionedHypergraph& phg, - FMSharedData& sharedData, - const HyperedgeID& he); - void recalculateGains(PartitionedHypergraph& phg, FMSharedData& sharedData); - - HyperedgeWeight revertToBestPrefixSequential(PartitionedHypergraph& phg, - FMSharedData& sharedData, - const vec&, - const std::vector& maxPartWeights); + HyperedgeWeight + revertToBestPrefixParallel(PartitionedHypergraph &phg, FMSharedData &sharedData, + const vec &partWeights, + const std::vector &maxPartWeights); + + void recalculateGainForHyperedge(PartitionedHypergraph &phg, FMSharedData &sharedData, + const HyperedgeID &he); + void recalculateGainForHyperedgeViaAttributedGains(PartitionedHypergraph &phg, + FMSharedData &sharedData, + const HyperedgeID &he); + void recalculateGainForGraphEdgeViaAttributedGains(PartitionedHypergraph &phg, + FMSharedData &sharedData, + const HyperedgeID &he); + void recalculateGains(PartitionedHypergraph &phg, FMSharedData &sharedData); + + HyperedgeWeight + revertToBestPrefixSequential(PartitionedHypergraph &phg, FMSharedData &sharedData, + const vec &, + const std::vector &maxPartWeights); MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void moveVertex(PartitionedHypergraph& phg, HypernodeID u, PartitionID from, PartitionID to) { + void moveVertex(PartitionedHypergraph &phg, HypernodeID u, PartitionID from, + PartitionID to) + { 
phg.changeNodePart(gain_cache, u, from, to); } - void changeNumberOfBlocks(const PartitionID new_k) { - for ( auto& recalc_data : ets_recalc_data ) { - if ( static_cast(new_k) > recalc_data.size() ) { + void changeNumberOfBlocks(const PartitionID new_k) + { + for(auto &recalc_data : ets_recalc_data) + { + if(static_cast(new_k) > recalc_data.size()) + { recalc_data.resize(new_k); } } } - bool verifyGains(PartitionedHypergraph& phg, FMSharedData& sharedData); + bool verifyGains(PartitionedHypergraph &phg, FMSharedData &sharedData); private: - const Context& context; + const Context &context; - GainCache& gain_cache; + GainCache &gain_cache; - // ! Factor to multiply max part weight with, in order to relax or disable the balance criterion. Set to zero for disabling + // ! Factor to multiply max part weight with, in order to relax or disable the balance + // criterion. Set to zero for disabling double max_part_weight_scaling; - tbb::enumerable_thread_specific< vec > ets_recalc_data; - vec> last_recalc_round; + tbb::enumerable_thread_specific > ets_recalc_data; + vec > last_recalc_round; uint32_t round; }; diff --git a/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.cpp b/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.cpp index 9d5884fe2..9345b8758 100644 --- a/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.cpp +++ b/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.cpp @@ -28,246 +28,308 @@ #include "mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h" #include "mt-kahypar/definitions.h" -#include "mt-kahypar/partition/refinement/gains/gain_definitions.h" #include "mt-kahypar/partition/refinement/fm/strategies/gain_cache_strategy.h" #include "mt-kahypar/partition/refinement/fm/strategies/unconstrained_strategy.h" +#include "mt-kahypar/partition/refinement/gains/gain_definitions.h" namespace mt_kahypar { - template - template - bool LocalizedKWayFM::findMoves(DispatchedFMStrategy& fm_strategy, PartitionedHypergraph& phg, - size_t taskID, size_t numSeeds) { - localMoves.clear(); - thisSearch = ++sharedData.nodeTracker.highestActiveSearchID; - - HypernodeID seedNode; - HypernodeID pushes = 0; - while (pushes < numSeeds && sharedData.refinementNodes.try_pop(seedNode, taskID)) { - if (sharedData.nodeTracker.tryAcquireNode(seedNode, thisSearch)) { - fm_strategy.insertIntoPQ(phg, gain_cache, seedNode); - pushes++; - } +template +template +bool LocalizedKWayFM::findMoves(DispatchedFMStrategy &fm_strategy, + PartitionedHypergraph &phg, + size_t taskID, size_t numSeeds) +{ + localMoves.clear(); + thisSearch = ++sharedData.nodeTracker.highestActiveSearchID; + + HypernodeID seedNode; + HypernodeID pushes = 0; + while(pushes < numSeeds && sharedData.refinementNodes.try_pop(seedNode, taskID)) + { + if(sharedData.nodeTracker.tryAcquireNode(seedNode, thisSearch)) + { + fm_strategy.insertIntoPQ(phg, gain_cache, seedNode); + pushes++; } + } - if (pushes > 0) { - deltaPhg.clear(); - deltaPhg.setPartitionedHypergraph(&phg); - delta_gain_cache.clear(); - internalFindMoves(phg, fm_strategy); - return true; - } else { - return false; + if(pushes > 0) + { + deltaPhg.clear(); + deltaPhg.setPartitionedHypergraph(&phg); + delta_gain_cache.clear(); + internalFindMoves(phg, fm_strategy); + return true; + } + else + { + return false; + } +} + +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE std::pair +heaviestPartAndWeight(const Partition &partition, const PartitionID k) +{ + PartitionID p = kInvalidPartition; + HypernodeWeight w = std::numeric_limits::min(); + for(PartitionID i 
= 0; i < k; ++i) + { + if(partition.partWeight(i) > w) + { + w = partition.partWeight(i); + p = i; } } + return std::make_pair(p, w); +} + +template +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void +LocalizedKWayFM::acquireOrUpdateNeighbors( + PHG &phg, CACHE &gain_cache, const Move &move, DispatchedFMStrategy &fm_strategy) +{ + auto updateOrAcquire = [&](const HypernodeID v) { + SearchID searchOfV = + sharedData.nodeTracker.searchOfNode[v].load(std::memory_order_relaxed); + if(searchOfV == thisSearch) + { + fm_strategy.updateGain(phg, gain_cache, v, move); + } + else if(sharedData.nodeTracker.tryAcquireNode(v, thisSearch)) + { + fm_strategy.insertIntoPQ(phg, gain_cache, v); + } + }; + + if constexpr(PartitionedHypergraph::is_graph) + { + // simplified case for graphs: neighbors can't be duplicated + for(HyperedgeID e : phg.incidentEdges(move.node)) + { + HypernodeID v = phg.edgeTarget(e); + if(has_fixed_vertices && phg.isFixed(v)) + continue; - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE std::pair - heaviestPartAndWeight(const Partition& partition, const PartitionID k) { - PartitionID p = kInvalidPartition; - HypernodeWeight w = std::numeric_limits::min(); - for (PartitionID i = 0; i < k; ++i) { - if (partition.partWeight(i) > w) { - w = partition.partWeight(i); - p = i; - } + updateOrAcquire(v); } - return std::make_pair(p, w); } - - template - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void LocalizedKWayFM::acquireOrUpdateNeighbors(PHG& phg, CACHE& gain_cache, const Move& move, - DispatchedFMStrategy& fm_strategy) { - auto updateOrAcquire = [&](const HypernodeID v) { - SearchID searchOfV = sharedData.nodeTracker.searchOfNode[v].load(std::memory_order_relaxed); - if (searchOfV == thisSearch) { - fm_strategy.updateGain(phg, gain_cache, v, move); - } else if (sharedData.nodeTracker.tryAcquireNode(v, thisSearch)) { - fm_strategy.insertIntoPQ(phg, gain_cache, v); - } - }; - - if constexpr (PartitionedHypergraph::is_graph) { - // simplified case for graphs: neighbors can't be duplicated - for (HyperedgeID e : phg.incidentEdges(move.node)) { - HypernodeID v = phg.edgeTarget(e); - if ( has_fixed_vertices && phg.isFixed(v) ) continue; - - updateOrAcquire(v); - } - } else { - // Note: only vertices incident to edges with gain changes can become new boundary vertices. - // Vertices that already were boundary vertices, can still be considered later since they are in the task queue - for (HyperedgeID e : edgesWithGainChanges) { - if (phg.edgeSize(e) < context.partition.ignore_hyperedge_size_threshold) { - for (HypernodeID v : phg.pins(e)) { - if ( has_fixed_vertices && phg.isFixed(v) ) continue; - - if (neighborDeduplicator[v] != deduplicationTime) { - updateOrAcquire(v); - neighborDeduplicator[v] = deduplicationTime; - } + else + { + // Note: only vertices incident to edges with gain changes can become new boundary + // vertices. 
Vertices that already were boundary vertices, can still be considered + // later since they are in the task queue + for(HyperedgeID e : edgesWithGainChanges) + { + if(phg.edgeSize(e) < context.partition.ignore_hyperedge_size_threshold) + { + for(HypernodeID v : phg.pins(e)) + { + if(has_fixed_vertices && phg.isFixed(v)) + continue; + + if(neighborDeduplicator[v] != deduplicationTime) + { + updateOrAcquire(v); + neighborDeduplicator[v] = deduplicationTime; } } } + } - if (++deduplicationTime == 0) { - neighborDeduplicator.assign(neighborDeduplicator.size(), 0); - deduplicationTime = 1; - } + if(++deduplicationTime == 0) + { + neighborDeduplicator.assign(neighborDeduplicator.size(), 0); + deduplicationTime = 1; } } +} + +template +template +void LocalizedKWayFM::internalFindMoves( + PartitionedHypergraph &phg, DispatchedFMStrategy &fm_strategy) +{ + StopRule stopRule(phg.initialNumNodes()); + Move move; + + Gain estimatedImprovement = 0; + Gain bestImprovement = 0; + + HypernodeWeight heaviestPartWeight = 0; + HypernodeWeight fromWeight = 0, toWeight = 0; + + while(!stopRule.searchShouldStop() && + sharedData.finishedTasks.load(std::memory_order_relaxed) < + sharedData.finishedTasksLimit) + { + + if(!fm_strategy.findNextMove(deltaPhg, delta_gain_cache, move)) + break; + sharedData.nodeTracker.deactivateNode(move.node, thisSearch); + + // skip if no target block available + if(move.to == kInvalidPartition) + { + continue; + } + bool expect_improvement = estimatedImprovement + move.gain > bestImprovement; + bool high_deg = + phg.nodeDegree(move.node) >= PartitionedHypergraph::HIGH_DEGREE_THRESHOLD; - template - template - void LocalizedKWayFM::internalFindMoves(PartitionedHypergraph& phg, - DispatchedFMStrategy& fm_strategy) { - StopRule stopRule(phg.initialNumNodes()); - Move move; - - Gain estimatedImprovement = 0; - Gain bestImprovement = 0; - - HypernodeWeight heaviestPartWeight = 0; - HypernodeWeight fromWeight = 0, toWeight = 0; - - while (!stopRule.searchShouldStop() - && sharedData.finishedTasks.load(std::memory_order_relaxed) < sharedData.finishedTasksLimit) { + // skip if high degree (unless it nets actual improvement; but don't apply on deltaPhg + // then) + if(!expect_improvement && high_deg) + { + continue; + } - if (!fm_strategy.findNextMove(deltaPhg, delta_gain_cache, move)) break; - sharedData.nodeTracker.deactivateNode(move.node, thisSearch); + edgesWithGainChanges + .clear(); // clear before move. delta_func feeds nets of moved vertex. + MoveID move_id = std::numeric_limits::max(); + bool moved = false; + const HypernodeWeight allowed_weight = + DispatchedFMStrategy::is_unconstrained ? + std::numeric_limits::max() : + context.partition.max_part_weights[move.to]; + + heaviestPartWeight = heaviestPartAndWeight(deltaPhg, context.partition.k).second; + fromWeight = deltaPhg.partWeight(move.from); + toWeight = deltaPhg.partWeight(move.to); + if(expect_improvement) + { + // since we will flush the move sequence, don't bother running it through the + // deltaPhg this is intended to allow moving high deg nodes (blow up hash tables) if + // they give an improvement. The nets affected by a gain cache update are collected + // when we apply this improvement on the global partition (used to expand the + // localized search and update the gain values). 
+ moved = toWeight + phg.nodeWeight(move.node) <= allowed_weight; + } + else + { + moved = deltaPhg.changeNodePart( + move.node, move.from, move.to, allowed_weight, + [&](const SynchronizedEdgeUpdate &sync_update) { + if(!PartitionedHypergraph::is_graph && + GainCache::triggersDeltaGainUpdate(sync_update)) + { + edgesWithGainChanges.push_back(sync_update.he); + } + delta_gain_cache.deltaGainUpdate(deltaPhg, sync_update); + }); + fm_strategy.applyMove(deltaPhg, delta_gain_cache, move); + } - // skip if no target block available - if (move.to == kInvalidPartition) { - continue; + if(moved) + { + estimatedImprovement += move.gain; + localMoves.emplace_back(move, move_id); + stopRule.update(move.gain); + bool improved_km1 = estimatedImprovement > bestImprovement; + bool improved_balance_less_equal_km1 = + estimatedImprovement >= bestImprovement && fromWeight == heaviestPartWeight && + toWeight + phg.nodeWeight(move.node) < heaviestPartWeight; + if(improved_km1 || improved_balance_less_equal_km1) + { + // Apply move sequence to global partition + for(size_t i = 0; i < localMoves.size(); ++i) + { + const Move &local_move = localMoves[i].first; + phg.changeNodePart( + gain_cache, local_move.node, local_move.from, local_move.to, + std::numeric_limits::max(), + [&] { sharedData.moveTracker.insertMove(local_move); }, + [&](const SynchronizedEdgeUpdate &) {}); + } + localMoves.clear(); + fm_strategy.flushLocalChanges(); + stopRule.reset(); + deltaPhg.clear(); // clear hashtables, save memory :) + delta_gain_cache.clear(); + bestImprovement = estimatedImprovement; } - bool expect_improvement = estimatedImprovement + move.gain > bestImprovement; - bool high_deg = phg.nodeDegree(move.node) >= PartitionedHypergraph::HIGH_DEGREE_THRESHOLD; - - // skip if high degree (unless it nets actual improvement; but don't apply on deltaPhg then) - if (!expect_improvement && high_deg) { - continue; + // no need to update our PQs if we stop anyways + if(stopRule.searchShouldStop() || + sharedData.finishedTasks.load(std::memory_order_relaxed) >= + sharedData.finishedTasksLimit) + { + break; } - edgesWithGainChanges.clear(); // clear before move. delta_func feeds nets of moved vertex. - MoveID move_id = std::numeric_limits::max(); - bool moved = false; - const HypernodeWeight allowed_weight = DispatchedFMStrategy::is_unconstrained ? std::numeric_limits::max() - : context.partition.max_part_weights[move.to]; - - heaviestPartWeight = heaviestPartAndWeight(deltaPhg, context.partition.k).second; - fromWeight = deltaPhg.partWeight(move.from); - toWeight = deltaPhg.partWeight(move.to); - if (expect_improvement) { - // since we will flush the move sequence, don't bother running it through the deltaPhg - // this is intended to allow moving high deg nodes (blow up hash tables) if they give an improvement. - // The nets affected by a gain cache update are collected when we apply this improvement on the - // global partition (used to expand the localized search and update the gain values). 
- moved = toWeight + phg.nodeWeight(move.node) <= allowed_weight; - } else { - moved = deltaPhg.changeNodePart(move.node, move.from, move.to, allowed_weight, - [&](const SynchronizedEdgeUpdate& sync_update) { - if (!PartitionedHypergraph::is_graph && GainCache::triggersDeltaGainUpdate(sync_update)) { - edgesWithGainChanges.push_back(sync_update.he); - } - delta_gain_cache.deltaGainUpdate(deltaPhg, sync_update); - }); - fm_strategy.applyMove(deltaPhg, delta_gain_cache, move); + if(phg.hasFixedVertices()) + { + acquireOrUpdateNeighbors(deltaPhg, delta_gain_cache, move, fm_strategy); } - - if (moved) { - estimatedImprovement += move.gain; - localMoves.emplace_back(move, move_id); - stopRule.update(move.gain); - bool improved_km1 = estimatedImprovement > bestImprovement; - bool improved_balance_less_equal_km1 = estimatedImprovement >= bestImprovement - && fromWeight == heaviestPartWeight - && toWeight + phg.nodeWeight(move.node) < heaviestPartWeight; - if (improved_km1 || improved_balance_less_equal_km1) { - // Apply move sequence to global partition - for (size_t i = 0; i < localMoves.size(); ++i) { - const Move& local_move = localMoves[i].first; - phg.changeNodePart( - gain_cache, local_move.node, local_move.from, local_move.to, - std::numeric_limits::max(), - [&] { sharedData.moveTracker.insertMove(local_move); }, - [&](const SynchronizedEdgeUpdate& ) {}); - } - localMoves.clear(); - fm_strategy.flushLocalChanges(); - stopRule.reset(); - deltaPhg.clear(); // clear hashtables, save memory :) - delta_gain_cache.clear(); - bestImprovement = estimatedImprovement; - } - - // no need to update our PQs if we stop anyways - if (stopRule.searchShouldStop() - || sharedData.finishedTasks.load(std::memory_order_relaxed) >= sharedData.finishedTasksLimit) { - break; - } - - if (phg.hasFixedVertices()) { - acquireOrUpdateNeighbors(deltaPhg, delta_gain_cache, move, fm_strategy); - } else { - acquireOrUpdateNeighbors(deltaPhg, delta_gain_cache, move, fm_strategy); - } - + else + { + acquireOrUpdateNeighbors(deltaPhg, delta_gain_cache, move, fm_strategy); } } - - fm_strategy.reset(); - } - - - template - void LocalizedKWayFM::changeNumberOfBlocks(const PartitionID new_k) { - deltaPhg.changeNumberOfBlocks(new_k); - blockPQ.resize(new_k); - for ( VertexPriorityQueue& pq : vertexPQs ) { - pq.setHandle(sharedData.vertexPQHandles.data(), sharedData.numberOfNodes); - } - while ( static_cast(new_k) > vertexPQs.size() ) { - vertexPQs.emplace_back(sharedData.vertexPQHandles.data(), sharedData.numberOfNodes); - } } - template - void LocalizedKWayFM::memoryConsumption(utils::MemoryTreeNode *parent) const { - ASSERT(parent); - - utils::MemoryTreeNode *localized_fm_node = parent->addChild("Localized k-Way FM"); - - utils::MemoryTreeNode *deduplicator_node = localized_fm_node->addChild("Deduplicator"); - deduplicator_node->updateSize(neighborDeduplicator.capacity() * sizeof(HypernodeID)); - utils::MemoryTreeNode *edges_to_activate_node = localized_fm_node->addChild("edgesWithGainChanges"); - edges_to_activate_node->updateSize(edgesWithGainChanges.capacity() * sizeof(HyperedgeID)); - - size_t vertex_pq_sizes = std::accumulate( - vertexPQs.begin(), vertexPQs.end(), 0, - [](size_t init, const VertexPriorityQueue& pq) { return init + pq.size_in_bytes(); } - ); - localized_fm_node->addChild("PQs", blockPQ.size_in_bytes() + vertex_pq_sizes); - - utils::MemoryTreeNode *local_moves_node = parent->addChild("Local FM Moves"); - local_moves_node->updateSize(localMoves.capacity() * sizeof(std::pair)); - - 
deltaPhg.memoryConsumption(localized_fm_node); - delta_gain_cache.memoryConsumption(localized_fm_node); + fm_strategy.reset(); +} + +template +void LocalizedKWayFM::changeNumberOfBlocks(const PartitionID new_k) +{ + deltaPhg.changeNumberOfBlocks(new_k); + blockPQ.resize(new_k); + for(VertexPriorityQueue &pq : vertexPQs) + { + pq.setHandle(sharedData.vertexPQHandles.data(), sharedData.numberOfNodes); } - - namespace { - #define LOCALIZED_KWAY_FM(X) LocalizedKWayFM; \ - template bool LocalizedKWayFM::findMoves(LocalUnconstrainedStrategy&, \ - typename LocalizedKWayFM::PartitionedHypergraph&, size_t, size_t); \ - template bool LocalizedKWayFM::findMoves(LocalGainCacheStrategy&, \ - typename LocalizedKWayFM::PartitionedHypergraph&, size_t, size_t) + while(static_cast(new_k) > vertexPQs.size()) + { + vertexPQs.emplace_back(sharedData.vertexPQHandles.data(), sharedData.numberOfNodes); } - - INSTANTIATE_CLASS_WITH_VALID_TRAITS(LOCALIZED_KWAY_FM) - -} // namespace mt_kahypar +} + +template +void LocalizedKWayFM::memoryConsumption( + utils::MemoryTreeNode *parent) const +{ + ASSERT(parent); + + utils::MemoryTreeNode *localized_fm_node = parent->addChild("Localized k-Way FM"); + + utils::MemoryTreeNode *deduplicator_node = localized_fm_node->addChild("Deduplicator"); + deduplicator_node->updateSize(neighborDeduplicator.capacity() * sizeof(HypernodeID)); + utils::MemoryTreeNode *edges_to_activate_node = + localized_fm_node->addChild("edgesWithGainChanges"); + edges_to_activate_node->updateSize(edgesWithGainChanges.capacity() * + sizeof(HyperedgeID)); + + size_t vertex_pq_sizes = + std::accumulate(vertexPQs.begin(), vertexPQs.end(), 0, + [](size_t init, const VertexPriorityQueue &pq) { + return init + pq.size_in_bytes(); + }); + localized_fm_node->addChild("PQs", blockPQ.size_in_bytes() + vertex_pq_sizes); + + utils::MemoryTreeNode *local_moves_node = parent->addChild("Local FM Moves"); + local_moves_node->updateSize(localMoves.capacity() * sizeof(std::pair)); + + deltaPhg.memoryConsumption(localized_fm_node); + delta_gain_cache.memoryConsumption(localized_fm_node); +} + +namespace { +#define LOCALIZED_KWAY_FM(X) \ + LocalizedKWayFM; \ + template bool LocalizedKWayFM::findMoves( \ + LocalUnconstrainedStrategy &, \ + typename LocalizedKWayFM::PartitionedHypergraph &, size_t, size_t); \ + template bool LocalizedKWayFM::findMoves( \ + LocalGainCacheStrategy &, typename LocalizedKWayFM::PartitionedHypergraph &, \ + size_t, size_t) +} + +INSTANTIATE_CLASS_WITH_VALID_TRAITS(LOCALIZED_KWAY_FM) + +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h b/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h index aaf009944..03b35ed3a 100644 --- a/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h +++ b/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h @@ -25,7 +25,6 @@ * SOFTWARE. 
******************************************************************************/ - #pragma once #include @@ -37,89 +36,92 @@ namespace mt_kahypar { - -template -class LocalizedKWayFM { +template +class LocalizedKWayFM +{ public: using PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; - private: +private: static constexpr size_t MAP_SIZE_LARGE = 16384; static constexpr size_t MAP_SIZE_MOVE_DELTA = 8192; using GainCache = typename GraphAndGainTypes::GainCache; using DeltaGainCache = typename GraphAndGainTypes::DeltaGainCache; - using DeltaPartitionedHypergraph = typename PartitionedHypergraph::template DeltaPartition; - using BlockPriorityQueue = ds::ExclusiveHandleHeap< ds::MaxHeap >; - using VertexPriorityQueue = ds::MaxHeap; // these need external handles + using DeltaPartitionedHypergraph = + typename PartitionedHypergraph::template DeltaPartition< + DeltaGainCache::requires_connectivity_set>; + using BlockPriorityQueue = ds::ExclusiveHandleHeap >; + using VertexPriorityQueue = + ds::MaxHeap; // these need external handles public: - explicit LocalizedKWayFM(const Context& context, - const HypernodeID numNodes, - FMSharedData& sharedData, - GainCache& gainCache) : - context(context), - thisSearch(0), - deltaPhg(context), - neighborDeduplicator(PartitionedHypergraph::is_graph ? 0 : numNodes, 0), - gain_cache(gainCache), - delta_gain_cache(gainCache), - sharedData(sharedData), - blockPQ(static_cast(context.partition.k)), - vertexPQs(static_cast(context.partition.k), - VertexPriorityQueue(sharedData.vertexPQHandles.data(), sharedData.numberOfNodes)) { + explicit LocalizedKWayFM(const Context &context, const HypernodeID numNodes, + FMSharedData &sharedData, GainCache &gainCache) : + context(context), + thisSearch(0), deltaPhg(context), + neighborDeduplicator(PartitionedHypergraph::is_graph ? 0 : numNodes, 0), + gain_cache(gainCache), delta_gain_cache(gainCache), sharedData(sharedData), + blockPQ(static_cast(context.partition.k)), + vertexPQs(static_cast(context.partition.k), + VertexPriorityQueue(sharedData.vertexPQHandles.data(), + sharedData.numberOfNodes)) + { const bool top_level = context.type == ContextType::main; delta_gain_cache.initialize(top_level ? 
MAP_SIZE_LARGE : MAP_SIZE_MOVE_DELTA); } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE DispatchedFMStrategy initializeDispatchedStrategy() { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE DispatchedFMStrategy initializeDispatchedStrategy() + { return DispatchedFMStrategy(context, sharedData, blockPQ, vertexPQs); } - template - bool findMoves(DispatchedFMStrategy& fm_strategy, PartitionedHypergraph& phg, size_t taskID, size_t numSeeds); + template + bool findMoves(DispatchedFMStrategy &fm_strategy, PartitionedHypergraph &phg, + size_t taskID, size_t numSeeds); - void memoryConsumption(utils::MemoryTreeNode* parent) const; + void memoryConsumption(utils::MemoryTreeNode *parent) const; void changeNumberOfBlocks(const PartitionID new_k); private: - template - void internalFindMoves(PartitionedHypergraph& phg, DispatchedFMStrategy& fm_strategy); + template + void internalFindMoves(PartitionedHypergraph &phg, DispatchedFMStrategy &fm_strategy); - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void acquireOrUpdateNeighbors(PHG& phg, CACHE& gain_cache, const Move& move, DispatchedFMStrategy& fm_strategy); + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + acquireOrUpdateNeighbors(PHG &phg, CACHE &gain_cache, const Move &move, + DispatchedFMStrategy &fm_strategy); - - private: - - const Context& context; +private: + const Context &context; // ! Unique search id associated with the current local search SearchID thisSearch; // ! Local data members required for one localized search run - //FMLocalData localData; - vec< std::pair > localMoves; + // FMLocalData localData; + vec > localMoves; // ! Wrapper around the global partitioned hypergraph, that allows // ! to perform moves non-visible for other local searches DeltaPartitionedHypergraph deltaPhg; - // ! Used after a move. Stores whether a neighbor of the just moved vertex has already been updated. + // ! Used after a move. Stores whether a neighbor of the just moved vertex has already + // been updated. vec neighborDeduplicator; HypernodeID deduplicationTime = 1; // ! Stores hyperedges whose pins's gains may have changed after vertex move vec edgesWithGainChanges; - GainCache& gain_cache; + GainCache &gain_cache; DeltaGainCache delta_gain_cache; - FMSharedData& sharedData; + FMSharedData &sharedData; // ! Priority Queue that contains for each block of the partition // ! 
the vertex with the best gain value diff --git a/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp b/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp index ae0b94de7..d3fe79b8d 100644 --- a/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp +++ b/mt-kahypar/partition/refinement/fm/multitry_kway_fm.cpp @@ -30,430 +30,517 @@ #include "mt-kahypar/partition/refinement/fm/multitry_kway_fm.h" #include "mt-kahypar/definitions.h" -#include "mt-kahypar/utils/utilities.h" -#include "mt-kahypar/partition/factories.h" // TODO removing this could make compilation a lot faster +#include "mt-kahypar/partition/factories.h" // TODO removing this could make compilation a lot faster #include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/refinement/gains/gain_definitions.h" -#include "mt-kahypar/utils/memory_tree.h" #include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/utils/memory_tree.h" +#include "mt-kahypar/utils/utilities.h" namespace mt_kahypar { - using ds::StreamingVector; - - template - MultiTryKWayFM::MultiTryKWayFM(const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const Context& c, - GainCache& gainCache, - IRebalancer& rb) : +using ds::StreamingVector; + +template +MultiTryKWayFM::MultiTryKWayFM(const HypernodeID num_hypernodes, + const HyperedgeID num_hyperedges, + const Context &c, GainCache &gainCache, + IRebalancer &rb) : initial_num_nodes(num_hypernodes), - context(c), - gain_cache(gainCache), - current_k(c.partition.k), + context(c), gain_cache(gainCache), current_k(c.partition.k), sharedData(num_hypernodes), - fm_strategy(FMStrategyFactory::getInstance().createObject(context.refinement.fm.algorithm, context, sharedData)), + fm_strategy(FMStrategyFactory::getInstance().createObject( + context.refinement.fm.algorithm, context, sharedData)), globalRollback(num_hyperedges, context, gainCache), ets_fm([&] { return constructLocalizedKWayFMSearch(); }), - tmp_move_order(num_hypernodes), - rebalancer(rb) { - if (context.refinement.fm.obey_minimal_parallelism) { - sharedData.finishedTasksLimit = std::min(UL(8), context.shared_memory.num_threads); - } + tmp_move_order(num_hypernodes), rebalancer(rb) +{ + if(context.refinement.fm.obey_minimal_parallelism) + { + sharedData.finishedTasksLimit = std::min(UL(8), context.shared_memory.num_threads); + } +} + +// helper function for rebalancing +std::vector setupMaxPartWeights(const Context &context) +{ + double max_part_weight_scaling = + context.refinement.fm.rollback_balance_violation_factor; + if(max_part_weight_scaling == 1.0) + { + return context.partition.max_part_weights; } - // helper function for rebalancing - std::vector setupMaxPartWeights(const Context& context) { - double max_part_weight_scaling = context.refinement.fm.rollback_balance_violation_factor; - if (max_part_weight_scaling == 1.0) { - return context.partition.max_part_weights; + std::vector max_part_weights = + context.partition.perfect_balance_part_weights; + if(max_part_weight_scaling == 0.0) + { + for(PartitionID i = 0; i < context.partition.k; ++i) + { + max_part_weights[i] = std::numeric_limits::max(); } - - std::vector max_part_weights = context.partition.perfect_balance_part_weights; - if (max_part_weight_scaling == 0.0) { - for (PartitionID i = 0; i < context.partition.k; ++i) { - max_part_weights[i] = std::numeric_limits::max(); - } - } else { - for (PartitionID i = 0; i < context.partition.k; ++i) { - max_part_weights[i] *= ( 1.0 + context.partition.epsilon * max_part_weight_scaling ); - } + } + else + { + 
for(PartitionID i = 0; i < context.partition.k; ++i) + { + max_part_weights[i] *= (1.0 + context.partition.epsilon * max_part_weight_scaling); } - return max_part_weights; } + return max_part_weights; +} + +template +bool MultiTryKWayFM::refineImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph, + const vec &refinement_nodes, Metrics &metrics, const double time_limit) +{ + PartitionedHypergraph &phg = utils::cast(hypergraph); + resizeDataStructuresForCurrentK(); + + Gain overall_improvement = 0; + size_t consecutive_rounds_with_too_little_improvement = 0; + enable_light_fm = false; + sharedData.release_nodes = context.refinement.fm.release_nodes; + double current_time_limit = time_limit; + tbb::task_group tg; + vec initialPartWeights(size_t(context.partition.k)); + std::vector max_part_weights = setupMaxPartWeights(context); + HighResClockTimepoint fm_start = std::chrono::high_resolution_clock::now(); + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); + + for(size_t round = 0; round < context.refinement.fm.multitry_rounds; ++round) + { // global multi try rounds + for(PartitionID i = 0; i < context.partition.k; ++i) + { + initialPartWeights[i] = phg.partWeight(i); + } - template - bool MultiTryKWayFM::refineImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec& refinement_nodes, - Metrics& metrics, - const double time_limit) { - PartitionedHypergraph& phg = utils::cast(hypergraph); - resizeDataStructuresForCurrentK(); - - Gain overall_improvement = 0; - size_t consecutive_rounds_with_too_little_improvement = 0; - enable_light_fm = false; - sharedData.release_nodes = context.refinement.fm.release_nodes; - double current_time_limit = time_limit; - tbb::task_group tg; - vec initialPartWeights(size_t(context.partition.k)); - std::vector max_part_weights = setupMaxPartWeights(context); - HighResClockTimepoint fm_start = std::chrono::high_resolution_clock::now(); - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - - for (size_t round = 0; round < context.refinement.fm.multitry_rounds; ++round) { // global multi try rounds - for (PartitionID i = 0; i < context.partition.k; ++i) { - initialPartWeights[i] = phg.partWeight(i); - } + const bool is_unconstrained = fm_strategy->isUnconstrainedRound(round); + if(is_unconstrained) + { + timer.start_timer("initialize_data_unconstrained", "Initialize Data for Unc. FM"); + sharedData.unconstrained.initialize(context, phg, gain_cache); + timer.stop_timer("initialize_data_unconstrained"); + } - const bool is_unconstrained = fm_strategy->isUnconstrainedRound(round); - if (is_unconstrained) { - timer.start_timer("initialize_data_unconstrained", "Initialize Data for Unc. 
FM"); - sharedData.unconstrained.initialize(context, phg, gain_cache); - timer.stop_timer("initialize_data_unconstrained"); - } + timer.start_timer("collect_border_nodes", "Collect Border Nodes"); + roundInitialization(phg, refinement_nodes); + timer.stop_timer("collect_border_nodes"); - timer.start_timer("collect_border_nodes", "Collect Border Nodes"); - roundInitialization(phg, refinement_nodes); - timer.stop_timer("collect_border_nodes"); + size_t num_border_nodes = sharedData.refinementNodes.unsafe_size(); + if(num_border_nodes == 0) + { + break; + } + size_t num_seeds = context.refinement.fm.num_seed_nodes; + if(context.type == ContextType::main && !refinement_nodes.empty() /* n-level */ + && num_border_nodes < 20 * context.shared_memory.num_threads) + { + num_seeds = num_border_nodes / (4 * context.shared_memory.num_threads); + num_seeds = std::min(num_seeds, context.refinement.fm.num_seed_nodes); + num_seeds = std::max(num_seeds, UL(1)); + } - size_t num_border_nodes = sharedData.refinementNodes.unsafe_size(); - if (num_border_nodes == 0) { - break; + timer.start_timer("find_moves", "Find Moves"); + size_t num_tasks = std::min( + num_border_nodes, size_t(TBBInitializer::instance().total_number_of_threads())); + sharedData.finishedTasks.store(0, std::memory_order_relaxed); + fm_strategy->findMoves(utils::localized_fm_cast(ets_fm), hypergraph, num_tasks, + num_seeds, round); + timer.stop_timer("find_moves"); + + if(is_unconstrained && !isBalanced(phg, max_part_weights)) + { + vec > moves_by_part; + + // compute rebalancing moves + timer.start_timer("rebalance_fm", "Rebalance"); + Metrics tmp_metrics; + ASSERT([&] { // correct quality only required for assertions + tmp_metrics.quality = metrics::quality(phg, context); + return true; + }()); + + if constexpr(GainCache::invalidates_entries) + { + tbb::parallel_for(MoveID(0), sharedData.moveTracker.numPerformedMoves(), + [&](const MoveID i) { + gain_cache.recomputeInvalidTerms( + phg, sharedData.moveTracker.moveOrder[i].node); + }); } - size_t num_seeds = context.refinement.fm.num_seed_nodes; - if (context.type == ContextType::main - && !refinement_nodes.empty() /* n-level */ - && num_border_nodes < 20 * context.shared_memory.num_threads) { - num_seeds = num_border_nodes / (4 * context.shared_memory.num_threads); - num_seeds = std::min(num_seeds, context.refinement.fm.num_seed_nodes); - num_seeds = std::max(num_seeds, UL(1)); - } - - timer.start_timer("find_moves", "Find Moves"); - size_t num_tasks = std::min(num_border_nodes, size_t(TBBInitializer::instance().total_number_of_threads())); - sharedData.finishedTasks.store(0, std::memory_order_relaxed); - fm_strategy->findMoves(utils::localized_fm_cast(ets_fm), hypergraph, - num_tasks, num_seeds, round); - timer.stop_timer("find_moves"); - - if (is_unconstrained && !isBalanced(phg, max_part_weights)) { - vec> moves_by_part; - - // compute rebalancing moves - timer.start_timer("rebalance_fm", "Rebalance"); - Metrics tmp_metrics; - ASSERT([&]{ // correct quality only required for assertions - tmp_metrics.quality = metrics::quality(phg, context); - return true; - }()); - - if constexpr (GainCache::invalidates_entries) { - tbb::parallel_for(MoveID(0), sharedData.moveTracker.numPerformedMoves(), [&](const MoveID i) { - gain_cache.recomputeInvalidTerms(phg, sharedData.moveTracker.moveOrder[i].node); - }); - } - HEAVY_REFINEMENT_ASSERT(phg.checkTrackedPartitionInformation(gain_cache)); - - tmp_metrics.imbalance = metrics::imbalance(phg, context); - rebalancer.refineAndOutputMoves(hypergraph, {}, 
moves_by_part, tmp_metrics, current_time_limit); - timer.stop_timer("rebalance_fm"); - - if (!moves_by_part.empty()) { - // compute new move sequence where each imbalanced move is immediately rebalanced - interleaveMoveSequenceWithRebalancingMoves(phg, initialPartWeights, max_part_weights, moves_by_part); - } + HEAVY_REFINEMENT_ASSERT(phg.checkTrackedPartitionInformation(gain_cache)); + + tmp_metrics.imbalance = metrics::imbalance(phg, context); + rebalancer.refineAndOutputMoves(hypergraph, {}, moves_by_part, tmp_metrics, + current_time_limit); + timer.stop_timer("rebalance_fm"); + + if(!moves_by_part.empty()) + { + // compute new move sequence where each imbalanced move is immediately rebalanced + interleaveMoveSequenceWithRebalancingMoves(phg, initialPartWeights, + max_part_weights, moves_by_part); } + } - timer.start_timer("rollback", "Rollback to Best Solution"); - HyperedgeWeight improvement = globalRollback.revertToBestPrefix(phg, sharedData, initialPartWeights, max_part_weights); - timer.stop_timer("rollback"); - - const double roundImprovementFraction = improvementFraction(improvement, - metrics.quality - overall_improvement); - overall_improvement += improvement; - if (roundImprovementFraction < context.refinement.fm.min_improvement) { - consecutive_rounds_with_too_little_improvement++; - } else { - consecutive_rounds_with_too_little_improvement = 0; - } - fm_strategy->reportImprovement(round, improvement, roundImprovementFraction); - - HighResClockTimepoint fm_timestamp = std::chrono::high_resolution_clock::now(); - const double elapsed_time = std::chrono::duration(fm_timestamp - fm_start).count(); - if (debug && context.type == ContextType::main) { - LOG << V(round) << V(improvement) << V(metrics::quality(phg, context)) - << V(metrics::imbalance(phg, context)) << V(num_border_nodes) << V(roundImprovementFraction) - << V(elapsed_time) << V(current_time_limit); - } + timer.start_timer("rollback", "Rollback to Best Solution"); + HyperedgeWeight improvement = globalRollback.revertToBestPrefix( + phg, sharedData, initialPartWeights, max_part_weights); + timer.stop_timer("rollback"); + + const double roundImprovementFraction = + improvementFraction(improvement, metrics.quality - overall_improvement); + overall_improvement += improvement; + if(roundImprovementFraction < context.refinement.fm.min_improvement) + { + consecutive_rounds_with_too_little_improvement++; + } + else + { + consecutive_rounds_with_too_little_improvement = 0; + } + fm_strategy->reportImprovement(round, improvement, roundImprovementFraction); + + HighResClockTimepoint fm_timestamp = std::chrono::high_resolution_clock::now(); + const double elapsed_time = + std::chrono::duration(fm_timestamp - fm_start).count(); + if(debug && context.type == ContextType::main) + { + LOG << V(round) << V(improvement) << V(metrics::quality(phg, context)) + << V(metrics::imbalance(phg, context)) << V(num_border_nodes) + << V(roundImprovementFraction) << V(elapsed_time) << V(current_time_limit); + } - // Enforce a time limit (based on k and coarsening time). - // Switch to more "light-weight" FM after reaching it the first time. Abort after second time. 
- if ( elapsed_time > current_time_limit ) { - if ( !enable_light_fm ) { - DBG << RED << "Multitry FM reached time limit => switch to Light FM Configuration" << END; - sharedData.release_nodes = false; - current_time_limit *= 2; - enable_light_fm = true; - } else { - DBG << RED << "Light version of Multitry FM reached time limit => ABORT" << END; - break; - } + // Enforce a time limit (based on k and coarsening time). + // Switch to more "light-weight" FM after reaching it the first time. Abort after + // second time. + if(elapsed_time > current_time_limit) + { + if(!enable_light_fm) + { + DBG << RED << "Multitry FM reached time limit => switch to Light FM Configuration" + << END; + sharedData.release_nodes = false; + current_time_limit *= 2; + enable_light_fm = true; } - - if ( (improvement <= 0 && (!context.refinement.fm.activate_unconstrained_dynamically || round > 1)) - || consecutive_rounds_with_too_little_improvement >= 2 ) { + else + { + DBG << RED << "Light version of Multitry FM reached time limit => ABORT" << END; break; } } - if (context.partition.show_memory_consumption && context.partition.verbose_output - && context.type == ContextType::main - && phg.initialNumNodes() == sharedData.moveTracker.moveOrder.size() /* top level */) { - printMemoryConsumption(); + if((improvement <= 0 && + (!context.refinement.fm.activate_unconstrained_dynamically || round > 1)) || + consecutive_rounds_with_too_little_improvement >= 2) + { + break; } - - metrics.quality -= overall_improvement; - metrics.imbalance = metrics::imbalance(phg, context); - HEAVY_REFINEMENT_ASSERT(phg.checkTrackedPartitionInformation(gain_cache)); - ASSERT(metrics.quality == metrics::quality(phg, context), - V(metrics.quality) << V(metrics::quality(phg, context))); - - return overall_improvement > 0; } - template - void MultiTryKWayFM::roundInitialization(PartitionedHypergraph& phg, - const vec& refinement_nodes) { - // clear border nodes - sharedData.refinementNodes.clear(); - - if ( refinement_nodes.empty() ) { - // log(n) level case - // iterate over all nodes and insert border nodes into task queue - tbb::parallel_for(tbb::blocked_range(0, phg.initialNumNodes()), - [&](const tbb::blocked_range& r) { - const int task_id = tbb::this_task_arena::current_thread_index(); - // In really rare cases, the tbb::this_task_arena::current_thread_index() - // function a thread id greater than max_concurrency which causes an - // segmentation fault if we do not perform the check here. This is caused by - // our working queue for border nodes with which we initialize the localized - // FM searches. For now, we do not know why this occurs but this prevents - // the segmentation fault. 
- if ( task_id >= 0 && task_id < TBBInitializer::instance().total_number_of_threads() ) { - for (HypernodeID u = r.begin(); u < r.end(); ++u) { - if (phg.nodeIsEnabled(u) && phg.isBorderNode(u) && !phg.isFixed(u)) { - sharedData.refinementNodes.safe_push(u, task_id); - } - } - } - }); - } else { - // n-level case - tbb::parallel_for(UL(0), refinement_nodes.size(), [&](const size_t i) { - const HypernodeID u = refinement_nodes[i]; - const int task_id = tbb::this_task_arena::current_thread_index(); - if ( task_id >= 0 && task_id < TBBInitializer::instance().total_number_of_threads() ) { - if (phg.nodeIsEnabled(u) && phg.isBorderNode(u) && !phg.isFixed(u)) { - sharedData.refinementNodes.safe_push(u, task_id); - } - } - }); - } - - // shuffle task queue if requested - if (context.refinement.fm.shuffle) { - sharedData.refinementNodes.shuffle(); - } - - // requesting new searches activates all nodes by raising the deactivated node marker - // also clears the array tracking search IDs in case of overflow - sharedData.nodeTracker.requestNewSearches(static_cast(sharedData.refinementNodes.unsafe_size())); + if(context.partition.show_memory_consumption && context.partition.verbose_output && + context.type == ContextType::main && + phg.initialNumNodes() == sharedData.moveTracker.moveOrder.size() /* top level */) + { + printMemoryConsumption(); } - template - void MultiTryKWayFM::interleaveMoveSequenceWithRebalancingMoves( - const PartitionedHypergraph& phg, - const vec& initialPartWeights, - const std::vector& max_part_weights, - vec>& rebalancing_moves_by_part) { - ASSERT(rebalancing_moves_by_part.size() == static_cast(context.partition.k)); - HEAVY_REFINEMENT_ASSERT([&] { - std::set moved_nodes; - for (PartitionID part = 0; part < context.partition.k; ++part) { - for (const Move& m: rebalancing_moves_by_part[part]) { - if (m.from != part || m.to != phg.partID(m.node) || moved_nodes.count(m.node) != 0) { - return false; - } - moved_nodes.insert(m.node); - } - } - return true; - }()); - - GlobalMoveTracker& move_tracker = sharedData.moveTracker; - // Check the rebalancing moves for nodes that are moved twice. Double moves violate the precondition of the global - // rollback, which requires that each node is moved at most once. 
Thus we "merge" the moves of any node - // that is moved twice (e.g., 0 -> 2 -> 1 becomes 0 -> 1) - for (PartitionID part = 0; part < context.partition.k; ++part) { - vec& moves = rebalancing_moves_by_part[part]; - tbb::parallel_for(UL(0), moves.size(), [&](const size_t i) { - Move& r_move = moves[i]; - if (r_move.isValid() && move_tracker.wasNodeMovedInThisRound(r_move.node)) { - ASSERT(r_move.to == phg.partID(r_move.node)); - Move& first_move = move_tracker.getMove(move_tracker.moveOfNode[r_move.node]); - ASSERT(r_move.node == first_move.node && r_move.from == first_move.to); - if (first_move.from == r_move.to) { - // if rebalancing undid the move, we simply delete it - move_tracker.moveOfNode[r_move.node] = 0; - first_move.invalidate(); - r_move.invalidate(); - } else { - // "merge" the moves - r_move.from = first_move.from; - first_move.invalidate(); - } + metrics.quality -= overall_improvement; + metrics.imbalance = metrics::imbalance(phg, context); + HEAVY_REFINEMENT_ASSERT(phg.checkTrackedPartitionInformation(gain_cache)); + ASSERT(metrics.quality == metrics::quality(phg, context), + V(metrics.quality) << V(metrics::quality(phg, context))); + + return overall_improvement > 0; +} + +template +void MultiTryKWayFM::roundInitialization( + PartitionedHypergraph &phg, const vec &refinement_nodes) +{ + // clear border nodes + sharedData.refinementNodes.clear(); + + if(refinement_nodes.empty()) + { + // log(n) level case + // iterate over all nodes and insert border nodes into task queue + tbb::parallel_for(tbb::blocked_range(0, phg.initialNumNodes()), + [&](const tbb::blocked_range &r) { + const int task_id = tbb::this_task_arena::current_thread_index(); + // In really rare cases, the + // tbb::this_task_arena::current_thread_index() function a thread + // id greater than max_concurrency which causes an segmentation + // fault if we do not perform the check here. This is caused by + // our working queue for border nodes with which we initialize the + // localized FM searches. For now, we do not know why this occurs + // but this prevents the segmentation fault. + if(task_id >= 0 && + task_id < TBBInitializer::instance().total_number_of_threads()) + { + for(HypernodeID u = r.begin(); u < r.end(); ++u) + { + if(phg.nodeIsEnabled(u) && phg.isBorderNode(u) && + !phg.isFixed(u)) + { + sharedData.refinementNodes.safe_push(u, task_id); + } + } + } + }); + } + else + { + // n-level case + tbb::parallel_for(UL(0), refinement_nodes.size(), [&](const size_t i) { + const HypernodeID u = refinement_nodes[i]; + const int task_id = tbb::this_task_arena::current_thread_index(); + if(task_id >= 0 && task_id < TBBInitializer::instance().total_number_of_threads()) + { + if(phg.nodeIsEnabled(u) && phg.isBorderNode(u) && !phg.isFixed(u)) + { + sharedData.refinementNodes.safe_push(u, task_id); } - }, tbb::static_partitioner()); - } - - // NOTE: We re-insert invalid rebalancing moves to ensure the gain cache is updated correctly by the global rollback - // For now we use a sequential implementation, which is probably fast enough (since this is a single scan trough - // the move sequence). We might replace it with a parallel implementation later. 
- vec current_part_weights = initialPartWeights; - vec current_rebalancing_move_index(context.partition.k, 0); - MoveID next_move_index = 0; - - auto insert_moves_to_balance_part = [&](const PartitionID part) { - if (current_part_weights[part] > max_part_weights[part]) { - insertMovesToBalanceBlock(phg, part, max_part_weights, rebalancing_moves_by_part, - next_move_index, current_part_weights, current_rebalancing_move_index); } - }; + }); + } - // it might be possible that the initial weights are already imbalanced - for (PartitionID part = 0; part < context.partition.k; ++part) { - insert_moves_to_balance_part(part); - } + // shuffle task queue if requested + if(context.refinement.fm.shuffle) + { + sharedData.refinementNodes.shuffle(); + } - const vec& move_order = move_tracker.moveOrder; - const MoveID num_moves = move_tracker.numPerformedMoves(); - for (MoveID move_id = 0; move_id < num_moves; ++move_id) { - const Move& m = move_order[move_id]; - if (m.isValid()) { - const HypernodeWeight hn_weight = phg.nodeWeight(m.node); - current_part_weights[m.from] -= hn_weight; - current_part_weights[m.to] += hn_weight; - tmp_move_order[next_move_index] = m; - ++next_move_index; - // insert rebalancing moves if necessary - insert_moves_to_balance_part(m.to); - } else { - // setting moveOfNode to zero is necessary because, after replacing the move sequence, - // wasNodeMovedInThisRound() could falsely return true otherwise - move_tracker.moveOfNode[m.node] = 0; + // requesting new searches activates all nodes by raising the deactivated node marker + // also clears the array tracking search IDs in case of overflow + sharedData.nodeTracker.requestNewSearches( + static_cast(sharedData.refinementNodes.unsafe_size())); +} + +template +void MultiTryKWayFM::interleaveMoveSequenceWithRebalancingMoves( + const PartitionedHypergraph &phg, const vec &initialPartWeights, + const std::vector &max_part_weights, + vec > &rebalancing_moves_by_part) +{ + ASSERT(rebalancing_moves_by_part.size() == static_cast(context.partition.k)); + HEAVY_REFINEMENT_ASSERT([&] { + std::set moved_nodes; + for(PartitionID part = 0; part < context.partition.k; ++part) + { + for(const Move &m : rebalancing_moves_by_part[part]) + { + if(m.from != part || m.to != phg.partID(m.node) || moved_nodes.count(m.node) != 0) + { + return false; + } + moved_nodes.insert(m.node); } } + return true; + }()); + + GlobalMoveTracker &move_tracker = sharedData.moveTracker; + // Check the rebalancing moves for nodes that are moved twice. Double moves violate the + // precondition of the global rollback, which requires that each node is moved at most + // once. 
Thus we "merge" the moves of any node that is moved twice (e.g., 0 -> 2 -> 1 + // becomes 0 -> 1) + for(PartitionID part = 0; part < context.partition.k; ++part) + { + vec &moves = rebalancing_moves_by_part[part]; + tbb::parallel_for( + UL(0), moves.size(), + [&](const size_t i) { + Move &r_move = moves[i]; + if(r_move.isValid() && move_tracker.wasNodeMovedInThisRound(r_move.node)) + { + ASSERT(r_move.to == phg.partID(r_move.node)); + Move &first_move = move_tracker.getMove(move_tracker.moveOfNode[r_move.node]); + ASSERT(r_move.node == first_move.node && r_move.from == first_move.to); + if(first_move.from == r_move.to) + { + // if rebalancing undid the move, we simply delete it + move_tracker.moveOfNode[r_move.node] = 0; + first_move.invalidate(); + r_move.invalidate(); + } + else + { + // "merge" the moves + r_move.from = first_move.from; + first_move.invalidate(); + } + } + }, + tbb::static_partitioner()); + } - // append any remaining rebalancing moves (rollback will decide whether to keep them) - for (PartitionID part = 0; part < context.partition.k; ++part) { - while (current_rebalancing_move_index[part] < rebalancing_moves_by_part[part].size()) { - const MoveID move_index_for_part = current_rebalancing_move_index[part]; - const Move& m = rebalancing_moves_by_part[part][move_index_for_part]; - ++current_rebalancing_move_index[part]; - tmp_move_order[next_move_index] = m; - ++next_move_index; - } + // NOTE: We re-insert invalid rebalancing moves to ensure the gain cache is updated + // correctly by the global rollback For now we use a sequential implementation, which is + // probably fast enough (since this is a single scan trough the move sequence). We might + // replace it with a parallel implementation later. + vec current_part_weights = initialPartWeights; + vec current_rebalancing_move_index(context.partition.k, 0); + MoveID next_move_index = 0; + + auto insert_moves_to_balance_part = [&](const PartitionID part) { + if(current_part_weights[part] > max_part_weights[part]) + { + insertMovesToBalanceBlock(phg, part, max_part_weights, rebalancing_moves_by_part, + next_move_index, current_part_weights, + current_rebalancing_move_index); } + }; - // update sharedData - const MoveID first_move_id = move_tracker.firstMoveID; - ASSERT(tmp_move_order.size() == move_tracker.moveOrder.size()); - - std::swap(move_tracker.moveOrder, tmp_move_order); - move_tracker.runningMoveID.store(first_move_id + next_move_index); - tbb::parallel_for(ID(0), next_move_index, [&](const MoveID move_id) { - const Move& m = move_tracker.moveOrder[move_id]; - if (m.isValid()) { - move_tracker.moveOfNode[m.node] = first_move_id + move_id; - } - }, tbb::static_partitioner()); + // it might be possible that the initial weights are already imbalanced + for(PartitionID part = 0; part < context.partition.k; ++part) + { + insert_moves_to_balance_part(part); } - template - void MultiTryKWayFM::insertMovesToBalanceBlock(const PartitionedHypergraph& phg, - const PartitionID block, - const std::vector& max_part_weights, - const vec>& rebalancing_moves_by_part, - MoveID& next_move_index, - vec& current_part_weights, - vec& current_rebalancing_move_index) { - while (current_part_weights[block] > max_part_weights[block] - && current_rebalancing_move_index[block] < rebalancing_moves_by_part[block].size()) { - const MoveID move_index_for_block = current_rebalancing_move_index[block]; - const Move& m = rebalancing_moves_by_part[block][move_index_for_block]; - ++current_rebalancing_move_index[block]; + const vec &move_order = 
move_tracker.moveOrder; + const MoveID num_moves = move_tracker.numPerformedMoves(); + for(MoveID move_id = 0; move_id < num_moves; ++move_id) + { + const Move &m = move_order[move_id]; + if(m.isValid()) + { + const HypernodeWeight hn_weight = phg.nodeWeight(m.node); + current_part_weights[m.from] -= hn_weight; + current_part_weights[m.to] += hn_weight; tmp_move_order[next_move_index] = m; ++next_move_index; - if (m.isValid()) { - const HypernodeWeight hn_weight = phg.nodeWeight(m.node); - current_part_weights[m.from] -= hn_weight; - current_part_weights[m.to] += hn_weight; - - if (current_part_weights[m.to] > max_part_weights[m.to]) { - // edge case: it is possible that the rebalancing move itself causes new imbalance -> call recursively - insertMovesToBalanceBlock(phg, m.to, max_part_weights, rebalancing_moves_by_part, - next_move_index, current_part_weights, current_rebalancing_move_index); - } - } + // insert rebalancing moves if necessary + insert_moves_to_balance_part(m.to); + } + else + { + // setting moveOfNode to zero is necessary because, after replacing the move + // sequence, wasNodeMovedInThisRound() could falsely return true otherwise + move_tracker.moveOfNode[m.node] = 0; } } - - template - void MultiTryKWayFM::initializeImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph) { - PartitionedHypergraph& phg = utils::cast(hypergraph); - - if (!gain_cache.isInitialized()) { - gain_cache.initializeGainCache(phg); + // append any remaining rebalancing moves (rollback will decide whether to keep them) + for(PartitionID part = 0; part < context.partition.k; ++part) + { + while(current_rebalancing_move_index[part] < rebalancing_moves_by_part[part].size()) + { + const MoveID move_index_for_part = current_rebalancing_move_index[part]; + const Move &m = rebalancing_moves_by_part[part][move_index_for_part]; + ++current_rebalancing_move_index[part]; + tmp_move_order[next_move_index] = m; + ++next_move_index; } } - template - void MultiTryKWayFM::resizeDataStructuresForCurrentK() { - // If the number of blocks changes, we resize data structures - // (can happen during deep multilevel partitioning) - if ( current_k != context.partition.k ) { - current_k = context.partition.k; - // Note that in general changing the number of blocks in the - // global rollback data structure should not resize any data structure - // as we initialize them with the final number of blocks. This is just a fallback - // if someone changes this in the future. 
- globalRollback.changeNumberOfBlocks(current_k); - sharedData.unconstrained.changeNumberOfBlocks(current_k); - for ( auto& localized_fm : ets_fm ) { - localized_fm.changeNumberOfBlocks(current_k); + // update sharedData + const MoveID first_move_id = move_tracker.firstMoveID; + ASSERT(tmp_move_order.size() == move_tracker.moveOrder.size()); + + std::swap(move_tracker.moveOrder, tmp_move_order); + move_tracker.runningMoveID.store(first_move_id + next_move_index); + tbb::parallel_for( + ID(0), next_move_index, + [&](const MoveID move_id) { + const Move &m = move_tracker.moveOrder[move_id]; + if(m.isValid()) + { + move_tracker.moveOfNode[m.node] = first_move_id + move_id; + } + }, + tbb::static_partitioner()); +} + +template +void MultiTryKWayFM::insertMovesToBalanceBlock( + const PartitionedHypergraph &phg, const PartitionID block, + const std::vector &max_part_weights, + const vec > &rebalancing_moves_by_part, MoveID &next_move_index, + vec ¤t_part_weights, + vec ¤t_rebalancing_move_index) +{ + while(current_part_weights[block] > max_part_weights[block] && + current_rebalancing_move_index[block] < rebalancing_moves_by_part[block].size()) + { + const MoveID move_index_for_block = current_rebalancing_move_index[block]; + const Move &m = rebalancing_moves_by_part[block][move_index_for_block]; + ++current_rebalancing_move_index[block]; + tmp_move_order[next_move_index] = m; + ++next_move_index; + if(m.isValid()) + { + const HypernodeWeight hn_weight = phg.nodeWeight(m.node); + current_part_weights[m.from] -= hn_weight; + current_part_weights[m.to] += hn_weight; + + if(current_part_weights[m.to] > max_part_weights[m.to]) + { + // edge case: it is possible that the rebalancing move itself causes new imbalance + // -> call recursively + insertMovesToBalanceBlock(phg, m.to, max_part_weights, rebalancing_moves_by_part, + next_move_index, current_part_weights, + current_rebalancing_move_index); } - gain_cache.changeNumberOfBlocks(current_k); } } +} - template - void MultiTryKWayFM::printMemoryConsumption() { - utils::MemoryTreeNode fm_memory("Multitry k-Way FM", utils::OutputType::MEGABYTE); +template +void MultiTryKWayFM::initializeImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph) +{ + PartitionedHypergraph &phg = utils::cast(hypergraph); - for (const auto& fm : ets_fm) { - fm.memoryConsumption(&fm_memory); + if(!gain_cache.isInitialized()) + { + gain_cache.initializeGainCache(phg); + } +} + +template +void MultiTryKWayFM::resizeDataStructuresForCurrentK() +{ + // If the number of blocks changes, we resize data structures + // (can happen during deep multilevel partitioning) + if(current_k != context.partition.k) + { + current_k = context.partition.k; + // Note that in general changing the number of blocks in the + // global rollback data structure should not resize any data structure + // as we initialize them with the final number of blocks. This is just a fallback + // if someone changes this in the future. 
+ globalRollback.changeNumberOfBlocks(current_k); + sharedData.unconstrained.changeNumberOfBlocks(current_k); + for(auto &localized_fm : ets_fm) + { + localized_fm.changeNumberOfBlocks(current_k); } - sharedData.memoryConsumption(&fm_memory); - fm_memory.finalize(); - - LOG << BOLD << "\n FM Memory Consumption" << END; - LOG << fm_memory; + gain_cache.changeNumberOfBlocks(current_k); } +} + +template +void MultiTryKWayFM::printMemoryConsumption() +{ + utils::MemoryTreeNode fm_memory("Multitry k-Way FM", utils::OutputType::MEGABYTE); - namespace { - #define MULTITRY_KWAY_FM(X) MultiTryKWayFM + for(const auto &fm : ets_fm) + { + fm.memoryConsumption(&fm_memory); } + sharedData.memoryConsumption(&fm_memory); + fm_memory.finalize(); + + LOG << BOLD << "\n FM Memory Consumption" << END; + LOG << fm_memory; +} + +namespace { +#define MULTITRY_KWAY_FM(X) MultiTryKWayFM +} - INSTANTIATE_CLASS_WITH_VALID_TRAITS(MULTITRY_KWAY_FM) +INSTANTIATE_CLASS_WITH_VALID_TRAITS(MULTITRY_KWAY_FM) } // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/fm/multitry_kway_fm.h b/mt-kahypar/partition/refinement/fm/multitry_kway_fm.h index 94fdf7740..6b2631ba1 100644 --- a/mt-kahypar/partition/refinement/fm/multitry_kway_fm.h +++ b/mt-kahypar/partition/refinement/fm/multitry_kway_fm.h @@ -31,17 +31,18 @@ #include "mt-kahypar/partition/context.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" -#include "mt-kahypar/partition/refinement/i_rebalancer.h" -#include "mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h" #include "mt-kahypar/partition/refinement/fm/global_rollback.h" +#include "mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h" #include "mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h" #include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" +#include "mt-kahypar/partition/refinement/i_rebalancer.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" namespace mt_kahypar { -template -class MultiTryKWayFM final : public IRefiner { +template +class MultiTryKWayFM final : public IRefiner +{ static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; @@ -53,63 +54,63 @@ class MultiTryKWayFM final : public IRefiner { static_assert(GainCache::TYPE != GainPolicy::none); - public: +public: + MultiTryKWayFM(const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, + const Context &c, GainCache &gainCache, IRebalancer &rb); - MultiTryKWayFM(const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const Context& c, - GainCache& gainCache, - IRebalancer& rb); - - MultiTryKWayFM(const HypernodeID num_hypernodes, - const HyperedgeID num_hyperedges, - const Context& c, - gain_cache_t gainCache, - IRebalancer& rb) : - MultiTryKWayFM(num_hypernodes, num_hyperedges, c, - GainCachePtr::cast(gainCache), rb) { } + MultiTryKWayFM(const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, + const Context &c, gain_cache_t gainCache, IRebalancer &rb) : + MultiTryKWayFM(num_hypernodes, num_hyperedges, c, + GainCachePtr::cast(gainCache), rb) + { + } void printMemoryConsumption(); - private: - bool refineImpl(mt_kahypar_partitioned_hypergraph_t& phg, - const vec& refinement_nodes, - Metrics& metrics, - double time_limit) final ; +private: + bool refineImpl(mt_kahypar_partitioned_hypergraph_t &phg, + const vec &refinement_nodes, Metrics &metrics, + double time_limit) final; - void initializeImpl(mt_kahypar_partitioned_hypergraph_t& phg) final ; + void initializeImpl(mt_kahypar_partitioned_hypergraph_t &phg) 
final; - void roundInitialization(PartitionedHypergraph& phg, - const vec& refinement_nodes); + void roundInitialization(PartitionedHypergraph &phg, + const vec &refinement_nodes); - void interleaveMoveSequenceWithRebalancingMoves(const PartitionedHypergraph& phg, - const vec& initialPartWeights, - const std::vector& max_part_weights, - vec>& rebalancing_moves_by_part); + void interleaveMoveSequenceWithRebalancingMoves( + const PartitionedHypergraph &phg, const vec &initialPartWeights, + const std::vector &max_part_weights, + vec > &rebalancing_moves_by_part); - void insertMovesToBalanceBlock(const PartitionedHypergraph& phg, + void insertMovesToBalanceBlock(const PartitionedHypergraph &phg, const PartitionID block, - const std::vector& max_part_weights, - const vec>& rebalancing_moves_by_part, - MoveID& next_move_index, - vec& current_part_weights, - vec& current_rebalancing_move_index); - - bool isBalanced(const PartitionedHypergraph& phg, const std::vector& max_part_weights) { - for (PartitionID i = 0; i < context.partition.k; ++i) { - if (phg.partWeight(i) > max_part_weights[i]) { + const std::vector &max_part_weights, + const vec > &rebalancing_moves_by_part, + MoveID &next_move_index, + vec ¤t_part_weights, + vec ¤t_rebalancing_move_index); + + bool isBalanced(const PartitionedHypergraph &phg, + const std::vector &max_part_weights) + { + for(PartitionID i = 0; i < context.partition.k; ++i) + { + if(phg.partWeight(i) > max_part_weights[i]) + { return false; } } return true; } - LocalizedFMSearch constructLocalizedKWayFMSearch() { + LocalizedFMSearch constructLocalizedKWayFMSearch() + { return LocalizedFMSearch(context, initial_num_nodes, sharedData, gain_cache); } - static double improvementFraction(Gain gain, HyperedgeWeight old_km1) { - if (old_km1 == 0) + static double improvementFraction(Gain gain, HyperedgeWeight old_km1) + { + if(old_km1 == 0) return 0; else return static_cast(gain) / static_cast(old_km1); @@ -119,15 +120,15 @@ class MultiTryKWayFM final : public IRefiner { bool enable_light_fm = false; const HypernodeID initial_num_nodes; - const Context& context; - GainCache& gain_cache; + const Context &context; + GainCache &gain_cache; PartitionID current_k; FMSharedData sharedData; std::unique_ptr fm_strategy; Rollback globalRollback; tbb::enumerable_thread_specific ets_fm; vec tmp_move_order; - IRebalancer& rebalancer; + IRebalancer &rebalancer; }; } // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.cpp b/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.cpp index 568cb29f9..ab5a3b2c1 100644 --- a/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.cpp +++ b/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.cpp @@ -32,45 +32,58 @@ namespace mt_kahypar { -template -bool SequentialTwoWayFmRefiner::refine(Metrics& best_metrics, std::mt19937& prng) { +template +bool SequentialTwoWayFmRefiner::refine(Metrics &best_metrics, + std::mt19937 &prng) +{ // Activate all border nodes _pq.clear(); _border_vertices.initialize(_phg); _nodes.clear(); - for (HypernodeID hn : _phg.nodes()) { - if ( !_phg.isFixed(hn) ) { + for(HypernodeID hn : _phg.nodes()) + { + if(!_phg.isFixed(hn)) + { _nodes.push_back(hn); - } else { + } + else + { _vertex_state[hn] = VertexState::MOVED; } } std::shuffle(_nodes.begin(), _nodes.end(), prng); - for ( const HypernodeID& hn : _nodes ) { + for(const HypernodeID &hn : _nodes) + { _vertex_state[hn] = VertexState::INACTIVE; activate(hn); } - for ( const HyperedgeID& he : 
_phg.edges() ) { + for(const HyperedgeID &he : _phg.edges()) + { _he_state[he] = HEState::FREE; } - auto border_vertex_update = [&](const SynchronizedEdgeUpdate& sync_update) { - if ( sync_update.edge_size > 1 ) { - if ( sync_update.pin_count_in_from_part_after == 0 ) { - _border_vertices.becameNonCutHyperedge(_phg, sync_update.he, _vertex_state); - } else if ( sync_update.pin_count_in_to_part_after == 1 ) { - _border_vertices.becameCutHyperedge(_phg, sync_update.he, _vertex_state); - } - } - }; + auto border_vertex_update = [&](const SynchronizedEdgeUpdate &sync_update) { + if(sync_update.edge_size > 1) + { + if(sync_update.pin_count_in_from_part_after == 0) + { + _border_vertices.becameNonCutHyperedge(_phg, sync_update.he, _vertex_state); + } + else if(sync_update.pin_count_in_to_part_after == 1) + { + _border_vertices.becameCutHyperedge(_phg, sync_update.he, _vertex_state); + } + } + }; parallel::scalable_vector performed_moves; HyperedgeWeight current_cut = best_metrics.quality; double current_imbalance = best_metrics.imbalance; size_t min_cut_idx = 0; StopRule stopping_rule(_phg.initialNumNodes()); - while ( !_pq.empty() && !stopping_rule.searchShouldStop() ) { + while(!_pq.empty() && !stopping_rule.searchShouldStop()) + { ASSERT(_pq.isEnabled(0) || _pq.isEnabled(1)); HEAVY_REFINEMENT_ASSERT(verifyPQState(), "PQ corrupted!"); @@ -88,8 +101,10 @@ bool SequentialTwoWayFmRefiner::refine(Metrics& best_metrics, std::m // Perform vertex move PartitionID from = _phg.partID(hn); _vertex_state[hn] = VertexState::MOVED; - if ( _phg.changeNodePart(hn, from, to, - _context.partition.max_part_weights[to], []{}, border_vertex_update) ) { + if(_phg.changeNodePart( + hn, from, to, _context.partition.max_part_weights[to], [] {}, + border_vertex_update)) + { // Perform delta gain updates updateNeighbors(hn, from, to); @@ -97,47 +112,50 @@ bool SequentialTwoWayFmRefiner::refine(Metrics& best_metrics, std::m // Remove all vertices that became internal from the PQ _border_vertices.doForAllVerticesThatBecameInternalVertices( - [&](const HypernodeID hn) { - ASSERT(!_border_vertices.isBorderNode(hn)); - ASSERT(_vertex_state[hn] == VertexState::ACTIVE); - ASSERT(_pq.contains(hn)); - _pq.remove(hn, 1 - _phg.partID(hn)); - _vertex_state[hn] = VertexState::INACTIVE; - } - ); + [&](const HypernodeID hn) { + ASSERT(!_border_vertices.isBorderNode(hn)); + ASSERT(_vertex_state[hn] == VertexState::ACTIVE); + ASSERT(_pq.contains(hn)); + _pq.remove(hn, 1 - _phg.partID(hn)); + _vertex_state[hn] = VertexState::INACTIVE; + }); // Insert all new border vertices into PQ _border_vertices.doForAllVerticesThatBecameBorderVertices( - [&](const HypernodeID hn) { - ASSERT(_border_vertices.isBorderNode(hn)); - ASSERT(_vertex_state[hn] == VertexState::INACTIVE); - activate(hn); - }); + [&](const HypernodeID hn) { + ASSERT(_border_vertices.isBorderNode(hn)); + ASSERT(_vertex_state[hn] == VertexState::INACTIVE); + activate(hn); + }); performed_moves.push_back(hn); - DBG << "Moved hypernode" << hn << "from block" << from << "to block" << to << "with gain" << gain; + DBG << "Moved hypernode" << hn << "from block" << from << "to block" << to + << "with gain" << gain; current_cut -= gain; current_imbalance = metrics::imbalance(_phg, _context); stopping_rule.update(gain); - const bool improved_cut_within_balance = (current_cut < best_metrics.quality) && - ( _phg.partWeight(0) - <= _context.partition.max_part_weights[0]) && - ( _phg.partWeight(1) - <= _context.partition.max_part_weights[1]); - const bool improved_balance_less_equal_cut = 
(current_imbalance < best_metrics.imbalance) && - (current_cut <= best_metrics.quality); - const bool move_is_feasible = ( _phg.partWeight(from) > 0) && - ( improved_cut_within_balance || - improved_balance_less_equal_cut ); - if ( move_is_feasible ) { - DBG << GREEN << "2Way FM improved cut from" << best_metrics.quality << "to" << current_cut - << "(Imbalance:" << current_imbalance << ")" << END; + const bool improved_cut_within_balance = + (current_cut < best_metrics.quality) && + (_phg.partWeight(0) <= _context.partition.max_part_weights[0]) && + (_phg.partWeight(1) <= _context.partition.max_part_weights[1]); + const bool improved_balance_less_equal_cut = + (current_imbalance < best_metrics.imbalance) && + (current_cut <= best_metrics.quality); + const bool move_is_feasible = + (_phg.partWeight(from) > 0) && + (improved_cut_within_balance || improved_balance_less_equal_cut); + if(move_is_feasible) + { + DBG << GREEN << "2Way FM improved cut from" << best_metrics.quality << "to" + << current_cut << "(Imbalance:" << current_imbalance << ")" << END; stopping_rule.reset(); best_metrics.quality = current_cut; best_metrics.imbalance = current_imbalance; min_cut_idx = performed_moves.size(); - } else { + } + else + { DBG << RED << "2Way FM decreased cut to" << current_cut << "(Imbalance:" << current_imbalance << ")" << END; } @@ -147,16 +165,20 @@ bool SequentialTwoWayFmRefiner::refine(Metrics& best_metrics, std::m // Perform rollback to best partition found during local search rollback(performed_moves, min_cut_idx); - HEAVY_REFINEMENT_ASSERT(best_metrics.quality == metrics::quality(_phg, Objective::cut, false), - V(best_metrics.quality) << V(metrics::quality(_phg, Objective::cut, false))); + HEAVY_REFINEMENT_ASSERT( + best_metrics.quality == metrics::quality(_phg, Objective::cut, false), + V(best_metrics.quality) << V(metrics::quality(_phg, Objective::cut, false))); HEAVY_REFINEMENT_ASSERT(best_metrics.imbalance == metrics::imbalance(_phg, _context), - V(best_metrics.imbalance) << V(metrics::imbalance(_phg, _context))); + V(best_metrics.imbalance) + << V(metrics::imbalance(_phg, _context))); return min_cut_idx > 0; } -template -void SequentialTwoWayFmRefiner::activate(const HypernodeID hn) { - if ( _border_vertices.isBorderNode(hn) ) { +template +void SequentialTwoWayFmRefiner::activate(const HypernodeID hn) +{ + if(_border_vertices.isBorderNode(hn)) + { ASSERT(_vertex_state[hn] == VertexState::INACTIVE); const PartitionID from = _phg.partID(hn); const PartitionID to = 1 - from; @@ -164,7 +186,8 @@ void SequentialTwoWayFmRefiner::activate(const HypernodeID hn) { ASSERT(!_pq.contains(hn, to), V(hn)); _vertex_state[hn] = VertexState::ACTIVE; _pq.insert(hn, to, computeGain(hn, from, to)); - if ( _phg.partWeight(to) < _context.partition.max_part_weights[to] ) { + if(_phg.partWeight(to) < _context.partition.max_part_weights[to]) + { _pq.enablePart(to); } } @@ -174,22 +197,28 @@ void SequentialTwoWayFmRefiner::activate(const HypernodeID hn) { * Performs delta gain update on all non locked hyperedges and * state transition of hyperedges. 
*/ -template +template void SequentialTwoWayFmRefiner::updateNeighbors(const HypernodeID hn, const PartitionID from, - const PartitionID to) { + const PartitionID to) +{ ASSERT(_phg.partID(hn) == to); - for ( const HyperedgeID& he : _phg.incidentEdges(hn) ) { + for(const HyperedgeID &he : _phg.incidentEdges(hn)) + { const PartitionID he_state = _he_state[he]; - if ( _phg.edgeSize(he) > 1 && he_state != HEState::LOCKED ) { + if(_phg.edgeSize(he) > 1 && he_state != HEState::LOCKED) + { deltaGainUpdate(he, from, to); // State Transition of hyperedge - if ( he_state == HEState::FREE ) { + if(he_state == HEState::FREE) + { // Vertex hn is the first vertex changed its block // in hyperedge he => free -> loose _he_state[he] = to; - } else if ( he_state == from ) { + } + else if(he_state == from) + { // An other vertex already changed its block in opposite direction // => hyperedge he can not be removed from cut any more and therefore // it can not affect the gains of its pins => loose -> locked @@ -200,10 +229,11 @@ void SequentialTwoWayFmRefiner::updateNeighbors(const HypernodeID hn } // ! Delta-Gain Update as decribed in [ParMar06]. -template +template void SequentialTwoWayFmRefiner::deltaGainUpdate(const HyperedgeID he, const PartitionID from, - const PartitionID to) { + const PartitionID to) +{ const HypernodeID pin_count_from_part_after_move = _phg.pinCountInPart(he, from); const HypernodeID pin_count_to_part_after_move = _phg.pinCountInPart(he, to); @@ -212,38 +242,58 @@ void SequentialTwoWayFmRefiner::deltaGainUpdate(const HyperedgeID he const bool increase_necessary = pin_count_from_part_after_move == 1; const bool decrease_necessary = pin_count_to_part_after_move == 2; - if ( he_became_cut_he || he_became_internal_he || - increase_necessary || decrease_necessary ) { + if(he_became_cut_he || he_became_internal_he || increase_necessary || + decrease_necessary) + { ASSERT(_phg.edgeSize(he) != 1, V(he)); const HyperedgeWeight he_weight = _phg.edgeWeight(he); - if (_phg.edgeSize(he) == 2) { - for (const HypernodeID& pin : _phg.pins(he)) { - if ( _vertex_state[pin] == VertexState::ACTIVE ) { + if(_phg.edgeSize(he) == 2) + { + for(const HypernodeID &pin : _phg.pins(he)) + { + if(_vertex_state[pin] == VertexState::ACTIVE) + { const char factor = (_phg.partID(pin) == from ? 
2 : -2); updatePin(pin, factor * he_weight); } } - } else if (he_became_cut_he) { - for (const HypernodeID& pin : _phg.pins(he)) { - if ( _vertex_state[pin] == VertexState::ACTIVE ) { + } + else if(he_became_cut_he) + { + for(const HypernodeID &pin : _phg.pins(he)) + { + if(_vertex_state[pin] == VertexState::ACTIVE) + { updatePin(pin, he_weight); } } - } else if (he_became_internal_he) { - for (const HypernodeID& pin : _phg.pins(he)) { - if ( _vertex_state[pin] == VertexState::ACTIVE ) { + } + else if(he_became_internal_he) + { + for(const HypernodeID &pin : _phg.pins(he)) + { + if(_vertex_state[pin] == VertexState::ACTIVE) + { updatePin(pin, -he_weight); } } - } else { - if ( increase_necessary || decrease_necessary ) { - for (const HypernodeID& pin : _phg.pins(he)) { - if ( _phg.partID(pin) == from ) { - if ( increase_necessary && _vertex_state[pin] == VertexState::ACTIVE ) { + } + else + { + if(increase_necessary || decrease_necessary) + { + for(const HypernodeID &pin : _phg.pins(he)) + { + if(_phg.partID(pin) == from) + { + if(increase_necessary && _vertex_state[pin] == VertexState::ACTIVE) + { updatePin(pin, he_weight); } - } else if ( decrease_necessary && _vertex_state[pin] == VertexState::ACTIVE ) { + } + else if(decrease_necessary && _vertex_state[pin] == VertexState::ACTIVE) + { updatePin(pin, -he_weight); } } @@ -252,36 +302,48 @@ void SequentialTwoWayFmRefiner::deltaGainUpdate(const HyperedgeID he } } -template -void SequentialTwoWayFmRefiner::updatePin(const HypernodeID pin, const Gain delta) { +template +void SequentialTwoWayFmRefiner::updatePin(const HypernodeID pin, + const Gain delta) +{ const PartitionID to = 1 - _phg.partID(pin); ASSERT(_vertex_state[pin] == VertexState::ACTIVE, V(pin)); ASSERT(_pq.contains(pin, to), V(pin) << V(to)); _pq.updateKeyBy(pin, to, delta); } -template +template void SequentialTwoWayFmRefiner::updatePQState(const PartitionID from, - const PartitionID to) { - if (_phg.partWeight(to) >= _context.partition.max_part_weights[to] ) { + const PartitionID to) +{ + if(_phg.partWeight(to) >= _context.partition.max_part_weights[to]) + { _pq.disablePart(to); } - if (_phg.partWeight(from) < _context.partition.max_part_weights[from] ) { + if(_phg.partWeight(from) < _context.partition.max_part_weights[from]) + { _pq.enablePart(from); } } -template -Gain SequentialTwoWayFmRefiner::computeGain(const HypernodeID hn, const PartitionID from, const PartitionID to) { +template +Gain SequentialTwoWayFmRefiner::computeGain(const HypernodeID hn, + const PartitionID from, + const PartitionID to) +{ ASSERT(_phg.partID(hn) == from); ASSERT(1 - from == to); Gain gain = 0; - for ( const HyperedgeID& he : _phg.incidentEdges(hn) ) { - if ( _phg.edgeSize(he) > 1 ) { - if ( _phg.pinCountInPart(he, to) == 0 ) { + for(const HyperedgeID &he : _phg.incidentEdges(hn)) + { + if(_phg.edgeSize(he) > 1) + { + if(_phg.pinCountInPart(he, to) == 0) + { gain -= _phg.edgeWeight(he); } - if ( _phg.pinCountInPart(he, from) == 1 ) { + if(_phg.pinCountInPart(he, from) == 1) + { gain += _phg.edgeWeight(he); } } @@ -289,10 +351,13 @@ Gain SequentialTwoWayFmRefiner::computeGain(const HypernodeID hn, co return gain; } -template -void SequentialTwoWayFmRefiner::rollback(const parallel::scalable_vector& performed_moves, - const size_t min_cut_idx) { - for ( size_t i = min_cut_idx; i < performed_moves.size(); ++i ) { +template +void SequentialTwoWayFmRefiner::rollback( + const parallel::scalable_vector &performed_moves, + const size_t min_cut_idx) +{ + for(size_t i = min_cut_idx; i < 
performed_moves.size(); ++i) + { const HypernodeID hn = performed_moves[i]; const PartitionID from = _phg.partID(hn); const PartitionID to = 1 - from; @@ -300,26 +365,38 @@ void SequentialTwoWayFmRefiner::rollback(const parallel::scalable_ve } } -template -bool SequentialTwoWayFmRefiner::verifyPQState() const { - for ( const HypernodeID& hn : _phg.nodes() ) { +template +bool SequentialTwoWayFmRefiner::verifyPQState() const +{ + for(const HypernodeID &hn : _phg.nodes()) + { const PartitionID to = 1 - _phg.partID(hn); - if ( _border_vertices.isBorderNode(hn) && _vertex_state[hn] != VertexState::MOVED ) { - if ( !_pq.contains(hn, to) ) { + if(_border_vertices.isBorderNode(hn) && _vertex_state[hn] != VertexState::MOVED) + { + if(!_pq.contains(hn, to)) + { LOG << "Hypernode" << hn << "is a border node and should be contained in the PQ"; return false; } - if ( _vertex_state[hn] != VertexState::ACTIVE ) { - LOG << "Hypernode" << hn << "is a border node and its not moved and its state should be ACTIVE"; + if(_vertex_state[hn] != VertexState::ACTIVE) + { + LOG << "Hypernode" << hn + << "is a border node and its not moved and its state should be ACTIVE"; return false; } - } else if ( !_border_vertices.isBorderNode(hn) && _vertex_state[hn] != VertexState::MOVED ) { - if ( _pq.contains(hn, to) ) { - LOG << "Hypernode" << hn << "is not a border node and should be not contained in PQ"; + } + else if(!_border_vertices.isBorderNode(hn) && _vertex_state[hn] != VertexState::MOVED) + { + if(_pq.contains(hn, to)) + { + LOG << "Hypernode" << hn + << "is not a border node and should be not contained in PQ"; return false; } - if ( _vertex_state[hn] != VertexState::INACTIVE ) { - LOG << "Hypernode" << hn << "is not a border node and its not moved and its state should be INACTIVE"; + if(_vertex_state[hn] != VertexState::INACTIVE) + { + LOG << "Hypernode" << hn + << "is not a border node and its not moved and its state should be INACTIVE"; return false; } } diff --git a/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.h b/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.h index 99f5acba0..0ee4c63e8 100644 --- a/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.h +++ b/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.h @@ -37,18 +37,20 @@ namespace mt_kahypar { /** - * Implements a classical sequential 2-way FM which is similiar to the one implemented in KaHyPar. - * Main difference is that we do not use a gain cache, since we do not want use the 2-way fm refiner - * in a multilevel context. It is used after each bisection during initial partitioning to refine - * a given bipartition. + * Implements a classical sequential 2-way FM which is similiar to the one implemented in + * KaHyPar. Main difference is that we do not use a gain cache, since we do not want use + * the 2-way fm refiner in a multilevel context. It is used after each bisection during + * initial partitioning to refine a given bipartition. */ -template -class SequentialTwoWayFmRefiner { +template +class SequentialTwoWayFmRefiner +{ static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - using KWayRefinementPQ = kahypar::ds::KWayPriorityQueue >; + using KWayRefinementPQ = + kahypar::ds::KWayPriorityQueue >; using PartitionedHypergraph = typename TypeTraits::PartitionedHypergraph; /** @@ -58,7 +60,8 @@ class SequentialTwoWayFmRefiner { * the hyperedge becomes LOCKED. 
LOCKED hyperedges have the property that they can not * be removed from cut and we can therefore skip delta gain updates. */ - enum HEState { + enum HEState + { FREE = std::numeric_limits::max() - 1, LOCKED = std::numeric_limits::max(), }; @@ -68,7 +71,8 @@ class SequentialTwoWayFmRefiner { * ACTIVE = Vertex is a border node and inserted into the PQ * MOVED = Vertex was moved during local search */ - enum class VertexState { + enum class VertexState + { INACTIVE, ACTIVE, MOVED @@ -76,95 +80,113 @@ class SequentialTwoWayFmRefiner { /** * Our partitioned hypergraph data structures does not track to how many cut hyperedges - * a vertex is incident to. This helper class tracks the number of incident cut hyperedges - * and gathers all nodes that became border or internal nodes during the last move on - * the hypergraph, which is required by our 2-way FM implementation, since only border vertices - * are eligble for moving. + * a vertex is incident to. This helper class tracks the number of incident cut + * hyperedges and gathers all nodes that became border or internal nodes during the last + * move on the hypergraph, which is required by our 2-way FM implementation, since only + * border vertices are eligble for moving. */ - class BorderVertexTracker { - - public: - explicit BorderVertexTracker(const HypernodeID& num_hypernodes) : - _num_hypernodes(num_hypernodes), - _num_incident_cut_hes(num_hypernodes, 0), - _hns_to_activate(), - _hns_to_remove_from_pq() { } + class BorderVertexTracker + { + + public: + explicit BorderVertexTracker(const HypernodeID &num_hypernodes) : + _num_hypernodes(num_hypernodes), _num_incident_cut_hes(num_hypernodes, 0), + _hns_to_activate(), _hns_to_remove_from_pq() + { + } - void initialize(const PartitionedHypergraph& phg) { + void initialize(const PartitionedHypergraph &phg) + { reset(); - for ( const HypernodeID& hn : phg.nodes() ) { - ASSERT(hn < _num_hypernodes); - for ( const HyperedgeID& he : phg.incidentEdges(hn) ) { - if ( phg.connectivity(he) > 1 ) { + for(const HypernodeID &hn : phg.nodes()) + { + ASSERT(hn < _num_hypernodes); + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { + if(phg.connectivity(he) > 1) + { ++_num_incident_cut_hes[hn]; } } } } - bool isBorderNode(const HypernodeID hn) const { - ASSERT(hn < _num_hypernodes); + bool isBorderNode(const HypernodeID hn) const + { + ASSERT(hn < _num_hypernodes); return _num_incident_cut_hes[hn] > 0; } - void becameCutHyperedge(const PartitionedHypergraph& phg, - const HyperedgeID he, - const parallel::scalable_vector& vertex_state) { + void becameCutHyperedge(const PartitionedHypergraph &phg, const HyperedgeID he, + const parallel::scalable_vector &vertex_state) + { // assertion doesn't hold for graph structure, because edge pin counts // are not updated until the move is completed - for ( const HypernodeID& pin : phg.pins(he) ) { - ASSERT(pin < _num_hypernodes); + for(const HypernodeID &pin : phg.pins(he)) + { + ASSERT(pin < _num_hypernodes); ASSERT(_num_incident_cut_hes[pin] <= phg.nodeDegree(pin)); ++_num_incident_cut_hes[pin]; - if ( _num_incident_cut_hes[pin] == 1 && vertex_state[pin] == VertexState::INACTIVE ) { + if(_num_incident_cut_hes[pin] == 1 && vertex_state[pin] == VertexState::INACTIVE) + { _hns_to_activate.push_back(pin); } } } - void becameNonCutHyperedge(const PartitionedHypergraph& phg, - const HyperedgeID he, - const parallel::scalable_vector& vertex_state) { + void becameNonCutHyperedge(const PartitionedHypergraph &phg, const HyperedgeID he, + const parallel::scalable_vector 
&vertex_state) + { // assertion doesn't hold for graph structure, because edge pin counts // are not updated until the move is completed - for ( const HypernodeID& pin : phg.pins(he) ) { - ASSERT(pin < _num_hypernodes); + for(const HypernodeID &pin : phg.pins(he)) + { + ASSERT(pin < _num_hypernodes); ASSERT(_num_incident_cut_hes[pin] > 0); --_num_incident_cut_hes[pin]; - // Note, it is possible that we insert border vertices here, since an other hyperedge - // can become cut after the update. However, we handle this later by an explicit check - // if the vertex is still an internal vertex (see doForAllVerticesThatBecameInternalVertices(...)). - if ( _num_incident_cut_hes[pin] == 0 && vertex_state[pin] == VertexState::ACTIVE ) { + // Note, it is possible that we insert border vertices here, since an other + // hyperedge can become cut after the update. However, we handle this later by an + // explicit check if the vertex is still an internal vertex (see + // doForAllVerticesThatBecameInternalVertices(...)). + if(_num_incident_cut_hes[pin] == 0 && vertex_state[pin] == VertexState::ACTIVE) + { _hns_to_remove_from_pq.push_back(pin); } } } // ! Iterates over all vertices that became border vertices after the last move - template - void doForAllVerticesThatBecameBorderVertices(const F& f) { - for ( const HypernodeID& hn : _hns_to_activate ) { + template + void doForAllVerticesThatBecameBorderVertices(const F &f) + { + for(const HypernodeID &hn : _hns_to_activate) + { f(hn); } _hns_to_activate.clear(); } // ! Iterates over all vertices that became internal vertices after the last move - template - void doForAllVerticesThatBecameInternalVertices(const F& f) { - for ( const HypernodeID& hn : _hns_to_remove_from_pq ) { + template + void doForAllVerticesThatBecameInternalVertices(const F &f) + { + for(const HypernodeID &hn : _hns_to_remove_from_pq) + { // Explicit border vertex check, because vector can contain fales positives // (see becameNonCutHyperedge(...)) - if ( !isBorderNode(hn) ) { + if(!isBorderNode(hn)) + { f(hn); } } _hns_to_remove_from_pq.clear(); } - private: - void reset() { - for ( HypernodeID hn = 0; hn < _num_hypernodes; ++hn ) { + private: + void reset() + { + for(HypernodeID hn = 0; hn < _num_hypernodes; ++hn) + { _num_incident_cut_hes[hn] = 0; } _hns_to_activate.clear(); @@ -177,53 +199,48 @@ class SequentialTwoWayFmRefiner { parallel::scalable_vector _hns_to_remove_from_pq; }; - public: - SequentialTwoWayFmRefiner(PartitionedHypergraph& phg, - const Context& context) : - _phg(phg), - _context(context), - _nodes(), - _pq(context.partition.k), - _border_vertices(phg.initialNumNodes()), - _vertex_state(phg.initialNumNodes(), VertexState::INACTIVE), - _he_state(phg.initialNumEdges(), HEState::FREE) { +public: + SequentialTwoWayFmRefiner(PartitionedHypergraph &phg, const Context &context) : + _phg(phg), _context(context), _nodes(), _pq(context.partition.k), + _border_vertices(phg.initialNumNodes()), + _vertex_state(phg.initialNumNodes(), VertexState::INACTIVE), + _he_state(phg.initialNumEdges(), HEState::FREE) + { ASSERT(_context.partition.k == 2); _pq.initialize(_phg.initialNumNodes()); } - bool refine(Metrics& best_metrics, std::mt19937& prng); - - private: + bool refine(Metrics &best_metrics, std::mt19937 &prng); +private: void activate(const HypernodeID hn); /** * Performs delta gain update on all non locked hyperedges and * state transition of hyperedges. 
*/ - void updateNeighbors(const HypernodeID hn, - const PartitionID from, + void updateNeighbors(const HypernodeID hn, const PartitionID from, const PartitionID to); // ! Delta-Gain Update as decribed in [ParMar06]. - void deltaGainUpdate(const HyperedgeID he, - const PartitionID from, + void deltaGainUpdate(const HyperedgeID he, const PartitionID from, const PartitionID to); - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void updatePin(const HypernodeID pin, const Gain delta); + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void updatePin(const HypernodeID pin, + const Gain delta); MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void updatePQState(const PartitionID from, const PartitionID to); Gain computeGain(const HypernodeID hn, const PartitionID from, const PartitionID to); - void rollback(const parallel::scalable_vector& performed_moves, + void rollback(const parallel::scalable_vector &performed_moves, const size_t min_cut_idx); bool verifyPQState() const; - PartitionedHypergraph& _phg; - const Context& _context; + PartitionedHypergraph &_phg; + const Context &_context; parallel::scalable_vector _nodes; KWayRefinementPQ _pq; diff --git a/mt-kahypar/partition/refinement/fm/stop_rule.h b/mt-kahypar/partition/refinement/fm/stop_rule.h index 4e2dbf1bd..b2f61c3ba 100644 --- a/mt-kahypar/partition/refinement/fm/stop_rule.h +++ b/mt-kahypar/partition/refinement/fm/stop_rule.h @@ -32,22 +32,29 @@ namespace mt_kahypar { // adaptive random walk stopping rule from KaHyPar -class StopRule { +class StopRule +{ public: - StopRule(HypernodeID numNodes) : beta(std::log(numNodes)) { } + StopRule(HypernodeID numNodes) : beta(std::log(numNodes)) {} - bool searchShouldStop() { - return (numSteps > beta) && (Mk == 0 || numSteps >= ( variance / (Mk*Mk) ) * stopFactor ); + bool searchShouldStop() + { + return (numSteps > beta) && + (Mk == 0 || numSteps >= (variance / (Mk * Mk)) * stopFactor); } - void update(Gain gain) { + void update(Gain gain) + { ++numSteps; - if (numSteps == 1) { + if(numSteps == 1) + { Mk = gain; MkPrevious = gain; SkPrevious = 0.0; variance = 0.0; - } else { + } + else + { Mk = MkPrevious + (gain - MkPrevious) / numSteps; Sk = SkPrevious + (gain - MkPrevious) * (gain - Mk); variance = Sk / (numSteps - 1.0); @@ -57,7 +64,8 @@ class StopRule { } } - void reset() { + void reset() + { numSteps = 0; variance = 0.0; } @@ -65,7 +73,7 @@ class StopRule { private: size_t numSteps = 0; double variance = 0.0, Mk = 0.0, MkPrevious = 0.0, Sk = 0.0, SkPrevious = 0.0; - const double alpha = 1.0; // make parameter if it doesn't work well + const double alpha = 1.0; // make parameter if it doesn't work well const double stopFactor = (alpha / 2.0) - 0.25; double beta; }; diff --git a/mt-kahypar/partition/refinement/fm/strategies/gain_cache_strategy.h b/mt-kahypar/partition/refinement/fm/strategies/gain_cache_strategy.h index 09fc6d619..bd6b8f17e 100644 --- a/mt-kahypar/partition/refinement/fm/strategies/gain_cache_strategy.h +++ b/mt-kahypar/partition/refinement/fm/strategies/gain_cache_strategy.h @@ -30,35 +30,40 @@ #include "mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h" #include "mt-kahypar/partition/refinement/fm/strategies/local_gain_cache_strategy.h" - namespace mt_kahypar { -template -class GainCacheStrategy: public IFMStrategy { +template +class GainCacheStrategy : public IFMStrategy +{ using Base = IFMStrategy; - public: +public: using LocalFM = LocalizedKWayFM; using PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; - GainCacheStrategy(const Context& context, FMSharedData& 
sharedData): - Base(context, sharedData) { } + GainCacheStrategy(const Context &context, FMSharedData &sharedData) : + Base(context, sharedData) + { + } - bool dispatchedFindMoves(LocalFM& local_fm, PartitionedHypergraph& phg, size_t task_id, size_t num_seeds, size_t) { - LocalGainCacheStrategy local_strategy = local_fm.template initializeDispatchedStrategy(); + bool dispatchedFindMoves(LocalFM &local_fm, PartitionedHypergraph &phg, size_t task_id, + size_t num_seeds, size_t) + { + LocalGainCacheStrategy local_strategy = + local_fm.template initializeDispatchedStrategy(); return local_fm.findMoves(local_strategy, phg, task_id, num_seeds); } - private: - virtual void findMovesImpl(localized_k_way_fm_t local_fm, mt_kahypar_partitioned_hypergraph_t& phg, - size_t num_tasks, size_t num_seeds, size_t round) final { - Base::findMovesWithConcreteStrategy( - local_fm, phg, num_tasks, num_seeds, round); +private: + virtual void findMovesImpl(localized_k_way_fm_t local_fm, + mt_kahypar_partitioned_hypergraph_t &phg, size_t num_tasks, + size_t num_seeds, size_t round) final + { + Base::findMovesWithConcreteStrategy(local_fm, phg, num_tasks, + num_seeds, round); } - virtual bool isUnconstrainedRoundImpl(size_t) const final { - return false; - } + virtual bool isUnconstrainedRoundImpl(size_t) const final { return false; } }; } diff --git a/mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h b/mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h index 303aa71ed..5170ba50f 100644 --- a/mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h +++ b/mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h @@ -29,8 +29,8 @@ #include #include "mt-kahypar/datastructures/streaming_vector.h" -#include "mt-kahypar/macros.h" #include "mt-kahypar/definitions.h" +#include "mt-kahypar/macros.h" #include "mt-kahypar/utils/cast.h" namespace mt_kahypar { @@ -38,54 +38,63 @@ namespace mt_kahypar { // TODO: this is still a bit hacky, is there any better way? struct localized_k_way_fm_s; -struct localized_k_way_fm_t { - localized_k_way_fm_s* local_fm; +struct localized_k_way_fm_t +{ + localized_k_way_fm_s *local_fm; mt_kahypar_partition_type_t type; }; namespace utils { // compare cast.h -template -localized_k_way_fm_t localized_fm_cast(tbb::enumerable_thread_specific& local_fm) { - return localized_k_way_fm_t { - reinterpret_cast(&local_fm), LocalFM::PartitionedHypergraph::TYPE }; +template +localized_k_way_fm_t localized_fm_cast(tbb::enumerable_thread_specific &local_fm) +{ + return localized_k_way_fm_t{ reinterpret_cast(&local_fm), + LocalFM::PartitionedHypergraph::TYPE }; } -template -tbb::enumerable_thread_specific& cast(localized_k_way_fm_t fm) { - if ( LocalFM::PartitionedHypergraph::TYPE != fm.type ) { +template +tbb::enumerable_thread_specific &cast(localized_k_way_fm_t fm) +{ + if(LocalFM::PartitionedHypergraph::TYPE != fm.type) + { ERR("Cannot cast local FM [" << typeToString(fm.type) << "to" - << typeToString(LocalFM::PartitionedHypergraph::TYPE) << "]"); + << typeToString(LocalFM::PartitionedHypergraph::TYPE) + << "]"); } - return *reinterpret_cast*>(fm.local_fm); + return *reinterpret_cast *>(fm.local_fm); } } // namespace utils - -class IFMStrategy { - public: +class IFMStrategy +{ +public: // !!! 
The following declarations should be present in subclasses: // using LocalFM = ...; // using PartitionedHypergraph = ...; - IFMStrategy(const IFMStrategy&) = delete; - IFMStrategy(IFMStrategy&&) = delete; - IFMStrategy & operator= (const IFMStrategy &) = delete; - IFMStrategy & operator= (IFMStrategy &&) = delete; + IFMStrategy(const IFMStrategy &) = delete; + IFMStrategy(IFMStrategy &&) = delete; + IFMStrategy &operator=(const IFMStrategy &) = delete; + IFMStrategy &operator=(IFMStrategy &&) = delete; virtual ~IFMStrategy() = default; - void findMoves(localized_k_way_fm_t local_fm, mt_kahypar_partitioned_hypergraph_t& phg, - size_t num_tasks, size_t num_seeds, size_t round) { + void findMoves(localized_k_way_fm_t local_fm, mt_kahypar_partitioned_hypergraph_t &phg, + size_t num_tasks, size_t num_seeds, size_t round) + { findMovesImpl(local_fm, phg, num_tasks, num_seeds, round); } - bool isUnconstrainedRound(size_t round) const { + bool isUnconstrainedRound(size_t round) const + { return isUnconstrainedRoundImpl(round); } - void reportImprovement(size_t round, Gain absolute_improvement, double relative_improvement) { + void reportImprovement(size_t round, Gain absolute_improvement, + double relative_improvement) + { reportImprovementImpl(round, absolute_improvement, relative_improvement); } @@ -93,45 +102,55 @@ class IFMStrategy { // bool dispatchedFindMoves(LocalFM& local_fm, PartitionedHypergraph& phg, // size_t task_id, size_t num_seeds, size_t round); - protected: - IFMStrategy(const Context& context, FMSharedData& sharedData): - context(context), sharedData(sharedData) { } +protected: + IFMStrategy(const Context &context, FMSharedData &sharedData) : + context(context), sharedData(sharedData) + { + } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void findMovesWithConcreteStrategy(localized_k_way_fm_t local_fm, mt_kahypar_partitioned_hypergraph_t& hypergraph, - size_t num_tasks, size_t num_seeds, size_t round) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + findMovesWithConcreteStrategy(localized_k_way_fm_t local_fm, + mt_kahypar_partitioned_hypergraph_t &hypergraph, + size_t num_tasks, size_t num_seeds, size_t round) + { using LocalFM = typename Derived::LocalFM; using PartitionedHypergraph = typename Derived::PartitionedHypergraph; - Derived& concrete_strategy = *static_cast(this); - tbb::enumerable_thread_specific& ets_fm = utils::cast(local_fm); - PartitionedHypergraph& phg = utils::cast(hypergraph); + Derived &concrete_strategy = *static_cast(this); + tbb::enumerable_thread_specific &ets_fm = utils::cast(local_fm); + PartitionedHypergraph &phg = utils::cast(hypergraph); tbb::task_group tg; auto task = [&](const size_t task_id) { - LocalFM& fm = ets_fm.local(); - while(sharedData.finishedTasks.load(std::memory_order_relaxed) < sharedData.finishedTasksLimit - && concrete_strategy.dispatchedFindMoves(fm, phg, task_id, num_seeds, round)) { /* keep running*/ } + LocalFM &fm = ets_fm.local(); + while(sharedData.finishedTasks.load(std::memory_order_relaxed) < + sharedData.finishedTasksLimit && + concrete_strategy.dispatchedFindMoves(fm, phg, task_id, num_seeds, round)) + { /* keep running*/ + } sharedData.finishedTasks.fetch_add(1, std::memory_order_relaxed); }; - for (size_t i = 0; i < num_tasks; ++i) { + for(size_t i = 0; i < num_tasks; ++i) + { tg.run(std::bind(task, i)); } tg.wait(); } - const Context& context; - FMSharedData& sharedData; + const Context &context; + FMSharedData &sharedData; - private: - virtual void findMovesImpl(localized_k_way_fm_t local_fm, 
mt_kahypar_partitioned_hypergraph_t& phg, - size_t num_tasks, size_t num_seeds, size_t round) = 0; +private: + virtual void findMovesImpl(localized_k_way_fm_t local_fm, + mt_kahypar_partitioned_hypergraph_t &phg, size_t num_tasks, + size_t num_seeds, size_t round) = 0; virtual bool isUnconstrainedRoundImpl(size_t round) const = 0; - virtual void reportImprovementImpl(size_t, Gain, double) { + virtual void reportImprovementImpl(size_t, Gain, double) + { // most strategies don't use this } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/fm/strategies/local_gain_cache_strategy.h b/mt-kahypar/partition/refinement/fm/strategies/local_gain_cache_strategy.h index 6d7e4ff4c..20bce39e5 100644 --- a/mt-kahypar/partition/refinement/fm/strategies/local_gain_cache_strategy.h +++ b/mt-kahypar/partition/refinement/fm/strategies/local_gain_cache_strategy.h @@ -31,244 +31,272 @@ #include "mt-kahypar/partition/refinement/fm/fm_commons.h" - namespace mt_kahypar { - /* - * LocalFMStrategy interface - * static constexpr bool uses_gain_cache - * static constexpr bool maintain_gain_cache_between_rounds - * static constexpr bool is_unconstrained - * - * Constructor(context, sharedData, blockPQ, vertexPQs, runStats) - * insertIntoPQ(phg, gain_cache, node) - * updateGain(phg, gain_cache, node, move) - * findNextMove(phg, gain_cache, move) - * applyMove(phg, gain_cache, move) - * reset() - * deltaGainUpdates(phg, gain_cache, sync_update) - * - */ +/* + * LocalFMStrategy interface + * static constexpr bool uses_gain_cache + * static constexpr bool maintain_gain_cache_between_rounds + * static constexpr bool is_unconstrained + * + * Constructor(context, sharedData, blockPQ, vertexPQs, runStats) + * insertIntoPQ(phg, gain_cache, node) + * updateGain(phg, gain_cache, node, move) + * findNextMove(phg, gain_cache, move) + * applyMove(phg, gain_cache, move) + * reset() + * deltaGainUpdates(phg, gain_cache, sync_update) + * + */ -class LocalGainCacheStrategy { +class LocalGainCacheStrategy +{ public: - - using BlockPriorityQueue = ds::ExclusiveHandleHeap< ds::MaxHeap >; - using VertexPriorityQueue = ds::MaxHeap; // these need external handles + using BlockPriorityQueue = ds::ExclusiveHandleHeap >; + using VertexPriorityQueue = + ds::MaxHeap; // these need external handles static constexpr bool uses_gain_cache = true; static constexpr bool maintain_gain_cache_between_rounds = true; static constexpr bool is_unconstrained = false; - LocalGainCacheStrategy(const Context& context, - FMSharedData& sharedData, - BlockPriorityQueue& blockPQ, - vec& vertexPQs) : + LocalGainCacheStrategy(const Context &context, FMSharedData &sharedData, + BlockPriorityQueue &blockPQ, + vec &vertexPQs) : context(context), - sharedData(sharedData), - blockPQ(blockPQ), - vertexPQs(vertexPQs) { } + sharedData(sharedData), blockPQ(blockPQ), vertexPQs(vertexPQs) + { + } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void insertIntoPQ(const PartitionedHypergraph& phg, - const GainCache& gain_cache, - const HypernodeID v) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void insertIntoPQ(const PartitionedHypergraph &phg, + const GainCache &gain_cache, + const HypernodeID v) + { const PartitionID pv = phg.partID(v); ASSERT(pv < context.partition.k); auto [target, gain] = computeBestTargetBlock(phg, gain_cache, v, pv, true); ASSERT(target < context.partition.k, V(target) << V(context.partition.k)); sharedData.targetPart[v] = target; - vertexPQs[pv].insert(v, gain); // blockPQ updates are done later, 
collectively. + vertexPQs[pv].insert(v, gain); // blockPQ updates are done later, collectively. } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void updateGain(const PartitionedHypergraph& phg, - const GainCache& gain_cache, - const HypernodeID v, - const Move& move) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + updateGain(const PartitionedHypergraph &phg, const GainCache &gain_cache, + const HypernodeID v, const Move &move) + { const PartitionID pv = phg.partID(v); ASSERT(vertexPQs[pv].contains(v)); const PartitionID designatedTargetV = sharedData.targetPart[v]; Gain gain = 0; PartitionID newTarget = kInvalidPartition; - // Note: During gain updates, we ignore the balance constraint for determining the best target block. - // This allows to use the optimized `bestOfThree` function for priority updates, which would not always - // be valid if balance is included (the best of the three could be overloaded). As soon as the move is - // pulled from the PQ, only balanced targets are considered - if (context.partition.k < 4 || designatedTargetV == move.from || designatedTargetV == move.to) { + // Note: During gain updates, we ignore the balance constraint for determining the + // best target block. This allows to use the optimized `bestOfThree` function for + // priority updates, which would not always be valid if balance is included (the best + // of the three could be overloaded). As soon as the move is pulled from the PQ, only + // balanced targets are considered + if(context.partition.k < 4 || designatedTargetV == move.from || + designatedTargetV == move.to) + { // penalty term of designatedTargetV is affected. // and may now be greater than that of other blocks --> recompute full std::tie(newTarget, gain) = computeBestTargetBlock(phg, gain_cache, v, pv, true); - } else { + } + else + { // penalty term of designatedTargetV is not affected. // only move.from and move.to may be better - std::tie(newTarget, gain) = bestOfThree(phg, gain_cache, - v, pv, { designatedTargetV, move.from, move.to }); + std::tie(newTarget, gain) = + bestOfThree(phg, gain_cache, v, pv, { designatedTargetV, move.from, move.to }); } sharedData.targetPart[v] = newTarget; vertexPQs[pv].adjustKey(v, gain); } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - bool findNextMove(const PartitionedHypergraph& phg, - const GainCache& gain_cache, - Move& m) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool + findNextMove(const PartitionedHypergraph &phg, const GainCache &gain_cache, Move &m) + { updatePQs(); - if (blockPQ.empty()) { + if(blockPQ.empty()) + { return false; } - while (true) { + while(true) + { const PartitionID from = blockPQ.top(); const HypernodeID u = vertexPQs[from].top(); const Gain estimated_gain = vertexPQs[from].topKey(); ASSERT(estimated_gain == blockPQ.topKey()); auto [to, gain] = computeBestTargetBlock(phg, gain_cache, u, phg.partID(u), false); - if (gain >= estimated_gain) { // accept any gain that is at least as good - m.node = u; m.to = to; m.from = from; + if(gain >= estimated_gain) + { // accept any gain that is at least as good + m.node = u; + m.to = to; + m.from = from; m.gain = gain; - vertexPQs[from].deleteTop(); // blockPQ updates are done later, collectively. + vertexPQs[from].deleteTop(); // blockPQ updates are done later, collectively. 
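// --------------------------------------------------------------------------
// NOTE (editorial sketch, not part of the original patch): the surrounding
// findNextMove() loop uses lazy re-evaluation. The key stored in the PQ is only
// a cached gain, so the current top candidate is recomputed and either accepted
// (the fresh gain is at least as good as the cached key) or re-inserted with the
// corrected key before the search continues. A minimal stand-alone version of
// the same pattern; node ids, gains and the recompute_gain callback are purely
// illustrative:
#include <functional>
#include <queue>
#include <utility>

inline int popBestLazily(std::priority_queue<std::pair<int, int>> &pq,
                         const std::function<int(int)> &recompute_gain)
{
  while(!pq.empty())
  {
    auto [cached_gain, node] = pq.top();
    pq.pop();
    const int fresh_gain = recompute_gain(node);
    if(fresh_gain >= cached_gain)
    {
      return node; // at least as good as advertised -> safe to accept
    }
    pq.push({ fresh_gain, node }); // key was stale -> reinsert and keep searching
  }
  return -1; // no candidate left
}
// --------------------------------------------------------------------------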
return true; - } else { + } + else + { vertexPQs[from].adjustKey(u, gain); sharedData.targetPart[u] = to; - if (vertexPQs[from].topKey() != blockPQ.keyOf(from)) { + if(vertexPQs[from].topKey() != blockPQ.keyOf(from)) + { blockPQ.adjustKey(from, vertexPQs[from].topKey()); } } } } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void applyMove(const PartitionedHypergraph&, const GainCache&, Move) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void applyMove(const PartitionedHypergraph &, + const GainCache &, Move) + { // nothing to do here } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void flushLocalChanges() { + void flushLocalChanges() + { // nothing to do here } - void reset() { + void reset() + { // release all nodes that were not moved - if (sharedData.release_nodes) { + if(sharedData.release_nodes) + { // Release all nodes contained in PQ - for (PartitionID i = 0; i < context.partition.k; ++i) { - for (PosT j = 0; j < vertexPQs[i].size(); ++j) { + for(PartitionID i = 0; i < context.partition.k; ++i) + { + for(PosT j = 0; j < vertexPQs[i].size(); ++j) + { const HypernodeID v = vertexPQs[i].at(j); sharedData.nodeTracker.releaseNode(v); } } } - for (PartitionID i = 0; i < context.partition.k; ++i) { + for(PartitionID i = 0; i < context.partition.k; ++i) + { vertexPQs[i].clear(); } blockPQ.clear(); } - - // We're letting the FM details implementation decide what happens here, since some may not want to do gain cache updates, - // but rather update gains in their PQs or something - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void deltaGainUpdates(PartitionedHypergraph& phg, - GainCache& gain_cache, - const SynchronizedEdgeUpdate& sync_update) { + // We're letting the FM details implementation decide what happens here, since some may + // not want to do gain cache updates, but rather update gains in their PQs or something + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + deltaGainUpdates(PartitionedHypergraph &phg, GainCache &gain_cache, + const SynchronizedEdgeUpdate &sync_update) + { gain_cache.deltaGainUpdate(phg, sync_update); } private: MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void updatePQs() { - for (PartitionID i = 0; i < context.partition.k; ++i) { - if (!vertexPQs[i].empty()) { + void updatePQs() + { + for(PartitionID i = 0; i < context.partition.k; ++i) + { + if(!vertexPQs[i].empty()) + { blockPQ.insertOrAdjustKey(i, vertexPQs[i].topKey()); - } else if (blockPQ.contains(i)) { + } + else if(blockPQ.contains(i)) + { blockPQ.remove(i); } } } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - std::pair computeBestTargetBlock(const PartitionedHypergraph& phg, - const GainCache& gain_cache, - const HypernodeID u, - const PartitionID from, - bool ignore_balance) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE std::pair + computeBestTargetBlock(const PartitionedHypergraph &phg, const GainCache &gain_cache, + const HypernodeID u, const PartitionID from, bool ignore_balance) + { const HypernodeWeight wu = phg.nodeWeight(u); const HypernodeWeight from_weight = phg.partWeight(from); PartitionID to = kInvalidPartition; HyperedgeWeight to_benefit = std::numeric_limits::min(); HypernodeWeight best_to_weight = from_weight - wu; - for ( const PartitionID& i : gain_cache.adjacentBlocks(u) ) { - if (i != from) { + for(const PartitionID &i : gain_cache.adjacentBlocks(u)) + { + if(i != from) + { const HypernodeWeight to_weight = phg.partWeight(i); const HyperedgeWeight penalty = gain_cache.benefitTerm(u, i); - if ( ( penalty > to_benefit || ( penalty == to_benefit && to_weight < best_to_weight ) ) 
&& - (ignore_balance || to_weight + wu <= context.partition.max_part_weights[i]) ) { + if((penalty > to_benefit || + (penalty == to_benefit && to_weight < best_to_weight)) && + (ignore_balance || to_weight + wu <= context.partition.max_part_weights[i])) + { to_benefit = penalty; to = i; best_to_weight = to_weight; } } } - const Gain gain = to != kInvalidPartition ? to_benefit - gain_cache.penaltyTerm(u, phg.partID(u)) - : std::numeric_limits::min(); + const Gain gain = to != kInvalidPartition ? + to_benefit - gain_cache.penaltyTerm(u, phg.partID(u)) : + std::numeric_limits::min(); return std::make_pair(to, gain); } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - std::pair bestOfThree(const PartitionedHypergraph& phg, - const GainCache& gain_cache, - HypernodeID u, - PartitionID from, - std::array parts) { - // We ignore balance here to avoid recomputations that involve all blocks (see `updateGain` for details) + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE std::pair + bestOfThree(const PartitionedHypergraph &phg, const GainCache &gain_cache, + HypernodeID u, PartitionID from, std::array parts) + { + // We ignore balance here to avoid recomputations that involve all blocks (see + // `updateGain` for details) const HypernodeWeight wu = phg.nodeWeight(u); const HypernodeWeight from_weight = phg.partWeight(from); PartitionID to = kInvalidPartition; HyperedgeWeight to_benefit = std::numeric_limits::min(); HypernodeWeight best_to_weight = from_weight - wu; - for (PartitionID i : parts) { - if (i != from && i != kInvalidPartition) { + for(PartitionID i : parts) + { + if(i != from && i != kInvalidPartition) + { const HypernodeWeight to_weight = phg.partWeight(i); const HyperedgeWeight penalty = gain_cache.benefitTerm(u, i); - if ( ( penalty > to_benefit || (penalty == to_benefit && to_weight < best_to_weight) ) ) { + if((penalty > to_benefit || + (penalty == to_benefit && to_weight < best_to_weight))) + { to_benefit = penalty; to = i; best_to_weight = to_weight; } } } - const Gain gain = to != kInvalidPartition ? to_benefit - gain_cache.penaltyTerm(u, phg.partID(u)) - : std::numeric_limits::min(); + const Gain gain = to != kInvalidPartition ? + to_benefit - gain_cache.penaltyTerm(u, phg.partID(u)) : + std::numeric_limits::min(); return std::make_pair(to, gain); } - const Context& context; + const Context &context; protected: - FMSharedData& sharedData; + FMSharedData &sharedData; // ! Priority Queue that contains for each block of the partition // ! the vertex with the best gain value - BlockPriorityQueue& blockPQ; + BlockPriorityQueue &blockPQ; // ! From PQs -> For each block it contains the vertices (contained // ! in that block) touched by the current local search associated // ! 
with their gain values - vec& vertexPQs; + vec &vertexPQs; }; } diff --git a/mt-kahypar/partition/refinement/fm/strategies/local_unconstrained_strategy.h b/mt-kahypar/partition/refinement/fm/strategies/local_unconstrained_strategy.h index 35e73a481..f52398c87 100644 --- a/mt-kahypar/partition/refinement/fm/strategies/local_unconstrained_strategy.h +++ b/mt-kahypar/partition/refinement/fm/strategies/local_unconstrained_strategy.h @@ -31,239 +31,280 @@ #include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/partition/refinement/fm/fm_commons.h" - namespace mt_kahypar { - /* - * LocalFMStrategy interface - * static constexpr bool uses_gain_cache - * static constexpr bool maintain_gain_cache_between_rounds - * static constexpr bool is_unconstrained - * - * Constructor(context, sharedData, blockPQ, vertexPQs, runStats) - * insertIntoPQ(phg, gain_cache, node) - * updateGain(phg, gain_cache, node, move) - * findNextMove(phg, gain_cache, move) - * applyMove(phg, gain_cache, move) - * reset() - * deltaGainUpdates(phg, gain_cache, sync_update) - * - */ - -class LocalUnconstrainedStrategy { +/* + * LocalFMStrategy interface + * static constexpr bool uses_gain_cache + * static constexpr bool maintain_gain_cache_between_rounds + * static constexpr bool is_unconstrained + * + * Constructor(context, sharedData, blockPQ, vertexPQs, runStats) + * insertIntoPQ(phg, gain_cache, node) + * updateGain(phg, gain_cache, node, move) + * findNextMove(phg, gain_cache, move) + * applyMove(phg, gain_cache, move) + * reset() + * deltaGainUpdates(phg, gain_cache, sync_update) + * + */ + +class LocalUnconstrainedStrategy +{ using VirtualWeightMap = ds::SparseMap; - public: - using BlockPriorityQueue = ds::ExclusiveHandleHeap< ds::MaxHeap >; - using VertexPriorityQueue = ds::MaxHeap; // these need external handles +public: + using BlockPriorityQueue = ds::ExclusiveHandleHeap >; + using VertexPriorityQueue = + ds::MaxHeap; // these need external handles static constexpr bool uses_gain_cache = true; static constexpr bool maintain_gain_cache_between_rounds = true; static constexpr bool is_unconstrained = true; - LocalUnconstrainedStrategy(const Context& context, - FMSharedData& sharedData, - BlockPriorityQueue& blockPQ, - vec& vertexPQs) : + LocalUnconstrainedStrategy(const Context &context, FMSharedData &sharedData, + BlockPriorityQueue &blockPQ, + vec &vertexPQs) : context(context), - sharedData(sharedData), - blockPQ(blockPQ), - vertexPQs(vertexPQs), + sharedData(sharedData), blockPQ(blockPQ), vertexPQs(vertexPQs), localVirtualWeightDelta(context.partition.k), penaltyFactor(context.refinement.fm.imbalance_penalty_max), - upperBound(context.refinement.fm.unconstrained_upper_bound) { } + upperBound(context.refinement.fm.unconstrained_upper_bound) + { + } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void insertIntoPQ(const PartitionedHypergraph& phg, - const GainCache& gain_cache, - const HypernodeID v) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void insertIntoPQ(const PartitionedHypergraph &phg, + const GainCache &gain_cache, + const HypernodeID v) + { const PartitionID pv = phg.partID(v); ASSERT(pv < context.partition.k); auto [target, gain] = computeBestTargetBlock(phg, gain_cache, v, pv); ASSERT(target < context.partition.k); sharedData.targetPart[v] = target; - vertexPQs[pv].insert(v, gain); // blockPQ updates are done later, collectively. + vertexPQs[pv].insert(v, gain); // blockPQ updates are done later, collectively. 
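// --------------------------------------------------------------------------
// NOTE (editorial sketch, not part of the original patch): the unconstrained
// strategy in this file may move nodes into an overloaded block, but discounts
// the gain of such moves by an estimated rebalancing penalty (see findNextMove()
// and computeBestTargetBlock() below). A simplified stand-alone version of that
// rule; all parameters are hypothetical, and the real code additionally rejects
// moves whose penalty cannot be estimated:
#include <cmath>
#include <limits>

inline int penalizedGain(int gain, int estimated_penalty, double penalty_factor,
                         int node_weight, int target_weight,
                         int max_target_weight, double upper_bound)
{
  if(upper_bound >= 1 &&
     target_weight + node_weight > upper_bound * max_target_weight)
  {
    // hard cap on how much the target block may be overloaded
    return std::numeric_limits<int>::min();
  }
  if(target_weight + node_weight > max_target_weight && penalty_factor > 0)
  {
    // overloaded target: e.g. gain 5, penalty 8, factor 0.5 -> 5 - ceil(4.0) = 1
    gain -= static_cast<int>(std::ceil(penalty_factor * estimated_penalty));
  }
  return gain;
}
// --------------------------------------------------------------------------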
} - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void updateGain(const PartitionedHypergraph& phg, - const GainCache& gain_cache, - const HypernodeID v, - const Move& move) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + updateGain(const PartitionedHypergraph &phg, const GainCache &gain_cache, + const HypernodeID v, const Move &move) + { const PartitionID pv = phg.partID(v); ASSERT(vertexPQs[pv].contains(v)); const PartitionID designatedTargetV = sharedData.targetPart[v]; Gain gain = 0; PartitionID newTarget = kInvalidPartition; - if (context.partition.k < 4 || designatedTargetV == move.from || designatedTargetV == move.to) { + if(context.partition.k < 4 || designatedTargetV == move.from || + designatedTargetV == move.to) + { // penalty term of designatedTargetV is affected. // and may now be greater than that of other blocks --> recompute full std::tie(newTarget, gain) = computeBestTargetBlock(phg, gain_cache, v, pv); - } else { + } + else + { // penalty term of designatedTargetV is not affected. // only move.from and move.to may be better - std::tie(newTarget, gain) = bestOfThree(phg, gain_cache, - v, pv, { designatedTargetV, move.from, move.to }); + std::tie(newTarget, gain) = + bestOfThree(phg, gain_cache, v, pv, { designatedTargetV, move.from, move.to }); } sharedData.targetPart[v] = newTarget; vertexPQs[pv].adjustKey(v, gain); } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - bool findNextMove(const PartitionedHypergraph& phg, - const GainCache& gain_cache, - Move& m) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE bool + findNextMove(const PartitionedHypergraph &phg, const GainCache &gain_cache, Move &m) + { updatePQs(); - if (blockPQ.empty()) { + if(blockPQ.empty()) + { return false; } - while (true) { + while(true) + { const PartitionID from = blockPQ.top(); const HypernodeID u = vertexPQs[from].top(); const Gain estimated_gain = vertexPQs[from].topKey(); ASSERT(estimated_gain == blockPQ.topKey()); auto [to, gain] = computeBestTargetBlock(phg, gain_cache, u, phg.partID(u)); - bool apply_move = (gain >= estimated_gain); // accept any gain that is at least as good - if (apply_move && to != kInvalidPartition && penaltyFactor > 0) { + bool apply_move = + (gain >= estimated_gain); // accept any gain that is at least as good + if(apply_move && to != kInvalidPartition && penaltyFactor > 0) + { const HypernodeWeight wu = phg.nodeWeight(u); const HypernodeWeight to_weight = phg.partWeight(to); - if (upperBound >= 1 && to_weight + wu > upperBound * context.partition.max_part_weights[to]) { + if(upperBound >= 1 && + to_weight + wu > upperBound * context.partition.max_part_weights[to]) + { apply_move = false; - } else if (to_weight + wu > context.partition.max_part_weights[to]) { + } + else if(to_weight + wu > context.partition.max_part_weights[to]) + { const Gain imbalance_penalty = estimatePenalty(to, to_weight, wu); - if (imbalance_penalty != std::numeric_limits::max()) { - Gain new_gain = gain_cache.gain(u, from, to) - std::ceil(penaltyFactor * imbalance_penalty); + if(imbalance_penalty != std::numeric_limits::max()) + { + Gain new_gain = gain_cache.gain(u, from, to) - + std::ceil(penaltyFactor * imbalance_penalty); gain = new_gain; - } else { + } + else + { apply_move = false; } } } - if (apply_move) { - m.node = u; m.to = to; m.from = from; + if(apply_move) + { + m.node = u; + m.to = to; + m.from = from; m.gain = gain; - vertexPQs[from].deleteTop(); // blockPQ updates are done later, collectively. 
+ vertexPQs[from].deleteTop(); // blockPQ updates are done later, collectively. return true; - } else { + } + else + { vertexPQs[from].adjustKey(u, gain); sharedData.targetPart[u] = to; - if (vertexPQs[from].topKey() != blockPQ.keyOf(from)) { + if(vertexPQs[from].topKey() != blockPQ.keyOf(from)) + { blockPQ.adjustKey(from, vertexPQs[from].topKey()); } } } } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void applyMove(const PartitionedHypergraph& phg, const GainCache&, Move m) { - if (sharedData.unconstrained.isRebalancingNode(m.node)) { - // If a node is moved which is already in use for penalty estimation, we need to make - // an adjustment so future estimations are not overly optimistic (since in reality, the - // node is not available anymore). This is achieved by increasing the "virtual" weight of - // the origin block, thus pessimizing future estimations + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void applyMove(const PartitionedHypergraph &phg, + const GainCache &, Move m) + { + if(sharedData.unconstrained.isRebalancingNode(m.node)) + { + // If a node is moved which is already in use for penalty estimation, we need to + // make an adjustment so future estimations are not overly optimistic (since in + // reality, the node is not available anymore). This is achieved by increasing the + // "virtual" weight of the origin block, thus pessimizing future estimations localVirtualWeightDelta[m.from] += phg.nodeWeight(m.node); } } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void flushLocalChanges() { - for (auto [block, delta]: localVirtualWeightDelta) { + void flushLocalChanges() + { + for(auto [block, delta] : localVirtualWeightDelta) + { ASSERT(delta >= 0); - sharedData.unconstrained.virtualWeightDelta(block).fetch_add(delta, std::memory_order_relaxed); + sharedData.unconstrained.virtualWeightDelta(block).fetch_add( + delta, std::memory_order_relaxed); } localVirtualWeightDelta.clear(); } - void reset() { + void reset() + { // release all nodes that were not moved - if (sharedData.release_nodes) { + if(sharedData.release_nodes) + { // Release all nodes contained in PQ - for (PartitionID i = 0; i < context.partition.k; ++i) { - for (PosT j = 0; j < vertexPQs[i].size(); ++j) { + for(PartitionID i = 0; i < context.partition.k; ++i) + { + for(PosT j = 0; j < vertexPQs[i].size(); ++j) + { const HypernodeID v = vertexPQs[i].at(j); sharedData.nodeTracker.releaseNode(v); } } } - for (PartitionID i = 0; i < context.partition.k; ++i) { + for(PartitionID i = 0; i < context.partition.k; ++i) + { vertexPQs[i].clear(); } blockPQ.clear(); localVirtualWeightDelta.clear(); } - - // We're letting the FM details implementation decide what happens here, since some may not want to do gain cache updates, - // but rather update gains in their PQs or something - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void deltaGainUpdates(PartitionedHypergraph& phg, - GainCache& gain_cache, - const SynchronizedEdgeUpdate& sync_update) { + // We're letting the FM details implementation decide what happens here, since some may + // not want to do gain cache updates, but rather update gains in their PQs or something + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + deltaGainUpdates(PartitionedHypergraph &phg, GainCache &gain_cache, + const SynchronizedEdgeUpdate &sync_update) + { gain_cache.deltaGainUpdate(phg, sync_update); } - void setPenaltyFactor(double penalty) { + void setPenaltyFactor(double penalty) + { ASSERT(penalty >= 0 && penalty <= 1); penaltyFactor = penalty; } - void setUpperBound(double upper_bound) { - 
upperBound = upper_bound; - } + void setUpperBound(double upper_bound) { upperBound = upper_bound; } private: MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void updatePQs() { - for (PartitionID i = 0; i < context.partition.k; ++i) { - if (!vertexPQs[i].empty()) { + void updatePQs() + { + for(PartitionID i = 0; i < context.partition.k; ++i) + { + if(!vertexPQs[i].empty()) + { blockPQ.insertOrAdjustKey(i, vertexPQs[i].topKey()); - } else if (blockPQ.contains(i)) { + } + else if(blockPQ.contains(i)) + { blockPQ.remove(i); } } } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - std::pair computeBestTargetBlock(const PartitionedHypergraph& phg, - const GainCache& gain_cache, - const HypernodeID u, - const PartitionID from) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE std::pair + computeBestTargetBlock(const PartitionedHypergraph &phg, const GainCache &gain_cache, + const HypernodeID u, const PartitionID from) const + { const HypernodeWeight wu = phg.nodeWeight(u); const HypernodeWeight from_weight = phg.partWeight(from); PartitionID to = kInvalidPartition; HyperedgeWeight to_benefit = std::numeric_limits::min(); HypernodeWeight best_to_weight = from_weight - wu; - for (PartitionID i = 0; i < context.partition.k; ++i) { - if (i != from) { + for(PartitionID i = 0; i < context.partition.k; ++i) + { + if(i != from) + { const HypernodeWeight to_weight = phg.partWeight(i); const HypernodeWeight max_weight = context.partition.max_part_weights[i]; HyperedgeWeight benefit = gain_cache.benefitTerm(u, i); - if (upperBound >= 1 && to_weight + wu > upperBound * max_weight) { + if(upperBound >= 1 && to_weight + wu > upperBound * max_weight) + { continue; - } else if (to_weight + wu > max_weight && benefit <= to_benefit) { + } + else if(to_weight + wu > max_weight && benefit <= to_benefit) + { // don't take imbalanced move without improved gain continue; - } else if (to_weight + wu > max_weight && penaltyFactor > 0) { + } + else if(to_weight + wu > max_weight && penaltyFactor > 0) + { const Gain imbalance_penalty = estimatePenalty(i, to_weight, wu); - if (imbalance_penalty == std::numeric_limits::max()) { + if(imbalance_penalty == std::numeric_limits::max()) + { continue; } benefit -= std::ceil(penaltyFactor * imbalance_penalty); } - if ( benefit > to_benefit || ( benefit == to_benefit && to_weight < best_to_weight ) ) { + if(benefit > to_benefit || (benefit == to_benefit && to_weight < best_to_weight)) + { to_benefit = benefit; to = i; best_to_weight = to_weight; @@ -271,45 +312,55 @@ class LocalUnconstrainedStrategy { } } ASSERT(from == phg.partID(u)); - const Gain gain = to != kInvalidPartition ? to_benefit - gain_cache.penaltyTerm(u, from) - : std::numeric_limits::min(); + const Gain gain = to != kInvalidPartition ? 
+ to_benefit - gain_cache.penaltyTerm(u, from) : + std::numeric_limits::min(); return std::make_pair(to, gain); } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - std::pair bestOfThree(const PartitionedHypergraph& phg, - const GainCache& gain_cache, - HypernodeID u, - PartitionID from, - std::array parts) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE std::pair + bestOfThree(const PartitionedHypergraph &phg, const GainCache &gain_cache, + HypernodeID u, PartitionID from, std::array parts) const + { const PartitionID designatedTargetU = sharedData.targetPart[u]; const HypernodeWeight wu = phg.nodeWeight(u); const HypernodeWeight from_weight = phg.partWeight(from); PartitionID to = kInvalidPartition; HyperedgeWeight to_benefit = std::numeric_limits::min(); HypernodeWeight best_to_weight = from_weight - wu; - for (PartitionID i : parts) { - if (i != from && i != kInvalidPartition) { + for(PartitionID i : parts) + { + if(i != from && i != kInvalidPartition) + { const HypernodeWeight to_weight = phg.partWeight(i); HyperedgeWeight benefit = gain_cache.benefitTerm(u, i); - if (upperBound >= 1 && to_weight + wu > upperBound * context.partition.max_part_weights[i]) { + if(upperBound >= 1 && + to_weight + wu > upperBound * context.partition.max_part_weights[i]) + { continue; - } else if (to_weight + wu > context.partition.max_part_weights[i] && penaltyFactor > 0) { + } + else if(to_weight + wu > context.partition.max_part_weights[i] && + penaltyFactor > 0) + { const Gain imbalance_penalty = estimatePenalty(i, to_weight, wu); - if (imbalance_penalty == std::numeric_limits::max()) { - if (i == designatedTargetU) { - // Edge case: the cached target block for u is overloaded (infinite penalty) and no longer valid. - // We need to check all blocks, since otherwise the updated node might get a very low priority. - // Note: Since the penalties increase monotonically during a round, this can happen at most once - // for each target part of a node + if(imbalance_penalty == std::numeric_limits::max()) + { + if(i == designatedTargetU) + { + // Edge case: the cached target block for u is overloaded (infinite penalty) + // and no longer valid. We need to check all blocks, since otherwise the + // updated node might get a very low priority. Note: Since the penalties + // increase monotonically during a round, this can happen at most once for + // each target part of a node return computeBestTargetBlock(phg, gain_cache, u, from); } continue; } benefit -= std::ceil(penaltyFactor * imbalance_penalty); } - if ( benefit > to_benefit || ( benefit == to_benefit && to_weight < best_to_weight ) ) { + if(benefit > to_benefit || (benefit == to_benefit && to_weight < best_to_weight)) + { to_benefit = benefit; to = i; best_to_weight = to_weight; @@ -317,31 +368,37 @@ class LocalUnconstrainedStrategy { } } ASSERT(from == phg.partID(u)); - const Gain gain = to != kInvalidPartition ? to_benefit - gain_cache.penaltyTerm(u, from) - : std::numeric_limits::min(); + const Gain gain = to != kInvalidPartition ? 
+ to_benefit - gain_cache.penaltyTerm(u, from) : + std::numeric_limits::min(); return std::make_pair(to, gain); } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - Gain estimatePenalty(PartitionID to, HypernodeWeight to_weight, HypernodeWeight wu) const { - HypernodeWeight virtual_delta = sharedData.unconstrained.virtualWeightDelta(to).load(std::memory_order_relaxed) - + localVirtualWeightDelta.getOrDefault(to); - HypernodeWeight initial_imbalance = to_weight + virtual_delta - context.partition.max_part_weights[to]; - return sharedData.unconstrained.estimatePenaltyForImbalancedMove(to, initial_imbalance, wu); + Gain estimatePenalty(PartitionID to, HypernodeWeight to_weight, + HypernodeWeight wu) const + { + HypernodeWeight virtual_delta = + sharedData.unconstrained.virtualWeightDelta(to).load(std::memory_order_relaxed) + + localVirtualWeightDelta.getOrDefault(to); + HypernodeWeight initial_imbalance = + to_weight + virtual_delta - context.partition.max_part_weights[to]; + return sharedData.unconstrained.estimatePenaltyForImbalancedMove( + to, initial_imbalance, wu); } - const Context& context; + const Context &context; - FMSharedData& sharedData; + FMSharedData &sharedData; // ! Priority Queue that contains for each block of the partition // ! the vertex with the best gain value - BlockPriorityQueue& blockPQ; + BlockPriorityQueue &blockPQ; // ! From PQs -> For each block it contains the vertices (contained // ! in that block) touched by the current local search associated // ! with their gain values - vec& vertexPQs; + vec &vertexPQs; // ! Virtual block weights are saved as delta to the actual block weight. They // ! are necessary to ensure a reasonable penalty estimation in some edge cases. diff --git a/mt-kahypar/partition/refinement/fm/strategies/unconstrained_strategy.h b/mt-kahypar/partition/refinement/fm/strategies/unconstrained_strategy.h index b952e552d..a40fdfe93 100644 --- a/mt-kahypar/partition/refinement/fm/strategies/unconstrained_strategy.h +++ b/mt-kahypar/partition/refinement/fm/strategies/unconstrained_strategy.h @@ -31,120 +31,164 @@ #include "mt-kahypar/partition/refinement/fm/strategies/local_gain_cache_strategy.h" #include "mt-kahypar/partition/refinement/fm/strategies/local_unconstrained_strategy.h" - namespace mt_kahypar { -template -class UnconstrainedStrategy: public IFMStrategy { +template +class UnconstrainedStrategy : public IFMStrategy +{ using Base = IFMStrategy; static constexpr bool debug = false; - public: +public: using LocalFM = LocalizedKWayFM; using PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; - UnconstrainedStrategy(const Context& context, FMSharedData& sharedData): + UnconstrainedStrategy(const Context &context, FMSharedData &sharedData) : Base(context, sharedData), current_penalty(context.refinement.fm.imbalance_penalty_min), current_upper_bound(context.refinement.fm.unconstrained_upper_bound), - absolute_improvement_first_round(kInvalidGain), - unconstrained_is_enabled(true), - stats(utils::Utilities::instance().getStats(context.utility_id)) { - ASSERT(!context.refinement.fm.activate_unconstrained_dynamically - || context.refinement.fm.multitry_rounds > 2); + absolute_improvement_first_round(kInvalidGain), unconstrained_is_enabled(true), + stats(utils::Utilities::instance().getStats(context.utility_id)) + { + ASSERT(!context.refinement.fm.activate_unconstrained_dynamically || + context.refinement.fm.multitry_rounds > 2); } - bool dispatchedFindMoves(LocalFM& local_fm, PartitionedHypergraph& phg, size_t task_id, size_t num_seeds, 
size_t round) { - if (isUnconstrainedRound(round)) { - LocalUnconstrainedStrategy local_strategy = local_fm.template initializeDispatchedStrategy(); + bool dispatchedFindMoves(LocalFM &local_fm, PartitionedHypergraph &phg, size_t task_id, + size_t num_seeds, size_t round) + { + if(isUnconstrainedRound(round)) + { + LocalUnconstrainedStrategy local_strategy = + local_fm.template initializeDispatchedStrategy(); local_strategy.setPenaltyFactor(current_penalty); local_strategy.setUpperBound(current_upper_bound); return local_fm.findMoves(local_strategy, phg, task_id, num_seeds); - } else { - LocalGainCacheStrategy local_strategy = local_fm.template initializeDispatchedStrategy(); + } + else + { + LocalGainCacheStrategy local_strategy = + local_fm.template initializeDispatchedStrategy(); return local_fm.findMoves(local_strategy, phg, task_id, num_seeds); } } - private: - virtual void findMovesImpl(localized_k_way_fm_t local_fm, mt_kahypar_partitioned_hypergraph_t& phg, - size_t num_tasks, size_t num_seeds, size_t round) final { +private: + virtual void findMovesImpl(localized_k_way_fm_t local_fm, + mt_kahypar_partitioned_hypergraph_t &phg, size_t num_tasks, + size_t num_seeds, size_t round) final + { initRound(round); - Base::findMovesWithConcreteStrategy( - local_fm, phg, num_tasks, num_seeds, round); + Base::findMovesWithConcreteStrategy(local_fm, phg, num_tasks, + num_seeds, round); } - virtual bool isUnconstrainedRoundImpl(size_t round) const final { - if (round > 0 && !unconstrained_is_enabled) { + virtual bool isUnconstrainedRoundImpl(size_t round) const final + { + if(round > 0 && !unconstrained_is_enabled) + { return false; } - if (context.refinement.fm.activate_unconstrained_dynamically) { - return round == 1 || (round > 1 && round - 2 < context.refinement.fm.unconstrained_rounds); - } else { + if(context.refinement.fm.activate_unconstrained_dynamically) + { + return round == 1 || + (round > 1 && round - 2 < context.refinement.fm.unconstrained_rounds); + } + else + { return round < context.refinement.fm.unconstrained_rounds; } } - virtual void reportImprovementImpl(size_t round, Gain absolute_improvement, double relative_improvement) final { - if (round == 0) { + virtual void reportImprovementImpl(size_t round, Gain absolute_improvement, + double relative_improvement) final + { + if(round == 0) + { absolute_improvement_first_round = absolute_improvement; - } else if (round == 1 - && context.refinement.fm.activate_unconstrained_dynamically - && absolute_improvement < absolute_improvement_first_round) { - // this is the decision point whether unconstrained or constrained FM is used - unconstrained_is_enabled = false; - DBG << "Disabling unconstrained FM after test round: " << V(absolute_improvement) << V(absolute_improvement_first_round); - } else if (relative_improvement < context.refinement.fm.unconstrained_min_improvement) { + } + else if(round == 1 && context.refinement.fm.activate_unconstrained_dynamically && + absolute_improvement < absolute_improvement_first_round) + { + // this is the decision point whether unconstrained or constrained FM is used + unconstrained_is_enabled = false; + DBG << "Disabling unconstrained FM after test round: " << V(absolute_improvement) + << V(absolute_improvement_first_round); + } + else if(relative_improvement < context.refinement.fm.unconstrained_min_improvement) + { unconstrained_is_enabled = false; - DBG << "Disabling unconstrained FM due to too little improvement:" << V(relative_improvement); + DBG << "Disabling unconstrained FM due to too little 
improvement:" + << V(relative_improvement); } - if (round == 1) { + if(round == 1) + { stats.update_stat("top-level-ufm-active", unconstrained_is_enabled); - if (unconstrained_is_enabled) { + if(unconstrained_is_enabled) + { stats.update_stat("ufm-active-levels", 1); - } else { + } + else + { stats.update_stat("ufm-inactive-levels", 1); } } } - void initRound(size_t round) { - if (round == 0) { + void initRound(size_t round) + { + if(round == 0) + { unconstrained_is_enabled = true; } - if (context.refinement.fm.activate_unconstrained_dynamically) { - if (round == 1) { + if(context.refinement.fm.activate_unconstrained_dynamically) + { + if(round == 1) + { current_penalty = context.refinement.fm.penalty_for_activation_test; current_upper_bound = context.refinement.fm.unconstrained_upper_bound; - } else if (round > 1 && isUnconstrainedRound(round)) { - size_t n_rounds = std::min(context.refinement.fm.unconstrained_rounds, context.refinement.fm.multitry_rounds - 2); + } + else if(round > 1 && isUnconstrainedRound(round)) + { + size_t n_rounds = std::min(context.refinement.fm.unconstrained_rounds, + context.refinement.fm.multitry_rounds - 2); calculateInterpolation(round - 2, n_rounds); } - } else if (isUnconstrainedRound(round)) { + } + else if(isUnconstrainedRound(round)) + { calculateInterpolation(round, context.refinement.fm.unconstrained_rounds); } } - void calculateInterpolation(size_t round, size_t n_rounds) { + void calculateInterpolation(size_t round, size_t n_rounds) + { ASSERT(unconstrained_is_enabled && round < context.refinement.fm.multitry_rounds); auto interpolate = [&](double start, double end) { - if (round == 0) { + if(round == 0) + { return start; } double summed = (n_rounds - round - 1) * start + round * end; return summed / static_cast(n_rounds - 1); }; - if (round < n_rounds) { + if(round < n_rounds) + { // interpolate values for current penalty and upper bound current_penalty = interpolate(context.refinement.fm.imbalance_penalty_min, context.refinement.fm.imbalance_penalty_max); - if (context.refinement.fm.unconstrained_upper_bound >= 1) { - if (context.refinement.fm.unconstrained_upper_bound_min >= 1) { - current_upper_bound = interpolate(context.refinement.fm.unconstrained_upper_bound, - context.refinement.fm.unconstrained_upper_bound_min); - } else { + if(context.refinement.fm.unconstrained_upper_bound >= 1) + { + if(context.refinement.fm.unconstrained_upper_bound_min >= 1) + { + current_upper_bound = + interpolate(context.refinement.fm.unconstrained_upper_bound, + context.refinement.fm.unconstrained_upper_bound_min); + } + else + { current_upper_bound = context.refinement.fm.unconstrained_upper_bound; } } @@ -155,7 +199,7 @@ class UnconstrainedStrategy: public IFMStrategy { double current_upper_bound; Gain absolute_improvement_first_round; bool unconstrained_is_enabled; - utils::Stats& stats; + utils::Stats &stats; }; } diff --git a/mt-kahypar/partition/refinement/gains/bipartitioning_policy.h b/mt-kahypar/partition/refinement/gains/bipartitioning_policy.h index 196a2e8eb..e66677949 100644 --- a/mt-kahypar/partition/refinement/gains/bipartitioning_policy.h +++ b/mt-kahypar/partition/refinement/gains/bipartitioning_policy.h @@ -26,44 +26,61 @@ #pragma once -#include "mt-kahypar/definitions.h" -#include "mt-kahypar/partition/context_enum_classes.h" #include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/definitions.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/partition/context_enum_classes.h" #include "mt-kahypar/utils/exception.h" 
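// --------------------------------------------------------------------------
// NOTE (editorial sketch, not part of the original patch): calculateInterpolation()
// above moves the imbalance penalty factor (and, if configured, the upper bound)
// linearly from a start to an end value over the unconstrained rounds. The same
// interpolation as a stand-alone function, with purely illustrative values
// (assumes round < n_rounds):
#include <cstddef>

inline double interpolateLinearly(double start, double end, size_t round,
                                  size_t n_rounds)
{
  if(round == 0)
  {
    return start;
  }
  return ((n_rounds - round - 1) * start + round * end) /
         static_cast<double>(n_rounds - 1);
}
// Example: start = 0.2, end = 1.0, n_rounds = 5 gives 0.2, 0.4, 0.6, 0.8, 1.0
// for rounds 0..4.
// --------------------------------------------------------------------------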
namespace mt_kahypar { - -struct BipartitioningPolicy { - static bool useCutNetSplitting(const GainPolicy policy) { - switch(policy) { - case GainPolicy::cut: return false; - case GainPolicy::km1: return true; - case GainPolicy::soed: return true; - case GainPolicy::steiner_tree: return true; - case GainPolicy::cut_for_graphs: return false; - case GainPolicy::steiner_tree_for_graphs: return false; - case GainPolicy::none: throw InvalidParameterException("Gain policy is unknown"); +struct BipartitioningPolicy +{ + static bool useCutNetSplitting(const GainPolicy policy) + { + switch(policy) + { + case GainPolicy::cut: + return false; + case GainPolicy::km1: + return true; + case GainPolicy::soed: + return true; + case GainPolicy::steiner_tree: + return true; + case GainPolicy::cut_for_graphs: + return false; + case GainPolicy::steiner_tree_for_graphs: + return false; + case GainPolicy::none: + throw InvalidParameterException("Gain policy is unknown"); } throw InvalidParameterException("Gain policy is unknown"); return false; } - static HyperedgeWeight nonCutEdgeMultiplier(const GainPolicy policy) { - switch(policy) { - case GainPolicy::cut: return 1; - case GainPolicy::km1: return 1; - case GainPolicy::soed: return 2; - case GainPolicy::steiner_tree: return 1; - case GainPolicy::cut_for_graphs: return 1; - case GainPolicy::steiner_tree_for_graphs: return 1; - case GainPolicy::none: throw InvalidParameterException("Gain policy is unknown"); + static HyperedgeWeight nonCutEdgeMultiplier(const GainPolicy policy) + { + switch(policy) + { + case GainPolicy::cut: + return 1; + case GainPolicy::km1: + return 1; + case GainPolicy::soed: + return 2; + case GainPolicy::steiner_tree: + return 1; + case GainPolicy::cut_for_graphs: + return 1; + case GainPolicy::steiner_tree_for_graphs: + return 1; + case GainPolicy::none: + throw InvalidParameterException("Gain policy is unknown"); } throw InvalidParameterException("Gain policy is unknown"); return 0; } }; - -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/cut/cut_attributed_gains.h b/mt-kahypar/partition/refinement/gains/cut/cut_attributed_gains.h index ea10192f8..0b3471ed7 100644 --- a/mt-kahypar/partition/refinement/gains/cut/cut_attributed_gains.h +++ b/mt-kahypar/partition/refinement/gains/cut/cut_attributed_gains.h @@ -35,12 +35,17 @@ namespace mt_kahypar { * for each incident hyperedge of the node based on which we then compute an * attributed gain value. */ -struct CutAttributedGains { - static HyperedgeWeight gain(const SynchronizedEdgeUpdate& sync_update) { +struct CutAttributedGains +{ + static HyperedgeWeight gain(const SynchronizedEdgeUpdate &sync_update) + { return sync_update.edge_size > 1 ? 
- ( sync_update.pin_count_in_from_part_after == sync_update.edge_size - 1) * sync_update.edge_weight - - ( sync_update.pin_count_in_to_part_after == sync_update.edge_size ) * sync_update.edge_weight : 0; + (sync_update.pin_count_in_from_part_after == sync_update.edge_size - 1) * + sync_update.edge_weight - + (sync_update.pin_count_in_to_part_after == sync_update.edge_size) * + sync_update.edge_weight : + 0; } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/cut/cut_flow_network_construction.h b/mt-kahypar/partition/refinement/gains/cut/cut_flow_network_construction.h index 545576fbd..c65082c39 100644 --- a/mt-kahypar/partition/refinement/gains/cut/cut_flow_network_construction.h +++ b/mt-kahypar/partition/refinement/gains/cut/cut_flow_network_construction.h @@ -36,53 +36,50 @@ namespace mt_kahypar { * to determine the capacity of a hyperedge and whether or not the hyperedge * is relevant for optimizing the objective function. */ -struct CutFlowNetworkConstruction { +struct CutFlowNetworkConstruction +{ // ! Capacity of the hyperedge - template - static HyperedgeWeight capacity(const PartitionedHypergraph& phg, - const Context&, - const HyperedgeID he, - const PartitionID, - const PartitionID) { + template + static HyperedgeWeight capacity(const PartitionedHypergraph &phg, const Context &, + const HyperedgeID he, const PartitionID, + const PartitionID) + { return phg.edgeWeight(he); } // ! If true, then hyperedge is not relevant and can be dropped. - template - static bool dropHyperedge(const PartitionedHypergraph& phg, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1) { - return phg.pinCountInPart(he, block_0) + phg.pinCountInPart(he, block_1) < phg.edgeSize(he); + template + static bool dropHyperedge(const PartitionedHypergraph &phg, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1) + { + return phg.pinCountInPart(he, block_0) + phg.pinCountInPart(he, block_1) < + phg.edgeSize(he); } // ! If true, then hyperedge is connected to source. - template - static bool connectToSource(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool connectToSource(const PartitionedHypergraph &, const HyperedgeID, + const PartitionID, const PartitionID) + { return false; } // ! If true, then hyperedge is connected to sink. - template - static bool connectToSink(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool connectToSink(const PartitionedHypergraph &, const HyperedgeID, + const PartitionID, const PartitionID) + { return false; } // ! If true, then hyperedge is considered as cut edge and its // ! 
weight is added to the total cut - template - static bool isCut(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool isCut(const PartitionedHypergraph &, const HyperedgeID, const PartitionID, + const PartitionID) + { return false; } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.cpp b/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.cpp index 50ac8c25e..0d05431f0 100644 --- a/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.cpp +++ b/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.cpp @@ -26,88 +26,104 @@ #include "mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" #include "tbb/concurrent_vector.h" +#include "tbb/enumerable_thread_specific.h" +#include "tbb/parallel_for.h" #include "mt-kahypar/definitions.h" namespace mt_kahypar { -template -void CutGainCache::initializeGainCache(const PartitionedHypergraph& partitioned_hg) { +template +void CutGainCache::initializeGainCache(const PartitionedHypergraph &partitioned_hg) +{ ASSERT(!_is_initialized, "Gain cache is already initialized"); ASSERT(_k <= 0 || _k >= partitioned_hg.k(), - "Gain cache was already initialized for a different k" << V(_k) << V(partitioned_hg.k())); + "Gain cache was already initialized for a different k" << V(_k) + << V(partitioned_hg.k())); allocateGainTable(partitioned_hg.topLevelNumNodes(), partitioned_hg.k()); // Gain calculation consist of two stages // 1. Compute gain of all low degree vertices // 2. Compute gain of all high degree vertices - tbb::enumerable_thread_specific< vec > ets_mtb(_k, 0); + tbb::enumerable_thread_specific > ets_mtb(_k, 0); tbb::concurrent_vector high_degree_vertices; // Compute gain of all low degree vertices - tbb::parallel_for(tbb::blocked_range(HypernodeID(0), partitioned_hg.initialNumNodes()), - [&](tbb::blocked_range& r) { - vec& benefit_aggregator = ets_mtb.local(); - for (HypernodeID u = r.begin(); u < r.end(); ++u) { - if ( partitioned_hg.nodeIsEnabled(u)) { - if ( partitioned_hg.nodeDegree(u) <= HIGH_DEGREE_THRESHOLD) { - initializeGainCacheEntryForNode(partitioned_hg, u, benefit_aggregator); - } else { - // Collect high degree vertices - high_degree_vertices.push_back(u); + tbb::parallel_for( + tbb::blocked_range(HypernodeID(0), partitioned_hg.initialNumNodes()), + [&](tbb::blocked_range &r) { + vec &benefit_aggregator = ets_mtb.local(); + for(HypernodeID u = r.begin(); u < r.end(); ++u) + { + if(partitioned_hg.nodeIsEnabled(u)) + { + if(partitioned_hg.nodeDegree(u) <= HIGH_DEGREE_THRESHOLD) + { + initializeGainCacheEntryForNode(partitioned_hg, u, benefit_aggregator); + } + else + { + // Collect high degree vertices + high_degree_vertices.push_back(u); + } } } - } - }); + }); auto aggregate_contribution_of_he_for_node = - [&](const PartitionID block_of_u, - const HyperedgeID he, - HyperedgeWeight& penalty_aggregator, - vec& benefit_aggregator) { - const HypernodeID edge_size = partitioned_hg.edgeSize(he); - if ( edge_size > 1 && partitioned_hg.connectivity(he) <= 2 ) { - HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); - if (partitioned_hg.pinCountInPart(he, block_of_u) == edge_size) { - penalty_aggregator += edge_weight; - } + [&](const PartitionID block_of_u, const HyperedgeID he, + HyperedgeWeight &penalty_aggregator, vec &benefit_aggregator) { + const HypernodeID edge_size = partitioned_hg.edgeSize(he); + 
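// --------------------------------------------------------------------------
// NOTE (editorial sketch, not part of the original patch): for reference, the
// two quantities aggregated here can be recomputed naively for a single node u.
// This mirrors the aggregation logic in this function (assuming the usual
// single-argument phg.incidentEdges(u) iterator); the cached cut gain of moving
// u to block p is then benefit[p] - penalty:
#include <utility>

template <typename PartitionedHypergraph>
std::pair<HyperedgeWeight, vec<HyperedgeWeight>>
recomputeCutTermsNaively(const PartitionedHypergraph &phg, const HypernodeID u,
                         const PartitionID k)
{
  HyperedgeWeight penalty = 0;        // weight of nets fully contained in u's block
  vec<HyperedgeWeight> benefit(k, 0); // nets that become internal if u moves to p
  for(const HyperedgeID he : phg.incidentEdges(u))
  {
    const HypernodeID edge_size = phg.edgeSize(he);
    if(edge_size > 1 && phg.connectivity(he) <= 2)
    {
      if(phg.pinCountInPart(he, phg.partID(u)) == edge_size)
      {
        penalty += phg.edgeWeight(he);
      }
      for(const PartitionID p : phg.connectivitySet(he))
      {
        if(phg.pinCountInPart(he, p) == edge_size - 1)
        {
          benefit[p] += phg.edgeWeight(he);
        }
      }
    }
  }
  return { penalty, benefit };
}
// --------------------------------------------------------------------------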
if(edge_size > 1 && partitioned_hg.connectivity(he) <= 2) + { + HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); + if(partitioned_hg.pinCountInPart(he, block_of_u) == edge_size) + { + penalty_aggregator += edge_weight; + } - for (const PartitionID block : partitioned_hg.connectivitySet(he)) { - if ( partitioned_hg.pinCountInPart(he, block) == edge_size - 1 ) { - benefit_aggregator[block] += edge_weight; + for(const PartitionID block : partitioned_hg.connectivitySet(he)) + { + if(partitioned_hg.pinCountInPart(he, block) == edge_size - 1) + { + benefit_aggregator[block] += edge_weight; + } + } } - } - } - }; + }; // Compute gain of all high degree vertices - for ( const HypernodeID& u : high_degree_vertices ) { + for(const HypernodeID &u : high_degree_vertices) + { tbb::enumerable_thread_specific ets_mfp(0); const PartitionID from = partitioned_hg.partID(u); const HypernodeID degree_of_u = partitioned_hg.nodeDegree(u); - tbb::parallel_for(tbb::blocked_range(ID(0), degree_of_u), - [&](tbb::blocked_range& r) { - vec& benefit_aggregator = ets_mtb.local(); - HyperedgeWeight& penalty_aggregator = ets_mfp.local(); - size_t current_pos = r.begin(); - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(u, r.begin()) ) { - aggregate_contribution_of_he_for_node(from, he, - penalty_aggregator, benefit_aggregator); - ++current_pos; - if ( current_pos == r.end() ) { - break; - } - } - }); + tbb::parallel_for( + tbb::blocked_range(ID(0), degree_of_u), + [&](tbb::blocked_range &r) { + vec &benefit_aggregator = ets_mtb.local(); + HyperedgeWeight &penalty_aggregator = ets_mfp.local(); + size_t current_pos = r.begin(); + for(const HyperedgeID &he : partitioned_hg.incidentEdges(u, r.begin())) + { + aggregate_contribution_of_he_for_node(from, he, penalty_aggregator, + benefit_aggregator); + ++current_pos; + if(current_pos == r.end()) + { + break; + } + } + }); // Aggregate thread locals to compute overall gain of the high degree vertex const HyperedgeWeight penalty_term = ets_mfp.combine(std::plus()); _gain_cache[penalty_index(u)].store(penalty_term, std::memory_order_relaxed); - for (PartitionID p = 0; p < _k; ++p) { + for(PartitionID p = 0; p < _k; ++p) + { HyperedgeWeight move_to_benefit = 0; - for ( auto& l_move_to_benefit : ets_mtb ) { + for(auto &l_move_to_benefit : ets_mtb) + { move_to_benefit += l_move_to_benefit[p]; l_move_to_benefit[p] = 0; } @@ -118,102 +134,136 @@ void CutGainCache::initializeGainCache(const PartitionedHypergraph& partitioned_ _is_initialized = true; } -bool CutGainCache::triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& sync_update) { +bool CutGainCache::triggersDeltaGainUpdate(const SynchronizedEdgeUpdate &sync_update) +{ return sync_update.pin_count_in_from_part_after == sync_update.edge_size - 1 || sync_update.pin_count_in_from_part_after == sync_update.edge_size - 2 || sync_update.pin_count_in_to_part_after == sync_update.edge_size || sync_update.pin_count_in_to_part_after == sync_update.edge_size - 1; } - -template -void CutGainCache::deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { +template +void CutGainCache::deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) +{ ASSERT(_is_initialized, "Gain cache is not initialized"); const HypernodeID edge_size = sync_update.edge_size; - if ( edge_size > 1 ) { + if(edge_size > 1) + { const HyperedgeID he = sync_update.he; const PartitionID from = sync_update.from; const PartitionID to = sync_update.to; 
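// --------------------------------------------------------------------------
// NOTE (editorial sketch, not part of the original patch): the case distinctions
// below keep the cached penalty/benefit terms consistent with the new pin counts.
// For orientation, the change of the cut value itself attributed to a single edge
// is much simpler and mirrors CutAttributedGains::gain() earlier in this diff
// (positive = cut increases, negative = cut decreases):
inline HyperedgeWeight cutDeltaForEdge(const SynchronizedEdgeUpdate &sync_update)
{
  if(sync_update.edge_size <= 1)
  {
    return 0;
  }
  HyperedgeWeight delta = 0;
  // the edge was internal to the source block and is now cut
  if(sync_update.pin_count_in_from_part_after == sync_update.edge_size - 1)
  {
    delta += sync_update.edge_weight;
  }
  // the edge is now fully contained in the target block, i.e. no longer cut
  if(sync_update.pin_count_in_to_part_after == sync_update.edge_size)
  {
    delta -= sync_update.edge_weight;
  }
  return delta;
}
// Example: an edge of weight 2 with pins {a, b, c} that all start in block V0.
// Moving a to V1 leaves 2 = |e| - 1 pins in V0, so the edge becomes cut and
// delta = +2; moving a back yields pin_count_in_to_part_after == |e| and delta = -2.
// --------------------------------------------------------------------------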
const HyperedgeWeight edge_weight = sync_update.edge_weight; - const HypernodeID pin_count_in_from_part_after = sync_update.pin_count_in_from_part_after; + const HypernodeID pin_count_in_from_part_after = + sync_update.pin_count_in_from_part_after; const HypernodeID pin_count_in_to_part_after = sync_update.pin_count_in_to_part_after; - if ( pin_count_in_from_part_after == edge_size - 1 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + if(pin_count_in_from_part_after == edge_size - 1) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, from)); _gain_cache[penalty_index(u)].fetch_sub(edge_weight, std::memory_order_relaxed); - _gain_cache[benefit_index(u, from)].fetch_add(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, from)].fetch_add(edge_weight, + std::memory_order_relaxed); } - } else if ( pin_count_in_from_part_after == edge_size - 2 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + } + else if(pin_count_in_from_part_after == edge_size - 2) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, from)); - _gain_cache[benefit_index(u, from)].fetch_sub(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, from)].fetch_sub(edge_weight, + std::memory_order_relaxed); } } - if ( pin_count_in_to_part_after == edge_size ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + if(pin_count_in_to_part_after == edge_size) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, to)); _gain_cache[penalty_index(u)].fetch_add(edge_weight, std::memory_order_relaxed); - _gain_cache[benefit_index(u, to)].fetch_sub(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, to)].fetch_sub(edge_weight, + std::memory_order_relaxed); } - } else if ( pin_count_in_to_part_after == edge_size - 1 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + } + else if(pin_count_in_to_part_after == edge_size - 1) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, to)); - _gain_cache[benefit_index(u, to)].fetch_add(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, to)].fetch_add(edge_weight, + std::memory_order_relaxed); } } } } -template -void CutGainCache::uncontractUpdateAfterRestore(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, - const HyperedgeID he, - const HypernodeID pin_count_in_part_after) { - if ( _is_initialized ) { +template +void CutGainCache::uncontractUpdateAfterRestore( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, const HypernodeID v, + const HyperedgeID he, const HypernodeID pin_count_in_part_after) +{ + if(_is_initialized) + { const PartitionID block = partitioned_hg.partID(u); const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); const HypernodeID edge_size = partitioned_hg.edgeSize(he); - if ( partitioned_hg.connectivity(he) == 2 ) { - if ( pin_count_in_part_after == 2 ) { - // In this case, the hyperedge contains two blocks, while the other block V' (!= block) - // had |e| - 1 pins before the uncontraction of u and v. Now the size of the hyperedge - // increased by one while the block of u and v contains two pins (obviously) and the - // other block |e| - 2. Therefore, we have to subtract w(e) from b(u, V') for all pins - // in the hyperedge. 
+ if(partitioned_hg.connectivity(he) == 2) + { + if(pin_count_in_part_after == 2) + { + // In this case, the hyperedge contains two blocks, while the other block V' (!= + // block) had |e| - 1 pins before the uncontraction of u and v. Now the size of + // the hyperedge increased by one while the block of u and v contains two pins + // (obviously) and the other block |e| - 2. Therefore, we have to subtract w(e) + // from b(u, V') for all pins in the hyperedge. PartitionID other_block = kInvalidPartition; - for ( const PartitionID other : partitioned_hg.connectivitySet(he) ) { - if ( other != block ) { + for(const PartitionID other : partitioned_hg.connectivitySet(he)) + { + if(other != block) + { other_block = other; break; } } - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { - if ( pin != v ) { - _gain_cache[benefit_index(pin, other_block)].fetch_sub(edge_weight, std::memory_order_relaxed); + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { + if(pin != v) + { + _gain_cache[benefit_index(pin, other_block)].fetch_sub( + edge_weight, std::memory_order_relaxed); } } } - for ( const PartitionID to : partitioned_hg.connectivitySet(he) ) { - if ( partitioned_hg.pinCountInPart(he, to) == edge_size - 1 ) { - _gain_cache[benefit_index(v, to)].fetch_add(edge_weight, std::memory_order_relaxed); + for(const PartitionID to : partitioned_hg.connectivitySet(he)) + { + if(partitioned_hg.pinCountInPart(he, to) == edge_size - 1) + { + _gain_cache[benefit_index(v, to)].fetch_add(edge_weight, + std::memory_order_relaxed); } } - } else if ( pin_count_in_part_after == edge_size ) { + } + else if(pin_count_in_part_after == edge_size) + { // In this case, we have to add w(e) to the penalty term of v _gain_cache[penalty_index(v)].fetch_add(edge_weight, std::memory_order_relaxed); - if ( edge_size == 2 ) { - // Special case: Hyperedge is not a single-pin net anymore. Since we do not consider - // single-pin nets in the penalty terms, we have to add w(e) to the penalty term of u. - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { - if ( pin != v ) { + if(edge_size == 2) + { + // Special case: Hyperedge is not a single-pin net anymore. Since we do not + // consider single-pin nets in the penalty terms, we have to add w(e) to the + // penalty term of u. + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { + if(pin != v) + { // Note that u may be replaced by another uncontraction. 
- _gain_cache[penalty_index(pin)].fetch_add(edge_weight, std::memory_order_relaxed); + _gain_cache[penalty_index(pin)].fetch_add(edge_weight, + std::memory_order_relaxed); } } } @@ -221,28 +271,36 @@ void CutGainCache::uncontractUpdateAfterRestore(const PartitionedHypergraph& par } } -template -void CutGainCache::uncontractUpdateAfterReplacement(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, - const HyperedgeID he) { - if ( _is_initialized ) { +template +void CutGainCache::uncontractUpdateAfterReplacement( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, const HypernodeID v, + const HyperedgeID he) +{ + if(_is_initialized) + { const HypernodeID edge_size = partitioned_hg.edgeSize(he); - if ( edge_size > 1 ) { + if(edge_size > 1) + { const PartitionID block = partitioned_hg.partID(u); const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); - if ( partitioned_hg.pinCountInPart(he, block) == edge_size ) { + if(partitioned_hg.pinCountInPart(he, block) == edge_size) + { // u is no longer part of the hyperedge => transfer penalty term to v _gain_cache[penalty_index(u)].fetch_sub(edge_weight, std::memory_order_relaxed); _gain_cache[penalty_index(v)].fetch_add(edge_weight, std::memory_order_relaxed); } - if ( partitioned_hg.connectivity(he) == 2 ) { - for ( const PartitionID to : partitioned_hg.connectivitySet(he) ) { - if ( partitioned_hg.pinCountInPart(he, to) == edge_size - 1 ) { + if(partitioned_hg.connectivity(he) == 2) + { + for(const PartitionID to : partitioned_hg.connectivitySet(he)) + { + if(partitioned_hg.pinCountInPart(he, to) == edge_size - 1) + { // u is no longer part of the hyperedge => transfer benefit term to v - _gain_cache[benefit_index(u, to)].fetch_sub(edge_weight, std::memory_order_relaxed); - _gain_cache[benefit_index(v, to)].fetch_add(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, to)].fetch_sub(edge_weight, + std::memory_order_relaxed); + _gain_cache[benefit_index(v, to)].fetch_add(edge_weight, + std::memory_order_relaxed); } } } @@ -250,21 +308,27 @@ void CutGainCache::uncontractUpdateAfterReplacement(const PartitionedHypergraph& } } -template -void CutGainCache::initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - vec& benefit_aggregator) { +template +void CutGainCache::initializeGainCacheEntryForNode( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, + vec &benefit_aggregator) +{ PartitionID from = partitioned_hg.partID(u); Gain penalty = 0; - for (const HyperedgeID& e : partitioned_hg.incidentEdges(u)) { + for(const HyperedgeID &e : partitioned_hg.incidentEdges(u)) + { const HypernodeID edge_size = partitioned_hg.edgeSize(e); - if ( edge_size > 1 && partitioned_hg.connectivity(e) <= 2 ) { + if(edge_size > 1 && partitioned_hg.connectivity(e) <= 2) + { HyperedgeWeight ew = partitioned_hg.edgeWeight(e); - if ( partitioned_hg.pinCountInPart(e, from) == edge_size ) { + if(partitioned_hg.pinCountInPart(e, from) == edge_size) + { penalty += ew; } - for (const PartitionID& to : partitioned_hg.connectivitySet(e)) { - if ( partitioned_hg.pinCountInPart(e, to) == edge_size - 1 ) { + for(const PartitionID &to : partitioned_hg.connectivitySet(e)) + { + if(partitioned_hg.pinCountInPart(e, to) == edge_size - 1) + { benefit_aggregator[to] += ew; } } @@ -272,29 +336,28 @@ void CutGainCache::initializeGainCacheEntryForNode(const PartitionedHypergraph& } _gain_cache[penalty_index(u)].store(penalty, 
std::memory_order_relaxed); - for (PartitionID i = 0; i < _k; ++i) { - _gain_cache[benefit_index(u, i)].store(benefit_aggregator[i], std::memory_order_relaxed); + for(PartitionID i = 0; i < _k; ++i) + { + _gain_cache[benefit_index(u, i)].store(benefit_aggregator[i], + std::memory_order_relaxed); benefit_aggregator[i] = 0; } } - namespace { -#define CUT_INITIALIZE_GAIN_CACHE(X) void CutGainCache::initializeGainCache(const X&) -#define CUT_DELTA_GAIN_UPDATE(X) void CutGainCache::deltaGainUpdate(const X&, \ - const SynchronizedEdgeUpdate&) -#define CUT_RESTORE_UPDATE(X) void CutGainCache::uncontractUpdateAfterRestore(const X&, \ - const HypernodeID, \ - const HypernodeID, \ - const HyperedgeID, \ - const HypernodeID) -#define CUT_REPLACEMENT_UPDATE(X) void CutGainCache::uncontractUpdateAfterReplacement(const X&, \ - const HypernodeID, \ - const HypernodeID, \ - const HyperedgeID) -#define CUT_INIT_GAIN_CACHE_ENTRY(X) void CutGainCache::initializeGainCacheEntryForNode(const X&, \ - const HypernodeID, \ - vec&) +#define CUT_INITIALIZE_GAIN_CACHE(X) void CutGainCache::initializeGainCache(const X &) +#define CUT_DELTA_GAIN_UPDATE(X) \ + void CutGainCache::deltaGainUpdate(const X &, const SynchronizedEdgeUpdate &) +#define CUT_RESTORE_UPDATE(X) \ + void CutGainCache::uncontractUpdateAfterRestore(const X &, const HypernodeID, \ + const HypernodeID, const HyperedgeID, \ + const HypernodeID) +#define CUT_REPLACEMENT_UPDATE(X) \ + void CutGainCache::uncontractUpdateAfterReplacement( \ + const X &, const HypernodeID, const HypernodeID, const HyperedgeID) +#define CUT_INIT_GAIN_CACHE_ENTRY(X) \ + void CutGainCache::initializeGainCacheEntryForNode(const X &, const HypernodeID, \ + vec &) } INSTANTIATE_FUNC_WITH_PARTITIONED_HG(CUT_INITIALIZE_GAIN_CACHE) @@ -303,4 +366,4 @@ INSTANTIATE_FUNC_WITH_PARTITIONED_HG(CUT_RESTORE_UPDATE) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(CUT_REPLACEMENT_UPDATE) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(CUT_INIT_GAIN_CACHE_ENTRY) -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h b/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h index 4b6bb5e47..d22a79e44 100644 --- a/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h +++ b/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h @@ -28,12 +28,12 @@ #include "kahypar-resources/meta/policy_registry.h" -#include "mt-kahypar/partition/context_enum_classes.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/partition/context_enum_classes.h" #include "mt-kahypar/utils/range.h" namespace mt_kahypar { @@ -44,77 +44,76 @@ namespace mt_kahypar { * For a weighted hypergraph H = (V,E,c,w), the cut metric is defined as follows * connectivity(H) := \sum_{e \in cut(E)} w(e). * - * The gain of moving a node u from its current block V_i to a target block V_j can be expressed as follows - * g(u, V_j) := g(u, V_j) := w({ e \in I(u) | pin_count(e, V_j) = |e| - 1 }) - w({ e \in I(u) | pin_count(e, V_i) = |e| }). - * Moving node u from V_i to V_j, removes all nets e \in I(u) from the cut where pin_cout(e, V_j) = |e| - 1, - * but makes it a cut hyperedge if pin_count(e, V_i) = |e|. 
+ * The gain of moving a node u from its current block V_i to a target block V_j can be + * expressed as follows g(u, V_j) := g(u, V_j) := w({ e \in I(u) | pin_count(e, V_j) = |e| + * - 1 }) - w({ e \in I(u) | pin_count(e, V_i) = |e| }). Moving node u from V_i to V_j, + * removes all nets e \in I(u) from the cut where pin_cout(e, V_j) = |e| - 1, but makes it + * a cut hyperedge if pin_count(e, V_i) = |e|. * - * We call the first term in the equation the benefit term b(u, V_j) and the second the penalty term p(u). - * Our gain cache stores and maintains these entries for each node and block. - * Thus, the gain cache stores k + 1 entries per node. -*/ -class CutGainCache { + * We call the first term in the equation the benefit term b(u, V_j) and the second the + * penalty term p(u). Our gain cache stores and maintains these entries for each node and + * block. Thus, the gain cache stores k + 1 entries per node. + */ +class CutGainCache +{ static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = ID(100000); using AdjacentBlocksIterator = IntegerRangeIterator::const_iterator; - public: - +public: static constexpr GainPolicy TYPE = GainPolicy::cut; static constexpr bool requires_notification_before_update = false; static constexpr bool initializes_gain_cache_entry_after_batch_uncontractions = false; static constexpr bool invalidates_entries = true; CutGainCache() : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _dummy_adjacent_blocks() { } + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _dummy_adjacent_blocks() + { + } - CutGainCache(const Context&) : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _dummy_adjacent_blocks() { } + CutGainCache(const Context &) : + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _dummy_adjacent_blocks() + { + } - CutGainCache(const CutGainCache&) = delete; - CutGainCache & operator= (const CutGainCache &) = delete; + CutGainCache(const CutGainCache &) = delete; + CutGainCache &operator=(const CutGainCache &) = delete; - CutGainCache(CutGainCache&& other) = default; - CutGainCache & operator= (CutGainCache&& other) = default; + CutGainCache(CutGainCache &&other) = default; + CutGainCache &operator=(CutGainCache &&other) = default; // ####################### Initialization ####################### - bool isInitialized() const { - return _is_initialized; - } + bool isInitialized() const { return _is_initialized; } - void reset(const bool run_parallel = true) { + void reset(const bool run_parallel = true) + { unused(run_parallel); _is_initialized = false; } - size_t size() const { - return _gain_cache.size(); - } + size_t size() const { return _gain_cache.size(); } // ! Initializes all gain cache entries - template - void initializeGainCache(const PartitionedHypergraph& partitioned_hg); + template + void initializeGainCache(const PartitionedHypergraph &partitioned_hg); - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void initializeGainCacheEntryForNode(const PartitionedHypergraph&, - const HypernodeID&) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + initializeGainCacheEntryForNode(const PartitionedHypergraph &, const HypernodeID &) + { // Do nothing } - IteratorRange adjacentBlocks(const HypernodeID) const { + IteratorRange adjacentBlocks(const HypernodeID) const + { // We do not maintain the adjacent blocks of a node in this gain cache. 
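A minimal sketch of the "k + 1 entries per node" layout mentioned in the comment above, using the same index arithmetic as the penalty_index and benefit_index helpers defined further down in this header; plain integers stand in for the atomic entries and all stored values are invented.

```cpp
// Flat gain-cache layout for the cut metric: entry 0 of each node-sized slice is
// the penalty term p(u), entries 1..k are the benefit terms b(u, V_0..V_{k-1}).
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  const int k = 4;                                   // number of blocks
  const std::size_t num_nodes = 2;
  std::vector<long> gain_cache(num_nodes * (k + 1), 0);

  auto penalty_index = [&](std::size_t u) { return u * (k + 1); };
  auto benefit_index = [&](std::size_t u, int p) { return u * (k + 1) + p + 1; };

  // Hypothetical cached terms for node 0: p(u) = 3 and b(u, V_2) = 7.
  gain_cache[penalty_index(0)] = 3;
  gain_cache[benefit_index(0, 2)] = 7;

  // g(u, V_j) = b(u, V_j) - p(u)
  for (int to = 0; to < k; ++to)
    std::cout << "g(0, V_" << to << ") = "
              << gain_cache[benefit_index(0, to)] - gain_cache[penalty_index(0)] << '\n';
}
```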
// We therefore return an iterator over all blocks here - return IteratorRange( - _dummy_adjacent_blocks.cbegin(), _dummy_adjacent_blocks.cend()); + return IteratorRange(_dummy_adjacent_blocks.cbegin(), + _dummy_adjacent_blocks.cend()); } // ####################### Gain Computation ####################### @@ -123,25 +122,27 @@ class CutGainCache { // ! More formally, p(u) := (w(I(u)) - w({ e \in I(u) | pin_count(e, V_i) = |e| }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight penaltyTerm(const HypernodeID u, - const PartitionID /* only relevant for graphs */) const { + const PartitionID /* only relevant for graphs */) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[penalty_index(u)].load(std::memory_order_relaxed); } // ! Recomputes the penalty term entry in the gain cache - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void recomputeInvalidTerms(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + recomputeInvalidTerms(const PartitionedHypergraph &partitioned_hg, const HypernodeID u) + { ASSERT(_is_initialized, "Gain cache is not initialized"); - _gain_cache[penalty_index(u)].store(recomputePenaltyTerm( - partitioned_hg, u), std::memory_order_relaxed); + _gain_cache[penalty_index(u)].store(recomputePenaltyTerm(partitioned_hg, u), + std::memory_order_relaxed); } // ! Returns the benefit term for moving node u to block to. // ! More formally, b(u, V_j) := w({ e \in I(u) | pin_count(e, V_j) = |e| - 1 }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[benefit_index(u, to)].load(std::memory_order_relaxed); } @@ -151,7 +152,8 @@ class CutGainCache { MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight gain(const HypernodeID u, const PartitionID, /* only relevant for graphs */ - const PartitionID to ) const { + const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return benefitTerm(u, to) - penaltyTerm(u, kInvalidPartition); } @@ -160,147 +162,159 @@ class CutGainCache { // ! This function returns true if the corresponding syncronized edge update triggers // ! a gain cache update. - static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& sync_update); + static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate &sync_update); // ! The partitioned (hyper)graph call this function when its updates its internal // ! data structures before calling the delta gain update function. The partitioned // ! (hyper)graph holds a lock for the corresponding (hyper)edge when calling this // ! function. Thus, it is guaranteed that no other thread will modify the hyperedge. - template - void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph&, const SynchronizedEdgeUpdate&) { + template + void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph &, + const SynchronizedEdgeUpdate &) + { // Do nothing } // ! This functions implements the delta gain updates for the cut metric. // ! When moving a node from its current block from to a target block to, we iterate - // ! over its incident hyperedges and update their pin count values. After each pin count - // ! update, we call this function to update the gain cache to changes associated with - // ! corresponding hyperedge. 
- template - void deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update); + // ! over its incident hyperedges and update their pin count values. After each pin + // count ! update, we call this function to update the gain cache to changes associated + // with ! corresponding hyperedge. + template + void deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update); // ####################### Uncontraction ####################### - // ! This function implements the gain cache update after an uncontraction that restores node v in - // ! hyperedge he. After the uncontraction node u and v are contained in hyperedge he. - template - void uncontractUpdateAfterRestore(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that restores + // node v in ! hyperedge he. After the uncontraction node u and v are contained in + // hyperedge he. + template + void uncontractUpdateAfterRestore(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const HypernodeID v, const HyperedgeID he, const HypernodeID pin_count_in_part_after); - // ! This function implements the gain cache update after an uncontraction that replaces u with v in - // ! hyperedge he. After the uncontraction only node v is contained in hyperedge he. - template - void uncontractUpdateAfterReplacement(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that replaces + // u with v in ! hyperedge he. After the uncontraction only node v is contained in + // hyperedge he. + template + void uncontractUpdateAfterReplacement(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const HypernodeID v, const HyperedgeID he); - // ! This function is called after restoring a single-pin hyperedge. The function assumes that - // ! u is the only pin of the corresponding hyperedge, while block_of_u is its corresponding block ID. - void restoreSinglePinHyperedge(const HypernodeID, - const PartitionID, - const HyperedgeWeight) { + // ! This function is called after restoring a single-pin hyperedge. The function + // assumes that ! u is the only pin of the corresponding hyperedge, while block_of_u is + // its corresponding block ID. + void restoreSinglePinHyperedge(const HypernodeID, const PartitionID, + const HyperedgeWeight) + { // Do nothing here } - // ! This function is called after restoring a net that became identical to another due to a contraction. - template - void restoreIdenticalHyperedge(const PartitionedHypergraph&, - const HyperedgeID) { + // ! This function is called after restoring a net that became identical to another due + // to a contraction. + template + void restoreIdenticalHyperedge(const PartitionedHypergraph &, const HyperedgeID) + { // Do nothing } // ! Notifies the gain cache that all uncontractions of the current batch are completed. 
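For the cut metric, only the four pin-count transitions listed in triggersDeltaGainUpdate can change cached terms, and each one adjusts the same entries for every pin of the affected hyperedge. The sketch below walks through those cases on invented numbers; the Update struct merely mimics the relevant fields of SynchronizedEdgeUpdate and is not the real type.

```cpp
// Delta-gain-update cases of the cut metric on a single synchronized edge update
// (toy values; the real code additionally skips hyperedges of size <= 1).
#include <iostream>

struct Update {
  int edge_size;
  int edge_weight;
  int pin_count_in_from_part_after;
  int pin_count_in_to_part_after;
};

int main() {
  // A pin just left a block that previously contained the whole hyperedge:
  // |e| = 4, w(e) = 2, the from-part now holds 3 pins, the to-part holds 1 pin.
  Update u{ 4, 2, 3, 1 };

  if (u.pin_count_in_from_part_after == u.edge_size - 1)   // e stopped being fully contained
    std::cout << "all pins of e: penalty -= " << u.edge_weight
              << ", benefit[from] += " << u.edge_weight << '\n';
  else if (u.pin_count_in_from_part_after == u.edge_size - 2)
    std::cout << "all pins of e: benefit[from] -= " << u.edge_weight << '\n';

  if (u.pin_count_in_to_part_after == u.edge_size)         // e is now fully inside 'to'
    std::cout << "all pins of e: penalty += " << u.edge_weight
              << ", benefit[to] -= " << u.edge_weight << '\n';
  else if (u.pin_count_in_to_part_after == u.edge_size - 1)
    std::cout << "all pins of e: benefit[to] += " << u.edge_weight << '\n';
}
```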
- void batchUncontractionsCompleted() { + void batchUncontractionsCompleted() + { // Do nothing } // ####################### Only for Testing ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputePenaltyTerm(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight recomputePenaltyTerm( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); const PartitionID block_of_u = partitioned_hg.partID(u); HyperedgeWeight penalty = 0; - for (HyperedgeID e : partitioned_hg.incidentEdges(u)) { + for(HyperedgeID e : partitioned_hg.incidentEdges(u)) + { const HypernodeID edge_size = partitioned_hg.edgeSize(e); - if ( partitioned_hg.pinCountInPart(e, block_of_u) == edge_size && edge_size > 1 ) { + if(partitioned_hg.pinCountInPart(e, block_of_u) == edge_size && edge_size > 1) + { penalty += partitioned_hg.edgeWeight(e); } } return penalty; } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputeBenefitTerm(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const PartitionID to) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight + recomputeBenefitTerm(const PartitionedHypergraph &partitioned_hg, const HypernodeID u, + const PartitionID to) const + { HyperedgeWeight benefit = 0; - for (HyperedgeID e : partitioned_hg.incidentEdges(u)) { + for(HyperedgeID e : partitioned_hg.incidentEdges(u)) + { const HypernodeID edge_size = partitioned_hg.edgeSize(e); - if (partitioned_hg.pinCountInPart(e, to) == edge_size - 1 && edge_size > 1) { + if(partitioned_hg.pinCountInPart(e, to) == edge_size - 1 && edge_size > 1) + { benefit += partitioned_hg.edgeWeight(e); } } return benefit; } - void changeNumberOfBlocks(const PartitionID new_k) { + void changeNumberOfBlocks(const PartitionID new_k) + { ASSERT(new_k <= _k); _dummy_adjacent_blocks = IntegerRangeIterator(new_k); } - template - bool verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph&) const { + template + bool verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph &) const + { // Gain cache does not track adjacent blocks of node return true; } - private: +private: friend class DeltaCutGainCache; MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - size_t penalty_index(const HypernodeID u) const { - return size_t(u) * ( _k + 1 ); - } + size_t penalty_index(const HypernodeID u) const { return size_t(u) * (_k + 1); } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - size_t benefit_index(const HypernodeID u, const PartitionID p) const { - return size_t(u) * ( _k + 1 ) + p + 1; + size_t benefit_index(const HypernodeID u, const PartitionID p) const + { + return size_t(u) * (_k + 1) + p + 1; } // ! Allocates the memory required to store the gain cache - void allocateGainTable(const HypernodeID num_nodes, - const PartitionID k) { - if (_gain_cache.size() == 0 && k != kInvalidPartition) { + void allocateGainTable(const HypernodeID num_nodes, const PartitionID k) + { + if(_gain_cache.size() == 0 && k != kInvalidPartition) + { _k = k; _dummy_adjacent_blocks = IntegerRangeIterator(k); - _gain_cache.resize( - "Refinement", "gain_cache", num_nodes * size_t(_k + 1), true); + _gain_cache.resize("Refinement", "gain_cache", num_nodes * size_t(_k + 1), true); } } // ! 
Initializes the benefit and penalty terms for a node u - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - vec& benefit_aggregator); - - bool nodeGainAssertions(const HypernodeID u, const PartitionID p) const { - if ( p == kInvalidPartition || p >= _k ) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + initializeGainCacheEntryForNode(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, vec &benefit_aggregator); + + bool nodeGainAssertions(const HypernodeID u, const PartitionID p) const + { + if(p == kInvalidPartition || p >= _k) + { LOG << "Invalid block ID (Node" << u << "is part of block" << p << ", but valid block IDs must be in the range [ 0," << _k << "])"; return false; } - if ( benefit_index(u, p) >= _gain_cache.size() ) { + if(benefit_index(u, p) >= _gain_cache.size()) + { LOG << "Access to gain cache would result in an out-of-bounds access (" << "Benefit Index =" << benefit_index(u, p) << ", Gain Cache Size =" << _gain_cache.size() << ")"; @@ -309,146 +323,157 @@ class CutGainCache { return true; } - // ! Indicate whether or not the gain cache is initialized bool _is_initialized; // ! Number of blocks PartitionID _k; - // ! Array of size |V| * (k + 1), which stores the benefit and penalty terms of each node. - ds::Array< CAtomic > _gain_cache; + // ! Array of size |V| * (k + 1), which stores the benefit and penalty terms of each + // node. + ds::Array > _gain_cache; // ! Provides an iterator from 0 to k (:= number of blocks) IntegerRangeIterator _dummy_adjacent_blocks; }; /** - * In our FM algorithm, the different local searches perform nodes moves locally not visible for other - * threads. The delta gain cache stores these local changes relative to the shared - * gain cache. For example, the penalty term can be computed as follows - * p'(u) := p(u) + Δp(u) - * where p(u) is the penalty term stored in the shared gain cache and Δp(u) is the penalty term stored in - * the delta gain cache after performing some moves locally. To maintain Δp(u) and Δb(u,V_j), we use a hash - * table that only stores entries affected by a gain cache update. -*/ -class DeltaCutGainCache { + * In our FM algorithm, the different local searches perform nodes moves locally not + * visible for other threads. The delta gain cache stores these local changes relative to + * the shared gain cache. For example, the penalty term can be computed as follows p'(u) + * := p(u) + Δp(u) where p(u) is the penalty term stored in the shared gain cache and + * Δp(u) is the penalty term stored in the delta gain cache after performing some moves + * locally. To maintain Δp(u) and Δb(u,V_j), we use a hash table that only stores entries + * affected by a gain cache update. 
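A compact illustration of the delta overlay just described: a hash map records only the entries a local search has touched, and every read adds that delta on top of the shared value, i.e. p'(u) = p(u) + Δp(u). The container and numbers below are simplified stand-ins, not the DeltaCutGainCache interface.

```cpp
// Shared gain cache plus a thread-local delta overlay (toy types, no atomics).
#include <cstddef>
#include <iostream>
#include <unordered_map>
#include <vector>

int main() {
  std::vector<long> shared_gain_cache = { 5, 0, 3, 0, 0 };   // shared p/b entries of one node
  std::unordered_map<std::size_t, long> delta;               // only locally touched entries

  auto value = [&](std::size_t index) {
    auto it = delta.find(index);
    return shared_gain_cache[index] + (it != delta.end() ? it->second : 0);
  };

  delta[0] -= 2;   // a local move lowered the penalty term stored at index 0
  delta[2] += 4;   // ... and raised one benefit term

  std::cout << "p'(u)      = " << value(0) << '\n';   // 5 - 2 = 3
  std::cout << "b'(u, V_1) = " << value(2) << '\n';   // 3 + 4 = 7
}
```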
+ */ +class DeltaCutGainCache +{ using AdjacentBlocksIterator = typename CutGainCache::AdjacentBlocksIterator; - public: +public: static constexpr bool requires_connectivity_set = false; - DeltaCutGainCache(const CutGainCache& gain_cache) : - _gain_cache(gain_cache), - _gain_cache_delta() { } + DeltaCutGainCache(const CutGainCache &gain_cache) : + _gain_cache(gain_cache), _gain_cache_delta() + { + } // ####################### Initialize & Reset ####################### - void initialize(const size_t size) { - _gain_cache_delta.initialize(size); - } + void initialize(const size_t size) { _gain_cache_delta.initialize(size); } - void clear() { - _gain_cache_delta.clear(); - } + void clear() { _gain_cache_delta.clear(); } - void dropMemory() { - _gain_cache_delta.freeInternalData(); - } + void dropMemory() { _gain_cache_delta.freeInternalData(); } - size_t size_in_bytes() const { - return _gain_cache_delta.size_in_bytes(); - } + size_t size_in_bytes() const { return _gain_cache_delta.size_in_bytes(); } // ####################### Gain Computation ####################### // ! Returns an iterator over the adjacent blocks of a node - IteratorRange adjacentBlocks(const HypernodeID hn) const { + IteratorRange adjacentBlocks(const HypernodeID hn) const + { return _gain_cache.adjacentBlocks(hn); } // ! Returns the penalty term of node u. // ! More formally, p(u) := (w(I(u)) - w({ e \in I(u) | pin_count(e, V_i) = |e| }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight penaltyTerm(const HypernodeID u, - const PartitionID from) const { - const HyperedgeWeight* penalty_delta = - _gain_cache_delta.get_if_contained(_gain_cache.penalty_index(u)); - return _gain_cache.penaltyTerm(u, from) + ( penalty_delta ? *penalty_delta : 0 ); + HyperedgeWeight penaltyTerm(const HypernodeID u, const PartitionID from) const + { + const HyperedgeWeight *penalty_delta = + _gain_cache_delta.get_if_contained(_gain_cache.penalty_index(u)); + return _gain_cache.penaltyTerm(u, from) + (penalty_delta ? *penalty_delta : 0); } // ! Returns the benefit term for moving node u to block to. // ! More formally, b(u, V_j) := w({ e \in I(u) | pin_count(e, V_j) = |e| - 1 }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(to != kInvalidPartition && to < _gain_cache._k); - const HyperedgeWeight* benefit_delta = - _gain_cache_delta.get_if_contained(_gain_cache.benefit_index(u, to)); - return _gain_cache.benefitTerm(u, to) + ( benefit_delta ? *benefit_delta : 0 ); + const HyperedgeWeight *benefit_delta = + _gain_cache_delta.get_if_contained(_gain_cache.benefit_index(u, to)); + return _gain_cache.benefitTerm(u, to) + (benefit_delta ? *benefit_delta : 0); } // ! Returns the gain of moving node u from its current block to a target block V_j. // ! More formally, g(u, V_j) := b(u, V_j) - p(u). 
MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight gain(const HypernodeID u, - const PartitionID from, - const PartitionID to ) const { + HyperedgeWeight gain(const HypernodeID u, const PartitionID from, + const PartitionID to) const + { return benefitTerm(u, to) - penaltyTerm(u, from); } - // ####################### Delta Gain Update ####################### + // ####################### Delta Gain Update ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) + { const HypernodeID edge_size = sync_update.edge_size; - if ( edge_size > 1 ) { + if(edge_size > 1) + { const HyperedgeID he = sync_update.he; const PartitionID from = sync_update.from; const PartitionID to = sync_update.to; const HyperedgeWeight edge_weight = sync_update.edge_weight; - const HypernodeID pin_count_in_from_part_after = sync_update.pin_count_in_from_part_after; - const HypernodeID pin_count_in_to_part_after = sync_update.pin_count_in_to_part_after; - if ( pin_count_in_from_part_after == edge_size - 1 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + const HypernodeID pin_count_in_from_part_after = + sync_update.pin_count_in_from_part_after; + const HypernodeID pin_count_in_to_part_after = + sync_update.pin_count_in_to_part_after; + if(pin_count_in_from_part_after == edge_size - 1) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.penalty_index(u)] -= edge_weight; _gain_cache_delta[_gain_cache.benefit_index(u, from)] += edge_weight; } - } else if ( pin_count_in_from_part_after == edge_size - 2 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + } + else if(pin_count_in_from_part_after == edge_size - 2) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.benefit_index(u, from)] -= edge_weight; } } - if ( pin_count_in_to_part_after == edge_size ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + if(pin_count_in_to_part_after == edge_size) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.penalty_index(u)] += edge_weight; _gain_cache_delta[_gain_cache.benefit_index(u, to)] -= edge_weight; } - } else if ( pin_count_in_to_part_after == edge_size - 1 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + } + else if(pin_count_in_to_part_after == edge_size - 1) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.benefit_index(u, to)] += edge_weight; } } } } - // ####################### Miscellaneous ####################### + // ####################### Miscellaneous ####################### - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); - utils::MemoryTreeNode* gain_cache_delta_node = parent->addChild("Delta Gain Cache"); + utils::MemoryTreeNode *gain_cache_delta_node = parent->addChild("Delta Gain Cache"); gain_cache_delta_node->updateSize(size_in_bytes()); } - private: - const CutGainCache& _gain_cache; +private: + const CutGainCache &_gain_cache; // ! Stores the delta of each locally touched gain cache entry // ! 
relative to the gain cache in '_phg' ds::DynamicFlatMap _gain_cache_delta; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h b/mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h index 7af5970d0..f0ffe76f3 100644 --- a/mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h +++ b/mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h @@ -30,52 +30,59 @@ #include "tbb/enumerable_thread_specific.h" -#include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" -#include "mt-kahypar/partition/refinement/gains/cut/cut_attributed_gains.h" #include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/partition/refinement/gains/cut/cut_attributed_gains.h" +#include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" namespace mt_kahypar { -class CutGainComputation : public GainComputationBase { +class CutGainComputation + : public GainComputationBase +{ using Base = GainComputationBase; using RatingMap = typename Base::RatingMap; static constexpr bool enable_heavy_assert = false; - public: - CutGainComputation(const Context& context, - bool disable_randomization = false) : - Base(context, disable_randomization) { } +public: + CutGainComputation(const Context &context, bool disable_randomization = false) : + Base(context, disable_randomization) + { + } // ! Precomputes the gain to all adjacent blocks. // ! Conceptually, we compute the gain of moving the node to an non-adjacent block // ! and the gain to all adjacent blocks assuming the node is in an isolated block. // ! The gain of that node to a block to can then be computed by // ! 'isolated_block_gain - tmp_scores[to]' (see gain(...)) - template - void precomputeGains(const PartitionedHypergraph& phg, - const HypernodeID hn, - RatingMap& tmp_scores, - Gain& isolated_block_gain, - const bool) { + template + void precomputeGains(const PartitionedHypergraph &phg, const HypernodeID hn, + RatingMap &tmp_scores, Gain &isolated_block_gain, const bool) + { ASSERT(tmp_scores.size() == 0, "Rating map not empty"); PartitionID from = phg.partID(hn); - for (const HyperedgeID& he : phg.incidentEdges(hn)) { + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { PartitionID connectivity = phg.connectivity(he); HypernodeID pin_count_in_from_part = phg.pinCountInPart(he, from); HyperedgeWeight weight = phg.edgeWeight(he); - if (connectivity == 1 && phg.edgeSize(he) > 1) { + if(connectivity == 1 && phg.edgeSize(he) > 1) + { // In case, the hyperedge is a non-cut hyperedge, we would increase // the cut, if we move vertex hn to an other block. isolated_block_gain += weight; - } else if (connectivity == 2 && pin_count_in_from_part == 1) { - for (const PartitionID& to : phg.connectivitySet(he)) { + } + else if(connectivity == 2 && pin_count_in_from_part == 1) + { + for(const PartitionID &to : phg.connectivitySet(he)) + { // In case there are only two blocks contained in the current // hyperedge and only one pin left in the from part of the hyperedge, // we would make the current hyperedge a non-cut hyperedge when moving // vertex hn to the other block. - if (from != to) { + if(from != to) + { tmp_scores[to] += weight; } } @@ -83,13 +90,14 @@ class CutGainComputation : public GainComputationBase in parallel (see global_rollback.h). - * Each node move m_i is of the form (u, V_i, V_j), which means that - * node u is moved from block V_i to block V_j. 
Each node in this sequence is moved at most once. - * Moreover, we assume that all node moves with an index < i are performed before m_i. + * In our FM algorithm, we recompute the gain values of all node moves in the global move + * sequence M := in parallel (see global_rollback.h). Each node move m_i + * is of the form (u, V_i, V_j), which means that node u is moved from block V_i to block + * V_j. Each node in this sequence is moved at most once. Moreover, we assume that all + * node moves with an index < i are performed before m_i. * - * The parallel gain recomputation algorithm iterates over all hyperedges e \in E in parallel. - * We then iterate over the pins of e and compute some auxilliary data based on + * The parallel gain recomputation algorithm iterates over all hyperedges e \in E in + * parallel. We then iterate over the pins of e and compute some auxilliary data based on * which we then decide if we attribute an increase or reduction by w(e) to a moved pin. - * This class implements the functions required by the rollback algorithm to recompute all gain values - * for the cut metric. -*/ -class CutRollback { + * This class implements the functions required by the rollback algorithm to recompute all + * gain values for the cut metric. + */ +class CutRollback +{ - /** - * This class stores for a hyperedge and block the correponding data required to - * recompute the gain values. It stores the move index of the pin that first moved out - * (first_in) resp. last moved into the corresponding block (last_out) and the number - * of moved pins that moved out of the block (moved_out). - */ - public: + /** + * This class stores for a hyperedge and block the correponding data required to + * recompute the gain values. It stores the move index of the pin that first moved out + * (first_in) resp. last moved into the corresponding block (last_out) and the number + * of moved pins that moved out of the block (moved_out). + */ +public: static constexpr bool supports_parallel_rollback = true; - struct RecalculationData { + struct RecalculationData + { MoveID first_out, last_in; HypernodeID moved_out; RecalculationData() : - first_out(std::numeric_limits::max()), - last_in(std::numeric_limits::min()), - moved_out(0) - { } + first_out(std::numeric_limits::max()), + last_in(std::numeric_limits::min()), moved_out(0) + { + } - void reset() { + void reset() + { first_out = std::numeric_limits::max(); last_in = std::numeric_limits::min(); moved_out = 0; @@ -72,52 +75,50 @@ class CutRollback { }; // Updates the auxilliary data for a node move m with index m_id. 
- static void updateMove(const MoveID m_id, - const Move& m, - vec& r) { + static void updateMove(const MoveID m_id, const Move &m, vec &r) + { r[m.from].first_out = std::min(r[m.from].first_out, m_id); r[m.to].last_in = std::max(r[m.to].last_in, m_id); ++r[m.from].moved_out; } - static void updateNonMovedPinInBlock(const PartitionID, - vec&) { + static void updateNonMovedPinInBlock(const PartitionID, vec &) + { // Do nothing here } - template - static HyperedgeWeight benefit(const PartitionedHypergraph& phg, - const HyperedgeID e, - const MoveID m_id, - const Move& m, - vec& r) { + template + static HyperedgeWeight benefit(const PartitionedHypergraph &phg, const HyperedgeID e, + const MoveID m_id, const Move &m, + vec &r) + { const HypernodeID edge_size = phg.edgeSize(e); - // If the hyperedge was potentially a non-cut edge at some point and m is the last node - // that moves into the corresponding block, while the first node that moves out of the corresponding - // block is performed strictly after m, then m removes e from the cut. + // If the hyperedge was potentially a non-cut edge at some point and m is the last + // node that moves into the corresponding block, while the first node that moves out + // of the corresponding block is performed strictly after m, then m removes e from the + // cut. const bool was_potentially_non_cut_edge_at_some_point = - phg.pinCountInPart(e, m.to) + r[m.to].moved_out == edge_size; + phg.pinCountInPart(e, m.to) + r[m.to].moved_out == edge_size; const bool has_benefit = was_potentially_non_cut_edge_at_some_point && - r[m.to].last_in == m_id && m_id < r[m.to].first_out; + r[m.to].last_in == m_id && m_id < r[m.to].first_out; return has_benefit * phg.edgeWeight(e); } - template - static HyperedgeWeight penalty(const PartitionedHypergraph& phg, - const HyperedgeID e, - const MoveID m_id, - const Move& m, - vec& r) { + template + static HyperedgeWeight penalty(const PartitionedHypergraph &phg, const HyperedgeID e, + const MoveID m_id, const Move &m, + vec &r) + { const HypernodeID edge_size = phg.edgeSize(e); - // If the hyperedge was potentially a non-cut edge at some point and m is the first node - // that moves out of the corresponding block, while the last node that moves into the corresponding - // block is performed strictly before m, then m makes e a cut edge. + // If the hyperedge was potentially a non-cut edge at some point and m is the first + // node that moves out of the corresponding block, while the last node that moves into + // the corresponding block is performed strictly before m, then m makes e a cut edge. 
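The benefit and penalty predicates above are easiest to follow on a concrete move sequence. The sketch below fills the per-block counters exactly as updateMove does and then evaluates the benefit condition for the last move of a three-pin hyperedge; Recalc and Move are simplified stand-ins for RecalculationData and mt-kahypar's Move, and all values are invented.

```cpp
// Rollback bookkeeping on one hyperedge e = {v0, v1, v2} with blocks {0, 1}.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

struct Recalc {
  std::uint32_t first_out = std::numeric_limits<std::uint32_t>::max();
  std::uint32_t last_in = 0;
  int moved_out = 0;
};
struct Move { int node, from, to; };

int main() {
  const int k = 2;
  // Pin counts of e after *all* moves were applied: block 0 holds 0 pins, block 1 holds 3.
  std::vector<int> pin_count_in_part = { 0, 3 };
  const int edge_size = 3, edge_weight = 1;

  // Moves of pins of e, indexed by their position in the global move sequence.
  std::vector<Move> moves = { { 0, 0, 1 }, { 2, 0, 1 } };

  std::vector<Recalc> r(k);
  for (std::uint32_t m_id = 0; m_id < moves.size(); ++m_id) {   // mirrors updateMove(...)
    const Move& m = moves[m_id];
    r[m.from].first_out = std::min(r[m.from].first_out, m_id);
    r[m.to].last_in = std::max(r[m.to].last_in, m_id);
    ++r[m.from].moved_out;
  }

  // Benefit check for the last move (m_id = 1): it gathers all pins of e in block 1,
  // and no pin leaves block 1 afterwards, so this move removes e from the cut.
  const std::uint32_t m_id = 1;
  const Move& m = moves[m_id];
  const bool was_potentially_non_cut =
      pin_count_in_part[m.to] + r[m.to].moved_out == edge_size;
  const bool has_benefit =
      was_potentially_non_cut && r[m.to].last_in == m_id && m_id < r[m.to].first_out;
  std::cout << "benefit attributed to move " << m_id << ": "
            << (has_benefit ? edge_weight : 0) << '\n';
}
```

The penalty side is symmetric, using first_out and moved_out of the from-block.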
const bool was_potentially_non_cut_edge_at_some_point = - phg.pinCountInPart(e, m.from) + r[m.from].moved_out == edge_size; + phg.pinCountInPart(e, m.from) + r[m.from].moved_out == edge_size; const bool has_penalty = was_potentially_non_cut_edge_at_some_point && - r[m.from].first_out == m_id && m_id > r[m.from].last_in; + r[m.from].first_out == m_id && m_id > r[m.from].last_in; return has_penalty * phg.edgeWeight(e); } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.cpp b/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.cpp index da1244a14..6c1f9c2cb 100644 --- a/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.cpp +++ b/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.cpp @@ -26,61 +26,70 @@ #include "mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" #include "tbb/concurrent_vector.h" +#include "tbb/enumerable_thread_specific.h" +#include "tbb/parallel_for.h" #include "mt-kahypar/definitions.h" namespace mt_kahypar { -template -void GraphCutGainCache::initializeGainCache(const PartitionedGraph& partitioned_graph) { +template +void GraphCutGainCache::initializeGainCache(const PartitionedGraph &partitioned_graph) +{ ASSERT(!_is_initialized, "Gain cache is already initialized"); ASSERT(_k <= 0 || _k >= partitioned_graph.k(), - "Gain cache was already initialized for a different k" << V(_k) << V(partitioned_graph.k())); + "Gain cache was already initialized for a different k" + << V(_k) << V(partitioned_graph.k())); allocateGainTable(partitioned_graph.topLevelNumNodes(), partitioned_graph.k()); // assert that current gain values are zero ASSERT(!_is_initialized && - std::none_of(_gain_cache.begin(), _gain_cache.end(), - [&](const auto& weight) { return weight.load() != 0; })); + std::none_of(_gain_cache.begin(), _gain_cache.end(), + [&](const auto &weight) { return weight.load() != 0; })); // Initialize gain cache partitioned_graph.doParallelForAllEdges([&](const HyperedgeID e) { const HypernodeID u = partitioned_graph.edgeSource(e); - if (partitioned_graph.nodeIsEnabled(u) && !partitioned_graph.isSinglePin(e)) { - size_t index = incident_weight_index(u, - partitioned_graph.partID(partitioned_graph.edgeTarget(e))); - _gain_cache[index].fetch_add(partitioned_graph.edgeWeight(e), std::memory_order_relaxed); + if(partitioned_graph.nodeIsEnabled(u) && !partitioned_graph.isSinglePin(e)) + { + size_t index = incident_weight_index( + u, partitioned_graph.partID(partitioned_graph.edgeTarget(e))); + _gain_cache[index].fetch_add(partitioned_graph.edgeWeight(e), + std::memory_order_relaxed); } }); _is_initialized = true; } -bool GraphCutGainCache::triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& /* only relevant for hypergraphs */) { +bool GraphCutGainCache::triggersDeltaGainUpdate( + const SynchronizedEdgeUpdate & /* only relevant for hypergraphs */) +{ return true; } -template -void GraphCutGainCache::deltaGainUpdate(const PartitionedGraph& partitioned_graph, - const SynchronizedEdgeUpdate& sync_update) { +template +void GraphCutGainCache::deltaGainUpdate(const PartitionedGraph &partitioned_graph, + const SynchronizedEdgeUpdate &sync_update) +{ ASSERT(_is_initialized, "Gain cache is not initialized"); const HypernodeID target = partitioned_graph.edgeTarget(sync_update.he); const size_t 
index_in_from_part = incident_weight_index(target, sync_update.from); - _gain_cache[index_in_from_part].fetch_sub(sync_update.edge_weight, std::memory_order_relaxed); + _gain_cache[index_in_from_part].fetch_sub(sync_update.edge_weight, + std::memory_order_relaxed); const size_t index_in_to_part = incident_weight_index(target, sync_update.to); - _gain_cache[index_in_to_part].fetch_add(sync_update.edge_weight, std::memory_order_relaxed); + _gain_cache[index_in_to_part].fetch_add(sync_update.edge_weight, + std::memory_order_relaxed); } -template -void GraphCutGainCache::uncontractUpdateAfterRestore(const PartitionedGraph& partitioned_graph, - const HypernodeID u, - const HypernodeID v, - const HyperedgeID he, - const HypernodeID) { - if ( _is_initialized ) { +template +void GraphCutGainCache::uncontractUpdateAfterRestore( + const PartitionedGraph &partitioned_graph, const HypernodeID u, const HypernodeID v, + const HyperedgeID he, const HypernodeID) +{ + if(_is_initialized) + { // the edge weight is added to u and v const PartitionID block = partitioned_graph.partID(u); const HyperedgeWeight we = partitioned_graph.edgeWeight(he); @@ -89,35 +98,37 @@ void GraphCutGainCache::uncontractUpdateAfterRestore(const PartitionedGraph& par } } -template -MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE -void GraphCutGainCache::uncontractUpdateAfterReplacement(const PartitionedGraph& partitioned_graph, - const HypernodeID u, - const HypernodeID v, - const HyperedgeID he) { - if ( _is_initialized ) { +template +MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void +GraphCutGainCache::uncontractUpdateAfterReplacement( + const PartitionedGraph &partitioned_graph, const HypernodeID u, const HypernodeID v, + const HyperedgeID he) +{ + if(_is_initialized) + { // the edge weight shifts from u to v const HypernodeID w = partitioned_graph.edgeTarget(he); const PartitionID block_of_w = partitioned_graph.partID(w); const HyperedgeWeight we = partitioned_graph.edgeWeight(he); - _gain_cache[incident_weight_index(u, block_of_w)].fetch_sub(we, std::memory_order_relaxed); - _gain_cache[incident_weight_index(v, block_of_w)].fetch_add(we, std::memory_order_relaxed); + _gain_cache[incident_weight_index(u, block_of_w)].fetch_sub( + we, std::memory_order_relaxed); + _gain_cache[incident_weight_index(v, block_of_w)].fetch_add( + we, std::memory_order_relaxed); } } namespace { -#define GRAPH_CUT_INITIALIZE_GAIN_CACHE(X) void GraphCutGainCache::initializeGainCache(const X&) -#define GRAPH_CUT_DELTA_GAIN_UPDATE(X) void GraphCutGainCache::deltaGainUpdate(const X&, \ - const SynchronizedEdgeUpdate&) -#define GRAPH_CUT_RESTORE_UPDATE(X) void GraphCutGainCache::uncontractUpdateAfterRestore(const X&, \ - const HypernodeID, \ - const HypernodeID, \ - const HyperedgeID, \ - const HypernodeID) -#define GRAPH_CUT_REPLACEMENT_UPDATE(X) void GraphCutGainCache::uncontractUpdateAfterReplacement(const X&, \ - const HypernodeID, \ - const HypernodeID, \ - const HyperedgeID) +#define GRAPH_CUT_INITIALIZE_GAIN_CACHE(X) \ + void GraphCutGainCache::initializeGainCache(const X &) +#define GRAPH_CUT_DELTA_GAIN_UPDATE(X) \ + void GraphCutGainCache::deltaGainUpdate(const X &, const SynchronizedEdgeUpdate &) +#define GRAPH_CUT_RESTORE_UPDATE(X) \ + void GraphCutGainCache::uncontractUpdateAfterRestore( \ + const X &, const HypernodeID, const HypernodeID, const HyperedgeID, \ + const HypernodeID) +#define GRAPH_CUT_REPLACEMENT_UPDATE(X) \ + void GraphCutGainCache::uncontractUpdateAfterReplacement( \ + const X &, const HypernodeID, const HypernodeID, const HyperedgeID) } 
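To summarize the two functions above in one picture: the graph variant keeps k incident-weight entries per node, and when the source of an edge moves from block from to block to, the edge weight is simply shifted between two entries of the edge's target. The sketch below uses a plain vector and invented indices rather than the GraphCutGainCache API.

```cpp
// Per-node incident-weight table for plain graphs and the shift performed by a move.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  const int k = 3;
  const std::size_t num_nodes = 2;
  std::vector<long> cache(num_nodes * k, 0);          // w(u, V_j) for every node and block
  auto index = [&](std::size_t u, int block) { return u * k + block; };

  // Edge (source u = 0, target = 1) with weight 4; u currently lies in block 0.
  cache[index(1, 0)] += 4;                            // initialization credits u's block at the target

  // u moves from block 0 to block 2: shift the edge weight at the target node.
  const int from = 0, to = 2, edge_weight = 4;
  cache[index(1, from)] -= edge_weight;
  cache[index(1, to)]   += edge_weight;

  for (int b = 0; b < k; ++b)
    std::cout << "w(node 1, V_" << b << ") = " << cache[index(1, b)] << '\n';
}
```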
INSTANTIATE_FUNC_WITH_PARTITIONED_HG(GRAPH_CUT_INITIALIZE_GAIN_CACHE) @@ -125,4 +136,4 @@ INSTANTIATE_FUNC_WITH_PARTITIONED_HG(GRAPH_CUT_DELTA_GAIN_UPDATE) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(GRAPH_CUT_RESTORE_UPDATE) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(GRAPH_CUT_REPLACEMENT_UPDATE) -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h b/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h index d98ee8ff0..a638931e8 100644 --- a/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h +++ b/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h @@ -28,12 +28,12 @@ #include "kahypar-resources/meta/policy_registry.h" -#include "mt-kahypar/partition/context_enum_classes.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/partition/context_enum_classes.h" #include "mt-kahypar/utils/range.h" namespace mt_kahypar { @@ -42,22 +42,23 @@ namespace mt_kahypar { class DeltaGraphCutGainCache; /** - * The gain cache stores the gain values for all possible node moves for the cut metric on plain graphs. + * The gain cache stores the gain values for all possible node moves for the cut metric on + * plain graphs. * * For a weighted graph G = (V,E,c,w), the cut metric is defined as follows * connectivity(H) := \sum_{e \in cut(E)} w(e). * - * The gain of moving a node u from its current block V_i to a target block V_j can be expressed as follows - * g(u, V_j) := w(u, V_j) - w(u, V_i) = b(u, V_j) - p(u) - * where w(u, V') are the weight of all edges that connects node u to block V'. - * We call b(u, V_j) the benefit term and p(u) the penalty term. Our gain cache stores and maintains these + * The gain of moving a node u from its current block V_i to a target block V_j can be + * expressed as follows g(u, V_j) := w(u, V_j) - w(u, V_i) = b(u, V_j) - p(u) where w(u, + * V') are the weight of all edges that connects node u to block V'. We call b(u, V_j) the + * benefit term and p(u) the penalty term. Our gain cache stores and maintains these * entries for each node and block. Note that p(u) = b(u, V_i). * Thus, the gain cache stores k entries per node. 
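A tiny worked example for the gain formula in the comment above: with w(u, V_j) as the summed weight of u's edges into block V_j, the gain is g(u, V_j) = w(u, V_j) - w(u, V_i), and p(u) = b(u, V_i) as noted. The adjacency data below is invented.

```cpp
// Gain of a single node under the graph cut metric, computed directly from its edges.
#include <iostream>
#include <utility>
#include <vector>

int main() {
  const int k = 3;
  // Neighbours of u as (block of neighbour, edge weight); u lies in block 0.
  std::vector<std::pair<int, int>> neighbours = { { 0, 2 }, { 1, 5 }, { 1, 1 }, { 2, 3 } };
  const int block_of_u = 0;

  std::vector<long> w(k, 0);                 // w(u, V_j)
  for (auto [block, weight] : neighbours) w[block] += weight;

  for (int to = 0; to < k; ++to)
    std::cout << "g(u, V_" << to << ") = " << w[to] - w[block_of_u] << '\n';
  // prints g(u, V_0) = 0, g(u, V_1) = 4, g(u, V_2) = 1
}
```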
-*/ -class GraphCutGainCache { - - public: + */ +class GraphCutGainCache +{ +public: static constexpr GainPolicy TYPE = GainPolicy::cut_for_graphs; static constexpr bool requires_notification_before_update = false; static constexpr bool initializes_gain_cache_entry_after_batch_uncontractions = false; @@ -66,56 +67,55 @@ class GraphCutGainCache { using AdjacentBlocksIterator = IntegerRangeIterator::const_iterator; GraphCutGainCache() : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _dummy_adjacent_blocks() { } + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _dummy_adjacent_blocks() + { + } - GraphCutGainCache(const Context&) : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _dummy_adjacent_blocks() { } + GraphCutGainCache(const Context &) : + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _dummy_adjacent_blocks() + { + } - GraphCutGainCache(const GraphCutGainCache&) = delete; - GraphCutGainCache & operator= (const GraphCutGainCache &) = delete; + GraphCutGainCache(const GraphCutGainCache &) = delete; + GraphCutGainCache &operator=(const GraphCutGainCache &) = delete; - GraphCutGainCache(GraphCutGainCache&& other) = default; - GraphCutGainCache & operator= (GraphCutGainCache&& other) = default; + GraphCutGainCache(GraphCutGainCache &&other) = default; + GraphCutGainCache &operator=(GraphCutGainCache &&other) = default; // ####################### Initialization ####################### - bool isInitialized() const { - return _is_initialized; - } + bool isInitialized() const { return _is_initialized; } - void reset(const bool run_parallel = true) { - if ( _is_initialized ) { - _gain_cache.assign(_gain_cache.size(), CAtomic(0), run_parallel); + void reset(const bool run_parallel = true) + { + if(_is_initialized) + { + _gain_cache.assign(_gain_cache.size(), CAtomic(0), run_parallel); } _is_initialized = false; } - size_t size() const { - return _gain_cache.size(); - } + size_t size() const { return _gain_cache.size(); } // ! Initializes all gain cache entries - template - void initializeGainCache(const PartitionedGraph& partitioned_graph); + template + void initializeGainCache(const PartitionedGraph &partitioned_graph); - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void initializeGainCacheEntryForNode(const PartitionedGraph&, - const HypernodeID&) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + initializeGainCacheEntryForNode(const PartitionedGraph &, const HypernodeID &) + { // Do nothing } - IteratorRange adjacentBlocks(const HypernodeID) const { + IteratorRange adjacentBlocks(const HypernodeID) const + { // We do not maintain the adjacent blocks of a node in this gain cache. // We therefore return an iterator over all blocks here - return IteratorRange( - _dummy_adjacent_blocks.cbegin(), _dummy_adjacent_blocks.cend()); + return IteratorRange(_dummy_adjacent_blocks.cbegin(), + _dummy_adjacent_blocks.cend()); } // ####################### Gain Computation ####################### @@ -123,23 +123,24 @@ class GraphCutGainCache { // ! Returns the penalty term of node u. // ! 
More formally, p(u) := w(u, partID(u)) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight penaltyTerm(const HypernodeID u, - const PartitionID from) const { + HyperedgeWeight penaltyTerm(const HypernodeID u, const PartitionID from) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[incident_weight_index(u, from)].load(std::memory_order_relaxed); } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void recomputeInvalidTerms(const PartitionedGraph&, - const HypernodeID) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void recomputeInvalidTerms(const PartitionedGraph &, + const HypernodeID) + { // Do nothing here (only relevant for hypergraph gain cache) } // ! Returns the benefit term for moving node u to block to. // ! More formally, b(u, V_j) := w(u, V_j) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[incident_weight_index(u, to)].load(std::memory_order_relaxed); } @@ -147,7 +148,9 @@ class GraphCutGainCache { // ! Returns the gain of moving node u from its current block to a target block V_j. // ! More formally, g(u, V_j) := b(u, V_j) - p(u). MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight gain(const HypernodeID u, const PartitionID from, const PartitionID to) const { + HyperedgeWeight gain(const HypernodeID u, const PartitionID from, + const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return benefitTerm(u, to) - penaltyTerm(u, from); } @@ -156,121 +159,133 @@ class GraphCutGainCache { // ! This function returns true if the corresponding syncronized edge update triggers // ! a gain cache update. - static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& sync_update); + static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate &sync_update); // ! The partitioned (hyper)graph call this function when its updates its internal // ! data structures before calling the delta gain update function. The partitioned // ! (hyper)graph holds a lock for the corresponding (hyper)edge when calling this // ! function. Thus, it is guaranteed that no other thread will modify the hyperedge. - template - void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph&, const SynchronizedEdgeUpdate&) { + template + void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph &, + const SynchronizedEdgeUpdate &) + { // Do nothing } - // ! This functions implements the delta gain updates for the cut metric on plain graphs. - // ! When moving a node from its current block from to a target block to, we iterate - // ! over its incident edges and syncronize the move on each edge. After syncronization, - // ! we call this function to update the gain cache to changes associated with - // ! corresponding edge. - template - void deltaGainUpdate(const PartitionedGraph& partitioned_graph, - const SynchronizedEdgeUpdate& sync_update); + // ! This functions implements the delta gain updates for the cut metric on plain + // graphs. ! When moving a node from its current block from to a target block to, we + // iterate ! over its incident edges and syncronize the move on each edge. After + // syncronization, ! we call this function to update the gain cache to changes + // associated with ! corresponding edge. 
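The per-edge bookkeeping that this comment (and the deltaGainUpdate declaration that follows) describes boils down to shifting one edge weight between two per-block accumulators of the edge's other endpoint. A minimal single-threaded sketch, reusing the toy flat table from the earlier example rather than the patch's atomic, lock-protected entries:

#include <cstdint>
#include <vector>

using NodeID = uint32_t;
using BlockID = int32_t;
using Weight = int64_t;

// After a neighbour of `target` moved from block `from` to block `to`, the weight of
// the connecting edge moves between the two per-block accumulators of `target`.
// (The real update additionally synchronizes on the edge and uses relaxed atomics.)
void applyEdgeUpdate(std::vector<Weight> &incident_weight, const BlockID k,
                     const NodeID target, const BlockID from, const BlockID to,
                     const Weight edge_weight)
{
  incident_weight[size_t(target) * k + from] -= edge_weight;
  incident_weight[size_t(target) * k + to] += edge_weight;
}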
+ template + void deltaGainUpdate(const PartitionedGraph &partitioned_graph, + const SynchronizedEdgeUpdate &sync_update); // ####################### Uncontraction ####################### - // ! This function implements the gain cache update after an uncontraction that restores node v in - // ! an edge he. After the uncontraction the corresponding edge turns from a selfloop to a regular edge. - template - void uncontractUpdateAfterRestore(const PartitionedGraph& partitioned_graph, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that restores + // node v in ! an edge he. After the uncontraction the corresponding edge turns from a + // selfloop to a regular edge. + template + void uncontractUpdateAfterRestore(const PartitionedGraph &partitioned_graph, + const HypernodeID u, const HypernodeID v, const HyperedgeID he, const HypernodeID pin_count_in_part_after); - // ! This function implements the gain cache update after an uncontraction that replaces u with v in - // ! an edge he. After the uncontraction only node v is part of edge he. - template - void uncontractUpdateAfterReplacement(const PartitionedGraph& partitioned_graph, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that replaces + // u with v in ! an edge he. After the uncontraction only node v is part of edge he. + template + void uncontractUpdateAfterReplacement(const PartitionedGraph &partitioned_graph, + const HypernodeID u, const HypernodeID v, const HyperedgeID he); - void restoreSinglePinHyperedge(const HypernodeID, - const PartitionID, - const HyperedgeWeight) { + void restoreSinglePinHyperedge(const HypernodeID, const PartitionID, + const HyperedgeWeight) + { // Do nothing here (only relevant for hypergraph gain cache) } - // ! This function is called after restoring a net that became identical to another due to a contraction. - template - void restoreIdenticalHyperedge(const PartitionedHypergraph&, - const HyperedgeID) { + // ! This function is called after restoring a net that became identical to another due + // to a contraction. + template + void restoreIdenticalHyperedge(const PartitionedHypergraph &, const HyperedgeID) + { // Do nothing } // ! Notifies the gain cache that all uncontractions of the current batch are completed. 
- void batchUncontractionsCompleted() { + void batchUncontractionsCompleted() + { // Do nothing } // ####################### Only for Testing ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputePenaltyTerm(const PartitionedGraph& partitioned_graph, - const HypernodeID u) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight recomputePenaltyTerm( + const PartitionedGraph &partitioned_graph, const HypernodeID u) const + { PartitionID block_of_u = partitioned_graph.partID(u); HyperedgeWeight penalty = 0; - for (HyperedgeID e : partitioned_graph.incidentEdges(u)) { - if (!partitioned_graph.isSinglePin(e) && - partitioned_graph.partID(partitioned_graph.edgeTarget(e)) == block_of_u) { + for(HyperedgeID e : partitioned_graph.incidentEdges(u)) + { + if(!partitioned_graph.isSinglePin(e) && + partitioned_graph.partID(partitioned_graph.edgeTarget(e)) == block_of_u) + { penalty += partitioned_graph.edgeWeight(e); } } return penalty; } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputeBenefitTerm(const PartitionedGraph& partitioned_graph, - const HypernodeID u, - const PartitionID to) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight + recomputeBenefitTerm(const PartitionedGraph &partitioned_graph, const HypernodeID u, + const PartitionID to) const + { HyperedgeWeight benefit = 0; - for (HyperedgeID e : partitioned_graph.incidentEdges(u)) { - if (!partitioned_graph.isSinglePin(e) && - partitioned_graph.partID(partitioned_graph.edgeTarget(e)) == to) { + for(HyperedgeID e : partitioned_graph.incidentEdges(u)) + { + if(!partitioned_graph.isSinglePin(e) && + partitioned_graph.partID(partitioned_graph.edgeTarget(e)) == to) + { benefit += partitioned_graph.edgeWeight(e); } } return benefit; } - void changeNumberOfBlocks(const PartitionID new_k) { + void changeNumberOfBlocks(const PartitionID new_k) + { ASSERT(new_k <= _k); _dummy_adjacent_blocks = IntegerRangeIterator(new_k); } - template - bool verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph&) const { + template + bool verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph &) const + { // Gain cache does not track adjacent blocks of nodes return true; } - private: +private: friend class DeltaGraphCutGainCache; MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - size_t incident_weight_index(const HypernodeID u, const PartitionID p) const { - return size_t(u) * _k + p; + size_t incident_weight_index(const HypernodeID u, const PartitionID p) const + { + return size_t(u) * _k + p; } // ! Allocates the memory required to store the gain cache - void allocateGainTable(const HypernodeID num_nodes, - const PartitionID k) { - if (_gain_cache.size() == 0 && k != kInvalidPartition) { + void allocateGainTable(const HypernodeID num_nodes, const PartitionID k) + { + if(_gain_cache.size() == 0 && k != kInvalidPartition) + { _k = k; _dummy_adjacent_blocks = IntegerRangeIterator(k); - _gain_cache.resize("Refinement", "incident_weight_in_part", num_nodes * size_t(_k), true); + _gain_cache.resize("Refinement", "incident_weight_in_part", num_nodes * size_t(_k), + true); } } @@ -281,115 +296,114 @@ class GraphCutGainCache { PartitionID _k; // ! Array of size |V| * k, which stores the benefit and penalty terms of each node. - ds::Array< CAtomic > _gain_cache; + ds::Array > _gain_cache; // ! 
Provides an iterator from 0 to k (:= number of blocks) IntegerRangeIterator _dummy_adjacent_blocks; }; /** - * In our FM algorithm, the different local searches perform nodes moves locally not visible for other - * threads. The delta gain cache stores these local changes relative to the shared - * gain cache. For example, the penalty term can be computed as follows - * p'(u) := p(u) + Δp(u) - * where p(u) is the penalty term stored in the shared gain cache and Δp(u) is the penalty term stored in - * the delta gain cache after performing some moves locally. To maintain Δp(u) and Δb(u,V_j), we use a hash - * table that only stores entries affected by a gain cache update. -*/ -class DeltaGraphCutGainCache { + * In our FM algorithm, the different local searches perform nodes moves locally not + * visible for other threads. The delta gain cache stores these local changes relative to + * the shared gain cache. For example, the penalty term can be computed as follows p'(u) + * := p(u) + Δp(u) where p(u) is the penalty term stored in the shared gain cache and + * Δp(u) is the penalty term stored in the delta gain cache after performing some moves + * locally. To maintain Δp(u) and Δb(u,V_j), we use a hash table that only stores entries + * affected by a gain cache update. + */ +class DeltaGraphCutGainCache +{ using AdjacentBlocksIterator = typename GraphCutGainCache::AdjacentBlocksIterator; - public: +public: static constexpr bool requires_connectivity_set = false; - DeltaGraphCutGainCache(const GraphCutGainCache& gain_cache) : - _gain_cache(gain_cache), - _incident_weight_in_part_delta() { } + DeltaGraphCutGainCache(const GraphCutGainCache &gain_cache) : + _gain_cache(gain_cache), _incident_weight_in_part_delta() + { + } // ####################### Initialize & Reset ####################### - void initialize(const size_t size) { - _incident_weight_in_part_delta.initialize(size); - } + void initialize(const size_t size) { _incident_weight_in_part_delta.initialize(size); } - void clear() { - _incident_weight_in_part_delta.clear(); - } + void clear() { _incident_weight_in_part_delta.clear(); } - void dropMemory() { - _incident_weight_in_part_delta.freeInternalData(); - } + void dropMemory() { _incident_weight_in_part_delta.freeInternalData(); } - size_t size_in_bytes() const { - return _incident_weight_in_part_delta.size_in_bytes(); - } + size_t size_in_bytes() const { return _incident_weight_in_part_delta.size_in_bytes(); } // ####################### Gain Computation ####################### // ! Returns an iterator over the adjacent blocks of a node - IteratorRange adjacentBlocks(const HypernodeID hn) const { + IteratorRange adjacentBlocks(const HypernodeID hn) const + { return _gain_cache.adjacentBlocks(hn); } // ! Returns the penalty term of node u. // ! More formally, p(u) := w(u, partID(u)) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight penaltyTerm(const HypernodeID u, - const PartitionID from) const { - const HyperedgeWeight* penalty_delta = - _incident_weight_in_part_delta.get_if_contained( - _gain_cache.incident_weight_index(u, from)); + HyperedgeWeight penaltyTerm(const HypernodeID u, const PartitionID from) const + { + const HyperedgeWeight *penalty_delta = + _incident_weight_in_part_delta.get_if_contained( + _gain_cache.incident_weight_index(u, from)); return _gain_cache.penaltyTerm(u, from) + (penalty_delta ? *penalty_delta : 0); } // ! Returns the benefit term for moving node u to block to. // ! 
More formally, b(u, V_j) := w(u, V_j) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { - const HyperedgeWeight* benefit_delta = - _incident_weight_in_part_delta.get_if_contained( - _gain_cache.incident_weight_index(u, to)); + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { + const HyperedgeWeight *benefit_delta = + _incident_weight_in_part_delta.get_if_contained( + _gain_cache.incident_weight_index(u, to)); return _gain_cache.benefitTerm(u, to) + (benefit_delta ? *benefit_delta : 0); } // ! Returns the gain of moving node u from its current block to a target block V_j. // ! More formally, g(u, V_j) := b(u, V_j) - p(u). MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight gain(const HypernodeID u, - const PartitionID from, - const PartitionID to ) const { + HyperedgeWeight gain(const HypernodeID u, const PartitionID from, + const PartitionID to) const + { return benefitTerm(u, to) - penaltyTerm(u, from); } - // ####################### Delta Gain Update ####################### + // ####################### Delta Gain Update ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void deltaGainUpdate(const PartitionedGraph& partitioned_graph, - const SynchronizedEdgeUpdate& sync_update) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + deltaGainUpdate(const PartitionedGraph &partitioned_graph, + const SynchronizedEdgeUpdate &sync_update) + { const HypernodeID target = partitioned_graph.edgeTarget(sync_update.he); - const size_t index_in_from_part = _gain_cache.incident_weight_index(target, sync_update.from); + const size_t index_in_from_part = + _gain_cache.incident_weight_index(target, sync_update.from); _incident_weight_in_part_delta[index_in_from_part] -= sync_update.edge_weight; - const size_t index_in_to_part = _gain_cache.incident_weight_index(target, sync_update.to); + const size_t index_in_to_part = + _gain_cache.incident_weight_index(target, sync_update.to); _incident_weight_in_part_delta[index_in_to_part] += sync_update.edge_weight; } + // ####################### Miscellaneous ####################### - // ####################### Miscellaneous ####################### - - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); - utils::MemoryTreeNode* gain_cache_delta_node = parent->addChild("Delta Gain Cache"); + utils::MemoryTreeNode *gain_cache_delta_node = parent->addChild("Delta Gain Cache"); gain_cache_delta_node->updateSize(size_in_bytes()); } - private: - const GraphCutGainCache& _gain_cache; +private: + const GraphCutGainCache &_gain_cache; // ! Stores the delta of each locally touched gain cache entry // ! 
relative to the gain cache in '_phg' ds::DynamicFlatMap _incident_weight_in_part_delta; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/gain_cache_ptr.h b/mt-kahypar/partition/refinement/gains/gain_cache_ptr.h index 49b046f0f..057421280 100644 --- a/mt-kahypar/partition/refinement/gains/gain_cache_ptr.h +++ b/mt-kahypar/partition/refinement/gains/gain_cache_ptr.h @@ -28,13 +28,13 @@ #include "kahypar-resources/meta/typelist.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/definitions.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/partition/context_enum_classes.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h" #include "mt-kahypar/partition/refinement/gains/gain_definitions.h" #include "mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h" -#include "mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h" #include "mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h" #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC #include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h" @@ -51,166 +51,203 @@ namespace mt_kahypar { struct gain_cache_s; -typedef struct { - gain_cache_s* gain_cache; +typedef struct +{ + gain_cache_s *gain_cache; GainPolicy type; } gain_cache_t; -class GainCachePtr { +class GainCachePtr +{ + +public: + template + static auto applyWithConcreteGainCache(F function, gain_cache_t gain_cache) + { + switch(gain_cache.type) + { + case GainPolicy::cut: + return function(cast(gain_cache)); + case GainPolicy::km1: + return function(cast(gain_cache)); + case GainPolicy::soed: +#ifdef KAHYPAR_ENABLE_SOED_METRIC + return function(cast(gain_cache)); +#endif + case GainPolicy::steiner_tree: +#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC + return function(cast(gain_cache)); +#endif + case GainPolicy::cut_for_graphs: +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + return function(cast(gain_cache)); +#endif + case GainPolicy::steiner_tree_for_graphs: +#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + return function(cast(gain_cache)); +#endif +#endif + case GainPolicy::none: + break; + } + ERR("No gain policy set"); + } - public: - template - static auto applyWithConcreteGainCache(F function, gain_cache_t gain_cache) { - switch(gain_cache.type) { + template + static auto applyWithConcreteGainCacheForHG(F function, gain_cache_t gain_cache) + { + if constexpr(Hypergraph::is_graph) + { + switch(gain_cache.type) + { +#ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES + case GainPolicy::cut_for_graphs: + return function(cast(gain_cache)); +#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC + case GainPolicy::steiner_tree_for_graphs: + return function(cast(gain_cache)); +#endif +#endif + default: + break; + } + } + else + { + switch(gain_cache.type) + { case GainPolicy::cut: return function(cast(gain_cache)); case GainPolicy::km1: return function(cast(gain_cache)); +#ifdef KAHYPAR_ENABLE_SOED_METRIC case GainPolicy::soed: - #ifdef KAHYPAR_ENABLE_SOED_METRIC return function(cast(gain_cache)); - #endif +#endif +#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC case GainPolicy::steiner_tree: - #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC return function(cast(gain_cache)); - #endif - case GainPolicy::cut_for_graphs: - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - return function(cast(gain_cache)); - #endif - case GainPolicy::steiner_tree_for_graphs: - 
#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - return function(cast(gain_cache)); - #endif - #endif - case GainPolicy::none: break; - } - ERR("No gain policy set"); - } - - template - static auto applyWithConcreteGainCacheForHG(F function, gain_cache_t gain_cache) { - if constexpr (Hypergraph::is_graph) { - switch(gain_cache.type) { - #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES - case GainPolicy::cut_for_graphs: - return function(cast(gain_cache)); - #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC - case GainPolicy::steiner_tree_for_graphs: - return function(cast(gain_cache)); - #endif - #endif - default: break; - } - } else { - switch(gain_cache.type) { - case GainPolicy::cut: - return function(cast(gain_cache)); - case GainPolicy::km1: - return function(cast(gain_cache)); - #ifdef KAHYPAR_ENABLE_SOED_METRIC - case GainPolicy::soed: - return function(cast(gain_cache)); - #endif - #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC - case GainPolicy::steiner_tree: - return function(cast(gain_cache)); - #endif - default: break; +#endif + default: + break; } } ERR("No gain policy set"); } - static gain_cache_t constructGainCache(const Context& context) { - switch(context.partition.gain_policy) { - case GainPolicy::cut: return constructGainCache(context); - case GainPolicy::km1: return constructGainCache(context); - #ifdef KAHYPAR_ENABLE_SOED_METRIC - case GainPolicy::soed: return constructGainCache(context); - #endif - #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC - case GainPolicy::steiner_tree: return constructGainCache(context); - #endif - ENABLE_GRAPHS(case GainPolicy::cut_for_graphs: return constructGainCache(context);) - #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC - ENABLE_GRAPHS(case GainPolicy::steiner_tree_for_graphs: return constructGainCache(context);) - #endif - case GainPolicy::none: - throw InvalidParameterException("No gain policy set"); - default: break; + static gain_cache_t constructGainCache(const Context &context) + { + switch(context.partition.gain_policy) + { + case GainPolicy::cut: + return constructGainCache(context); + case GainPolicy::km1: + return constructGainCache(context); +#ifdef KAHYPAR_ENABLE_SOED_METRIC + case GainPolicy::soed: + return constructGainCache(context); +#endif +#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC + case GainPolicy::steiner_tree: + return constructGainCache(context); +#endif + ENABLE_GRAPHS(case GainPolicy::cut_for_graphs + : return constructGainCache(context);) +#ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC + ENABLE_GRAPHS(case GainPolicy::steiner_tree_for_graphs + : return constructGainCache(context);) +#endif + case GainPolicy::none: + throw InvalidParameterException("No gain policy set"); + default: + break; } - return gain_cache_t { nullptr, GainPolicy::none }; + return gain_cache_t{ nullptr, GainPolicy::none }; } - static void deleteGainCache(gain_cache_t gain_cache) { - if (gain_cache.type != GainPolicy::none) { - applyWithConcreteGainCache([&](auto& gc) { delete &gc; }, gain_cache); + static void deleteGainCache(gain_cache_t gain_cache) + { + if(gain_cache.type != GainPolicy::none) + { + applyWithConcreteGainCache([&](auto &gc) { delete &gc; }, gain_cache); } } - template - static void initializeGainCache(const PartitionedHypergraph& partitioned_hg, - gain_cache_t gain_cache) { - if (gain_cache.type != GainPolicy::none) { - applyWithConcreteGainCacheForHG([&](auto& gc) { - gc.initializeGainCache(partitioned_hg); - }, gain_cache); + template + static void initializeGainCache(const PartitionedHypergraph 
&partitioned_hg, + gain_cache_t gain_cache) + { + if(gain_cache.type != GainPolicy::none) + { + applyWithConcreteGainCacheForHG( + [&](auto &gc) { gc.initializeGainCache(partitioned_hg); }, gain_cache); } } - static void resetGainCache(gain_cache_t gain_cache) { - if (gain_cache.type != GainPolicy::none) { - applyWithConcreteGainCache([&](auto& gc) { gc.reset(); }, gain_cache); + static void resetGainCache(gain_cache_t gain_cache) + { + if(gain_cache.type != GainPolicy::none) + { + applyWithConcreteGainCache([&](auto &gc) { gc.reset(); }, gain_cache); } } - template - static void uncontract(PartitionedHypergraph& partitioned_hg, - const Batch& batch, - gain_cache_t gain_cache) { - if (gain_cache.type != GainPolicy::none) { - applyWithConcreteGainCacheForHG([&](auto& gc) { - partitioned_hg.uncontract(batch, gc); - }, gain_cache); + template + static void uncontract(PartitionedHypergraph &partitioned_hg, const Batch &batch, + gain_cache_t gain_cache) + { + if(gain_cache.type != GainPolicy::none) + { + applyWithConcreteGainCacheForHG( + [&](auto &gc) { partitioned_hg.uncontract(batch, gc); }, gain_cache); } } - template - static void restoreSinglePinAndParallelNets(PartitionedHypergraph& partitioned_hg, - const vec& hes_to_restore, - gain_cache_t gain_cache) { - if (gain_cache.type != GainPolicy::none) { - applyWithConcreteGainCacheForHG([&](auto& gc) { - partitioned_hg.restoreSinglePinAndParallelNets(hes_to_restore, gc); - }, gain_cache); + template + static void + restoreSinglePinAndParallelNets(PartitionedHypergraph &partitioned_hg, + const vec &hes_to_restore, + gain_cache_t gain_cache) + { + if(gain_cache.type != GainPolicy::none) + { + applyWithConcreteGainCacheForHG( + [&](auto &gc) { + partitioned_hg.restoreSinglePinAndParallelNets(hes_to_restore, gc); + }, + gain_cache); } } - template - static bool checkTrackedPartitionInformation(PartitionedHypergraph& partitioned_hg, - gain_cache_t gain_cache) { - return applyWithConcreteGainCacheForHG([&](auto& gc) { - return partitioned_hg.checkTrackedPartitionInformation(gc); - }, gain_cache); + template + static bool checkTrackedPartitionInformation(PartitionedHypergraph &partitioned_hg, + gain_cache_t gain_cache) + { + return applyWithConcreteGainCacheForHG( + [&](auto &gc) { return partitioned_hg.checkTrackedPartitionInformation(gc); }, + gain_cache); } - template - static GainCache& cast(gain_cache_t gain_cache) { - if ( gain_cache.type != GainCache::TYPE ) { + template + static GainCache &cast(gain_cache_t gain_cache) + { + if(gain_cache.type != GainCache::TYPE) + { std::stringstream ss; ss << "Cannot cast" << gain_cache.type << "to" << GainCache::TYPE; throw InvalidInputException(ss.str()); } - return *reinterpret_cast(gain_cache.gain_cache); + return *reinterpret_cast(gain_cache.gain_cache); } - private: - template - static gain_cache_t constructGainCache(const Context& context) { - return gain_cache_t { reinterpret_cast(new GainCache(context)), GainCache::TYPE }; +private: + template + static gain_cache_t constructGainCache(const Context &context) + { + return gain_cache_t{ reinterpret_cast(new GainCache(context)), + GainCache::TYPE }; } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/gain_computation_base.h b/mt-kahypar/partition/refinement/gains/gain_computation_base.h index dd7833630..be3644333 100644 --- a/mt-kahypar/partition/refinement/gains/gain_computation_base.h +++ b/mt-kahypar/partition/refinement/gains/gain_computation_base.h @@ -32,87 +32,94 @@ #include 
"tbb/enumerable_thread_specific.h" -#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/context.h" +#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/utils/randomize.h" namespace mt_kahypar { -template -class GainComputationBase { +template +class GainComputationBase +{ using DeltaGain = tbb::enumerable_thread_specific; - public: +public: using RatingMap = ds::SparseMap; using TmpScores = tbb::enumerable_thread_specific; - GainComputationBase(const Context& context, - const bool disable_randomization) : - _context(context), - _disable_randomization(disable_randomization), - _deltas(0), - _tmp_scores([&] { - return constructLocalTmpScores(); - }) { } - - template - Move computeMaxGainMove(const PartitionedHypergraph& phg, - const HypernodeID hn, + GainComputationBase(const Context &context, const bool disable_randomization) : + _context(context), _disable_randomization(disable_randomization), _deltas(0), + _tmp_scores([&] { return constructLocalTmpScores(); }) + { + } + + template + Move computeMaxGainMove(const PartitionedHypergraph &phg, const HypernodeID hn, const bool rebalance = false, const bool consider_non_adjacent_blocks = false, - const bool allow_imbalance = false) { - Derived* derived = static_cast(this); - RatingMap& tmp_scores = _tmp_scores.local(); + const bool allow_imbalance = false) + { + Derived *derived = static_cast(this); + RatingMap &tmp_scores = _tmp_scores.local(); Gain isolated_block_gain = 0; - derived->precomputeGains(phg, hn, tmp_scores, isolated_block_gain, consider_non_adjacent_blocks); + derived->precomputeGains(phg, hn, tmp_scores, isolated_block_gain, + consider_non_adjacent_blocks); PartitionID from = phg.partID(hn); - Move best_move { from, from, hn, rebalance ? std::numeric_limits::max() : 0 }; + Move best_move{ from, from, hn, rebalance ? 
std::numeric_limits::max() : 0 }; HypernodeWeight hn_weight = phg.nodeWeight(hn); int cpu_id = THREAD_ID; - utils::Randomize& rand = utils::Randomize::instance(); - auto test_and_apply = [&](const PartitionID to, - const Gain score, + utils::Randomize &rand = utils::Randomize::instance(); + auto test_and_apply = [&](const PartitionID to, const Gain score, const bool no_tie_breaking = false) { bool new_best_gain = (score < best_move.gain) || - (score == best_move.gain && - !_disable_randomization && + (score == best_move.gain && !_disable_randomization && (no_tie_breaking || rand.flipCoin(cpu_id))); - if (new_best_gain && (allow_imbalance || phg.partWeight(to) + hn_weight <= - _context.partition.max_part_weights[to])) { + if(new_best_gain && + (allow_imbalance || + phg.partWeight(to) + hn_weight <= _context.partition.max_part_weights[to])) + { best_move.to = to; best_move.gain = score; return true; - } else { + } + else + { return false; } }; - for ( const auto& entry : tmp_scores ) { + for(const auto &entry : tmp_scores) + { const PartitionID to = entry.key; - if (from != to) { + if(from != to) + { const Gain score = derived->gain(entry.value, isolated_block_gain); test_and_apply(to, score); } } - if ( consider_non_adjacent_blocks && best_move.to == from ) { + if(consider_non_adjacent_blocks && best_move.to == from) + { // This is important for our rebalancer as the last fallback strategy vec non_adjacent_block; - for ( PartitionID to = 0; to < _context.partition.k; ++to ) { - if ( from != to && !tmp_scores.contains(to) ) { + for(PartitionID to = 0; to < _context.partition.k; ++to) + { + if(from != to && !tmp_scores.contains(to)) + { // This block is not adjacent to the current node - if ( test_and_apply(to, isolated_block_gain, true /* no tie breaking */ ) ) { + if(test_and_apply(to, isolated_block_gain, true /* no tie breaking */)) + { non_adjacent_block.push_back(to); } } } - if ( non_adjacent_block.size() > 0 ) { + if(non_adjacent_block.size() > 0) + { // Choose one at random - const PartitionID to = non_adjacent_block[ - rand.getRandomInt(0, static_cast(non_adjacent_block.size() - 1), cpu_id)]; + const PartitionID to = non_adjacent_block[rand.getRandomInt( + 0, static_cast(non_adjacent_block.size() - 1), cpu_id)]; best_move.to = to; best_move.gain = isolated_block_gain; } @@ -122,53 +129,57 @@ class GainComputationBase { return best_move; } - inline void computeDeltaForHyperedge(const SynchronizedEdgeUpdate& sync_update) { + inline void computeDeltaForHyperedge(const SynchronizedEdgeUpdate &sync_update) + { _deltas.local() += AttributedGains::gain(sync_update); } // ! Returns the delta in the objective function for all moves // ! performed by the calling thread relative to the last call // ! reset() - Gain localDelta() { - return _deltas.local(); - } + Gain localDelta() { return _deltas.local(); } // ! Returns the overall delta of all moves performed by // ! 
all threads relative to the last call of reset() - Gain delta() const { + Gain delta() const + { Gain overall_delta = 0; - for (const Gain& delta : _deltas) { + for(const Gain &delta : _deltas) + { overall_delta += delta; } return overall_delta; } - void reset() { - for (Gain& delta : _deltas) { + void reset() + { + for(Gain &delta : _deltas) + { delta = 0; } } - void changeNumberOfBlocks(const PartitionID new_k) { + void changeNumberOfBlocks(const PartitionID new_k) + { ASSERT(new_k == _context.partition.k); - for ( auto& tmp_score : _tmp_scores ) { - if ( static_cast(new_k) > tmp_score.size() ) { + for(auto &tmp_score : _tmp_scores) + { + if(static_cast(new_k) > tmp_score.size()) + { tmp_score = RatingMap(new_k); } } - static_cast(this)->changeNumberOfBlocksImpl(new_k); + static_cast(this)->changeNumberOfBlocksImpl(new_k); } private: - RatingMap constructLocalTmpScores() const { - return RatingMap(_context.partition.k); - } + RatingMap constructLocalTmpScores() const { return RatingMap(_context.partition.k); } - protected: - const Context& _context; +protected: + const Context &_context; const bool _disable_randomization; DeltaGain _deltas; TmpScores _tmp_scores; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/gain_definitions.h b/mt-kahypar/partition/refinement/gains/gain_definitions.h index a0c1c2066..d35e9e568 100644 --- a/mt-kahypar/partition/refinement/gains/gain_definitions.h +++ b/mt-kahypar/partition/refinement/gains/gain_definitions.h @@ -26,51 +26,52 @@ #pragma once -#include "kahypar-resources/meta/typelist.h" #include "kahypar-resources/meta/policy_registry.h" +#include "kahypar-resources/meta/typelist.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/context_enum_classes.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" -#include "mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h" -#include "mt-kahypar/partition/refinement/gains/km1/km1_rollback.h" -#include "mt-kahypar/partition/refinement/gains/km1/km1_gain_computation.h" -#include "mt-kahypar/partition/refinement/gains/km1/km1_attributed_gains.h" -#include "mt-kahypar/partition/refinement/gains/km1/km1_flow_network_construction.h" -#include "mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h" -#include "mt-kahypar/partition/refinement/gains/cut/cut_rollback.h" -#include "mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h" #include "mt-kahypar/partition/refinement/gains/cut/cut_attributed_gains.h" #include "mt-kahypar/partition/refinement/gains/cut/cut_flow_network_construction.h" +#include "mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h" +#include "mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h" +#include "mt-kahypar/partition/refinement/gains/cut/cut_rollback.h" +#include "mt-kahypar/partition/refinement/gains/km1/km1_attributed_gains.h" +#include "mt-kahypar/partition/refinement/gains/km1/km1_flow_network_construction.h" +#include "mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h" +#include "mt-kahypar/partition/refinement/gains/km1/km1_gain_computation.h" +#include "mt-kahypar/partition/refinement/gains/km1/km1_rollback.h" #ifdef KAHYPAR_ENABLE_SOED_METRIC #include "mt-kahypar/partition/refinement/gains/soed/soed_attributed_gains.h" -#include "mt-kahypar/partition/refinement/gains/soed/soed_gain_computation.h" +#include "mt-kahypar/partition/refinement/gains/soed/soed_flow_network_construction.h" 
#include "mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h" +#include "mt-kahypar/partition/refinement/gains/soed/soed_gain_computation.h" #include "mt-kahypar/partition/refinement/gains/soed/soed_rollback.h" -#include "mt-kahypar/partition/refinement/gains/soed/soed_flow_network_construction.h" #endif #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC #include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_attributed_gains.h" -#include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_computation.h" +#include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_flow_network_construction.h" #include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h" +#include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_computation.h" #include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_rollback.h" -#include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_flow_network_construction.h" #endif #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC #include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_attributed_gains_for_graphs.h" -#include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_computation_for_graphs.h" -#include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h" #include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_flow_network_construction_for_graphs.h" +#include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h" +#include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_computation_for_graphs.h" #endif -#include "mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h" #include "mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_attributed_gains_for_graphs.h" +#include "mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h" #endif #include "mt-kahypar/macros.h" namespace mt_kahypar { -struct Km1GainTypes : public kahypar::meta::PolicyBase { +struct Km1GainTypes : public kahypar::meta::PolicyBase +{ using GainComputation = Km1GainComputation; using AttributedGains = Km1AttributedGains; using GainCache = Km1GainCache; @@ -79,7 +80,8 @@ struct Km1GainTypes : public kahypar::meta::PolicyBase { using FlowNetworkConstruction = Km1FlowNetworkConstruction; }; -struct CutGainTypes : public kahypar::meta::PolicyBase { +struct CutGainTypes : public kahypar::meta::PolicyBase +{ using GainComputation = CutGainComputation; using AttributedGains = CutAttributedGains; using GainCache = CutGainCache; @@ -89,7 +91,8 @@ struct CutGainTypes : public kahypar::meta::PolicyBase { }; #ifdef KAHYPAR_ENABLE_SOED_METRIC -struct SoedGainTypes : public kahypar::meta::PolicyBase { +struct SoedGainTypes : public kahypar::meta::PolicyBase +{ using GainComputation = SoedGainComputation; using AttributedGains = SoedAttributedGains; using GainCache = SoedGainCache; @@ -100,7 +103,8 @@ struct SoedGainTypes : public kahypar::meta::PolicyBase { #endif #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC -struct SteinerTreeGainTypes : public kahypar::meta::PolicyBase { +struct SteinerTreeGainTypes : public kahypar::meta::PolicyBase +{ using GainComputation = SteinerTreeGainComputation; using AttributedGains = SteinerTreeAttributedGains; using GainCache = SteinerTreeGainCache; @@ -111,7 
+115,8 @@ struct SteinerTreeGainTypes : public kahypar::meta::PolicyBase { #endif #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES -struct CutGainForGraphsTypes : public kahypar::meta::PolicyBase { +struct CutGainForGraphsTypes : public kahypar::meta::PolicyBase +{ using GainComputation = CutGainComputation; using AttributedGains = GraphCutAttributedGains; using GainCache = GraphCutGainCache; @@ -121,7 +126,8 @@ struct CutGainForGraphsTypes : public kahypar::meta::PolicyBase { }; #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC -struct SteinerTreeForGraphsTypes : public kahypar::meta::PolicyBase { +struct SteinerTreeForGraphsTypes : public kahypar::meta::PolicyBase +{ using GainComputation = GraphSteinerTreeGainComputation; using AttributedGains = GraphSteinerTreeAttributedGains; using GainCache = GraphSteinerTreeGainCache; @@ -132,8 +138,9 @@ struct SteinerTreeForGraphsTypes : public kahypar::meta::PolicyBase { #endif #endif -template -struct GraphAndGainTypes : public kahypar::meta::PolicyBase { +template +struct GraphAndGainTypes : public kahypar::meta::PolicyBase +{ using TypeTraits = TypeTraitsT; using GainTypes = GainTypesT; @@ -148,79 +155,94 @@ struct GraphAndGainTypes : public kahypar::meta::PolicyBase { using FlowNetworkConstruction = typename GainTypes::FlowNetworkConstruction; }; - -using GainTypes = kahypar::meta::Typelist; - -#define _LIST_HYPERGRAPH_COMBINATIONS(TYPE_TRAITS) \ - GraphAndGainTypes, \ - GraphAndGainTypes \ - ENABLE_SOED(COMMA GraphAndGainTypes) \ - ENABLE_STEINER_TREE(COMMA GraphAndGainTypes) - -#define _LIST_GRAPH_COMBINATIONS(TYPE_TRAITS) \ - GraphAndGainTypes \ - ENABLE_STEINER_TREE(COMMA GraphAndGainTypes) - -using GraphAndGainTypesList = kahypar::meta::Typelist<_LIST_HYPERGRAPH_COMBINATIONS(StaticHypergraphTypeTraits) - ENABLE_GRAPHS(COMMA _LIST_GRAPH_COMBINATIONS(StaticGraphTypeTraits)) - ENABLE_HIGHEST_QUALITY(COMMA _LIST_HYPERGRAPH_COMBINATIONS(DynamicHypergraphTypeTraits)) - ENABLE_HIGHEST_QUALITY_FOR_GRAPHS(COMMA _LIST_GRAPH_COMBINATIONS(DynamicGraphTypeTraits)) - ENABLE_LARGE_K(COMMA _LIST_HYPERGRAPH_COMBINATIONS(LargeKHypergraphTypeTraits))>; - - -#define _INSTANTIATE_CLASS_MACRO_FOR_HYPERGRAPH_COMBINATIONS(C, TYPE_TRAITS) \ - template class C(GraphAndGainTypes); \ - template class C(GraphAndGainTypes); \ - ENABLE_SOED(template class C(GraphAndGainTypes);) \ - ENABLE_STEINER_TREE(template class C(GraphAndGainTypes);) - -#define _INSTANTIATE_CLASS_MACRO_FOR_GRAPH_COMBINATIONS(C, TYPE_TRAITS) \ - template class C(GraphAndGainTypes); \ - ENABLE_STEINER_TREE(template class C(GraphAndGainTypes);) - - -#define INSTANTIATE_CLASS_WITH_VALID_TRAITS(C) \ - _INSTANTIATE_CLASS_MACRO_FOR_HYPERGRAPH_COMBINATIONS(C, StaticHypergraphTypeTraits) \ - ENABLE_GRAPHS(_INSTANTIATE_CLASS_MACRO_FOR_GRAPH_COMBINATIONS(C, StaticGraphTypeTraits)) \ - ENABLE_HIGHEST_QUALITY(_INSTANTIATE_CLASS_MACRO_FOR_HYPERGRAPH_COMBINATIONS(C, DynamicHypergraphTypeTraits)) \ - ENABLE_HIGHEST_QUALITY_FOR_GRAPHS(_INSTANTIATE_CLASS_MACRO_FOR_GRAPH_COMBINATIONS(C, DynamicGraphTypeTraits)) \ - ENABLE_LARGE_K(_INSTANTIATE_CLASS_MACRO_FOR_HYPERGRAPH_COMBINATIONS(C, LargeKHypergraphTypeTraits)) - +using GainTypes = kahypar::meta::Typelist< + Km1GainTypes, + CutGainTypes ENABLE_SOED(COMMA SoedGainTypes) ENABLE_STEINER_TREE( + COMMA SteinerTreeGainTypes) ENABLE_GRAPHS(COMMA CutGainForGraphsTypes) + ENABLE_GRAPHS(ENABLE_STEINER_TREE(COMMA SteinerTreeForGraphsTypes))>; + +#define _LIST_HYPERGRAPH_COMBINATIONS(TYPE_TRAITS) \ + GraphAndGainTypes, \ + GraphAndGainTypes ENABLE_SOED( \ + COMMA GraphAndGainTypes) 
\ + ENABLE_STEINER_TREE( \ + COMMA GraphAndGainTypes) + +#define _LIST_GRAPH_COMBINATIONS(TYPE_TRAITS) \ + GraphAndGainTypes ENABLE_STEINER_TREE( \ + COMMA GraphAndGainTypes) + +using GraphAndGainTypesList = kahypar::meta::Typelist< + _LIST_HYPERGRAPH_COMBINATIONS(StaticHypergraphTypeTraits) + ENABLE_GRAPHS(COMMA _LIST_GRAPH_COMBINATIONS(StaticGraphTypeTraits)) + ENABLE_HIGHEST_QUALITY( + COMMA _LIST_HYPERGRAPH_COMBINATIONS(DynamicHypergraphTypeTraits)) + ENABLE_HIGHEST_QUALITY_FOR_GRAPHS( + COMMA _LIST_GRAPH_COMBINATIONS(DynamicGraphTypeTraits)) + ENABLE_LARGE_K( + COMMA _LIST_HYPERGRAPH_COMBINATIONS(LargeKHypergraphTypeTraits))>; + +#define _INSTANTIATE_CLASS_MACRO_FOR_HYPERGRAPH_COMBINATIONS(C, TYPE_TRAITS) \ + template class C(GraphAndGainTypes); \ + template class C(GraphAndGainTypes); \ + ENABLE_SOED(template class C(GraphAndGainTypes);) \ + ENABLE_STEINER_TREE( \ + template class C(GraphAndGainTypes);) + +#define _INSTANTIATE_CLASS_MACRO_FOR_GRAPH_COMBINATIONS(C, TYPE_TRAITS) \ + template class C(GraphAndGainTypes); \ + ENABLE_STEINER_TREE( \ + template class C(GraphAndGainTypes);) + +#define INSTANTIATE_CLASS_WITH_VALID_TRAITS(C) \ + _INSTANTIATE_CLASS_MACRO_FOR_HYPERGRAPH_COMBINATIONS(C, StaticHypergraphTypeTraits) \ + ENABLE_GRAPHS( \ + _INSTANTIATE_CLASS_MACRO_FOR_GRAPH_COMBINATIONS(C, StaticGraphTypeTraits)) \ + ENABLE_HIGHEST_QUALITY(_INSTANTIATE_CLASS_MACRO_FOR_HYPERGRAPH_COMBINATIONS( \ + C, DynamicHypergraphTypeTraits)) \ + ENABLE_HIGHEST_QUALITY_FOR_GRAPHS( \ + _INSTANTIATE_CLASS_MACRO_FOR_GRAPH_COMBINATIONS(C, DynamicGraphTypeTraits)) \ + ENABLE_LARGE_K(_INSTANTIATE_CLASS_MACRO_FOR_HYPERGRAPH_COMBINATIONS( \ + C, LargeKHypergraphTypeTraits)) // functionality for retrieving combined policy of partition type and gain -#define _RETURN_COMBINED_POLICY(TYPE_TRAITS, GAIN_TYPES) { \ - static GraphAndGainTypes traits; \ - return traits; \ -} - -#define SWITCH_HYPERGRAPH_GAIN_TYPES(TYPE_TRAITS, gain_policy) { \ - switch ( gain_policy ) { \ - case GainPolicy::km1: _RETURN_COMBINED_POLICY(TYPE_TRAITS, Km1GainTypes) \ - case GainPolicy::cut: _RETURN_COMBINED_POLICY(TYPE_TRAITS, CutGainTypes) \ - case GainPolicy::soed: ENABLE_SOED(_RETURN_COMBINED_POLICY(TYPE_TRAITS, SoedGainTypes)) \ - case GainPolicy::steiner_tree: \ - ENABLE_STEINER_TREE(_RETURN_COMBINED_POLICY(TYPE_TRAITS, SteinerTreeGainTypes)) \ - default: { \ - ERR("Invalid gain policy type"); \ - } \ - } \ -} - -#define SWITCH_GRAPH_GAIN_TYPES(TYPE_TRAITS, gain_policy) { \ - switch ( gain_policy ) { \ - case GainPolicy::cut_for_graphs: \ - ENABLE_GRAPHS(_RETURN_COMBINED_POLICY(TYPE_TRAITS, CutGainForGraphsTypes)) \ - case GainPolicy::steiner_tree_for_graphs: \ - ENABLE_STEINER_TREE(ENABLE_GRAPHS(_RETURN_COMBINED_POLICY(TYPE_TRAITS, SteinerTreeForGraphsTypes))) \ - default: { \ - ERR("Invalid gain policy type"); \ - } \ - } \ -} - -} // namespace mt_kahypar +#define _RETURN_COMBINED_POLICY(TYPE_TRAITS, GAIN_TYPES) \ + { \ + static GraphAndGainTypes traits; \ + return traits; \ + } + +#define SWITCH_HYPERGRAPH_GAIN_TYPES(TYPE_TRAITS, gain_policy) \ + { \ + switch(gain_policy) \ + { \ + case GainPolicy::km1: \ + _RETURN_COMBINED_POLICY(TYPE_TRAITS, Km1GainTypes) \ + case GainPolicy::cut: \ + _RETURN_COMBINED_POLICY(TYPE_TRAITS, CutGainTypes) \ + case GainPolicy::soed: \ + ENABLE_SOED(_RETURN_COMBINED_POLICY(TYPE_TRAITS, SoedGainTypes)) \ + case GainPolicy::steiner_tree: \ + ENABLE_STEINER_TREE(_RETURN_COMBINED_POLICY(TYPE_TRAITS, SteinerTreeGainTypes)) \ + default: { \ + ERR("Invalid gain policy type"); \ + } \ + } \ + } + 
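How the project invokes these switch macros is not visible in this hunk, so the following is only a plausible usage sketch: a hypothetical helper that maps a runtime GainPolicy to the matching static GraphAndGainTypes instance for one fixed hypergraph type, relying on the fact that every combined policy derives from kahypar::meta::PolicyBase.

// Hypothetical usage sketch (not part of the patch): dispatch from a runtime
// gain policy to the corresponding combined policy object.
kahypar::meta::PolicyBase &combinedPolicyFor(const GainPolicy gain_policy)
{
  SWITCH_HYPERGRAPH_GAIN_TYPES(StaticHypergraphTypeTraits, gain_policy)
}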
+#define SWITCH_GRAPH_GAIN_TYPES(TYPE_TRAITS, gain_policy) \ + { \ + switch(gain_policy) \ + { \ + case GainPolicy::cut_for_graphs: \ + ENABLE_GRAPHS(_RETURN_COMBINED_POLICY(TYPE_TRAITS, CutGainForGraphsTypes)) \ + case GainPolicy::steiner_tree_for_graphs: \ + ENABLE_STEINER_TREE(ENABLE_GRAPHS( \ + _RETURN_COMBINED_POLICY(TYPE_TRAITS, SteinerTreeForGraphsTypes))) \ + default: { \ + ERR("Invalid gain policy type"); \ + } \ + } \ + } + +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/km1/km1_attributed_gains.h b/mt-kahypar/partition/refinement/gains/km1/km1_attributed_gains.h index 26928fc68..42dc25e29 100644 --- a/mt-kahypar/partition/refinement/gains/km1/km1_attributed_gains.h +++ b/mt-kahypar/partition/refinement/gains/km1/km1_attributed_gains.h @@ -35,11 +35,13 @@ namespace mt_kahypar { * for each incident hyperedge of the node based on which we then compute an * attributed gain value. */ -struct Km1AttributedGains { - static HyperedgeWeight gain(const SynchronizedEdgeUpdate& sync_update) { +struct Km1AttributedGains +{ + static HyperedgeWeight gain(const SynchronizedEdgeUpdate &sync_update) + { return (sync_update.pin_count_in_to_part_after == 1 ? sync_update.edge_weight : 0) + (sync_update.pin_count_in_from_part_after == 0 ? -sync_update.edge_weight : 0); } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/km1/km1_flow_network_construction.h b/mt-kahypar/partition/refinement/gains/km1/km1_flow_network_construction.h index 310e94c1d..ae3ff0139 100644 --- a/mt-kahypar/partition/refinement/gains/km1/km1_flow_network_construction.h +++ b/mt-kahypar/partition/refinement/gains/km1/km1_flow_network_construction.h @@ -36,53 +36,49 @@ namespace mt_kahypar { * to determine the capacity of a hyperedge and whether or not the hyperedge * is relevant for optimizing the objective function. */ -struct Km1FlowNetworkConstruction { +struct Km1FlowNetworkConstruction +{ // ! Capacity of the hyperedge - template - static HyperedgeWeight capacity(const PartitionedHypergraph& phg, - const Context&, - const HyperedgeID he, - const PartitionID, - const PartitionID) { + template + static HyperedgeWeight capacity(const PartitionedHypergraph &phg, const Context &, + const HyperedgeID he, const PartitionID, + const PartitionID) + { return phg.edgeWeight(he); } // ! If true, then hyperedge is not relevant and can be dropped. - template - static bool dropHyperedge(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool dropHyperedge(const PartitionedHypergraph &, const HyperedgeID, + const PartitionID, const PartitionID) + { return false; } // ! If true, then hyperedge is connected to source. - template - static bool connectToSource(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool connectToSource(const PartitionedHypergraph &, const HyperedgeID, + const PartitionID, const PartitionID) + { return false; } // ! If true, then hyperedge is connected to sink. - template - static bool connectToSink(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool connectToSink(const PartitionedHypergraph &, const HyperedgeID, + const PartitionID, const PartitionID) + { return false; } // ! If true, then hyperedge is considered as cut edge and its // ! 
weight is added to the total cut - template - static bool isCut(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool isCut(const PartitionedHypergraph &, const HyperedgeID, const PartitionID, + const PartitionID) + { return false; } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h b/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h index 36d397a77..f1aac05fb 100644 --- a/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h +++ b/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h @@ -30,98 +30,97 @@ #include "kahypar-resources/meta/policy_registry.h" -#include "mt-kahypar/partition/context_enum_classes.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/partition/context_enum_classes.h" #include "mt-kahypar/utils/range.h" namespace mt_kahypar { /** - * The gain cache stores the gain values for all possible node moves for the connectivity metric. + * The gain cache stores the gain values for all possible node moves for the connectivity + * metric. * * For a weighted hypergraph H = (V,E,c,w), the connectivity metric is defined as follows * km1(H) := \sum_{e \in cut(E)} ( lambda(e) - 1 ) * w(e) * where lambda(e) are the number of blocks contained in hyperedge e. * - * The gain of moving a node u from its current block V_i to a target block V_j can be expressed as follows - * g(u, V_j) := w({ e \in I(u) | pin_count(e, V_i) = 1 }) - w({ e \in I(u) | pin_count(e, V_j) = 0 }). - * Moving node u from V_i to V_j, removes block V_i from all nets e \in I(u) where pin_cout(e, V_i) = 1, - * but adds block V_j in all nets where pin_count(e, V_j) = 0. + * The gain of moving a node u from its current block V_i to a target block V_j can be + * expressed as follows g(u, V_j) := w({ e \in I(u) | pin_count(e, V_i) = 1 }) - w({ e \in + * I(u) | pin_count(e, V_j) = 0 }). Moving node u from V_i to V_j, removes block V_i from + * all nets e \in I(u) where pin_cout(e, V_i) = 1, but adds block V_j in all nets where + * pin_count(e, V_j) = 0. * * The gain can be reformulated as follows - * g(u, V_j) := w({ e \in I(u) | pin_count(e, V_i) = 1 }) - w({ e \in I(u) | pin_count(e, V_j) = 0 }) - * = w({ e \in I(u) | pin_count(e, V_i) = 1 }) - w(I(u)) + w({ e \in I(u) | pin_count(e, V_j) >= 1 }) (=: b(u, V_j)) - * = b(u, V_j) - (w(I(u)) - w({ e \in I(u) | pin_count(e, V_i) = 1 })) - * = b(u, V_j) - w({ e \in I(u) | pin_count(e, V_i) > 1 }) - * = b(u, V_j) - p(u) - * We call b(u, V_j) the benefit term and p(u) the penalty term. Our gain cache stores and maintains these - * entries for each node and block. Thus, the gain cache stores k + 1 entries per node. -*/ -class Km1GainCache { + * g(u, V_j) := w({ e \in I(u) | pin_count(e, V_i) = 1 }) - w({ e \in I(u) | pin_count(e, + * V_j) = 0 }) = w({ e \in I(u) | pin_count(e, V_i) = 1 }) - w(I(u)) + w({ e \in I(u) | + * pin_count(e, V_j) >= 1 }) (=: b(u, V_j)) = b(u, V_j) - (w(I(u)) - w({ e \in I(u) | + * pin_count(e, V_i) = 1 })) = b(u, V_j) - w({ e \in I(u) | pin_count(e, V_i) > 1 }) = + * b(u, V_j) - p(u) We call b(u, V_j) the benefit term and p(u) the penalty term. 
Our gain + * cache stores and maintains these entries for each node and block. Thus, the gain cache + * stores k + 1 entries per node. + */ +class Km1GainCache +{ static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = ID(100000); using AdjacentBlocksIterator = IntegerRangeIterator::const_iterator; - public: - +public: static constexpr GainPolicy TYPE = GainPolicy::km1; static constexpr bool requires_notification_before_update = false; static constexpr bool initializes_gain_cache_entry_after_batch_uncontractions = false; static constexpr bool invalidates_entries = true; Km1GainCache() : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _dummy_adjacent_blocks() { } + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _dummy_adjacent_blocks() + { + } - Km1GainCache(const Context&) : - _is_initialized(false), - _k(), - _gain_cache(), - _dummy_adjacent_blocks() { } + Km1GainCache(const Context &) : + _is_initialized(false), _k(), _gain_cache(), _dummy_adjacent_blocks() + { + } - Km1GainCache(const Km1GainCache&) = delete; - Km1GainCache & operator= (const Km1GainCache &) = delete; + Km1GainCache(const Km1GainCache &) = delete; + Km1GainCache &operator=(const Km1GainCache &) = delete; - Km1GainCache(Km1GainCache&& other) = default; - Km1GainCache & operator= (Km1GainCache&& other) = default; + Km1GainCache(Km1GainCache &&other) = default; + Km1GainCache &operator=(Km1GainCache &&other) = default; // ####################### Initialization ####################### - bool isInitialized() const { - return _is_initialized; - } + bool isInitialized() const { return _is_initialized; } - void reset(const bool run_parallel = true) { + void reset(const bool run_parallel = true) + { unused(run_parallel); _is_initialized = false; } - size_t size() const { - return _gain_cache.size(); - } + size_t size() const { return _gain_cache.size(); } // ! Initializes all gain cache entries - template - void initializeGainCache(const PartitionedHypergraph& partitioned_hg); + template + void initializeGainCache(const PartitionedHypergraph &partitioned_hg); - template - void initializeGainCacheEntryForNode(const PartitionedHypergraph&, - const HypernodeID&) { + template + void initializeGainCacheEntryForNode(const PartitionedHypergraph &, const HypernodeID &) + { // Do nothing } - IteratorRange adjacentBlocks(const HypernodeID) const { + IteratorRange adjacentBlocks(const HypernodeID) const + { // We do not maintain the adjacent blocks of a node in this gain cache. // We therefore return an iterator over all blocks here - return IteratorRange( - _dummy_adjacent_blocks.cbegin(), _dummy_adjacent_blocks.cend()); + return IteratorRange(_dummy_adjacent_blocks.cbegin(), + _dummy_adjacent_blocks.cend()); } // ####################### Gain Computation ####################### @@ -130,25 +129,27 @@ class Km1GainCache { // ! More formally, p(u) := w({ e \in I(u) | pin_count(e, V_i) > 1 }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight penaltyTerm(const HypernodeID u, - const PartitionID /* only relevant for graphs */) const { + const PartitionID /* only relevant for graphs */) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[penalty_index(u)].load(std::memory_order_relaxed); } // ! 
Recomputes the penalty term entry in the gain cache - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void recomputeInvalidTerms(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + recomputeInvalidTerms(const PartitionedHypergraph &partitioned_hg, const HypernodeID u) + { ASSERT(_is_initialized, "Gain cache is not initialized"); - _gain_cache[penalty_index(u)].store(recomputePenaltyTerm( - partitioned_hg, u), std::memory_order_relaxed); + _gain_cache[penalty_index(u)].store(recomputePenaltyTerm(partitioned_hg, u), + std::memory_order_relaxed); } // ! Returns the benefit term for moving node u to block to. // ! More formally, b(u, V_j) := w({ e \in I(u) | pin_count(e, V_j) >= 1 }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[benefit_index(u, to)].load(std::memory_order_relaxed); } @@ -158,7 +159,8 @@ class Km1GainCache { MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight gain(const HypernodeID u, const PartitionID, /* only relevant for graphs */ - const PartitionID to ) const { + const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return benefitTerm(u, to) - penaltyTerm(u, kInvalidPartition); } @@ -167,143 +169,154 @@ class Km1GainCache { // ! This function returns true if the corresponding syncronized edge update triggers // ! a gain cache update. - static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& sync_update); + static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate &sync_update); // ! The partitioned (hyper)graph call this function when its updates its internal // ! data structures before calling the delta gain update function. The partitioned // ! (hyper)graph holds a lock for the corresponding (hyper)edge when calling this // ! function. Thus, it is guaranteed that no other thread will modify the hyperedge. - template - void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph&, const SynchronizedEdgeUpdate&) { + template + void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph &, + const SynchronizedEdgeUpdate &) + { // Do nothing } // ! This functions implements the delta gain updates for the connecitivity metric. // ! When moving a node from its current block from to a target block to, we iterate - // ! over its incident hyperedges and update their pin count values. After each pin count - // ! update, we call this function to update the gain cache to changes associated with - // ! corresponding hyperedge. - template - void deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update); + // ! over its incident hyperedges and update their pin count values. After each pin + // count ! update, we call this function to update the gain cache to changes associated + // with ! corresponding hyperedge. + template + void deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update); // ####################### Uncontraction ####################### - // ! This function implements the gain cache update after an uncontraction that restores node v in - // ! hyperedge he. After the uncontraction operation, node u and v are contained in hyperedge he. 
- template - void uncontractUpdateAfterRestore(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that restores + // node v in ! hyperedge he. After the uncontraction operation, node u and v are + // contained in hyperedge he. + template + void uncontractUpdateAfterRestore(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const HypernodeID v, const HyperedgeID he, const HypernodeID pin_count_in_part_after); - // ! This function implements the gain cache update after an uncontraction that replaces u with v in - // ! hyperedge he. After the uncontraction only node v is contained in hyperedge he. - template - void uncontractUpdateAfterReplacement(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that replaces + // u with v in ! hyperedge he. After the uncontraction only node v is contained in + // hyperedge he. + template + void uncontractUpdateAfterReplacement(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const HypernodeID v, const HyperedgeID he); - // ! This function is called after restoring a single-pin hyperedge. The function assumes that - // ! u is the only pin of the corresponding hyperedge, while block_of_u is its corresponding block ID. - void restoreSinglePinHyperedge(const HypernodeID u, - const PartitionID block_of_u, + // ! This function is called after restoring a single-pin hyperedge. The function + // assumes that ! u is the only pin of the corresponding hyperedge, while block_of_u is + // its corresponding block ID. + void restoreSinglePinHyperedge(const HypernodeID u, const PartitionID block_of_u, const HyperedgeWeight weight_of_he); - // ! This function is called after restoring a net that became identical to another due to a contraction. - template - void restoreIdenticalHyperedge(const PartitionedHypergraph&, - const HyperedgeID) { + // ! This function is called after restoring a net that became identical to another due + // to a contraction. + template + void restoreIdenticalHyperedge(const PartitionedHypergraph &, const HyperedgeID) + { // Do nothing } // ! Notifies the gain cache that all uncontractions of the current batch are completed. 
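The recomputePenaltyTerm / recomputeBenefitTerm helpers further below (marked "Only for Testing") simply evaluate the definitions p(u) := w({ e \in I(u) | pin_count(e, V_i) > 1 }) and b(u, V_j) := w({ e \in I(u) | pin_count(e, V_j) >= 1 }) from scratch. The following is a minimal standalone sketch of the same computation against a made-up pin-count view, not the real PartitionedHypergraph API:

#include <vector>

// Hypothetical flattened view of a partitioned hypergraph, for illustration only.
struct TinyPhg {
  struct Edge { int weight; std::vector<int> pin_count_in_part; };
  std::vector<Edge> edges;
  std::vector<std::vector<int>> incident_edges;  // incident edge ids per node
  std::vector<int> part_id;                      // block of each node
};

// p(u): weight of incident edges with more than one pin in u's block.
int recompute_penalty(const TinyPhg& phg, int u) {
  int penalty = 0;
  for (int e : phg.incident_edges[u])
    if (phg.edges[e].pin_count_in_part[phg.part_id[u]] > 1)
      penalty += phg.edges[e].weight;
  return penalty;
}

// b(u, to): weight of incident edges that already have a pin in block 'to'.
int recompute_benefit(const TinyPhg& phg, int u, int to) {
  int benefit = 0;
  for (int e : phg.incident_edges[u])
    if (phg.edges[e].pin_count_in_part[to] >= 1)
      benefit += phg.edges[e].weight;
  return benefit;
}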
- void batchUncontractionsCompleted() { + void batchUncontractionsCompleted() + { // Do nothing } // ####################### Only for Testing ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputePenaltyTerm(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight recomputePenaltyTerm( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); const PartitionID block_of_u = partitioned_hg.partID(u); HyperedgeWeight penalty = 0; - for (HyperedgeID e : partitioned_hg.incidentEdges(u)) { - if ( partitioned_hg.pinCountInPart(e, block_of_u) > 1 ) { + for(HyperedgeID e : partitioned_hg.incidentEdges(u)) + { + if(partitioned_hg.pinCountInPart(e, block_of_u) > 1) + { penalty += partitioned_hg.edgeWeight(e); } } return penalty; } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputeBenefitTerm(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const PartitionID to) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight + recomputeBenefitTerm(const PartitionedHypergraph &partitioned_hg, const HypernodeID u, + const PartitionID to) const + { HyperedgeWeight benefit = 0; - for (HyperedgeID e : partitioned_hg.incidentEdges(u)) { - if (partitioned_hg.pinCountInPart(e, to) >= 1) { + for(HyperedgeID e : partitioned_hg.incidentEdges(u)) + { + if(partitioned_hg.pinCountInPart(e, to) >= 1) + { benefit += partitioned_hg.edgeWeight(e); } } return benefit; } - void changeNumberOfBlocks(const PartitionID new_k) { + void changeNumberOfBlocks(const PartitionID new_k) + { ASSERT(new_k <= _k); _dummy_adjacent_blocks = IntegerRangeIterator(new_k); } - template - bool verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph&) const { + template + bool verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph &) const + { // Gain cache does not track adjacent blocks of node return true; } - private: +private: friend class DeltaKm1GainCache; MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - size_t penalty_index(const HypernodeID u) const { - return size_t(u) * ( _k + 1 ); - } + size_t penalty_index(const HypernodeID u) const { return size_t(u) * (_k + 1); } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - size_t benefit_index(const HypernodeID u, const PartitionID p) const { - return size_t(u) * ( _k + 1 ) + p + 1; + size_t benefit_index(const HypernodeID u, const PartitionID p) const + { + return size_t(u) * (_k + 1) + p + 1; } // ! Allocates the memory required to store the gain cache - void allocateGainTable(const HypernodeID num_nodes, - const PartitionID k) { - if (_gain_cache.size() == 0 && k != kInvalidPartition) { + void allocateGainTable(const HypernodeID num_nodes, const PartitionID k) + { + if(_gain_cache.size() == 0 && k != kInvalidPartition) + { _k = k; _dummy_adjacent_blocks = IntegerRangeIterator(k); - _gain_cache.resize( - "Refinement", "gain_cache", num_nodes * size_t(_k + 1), true); + _gain_cache.resize("Refinement", "gain_cache", num_nodes * size_t(_k + 1), true); } } // ! 
Initializes the benefit and penalty terms for a node u - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - vec& benefit_aggregator); - - bool nodeGainAssertions(const HypernodeID u, const PartitionID p) const { - if ( p == kInvalidPartition || p >= _k ) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + initializeGainCacheEntryForNode(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, vec &benefit_aggregator); + + bool nodeGainAssertions(const HypernodeID u, const PartitionID p) const + { + if(p == kInvalidPartition || p >= _k) + { LOG << "Invalid block ID (Node" << u << "is part of block" << p << ", but valid block IDs must be in the range [ 0," << _k << "])"; return false; } - if ( benefit_index(u, p) >= _gain_cache.size() ) { + if(benefit_index(u, p) >= _gain_cache.size()) + { LOG << "Access to gain cache would result in an out-of-bounds access (" << "Benefit Index =" << benefit_index(u, p) << ", Gain Cache Size =" << _gain_cache.size() << ")"; @@ -312,145 +325,156 @@ class Km1GainCache { return true; } - // ! Indicate whether or not the gain cache is initialized bool _is_initialized; // ! Number of blocks PartitionID _k; - // ! Array of size |V| * (k + 1), which stores the benefit and penalty terms of each node. - ds::Array< CAtomic > _gain_cache; + // ! Array of size |V| * (k + 1), which stores the benefit and penalty terms of each + // node. + ds::Array > _gain_cache; // ! Provides an iterator from 0 to k (:= number of blocks) IntegerRangeIterator _dummy_adjacent_blocks; }; /** - * In our FM algorithm, the different local searches perform nodes moves locally not visible for other - * threads. The delta gain cache stores these local changes relative to the shared - * gain cache. For example, the penalty term can be computed as follows - * p'(u) := p(u) + Δp(u) - * where p(u) is the penalty term stored in the shared gain cache and Δp(u) is the penalty term stored in - * the delta gain cache after performing some moves locally. To maintain Δp(u) and Δb(u,V_j), we use a hash - * table that only stores entries affected by a gain cache update. -*/ -class DeltaKm1GainCache { + * In our FM algorithm, the different local searches perform nodes moves locally not + * visible for other threads. The delta gain cache stores these local changes relative to + * the shared gain cache. For example, the penalty term can be computed as follows p'(u) + * := p(u) + Δp(u) where p(u) is the penalty term stored in the shared gain cache and + * Δp(u) is the penalty term stored in the delta gain cache after performing some moves + * locally. To maintain Δp(u) and Δb(u,V_j), we use a hash table that only stores entries + * affected by a gain cache update. 
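To make p'(u) := p(u) + Δp(u) concrete, here is a minimal sketch of the overlay idea: a hash map holds only the locally touched deltas and is consulted on top of the shared table. It is a deliberate simplification with made-up names (plain std::unordered_map instead of ds::DynamicFlatMap, non-atomic base values):

#include <cstddef>
#include <unordered_map>
#include <vector>

// Shared (thread-visible) gain cache values, |V| * (k + 1) flat layout.
struct SharedCache {
  int k = 2;
  std::vector<int> table;
  std::size_t penalty_index(std::size_t u) const { return u * (k + 1); }
  std::size_t benefit_index(std::size_t u, int to) const { return u * (k + 1) + to + 1; }
};

// Thread-local overlay: stores only entries touched by local moves.
struct DeltaOverlay {
  const SharedCache* shared;
  std::unordered_map<std::size_t, int> delta;

  int penalty(std::size_t u) const {
    auto it = delta.find(shared->penalty_index(u));
    return shared->table[shared->penalty_index(u)] + (it != delta.end() ? it->second : 0);
  }
  int benefit(std::size_t u, int to) const {
    auto it = delta.find(shared->benefit_index(u, to));
    return shared->table[shared->benefit_index(u, to)] + (it != delta.end() ? it->second : 0);
  }
  int gain(std::size_t u, int to) const { return benefit(u, to) - penalty(u); }
};

Dropping the overlay discards all local moves without ever touching the shared cache, which is what the clear() call shown below provides.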
+ */ +class DeltaKm1GainCache +{ using AdjacentBlocksIterator = typename Km1GainCache::AdjacentBlocksIterator; - public: +public: static constexpr bool requires_connectivity_set = false; - DeltaKm1GainCache(const Km1GainCache& gain_cache) : - _gain_cache(gain_cache), - _gain_cache_delta() { } + DeltaKm1GainCache(const Km1GainCache &gain_cache) : + _gain_cache(gain_cache), _gain_cache_delta() + { + } // ####################### Initialize & Reset ####################### - void initialize(const size_t size) { - _gain_cache_delta.initialize(size); - } + void initialize(const size_t size) { _gain_cache_delta.initialize(size); } - void clear() { - _gain_cache_delta.clear(); - } + void clear() { _gain_cache_delta.clear(); } - void dropMemory() { - _gain_cache_delta.freeInternalData(); - } + void dropMemory() { _gain_cache_delta.freeInternalData(); } - size_t size_in_bytes() const { - return _gain_cache_delta.size_in_bytes(); - } + size_t size_in_bytes() const { return _gain_cache_delta.size_in_bytes(); } // ####################### Gain Computation ####################### // ! Returns an iterator over the adjacent blocks of a node - IteratorRange adjacentBlocks(const HypernodeID hn) const { + IteratorRange adjacentBlocks(const HypernodeID hn) const + { return _gain_cache.adjacentBlocks(hn); } // ! Returns the penalty term of node u. // ! More formally, p(u) := w({ e \in I(u) | pin_count(e, V_i) > 1 }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight penaltyTerm(const HypernodeID u, - const PartitionID from) const { - const HyperedgeWeight* penalty_delta = - _gain_cache_delta.get_if_contained(_gain_cache.penalty_index(u)); - return _gain_cache.penaltyTerm(u, from) + ( penalty_delta ? *penalty_delta : 0 ); + HyperedgeWeight penaltyTerm(const HypernodeID u, const PartitionID from) const + { + const HyperedgeWeight *penalty_delta = + _gain_cache_delta.get_if_contained(_gain_cache.penalty_index(u)); + return _gain_cache.penaltyTerm(u, from) + (penalty_delta ? *penalty_delta : 0); } // ! Returns the benefit term for moving node u to block to. // ! More formally, b(u, V_j) := w({ e \in I(u) | pin_count(e, V_j) >= 1 }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(to != kInvalidPartition && to < _gain_cache._k); - const HyperedgeWeight* benefit_delta = - _gain_cache_delta.get_if_contained(_gain_cache.benefit_index(u, to)); - return _gain_cache.benefitTerm(u, to) + ( benefit_delta ? *benefit_delta : 0 ); + const HyperedgeWeight *benefit_delta = + _gain_cache_delta.get_if_contained(_gain_cache.benefit_index(u, to)); + return _gain_cache.benefitTerm(u, to) + (benefit_delta ? *benefit_delta : 0); } // ! Returns the gain of moving node u from its current block to a target block V_j. // ! More formally, g(u, V_j) := b(u, V_j) - p(u). 
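The deltaGainUpdate that follows applies exactly four pin-count cases per synchronized edge update. The sketch below restates them on plain arrays so the invariants are easy to check against the definitions of p(u) and b(u, V_j); the function and container types are simplified stand-ins, not the library API:

#include <vector>

// Illustrative only: applies the four km1 delta-gain-update cases for one
// hyperedge to plain per-node penalty/benefit tables. The real code updates a
// hash-map overlay (delta cache) or atomics (shared cache).
void apply_km1_update(const std::vector<int>& pins,           // pins of the edge
                      const std::vector<int>& part_id,        // block of each node
                      int from, int to, int edge_weight,
                      int pin_count_in_from_part_after,
                      int pin_count_in_to_part_after,
                      std::vector<int>& penalty,               // p(u) per node
                      std::vector<std::vector<int>>& benefit)  // b(u, block) per node
{
  // Case 1: only one pin of the edge remains in 'from'. For that pin the edge no
  // longer has a second pin in its block, so it stops contributing to p(u).
  if (pin_count_in_from_part_after == 1) {
    for (int u : pins)
      if (part_id[u] == from) penalty[u] -= edge_weight;
  // Case 2: 'from' lost its last pin. The edge no longer rewards moves into 'from'.
  } else if (pin_count_in_from_part_after == 0) {
    for (int u : pins) benefit[u][from] -= edge_weight;
  }
  // Case 3: 'to' gained its first pin. The edge now rewards moves into 'to'.
  if (pin_count_in_to_part_after == 1) {
    for (int u : pins) benefit[u][to] += edge_weight;
  // Case 4: 'to' now has two pins. For pins already in 'to', the edge now has more
  // than one pin in their block, so it starts contributing to p(u).
  } else if (pin_count_in_to_part_after == 2) {
    for (int u : pins)
      if (part_id[u] == to) penalty[u] += edge_weight;
  }
}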
MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight gain(const HypernodeID u, - const PartitionID from, - const PartitionID to ) const { + HyperedgeWeight gain(const HypernodeID u, const PartitionID from, + const PartitionID to) const + { return benefitTerm(u, to) - penaltyTerm(u, from); } - // ####################### Delta Gain Update ####################### + // ####################### Delta Gain Update ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) + { const HyperedgeID he = sync_update.he; const PartitionID from = sync_update.from; const PartitionID to = sync_update.to; const HyperedgeWeight edge_weight = sync_update.edge_weight; - const HypernodeID pin_count_in_from_part_after = sync_update.pin_count_in_from_part_after; + const HypernodeID pin_count_in_from_part_after = + sync_update.pin_count_in_from_part_after; const HypernodeID pin_count_in_to_part_after = sync_update.pin_count_in_to_part_after; - if (pin_count_in_from_part_after == 1) { - for (HypernodeID u : partitioned_hg.pins(he)) { - if (partitioned_hg.partID(u) == from) { + if(pin_count_in_from_part_after == 1) + { + for(HypernodeID u : partitioned_hg.pins(he)) + { + if(partitioned_hg.partID(u) == from) + { _gain_cache_delta[_gain_cache.penalty_index(u)] -= edge_weight; } } - } else if (pin_count_in_from_part_after == 0) { - for (HypernodeID u : partitioned_hg.pins(he)) { + } + else if(pin_count_in_from_part_after == 0) + { + for(HypernodeID u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.benefit_index(u, from)] -= edge_weight; } } - if (pin_count_in_to_part_after == 1) { - for (HypernodeID u : partitioned_hg.pins(he)) { + if(pin_count_in_to_part_after == 1) + { + for(HypernodeID u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.benefit_index(u, to)] += edge_weight; } - } else if (pin_count_in_to_part_after == 2) { - for (HypernodeID u : partitioned_hg.pins(he)) { - if (partitioned_hg.partID(u) == to) { + } + else if(pin_count_in_to_part_after == 2) + { + for(HypernodeID u : partitioned_hg.pins(he)) + { + if(partitioned_hg.partID(u) == to) + { _gain_cache_delta[_gain_cache.penalty_index(u)] += edge_weight; } } } } - // ####################### Miscellaneous ####################### + // ####################### Miscellaneous ####################### - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); - utils::MemoryTreeNode* gain_cache_delta_node = parent->addChild("Delta Gain Cache"); + utils::MemoryTreeNode *gain_cache_delta_node = parent->addChild("Delta Gain Cache"); gain_cache_delta_node->updateSize(size_in_bytes()); } - private: - const Km1GainCache& _gain_cache; +private: + const Km1GainCache &_gain_cache; // ! Stores the delta of each locally touched gain cache entry // ! 
relative to the gain cache in '_phg' ds::DynamicFlatMap _gain_cache_delta; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/km1/km1_gain_computation.h b/mt-kahypar/partition/refinement/gains/km1/km1_gain_computation.h index 6146eb053..2af8e6b8b 100644 --- a/mt-kahypar/partition/refinement/gains/km1/km1_gain_computation.h +++ b/mt-kahypar/partition/refinement/gains/km1/km1_gain_computation.h @@ -28,38 +28,40 @@ #include -#include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" -#include "mt-kahypar/partition/refinement/gains/km1/km1_attributed_gains.h" #include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" +#include "mt-kahypar/partition/refinement/gains/km1/km1_attributed_gains.h" namespace mt_kahypar { -class Km1GainComputation : public GainComputationBase { +class Km1GainComputation + : public GainComputationBase +{ using Base = GainComputationBase; using RatingMap = typename Base::RatingMap; static constexpr bool enable_heavy_assert = false; - public: - Km1GainComputation(const Context& context, - bool disable_randomization = false) : - Base(context, disable_randomization) { } +public: + Km1GainComputation(const Context &context, bool disable_randomization = false) : + Base(context, disable_randomization) + { + } // ! Precomputes the gain to all adjacent blocks. // ! Conceptually, we compute the gain of moving the node to an non-adjacent block // ! and the gain to all adjacent blocks assuming the node is in an isolated block. // ! The gain of that node to a block to can then be computed by // ! 'isolated_block_gain - tmp_scores[to]' (see gain(...)) - template - void precomputeGains(const PartitionedHypergraph& phg, - const HypernodeID hn, - RatingMap& tmp_scores, - Gain& isolated_block_gain, - const bool) { + template + void precomputeGains(const PartitionedHypergraph &phg, const HypernodeID hn, + RatingMap &tmp_scores, Gain &isolated_block_gain, const bool) + { ASSERT(tmp_scores.size() == 0, "Rating map not empty"); PartitionID from = phg.partID(hn); - for (const HyperedgeID& he : phg.incidentEdges(hn)) { + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { HypernodeID pin_count_in_from_part = phg.pinCountInPart(he, from); HyperedgeWeight he_weight = phg.edgeWeight(he); @@ -69,30 +71,33 @@ class Km1GainComputation : public GainComputationBase 1 ) { + if(pin_count_in_from_part > 1) + { isolated_block_gain += he_weight; } // Substract edge weight of all incident blocks. // Note, in case the pin count in from part is greater than one // we will later add that edge weight to the gain (see internal_weight). 
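A small worked example shows what precomputeGains accumulates. The sketch below re-enacts the loop above with made-up edge data (not the PartitionedHypergraph API); isolated_block_gain collects p_km1(u) and tmp_scores[to] collects b_km1(u, to), so the value isolated_block_gain - tmp_scores[to] returned by gain(...) equals p_km1(u) - b_km1(u, to), i.e. the change of the km1 objective when moving the node into 'to' (negative values are improvements):

#include <cassert>
#include <map>
#include <vector>

// Toy re-enactment of precomputeGains for a single node u in block 0.
// Each edge is given by its weight and its pin count per block: edges with more
// than one pin in u's block add to isolated_block_gain, and every other block in
// the edge's connectivity set collects the edge weight.
int main() {
  struct Edge { int weight; std::map<int, int> pin_count; };
  const int from = 0;
  std::vector<Edge> incident = {
    { 2, { { 0, 2 }, { 1, 1 } } },  // two pins in block 0, one pin in block 1
    { 3, { { 0, 1 }, { 2, 1 } } },  // u is the only pin in block 0, one pin in block 2
    { 1, { { 0, 1 } } }             // single-pin edge, contributes nothing
  };

  int isolated_block_gain = 0;
  std::map<int, int> tmp_scores;
  for (const Edge& e : incident) {
    if (e.pin_count.at(from) > 1) isolated_block_gain += e.weight;
    for (const auto& [to, pin_count] : e.pin_count)
      if (to != from && pin_count >= 1) tmp_scores[to] += e.weight;
  }

  // isolated_block_gain - tmp_scores[to] = p_km1(u) - b_km1(u, to).
  assert(isolated_block_gain - tmp_scores[1] == 0);   // moving u to block 1: km1 unchanged
  assert(isolated_block_gain - tmp_scores[2] == -1);  // moving u to block 2: km1 drops by 1
  return 0;
}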
- for (const PartitionID& to : phg.connectivitySet(he)) { - if (from != to) { + for(const PartitionID &to : phg.connectivitySet(he)) + { + if(from != to) + { tmp_scores[to] += he_weight; } } } } - HyperedgeWeight gain(const Gain to_score, - const Gain isolated_block_gain) { + HyperedgeWeight gain(const Gain to_score, const Gain isolated_block_gain) + { return isolated_block_gain - to_score; } - void changeNumberOfBlocksImpl(const PartitionID) { + void changeNumberOfBlocksImpl(const PartitionID) + { // Do nothing } - }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/km1/km1_rollback.h b/mt-kahypar/partition/refinement/gains/km1/km1_rollback.h index 8943fed3a..1ebdb79a2 100644 --- a/mt-kahypar/partition/refinement/gains/km1/km1_rollback.h +++ b/mt-kahypar/partition/refinement/gains/km1/km1_rollback.h @@ -34,21 +34,22 @@ namespace mt_kahypar { /** - * In our FM algorithm, we recompute the gain values of all node moves in the global move sequence - * M := in parallel (see global_rollback.h). - * Each node move m_i is of the form (u, V_i, V_j), which means that - * node u is moved from block V_i to block V_j. Each node in this sequence is moved at most once. - * Moreover, we assume that all node moves with an index < i are performed before m_i. + * In our FM algorithm, we recompute the gain values of all node moves in the global move + * sequence M := in parallel (see global_rollback.h). Each node move m_i + * is of the form (u, V_i, V_j), which means that node u is moved from block V_i to block + * V_j. Each node in this sequence is moved at most once. Moreover, we assume that all + * node moves with an index < i are performed before m_i. * - * The parallel gain recomputation algorithm iterates over all hyperedges e \in E in parallel. - * We then iterate over the pins of e and compute some auxilliary data based on + * The parallel gain recomputation algorithm iterates over all hyperedges e \in E in + * parallel. We then iterate over the pins of e and compute some auxilliary data based on * which we then decide if we attribute an increase or reduction by w(e) to a moved pin. - * This class implements the functions required by the rollback algorithm to recompute all gain values - * for the connectivity metric. -*/ -class Km1Rollback { + * This class implements the functions required by the rollback algorithm to recompute all + * gain values for the connectivity metric. + */ +class Km1Rollback +{ - public: +public: static constexpr bool supports_parallel_rollback = true; /** @@ -57,16 +58,18 @@ class Km1Rollback { * (first_in) resp. last moved out of the corresponding block (last_out) and the number * of non-moved pins in the block (remaining_pins). */ - struct RecalculationData { + struct RecalculationData + { MoveID first_in, last_out; HypernodeID remaining_pins; RecalculationData() : - first_in(std::numeric_limits::max()), - last_out(std::numeric_limits::min()), - remaining_pins(0) - { } + first_in(std::numeric_limits::max()), + last_out(std::numeric_limits::min()), remaining_pins(0) + { + } - void reset() { + void reset() + { first_in = std::numeric_limits::max(); last_out = std::numeric_limits::min(); remaining_pins = 0; @@ -74,46 +77,47 @@ class Km1Rollback { }; // Updates the auxilliary data for a node move m with index m_id. 
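To make the conditions used by the benefit(...) and penalty(...) functions that follow concrete, the toy trace below replays one hyperedge through a three-move sequence and checks which move is credited with removing a block from the edge and which moves are charged for introducing new blocks. The Move and RecalculationData types here are simplified stand-ins:

#include <algorithm>
#include <cassert>
#include <limits>
#include <vector>

// Toy trace for one hyperedge e = {a, b, c} of weight 5, all pins initially in
// block 0, with the move sequence m_1 = (a, 0 -> 1), m_2 = (b, 0 -> 1),
// m_3 = (c, 0 -> 2). This mirrors the RecalculationData bookkeeping above.
struct Recalc {
  unsigned first_in = std::numeric_limits<unsigned>::max();
  unsigned last_out = std::numeric_limits<unsigned>::min();
  int remaining_pins = 0;
};
struct Move { int node, from, to; };

int main() {
  const int k = 3;
  const std::vector<Move> seq = { { 0, 0, 1 }, { 1, 0, 1 }, { 2, 0, 2 } };  // move ids 1..3
  std::vector<Recalc> r(k);
  for (unsigned m_id = 1; m_id <= seq.size(); ++m_id) {  // updateMove(...)
    const Move& m = seq[m_id - 1];
    r[m.to].first_in = std::min(r[m.to].first_in, m_id);
    r[m.from].last_out = std::max(r[m.from].last_out, m_id);
  }
  // No pin of e stays in its original block, so remaining_pins is 0 everywhere.

  auto has_benefit = [&](unsigned m_id, const Move& m) {
    return r[m.from].last_out == m_id && r[m.from].first_in > m_id &&
           r[m.from].remaining_pins == 0;
  };
  auto has_penalty = [&](unsigned m_id, const Move& m) {
    return r[m.to].first_in == m_id && r[m.to].last_out < m_id &&
           r[m.to].remaining_pins == 0;
  };

  // m_3 is the last move out of block 0 and nothing moves back in: it gets w(e).
  assert(!has_benefit(1, seq[0]) && !has_benefit(2, seq[1]) && has_benefit(3, seq[2]));
  // m_1 introduces block 1 and m_3 introduces block 2: each is charged w(e).
  assert(has_penalty(1, seq[0]) && !has_penalty(2, seq[1]) && has_penalty(3, seq[2]));
  // Net attributed change: +5 (m_1) + 0 (m_2) + (5 - 5) (m_3) = +5, matching the
  // connectivity of e growing from {block 0} to {block 1, block 2}.
  return 0;
}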
- static void updateMove(const MoveID m_id, - const Move& m, - vec& r) { + static void updateMove(const MoveID m_id, const Move &m, vec &r) + { r[m.to].first_in = std::min(r[m.to].first_in, m_id); r[m.from].last_out = std::max(r[m.from].last_out, m_id); } // Updates the number of non-moved in a block. - static void updateNonMovedPinInBlock(const PartitionID block, - vec& r) { + static void updateNonMovedPinInBlock(const PartitionID block, vec &r) + { r[block].remaining_pins++; } - template - static HyperedgeWeight benefit(const PartitionedHypergraph& phg, - const HyperedgeID e, - const MoveID m_id, - const Move& m, - vec& r) { - // The node move reduces the connectivity of the currently considered hyperedge if m is the last - // node that moves out of its corresponding block, while the first node that moves into the correponding - // block is performed strictly after m. Furthermore, the move sequence has to move all nodes out - // of the correspodning block (r[m.from].remaining_pins == 0). - const bool has_benefit = r[m.from].last_out == m_id && r[m.from].first_in > m_id && r[m.from].remaining_pins == 0; + template + static HyperedgeWeight benefit(const PartitionedHypergraph &phg, const HyperedgeID e, + const MoveID m_id, const Move &m, + vec &r) + { + // The node move reduces the connectivity of the currently considered hyperedge if m + // is the last node that moves out of its corresponding block, while the first node + // that moves into the correponding block is performed strictly after m. Furthermore, + // the move sequence has to move all nodes out of the correspodning block + // (r[m.from].remaining_pins == 0). + const bool has_benefit = r[m.from].last_out == m_id && r[m.from].first_in > m_id && + r[m.from].remaining_pins == 0; return has_benefit * phg.edgeWeight(e); } - template - static HyperedgeWeight penalty(const PartitionedHypergraph& phg, - const HyperedgeID e, - const MoveID m_id, - const Move& m, - vec& r) { - // The node move increases the connectivity of the currently considered hyperedge if m is the - // first node that moves into the corresponding block, while the last node that moves out of the - // corresponding block is performed strictly before m. Furthermore, the move sequence has to move - // all nodes out of the correspodning block (r[m.to].remaining_pins == 0). - const bool has_penalty = r[m.to].first_in == m_id && r[m.to].last_out < m_id && r[m.to].remaining_pins == 0; + template + static HyperedgeWeight penalty(const PartitionedHypergraph &phg, const HyperedgeID e, + const MoveID m_id, const Move &m, + vec &r) + { + // The node move increases the connectivity of the currently considered hyperedge if m + // is the first node that moves into the corresponding block, while the last node that + // moves out of the corresponding block is performed strictly before m. Furthermore, + // the move sequence has to move all nodes out of the correspodning block + // (r[m.to].remaining_pins == 0). 
+ const bool has_penalty = r[m.to].first_in == m_id && r[m.to].last_out < m_id && + r[m.to].remaining_pins == 0; return has_penalty * phg.edgeWeight(e); } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/soed/soed_attributed_gains.h b/mt-kahypar/partition/refinement/gains/soed/soed_attributed_gains.h index d7534bf99..efab2d223 100644 --- a/mt-kahypar/partition/refinement/gains/soed/soed_attributed_gains.h +++ b/mt-kahypar/partition/refinement/gains/soed/soed_attributed_gains.h @@ -35,22 +35,25 @@ namespace mt_kahypar { * for each incident hyperedge of the node based on which we then compute an * attributed gain value. */ -struct SoedAttributedGains { - static HyperedgeWeight gain(const SynchronizedEdgeUpdate& sync_update) { +struct SoedAttributedGains +{ + static HyperedgeWeight gain(const SynchronizedEdgeUpdate &sync_update) + { const HypernodeID edge_size = sync_update.edge_size; const HyperedgeWeight edge_weight = sync_update.edge_weight; - const HypernodeID pin_count_in_from_part_after = sync_update.pin_count_in_from_part_after; + const HypernodeID pin_count_in_from_part_after = + sync_update.pin_count_in_from_part_after; const HypernodeID pin_count_in_to_part_after = sync_update.pin_count_in_to_part_after; const bool move_removes_he_from_cut = pin_count_in_to_part_after == edge_size; const bool move_makes_he_cut = pin_count_in_from_part_after == edge_size - 1; const bool increased_connectivity = pin_count_in_to_part_after == 1; const bool decreased_connectivity = pin_count_in_from_part_after == 0; - return ( sync_update.edge_size > 1 ) * - ( -2 * move_removes_he_from_cut * edge_weight + - 2 * move_makes_he_cut * edge_weight + - -1 * ( 1 - move_removes_he_from_cut ) * decreased_connectivity * edge_weight + - 1 * ( 1 - move_makes_he_cut ) * increased_connectivity * edge_weight ); + return (sync_update.edge_size > 1) * + (-2 * move_removes_he_from_cut * edge_weight + + 2 * move_makes_he_cut * edge_weight + + -1 * (1 - move_removes_he_from_cut) * decreased_connectivity * edge_weight + + 1 * (1 - move_makes_he_cut) * increased_connectivity * edge_weight); } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/soed/soed_flow_network_construction.h b/mt-kahypar/partition/refinement/gains/soed/soed_flow_network_construction.h index 9f63ee547..09e20ace0 100644 --- a/mt-kahypar/partition/refinement/gains/soed/soed_flow_network_construction.h +++ b/mt-kahypar/partition/refinement/gains/soed/soed_flow_network_construction.h @@ -36,57 +36,56 @@ namespace mt_kahypar { * to determine the capacity of a hyperedge and whether or not the hyperedge * is relevant for optimizing the objective function. */ -struct SoedFlowNetworkConstruction { +struct SoedFlowNetworkConstruction +{ // ! Capacity of the hyperedge - template - static HyperedgeWeight capacity(const PartitionedHypergraph& phg, - const Context&, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1) { + template + static HyperedgeWeight capacity(const PartitionedHypergraph &phg, const Context &, + const HyperedgeID he, const PartitionID block_0, + const PartitionID block_1) + { const PartitionID connectivity = phg.connectivity(he); const HypernodeID pin_count_block_0 = phg.pinCountInPart(he, block_0); const HypernodeID pin_count_block_1 = phg.pinCountInPart(he, block_1); - return ( connectivity == 1 || ( connectivity == 2 && pin_count_block_0 > 0 && - pin_count_block_1 > 0 ) ? 
2 : 1 ) * phg.edgeWeight(he); + return (connectivity == 1 || (connectivity == 2 && pin_count_block_0 > 0 && + pin_count_block_1 > 0) ? + 2 : + 1) * + phg.edgeWeight(he); } // ! If true, then hyperedge is not relevant and can be dropped. - template - static bool dropHyperedge(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool dropHyperedge(const PartitionedHypergraph &, const HyperedgeID, + const PartitionID, const PartitionID) + { return false; } // ! If true, then hyperedge is connected to source. - template - static bool connectToSource(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool connectToSource(const PartitionedHypergraph &, const HyperedgeID, + const PartitionID, const PartitionID) + { return false; } // ! If true, then hyperedge is connected to sink. - template - static bool connectToSink(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool connectToSink(const PartitionedHypergraph &, const HyperedgeID, + const PartitionID, const PartitionID) + { return false; } // ! If true, then hyperedge is considered as cut edge and its // ! weight is added to the total cut - template - static bool isCut(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool isCut(const PartitionedHypergraph &, const HyperedgeID, const PartitionID, + const PartitionID) + { return false; } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.cpp b/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.cpp index 5c4d010ac..c827f240c 100644 --- a/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.cpp +++ b/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.cpp @@ -26,90 +26,104 @@ #include "mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" #include "tbb/concurrent_vector.h" +#include "tbb/enumerable_thread_specific.h" +#include "tbb/parallel_for.h" #include "mt-kahypar/definitions.h" namespace mt_kahypar { -template -void SoedGainCache::initializeGainCache(const PartitionedHypergraph& partitioned_hg) { +template +void SoedGainCache::initializeGainCache(const PartitionedHypergraph &partitioned_hg) +{ ASSERT(!_is_initialized, "Gain cache is already initialized"); ASSERT(_k <= 0 || _k >= partitioned_hg.k(), - "Gain cache was already initialized for a different k" << V(_k) << V(partitioned_hg.k())); + "Gain cache was already initialized for a different k" << V(_k) + << V(partitioned_hg.k())); allocateGainTable(partitioned_hg.topLevelNumNodes(), partitioned_hg.k()); - // Gain calculation consist of two stages // 1. Compute gain of all low degree vertices // 2. 
Compute gain of all high degree vertices - tbb::enumerable_thread_specific< vec > ets_mtb(_k, 0); + tbb::enumerable_thread_specific > ets_mtb(_k, 0); tbb::concurrent_vector high_degree_vertices; // Compute gain of all low degree vertices - tbb::parallel_for(tbb::blocked_range(HypernodeID(0), partitioned_hg.initialNumNodes()), - [&](tbb::blocked_range& r) { - vec& benefit_aggregator = ets_mtb.local(); - for (HypernodeID u = r.begin(); u < r.end(); ++u) { - if ( partitioned_hg.nodeIsEnabled(u)) { - if ( partitioned_hg.nodeDegree(u) <= HIGH_DEGREE_THRESHOLD) { - initializeGainCacheEntryForNode(partitioned_hg, u, benefit_aggregator); - } else { - // Collect high degree vertices - high_degree_vertices.push_back(u); + tbb::parallel_for( + tbb::blocked_range(HypernodeID(0), partitioned_hg.initialNumNodes()), + [&](tbb::blocked_range &r) { + vec &benefit_aggregator = ets_mtb.local(); + for(HypernodeID u = r.begin(); u < r.end(); ++u) + { + if(partitioned_hg.nodeIsEnabled(u)) + { + if(partitioned_hg.nodeDegree(u) <= HIGH_DEGREE_THRESHOLD) + { + initializeGainCacheEntryForNode(partitioned_hg, u, benefit_aggregator); + } + else + { + // Collect high degree vertices + high_degree_vertices.push_back(u); + } } } - } - }); + }); auto aggregate_contribution_of_he_for_node = - [&](const PartitionID block_of_u, - const HyperedgeID he, - HyperedgeWeight& penalty_aggregator, - vec& benefit_aggregator) { - const HypernodeID edge_size = partitioned_hg.edgeSize(he); - if ( edge_size > 1 ) { - const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); - const HypernodeID pin_count_from = partitioned_hg.pinCountInPart(he, block_of_u); - const HyperedgeWeight penalty_multiplier = - ( pin_count_from > 1 ) + ( pin_count_from == edge_size ); - penalty_aggregator += penalty_multiplier * edge_weight; - - for (const PartitionID to : partitioned_hg.connectivitySet(he)) { - const HyperedgeWeight benefit_multiplier = 1 + - ( partitioned_hg.pinCountInPart(he, to) == edge_size - 1 ); - benefit_aggregator[to] += benefit_multiplier * edge_weight; - } - } - }; + [&](const PartitionID block_of_u, const HyperedgeID he, + HyperedgeWeight &penalty_aggregator, vec &benefit_aggregator) { + const HypernodeID edge_size = partitioned_hg.edgeSize(he); + if(edge_size > 1) + { + const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); + const HypernodeID pin_count_from = + partitioned_hg.pinCountInPart(he, block_of_u); + const HyperedgeWeight penalty_multiplier = + (pin_count_from > 1) + (pin_count_from == edge_size); + penalty_aggregator += penalty_multiplier * edge_weight; + + for(const PartitionID to : partitioned_hg.connectivitySet(he)) + { + const HyperedgeWeight benefit_multiplier = + 1 + (partitioned_hg.pinCountInPart(he, to) == edge_size - 1); + benefit_aggregator[to] += benefit_multiplier * edge_weight; + } + } + }; // Compute gain of all high degree vertices - for ( const HypernodeID& u : high_degree_vertices ) { + for(const HypernodeID &u : high_degree_vertices) + { tbb::enumerable_thread_specific ets_mfp(0); const PartitionID from = partitioned_hg.partID(u); const HypernodeID degree_of_u = partitioned_hg.nodeDegree(u); - tbb::parallel_for(tbb::blocked_range(ID(0), degree_of_u), - [&](tbb::blocked_range& r) { - vec& benefit_aggregator = ets_mtb.local(); - HyperedgeWeight& penalty_aggregator = ets_mfp.local(); - size_t current_pos = r.begin(); - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(u, r.begin()) ) { - aggregate_contribution_of_he_for_node(from, he, - penalty_aggregator, benefit_aggregator); 
- ++current_pos; - if ( current_pos == r.end() ) { - break; - } - } - }); + tbb::parallel_for( + tbb::blocked_range(ID(0), degree_of_u), + [&](tbb::blocked_range &r) { + vec &benefit_aggregator = ets_mtb.local(); + HyperedgeWeight &penalty_aggregator = ets_mfp.local(); + size_t current_pos = r.begin(); + for(const HyperedgeID &he : partitioned_hg.incidentEdges(u, r.begin())) + { + aggregate_contribution_of_he_for_node(from, he, penalty_aggregator, + benefit_aggregator); + ++current_pos; + if(current_pos == r.end()) + { + break; + } + } + }); // Aggregate thread locals to compute overall gain of the high degree vertex const HyperedgeWeight penalty_term = ets_mfp.combine(std::plus()); _gain_cache[penalty_index(u)].store(penalty_term, std::memory_order_relaxed); - for (PartitionID p = 0; p < _k; ++p) { + for(PartitionID p = 0; p < _k; ++p) + { HyperedgeWeight move_to_benefit = 0; - for ( auto& l_move_to_benefit : ets_mtb ) { + for(auto &l_move_to_benefit : ets_mtb) + { move_to_benefit += l_move_to_benefit[p]; l_move_to_benefit[p] = 0; } @@ -120,7 +134,8 @@ void SoedGainCache::initializeGainCache(const PartitionedHypergraph& partitioned _is_initialized = true; } -bool SoedGainCache::triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& sync_update) { +bool SoedGainCache::triggersDeltaGainUpdate(const SynchronizedEdgeUpdate &sync_update) +{ return sync_update.pin_count_in_from_part_after == 0 || sync_update.pin_count_in_from_part_after == 1 || sync_update.pin_count_in_to_part_after == 1 || @@ -131,83 +146,114 @@ bool SoedGainCache::triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& sync_u sync_update.pin_count_in_to_part_after == sync_update.edge_size - 1; } -template -void SoedGainCache::deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { +template +void SoedGainCache::deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) +{ ASSERT(_is_initialized, "Gain cache is not initialized"); const HypernodeID edge_size = sync_update.edge_size; - if ( edge_size > 1 ) { + if(edge_size > 1) + { const HyperedgeID he = sync_update.he; const PartitionID from = sync_update.from; const PartitionID to = sync_update.to; const HyperedgeWeight edge_weight = sync_update.edge_weight; - const HypernodeID pin_count_in_from_part_after = sync_update.pin_count_in_from_part_after; + const HypernodeID pin_count_in_from_part_after = + sync_update.pin_count_in_from_part_after; const HypernodeID pin_count_in_to_part_after = sync_update.pin_count_in_to_part_after; // Delta gain updates for connectivity metric (see km1_gain_cache.cpp) - if ( pin_count_in_from_part_after == 1 ) { - for (const HypernodeID& u : partitioned_hg.pins(he)) { + if(pin_count_in_from_part_after == 1) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, from)); - if (partitioned_hg.partID(u) == from) { + if(partitioned_hg.partID(u) == from) + { _gain_cache[penalty_index(u)].fetch_sub(edge_weight, std::memory_order_relaxed); } } - } else if (pin_count_in_from_part_after == 0) { - for (const HypernodeID& u : partitioned_hg.pins(he)) { + } + else if(pin_count_in_from_part_after == 0) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, from)); - _gain_cache[benefit_index(u, from)].fetch_sub(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, from)].fetch_sub(edge_weight, + std::memory_order_relaxed); } } - if (pin_count_in_to_part_after == 1) { - for 
(const HypernodeID& u : partitioned_hg.pins(he)) { + if(pin_count_in_to_part_after == 1) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, to)); - _gain_cache[benefit_index(u, to)].fetch_add(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, to)].fetch_add(edge_weight, + std::memory_order_relaxed); } - } else if (pin_count_in_to_part_after == 2) { - for (const HypernodeID& u : partitioned_hg.pins(he)) { + } + else if(pin_count_in_to_part_after == 2) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, to)); - if (partitioned_hg.partID(u) == to) { + if(partitioned_hg.partID(u) == to) + { _gain_cache[penalty_index(u)].fetch_add(edge_weight, std::memory_order_relaxed); } } } // Delta gain updates for cut metric (see cut_gain_cache.cpp) - if ( pin_count_in_from_part_after == edge_size - 1 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + if(pin_count_in_from_part_after == edge_size - 1) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, from)); _gain_cache[penalty_index(u)].fetch_sub(edge_weight, std::memory_order_relaxed); - _gain_cache[benefit_index(u, from)].fetch_add(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, from)].fetch_add(edge_weight, + std::memory_order_relaxed); } - } else if ( pin_count_in_from_part_after == edge_size - 2 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + } + else if(pin_count_in_from_part_after == edge_size - 2) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, from)); - _gain_cache[benefit_index(u, from)].fetch_sub(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, from)].fetch_sub(edge_weight, + std::memory_order_relaxed); } } - if ( pin_count_in_to_part_after == edge_size ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + if(pin_count_in_to_part_after == edge_size) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, to)); _gain_cache[penalty_index(u)].fetch_add(edge_weight, std::memory_order_relaxed); - _gain_cache[benefit_index(u, to)].fetch_sub(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, to)].fetch_sub(edge_weight, + std::memory_order_relaxed); } - } else if ( pin_count_in_to_part_after == edge_size - 1 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + } + else if(pin_count_in_to_part_after == edge_size - 1) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { ASSERT(nodeGainAssertions(u, to)); - _gain_cache[benefit_index(u, to)].fetch_add(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, to)].fetch_add(edge_weight, + std::memory_order_relaxed); } } } } -template -void SoedGainCache::uncontractUpdateAfterRestore(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, - const HyperedgeID he, - const HypernodeID pin_count_in_part_after) { - if ( _is_initialized ) { +template +void SoedGainCache::uncontractUpdateAfterRestore( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, const HypernodeID v, + const HyperedgeID he, const HypernodeID pin_count_in_part_after) +{ + if(_is_initialized) + { // If u was the only pin of hyperedge he in its block before then moving out vertex u // of hyperedge he does not decrease the connectivity any more after the // uncontraction => p(u) += w(he) @@ -217,20 +263,26 @@ void 
SoedGainCache::uncontractUpdateAfterRestore(const PartitionedHypergraph& pa const PartitionID connectivity = partitioned_hg.connectivity(he); HypernodeID other_pin_of_block = kInvalidHypernode; - if ( pin_count_in_part_after == 2 ) { + if(pin_count_in_part_after == 2) + { // Update connectivity part of soed metric - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { // u might be replaced by an other node in the batch // => search for other pin in the corresponding block and // add edge weight. - if ( pin != v && partitioned_hg.partID(pin) == from ) { - _gain_cache[penalty_index(pin)].add_fetch(edge_weight, std::memory_order_relaxed); - if ( edge_size == 2 ) { - // Special Case: We ignore single-pin nets during the initial gain computation. - // In this case, he is not a single-pin net anymore and we have to add the weight - // of the hyperedge to the benefit term of the corresponding pin. - _gain_cache[benefit_index(pin, from)].add_fetch( - edge_weight, std::memory_order_relaxed); + if(pin != v && partitioned_hg.partID(pin) == from) + { + _gain_cache[penalty_index(pin)].add_fetch(edge_weight, + std::memory_order_relaxed); + if(edge_size == 2) + { + // Special Case: We ignore single-pin nets during the initial gain + // computation. In this case, he is not a single-pin net anymore and we have + // to add the weight of the hyperedge to the benefit term of the corresponding + // pin. + _gain_cache[benefit_index(pin, from)].add_fetch(edge_weight, + std::memory_order_relaxed); } other_pin_of_block = pin; break; @@ -238,23 +290,29 @@ void SoedGainCache::uncontractUpdateAfterRestore(const PartitionedHypergraph& pa } // Update cut part of soed metric - if ( connectivity == 2 ) { - // In this case, the hyperedge contains two blocks, while the other block V' (!= block) - // had |e| - 1 pins before the uncontraction of u and v. Now the size of the hyperedge - // increased by one while the block of u and v contains two pins (obviously) and the - // other block |e| - 2. Therefore, we have to subtract w(e) from b(u, V') for all pins - // in the hyperedge. + if(connectivity == 2) + { + // In this case, the hyperedge contains two blocks, while the other block V' (!= + // block) had |e| - 1 pins before the uncontraction of u and v. Now the size of + // the hyperedge increased by one while the block of u and v contains two pins + // (obviously) and the other block |e| - 2. Therefore, we have to subtract w(e) + // from b(u, V') for all pins in the hyperedge. 
PartitionID other_block = kInvalidPartition; - for ( const PartitionID other : partitioned_hg.connectivitySet(he) ) { - if ( other != from ) { + for(const PartitionID other : partitioned_hg.connectivitySet(he)) + { + if(other != from) + { other_block = other; break; } } - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { - if ( pin != v ) { - _gain_cache[benefit_index(pin, other_block)].fetch_sub(edge_weight, std::memory_order_relaxed); + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { + if(pin != v) + { + _gain_cache[benefit_index(pin, other_block)].fetch_sub( + edge_weight, std::memory_order_relaxed); } } } @@ -265,48 +323,58 @@ void SoedGainCache::uncontractUpdateAfterRestore(const PartitionedHypergraph& pa // we increase the b(u, to) for vertex v by w(e) (connectivity metric) // and similary for all adjacent block with a pin count value equals // |e| - 1 (cut metric) - for ( const PartitionID to : partitioned_hg.connectivitySet(he) ) { + for(const PartitionID to : partitioned_hg.connectivitySet(he)) + { _gain_cache[benefit_index(v, to)].add_fetch(edge_weight, std::memory_order_relaxed); - if ( partitioned_hg.pinCountInPart(he, to) == edge_size - 1 ) { - _gain_cache[benefit_index(v, to)].fetch_add(edge_weight, std::memory_order_relaxed); + if(partitioned_hg.pinCountInPart(he, to) == edge_size - 1) + { + _gain_cache[benefit_index(v, to)].fetch_add(edge_weight, + std::memory_order_relaxed); } } // Special case for cut metric - if ( pin_count_in_part_after == edge_size ) { + if(pin_count_in_part_after == edge_size) + { // In this case, we have to add w(e) to the penalty term of v _gain_cache[penalty_index(v)].fetch_add(edge_weight, std::memory_order_relaxed); - if ( edge_size == 2 ) { - // Special case: Hyperedge is not a single-pin net anymore. Since we do not consider - // single-pin nets in the penalty terms, we have to add w(e) to the penalty term of u. - // Note that u may be replaced by another node. + if(edge_size == 2) + { + // Special case: Hyperedge is not a single-pin net anymore. Since we do not + // consider single-pin nets in the penalty terms, we have to add w(e) to the + // penalty term of u. Note that u may be replaced by another node. 
ASSERT(other_pin_of_block != kInvalidHypernode); - _gain_cache[penalty_index(other_pin_of_block)].fetch_add(edge_weight, std::memory_order_relaxed); + _gain_cache[penalty_index(other_pin_of_block)].fetch_add( + edge_weight, std::memory_order_relaxed); } } } } -template -void SoedGainCache::uncontractUpdateAfterReplacement(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, - const HyperedgeID he) { +template +void SoedGainCache::uncontractUpdateAfterReplacement( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, const HypernodeID v, + const HyperedgeID he) +{ // In this case, u is replaced by v in hyperedge he // => Pin counts of hyperedge he does not change - if ( _is_initialized ) { + if(_is_initialized) + { const HypernodeID edge_size = partitioned_hg.edgeSize(he); - if ( edge_size > 1 ) { + if(edge_size > 1) + { const PartitionID from = partitioned_hg.partID(u); const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); const HypernodeID pin_count_from = partitioned_hg.pinCountInPart(he, from); - if ( pin_count_from > 1 ) { + if(pin_count_from > 1) + { // In this case, we shift the contribution of hyperedge he to the penalty term // of connectivity metric from u to v _gain_cache[penalty_index(u)].sub_fetch(edge_weight, std::memory_order_relaxed); _gain_cache[penalty_index(v)].add_fetch(edge_weight, std::memory_order_relaxed); } - if ( pin_count_from == edge_size ) { + if(pin_count_from == edge_size) + { // In this case, we shift the contribution of hyperedge he to the penalty term // of cut metric from u to v _gain_cache[penalty_index(u)].fetch_sub(edge_weight, std::memory_order_relaxed); @@ -319,71 +387,81 @@ void SoedGainCache::uncontractUpdateAfterReplacement(const PartitionedHypergraph // if the move to an adjacent block removes the hyperedge from the cut // in which case we also transfer the contribution of hyperedge he to // the benefit term from u to v (cut metric). 
- for ( const PartitionID to : partitioned_hg.connectivitySet(he) ) { - _gain_cache[benefit_index(u, to)].sub_fetch(edge_weight, std::memory_order_relaxed); - _gain_cache[benefit_index(v, to)].add_fetch(edge_weight, std::memory_order_relaxed); - if ( partitioned_hg.pinCountInPart(he, to) == edge_size - 1 ) { + for(const PartitionID to : partitioned_hg.connectivitySet(he)) + { + _gain_cache[benefit_index(u, to)].sub_fetch(edge_weight, + std::memory_order_relaxed); + _gain_cache[benefit_index(v, to)].add_fetch(edge_weight, + std::memory_order_relaxed); + if(partitioned_hg.pinCountInPart(he, to) == edge_size - 1) + { // u is no longer part of the hyperedge => transfer benefit term to v - _gain_cache[benefit_index(u, to)].fetch_sub(edge_weight, std::memory_order_relaxed); - _gain_cache[benefit_index(v, to)].fetch_add(edge_weight, std::memory_order_relaxed); + _gain_cache[benefit_index(u, to)].fetch_sub(edge_weight, + std::memory_order_relaxed); + _gain_cache[benefit_index(v, to)].fetch_add(edge_weight, + std::memory_order_relaxed); } } } } } -void SoedGainCache::restoreSinglePinHyperedge(const HypernodeID, - const PartitionID, - const HyperedgeWeight) { +void SoedGainCache::restoreSinglePinHyperedge(const HypernodeID, const PartitionID, + const HyperedgeWeight) +{ // Do nothing here } -template -void SoedGainCache::initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - vec& benefit_aggregator) { +template +void SoedGainCache::initializeGainCacheEntryForNode( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, + vec &benefit_aggregator) +{ PartitionID from = partitioned_hg.partID(u); Gain penalty = 0; - for (const HyperedgeID& e : partitioned_hg.incidentEdges(u)) { + for(const HyperedgeID &e : partitioned_hg.incidentEdges(u)) + { const HypernodeID edge_size = partitioned_hg.edgeSize(e); - if ( edge_size > 1 ) { + if(edge_size > 1) + { const HyperedgeWeight ew = partitioned_hg.edgeWeight(e); const HypernodeID pin_count_from = partitioned_hg.pinCountInPart(e, from); const HyperedgeWeight penalty_multiplier = - ( pin_count_from > 1 ) + ( pin_count_from == edge_size ); + (pin_count_from > 1) + (pin_count_from == edge_size); penalty += penalty_multiplier * ew; - for (const PartitionID& to : partitioned_hg.connectivitySet(e)) { - const HyperedgeWeight benefit_multiplier = 1 + - ( partitioned_hg.pinCountInPart(e, to) == edge_size - 1 ); + for(const PartitionID &to : partitioned_hg.connectivitySet(e)) + { + const HyperedgeWeight benefit_multiplier = + 1 + (partitioned_hg.pinCountInPart(e, to) == edge_size - 1); benefit_aggregator[to] += benefit_multiplier * ew; } } } _gain_cache[penalty_index(u)].store(penalty, std::memory_order_relaxed); - for (PartitionID i = 0; i < _k; ++i) { - _gain_cache[benefit_index(u, i)].store(benefit_aggregator[i], std::memory_order_relaxed); + for(PartitionID i = 0; i < _k; ++i) + { + _gain_cache[benefit_index(u, i)].store(benefit_aggregator[i], + std::memory_order_relaxed); benefit_aggregator[i] = 0; } } namespace { -#define SOED_INITIALIZE_GAIN_CACHE(X) void SoedGainCache::initializeGainCache(const X&) -#define SOED_DELTA_GAIN_UPDATE(X) void SoedGainCache::deltaGainUpdate(const X&, \ - const SynchronizedEdgeUpdate&) -#define SOED_RESTORE_UPDATE(X) void SoedGainCache::uncontractUpdateAfterRestore(const X&, \ - const HypernodeID, \ - const HypernodeID, \ - const HyperedgeID, \ - const HypernodeID) -#define SOED_REPLACEMENT_UPDATE(X) void SoedGainCache::uncontractUpdateAfterReplacement(const X&, \ - const 
HypernodeID, \ - const HypernodeID, \ - const HyperedgeID) -#define SOED_INIT_GAIN_CACHE_ENTRY(X) void SoedGainCache::initializeGainCacheEntryForNode(const X&, \ - const HypernodeID, \ - vec&) +#define SOED_INITIALIZE_GAIN_CACHE(X) void SoedGainCache::initializeGainCache(const X &) +#define SOED_DELTA_GAIN_UPDATE(X) \ + void SoedGainCache::deltaGainUpdate(const X &, const SynchronizedEdgeUpdate &) +#define SOED_RESTORE_UPDATE(X) \ + void SoedGainCache::uncontractUpdateAfterRestore(const X &, const HypernodeID, \ + const HypernodeID, const HyperedgeID, \ + const HypernodeID) +#define SOED_REPLACEMENT_UPDATE(X) \ + void SoedGainCache::uncontractUpdateAfterReplacement( \ + const X &, const HypernodeID, const HypernodeID, const HyperedgeID) +#define SOED_INIT_GAIN_CACHE_ENTRY(X) \ + void SoedGainCache::initializeGainCacheEntryForNode(const X &, const HypernodeID, \ + vec &) } INSTANTIATE_FUNC_WITH_PARTITIONED_HG(SOED_INITIALIZE_GAIN_CACHE) @@ -392,4 +470,4 @@ INSTANTIATE_FUNC_WITH_PARTITIONED_HG(SOED_RESTORE_UPDATE) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(SOED_REPLACEMENT_UPDATE) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(SOED_INIT_GAIN_CACHE_ENTRY) -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h b/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h index 4fcc30a6e..4bf3ebd8f 100644 --- a/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h +++ b/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h @@ -30,12 +30,12 @@ #include "kahypar-resources/meta/policy_registry.h" -#include "mt-kahypar/partition/context_enum_classes.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/partition/context_enum_classes.h" #include "mt-kahypar/utils/range.h" namespace mt_kahypar { @@ -47,111 +47,111 @@ namespace mt_kahypar { * soed(H) := \sum_{e \in cut(E)} lambda(e) * w(e) = km1(H) + cut(H) * where lambda(e) is the number of blocks contained in hyperedge e. * - * The soed metric is equivalent to the connectivity plus cut-net metric. Thus, we can express - * the gain of a node u that moves from its current block V_i to a target block as a combination - * of the gains of both metrics: - * g(u, V_j) := g_km1(u, V_j) + g_cut(u, V_j) + * The soed metric is equivalent to the connectivity plus cut-net metric. Thus, we can + * express the gain of a node u that moves from its current block V_i to a target block as + * a combination of the gains of both metrics: g(u, V_j) := g_km1(u, V_j) + g_cut(u, V_j) * = b_km1(u, V_j) + b_cut(u, V_j) - ( p_km1(u) + p_cut(u) ) * = b(u, V_j) - p(u) * - * We call b(u, V_j) the benefit term and p(u) the penalty term, which are defined as follows: - * b(u, V_j) := b_km1(u, V_j) + b_cut(u, V_j) - * = w({ e \in I(u) | pin_count(e, V_j) >= 1 }) + w({ e \in I(u) | pin_count(e, V_j) = |e| - 1 }) - * p(u) := p_km1(u) + p_cut(u) - * = w({ e \in I(u) | pin_count(e, V_i) > 1 }) + w({ e \in I(u) | pin_count(e, V_i) = |e| }) - * Our gain cache stores and maintains these entries for each node and block. - * Thus, the gain cache stores k + 1 entries per node. 
+ * We call b(u, V_j) the benefit term and p(u) the penalty term, which are defined as + * follows: b(u, V_j) := b_km1(u, V_j) + b_cut(u, V_j) = w({ e \in I(u) | pin_count(e, + * V_j) >= 1 }) + w({ e \in I(u) | pin_count(e, V_j) = |e| - 1 }) p(u) := p_km1(u) + + * p_cut(u) = w({ e \in I(u) | pin_count(e, V_i) > 1 }) + w({ e \in I(u) | pin_count(e, + * V_i) = |e| }) Our gain cache stores and maintains these entries for each node and + * block. Thus, the gain cache stores k + 1 entries per node. */ -class SoedGainCache { +class SoedGainCache +{ static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = ID(100000); using AdjacentBlocksIterator = IntegerRangeIterator::const_iterator; - public: - +public: static constexpr GainPolicy TYPE = GainPolicy::soed; static constexpr bool requires_notification_before_update = false; static constexpr bool initializes_gain_cache_entry_after_batch_uncontractions = false; static constexpr bool invalidates_entries = true; SoedGainCache() : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _dummy_adjacent_blocks() { } + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _dummy_adjacent_blocks() + { + } - SoedGainCache(const Context&) : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _dummy_adjacent_blocks() { } + SoedGainCache(const Context &) : + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _dummy_adjacent_blocks() + { + } - SoedGainCache(const SoedGainCache&) = delete; - SoedGainCache & operator= (const SoedGainCache &) = delete; + SoedGainCache(const SoedGainCache &) = delete; + SoedGainCache &operator=(const SoedGainCache &) = delete; - SoedGainCache(SoedGainCache&& other) = default; - SoedGainCache & operator= (SoedGainCache&& other) = default; + SoedGainCache(SoedGainCache &&other) = default; + SoedGainCache &operator=(SoedGainCache &&other) = default; // ####################### Initialization ####################### - bool isInitialized() const { - return _is_initialized; - } + bool isInitialized() const { return _is_initialized; } - void reset(const bool run_parallel = true) { + void reset(const bool run_parallel = true) + { unused(run_parallel); _is_initialized = false; } - size_t size() const { - return _gain_cache.size(); - } + size_t size() const { return _gain_cache.size(); } // ! Initializes all gain cache entries - template - void initializeGainCache(const PartitionedHypergraph& partitioned_hg); + template + void initializeGainCache(const PartitionedHypergraph &partitioned_hg); - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void initializeGainCacheEntryForNode(const PartitionedHypergraph&, - const HypernodeID&) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + initializeGainCacheEntryForNode(const PartitionedHypergraph &, const HypernodeID &) + { // Do nothing } - IteratorRange adjacentBlocks(const HypernodeID) const { + IteratorRange adjacentBlocks(const HypernodeID) const + { // We do not maintain the adjacent blocks of a node in this gain cache. // We therefore return an iterator over all blocks here - return IteratorRange( - _dummy_adjacent_blocks.cbegin(), _dummy_adjacent_blocks.cend()); + return IteratorRange(_dummy_adjacent_blocks.cbegin(), + _dummy_adjacent_blocks.cend()); } // ####################### Gain Computation ####################### // ! Returns the penalty term of node u. - // ! More formally, p(u) := w({ e \in I(u) | pin_count(e, V_i) > 1 }) + w({ e \in I(u) | pin_count(e, V_i) = |e| }) + // ! 
More formally, p(u) := w({ e \in I(u) | pin_count(e, V_i) > 1 }) + w({ e \in I(u) | + // pin_count(e, V_i) = |e| }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight penaltyTerm(const HypernodeID u, - const PartitionID /* only relevant for graphs */) const { + const PartitionID /* only relevant for graphs */) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[penalty_index(u)].load(std::memory_order_relaxed); } // ! Recomputes the penalty term entry in the gain cache - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void recomputeInvalidTerms(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + recomputeInvalidTerms(const PartitionedHypergraph &partitioned_hg, const HypernodeID u) + { ASSERT(_is_initialized, "Gain cache is not initialized"); - _gain_cache[penalty_index(u)].store(recomputePenaltyTerm( - partitioned_hg, u), std::memory_order_relaxed); + _gain_cache[penalty_index(u)].store(recomputePenaltyTerm(partitioned_hg, u), + std::memory_order_relaxed); } // ! Returns the benefit term for moving node u to block to. // ! More formally, - // ! b(u, V_j) := w({ e \in I(u) | pin_count(e, V_j) >= 1 }) + w({ e \in I(u) | pin_count(e, V_j) = |e| - 1 }) + // ! b(u, V_j) := w({ e \in I(u) | pin_count(e, V_j) >= 1 }) + w({ e \in I(u) | + // pin_count(e, V_j) = |e| - 1 }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[benefit_index(u, to)].load(std::memory_order_relaxed); } @@ -161,7 +161,8 @@ class SoedGainCache { MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight gain(const HypernodeID u, const PartitionID, /* only relevant for graphs */ - const PartitionID to ) const { + const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return benefitTerm(u, to) - penaltyTerm(u, kInvalidPartition); } @@ -170,147 +171,156 @@ class SoedGainCache { // ! This function returns true if the corresponding syncronized edge update triggers // ! a gain cache update. - static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& sync_update); + static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate &sync_update); // ! The partitioned (hyper)graph call this function when its updates its internal // ! data structures before calling the delta gain update function. The partitioned // ! (hyper)graph holds a lock for the corresponding (hyper)edge when calling this // ! function. Thus, it is guaranteed that no other thread will modify the hyperedge. - template - void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph&, const SynchronizedEdgeUpdate&) { + template + void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph &, + const SynchronizedEdgeUpdate &) + { // Do nothing } // ! This functions implements the delta gain updates for the connecitivity metric. // ! When moving a node from its current block from to a target block to, we iterate - // ! over its incident hyperedges and update their pin count values. After each pin count - // ! update, we call this function to update the gain cache to changes associated with - // ! corresponding hyperedge. - template - void deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update); + // ! 
over its incident hyperedges and update their pin count values. After each pin + // count ! update, we call this function to update the gain cache to changes associated + // with ! corresponding hyperedge. + template + void deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update); // ####################### Uncontraction ####################### - // ! This function implements the gain cache update after an uncontraction that restores node v in - // ! hyperedge he. After the uncontraction operation, node u and v are contained in hyperedge he. - template - void uncontractUpdateAfterRestore(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that restores + // node v in ! hyperedge he. After the uncontraction operation, node u and v are + // contained in hyperedge he. + template + void uncontractUpdateAfterRestore(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const HypernodeID v, const HyperedgeID he, const HypernodeID pin_count_in_part_after); - // ! This function implements the gain cache update after an uncontraction that replaces u with v in - // ! hyperedge he. After the uncontraction only node v is contained in hyperedge he. - template - void uncontractUpdateAfterReplacement(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that replaces + // u with v in ! hyperedge he. After the uncontraction only node v is contained in + // hyperedge he. + template + void uncontractUpdateAfterReplacement(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const HypernodeID v, const HyperedgeID he); - // ! This function is called after restoring a single-pin hyperedge. The function assumes that - // ! u is the only pin of the corresponding hyperedge, while block_of_u is its corresponding block ID. - void restoreSinglePinHyperedge(const HypernodeID u, - const PartitionID block_of_u, + // ! This function is called after restoring a single-pin hyperedge. The function + // assumes that ! u is the only pin of the corresponding hyperedge, while block_of_u is + // its corresponding block ID. + void restoreSinglePinHyperedge(const HypernodeID u, const PartitionID block_of_u, const HyperedgeWeight weight_of_he); - // ! This function is called after restoring a net that became identical to another due to a contraction. - template - void restoreIdenticalHyperedge(const PartitionedHypergraph&, - const HyperedgeID) { + // ! This function is called after restoring a net that became identical to another due + // to a contraction. + template + void restoreIdenticalHyperedge(const PartitionedHypergraph &, const HyperedgeID) + { // Do nothing } // ! Notifies the gain cache that all uncontractions of the current batch are completed. 
- void batchUncontractionsCompleted() { + void batchUncontractionsCompleted() + { // Do nothing } // ####################### Only for Testing ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputePenaltyTerm(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight recomputePenaltyTerm( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); const PartitionID block_of_u = partitioned_hg.partID(u); HyperedgeWeight penalty = 0; - for (HyperedgeID e : partitioned_hg.incidentEdges(u)) { + for(HyperedgeID e : partitioned_hg.incidentEdges(u)) + { const HypernodeID edge_size = partitioned_hg.edgeSize(e); const HypernodeID pin_count = partitioned_hg.pinCountInPart(e, block_of_u); - const HyperedgeWeight multiplier = ( edge_size > 1 ) * - ( ( pin_count > 1 ) + ( pin_count == edge_size ) ); + const HyperedgeWeight multiplier = + (edge_size > 1) * ((pin_count > 1) + (pin_count == edge_size)); penalty += multiplier * partitioned_hg.edgeWeight(e); } return penalty; } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputeBenefitTerm(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const PartitionID to) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight + recomputeBenefitTerm(const PartitionedHypergraph &partitioned_hg, const HypernodeID u, + const PartitionID to) const + { HyperedgeWeight benefit = 0; - for (HyperedgeID e : partitioned_hg.incidentEdges(u)) { + for(HyperedgeID e : partitioned_hg.incidentEdges(u)) + { const HypernodeID edge_size = partitioned_hg.edgeSize(e); const HypernodeID pin_count = partitioned_hg.pinCountInPart(e, to); - const HyperedgeWeight multiplier = ( edge_size > 1 ) * - ( ( pin_count >= 1 ) + ( pin_count == edge_size - 1 ) ); + const HyperedgeWeight multiplier = + (edge_size > 1) * ((pin_count >= 1) + (pin_count == edge_size - 1)); benefit += multiplier * partitioned_hg.edgeWeight(e); } return benefit; } - void changeNumberOfBlocks(const PartitionID new_k) { + void changeNumberOfBlocks(const PartitionID new_k) + { ASSERT(new_k <= _k); _dummy_adjacent_blocks = IntegerRangeIterator(new_k); } - template - bool verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph&) const { + template + bool verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph &) const + { // Gain cache does not track adjacent blocks of node return true; } - private: +private: friend class DeltaSoedGainCache; MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - size_t penalty_index(const HypernodeID u) const { - return size_t(u) * ( _k + 1 ); - } + size_t penalty_index(const HypernodeID u) const { return size_t(u) * (_k + 1); } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - size_t benefit_index(const HypernodeID u, const PartitionID p) const { - return size_t(u) * ( _k + 1 ) + p + 1; + size_t benefit_index(const HypernodeID u, const PartitionID p) const + { + return size_t(u) * (_k + 1) + p + 1; } // ! 
Allocates the memory required to store the gain cache - void allocateGainTable(const HypernodeID num_nodes, - const PartitionID k) { - if (_gain_cache.size() == 0 && k != kInvalidPartition) { + void allocateGainTable(const HypernodeID num_nodes, const PartitionID k) + { + if(_gain_cache.size() == 0 && k != kInvalidPartition) + { _k = k; _dummy_adjacent_blocks = IntegerRangeIterator(k); - _gain_cache.resize( - "Refinement", "gain_cache", num_nodes * size_t(_k + 1), true); + _gain_cache.resize("Refinement", "gain_cache", num_nodes * size_t(_k + 1), true); } } // ! Initializes the benefit and penalty terms for a node u - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - vec& benefit_aggregator); - - bool nodeGainAssertions(const HypernodeID u, const PartitionID p) const { - if ( p == kInvalidPartition || p >= _k ) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + initializeGainCacheEntryForNode(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, vec &benefit_aggregator); + + bool nodeGainAssertions(const HypernodeID u, const PartitionID p) const + { + if(p == kInvalidPartition || p >= _k) + { LOG << "Invalid block ID (Node" << u << "is part of block" << p << ", but valid block IDs must be in the range [ 0," << _k << "])"; return false; } - if ( benefit_index(u, p) >= _gain_cache.size() ) { + if(benefit_index(u, p) >= _gain_cache.size()) + { LOG << "Access to gain cache would result in an out-of-bounds access (" << "Benefit Index =" << benefit_index(u, p) << ", Gain Cache Size =" << _gain_cache.size() << ")"; @@ -319,173 +329,196 @@ class SoedGainCache { return true; } - // ! Indicate whether or not the gain cache is initialized bool _is_initialized; // ! Number of blocks PartitionID _k; - // ! Array of size |V| * (k + 1), which stores the benefit and penalty terms of each node. - ds::Array< CAtomic > _gain_cache; + // ! Array of size |V| * (k + 1), which stores the benefit and penalty terms of each + // node. + ds::Array > _gain_cache; // ! Provides an iterator from 0 to k (:= number of blocks) IntegerRangeIterator _dummy_adjacent_blocks; }; /** - * In our FM algorithm, the different local searches perform nodes moves locally not visible for other - * threads. The delta gain cache stores these local changes relative to the shared - * gain cache. For example, the penalty term can be computed as follows - * p'(u) := p(u) + Δp(u) - * where p(u) is the penalty term stored in the shared gain cache and Δp(u) is the penalty term stored in - * the delta gain cache after performing some moves locally. To maintain Δp(u) and Δb(u,V_j), we use a hash - * table that only stores entries affected by a gain cache update. -*/ -class DeltaSoedGainCache { + * In our FM algorithm, the different local searches perform nodes moves locally not + * visible for other threads. The delta gain cache stores these local changes relative to + * the shared gain cache. For example, the penalty term can be computed as follows p'(u) + * := p(u) + Δp(u) where p(u) is the penalty term stored in the shared gain cache and + * Δp(u) is the penalty term stored in the delta gain cache after performing some moves + * locally. To maintain Δp(u) and Δb(u,V_j), we use a hash table that only stores entries + * affected by a gain cache update. 
+ */ +class DeltaSoedGainCache +{ using AdjacentBlocksIterator = typename SoedGainCache::AdjacentBlocksIterator; - public: +public: static constexpr bool requires_connectivity_set = false; - DeltaSoedGainCache(const SoedGainCache& gain_cache) : - _gain_cache(gain_cache), - _gain_cache_delta() { } + DeltaSoedGainCache(const SoedGainCache &gain_cache) : + _gain_cache(gain_cache), _gain_cache_delta() + { + } // ####################### Initialize & Reset ####################### - void initialize(const size_t size) { - _gain_cache_delta.initialize(size); - } + void initialize(const size_t size) { _gain_cache_delta.initialize(size); } - void clear() { - _gain_cache_delta.clear(); - } + void clear() { _gain_cache_delta.clear(); } - void dropMemory() { - _gain_cache_delta.freeInternalData(); - } + void dropMemory() { _gain_cache_delta.freeInternalData(); } - size_t size_in_bytes() const { - return _gain_cache_delta.size_in_bytes(); - } + size_t size_in_bytes() const { return _gain_cache_delta.size_in_bytes(); } // ####################### Gain Computation ####################### // ! Returns an iterator over the adjacent blocks of a node - IteratorRange adjacentBlocks(const HypernodeID hn) const { + IteratorRange adjacentBlocks(const HypernodeID hn) const + { return _gain_cache.adjacentBlocks(hn); } // ! Returns the penalty term of node u. // ! More formally, p(u) := w({ e \in I(u) | pin_count(e, V_i) > 1 }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight penaltyTerm(const HypernodeID u, - const PartitionID from) const { - const HyperedgeWeight* penalty_delta = - _gain_cache_delta.get_if_contained(_gain_cache.penalty_index(u)); - return _gain_cache.penaltyTerm(u, from) + ( penalty_delta ? *penalty_delta : 0 ); + HyperedgeWeight penaltyTerm(const HypernodeID u, const PartitionID from) const + { + const HyperedgeWeight *penalty_delta = + _gain_cache_delta.get_if_contained(_gain_cache.penalty_index(u)); + return _gain_cache.penaltyTerm(u, from) + (penalty_delta ? *penalty_delta : 0); } // ! Returns the benefit term for moving node u to block to. // ! More formally, b(u, V_j) := w({ e \in I(u) | pin_count(e, V_j) >= 1 }) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(to != kInvalidPartition && to < _gain_cache._k); - const HyperedgeWeight* benefit_delta = - _gain_cache_delta.get_if_contained(_gain_cache.benefit_index(u, to)); - return _gain_cache.benefitTerm(u, to) + ( benefit_delta ? *benefit_delta : 0 ); + const HyperedgeWeight *benefit_delta = + _gain_cache_delta.get_if_contained(_gain_cache.benefit_index(u, to)); + return _gain_cache.benefitTerm(u, to) + (benefit_delta ? *benefit_delta : 0); } // ! Returns the gain of moving node u from its current block to a target block V_j. // ! More formally, g(u, V_j) := b(u, V_j) - p(u). 
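The two lookups above are the core of the delta cache: a thread-local map stores only the locally accumulated differences, and a query adds the stored difference (if any) to the shared value. A minimal standalone sketch of that overlay, assuming a plain std::unordered_map in place of ds::DynamicFlatMap (names here are hypothetical):

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Shared, read-only view of the gain cache plus a thread-local overlay of deltas.
// Illustration only; the real code uses ds::Array<CAtomic<...>> and ds::DynamicFlatMap.
struct DeltaOverlaySketch
{
  const std::vector<int64_t> &shared;        // shared gain cache entries
  std::unordered_map<size_t, int64_t> delta; // only locally touched entries

  int64_t lookup(const size_t index) const
  {
    auto it = delta.find(index);
    return shared[index] + (it != delta.end() ? it->second : 0); // p'(u) = p(u) + Δp(u)
  }

  void apply(const size_t index, const int64_t diff) { delta[index] += diff; }
  void clear() { delta.clear(); }
};

// Usage sketch: DeltaOverlaySketch d{shared_entries}; d.apply(i, -w); d.lookup(i);

In this sketch, discarding a rejected local search is just clear(), which is why the map only ever holds entries that a gain cache update actually touched.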
MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight gain(const HypernodeID u, - const PartitionID from, - const PartitionID to ) const { + HyperedgeWeight gain(const HypernodeID u, const PartitionID from, + const PartitionID to) const + { return benefitTerm(u, to) - penaltyTerm(u, from); } - // ####################### Delta Gain Update ####################### + // ####################### Delta Gain Update ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) + { const HypernodeID edge_size = sync_update.edge_size; - if ( edge_size > 1 ) { + if(edge_size > 1) + { const HyperedgeID he = sync_update.he; const PartitionID from = sync_update.from; const PartitionID to = sync_update.to; const HyperedgeWeight edge_weight = sync_update.edge_weight; - const HypernodeID pin_count_in_from_part_after = sync_update.pin_count_in_from_part_after; - const HypernodeID pin_count_in_to_part_after = sync_update.pin_count_in_to_part_after; + const HypernodeID pin_count_in_from_part_after = + sync_update.pin_count_in_from_part_after; + const HypernodeID pin_count_in_to_part_after = + sync_update.pin_count_in_to_part_after; // Delta gain updates for connectivity metric (see km1_gain_cache.h) - if (pin_count_in_from_part_after == 1) { - for (HypernodeID u : partitioned_hg.pins(he)) { - if (partitioned_hg.partID(u) == from) { + if(pin_count_in_from_part_after == 1) + { + for(HypernodeID u : partitioned_hg.pins(he)) + { + if(partitioned_hg.partID(u) == from) + { _gain_cache_delta[_gain_cache.penalty_index(u)] -= edge_weight; } } - } else if (pin_count_in_from_part_after == 0) { - for (HypernodeID u : partitioned_hg.pins(he)) { + } + else if(pin_count_in_from_part_after == 0) + { + for(HypernodeID u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.benefit_index(u, from)] -= edge_weight; } } - if (pin_count_in_to_part_after == 1) { - for (HypernodeID u : partitioned_hg.pins(he)) { + if(pin_count_in_to_part_after == 1) + { + for(HypernodeID u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.benefit_index(u, to)] += edge_weight; } - } else if (pin_count_in_to_part_after == 2) { - for (HypernodeID u : partitioned_hg.pins(he)) { - if (partitioned_hg.partID(u) == to) { + } + else if(pin_count_in_to_part_after == 2) + { + for(HypernodeID u : partitioned_hg.pins(he)) + { + if(partitioned_hg.partID(u) == to) + { _gain_cache_delta[_gain_cache.penalty_index(u)] += edge_weight; } } } // Delta gain updates for cut metric (see cut_gain_cache.h) - if ( pin_count_in_from_part_after == edge_size - 1 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + if(pin_count_in_from_part_after == edge_size - 1) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.penalty_index(u)] -= edge_weight; _gain_cache_delta[_gain_cache.benefit_index(u, from)] += edge_weight; } - } else if ( pin_count_in_from_part_after == edge_size - 2 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + } + else if(pin_count_in_from_part_after == edge_size - 2) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.benefit_index(u, from)] -= edge_weight; } } - if ( pin_count_in_to_part_after == edge_size ) { - for ( const HypernodeID& u : 
partitioned_hg.pins(he) ) { + if(pin_count_in_to_part_after == edge_size) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.penalty_index(u)] += edge_weight; _gain_cache_delta[_gain_cache.benefit_index(u, to)] -= edge_weight; } - } else if ( pin_count_in_to_part_after == edge_size - 1 ) { - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { + } + else if(pin_count_in_to_part_after == edge_size - 1) + { + for(const HypernodeID &u : partitioned_hg.pins(he)) + { _gain_cache_delta[_gain_cache.benefit_index(u, to)] += edge_weight; } } } } - // ####################### Miscellaneous ####################### + // ####################### Miscellaneous ####################### - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); - utils::MemoryTreeNode* gain_cache_delta_node = parent->addChild("Delta Gain Cache"); + utils::MemoryTreeNode *gain_cache_delta_node = parent->addChild("Delta Gain Cache"); gain_cache_delta_node->updateSize(size_in_bytes()); } - private: - const SoedGainCache& _gain_cache; +private: + const SoedGainCache &_gain_cache; // ! Stores the delta of each locally touched gain cache entry // ! relative to the gain cache in '_phg' ds::DynamicFlatMap _gain_cache_delta; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/soed/soed_gain_computation.h b/mt-kahypar/partition/refinement/gains/soed/soed_gain_computation.h index 27683723e..cc46dd488 100644 --- a/mt-kahypar/partition/refinement/gains/soed/soed_gain_computation.h +++ b/mt-kahypar/partition/refinement/gains/soed/soed_gain_computation.h @@ -28,41 +28,44 @@ #include -#include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" -#include "mt-kahypar/partition/refinement/gains/soed/soed_attributed_gains.h" #include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" +#include "mt-kahypar/partition/refinement/gains/soed/soed_attributed_gains.h" namespace mt_kahypar { -class SoedGainComputation : public GainComputationBase { +class SoedGainComputation + : public GainComputationBase +{ using Base = GainComputationBase; using RatingMap = typename Base::RatingMap; static constexpr bool enable_heavy_assert = false; - public: - SoedGainComputation(const Context& context, - bool disable_randomization = false) : - Base(context, disable_randomization) { } +public: + SoedGainComputation(const Context &context, bool disable_randomization = false) : + Base(context, disable_randomization) + { + } // ! Precomputes the gain to all adjacent blocks. // ! Conceptually, we compute the gain of moving the node to an non-adjacent block // ! and the gain to all adjacent blocks assuming the node is in an isolated block. // ! The gain of that node to a block to can then be computed by // ! 
'isolated_block_gain - tmp_scores[to]' (see gain(...)) - template - void precomputeGains(const PartitionedHypergraph& phg, - const HypernodeID hn, - RatingMap& tmp_scores, - Gain& isolated_block_gain, - const bool) { + template + void precomputeGains(const PartitionedHypergraph &phg, const HypernodeID hn, + RatingMap &tmp_scores, Gain &isolated_block_gain, const bool) + { ASSERT(tmp_scores.size() == 0, "Rating map not empty"); PartitionID from = phg.partID(hn); - for (const HyperedgeID& he : phg.incidentEdges(hn)) { + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { const HypernodeID edge_size = phg.edgeSize(he); - if ( edge_size > 1 ) { + if(edge_size > 1) + { HypernodeID pin_count_in_from_part = phg.pinCountInPart(he, from); HyperedgeWeight he_weight = phg.edgeWeight(he); @@ -74,8 +77,10 @@ class SoedGainComputation : public GainComputationBase 1 ) { - isolated_block_gain += (pin_count_in_from_part == edge_size ? 2 : 1) * he_weight; + if(pin_count_in_from_part > 1) + { + isolated_block_gain += + (pin_count_in_from_part == edge_size ? 2 : 1) * he_weight; } // Substract edge weight from all incident blocks. @@ -83,23 +88,27 @@ class SoedGainComputation : public GainComputationBase in parallel (see global_rollback.h). - * Each node move m_i is of the form (u, V_i, V_j), which means that - * node u is moved from block V_i to block V_j. Each node in this sequence is moved at most once. - * Moreover, we assume that all node moves with an index < i are performed before m_i. + * In our FM algorithm, we recompute the gain values of all node moves in the global move + * sequence M := in parallel (see global_rollback.h). Each node move m_i + * is of the form (u, V_i, V_j), which means that node u is moved from block V_i to block + * V_j. Each node in this sequence is moved at most once. Moreover, we assume that all + * node moves with an index < i are performed before m_i. * - * The parallel gain recomputation algorithm iterates over all hyperedges e \in E in parallel. - * We then iterate over the pins of e and compute some auxilliary data based on + * The parallel gain recomputation algorithm iterates over all hyperedges e \in E in + * parallel. We then iterate over the pins of e and compute some auxilliary data based on * which we then decide if we attribute an increase or reduction by w(e) to a moved pin. - * This class implements the functions required by the rollback algorithm to recompute all gain values - * for the connectivity metric. -*/ -class SoedRollback { + * This class implements the functions required by the rollback algorithm to recompute all + * gain values for the connectivity metric. + */ +class SoedRollback +{ using CutRecalculationData = typename CutRollback::RecalculationData; using Km1RecalculationData = typename Km1Rollback::RecalculationData; - public: +public: static constexpr bool supports_parallel_rollback = true; /** @@ -61,24 +62,23 @@ class SoedRollback { * recompute the gain values. Since the soed metric can be expressed as connectivity * plus cut metric, we use the rollback data of the km1 and cut rollback class. */ - struct RecalculationData { + struct RecalculationData + { CutRecalculationData cut_data; Km1RecalculationData km1_data; - RecalculationData() : - cut_data(), - km1_data() { } + RecalculationData() : cut_data(), km1_data() {} - void reset() { + void reset() + { cut_data.reset(); km1_data.reset(); } }; // Updates the auxilliary data for a node move m with index m_id. 
- static void updateMove(const MoveID m_id, - const Move& m, - vec& r) { + static void updateMove(const MoveID m_id, const Move &m, vec &r) + { r[m.to].km1_data.first_in = std::min(r[m.to].km1_data.first_in, m_id); r[m.from].km1_data.last_out = std::max(r[m.from].km1_data.last_out, m_id); r[m.from].cut_data.first_out = std::min(r[m.from].cut_data.first_out, m_id); @@ -87,62 +87,67 @@ class SoedRollback { } // Updates the number of non-moved in a block. - static void updateNonMovedPinInBlock(const PartitionID block, - vec& r) { + static void updateNonMovedPinInBlock(const PartitionID block, vec &r) + { r[block].km1_data.remaining_pins++; } - template - static HyperedgeWeight benefit(const PartitionedHypergraph& phg, - const HyperedgeID e, - const MoveID m_id, - const Move& m, - vec& r) { + template + static HyperedgeWeight benefit(const PartitionedHypergraph &phg, const HyperedgeID e, + const MoveID m_id, const Move &m, + vec &r) + { const HyperedgeWeight edge_weight = phg.edgeWeight(e); const HypernodeID edge_size = phg.edgeSize(e); // KM1 PART OF SOED METRIC - // The node move reduces the connectivity of the currently considered hyperedge if m is the last - // node that moves out of its corresponding block, while the first node that moves into the correponding - // block is performed strictly after m. Furthermore, the move sequence has to move all nodes out - // of the correspodning block (r[m.from].remaining_pins == 0). + // The node move reduces the connectivity of the currently considered hyperedge if m + // is the last node that moves out of its corresponding block, while the first node + // that moves into the correponding block is performed strictly after m. Furthermore, + // the move sequence has to move all nodes out of the correspodning block + // (r[m.from].remaining_pins == 0). const bool has_km1_benefit = r[m.from].km1_data.last_out == m_id && - r[m.from].km1_data.first_in > m_id && r[m.from].km1_data.remaining_pins == 0; + r[m.from].km1_data.first_in > m_id && + r[m.from].km1_data.remaining_pins == 0; // CUT PART OF SOED METRIC - // If the hyperedge was potentially a non-cut edge at some point and m is the last node - // that moves into the corresponding block, while the first node that moves out of the corresponding - // block is performed strictly after m, then m removes e from the cut. + // If the hyperedge was potentially a non-cut edge at some point and m is the last + // node that moves into the corresponding block, while the first node that moves out + // of the corresponding block is performed strictly after m, then m removes e from the + // cut. 
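Both of these checks rely only on a few per-block values aggregated over the move sequence: the first move into the block, the last move out of it, and the number of pins that never move. A standalone sketch of the km1 half of that bookkeeping (illustration only; plain unsigned integers stand in for MoveID, and move ids are assumed to start at 1):

#include <algorithm>
#include <cstdint>
#include <limits>

// Aggregated per-block data for one hyperedge, as used by the km1 part of the rollback.
struct BlockRecalcSketch
{
  uint32_t first_in = std::numeric_limits<uint32_t>::max(); // first move into the block
  uint32_t last_out = 0;                                    // last move out of the block
  uint32_t remaining_pins = 0;                              // pins that never move

  void moveInto(const uint32_t move_id) { first_in = std::min(first_in, move_id); }
  void moveOutOf(const uint32_t move_id) { last_out = std::max(last_out, move_id); }
  void nonMovedPin() { ++remaining_pins; }
};

// Move m_id out of block `from` reduces the connectivity contribution of the hyperedge
// iff it is the last move out of `from`, every other pin of `from` also moved out, and
// the first move into `from` (if any) happens strictly later.
bool km1Benefit(const BlockRecalcSketch &from, const uint32_t m_id)
{
  return from.last_out == m_id && from.first_in > m_id && from.remaining_pins == 0;
}

// Move m_id into block `to` increases the contribution iff it is the first move into
// `to`, `to` holds no unmoved pins of the hyperedge, and the last move out of `to`
// happened strictly earlier.
bool km1Penalty(const BlockRecalcSketch &to, const uint32_t m_id)
{
  return to.first_in == m_id && to.last_out < m_id && to.remaining_pins == 0;
}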
const bool was_potentially_non_cut_edge_at_some_point = - phg.pinCountInPart(e, m.to) + r[m.to].cut_data.moved_out == edge_size; + phg.pinCountInPart(e, m.to) + r[m.to].cut_data.moved_out == edge_size; const bool has_cut_benefit = was_potentially_non_cut_edge_at_some_point && - r[m.to].cut_data.last_in == m_id && m_id < r[m.to].cut_data.first_out; + r[m.to].cut_data.last_in == m_id && + m_id < r[m.to].cut_data.first_out; return has_km1_benefit * edge_weight + has_cut_benefit * edge_weight; } - template - static HyperedgeWeight penalty(const PartitionedHypergraph& phg, - const HyperedgeID e, - const MoveID m_id, - const Move& m, - vec& r) { + template + static HyperedgeWeight penalty(const PartitionedHypergraph &phg, const HyperedgeID e, + const MoveID m_id, const Move &m, + vec &r) + { const HyperedgeWeight edge_weight = phg.edgeWeight(e); const HypernodeID edge_size = phg.edgeSize(e); // KM1 PART OF SOED METRIC - // The node move increases the connectivity of the currently considered hyperedge if m is the - // first node that moves into the corresponding block, while the last node that moves out of the - // corresponding block is performed strictly before m. Furthermore, the move sequence has to move - // all nodes out of the correspodning block (r[m.to].remaining_pins == 0). + // The node move increases the connectivity of the currently considered hyperedge if m + // is the first node that moves into the corresponding block, while the last node that + // moves out of the corresponding block is performed strictly before m. Furthermore, + // the move sequence has to move all nodes out of the correspodning block + // (r[m.to].remaining_pins == 0). const bool has_km1_penalty = r[m.to].km1_data.first_in == m_id && - r[m.to].km1_data.last_out < m_id && r[m.to].km1_data.remaining_pins == 0; + r[m.to].km1_data.last_out < m_id && + r[m.to].km1_data.remaining_pins == 0; // CUT PART OF SOED METRIC - // If the hyperedge was potentially a non-cut edge at some point and m is the first node - // that moves out of the corresponding block, while the last node that moves into the corresponding - // block is performed strictly before m, then m makes e a cut edge. + // If the hyperedge was potentially a non-cut edge at some point and m is the first + // node that moves out of the corresponding block, while the last node that moves into + // the corresponding block is performed strictly before m, then m makes e a cut edge. const bool was_potentially_non_cut_edge_at_some_point = - phg.pinCountInPart(e, m.from) + r[m.from].cut_data.moved_out == edge_size; + phg.pinCountInPart(e, m.from) + r[m.from].cut_data.moved_out == edge_size; const bool has_cut_penalty = was_potentially_non_cut_edge_at_some_point && - r[m.from].cut_data.first_out == m_id && m_id > r[m.from].cut_data.last_in; + r[m.from].cut_data.first_out == m_id && + m_id > r[m.from].cut_data.last_in; return has_km1_penalty * edge_weight + has_cut_penalty * edge_weight; } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_attributed_gains.h b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_attributed_gains.h index 64f9bbeaf..155252152 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_attributed_gains.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_attributed_gains.h @@ -37,33 +37,41 @@ namespace mt_kahypar { * for each incident hyperedge of the node based on which we then compute an * attributed gain value. 
*/ -struct SteinerTreeAttributedGains { - static HyperedgeWeight gain(const SynchronizedEdgeUpdate& sync_update) { +struct SteinerTreeAttributedGains +{ + static HyperedgeWeight gain(const SynchronizedEdgeUpdate &sync_update) + { ASSERT(sync_update.target_graph); - ds::Bitset& connectivity_set = *sync_update.connectivity_set_after; + ds::Bitset &connectivity_set = *sync_update.connectivity_set_after; // Distance between blocks of the hyperedge after the syncronized edge update - const HyperedgeWeight distance_after = sync_update.target_graph->distance(connectivity_set); - if ( sync_update.pin_count_in_from_part_after == 0 ) { + const HyperedgeWeight distance_after = + sync_update.target_graph->distance(connectivity_set); + if(sync_update.pin_count_in_from_part_after == 0) + { ASSERT(!connectivity_set.isSet(sync_update.from)); connectivity_set.set(sync_update.from); } - if ( sync_update.pin_count_in_to_part_after == 1 ) { + if(sync_update.pin_count_in_to_part_after == 1) + { ASSERT(connectivity_set.isSet(sync_update.to)); connectivity_set.unset(sync_update.to); } // Distance between blocks of the hyperedge before the syncronized edge update - const HyperedgeWeight distance_before = sync_update.target_graph->distance(connectivity_set); + const HyperedgeWeight distance_before = + sync_update.target_graph->distance(connectivity_set); // Reset connectivity set - if ( sync_update.pin_count_in_from_part_after == 0 ) { + if(sync_update.pin_count_in_from_part_after == 0) + { ASSERT(connectivity_set.isSet(sync_update.from)); connectivity_set.unset(sync_update.from); } - if ( sync_update.pin_count_in_to_part_after == 1 ) { + if(sync_update.pin_count_in_to_part_after == 1) + { ASSERT(!connectivity_set.isSet(sync_update.to)); connectivity_set.set(sync_update.to); } - return ( distance_after - distance_before ) * sync_update.edge_weight; + return (distance_after - distance_before) * sync_update.edge_weight; } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_flow_network_construction.cpp b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_flow_network_construction.cpp index 163619dfc..e0c865789 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_flow_network_construction.cpp +++ b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_flow_network_construction.cpp @@ -35,157 +35,192 @@ namespace mt_kahypar { namespace { HyperedgeWeight capacity_for_cut_edge(const SteinerTreeFlowValuePolicy policy, const HyperedgeWeight gain_0, - const HyperedgeWeight gain_1) { - switch ( policy ) { - case SteinerTreeFlowValuePolicy::lower_bound: return std::min(gain_0, gain_1); - case SteinerTreeFlowValuePolicy::upper_bound: return std::max(gain_0, gain_1); - case SteinerTreeFlowValuePolicy::UNDEFINED: - throw InvalidParameterException( - "Steiner tree flow value policy is undefined"); - return 0; + const HyperedgeWeight gain_1) +{ + switch(policy) + { + case SteinerTreeFlowValuePolicy::lower_bound: + return std::min(gain_0, gain_1); + case SteinerTreeFlowValuePolicy::upper_bound: + return std::max(gain_0, gain_1); + case SteinerTreeFlowValuePolicy::UNDEFINED: + throw InvalidParameterException("Steiner tree flow value policy is undefined"); + return 0; } return 0; } } // namespace -template -HyperedgeWeight SteinerTreeFlowNetworkConstruction::capacity(const PartitionedHypergraph& phg, - const Context& context, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1) 
{ +template +HyperedgeWeight SteinerTreeFlowNetworkConstruction::capacity( + const PartitionedHypergraph &phg, const Context &context, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1) +{ ASSERT(phg.hasTargetGraph()); - const TargetGraph& target_graph = *phg.targetGraph(); + const TargetGraph &target_graph = *phg.targetGraph(); const HyperedgeWeight edge_weight = phg.edgeWeight(he); const HypernodeID pin_count_block_0 = phg.pinCountInPart(he, block_0); const HypernodeID pin_count_block_1 = phg.pinCountInPart(he, block_1); - ds::Bitset& connectivity_set = phg.deepCopyOfConnectivitySet(he); + ds::Bitset &connectivity_set = phg.deepCopyOfConnectivitySet(he); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); - if ( pin_count_block_0 > 0 && pin_count_block_1 == 0 ) { + if(pin_count_block_0 > 0 && pin_count_block_1 == 0) + { // Hyperedge is non-cut - // => we use gain for making the hyperedge cut as capacity to get a lower bound for the - // actual improvement + // => we use gain for making the hyperedge cut as capacity to get a lower bound for + // the actual improvement HyperedgeWeight distance_with_block_1 = 0; - if ( pin_count_block_0 == 1 ) { - distance_with_block_1 = target_graph.distanceAfterExchangingBlocks(connectivity_set, block_0, block_1); - } else { + if(pin_count_block_0 == 1) + { + distance_with_block_1 = + target_graph.distanceAfterExchangingBlocks(connectivity_set, block_0, block_1); + } + else + { distance_with_block_1 = target_graph.distanceWithBlock(connectivity_set, block_1); } return std::abs(current_distance - distance_with_block_1) * edge_weight; - } else if ( pin_count_block_0 == 0 && pin_count_block_1 > 0 ) { + } + else if(pin_count_block_0 == 0 && pin_count_block_1 > 0) + { // Hyperedge is non-cut - // => we use gain for making the hyperedge cut as capacity to get a lower bound for the - // actual improvement + // => we use gain for making the hyperedge cut as capacity to get a lower bound for + // the actual improvement HyperedgeWeight distance_with_block_0 = 0; - if ( pin_count_block_1 == 1 ) { - distance_with_block_0 = target_graph.distanceAfterExchangingBlocks(connectivity_set, block_1, block_0); - } else { + if(pin_count_block_1 == 1) + { + distance_with_block_0 = + target_graph.distanceAfterExchangingBlocks(connectivity_set, block_1, block_0); + } + else + { distance_with_block_0 = target_graph.distanceWithBlock(connectivity_set, block_0); } return std::abs(current_distance - distance_with_block_0) * edge_weight; - } else { + } + else + { // Hyperedge is cut - // => does we either use min(gain_0, gain_1) to compute a lower bound for the actual improvement or - // max(gain_0,gain_1) to compute an uppter bound for the actual improvement. - const HyperedgeWeight distance_without_block_0 = target_graph.distanceWithoutBlock(connectivity_set, block_0); - const HyperedgeWeight distance_without_block_1 = target_graph.distanceWithoutBlock(connectivity_set, block_1); - const HyperedgeWeight gain_0 = (current_distance - distance_without_block_0) * edge_weight; - const HyperedgeWeight gain_1 = (current_distance - distance_without_block_1) * edge_weight; - return capacity_for_cut_edge(context.refinement.flows.steiner_tree_policy, gain_0, gain_1); + // => does we either use min(gain_0, gain_1) to compute a lower bound for the actual + // improvement or max(gain_0,gain_1) to compute an uppter bound for the actual + // improvement. 
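Which of the two per-block gains becomes the edge capacity is exactly the policy decision implemented by capacity_for_cut_edge above. A compact standalone illustration of the lower- vs. upper-bound choice (hypothetical enum and function names, plain integers):

#include <algorithm>
#include <cstdint>

// Illustration only: how the flow value policy turns the two per-block gains of a cut
// hyperedge into one capacity. Removing block_0 from the connectivity set yields gain_0,
// removing block_1 yields gain_1; a flow-based refinement can realize at most one of them.
enum class FlowValuePolicySketch { lower_bound, upper_bound };

int64_t cutEdgeCapacity(const FlowValuePolicySketch policy, const int64_t gain_0,
                        const int64_t gain_1)
{
  // lower_bound: min(gain_0, gain_1) -> the improvement is never overestimated.
  // upper_bound: max(gain_0, gain_1) -> larger capacities, possibly optimistic cuts.
  return policy == FlowValuePolicySketch::lower_bound ? std::min(gain_0, gain_1) :
                                                        std::max(gain_0, gain_1);
}
// Example: gain_0 = 2, gain_1 = 5 gives capacity 2 (lower bound) or 5 (upper bound).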
+ const HyperedgeWeight distance_without_block_0 = + target_graph.distanceWithoutBlock(connectivity_set, block_0); + const HyperedgeWeight distance_without_block_1 = + target_graph.distanceWithoutBlock(connectivity_set, block_1); + const HyperedgeWeight gain_0 = + (current_distance - distance_without_block_0) * edge_weight; + const HyperedgeWeight gain_1 = + (current_distance - distance_without_block_1) * edge_weight; + return capacity_for_cut_edge(context.refinement.flows.steiner_tree_policy, gain_0, + gain_1); } } -template -bool SteinerTreeFlowNetworkConstruction::connectToSource(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1) { +template +bool SteinerTreeFlowNetworkConstruction::connectToSource( + const PartitionedHypergraph &partitioned_hg, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1) +{ ASSERT(partitioned_hg.hasTargetGraph()); const HypernodeID pin_count_block_0 = partitioned_hg.pinCountInPart(he, block_0); const HypernodeID pin_count_block_1 = partitioned_hg.pinCountInPart(he, block_1); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); - if ( pin_count_block_0 > 0 && pin_count_block_1 == 0 ) { - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); + if(pin_count_block_0 > 0 && pin_count_block_1 == 0) + { + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); const HyperedgeWeight distance_after_exchange = - target_graph.distanceAfterExchangingBlocks(connectivity_set, block_0, block_1); - if ( current_distance < distance_after_exchange ) { - // If all nodes from block_0 would move to block_1, we would worsen the steiner tree metric, - // even though the connectivity of the hyperedge does not change. To model this percurlarity in the flow network, - // we add the corresponding hyperedge to the source. + target_graph.distanceAfterExchangingBlocks(connectivity_set, block_0, block_1); + if(current_distance < distance_after_exchange) + { + // If all nodes from block_0 would move to block_1, we would worsen the steiner tree + // metric, even though the connectivity of the hyperedge does not change. To model + // this percurlarity in the flow network, we add the corresponding hyperedge to the + // source. 
return true; } } - if ( pin_count_block_0 == 0 && pin_count_block_1 == 1 ) { - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + if(pin_count_block_0 == 0 && pin_count_block_1 == 1) + { + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); const HyperedgeWeight distance_after_exchange = - target_graph.distanceAfterExchangingBlocks(connectivity_set, block_1, block_0); - if ( current_distance > distance_after_exchange ) { + target_graph.distanceAfterExchangingBlocks(connectivity_set, block_1, block_0); + if(current_distance > distance_after_exchange) + { return true; } } return false; } - -template -bool SteinerTreeFlowNetworkConstruction::connectToSink(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1) { +template +bool SteinerTreeFlowNetworkConstruction::connectToSink( + const PartitionedHypergraph &partitioned_hg, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1) +{ ASSERT(partitioned_hg.hasTargetGraph()); const HypernodeID pin_count_block_0 = partitioned_hg.pinCountInPart(he, block_0); const HypernodeID pin_count_block_1 = partitioned_hg.pinCountInPart(he, block_1); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); - if ( partitioned_hg.pinCountInPart(he, block_0) == 0 && partitioned_hg.pinCountInPart(he, block_1) > 0 ) { - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); + if(partitioned_hg.pinCountInPart(he, block_0) == 0 && + partitioned_hg.pinCountInPart(he, block_1) > 0) + { + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); const HyperedgeWeight distance_after_exchange = - target_graph.distanceAfterExchangingBlocks(connectivity_set, block_1, block_0); - if ( current_distance < distance_after_exchange ) { - // If all nodes from block_1 would move to block_0, we would worsen the steiner tree metric, - // even though the connectivity of the hyperedge does not change. To model this percurlarity in the flow network, - // we add the corresponding hyperedge to the sink. + target_graph.distanceAfterExchangingBlocks(connectivity_set, block_1, block_0); + if(current_distance < distance_after_exchange) + { + // If all nodes from block_1 would move to block_0, we would worsen the steiner tree + // metric, even though the connectivity of the hyperedge does not change. To model + // this percurlarity in the flow network, we add the corresponding hyperedge to the + // sink. 
return true; } } - if ( pin_count_block_0 == 1 && pin_count_block_1 == 0 ) { - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + if(pin_count_block_0 == 1 && pin_count_block_1 == 0) + { + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); const HyperedgeWeight distance_after_exchange = - target_graph.distanceAfterExchangingBlocks(connectivity_set, block_0, block_1); - if ( current_distance > distance_after_exchange ) { + target_graph.distanceAfterExchangingBlocks(connectivity_set, block_0, block_1); + if(current_distance > distance_after_exchange) + { return true; } } return false; } -template -bool SteinerTreeFlowNetworkConstruction::isCut(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1) { +template +bool SteinerTreeFlowNetworkConstruction::isCut( + const PartitionedHypergraph &partitioned_hg, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1) +{ ASSERT(partitioned_hg.hasTargetGraph()); const HypernodeID pin_count_block_0 = partitioned_hg.pinCountInPart(he, block_0); const HypernodeID pin_count_block_1 = partitioned_hg.pinCountInPart(he, block_1); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); - if ( pin_count_block_0 == 0 && pin_count_block_1 == 1 ) { - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); + if(pin_count_block_0 == 0 && pin_count_block_1 == 1) + { + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); const HyperedgeWeight distance_after_exchange = - target_graph.distanceAfterExchangingBlocks(connectivity_set, block_1, block_0); - if ( current_distance > distance_after_exchange ) { + target_graph.distanceAfterExchangingBlocks(connectivity_set, block_1, block_0); + if(current_distance > distance_after_exchange) + { return true; } } - if ( pin_count_block_0 == 1 && pin_count_block_1 == 0 ) { - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + if(pin_count_block_0 == 1 && pin_count_block_1 == 0) + { + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); const HyperedgeWeight distance_after_exchange = - target_graph.distanceAfterExchangingBlocks(connectivity_set, block_0, block_1); - if ( current_distance > distance_after_exchange ) { + target_graph.distanceAfterExchangingBlocks(connectivity_set, block_0, block_1); + if(current_distance > distance_after_exchange) + { return true; } } @@ -193,14 +228,19 @@ bool SteinerTreeFlowNetworkConstruction::isCut(const PartitionedHypergraph& part } namespace { -#define STEINER_TREE_CAPACITY(X) HyperedgeWeight SteinerTreeFlowNetworkConstruction::capacity( \ - const X&, const Context&, const HyperedgeID, const PartitionID, const PartitionID) -#define STEINER_TREE_CONNECT_TO_SOURCE(X) bool SteinerTreeFlowNetworkConstruction::connectToSource( \ - const X&, const HyperedgeID, const PartitionID, const PartitionID) -#define STEINER_TREE_CONNECT_TO_SINK(X) bool SteinerTreeFlowNetworkConstruction::connectToSink( \ - const X&, const HyperedgeID, const PartitionID, const PartitionID) -#define STEINER_TREE_IS_CUT(X) bool SteinerTreeFlowNetworkConstruction::isCut( \ - 
const X&, const HyperedgeID, const PartitionID, const PartitionID) +#define STEINER_TREE_CAPACITY(X) \ + HyperedgeWeight SteinerTreeFlowNetworkConstruction::capacity( \ + const X &, const Context &, const HyperedgeID, const PartitionID, \ + const PartitionID) +#define STEINER_TREE_CONNECT_TO_SOURCE(X) \ + bool SteinerTreeFlowNetworkConstruction::connectToSource( \ + const X &, const HyperedgeID, const PartitionID, const PartitionID) +#define STEINER_TREE_CONNECT_TO_SINK(X) \ + bool SteinerTreeFlowNetworkConstruction::connectToSink( \ + const X &, const HyperedgeID, const PartitionID, const PartitionID) +#define STEINER_TREE_IS_CUT(X) \ + bool SteinerTreeFlowNetworkConstruction::isCut(const X &, const HyperedgeID, \ + const PartitionID, const PartitionID) } INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_CAPACITY) @@ -208,4 +248,4 @@ INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_CONNECT_TO_SOURCE) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_CONNECT_TO_SINK) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_IS_CUT) -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_flow_network_construction.h b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_flow_network_construction.h index 90f0cdbb7..cd5ffe022 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_flow_network_construction.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_flow_network_construction.h @@ -36,45 +36,39 @@ namespace mt_kahypar { * to determine the capacity of a hyperedge and whether or not the hyperedge * is relevant for optimizing the objective function. */ -struct SteinerTreeFlowNetworkConstruction { +struct SteinerTreeFlowNetworkConstruction +{ // ! Capacity of the hyperedge - template - static HyperedgeWeight capacity(const PartitionedHypergraph& phg, - const Context& context, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1); + template + static HyperedgeWeight capacity(const PartitionedHypergraph &phg, + const Context &context, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1); // ! If true, then hyperedge is not relevant and can be dropped. - template - static bool dropHyperedge(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool dropHyperedge(const PartitionedHypergraph &, const HyperedgeID, + const PartitionID, const PartitionID) + { return false; } // ! If true, then hyperedge is connected to source. - template - static bool connectToSource(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, + template + static bool connectToSource(const PartitionedHypergraph &partitioned_hg, + const HyperedgeID he, const PartitionID block_0, const PartitionID block_1); // ! If true, then hyperedge is connected to sink. - template - static bool connectToSink(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, + template + static bool connectToSink(const PartitionedHypergraph &partitioned_hg, + const HyperedgeID he, const PartitionID block_0, const PartitionID block_1); // ! If true, then hyperedge is considered as cut edge and its // ! 
weight is added to the total cut - template - static bool isCut(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1); + template + static bool isCut(const PartitionedHypergraph &partitioned_hg, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1); }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.cpp b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.cpp index ebd0dd195..62b661801 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.cpp +++ b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.cpp @@ -26,55 +26,67 @@ #include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" #include "tbb/concurrent_vector.h" +#include "tbb/enumerable_thread_specific.h" +#include "tbb/parallel_for.h" #include "mt-kahypar/definitions.h" namespace mt_kahypar { -template -void SteinerTreeGainCache::initializeGainCache(const PartitionedHypergraph& partitioned_hg) { +template +void SteinerTreeGainCache::initializeGainCache( + const PartitionedHypergraph &partitioned_hg) +{ ASSERT(!_is_initialized, "Gain cache is already initialized"); ASSERT(_k <= 0 || _k >= partitioned_hg.k(), - "Gain cache was already initialized for a different k" << V(_k) << V(partitioned_hg.k())); - allocateGainTable(partitioned_hg.topLevelNumNodes(), partitioned_hg.topLevelNumEdges(), partitioned_hg.k()); + "Gain cache was already initialized for a different k" << V(_k) + << V(partitioned_hg.k())); + allocateGainTable(partitioned_hg.topLevelNumNodes(), partitioned_hg.topLevelNumEdges(), + partitioned_hg.k()); initializeAdjacentBlocks(partitioned_hg); // Compute gain of all nodes - tbb::parallel_for(tbb::blocked_range(HypernodeID(0), partitioned_hg.initialNumNodes()), - [&](tbb::blocked_range& r) { - vec& benefit_aggregator = _ets_benefit_aggregator.local(); - for (HypernodeID u = r.begin(); u < r.end(); ++u) { - if ( partitioned_hg.nodeIsEnabled(u)) { - initializeGainCacheEntryForNode(partitioned_hg, u, benefit_aggregator); + tbb::parallel_for( + tbb::blocked_range(HypernodeID(0), partitioned_hg.initialNumNodes()), + [&](tbb::blocked_range &r) { + vec &benefit_aggregator = _ets_benefit_aggregator.local(); + for(HypernodeID u = r.begin(); u < r.end(); ++u) + { + if(partitioned_hg.nodeIsEnabled(u)) + { + initializeGainCacheEntryForNode(partitioned_hg, u, benefit_aggregator); + } } - } - }); + }); _is_initialized = true; } -template -void SteinerTreeGainCache::initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID hn) { - vec& benefit_aggregator = _ets_benefit_aggregator.local(); +template +void SteinerTreeGainCache::initializeGainCacheEntryForNode( + const PartitionedHypergraph &partitioned_hg, const HypernodeID hn) +{ + vec &benefit_aggregator = _ets_benefit_aggregator.local(); initializeAdjacentBlocksOfNode(partitioned_hg, hn); initializeGainCacheEntryForNode(partitioned_hg, hn, benefit_aggregator); } -bool SteinerTreeGainCache::triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& sync_update) { +bool SteinerTreeGainCache::triggersDeltaGainUpdate( + const SynchronizedEdgeUpdate &sync_update) +{ return sync_update.pin_count_in_from_part_after == 0 || sync_update.pin_count_in_from_part_after == 1 || 
sync_update.pin_count_in_to_part_after == 1 || sync_update.pin_count_in_to_part_after == 2; } -template -void SteinerTreeGainCache::notifyBeforeDeltaGainUpdate(const PartitionedHypergraph&, - const SynchronizedEdgeUpdate& sync_update) { - if ( triggersDeltaGainUpdate(sync_update) ) { +template +void SteinerTreeGainCache::notifyBeforeDeltaGainUpdate( + const PartitionedHypergraph &, const SynchronizedEdgeUpdate &sync_update) +{ + if(triggersDeltaGainUpdate(sync_update)) + { ASSERT(UL(sync_update.he) < _version.size()); // The move will induce a gain cache update. In this case, we increment the version ID // of the hyperedge such that concurrent running initializations of gain entries are @@ -85,123 +97,163 @@ void SteinerTreeGainCache::notifyBeforeDeltaGainUpdate(const PartitionedHypergra namespace { MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE -HyperedgeWeight gainOfHyperedge(const PartitionID from, - const PartitionID to, +HyperedgeWeight gainOfHyperedge(const PartitionID from, const PartitionID to, const HyperedgeWeight edge_weight, - const TargetGraph& target_graph, - ds::PinCountSnapshot& pin_counts, - ds::Bitset& connectivity_set) { + const TargetGraph &target_graph, + ds::PinCountSnapshot &pin_counts, + ds::Bitset &connectivity_set) +{ const HypernodeID pin_count_in_from_part = pin_counts.pinCountInPart(from); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); - if ( pin_count_in_from_part == 1 ) { + if(pin_count_in_from_part == 1) + { ASSERT(connectivity_set.isSet(from)); connectivity_set.unset(from); } - const HyperedgeWeight distance_with_to = target_graph.distanceWithBlock(connectivity_set, to); - if ( pin_count_in_from_part == 1 ) { + const HyperedgeWeight distance_with_to = + target_graph.distanceWithBlock(connectivity_set, to); + if(pin_count_in_from_part == 1) + { connectivity_set.set(from); } return (current_distance - distance_with_to) * edge_weight; } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE -void reconstructConnectivitySetAndPinCountsBeforeMove(const SynchronizedEdgeUpdate& sync_update, - ds::Bitset& connectivity_set, - ds::PinCountSnapshot& pin_counts) { - if ( sync_update.pin_count_in_from_part_after == 0 ) { +void reconstructConnectivitySetAndPinCountsBeforeMove( + const SynchronizedEdgeUpdate &sync_update, ds::Bitset &connectivity_set, + ds::PinCountSnapshot &pin_counts) +{ + if(sync_update.pin_count_in_from_part_after == 0) + { ASSERT(!connectivity_set.isSet(sync_update.from)); connectivity_set.set(sync_update.from); } - if ( sync_update.pin_count_in_to_part_after == 1 ) { + if(sync_update.pin_count_in_to_part_after == 1) + { ASSERT(connectivity_set.isSet(sync_update.to)); connectivity_set.unset(sync_update.to); } - pin_counts.setPinCountInPart(sync_update.from, sync_update.pin_count_in_from_part_after + 1); - pin_counts.setPinCountInPart(sync_update.to, sync_update.pin_count_in_to_part_after - 1); + pin_counts.setPinCountInPart(sync_update.from, + sync_update.pin_count_in_from_part_after + 1); + pin_counts.setPinCountInPart(sync_update.to, + sync_update.pin_count_in_to_part_after - 1); } } -template -void SteinerTreeGainCache::deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { +template +void SteinerTreeGainCache::deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) +{ ASSERT(_is_initialized, "Gain cache is not initialized"); ASSERT(sync_update.connectivity_set_after); ASSERT(sync_update.pin_counts_after); ASSERT(sync_update.target_graph); - 
if ( triggersDeltaGainUpdate(sync_update) ) { + if(triggersDeltaGainUpdate(sync_update)) + { const HyperedgeID he = sync_update.he; const PartitionID from = sync_update.from; const PartitionID to = sync_update.to; const HyperedgeWeight edge_weight = sync_update.edge_weight; - const HypernodeID pin_count_in_from_part_after = sync_update.pin_count_in_from_part_after; + const HypernodeID pin_count_in_from_part_after = + sync_update.pin_count_in_from_part_after; const HypernodeID pin_count_in_to_part_after = sync_update.pin_count_in_to_part_after; - const TargetGraph& target_graph = *sync_update.target_graph; - ds::Bitset& connectivity_set = *sync_update.connectivity_set_after; - ds::PinCountSnapshot& pin_counts = *sync_update.pin_counts_after; + const TargetGraph &target_graph = *sync_update.target_graph; + ds::Bitset &connectivity_set = *sync_update.connectivity_set_after; + ds::PinCountSnapshot &pin_counts = *sync_update.pin_counts_after; - if ( pin_count_in_from_part_after == 0 || pin_count_in_to_part_after == 1 ) { + if(pin_count_in_from_part_after == 0 || pin_count_in_to_part_after == 1) + { // Connectivity set has changed // => Recompute gain of hyperedge for all pins and their adjacent blocks // Compute new gain of hyperedge for all pins and their adjacent blocks and // add it to the gain cache entries - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { const PartitionID source = partitioned_hg.partID(pin); - for ( const PartitionID& target : _adjacent_blocks.connectivitySet(pin) ) { - if ( source != target ) { + for(const PartitionID &target : _adjacent_blocks.connectivitySet(pin)) + { + if(source != target) + { const HyperedgeWeight gain_after = gainOfHyperedge( - source, target, edge_weight, target_graph, pin_counts, connectivity_set); - _gain_cache[benefit_index(pin, target)].add_fetch(gain_after, std::memory_order_relaxed); + source, target, edge_weight, target_graph, pin_counts, connectivity_set); + _gain_cache[benefit_index(pin, target)].add_fetch(gain_after, + std::memory_order_relaxed); } } } // Reconstruct connectivity set and pin counts before the node move - reconstructConnectivitySetAndPinCountsBeforeMove(sync_update, connectivity_set, pin_counts); + reconstructConnectivitySetAndPinCountsBeforeMove(sync_update, connectivity_set, + pin_counts); // Compute old gain of hyperedge for all pins and their adjacent blocks and // subtract it from the gain cache entries - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { const PartitionID source = partitioned_hg.partID(pin); - for ( const PartitionID& target : _adjacent_blocks.connectivitySet(pin) ) { - if ( source != target ) { + for(const PartitionID &target : _adjacent_blocks.connectivitySet(pin)) + { + if(source != target) + { const HyperedgeWeight gain_before = gainOfHyperedge( - source, target, edge_weight, target_graph, pin_counts, connectivity_set); - _gain_cache[benefit_index(pin, target)].sub_fetch(gain_before, std::memory_order_relaxed); + source, target, edge_weight, target_graph, pin_counts, connectivity_set); + _gain_cache[benefit_index(pin, target)].sub_fetch(gain_before, + std::memory_order_relaxed); } } } - } else { - if ( pin_count_in_from_part_after == 1 ) { - // In this case, there is only one pin left in block `from` and moving it to another block - // would remove the block from the connectivity set. 
Thus, we search for the last remaining pin - // in that block and update its gains for moving it to all its adjacent blocks. - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { - if ( partitioned_hg.partID(u) == from ) { - for ( const PartitionID& target : _adjacent_blocks.connectivitySet(u) ) { - if ( from != target ) { + } + else + { + if(pin_count_in_from_part_after == 1) + { + // In this case, there is only one pin left in block `from` and moving it to + // another block would remove the block from the connectivity set. Thus, we search + // for the last remaining pin in that block and update its gains for moving it to + // all its adjacent blocks. + for(const HypernodeID &u : partitioned_hg.pins(he)) + { + if(partitioned_hg.partID(u) == from) + { + for(const PartitionID &target : _adjacent_blocks.connectivitySet(u)) + { + if(from != target) + { // Compute new gain of hyperedge for moving u to the target block - const HyperedgeWeight gain = gainOfHyperedge( - from, target, edge_weight, target_graph, pin_counts, connectivity_set); - _gain_cache[benefit_index(u, target)].add_fetch(gain, std::memory_order_relaxed); - - // Before the node move, we would have increase the connectivity of the hyperedge - // if we would have moved u to a block not in the connectivity set of the hyperedge. - // Thus, we subtract the old gain from gain cache entry. - const HypernodeID pin_count_target_part_before = target == to ? - pin_count_in_to_part_after - 1 : pin_counts.pinCountInPart(target); - if ( pin_count_target_part_before == 0 ) { - // The target part was not part of the connectivity set of the hyperedge before the move. - // Thus, moving u to that block would have increased the connectivity of the hyperedge. - // However, this is no longer the case since moving u out of its block would remove the - // block from the connectivity set. + const HyperedgeWeight gain = + gainOfHyperedge(from, target, edge_weight, target_graph, pin_counts, + connectivity_set); + _gain_cache[benefit_index(u, target)].add_fetch( + gain, std::memory_order_relaxed); + + // Before the node move, we would have increase the connectivity of the + // hyperedge if we would have moved u to a block not in the connectivity + // set of the hyperedge. Thus, we subtract the old gain from gain cache + // entry. + const HypernodeID pin_count_target_part_before = + target == to ? pin_count_in_to_part_after - 1 : + pin_counts.pinCountInPart(target); + if(pin_count_target_part_before == 0) + { + // The target part was not part of the connectivity set of the hyperedge + // before the move. Thus, moving u to that block would have increased + // the connectivity of the hyperedge. However, this is no longer the + // case since moving u out of its block would remove the block from the + // connectivity set. 
const bool was_set = connectivity_set.isSet(target); connectivity_set.unset(target); - const HyperedgeWeight distance_before = target_graph.distance(connectivity_set); - const HyperedgeWeight distance_after = target_graph.distanceWithBlock(connectivity_set, target); - const HyperedgeWeight gain_before = (distance_before - distance_after) * edge_weight; - _gain_cache[benefit_index(u, target)].sub_fetch(gain_before, std::memory_order_relaxed); - if ( was_set ) connectivity_set.set(target); + const HyperedgeWeight distance_before = + target_graph.distance(connectivity_set); + const HyperedgeWeight distance_after = + target_graph.distanceWithBlock(connectivity_set, target); + const HyperedgeWeight gain_before = + (distance_before - distance_after) * edge_weight; + _gain_cache[benefit_index(u, target)].sub_fetch( + gain_before, std::memory_order_relaxed); + if(was_set) + connectivity_set.set(target); } } } @@ -209,43 +261,62 @@ void SteinerTreeGainCache::deltaGainUpdate(const PartitionedHypergraph& partitio } } - if (pin_count_in_to_part_after == 2) { - // In this case, there are now two pins in block `to`. However, moving out the previously last pin - // of block `to` would have decreased the connectivity of the hyperedge. This is no longer the case - // since there are two pins in the block. Thus, we search for this pin and update its gain. - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { - if ( partitioned_hg.partID(u) == to ) { - for ( const PartitionID& target : _adjacent_blocks.connectivitySet(u) ) { - if ( target != to ) { + if(pin_count_in_to_part_after == 2) + { + // In this case, there are now two pins in block `to`. However, moving out the + // previously last pin of block `to` would have decreased the connectivity of the + // hyperedge. This is no longer the case since there are two pins in the block. + // Thus, we search for this pin and update its gain. + for(const HypernodeID &u : partitioned_hg.pins(he)) + { + if(partitioned_hg.partID(u) == to) + { + for(const PartitionID &target : _adjacent_blocks.connectivitySet(u)) + { + if(target != to) + { // Compute new gain of hyperedge for moving u to the target block const HyperedgeWeight gain = gainOfHyperedge( - to, target, edge_weight, target_graph, pin_counts, connectivity_set); - _gain_cache[benefit_index(u, target)].add_fetch(gain, std::memory_order_relaxed); - - // Before the node move, we would have decreased the connectivity of the hyperedge - // if we would have moved u to a block in the connecivity set or replaced its block - // with another if we would have moved it to block not in the connectivity set. - // Thus, we subtract the old gain from gain cache entry. - const HypernodeID pin_count_target_part_before = target == from ? - pin_count_in_from_part_after + 1 : pin_counts.pinCountInPart(target); + to, target, edge_weight, target_graph, pin_counts, connectivity_set); + _gain_cache[benefit_index(u, target)].add_fetch( + gain, std::memory_order_relaxed); + + // Before the node move, we would have decreased the connectivity of the + // hyperedge if we would have moved u to a block in the connecivity set or + // replaced its block with another if we would have moved it to block not + // in the connectivity set. Thus, we subtract the old gain from gain cache + // entry. + const HypernodeID pin_count_target_part_before = + target == from ? 
pin_count_in_from_part_after + 1 : + pin_counts.pinCountInPart(target); const bool was_set = connectivity_set.isSet(target); - if ( pin_count_target_part_before == 0 ) connectivity_set.unset(target); - const HyperedgeWeight distance_before = target_graph.distance(connectivity_set); + if(pin_count_target_part_before == 0) + connectivity_set.unset(target); + const HyperedgeWeight distance_before = + target_graph.distance(connectivity_set); HyperedgeWeight distance_after = 0; - if ( pin_count_target_part_before > 0 ) { - // The target block was part of the connectivity set before the node move. - // Thus, moving u out of its block would have decreased the connectivity of - // the hyperedge. - distance_after = target_graph.distanceWithoutBlock(connectivity_set, to); - } else { - // The target block was not part of the connectivity set before the node move. - // Thus, moving u out of its block would have replaced block `to` with the target block - // in the connectivity set. - distance_after = target_graph.distanceAfterExchangingBlocks(connectivity_set, to, target); + if(pin_count_target_part_before > 0) + { + // The target block was part of the connectivity set before the node + // move. Thus, moving u out of its block would have decreased the + // connectivity of the hyperedge. + distance_after = + target_graph.distanceWithoutBlock(connectivity_set, to); + } + else + { + // The target block was not part of the connectivity set before the node + // move. Thus, moving u out of its block would have replaced block `to` + // with the target block in the connectivity set. + distance_after = target_graph.distanceAfterExchangingBlocks( + connectivity_set, to, target); } - const HyperedgeWeight gain_before = (distance_before - distance_after) * edge_weight; - _gain_cache[benefit_index(u, target)].sub_fetch(gain_before, std::memory_order_relaxed); - if ( was_set ) connectivity_set.set(target); + const HyperedgeWeight gain_before = + (distance_before - distance_after) * edge_weight; + _gain_cache[benefit_index(u, target)].sub_fetch( + gain_before, std::memory_order_relaxed); + if(was_set) + connectivity_set.set(target); } } } @@ -255,7 +326,8 @@ void SteinerTreeGainCache::deltaGainUpdate(const PartitionedHypergraph& partitio // Update gain version of hyperedge. If the update version is equal to the version // of the hyperedge, then we know that all gain cache updates are completed. This is - // important for initializing gain entries while simultanously running gain cache updates. + // important for initializing gain entries while simultanously running gain cache + // updates. ++_version[sync_update.he].update_version; } @@ -265,44 +337,61 @@ void SteinerTreeGainCache::deltaGainUpdate(const PartitionedHypergraph& partitio updateAdjacentBlocks(partitioned_hg, sync_update); } -template -void SteinerTreeGainCache::uncontractUpdateAfterRestore(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, - const HyperedgeID he, - const HypernodeID pin_count_in_part_after) { - // In this case, u and v are both contained in the hyperedge after the uncontraction operation - // => Pin count of the block of node u increases by one, but connectivity set does not change. 
- if ( _is_initialized ) { +template +void SteinerTreeGainCache::uncontractUpdateAfterRestore( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, const HypernodeID v, + const HyperedgeID he, const HypernodeID pin_count_in_part_after) +{ + // In this case, u and v are both contained in the hyperedge after the uncontraction + // operation + // => Pin count of the block of node u increases by one, but connectivity set does not + // change. + if(_is_initialized) + { ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const PartitionID block = partitioned_hg.partID(u); const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); - if ( pin_count_in_part_after == 2 ) { + if(pin_count_in_part_after == 2) + { // In this case, u was the only pin of its block contained in the hyperedge. // Aftwards, u and v are contained in the hyperedge both in the same block. // This changes the gain of u to all its adjacent blocks - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { // u might be replaced by an other node in the batch // => search for other pin of the corresponding block and update gain. - if ( pin != v && partitioned_hg.partID(pin) == block ) { - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); - const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); - for ( const PartitionID to : _adjacent_blocks.connectivitySet(pin) ) { - if ( block != to ) { + if(pin != v && partitioned_hg.partID(pin) == block) + { + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + const HyperedgeWeight current_distance = + target_graph.distance(connectivity_set); + for(const PartitionID to : _adjacent_blocks.connectivitySet(pin)) + { + if(block != to) + { // u does no longer decrease the connectivity of the hyperedge. 
We therefore // subtract the previous contribution of the hyperedge to gain values of u HyperedgeWeight old_distance_after_move = 0; HyperedgeWeight new_distance_after_move = current_distance; - if ( partitioned_hg.pinCountInPart(he, to) == 0 ) { - old_distance_after_move = target_graph.distanceAfterExchangingBlocks(connectivity_set, block, to); - new_distance_after_move = target_graph.distanceWithBlock(connectivity_set, to); - } else { - old_distance_after_move = target_graph.distanceWithoutBlock(connectivity_set, block); + if(partitioned_hg.pinCountInPart(he, to) == 0) + { + old_distance_after_move = target_graph.distanceAfterExchangingBlocks( + connectivity_set, block, to); + new_distance_after_move = + target_graph.distanceWithBlock(connectivity_set, to); } - const HyperedgeWeight old_gain = (current_distance - old_distance_after_move) * edge_weight; - const HyperedgeWeight new_gain = (current_distance - new_distance_after_move) * edge_weight; - _gain_cache[benefit_index(pin, to)].add_fetch(new_gain - old_gain, std::memory_order_relaxed); + else + { + old_distance_after_move = + target_graph.distanceWithoutBlock(connectivity_set, block); + } + const HyperedgeWeight old_gain = + (current_distance - old_distance_after_move) * edge_weight; + const HyperedgeWeight new_gain = + (current_distance - new_distance_after_move) * edge_weight; + _gain_cache[benefit_index(pin, to)].add_fetch(new_gain - old_gain, + std::memory_order_relaxed); } } break; @@ -310,9 +399,12 @@ void SteinerTreeGainCache::uncontractUpdateAfterRestore(const PartitionedHypergr } } - if ( partitioned_hg.edgeSize(he) > _large_he_threshold ) { - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { - for ( const PartitionID& block : partitioned_hg.connectivitySet(he) ) { + if(partitioned_hg.edgeSize(he) > _large_he_threshold) + { + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { + for(const PartitionID &block : partitioned_hg.connectivitySet(he)) + { decrementIncidentEdges(pin, block); } } @@ -325,50 +417,70 @@ void SteinerTreeGainCache::uncontractUpdateAfterRestore(const PartitionedHypergr } } -template -void SteinerTreeGainCache::uncontractUpdateAfterReplacement(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID, - const HyperedgeID he) { +template +void SteinerTreeGainCache::uncontractUpdateAfterReplacement( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, const HypernodeID, + const HyperedgeID he) +{ // In this case, u is replaced by v in hyperedge he // => Pin counts and connectivity set of hyperedge he does not change - if ( _is_initialized ) { + if(_is_initialized) + { ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const PartitionID block = partitioned_hg.partID(u); const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); // Since u is no longer part of the hyperedge, we have to subtract the previous - // contribution of the hyperedge for moving u out of its block from all its gain values - // and add its new contribution. 
- if ( partitioned_hg.pinCountInPart(he, block) == 1 ) { - for ( const PartitionID to : _adjacent_blocks.connectivitySet(u) ) { - if ( block != to ) { + // contribution of the hyperedge for moving u out of its block from all its gain + // values and add its new contribution. + if(partitioned_hg.pinCountInPart(he, block) == 1) + { + for(const PartitionID to : _adjacent_blocks.connectivitySet(u)) + { + if(block != to) + { HyperedgeWeight distance_used_for_gain = 0; - if ( partitioned_hg.pinCountInPart(he, to) == 0 ) { - distance_used_for_gain = target_graph.distanceAfterExchangingBlocks(connectivity_set, block, to); - } else { - distance_used_for_gain = target_graph.distanceWithoutBlock(connectivity_set, block); + if(partitioned_hg.pinCountInPart(he, to) == 0) + { + distance_used_for_gain = + target_graph.distanceAfterExchangingBlocks(connectivity_set, block, to); + } + else + { + distance_used_for_gain = + target_graph.distanceWithoutBlock(connectivity_set, block); } - const HyperedgeWeight old_gain = (current_distance - distance_used_for_gain) * edge_weight; - _gain_cache[benefit_index(u, to)].sub_fetch(old_gain, std::memory_order_relaxed); + const HyperedgeWeight old_gain = + (current_distance - distance_used_for_gain) * edge_weight; + _gain_cache[benefit_index(u, to)].sub_fetch(old_gain, + std::memory_order_relaxed); } } - } else { - for ( const PartitionID to : _adjacent_blocks.connectivitySet(u) ) { - if ( block != to && partitioned_hg.pinCountInPart(he, to) == 0 ) { - const HyperedgeWeight distance_with_to = target_graph.distanceWithBlock(connectivity_set, to); - const HyperedgeWeight old_gain = (current_distance - distance_with_to) * edge_weight; - _gain_cache[benefit_index(u, to)].sub_fetch(old_gain, std::memory_order_relaxed); + } + else + { + for(const PartitionID to : _adjacent_blocks.connectivitySet(u)) + { + if(block != to && partitioned_hg.pinCountInPart(he, to) == 0) + { + const HyperedgeWeight distance_with_to = + target_graph.distanceWithBlock(connectivity_set, to); + const HyperedgeWeight old_gain = + (current_distance - distance_with_to) * edge_weight; + _gain_cache[benefit_index(u, to)].sub_fetch(old_gain, + std::memory_order_relaxed); } } } // Decrement number of incident edges of each block in the connectivity set // of the hyperedge since u is no longer part of the hyperedge. 
- if ( partitioned_hg.edgeSize(he) <= _large_he_threshold ) { - for ( const PartitionID& to : partitioned_hg.connectivitySet(he) ) { + if(partitioned_hg.edgeSize(he) <= _large_he_threshold) + { + for(const PartitionID &to : partitioned_hg.connectivitySet(he)) + { decrementIncidentEdges(u, to); } } @@ -382,61 +494,79 @@ void SteinerTreeGainCache::uncontractUpdateAfterReplacement(const PartitionedHyp void SteinerTreeGainCache::restoreSinglePinHyperedge(const HypernodeID u, const PartitionID block_of_u, - const HyperedgeWeight) { + const HyperedgeWeight) +{ incrementIncidentEdges(u, block_of_u); } -template -void SteinerTreeGainCache::restoreIdenticalHyperedge(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he) { - if ( partitioned_hg.edgeSize(he) <= _large_he_threshold ) { - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { - for ( const PartitionID& block : partitioned_hg.connectivitySet(he) ) { +template +void SteinerTreeGainCache::restoreIdenticalHyperedge( + const PartitionedHypergraph &partitioned_hg, const HyperedgeID he) +{ + if(partitioned_hg.edgeSize(he) <= _large_he_threshold) + { + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { + for(const PartitionID &block : partitioned_hg.connectivitySet(he)) + { incrementIncidentEdges(pin, block); } } } } -template -void SteinerTreeGainCache::initializeAdjacentBlocks(const PartitionedHypergraph& partitioned_hg) { +template +void SteinerTreeGainCache::initializeAdjacentBlocks( + const PartitionedHypergraph &partitioned_hg) +{ // Initialize adjacent blocks of each node - partitioned_hg.doParallelForAllNodes([&](const HypernodeID& hn) { - initializeAdjacentBlocksOfNode(partitioned_hg, hn); - }); + partitioned_hg.doParallelForAllNodes( + [&](const HypernodeID &hn) { initializeAdjacentBlocksOfNode(partitioned_hg, hn); }); } -template -void SteinerTreeGainCache::initializeAdjacentBlocksOfNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID hn) { +template +void SteinerTreeGainCache::initializeAdjacentBlocksOfNode( + const PartitionedHypergraph &partitioned_hg, const HypernodeID hn) +{ _adjacent_blocks.clear(hn); - for ( PartitionID to = 0; to < _k; ++to ) { - _num_incident_edges_of_block[benefit_index(hn, to)].store(0, std::memory_order_relaxed); + for(PartitionID to = 0; to < _k; ++to) + { + _num_incident_edges_of_block[benefit_index(hn, to)].store(0, + std::memory_order_relaxed); } - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(hn) ) { - if ( partitioned_hg.edgeSize(he) <= _large_he_threshold ) { - for ( const PartitionID& block : partitioned_hg.connectivitySet(he) ) { + for(const HyperedgeID &he : partitioned_hg.incidentEdges(hn)) + { + if(partitioned_hg.edgeSize(he) <= _large_he_threshold) + { + for(const PartitionID &block : partitioned_hg.connectivitySet(he)) + { incrementIncidentEdges(hn, block); } } } } -template -void SteinerTreeGainCache::updateAdjacentBlocks(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { - if ( partitioned_hg.edgeSize(sync_update.he) <= _large_he_threshold ) { - if ( sync_update.pin_count_in_from_part_after == 0 ) { +template +void SteinerTreeGainCache::updateAdjacentBlocks( + const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) +{ + if(partitioned_hg.edgeSize(sync_update.he) <= _large_he_threshold) + { + if(sync_update.pin_count_in_from_part_after == 0) + { // The node move has removed the source block of the move from the // connectivity set of the hyperedge. 
We therefore decrement the number of // incident edges in the source block for each pin of the hyperedge. If this // decreases the counter to zero for some pin, we remove the source block // from the adjacent blocks of that pin. - for ( const HypernodeID& pin : partitioned_hg.pins(sync_update.he) ) { + for(const HypernodeID &pin : partitioned_hg.pins(sync_update.he)) + { decrementIncidentEdges(pin, sync_update.from); } } - if ( sync_update.pin_count_in_to_part_after == 1 ) { + if(sync_update.pin_count_in_to_part_after == 1) + { // The node move has added the target block of the move to the // connectivity set of the hyperedge. We therefore increment the number of // incident edges in the target block for each pin of the hyperedge. If this @@ -444,21 +574,29 @@ void SteinerTreeGainCache::updateAdjacentBlocks(const PartitionedHypergraph& par // to the adjacent blocks of that pin. Moreover, since we only compute gain // cache entries to adjacent blocks, we initialize the gain cache entry // for that pin and target block. - for ( const HypernodeID& pin : partitioned_hg.pins(sync_update.he) ) { - const HyperedgeID incident_edges_after = incrementIncidentEdges(pin, sync_update.to); - if ( incident_edges_after == 1 ) { + for(const HypernodeID &pin : partitioned_hg.pins(sync_update.he)) + { + const HyperedgeID incident_edges_after = + incrementIncidentEdges(pin, sync_update.to); + if(incident_edges_after == 1) + { ASSERT(sync_update.edge_locks); - initializeGainCacheEntry(partitioned_hg, pin, sync_update.to, *sync_update.edge_locks); + initializeGainCacheEntry(partitioned_hg, pin, sync_update.to, + *sync_update.edge_locks); } } } } } -HyperedgeID SteinerTreeGainCache::incrementIncidentEdges(const HypernodeID u, const PartitionID to) { +HyperedgeID SteinerTreeGainCache::incrementIncidentEdges(const HypernodeID u, + const PartitionID to) +{ const HyperedgeID incident_count_after = - _num_incident_edges_of_block[benefit_index(u, to)].add_fetch(1, std::memory_order_relaxed); - if ( incident_count_after == 1 ) { + _num_incident_edges_of_block[benefit_index(u, to)].add_fetch( + 1, std::memory_order_relaxed); + if(incident_count_after == 1) + { ASSERT(!_adjacent_blocks.contains(u, to)); _gain_cache[benefit_index(u, to)].store(0, std::memory_order_relaxed); _adjacent_blocks.add(u, to); @@ -466,85 +604,98 @@ HyperedgeID SteinerTreeGainCache::incrementIncidentEdges(const HypernodeID u, co return incident_count_after; } -HyperedgeID SteinerTreeGainCache::decrementIncidentEdges(const HypernodeID u, const PartitionID to) { +HyperedgeID SteinerTreeGainCache::decrementIncidentEdges(const HypernodeID u, + const PartitionID to) +{ ASSERT(_num_incident_edges_of_block[benefit_index(u, to)].load() > 0); const HyperedgeID incident_count_after = - _num_incident_edges_of_block[benefit_index(u, to)].sub_fetch(1, std::memory_order_relaxed); - if ( incident_count_after == 0 ) { + _num_incident_edges_of_block[benefit_index(u, to)].sub_fetch( + 1, std::memory_order_relaxed); + if(incident_count_after == 0) + { ASSERT(_adjacent_blocks.contains(u, to)); _adjacent_blocks.remove(u, to); } return incident_count_after; } -template -void SteinerTreeGainCache::initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - vec& benefit_aggregator) { +template +void SteinerTreeGainCache::initializeGainCacheEntryForNode( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, + vec &benefit_aggregator) +{ ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = 
*partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const PartitionID from = partitioned_hg.partID(u); // We only compute the gain to adjacent blocks of a node and initialize them here. // The gain to non-adjacent blocks is -inf. - for ( const PartitionID& to : _adjacent_blocks.connectivitySet(u) ) { + for(const PartitionID &to : _adjacent_blocks.connectivitySet(u)) + { benefit_aggregator[to] = 0; } - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(u) ) { + for(const HyperedgeID &he : partitioned_hg.incidentEdges(u)) + { const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); - if ( partitioned_hg.pinCountInPart(he, from) == 1 ) { + if(partitioned_hg.pinCountInPart(he, from) == 1) + { // Moving the node out of its current block removes // its block from the connectivity set connectivity_set.unset(from); } // Compute gain to all adjacent blocks - for ( const PartitionID& to : _adjacent_blocks.connectivitySet(u) ) { + for(const PartitionID &to : _adjacent_blocks.connectivitySet(u)) + { const HyperedgeWeight distance_with_to = - target_graph.distanceWithBlock(connectivity_set, to); - benefit_aggregator[to] += ( current_distance - distance_with_to ) * edge_weight; + target_graph.distanceWithBlock(connectivity_set, to); + benefit_aggregator[to] += (current_distance - distance_with_to) * edge_weight; } } - for ( PartitionID to = 0; to < _k; ++to ) { - _gain_cache[benefit_index(u, to)].store(benefit_aggregator[to], std::memory_order_relaxed); + for(PartitionID to = 0; to < _k; ++to) + { + _gain_cache[benefit_index(u, to)].store(benefit_aggregator[to], + std::memory_order_relaxed); benefit_aggregator[to] = std::numeric_limits::min(); } } - - -template -void SteinerTreeGainCache::initializeGainCacheEntry(const PartitionedHypergraph& partitioned_hg, - const HypernodeID hn, - const PartitionID to, - ds::Array& edge_locks) { +template +void SteinerTreeGainCache::initializeGainCacheEntry( + const PartitionedHypergraph &partitioned_hg, const HypernodeID hn, + const PartitionID to, ds::Array &edge_locks) +{ ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const HypernodeID from = partitioned_hg.partID(hn); - vec& seen_versions = _ets_version.local(); + vec &seen_versions = _ets_version.local(); bool success = false; - while ( !success ) { + while(!success) + { success = true; seen_versions.clear(); HyperedgeWeight gain = 0; - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(hn) ) { + for(const HyperedgeID &he : partitioned_hg.incidentEdges(hn)) + { edge_locks[partitioned_hg.uniqueEdgeID(he)].lock(); // The internal data structures in the partitioned hypergraph are updated // in one transaction and each update is assoicated with a version ID. We // retrieve here the actual state of the connectivity set of the hyperedge // with its version ID. If this version ID changes after the gain computation, // we know that we computed the gain on outdated information and retry. 
- const uint32_t update_version = _version[he].update_version.load(std::memory_order_relaxed); + const uint32_t update_version = + _version[he].update_version.load(std::memory_order_relaxed); const uint32_t he_version = _version[he].version.load(std::memory_order_relaxed); - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); edge_locks[partitioned_hg.uniqueEdgeID(he)].unlock(); ASSERT(update_version <= he_version); - if ( update_version < he_version ) { + if(update_version < he_version) + { // There are still pending gain cache updates that must be finished // before we initialize the gain cache entry. success = false; @@ -554,25 +705,29 @@ void SteinerTreeGainCache::initializeGainCacheEntry(const PartitionedHypergraph& // Now compute gain of moving node hn to block `to` for hyperedge const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); - if ( partitioned_hg.pinCountInPart(he, from) == 1 ) { + if(partitioned_hg.pinCountInPart(he, from) == 1) + { // Moving the node out of its current block removes // its block from the connectivity set connectivity_set.unset(from); } const HyperedgeWeight distance_with_to = - target_graph.distanceWithBlock(connectivity_set, to); + target_graph.distanceWithBlock(connectivity_set, to); gain += (current_distance - distance_with_to) * partitioned_hg.edgeWeight(he); } _gain_cache[benefit_index(hn, to)].store(gain, std::memory_order_relaxed); // Check if versions of an incident hyperedge has changed in the meantime. // If not, gain cache entry is correct. Otherwise, recompute it. - if ( success ) { + if(success) + { ASSERT(seen_versions.size() == UL(partitioned_hg.nodeDegree(hn)), - V(hn) << V(seen_versions.size()) << V(partitioned_hg.nodeDegree(hn))); + V(hn) << V(seen_versions.size()) << V(partitioned_hg.nodeDegree(hn))); size_t idx = 0; - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(hn) ) { - if ( seen_versions[idx++] != _version[he].version.load(std::memory_order_relaxed) ) { + for(const HyperedgeID &he : partitioned_hg.incidentEdges(hn)) + { + if(seen_versions[idx++] != _version[he].version.load(std::memory_order_relaxed)) + { success = false; break; } @@ -581,22 +736,31 @@ void SteinerTreeGainCache::initializeGainCacheEntry(const PartitionedHypergraph& } } -template -bool SteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph& partitioned_hg) const { +template +bool SteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes( + const PartitionedHypergraph &partitioned_hg) const +{ bool success = true; vec num_incident_edges(_k, 0); - for ( const HypernodeID& hn : partitioned_hg.nodes() ) { + for(const HypernodeID &hn : partitioned_hg.nodes()) + { num_incident_edges.assign(_k, 0); - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(hn) ) { - if ( partitioned_hg.edgeSize(he) <= _large_he_threshold ) { - for ( const PartitionID& block : partitioned_hg.connectivitySet(he) ) { + for(const HyperedgeID &he : partitioned_hg.incidentEdges(hn)) + { + if(partitioned_hg.edgeSize(he) <= _large_he_threshold) + { + for(const PartitionID &block : partitioned_hg.connectivitySet(he)) + { ++num_incident_edges[block]; } } } - for ( PartitionID block = 0; block < _k; ++block ) { - if ( _num_incident_edges_of_block[benefit_index(hn, block)] != num_incident_edges[block] ) { + for(PartitionID block = 0; block < _k; ++block) + { + if(_num_incident_edges_of_block[benefit_index(hn, block)] != + 
num_incident_edges[block]) + { LOG << "Number of incident edges of node" << hn << "to block" << block << "=>" << "Expected:" << num_incident_edges[block] << "," << "Actual:" << _num_incident_edges_of_block[benefit_index(hn, block)]; @@ -604,16 +768,20 @@ bool SteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes(const PartitionedH } } - for ( const PartitionID block : _adjacent_blocks.connectivitySet(hn) ) { - if ( num_incident_edges[block] == 0 ) { + for(const PartitionID block : _adjacent_blocks.connectivitySet(hn)) + { + if(num_incident_edges[block] == 0) + { LOG << "Node" << hn << "is not adjacent to block" << block << ", but it is in its connectivity set"; success = false; } } - for ( PartitionID block = 0; block < _k; ++block ) { - if ( num_incident_edges[block] > 0 && !_adjacent_blocks.contains(hn, block) ) { + for(PartitionID block = 0; block < _k; ++block) + { + if(num_incident_edges[block] > 0 && !_adjacent_blocks.contains(hn, block)) + { LOG << "Node" << hn << "should be adjacent to block" << block << ", but it is not in its connectivity set"; success = false; @@ -624,37 +792,39 @@ bool SteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes(const PartitionedH } namespace { -#define STEINER_TREE_INITIALIZE_GAIN_CACHE(X) void SteinerTreeGainCache::initializeGainCache(const X&) -#define STEINER_TREE_INITIALIZE_GAIN_CACHE_FOR_NODE(X) void SteinerTreeGainCache::initializeGainCacheEntryForNode(const X&, \ - const HypernodeID) -#define STEINER_TREE_NOTIFY(X) void SteinerTreeGainCache::notifyBeforeDeltaGainUpdate(const X&, \ - const SynchronizedEdgeUpdate&) -#define STEINER_TREE_DELTA_GAIN_UPDATE(X) void SteinerTreeGainCache::deltaGainUpdate(const X&, \ - const SynchronizedEdgeUpdate&) -#define STEINER_TREE_RESTORE_UPDATE(X) void SteinerTreeGainCache::uncontractUpdateAfterRestore(const X&, \ - const HypernodeID, \ - const HypernodeID, \ - const HyperedgeID, \ - const HypernodeID) -#define STEINER_TREE_REPLACEMENT_UPDATE(X) void SteinerTreeGainCache::uncontractUpdateAfterReplacement(const X&, \ - const HypernodeID, \ - const HypernodeID, \ - const HyperedgeID) -#define STEINER_TREE_RESTORE_IDENTICAL_HYPEREDGE(X) void SteinerTreeGainCache::restoreIdenticalHyperedge(const X&, \ - const HyperedgeID) -#define STEINER_TREE_INIT_ADJACENT_BLOCKS(X) void SteinerTreeGainCache::initializeAdjacentBlocks(const X&) -#define STEINER_TREE_INIT_ADJACENT_BLOCKS_OF_NODE(X) void SteinerTreeGainCache::initializeAdjacentBlocksOfNode(const X&, \ - const HypernodeID) -#define STEINER_TREE_UPDATE_ADJACENT_BLOCKS(X) void SteinerTreeGainCache::updateAdjacentBlocks(const X&, \ - const SynchronizedEdgeUpdate&) -#define STEINER_TREE_INIT_GAIN_CACHE_ENTRY(X) void SteinerTreeGainCache::initializeGainCacheEntryForNode(const X&, \ - const HypernodeID, \ - vec&) -#define STEINER_TREE_INIT_LAZY_GAIN_CACHE_ENTRY(X) void SteinerTreeGainCache::initializeGainCacheEntry(const X&, \ - const HypernodeID, \ - const PartitionID, \ - ds::Array&) -#define STEINER_TREE_VERIFY_ADJACENT_BLOCKS(X) bool SteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes(const X&) const +#define STEINER_TREE_INITIALIZE_GAIN_CACHE(X) \ + void SteinerTreeGainCache::initializeGainCache(const X &) +#define STEINER_TREE_INITIALIZE_GAIN_CACHE_FOR_NODE(X) \ + void SteinerTreeGainCache::initializeGainCacheEntryForNode(const X &, const HypernodeID) +#define STEINER_TREE_NOTIFY(X) \ + void SteinerTreeGainCache::notifyBeforeDeltaGainUpdate(const X &, \ + const SynchronizedEdgeUpdate &) +#define STEINER_TREE_DELTA_GAIN_UPDATE(X) \ + void 
SteinerTreeGainCache::deltaGainUpdate(const X &, const SynchronizedEdgeUpdate &) +#define STEINER_TREE_RESTORE_UPDATE(X) \ + void SteinerTreeGainCache::uncontractUpdateAfterRestore( \ + const X &, const HypernodeID, const HypernodeID, const HyperedgeID, \ + const HypernodeID) +#define STEINER_TREE_REPLACEMENT_UPDATE(X) \ + void SteinerTreeGainCache::uncontractUpdateAfterReplacement( \ + const X &, const HypernodeID, const HypernodeID, const HyperedgeID) +#define STEINER_TREE_RESTORE_IDENTICAL_HYPEREDGE(X) \ + void SteinerTreeGainCache::restoreIdenticalHyperedge(const X &, const HyperedgeID) +#define STEINER_TREE_INIT_ADJACENT_BLOCKS(X) \ + void SteinerTreeGainCache::initializeAdjacentBlocks(const X &) +#define STEINER_TREE_INIT_ADJACENT_BLOCKS_OF_NODE(X) \ + void SteinerTreeGainCache::initializeAdjacentBlocksOfNode(const X &, const HypernodeID) +#define STEINER_TREE_UPDATE_ADJACENT_BLOCKS(X) \ + void SteinerTreeGainCache::updateAdjacentBlocks(const X &, \ + const SynchronizedEdgeUpdate &) +#define STEINER_TREE_INIT_GAIN_CACHE_ENTRY(X) \ + void SteinerTreeGainCache::initializeGainCacheEntryForNode( \ + const X &, const HypernodeID, vec &) +#define STEINER_TREE_INIT_LAZY_GAIN_CACHE_ENTRY(X) \ + void SteinerTreeGainCache::initializeGainCacheEntry( \ + const X &, const HypernodeID, const PartitionID, ds::Array &) +#define STEINER_TREE_VERIFY_ADJACENT_BLOCKS(X) \ + bool SteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes(const X &) const } INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_INITIALIZE_GAIN_CACHE) @@ -671,4 +841,4 @@ INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_INIT_GAIN_CACHE_ENTRY) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_INIT_LAZY_GAIN_CACHE_ENTRY) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_VERIFY_ADJACENT_BLOCKS) -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h index 2e19423cf..cb278fbf6 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h @@ -32,52 +32,56 @@ #include "tbb/parallel_invoke.h" -#include "mt-kahypar/partition/context.h" -#include "mt-kahypar/partition/mapping/target_graph.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" -#include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/datastructures/connectivity_set.h" #include "mt-kahypar/datastructures/delta_connectivity_set.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/datastructures/sparse_map.h" +#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/partition/context.h" +#include "mt-kahypar/partition/mapping/target_graph.h" #include "mt-kahypar/utils/range.h" namespace mt_kahypar { /** - * The gain cache stores the gain values for all possible node moves for the steiner tree metric. + * The gain cache stores the gain values for all possible node moves for the steiner tree + * metric. 
* - * The mapping problem asks for a mapping Π: V -> V_p of the node set V of a weighted hypergraph H = (V,E,c,w) - * onto a target graph P = (V_P, E_P) such that the following objective function is minimized: - * steiner_tree(H, P, Π) := sum_{e \in E} dist_P(Λ(e)) * w(e) - * Here, dist_P(Λ(e)) is shortest connections between all blocks Λ(e) contained in a hyperedge e using only edges - * of the target graph. Computing dist_P(Λ(e)) reverts to the steiner tree problem which is an NP-hard problem. - * However, we precompute all steiner trees up to a certain size and for larger connectivity sets Λ(e), we compute - * a 2-approximation. + * The mapping problem asks for a mapping Π: V -> V_p of the node set V of a weighted + * hypergraph H = (V,E,c,w) onto a target graph P = (V_P, E_P) such that the following + * objective function is minimized: steiner_tree(H, P, Π) := sum_{e \in E} dist_P(Λ(e)) * + * w(e) Here, dist_P(Λ(e)) is shortest connections between all blocks Λ(e) contained in a + * hyperedge e using only edges of the target graph. Computing dist_P(Λ(e)) reverts to the + * steiner tree problem which is an NP-hard problem. However, we precompute all steiner + * trees up to a certain size and for larger connectivity sets Λ(e), we compute a + * 2-approximation. * - * The gain of moving a node u from its current block V_i to a target block V_j can be expressed as follows: - * g(u,V_j) := sum_{e \in I(u): Φ(e,V_i) = 1 and Φ(e, V_j) > 0} Δdist_P(e, Λ(e)\{V_i}) * w(e) + - * sum_{e \in I(u): Φ(e,V_i) = 1 and Φ(e, V_j) = 0} Δdist_P(e, Λ(e)\{V_i} u {V_j}) * w(e) + - * sum_{e \in I(u): Φ(e,V_i) > 1 and Φ(e, V_j) = 0} Δdist_P(e, Λ(e) u {V_j}) * w(e) - * For a set of blocks A, we define Δdist_P(e, A) := (dist_P(Λ(e)) - dist_P(A)). Moreover, Φ(e,V') is the number - * of pins contained in hyperedge e which are also part of block V'. More formally, Φ(e,V') := |e n V'|. + * The gain of moving a node u from its current block V_i to a target block V_j can be + * expressed as follows: g(u,V_j) := sum_{e \in I(u): Φ(e,V_i) = 1 and Φ(e, V_j) > 0} + * Δdist_P(e, Λ(e)\{V_i}) * w(e) + sum_{e \in I(u): Φ(e,V_i) = 1 and Φ(e, V_j) = 0} + * Δdist_P(e, Λ(e)\{V_i} u {V_j}) * w(e) + sum_{e \in I(u): Φ(e,V_i) > 1 and Φ(e, V_j) = + * 0} Δdist_P(e, Λ(e) u {V_j}) * w(e) For a set of blocks A, we define Δdist_P(e, A) := + * (dist_P(Λ(e)) - dist_P(A)). Moreover, Φ(e,V') is the number of pins contained in + * hyperedge e which are also part of block V'. More formally, Φ(e,V') := |e n V'|. * - * This gain cache implementation maintains the gain values g(u,V_j) for all nodes and their adjacent blocks. - * Thus, the gain cache stores and maintains at most k entries per node where k := |V_P|. -*/ -class SteinerTreeGainCache { + * This gain cache implementation maintains the gain values g(u,V_j) for all nodes and + * their adjacent blocks. Thus, the gain cache stores and maintains at most k entries per + * node where k := |V_P|. 
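The objective and the gain of a node move from the comment above, restated in LaTeX with the same notation (no new symbols are introduced; dist_P, Λ, Φ and I(u) are exactly as defined in the comment):

\[
\text{steiner\_tree}(H, P, \Pi) := \sum_{e \in E} \text{dist}_P(\Lambda(e)) \cdot w(e)
\]

\[
g(u, V_j) := \sum_{\substack{e \in I(u):\ \Phi(e, V_i) = 1,\ \Phi(e, V_j) > 0}} \Delta\text{dist}_P(e, \Lambda(e) \setminus \{V_i\}) \cdot w(e)
\;+\; \sum_{\substack{e \in I(u):\ \Phi(e, V_i) = 1,\ \Phi(e, V_j) = 0}} \Delta\text{dist}_P(e, (\Lambda(e) \setminus \{V_i\}) \cup \{V_j\}) \cdot w(e)
\;+\; \sum_{\substack{e \in I(u):\ \Phi(e, V_i) > 1,\ \Phi(e, V_j) = 0}} \Delta\text{dist}_P(e, \Lambda(e) \cup \{V_j\}) \cdot w(e)
\]

where \(\Delta\text{dist}_P(e, A) := \text{dist}_P(\Lambda(e)) - \text{dist}_P(A)\) and \(\Phi(e, V') := |e \cap V'|\).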
+ */ +class SteinerTreeGainCache +{ static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = ID(100000); using AdjacentBlocksIterator = IteratorRange; - public: - struct HyperedgeState { - HyperedgeState() : - version(0), - update_version(0) { } +public: + struct HyperedgeState + { + HyperedgeState() : version(0), update_version(0) {} CAtomic version; CAtomic update_version; @@ -89,59 +93,51 @@ class SteinerTreeGainCache { static constexpr bool invalidates_entries = true; SteinerTreeGainCache() : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _ets_benefit_aggregator([&] { return initializeBenefitAggregator(); }), - _num_incident_edges_of_block(), - _adjacent_blocks(), - _version(), - _ets_version(), - _large_he_threshold(std::numeric_limits::max()) { } - - SteinerTreeGainCache(const Context& context) : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _ets_benefit_aggregator([&] { return initializeBenefitAggregator(); }), - _num_incident_edges_of_block(), - _adjacent_blocks(), - _version(), - _ets_version(), - _large_he_threshold(context.mapping.large_he_threshold) { } - - SteinerTreeGainCache(const SteinerTreeGainCache&) = delete; - SteinerTreeGainCache & operator= (const SteinerTreeGainCache &) = delete; - - SteinerTreeGainCache(SteinerTreeGainCache&& other) = default; - SteinerTreeGainCache & operator= (SteinerTreeGainCache&& other) = default; + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _ets_benefit_aggregator([&] { return initializeBenefitAggregator(); }), + _num_incident_edges_of_block(), _adjacent_blocks(), _version(), _ets_version(), + _large_he_threshold(std::numeric_limits::max()) + { + } + + SteinerTreeGainCache(const Context &context) : + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _ets_benefit_aggregator([&] { return initializeBenefitAggregator(); }), + _num_incident_edges_of_block(), _adjacent_blocks(), _version(), _ets_version(), + _large_he_threshold(context.mapping.large_he_threshold) + { + } + + SteinerTreeGainCache(const SteinerTreeGainCache &) = delete; + SteinerTreeGainCache &operator=(const SteinerTreeGainCache &) = delete; + + SteinerTreeGainCache(SteinerTreeGainCache &&other) = default; + SteinerTreeGainCache &operator=(SteinerTreeGainCache &&other) = default; // ####################### Initialization ####################### - bool isInitialized() const { - return _is_initialized; - } + bool isInitialized() const { return _is_initialized; } - void reset(const bool run_parallel = true) { + void reset(const bool run_parallel = true) + { unused(run_parallel); _is_initialized = false; } - size_t size() const { - return _gain_cache.size(); - } + size_t size() const { return _gain_cache.size(); } // ! Initializes all gain cache entries - template - void initializeGainCache(const PartitionedHypergraph& partitioned_hg); + template + void initializeGainCache(const PartitionedHypergraph &partitioned_hg); // ! Initializes the gain cache entry for a node - template - void initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, + template + void initializeGainCacheEntryForNode(const PartitionedHypergraph &partitioned_hg, const HypernodeID hn); // ! Returns an iterator over the adjacent blocks of a node - AdjacentBlocksIterator adjacentBlocks(const HypernodeID hn) const { + AdjacentBlocksIterator adjacentBlocks(const HypernodeID hn) const + { return _adjacent_blocks.connectivitySet(hn); } @@ -151,24 +147,25 @@ class SteinerTreeGainCache { // ! 
Note that the steiner tree gain cache does not maintain a // ! penalty term and returns zero in this case. MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight penaltyTerm(const HypernodeID, - const PartitionID) const { + HyperedgeWeight penaltyTerm(const HypernodeID, const PartitionID) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return 0; } // ! Recomputes all gain cache entries for node u - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void recomputeInvalidTerms(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u) { - vec& benefit_aggregator = _ets_benefit_aggregator.local(); + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + recomputeInvalidTerms(const PartitionedHypergraph &partitioned_hg, const HypernodeID u) + { + vec &benefit_aggregator = _ets_benefit_aggregator.local(); initializeGainCacheEntryForNode(partitioned_hg, u, benefit_aggregator); } // ! Returns the gain value for moving node u to block to. MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[benefit_index(u, to)].load(std::memory_order_relaxed); } @@ -178,7 +175,8 @@ class SteinerTreeGainCache { MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight gain(const HypernodeID u, const PartitionID, /* only relevant for graphs */ - const PartitionID to ) const { + const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return benefitTerm(u, to); } @@ -187,143 +185,148 @@ class SteinerTreeGainCache { // ! This function returns true if the corresponding syncronized edge update triggers // ! a gain cache update. - static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& sync_update); + static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate &sync_update); // ! The partitioned (hyper)graph call this function when its updates its internal // ! data structures before calling the delta gain update function. The partitioned // ! (hyper)graph holds a lock for the corresponding (hyper)edge when calling this // ! function. Thus, it is guaranteed that no other thread will modify the hyperedge. - template - void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update); + template + void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update); // ! This functions implements the delta gain updates for the steiner tree metric. // ! When moving a node from its current block from to a target block to, we iterate - // ! over its incident hyperedges and update their pin count values. After each pin count - // ! update, we call this function to update the gain cache to changes associated with - // ! corresponding hyperedge. - template - void deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update); + // ! over its incident hyperedges and update their pin count values. After each pin + // count ! update, we call this function to update the gain cache to changes associated + // with ! corresponding hyperedge. + template + void deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update); // ####################### Uncontraction ####################### - // ! 
This function implements the gain cache update after an uncontraction that restores node v in - // ! hyperedge he. After the uncontraction operation, node u and v are contained in hyperedge he. - template - void uncontractUpdateAfterRestore(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that restores + // node v in ! hyperedge he. After the uncontraction operation, node u and v are + // contained in hyperedge he. + template + void uncontractUpdateAfterRestore(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const HypernodeID v, const HyperedgeID he, const HypernodeID pin_count_in_part_after); - // ! This function implements the gain cache update after an uncontraction that replaces u with v in - // ! hyperedge he. After the uncontraction only node v is contained in hyperedge he. - template - void uncontractUpdateAfterReplacement(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that replaces + // u with v in ! hyperedge he. After the uncontraction only node v is contained in + // hyperedge he. + template + void uncontractUpdateAfterReplacement(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const HypernodeID v, const HyperedgeID he); - // ! This function is called after restoring a single-pin hyperedge. The function assumes that - // ! u is the only pin of the corresponding hyperedge, while block_of_u is its corresponding block ID. - void restoreSinglePinHyperedge(const HypernodeID u, - const PartitionID block_of_u, + // ! This function is called after restoring a single-pin hyperedge. The function + // assumes that ! u is the only pin of the corresponding hyperedge, while block_of_u is + // its corresponding block ID. + void restoreSinglePinHyperedge(const HypernodeID u, const PartitionID block_of_u, const HyperedgeWeight weight_of_he); - // ! This function is called after restoring a net that became identical to another due to a contraction. - template - void restoreIdenticalHyperedge(const PartitionedHypergraph&, - const HyperedgeID); + // ! This function is called after restoring a net that became identical to another due + // to a contraction. + template + void restoreIdenticalHyperedge(const PartitionedHypergraph &, const HyperedgeID); // ! Notifies the gain cache that all uncontractions of the current batch are completed. 
- void batchUncontractionsCompleted() { + void batchUncontractionsCompleted() + { // Do nothing } // ####################### Only for Testing ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputePenaltyTerm(const PartitionedHypergraph&, - const HypernodeID) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight + recomputePenaltyTerm(const PartitionedHypergraph &, const HypernodeID) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return 0; } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputeBenefitTerm(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const PartitionID to) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight + recomputeBenefitTerm(const PartitionedHypergraph &partitioned_hg, const HypernodeID u, + const PartitionID to) const + { ASSERT(partitioned_hg.hasTargetGraph()); HyperedgeWeight gain = 0; - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const PartitionID from = partitioned_hg.partID(u); - for (const HyperedgeID& e : partitioned_hg.incidentEdges(u)) { - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(e); + for(const HyperedgeID &e : partitioned_hg.incidentEdges(u)) + { + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(e); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); - if ( partitioned_hg.pinCountInPart(e, from) == 1 ) { + if(partitioned_hg.pinCountInPart(e, from) == 1) + { // Moving the node out of its current block removes // its block from the connectivity set connectivity_set.unset(from); } - const HyperedgeWeight distance_with_to = target_graph.distanceWithBlock(connectivity_set, to); + const HyperedgeWeight distance_with_to = + target_graph.distanceWithBlock(connectivity_set, to); gain += (current_distance - distance_with_to) * partitioned_hg.edgeWeight(e); } return gain; } - void changeNumberOfBlocks(const PartitionID new_k) { + void changeNumberOfBlocks(const PartitionID new_k) + { ASSERT(new_k <= _k); unused(new_k); // Do nothing } - template - bool verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph& partitioned_hg) const; + template + bool + verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph &partitioned_hg) const; - private: +private: friend class DeltaSteinerTreeGainCache; MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - size_t benefit_index(const HypernodeID u, const PartitionID p) const { + size_t benefit_index(const HypernodeID u, const PartitionID p) const + { return size_t(u) * _k + p; } // ! 
Allocates the memory required to store the gain cache - void allocateGainTable(const HypernodeID num_nodes, - const HyperedgeID num_edges, - const PartitionID k) { - if (_gain_cache.size() == 0 && k != kInvalidPartition) { + void allocateGainTable(const HypernodeID num_nodes, const HyperedgeID num_edges, + const PartitionID k) + { + if(_gain_cache.size() == 0 && k != kInvalidPartition) + { _k = k; - tbb::parallel_invoke([&] { - _gain_cache.resize( - "Refinement", "gain_cache", num_nodes * _k, true); - }, [&] { - _num_incident_edges_of_block.resize( - "Refinement", "num_incident_edges_of_block", num_nodes * _k, true); - }, [&] { - _adjacent_blocks = ds::ConnectivitySets(num_nodes, k, true); - }, [&] { - _version.assign(num_edges, HyperedgeState()); - }); + tbb::parallel_invoke( + [&] { _gain_cache.resize("Refinement", "gain_cache", num_nodes * _k, true); }, + [&] { + _num_incident_edges_of_block.resize( + "Refinement", "num_incident_edges_of_block", num_nodes * _k, true); + }, + [&] { _adjacent_blocks = ds::ConnectivitySets(num_nodes, k, true); }, + [&] { _version.assign(num_edges, HyperedgeState()); }); } } // ! Initializes the adjacent blocks of all nodes - template - void initializeAdjacentBlocks(const PartitionedHypergraph& partitioned_hg); + template + void initializeAdjacentBlocks(const PartitionedHypergraph &partitioned_hg); - // ! Initializes the adjacent blocks of for a node - template - void initializeAdjacentBlocksOfNode(const PartitionedHypergraph& partitioned_hg, + // ! Initializes the adjacent blocks of for a node + template + void initializeAdjacentBlocksOfNode(const PartitionedHypergraph &partitioned_hg, const HypernodeID hn); // ! Updates the adjacent blocks of a node based on a synronized hyperedge update - template - void updateAdjacentBlocks(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update); + template + void updateAdjacentBlocks(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update); // ! Increments the number of incident edges of node u that contains pins of block to. // ! If the value increases to one, we add the block to the connectivity set of the node @@ -331,31 +334,34 @@ class SteinerTreeGainCache { HyperedgeID incrementIncidentEdges(const HypernodeID u, const PartitionID to); // ! Decrements the number of incident edges of node u that contains pins of block to - // ! If the value decreases to zero, we remove the block from the connectivity set of the node. + // ! If the value decreases to zero, we remove the block from the connectivity set of + // the node. HyperedgeID decrementIncidentEdges(const HypernodeID u, const PartitionID to); // ! Initializes the benefit and penalty terms for a node u - template - void initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, + template + void initializeGainCacheEntryForNode(const PartitionedHypergraph &partitioned_hg, const HypernodeID u, - vec& benefit_aggregator); + vec &benefit_aggregator); // ! Initializes the gain cache entry of moving u to block 'to'. The function is // ! thread-safe, meaning that it supports correct initialization while simultanously // ! performing gain cache updates. 
- template - void initializeGainCacheEntry(const PartitionedHypergraph& partitioned_hg, - const HypernodeID hn, - const PartitionID to, - ds::Array& edge_locks); - - bool nodeGainAssertions(const HypernodeID u, const PartitionID p) const { - if ( p == kInvalidPartition || p >= _k ) { + template + void initializeGainCacheEntry(const PartitionedHypergraph &partitioned_hg, + const HypernodeID hn, const PartitionID to, + ds::Array &edge_locks); + + bool nodeGainAssertions(const HypernodeID u, const PartitionID p) const + { + if(p == kInvalidPartition || p >= _k) + { LOG << "Invalid block ID (Node" << u << "is part of block" << p << ", but valid block IDs must be in the range [ 0," << _k << "])"; return false; } - if ( benefit_index(u, p) >= _gain_cache.size() ) { + if(benefit_index(u, p) >= _gain_cache.size()) + { LOG << "Access to gain cache would result in an out-of-bounds access (" << "Benefit Index =" << benefit_index(u, p) << ", Gain Cache Size =" << _gain_cache.size() << ")"; @@ -364,7 +370,8 @@ class SteinerTreeGainCache { return true; } - vec initializeBenefitAggregator() const { + vec initializeBenefitAggregator() const + { return vec(_k, std::numeric_limits::min()); } @@ -374,93 +381,98 @@ class SteinerTreeGainCache { // ! Number of blocks PartitionID _k; - // ! Array of size |V| * (k + 1), which stores the benefit and penalty terms of each node. - ds::Array< CAtomic > _gain_cache; + // ! Array of size |V| * (k + 1), which stores the benefit and penalty terms of each + // node. + ds::Array > _gain_cache; // ! Thread-local for initializing gain cache entries - tbb::enumerable_thread_specific> _ets_benefit_aggregator; + tbb::enumerable_thread_specific > _ets_benefit_aggregator; // ! This array stores the number of incident hyperedges that contains // ! pins of a particular block for each node. - ds::Array< CAtomic > _num_incident_edges_of_block; + ds::Array > _num_incident_edges_of_block; // ! Stores the adjacent blocks of a node ds::ConnectivitySets _adjacent_blocks; // ! This array stores a version ID for each hyperedge. The partitioned hypergraph // ! increments the version for a hyperedge before it updates it internal data structure - // ! (see notifyBeforeDeltaGainUpdate(...)). This can be use when initialize a new gain cache entries, while - // ! other threads perform concurrent moves on the data structure. + // ! (see notifyBeforeDeltaGainUpdate(...)). This can be use when initialize a new gain + // cache entries, while ! other threads perform concurrent moves on the data structure. vec _version; // ! Array to store version IDs when we lazily initialize a gain cache entry - tbb::enumerable_thread_specific> _ets_version; + tbb::enumerable_thread_specific > _ets_version; - // ! Threshold for the size of a hyperedge that we do not count when tracking adjacent blocks + // ! Threshold for the size of a hyperedge that we do not count when tracking adjacent + // blocks HypernodeID _large_he_threshold; }; /** - * In our FM algorithm, the different local searches perform nodes moves locally not visible for other - * threads. The delta gain cache stores these local changes relative to the shared - * gain cache. For example, the gain can be computed as follows - * g'(u,V') := g(u,V') + Δg(u,V') - * where g(u,V') is the gain stored in the shared gain cache and Δg(u,V') is the gain stored in - * the delta gain cache after performing some moves locally. To maintain Δg(u,V'), we use a hash - * table that only stores entries affected by a gain cache update. 
-*/ -class DeltaSteinerTreeGainCache { + * In our FM algorithm, the different local searches perform nodes moves locally not + * visible for other threads. The delta gain cache stores these local changes relative to + * the shared gain cache. For example, the gain can be computed as follows g'(u,V') := + * g(u,V') + Δg(u,V') where g(u,V') is the gain stored in the shared gain cache and + * Δg(u,V') is the gain stored in the delta gain cache after performing some moves + * locally. To maintain Δg(u,V'), we use a hash table that only stores entries affected by + * a gain cache update. + */ +class DeltaSteinerTreeGainCache +{ using DeltaAdjacentBlocks = ds::DeltaConnectivitySet; using AdjacentBlocksIterator = typename DeltaAdjacentBlocks::Iterator; - public: +public: static constexpr bool requires_connectivity_set = true; - DeltaSteinerTreeGainCache(const SteinerTreeGainCache& gain_cache) : - _gain_cache(gain_cache), - _gain_cache_delta(), - _invalid_gain_cache_entry(), - _num_incident_edges_delta(), - _adjacent_blocks_delta(gain_cache._k), - _large_he_threshold(gain_cache._large_he_threshold) { + DeltaSteinerTreeGainCache(const SteinerTreeGainCache &gain_cache) : + _gain_cache(gain_cache), _gain_cache_delta(), _invalid_gain_cache_entry(), + _num_incident_edges_delta(), _adjacent_blocks_delta(gain_cache._k), + _large_he_threshold(gain_cache._large_he_threshold) + { _adjacent_blocks_delta.setConnectivitySet(&_gain_cache._adjacent_blocks); } // ####################### Initialize & Reset ####################### - void initialize(const size_t size) { + void initialize(const size_t size) + { _adjacent_blocks_delta.setNumberOfBlocks(_gain_cache._k); _gain_cache_delta.initialize(size); _invalid_gain_cache_entry.initialize(size); _num_incident_edges_delta.initialize(size); } - void clear() { + void clear() + { _gain_cache_delta.clear(); _invalid_gain_cache_entry.clear(); _num_incident_edges_delta.clear(); _adjacent_blocks_delta.reset(); } - void dropMemory() { + void dropMemory() + { _gain_cache_delta.freeInternalData(); _invalid_gain_cache_entry.freeInternalData(); _num_incident_edges_delta.freeInternalData(); _adjacent_blocks_delta.freeInternalData(); } - size_t size_in_bytes() const { - return _gain_cache_delta.size_in_bytes() + - _invalid_gain_cache_entry.size_in_bytes() + - _num_incident_edges_delta.size_in_bytes() + - _adjacent_blocks_delta.size_in_bytes(); + size_t size_in_bytes() const + { + return _gain_cache_delta.size_in_bytes() + _invalid_gain_cache_entry.size_in_bytes() + + _num_incident_edges_delta.size_in_bytes() + + _adjacent_blocks_delta.size_in_bytes(); } // ####################### Gain Computation ####################### // ! Returns an iterator over the adjacent blocks of a node - IteratorRange adjacentBlocks(const HypernodeID hn) const { + IteratorRange adjacentBlocks(const HypernodeID hn) const + { return _adjacent_blocks_delta.connectivitySet(hn); } @@ -468,66 +480,70 @@ class DeltaSteinerTreeGainCache { // ! Note that the steiner tree gain cache does not maintain a // ! penalty term and returns zero in this case. MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight penaltyTerm(const HypernodeID, - const PartitionID) const { - return 0; - } + HyperedgeWeight penaltyTerm(const HypernodeID, const PartitionID) const { return 0; } // ! Returns the gain value for moving node u to block to. 
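// ---------------------------------------------------------------------------
// Minimal standalone illustration of the relation g'(u,V') = g(u,V') + Δg(u,V')
// described in the class comment above (hypothetical names, plain STL containers;
// the real delta cache additionally tracks invalidated entries and adjacent blocks).
// Local searches only write into a small per-thread delta table; reads overlay it
// on the shared gain cache.
#include <cstddef>
#include <unordered_map>
#include <vector>

class ToyDeltaGainCache {
public:
  ToyDeltaGainCache(const std::vector<int> &shared, std::size_t k) :
      _shared(shared), _k(k) {}

  // g'(u, to) = shared gain + locally accumulated delta (0 if never touched)
  int gain(std::size_t u, std::size_t to) const {
    const auto it = _delta.find(index(u, to));
    return _shared[index(u, to)] + (it == _delta.end() ? 0 : it->second);
  }

  // Record a local change without touching the shared cache.
  void applyDelta(std::size_t u, std::size_t to, int delta) {
    _delta[index(u, to)] += delta;
  }

  // Discard all local moves once the local search finishes.
  void clear() { _delta.clear(); }

private:
  std::size_t index(std::size_t u, std::size_t to) const { return u * _k + to; }

  const std::vector<int> &_shared;              // shared gain cache (read-only here)
  std::unordered_map<std::size_t, int> _delta;  // only entries touched by local moves
  std::size_t _k;
};
// ---------------------------------------------------------------------------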
MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(to != kInvalidPartition && to < _gain_cache._k); const bool use_benefit_term_from_shared_gain_cache = - !_invalid_gain_cache_entry.contains(_gain_cache.benefit_index(u, to)) && - _gain_cache._adjacent_blocks.contains(u, to); + !_invalid_gain_cache_entry.contains(_gain_cache.benefit_index(u, to)) && + _gain_cache._adjacent_blocks.contains(u, to); const HyperedgeWeight benefit_term = - use_benefit_term_from_shared_gain_cache * _gain_cache.benefitTerm(u, to); - const HyperedgeWeight* benefit_delta = - _gain_cache_delta.get_if_contained(_gain_cache.benefit_index(u, to)); - return benefit_term + ( benefit_delta ? *benefit_delta : 0 ); + use_benefit_term_from_shared_gain_cache * _gain_cache.benefitTerm(u, to); + const HyperedgeWeight *benefit_delta = + _gain_cache_delta.get_if_contained(_gain_cache.benefit_index(u, to)); + return benefit_term + (benefit_delta ? *benefit_delta : 0); } // ! Returns the gain value for moving node u to block to. // ! (same as benefitTerm(...)) MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight gain(const HypernodeID u, - const PartitionID from, - const PartitionID to ) const { + HyperedgeWeight gain(const HypernodeID u, const PartitionID from, + const PartitionID to) const + { return benefitTerm(u, to) - penaltyTerm(u, from); } - // ####################### Delta Gain Update ####################### + // ####################### Delta Gain Update ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) + { ASSERT(sync_update.connectivity_set_after); ASSERT(sync_update.target_graph); const HyperedgeID he = sync_update.he; const PartitionID from = sync_update.from; const PartitionID to = sync_update.to; const HyperedgeWeight edge_weight = sync_update.edge_weight; - const HypernodeID pin_count_in_from_part_after = sync_update.pin_count_in_from_part_after; + const HypernodeID pin_count_in_from_part_after = + sync_update.pin_count_in_from_part_after; const HypernodeID pin_count_in_to_part_after = sync_update.pin_count_in_to_part_after; - const TargetGraph& target_graph = *sync_update.target_graph; - ds::Bitset& connectivity_set = *sync_update.connectivity_set_after; + const TargetGraph &target_graph = *sync_update.target_graph; + ds::Bitset &connectivity_set = *sync_update.connectivity_set_after; - if ( pin_count_in_from_part_after == 0 || pin_count_in_to_part_after == 1 ) { + if(pin_count_in_from_part_after == 0 || pin_count_in_to_part_after == 1) + { // Connectivity set has changed // => Recompute gain of hyperedge for all pins and their adjacent blocks // Compute new gain of hyperedge for all pins and their adjacent blocks and // add it to the gain cache entries - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { const PartitionID source = partitioned_hg.partID(pin); const HypernodeID pin_count_in_source_block_after = - partitioned_hg.pinCountInPart(he, source); - for ( const PartitionID& target : adjacentBlocks(pin) ) { - if ( source != target ) { - const HyperedgeWeight gain_after = gainOfHyperedge( - source, 
target, pin_count_in_source_block_after, - edge_weight, target_graph, connectivity_set); + partitioned_hg.pinCountInPart(he, source); + for(const PartitionID &target : adjacentBlocks(pin)) + { + if(source != target) + { + const HyperedgeWeight gain_after = + gainOfHyperedge(source, target, pin_count_in_source_block_after, + edge_weight, target_graph, connectivity_set); _gain_cache_delta[_gain_cache.benefit_index(pin, target)] += gain_after; } } @@ -537,52 +553,72 @@ class DeltaSteinerTreeGainCache { reconstructConnectivitySetBeforeMove(sync_update, connectivity_set); // Compute old gain of hyperedge for all pins and their adjacent blocks and // subtract it from the gain cache entries - for ( const HypernodeID& pin : partitioned_hg.pins(he) ) { + for(const HypernodeID &pin : partitioned_hg.pins(he)) + { const PartitionID source = partitioned_hg.partID(pin); - const PartitionID pin_count_in_source_part_before = source == from ? - sync_update.pin_count_in_from_part_after + 1 : (source == to ? - sync_update.pin_count_in_to_part_after - 1 : partitioned_hg.pinCountInPart(he, source)); - for ( const PartitionID& target : adjacentBlocks(pin) ) { - if ( source != target ) { - const HyperedgeWeight gain_before = gainOfHyperedge( - source, target, pin_count_in_source_part_before, - edge_weight, target_graph, connectivity_set); + const PartitionID pin_count_in_source_part_before = + source == from ? sync_update.pin_count_in_from_part_after + 1 : + (source == to ? sync_update.pin_count_in_to_part_after - 1 : + partitioned_hg.pinCountInPart(he, source)); + for(const PartitionID &target : adjacentBlocks(pin)) + { + if(source != target) + { + const HyperedgeWeight gain_before = + gainOfHyperedge(source, target, pin_count_in_source_part_before, + edge_weight, target_graph, connectivity_set); _gain_cache_delta[_gain_cache.benefit_index(pin, target)] -= gain_before; } } } - } else { - if ( pin_count_in_from_part_after == 1 ) { - // In this case, there is only one pin left in block `from` and moving it to another block - // would remove the block from the connectivity set. Thus, we search for the last remaining pin - // in that block and update its gains for moving it to all its adjacent blocks. - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { - if ( partitioned_hg.partID(u) == from ) { - for ( const PartitionID& target : adjacentBlocks(u) ) { - if ( from != target ) { + } + else + { + if(pin_count_in_from_part_after == 1) + { + // In this case, there is only one pin left in block `from` and moving it to + // another block would remove the block from the connectivity set. Thus, we search + // for the last remaining pin in that block and update its gains for moving it to + // all its adjacent blocks. + for(const HypernodeID &u : partitioned_hg.pins(he)) + { + if(partitioned_hg.partID(u) == from) + { + for(const PartitionID &target : adjacentBlocks(u)) + { + if(from != target) + { // Compute new gain of hyperedge for moving u to the target block - const HyperedgeWeight gain = gainOfHyperedge( - from, target, pin_count_in_from_part_after, - edge_weight, target_graph, connectivity_set); + const HyperedgeWeight gain = + gainOfHyperedge(from, target, pin_count_in_from_part_after, + edge_weight, target_graph, connectivity_set); _gain_cache_delta[_gain_cache.benefit_index(u, target)] += gain; - // Before the node move, we would have increase the connectivity of the hyperedge - // if we would have moved u to a block not in the connectivity set of the hyperedge. 
- // Thus, we subtract the old gain from gain cache entry. - const HypernodeID pin_count_target_part_before = target == to ? - pin_count_in_to_part_after - 1 : partitioned_hg.pinCountInPart(he, target); - if ( pin_count_target_part_before == 0 ) { - // The target part was not part of the connectivity set of the hyperedge before the move. - // Thus, moving u to that block would have increased the connectivity of the hyperedge. - // However, this is no longer the case since moving u out of its block would remove the - // block from the connectivity set. + // Before the node move, we would have increase the connectivity of the + // hyperedge if we would have moved u to a block not in the connectivity + // set of the hyperedge. Thus, we subtract the old gain from gain cache + // entry. + const HypernodeID pin_count_target_part_before = + target == to ? pin_count_in_to_part_after - 1 : + partitioned_hg.pinCountInPart(he, target); + if(pin_count_target_part_before == 0) + { + // The target part was not part of the connectivity set of the hyperedge + // before the move. Thus, moving u to that block would have increased + // the connectivity of the hyperedge. However, this is no longer the + // case since moving u out of its block would remove the block from the + // connectivity set. const bool was_set = connectivity_set.isSet(target); connectivity_set.unset(target); - const HyperedgeWeight distance_before = target_graph.distance(connectivity_set); - const HyperedgeWeight distance_after = target_graph.distanceWithBlock(connectivity_set, target); - const HyperedgeWeight gain_before = (distance_before - distance_after) * edge_weight; + const HyperedgeWeight distance_before = + target_graph.distance(connectivity_set); + const HyperedgeWeight distance_after = + target_graph.distanceWithBlock(connectivity_set, target); + const HyperedgeWeight gain_before = + (distance_before - distance_after) * edge_weight; _gain_cache_delta[_gain_cache.benefit_index(u, target)] -= gain_before; - if ( was_set ) connectivity_set.set(target); + if(was_set) + connectivity_set.set(target); } } } @@ -590,44 +626,61 @@ class DeltaSteinerTreeGainCache { } } - if (pin_count_in_to_part_after == 2) { - // In this case, there are now two pins in block `to`. However, moving out the previously last pin - // of block `to` would have decreased the connectivity of the hyperedge. This is no longer the case - // since there are two pins in the block. Thus, we search for this pin and update its gain. - for ( const HypernodeID& u : partitioned_hg.pins(he) ) { - if ( partitioned_hg.partID(u) == to ) { - for ( const PartitionID& target : adjacentBlocks(u) ) { - if ( target != to ) { + if(pin_count_in_to_part_after == 2) + { + // In this case, there are now two pins in block `to`. However, moving out the + // previously last pin of block `to` would have decreased the connectivity of the + // hyperedge. This is no longer the case since there are two pins in the block. + // Thus, we search for this pin and update its gain. 
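// ---------------------------------------------------------------------------
// Worked toy example of the per-hyperedge gain used in this case analysis
// (hypothetical, standalone; the real code queries precomputed Steiner-tree
// distances of whole connectivity sets via TargetGraph). Assume the target graph
// is the path 0-1-2 with unit edge weights, so d(0,1) = d(1,2) = 1 and d(0,2) = 2.
#include <cassert>
#include <cstdlib>

int toyDistance(int a, int b) { return std::abs(a - b); }  // distances on the path 0-1-2

// Gain of moving the last pin of block `from` to block `to` for a hyperedge whose
// connectivity set is {from, other}: edge weight times the distance reduction.
int toyGain(int from, int other, int to, int edge_weight) {
  const int distance_before = toyDistance(from, other);  // connectivity {from, other}
  const int distance_after = toyDistance(to, other);     // `from` drops out, `to` joins
  return (distance_before - distance_after) * edge_weight;
}

int main() {
  // Edge of weight 3 connecting blocks {0, 2}; moving its last pin from block 0 to
  // block 1 shrinks the Steiner tree from cost 2 to cost 1 => gain = (2 - 1) * 3 = 3.
  assert(toyGain(/*from=*/0, /*other=*/2, /*to=*/1, /*edge_weight=*/3) == 3);
  return 0;
}
// ---------------------------------------------------------------------------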
+ for(const HypernodeID &u : partitioned_hg.pins(he)) + { + if(partitioned_hg.partID(u) == to) + { + for(const PartitionID &target : adjacentBlocks(u)) + { + if(target != to) + { // Compute new gain of hyperedge for moving u to the target block - const HyperedgeWeight gain = gainOfHyperedge( - to, target, pin_count_in_to_part_after, - edge_weight, target_graph, connectivity_set); + const HyperedgeWeight gain = + gainOfHyperedge(to, target, pin_count_in_to_part_after, edge_weight, + target_graph, connectivity_set); _gain_cache_delta[_gain_cache.benefit_index(u, target)] += gain; - // Before the node move, we would have decreased the connectivity of the hyperedge - // if we would have moved u to a block in the connecivity set or replaced its block - // with another if we would have moved it to block not in the connectivity set. - // Thus, we subtract the old gain from gain cache entry. - const HypernodeID pin_count_target_part_before = target == from ? - pin_count_in_from_part_after + 1 : partitioned_hg.pinCountInPart(he, target); + // Before the node move, we would have decreased the connectivity of the + // hyperedge if we would have moved u to a block in the connecivity set or + // replaced its block with another if we would have moved it to block not + // in the connectivity set. Thus, we subtract the old gain from gain cache + // entry. + const HypernodeID pin_count_target_part_before = + target == from ? pin_count_in_from_part_after + 1 : + partitioned_hg.pinCountInPart(he, target); const bool was_set = connectivity_set.isSet(target); - if ( pin_count_target_part_before == 0 ) connectivity_set.unset(target); - const HyperedgeWeight distance_before = target_graph.distance(connectivity_set); + if(pin_count_target_part_before == 0) + connectivity_set.unset(target); + const HyperedgeWeight distance_before = + target_graph.distance(connectivity_set); HyperedgeWeight distance_after = 0; - if ( pin_count_target_part_before > 0 ) { - // The target block was part of the connectivity set before the node move. - // Thus, moving u out of its block would have decreased the connectivity of - // the hyperedge. - distance_after = target_graph.distanceWithoutBlock(connectivity_set, to); - } else { - // The target block was not part of the connectivity set before the node move. - // Thus, moving u out of its block would have replaced block `to` with the target block - // in the connectivity set. - distance_after = target_graph.distanceAfterExchangingBlocks(connectivity_set, to, target); + if(pin_count_target_part_before > 0) + { + // The target block was part of the connectivity set before the node + // move. Thus, moving u out of its block would have decreased the + // connectivity of the hyperedge. + distance_after = + target_graph.distanceWithoutBlock(connectivity_set, to); + } + else + { + // The target block was not part of the connectivity set before the node + // move. Thus, moving u out of its block would have replaced block `to` + // with the target block in the connectivity set. 
+ distance_after = target_graph.distanceAfterExchangingBlocks( + connectivity_set, to, target); } - const HyperedgeWeight gain_before = (distance_before - distance_after) * edge_weight; + const HyperedgeWeight gain_before = + (distance_before - distance_after) * edge_weight; _gain_cache_delta[_gain_cache.benefit_index(u, target)] -= gain_before; - if ( was_set ) connectivity_set.set(target); + if(was_set) + connectivity_set.set(target); } } } @@ -638,59 +691,75 @@ class DeltaSteinerTreeGainCache { updateAdjacentBlocks(partitioned_hg, sync_update); } - // ####################### Miscellaneous ####################### + // ####################### Miscellaneous ####################### - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); - utils::MemoryTreeNode* gain_cache_delta_node = parent->addChild("Delta Gain Cache"); + utils::MemoryTreeNode *gain_cache_delta_node = parent->addChild("Delta Gain Cache"); gain_cache_delta_node->updateSize(size_in_bytes()); } - private: +private: MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight gainOfHyperedge(const PartitionID from, - const PartitionID to, + HyperedgeWeight gainOfHyperedge(const PartitionID from, const PartitionID to, const HypernodeID pin_count_in_from_part, const HyperedgeWeight edge_weight, - const TargetGraph& target_graph, - ds::Bitset& connectivity_set) { + const TargetGraph &target_graph, + ds::Bitset &connectivity_set) + { const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); - if ( pin_count_in_from_part == 1 ) { + if(pin_count_in_from_part == 1) + { connectivity_set.unset(from); } - const HyperedgeWeight distance_with_to = target_graph.distanceWithBlock(connectivity_set, to); - if ( pin_count_in_from_part == 1 ) { + const HyperedgeWeight distance_with_to = + target_graph.distanceWithBlock(connectivity_set, to); + if(pin_count_in_from_part == 1) + { connectivity_set.set(from); } return (current_distance - distance_with_to) * edge_weight; } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void reconstructConnectivitySetBeforeMove(const SynchronizedEdgeUpdate& sync_update, - ds::Bitset& connectivity_set) { - if ( sync_update.pin_count_in_from_part_after == 0 ) { + void reconstructConnectivitySetBeforeMove(const SynchronizedEdgeUpdate &sync_update, + ds::Bitset &connectivity_set) + { + if(sync_update.pin_count_in_from_part_after == 0) + { connectivity_set.set(sync_update.from); } - if ( sync_update.pin_count_in_to_part_after == 1 ) { + if(sync_update.pin_count_in_to_part_after == 1) + { connectivity_set.unset(sync_update.to); } } // ! 
Updates the adjacent blocks of a node based on a synronized hyperedge update - template - void updateAdjacentBlocks(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { - if ( partitioned_hg.edgeSize(sync_update.he) <= _large_he_threshold ) { - if ( sync_update.pin_count_in_from_part_after == 0 ) { - for ( const HypernodeID& pin : partitioned_hg.pins(sync_update.he) ) { + template + void updateAdjacentBlocks(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) + { + if(partitioned_hg.edgeSize(sync_update.he) <= _large_he_threshold) + { + if(sync_update.pin_count_in_from_part_after == 0) + { + for(const HypernodeID &pin : partitioned_hg.pins(sync_update.he)) + { decrementIncidentEdges(pin, sync_update.from); } } - if ( sync_update.pin_count_in_to_part_after == 1 ) { - for ( const HypernodeID& pin : partitioned_hg.pins(sync_update.he) ) { - const HyperedgeID incident_edges_after = incrementIncidentEdges(pin, sync_update.to); - if ( incident_edges_after == 1 ) { - _invalid_gain_cache_entry[_gain_cache.benefit_index(pin, sync_update.to)] = true; + if(sync_update.pin_count_in_to_part_after == 1) + { + for(const HypernodeID &pin : partitioned_hg.pins(sync_update.he)) + { + const HyperedgeID incident_edges_after = + incrementIncidentEdges(pin, sync_update.to); + if(incident_edges_after == 1) + { + _invalid_gain_cache_entry[_gain_cache.benefit_index(pin, sync_update.to)] = + true; initializeGainCacheEntry(partitioned_hg, pin, sync_update.to); } } @@ -699,13 +768,16 @@ class DeltaSteinerTreeGainCache { } // ! Decrements the number of incident edges of node u that contains pins of block to - // ! If the value decreases to zero, we remove the block from the connectivity set of the node - HypernodeID decrementIncidentEdges(const HypernodeID hn, const PartitionID to) { + // ! If the value decreases to zero, we remove the block from the connectivity set of + // the node + HypernodeID decrementIncidentEdges(const HypernodeID hn, const PartitionID to) + { const HypernodeID shared_incident_count = - _gain_cache._num_incident_edges_of_block[_gain_cache.benefit_index(hn, to)]; + _gain_cache._num_incident_edges_of_block[_gain_cache.benefit_index(hn, to)]; const HypernodeID thread_local_incident_count_after = - --_num_incident_edges_delta[_gain_cache.benefit_index(hn, to)]; - if ( shared_incident_count + thread_local_incident_count_after == 0 ) { + --_num_incident_edges_delta[_gain_cache.benefit_index(hn, to)]; + if(shared_incident_count + thread_local_incident_count_after == 0) + { _adjacent_blocks_delta.remove(hn, to); } return shared_incident_count + thread_local_incident_count_after; @@ -714,40 +786,44 @@ class DeltaSteinerTreeGainCache { // ! Increments the number of incident edges of node u that contains pins of block to. // ! If the value increases to one, we add the block to the connectivity set of the node // ! u and initialize the gain cache entry for moving u to that block. 
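// ---------------------------------------------------------------------------
// Standalone sketch (hypothetical names, sequential STL containers) of the
// counter-based tracking described in the comment above: per (node, block) we count
// the incident edges with pins in that block and keep the block in the node's
// adjacency set exactly while the counter is non-zero. The shared cache implements
// the same transitions with atomic counters and a concurrent connectivity set.
#include <cstddef>
#include <set>
#include <vector>

class ToyAdjacentBlocks {
public:
  ToyAdjacentBlocks(std::size_t num_nodes, std::size_t k) :
      _k(k), _count(num_nodes * k, 0), _adjacent(num_nodes) {}

  std::size_t increment(std::size_t u, std::size_t block) {
    const std::size_t after = ++_count[u * _k + block];
    if (after == 1)
      _adjacent[u].insert(block);  // block became adjacent to u
    return after;
  }

  std::size_t decrement(std::size_t u, std::size_t block) {
    const std::size_t after = --_count[u * _k + block];
    if (after == 0)
      _adjacent[u].erase(block);  // last incident edge with pins in that block is gone
    return after;
  }

  const std::set<std::size_t> &adjacentBlocks(std::size_t u) const { return _adjacent[u]; }

private:
  std::size_t _k;
  std::vector<std::size_t> _count;               // incident-edge counter per (node, block)
  std::vector<std::set<std::size_t>> _adjacent;  // adjacency set per node
};
// ---------------------------------------------------------------------------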
- HypernodeID incrementIncidentEdges(const HypernodeID hn, const PartitionID to) { + HypernodeID incrementIncidentEdges(const HypernodeID hn, const PartitionID to) + { const HypernodeID shared_incident_count = - _gain_cache._num_incident_edges_of_block[_gain_cache.benefit_index(hn, to)]; + _gain_cache._num_incident_edges_of_block[_gain_cache.benefit_index(hn, to)]; const HypernodeID thread_local_incident_count_after = - ++_num_incident_edges_delta[_gain_cache.benefit_index(hn, to)]; - if ( shared_incident_count + thread_local_incident_count_after == 1 ) { + ++_num_incident_edges_delta[_gain_cache.benefit_index(hn, to)]; + if(shared_incident_count + thread_local_incident_count_after == 1) + { _adjacent_blocks_delta.add(hn, to); } return shared_incident_count + thread_local_incident_count_after; } // ! Initializes a gain cache entry - template - void initializeGainCacheEntry(const PartitionedHypergraph& partitioned_hg, - const HypernodeID hn, - const PartitionID to) { + template + void initializeGainCacheEntry(const PartitionedHypergraph &partitioned_hg, + const HypernodeID hn, const PartitionID to) + { ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const HypernodeID from = partitioned_hg.partID(hn); HyperedgeWeight gain = 0; - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(hn) ) { - ds::Bitset& connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); + for(const HyperedgeID &he : partitioned_hg.incidentEdges(hn)) + { + ds::Bitset &connectivity_set = partitioned_hg.deepCopyOfConnectivitySet(he); const HyperedgeWeight current_distance = target_graph.distance(connectivity_set); - if ( partitioned_hg.pinCountInPart(he, from) == 1 ) { + if(partitioned_hg.pinCountInPart(he, from) == 1) + { connectivity_set.unset(from); } const HyperedgeWeight distance_with_to = - target_graph.distanceWithBlock(connectivity_set, to); + target_graph.distanceWithBlock(connectivity_set, to); gain += (current_distance - distance_with_to) * partitioned_hg.edgeWeight(he); } _gain_cache_delta[_gain_cache.benefit_index(hn, to)] = gain; } - const SteinerTreeGainCache& _gain_cache; + const SteinerTreeGainCache &_gain_cache; // ! Stores the delta of each locally touched gain cache entry // ! relative to the shared gain cache @@ -765,8 +841,9 @@ class DeltaSteinerTreeGainCache { // ! adjacent blocks in the shared gain cache DeltaAdjacentBlocks _adjacent_blocks_delta; - // ! Threshold for the size of a hyperedge that we do not count when tracking adjacent blocks + // ! 
Threshold for the size of a hyperedge that we do not count when tracking adjacent + // blocks HypernodeID _large_he_threshold; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_computation.h b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_computation.h index 742cc0892..71d3427f9 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_computation.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_computation.h @@ -30,29 +30,33 @@ #include "tbb/enumerable_thread_specific.h" -#include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" -#include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_attributed_gains.h" -#include "mt-kahypar/partition/mapping/target_graph.h" -#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/datastructures/sparse_map.h" +#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/partition/mapping/target_graph.h" +#include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" +#include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_attributed_gains.h" namespace mt_kahypar { -class SteinerTreeGainComputation : public GainComputationBase { - using Base = GainComputationBase; +class SteinerTreeGainComputation + : public GainComputationBase +{ + using Base = + GainComputationBase; using RatingMap = typename Base::RatingMap; static constexpr bool enable_heavy_assert = false; static constexpr size_t BITS_PER_BLOCK = ds::StaticBitset::BITS_PER_BLOCK; - public: - SteinerTreeGainComputation(const Context& context, - bool disable_randomization = false) : - Base(context, disable_randomization), - _local_adjacent_blocks([&] { return constructBitset(); }), - _all_blocks(context.partition.k) { - for ( PartitionID to = 0; to < context.partition.k; ++to ) { +public: + SteinerTreeGainComputation(const Context &context, bool disable_randomization = false) : + Base(context, disable_randomization), + _local_adjacent_blocks([&] { return constructBitset(); }), + _all_blocks(context.partition.k) + { + for(PartitionID to = 0; to < context.partition.k; ++to) + { _all_blocks.set(to); } } @@ -62,23 +66,25 @@ class SteinerTreeGainComputation : public GainComputationBase - void precomputeGains(const PartitionedHypergraph& phg, - const HypernodeID hn, - RatingMap& tmp_scores, - Gain&, - const bool consider_non_adjacent_blocks) { + template + void precomputeGains(const PartitionedHypergraph &phg, const HypernodeID hn, + RatingMap &tmp_scores, Gain &, + const bool consider_non_adjacent_blocks) + { ASSERT(tmp_scores.size() == 0, "Rating map not empty"); // Compute all adjacent blocks of node - ds::Bitset& adjacent_blocks = consider_non_adjacent_blocks ? - _all_blocks : _local_adjacent_blocks.local(); - ds::StaticBitset adjacent_blocks_view( - adjacent_blocks.numBlocks(), adjacent_blocks.data()); - if ( !consider_non_adjacent_blocks ) { + ds::Bitset &adjacent_blocks = + consider_non_adjacent_blocks ? 
_all_blocks : _local_adjacent_blocks.local(); + ds::StaticBitset adjacent_blocks_view(adjacent_blocks.numBlocks(), + adjacent_blocks.data()); + if(!consider_non_adjacent_blocks) + { adjacent_blocks.reset(); - for (const HyperedgeID& he : phg.incidentEdges(hn)) { - for ( const PartitionID& block : phg.connectivitySet(he) ) { + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { + for(const PartitionID &block : phg.connectivitySet(he)) + { adjacent_blocks.set(block); } } @@ -86,53 +92,54 @@ class SteinerTreeGainComputation : public GainComputationBasedistance(connectivity_set); - if ( pin_count_in_from_part == 1 ) { + if(pin_count_in_from_part == 1) + { // Moving the node out of its current block removes // its block from the connectivity set connectivity_set.unset(from); } // Other gain computation techniques only iterate over the connectivity set // of a hyperedge to compute the gain. They assume that the gain is the same - // for all non-adjacent blocks. However, this is not the case for steiner tree metric. - // The gain to non-adjacent blocks could be different because they induce different - // distances in the target graph. We therefore have to consider all adjacent blocks - // of the node to compute the correct gain. - for ( const PartitionID to : adjacent_blocks_view ) { + // for all non-adjacent blocks. However, this is not the case for steiner tree + // metric. The gain to non-adjacent blocks could be different because they induce + // different distances in the target graph. We therefore have to consider all + // adjacent blocks of the node to compute the correct gain. + for(const PartitionID to : adjacent_blocks_view) + { const HyperedgeWeight distance_after = - target_graph->distanceWithBlock(connectivity_set, to); + target_graph->distanceWithBlock(connectivity_set, to); tmp_scores[to] += (distance_after - distance_before) * he_weight; } } } - HyperedgeWeight gain(const Gain to_score, - const Gain) { - return to_score; - } + HyperedgeWeight gain(const Gain to_score, const Gain) { return to_score; } - void changeNumberOfBlocksImpl(const PartitionID new_k) { + void changeNumberOfBlocksImpl(const PartitionID new_k) + { ASSERT(new_k == _context.partition.k); - for ( auto& adjacent_blocks : _local_adjacent_blocks ) { + for(auto &adjacent_blocks : _local_adjacent_blocks) + { adjacent_blocks.resize(new_k); } _all_blocks.resize(new_k); - for ( PartitionID to = 0; to < new_k; ++to ) { + for(PartitionID to = 0; to < new_k; ++to) + { _all_blocks.set(to); } } - private: - ds::Bitset constructBitset() const { - return ds::Bitset(_context.partition.k); - } +private: + ds::Bitset constructBitset() const { return ds::Bitset(_context.partition.k); } using Base::_context; @@ -142,4 +149,4 @@ class SteinerTreeGainComputation : public GainComputationBase in parallel (see global_rollback.h). - * Each node move m_i is of the form (u, V_i, V_j), which means that - * node u is moved from block V_i to block V_j. Each node in this sequence is moved at most once. - * Moreover, we assume that all node moves with an index < i are performed before m_i. + * In our FM algorithm, we recompute the gain values of all node moves in the global move + * sequence M := in parallel (see global_rollback.h). Each node move m_i + * is of the form (u, V_i, V_j), which means that node u is moved from block V_i to block + * V_j. Each node in this sequence is moved at most once. Moreover, we assume that all + * node moves with an index < i are performed before m_i. 
* - * The parallel gain recomputation algorithm iterates over all hyperedges e \in E in parallel. - * We then iterate over the pins of e and compute some auxilliary data based on + * The parallel gain recomputation algorithm iterates over all hyperedges e \in E in + * parallel. We then iterate over the pins of e and compute some auxilliary data based on * which we then decide if we attribute an increase or reduction by w(e) to a moved pin. - * This class implements the functions required by the rollback algorithm to recompute all gain values - * for the connectivity metric. -*/ -class SteinerTreeRollback { + * This class implements the functions required by the rollback algorithm to recompute all + * gain values for the connectivity metric. + */ +class SteinerTreeRollback +{ - public: +public: static constexpr bool supports_parallel_rollback = false; - struct RecalculationData { - void reset() { /** Do nothing */ } + struct RecalculationData + { + void reset() + { /** Do nothing */ + } }; // Updates the auxilliary data for a node move m with index m_id. - static void updateMove(const MoveID, const Move&, vec&) { + static void updateMove(const MoveID, const Move &, vec &) + { throw NonSupportedOperationException( - "Parallel rollback is not supported for steiner tree metric"); + "Parallel rollback is not supported for steiner tree metric"); } // Updates the number of non-moved in a block. - static void updateNonMovedPinInBlock(const PartitionID, vec&) { + static void updateNonMovedPinInBlock(const PartitionID, vec &) + { throw NonSupportedOperationException( - "Parallel rollback is not supported for steiner tree metric"); + "Parallel rollback is not supported for steiner tree metric"); } - template - static HyperedgeWeight benefit(const PartitionedHypergraph&, - const HyperedgeID, - const MoveID, - const Move&, - vec&) { + template + static HyperedgeWeight benefit(const PartitionedHypergraph &, const HyperedgeID, + const MoveID, const Move &, vec &) + { throw NonSupportedOperationException( - "Parallel rollback is not supported for steiner tree metric"); + "Parallel rollback is not supported for steiner tree metric"); return 0; } - template - static HyperedgeWeight penalty(const PartitionedHypergraph&, - const HyperedgeID, - const MoveID, - const Move&, - vec&) { + template + static HyperedgeWeight penalty(const PartitionedHypergraph &, const HyperedgeID, + const MoveID, const Move &, vec &) + { throw NonSupportedOperationException( - "Parallel rollback is not supported for steiner tree metric"); + "Parallel rollback is not supported for steiner tree metric"); return 0; } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_attributed_gains_for_graphs.h b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_attributed_gains_for_graphs.h index f54b1fdba..a6df53246 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_attributed_gains_for_graphs.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_attributed_gains_for_graphs.h @@ -36,17 +36,19 @@ namespace mt_kahypar { * for each incident hyperedge of the node based on which we then compute an * attributed gain value. 
*/ -struct GraphSteinerTreeAttributedGains { - static HyperedgeWeight gain(const SynchronizedEdgeUpdate& sync_update) { +struct GraphSteinerTreeAttributedGains +{ + static HyperedgeWeight gain(const SynchronizedEdgeUpdate &sync_update) + { ASSERT(sync_update.block_of_other_node != kInvalidPartition); ASSERT(sync_update.target_graph); - const TargetGraph& target_graph = *sync_update.target_graph; - const HyperedgeWeight distance_before = target_graph.distance( - sync_update.from, sync_update.block_of_other_node); - const HyperedgeWeight distance_after = target_graph.distance( - sync_update.to, sync_update.block_of_other_node); - return ( distance_after - distance_before ) * sync_update.edge_weight; + const TargetGraph &target_graph = *sync_update.target_graph; + const HyperedgeWeight distance_before = + target_graph.distance(sync_update.from, sync_update.block_of_other_node); + const HyperedgeWeight distance_after = + target_graph.distance(sync_update.to, sync_update.block_of_other_node); + return (distance_after - distance_before) * sync_update.edge_weight; } }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_flow_network_construction_for_graphs.cpp b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_flow_network_construction_for_graphs.cpp index 8da9f35a3..2417e12cc 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_flow_network_construction_for_graphs.cpp +++ b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_flow_network_construction_for_graphs.cpp @@ -31,148 +31,173 @@ namespace mt_kahypar { -template -HyperedgeWeight GraphSteinerTreeFlowNetworkConstruction::capacity(const PartitionedHypergraph& phg, - const Context&, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1) { +template +HyperedgeWeight GraphSteinerTreeFlowNetworkConstruction::capacity( + const PartitionedHypergraph &phg, const Context &, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1) +{ ASSERT(phg.hasTargetGraph()); - const TargetGraph& target_graph = *phg.targetGraph(); + const TargetGraph &target_graph = *phg.targetGraph(); const HyperedgeWeight edge_weight = phg.edgeWeight(he); const HypernodeID u = phg.edgeSource(he); const HypernodeID v = phg.edgeTarget(he); const PartitionID block_of_u = phg.partID(u); const PartitionID block_of_v = phg.partID(v); - if ( ( block_of_u == block_0 || block_of_u == block_1 ) && - ( block_of_v == block_0 || block_of_v == block_1 ) ) { + if((block_of_u == block_0 || block_of_u == block_1) && + (block_of_v == block_0 || block_of_v == block_1)) + { // Both endpoints of the edge are either contained in block 0 or 1. // Removing the edge from the cut or making it a cut edge has the // following gain: return target_graph.distance(block_0, block_1) * edge_weight; - } else { + } + else + { // In this case, only one node is contained in the flow problem and the other // node is part of another block different from block_0 and block_1. // Here, we set the capacity to difference in the steiner tree metric // if we would replace block_0 with block_1. PartitionID other_block = kInvalidPartition; - if ( block_of_u == block_0 || block_of_v == block_0 ) { + if(block_of_u == block_0 || block_of_v == block_0) + { other_block = block_of_u == block_0 ? 
block_of_v : block_of_u; - } else if ( block_of_u == block_1 || block_of_v == block_1 ) { + } + else if(block_of_u == block_1 || block_of_v == block_1) + { other_block = block_of_u == block_1 ? block_of_v : block_of_u; - } else { + } + else + { // Can happen due to concurrent node moves applied by other flow problems return 0; } ASSERT(other_block != kInvalidPartition); const HyperedgeWeight current_distance = target_graph.distance(block_0, other_block); - const HyperedgeWeight distance_with_block_1 = target_graph.distance(block_1, other_block); + const HyperedgeWeight distance_with_block_1 = + target_graph.distance(block_1, other_block); return std::abs(current_distance - distance_with_block_1) * edge_weight; } return 0; } -template -bool GraphSteinerTreeFlowNetworkConstruction::connectToSource(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1) { +template +bool GraphSteinerTreeFlowNetworkConstruction::connectToSource( + const PartitionedHypergraph &partitioned_hg, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1) +{ ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const HypernodeID u = partitioned_hg.edgeSource(he); const HypernodeID v = partitioned_hg.edgeTarget(he); const PartitionID block_of_u = partitioned_hg.partID(u); const PartitionID block_of_v = partitioned_hg.partID(v); - if ( block_of_u == block_0 || block_of_v == block_0 ) { + if(block_of_u == block_0 || block_of_v == block_0) + { PartitionID other_block = block_of_u == block_0 ? block_of_v : block_of_u; const HyperedgeWeight current_distance = target_graph.distance(block_0, other_block); const HyperedgeWeight distance_block_1 = target_graph.distance(block_1, other_block); - if ( other_block != block_0 && other_block != block_1 && current_distance < distance_block_1 ) { + if(other_block != block_0 && other_block != block_1 && + current_distance < distance_block_1) + { // Moving the node from block_0 to block_1 would worsen the steiner tree metric, - // even though the edge is still cut afterwards. To model this percurlarity in the flow network, - // we add the corresponding edge to the source. + // even though the edge is still cut afterwards. To model this percurlarity in the + // flow network, we add the corresponding edge to the source. return true; } } - if ( block_of_u == block_1 || block_of_v == block_1 ) { + if(block_of_u == block_1 || block_of_v == block_1) + { PartitionID other_block = block_of_u == block_1 ? block_of_v : block_of_u; const HyperedgeWeight current_distance = target_graph.distance(block_1, other_block); const HyperedgeWeight distance_block_0 = target_graph.distance(block_0, other_block); - if ( other_block != block_0 && other_block != block_1 && current_distance > distance_block_0 ) { + if(other_block != block_0 && other_block != block_1 && + current_distance > distance_block_0) + { // Moving the node from block_1 to block_0 would improve the steiner tree metric, - // even though the edge is still cut afterwards. To model this percurlarity in the flow network, - // we add the corresponding edge to the source. + // even though the edge is still cut afterwards. To model this percurlarity in the + // flow network, we add the corresponding edge to the source. 
return true; } } return false; } - -template -bool GraphSteinerTreeFlowNetworkConstruction::connectToSink(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1) { +template +bool GraphSteinerTreeFlowNetworkConstruction::connectToSink( + const PartitionedHypergraph &partitioned_hg, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1) +{ ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const HypernodeID u = partitioned_hg.edgeSource(he); const HypernodeID v = partitioned_hg.edgeTarget(he); const PartitionID block_of_u = partitioned_hg.partID(u); const PartitionID block_of_v = partitioned_hg.partID(v); - if ( block_of_u == block_1 || block_of_v == block_1 ) { + if(block_of_u == block_1 || block_of_v == block_1) + { PartitionID other_block = block_of_u == block_1 ? block_of_v : block_of_u; const HyperedgeWeight current_distance = target_graph.distance(block_1, other_block); const HyperedgeWeight distance_block_1 = target_graph.distance(block_0, other_block); - if ( other_block != block_0 && other_block != block_1 && current_distance < distance_block_1 ) { + if(other_block != block_0 && other_block != block_1 && + current_distance < distance_block_1) + { // Moving the node from block_1 to block_0 would worsen the steiner tree metric, - // even though the edge is still cut afterwards. To model this percurlarity in the flow network, - // we add the corresponding edge to the sink. + // even though the edge is still cut afterwards. To model this percurlarity in the + // flow network, we add the corresponding edge to the sink. return true; } } - if ( block_of_u == block_0 || block_of_v == block_0 ) { + if(block_of_u == block_0 || block_of_v == block_0) + { PartitionID other_block = block_of_u == block_0 ? block_of_v : block_of_u; const HyperedgeWeight current_distance = target_graph.distance(block_0, other_block); const HyperedgeWeight distance_block_1 = target_graph.distance(block_1, other_block); - if ( other_block != block_0 && other_block != block_1 && current_distance > distance_block_1 ) { + if(other_block != block_0 && other_block != block_1 && + current_distance > distance_block_1) + { // Moving the node from block_0 to block_1 would improve the steiner tree metric, - // even though the edge is still cut afterwards. To model this percurlarity in the flow network, - // we add the corresponding edge to the sink. + // even though the edge is still cut afterwards. To model this percurlarity in the + // flow network, we add the corresponding edge to the sink. 
return true; } } return false; } -template -bool GraphSteinerTreeFlowNetworkConstruction::isCut(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1) { +template +bool GraphSteinerTreeFlowNetworkConstruction::isCut( + const PartitionedHypergraph &partitioned_hg, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1) +{ ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const HypernodeID u = partitioned_hg.edgeSource(he); const HypernodeID v = partitioned_hg.edgeTarget(he); const PartitionID block_of_u = partitioned_hg.partID(u); const PartitionID block_of_v = partitioned_hg.partID(v); - if ( block_of_u == block_1 || block_of_v == block_1 ) { + if(block_of_u == block_1 || block_of_v == block_1) + { PartitionID other_block = block_of_u == block_1 ? block_of_v : block_of_u; const HyperedgeWeight current_distance = target_graph.distance(block_1, other_block); const HyperedgeWeight distance_block_0 = target_graph.distance(block_0, other_block); - if ( other_block != block_0 && other_block != block_1 && current_distance > distance_block_0 ) { - // Moving the node contained in the flow problem to the other block would improve the - // steiner tree metric, even though the edge would be still cut. - // Thus, we consider it as a cut edge. + if(other_block != block_0 && other_block != block_1 && + current_distance > distance_block_0) + { + // Moving the node contained in the flow problem to the other block would improve + // the steiner tree metric, even though the edge would be still cut. Thus, we + // consider it as a cut edge. return true; } } - if ( block_of_u == block_0 || block_of_v == block_0 ) { + if(block_of_u == block_0 || block_of_v == block_0) + { PartitionID other_block = block_of_u == block_0 ? 
block_of_v : block_of_u; const HyperedgeWeight current_distance = target_graph.distance(block_0, other_block); const HyperedgeWeight distance_block_1 = target_graph.distance(block_1, other_block); - if ( other_block != block_0 && other_block != block_1 && current_distance > distance_block_1 ) { + if(other_block != block_0 && other_block != block_1 && + current_distance > distance_block_1) + { // Same as the previous case return true; } @@ -181,14 +206,19 @@ bool GraphSteinerTreeFlowNetworkConstruction::isCut(const PartitionedHypergraph& } namespace { -#define STEINER_TREE_CAPACITY(X) HyperedgeWeight GraphSteinerTreeFlowNetworkConstruction::capacity( \ - const X&, const Context&, const HyperedgeID, const PartitionID, const PartitionID) -#define STEINER_TREE_CONNECT_TO_SOURCE(X) bool GraphSteinerTreeFlowNetworkConstruction::connectToSource( \ - const X&, const HyperedgeID, const PartitionID, const PartitionID) -#define STEINER_TREE_CONNECT_TO_SINK(X) bool GraphSteinerTreeFlowNetworkConstruction::connectToSink( \ - const X&, const HyperedgeID, const PartitionID, const PartitionID) -#define STEINER_TREE_IS_CUT(X) bool GraphSteinerTreeFlowNetworkConstruction::isCut( \ - const X&, const HyperedgeID, const PartitionID, const PartitionID) +#define STEINER_TREE_CAPACITY(X) \ + HyperedgeWeight GraphSteinerTreeFlowNetworkConstruction::capacity( \ + const X &, const Context &, const HyperedgeID, const PartitionID, \ + const PartitionID) +#define STEINER_TREE_CONNECT_TO_SOURCE(X) \ + bool GraphSteinerTreeFlowNetworkConstruction::connectToSource( \ + const X &, const HyperedgeID, const PartitionID, const PartitionID) +#define STEINER_TREE_CONNECT_TO_SINK(X) \ + bool GraphSteinerTreeFlowNetworkConstruction::connectToSink( \ + const X &, const HyperedgeID, const PartitionID, const PartitionID) +#define STEINER_TREE_IS_CUT(X) \ + bool GraphSteinerTreeFlowNetworkConstruction::isCut( \ + const X &, const HyperedgeID, const PartitionID, const PartitionID) } INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_CAPACITY) @@ -196,4 +226,4 @@ INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_CONNECT_TO_SOURCE) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_CONNECT_TO_SINK) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_IS_CUT) -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_flow_network_construction_for_graphs.h b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_flow_network_construction_for_graphs.h index 36c6d19a8..c71b2a806 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_flow_network_construction_for_graphs.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_flow_network_construction_for_graphs.h @@ -36,45 +36,39 @@ namespace mt_kahypar { * to determine the capacity of a hyperedge and whether or not the hyperedge * is relevant for optimizing the objective function. */ -struct GraphSteinerTreeFlowNetworkConstruction { +struct GraphSteinerTreeFlowNetworkConstruction +{ // ! Capacity of the hyperedge - template - static HyperedgeWeight capacity(const PartitionedHypergraph& phg, - const Context& context, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1); + template + static HyperedgeWeight capacity(const PartitionedHypergraph &phg, + const Context &context, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1); // ! 
If true, then hyperedge is not relevant and can be dropped. - template - static bool dropHyperedge(const PartitionedHypergraph&, - const HyperedgeID, - const PartitionID, - const PartitionID) { + template + static bool dropHyperedge(const PartitionedHypergraph &, const HyperedgeID, + const PartitionID, const PartitionID) + { return false; } // ! If true, then hyperedge is connected to source. - template - static bool connectToSource(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, + template + static bool connectToSource(const PartitionedHypergraph &partitioned_hg, + const HyperedgeID he, const PartitionID block_0, const PartitionID block_1); // ! If true, then hyperedge is connected to sink. - template - static bool connectToSink(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, + template + static bool connectToSink(const PartitionedHypergraph &partitioned_hg, + const HyperedgeID he, const PartitionID block_0, const PartitionID block_1); // ! If true, then hyperedge is considered as cut edge and its // ! weight is added to the total cut - template - static bool isCut(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he, - const PartitionID block_0, - const PartitionID block_1); + template + static bool isCut(const PartitionedHypergraph &partitioned_hg, const HyperedgeID he, + const PartitionID block_0, const PartitionID block_1); }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.cpp b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.cpp index 0d809762f..f0c23811d 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.cpp +++ b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.cpp @@ -26,98 +26,123 @@ #include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" #include "tbb/concurrent_vector.h" +#include "tbb/enumerable_thread_specific.h" +#include "tbb/parallel_for.h" #include "mt-kahypar/definitions.h" namespace mt_kahypar { -template -void GraphSteinerTreeGainCache::initializeGainCache(const PartitionedHypergraph& partitioned_hg) { +template +void GraphSteinerTreeGainCache::initializeGainCache( + const PartitionedHypergraph &partitioned_hg) +{ ASSERT(!_is_initialized, "Gain cache is already initialized"); ASSERT(_k <= 0 || _k >= partitioned_hg.k(), - "Gain cache was already initialized for a different k" << V(_k) << V(partitioned_hg.k())); - allocateGainTable(partitioned_hg.topLevelNumNodes(), partitioned_hg.topLevelNumUniqueIds(), partitioned_hg.k()); + "Gain cache was already initialized for a different k" << V(_k) + << V(partitioned_hg.k())); + allocateGainTable(partitioned_hg.topLevelNumNodes(), + partitioned_hg.topLevelNumUniqueIds(), partitioned_hg.k()); initializeAdjacentBlocks(partitioned_hg); - tbb::parallel_invoke([&] { - // Compute gain of all nodes - tbb::parallel_for(tbb::blocked_range(HypernodeID(0), partitioned_hg.initialNumNodes()), - [&](tbb::blocked_range& r) { - vec& gain_aggregator = _ets_benefit_aggregator.local(); - for (HypernodeID u = r.begin(); u < r.end(); ++u) { - if ( partitioned_hg.nodeIsEnabled(u)) { - initializeGainCacheEntryForNode(partitioned_hg, u, 
gain_aggregator); - } - } + tbb::parallel_invoke( + [&] { + // Compute gain of all nodes + tbb::parallel_for( + tbb::blocked_range(HypernodeID(0), + partitioned_hg.initialNumNodes()), + [&](tbb::blocked_range &r) { + vec &gain_aggregator = _ets_benefit_aggregator.local(); + for(HypernodeID u = r.begin(); u < r.end(); ++u) + { + if(partitioned_hg.nodeIsEnabled(u)) + { + initializeGainCacheEntryForNode(partitioned_hg, u, gain_aggregator); + } + } + }); + }, + [&] { + // Resets edge states + partitioned_hg.doParallelForAllEdges([&](const HyperedgeID &he) { + _edge_state[partitioned_hg.uniqueEdgeID(he)].updateBlocks(kInvalidPartition, + kInvalidPartition, 0); + }); }); - }, [&] { - // Resets edge states - partitioned_hg.doParallelForAllEdges([&](const HyperedgeID& he) { - _edge_state[partitioned_hg.uniqueEdgeID(he)].updateBlocks( - kInvalidPartition, kInvalidPartition, 0); - }); - }); _is_initialized = true; } -template -void GraphSteinerTreeGainCache::initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID hn) { - vec& gain_aggregator = _ets_benefit_aggregator.local(); +template +void GraphSteinerTreeGainCache::initializeGainCacheEntryForNode( + const PartitionedHypergraph &partitioned_hg, const HypernodeID hn) +{ + vec &gain_aggregator = _ets_benefit_aggregator.local(); initializeAdjacentBlocksOfNode(partitioned_hg, hn); initializeGainCacheEntryForNode(partitioned_hg, hn, gain_aggregator); } -bool GraphSteinerTreeGainCache::triggersDeltaGainUpdate(const SynchronizedEdgeUpdate&) { +bool GraphSteinerTreeGainCache::triggersDeltaGainUpdate(const SynchronizedEdgeUpdate &) +{ return true; } -template -void GraphSteinerTreeGainCache::notifyBeforeDeltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { - if ( !partitioned_hg.isSinglePin(sync_update.he) ) { +template +void GraphSteinerTreeGainCache::notifyBeforeDeltaGainUpdate( + const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) +{ + if(!partitioned_hg.isSinglePin(sync_update.he)) + { const HyperedgeID unique_id = partitioned_hg.uniqueEdgeID(sync_update.he); ASSERT(UL(unique_id) < _edge_state.size()); ++_edge_state[unique_id].version; const HypernodeID u = partitioned_hg.edgeSource(sync_update.he); const HypernodeID v = partitioned_hg.edgeTarget(sync_update.he); - if ( u < v ) { - _edge_state[unique_id].updateBlocks( - sync_update.to, sync_update.block_of_other_node, _uncontraction_version); - } else { - _edge_state[unique_id].updateBlocks( - sync_update.block_of_other_node, sync_update.to, _uncontraction_version); + if(u < v) + { + _edge_state[unique_id].updateBlocks(sync_update.to, sync_update.block_of_other_node, + _uncontraction_version); + } + else + { + _edge_state[unique_id].updateBlocks(sync_update.block_of_other_node, sync_update.to, + _uncontraction_version); } } } -template -void GraphSteinerTreeGainCache::deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { +template +void GraphSteinerTreeGainCache::deltaGainUpdate( + const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) +{ ASSERT(_is_initialized, "Gain cache is not initialized"); ASSERT(sync_update.target_graph); const HyperedgeID he = sync_update.he; - if ( !partitioned_hg.isSinglePin(he) ) { + if(!partitioned_hg.isSinglePin(he)) + { const PartitionID from = sync_update.from; const PartitionID to = sync_update.to; const HyperedgeWeight edge_weight = 
sync_update.edge_weight; - const TargetGraph& target_graph = *sync_update.target_graph; + const TargetGraph &target_graph = *sync_update.target_graph; const HypernodeID v = partitioned_hg.edgeTarget(he); - for ( const PartitionID& target : _adjacent_blocks.connectivitySet(v) ) { - const HyperedgeWeight delta = ( target_graph.distance(from, target) - - target_graph.distance(to, target) ) * edge_weight ; - _gain_cache[gain_entry_index(v, target)].add_fetch(delta, std::memory_order_relaxed); + for(const PartitionID &target : _adjacent_blocks.connectivitySet(v)) + { + const HyperedgeWeight delta = + (target_graph.distance(from, target) - target_graph.distance(to, target)) * + edge_weight; + _gain_cache[gain_entry_index(v, target)].add_fetch(delta, + std::memory_order_relaxed); } // Update gain version of hyperedge. If the update version is equal to the version // of the hyperedge, then we know that all gain cache updates are completed. This is - // important for initializing gain entries while simultanously running gain cache updates. + // important for initializing gain entries while simultanously running gain cache + // updates. const HyperedgeID unique_id = partitioned_hg.uniqueEdgeID(sync_update.he); ASSERT(UL(unique_id) < _edge_state.size()); ++_edge_state[unique_id].update_version; @@ -129,51 +154,55 @@ void GraphSteinerTreeGainCache::deltaGainUpdate(const PartitionedHypergraph& par } } -template -void GraphSteinerTreeGainCache::uncontractUpdateAfterRestore(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, - const HyperedgeID he, - const HypernodeID) { +template +void GraphSteinerTreeGainCache::uncontractUpdateAfterRestore( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, const HypernodeID v, + const HyperedgeID he, const HypernodeID) +{ unused(v); // In this case, edge he was a selfloop and now it turns to a regular edge - if ( _is_initialized ) { + if(_is_initialized) + { ASSERT(partitioned_hg.hasTargetGraph()); ASSERT(!partitioned_hg.isSinglePin(he)); ASSERT(partitioned_hg.edgeSource(he) == v); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const PartitionID from = partitioned_hg.partID(u); const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); - for ( const PartitionID& to : _adjacent_blocks.connectivitySet(u) ) { + for(const PartitionID &to : _adjacent_blocks.connectivitySet(u)) + { _gain_cache[gain_entry_index(u, to)].fetch_sub( - target_graph.distance(from, to) * edge_weight, std::memory_order_relaxed); + target_graph.distance(from, to) * edge_weight, std::memory_order_relaxed); } incrementIncidentEdges(u, from); // Gain cache entry for v is initialized after batch uncontractions } } -template -void GraphSteinerTreeGainCache::uncontractUpdateAfterReplacement(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, - const HyperedgeID he) { +template +void GraphSteinerTreeGainCache::uncontractUpdateAfterReplacement( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, const HypernodeID v, + const HyperedgeID he) +{ unused(v); // In this case, u is replaced by v in hyperedge he // => Pin counts and connectivity set of hyperedge he does not change - if ( _is_initialized && !partitioned_hg.isSinglePin(he) ) { + if(_is_initialized && !partitioned_hg.isSinglePin(he)) + { const PartitionID block_of_u = partitioned_hg.partID(u); ASSERT(partitioned_hg.hasTargetGraph()); 
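The loop in `deltaGainUpdate` above applies, for every block adjacent to the unmoved endpoint `v`, the term `(dist(from, target) - dist(to, target)) * edge_weight`. The following standalone sketch only replays that arithmetic on a made-up 3-block distance matrix; the values are illustrative and none of this is mt-kahypar code.

```cpp
// Minimal sketch of the per-edge delta applied in deltaGainUpdate(..): when the
// other endpoint u of edge {u,v} moves from block `from` to block `to`, every
// cached benefit term b(v, target) changes by
//   (dist(from, target) - dist(to, target)) * w({u,v}).
// The 3x3 distance matrix below is invented example data.
#include <array>
#include <iostream>

int main() {
  constexpr int k = 3;
  // dist[i][j]: shortest-path distance between blocks i and j in the target graph
  const std::array<std::array<int, k>, k> dist = { { { 0, 1, 2 },
                                                     { 1, 0, 1 },
                                                     { 2, 1, 0 } } };
  const int edge_weight = 4;
  const int from = 0, to = 2; // u moves from block 0 to block 2

  for (int target = 0; target < k; ++target) {
    const int delta = (dist[from][target] - dist[to][target]) * edge_weight;
    std::cout << "b(v, " << target << ") += " << delta << '\n';
  }
  return 0;
}
```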
ASSERT(partitioned_hg.edgeSource(he) == v); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const HypernodeID w = partitioned_hg.edgeTarget(he); const PartitionID block_of_w = partitioned_hg.partID(w); const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(he); - for ( const PartitionID& to : _adjacent_blocks.connectivitySet(u) ) { + for(const PartitionID &to : _adjacent_blocks.connectivitySet(u)) + { _gain_cache[gain_entry_index(u, to)].fetch_add( - target_graph.distance(block_of_w, to) * edge_weight, std::memory_order_relaxed); + target_graph.distance(block_of_w, to) * edge_weight, std::memory_order_relaxed); } - if ( block_of_u != block_of_w ) { + if(block_of_u != block_of_w) + { decrementIncidentEdges(u, block_of_u); } decrementIncidentEdges(u, block_of_w); @@ -183,16 +212,19 @@ void GraphSteinerTreeGainCache::uncontractUpdateAfterReplacement(const Partition void GraphSteinerTreeGainCache::restoreSinglePinHyperedge(const HypernodeID, const PartitionID, - const HyperedgeWeight) { + const HyperedgeWeight) +{ // Do nothing } -template -void GraphSteinerTreeGainCache::restoreIdenticalHyperedge(const PartitionedHypergraph& partitioned_hg, - const HyperedgeID he) { +template +void GraphSteinerTreeGainCache::restoreIdenticalHyperedge( + const PartitionedHypergraph &partitioned_hg, const HyperedgeID he) +{ const HypernodeID u = partitioned_hg.edgeSource(he); const HypernodeID v = partitioned_hg.edgeTarget(he); - if ( u < v ) { + if(u < v) + { ASSERT(!partitioned_hg.isSinglePin(he)); ASSERT(u != kInvalidHypernode && v != kInvalidHypernode && u != v, V(u) << V(v)); ASSERT(partitioned_hg.nodeIsEnabled(u)); @@ -201,33 +233,43 @@ void GraphSteinerTreeGainCache::restoreIdenticalHyperedge(const PartitionedHyper const PartitionID block_of_v = partitioned_hg.partID(v); incrementIncidentEdges(u, block_of_u); incrementIncidentEdges(v, block_of_u); - if ( block_of_u != block_of_v ) { + if(block_of_u != block_of_v) + { incrementIncidentEdges(u, block_of_v); incrementIncidentEdges(v, block_of_v); } } } -template -void GraphSteinerTreeGainCache::initializeAdjacentBlocks(const PartitionedHypergraph& partitioned_hg) { +template +void GraphSteinerTreeGainCache::initializeAdjacentBlocks( + const PartitionedHypergraph &partitioned_hg) +{ // Initialize adjacent blocks of each node - partitioned_hg.doParallelForAllNodes([&](const HypernodeID& hn) { - initializeAdjacentBlocksOfNode(partitioned_hg, hn); - }); + partitioned_hg.doParallelForAllNodes( + [&](const HypernodeID &hn) { initializeAdjacentBlocksOfNode(partitioned_hg, hn); }); } -template -void GraphSteinerTreeGainCache::initializeAdjacentBlocksOfNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID hn) { +template +void GraphSteinerTreeGainCache::initializeAdjacentBlocksOfNode( + const PartitionedHypergraph &partitioned_hg, const HypernodeID hn) +{ _adjacent_blocks.clear(hn); - for ( PartitionID to = 0; to < _k; ++to ) { - _num_incident_edges_of_block[gain_entry_index(hn, to)].store(0, std::memory_order_relaxed); + for(PartitionID to = 0; to < _k; ++to) + { + _num_incident_edges_of_block[gain_entry_index(hn, to)].store( + 0, std::memory_order_relaxed); } - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(hn) ) { - if ( !partitioned_hg.isSinglePin(he) ) { - const PartitionID block_of_source = partitioned_hg.partID(partitioned_hg.edgeSource(he)); - const PartitionID block_of_target = partitioned_hg.partID(partitioned_hg.edgeTarget(he)); - if ( 
block_of_source != block_of_target ) { + for(const HyperedgeID &he : partitioned_hg.incidentEdges(hn)) + { + if(!partitioned_hg.isSinglePin(he)) + { + const PartitionID block_of_source = + partitioned_hg.partID(partitioned_hg.edgeSource(he)); + const PartitionID block_of_target = + partitioned_hg.partID(partitioned_hg.edgeTarget(he)); + if(block_of_source != block_of_target) + { incrementIncidentEdges(hn, block_of_source); } incrementIncidentEdges(hn, block_of_target); @@ -235,21 +277,26 @@ void GraphSteinerTreeGainCache::initializeAdjacentBlocksOfNode(const Partitioned } } -template -void GraphSteinerTreeGainCache::updateAdjacentBlocks(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { +template +void GraphSteinerTreeGainCache::updateAdjacentBlocks( + const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) +{ ASSERT(!partitioned_hg.isSinglePin(sync_update.he)); - if ( sync_update.pin_count_in_from_part_after == 0 ) { + if(sync_update.pin_count_in_from_part_after == 0) + { // The node move has removed the source block of the move from the // connectivity set of the hyperedge. We therefore decrement the number of // incident edges in the source block for each pin of the hyperedge. If this // decreases the counter to zero for some pin, we remove the source block // from the adjacent blocks of that pin. - for ( const HypernodeID& pin : partitioned_hg.pins(sync_update.he) ) { + for(const HypernodeID &pin : partitioned_hg.pins(sync_update.he)) + { decrementIncidentEdges(pin, sync_update.from); } } - if ( sync_update.pin_count_in_to_part_after == 1 ) { + if(sync_update.pin_count_in_to_part_after == 1) + { // The node move has added the target block of the move to the // connectivity set of the hyperedge. We therefore increment the number of // incident edges in the target block for each pin of the hyperedge. If this @@ -257,74 +304,96 @@ void GraphSteinerTreeGainCache::updateAdjacentBlocks(const PartitionedHypergraph // to the adjacent blocks of that pin. Moreover, since we only compute gain // cache entries to adjacent blocks, we initialize the gain cache entry // for that pin and target block. 
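The comments above describe how the set of adjacent blocks is kept consistent: a per-(node, block) counter of incident edges is incremented or decremented, and the block enters or leaves the node's connectivity set when the counter crosses one or zero. Below is a minimal single-threaded sketch of that bookkeeping, using plain STL containers instead of the concurrent structures in this file.

```cpp
// Sketch (not the mt-kahypar data structure) of how per-(node, block) counters
// drive the set of adjacent blocks: a block is adjacent to a node exactly while
// at least one incident edge has a pin in that block.
#include <cassert>
#include <set>
#include <vector>

struct AdjacentBlockTracker {
  int k;
  std::vector<int> counts;             // counts[u * k + b]
  std::vector<std::set<int>> adjacent; // adjacent[u] = blocks with count > 0

  AdjacentBlockTracker(int num_nodes, int num_blocks)
      : k(num_blocks), counts(num_nodes * num_blocks, 0), adjacent(num_nodes) {}

  int increment(int u, int block) {
    const int after = ++counts[u * k + block];
    if (after == 1) adjacent[u].insert(block); // block becomes adjacent
    return after;
  }

  int decrement(int u, int block) {
    assert(counts[u * k + block] > 0);
    const int after = --counts[u * k + block];
    if (after == 0) adjacent[u].erase(block);  // block no longer adjacent
    return after;
  }
};

int main() {
  AdjacentBlockTracker tracker(/*num_nodes=*/2, /*num_blocks=*/3);
  tracker.increment(0, 1);  // first incident edge with a pin in block 1
  tracker.increment(0, 1);  // second one
  tracker.decrement(0, 1);  // still adjacent (count == 1)
  tracker.decrement(0, 1);  // block 1 removed from node 0's adjacent blocks
  return 0;
}
```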
- for ( const HypernodeID& pin : partitioned_hg.pins(sync_update.he) ) { - const HyperedgeID incident_edges_after = incrementIncidentEdges(pin, sync_update.to); - if ( incident_edges_after == 1 ) { + for(const HypernodeID &pin : partitioned_hg.pins(sync_update.he)) + { + const HyperedgeID incident_edges_after = + incrementIncidentEdges(pin, sync_update.to); + if(incident_edges_after == 1) + { ASSERT(sync_update.edge_locks); - initializeGainCacheEntry(partitioned_hg, pin, sync_update.to, *sync_update.edge_locks); + initializeGainCacheEntry(partitioned_hg, pin, sync_update.to, + *sync_update.edge_locks); } } } } -HyperedgeID GraphSteinerTreeGainCache::incrementIncidentEdges(const HypernodeID u, const PartitionID to) { +HyperedgeID GraphSteinerTreeGainCache::incrementIncidentEdges(const HypernodeID u, + const PartitionID to) +{ const HyperedgeID incident_count_after = - _num_incident_edges_of_block[gain_entry_index(u, to)].add_fetch(1, std::memory_order_relaxed); - if ( incident_count_after == 1 ) { + _num_incident_edges_of_block[gain_entry_index(u, to)].add_fetch( + 1, std::memory_order_relaxed); + if(incident_count_after == 1) + { _adjacent_blocks.add(u, to); } return incident_count_after; } -HyperedgeID GraphSteinerTreeGainCache::decrementIncidentEdges(const HypernodeID u, const PartitionID to) { +HyperedgeID GraphSteinerTreeGainCache::decrementIncidentEdges(const HypernodeID u, + const PartitionID to) +{ ASSERT(_num_incident_edges_of_block[gain_entry_index(u, to)].load() > 0); const HyperedgeID incident_count_after = - _num_incident_edges_of_block[gain_entry_index(u, to)].sub_fetch(1, std::memory_order_relaxed); - if ( incident_count_after == 0 ) { + _num_incident_edges_of_block[gain_entry_index(u, to)].sub_fetch( + 1, std::memory_order_relaxed); + if(incident_count_after == 0) + { _adjacent_blocks.remove(u, to); } return incident_count_after; } -template -void GraphSteinerTreeGainCache::initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - vec& gain_aggregator) { +template +void GraphSteinerTreeGainCache::initializeGainCacheEntryForNode( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, + vec &gain_aggregator) +{ ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); - for ( const HyperedgeID& e : partitioned_hg.incidentEdges(u) ) { - if ( !partitioned_hg.isSinglePin(e) ) { + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); + for(const HyperedgeID &e : partitioned_hg.incidentEdges(u)) + { + if(!partitioned_hg.isSinglePin(e)) + { ASSERT(partitioned_hg.edgeSource(e) == u); - const PartitionID block_of_target = partitioned_hg.partID(partitioned_hg.edgeTarget(e)); + const PartitionID block_of_target = + partitioned_hg.partID(partitioned_hg.edgeTarget(e)); const HyperedgeWeight edge_weight = partitioned_hg.edgeWeight(e); ASSERT(_adjacent_blocks.contains(u, partitioned_hg.partID(u))); - for ( const PartitionID& to : _adjacent_blocks.connectivitySet(u) ) { + for(const PartitionID &to : _adjacent_blocks.connectivitySet(u)) + { gain_aggregator[to] -= target_graph.distance(to, block_of_target) * edge_weight; } } } - for ( const PartitionID& to : _adjacent_blocks.connectivitySet(u) ) { - _gain_cache[gain_entry_index(u, to)].store(gain_aggregator[to], std::memory_order_relaxed); + for(const PartitionID &to : _adjacent_blocks.connectivitySet(u)) + { + _gain_cache[gain_entry_index(u, to)].store(gain_aggregator[to], + std::memory_order_relaxed); gain_aggregator[to] = 0; 
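`initializeGainCacheEntryForNode` above accumulates `-dist(to, block_of_target) * edge_weight` per adjacent block into a reusable aggregator and then writes the results into the flat gain table at `gain_entry_index(u, to) = u * k + to`. Here is a toy, sequential version of that pattern with made-up distances and edges, purely to make the indexing and aggregation concrete.

```cpp
// Sketch of the aggregation pattern: benefit terms b(u, to) are collected in a
// dense per-block buffer and then written to the flat gain table at u * k + to.
// Graph, weights and distances are invented example data.
#include <iostream>
#include <utility>
#include <vector>

int main() {
  const int k = 3;
  const int u = 0;
  // Incident edges of u as (block of other endpoint, edge weight) pairs.
  const std::vector<std::pair<int, int>> incident = { { 1, 2 }, { 2, 5 } };
  const int dist[3][3] = { { 0, 1, 2 }, { 1, 0, 1 }, { 2, 1, 0 } };

  std::vector<long long> gain_cache(/*num_nodes=*/1 * k, 0);
  std::vector<long long> aggregator(k, 0);

  for (const auto& [block_of_target, weight] : incident)
    for (int to = 0; to < k; ++to)
      aggregator[to] -= static_cast<long long>(dist[to][block_of_target]) * weight;

  for (int to = 0; to < k; ++to) {
    gain_cache[u * k + to] = aggregator[to]; // gain_entry_index(u, to) = u * k + to
    aggregator[to] = 0;                      // reset buffer for the next node
    std::cout << "b(u, " << to << ") = " << gain_cache[u * k + to] << '\n';
  }
  return 0;
}
```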
} } -template -void GraphSteinerTreeGainCache::initializeGainCacheEntry(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const PartitionID to, - ds::Array& edge_locks) { +template +void GraphSteinerTreeGainCache::initializeGainCacheEntry( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u, + const PartitionID to, ds::Array &edge_locks) +{ ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); - vec& seen_versions = _ets_version.local(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); + vec &seen_versions = _ets_version.local(); bool success = false; - while ( !success ) { + while(!success) + { success = true; seen_versions.clear(); HyperedgeWeight gain = 0; - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(u) ) { - if ( !partitioned_hg.isSinglePin(he) ) { + for(const HyperedgeID &he : partitioned_hg.incidentEdges(u)) + { + if(!partitioned_hg.isSinglePin(he)) + { ASSERT(partitioned_hg.edgeSource(he) == u); const HyperedgeID unique_id = partitioned_hg.uniqueEdgeID(he); edge_locks[unique_id].lock(); @@ -334,21 +403,29 @@ void GraphSteinerTreeGainCache::initializeGainCacheEntry(const PartitionedHyperg // at the time of its last update with a version ID. If this version ID changes // after the gain computation, we know that we computed the gain on outdated // information and retry. - const uint32_t update_version = _edge_state[unique_id].update_version.load(std::memory_order_relaxed); - const uint32_t edge_version = _edge_state[unique_id].version.load(std::memory_order_relaxed); - PartitionID block_of_u = _edge_state[unique_id].sourceBlock(_uncontraction_version); - PartitionID block_of_v = _edge_state[unique_id].targetBlock(_uncontraction_version); + const uint32_t update_version = + _edge_state[unique_id].update_version.load(std::memory_order_relaxed); + const uint32_t edge_version = + _edge_state[unique_id].version.load(std::memory_order_relaxed); + PartitionID block_of_u = + _edge_state[unique_id].sourceBlock(_uncontraction_version); + PartitionID block_of_v = + _edge_state[unique_id].targetBlock(_uncontraction_version); edge_locks[unique_id].unlock(); const HypernodeID v = partitioned_hg.edgeTarget(he); // The edge state object stores the block ID of the node with smaller ID // first. Swapping both entries in case v < u gives the correct block for node v - if ( v < u ) std::swap(block_of_u, block_of_v); - // In case u and v were not moved yet, we retrieve the block ID of v from the partition. - block_of_v = block_of_v == kInvalidPartition ? partitioned_hg.partID(v) : block_of_v; + if(v < u) + std::swap(block_of_u, block_of_v); + // In case u and v were not moved yet, we retrieve the block ID of v from the + // partition. + block_of_v = + block_of_v == kInvalidPartition ? partitioned_hg.partID(v) : block_of_v; ASSERT(update_version <= edge_version); - if ( update_version < edge_version ) { + if(update_version < edge_version) + { // There are still pending gain cache updates that must be finished // before we initialize the gain cache entry. success = false; @@ -363,13 +440,18 @@ void GraphSteinerTreeGainCache::initializeGainCacheEntry(const PartitionedHyperg // Check if versions of an incident edge has changed in the meantime. // If not, gain cache entry is correct. Otherwise, recompute it. 
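The retry logic above is an optimistic scheme: the version of every incident edge is recorded while the entry is computed, and the result is accepted only if none of those versions changed afterwards and no delta-gain update is still pending (`update_version == version`). The sketch below shows the same snapshot-compute-validate pattern in isolation, with plain atomics and without the per-edge locks of the real code; it is a simplified stand-in, not the gain cache's protocol.

```cpp
// Generic sketch of optimistic initialization: remember the version of every
// dependency before computing, recompute the value, and accept it only if no
// version changed in the meantime; otherwise retry.
#include <atomic>
#include <cstddef>
#include <vector>

struct VersionedValue {
  std::atomic<unsigned> version{0};
  std::atomic<int> value{0};
};

int computeSumOptimistically(const std::vector<VersionedValue>& deps) {
  while (true) {
    std::vector<unsigned> seen;
    seen.reserve(deps.size());
    int sum = 0;
    for (const VersionedValue& d : deps) {           // first pass: snapshot + compute
      seen.push_back(d.version.load(std::memory_order_acquire));
      sum += d.value.load(std::memory_order_relaxed);
    }
    bool unchanged = true;
    for (std::size_t i = 0; i < deps.size(); ++i) {  // second pass: validate snapshot
      if (deps[i].version.load(std::memory_order_acquire) != seen[i]) {
        unchanged = false;
        break;
      }
    }
    if (unchanged) return sum;                       // otherwise retry
  }
}

int main() {
  std::vector<VersionedValue> deps(4);
  return computeSumOptimistically(deps) == 0 ? 0 : 1;
}
```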
- if ( success ) { + if(success) + { size_t idx = 0; - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(u) ) { - if ( !partitioned_hg.isSinglePin(he) ) { + for(const HyperedgeID &he : partitioned_hg.incidentEdges(u)) + { + if(!partitioned_hg.isSinglePin(he)) + { ASSERT(idx < seen_versions.size()); const HyperedgeID unique_id = partitioned_hg.uniqueEdgeID(he); - if ( seen_versions[idx++] != _edge_state[unique_id].version.load(std::memory_order_relaxed) ) { + if(seen_versions[idx++] != + _edge_state[unique_id].version.load(std::memory_order_relaxed)) + { success = false; break; } @@ -379,22 +461,31 @@ void GraphSteinerTreeGainCache::initializeGainCacheEntry(const PartitionedHyperg } } -template -bool GraphSteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph& partitioned_hg) const { +template +bool GraphSteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes( + const PartitionedHypergraph &partitioned_hg) const +{ bool success = true; vec num_incident_edges(_k, 0); - for ( const HypernodeID& hn : partitioned_hg.nodes() ) { + for(const HypernodeID &hn : partitioned_hg.nodes()) + { num_incident_edges.assign(_k, 0); - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(hn) ) { - if ( !partitioned_hg.isSinglePin(he) ) { - for ( const PartitionID& block : partitioned_hg.connectivitySet(he) ) { + for(const HyperedgeID &he : partitioned_hg.incidentEdges(hn)) + { + if(!partitioned_hg.isSinglePin(he)) + { + for(const PartitionID &block : partitioned_hg.connectivitySet(he)) + { ++num_incident_edges[block]; } } } - for ( PartitionID block = 0; block < _k; ++block ) { - if ( _num_incident_edges_of_block[gain_entry_index(hn, block)] != num_incident_edges[block] ) { + for(PartitionID block = 0; block < _k; ++block) + { + if(_num_incident_edges_of_block[gain_entry_index(hn, block)] != + num_incident_edges[block]) + { LOG << "Number of incident edges of node" << hn << "to block" << block << "=>" << "Expected:" << num_incident_edges[block] << "," << "Actual:" << _num_incident_edges_of_block[gain_entry_index(hn, block)]; @@ -402,16 +493,20 @@ bool GraphSteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes(const Partiti } } - for ( const PartitionID block : _adjacent_blocks.connectivitySet(hn) ) { - if ( num_incident_edges[block] == 0 ) { + for(const PartitionID block : _adjacent_blocks.connectivitySet(hn)) + { + if(num_incident_edges[block] == 0) + { LOG << "Node" << hn << "is not adjacent to block" << block << ", but it is in its connectivity set"; success = false; } } - for ( PartitionID block = 0; block < _k; ++block ) { - if ( num_incident_edges[block] > 0 && !_adjacent_blocks.contains(hn, block) ) { + for(PartitionID block = 0; block < _k; ++block) + { + if(num_incident_edges[block] > 0 && !_adjacent_blocks.contains(hn, block)) + { LOG << "Node" << hn << "should be adjacent to block" << block << ", but it is not in its connectivity set"; success = false; @@ -422,37 +517,42 @@ bool GraphSteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes(const Partiti } namespace { -#define STEINER_TREE_INITIALIZE_GAIN_CACHE(X) void GraphSteinerTreeGainCache::initializeGainCache(const X&) -#define STEINER_TREE_INITIALIZE_GAIN_CACHE_FOR_NODE(X) void GraphSteinerTreeGainCache::initializeGainCacheEntryForNode(const X&, \ - const HypernodeID) -#define STEINER_TREE_NOTIFY(X) void GraphSteinerTreeGainCache::notifyBeforeDeltaGainUpdate(const X&, \ - const SynchronizedEdgeUpdate&) -#define STEINER_TREE_DELTA_GAIN_UPDATE(X) void 
GraphSteinerTreeGainCache::deltaGainUpdate(const X&, \ - const SynchronizedEdgeUpdate&) -#define STEINER_TREE_RESTORE_UPDATE(X) void GraphSteinerTreeGainCache::uncontractUpdateAfterRestore(const X&, \ - const HypernodeID, \ - const HypernodeID, \ - const HyperedgeID, \ - const HypernodeID) -#define STEINER_TREE_REPLACEMENT_UPDATE(X) void GraphSteinerTreeGainCache::uncontractUpdateAfterReplacement(const X&, \ - const HypernodeID, \ - const HypernodeID, \ - const HyperedgeID) -#define STEINER_TREE_RESTORE_IDENTICAL_HYPEREDGE(X) void GraphSteinerTreeGainCache::restoreIdenticalHyperedge(const X&, \ - const HyperedgeID) -#define STEINER_TREE_INIT_ADJACENT_BLOCKS(X) void GraphSteinerTreeGainCache::initializeAdjacentBlocks(const X&) -#define STEINER_TREE_INIT_ADJACENT_BLOCKS_OF_NODE(X) void GraphSteinerTreeGainCache::initializeAdjacentBlocksOfNode(const X&, \ - const HypernodeID) -#define STEINER_TREE_UPDATE_ADJACENT_BLOCKS(X) void GraphSteinerTreeGainCache::updateAdjacentBlocks(const X&, \ - const SynchronizedEdgeUpdate&) -#define STEINER_TREE_INIT_GAIN_CACHE_ENTRY(X) void GraphSteinerTreeGainCache::initializeGainCacheEntryForNode(const X&, \ - const HypernodeID, \ - vec&) -#define STEINER_TREE_INIT_LAZY_GAIN_CACHE_ENTRY(X) void GraphSteinerTreeGainCache::initializeGainCacheEntry(const X&, \ - const HypernodeID, \ - const PartitionID, \ - ds::Array&) -#define STEINER_TREE_VERIFY_ADJACENT_BLOCKS(X) bool GraphSteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes(const X&) const +#define STEINER_TREE_INITIALIZE_GAIN_CACHE(X) \ + void GraphSteinerTreeGainCache::initializeGainCache(const X &) +#define STEINER_TREE_INITIALIZE_GAIN_CACHE_FOR_NODE(X) \ + void GraphSteinerTreeGainCache::initializeGainCacheEntryForNode(const X &, \ + const HypernodeID) +#define STEINER_TREE_NOTIFY(X) \ + void GraphSteinerTreeGainCache::notifyBeforeDeltaGainUpdate( \ + const X &, const SynchronizedEdgeUpdate &) +#define STEINER_TREE_DELTA_GAIN_UPDATE(X) \ + void GraphSteinerTreeGainCache::deltaGainUpdate(const X &, \ + const SynchronizedEdgeUpdate &) +#define STEINER_TREE_RESTORE_UPDATE(X) \ + void GraphSteinerTreeGainCache::uncontractUpdateAfterRestore( \ + const X &, const HypernodeID, const HypernodeID, const HyperedgeID, \ + const HypernodeID) +#define STEINER_TREE_REPLACEMENT_UPDATE(X) \ + void GraphSteinerTreeGainCache::uncontractUpdateAfterReplacement( \ + const X &, const HypernodeID, const HypernodeID, const HyperedgeID) +#define STEINER_TREE_RESTORE_IDENTICAL_HYPEREDGE(X) \ + void GraphSteinerTreeGainCache::restoreIdenticalHyperedge(const X &, const HyperedgeID) +#define STEINER_TREE_INIT_ADJACENT_BLOCKS(X) \ + void GraphSteinerTreeGainCache::initializeAdjacentBlocks(const X &) +#define STEINER_TREE_INIT_ADJACENT_BLOCKS_OF_NODE(X) \ + void GraphSteinerTreeGainCache::initializeAdjacentBlocksOfNode(const X &, \ + const HypernodeID) +#define STEINER_TREE_UPDATE_ADJACENT_BLOCKS(X) \ + void GraphSteinerTreeGainCache::updateAdjacentBlocks(const X &, \ + const SynchronizedEdgeUpdate &) +#define STEINER_TREE_INIT_GAIN_CACHE_ENTRY(X) \ + void GraphSteinerTreeGainCache::initializeGainCacheEntryForNode( \ + const X &, const HypernodeID, vec &) +#define STEINER_TREE_INIT_LAZY_GAIN_CACHE_ENTRY(X) \ + void GraphSteinerTreeGainCache::initializeGainCacheEntry( \ + const X &, const HypernodeID, const PartitionID, ds::Array &) +#define STEINER_TREE_VERIFY_ADJACENT_BLOCKS(X) \ + bool GraphSteinerTreeGainCache::verifyTrackedAdjacentBlocksOfNodes(const X &) const } 
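These macros generate one explicit-instantiation signature per partitioned (hyper)graph type, and the `INSTANTIATE_FUNC_WITH_PARTITIONED_HG` calls below expand them so that the template definitions in this translation unit are emitted for every supported type. A minimal, hypothetical illustration of that macro pattern follows; the types and macro names are invented for the example and do not exist in mt-kahypar.

```cpp
// Sketch of macro-driven explicit instantiation: the signature macro is expanded
// once per concrete type so the template member functions defined in this .cpp
// are emitted for every type other translation units may link against.
#include <iostream>

struct GraphType { static constexpr const char* name = "graph"; };
struct HypergraphType { static constexpr const char* name = "hypergraph"; };

struct Cache {
  template <typename PartitionedHypergraph>
  void initialize(const PartitionedHypergraph&) {
    std::cout << "initialized for " << PartitionedHypergraph::name << '\n';
  }
};

// Signature macro, analogous to STEINER_TREE_INITIALIZE_GAIN_CACHE(X).
#define INITIALIZE_CACHE(X) template void Cache::initialize(const X&)

// Expansion over all supported types, analogous to
// INSTANTIATE_FUNC_WITH_PARTITIONED_HG(...).
#define INSTANTIATE_FOR_ALL_TYPES(MACRO) \
  MACRO(GraphType);                      \
  MACRO(HypergraphType);

INSTANTIATE_FOR_ALL_TYPES(INITIALIZE_CACHE)

int main() {
  Cache cache;
  cache.initialize(GraphType{});
  cache.initialize(HypergraphType{});
  return 0;
}
```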
INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_INITIALIZE_GAIN_CACHE) @@ -469,4 +569,4 @@ INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_INIT_GAIN_CACHE_ENTRY) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_INIT_LAZY_GAIN_CACHE_ENTRY) INSTANTIATE_FUNC_WITH_PARTITIONED_HG(STEINER_TREE_VERIFY_ADJACENT_BLOCKS) -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h index 8e3e61782..1c89d71ed 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h @@ -30,67 +30,73 @@ #include "kahypar-resources/meta/policy_registry.h" -#include "tbb/parallel_invoke.h" #include "tbb/concurrent_vector.h" +#include "tbb/parallel_invoke.h" -#include "mt-kahypar/partition/context_enum_classes.h" -#include "mt-kahypar/partition/mapping/target_graph.h" -#include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" -#include "mt-kahypar/datastructures/sparse_map.h" -#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/datastructures/connectivity_set.h" #include "mt-kahypar/datastructures/delta_connectivity_set.h" -#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/datastructures/hypergraph_common.h" +#include "mt-kahypar/datastructures/sparse_map.h" +#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/macros.h" +#include "mt-kahypar/parallel/atomic_wrapper.h" +#include "mt-kahypar/partition/context_enum_classes.h" +#include "mt-kahypar/partition/mapping/target_graph.h" #include "mt-kahypar/utils/range.h" namespace mt_kahypar { /** - * The gain cache stores the gain values for all possible node moves for the steiner tree metric metric on graphs. + * The gain cache stores the gain values for all possible node moves for the steiner tree + * metric metric on graphs. * - * The mapping problem asks for a mapping Π: V -> V_p of the node set V of a weighted graph G = (V,E,c,w) - * onto a target graph P = (V_P, E_P) such that the following objective function is minimized: - * steiner_tree(G, P, Π) := sum_{{u,v} \in E} dist_P(Π[u],Π[v]) * w(u,v) - * Here, dist_P(Π[u],Π[v]) is shortest path connecting block Π[u] and Π[v] in the target graph. + * The mapping problem asks for a mapping Π: V -> V_p of the node set V of a weighted + * graph G = (V,E,c,w) onto a target graph P = (V_P, E_P) such that the following + * objective function is minimized: steiner_tree(G, P, Π) := sum_{{u,v} \in E} + * dist_P(Π[u],Π[v]) * w(u,v) Here, dist_P(Π[u],Π[v]) is shortest path connecting block + * Π[u] and Π[v] in the target graph. * - * The gain of moving a node u from its current block V_i to a target block V_j can be expressed as follows: - * g(u,V_j) := Ψ(u,Π[u]) - Ψ(u,V_j) with Ψ(u,V') := \sum_{{u,v} \in E} dist_P(V',Π[v]) * w(u,v) - * This gain cache implementation maintains the Ψ(u,V') terms for all nodes and their adjacent blocks. - * Thus, the gain cache stores and maintains at most k entries per node where k := |V_P|. 
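A tiny worked instance of the objective and gain definitions from this comment, with an invented two-edge graph mapped onto a three-block target graph; the numbers only serve to make the formulas concrete.

```cpp
// Worked example of the Steiner tree (process mapping) objective and gain:
// the objective sums dist_P(block(u), block(v)) * w(u,v) over all edges, and the
// gain of moving u to block V_j is Psi(u, block(u)) - Psi(u, V_j) with
// Psi(u, V') = sum over incident edges {u,v} of dist_P(V', block(v)) * w(u,v).
#include <iostream>
#include <vector>

struct Edge { int u, v, w; };

int main() {
  const int dist[3][3] = { { 0, 1, 2 }, { 1, 0, 1 }, { 2, 1, 0 } };  // target graph P
  const std::vector<Edge> edges = { { 0, 1, 3 }, { 1, 2, 2 } };      // weighted graph G
  std::vector<int> block = { 0, 0, 2 };                              // mapping Pi

  long long objective = 0;
  for (const Edge& e : edges)
    objective += static_cast<long long>(dist[block[e.u]][block[e.v]]) * e.w;
  std::cout << "steiner_tree(G, P, Pi) = " << objective << '\n';     // 0*3 + 2*2 = 4

  // Psi(node 1, V') for every candidate block V', and the resulting gains.
  const int u = 1;
  auto psi = [&](int candidate) {
    long long sum = 0;
    for (const Edge& e : edges) {
      if (e.u == u) sum += static_cast<long long>(dist[candidate][block[e.v]]) * e.w;
      if (e.v == u) sum += static_cast<long long>(dist[candidate][block[e.u]]) * e.w;
    }
    return sum;
  };
  for (int to = 0; to < 3; ++to)
    std::cout << "g(1, " << to << ") = " << psi(block[u]) - psi(to) << '\n';
  return 0;
}
```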
-*/ -class GraphSteinerTreeGainCache { + * The gain of moving a node u from its current block V_i to a target block V_j can be + * expressed as follows: g(u,V_j) := Ψ(u,Π[u]) - Ψ(u,V_j) with Ψ(u,V') := \sum_{{u,v} \in + * E} dist_P(V',Π[v]) * w(u,v) This gain cache implementation maintains the Ψ(u,V') terms + * for all nodes and their adjacent blocks. Thus, the gain cache stores and maintains at + * most k entries per node where k := |V_P|. + */ +class GraphSteinerTreeGainCache +{ static constexpr HyperedgeID HIGH_DEGREE_THRESHOLD = ID(100000); using AdjacentBlocksIterator = IteratorRange; - public: +public: static_assert(sizeof(PartitionID) == 4 && sizeof(uint64_t) == 8); - struct EdgeState { - EdgeState() : - is_valid(0), - version(0), - update_version(0), - blocks_of_nodes(0) { + struct EdgeState + { + EdgeState() : is_valid(0), version(0), update_version(0), blocks_of_nodes(0) + { updateBlocks(kInvalidPartition, kInvalidPartition, 0); } - void updateBlocks(const PartitionID source_block, - const PartitionID target_block, - const uint32_t valid_threshold) { - blocks_of_nodes = static_cast(source_block) << 32 | static_cast(target_block); + void updateBlocks(const PartitionID source_block, const PartitionID target_block, + const uint32_t valid_threshold) + { + blocks_of_nodes = + static_cast(source_block) << 32 | static_cast(target_block); is_valid = valid_threshold; } - PartitionID sourceBlock(const uint32_t valid_threshold) const { + PartitionID sourceBlock(const uint32_t valid_threshold) const + { return is_valid == valid_threshold ? blocks_of_nodes >> 32 : kInvalidPartition; } - PartitionID targetBlock(const uint32_t valid_threshold) const { - return is_valid == valid_threshold ? blocks_of_nodes & (( UL(1) << 32 ) - 1) : kInvalidPartition; + PartitionID targetBlock(const uint32_t valid_threshold) const + { + return is_valid == valid_threshold ? 
blocks_of_nodes & ((UL(1) << 32) - 1) : + kInvalidPartition; } uint32_t is_valid; @@ -105,59 +111,51 @@ class GraphSteinerTreeGainCache { static constexpr bool invalidates_entries = false; GraphSteinerTreeGainCache() : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _ets_benefit_aggregator([&] { return initializeBenefitAggregator(); }), - _num_incident_edges_of_block(), - _adjacent_blocks(), - _edge_state(), - _uncontraction_version(0), - _ets_version() { } - - GraphSteinerTreeGainCache(const Context&) : - _is_initialized(false), - _k(kInvalidPartition), - _gain_cache(), - _ets_benefit_aggregator([&] { return initializeBenefitAggregator(); }), - _num_incident_edges_of_block(), - _adjacent_blocks(), - _edge_state(), - _uncontraction_version(0), - _ets_version() { } - - GraphSteinerTreeGainCache(const GraphSteinerTreeGainCache&) = delete; - GraphSteinerTreeGainCache & operator= (const GraphSteinerTreeGainCache &) = delete; - - GraphSteinerTreeGainCache(GraphSteinerTreeGainCache&& other) = default; - GraphSteinerTreeGainCache & operator= (GraphSteinerTreeGainCache&& other) = default; + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _ets_benefit_aggregator([&] { return initializeBenefitAggregator(); }), + _num_incident_edges_of_block(), _adjacent_blocks(), _edge_state(), + _uncontraction_version(0), _ets_version() + { + } + + GraphSteinerTreeGainCache(const Context &) : + _is_initialized(false), _k(kInvalidPartition), _gain_cache(), + _ets_benefit_aggregator([&] { return initializeBenefitAggregator(); }), + _num_incident_edges_of_block(), _adjacent_blocks(), _edge_state(), + _uncontraction_version(0), _ets_version() + { + } + + GraphSteinerTreeGainCache(const GraphSteinerTreeGainCache &) = delete; + GraphSteinerTreeGainCache &operator=(const GraphSteinerTreeGainCache &) = delete; + + GraphSteinerTreeGainCache(GraphSteinerTreeGainCache &&other) = default; + GraphSteinerTreeGainCache &operator=(GraphSteinerTreeGainCache &&other) = default; // ####################### Initialization ####################### - bool isInitialized() const { - return _is_initialized; - } + bool isInitialized() const { return _is_initialized; } - void reset(const bool run_parallel = true) { + void reset(const bool run_parallel = true) + { unused(run_parallel); _is_initialized = false; } - size_t size() const { - return _gain_cache.size(); - } + size_t size() const { return _gain_cache.size(); } // ! Initializes all gain cache entries - template - void initializeGainCache(const PartitionedHypergraph& partitioned_hg); + template + void initializeGainCache(const PartitionedHypergraph &partitioned_hg); // ! Initializes the gain cache entry for a node - template - void initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, + template + void initializeGainCacheEntryForNode(const PartitionedHypergraph &partitioned_hg, const HypernodeID hn); // ! Returns an iterator over the adjacent blocks of a node - AdjacentBlocksIterator adjacentBlocks(const HypernodeID hn) const { + AdjacentBlocksIterator adjacentBlocks(const HypernodeID hn) const + { return _adjacent_blocks.connectivitySet(hn); } @@ -165,31 +163,33 @@ class GraphSteinerTreeGainCache { // ! Returns the penalty term p(u) := -Ψ(u,Π[u]) of node u. 
MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight penaltyTerm(const HypernodeID u, const PartitionID from) const { + HyperedgeWeight penaltyTerm(const HypernodeID u, const PartitionID from) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[gain_entry_index(u, from)].load(std::memory_order_relaxed); } // ! Recomputes all gain cache entries for node u - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void recomputeInvalidTerms(const PartitionedHypergraph&, - const HypernodeID) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + recomputeInvalidTerms(const PartitionedHypergraph &, const HypernodeID) + { // Do nothing } // ! Returns the benefit term b(u, V_j) := -Ψ(u,V_j) of node u. MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return _gain_cache[gain_entry_index(u, to)].load(std::memory_order_relaxed); } // ! Returns the gain value g(u,V_j) = b(u,V_j) - p(u) for moving node u to block to. MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight gain(const HypernodeID u, - const PartitionID from, - const PartitionID to ) const { + HyperedgeWeight gain(const HypernodeID u, const PartitionID from, + const PartitionID to) const + { ASSERT(_is_initialized, "Gain cache is not initialized"); return benefitTerm(u, to) - penaltyTerm(u, from); } @@ -198,147 +198,151 @@ class GraphSteinerTreeGainCache { // ! This function returns true if the corresponding syncronized edge update triggers // ! a gain cache update. - static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate& sync_update); + static bool triggersDeltaGainUpdate(const SynchronizedEdgeUpdate &sync_update); // ! The partitioned (hyper)graph call this function when its updates its internal // ! data structures before calling the delta gain update function. The partitioned // ! (hyper)graph holds a lock for the corresponding (hyper)edge when calling this // ! function. Thus, it is guaranteed that no other thread will modify the hyperedge. - template - void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update); - + template + void notifyBeforeDeltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update); // ! This functions implements the delta gain updates for the steiner tree metric. // ! When moving a node from its current block to a target block, we iterate - // ! over its incident hyperedges and update their pin count values. After each pin count - // ! update, we call this function to update the gain cache to changes associated with - // ! corresponding hyperedge. - template - void deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update); + // ! over its incident hyperedges and update their pin count values. After each pin + // count ! update, we call this function to update the gain cache to changes associated + // with ! corresponding hyperedge. + template + void deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update); // ####################### Uncontraction ####################### - // ! This function implements the gain cache update after an uncontraction that restores node v in - // ! edge he. The uncontraction transforms the edge from a selfloop to a regular edge. 
- template - void uncontractUpdateAfterRestore(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that restores + // node v in ! edge he. The uncontraction transforms the edge from a selfloop to a + // regular edge. + template + void uncontractUpdateAfterRestore(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const HypernodeID v, const HyperedgeID he, const HypernodeID pin_count_in_part_after); - // ! This function implements the gain cache update after an uncontraction that replaces u with v in - // ! edge he. After the uncontraction only node v is contained in edge he. - template - void uncontractUpdateAfterReplacement(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const HypernodeID v, + // ! This function implements the gain cache update after an uncontraction that replaces + // u with v in ! edge he. After the uncontraction only node v is contained in edge he. + template + void uncontractUpdateAfterReplacement(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const HypernodeID v, const HyperedgeID he); // ! This function is called after restoring a selfloop. The function assumes that - // ! u is the only pin of the corresponding edge, while block_of_u is its corresponding block ID. - void restoreSinglePinHyperedge(const HypernodeID u, - const PartitionID block_of_u, + // ! u is the only pin of the corresponding edge, while block_of_u is its corresponding + // block ID. + void restoreSinglePinHyperedge(const HypernodeID u, const PartitionID block_of_u, const HyperedgeWeight weight_of_he); - // ! This function is called after restoring a net that became identical to another due to a contraction. - template - void restoreIdenticalHyperedge(const PartitionedHypergraph&, - const HyperedgeID); + // ! This function is called after restoring a net that became identical to another due + // to a contraction. + template + void restoreIdenticalHyperedge(const PartitionedHypergraph &, const HyperedgeID); // ! Notifies the gain cache that all uncontractions of the current batch are completed. 
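`batchUncontractionsCompleted()` (declared just below) bumps `_uncontraction_version`, which implicitly invalidates every `EdgeState` whose endpoint blocks were written under an older version; the two blocks themselves are packed into a single 64-bit word. The following is a simplified, single-threaded stand-in for that versioned packing, not the concurrent implementation.

```cpp
// Sketch of the versioned validity idea behind EdgeState: the blocks of an edge's
// two endpoints are packed into one 64-bit word together with the version at
// which they were written; once the global version moves on, the stored blocks
// are treated as unknown.
#include <cassert>
#include <cstdint>

constexpr int32_t kInvalid = -1;

struct PackedEdgeState {
  uint32_t written_at = 0;   // version at the time of the last update
  uint64_t blocks = 0;       // high 32 bits: source block, low 32 bits: target block

  void update(int32_t source, int32_t target, uint32_t version) {
    blocks = (static_cast<uint64_t>(static_cast<uint32_t>(source)) << 32) |
             static_cast<uint32_t>(target);
    written_at = version;
  }
  int32_t source(uint32_t version) const {
    return written_at == version ? static_cast<int32_t>(blocks >> 32) : kInvalid;
  }
  int32_t target(uint32_t version) const {
    return written_at == version ? static_cast<int32_t>(blocks & 0xffffffffu) : kInvalid;
  }
};

int main() {
  uint32_t global_version = 0;   // analogous to _uncontraction_version
  PackedEdgeState state;
  state.update(/*source=*/3, /*target=*/7, global_version);
  assert(state.source(global_version) == 3 && state.target(global_version) == 7);

  ++global_version;              // analogous to batchUncontractionsCompleted()
  assert(state.source(global_version) == kInvalid); // stale info is ignored
  return 0;
}
```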
- void batchUncontractionsCompleted() { - ++_uncontraction_version; - } + void batchUncontractionsCompleted() { ++_uncontraction_version; } // ####################### Only for Testing ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputePenaltyTerm(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight recomputePenaltyTerm( + const PartitionedHypergraph &partitioned_hg, const HypernodeID u) const + { ASSERT(partitioned_hg.hasTargetGraph()); HyperedgeWeight gain = 0; - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); const PartitionID from = partitioned_hg.partID(u); - for ( const HyperedgeID& e : partitioned_hg.incidentEdges(u) ) { - if ( !partitioned_hg.isSinglePin(e) ) { - const PartitionID block_of_target = partitioned_hg.partID(partitioned_hg.edgeTarget(e)); - gain -= target_graph.distance(from, block_of_target) * partitioned_hg.edgeWeight(e); + for(const HyperedgeID &e : partitioned_hg.incidentEdges(u)) + { + if(!partitioned_hg.isSinglePin(e)) + { + const PartitionID block_of_target = + partitioned_hg.partID(partitioned_hg.edgeTarget(e)); + gain -= + target_graph.distance(from, block_of_target) * partitioned_hg.edgeWeight(e); } } return gain; } - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight recomputeBenefitTerm(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const PartitionID to) const { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE HyperedgeWeight + recomputeBenefitTerm(const PartitionedHypergraph &partitioned_hg, const HypernodeID u, + const PartitionID to) const + { ASSERT(partitioned_hg.hasTargetGraph()); HyperedgeWeight gain = 0; - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); - for ( const HyperedgeID& e : partitioned_hg.incidentEdges(u) ) { - if ( !partitioned_hg.isSinglePin(e) ) { - const PartitionID block_of_target = partitioned_hg.partID(partitioned_hg.edgeTarget(e)); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); + for(const HyperedgeID &e : partitioned_hg.incidentEdges(u)) + { + if(!partitioned_hg.isSinglePin(e)) + { + const PartitionID block_of_target = + partitioned_hg.partID(partitioned_hg.edgeTarget(e)); gain -= target_graph.distance(to, block_of_target) * partitioned_hg.edgeWeight(e); } } return gain; } - void changeNumberOfBlocks(const PartitionID new_k) { + void changeNumberOfBlocks(const PartitionID new_k) + { ASSERT(new_k <= _k); unused(new_k); // Do nothing } - template - bool verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph& partitioned_hg) const; + template + bool + verifyTrackedAdjacentBlocksOfNodes(const PartitionedHypergraph &partitioned_hg) const; - private: +private: friend class GraphDeltaSteinerTreeGainCache; MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - size_t gain_entry_index(const HypernodeID u, const PartitionID p) const { + size_t gain_entry_index(const HypernodeID u, const PartitionID p) const + { return size_t(u) * _k + p; } // ! 
Allocates the memory required to store the gain cache - void allocateGainTable(const HypernodeID num_nodes, - const HyperedgeID num_edges, - const PartitionID k) { - if (_gain_cache.size() == 0 && k != kInvalidPartition) { + void allocateGainTable(const HypernodeID num_nodes, const HyperedgeID num_edges, + const PartitionID k) + { + if(_gain_cache.size() == 0 && k != kInvalidPartition) + { _k = k; - tbb::parallel_invoke([&] { - _gain_cache.resize( - "Refinement", "gain_cache", num_nodes * _k, true); - }, [&] { - _num_incident_edges_of_block.resize( - "Refinement", "num_incident_edges_of_block", num_nodes * _k, true); - }, [&] { - _adjacent_blocks = ds::ConnectivitySets(num_nodes, k, true); - }, [&] { - _edge_state.assign(num_edges, EdgeState()); - }); + tbb::parallel_invoke( + [&] { _gain_cache.resize("Refinement", "gain_cache", num_nodes * _k, true); }, + [&] { + _num_incident_edges_of_block.resize( + "Refinement", "num_incident_edges_of_block", num_nodes * _k, true); + }, + [&] { _adjacent_blocks = ds::ConnectivitySets(num_nodes, k, true); }, + [&] { _edge_state.assign(num_edges, EdgeState()); }); } } // ! Initializes the adjacent blocks of all nodes - template - void initializeAdjacentBlocks(const PartitionedHypergraph& partitioned_hg); + template + void initializeAdjacentBlocks(const PartitionedHypergraph &partitioned_hg); - // ! Initializes the adjacent blocks of for a node - template - void initializeAdjacentBlocksOfNode(const PartitionedHypergraph& partitioned_hg, + // ! Initializes the adjacent blocks of for a node + template + void initializeAdjacentBlocksOfNode(const PartitionedHypergraph &partitioned_hg, const HypernodeID hn); // ! Updates the adjacent blocks of a node based on a synronized hyperedge update - template - void updateAdjacentBlocks(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update); + template + void updateAdjacentBlocks(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update); // ! Increments the number of incident edges of node u that contains pins of block to. // ! If the value increases to one, we add the block to the connectivity set of the node @@ -346,27 +350,25 @@ class GraphSteinerTreeGainCache { HyperedgeID incrementIncidentEdges(const HypernodeID u, const PartitionID to); // ! Decrements the number of incident edges of node u that contains pins of block to - // ! If the value decreases to zero, we remove the block from the connectivity set of the node. + // ! If the value decreases to zero, we remove the block from the connectivity set of + // the node. HyperedgeID decrementIncidentEdges(const HypernodeID u, const PartitionID to); // ! Initializes the benefit and penalty terms for a node u - template - void initializeGainCacheEntryForNode(const PartitionedHypergraph& partitioned_hg, + template + void initializeGainCacheEntryForNode(const PartitionedHypergraph &partitioned_hg, const HypernodeID u, - vec& benefit_aggregator); + vec &benefit_aggregator); // ! Initializes the gain cache entry of moving u to block 'to'. The function is // ! thread-safe, meaning that it supports correct initialization while simultanously // ! performing gain cache updates. 
- template - void initializeGainCacheEntry(const PartitionedHypergraph& partitioned_hg, - const HypernodeID hn, - const PartitionID to, - ds::Array& edge_locks); - - vec initializeBenefitAggregator() const { - return vec(_k, 0); - } + template + void initializeGainCacheEntry(const PartitionedHypergraph &partitioned_hg, + const HypernodeID hn, const PartitionID to, + ds::Array &edge_locks); + + vec initializeBenefitAggregator() const { return vec(_k, 0); } // ! Indicate whether or not the gain cache is initialized bool _is_initialized; @@ -374,23 +376,24 @@ class GraphSteinerTreeGainCache { // ! Number of blocks PartitionID _k; - // ! Array of size |V| * (k + 1), which stores the benefit and penalty terms of each node. - ds::Array< CAtomic > _gain_cache; + // ! Array of size |V| * (k + 1), which stores the benefit and penalty terms of each + // node. + ds::Array > _gain_cache; // ! Thread-local for initializing gain cache entries - tbb::enumerable_thread_specific> _ets_benefit_aggregator; + tbb::enumerable_thread_specific > _ets_benefit_aggregator; // ! This array stores the number of incident hyperedges that contains // ! pins of a particular block for each node. - ds::Array< CAtomic > _num_incident_edges_of_block; + ds::Array > _num_incident_edges_of_block; // ! Stores the adjacent blocks of a node ds::ConnectivitySets _adjacent_blocks; // ! This array stores a version ID for each hyperedge. The partitioned hypergraph // ! increments the version for a hyperedge before it updates it internal data structure - // ! (see notifyBeforeDeltaGainUpdate(...)). This can be use when initialize a new gain cache entries, while - // ! other threads perform concurrent moves on the data structure. + // ! (see notifyBeforeDeltaGainUpdate(...)). This can be use when initialize a new gain + // cache entries, while ! other threads perform concurrent moves on the data structure. vec _edge_state; // ! After calling batchUncontractionsCompleted(), we increment this counter marking all @@ -398,121 +401,128 @@ class GraphSteinerTreeGainCache { uint32_t _uncontraction_version; // ! Array to store version IDs when we lazily initialize a gain cache entry - tbb::enumerable_thread_specific> _ets_version; + tbb::enumerable_thread_specific > _ets_version; }; /** - * In our FM algorithm, the different local searches perform nodes moves locally not visible for other - * threads. The delta gain cache stores these local changes relative to the shared - * gain cache. For example, the gain can be computed as follows - * g'(u,V') := g(u,V') + Δg(u,V') - * where g(u,V') is the gain stored in the shared gain cache and Δg(u,V') is the gain stored in - * the delta gain cache after performing some moves locally. To maintain Δg(u,V'), we use a hash - * table that only stores entries affected by a gain cache update. -*/ -class GraphDeltaSteinerTreeGainCache { + * In our FM algorithm, the different local searches perform nodes moves locally not + * visible for other threads. The delta gain cache stores these local changes relative to + * the shared gain cache. For example, the gain can be computed as follows g'(u,V') := + * g(u,V') + Δg(u,V') where g(u,V') is the gain stored in the shared gain cache and + * Δg(u,V') is the gain stored in the delta gain cache after performing some moves + * locally. To maintain Δg(u,V'), we use a hash table that only stores entries affected by + * a gain cache update. 
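As described above, a local search reads gains as g'(u,V') = g(u,V') + Δg(u,V'), where Δ is kept in a small table that only holds the entries touched by local moves. The condensed sketch below shows that two-layer lookup with an ordinary hash map; the real class additionally tracks invalidated entries and adjacent blocks, which this example omits.

```cpp
// Sketch of layering a thread-local delta table on top of a shared gain table:
// a lookup combines the shared value with the delta, g'(u,V') = g(u,V') + delta.
#include <cstddef>
#include <iostream>
#include <unordered_map>
#include <vector>

struct DeltaCache {
  const std::vector<int>& shared;              // shared gain table, index = u * k + to
  int k;
  std::unordered_map<std::size_t, int> delta;  // only locally touched entries

  int benefit(int u, int to) const {
    const std::size_t idx = static_cast<std::size_t>(u) * k + to;
    const auto it = delta.find(idx);
    return shared[idx] + (it != delta.end() ? it->second : 0);
  }
  void applyLocalDelta(int u, int to, int d) {
    delta[static_cast<std::size_t>(u) * k + to] += d;
  }
};

int main() {
  const int k = 2;
  const std::vector<int> shared = { 5, -3, 0, 7 };    // two nodes, two blocks
  DeltaCache local{ shared, k, {} };

  local.applyLocalDelta(/*u=*/0, /*to=*/1, /*d=*/4);  // a local move changed this entry
  std::cout << local.benefit(0, 1) << '\n';           // -3 + 4 = 1
  std::cout << local.benefit(1, 0) << '\n';           // untouched entry: 0
  return 0;
}
```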
+ */ +class GraphDeltaSteinerTreeGainCache +{ using DeltaAdjacentBlocks = ds::DeltaConnectivitySet; using AdjacentBlocksIterator = typename DeltaAdjacentBlocks::Iterator; - public: +public: static constexpr bool requires_connectivity_set = true; - GraphDeltaSteinerTreeGainCache(const GraphSteinerTreeGainCache& gain_cache) : - _gain_cache(gain_cache), - _gain_cache_delta(), - _invalid_gain_cache_entry(), - _num_incident_edges_delta(), - _adjacent_blocks_delta(gain_cache._k) { + GraphDeltaSteinerTreeGainCache(const GraphSteinerTreeGainCache &gain_cache) : + _gain_cache(gain_cache), _gain_cache_delta(), _invalid_gain_cache_entry(), + _num_incident_edges_delta(), _adjacent_blocks_delta(gain_cache._k) + { _adjacent_blocks_delta.setConnectivitySet(&_gain_cache._adjacent_blocks); } // ####################### Initialize & Reset ####################### - void initialize(const size_t size) { + void initialize(const size_t size) + { _adjacent_blocks_delta.setNumberOfBlocks(_gain_cache._k); _gain_cache_delta.initialize(size); _invalid_gain_cache_entry.initialize(size); _num_incident_edges_delta.initialize(size); } - void clear() { + void clear() + { _gain_cache_delta.clear(); _invalid_gain_cache_entry.clear(); _num_incident_edges_delta.clear(); _adjacent_blocks_delta.reset(); } - void dropMemory() { + void dropMemory() + { _gain_cache_delta.freeInternalData(); _invalid_gain_cache_entry.freeInternalData(); _num_incident_edges_delta.freeInternalData(); _adjacent_blocks_delta.freeInternalData(); } - size_t size_in_bytes() const { - return _gain_cache_delta.size_in_bytes() + - _invalid_gain_cache_entry.size_in_bytes() + - _num_incident_edges_delta.size_in_bytes() + - _adjacent_blocks_delta.size_in_bytes(); + size_t size_in_bytes() const + { + return _gain_cache_delta.size_in_bytes() + _invalid_gain_cache_entry.size_in_bytes() + + _num_incident_edges_delta.size_in_bytes() + + _adjacent_blocks_delta.size_in_bytes(); } // ####################### Gain Computation ####################### // ! Returns an iterator over the adjacent blocks of a node - IteratorRange adjacentBlocks(const HypernodeID hn) const { + IteratorRange adjacentBlocks(const HypernodeID hn) const + { return _adjacent_blocks_delta.connectivitySet(hn); } // ! Returns the penalty term p(u) := -Ψ(u,Π[u]) of node u. MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight penaltyTerm(const HypernodeID u, - const PartitionID from) const { + HyperedgeWeight penaltyTerm(const HypernodeID u, const PartitionID from) const + { return benefitTerm(u, from); } // ! Returns the benefit term b(u, V_j) := -Ψ(u,V_j) of node u. MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight benefitTerm(const HypernodeID u, - const PartitionID to) const { + HyperedgeWeight benefitTerm(const HypernodeID u, const PartitionID to) const + { ASSERT(to != kInvalidPartition && to < _gain_cache._k); const bool use_benefit_term_from_shared_gain_cache = - !_invalid_gain_cache_entry.contains(_gain_cache.gain_entry_index(u, to)) && - _gain_cache._adjacent_blocks.contains(u, to); + !_invalid_gain_cache_entry.contains(_gain_cache.gain_entry_index(u, to)) && + _gain_cache._adjacent_blocks.contains(u, to); const HyperedgeWeight benefit_term = - use_benefit_term_from_shared_gain_cache * _gain_cache.benefitTerm(u, to); - const HyperedgeWeight* benefit_delta = - _gain_cache_delta.get_if_contained(_gain_cache.gain_entry_index(u, to)); - return benefit_term + ( benefit_delta ? 
*benefit_delta : 0 ); + use_benefit_term_from_shared_gain_cache * _gain_cache.benefitTerm(u, to); + const HyperedgeWeight *benefit_delta = + _gain_cache_delta.get_if_contained(_gain_cache.gain_entry_index(u, to)); + return benefit_term + (benefit_delta ? *benefit_delta : 0); } // ! Returns the gain value g(u,V_j) = b(u,V_j) - p(u) for moving node u to block to. MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - HyperedgeWeight gain(const HypernodeID u, - const PartitionID from, - const PartitionID to ) const { + HyperedgeWeight gain(const HypernodeID u, const PartitionID from, + const PartitionID to) const + { return benefitTerm(u, to) - penaltyTerm(u, from); } - // ####################### Delta Gain Update ####################### + // ####################### Delta Gain Update ####################### - template - MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void deltaGainUpdate(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { + template + MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE void + deltaGainUpdate(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) + { ASSERT(sync_update.target_graph); const HyperedgeID he = sync_update.he; - if ( !partitioned_hg.isSinglePin(he) ) { + if(!partitioned_hg.isSinglePin(he)) + { const PartitionID from = sync_update.from; const PartitionID to = sync_update.to; const HyperedgeWeight edge_weight = sync_update.edge_weight; - const TargetGraph& target_graph = *sync_update.target_graph; + const TargetGraph &target_graph = *sync_update.target_graph; const HypernodeID v = partitioned_hg.edgeTarget(he); - for ( const PartitionID& target : adjacentBlocks(v) ) { - const HyperedgeWeight delta = ( target_graph.distance(from, target) - - target_graph.distance(to, target) ) * edge_weight ; + for(const PartitionID &target : adjacentBlocks(v)) + { + const HyperedgeWeight delta = + (target_graph.distance(from, target) - target_graph.distance(to, target)) * + edge_weight; _gain_cache_delta[_gain_cache.gain_entry_index(v, target)] += delta; } @@ -523,29 +533,38 @@ class GraphDeltaSteinerTreeGainCache { } } - // ####################### Miscellaneous ####################### + // ####################### Miscellaneous ####################### - void memoryConsumption(utils::MemoryTreeNode* parent) const { + void memoryConsumption(utils::MemoryTreeNode *parent) const + { ASSERT(parent); - utils::MemoryTreeNode* gain_cache_delta_node = parent->addChild("Delta Gain Cache"); + utils::MemoryTreeNode *gain_cache_delta_node = parent->addChild("Delta Gain Cache"); gain_cache_delta_node->updateSize(size_in_bytes()); } - private: +private: // ! 
Updates the adjacent blocks of a node based on a synronized hyperedge update - template - void updateAdjacentBlocks(const PartitionedHypergraph& partitioned_hg, - const SynchronizedEdgeUpdate& sync_update) { - if ( sync_update.pin_count_in_from_part_after == 0 ) { - for ( const HypernodeID& pin : partitioned_hg.pins(sync_update.he) ) { + template + void updateAdjacentBlocks(const PartitionedHypergraph &partitioned_hg, + const SynchronizedEdgeUpdate &sync_update) + { + if(sync_update.pin_count_in_from_part_after == 0) + { + for(const HypernodeID &pin : partitioned_hg.pins(sync_update.he)) + { decrementIncidentEdges(pin, sync_update.from); } } - if ( sync_update.pin_count_in_to_part_after == 1 ) { - for ( const HypernodeID& pin : partitioned_hg.pins(sync_update.he) ) { - const HyperedgeID incident_edges_after = incrementIncidentEdges(pin, sync_update.to); - if ( incident_edges_after == 1 ) { - _invalid_gain_cache_entry[_gain_cache.gain_entry_index(pin, sync_update.to)] = true; + if(sync_update.pin_count_in_to_part_after == 1) + { + for(const HypernodeID &pin : partitioned_hg.pins(sync_update.he)) + { + const HyperedgeID incident_edges_after = + incrementIncidentEdges(pin, sync_update.to); + if(incident_edges_after == 1) + { + _invalid_gain_cache_entry[_gain_cache.gain_entry_index(pin, sync_update.to)] = + true; initializeGainCacheEntry(partitioned_hg, pin, sync_update.to); } } @@ -553,13 +572,16 @@ class GraphDeltaSteinerTreeGainCache { } // ! Decrements the number of incident edges of node u that contains pins of block to - // ! If the value decreases to zero, we remove the block from the connectivity set of the node - HypernodeID decrementIncidentEdges(const HypernodeID hn, const PartitionID to) { + // ! If the value decreases to zero, we remove the block from the connectivity set of + // the node + HypernodeID decrementIncidentEdges(const HypernodeID hn, const PartitionID to) + { const HypernodeID shared_incident_count = - _gain_cache._num_incident_edges_of_block[_gain_cache.gain_entry_index(hn, to)]; + _gain_cache._num_incident_edges_of_block[_gain_cache.gain_entry_index(hn, to)]; const HypernodeID thread_local_incident_count_after = - --_num_incident_edges_delta[_gain_cache.gain_entry_index(hn, to)]; - if ( shared_incident_count + thread_local_incident_count_after == 0 ) { + --_num_incident_edges_delta[_gain_cache.gain_entry_index(hn, to)]; + if(shared_incident_count + thread_local_incident_count_after == 0) + { _adjacent_blocks_delta.remove(hn, to); } return shared_incident_count + thread_local_incident_count_after; @@ -568,26 +590,29 @@ class GraphDeltaSteinerTreeGainCache { // ! Increments the number of incident edges of node u that contains pins of block to. // ! If the value increases to one, we add the block to the connectivity set of the node // ! u and initialize the gain cache entry for moving u to that block. 
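In the delta cache, adjacency of a block to a node is decided by the sum of the shared incident-edge counter and a thread-local signed delta, as in the `decrementIncidentEdges`/`incrementIncidentEdges` helpers here. A small illustrative example of that combined count follows (values are made up).

```cpp
// Sketch of the combined count used by the delta cache: the adjacency test adds
// the shared (global) incident-edge counter and a thread-local signed delta, so
// local moves can change a node's adjacent blocks without touching shared state.
#include <iostream>
#include <map>
#include <utility>
#include <vector>

int main() {
  const int k = 2;
  // Shared counters, conceptually maintained by all threads; index = u * k + block.
  const std::vector<int> shared_count = { 1, 0 };  // node 0: one edge into block 0

  // Thread-local deltas applied by the moves of this local search only.
  std::map<std::pair<int, int>, int> local_delta;  // key = (node, block)
  local_delta[{ 0, 0 }] -= 1;  // local move removed the last pin in block 0
  local_delta[{ 0, 1 }] += 1;  // ... and added one in block 1

  for (int block = 0; block < k; ++block) {
    const int total = shared_count[0 * k + block] + local_delta[{ 0, block }];
    std::cout << "block " << block << (total > 0 ? " adjacent" : " not adjacent")
              << " to node 0 in the local view\n";
  }
  return 0;
}
```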
- HypernodeID incrementIncidentEdges(const HypernodeID hn, const PartitionID to) { + HypernodeID incrementIncidentEdges(const HypernodeID hn, const PartitionID to) + { const HypernodeID shared_incident_count = - _gain_cache._num_incident_edges_of_block[_gain_cache.gain_entry_index(hn, to)]; + _gain_cache._num_incident_edges_of_block[_gain_cache.gain_entry_index(hn, to)]; const HypernodeID thread_local_incident_count_after = - ++_num_incident_edges_delta[_gain_cache.gain_entry_index(hn, to)]; - if ( shared_incident_count + thread_local_incident_count_after == 1 ) { + ++_num_incident_edges_delta[_gain_cache.gain_entry_index(hn, to)]; + if(shared_incident_count + thread_local_incident_count_after == 1) + { _adjacent_blocks_delta.add(hn, to); } return shared_incident_count + thread_local_incident_count_after; } // ! Initializes a gain cache entry - template - void initializeGainCacheEntry(const PartitionedHypergraph& partitioned_hg, - const HypernodeID u, - const PartitionID to) { + template + void initializeGainCacheEntry(const PartitionedHypergraph &partitioned_hg, + const HypernodeID u, const PartitionID to) + { ASSERT(partitioned_hg.hasTargetGraph()); - const TargetGraph& target_graph = *partitioned_hg.targetGraph(); + const TargetGraph &target_graph = *partitioned_hg.targetGraph(); HyperedgeWeight gain = 0; - for ( const HyperedgeID& he : partitioned_hg.incidentEdges(u) ) { + for(const HyperedgeID &he : partitioned_hg.incidentEdges(u)) + { ASSERT(partitioned_hg.edgeSource(he) == u); const HypernodeID v = partitioned_hg.edgeTarget(he); const PartitionID block_of_v = partitioned_hg.partID(v); @@ -596,7 +621,7 @@ class GraphDeltaSteinerTreeGainCache { _gain_cache_delta[_gain_cache.gain_entry_index(u, to)] = gain; } - const GraphSteinerTreeGainCache& _gain_cache; + const GraphSteinerTreeGainCache &_gain_cache; // ! Stores the delta of each locally touched gain cache entry // ! 
relative to the shared gain cache @@ -615,4 +640,4 @@ class GraphDeltaSteinerTreeGainCache { DeltaAdjacentBlocks _adjacent_blocks_delta; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_computation_for_graphs.h b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_computation_for_graphs.h index 0d5700ec6..1977a3679 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_computation_for_graphs.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_computation_for_graphs.h @@ -30,30 +30,36 @@ #include "tbb/enumerable_thread_specific.h" -#include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" -#include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_attributed_gains_for_graphs.h" -#include "mt-kahypar/partition/mapping/target_graph.h" -#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/datastructures/sparse_map.h" +#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/partition/mapping/target_graph.h" +#include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" +#include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_attributed_gains_for_graphs.h" namespace mt_kahypar { -class GraphSteinerTreeGainComputation : public GainComputationBase { - using Base = GainComputationBase; +class GraphSteinerTreeGainComputation + : public GainComputationBase +{ + using Base = GainComputationBase; using RatingMap = typename Base::RatingMap; static constexpr bool enable_heavy_assert = false; static constexpr size_t BITS_PER_BLOCK = ds::StaticBitset::BITS_PER_BLOCK; - public: - GraphSteinerTreeGainComputation(const Context& context, - bool disable_randomization = false) : - Base(context, disable_randomization), - _local_adjacent_blocks([&] { return constructBitset(); }), - _all_blocks(context.partition.k), - _ets_incident_edge_weights([&] { return constructIncidentEdgeWeightVector(); }) { - for ( PartitionID to = 0; to < context.partition.k; ++to ) { +public: + GraphSteinerTreeGainComputation(const Context &context, + bool disable_randomization = false) : + Base(context, disable_randomization), + _local_adjacent_blocks([&] { return constructBitset(); }), + _all_blocks(context.partition.k), + _ets_incident_edge_weights([&] { return constructIncidentEdgeWeightVector(); }) + { + for(PartitionID to = 0; to < context.partition.k; ++to) + { _all_blocks.set(to); } } @@ -63,37 +69,37 @@ class GraphSteinerTreeGainComputation : public GainComputationBase - void precomputeGains(const PartitionedHypergraph& phg, - const HypernodeID hn, - RatingMap& tmp_scores, - Gain&, - const bool consider_non_adjacent_blocks) { + template + void precomputeGains(const PartitionedHypergraph &phg, const HypernodeID hn, + RatingMap &tmp_scores, Gain &, + const bool consider_non_adjacent_blocks) + { ASSERT(tmp_scores.size() == 0, "Rating map not empty"); // The gain of moving a node u from its current block Π[u] to target block V_j can // be expressed as follows for the steiner tree objective function: // g(u, V_j) := \sum_{ {u,v} \in I(u) } ( dist(V_j, Π[v]) - dist(Π[u],Π[v]) ) * w(u,v) - // Here, dist(V',V'') is the shortest path between block V' and V'' in the target graph. 
- // Computing the gain to all adjacent blocks of the nodes has a time complexity of - // O(|I(u)|*|R(u)|) where R(u) is the set of all adjacent blocks of node u and I(u) is - // the set of all incident edges of node u. - // In the following, we use the following reformulation of the gain: - // gain(u, V_j) := \sum_{V_k \in R(u)} (dist(V_j, V_k) - dist(Π[u], V_k)) * w(u, V_k) - // Here, w(u, V_k) is the weight of all edges connecting u to block V_k which can be - // precomputed in O(|I(u)|) time. After precomputation, we can compute the gain - // to all adjacent blocks in time O(|R(u)|²) => total gain computation complexity than - // is O(|I(u)| * |R(u)|²) which is faster than the naive approach. + // Here, dist(V',V'') is the shortest path between block V' and V'' in the target + // graph. Computing the gain to all adjacent blocks of the nodes has a time complexity + // of O(|I(u)|*|R(u)|) where R(u) is the set of all adjacent blocks of node u and I(u) + // is the set of all incident edges of node u. In the following, we use the following + // reformulation of the gain: gain(u, V_j) := \sum_{V_k \in R(u)} (dist(V_j, V_k) - + // dist(Π[u], V_k)) * w(u, V_k) Here, w(u, V_k) is the weight of all edges connecting + // u to block V_k which can be precomputed in O(|I(u)|) time. After precomputation, we + // can compute the gain to all adjacent blocks in time O(|R(u)|²) => total gain + // computation complexity than is O(|I(u)| * |R(u)|²) which is faster than the naive + // approach. // Precompute adjacent blocks of node and the w(u, V_k) terms const PartitionID from = phg.partID(hn); - vec& incident_edge_weights = _ets_incident_edge_weights.local(); - ds::Bitset& adjacent_blocks = consider_non_adjacent_blocks ? - _all_blocks : _local_adjacent_blocks.local(); - ds::StaticBitset adjacent_blocks_view( - adjacent_blocks.numBlocks(), adjacent_blocks.data()); + vec &incident_edge_weights = _ets_incident_edge_weights.local(); + ds::Bitset &adjacent_blocks = + consider_non_adjacent_blocks ? _all_blocks : _local_adjacent_blocks.local(); + ds::StaticBitset adjacent_blocks_view(adjacent_blocks.numBlocks(), + adjacent_blocks.data()); adjacent_blocks.set(from); - for (const HyperedgeID& he : phg.incidentEdges(hn)) { + for(const HyperedgeID &he : phg.incidentEdges(hn)) + { const PartitionID block_of_target = phg.partID(phg.edgeTarget(he)); adjacent_blocks.set(block_of_target); incident_edge_weights[block_of_target] += phg.edgeWeight(he); @@ -102,44 +108,47 @@ class GraphSteinerTreeGainComputation : public GainComputationBase constructIncidentEdgeWeightVector() const { + vec constructIncidentEdgeWeightVector() const + { return vec(_context.partition.k, 0); } @@ -150,8 +159,9 @@ class GraphSteinerTreeGainComputation : public GainComputationBase _local_adjacent_blocks; ds::Bitset _all_blocks; - // ! Array for precomputing the weight of all edges connecting a node to a particular block - tbb::enumerable_thread_specific> _ets_incident_edge_weights; + // ! 
Array for precomputing the weight of all edges connecting a node to a particular + // block + tbb::enumerable_thread_specific > _ets_incident_edge_weights; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/i_rebalancer.h b/mt-kahypar/partition/refinement/i_rebalancer.h index e700f5318..0acb6b5b5 100644 --- a/mt-kahypar/partition/refinement/i_rebalancer.h +++ b/mt-kahypar/partition/refinement/i_rebalancer.h @@ -37,42 +37,47 @@ namespace mt_kahypar { -class IRebalancer: public IRefiner { +class IRebalancer : public IRefiner +{ - public: +public: virtual ~IRebalancer() = default; - bool refineAndOutputMoves(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const parallel::scalable_vector& refinement_nodes, - parallel::scalable_vector>& moves_by_part, - Metrics& best_metrics, - const double time_limit) { - return refineAndOutputMovesImpl(hypergraph, refinement_nodes, moves_by_part, best_metrics, time_limit); + bool refineAndOutputMoves( + mt_kahypar_partitioned_hypergraph_t &hypergraph, + const parallel::scalable_vector &refinement_nodes, + parallel::scalable_vector > &moves_by_part, + Metrics &best_metrics, const double time_limit) + { + return refineAndOutputMovesImpl(hypergraph, refinement_nodes, moves_by_part, + best_metrics, time_limit); } - bool refineAndOutputMovesLinear(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const parallel::scalable_vector& refinement_nodes, - parallel::scalable_vector& moves, - Metrics& best_metrics, - const double time_limit) { - return refineAndOutputMovesLinearImpl(hypergraph, refinement_nodes, moves, best_metrics, time_limit); + bool refineAndOutputMovesLinear( + mt_kahypar_partitioned_hypergraph_t &hypergraph, + const parallel::scalable_vector &refinement_nodes, + parallel::scalable_vector &moves, Metrics &best_metrics, + const double time_limit) + { + return refineAndOutputMovesLinearImpl(hypergraph, refinement_nodes, moves, + best_metrics, time_limit); } - protected: +protected: IRebalancer() = default; - private: - virtual bool refineAndOutputMovesImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const parallel::scalable_vector& refinement_nodes, - parallel::scalable_vector>& moves_by_part, - Metrics& best_metrics, - const double time_limit) = 0; +private: + virtual bool refineAndOutputMovesImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph, + const parallel::scalable_vector &refinement_nodes, + parallel::scalable_vector > &moves_by_part, + Metrics &best_metrics, const double time_limit) = 0; - virtual bool refineAndOutputMovesLinearImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const parallel::scalable_vector& refinement_nodes, - parallel::scalable_vector& moves, - Metrics& best_metrics, - const double time_limit) = 0; + virtual bool refineAndOutputMovesLinearImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph, + const parallel::scalable_vector &refinement_nodes, + parallel::scalable_vector &moves, Metrics &best_metrics, + const double time_limit) = 0; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/i_refiner.h b/mt-kahypar/partition/refinement/i_refiner.h index ba50b082c..04f52e5d6 100644 --- a/mt-kahypar/partition/refinement/i_refiner.h +++ b/mt-kahypar/partition/refinement/i_refiner.h @@ -36,37 +36,38 @@ namespace mt_kahypar { -class IRefiner { +class IRefiner +{ - public: - IRefiner(const IRefiner&) = delete; - IRefiner(IRefiner&&) = delete; - IRefiner & operator= (const IRefiner &) = delete; - IRefiner & operator= 
(IRefiner &&) = delete; +public: + IRefiner(const IRefiner &) = delete; + IRefiner(IRefiner &&) = delete; + IRefiner &operator=(const IRefiner &) = delete; + IRefiner &operator=(IRefiner &&) = delete; virtual ~IRefiner() = default; - void initialize(mt_kahypar_partitioned_hypergraph_t& hypergraph) { + void initialize(mt_kahypar_partitioned_hypergraph_t &hypergraph) + { initializeImpl(hypergraph); } - bool refine(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const parallel::scalable_vector& refinement_nodes, - Metrics& best_metrics, - const double time_limit) { + bool refine(mt_kahypar_partitioned_hypergraph_t &hypergraph, + const parallel::scalable_vector &refinement_nodes, + Metrics &best_metrics, const double time_limit) + { return refineImpl(hypergraph, refinement_nodes, best_metrics, time_limit); } - protected: +protected: IRefiner() = default; - private: - virtual void initializeImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph) = 0; +private: + virtual void initializeImpl(mt_kahypar_partitioned_hypergraph_t &hypergraph) = 0; - virtual bool refineImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const parallel::scalable_vector& refinement_nodes, - Metrics& best_metrics, - const double time_limit) = 0; + virtual bool refineImpl(mt_kahypar_partitioned_hypergraph_t &hypergraph, + const parallel::scalable_vector &refinement_nodes, + Metrics &best_metrics, const double time_limit) = 0; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.cpp b/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.cpp index ec32e79d7..ff5e7d9a6 100644 --- a/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.cpp +++ b/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.cpp @@ -32,358 +32,448 @@ #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/refinement/gains/gain_definitions.h" +#include "mt-kahypar/utils/cast.h" #include "mt-kahypar/utils/randomize.h" -#include "mt-kahypar/utils/utilities.h" #include "mt-kahypar/utils/timer.h" -#include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/utils/utilities.h" namespace mt_kahypar { - template - template - bool LabelPropagationRefiner::moveVertex(PartitionedHypergraph& hypergraph, - const HypernodeID hn, - NextActiveNodes& next_active_nodes, - const F& objective_delta) { - bool is_moved = false; - ASSERT(hn != kInvalidHypernode); - if ( hypergraph.isBorderNode(hn) && !hypergraph.isFixed(hn) ) { - ASSERT(hypergraph.nodeIsEnabled(hn)); - - Move best_move = _gain.computeMaxGainMove(hypergraph, hn, false, false, unconstrained); - // We perform a move if it either improves the solution quality or, in case of a - // zero gain move, the balance of the solution. 
- const bool positive_gain = best_move.gain < 0; - const bool zero_gain_move = (_context.refinement.label_propagation.rebalancing && - best_move.gain == 0 && - hypergraph.partWeight(best_move.from) - 1 > - hypergraph.partWeight(best_move.to) + 1 && - hypergraph.partWeight(best_move.to) < - _context.partition.perfect_balance_part_weights[best_move.to]); - const bool perform_move = positive_gain || zero_gain_move; - if (best_move.from != best_move.to && perform_move) { - PartitionID from = best_move.from; - PartitionID to = best_move.to; - - Gain delta_before = _gain.localDelta(); - bool changed_part = changeNodePart(hypergraph, hn, from, to, objective_delta); - ASSERT(!unconstrained || changed_part); - is_moved = true; - if (unconstrained || changed_part) { - // In case the move to block 'to' was successful, we verify that the "real" gain - // of the move is either equal to our computed gain or if not, still improves - // the solution quality. - Gain move_delta = _gain.localDelta() - delta_before; - bool accept_move = (move_delta == best_move.gain || move_delta <= 0); - if (accept_move) { - if constexpr (!unconstrained) { - // in unconstrained case, we don't want to activate neighbors if the move is undone - // by the rebalancing - activateNodeAndNeighbors(hypergraph, next_active_nodes, hn, true); - } - } else { - // If the real gain is not equal with the computed gain and - // worsens the solution quality we revert the move. - ASSERT(hypergraph.partID(hn) == to); - changeNodePart(hypergraph, hn, to, from, objective_delta); +template +template +bool LabelPropagationRefiner::moveVertex( + PartitionedHypergraph &hypergraph, const HypernodeID hn, + NextActiveNodes &next_active_nodes, const F &objective_delta) +{ + bool is_moved = false; + ASSERT(hn != kInvalidHypernode); + if(hypergraph.isBorderNode(hn) && !hypergraph.isFixed(hn)) + { + ASSERT(hypergraph.nodeIsEnabled(hn)); + + Move best_move = + _gain.computeMaxGainMove(hypergraph, hn, false, false, unconstrained); + // We perform a move if it either improves the solution quality or, in case of a + // zero gain move, the balance of the solution. + const bool positive_gain = best_move.gain < 0; + const bool zero_gain_move = + (_context.refinement.label_propagation.rebalancing && best_move.gain == 0 && + hypergraph.partWeight(best_move.from) - 1 > + hypergraph.partWeight(best_move.to) + 1 && + hypergraph.partWeight(best_move.to) < + _context.partition.perfect_balance_part_weights[best_move.to]); + const bool perform_move = positive_gain || zero_gain_move; + if(best_move.from != best_move.to && perform_move) + { + PartitionID from = best_move.from; + PartitionID to = best_move.to; + + Gain delta_before = _gain.localDelta(); + bool changed_part = + changeNodePart(hypergraph, hn, from, to, objective_delta); + ASSERT(!unconstrained || changed_part); + is_moved = true; + if(unconstrained || changed_part) + { + // In case the move to block 'to' was successful, we verify that the "real" gain + // of the move is either equal to our computed gain or if not, still improves + // the solution quality. 
+ Gain move_delta = _gain.localDelta() - delta_before; + bool accept_move = (move_delta == best_move.gain || move_delta <= 0); + if(accept_move) + { + if constexpr(!unconstrained) + { + // in unconstrained case, we don't want to activate neighbors if the move is + // undone by the rebalancing + activateNodeAndNeighbors(hypergraph, next_active_nodes, hn, true); } } + else + { + // If the real gain is not equal with the computed gain and + // worsens the solution quality we revert the move. + ASSERT(hypergraph.partID(hn) == to); + changeNodePart(hypergraph, hn, to, from, objective_delta); + } } } - - return is_moved; } - template - bool LabelPropagationRefiner::refineImpl(mt_kahypar_partitioned_hypergraph_t& phg, - const vec& refinement_nodes, - Metrics& best_metrics, - const double) { - PartitionedHypergraph& hypergraph = utils::cast(phg); - resizeDataStructuresForCurrentK(); - _gain.reset(); - _next_active.reset(); - Gain old_quality = best_metrics.quality; - - // Initialize set of active vertices - initializeActiveNodes(hypergraph, refinement_nodes); - - // Perform Label Propagation - labelPropagation(hypergraph, best_metrics); - - HEAVY_REFINEMENT_ASSERT(hypergraph.checkTrackedPartitionInformation(_gain_cache)); - HEAVY_REFINEMENT_ASSERT(best_metrics.quality == - metrics::quality(hypergraph, _context, - !_context.refinement.label_propagation.execute_sequential), - V(best_metrics.quality) << V(metrics::quality(hypergraph, _context, - !_context.refinement.label_propagation.execute_sequential))); - - // Update metrics statistics - Gain delta = old_quality - best_metrics.quality; - ASSERT(delta >= 0, "LP refiner worsen solution quality"); - utils::Utilities::instance().getStats(_context.utility_id).update_stat("lp_improvement", delta); - return delta > 0; + return is_moved; +} + +template +bool LabelPropagationRefiner::refineImpl( + mt_kahypar_partitioned_hypergraph_t &phg, const vec &refinement_nodes, + Metrics &best_metrics, const double) +{ + PartitionedHypergraph &hypergraph = utils::cast(phg); + resizeDataStructuresForCurrentK(); + _gain.reset(); + _next_active.reset(); + Gain old_quality = best_metrics.quality; + + // Initialize set of active vertices + initializeActiveNodes(hypergraph, refinement_nodes); + + // Perform Label Propagation + labelPropagation(hypergraph, best_metrics); + + HEAVY_REFINEMENT_ASSERT(hypergraph.checkTrackedPartitionInformation(_gain_cache)); + HEAVY_REFINEMENT_ASSERT( + best_metrics.quality == + metrics::quality(hypergraph, _context, + !_context.refinement.label_propagation.execute_sequential), + V(best_metrics.quality) << V( + metrics::quality(hypergraph, _context, + !_context.refinement.label_propagation.execute_sequential))); + + // Update metrics statistics + Gain delta = old_quality - best_metrics.quality; + ASSERT(delta >= 0, "LP refiner worsen solution quality"); + utils::Utilities::instance() + .getStats(_context.utility_id) + .update_stat("lp_improvement", delta); + return delta > 0; +} + +template +void LabelPropagationRefiner::labelPropagation( + PartitionedHypergraph &hypergraph, Metrics &best_metrics) +{ + NextActiveNodes next_active_nodes; + vec rebalance_moves; + bool should_stop = false; + for(size_t i = 0; i < _context.refinement.label_propagation.maximum_iterations && + !should_stop && !_active_nodes.empty(); + ++i) + { + should_stop = labelPropagationRound( + hypergraph, next_active_nodes, best_metrics, rebalance_moves, + _context.refinement.label_propagation.unconstrained); + + if(_context.refinement.label_propagation.execute_sequential) + 
{ + _active_nodes = next_active_nodes.copy_sequential(); + } + else + { + _active_nodes = next_active_nodes.copy_parallel(); + } + next_active_nodes.clear_sequential(); } +} +template +bool LabelPropagationRefiner::labelPropagationRound( + PartitionedHypergraph &hypergraph, NextActiveNodes &next_active_nodes, + Metrics &best_metrics, vec &rebalance_moves, bool unconstrained_lp) +{ + Metrics current_metrics = best_metrics; + _visited_he.reset(); + _next_active.reset(); + _gain.reset(); + + if(unconstrained_lp) + { + _old_partition_is_balanced = metrics::isBalanced(hypergraph, _context); + moveActiveNodes(hypergraph, next_active_nodes); + } + else + { + moveActiveNodes(hypergraph, next_active_nodes); + } - template - void LabelPropagationRefiner::labelPropagation(PartitionedHypergraph& hypergraph, - Metrics& best_metrics) { - NextActiveNodes next_active_nodes; - vec rebalance_moves; - bool should_stop = false; - for (size_t i = 0; i < _context.refinement.label_propagation.maximum_iterations - && !should_stop && !_active_nodes.empty(); ++i) { - should_stop = labelPropagationRound(hypergraph, next_active_nodes, best_metrics, rebalance_moves, - _context.refinement.label_propagation.unconstrained); - - if ( _context.refinement.label_propagation.execute_sequential ) { - _active_nodes = next_active_nodes.copy_sequential(); - } else { - _active_nodes = next_active_nodes.copy_parallel(); + current_metrics.imbalance = metrics::imbalance(hypergraph, _context); + current_metrics.quality += _gain.delta(); + + bool should_update_gain_cache = + GainCache::invalidates_entries && _gain_cache.isInitialized(); + if(should_update_gain_cache) + { + forEachMovedNode([&](size_t j) { + _gain_cache.recomputeInvalidTerms(hypergraph, _active_nodes[j]); + if(!unconstrained_lp) + { + _active_node_was_moved[j] = uint8_t(false); } - next_active_nodes.clear_sequential(); - } + }); } - template - bool LabelPropagationRefiner::labelPropagationRound(PartitionedHypergraph& hypergraph, - NextActiveNodes& next_active_nodes, - Metrics& best_metrics, - vec& rebalance_moves, - bool unconstrained_lp) { - Metrics current_metrics = best_metrics; - _visited_he.reset(); - _next_active.reset(); - _gain.reset(); - - if (unconstrained_lp) { - _old_partition_is_balanced = metrics::isBalanced(hypergraph, _context); - moveActiveNodes(hypergraph, next_active_nodes); - } else { - moveActiveNodes(hypergraph, next_active_nodes); + bool should_stop = false; + if(unconstrained_lp) + { + if(!metrics::isBalanced(hypergraph, _context)) + { + should_stop = + applyRebalancing(hypergraph, best_metrics, current_metrics, rebalance_moves); + // rebalancer might initialize the gain cache + should_update_gain_cache = + GainCache::invalidates_entries && _gain_cache.isInitialized(); + } + else + { + should_update_gain_cache = false; } - current_metrics.imbalance = metrics::imbalance(hypergraph, _context); - current_metrics.quality += _gain.delta(); - - bool should_update_gain_cache = GainCache::invalidates_entries && _gain_cache.isInitialized(); - if ( should_update_gain_cache ) { + // store current part of each node (required for rollback) + if(!should_stop) + { forEachMovedNode([&](size_t j) { - _gain_cache.recomputeInvalidTerms(hypergraph, _active_nodes[j]); - if (!unconstrained_lp) { _active_node_was_moved[j] = uint8_t(false); } + _old_part[_active_nodes[j]] = hypergraph.partID(_active_nodes[j]); }); } - - bool should_stop = false; - if ( unconstrained_lp ) { - if (!metrics::isBalanced(hypergraph, _context)) { - should_stop = applyRebalancing(hypergraph, 
best_metrics, current_metrics, rebalance_moves); - // rebalancer might initialize the gain cache - should_update_gain_cache = GainCache::invalidates_entries && _gain_cache.isInitialized(); - } else { - should_update_gain_cache = false; + // collect activated nodes, update gain cache and reset flags + forEachMovedNode([&](size_t j) { + if(!should_stop) + { + activateNodeAndNeighbors(hypergraph, next_active_nodes, _active_nodes[j], false); } - - // store current part of each node (required for rollback) - if ( !should_stop ) { - forEachMovedNode([&](size_t j) { - _old_part[_active_nodes[j]] = hypergraph.partID(_active_nodes[j]); - }); + if(should_update_gain_cache) + { + _gain_cache.recomputeInvalidTerms(hypergraph, _active_nodes[j]); } - // collect activated nodes, update gain cache and reset flags - forEachMovedNode([&](size_t j) { - if (!should_stop) { - activateNodeAndNeighbors(hypergraph, next_active_nodes, _active_nodes[j], false); - } - if (should_update_gain_cache) { - _gain_cache.recomputeInvalidTerms(hypergraph, _active_nodes[j]); - } - _active_node_was_moved[j] = uint8_t(false); - }); - } + _active_node_was_moved[j] = uint8_t(false); + }); + } - ASSERT(current_metrics.quality <= best_metrics.quality); - const Gain old_quality = best_metrics.quality; - best_metrics = current_metrics; + ASSERT(current_metrics.quality <= best_metrics.quality); + const Gain old_quality = best_metrics.quality; + best_metrics = current_metrics; - HEAVY_REFINEMENT_ASSERT(hypergraph.checkTrackedPartitionInformation(_gain_cache)); - return should_stop || old_quality - current_metrics.quality < - _context.refinement.label_propagation.relative_improvement_threshold * old_quality; - } + HEAVY_REFINEMENT_ASSERT(hypergraph.checkTrackedPartitionInformation(_gain_cache)); + return should_stop || + old_quality - current_metrics.quality < + _context.refinement.label_propagation.relative_improvement_threshold * + old_quality; +} - template - template - void LabelPropagationRefiner::moveActiveNodes(PartitionedHypergraph& phg, - NextActiveNodes& next_active_nodes) { - // This function is passed as lambda to the changeNodePart function and used - // to calculate the "real" delta of a move (in terms of the used objective function). - auto objective_delta = [&](const SynchronizedEdgeUpdate& sync_update) { - _gain.computeDeltaForHyperedge(sync_update); - }; - const bool should_update_gain_cache = GainCache::invalidates_entries && _gain_cache.isInitialized(); - const bool should_mark_nodes = unconstrained || should_update_gain_cache; - - if ( _context.refinement.label_propagation.execute_sequential ) { - utils::Randomize::instance().shuffleVector( - _active_nodes, UL(0), _active_nodes.size(), THREAD_ID); - - for ( size_t j = 0; j < _active_nodes.size(); ++j ) { - const HypernodeID hn = _active_nodes[j]; - if ( moveVertex(phg, hn, next_active_nodes, objective_delta) ) { - if (should_mark_nodes) { _active_node_was_moved[j] = uint8_t(true); } +template +template +void LabelPropagationRefiner::moveActiveNodes( + PartitionedHypergraph &phg, NextActiveNodes &next_active_nodes) +{ + // This function is passed as lambda to the changeNodePart function and used + // to calculate the "real" delta of a move (in terms of the used objective function). 
+ auto objective_delta = [&](const SynchronizedEdgeUpdate &sync_update) { + _gain.computeDeltaForHyperedge(sync_update); + }; + const bool should_update_gain_cache = + GainCache::invalidates_entries && _gain_cache.isInitialized(); + const bool should_mark_nodes = unconstrained || should_update_gain_cache; + + if(_context.refinement.label_propagation.execute_sequential) + { + utils::Randomize::instance().shuffleVector(_active_nodes, UL(0), _active_nodes.size(), + THREAD_ID); + + for(size_t j = 0; j < _active_nodes.size(); ++j) + { + const HypernodeID hn = _active_nodes[j]; + if(moveVertex(phg, hn, next_active_nodes, objective_delta)) + { + if(should_mark_nodes) + { + _active_node_was_moved[j] = uint8_t(true); } } - } else { - utils::Randomize::instance().parallelShuffleVector( - _active_nodes, UL(0), _active_nodes.size()); - - tbb::parallel_for(UL(0), _active_nodes.size(), [&](const size_t& j) { - const HypernodeID hn = _active_nodes[j]; - if ( moveVertex(phg, hn, next_active_nodes, objective_delta) ) { - if (should_mark_nodes) { _active_node_was_moved[j] = uint8_t(true); } - } - }); } } - - - template - bool LabelPropagationRefiner::applyRebalancing(PartitionedHypergraph& hypergraph, - Metrics& best_metrics, - Metrics& current_metrics, - vec& rebalance_moves) { - utils::Timer& timer = utils::Utilities::instance().getTimer(_context.utility_id); - timer.start_timer("rebalance_lp", "Rebalance"); - mt_kahypar_partitioned_hypergraph_t phg = utils::partitioned_hg_cast(hypergraph); - _rebalancer.refineAndOutputMovesLinear(phg, {}, rebalance_moves, current_metrics, 0.0); - - // append to active nodes so they are included for gain cache updates and rollback - _active_nodes.reserve(_active_nodes.size() + rebalance_moves.size()); - for (const Move& m: rebalance_moves) { - bool old_part_unintialized = _might_be_uninitialized && !_old_part_is_initialized[m.node]; - if (old_part_unintialized || m.from == _old_part[m.node]) { - size_t i = _active_nodes.size(); - _active_nodes.push_back(m.node); - _active_node_was_moved[i] = uint8_t(true); - if (old_part_unintialized) { - _old_part[m.node] = m.from; - _old_part_is_initialized.set(m.node, true); + else + { + utils::Randomize::instance().parallelShuffleVector(_active_nodes, UL(0), + _active_nodes.size()); + + tbb::parallel_for(UL(0), _active_nodes.size(), [&](const size_t &j) { + const HypernodeID hn = _active_nodes[j]; + if(moveVertex(phg, hn, next_active_nodes, objective_delta)) + { + if(should_mark_nodes) + { + _active_node_was_moved[j] = uint8_t(true); } } - } - timer.stop_timer("rebalance_lp"); - DBG << "[LP] Imbalance after rebalancing: " << current_metrics.imbalance << ", quality: " << current_metrics.quality; - - bool was_imbalanced_and_improved_balance = !_old_partition_is_balanced - && current_metrics.imbalance < best_metrics.imbalance; - // We consider the new partition an improvement if either - // (1) the old partiton was imbalanced and balance is improved or - // (2) the quality is improved while still being balanced - if ( was_imbalanced_and_improved_balance - || (current_metrics.quality <= best_metrics.quality && metrics::isBalanced(hypergraph, _context)) ) { - return false; - } else { - // rollback and stop LP - auto noop_obj_fn = [](const SynchronizedEdgeUpdate&) { }; - current_metrics = best_metrics; + }); + } +} - forEachMovedNode([&](size_t j) { - const HypernodeID hn = _active_nodes[j]; - ASSERT(!_might_be_uninitialized || _old_part_is_initialized[hn]); - if (hypergraph.partID(hn) != _old_part[hn]) { - changeNodePart(hypergraph, hn, 
hypergraph.partID(hn), _old_part[hn], noop_obj_fn); - } - }); - return true; +template +bool LabelPropagationRefiner::applyRebalancing( + PartitionedHypergraph &hypergraph, Metrics &best_metrics, Metrics &current_metrics, + vec &rebalance_moves) +{ + utils::Timer &timer = utils::Utilities::instance().getTimer(_context.utility_id); + timer.start_timer("rebalance_lp", "Rebalance"); + mt_kahypar_partitioned_hypergraph_t phg = utils::partitioned_hg_cast(hypergraph); + _rebalancer.refineAndOutputMovesLinear(phg, {}, rebalance_moves, current_metrics, 0.0); + + // append to active nodes so they are included for gain cache updates and rollback + _active_nodes.reserve(_active_nodes.size() + rebalance_moves.size()); + for(const Move &m : rebalance_moves) + { + bool old_part_unintialized = + _might_be_uninitialized && !_old_part_is_initialized[m.node]; + if(old_part_unintialized || m.from == _old_part[m.node]) + { + size_t i = _active_nodes.size(); + _active_nodes.push_back(m.node); + _active_node_was_moved[i] = uint8_t(true); + if(old_part_unintialized) + { + _old_part[m.node] = m.from; + _old_part_is_initialized.set(m.node, true); + } } } + timer.stop_timer("rebalance_lp"); + DBG << "[LP] Imbalance after rebalancing: " << current_metrics.imbalance + << ", quality: " << current_metrics.quality; + + bool was_imbalanced_and_improved_balance = + !_old_partition_is_balanced && current_metrics.imbalance < best_metrics.imbalance; + // We consider the new partition an improvement if either + // (1) the old partition was imbalanced and balance is improved or + // (2) the quality is improved while still being balanced + if(was_imbalanced_and_improved_balance || + (current_metrics.quality <= best_metrics.quality && + metrics::isBalanced(hypergraph, _context))) + { + return false; + } + else + { + // rollback and stop LP + auto noop_obj_fn = [](const SynchronizedEdgeUpdate &) {}; + current_metrics = best_metrics; + + forEachMovedNode([&](size_t j) { + const HypernodeID hn = _active_nodes[j]; + ASSERT(!_might_be_uninitialized || _old_part_is_initialized[hn]); + if(hypergraph.partID(hn) != _old_part[hn]) + { + changeNodePart(hypergraph, hn, hypergraph.partID(hn), _old_part[hn], + noop_obj_fn); + } + }); + return true; + } +} - template - template - void LabelPropagationRefiner::forEachMovedNode(F node_fn) { - if ( _context.refinement.label_propagation.execute_sequential ) { - for (size_t j = 0; j < _active_nodes.size(); j++) { - if (_active_node_was_moved[j]) { - node_fn(j); - } +template +template +void LabelPropagationRefiner::forEachMovedNode(F node_fn) +{ + if(_context.refinement.label_propagation.execute_sequential) + { + for(size_t j = 0; j < _active_nodes.size(); j++) + { + if(_active_node_was_moved[j]) + { + node_fn(j); } } - } else { - tbb::parallel_for(UL(0), _active_nodes.size(), [&](const size_t j) { - if (_active_node_was_moved[j]) { - node_fn(j); - } - }); } } - - template - void LabelPropagationRefiner::initializeImpl(mt_kahypar_partitioned_hypergraph_t& phg) { - unused(phg); + else + { + tbb::parallel_for(UL(0), _active_nodes.size(), [&](const size_t j) { + if(_active_node_was_moved[j]) + { + node_fn(j); + } + }); } +} - template - void LabelPropagationRefiner::initializeActiveNodes(PartitionedHypergraph& hypergraph, - const vec& refinement_nodes) { - _active_nodes.clear(); - if ( refinement_nodes.empty() ) { - _might_be_uninitialized = false; - if ( _context.refinement.label_propagation.execute_sequential ) { - for ( const HypernodeID hn : hypergraph.nodes() ) { - if ( 
_context.refinement.label_propagation.rebalancing || hypergraph.isBorderNode(hn) ) { - _active_nodes.push_back(hn); - } - if ( _context.refinement.label_propagation.unconstrained ) { - _old_part[hn] = hypergraph.partID(hn); - } - } - } else { - // Setup active nodes in parallel - // A node is active, if it is a border vertex. - NextActiveNodes tmp_active_nodes; - hypergraph.doParallelForAllNodes([&](const HypernodeID& hn) { - if ( _context.refinement.label_propagation.rebalancing || hypergraph.isBorderNode(hn) ) { - if ( _next_active.compare_and_set_to_true(hn) ) { - tmp_active_nodes.stream(hn); - } - } - if ( _context.refinement.label_propagation.unconstrained ) { - _old_part[hn] = hypergraph.partID(hn); - } - }); - - _active_nodes = tmp_active_nodes.copy_parallel(); - } - } else { - _active_nodes = refinement_nodes; +template +void LabelPropagationRefiner::initializeImpl( + mt_kahypar_partitioned_hypergraph_t &phg) +{ + unused(phg); +} - if ( _context.refinement.label_propagation.unconstrained ) { - auto set_old_part = [&](const size_t& i) { - const HypernodeID hn = refinement_nodes[i]; +template +void LabelPropagationRefiner::initializeActiveNodes( + PartitionedHypergraph &hypergraph, const vec &refinement_nodes) +{ + _active_nodes.clear(); + if(refinement_nodes.empty()) + { + _might_be_uninitialized = false; + if(_context.refinement.label_propagation.execute_sequential) + { + for(const HypernodeID hn : hypergraph.nodes()) + { + if(_context.refinement.label_propagation.rebalancing || + hypergraph.isBorderNode(hn)) + { + _active_nodes.push_back(hn); + } + if(_context.refinement.label_propagation.unconstrained) + { _old_part[hn] = hypergraph.partID(hn); - _old_part_is_initialized.set(hn, true); - }; - - // we don't want to scan the whole graph for localized LP - _might_be_uninitialized = true; - _old_part_is_initialized.reset(); - if ( _context.refinement.label_propagation.execute_sequential ) { - for (size_t i = 0; i < refinement_nodes.size(); ++i) { - set_old_part(i); - } - } else { - tbb::parallel_for(UL(0), refinement_nodes.size(), set_old_part); } } } + else + { + // Setup active nodes in parallel + // A node is active, if it is a border vertex. 
+ NextActiveNodes tmp_active_nodes; + hypergraph.doParallelForAllNodes([&](const HypernodeID &hn) { + if(_context.refinement.label_propagation.rebalancing || + hypergraph.isBorderNode(hn)) + { + if(_next_active.compare_and_set_to_true(hn)) + { + tmp_active_nodes.stream(hn); + } + } + if(_context.refinement.label_propagation.unconstrained) + { + _old_part[hn] = hypergraph.partID(hn); + } + }); - _next_active.reset(); + _active_nodes = tmp_active_nodes.copy_parallel(); + } } - - namespace { - #define LABEL_PROPAGATION_REFINER(X) LabelPropagationRefiner + else + { + _active_nodes = refinement_nodes; + + if(_context.refinement.label_propagation.unconstrained) + { + auto set_old_part = [&](const size_t &i) { + const HypernodeID hn = refinement_nodes[i]; + _old_part[hn] = hypergraph.partID(hn); + _old_part_is_initialized.set(hn, true); + }; + + // we don't want to scan the whole graph for localized LP + _might_be_uninitialized = true; + _old_part_is_initialized.reset(); + if(_context.refinement.label_propagation.execute_sequential) + { + for(size_t i = 0; i < refinement_nodes.size(); ++i) + { + set_old_part(i); + } + } + else + { + tbb::parallel_for(UL(0), refinement_nodes.size(), set_old_part); + } + } } - // explicitly instantiate so the compiler can generate them when compiling this cpp file - INSTANTIATE_CLASS_WITH_VALID_TRAITS(LABEL_PROPAGATION_REFINER) + _next_active.reset(); +} + +namespace { +#define LABEL_PROPAGATION_REFINER(X) LabelPropagationRefiner +} + +// explicitly instantiate so the compiler can generate them when compiling this cpp file +INSTANTIATE_CLASS_WITH_VALID_TRAITS(LABEL_PROPAGATION_REFINER) } diff --git a/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.h b/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.h index ab4657d54..ae3c86227 100644 --- a/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.h +++ b/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.h @@ -33,16 +33,16 @@ #include "mt-kahypar/datastructures/thread_safe_fast_reset_flag_array.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/partition/context.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" -#include "mt-kahypar/partition/refinement/i_rebalancer.h" #include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" +#include "mt-kahypar/partition/refinement/i_rebalancer.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/utils/cast.h" - namespace mt_kahypar { template -class LabelPropagationRefiner final : public IRefiner { - private: +class LabelPropagationRefiner final : public IRefiner +{ +private: using Hypergraph = typename GraphAndGainTypes::Hypergraph; using PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; using GainCache = typename GraphAndGainTypes::GainCache; @@ -53,104 +53,107 @@ class LabelPropagationRefiner final : public IRefiner { static constexpr bool debug = false; static constexpr bool enable_heavy_assert = false; - public: +public: explicit LabelPropagationRefiner(const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, - const Context& context, - GainCache& gain_cache, - IRebalancer& rb) : - _might_be_uninitialized(false), - _old_partition_is_balanced(true), - _context(context), - _gain_cache(gain_cache), - _current_k(context.partition.k), - _current_num_nodes(kInvalidHypernode), - _current_num_edges(kInvalidHyperedge), - _gain(context), - _active_nodes(), - 
_active_node_was_moved(2 * num_hypernodes, uint8_t(false)), - _old_part(_context.refinement.label_propagation.unconstrained ? num_hypernodes : 0, kInvalidPartition), - _old_part_is_initialized(_context.refinement.label_propagation.unconstrained ? num_hypernodes : 0), - _next_active(num_hypernodes), - _visited_he(Hypergraph::is_graph ? 0 : num_hyperedges), - _rebalancer(rb) { } + const Context &context, GainCache &gain_cache, + IRebalancer &rb) : + _might_be_uninitialized(false), + _old_partition_is_balanced(true), _context(context), _gain_cache(gain_cache), + _current_k(context.partition.k), _current_num_nodes(kInvalidHypernode), + _current_num_edges(kInvalidHyperedge), _gain(context), _active_nodes(), + _active_node_was_moved(2 * num_hypernodes, uint8_t(false)), + _old_part(_context.refinement.label_propagation.unconstrained ? num_hypernodes : 0, + kInvalidPartition), + _old_part_is_initialized( + _context.refinement.label_propagation.unconstrained ? num_hypernodes : 0), + _next_active(num_hypernodes), + _visited_he(Hypergraph::is_graph ? 0 : num_hyperedges), _rebalancer(rb) + { + } explicit LabelPropagationRefiner(const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, - const Context& context, - gain_cache_t gain_cache, - IRebalancer& rb) : - LabelPropagationRefiner(num_hypernodes, num_hyperedges, context, - GainCachePtr::cast(gain_cache), rb) { } + const Context &context, gain_cache_t gain_cache, + IRebalancer &rb) : + LabelPropagationRefiner(num_hypernodes, num_hyperedges, context, + GainCachePtr::cast(gain_cache), rb) + { + } - LabelPropagationRefiner(const LabelPropagationRefiner&) = delete; - LabelPropagationRefiner(LabelPropagationRefiner&&) = delete; + LabelPropagationRefiner(const LabelPropagationRefiner &) = delete; + LabelPropagationRefiner(LabelPropagationRefiner &&) = delete; - LabelPropagationRefiner & operator= (const LabelPropagationRefiner &) = delete; - LabelPropagationRefiner & operator= (LabelPropagationRefiner &&) = delete; + LabelPropagationRefiner &operator=(const LabelPropagationRefiner &) = delete; + LabelPropagationRefiner &operator=(LabelPropagationRefiner &&) = delete; - private: - bool refineImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const parallel::scalable_vector& refinement_nodes, - Metrics& best_metrics, - double) final ; +private: + bool refineImpl(mt_kahypar_partitioned_hypergraph_t &hypergraph, + const parallel::scalable_vector &refinement_nodes, + Metrics &best_metrics, double) final; - void labelPropagation(PartitionedHypergraph& phg, Metrics& best_metrics); + void labelPropagation(PartitionedHypergraph &phg, Metrics &best_metrics); - bool labelPropagationRound(PartitionedHypergraph& hypergraph, - NextActiveNodes& next_active_nodes, - Metrics& best_metrics, - vec& rebalance_moves, - bool unconstrained_lp); + bool labelPropagationRound(PartitionedHypergraph &hypergraph, + NextActiveNodes &next_active_nodes, Metrics &best_metrics, + vec &rebalance_moves, bool unconstrained_lp); - template - void moveActiveNodes(PartitionedHypergraph& hypergraph, NextActiveNodes& next_active_nodes); + template + void moveActiveNodes(PartitionedHypergraph &hypergraph, + NextActiveNodes &next_active_nodes); - bool applyRebalancing(PartitionedHypergraph& hypergraph, - Metrics& best_metrics, - Metrics& current_metrics, - vec& rebalance_moves); + bool applyRebalancing(PartitionedHypergraph &hypergraph, Metrics &best_metrics, + Metrics &current_metrics, vec &rebalance_moves); - template + template void forEachMovedNode(F node_fn); - template - bool 
moveVertex(PartitionedHypergraph& hypergraph, - const HypernodeID hn, - NextActiveNodes& next_active_nodes, - const F& objective_delta); - - void initializeActiveNodes(PartitionedHypergraph& hypergraph, - const parallel::scalable_vector& refinement_nodes); - - void initializeImpl(mt_kahypar_partitioned_hypergraph_t&) final; - - template - bool changeNodePart(PartitionedHypergraph& phg, - const HypernodeID hn, - const PartitionID from, - const PartitionID to, - const F& objective_delta) { - HypernodeWeight max_weight = unconstrained ? std::numeric_limits::max() - : _context.partition.max_part_weights[to]; - if ( _gain_cache.isInitialized() ) { - return phg.changeNodePart(_gain_cache, hn, from, to, max_weight, []{}, objective_delta); - } else { - return phg.changeNodePart(hn, from, to, max_weight, []{}, objective_delta); + template + bool moveVertex(PartitionedHypergraph &hypergraph, const HypernodeID hn, + NextActiveNodes &next_active_nodes, const F &objective_delta); + + void + initializeActiveNodes(PartitionedHypergraph &hypergraph, + const parallel::scalable_vector &refinement_nodes); + + void initializeImpl(mt_kahypar_partitioned_hypergraph_t &) final; + + template + bool changeNodePart(PartitionedHypergraph &phg, const HypernodeID hn, + const PartitionID from, const PartitionID to, + const F &objective_delta) + { + HypernodeWeight max_weight = unconstrained ? + std::numeric_limits::max() : + _context.partition.max_part_weights[to]; + if(_gain_cache.isInitialized()) + { + return phg.changeNodePart( + _gain_cache, hn, from, to, max_weight, [] {}, objective_delta); + } + else + { + return phg.changeNodePart( + hn, from, to, max_weight, [] {}, objective_delta); } } MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE - void activateNodeAndNeighbors(PartitionedHypergraph& hypergraph, - NextActiveNodes& next_active_nodes, - const HypernodeID hn, - bool activate_moved) { + void activateNodeAndNeighbors(PartitionedHypergraph &hypergraph, + NextActiveNodes &next_active_nodes, const HypernodeID hn, + bool activate_moved) + { auto activate = [&](const HypernodeID hn) { - bool old_part_unintialized = _might_be_uninitialized && !_old_part_is_initialized[hn]; - if (activate_moved || old_part_unintialized || hypergraph.partID(hn) == _old_part[hn]) { - if ( _next_active.compare_and_set_to_true(hn) ) { + bool old_part_unintialized = + _might_be_uninitialized && !_old_part_is_initialized[hn]; + if(activate_moved || old_part_unintialized || + hypergraph.partID(hn) == _old_part[hn]) + { + if(_next_active.compare_and_set_to_true(hn)) + { next_active_nodes.stream(hn); - if ( old_part_unintialized ) { + if(old_part_unintialized) + { _old_part[hn] = hypergraph.partID(hn); _old_part_is_initialized.set(hn, true); } @@ -159,16 +162,24 @@ class LabelPropagationRefiner final : public IRefiner { }; // Set all neighbors of the vertex to active - if constexpr (Hypergraph::is_graph) { - for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) { + if constexpr(Hypergraph::is_graph) + { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { activate(hypergraph.edgeTarget(he)); } - } else { - for (const HyperedgeID& he : hypergraph.incidentEdges(hn)) { - if ( hypergraph.edgeSize(he) <= - ID(_context.refinement.label_propagation.hyperedge_size_activation_threshold) ) { - if ( !_visited_he[he] ) { - for (const HypernodeID& pin : hypergraph.pins(he)) { + } + else + { + for(const HyperedgeID &he : hypergraph.incidentEdges(hn)) + { + if(hypergraph.edgeSize(he) <= + 
ID(_context.refinement.label_propagation.hyperedge_size_activation_threshold)) + { + if(!_visited_he[he]) + { + for(const HypernodeID &pin : hypergraph.pins(he)) + { activate(pin); } _visited_he.set(he, true); @@ -177,19 +188,23 @@ class LabelPropagationRefiner final : public IRefiner { } } - if ( activate_moved && _next_active.compare_and_set_to_true(hn) ) { + if(activate_moved && _next_active.compare_and_set_to_true(hn)) + { ASSERT(!_might_be_uninitialized); next_active_nodes.stream(hn); } } - void resizeDataStructuresForCurrentK() { + void resizeDataStructuresForCurrentK() + { // If the number of blocks changes, we resize data structures // (can happen during deep multilevel partitioning) - if ( _current_k != _context.partition.k ) { + if(_current_k != _context.partition.k) + { _current_k = _context.partition.k; _gain.changeNumberOfBlocks(_current_k); - if ( _gain_cache.isInitialized() ) { + if(_gain_cache.isInitialized()) + { _gain_cache.changeNumberOfBlocks(_current_k); } } @@ -197,8 +212,8 @@ class LabelPropagationRefiner final : public IRefiner { bool _might_be_uninitialized; bool _old_partition_is_balanced; - const Context& _context; - GainCache& _gain_cache; + const Context &_context; + GainCache &_gain_cache; PartitionID _current_k; HypernodeID _current_num_nodes; HyperedgeID _current_num_edges; @@ -209,7 +224,7 @@ class LabelPropagationRefiner final : public IRefiner { kahypar::ds::FastResetFlagArray<> _old_part_is_initialized; ds::ThreadSafeFastResetFlagArray<> _next_active; kahypar::ds::FastResetFlagArray<> _visited_he; - IRebalancer& _rebalancer; + IRebalancer &_rebalancer; }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.cpp b/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.cpp index 0a04165c0..e59b2ff09 100644 --- a/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.cpp +++ b/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.cpp @@ -29,9 +29,9 @@ #include #include +#include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/refinement/gains/gain_definitions.h" #include "mt-kahypar/utils/cast.h" -#include "mt-kahypar/partition/context.h" #include "pcg_random.hpp" @@ -39,501 +39,611 @@ namespace mt_kahypar { namespace impl { - float transformGain(Gain gain_, HypernodeWeight wu) { - float gain = gain_; - if (gain > 0) { - gain *= wu; - } else if (gain < 0) { - gain /= wu; - } - return gain; +float transformGain(Gain gain_, HypernodeWeight wu) +{ + float gain = gain_; + if(gain > 0) + { + gain *= wu; + } + else if(gain < 0) + { + gain /= wu; } + return gain; +} - template - std::pair computeBestTargetBlock( - const PartitionedHypergraph& phg, const Context& context, const GainCache& gain_cache, - HypernodeID u, PartitionID from) { - const HypernodeWeight wu = phg.nodeWeight(u); - const HypernodeWeight from_weight = phg.partWeight(from); - PartitionID to = kInvalidPartition; - HyperedgeWeight to_benefit = std::numeric_limits::min(); - HypernodeWeight best_to_weight = from_weight - wu; - for (PartitionID i = 0; i < context.partition.k; ++i) { - if (i != from) { - const HypernodeWeight to_weight = phg.partWeight(i); - const HyperedgeWeight benefit = gain_cache.benefitTerm(u, i); - if ((benefit > to_benefit || (benefit == to_benefit && to_weight < best_to_weight)) && - to_weight + wu <= context.partition.max_part_weights[i]) { - to_benefit = benefit; - to = i; - best_to_weight = to_weight; - } +template +std::pair +computeBestTargetBlock(const 
PartitionedHypergraph &phg, const Context &context, + const GainCache &gain_cache, HypernodeID u, PartitionID from) +{ + const HypernodeWeight wu = phg.nodeWeight(u); + const HypernodeWeight from_weight = phg.partWeight(from); + PartitionID to = kInvalidPartition; + HyperedgeWeight to_benefit = std::numeric_limits::min(); + HypernodeWeight best_to_weight = from_weight - wu; + for(PartitionID i = 0; i < context.partition.k; ++i) + { + if(i != from) + { + const HypernodeWeight to_weight = phg.partWeight(i); + const HyperedgeWeight benefit = gain_cache.benefitTerm(u, i); + if((benefit > to_benefit || + (benefit == to_benefit && to_weight < best_to_weight)) && + to_weight + wu <= context.partition.max_part_weights[i]) + { + to_benefit = benefit; + to = i; + best_to_weight = to_weight; } } + } - Gain gain = std::numeric_limits::min(); - if (to != kInvalidPartition) { - gain = to_benefit - gain_cache.penaltyTerm(u, phg.partID(u)); - } - return std::make_pair(to, transformGain(gain, wu)); + Gain gain = std::numeric_limits::min(); + if(to != kInvalidPartition) + { + gain = to_benefit - gain_cache.penaltyTerm(u, phg.partID(u)); } + return std::make_pair(to, transformGain(gain, wu)); +} - template - std::pair bestOfThree( - const PartitionedHypergraph& phg, const Context& context, const GainCache& gain_cache, - HypernodeID u, PartitionID from, std::array parts) { - const HypernodeWeight wu = phg.nodeWeight(u); - const HypernodeWeight from_weight = phg.partWeight(from); - PartitionID to = kInvalidPartition; - HyperedgeWeight to_benefit = std::numeric_limits::min(); - HypernodeWeight best_to_weight = from_weight - wu; - for (PartitionID i : parts) { - if (i != from && i != kInvalidPartition) { - const HypernodeWeight to_weight = phg.partWeight(i); - const HyperedgeWeight benefit = gain_cache.benefitTerm(u, i); - if ((benefit > to_benefit || (benefit == to_benefit && to_weight < best_to_weight)) && - to_weight + wu <= context.partition.max_part_weights[i]) { - to_benefit = benefit; - to = i; - best_to_weight = to_weight; - } +template +std::pair +bestOfThree(const PartitionedHypergraph &phg, const Context &context, + const GainCache &gain_cache, HypernodeID u, PartitionID from, + std::array parts) +{ + const HypernodeWeight wu = phg.nodeWeight(u); + const HypernodeWeight from_weight = phg.partWeight(from); + PartitionID to = kInvalidPartition; + HyperedgeWeight to_benefit = std::numeric_limits::min(); + HypernodeWeight best_to_weight = from_weight - wu; + for(PartitionID i : parts) + { + if(i != from && i != kInvalidPartition) + { + const HypernodeWeight to_weight = phg.partWeight(i); + const HyperedgeWeight benefit = gain_cache.benefitTerm(u, i); + if((benefit > to_benefit || + (benefit == to_benefit && to_weight < best_to_weight)) && + to_weight + wu <= context.partition.max_part_weights[i]) + { + to_benefit = benefit; + to = i; + best_to_weight = to_weight; } } - - if (to != kInvalidPartition) { - Gain gain = to_benefit - gain_cache.penaltyTerm(u, phg.partID(u)); - return std::make_pair(to, transformGain(gain, wu)); - } else { - // edge case: if u does not fit in any of the three considered blocks we need to check all blocks - return computeBestTargetBlock(phg, context, gain_cache, u, from); - } } - struct AccessToken { - AccessToken(int seed, size_t num_pqs) : dist(0, num_pqs - 1) { rng.seed(seed); } - size_t getRandomPQ() { return dist(rng); } + if(to != kInvalidPartition) + { + Gain gain = to_benefit - gain_cache.penaltyTerm(u, phg.partID(u)); + return std::make_pair(to, transformGain(gain, wu)); 
+ } + else + { + // edge case: if u does not fit in any of the three considered blocks we need to check + // all blocks + return computeBestTargetBlock(phg, context, gain_cache, u, from); + } +} - std::array getTwoRandomPQs() { - std::array result({getRandomPQ(), getRandomPQ()}); - while (result[0] == result[1]) { result[1] = getRandomPQ(); } - return result; +struct AccessToken +{ + AccessToken(int seed, size_t num_pqs) : dist(0, num_pqs - 1) { rng.seed(seed); } + size_t getRandomPQ() { return dist(rng); } + + std::array getTwoRandomPQs() + { + std::array result({ getRandomPQ(), getRandomPQ() }); + while(result[0] == result[1]) + { + result[1] = getRandomPQ(); } + return result; + } - pcg32 rng; - std::uniform_int_distribution dist; - }; - - - template - struct NextMoveFinder { - Move next_move; - - PartitionedHypergraph& _phg; - GainCache& _gain_cache; - const Context& _context; - - vec& _pqs; - ds::Array& _target_part; - ds::Array& _node_state; - AccessToken _token; - - NextMoveFinder(int seed, const Context& context, PartitionedHypergraph& phg, GainCache& gain_cache, - vec& pqs, - ds::Array& target_part, ds::Array& node_state) : - _phg(phg), _gain_cache(gain_cache), _context(context), - _pqs(pqs), _target_part(target_part), _node_state(node_state), _token(seed, pqs.size()) { } - + pcg32 rng; + std::uniform_int_distribution dist; +}; + +template +struct NextMoveFinder +{ + Move next_move; + + PartitionedHypergraph &_phg; + GainCache &_gain_cache; + const Context &_context; + + vec &_pqs; + ds::Array &_target_part; + ds::Array &_node_state; + AccessToken _token; + + NextMoveFinder(int seed, const Context &context, PartitionedHypergraph &phg, + GainCache &gain_cache, vec &pqs, + ds::Array &target_part, + ds::Array &node_state) : + _phg(phg), + _gain_cache(gain_cache), _context(context), _pqs(pqs), _target_part(target_part), + _node_state(node_state), _token(seed, pqs.size()) + { + } - void recomputeTopGainMove(HypernodeID v, const Move& move /* of the neighbor */) { - float gain = 0; - PartitionID newTarget = kInvalidPartition; - const PartitionID designatedTargetV = _target_part[v]; - if (_context.partition.k < 4 || designatedTargetV == move.from || designatedTargetV == move.to) { - std::tie(newTarget, gain) = computeBestTargetBlock(_phg, _context, _gain_cache, v, _phg.partID(v)); - } else { - std::tie(newTarget, gain) = bestOfThree(_phg, _context, _gain_cache, - v, _phg.partID(v), {designatedTargetV, move.from, move.to}); - } - _target_part[v] = newTarget; + void recomputeTopGainMove(HypernodeID v, const Move &move /* of the neighbor */) + { + float gain = 0; + PartitionID newTarget = kInvalidPartition; + const PartitionID designatedTargetV = _target_part[v]; + if(_context.partition.k < 4 || designatedTargetV == move.from || + designatedTargetV == move.to) + { + std::tie(newTarget, gain) = + computeBestTargetBlock(_phg, _context, _gain_cache, v, _phg.partID(v)); + } + else + { + std::tie(newTarget, gain) = + bestOfThree(_phg, _context, _gain_cache, v, _phg.partID(v), + { designatedTargetV, move.from, move.to }); } + _target_part[v] = newTarget; + } - bool checkCandidate(HypernodeID u, float& gain_in_pq) { - if (!_node_state[u].tryLock()) return false; - auto [to, true_gain] = computeBestTargetBlock(_phg, _context, _gain_cache, u, _phg.partID(u)); - if (true_gain >= gain_in_pq) { - next_move.node = u; - next_move.to = to; - next_move.from = _phg.partID(u); - next_move.gain = true_gain; - return true; - } else { - _target_part[u] = to; - gain_in_pq = true_gain; - _node_state[u].unlock(); 
- return false; - } + bool checkCandidate(HypernodeID u, float &gain_in_pq) + { + if(!_node_state[u].tryLock()) + return false; + auto [to, true_gain] = + computeBestTargetBlock(_phg, _context, _gain_cache, u, _phg.partID(u)); + if(true_gain >= gain_in_pq) + { + next_move.node = u; + next_move.to = to; + next_move.from = _phg.partID(u); + next_move.gain = true_gain; + return true; + } + else + { + _target_part[u] = to; + gain_in_pq = true_gain; + _node_state[u].unlock(); + return false; } + } - bool lockedModifyPQ(size_t best_id) { - auto& gpq = _pqs[best_id]; - auto& pq = gpq.pq; + bool lockedModifyPQ(size_t best_id) + { + auto &gpq = _pqs[best_id]; + auto &pq = gpq.pq; - HypernodeID node = pq.top(); - float gain_in_pq = pq.topKey(); - const bool success = checkCandidate(node, gain_in_pq); + HypernodeID node = pq.top(); + float gain_in_pq = pq.topKey(); + const bool success = checkCandidate(node, gain_in_pq); - if (success) { + if(success) + { + pq.deleteTop(); + gpq.top_key = pq.empty() ? std::numeric_limits::min() : pq.topKey(); + } + else + { + // gain was updated by success_func in this case + if(_target_part[node] != kInvalidPartition) + { + pq.adjustKey(node, gain_in_pq); + gpq.top_key = pq.topKey(); + } + else + { pq.deleteTop(); gpq.top_key = pq.empty() ? std::numeric_limits::min() : pq.topKey(); - } else { - // gain was updated by success_func in this case - if (_target_part[node] != kInvalidPartition) { - pq.adjustKey(node, gain_in_pq); - gpq.top_key = pq.topKey(); - } else { - pq.deleteTop(); - gpq.top_key = pq.empty() ? std::numeric_limits::min() : pq.topKey(); - } } - gpq.lock.unlock(); - return success; } + gpq.lock.unlock(); + return success; + } - bool tryPop() { - static constexpr size_t NUM_TRIES = 32; - for (size_t i = 0; i < NUM_TRIES; ++i) { - auto two = _token.getTwoRandomPQs(); - auto& first = _pqs[two[0]]; - auto& second = _pqs[two[1]]; - if (first.pq.empty() && second.pq.empty()) continue; - size_t best_id = two[0]; - if (first.pq.empty() || first.top_key < second.top_key) best_id = two[1]; - if (!_pqs[best_id].lock.tryLock()) continue; - // could also check for top key. would want to distinguish tries that failed due to high contention - // vs approaching the end - if (_pqs[best_id].pq.empty()) { - _pqs[best_id].lock.unlock(); - continue; - } - if (lockedModifyPQ(best_id)) return true; - // if you got a PQ but it fails because the node's gain was wrong or the node couldn't be locked - // (success_func failed) then we still want to use the standard method - i = 0; + bool tryPop() + { + static constexpr size_t NUM_TRIES = 32; + for(size_t i = 0; i < NUM_TRIES; ++i) + { + auto two = _token.getTwoRandomPQs(); + auto &first = _pqs[two[0]]; + auto &second = _pqs[two[1]]; + if(first.pq.empty() && second.pq.empty()) + continue; + size_t best_id = two[0]; + if(first.pq.empty() || first.top_key < second.top_key) + best_id = two[1]; + if(!_pqs[best_id].lock.tryLock()) + continue; + // could also check for top key. 
would want to distinguish tries that failed due to + // high contention vs approaching the end + if(_pqs[best_id].pq.empty()) + { + _pqs[best_id].lock.unlock(); + continue; } + if(lockedModifyPQ(best_id)) + return true; + // if you got a PQ but it fails because the node's gain was wrong or the node + // couldn't be locked (success_func failed) then we still want to use the standard + // method + i = 0; + } - while (true) { - float best_key = std::numeric_limits::min(); - int best_id = -1; - for (size_t i = 0; i < _pqs.size(); ++i) { - if (!_pqs[i].pq.empty() && _pqs[i].top_key > best_key) { - best_key = _pqs[i].top_key; - best_id = i; - } - } - if (best_id == -1) return false; - if (!_pqs[best_id].lock.tryLock()) continue; - if (_pqs[best_id].pq.empty()) { - _pqs[best_id].lock.unlock(); - continue; + while(true) + { + float best_key = std::numeric_limits::min(); + int best_id = -1; + for(size_t i = 0; i < _pqs.size(); ++i) + { + if(!_pqs[i].pq.empty() && _pqs[i].top_key > best_key) + { + best_key = _pqs[i].top_key; + best_id = i; } - if (lockedModifyPQ(best_id)) return true; } - } - - bool findNextMove() { - return tryPop(); - } - }; - - void deactivateOverloadedBlock(uint8_t* is_overloaded, size_t* num_overloaded_blocks) { - if (*is_overloaded) { - uint8_t expected = 1; - if (__atomic_compare_exchange_n(is_overloaded, &expected, 0, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { - __atomic_fetch_sub(num_overloaded_blocks, 1, __ATOMIC_RELAXED); + if(best_id == -1) + return false; + if(!_pqs[best_id].lock.tryLock()) + continue; + if(_pqs[best_id].pq.empty()) + { + _pqs[best_id].lock.unlock(); + continue; } + if(lockedModifyPQ(best_id)) + return true; } } -} // namespace impl - - - template - void AdvancedRebalancer::insertNodesInOverloadedBlocks(mt_kahypar_partitioned_hypergraph_t& hypergraph) { - auto& phg = utils::cast(hypergraph); - - // init PQs if not done before - const size_t num_pqs = 2 * _context.shared_memory.num_threads; - if (_pqs.size() != num_pqs) { - _pqs.assign(num_pqs, rebalancer::GuardedPQ(_pq_handles.data(), _node_state.size())); - } - for (auto& gpq : _pqs) { - gpq.reset(); + bool findNextMove() { return tryPop(); } +}; + +void deactivateOverloadedBlock(uint8_t *is_overloaded, size_t *num_overloaded_blocks) +{ + if(*is_overloaded) + { + uint8_t expected = 1; + if(__atomic_compare_exchange_n(is_overloaded, &expected, 0, false, __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) + { + __atomic_fetch_sub(num_overloaded_blocks, 1, __ATOMIC_RELAXED); } + } +} - // data structures to draw random PQs - std::atomic seed { 555 }; - tbb::enumerable_thread_specific ets_tokens([&]() { - return impl::AccessToken(seed.fetch_add(1, std::memory_order_relaxed), num_pqs); - }); - - // insert nodes into PQs - phg.doParallelForAllNodes([&](HypernodeID u) { - const PartitionID b = phg.partID(u); - if (!_is_overloaded[b] || phg.isFixed(u)) return; - - auto [target, gain] = impl::computeBestTargetBlock(phg, _context, _gain_cache, u, phg.partID(u)); - if (target == kInvalidPartition) return; - - _node_state[u].markAsMovable(); - _target_part[u] = target; - - auto& token = ets_tokens.local(); - int my_pq_id = -1; - while (true) { - my_pq_id = token.getRandomPQ(); - if (_pqs[my_pq_id].lock.tryLock()) { - break; - } - } - _pqs[my_pq_id].pq.insert(u, gain); - _pqs[my_pq_id].lock.unlock(); - _pq_id[u] = my_pq_id; - }); +} // namespace impl +template +void AdvancedRebalancer::insertNodesInOverloadedBlocks( + mt_kahypar_partitioned_hypergraph_t &hypergraph) +{ + auto &phg = utils::cast(hypergraph); + + // init PQs 
if not done before + const size_t num_pqs = 2 * _context.shared_memory.num_threads; + if(_pqs.size() != num_pqs) + { + _pqs.assign(num_pqs, rebalancer::GuardedPQ(_pq_handles.data(), _node_state.size())); + } + for(auto &gpq : _pqs) + { + gpq.reset(); + } - for (rebalancer::GuardedPQ& gpq : _pqs) { - if (!gpq.pq.empty()) { - gpq.top_key = gpq.pq.topKey(); + // data structures to draw random PQs + std::atomic seed{ 555 }; + tbb::enumerable_thread_specific ets_tokens([&]() { + return impl::AccessToken(seed.fetch_add(1, std::memory_order_relaxed), num_pqs); + }); + + // insert nodes into PQs + phg.doParallelForAllNodes([&](HypernodeID u) { + const PartitionID b = phg.partID(u); + if(!_is_overloaded[b] || phg.isFixed(u)) + return; + + auto [target, gain] = + impl::computeBestTargetBlock(phg, _context, _gain_cache, u, phg.partID(u)); + if(target == kInvalidPartition) + return; + + _node_state[u].markAsMovable(); + _target_part[u] = target; + + auto &token = ets_tokens.local(); + int my_pq_id = -1; + while(true) + { + my_pq_id = token.getRandomPQ(); + if(_pqs[my_pq_id].lock.tryLock()) + { + break; } } + _pqs[my_pq_id].pq.insert(u, gain); + _pqs[my_pq_id].lock.unlock(); + _pq_id[u] = my_pq_id; + }); + + for(rebalancer::GuardedPQ &gpq : _pqs) + { + if(!gpq.pq.empty()) + { + gpq.top_key = gpq.pq.topKey(); + } } +} - template - std::pair AdvancedRebalancer::findMoves(mt_kahypar_partitioned_hypergraph_t& hypergraph) { - auto& phg = utils::cast(hypergraph); - int64_t attributed_gain = 0; - size_t global_move_id = 0; - size_t num_overloaded_blocks = _overloaded_blocks.size(); - - auto task = [&](size_t task_id) { - vec edges_with_gain_changes; - Gain local_attributed_gain = 0; - vec> nodes_to_update(_pqs.size()); - vec pqs_to_update; - - const int seed = phg.initialNumNodes() + task_id; - - impl::NextMoveFinder next_move_finder(seed, _context, phg, _gain_cache, _pqs, _target_part, _node_state); - - while (num_overloaded_blocks > 0 && next_move_finder.findNextMove()) { - const Move& m = next_move_finder.next_move; - const PartitionID from = phg.partID(m.node); - _node_state[m.node].markAsMovedAndUnlock(); - - if (phg.partWeight(from) <= _context.partition.max_part_weights[from]) { - impl::deactivateOverloadedBlock(&_is_overloaded[from], &num_overloaded_blocks); - continue; - } +template +std::pair AdvancedRebalancer::findMoves( + mt_kahypar_partitioned_hypergraph_t &hypergraph) +{ + auto &phg = utils::cast(hypergraph); + int64_t attributed_gain = 0; + size_t global_move_id = 0; + size_t num_overloaded_blocks = _overloaded_blocks.size(); + + auto task = [&](size_t task_id) { + vec edges_with_gain_changes; + Gain local_attributed_gain = 0; + vec > nodes_to_update(_pqs.size()); + vec pqs_to_update; + + const int seed = phg.initialNumNodes() + task_id; + + impl::NextMoveFinder next_move_finder(seed, _context, phg, _gain_cache, _pqs, + _target_part, _node_state); + + while(num_overloaded_blocks > 0 && next_move_finder.findNextMove()) + { + const Move &m = next_move_finder.next_move; + const PartitionID from = phg.partID(m.node); + _node_state[m.node].markAsMovedAndUnlock(); + + if(phg.partWeight(from) <= _context.partition.max_part_weights[from]) + { + impl::deactivateOverloadedBlock(&_is_overloaded[from], &num_overloaded_blocks); + continue; + } - edges_with_gain_changes.clear(); - size_t move_id = 0; - bool moved = phg.changeNodePart( - _gain_cache, m.node, m.from, m.to, - _context.partition.max_part_weights[m.to], - [&] { move_id = __atomic_fetch_add(&global_move_id, 1, __ATOMIC_RELAXED); }, - [&](const 
SynchronizedEdgeUpdate& sync_update) { - local_attributed_gain += AttributedGains::gain(sync_update); - if (!PartitionedHypergraph::is_graph && GainCache::triggersDeltaGainUpdate(sync_update)) { - edges_with_gain_changes.push_back(sync_update.he); - } - } - ); - - - - if (!moved) continue; - - auto update_neighbor = [&](HypernodeID v) { - if (v != m.node && _node_state[v].tryLock()) { - int my_pq_id = _pq_id[v]; - assert(my_pq_id != -1); - if (nodes_to_update[my_pq_id].empty()) { - pqs_to_update.push_back(my_pq_id); + edges_with_gain_changes.clear(); + size_t move_id = 0; + bool moved = phg.changeNodePart( + _gain_cache, m.node, m.from, m.to, _context.partition.max_part_weights[m.to], + [&] { move_id = __atomic_fetch_add(&global_move_id, 1, __ATOMIC_RELAXED); }, + [&](const SynchronizedEdgeUpdate &sync_update) { + local_attributed_gain += AttributedGains::gain(sync_update); + if(!PartitionedHypergraph::is_graph && + GainCache::triggersDeltaGainUpdate(sync_update)) + { + edges_with_gain_changes.push_back(sync_update.he); } - nodes_to_update[my_pq_id].push_back(v); - next_move_finder.recomputeTopGainMove(v, m); - } - }; - - // update neighbors - if constexpr (PartitionedHypergraph::is_graph) { - for (const auto e : phg.incidentEdges(m.node)) { - HypernodeID v = phg.edgeTarget(e); - update_neighbor(v); + }); + + if(!moved) + continue; + + auto update_neighbor = [&](HypernodeID v) { + if(v != m.node && _node_state[v].tryLock()) + { + int my_pq_id = _pq_id[v]; + assert(my_pq_id != -1); + if(nodes_to_update[my_pq_id].empty()) + { + pqs_to_update.push_back(my_pq_id); } - } else { - for (HyperedgeID e : edges_with_gain_changes) { - if (phg.edgeSize(e) < _context.partition.ignore_hyperedge_size_threshold) { - for (HypernodeID v : phg.pins(e)) { - update_neighbor(v); - } + nodes_to_update[my_pq_id].push_back(v); + next_move_finder.recomputeTopGainMove(v, m); + } + }; + + // update neighbors + if constexpr(PartitionedHypergraph::is_graph) + { + for(const auto e : phg.incidentEdges(m.node)) + { + HypernodeID v = phg.edgeTarget(e); + update_neighbor(v); + } + } + else + { + for(HyperedgeID e : edges_with_gain_changes) + { + if(phg.edgeSize(e) < _context.partition.ignore_hyperedge_size_threshold) + { + for(HypernodeID v : phg.pins(e)) + { + update_neighbor(v); } } } + } - while (!pqs_to_update.empty()) { - for (size_t i = 0; i < pqs_to_update.size(); ++i) { - int my_pq_id = pqs_to_update[i]; - auto& gpq = _pqs[my_pq_id]; - auto& pq = gpq.pq; - if (gpq.lock.tryLock()) { - for (HypernodeID v : nodes_to_update[my_pq_id]) { - if (pq.contains(v)) { - if (_target_part[v] != kInvalidPartition) { - Gain new_gain_int = _gain_cache.gain(v, phg.partID(v), _target_part[v]); - float new_gain = impl::transformGain(new_gain_int, phg.nodeWeight(v)); - pq.adjustKey(v, new_gain); - } else { - pq.remove(v); - } + while(!pqs_to_update.empty()) + { + for(size_t i = 0; i < pqs_to_update.size(); ++i) + { + int my_pq_id = pqs_to_update[i]; + auto &gpq = _pqs[my_pq_id]; + auto &pq = gpq.pq; + if(gpq.lock.tryLock()) + { + for(HypernodeID v : nodes_to_update[my_pq_id]) + { + if(pq.contains(v)) + { + if(_target_part[v] != kInvalidPartition) + { + Gain new_gain_int = _gain_cache.gain(v, phg.partID(v), _target_part[v]); + float new_gain = impl::transformGain(new_gain_int, phg.nodeWeight(v)); + pq.adjustKey(v, new_gain); + } + else + { + pq.remove(v); } - _node_state[v].unlock(); } - - gpq.lock.unlock(); - pqs_to_update[i] = pqs_to_update.back(); - pqs_to_update.pop_back(); - nodes_to_update[my_pq_id].clear(); + 
_node_state[v].unlock(); } + + gpq.lock.unlock(); + pqs_to_update[i] = pqs_to_update.back(); + pqs_to_update.pop_back(); + nodes_to_update[my_pq_id].clear(); } } - - _moves[move_id] = m; } - __atomic_fetch_add(&attributed_gain, local_attributed_gain, __ATOMIC_RELAXED); - }; - tbb::task_group tg; - for (size_t i = 0; i < _context.shared_memory.num_threads; ++i) { tg.run(std::bind(task, i)); } - tg.wait(); + _moves[move_id] = m; + } + __atomic_fetch_add(&attributed_gain, local_attributed_gain, __ATOMIC_RELAXED); + }; - return std::make_pair(attributed_gain, global_move_id); + tbb::task_group tg; + for(size_t i = 0; i < _context.shared_memory.num_threads; ++i) + { + tg.run(std::bind(task, i)); } + tg.wait(); - template - bool AdvancedRebalancer::refineInternalParallel(mt_kahypar_partitioned_hypergraph_t& hypergraph, - vec>* moves_by_part, - vec* moves_linear, - Metrics& best_metric) { - auto& phg = utils::cast(hypergraph); - - _overloaded_blocks.clear(); - _is_overloaded.assign(_context.partition.k, false); - for (PartitionID k = 0; k < _context.partition.k; ++k) { - if (phg.partWeight(k) > _context.partition.max_part_weights[k]) { - _overloaded_blocks.push_back(k); - _is_overloaded[k] = 1; - } + return std::make_pair(attributed_gain, global_move_id); +} + +template +bool AdvancedRebalancer::refineInternalParallel( + mt_kahypar_partitioned_hypergraph_t &hypergraph, vec > *moves_by_part, + vec *moves_linear, Metrics &best_metric) +{ + auto &phg = utils::cast(hypergraph); + + _overloaded_blocks.clear(); + _is_overloaded.assign(_context.partition.k, false); + for(PartitionID k = 0; k < _context.partition.k; ++k) + { + if(phg.partWeight(k) > _context.partition.max_part_weights[k]) + { + _overloaded_blocks.push_back(k); + _is_overloaded[k] = 1; } + } - insertNodesInOverloadedBlocks(hypergraph); + insertNodesInOverloadedBlocks(hypergraph); - auto [attributed_gain, num_moves_performed] = findMoves(hypergraph); + auto [attributed_gain, num_moves_performed] = findMoves(hypergraph); - if (moves_by_part != nullptr) { - moves_by_part->resize(_context.partition.k); - for (auto& direction : *moves_by_part) direction.clear(); - for (size_t i = 0; i < num_moves_performed; ++i) { - (*moves_by_part)[_moves[i].from].push_back(_moves[i]); - } - } else if (moves_linear != nullptr) { - moves_linear->clear(); - moves_linear->reserve(num_moves_performed); - for (size_t i = 0; i < num_moves_performed; ++i) { - moves_linear->push_back(_moves[i]); - } + if(moves_by_part != nullptr) + { + moves_by_part->resize(_context.partition.k); + for(auto &direction : *moves_by_part) + direction.clear(); + for(size_t i = 0; i < num_moves_performed; ++i) + { + (*moves_by_part)[_moves[i].from].push_back(_moves[i]); } - - best_metric.quality += attributed_gain; - best_metric.imbalance = metrics::imbalance(phg, _context); - - size_t num_overloaded_blocks = 0; - for (PartitionID b = 0; b < _context.partition.k; ++b) { - if (phg.partWeight(b) > _context.partition.max_part_weights[b]) { - num_overloaded_blocks++; - } + } + else if(moves_linear != nullptr) + { + moves_linear->clear(); + moves_linear->reserve(num_moves_performed); + for(size_t i = 0; i < num_moves_performed; ++i) + { + moves_linear->push_back(_moves[i]); } + } - phg.doParallelForAllNodes([&](HypernodeID u) { - _node_state[u].reset(); - }); + best_metric.quality += attributed_gain; + best_metric.imbalance = metrics::imbalance(phg, _context); - for (auto& gpq : _pqs) { - gpq.pq.clear(); + size_t num_overloaded_blocks = 0; + for(PartitionID b = 0; b < _context.partition.k; 
++b) + { + if(phg.partWeight(b) > _context.partition.max_part_weights[b]) + { + num_overloaded_blocks++; } + } + + phg.doParallelForAllNodes([&](HypernodeID u) { _node_state[u].reset(); }); - return num_overloaded_blocks == 0; + for(auto &gpq : _pqs) + { + gpq.pq.clear(); } + return num_overloaded_blocks == 0; +} template -AdvancedRebalancer::AdvancedRebalancer( - HypernodeID num_nodes, const Context& context, GainCache& gain_cache) : - _context(context), - _gain_cache(gain_cache), - _current_k(_context.partition.k), - _gain(context), - _moves(num_nodes), - _target_part(num_nodes, kInvalidPartition), - _pq_handles(num_nodes, invalid_position), - _pq_id(num_nodes, -1), - _node_state(num_nodes) { } +AdvancedRebalancer::AdvancedRebalancer(HypernodeID num_nodes, + const Context &context, + GainCache &gain_cache) : + _context(context), + _gain_cache(gain_cache), _current_k(_context.partition.k), _gain(context), + _moves(num_nodes), _target_part(num_nodes, kInvalidPartition), + _pq_handles(num_nodes, invalid_position), _pq_id(num_nodes, -1), + _node_state(num_nodes) +{ +} template -AdvancedRebalancer::AdvancedRebalancer( - HypernodeID num_nodes, const Context& context, gain_cache_t gain_cache) : - AdvancedRebalancer(num_nodes, context, GainCachePtr::cast(gain_cache)) { } - +AdvancedRebalancer::AdvancedRebalancer(HypernodeID num_nodes, + const Context &context, + gain_cache_t gain_cache) : + AdvancedRebalancer(num_nodes, context, GainCachePtr::cast(gain_cache)) +{ +} template -bool AdvancedRebalancer::refineImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec& , Metrics& best_metrics, double) { +bool AdvancedRebalancer::refineImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph, const vec &, + Metrics &best_metrics, double) +{ return refineInternalParallel(hypergraph, nullptr, nullptr, best_metrics); } template -void AdvancedRebalancer::initializeImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph) { - auto& phg = utils::cast(hypergraph); +void AdvancedRebalancer::initializeImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph) +{ + auto &phg = utils::cast(hypergraph); - if (!_gain_cache.isInitialized()) { + if(!_gain_cache.isInitialized()) + { _gain_cache.initializeGainCache(phg); } } template -bool AdvancedRebalancer::refineAndOutputMovesImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec& , - vec>& moves_by_part, - Metrics& best_metrics, - const double) { +bool AdvancedRebalancer::refineAndOutputMovesImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph, const vec &, + vec > &moves_by_part, Metrics &best_metrics, const double) +{ return refineInternalParallel(hypergraph, &moves_by_part, nullptr, best_metrics); } template -bool AdvancedRebalancer::refineAndOutputMovesLinearImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec& , - vec& moves, - Metrics& best_metrics, - const double) { +bool AdvancedRebalancer::refineAndOutputMovesLinearImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph, const vec &, + vec &moves, Metrics &best_metrics, const double) +{ return refineInternalParallel(hypergraph, nullptr, &moves, best_metrics); } // explicitly instantiate so the compiler can generate them when compiling this cpp file namespace { - #define ADVANCED_REBALANCER(X) AdvancedRebalancer +#define ADVANCED_REBALANCER(X) AdvancedRebalancer } // explicitly instantiate so the compiler can generate them when compiling this cpp file INSTANTIATE_CLASS_WITH_VALID_TRAITS(ADVANCED_REBALANCER) -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git 
a/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.h b/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.h index e3c5daadc..7a4e035fa 100644 --- a/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.h +++ b/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.h @@ -29,53 +29,59 @@ #include "mt-kahypar/datastructures/priority_queue.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/metrics.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" -#include "mt-kahypar/partition/refinement/i_rebalancer.h" #include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" +#include "mt-kahypar/partition/refinement/i_rebalancer.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" namespace mt_kahypar { namespace rebalancer { - struct GuardedPQ { - GuardedPQ(PosT *handles, size_t num_nodes) : pq(handles, num_nodes) { } - SpinLock lock; - ds::MaxHeap pq; - float top_key = std::numeric_limits::min(); - void reset() { - pq.clear(); - top_key = std::numeric_limits::min(); - } - }; +struct GuardedPQ +{ + GuardedPQ(PosT *handles, size_t num_nodes) : pq(handles, num_nodes) {} + SpinLock lock; + ds::MaxHeap pq; + float top_key = std::numeric_limits::min(); + void reset() + { + pq.clear(); + top_key = std::numeric_limits::min(); + } +}; - struct NodeState { - uint8_t state = 0; +struct NodeState +{ + uint8_t state = 0; - bool canMove() const { return state == 1; } + bool canMove() const { return state == 1; } - bool isLocked() const { return state == 2; } + bool isLocked() const { return state == 2; } - bool wasMoved() const { return state == 3; } + bool wasMoved() const { return state == 3; } - // Returns true if the node is marked as movable, is not locked and taking the lock now succeeds - bool tryLock() { - uint8_t expected = 1; - return state == 1 && __atomic_compare_exchange_n(&state, &expected, 2, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); - } + // Returns true if the node is marked as movable, is not locked and taking the lock now + // succeeds + bool tryLock() + { + uint8_t expected = 1; + return state == 1 && __atomic_compare_exchange_n(&state, &expected, 2, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + } - void unlock() { __atomic_store_n(&state, 1, __ATOMIC_RELEASE); } + void unlock() { __atomic_store_n(&state, 1, __ATOMIC_RELEASE); } - void markAsMovedAndUnlock() { __atomic_store_n(&state, 3, __ATOMIC_RELEASE); } + void markAsMovedAndUnlock() { __atomic_store_n(&state, 3, __ATOMIC_RELEASE); } - void markAsMovable() { state = 1; } + void markAsMovable() { state = 1; } - void reset() { state = 0; } - }; + void reset() { state = 0; } +}; } // namespace rebalancer - template -class AdvancedRebalancer final : public IRebalancer { +class AdvancedRebalancer final : public IRebalancer +{ private: using PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; using GainCache = typename GraphAndGainTypes::GainCache; @@ -86,48 +92,40 @@ class AdvancedRebalancer final : public IRebalancer { static constexpr bool enable_heavy_assert = false; public: + explicit AdvancedRebalancer(HypernodeID num_nodes, const Context &context, + GainCache &gain_cache); - explicit AdvancedRebalancer(HypernodeID num_nodes, - const Context& context, - GainCache& gain_cache); - - explicit AdvancedRebalancer(HypernodeID num_nodes, - const Context& context, - gain_cache_t gain_cache); + explicit AdvancedRebalancer(HypernodeID num_nodes, const Context &context, + gain_cache_t gain_cache); private: - bool 
refineImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec& refinement_nodes, - Metrics& best_metrics, + bool refineImpl(mt_kahypar_partitioned_hypergraph_t &hypergraph, + const vec &refinement_nodes, Metrics &best_metrics, double); - void initializeImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph) final; + void initializeImpl(mt_kahypar_partitioned_hypergraph_t &hypergraph) final; - bool refineAndOutputMovesImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec& refinement_nodes, - vec>& moves_by_part, - Metrics& best_metrics, + bool refineAndOutputMovesImpl(mt_kahypar_partitioned_hypergraph_t &hypergraph, + const vec &refinement_nodes, + vec > &moves_by_part, Metrics &best_metrics, const double); - bool refineAndOutputMovesLinearImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec& refinement_nodes, - vec& moves, - Metrics& best_metrics, + bool refineAndOutputMovesLinearImpl(mt_kahypar_partitioned_hypergraph_t &hypergraph, + const vec &refinement_nodes, + vec &moves, Metrics &best_metrics, const double); - bool refineInternalParallel(mt_kahypar_partitioned_hypergraph_t& hypergraph, - vec>* moves_by_part, - vec* moves_linear, - Metrics& best_metric); + bool refineInternalParallel(mt_kahypar_partitioned_hypergraph_t &hypergraph, + vec > *moves_by_part, vec *moves_linear, + Metrics &best_metric); - const Context& _context; - GainCache& _gain_cache; + const Context &_context; + GainCache &_gain_cache; PartitionID _current_k; GainCalculator _gain; - - void insertNodesInOverloadedBlocks(mt_kahypar_partitioned_hypergraph_t& hypergraph); - std::pair findMoves(mt_kahypar_partitioned_hypergraph_t& hypergraph); + void insertNodesInOverloadedBlocks(mt_kahypar_partitioned_hypergraph_t &hypergraph); + std::pair findMoves(mt_kahypar_partitioned_hypergraph_t &hypergraph); ds::Array _moves; vec _pqs; @@ -139,4 +137,4 @@ class AdvancedRebalancer final : public IRebalancer { ds::Array _node_state; }; -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.cpp b/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.cpp index 26285fd32..8d2e7ac9d 100644 --- a/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.cpp +++ b/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.cpp @@ -27,7 +27,6 @@ #include "mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.h" - #include #include @@ -37,263 +36,313 @@ #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/refinement/gains/gain_definitions.h" -#include "mt-kahypar/utils/timer.h" #include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/utils/timer.h" namespace mt_kahypar { - template - bool SimpleRebalancer::refineImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec&, - Metrics& best_metrics, - double) { - PartitionedHypergraph& phg = utils::cast(hypergraph); - resizeDataStructuresForCurrentK(); - // If partition is imbalanced, rebalancer is activated - bool improvement = false; - if ( !metrics::isBalanced(phg, _context) ) { - _gain.reset(); - - for ( PartitionID block = 0; block < _context.partition.k; ++block ) { - _part_weights[block] = phg.partWeight(block); - } - - // This function is passed as lambda to the changeNodePart function and used - // to calculate the "real" delta of a move (in terms of the used objective function). 
- auto objective_delta = [&](const SynchronizedEdgeUpdate& sync_update) { - _gain.computeDeltaForHyperedge(sync_update); - }; +template +bool SimpleRebalancer::refineImpl( + mt_kahypar_partitioned_hypergraph_t &hypergraph, const vec &, + Metrics &best_metrics, double) +{ + PartitionedHypergraph &phg = utils::cast(hypergraph); + resizeDataStructuresForCurrentK(); + // If partition is imbalanced, rebalancer is activated + bool improvement = false; + if(!metrics::isBalanced(phg, _context)) + { + _gain.reset(); + + for(PartitionID block = 0; block < _context.partition.k; ++block) + { + _part_weights[block] = phg.partWeight(block); + } - if ( _context.partition.preset_type != PresetType::large_k ) { - // TODO: This code must be optimized to work for large k - vec moves_to_empty_blocks = repairEmptyBlocks(phg); - for (Move& m : moves_to_empty_blocks) { - moveVertex(phg, m.node, m, objective_delta); - } + // This function is passed as lambda to the changeNodePart function and used + // to calculate the "real" delta of a move (in terms of the used objective function). + auto objective_delta = [&](const SynchronizedEdgeUpdate &sync_update) { + _gain.computeDeltaForHyperedge(sync_update); + }; + + if(_context.partition.preset_type != PresetType::large_k) + { + // TODO: This code must be optimized to work for large k + vec moves_to_empty_blocks = repairEmptyBlocks(phg); + for(Move &m : moves_to_empty_blocks) + { + moveVertex(phg, m.node, m, objective_delta); } + } - // We first try to perform moves that does not worsen solution quality of the partition - // Moves that would worsen the solution quality are gathered in a thread local priority queue - // and processed afterwards if partition is still imbalanced - std::atomic idx(0); - tbb::enumerable_thread_specific move_pqs([&] { - return IndexedMovePQ(idx++); - }); - phg.doParallelForAllNodes([&](const HypernodeID& hn) { - const PartitionID from = phg.partID(hn); - if ( phg.isBorderNode(hn) && !phg.isFixed(hn) && - phg.partWeight(from) > _context.partition.max_part_weights[from] ) { - Move rebalance_move = _gain.computeMaxGainMove(phg, hn, true /* rebalance move */); - if ( rebalance_move.gain <= 0 ) { - moveVertex(phg, hn, rebalance_move, objective_delta); - } else if ( rebalance_move.gain != std::numeric_limits::max() ) { + // We first try to perform moves that does not worsen solution quality of the + // partition Moves that would worsen the solution quality are gathered in a thread + // local priority queue and processed afterwards if partition is still imbalanced + std::atomic idx(0); + tbb::enumerable_thread_specific move_pqs( + [&] { return IndexedMovePQ(idx++); }); + phg.doParallelForAllNodes([&](const HypernodeID &hn) { + const PartitionID from = phg.partID(hn); + if(phg.isBorderNode(hn) && !phg.isFixed(hn) && + phg.partWeight(from) > _context.partition.max_part_weights[from]) + { + Move rebalance_move = + _gain.computeMaxGainMove(phg, hn, true /* rebalance move */); + if(rebalance_move.gain <= 0) + { + moveVertex(phg, hn, rebalance_move, objective_delta); + } + else if(rebalance_move.gain != std::numeric_limits::max()) + { + move_pqs.local().pq.emplace(std::move(rebalance_move)); + } + else + { + // Try to find a move to an non-adjacent block + rebalance_move = _gain.computeMaxGainMove(phg, hn, true /* rebalance move */, + true /* non-adjacent block */); + if(rebalance_move.gain != std::numeric_limits::max()) + { move_pqs.local().pq.emplace(std::move(rebalance_move)); - } else { - // Try to find a move to an non-adjacent block - 
rebalance_move = _gain.computeMaxGainMove(phg, hn, - true /* rebalance move */, true /* non-adjacent block */ ); - if ( rebalance_move.gain != std::numeric_limits::max() ) { - move_pqs.local().pq.emplace(std::move(rebalance_move)); + } + } + } + }); + + ASSERT( + [&] { + for(PartitionID block = 0; block < _context.partition.k; ++block) + { + if(_part_weights[block] != phg.partWeight(block)) + { + return false; } } + return true; + }(), + "Rebalancer part weights are wrong"); + + // If partition is still imbalanced, we try execute moves stored into + // the thread local priority queue which could possibly worsen solution quality + if(!metrics::isBalanced(phg, _context)) + { + + // Initialize minimum gain value of each priority queue + parallel::scalable_vector active_pqs(idx.load(), false); + parallel::scalable_vector min_pq_gain( + idx.load(), std::numeric_limits::max() - MIN_PQ_GAIN_THRESHOLD); + for(const IndexedMovePQ &idx_pq : move_pqs) + { + if(!idx_pq.pq.empty()) + { + min_pq_gain[idx_pq.idx] = idx_pq.pq.top().gain; } - }); + } - ASSERT([&] { - for ( PartitionID block = 0; block < _context.partition.k; ++block ) { - if ( _part_weights[block] != phg.partWeight(block) ) { - return false; + // Function returns minimum gain value of all priority queues + auto global_pq_min_gain = [&](const bool only_active_pqs) { + Gain min_gain = std::numeric_limits::max() - MIN_PQ_GAIN_THRESHOLD; + for(size_t i = 0; i < min_pq_gain.size(); ++i) + { + if((!only_active_pqs || active_pqs[i]) && min_pq_gain[i] < min_gain) + { + min_gain = min_pq_gain[i]; } } - return true; - }(), "Rebalancer part weights are wrong"); - - // If partition is still imbalanced, we try execute moves stored into - // the thread local priority queue which could possibly worsen solution quality - if ( !metrics::isBalanced(phg, _context) ) { - - // Initialize minimum gain value of each priority queue - parallel::scalable_vector active_pqs(idx.load(), false); - parallel::scalable_vector min_pq_gain(idx.load(), - std::numeric_limits::max() - MIN_PQ_GAIN_THRESHOLD); - for ( const IndexedMovePQ& idx_pq : move_pqs ) { - if ( !idx_pq.pq.empty() ) { - min_pq_gain[idx_pq.idx] = idx_pq.pq.top().gain; + return min_gain; + }; + + // We process each priority queue in parallel. When we perform + // a move we make sure that the current minimum gain value of the local + // PQ is within a certain threshold of the global minimum gain value. + // Otherwise, we perform busy waiting until all moves with a better gain + // are processed. + tbb::parallel_for_each(move_pqs, [&](IndexedMovePQ &idx_pq) { + const size_t idx = idx_pq.idx; + MovePQ &pq = idx_pq.pq; + active_pqs[idx] = true; + Gain current_global_min_pq_gain = global_pq_min_gain(false); + while(!pq.empty()) + { + Move move = pq.top(); + min_pq_gain[idx] = move.gain; + pq.pop(); + + // If the minimum gain value of the local priority queue is not within + // a certain threshold of the global priority queue, we perform busy waiting + // until all moves with a better gain of other pqs are performed. 
+ while(move.gain > current_global_min_pq_gain + MIN_PQ_GAIN_THRESHOLD) + { + current_global_min_pq_gain = global_pq_min_gain(true); } - } - // Function returns minimum gain value of all priority queues - auto global_pq_min_gain = [&](const bool only_active_pqs) { - Gain min_gain = std::numeric_limits::max() - MIN_PQ_GAIN_THRESHOLD; - for ( size_t i = 0; i < min_pq_gain.size(); ++i ) { - if ( (!only_active_pqs || active_pqs[i]) && min_pq_gain[i] < min_gain ) { - min_gain = min_pq_gain[i]; + const PartitionID from = move.from; + if(phg.partWeight(from) > _context.partition.max_part_weights[from]) + { + Move real_move = + _gain.computeMaxGainMove(phg, move.node, true /* rebalance move */); + if(real_move.gain == std::numeric_limits::max()) + { + // Compute move to non-adjacent block + real_move = + _gain.computeMaxGainMove(phg, move.node, true /* rebalance move */, + true /* non-adjacent block */); } - } - return min_gain; - }; - - // We process each priority queue in parallel. When we perform - // a move we make sure that the current minimum gain value of the local - // PQ is within a certain threshold of the global minimum gain value. - // Otherwise, we perform busy waiting until all moves with a better gain - // are processed. - tbb::parallel_for_each(move_pqs, [&](IndexedMovePQ& idx_pq) { - const size_t idx = idx_pq.idx; - MovePQ& pq = idx_pq.pq; - active_pqs[idx] = true; - Gain current_global_min_pq_gain = global_pq_min_gain(false); - while ( !pq.empty() ) { - Move move = pq.top(); - min_pq_gain[idx] = move.gain; - pq.pop(); - - // If the minimum gain value of the local priority queue is not within - // a certain threshold of the global priority queue, we perform busy waiting - // until all moves with a better gain of other pqs are performed. - while ( move.gain > current_global_min_pq_gain + MIN_PQ_GAIN_THRESHOLD ) { - current_global_min_pq_gain = global_pq_min_gain(true); + if(real_move.gain <= move.gain) + { + moveVertex(phg, real_move.node, real_move, objective_delta); } - - const PartitionID from = move.from; - if ( phg.partWeight(from) > _context.partition.max_part_weights[from] ) { - Move real_move = _gain.computeMaxGainMove(phg, move.node, true /* rebalance move */); - if ( real_move.gain == std::numeric_limits::max() ) { - // Compute move to non-adjacent block - real_move = _gain.computeMaxGainMove(phg, move.node, - true /* rebalance move */, true /* non-adjacent block */); - } - if ( real_move.gain <= move.gain ) { - moveVertex(phg, real_move.node, real_move, objective_delta); - } else if ( real_move.gain != std::numeric_limits::max() ) { - pq.emplace(std::move(real_move)); - } + else if(real_move.gain != std::numeric_limits::max()) + { + pq.emplace(std::move(real_move)); } } - active_pqs[idx] = false; - min_pq_gain[idx] = std::numeric_limits::max() - MIN_PQ_GAIN_THRESHOLD; - }); - } + } + active_pqs[idx] = false; + min_pq_gain[idx] = std::numeric_limits::max() - MIN_PQ_GAIN_THRESHOLD; + }); + } - // Update metrics statistics - Gain delta = _gain.delta(); - HEAVY_REFINEMENT_ASSERT(best_metrics.quality + delta == metrics::quality(phg, _context), + // Update metrics statistics + Gain delta = _gain.delta(); + HEAVY_REFINEMENT_ASSERT( + best_metrics.quality + delta == metrics::quality(phg, _context), V(best_metrics.quality) << V(delta) << V(metrics::quality(phg, _context))); - best_metrics.quality += delta; - improvement = delta < 0; - } - return improvement; + best_metrics.quality += delta; + improvement = delta < 0; } + return improvement; +} - template - vec 
SimpleRebalancer::repairEmptyBlocks(PartitionedHypergraph& phg) { - // First detect if there are any empty blocks. - const size_t k = size_t(_context.partition.k); - boost::dynamic_bitset<> is_empty(k); - vec empty_blocks; - for (size_t i = 0; i < k; ++i) { - if (phg.partWeight(PartitionID(i)) == 0) { - is_empty.set(i, true); - empty_blocks.push_back(PartitionID(i)); - } +template +vec +SimpleRebalancer::repairEmptyBlocks(PartitionedHypergraph &phg) +{ + // First detect if there are any empty blocks. + const size_t k = size_t(_context.partition.k); + boost::dynamic_bitset<> is_empty(k); + vec empty_blocks; + for(size_t i = 0; i < k; ++i) + { + if(phg.partWeight(PartitionID(i)) == 0) + { + is_empty.set(i, true); + empty_blocks.push_back(PartitionID(i)); } + } - vec moves_to_empty_blocks; - - // If so, find the best vertices to move to that block - while (is_empty.any()) { - - tbb::enumerable_thread_specific< vec > ets_scores(k, 0); - - // positive gain values correspond to "good" improvement. MovePQ uses std::greater (MinHeap) - // --> stores worst gains at the top where we can eject them - tbb::enumerable_thread_specific< vec< vec > > ets_best_move(k); - - phg.doParallelForAllNodes([&](const HypernodeID u) { - if ( !phg.isFixed(u) ) { - vec& scores = ets_scores.local(); - vec< vec >& move_proposals = ets_best_move.local(); - - const PartitionID from = phg.partID(u); - Gain unremovable = 0; - for (HyperedgeID e : phg.incidentEdges(u)) { - const HyperedgeWeight edge_weight = phg.edgeWeight(e); - if (phg.pinCountInPart(e, from) > 1) { - unremovable += edge_weight; - } - for (PartitionID i : phg.connectivitySet(e)) { - scores[i] += edge_weight; - } + vec moves_to_empty_blocks; + + // If so, find the best vertices to move to that block + while(is_empty.any()) + { + + tbb::enumerable_thread_specific > ets_scores(k, 0); + + // positive gain values correspond to "good" improvement. 
MovePQ uses std::greater + // (MinHeap) + // --> stores worst gains at the top where we can eject them + tbb::enumerable_thread_specific > > ets_best_move(k); + + phg.doParallelForAllNodes([&](const HypernodeID u) { + if(!phg.isFixed(u)) + { + vec &scores = ets_scores.local(); + vec > &move_proposals = ets_best_move.local(); + + const PartitionID from = phg.partID(u); + Gain unremovable = 0; + for(HyperedgeID e : phg.incidentEdges(u)) + { + const HyperedgeWeight edge_weight = phg.edgeWeight(e); + if(phg.pinCountInPart(e, from) > 1) + { + unremovable += edge_weight; } - - // maintain thread local priority queues of up to k best gains - for (const PartitionID to : empty_blocks) { - ASSERT(is_empty[to]); - if (to != from && phg.partWeight(from) > phg.nodeWeight(u) - && phg.nodeWeight(u) <= _context.partition.max_part_weights[to]) { - const Gain gain = scores[to] - unremovable; - vec& c = move_proposals[to]; - if (c.size() < k) { - c.push_back(Move { from, to, u, gain }); - std::push_heap(c.begin(), c.end(), MoveGainComparator()); - } else if (c.front().gain < gain) { - std::pop_heap(c.begin(), c.end(), MoveGainComparator()); - c.back() = { from, to, u, gain }; - std::push_heap(c.begin(), c.end(), MoveGainComparator()); - } - } - scores[to] = 0; + for(PartitionID i : phg.connectivitySet(e)) + { + scores[i] += edge_weight; } } - }); - - vec< vec > best_moves_per_part(k); - for (vec>& tlpq : ets_best_move) { - size_t i = is_empty.find_first(); - while (i != is_empty.npos) { - std::copy(tlpq[i].begin(), tlpq[i].end(), std::back_inserter(best_moves_per_part[i])); - i = is_empty.find_next(i); + // maintain thread local priority queues of up to k best gains + for(const PartitionID to : empty_blocks) + { + ASSERT(is_empty[to]); + if(to != from && phg.partWeight(from) > phg.nodeWeight(u) && + phg.nodeWeight(u) <= _context.partition.max_part_weights[to]) + { + const Gain gain = scores[to] - unremovable; + vec &c = move_proposals[to]; + if(c.size() < k) + { + c.push_back(Move{ from, to, u, gain }); + std::push_heap(c.begin(), c.end(), MoveGainComparator()); + } + else if(c.front().gain < gain) + { + std::pop_heap(c.begin(), c.end(), MoveGainComparator()); + c.back() = { from, to, u, gain }; + std::push_heap(c.begin(), c.end(), MoveGainComparator()); + } + } + scores[to] = 0; } } + }); - auto prefer_highest_gain = [&](const Move& lhs, const Move& rhs) { - const HypernodeWeight pwl = phg.partWeight(phg.partID(lhs.node)); - const HypernodeWeight pwr = phg.partWeight(phg.partID(rhs.node)); - return std::tie(lhs.gain, pwl, lhs.node) > std::tie(rhs.gain, pwr, rhs.node); - }; - - auto node_already_used = [&](HypernodeID node) { - return std::any_of(moves_to_empty_blocks.begin(), - moves_to_empty_blocks.end(), - [node](const Move& m) { return m.node == node; } - ); - }; + vec > best_moves_per_part(k); + for(vec > &tlpq : ets_best_move) + { size_t i = is_empty.find_first(); - while (i != is_empty.npos) { - vec& c = best_moves_per_part[i]; - std::sort(c.begin(), c.end(), prefer_highest_gain); - - size_t j = 0; - while (j < c.size() && node_already_used(c[j].node)) { ++j; } - if (j != c.size()) { - moves_to_empty_blocks.push_back(c[j]); - is_empty.set(i, false); - } - + while(i != is_empty.npos) + { + std::copy(tlpq[i].begin(), tlpq[i].end(), + std::back_inserter(best_moves_per_part[i])); i = is_empty.find_next(i); } + } + + auto prefer_highest_gain = [&](const Move &lhs, const Move &rhs) { + const HypernodeWeight pwl = phg.partWeight(phg.partID(lhs.node)); + const HypernodeWeight pwr = 
phg.partWeight(phg.partID(rhs.node)); + return std::tie(lhs.gain, pwl, lhs.node) > std::tie(rhs.gain, pwr, rhs.node); + }; + + auto node_already_used = [&](HypernodeID node) { + return std::any_of(moves_to_empty_blocks.begin(), moves_to_empty_blocks.end(), + [node](const Move &m) { return m.node == node; }); + }; + + size_t i = is_empty.find_first(); + while(i != is_empty.npos) + { + vec &c = best_moves_per_part[i]; + std::sort(c.begin(), c.end(), prefer_highest_gain); + + size_t j = 0; + while(j < c.size() && node_already_used(c[j].node)) + { + ++j; + } + if(j != c.size()) + { + moves_to_empty_blocks.push_back(c[j]); + is_empty.set(i, false); + } + i = is_empty.find_next(i); } - return moves_to_empty_blocks; } + return moves_to_empty_blocks; +} - // explicitly instantiate so the compiler can generate them when compiling this cpp file - namespace { - #define SIMPLE_REBALANCER(X) SimpleRebalancer - } +// explicitly instantiate so the compiler can generate them when compiling this cpp file +namespace { +#define SIMPLE_REBALANCER(X) SimpleRebalancer +} - // explicitly instantiate so the compiler can generate them when compiling this cpp file - INSTANTIATE_CLASS_WITH_VALID_TRAITS(SIMPLE_REBALANCER) +// explicitly instantiate so the compiler can generate them when compiling this cpp file +INSTANTIATE_CLASS_WITH_VALID_TRAITS(SIMPLE_REBALANCER) } diff --git a/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.h b/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.h index 498e685e8..49f0193cf 100644 --- a/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.h +++ b/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.h @@ -31,17 +31,18 @@ #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/metrics.h" -#include "mt-kahypar/partition/refinement/i_refiner.h" -#include "mt-kahypar/partition/refinement/i_rebalancer.h" -#include "mt-kahypar/partition/refinement/gains/km1/km1_gain_computation.h" #include "mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h" #include "mt-kahypar/partition/refinement/gains/gain_cache_ptr.h" +#include "mt-kahypar/partition/refinement/gains/km1/km1_gain_computation.h" +#include "mt-kahypar/partition/refinement/i_rebalancer.h" +#include "mt-kahypar/partition/refinement/i_refiner.h" #include "mt-kahypar/utils/cast.h" namespace mt_kahypar { template -class SimpleRebalancer final : public IRebalancer { - private: +class SimpleRebalancer final : public IRebalancer +{ +private: using PartitionedHypergraph = typename GraphAndGainTypes::PartitionedHypergraph; using GainCache = typename GraphAndGainTypes::GainCache; using GainCalculator = typename GraphAndGainTypes::GainComputation; @@ -53,84 +54,86 @@ class SimpleRebalancer final : public IRebalancer { static constexpr Gain MIN_PQ_GAIN_THRESHOLD = 5; public: - - struct MoveGainComparator { - bool operator()(const Move& lhs, const Move& rhs) { + struct MoveGainComparator + { + bool operator()(const Move &lhs, const Move &rhs) + { return lhs.gain > rhs.gain || (lhs.gain == rhs.gain && lhs.node < rhs.node); } }; using MovePQ = std::priority_queue, MoveGainComparator>; - struct IndexedMovePQ { - explicit IndexedMovePQ(const size_t idx) : - idx(idx), - pq() { } + struct IndexedMovePQ + { + explicit IndexedMovePQ(const size_t idx) : idx(idx), pq() {} size_t idx; MovePQ pq; }; - explicit SimpleRebalancer(const Context& context) : - _context(context), - _current_k(context.partition.k), - _gain(context), - _part_weights(_context.partition.k) { } + explicit 
SimpleRebalancer(const Context &context) : + _context(context), _current_k(context.partition.k), _gain(context), + _part_weights(_context.partition.k) + { + } - explicit SimpleRebalancer(HypernodeID , const Context& context, GainCache&) : - SimpleRebalancer(context) { } + explicit SimpleRebalancer(HypernodeID, const Context &context, GainCache &) : + SimpleRebalancer(context) + { + } - explicit SimpleRebalancer(HypernodeID num_nodes, const Context& context, gain_cache_t gain_cache) : - SimpleRebalancer(num_nodes, context, GainCachePtr::cast(gain_cache)) {} + explicit SimpleRebalancer(HypernodeID num_nodes, const Context &context, + gain_cache_t gain_cache) : + SimpleRebalancer(num_nodes, context, GainCachePtr::cast(gain_cache)) + { + } - SimpleRebalancer(const SimpleRebalancer&) = delete; - SimpleRebalancer(SimpleRebalancer&&) = delete; + SimpleRebalancer(const SimpleRebalancer &) = delete; + SimpleRebalancer(SimpleRebalancer &&) = delete; - SimpleRebalancer & operator= (const SimpleRebalancer &) = delete; - SimpleRebalancer & operator= (SimpleRebalancer &&) = delete; + SimpleRebalancer &operator=(const SimpleRebalancer &) = delete; + SimpleRebalancer &operator=(SimpleRebalancer &&) = delete; - bool refineImpl(mt_kahypar_partitioned_hypergraph_t& hypergraph, - const vec&, - Metrics& best_metrics, - double) final ; + bool refineImpl(mt_kahypar_partitioned_hypergraph_t &hypergraph, + const vec &, Metrics &best_metrics, double) final; - void initializeImpl(mt_kahypar_partitioned_hypergraph_t&) final { } + void initializeImpl(mt_kahypar_partitioned_hypergraph_t &) final {} - bool refineAndOutputMovesImpl(mt_kahypar_partitioned_hypergraph_t&, - const vec&, - vec>&, - Metrics&, - const double) override final { + bool refineAndOutputMovesImpl(mt_kahypar_partitioned_hypergraph_t &, + const vec &, vec > &, Metrics &, + const double) override final + { ERR("simple rebalancer can not be used for unconstrained refinement"); } - bool refineAndOutputMovesLinearImpl(mt_kahypar_partitioned_hypergraph_t&, - const vec&, - vec&, - Metrics&, - const double) override final { + bool refineAndOutputMovesLinearImpl(mt_kahypar_partitioned_hypergraph_t &, + const vec &, vec &, Metrics &, + const double) override final + { ERR("simple rebalancer can not be used for unconstrained refinement"); } - vec repairEmptyBlocks(PartitionedHypergraph& phg); + vec repairEmptyBlocks(PartitionedHypergraph &phg); private: - - template - bool moveVertex(PartitionedHypergraph& phg, - const HypernodeID hn, - const Move& move, - const F& objective_delta) { + template + bool moveVertex(PartitionedHypergraph &phg, const HypernodeID hn, const Move &move, + const F &objective_delta) + { ASSERT(phg.partID(hn) == move.from); const PartitionID from = move.from; const PartitionID to = move.to; const HypernodeWeight node_weight = phg.nodeWeight(hn); - if ( from != to ) { + if(from != to) + { // Before moving, we ensure that the block we move the vertex to does // not become overloaded _part_weights[to] += node_weight; - if ( _part_weights[to] <= _context.partition.max_part_weights[to] ) { - if ( phg.changeNodePart(hn, from, to, objective_delta) ) { + if(_part_weights[to] <= _context.partition.max_part_weights[to]) + { + if(phg.changeNodePart(hn, from, to, objective_delta)) + { DBG << "Moved vertex" << hn << "from block" << from << "to block" << to << "with gain" << move.gain; _part_weights[from] -= node_weight; @@ -142,21 +145,22 @@ class SimpleRebalancer final : public IRebalancer { return false; } - - void resizeDataStructuresForCurrentK() { 
+ void resizeDataStructuresForCurrentK() + { // If the number of blocks changes, we resize data structures // (can happen during deep multilevel partitioning) - if ( _current_k != _context.partition.k ) { + if(_current_k != _context.partition.k) + { _current_k = _context.partition.k; _gain.changeNumberOfBlocks(_current_k); _part_weights = parallel::scalable_vector(_context.partition.k); } } - const Context& _context; + const Context &_context; PartitionID _current_k; GainCalculator _gain; parallel::scalable_vector _part_weights; }; -} // namespace kahypar +} // namespace kahypar diff --git a/mt-kahypar/partition/registries/register_coarsening_algorithms.cpp b/mt-kahypar/partition/registries/register_coarsening_algorithms.cpp index 9f7e93f07..c9d9d860e 100644 --- a/mt-kahypar/partition/registries/register_coarsening_algorithms.cpp +++ b/mt-kahypar/partition/registries/register_coarsening_algorithms.cpp @@ -33,74 +33,67 @@ #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES #include "mt-kahypar/partition/coarsening/nlevel_coarsener.h" #endif -#include "mt-kahypar/partition/coarsening/multilevel_coarsener.h" #include "mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.h" +#include "mt-kahypar/partition/coarsening/multilevel_coarsener.h" #include "mt-kahypar/partition/coarsening/policies/rating_acceptance_policy.h" #include "mt-kahypar/partition/coarsening/policies/rating_heavy_node_penalty_policy.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/factories.h" - namespace mt_kahypar { -using MultilevelCoarsenerDispatcher = kahypar::meta::StaticMultiDispatchFactory >; +using MultilevelCoarsenerDispatcher = kahypar::meta::StaticMultiDispatchFactory< + MultilevelCoarsener, ICoarsener, + kahypar::meta::Typelist >; -using DeterministicCoarsenerDispatcher = kahypar::meta::StaticMultiDispatchFactory>; +using DeterministicCoarsenerDispatcher = + kahypar::meta::StaticMultiDispatchFactory >; #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES -using NLevelCoarsenerDispatcher = kahypar::meta::StaticMultiDispatchFactory >; +using NLevelCoarsenerDispatcher = kahypar::meta::StaticMultiDispatchFactory< + NLevelCoarsener, ICoarsener, + kahypar::meta::Typelist >; #endif +#define REGISTER_DISPATCHED_COARSENER(id, dispatcher, ...) \ + static kahypar::meta::Registrar register_##dispatcher( \ + id, [](mt_kahypar_hypergraph_t hypergraph, const Context &context, \ + uncoarsening_data_t *uncoarseningData) { \ + return dispatcher::create( \ + std::forward_as_tuple(hypergraph, context, uncoarseningData), __VA_ARGS__); \ + }) -#define REGISTER_DISPATCHED_COARSENER(id, dispatcher, ...) 
\ - static kahypar::meta::Registrar register_ ## dispatcher( \ - id, \ - [](mt_kahypar_hypergraph_t hypergraph, const Context& context, uncoarsening_data_t* uncoarseningData) { \ - return dispatcher::create( \ - std::forward_as_tuple(hypergraph, context, uncoarseningData), \ - __VA_ARGS__ \ - ); \ - }) - - -REGISTER_DISPATCHED_COARSENER(CoarseningAlgorithm::multilevel_coarsener, - MultilevelCoarsenerDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type), - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.coarsening.rating.rating_function), - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.coarsening.rating.heavy_node_penalty_policy), - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.coarsening.rating.acceptance_policy)); +REGISTER_DISPATCHED_COARSENER( + CoarseningAlgorithm::multilevel_coarsener, MultilevelCoarsenerDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type), + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.coarsening.rating.rating_function), + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.coarsening.rating.heavy_node_penalty_policy), + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.coarsening.rating.acceptance_policy)); #ifdef KAHYPAR_ENABLE_HIGHEST_QUALITY_FEATURES -REGISTER_DISPATCHED_COARSENER(CoarseningAlgorithm::nlevel_coarsener, - NLevelCoarsenerDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type), - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.coarsening.rating.rating_function), - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.coarsening.rating.heavy_node_penalty_policy), - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.coarsening.rating.acceptance_policy)); +REGISTER_DISPATCHED_COARSENER( + CoarseningAlgorithm::nlevel_coarsener, NLevelCoarsenerDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type), + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.coarsening.rating.rating_function), + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.coarsening.rating.heavy_node_penalty_policy), + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.coarsening.rating.acceptance_policy)); #endif -REGISTER_DISPATCHED_COARSENER(CoarseningAlgorithm::deterministic_multilevel_coarsener, - DeterministicCoarsenerDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type)); +REGISTER_DISPATCHED_COARSENER( + CoarseningAlgorithm::deterministic_multilevel_coarsener, + DeterministicCoarsenerDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type)); -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/registries/register_initial_partitioning_algorithms.cpp b/mt-kahypar/partition/registries/register_initial_partitioning_algorithms.cpp index 7b6eec76e..1ab056f49 100644 --- a/mt-kahypar/partition/registries/register_initial_partitioning_algorithms.cpp +++ b/mt-kahypar/partition/registries/register_initial_partitioning_algorithms.cpp @@ -25,119 +25,122 @@ * SOFTWARE. 
******************************************************************************/ +#include "kahypar-resources/meta/registrar.h" #include "kahypar-resources/meta/static_multi_dispatch_factory.h" #include "kahypar-resources/meta/typelist.h" -#include "kahypar-resources/meta/registrar.h" +#include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/factories.h" -#include "mt-kahypar/definitions.h" -#include "mt-kahypar/partition/initial_partitioning/i_initial_partitioner.h" -#include "mt-kahypar/partition/initial_partitioning/random_initial_partitioner.h" #include "mt-kahypar/partition/initial_partitioning/bfs_initial_partitioner.h" #include "mt-kahypar/partition/initial_partitioning/greedy_initial_partitioner.h" +#include "mt-kahypar/partition/initial_partitioning/i_initial_partitioner.h" #include "mt-kahypar/partition/initial_partitioning/label_propagation_initial_partitioner.h" #include "mt-kahypar/partition/initial_partitioning/policies/gain_computation_policy.h" #include "mt-kahypar/partition/initial_partitioning/policies/pq_selection_policy.h" +#include "mt-kahypar/partition/initial_partitioning/random_initial_partitioner.h" - -#define REGISTER_DISPATCHED_INITIAL_PARTITIONER(id, dispatcher, ...) \ - static kahypar::meta::Registrar register_ ## dispatcher( \ - id, \ - [](const InitialPartitioningAlgorithm algorithm, ip_data_container_t* ip_data, \ - const Context& context, const int seed, const int tag) { \ - return dispatcher::create( \ - std::forward_as_tuple(algorithm, ip_data, context, seed, tag), \ - __VA_ARGS__ \ - ); \ - }) +#define REGISTER_DISPATCHED_INITIAL_PARTITIONER(id, dispatcher, ...) \ + static kahypar::meta::Registrar register_##dispatcher( \ + id, [](const InitialPartitioningAlgorithm algorithm, ip_data_container_t *ip_data, \ + const Context &context, const int seed, const int tag) { \ + return dispatcher::create( \ + std::forward_as_tuple(algorithm, ip_data, context, seed, tag), __VA_ARGS__); \ + }) namespace mt_kahypar { -template -using GreedyRoundRobinFMInitialPartitioner = GreedyInitialPartitioner; -template -using GreedyGlobalFMInitialPartitioner = GreedyInitialPartitioner; -template -using GreedySequentialFMInitialPartitioner = GreedyInitialPartitioner; -template -using GreedyRoundRobinMaxNetInitialPartitioner = GreedyInitialPartitioner; -template -using GreedyGlobalMaxNetInitialPartitioner = GreedyInitialPartitioner; -template -using GreedySequentialMaxNetInitialPartitioner = GreedyInitialPartitioner; +template +using GreedyRoundRobinFMInitialPartitioner = + GreedyInitialPartitioner; +template +using GreedyGlobalFMInitialPartitioner = + GreedyInitialPartitioner; +template +using GreedySequentialFMInitialPartitioner = + GreedyInitialPartitioner; +template +using GreedyRoundRobinMaxNetInitialPartitioner = + GreedyInitialPartitioner; +template +using GreedyGlobalMaxNetInitialPartitioner = + GreedyInitialPartitioner; +template +using GreedySequentialMaxNetInitialPartitioner = + GreedyInitialPartitioner; -using RandomPartitionerDispatcher = kahypar::meta::StaticMultiDispatchFactory< - RandomInitialPartitioner, - IInitialPartitioner, - kahypar::meta::Typelist>; -using BFSPartitionerDispatcher = kahypar::meta::StaticMultiDispatchFactory< - BFSInitialPartitioner, - IInitialPartitioner, - kahypar::meta::Typelist>; -using LPPartitionerDispatcher = kahypar::meta::StaticMultiDispatchFactory< - LabelPropagationInitialPartitioner, - IInitialPartitioner, - kahypar::meta::Typelist>; -using GreedyRoundRobinFMDispatcher = 
kahypar::meta::StaticMultiDispatchFactory< - GreedyRoundRobinFMInitialPartitioner, - IInitialPartitioner, - kahypar::meta::Typelist>; -using GreedyGlobalFMDispatcher = kahypar::meta::StaticMultiDispatchFactory< - GreedyGlobalFMInitialPartitioner, - IInitialPartitioner, - kahypar::meta::Typelist>; -using GreedySequentialFMDispatcher = kahypar::meta::StaticMultiDispatchFactory< - GreedySequentialFMInitialPartitioner, - IInitialPartitioner, - kahypar::meta::Typelist>; -using GreedyRoundRobinMaxNetDispatcher = kahypar::meta::StaticMultiDispatchFactory< - GreedyRoundRobinMaxNetInitialPartitioner, - IInitialPartitioner, - kahypar::meta::Typelist>; -using GreedyGlobalMaxNetDispatcher = kahypar::meta::StaticMultiDispatchFactory< - GreedyGlobalMaxNetInitialPartitioner, - IInitialPartitioner, - kahypar::meta::Typelist>; -using GreedySequentialMaxNetDispatcher = kahypar::meta::StaticMultiDispatchFactory< - GreedySequentialMaxNetInitialPartitioner, - IInitialPartitioner, - kahypar::meta::Typelist>; +using RandomPartitionerDispatcher = + kahypar::meta::StaticMultiDispatchFactory >; +using BFSPartitionerDispatcher = + kahypar::meta::StaticMultiDispatchFactory >; +using LPPartitionerDispatcher = + kahypar::meta::StaticMultiDispatchFactory >; +using GreedyRoundRobinFMDispatcher = + kahypar::meta::StaticMultiDispatchFactory >; +using GreedyGlobalFMDispatcher = + kahypar::meta::StaticMultiDispatchFactory >; +using GreedySequentialFMDispatcher = + kahypar::meta::StaticMultiDispatchFactory >; +using GreedyRoundRobinMaxNetDispatcher = + kahypar::meta::StaticMultiDispatchFactory >; +using GreedyGlobalMaxNetDispatcher = + kahypar::meta::StaticMultiDispatchFactory >; +using GreedySequentialMaxNetDispatcher = + kahypar::meta::StaticMultiDispatchFactory >; -REGISTER_DISPATCHED_INITIAL_PARTITIONER(InitialPartitioningAlgorithm::random, - RandomPartitionerDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type)); -REGISTER_DISPATCHED_INITIAL_PARTITIONER(InitialPartitioningAlgorithm::bfs, - BFSPartitionerDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type)); -REGISTER_DISPATCHED_INITIAL_PARTITIONER(InitialPartitioningAlgorithm::label_propagation, - LPPartitionerDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type)); -REGISTER_DISPATCHED_INITIAL_PARTITIONER(InitialPartitioningAlgorithm::greedy_round_robin_fm, - GreedyRoundRobinFMDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type)); -REGISTER_DISPATCHED_INITIAL_PARTITIONER(InitialPartitioningAlgorithm::greedy_global_fm, - GreedyGlobalFMDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type)); -REGISTER_DISPATCHED_INITIAL_PARTITIONER(InitialPartitioningAlgorithm::greedy_sequential_fm, - GreedySequentialFMDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type)); -REGISTER_DISPATCHED_INITIAL_PARTITIONER(InitialPartitioningAlgorithm::greedy_round_robin_max_net, - GreedyRoundRobinMaxNetDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type)); -REGISTER_DISPATCHED_INITIAL_PARTITIONER(InitialPartitioningAlgorithm::greedy_global_max_net, - GreedyGlobalMaxNetDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type)); 
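For readers skimming this hunk: the REGISTER_DISPATCHED_INITIAL_PARTITIONER / REGISTER_DISPATCHED_COARSENER macros being re-wrapped here all follow the same self-registration idiom, namely a file-scope kahypar::meta::Registrar object whose constructor stores a factory lambda under an algorithm id so the partitioner can later instantiate the matching implementation from a runtime enum. The following minimal sketch illustrates that pattern; it is self-contained and compiles on its own, and every name in it (AlgorithmId, Registry, Registrar, BFSPartitioner) is a hypothetical stand-in, not the actual kahypar-resources API.

#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

// Hypothetical stand-ins for the real mt-kahypar types (illustration only).
enum class AlgorithmId { bfs, random_assign };
struct IInitialPartitioner {
  virtual ~IInitialPartitioner() = default;
  virtual std::string name() const = 0;
};

// A registry mapping a runtime id to a factory lambda.
class Registry {
 public:
  using Factory = std::function<std::unique_ptr<IInitialPartitioner>(int seed)>;
  static Registry& instance() { static Registry r; return r; }
  void add(AlgorithmId id, Factory f) { factories_[id] = std::move(f); }
  std::unique_ptr<IInitialPartitioner> create(AlgorithmId id, int seed) const {
    return factories_.at(id)(seed);
  }
 private:
  std::map<AlgorithmId, Factory> factories_;
};

// The Registrar's only job is to run registration code at static-initialization
// time; this side effect is what the REGISTER_* macros rely on.
struct Registrar {
  Registrar(AlgorithmId id, Registry::Factory f) {
    Registry::instance().add(id, std::move(f));
  }
};

struct BFSPartitioner : IInitialPartitioner {
  explicit BFSPartitioner(int seed) : seed_(seed) {}
  std::string name() const override { return "bfs(seed=" + std::to_string(seed_) + ")"; }
  int seed_;
};

// File-scope object: constructing it registers the factory, exactly as the
// macro-generated register_##dispatcher objects above do.
static Registrar register_bfs(AlgorithmId::bfs, [](int seed) {
  return std::unique_ptr<IInitialPartitioner>(new BFSPartitioner(seed));
});

int main() {
  auto ip = Registry::instance().create(AlgorithmId::bfs, 42);
  std::cout << ip->name() << '\n';  // prints "bfs(seed=42)"
}

The macros in the patch additionally route construction through a StaticMultiDispatchFactory so that policy objects select the template instantiation, but the registration mechanism itself is the static Registrar shown above.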
-REGISTER_DISPATCHED_INITIAL_PARTITIONER(InitialPartitioningAlgorithm::greedy_sequential_max_net, - GreedySequentialMaxNetDispatcher, - kahypar::meta::PolicyRegistry::getInstance().getPolicy( - context.partition.partition_type)); -} // namespace mt_kahypar +REGISTER_DISPATCHED_INITIAL_PARTITIONER( + InitialPartitioningAlgorithm::random, RandomPartitionerDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type)); +REGISTER_DISPATCHED_INITIAL_PARTITIONER( + InitialPartitioningAlgorithm::bfs, BFSPartitionerDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type)); +REGISTER_DISPATCHED_INITIAL_PARTITIONER( + InitialPartitioningAlgorithm::label_propagation, LPPartitionerDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type)); +REGISTER_DISPATCHED_INITIAL_PARTITIONER( + InitialPartitioningAlgorithm::greedy_round_robin_fm, GreedyRoundRobinFMDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type)); +REGISTER_DISPATCHED_INITIAL_PARTITIONER( + InitialPartitioningAlgorithm::greedy_global_fm, GreedyGlobalFMDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type)); +REGISTER_DISPATCHED_INITIAL_PARTITIONER( + InitialPartitioningAlgorithm::greedy_sequential_fm, GreedySequentialFMDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type)); +REGISTER_DISPATCHED_INITIAL_PARTITIONER( + InitialPartitioningAlgorithm::greedy_round_robin_max_net, + GreedyRoundRobinMaxNetDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type)); +REGISTER_DISPATCHED_INITIAL_PARTITIONER( + InitialPartitioningAlgorithm::greedy_global_max_net, GreedyGlobalMaxNetDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type)); +REGISTER_DISPATCHED_INITIAL_PARTITIONER( + InitialPartitioningAlgorithm::greedy_sequential_max_net, + GreedySequentialMaxNetDispatcher, + kahypar::meta::PolicyRegistry::getInstance().getPolicy( + context.partition.partition_type)); +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/registries/register_memory_pool.cpp b/mt-kahypar/partition/registries/register_memory_pool.cpp index 52a480015..f05580d14 100644 --- a/mt-kahypar/partition/registries/register_memory_pool.cpp +++ b/mt-kahypar/partition/registries/register_memory_pool.cpp @@ -27,164 +27,227 @@ #include "register_memory_pool.h" -#include "mt-kahypar/definitions.h" -#include "mt-kahypar/datastructures/sparse_pin_counts.h" -#include "mt-kahypar/datastructures/pin_count_in_part.h" #include "mt-kahypar/datastructures/connectivity_set.h" -#include "mt-kahypar/parallel/memory_pool.h" +#include "mt-kahypar/datastructures/pin_count_in_part.h" +#include "mt-kahypar/datastructures/sparse_pin_counts.h" +#include "mt-kahypar/definitions.h" #include "mt-kahypar/parallel/atomic_wrapper.h" -#include "mt-kahypar/utils/utilities.h" +#include "mt-kahypar/parallel/memory_pool.h" #include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/utils/utilities.h" namespace mt_kahypar { - namespace { - template - size_t size_of_edge_sync() { - const bool is_graph = Hypergraph::TYPE == STATIC_GRAPH || Hypergraph::TYPE == DYNAMIC_GRAPH; - if ( is_graph) { - return StaticPartitionedGraph::SIZE_OF_EDGE_LOCK; +namespace { +template +size_t size_of_edge_sync() +{ + const bool is_graph = + 
Hypergraph::TYPE == STATIC_GRAPH || Hypergraph::TYPE == DYNAMIC_GRAPH; + if(is_graph) + { + return StaticPartitionedGraph::SIZE_OF_EDGE_LOCK; + } + return 0; +} +} + +void register_memory_pool(const mt_kahypar_hypergraph_t hypergraph, + const Context &context) +{ + if(hypergraph.type == STATIC_GRAPH) + { + register_memory_pool(utils::cast_const(hypergraph), context); + } + else if(hypergraph.type == DYNAMIC_GRAPH) + { + register_memory_pool(utils::cast_const(hypergraph), context); + } + else if(hypergraph.type == STATIC_HYPERGRAPH) + { + register_memory_pool(utils::cast_const(hypergraph), context); + } + else if(hypergraph.type == DYNAMIC_HYPERGRAPH) + { + register_memory_pool(utils::cast_const(hypergraph), context); + } +} + +template +void register_memory_pool(const Hypergraph &hypergraph, const Context &context) +{ + + if(context.partition.mode == Mode::direct || + context.partition.mode == Mode::deep_multilevel) + { + + // ########## Preprocessing Memory ########## + + const HypernodeID num_hypernodes = hypergraph.initialNumNodes(); + const HyperedgeID num_hyperedges = hypergraph.initialNumEdges(); + const HypernodeID num_pins = hypergraph.initialNumPins(); + + auto &pool = parallel::MemoryPool::instance(); + + if(context.preprocessing.use_community_detection) + { + const bool is_graph = hypergraph.maxEdgeSize() == 2; + const size_t num_star_expansion_nodes = + num_hypernodes + (is_graph ? 0 : num_hyperedges); + const size_t num_star_expansion_edges = is_graph ? num_pins : (2UL * num_pins); + + pool.register_memory_group("Preprocessing", 1); + pool.register_memory_chunk("Preprocessing", "indices", num_star_expansion_nodes + 1, + sizeof(size_t)); + pool.register_memory_chunk("Preprocessing", "arcs", num_star_expansion_edges, + sizeof(Arc)); + pool.register_memory_chunk("Preprocessing", "node_volumes", + num_star_expansion_nodes, sizeof(ArcWeight)); + + if(!context.preprocessing.community_detection.low_memory_contraction) + { + pool.register_memory_chunk("Preprocessing", "tmp_indices", + num_star_expansion_nodes + 1, + sizeof(parallel::IntegralAtomicWrapper)); + pool.register_memory_chunk("Preprocessing", "tmp_pos", num_star_expansion_nodes, + sizeof(parallel::IntegralAtomicWrapper)); + pool.register_memory_chunk("Preprocessing", "tmp_arcs", num_star_expansion_edges, + sizeof(Arc)); + pool.register_memory_chunk("Preprocessing", "valid_arcs", + num_star_expansion_edges, sizeof(size_t)); + pool.register_memory_chunk("Preprocessing", "tmp_node_volumes", + num_star_expansion_nodes, + sizeof(parallel::AtomicWrapper)); } - return 0; } - } - void register_memory_pool(const mt_kahypar_hypergraph_t hypergraph, - const Context& context) { - if ( hypergraph.type == STATIC_GRAPH ) { - register_memory_pool(utils::cast_const(hypergraph), context); - } else if ( hypergraph.type == DYNAMIC_GRAPH ) { - register_memory_pool(utils::cast_const(hypergraph), context); - } else if ( hypergraph.type == STATIC_HYPERGRAPH ) { - register_memory_pool(utils::cast_const(hypergraph), context); - } else if ( hypergraph.type == DYNAMIC_HYPERGRAPH ) { - register_memory_pool(utils::cast_const(hypergraph), context); + // ########## Coarsening Memory ########## + + pool.register_memory_group("Coarsening", 2); + if(!context.isNLevelPartitioning()) + { + if(Hypergraph::is_graph) + { + pool.register_memory_chunk("Coarsening", "mapping", num_hypernodes, + sizeof(HypernodeID)); + pool.register_memory_chunk("Coarsening", "tmp_nodes", num_hypernodes, + Hypergraph::SIZE_OF_HYPERNODE); + pool.register_memory_chunk("Coarsening", 
"node_sizes", num_hypernodes, + sizeof(HyperedgeID)); + pool.register_memory_chunk("Coarsening", "tmp_num_incident_edges", num_hypernodes, + sizeof(parallel::IntegralAtomicWrapper)); + pool.register_memory_chunk( + "Coarsening", "node_weights", num_hypernodes, + sizeof(parallel::IntegralAtomicWrapper)); + pool.register_memory_chunk("Coarsening", "tmp_edges", num_hyperedges, + Hypergraph::SIZE_OF_HYPEREDGE); + pool.register_memory_chunk("Coarsening", "edge_id_mapping", num_hyperedges / 2, + sizeof(HyperedgeID)); + } + else + { + pool.register_memory_chunk("Coarsening", "mapping", num_hypernodes, + sizeof(size_t)); + pool.register_memory_chunk("Coarsening", "tmp_hypernodes", num_hypernodes, + Hypergraph::SIZE_OF_HYPERNODE); + pool.register_memory_chunk("Coarsening", "tmp_incident_nets", num_pins, + sizeof(HyperedgeID)); + pool.register_memory_chunk("Coarsening", "tmp_num_incident_nets", num_hypernodes, + sizeof(parallel::IntegralAtomicWrapper)); + pool.register_memory_chunk( + "Coarsening", "hn_weights", num_hypernodes, + sizeof(parallel::IntegralAtomicWrapper)); + pool.register_memory_chunk("Coarsening", "tmp_hyperedges", num_hyperedges, + Hypergraph::SIZE_OF_HYPEREDGE); + pool.register_memory_chunk("Coarsening", "tmp_incidence_array", num_pins, + sizeof(HypernodeID)); + pool.register_memory_chunk("Coarsening", "he_sizes", num_hyperedges, + sizeof(size_t)); + pool.register_memory_chunk("Coarsening", "valid_hyperedges", num_hyperedges, + sizeof(size_t)); + } } - } - template - void register_memory_pool(const Hypergraph& hypergraph, - const Context& context) { - - if (context.partition.mode == Mode::direct || - context.partition.mode == Mode::deep_multilevel ) { - - // ########## Preprocessing Memory ########## - - const HypernodeID num_hypernodes = hypergraph.initialNumNodes(); - const HyperedgeID num_hyperedges = hypergraph.initialNumEdges(); - const HypernodeID num_pins = hypergraph.initialNumPins(); - - auto& pool = parallel::MemoryPool::instance(); - - if ( context.preprocessing.use_community_detection ) { - const bool is_graph = hypergraph.maxEdgeSize() == 2; - const size_t num_star_expansion_nodes = num_hypernodes + (is_graph ? 0 : num_hyperedges); - const size_t num_star_expansion_edges = is_graph ? 
num_pins : (2UL * num_pins); - - pool.register_memory_group("Preprocessing", 1); - pool.register_memory_chunk("Preprocessing", "indices", num_star_expansion_nodes + 1, sizeof(size_t)); - pool.register_memory_chunk("Preprocessing", "arcs", num_star_expansion_edges, sizeof(Arc)); - pool.register_memory_chunk("Preprocessing", "node_volumes", num_star_expansion_nodes, sizeof(ArcWeight)); - - if ( !context.preprocessing.community_detection.low_memory_contraction ) { - pool.register_memory_chunk("Preprocessing", "tmp_indices", - num_star_expansion_nodes + 1, sizeof(parallel::IntegralAtomicWrapper)); - pool.register_memory_chunk("Preprocessing", "tmp_pos", - num_star_expansion_nodes, sizeof(parallel::IntegralAtomicWrapper)); - pool.register_memory_chunk("Preprocessing", "tmp_arcs", num_star_expansion_edges, sizeof(Arc)); - pool.register_memory_chunk("Preprocessing", "valid_arcs", num_star_expansion_edges, sizeof(size_t)); - pool.register_memory_chunk("Preprocessing", "tmp_node_volumes", - num_star_expansion_nodes, sizeof(parallel::AtomicWrapper)); - } + // ########## Refinement Memory ########## + + pool.register_memory_group("Refinement", 3); + pool.register_memory_chunk("Refinement", "part_ids", num_hypernodes, + sizeof(PartitionID)); + + if(Hypergraph::is_graph) + { + pool.register_memory_chunk("Refinement", "edge_sync", num_hyperedges, + size_of_edge_sync()); + pool.register_memory_chunk("Refinement", "edge_locks", num_hyperedges, + sizeof(SpinLock)); + if(context.refinement.fm.algorithm != FMAlgorithm::do_nothing) + { + pool.register_memory_chunk("Refinement", "incident_weight_in_part", + static_cast(num_hypernodes) * + (context.partition.k + 1), + sizeof(CAtomic)); } - - // ########## Coarsening Memory ########## - - pool.register_memory_group("Coarsening", 2); - if ( !context.isNLevelPartitioning() ) { - if (Hypergraph::is_graph) { - pool.register_memory_chunk("Coarsening", "mapping", num_hypernodes, sizeof(HypernodeID)); - pool.register_memory_chunk("Coarsening", "tmp_nodes", num_hypernodes, Hypergraph::SIZE_OF_HYPERNODE); - pool.register_memory_chunk("Coarsening", "node_sizes", num_hypernodes, sizeof(HyperedgeID)); - pool.register_memory_chunk("Coarsening", "tmp_num_incident_edges", - num_hypernodes, sizeof(parallel::IntegralAtomicWrapper)); - pool.register_memory_chunk("Coarsening", "node_weights", - num_hypernodes, sizeof(parallel::IntegralAtomicWrapper)); - pool.register_memory_chunk("Coarsening", "tmp_edges", num_hyperedges, Hypergraph::SIZE_OF_HYPEREDGE); - pool.register_memory_chunk("Coarsening", "edge_id_mapping", num_hyperedges / 2, sizeof(HyperedgeID)); - } else { - pool.register_memory_chunk("Coarsening", "mapping", num_hypernodes, sizeof(size_t)); - pool.register_memory_chunk("Coarsening", "tmp_hypernodes", num_hypernodes, Hypergraph::SIZE_OF_HYPERNODE); - pool.register_memory_chunk("Coarsening", "tmp_incident_nets", num_pins, sizeof(HyperedgeID)); - pool.register_memory_chunk("Coarsening", "tmp_num_incident_nets", - num_hypernodes, sizeof(parallel::IntegralAtomicWrapper)); - pool.register_memory_chunk("Coarsening", "hn_weights", - num_hypernodes, sizeof(parallel::IntegralAtomicWrapper)); - pool.register_memory_chunk("Coarsening", "tmp_hyperedges", num_hyperedges, Hypergraph::SIZE_OF_HYPEREDGE); - pool.register_memory_chunk("Coarsening", "tmp_incidence_array", num_pins, sizeof(HypernodeID)); - pool.register_memory_chunk("Coarsening", "he_sizes", num_hyperedges, sizeof(size_t)); - pool.register_memory_chunk("Coarsening", "valid_hyperedges", num_hyperedges, sizeof(size_t)); - } 
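The register_memory_pool() hunks above and below only re-indent existing calls; the underlying idea is that each phase announces its named memory chunks (element count times element size) up front and the pool then performs one bulk allocation before partitioning starts. The sketch below illustrates that pre-register-then-allocate pattern with a toy pool; the real parallel::MemoryPool has a richer interface (groups, priorities, parallel allocation), so ToyMemoryPool and its request() method are assumptions made purely for illustration.

#include <cassert>
#include <cstddef>
#include <map>
#include <string>
#include <vector>

// Toy stand-in for mt-kahypar's parallel::MemoryPool (illustrative only).
class ToyMemoryPool {
 public:
  // Phase code announces its needs before partitioning starts.
  void register_chunk(const std::string& name, size_t num_elements,
                      size_t size_of_element) {
    offsets_[name] = total_bytes_;
    total_bytes_ += num_elements * size_of_element;
  }
  // One bulk allocation instead of many small ones on the critical path.
  void allocate() { storage_.resize(total_bytes_); }
  // Hand out the pre-reserved region for a registered chunk.
  char* request(const std::string& name) {
    assert(!storage_.empty() && "allocate() must be called first");
    return storage_.data() + offsets_.at(name);
  }
 private:
  std::map<std::string, size_t> offsets_;
  size_t total_bytes_ = 0;
  std::vector<char> storage_;
};

int main() {
  const size_t num_hypernodes = 1000, num_hyperedges = 500;
  ToyMemoryPool pool;
  // Mirrors the style of register_memory_chunk("Coarsening", "mapping", n, sizeof(...)).
  pool.register_chunk("Coarsening.mapping", num_hypernodes, sizeof(size_t));
  pool.register_chunk("Refinement.part_ids", num_hypernodes, sizeof(int));
  pool.register_chunk("Refinement.pin_count_update_ownership", num_hyperedges, sizeof(int));
  pool.allocate();
  char* part_ids = pool.request("Refinement.part_ids");
  (void)part_ids;  // a phase would reinterpret this region as its working arrays
}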
+ } + else + { + const HypernodeID max_he_size = hypergraph.maxEdgeSize(); + if(context.partition.preset_type == PresetType::large_k) + { + pool.register_memory_chunk("Refinement", "pin_count_in_part", + ds::SparsePinCounts::num_elements( + num_hyperedges, context.partition.k, max_he_size), + sizeof(ds::SparsePinCounts::Value)); } - - // ########## Refinement Memory ########## - - pool.register_memory_group("Refinement", 3); - pool.register_memory_chunk("Refinement", "part_ids", num_hypernodes, sizeof(PartitionID)); - - if (Hypergraph::is_graph) { - pool.register_memory_chunk("Refinement", "edge_sync", num_hyperedges, size_of_edge_sync()); - pool.register_memory_chunk("Refinement", "edge_locks", num_hyperedges, sizeof(SpinLock)); - if ( context.refinement.fm.algorithm != FMAlgorithm::do_nothing ) { - pool.register_memory_chunk("Refinement", "incident_weight_in_part", - static_cast(num_hypernodes) * ( context.partition.k + 1 ), - sizeof(CAtomic)); - } - } else { - const HypernodeID max_he_size = hypergraph.maxEdgeSize(); - if ( context.partition.preset_type == PresetType::large_k ) { - pool.register_memory_chunk("Refinement", "pin_count_in_part", - ds::SparsePinCounts::num_elements(num_hyperedges, context.partition.k, max_he_size), - sizeof(ds::SparsePinCounts::Value)); - } else { - pool.register_memory_chunk("Refinement", "pin_count_in_part", - ds::PinCountInPart::num_elements(num_hyperedges, context.partition.k, max_he_size), - sizeof(ds::PinCountInPart::Value)); - pool.register_memory_chunk("Refinement", "connectivity_set", - ds::ConnectivitySets::num_elements(num_hyperedges, context.partition.k), - sizeof(ds::ConnectivitySets::UnsafeBlock)); + else + { + pool.register_memory_chunk("Refinement", "pin_count_in_part", + ds::PinCountInPart::num_elements( + num_hyperedges, context.partition.k, max_he_size), + sizeof(ds::PinCountInPart::Value)); + pool.register_memory_chunk( + "Refinement", "connectivity_set", + ds::ConnectivitySets::num_elements(num_hyperedges, context.partition.k), + sizeof(ds::ConnectivitySets::UnsafeBlock)); + } + if(context.refinement.fm.algorithm != FMAlgorithm::do_nothing) + { + if(context.partition.objective == Objective::steiner_tree && + !context.mapping.use_two_phase_approach) + { + pool.register_memory_chunk("Refinement", "gain_cache", + static_cast(num_hypernodes) * + (context.partition.k), + sizeof(CAtomic)); + pool.register_memory_chunk("Refinement", "num_incident_edges_of_block", + static_cast(num_hypernodes) * + context.partition.k, + sizeof(CAtomic)); } - if ( context.refinement.fm.algorithm != FMAlgorithm::do_nothing ) { - if ( context.partition.objective == Objective::steiner_tree && !context.mapping.use_two_phase_approach ) { - pool.register_memory_chunk("Refinement", "gain_cache", - static_cast(num_hypernodes) * ( context.partition.k ), - sizeof(CAtomic)); - pool.register_memory_chunk("Refinement", "num_incident_edges_of_block", - static_cast(num_hypernodes) * context.partition.k, - sizeof(CAtomic)); - } else { - pool.register_memory_chunk("Refinement", "gain_cache", - static_cast(num_hypernodes) * ( context.partition.k + 1 ), - sizeof(CAtomic)); - } + else + { + pool.register_memory_chunk("Refinement", "gain_cache", + static_cast(num_hypernodes) * + (context.partition.k + 1), + sizeof(CAtomic)); } - pool.register_memory_chunk("Refinement", "pin_count_update_ownership", - num_hyperedges, sizeof(SpinLock)); } - - // Allocate Memory - utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id); - 
timer.start_timer("memory_pool_allocation", "Memory Pool Allocation"); - pool.allocate_memory_chunks(); - timer.stop_timer("memory_pool_allocation"); + pool.register_memory_chunk("Refinement", "pin_count_update_ownership", + num_hyperedges, sizeof(SpinLock)); } - } - namespace { - #define REGISTER_MEMORY_POOL(X) void register_memory_pool(const X& hypergraph, const Context& context) + // Allocate Memory + utils::Timer &timer = utils::Utilities::instance().getTimer(context.utility_id); + timer.start_timer("memory_pool_allocation", "Memory Pool Allocation"); + pool.allocate_memory_chunks(); + timer.stop_timer("memory_pool_allocation"); } +} + +namespace { +#define REGISTER_MEMORY_POOL(X) \ + void register_memory_pool(const X &hypergraph, const Context &context) +} - INSTANTIATE_FUNC_WITH_HYPERGRAPHS(REGISTER_MEMORY_POOL) +INSTANTIATE_FUNC_WITH_HYPERGRAPHS(REGISTER_MEMORY_POOL) } // namespace mt_kahypar diff --git a/mt-kahypar/partition/registries/register_memory_pool.h b/mt-kahypar/partition/registries/register_memory_pool.h index 348f3646f..6994ca832 100644 --- a/mt-kahypar/partition/registries/register_memory_pool.h +++ b/mt-kahypar/partition/registries/register_memory_pool.h @@ -31,9 +31,10 @@ namespace mt_kahypar { -void register_memory_pool(const mt_kahypar_hypergraph_t hypergraph, const Context& context); +void register_memory_pool(const mt_kahypar_hypergraph_t hypergraph, + const Context &context); -template -void register_memory_pool(const Hypergraph& hypergraph, const Context& context); +template +void register_memory_pool(const Hypergraph &hypergraph, const Context &context); } // namespace mt_kahypar \ No newline at end of file diff --git a/mt-kahypar/partition/registries/register_policies.cpp b/mt-kahypar/partition/registries/register_policies.cpp index f9ef01b69..fe435908c 100644 --- a/mt-kahypar/partition/registries/register_policies.cpp +++ b/mt-kahypar/partition/registries/register_policies.cpp @@ -32,12 +32,12 @@ #include "mt-kahypar/partition/coarsening/policies/rating_acceptance_policy.h" #include "mt-kahypar/partition/coarsening/policies/rating_heavy_node_penalty_policy.h" #include "mt-kahypar/partition/coarsening/policies/rating_score_policy.h" -#include "mt-kahypar/partition/refinement/gains/gain_definitions.h" #include "mt-kahypar/partition/context_enum_classes.h" +#include "mt-kahypar/partition/refinement/gains/gain_definitions.h" -#define REGISTER_POLICY(policy, id, policy_class) \ - static kahypar::meta::Registrar > register_ ## policy_class( \ - id, new policy_class()) +#define REGISTER_POLICY(policy, id, policy_class) \ + static kahypar::meta::Registrar > \ + register_##policy_class(id, new policy_class()) namespace mt_kahypar { // ////////////////////////////////////////////////////////////////////////////// @@ -65,11 +65,9 @@ REGISTER_POLICY(mt_kahypar_partition_type_t, N_LEVEL_HYPERGRAPH_PARTITIONING, // ////////////////////////////////////////////////////////////////////////////// // Coarsening / Rating Policies // ////////////////////////////////////////////////////////////////////////////// -REGISTER_POLICY(RatingFunction, RatingFunction::heavy_edge, - HeavyEdgeScore); +REGISTER_POLICY(RatingFunction, RatingFunction::heavy_edge, HeavyEdgeScore); #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES -REGISTER_POLICY(RatingFunction, RatingFunction::sameness, - SamenessScore); +REGISTER_POLICY(RatingFunction, RatingFunction::sameness, SamenessScore); #endif REGISTER_POLICY(HeavyNodePenaltyPolicy, HeavyNodePenaltyPolicy::no_penalty, @@ -84,8 +82,7 @@ 
REGISTER_POLICY(HeavyNodePenaltyPolicy, HeavyNodePenaltyPolicy::additive, REGISTER_POLICY(AcceptancePolicy, AcceptancePolicy::best_prefer_unmatched, BestRatingPreferringUnmatched); #ifdef KAHYPAR_ENABLE_EXPERIMENTAL_FEATURES -REGISTER_POLICY(AcceptancePolicy, AcceptancePolicy::best, - BestRatingWithTieBreaking); +REGISTER_POLICY(AcceptancePolicy, AcceptancePolicy::best, BestRatingWithTieBreaking); #endif // ////////////////////////////////////////////////////////////////////////////// @@ -102,8 +99,9 @@ REGISTER_POLICY(GainPolicy, GainPolicy::steiner_tree, SteinerTreeGainTypes); #ifdef KAHYPAR_ENABLE_GRAPH_PARTITIONING_FEATURES REGISTER_POLICY(GainPolicy, GainPolicy::cut_for_graphs, CutGainForGraphsTypes); #ifdef KAHYPAR_ENABLE_STEINER_TREE_METRIC -REGISTER_POLICY(GainPolicy, GainPolicy::steiner_tree_for_graphs, SteinerTreeForGraphsTypes); +REGISTER_POLICY(GainPolicy, GainPolicy::steiner_tree_for_graphs, + SteinerTreeForGraphsTypes); #endif #endif -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/partition/registries/register_refinement_algorithms.cpp b/mt-kahypar/partition/registries/register_refinement_algorithms.cpp index 87eaa5e0e..ec5a10f6f 100644 --- a/mt-kahypar/partition/registries/register_refinement_algorithms.cpp +++ b/mt-kahypar/partition/registries/register_refinement_algorithms.cpp @@ -32,224 +32,213 @@ #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/factories.h" -#include "mt-kahypar/partition/refinement/do_nothing_refiner.h" -#include "mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.h" #include "mt-kahypar/partition/refinement/deterministic/deterministic_label_propagation.h" +#include "mt-kahypar/partition/refinement/do_nothing_refiner.h" +#include "mt-kahypar/partition/refinement/flows/do_nothing_refiner.h" +#include "mt-kahypar/partition/refinement/flows/flow_refiner.h" +#include "mt-kahypar/partition/refinement/flows/scheduler.h" #include "mt-kahypar/partition/refinement/fm/multitry_kway_fm.h" #include "mt-kahypar/partition/refinement/fm/strategies/gain_cache_strategy.h" #include "mt-kahypar/partition/refinement/fm/strategies/unconstrained_strategy.h" -#include "mt-kahypar/partition/refinement/flows/do_nothing_refiner.h" -#include "mt-kahypar/partition/refinement/flows/scheduler.h" -#include "mt-kahypar/partition/refinement/flows/flow_refiner.h" #include "mt-kahypar/partition/refinement/gains/gain_definitions.h" -#include "mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.h" +#include "mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.h" #include "mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.h" - +#include "mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.h" namespace mt_kahypar { using LabelPropagationDispatcher = kahypar::meta::StaticMultiDispatchFactory< - LabelPropagationRefiner, - IRefiner, - kahypar::meta::Typelist>; + LabelPropagationRefiner, IRefiner, kahypar::meta::Typelist >; using DeterministicLabelPropagationDispatcher = kahypar::meta::StaticMultiDispatchFactory< - DeterministicLabelPropagationRefiner, - IRefiner, - kahypar::meta::Typelist>; + DeterministicLabelPropagationRefiner, IRefiner, + kahypar::meta::Typelist >; using DefaultFMDispatcher = kahypar::meta::StaticMultiDispatchFactory< - MultiTryKWayFM, - IRefiner, - kahypar::meta::Typelist>; + MultiTryKWayFM, IRefiner, kahypar::meta::Typelist >; using UnconstrainedFMDispatcher = DefaultFMDispatcher; using 
GainCacheFMStrategyDispatcher = kahypar::meta::StaticMultiDispatchFactory< - GainCacheStrategy, - IFMStrategy, - kahypar::meta::Typelist>; + GainCacheStrategy, IFMStrategy, kahypar::meta::Typelist >; using UnconstrainedFMStrategyDispatcher = kahypar::meta::StaticMultiDispatchFactory< - UnconstrainedStrategy, - IFMStrategy, - kahypar::meta::Typelist>; + UnconstrainedStrategy, IFMStrategy, kahypar::meta::Typelist >; using FlowSchedulerDispatcher = kahypar::meta::StaticMultiDispatchFactory< - FlowRefinementScheduler, - IRefiner, - kahypar::meta::Typelist>; + FlowRefinementScheduler, IRefiner, kahypar::meta::Typelist >; using SimpleRebalancerDispatcher = kahypar::meta::StaticMultiDispatchFactory< - SimpleRebalancer, - IRebalancer, - kahypar::meta::Typelist>; + SimpleRebalancer, IRebalancer, kahypar::meta::Typelist >; using AdvancedRebalancerDispatcher = kahypar::meta::StaticMultiDispatchFactory< - AdvancedRebalancer, - IRebalancer, - kahypar::meta::Typelist>; + AdvancedRebalancer, IRebalancer, kahypar::meta::Typelist >; using FlowRefinementDispatcher = kahypar::meta::StaticMultiDispatchFactory< - FlowRefiner, - IFlowRefiner, - kahypar::meta::Typelist>; - - -#define REGISTER_DISPATCHED_LP_REFINER(id, dispatcher, ...) \ - static kahypar::meta::Registrar register_ ## dispatcher( \ - id, \ - [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ - const Context& context, gain_cache_t gain_cache, IRebalancer& rebalancer) { \ - return dispatcher::create( \ - std::forward_as_tuple(num_hypernodes, num_hyperedges, context, gain_cache, rebalancer), \ - __VA_ARGS__ \ - ); \ - }) - -#define REGISTER_LP_REFINER(id, refiner, t) \ - static kahypar::meta::Registrar JOIN(register_ ## refiner, t)( \ - id, \ - [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ - const Context& context, gain_cache_t gain_cache, IRebalancer& rebalancer) -> IRefiner* { \ - return new refiner(num_hypernodes, num_hyperedges, context, gain_cache, rebalancer); \ - }) - -#define REGISTER_DISPATCHED_FM_REFINER(id, dispatcher, ...) \ - static kahypar::meta::Registrar register_ ## dispatcher( \ - id, \ - [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ - const Context& context, gain_cache_t gain_cache, IRebalancer& rebalancer) { \ - return dispatcher::create( \ - std::forward_as_tuple(num_hypernodes, num_hyperedges, context, gain_cache, rebalancer), \ - __VA_ARGS__ \ - ); \ - }) - -#define REGISTER_FM_REFINER(id, refiner, t) \ - static kahypar::meta::Registrar JOIN(register_ ## refiner, t)( \ - id, \ - [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ - const Context& context, gain_cache_t gain_cache, IRebalancer& rebalancer) -> IRefiner* { \ - return new refiner(num_hypernodes, num_hyperedges, context, gain_cache, rebalancer); \ - }) - -#define REGISTER_DISPATCHED_FM_STRATEGY(id, dispatcher, ...) \ - static kahypar::meta::Registrar register_ ## dispatcher( \ - id, \ - [](const Context& context, FMSharedData& shared_data) { \ - return dispatcher::create( \ - std::forward_as_tuple(context, shared_data), \ - __VA_ARGS__ \ - ); \ - }) - -#define REGISTER_DISPATCHED_FLOW_SCHEDULER(id, dispatcher, ...) 
\ - static kahypar::meta::Registrar register_ ## dispatcher( \ - id, \ - [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ - const Context& context, gain_cache_t gain_cache) { \ - return dispatcher::create( \ - std::forward_as_tuple(num_hypernodes, num_hyperedges, context, gain_cache), \ - __VA_ARGS__ \ - ); \ - }) - -#define REGISTER_FLOW_SCHEDULER(id, refiner, t) \ - static kahypar::meta::Registrar JOIN(register_ ## refiner, t)( \ - id, \ - [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ - const Context& context, gain_cache_t gain_cache) -> IRefiner* { \ - return new refiner(num_hypernodes, num_hyperedges, context, gain_cache); \ - }) - -#define REGISTER_DISPATCHED_REBALANCER(id, dispatcher, ...) \ - static kahypar::meta::Registrar register_ ## dispatcher( \ - id, \ - [](HypernodeID num_hypernodes, const Context& context, gain_cache_t gain_cache) { \ - return dispatcher::create( \ - std::forward_as_tuple(num_hypernodes, context, gain_cache), \ - __VA_ARGS__ \ - ); \ - }) - -#define REGISTER_REBALANCER(id, refiner, t) \ - static kahypar::meta::Registrar JOIN(register_ ## refiner, t)( \ - id, \ - [](HypernodeID num_hypernodes, const Context& context, gain_cache_t gain_cache) -> IRebalancer* { \ - return new refiner(num_hypernodes, context, gain_cache); \ - }) - -#define REGISTER_DISPATCHED_FLOW_REFINER(id, dispatcher, ...) \ - static kahypar::meta::Registrar register_ ## dispatcher( \ - id, \ - [](const HyperedgeID num_hyperedges, const Context& context) { \ - return dispatcher::create( \ - std::forward_as_tuple(num_hyperedges, context), \ - __VA_ARGS__ \ - ); \ - }) - -#define REGISTER_FLOW_REFINER(id, refiner, t) \ - static kahypar::meta::Registrar JOIN(register_ ## refiner, t)( \ - id, \ - [](const HyperedgeID num_Hyperedges, const Context& context) -> IFlowRefiner* { \ - return new refiner(num_Hyperedges, context); \ - }) - - -kahypar::meta::PolicyBase& getGraphAndGainTypesPolicy(mt_kahypar_partition_type_t partition_type, GainPolicy gain_policy) { - switch ( partition_type ) { - case MULTILEVEL_HYPERGRAPH_PARTITIONING: SWITCH_HYPERGRAPH_GAIN_TYPES(StaticHypergraphTypeTraits, gain_policy); - case MULTILEVEL_GRAPH_PARTITIONING: SWITCH_GRAPH_GAIN_TYPES(StaticGraphTypeTraits, gain_policy); - case N_LEVEL_HYPERGRAPH_PARTITIONING: SWITCH_HYPERGRAPH_GAIN_TYPES(DynamicHypergraphTypeTraits, gain_policy); - case N_LEVEL_GRAPH_PARTITIONING: SWITCH_GRAPH_GAIN_TYPES(DynamicGraphTypeTraits, gain_policy); - case LARGE_K_PARTITIONING: SWITCH_HYPERGRAPH_GAIN_TYPES(LargeKHypergraphTypeTraits, gain_policy); - default: { - LOG << "Invalid partition type"; - std::exit(-1); - } + FlowRefiner, IFlowRefiner, kahypar::meta::Typelist >; + +#define REGISTER_DISPATCHED_LP_REFINER(id, dispatcher, ...) 
\ + static kahypar::meta::Registrar register_##dispatcher( \ + id, [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ + const Context &context, gain_cache_t gain_cache, IRebalancer &rebalancer) { \ + return dispatcher::create(std::forward_as_tuple(num_hypernodes, num_hyperedges, \ + context, gain_cache, \ + rebalancer), \ + __VA_ARGS__); \ + }) + +#define REGISTER_LP_REFINER(id, refiner, t) \ + static kahypar::meta::Registrar JOIN(register_##refiner, t)( \ + id, \ + [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ + const Context &context, gain_cache_t gain_cache, \ + IRebalancer &rebalancer) -> IRefiner * { \ + return new refiner(num_hypernodes, num_hyperedges, context, gain_cache, \ + rebalancer); \ + }) + +#define REGISTER_DISPATCHED_FM_REFINER(id, dispatcher, ...) \ + static kahypar::meta::Registrar register_##dispatcher( \ + id, [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ + const Context &context, gain_cache_t gain_cache, IRebalancer &rebalancer) { \ + return dispatcher::create(std::forward_as_tuple(num_hypernodes, num_hyperedges, \ + context, gain_cache, \ + rebalancer), \ + __VA_ARGS__); \ + }) + +#define REGISTER_FM_REFINER(id, refiner, t) \ + static kahypar::meta::Registrar JOIN(register_##refiner, t)( \ + id, \ + [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ + const Context &context, gain_cache_t gain_cache, \ + IRebalancer &rebalancer) -> IRefiner * { \ + return new refiner(num_hypernodes, num_hyperedges, context, gain_cache, \ + rebalancer); \ + }) + +#define REGISTER_DISPATCHED_FM_STRATEGY(id, dispatcher, ...) \ + static kahypar::meta::Registrar register_##dispatcher( \ + id, [](const Context &context, FMSharedData &shared_data) { \ + return dispatcher::create(std::forward_as_tuple(context, shared_data), \ + __VA_ARGS__); \ + }) + +#define REGISTER_DISPATCHED_FLOW_SCHEDULER(id, dispatcher, ...) \ + static kahypar::meta::Registrar register_##dispatcher( \ + id, [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ + const Context &context, gain_cache_t gain_cache) { \ + return dispatcher::create( \ + std::forward_as_tuple(num_hypernodes, num_hyperedges, context, gain_cache), \ + __VA_ARGS__); \ + }) + +#define REGISTER_FLOW_SCHEDULER(id, refiner, t) \ + static kahypar::meta::Registrar JOIN(register_##refiner, t)( \ + id, \ + [](const HypernodeID num_hypernodes, const HyperedgeID num_hyperedges, \ + const Context &context, gain_cache_t gain_cache) -> IRefiner * { \ + return new refiner(num_hypernodes, num_hyperedges, context, gain_cache); \ + }) + +#define REGISTER_DISPATCHED_REBALANCER(id, dispatcher, ...) \ + static kahypar::meta::Registrar register_##dispatcher( \ + id, \ + [](HypernodeID num_hypernodes, const Context &context, gain_cache_t gain_cache) { \ + return dispatcher::create( \ + std::forward_as_tuple(num_hypernodes, context, gain_cache), __VA_ARGS__); \ + }) + +#define REGISTER_REBALANCER(id, refiner, t) \ + static kahypar::meta::Registrar JOIN(register_##refiner, t)( \ + id, \ + [](HypernodeID num_hypernodes, const Context &context, gain_cache_t gain_cache) \ + -> IRebalancer * { return new refiner(num_hypernodes, context, gain_cache); }) + +#define REGISTER_DISPATCHED_FLOW_REFINER(id, dispatcher, ...) 
\ + static kahypar::meta::Registrar register_##dispatcher( \ + id, [](const HyperedgeID num_hyperedges, const Context &context) { \ + return dispatcher::create(std::forward_as_tuple(num_hyperedges, context), \ + __VA_ARGS__); \ + }) + +#define REGISTER_FLOW_REFINER(id, refiner, t) \ + static kahypar::meta::Registrar JOIN(register_##refiner, t)( \ + id, \ + [](const HyperedgeID num_Hyperedges, const Context &context) -> IFlowRefiner * { \ + return new refiner(num_Hyperedges, context); \ + }) + +kahypar::meta::PolicyBase & +getGraphAndGainTypesPolicy(mt_kahypar_partition_type_t partition_type, + GainPolicy gain_policy) +{ + switch(partition_type) + { + case MULTILEVEL_HYPERGRAPH_PARTITIONING: + SWITCH_HYPERGRAPH_GAIN_TYPES(StaticHypergraphTypeTraits, gain_policy); + case MULTILEVEL_GRAPH_PARTITIONING: + SWITCH_GRAPH_GAIN_TYPES(StaticGraphTypeTraits, gain_policy); + case N_LEVEL_HYPERGRAPH_PARTITIONING: + SWITCH_HYPERGRAPH_GAIN_TYPES(DynamicHypergraphTypeTraits, gain_policy); + case N_LEVEL_GRAPH_PARTITIONING: + SWITCH_GRAPH_GAIN_TYPES(DynamicGraphTypeTraits, gain_policy); + case LARGE_K_PARTITIONING: + SWITCH_HYPERGRAPH_GAIN_TYPES(LargeKHypergraphTypeTraits, gain_policy); + default: { + LOG << "Invalid partition type"; + std::exit(-1); + } } } - -REGISTER_DISPATCHED_LP_REFINER(LabelPropagationAlgorithm::label_propagation, - LabelPropagationDispatcher, - getGraphAndGainTypesPolicy(context.partition.partition_type, context.partition.gain_policy)); -REGISTER_DISPATCHED_LP_REFINER(LabelPropagationAlgorithm::deterministic, - DeterministicLabelPropagationDispatcher, - getGraphAndGainTypesPolicy(context.partition.partition_type, context.partition.gain_policy)); +REGISTER_DISPATCHED_LP_REFINER( + LabelPropagationAlgorithm::label_propagation, LabelPropagationDispatcher, + getGraphAndGainTypesPolicy(context.partition.partition_type, + context.partition.gain_policy)); +REGISTER_DISPATCHED_LP_REFINER( + LabelPropagationAlgorithm::deterministic, DeterministicLabelPropagationDispatcher, + getGraphAndGainTypesPolicy(context.partition.partition_type, + context.partition.gain_policy)); REGISTER_LP_REFINER(LabelPropagationAlgorithm::do_nothing, DoNothingRefiner, 1); -REGISTER_DISPATCHED_FM_REFINER(FMAlgorithm::kway_fm, - DefaultFMDispatcher, - getGraphAndGainTypesPolicy(context.partition.partition_type, context.partition.gain_policy)); -REGISTER_DISPATCHED_FM_REFINER(FMAlgorithm::unconstrained_fm, - UnconstrainedFMDispatcher, - getGraphAndGainTypesPolicy(context.partition.partition_type, context.partition.gain_policy)); +REGISTER_DISPATCHED_FM_REFINER( + FMAlgorithm::kway_fm, DefaultFMDispatcher, + getGraphAndGainTypesPolicy(context.partition.partition_type, + context.partition.gain_policy)); +REGISTER_DISPATCHED_FM_REFINER( + FMAlgorithm::unconstrained_fm, UnconstrainedFMDispatcher, + getGraphAndGainTypesPolicy(context.partition.partition_type, + context.partition.gain_policy)); REGISTER_FM_REFINER(FMAlgorithm::do_nothing, DoNothingRefiner, 3); -REGISTER_DISPATCHED_FM_STRATEGY(FMAlgorithm::kway_fm, - GainCacheFMStrategyDispatcher, - getGraphAndGainTypesPolicy(context.partition.partition_type, context.partition.gain_policy)); -REGISTER_DISPATCHED_FM_STRATEGY(FMAlgorithm::unconstrained_fm, - UnconstrainedFMStrategyDispatcher, - getGraphAndGainTypesPolicy(context.partition.partition_type, context.partition.gain_policy)); - -REGISTER_DISPATCHED_FLOW_SCHEDULER(FlowAlgorithm::flow_cutter, - FlowSchedulerDispatcher, - getGraphAndGainTypesPolicy(context.partition.partition_type, context.partition.gain_policy)); 
+REGISTER_DISPATCHED_FM_STRATEGY( + FMAlgorithm::kway_fm, GainCacheFMStrategyDispatcher, + getGraphAndGainTypesPolicy(context.partition.partition_type, + context.partition.gain_policy)); +REGISTER_DISPATCHED_FM_STRATEGY( + FMAlgorithm::unconstrained_fm, UnconstrainedFMStrategyDispatcher, + getGraphAndGainTypesPolicy(context.partition.partition_type, + context.partition.gain_policy)); + +REGISTER_DISPATCHED_FLOW_SCHEDULER( + FlowAlgorithm::flow_cutter, FlowSchedulerDispatcher, + getGraphAndGainTypesPolicy(context.partition.partition_type, + context.partition.gain_policy)); REGISTER_FLOW_SCHEDULER(FlowAlgorithm::do_nothing, DoNothingRefiner, 4); -REGISTER_DISPATCHED_REBALANCER(RebalancingAlgorithm::simple_rebalancer, - SimpleRebalancerDispatcher, - getGraphAndGainTypesPolicy(context.partition.partition_type, context.partition.gain_policy)); -REGISTER_DISPATCHED_REBALANCER(RebalancingAlgorithm::advanced_rebalancer, - AdvancedRebalancerDispatcher, - getGraphAndGainTypesPolicy(context.partition.partition_type, context.partition.gain_policy)); +REGISTER_DISPATCHED_REBALANCER( + RebalancingAlgorithm::simple_rebalancer, SimpleRebalancerDispatcher, + getGraphAndGainTypesPolicy(context.partition.partition_type, + context.partition.gain_policy)); +REGISTER_DISPATCHED_REBALANCER( + RebalancingAlgorithm::advanced_rebalancer, AdvancedRebalancerDispatcher, + getGraphAndGainTypesPolicy(context.partition.partition_type, + context.partition.gain_policy)); REGISTER_REBALANCER(RebalancingAlgorithm::do_nothing, DoNothingRefiner, 5); -REGISTER_DISPATCHED_FLOW_REFINER(FlowAlgorithm::flow_cutter, - FlowRefinementDispatcher, - getGraphAndGainTypesPolicy(context.partition.partition_type, context.partition.gain_policy)); +REGISTER_DISPATCHED_FLOW_REFINER( + FlowAlgorithm::flow_cutter, FlowRefinementDispatcher, + getGraphAndGainTypesPolicy(context.partition.partition_type, + context.partition.gain_policy)); REGISTER_FLOW_REFINER(FlowAlgorithm::do_nothing, DoNothingFlowRefiner, 6); -} // namespace mt_kahypar +} // namespace mt_kahypar diff --git a/mt-kahypar/utils/bit_ops.h b/mt-kahypar/utils/bit_ops.h index 09da04b3d..7b0d62362 100644 --- a/mt-kahypar/utils/bit_ops.h +++ b/mt-kahypar/utils/bit_ops.h @@ -29,17 +29,14 @@ namespace mt_kahypar::utils { -inline int popcount_64(const uint64_t x) { +inline int popcount_64(const uint64_t x) +{ // this should be GCC specific return __builtin_popcountll(x); } -inline int lowest_set_bit_64(const uint64_t x) { - return __builtin_ctzll(x); -} +inline int lowest_set_bit_64(const uint64_t x) { return __builtin_ctzll(x); } -constexpr int log2(const int x) { - return x <= 1 ? 0 : 1 + log2(x >> 1); -} +constexpr int log2(const int x) { return x <= 1 ? 
0 : 1 + log2(x >> 1); } -} // namespace +} // namespace diff --git a/mt-kahypar/utils/cast.h b/mt-kahypar/utils/cast.h index 9c2d2b810..af170c5fd 100644 --- a/mt-kahypar/utils/cast.h +++ b/mt-kahypar/utils/cast.h @@ -37,122 +37,166 @@ namespace mt_kahypar::utils { namespace { -std::string typeToString(const mt_kahypar_hypergraph_type_t type) { - switch ( type ) { - case STATIC_GRAPH: return "STATIC_GRAPH"; - case DYNAMIC_GRAPH: return "DYNAMIC_GRAPH"; - case STATIC_HYPERGRAPH: return "STATIC_HYPERGRAPH"; - case DYNAMIC_HYPERGRAPH: return "DYNAMIC_HYPERGRAPH"; - case NULLPTR_HYPERGRAPH: return "NULLPTR_HYPERGRAPH"; +std::string typeToString(const mt_kahypar_hypergraph_type_t type) +{ + switch(type) + { + case STATIC_GRAPH: + return "STATIC_GRAPH"; + case DYNAMIC_GRAPH: + return "DYNAMIC_GRAPH"; + case STATIC_HYPERGRAPH: + return "STATIC_HYPERGRAPH"; + case DYNAMIC_HYPERGRAPH: + return "DYNAMIC_HYPERGRAPH"; + case NULLPTR_HYPERGRAPH: + return "NULLPTR_HYPERGRAPH"; } return "UNDEFINED"; } -std::string typeToString(const mt_kahypar_partition_type_t type) { - switch ( type ) { - case MULTILEVEL_GRAPH_PARTITIONING: return "MULTILEVEL_GRAPH_PARTITIONING"; - case N_LEVEL_GRAPH_PARTITIONING: return "N_LEVEL_GRAPH_PARTITIONING"; - case MULTILEVEL_HYPERGRAPH_PARTITIONING: return "MULTILEVEL_HYPERGRAPH_PARTITIONING"; - case LARGE_K_PARTITIONING: return "LARGE_K_PARTITIONING"; - case N_LEVEL_HYPERGRAPH_PARTITIONING: return "N_LEVEL_HYPERGRAPH_PARTITIONING"; - case NULLPTR_PARTITION: return "NULLPTR_PARTITION"; +std::string typeToString(const mt_kahypar_partition_type_t type) +{ + switch(type) + { + case MULTILEVEL_GRAPH_PARTITIONING: + return "MULTILEVEL_GRAPH_PARTITIONING"; + case N_LEVEL_GRAPH_PARTITIONING: + return "N_LEVEL_GRAPH_PARTITIONING"; + case MULTILEVEL_HYPERGRAPH_PARTITIONING: + return "MULTILEVEL_HYPERGRAPH_PARTITIONING"; + case LARGE_K_PARTITIONING: + return "LARGE_K_PARTITIONING"; + case N_LEVEL_HYPERGRAPH_PARTITIONING: + return "N_LEVEL_HYPERGRAPH_PARTITIONING"; + case NULLPTR_PARTITION: + return "NULLPTR_PARTITION"; } return "UNDEFINED"; } -template -std::string error_msg(mt_kahypar_hypergraph_t hypergraph) { +template +std::string error_msg(mt_kahypar_hypergraph_t hypergraph) +{ std::stringstream ss; - ss << "Cannot cast" << typeToString(hypergraph.type) << "to" << typeToString(Hypergraph::TYPE); + ss << "Cannot cast" << typeToString(hypergraph.type) << "to" + << typeToString(Hypergraph::TYPE); return ss.str(); } -template -std::string error_msg(mt_kahypar_partitioned_hypergraph_t partitioned_hg) { +template +std::string error_msg(mt_kahypar_partitioned_hypergraph_t partitioned_hg) +{ std::stringstream ss; - ss << "Cannot cast" << typeToString(partitioned_hg.type) << "to" << typeToString(PartitionedHypergraph::TYPE); + ss << "Cannot cast" << typeToString(partitioned_hg.type) << "to" + << typeToString(PartitionedHypergraph::TYPE); return ss.str(); } -template -std::string error_msg(mt_kahypar_partitioned_hypergraph_const_t partitioned_hg) { +template +std::string error_msg(mt_kahypar_partitioned_hypergraph_const_t partitioned_hg) +{ std::stringstream ss; - ss << "Cannot cast" << typeToString(partitioned_hg.type) << "to" << typeToString(PartitionedHypergraph::TYPE); + ss << "Cannot cast" << typeToString(partitioned_hg.type) << "to" + << typeToString(PartitionedHypergraph::TYPE); return ss.str(); } } // namespace -template -Hypergraph& cast(mt_kahypar_hypergraph_t hypergraph) { - if ( Hypergraph::TYPE != hypergraph.type ) { +template +Hypergraph &cast(mt_kahypar_hypergraph_t hypergraph) 
+{ + if(Hypergraph::TYPE != hypergraph.type) + { throw InvalidInputException(error_msg(hypergraph)); } - return *reinterpret_cast(hypergraph.hypergraph); + return *reinterpret_cast(hypergraph.hypergraph); } -template -const Hypergraph& cast_const(const mt_kahypar_hypergraph_t hypergraph) { - if ( Hypergraph::TYPE != hypergraph.type ) { +template +const Hypergraph &cast_const(const mt_kahypar_hypergraph_t hypergraph) +{ + if(Hypergraph::TYPE != hypergraph.type) + { throw InvalidInputException(error_msg(hypergraph)); } - return *reinterpret_cast(hypergraph.hypergraph); + return *reinterpret_cast(hypergraph.hypergraph); } -template -const Hypergraph& cast_const(mt_kahypar_hypergraph_const_t hypergraph) { - if ( Hypergraph::TYPE != hypergraph.type ) { +template +const Hypergraph &cast_const(mt_kahypar_hypergraph_const_t hypergraph) +{ + if(Hypergraph::TYPE != hypergraph.type) + { throw InvalidInputException(error_msg(hypergraph)); } - return *reinterpret_cast(hypergraph.hypergraph); + return *reinterpret_cast(hypergraph.hypergraph); } -template -mt_kahypar_hypergraph_t hypergraph_cast(Hypergraph& hypergraph) { - return mt_kahypar_hypergraph_t { - reinterpret_cast(&hypergraph), Hypergraph::TYPE }; +template +mt_kahypar_hypergraph_t hypergraph_cast(Hypergraph &hypergraph) +{ + return mt_kahypar_hypergraph_t{ + reinterpret_cast(&hypergraph), Hypergraph::TYPE + }; } -template - mt_kahypar_hypergraph_const_t hypergraph_const_cast(const Hypergraph& hypergraph) { - return mt_kahypar_hypergraph_const_t { - reinterpret_cast(&hypergraph), Hypergraph::TYPE }; +template +mt_kahypar_hypergraph_const_t hypergraph_const_cast(const Hypergraph &hypergraph) +{ + return mt_kahypar_hypergraph_const_t{ + reinterpret_cast(&hypergraph), Hypergraph::TYPE + }; } -template -PartitionedHypergraph& cast(mt_kahypar_partitioned_hypergraph_t phg) { - if ( PartitionedHypergraph::TYPE != phg.type ) { +template +PartitionedHypergraph &cast(mt_kahypar_partitioned_hypergraph_t phg) +{ + if(PartitionedHypergraph::TYPE != phg.type) + { throw InvalidInputException(error_msg(phg)); } - return *reinterpret_cast(phg.partitioned_hg); + return *reinterpret_cast(phg.partitioned_hg); } -template -const PartitionedHypergraph& cast_const(const mt_kahypar_partitioned_hypergraph_t phg) { - if ( PartitionedHypergraph::TYPE != phg.type ) { +template +const PartitionedHypergraph &cast_const(const mt_kahypar_partitioned_hypergraph_t phg) +{ + if(PartitionedHypergraph::TYPE != phg.type) + { throw InvalidInputException(error_msg(phg)); } - return *reinterpret_cast(phg.partitioned_hg); + return *reinterpret_cast(phg.partitioned_hg); } -template -const PartitionedHypergraph& cast_const(mt_kahypar_partitioned_hypergraph_const_t phg) { - if ( PartitionedHypergraph::TYPE != phg.type ) { +template +const PartitionedHypergraph &cast_const(mt_kahypar_partitioned_hypergraph_const_t phg) +{ + if(PartitionedHypergraph::TYPE != phg.type) + { throw InvalidInputException(error_msg(phg)); } - return *reinterpret_cast(phg.partitioned_hg); + return *reinterpret_cast(phg.partitioned_hg); } -template -mt_kahypar_partitioned_hypergraph_t partitioned_hg_cast(PartitionedHypergraph& phg) { - return mt_kahypar_partitioned_hypergraph_t { - reinterpret_cast(&phg), PartitionedHypergraph::TYPE }; +template +mt_kahypar_partitioned_hypergraph_t partitioned_hg_cast(PartitionedHypergraph &phg) +{ + return mt_kahypar_partitioned_hypergraph_t{ + reinterpret_cast(&phg), + PartitionedHypergraph::TYPE + }; } -template -mt_kahypar_partitioned_hypergraph_const_t 
partitioned_hg_const_cast(const PartitionedHypergraph& phg) { - return mt_kahypar_partitioned_hypergraph_const_t { - reinterpret_cast(&phg), PartitionedHypergraph::TYPE }; +template +mt_kahypar_partitioned_hypergraph_const_t +partitioned_hg_const_cast(const PartitionedHypergraph &phg) +{ + return mt_kahypar_partitioned_hypergraph_const_t{ + reinterpret_cast(&phg), + PartitionedHypergraph::TYPE + }; } -} // namespace mt_kahypar \ No newline at end of file +} // namespace mt_kahypar \ No newline at end of file diff --git a/mt-kahypar/utils/delete.h b/mt-kahypar/utils/delete.h index f17233107..85d8651ac 100644 --- a/mt-kahypar/utils/delete.h +++ b/mt-kahypar/utils/delete.h @@ -35,30 +35,55 @@ namespace mt_kahypar::utils { -void delete_hypergraph(mt_kahypar_hypergraph_t hg) { - if ( hg.hypergraph ) { - switch ( hg.type ) { - case STATIC_GRAPH: delete reinterpret_cast(hg.hypergraph); break; - case DYNAMIC_GRAPH: delete reinterpret_cast(hg.hypergraph); break; - case STATIC_HYPERGRAPH: delete reinterpret_cast(hg.hypergraph); break; - case DYNAMIC_HYPERGRAPH: delete reinterpret_cast(hg.hypergraph); break; - case NULLPTR_HYPERGRAPH: break; +void delete_hypergraph(mt_kahypar_hypergraph_t hg) +{ + if(hg.hypergraph) + { + switch(hg.type) + { + case STATIC_GRAPH: + delete reinterpret_cast(hg.hypergraph); + break; + case DYNAMIC_GRAPH: + delete reinterpret_cast(hg.hypergraph); + break; + case STATIC_HYPERGRAPH: + delete reinterpret_cast(hg.hypergraph); + break; + case DYNAMIC_HYPERGRAPH: + delete reinterpret_cast(hg.hypergraph); + break; + case NULLPTR_HYPERGRAPH: + break; } } } -void delete_partitioned_hypergraph(mt_kahypar_partitioned_hypergraph_t phg) { - if ( phg.partitioned_hg ) { - switch ( phg.type ) { - case MULTILEVEL_GRAPH_PARTITIONING: delete reinterpret_cast(phg.partitioned_hg); break; - case N_LEVEL_GRAPH_PARTITIONING: delete reinterpret_cast(phg.partitioned_hg); break; - case MULTILEVEL_HYPERGRAPH_PARTITIONING: delete reinterpret_cast(phg.partitioned_hg); break; - case LARGE_K_PARTITIONING: delete reinterpret_cast(phg.partitioned_hg); break; - case N_LEVEL_HYPERGRAPH_PARTITIONING: delete reinterpret_cast(phg.partitioned_hg); break; - case NULLPTR_PARTITION: break; +void delete_partitioned_hypergraph(mt_kahypar_partitioned_hypergraph_t phg) +{ + if(phg.partitioned_hg) + { + switch(phg.type) + { + case MULTILEVEL_GRAPH_PARTITIONING: + delete reinterpret_cast(phg.partitioned_hg); + break; + case N_LEVEL_GRAPH_PARTITIONING: + delete reinterpret_cast(phg.partitioned_hg); + break; + case MULTILEVEL_HYPERGRAPH_PARTITIONING: + delete reinterpret_cast(phg.partitioned_hg); + break; + case LARGE_K_PARTITIONING: + delete reinterpret_cast(phg.partitioned_hg); + break; + case N_LEVEL_HYPERGRAPH_PARTITIONING: + delete reinterpret_cast(phg.partitioned_hg); + break; + case NULLPTR_PARTITION: + break; } } } - -} // namespace mt_kahypar \ No newline at end of file +} // namespace mt_kahypar \ No newline at end of file diff --git a/mt-kahypar/utils/exception.h b/mt-kahypar/utils/exception.h index 781d03a09..5f60f2d8e 100644 --- a/mt-kahypar/utils/exception.h +++ b/mt-kahypar/utils/exception.h @@ -27,74 +27,74 @@ #pragma once #include -#include #include +#include #include "mt-kahypar/macros.h" namespace mt_kahypar { -template -class MtKaHyParException : public std::exception { +template +class MtKaHyParException : public std::exception +{ - public: - MtKaHyParException(const std::string& what) : - _what("") { +public: + MtKaHyParException(const std::string &what) : _what("") + { std::stringstream ss; ss << RED 
<< "[" << Derived::TYPE << "] " << END << " " << what; _what = ss.str(); } - const char * what () const throw () { - return _what.c_str(); - } + const char *what() const throw() { return _what.c_str(); } - private: +private: std::string _what; }; -class InvalidInputException : public MtKaHyParException { +class InvalidInputException : public MtKaHyParException +{ using Base = MtKaHyParException; - public: +public: static constexpr char TYPE[] = "Invalid Input"; - InvalidInputException(const std::string& what) : - Base(what) { } + InvalidInputException(const std::string &what) : Base(what) {} }; -class InvalidParameterException : public MtKaHyParException { +class InvalidParameterException : public MtKaHyParException +{ using Base = MtKaHyParException; - public: +public: static constexpr char TYPE[] = "Invalid Parameter"; - InvalidParameterException(const std::string& what) : - Base(what) { } + InvalidParameterException(const std::string &what) : Base(what) {} }; -class NonSupportedOperationException : public MtKaHyParException { +class NonSupportedOperationException + : public MtKaHyParException +{ using Base = MtKaHyParException; - public: +public: static constexpr char TYPE[] = "Non Supported Operation"; - NonSupportedOperationException(const std::string& what) : - Base(what) { } + NonSupportedOperationException(const std::string &what) : Base(what) {} }; -class SystemException : public MtKaHyParException { +class SystemException : public MtKaHyParException +{ using Base = MtKaHyParException; - public: +public: static constexpr char TYPE[] = "System Error"; - SystemException(const std::string& what) : - Base(what) { } + SystemException(const std::string &what) : Base(what) {} }; -} // namespace mt_kahypar \ No newline at end of file +} // namespace mt_kahypar \ No newline at end of file diff --git a/mt-kahypar/utils/floating_point_comparisons.h b/mt-kahypar/utils/floating_point_comparisons.h index 8369d3dbb..23282314e 100644 --- a/mt-kahypar/utils/floating_point_comparisons.h +++ b/mt-kahypar/utils/floating_point_comparisons.h @@ -29,11 +29,14 @@ #include namespace mt_kahypar::math { - inline bool are_almost_equal_d(const double x, const double y, const double tolerance) { - return std::abs(x - y) < tolerance; - } +inline bool are_almost_equal_d(const double x, const double y, const double tolerance) +{ + return std::abs(x - y) < tolerance; +} - inline bool are_almost_equal_ld(const long double x, const long double y, const long double tolerance) { - return std::abs(x - y) < tolerance; - } +inline bool are_almost_equal_ld(const long double x, const long double y, + const long double tolerance) +{ + return std::abs(x - y) < tolerance; +} } \ No newline at end of file diff --git a/mt-kahypar/utils/hash.h b/mt-kahypar/utils/hash.h index e484bcb78..68aac8b1d 100644 --- a/mt-kahypar/utils/hash.h +++ b/mt-kahypar/utils/hash.h @@ -31,14 +31,14 @@ #include #include - namespace mt_kahypar::hashing { namespace integer { // from parlay -inline uint32_t hash32(uint32_t a) { +inline uint32_t hash32(uint32_t a) +{ a = (a + 0x7ed55d16) + (a << 12); a = (a ^ 0xc761c23c) ^ (a >> 19); a = (a + 0x165667b1) + (a << 5); @@ -48,23 +48,26 @@ inline uint32_t hash32(uint32_t a) { return a; } -inline uint32_t hash32_2(uint32_t a) { +inline uint32_t hash32_2(uint32_t a) +{ uint32_t z = (a + 0x6D2B79F5UL); z = (z ^ (z >> 15)) * (z | UL(1)); z ^= z + (z ^ (z >> 7)) * (z | UL(61)); return z ^ (z >> 14); } -inline uint32_t hash32_3(uint32_t a) { +inline uint32_t hash32_3(uint32_t a) +{ uint32_t z = a + 0x9e3779b9; z 
^= z >> 15; z *= 0x85ebca6b; z ^= z >> 13; - z *= 0xc2b2ae3d; // 0xc2b2ae35 for murmur3 + z *= 0xc2b2ae3d; // 0xc2b2ae35 for murmur3 return z ^= z >> 16; } -inline uint64_t hash64(uint64_t u) { +inline uint64_t hash64(uint64_t u) +{ uint64_t v = u * 3935559000370003845ul + UL(2691343689449507681); v ^= v >> 21; v ^= v << 37; @@ -76,7 +79,8 @@ inline uint64_t hash64(uint64_t u) { return v; } -inline uint64_t hash64_2(uint64_t x) { +inline uint64_t hash64_2(uint64_t x) +{ x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb); x = x ^ (x >> 31); @@ -84,11 +88,13 @@ inline uint64_t hash64_2(uint64_t x) { } // from boost::hash_combine -inline uint32_t combine32(uint32_t left, uint32_t hashed_right) { +inline uint32_t combine32(uint32_t left, uint32_t hashed_right) +{ return left ^ (hashed_right + 0x9e3779b9 + (left << 6) + (left >> 2)); } -inline uint32_t combine32_2(uint32_t left, uint32_t hashed_right) { +inline uint32_t combine32_2(uint32_t left, uint32_t hashed_right) +{ constexpr uint32_t c1 = 0xcc9e2d51; constexpr uint32_t c2 = 0x1b873593; constexpr auto rotate_left = [](uint32_t x, uint32_t r) -> uint32_t { @@ -96,50 +102,70 @@ inline uint32_t combine32_2(uint32_t left, uint32_t hashed_right) { }; hashed_right *= c1; - hashed_right = rotate_left(hashed_right,15); + hashed_right = rotate_left(hashed_right, 15); hashed_right *= c2; left ^= hashed_right; - left = rotate_left(left,13); + left = rotate_left(left, 13); left = left * 5 + 0xe6546b64; return left; } -inline uint64_t combine64(uint64_t left, uint64_t hashed_right) { +inline uint64_t combine64(uint64_t left, uint64_t hashed_right) +{ return left ^ (hashed_right + 0x9e3779b97f4a7c15 + (left << 12) + (left >> 4)); } -template struct dependent_false : std::false_type {}; +template +struct dependent_false : std::false_type +{ +}; -template T combine(T left, T hashed_right) { - if constexpr (sizeof(T) == 4) { +template +T combine(T left, T hashed_right) +{ + if constexpr(sizeof(T) == 4) + { return combine32(left, hashed_right); - } else if constexpr (sizeof(T) == 8) { + } + else if constexpr(sizeof(T) == 8) + { return combine64(left, hashed_right); - } else { - static_assert(dependent_false::value, "hashing::integer::combine not intended for other sizes than 32bit and 64bit int"); + } + else + { + static_assert( + dependent_false::value, + "hashing::integer::combine not intended for other sizes than 32bit and 64bit int"); return left + hashed_right; } } -template T hash(T x) { - if constexpr (sizeof(T) == 4) { +template +T hash(T x) +{ + if constexpr(sizeof(T) == 4) + { return hash32(x); - } else if constexpr (sizeof(T) == 8) { + } + else if constexpr(sizeof(T) == 8) + { return hash64(x); - } else { - static_assert(dependent_false::value, "hashing::integer::hash combine not intended for other sizes than 32bit and 64bit int"); + } + else + { + static_assert( + dependent_false::value, + "hashing::integer::hash combine not intended for other sizes than 32bit and 64bit int"); return x; } } - } // namespace integer - // from thrill - /*! +/*! * Tabulation Hashing, see https://en.wikipedia.org/wiki/Tabulation_hashing * * Keeps a table with size * 256 entries of type hash_t, filled with random @@ -152,7 +178,7 @@ template ; using Table = std::array; @@ -160,10 +186,13 @@ class TabulationHashing explicit TabulationHashing(size_t seed = 0) { init(seed); } //! 
(re-)initialize the table by filling it with random values - void init(const size_t seed) { - prng_t rng { seed }; - for (size_t i = 0; i < size; ++i) { - for (size_t j = 0; j < 256; ++j) { + void init(const size_t seed) + { + prng_t rng{ seed }; + for(size_t i = 0; i < size; ++i) + { + for(size_t j = 0; j < 256; ++j) + { table_[i][j] = rng(); } } @@ -171,12 +200,14 @@ class TabulationHashing //! Hash an element template - hash_type operator () (const T& x) const { + hash_type operator()(const T &x) const + { static_assert(sizeof(T) == size, "Size mismatch with operand type"); hash_t hash = 0; - const uint8_t* ptr = reinterpret_cast(&x); - for (size_t i = 0; i < size; ++i) { + const uint8_t *ptr = reinterpret_cast(&x); + for(size_t i = 0; i < size; ++i) + { hash ^= table_[i][*(ptr + i)]; } return hash; @@ -190,37 +221,41 @@ class TabulationHashing template using HashTabulated = TabulationHashing; -template -struct SimpleIntHash { +template +struct SimpleIntHash +{ using hash_type = T; - void init(T /* seed */) { + void init(T /* seed */) + { // intentionally unimplemented } - T operator()(const T& x) const { - return integer::hash(x); - } + T operator()(const T &x) const { return integer::hash(x); } }; - // implements the rng interface required for std::uniform_int_distribution -template -struct HashRNG { +template +struct HashRNG +{ using result_type = typename HashFunction::hash_type; - explicit HashRNG(HashFunction& hash, result_type seed) : hash(hash), state(hash(seed)), counter(0) { } + explicit HashRNG(HashFunction &hash, result_type seed) : + hash(hash), state(hash(seed)), counter(0) + { + } static constexpr result_type min() { return std::numeric_limits::min(); } static constexpr result_type max() { return std::numeric_limits::max(); } // don't do too many calls of this - result_type operator()() { - //state = hash(state); + result_type operator()() + { + // state = hash(state); state = integer::combine(state, integer::hash(counter++)); return state; } private: - HashFunction& hash; // Hash function copy is expensive in case of tabulation hashing. + HashFunction &hash; // Hash function copy is expensive in case of tabulation hashing. 
result_type state; result_type counter; }; diff --git a/mt-kahypar/utils/hypergraph_statistics.h b/mt-kahypar/utils/hypergraph_statistics.h index 01f5860a5..697aceabd 100644 --- a/mt-kahypar/utils/hypergraph_statistics.h +++ b/mt-kahypar/utils/hypergraph_statistics.h @@ -32,43 +32,54 @@ namespace mt_kahypar { namespace utils { -template -double parallel_stdev(const std::vector& data, const double avg, const size_t n) { - return std::sqrt(tbb::parallel_reduce( - tbb::blocked_range(UL(0), data.size()), 0.0, - [&](tbb::blocked_range& range, double init) -> double { - double tmp_stdev = init; - for ( size_t i = range.begin(); i < range.end(); ++i ) { - tmp_stdev += (data[i] - avg) * (data[i] - avg); - } - return tmp_stdev; - }, std::plus()) / ( n- 1 )); +template +double parallel_stdev(const std::vector &data, const double avg, const size_t n) +{ + return std::sqrt(tbb::parallel_reduce( + tbb::blocked_range(UL(0), data.size()), 0.0, + [&](tbb::blocked_range &range, double init) -> double { + double tmp_stdev = init; + for(size_t i = range.begin(); i < range.end(); ++i) + { + tmp_stdev += (data[i] - avg) * (data[i] - avg); + } + return tmp_stdev; + }, + std::plus()) / + (n - 1)); } -template -double parallel_avg(const std::vector& data, const size_t n) { - return tbb::parallel_reduce( - tbb::blocked_range(UL(0), data.size()), 0.0, - [&](tbb::blocked_range& range, double init) -> double { - double tmp_avg = init; - for ( size_t i = range.begin(); i < range.end(); ++i ) { - tmp_avg += static_cast(data[i]); - } - return tmp_avg; - }, std::plus()) / static_cast(n); +template +double parallel_avg(const std::vector &data, const size_t n) +{ + return tbb::parallel_reduce( + tbb::blocked_range(UL(0), data.size()), 0.0, + [&](tbb::blocked_range &range, double init) -> double { + double tmp_avg = init; + for(size_t i = range.begin(); i < range.end(); ++i) + { + tmp_avg += static_cast(data[i]); + } + return tmp_avg; + }, + std::plus()) / + static_cast(n); } -template -static inline double avgHyperedgeDegree(const Hypergraph& hypergraph) { - if (Hypergraph::is_graph) { - return 2; - } - return static_cast(hypergraph.initialNumPins()) / hypergraph.initialNumEdges(); +template +static inline double avgHyperedgeDegree(const Hypergraph &hypergraph) +{ + if(Hypergraph::is_graph) + { + return 2; + } + return static_cast(hypergraph.initialNumPins()) / hypergraph.initialNumEdges(); } -template -static inline double avgHypernodeDegree(const Hypergraph& hypergraph) { - return static_cast(hypergraph.initialNumPins()) / hypergraph.initialNumNodes(); +template +static inline double avgHypernodeDegree(const Hypergraph &hypergraph) +{ + return static_cast(hypergraph.initialNumPins()) / hypergraph.initialNumNodes(); } } // namespace utils diff --git a/mt-kahypar/utils/initial_partitioning_stats.h b/mt-kahypar/utils/initial_partitioning_stats.h index b18d76064..d20ee9c1f 100644 --- a/mt-kahypar/utils/initial_partitioning_stats.h +++ b/mt-kahypar/utils/initial_partitioning_stats.h @@ -29,40 +29,45 @@ #include #include -#include "mt-kahypar/partition/context_enum_classes.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/partition/context_enum_classes.h" namespace mt_kahypar { namespace utils { -struct InitialPartitionerSummary { +struct InitialPartitionerSummary +{ explicit InitialPartitionerSummary(const InitialPartitioningAlgorithm algo) : - algorithm(algo), - total_sum_quality(0), - total_time(0.0), - total_best(0), - total_calls(0) { } + algorithm(algo), total_sum_quality(0), total_time(0.0), 
total_best(0), + total_calls(0) + { + } - friend std::ostream & operator<< (std::ostream& str, const InitialPartitionerSummary& summary); + friend std::ostream &operator<<(std::ostream &str, + const InitialPartitionerSummary &summary); - void add(const InitialPartitionerSummary& summary) { + void add(const InitialPartitionerSummary &summary) + { ASSERT(algorithm == summary.algorithm); total_sum_quality += summary.total_sum_quality; total_time += summary.total_time; total_calls += summary.total_calls; } - double average_quality() const { + double average_quality() const + { return static_cast(total_sum_quality) / std::max(total_calls, UL(1)); } - double average_running_time() const { + double average_running_time() const + { return static_cast(total_time) / std::max(total_calls, UL(1)); } - double percentage_best(const size_t total_ip_calls) const { - return ( static_cast(total_best) / total_ip_calls ) * 100.0; + double percentage_best(const size_t total_ip_calls) const + { + return (static_cast(total_best) / total_ip_calls) * 100.0; } InitialPartitioningAlgorithm algorithm; @@ -72,54 +77,60 @@ struct InitialPartitionerSummary { size_t total_calls; }; -inline std::ostream & operator<< (std::ostream& str, const InitialPartitionerSummary& summary) { +inline std::ostream &operator<<(std::ostream &str, + const InitialPartitionerSummary &summary) +{ str << " avg_quality_" << summary.algorithm << "=" << summary.average_quality() << " total_time_" << summary.algorithm << "=" << summary.total_time << " total_best_" << summary.algorithm << "=" << summary.total_best; return str; } -class InitialPartitioningStats { +class InitialPartitioningStats +{ - public: +public: explicit InitialPartitioningStats() : - _stat_mutex(), - _num_initial_partitioner(static_cast(InitialPartitioningAlgorithm::UNDEFINED)), - _ip_summary(), - _total_ip_calls(0), - _total_sum_number_of_threads(0) { - for ( uint8_t algo = 0; algo < _num_initial_partitioner; ++algo ) { + _stat_mutex(), _num_initial_partitioner( + static_cast(InitialPartitioningAlgorithm::UNDEFINED)), + _ip_summary(), _total_ip_calls(0), _total_sum_number_of_threads(0) + { + for(uint8_t algo = 0; algo < _num_initial_partitioner; ++algo) + { _ip_summary.emplace_back(static_cast(algo)); } } - InitialPartitioningStats(const InitialPartitioningStats& other) : - _stat_mutex(), - _num_initial_partitioner(other._num_initial_partitioner), - _ip_summary(other._ip_summary), - _total_ip_calls(other._total_ip_calls), - _total_sum_number_of_threads(other._total_sum_number_of_threads) { } + InitialPartitioningStats(const InitialPartitioningStats &other) : + _stat_mutex(), _num_initial_partitioner(other._num_initial_partitioner), + _ip_summary(other._ip_summary), _total_ip_calls(other._total_ip_calls), + _total_sum_number_of_threads(other._total_sum_number_of_threads) + { + } - InitialPartitioningStats & operator= (const InitialPartitioningStats &) = delete; + InitialPartitioningStats &operator=(const InitialPartitioningStats &) = delete; - InitialPartitioningStats(InitialPartitioningStats&& other) : - _stat_mutex(), - _num_initial_partitioner(std::move(other._num_initial_partitioner)), - _ip_summary(std::move(other._ip_summary)), - _total_ip_calls(std::move(other._total_ip_calls)), - _total_sum_number_of_threads(std::move(other._total_sum_number_of_threads)) { } + InitialPartitioningStats(InitialPartitioningStats &&other) : + _stat_mutex(), _num_initial_partitioner(std::move(other._num_initial_partitioner)), + _ip_summary(std::move(other._ip_summary)), + 
_total_ip_calls(std::move(other._total_ip_calls)), + _total_sum_number_of_threads(std::move(other._total_sum_number_of_threads)) + { + } - InitialPartitioningStats & operator= (InitialPartitioningStats &&) = delete; + InitialPartitioningStats &operator=(InitialPartitioningStats &&) = delete; - void add_initial_partitioning_result(const InitialPartitioningAlgorithm best_algorithm, - const size_t number_of_threads, - const parallel::scalable_vector& summary) { + void add_initial_partitioning_result( + const InitialPartitioningAlgorithm best_algorithm, const size_t number_of_threads, + const parallel::scalable_vector &summary) + { std::lock_guard lock(_stat_mutex); ASSERT(summary.size() == _ip_summary.size()); uint8_t best_algorithm_index = static_cast(best_algorithm); ++_ip_summary[best_algorithm_index].total_best; - for ( size_t i = 0; i < _num_initial_partitioner; ++i ) { + for(size_t i = 0; i < _num_initial_partitioner; ++i) + { _ip_summary[i].add(summary[i]); } @@ -127,34 +138,35 @@ class InitialPartitioningStats { ++_total_ip_calls; } - double average_number_of_threads_per_ip_call() const { + double average_number_of_threads_per_ip_call() const + { return static_cast(_total_sum_number_of_threads) / _total_ip_calls; } - void printInitialPartitioningStats() { + void printInitialPartitioningStats() + { LOG << "Initial Partitioning Algorithm Summary:"; LOG << "Number of Initial Partitioning Calls =" << _total_ip_calls; LOG << "Average Number of Thread per IP Call =" << average_number_of_threads_per_ip_call() << "\n"; - std::cout << "\033[1m" - << std::left << std::setw(30) << "Algorithm" - << std::left << std::setw(15) << " Avg. Quality" - << std::left << std::setw(15) << " Total Time (s)" - << std::left << std::setw(10) << " Total Best" + std::cout << "\033[1m" << std::left << std::setw(30) << "Algorithm" << std::left + << std::setw(15) << " Avg. 
Quality" << std::left << std::setw(15) + << " Total Time (s)" << std::left << std::setw(10) << " Total Best" << std::left << std::setw(15) << " Total Best (%)" << "\033[0m" << std::endl; - for ( const InitialPartitionerSummary& summary : _ip_summary ) { - LOG << std::left << std::setw(30) << summary.algorithm - << std::left << std::setw(15) << summary.average_quality() - << std::left << std::setw(15) << summary.total_time - << std::left << std::setw(10) << summary.total_best - << std::left << std::setw(15) << summary.percentage_best(_total_ip_calls); + for(const InitialPartitionerSummary &summary : _ip_summary) + { + LOG << std::left << std::setw(30) << summary.algorithm << std::left << std::setw(15) + << summary.average_quality() << std::left << std::setw(15) << summary.total_time + << std::left << std::setw(10) << summary.total_best << std::left + << std::setw(15) << summary.percentage_best(_total_ip_calls); } } - friend std::ostream & operator<< (std::ostream& str, const InitialPartitioningStats& stats); + friend std::ostream &operator<<(std::ostream &str, + const InitialPartitioningStats &stats); - private: +private: std::mutex _stat_mutex; const uint8_t _num_initial_partitioner; parallel::scalable_vector _ip_summary; @@ -162,14 +174,16 @@ class InitialPartitioningStats { size_t _total_sum_number_of_threads; }; -inline std::ostream & operator<< (std::ostream& str, const InitialPartitioningStats& stats) { +inline std::ostream &operator<<(std::ostream &str, const InitialPartitioningStats &stats) +{ str << " average_number_of_threads_per_ip_call=" << stats.average_number_of_threads_per_ip_call(); - for ( const InitialPartitionerSummary& summary : stats._ip_summary ) { + for(const InitialPartitionerSummary &summary : stats._ip_summary) + { str << summary; } return str; } -} // namespace utils -} // namespace mt_kahypar +} // namespace utils +} // namespace mt_kahypar diff --git a/mt-kahypar/utils/memory_tree.cpp b/mt-kahypar/utils/memory_tree.cpp index cdc09cf5b..9525d0ade 100644 --- a/mt-kahypar/utils/memory_tree.cpp +++ b/mt-kahypar/utils/memory_tree.cpp @@ -33,121 +33,138 @@ namespace mt_kahypar::utils { - MemoryTreeNode::MemoryTreeNode(const std::string& name, const OutputType& output_type) : - _name(name), - _size_in_bytes(0), - _output_type(output_type), - _children() { } - - MemoryTreeNode* MemoryTreeNode::addChild(const std::string& name, const size_t size_in_bytes) { - auto child_iter = _children.find(name); - if ( child_iter == _children.end() ) { - MemoryTreeNode* child = new MemoryTreeNode(name, _output_type); - child->_size_in_bytes = size_in_bytes; - _children[name] = std::unique_ptr(child); - return child; - } else { - return (*child_iter).second.get(); - } +MemoryTreeNode::MemoryTreeNode(const std::string &name, const OutputType &output_type) : + _name(name), _size_in_bytes(0), _output_type(output_type), _children() +{ +} + +MemoryTreeNode *MemoryTreeNode::addChild(const std::string &name, + const size_t size_in_bytes) +{ + auto child_iter = _children.find(name); + if(child_iter == _children.end()) + { + MemoryTreeNode *child = new MemoryTreeNode(name, _output_type); + child->_size_in_bytes = size_in_bytes; + _children[name] = std::unique_ptr(child); + return child; } - - void MemoryTreeNode::finalize() { - for ( auto& child : _children ) { - child.second->finalize(); - } - - // Aggregate size of childs - for ( auto& child : _children ) { - _size_in_bytes += child.second->_size_in_bytes; - } + else + { + return (*child_iter).second.get(); } +} - - std::string 
serialize_in_bytes(const size_t size_in_bytes) { - std::stringstream ss; - ss << size_in_bytes << " bytes"; - return ss.str(); +void MemoryTreeNode::finalize() +{ + for(auto &child : _children) + { + child.second->finalize(); } - std::string serialize_in_kilobytes(const size_t size_in_bytes) { - std::stringstream ss; - const double size_in_kb = static_cast(size_in_bytes) / 1000.0; - ss << std::fixed << std::setprecision(3) << size_in_kb << " KB"; - return ss.str(); + // Aggregate size of childs + for(auto &child : _children) + { + _size_in_bytes += child.second->_size_in_bytes; } - - std::string serialize_in_megabytes(const size_t size_in_bytes) { +} + +std::string serialize_in_bytes(const size_t size_in_bytes) +{ + std::stringstream ss; + ss << size_in_bytes << " bytes"; + return ss.str(); +} + +std::string serialize_in_kilobytes(const size_t size_in_bytes) +{ + std::stringstream ss; + const double size_in_kb = static_cast(size_in_bytes) / 1000.0; + ss << std::fixed << std::setprecision(3) << size_in_kb << " KB"; + return ss.str(); +} + +std::string serialize_in_megabytes(const size_t size_in_bytes) +{ + std::stringstream ss; + const double size_in_mb = static_cast(size_in_bytes) / 1000000.0; + ss << std::fixed << std::setprecision(3) << size_in_mb << " MB"; + return ss.str(); +} + +std::string serialize_in_percentage(const size_t parent_size_in_bytes, + const size_t size_in_bytes) +{ + if(parent_size_in_bytes > 0) + { std::stringstream ss; - const double size_in_mb = static_cast(size_in_bytes) / 1000000.0; - ss << std::fixed << std::setprecision(3) << size_in_mb << " MB"; + const double percentage = + (static_cast(size_in_bytes) / static_cast(parent_size_in_bytes)) * + 100.0; + ss << std::fixed << std::setprecision(2) << percentage << "%"; return ss.str(); } - - std::string serialize_in_percentage(const size_t parent_size_in_bytes, - const size_t size_in_bytes) { - if ( parent_size_in_bytes > 0 ) { - std::stringstream ss; - const double percentage = ( static_cast(size_in_bytes) / - static_cast(parent_size_in_bytes) ) * 100.0; - ss << std::fixed << std::setprecision(2) << percentage << "%"; - return ss.str(); - } else { - return serialize_in_megabytes(size_in_bytes); - } + else + { + return serialize_in_megabytes(size_in_bytes); } - - std::string serialize_metric(const OutputType& type, - const size_t parent_size_in_bytes, - const size_t size_in_bytes) { - switch(type) { - case OutputType::BYTES: - return serialize_in_bytes(size_in_bytes); - case OutputType::KILOBYTE: - return serialize_in_kilobytes(size_in_bytes); - case OutputType::MEGABYTE: - return serialize_in_megabytes(size_in_bytes); - case OutputType::PERCENTAGE: - return serialize_in_percentage(parent_size_in_bytes, size_in_bytes); - } - return ""; +} + +std::string serialize_metric(const OutputType &type, const size_t parent_size_in_bytes, + const size_t size_in_bytes) +{ + switch(type) + { + case OutputType::BYTES: + return serialize_in_bytes(size_in_bytes); + case OutputType::KILOBYTE: + return serialize_in_kilobytes(size_in_bytes); + case OutputType::MEGABYTE: + return serialize_in_megabytes(size_in_bytes); + case OutputType::PERCENTAGE: + return serialize_in_percentage(parent_size_in_bytes, size_in_bytes); } - - - void MemoryTreeNode::print(std::ostream& str, const size_t parent_size_in_bytes, int level) const { - - constexpr int MAX_LINE_LENGTH = 45; - constexpr size_t LINE_PREFIX_LENGTH = 3; - - std::string prefix; - prefix += level == 0 ? " + " : - std::string(LINE_PREFIX_LENGTH, ' '); - prefix += level > 0 ? 
std::string(LINE_PREFIX_LENGTH * (level - 1), ' ') : ""; - prefix += level > 0 ? " + " : ""; - size_t length = prefix.size() + _name.size(); - str << prefix - << _name; - if (length < MAX_LINE_LENGTH) { - str << std::string(MAX_LINE_LENGTH - length, ' '); - } - str << " = " - << serialize_metric(_output_type, - parent_size_in_bytes, _size_in_bytes) << "\n"; - + return ""; +} + +void MemoryTreeNode::print(std::ostream &str, const size_t parent_size_in_bytes, + int level) const +{ + + constexpr int MAX_LINE_LENGTH = 45; + constexpr size_t LINE_PREFIX_LENGTH = 3; + + std::string prefix; + prefix += level == 0 ? " + " : std::string(LINE_PREFIX_LENGTH, ' '); + prefix += level > 0 ? std::string(LINE_PREFIX_LENGTH * (level - 1), ' ') : ""; + prefix += level > 0 ? " + " : ""; + size_t length = prefix.size() + _name.size(); + str << prefix << _name; + if(length < MAX_LINE_LENGTH) + { + str << std::string(MAX_LINE_LENGTH - length, ' '); } - - void MemoryTreeNode::dfs(std::ostream& str, const size_t parent_size_in_bytes, int level) const { - if ( _size_in_bytes > 0 ) { - print(str, parent_size_in_bytes, level); - for (const auto& child : _children) { - child.second->dfs(str, parent_size_in_bytes + _size_in_bytes, level + 1); - } + str << " = " << serialize_metric(_output_type, parent_size_in_bytes, _size_in_bytes) + << "\n"; +} + +void MemoryTreeNode::dfs(std::ostream &str, const size_t parent_size_in_bytes, + int level) const +{ + if(_size_in_bytes > 0) + { + print(str, parent_size_in_bytes, level); + for(const auto &child : _children) + { + child.second->dfs(str, parent_size_in_bytes + _size_in_bytes, level + 1); } } +} - std::ostream & operator<< (std::ostream& str, const MemoryTreeNode& root) { - root.dfs(str, UL(0), 0); - return str; - } - +std::ostream &operator<<(std::ostream &str, const MemoryTreeNode &root) +{ + root.dfs(str, UL(0), 0); + return str; +} } \ No newline at end of file diff --git a/mt-kahypar/utils/memory_tree.h b/mt-kahypar/utils/memory_tree.h index 5791769ad..cc5026b89 100644 --- a/mt-kahypar/utils/memory_tree.h +++ b/mt-kahypar/utils/memory_tree.h @@ -27,39 +27,39 @@ #pragma once #include -#include #include +#include namespace mt_kahypar::utils { -enum class OutputType : uint8_t { +enum class OutputType : uint8_t +{ BYTES = 0, KILOBYTE = 1, MEGABYTE = 2, PERCENTAGE = 3 }; -class MemoryTreeNode { +class MemoryTreeNode +{ - using map_type = std::map>; + using map_type = std::map >; - public: - MemoryTreeNode(const std::string& name, const OutputType& output_type = OutputType::MEGABYTE); +public: + MemoryTreeNode(const std::string &name, + const OutputType &output_type = OutputType::MEGABYTE); - MemoryTreeNode* addChild(const std::string& name, const size_t size_in_bytes = 0); + MemoryTreeNode *addChild(const std::string &name, const size_t size_in_bytes = 0); - void updateSize(const size_t delta) { - _size_in_bytes += delta; - } + void updateSize(const size_t delta) { _size_in_bytes += delta; } void finalize(); - private: +private: + void dfs(std::ostream &str, const size_t parent_size_in_bytes, int level) const; + void print(std::ostream &str, const size_t parent_size_in_bytes, int level) const; - void dfs(std::ostream& str, const size_t parent_size_in_bytes, int level) const ; - void print(std::ostream& str, const size_t parent_size_in_bytes, int level) const ; - - friend std::ostream& operator<<(std::ostream& str, const MemoryTreeNode& root); + friend std::ostream &operator<<(std::ostream &str, const MemoryTreeNode &root); std::string _name; size_t _size_in_bytes; @@ -67,7 
+67,6 @@ class MemoryTreeNode { map_type _children; }; +std::ostream &operator<<(std::ostream &str, const MemoryTreeNode &root); -std::ostream & operator<< (std::ostream& str, const MemoryTreeNode& root); - -} // namespace mt_kahypar \ No newline at end of file +} // namespace mt_kahypar \ No newline at end of file diff --git a/mt-kahypar/utils/progress_bar.h b/mt-kahypar/utils/progress_bar.h index 8a2db7e76..04fbad4b3 100644 --- a/mt-kahypar/utils/progress_bar.h +++ b/mt-kahypar/utils/progress_bar.h @@ -26,12 +26,12 @@ #pragma once +#include +#include #include +#include #include #include -#include -#include -#include #if defined(__linux__) or defined(__APPLE__) #include #elif _WIN32 @@ -44,86 +44,85 @@ namespace mt_kahypar { namespace utils { -class ProgressBar { +class ProgressBar +{ - using HighResClockTimepoint = std::chrono::time_point; + using HighResClockTimepoint = + std::chrono::time_point; - public: - explicit ProgressBar(const size_t expected_count, - const HyperedgeWeight objective, +public: + explicit ProgressBar(const size_t expected_count, const HyperedgeWeight objective, const bool enable = true) : - _display_mutex(), - _count(0), - _next_tic_count(0), - _expected_count(expected_count), - _start(std::chrono::high_resolution_clock::now()), - _objective(objective), - _progress_bar_size(0), - _enable(enable) { - #if defined(__linux__) or defined(__APPLE__) + _display_mutex(), + _count(0), _next_tic_count(0), _expected_count(expected_count), + _start(std::chrono::high_resolution_clock::now()), _objective(objective), + _progress_bar_size(0), _enable(enable) + { +#if defined(__linux__) or defined(__APPLE__) struct winsize w; ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); _progress_bar_size = w.ws_col / 2; - #elif _WIN32 +#elif _WIN32 CONSOLE_SCREEN_BUFFER_INFO csbi; GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi); - _progress_bar_size = (csbi.srWindow.Right - csbi.srWindow.Left + 1)/2; - #endif + _progress_bar_size = (csbi.srWindow.Right - csbi.srWindow.Left + 1) / 2; +#endif display_progress(); } - ProgressBar(const ProgressBar&) = delete; - ProgressBar & operator= (const ProgressBar &) = delete; + ProgressBar(const ProgressBar &) = delete; + ProgressBar &operator=(const ProgressBar &) = delete; - ProgressBar(ProgressBar&&) = delete; - ProgressBar & operator= (ProgressBar &&) = delete; + ProgressBar(ProgressBar &&) = delete; + ProgressBar &operator=(ProgressBar &&) = delete; - ~ProgressBar() { - finalize(); - } + ~ProgressBar() { finalize(); } - void enable() { + void enable() + { _enable = true; display_progress(); } - void disable() { - _enable = false; - } + void disable() { _enable = false; } - size_t count() const { - return _count.load(); - } + size_t count() const { return _count.load(); } - size_t operator+=( const size_t increment ) { - if ( _enable ) { + size_t operator+=(const size_t increment) + { + if(_enable) + { _count.fetch_add(increment); - if ( _count >= _next_tic_count ) { + if(_count >= _next_tic_count) + { display_progress(); } } return _count; } - void setObjective(const HyperedgeWeight objective) { - _objective = objective; - } + void setObjective(const HyperedgeWeight objective) { _objective = objective; } - void addToObjective(const HyperedgeWeight delta) { + void addToObjective(const HyperedgeWeight delta) + { __atomic_fetch_add(&_objective, delta, __ATOMIC_RELAXED); } - private: - void finalize() { - if ( _count.load() < _expected_count ) { +private: + void finalize() + { + if(_count.load() < _expected_count) + { _count = _expected_count; 
_next_tic_count = std::numeric_limits::max(); display_progress(); } } - void display_progress() { - if ( _enable ) { + void display_progress() + { + if(_enable) + { std::lock_guard lock(_display_mutex); HighResClockTimepoint end = std::chrono::high_resolution_clock::now(); size_t current_count = std::min(_count.load(), _expected_count); @@ -132,57 +131,70 @@ class ProgressBar { size_t progress = get_progress(current_count); std::cout << "[ " << GREEN; - for ( size_t i = 0; i < current_tics; ++i ) { + for(size_t i = 0; i < current_tics; ++i) + { std::cout << "#"; } std::cout << END; - for ( size_t i = 0; i < _progress_bar_size - current_tics; ++i ) { + for(size_t i = 0; i < _progress_bar_size - current_tics; ++i) + { std::cout << " "; } std::cout << " ] "; - std::cout << "(" << progress << "% - " - << current_count << "/" << _expected_count << ") "; + std::cout << "(" << progress << "% - " << current_count << "/" << _expected_count + << ") "; size_t time = std::chrono::duration(end - _start).count(); display_time(time); std::cout << " - Current Objective: " << _objective; - if ( current_count == _expected_count ) { + if(current_count == _expected_count) + { std::cout << std::endl; - } else { + } + else + { std::cout << "\r" << std::flush; } } } - void display_time(const size_t time) { + void display_time(const size_t time) + { size_t minutes = time / 60; size_t seconds = time % 60; - if ( minutes > 0 ) { + if(minutes > 0) + { std::cout << minutes << " min "; } std::cout << seconds << " s"; } - size_t get_progress(const size_t current_count) { - return ( static_cast(current_count) / - static_cast(_expected_count) ) * 100; + size_t get_progress(const size_t current_count) + { + return (static_cast(current_count) / static_cast(_expected_count)) * + 100; } - size_t get_tics(const size_t current_count) { - return ( static_cast(current_count) / - static_cast(_expected_count) ) * _progress_bar_size; + size_t get_tics(const size_t current_count) + { + return (static_cast(current_count) / static_cast(_expected_count)) * + _progress_bar_size; } - size_t compute_next_tic_count(const size_t current_count) { + size_t compute_next_tic_count(const size_t current_count) + { size_t next_tics = get_tics(current_count) + 1; - if ( next_tics > _progress_bar_size ) { + if(next_tics > _progress_bar_size) + { return std::numeric_limits::max(); - } else { - return ( static_cast(next_tics) / static_cast(_progress_bar_size) ) * - static_cast(_expected_count); + } + else + { + return (static_cast(next_tics) / static_cast(_progress_bar_size)) * + static_cast(_expected_count); } } @@ -196,5 +208,5 @@ class ProgressBar { bool _enable; }; -} // namespace utils -} // namespace mt_kahypar +} // namespace utils +} // namespace mt_kahypar diff --git a/mt-kahypar/utils/randomize.h b/mt-kahypar/utils/randomize.h index 60be2a50d..05ddd4afe 100644 --- a/mt-kahypar/utils/randomize.h +++ b/mt-kahypar/utils/randomize.h @@ -32,66 +32,70 @@ #include #include -#include "tbb/task_group.h" #include "tbb/parallel_for.h" +#include "tbb/task_group.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" - namespace mt_kahypar::utils { -class Randomize { +class Randomize +{ static constexpr bool debug = false; static constexpr size_t PRECOMPUTED_FLIP_COINS = 128; using SwapBlock = std::pair; - class RandomFunctions { - public: + class RandomFunctions + { + public: RandomFunctions() : - _seed(-1), - _gen(), - _next_coin_flip(0), - _precomputed_flip_coins(PRECOMPUTED_FLIP_COINS), - _int_dist(0, 
std::numeric_limits::max()), - _float_dist(0, 1), - _norm_dist(0, 1) { + _seed(-1), _gen(), _next_coin_flip(0), + _precomputed_flip_coins(PRECOMPUTED_FLIP_COINS), + _int_dist(0, std::numeric_limits::max()), _float_dist(0, 1), _norm_dist(0, 1) + { precompute_flip_coins(); } - void setSeed(int seed) { + void setSeed(int seed) + { _seed = seed; _gen.seed(_seed); precompute_flip_coins(); } - bool flipCoin() { + bool flipCoin() + { return _precomputed_flip_coins[++_next_coin_flip % PRECOMPUTED_FLIP_COINS]; } // returns uniformly random int from the interval [low, high] - int getRandomInt(int low, int high) { + int getRandomInt(int low, int high) + { return _int_dist(_gen, std::uniform_int_distribution::param_type(low, high)); } // returns uniformly random float from the interval [low, high) - float getRandomFloat(float low, float high) { - return _float_dist(_gen, std::uniform_real_distribution::param_type(low, high)); + float getRandomFloat(float low, float high) + { + return _float_dist(_gen, + std::uniform_real_distribution::param_type(low, high)); } - float getNormalDistributedFloat(float mean, float std_dev) { + float getNormalDistributedFloat(float mean, float std_dev) + { return _norm_dist(_gen, std::normal_distribution::param_type(mean, std_dev)); } - std::mt19937& getGenerator() { - return _gen; - } + std::mt19937 &getGenerator() { return _gen; } - private: - void precompute_flip_coins() { - std::uniform_int_distribution bool_dist(0,1); - for (size_t i = 0; i < PRECOMPUTED_FLIP_COINS; ++i) { + private: + void precompute_flip_coins() + { + std::uniform_int_distribution bool_dist(0, 1); + for(size_t i = 0; i < PRECOMPUTED_FLIP_COINS; ++i) + { _precomputed_flip_coins[i] = static_cast(bool_dist(_gen)); } } @@ -105,99 +109,124 @@ class Randomize { std::normal_distribution _norm_dist; }; - public: - static Randomize& instance() { +public: + static Randomize &instance() + { static Randomize instance; return instance; } - void enableLocalizedParallelShuffle(const size_t localized_random_shuffle_block_size) { + void enableLocalizedParallelShuffle(const size_t localized_random_shuffle_block_size) + { _perform_localized_random_shuffle = true; _localized_random_shuffle_block_size = localized_random_shuffle_block_size; } - void setSeed(int seed) { - for (uint32_t i = 0; i < std::thread::hardware_concurrency(); ++i) { + void setSeed(int seed) + { + for(uint32_t i = 0; i < std::thread::hardware_concurrency(); ++i) + { _rand[i].setSeed(seed + i); } } - bool flipCoin(int cpu_id) { + bool flipCoin(int cpu_id) + { ASSERT(cpu_id < (int)std::thread::hardware_concurrency()); return _rand[cpu_id].flipCoin(); } template - void shuffleVector(std::vector& vector, size_t num_elements, int cpu_id) { + void shuffleVector(std::vector &vector, size_t num_elements, int cpu_id) + { ASSERT(cpu_id < (int)std::thread::hardware_concurrency()); - std::shuffle(vector.begin(), vector.begin() + num_elements, _rand[cpu_id].getGenerator()); + std::shuffle(vector.begin(), vector.begin() + num_elements, + _rand[cpu_id].getGenerator()); } template - void shuffleVector(std::vector& vector, int cpu_id = -1) { - if (cpu_id == -1) + void shuffleVector(std::vector &vector, int cpu_id = -1) + { + if(cpu_id == -1) cpu_id = THREAD_ID; ASSERT(cpu_id < (int)std::thread::hardware_concurrency()); std::shuffle(vector.begin(), vector.end(), _rand[cpu_id].getGenerator()); } template - void shuffleVector(parallel::scalable_vector& vector, int cpu_id = -1) { - if (cpu_id == -1) + void shuffleVector(parallel::scalable_vector &vector, int cpu_id = 
-1) + { + if(cpu_id == -1) cpu_id = THREAD_ID; ASSERT(cpu_id < (int)std::thread::hardware_concurrency()); std::shuffle(vector.begin(), vector.end(), _rand[cpu_id].getGenerator()); } template - void shuffleVector(parallel::scalable_vector& vector, size_t num_elements, int cpu_id) { + void shuffleVector(parallel::scalable_vector &vector, size_t num_elements, + int cpu_id) + { ASSERT(cpu_id < (int)std::thread::hardware_concurrency()); - std::shuffle(vector.begin(), vector.begin() + num_elements, _rand[cpu_id].getGenerator()); + std::shuffle(vector.begin(), vector.begin() + num_elements, + _rand[cpu_id].getGenerator()); } template - void shuffleVector(std::vector& vector, size_t i, size_t j, int cpu_id) { + void shuffleVector(std::vector &vector, size_t i, size_t j, int cpu_id) + { ASSERT(i <= j && j <= vector.size()); ASSERT(cpu_id < (int)std::thread::hardware_concurrency()); std::shuffle(vector.begin() + i, vector.begin() + j, _rand[cpu_id].getGenerator()); } template - void shuffleVector(parallel::scalable_vector& vector, size_t i, size_t j, int cpu_id) { + void shuffleVector(parallel::scalable_vector &vector, size_t i, size_t j, int cpu_id) + { ASSERT(i <= j && j <= vector.size()); ASSERT(cpu_id < (int)std::thread::hardware_concurrency()); - if ( _perform_localized_random_shuffle ) { + if(_perform_localized_random_shuffle) + { localizedShuffleVector(vector, i, j, cpu_id); - } else { + } + else + { std::shuffle(vector.begin() + i, vector.begin() + j, _rand[cpu_id].getGenerator()); } } template - void parallelShuffleVector(parallel::scalable_vector& vector, const size_t i, const size_t j) { + void parallelShuffleVector(parallel::scalable_vector &vector, const size_t i, + const size_t j) + { ASSERT(i <= j && j <= vector.size()); const size_t P = 2 * std::thread::hardware_concurrency(); const size_t N = j - i; const size_t step = N / P; - if ( _perform_localized_random_shuffle ) { + if(_perform_localized_random_shuffle) + { tbb::parallel_for(UL(0), P, [&](const size_t k) { const size_t start = i + k * step; const size_t end = i + (k == P - 1 ? N : (k + 1) * step); localizedShuffleVector(vector, start, end, THREAD_ID); }); - } else { + } + else + { // Compute blocks that should be swapped before // random shuffling parallel::scalable_vector swap_blocks; parallel::scalable_vector matched_blocks(P, false); int cpu_id = THREAD_ID; - for ( size_t a = 0; a < P; ++a ) { - if ( !matched_blocks[a] ) { + for(size_t a = 0; a < P; ++a) + { + if(!matched_blocks[a]) + { matched_blocks[a] = true; size_t b = getRandomInt(0, P - 1, cpu_id); - while ( matched_blocks[b] ) { - b = ( b + 1 ) % P; + while(matched_blocks[b]) + { + b = (b + 1) % P; } matched_blocks[b] = true; swap_blocks.push_back(std::make_pair(a, b)); @@ -214,62 +243,72 @@ class Randomize { const size_t end_2 = i + (block_2 == P - 1 ? 
N : (block_2 + 1) * step); const int cpu_id = THREAD_ID; swapBlocks(vector, start_1, end_1, start_2, end_2); - std::shuffle(vector.begin() + start_1, vector.begin() + end_1, _rand[cpu_id].getGenerator()); - std::shuffle(vector.begin() + start_2, vector.begin() + end_2, _rand[cpu_id].getGenerator()); + std::shuffle(vector.begin() + start_1, vector.begin() + end_1, + _rand[cpu_id].getGenerator()); + std::shuffle(vector.begin() + start_2, vector.begin() + end_2, + _rand[cpu_id].getGenerator()); }); } } // returns uniformly random int from the interval [low, high] - int getRandomInt(int low, int high, int cpu_id) { + int getRandomInt(int low, int high, int cpu_id) + { ASSERT(cpu_id < (int)std::thread::hardware_concurrency()); return _rand[cpu_id].getRandomInt(low, high); } // returns uniformly random float from the interval [low, high) - float getRandomFloat(float low, float high, int cpu_id) { + float getRandomFloat(float low, float high, int cpu_id) + { ASSERT(cpu_id < (int)std::thread::hardware_concurrency()); return _rand[cpu_id].getRandomFloat(low, high); } - float getNormalDistributedFloat(float mean, float std_dev, int cpu_id) { + float getNormalDistributedFloat(float mean, float std_dev, int cpu_id) + { ASSERT(cpu_id < (int)std::thread::hardware_concurrency()); return _rand[cpu_id].getNormalDistributedFloat(mean, std_dev); } - std::mt19937& getGenerator() { + std::mt19937 &getGenerator() + { int cpu_id = THREAD_ID; return _rand[cpu_id].getGenerator(); } - private: +private: explicit Randomize() : - _rand(std::thread::hardware_concurrency()), - _perform_localized_random_shuffle(false), - _localized_random_shuffle_block_size(1024) { } + _rand(std::thread::hardware_concurrency()), + _perform_localized_random_shuffle(false), _localized_random_shuffle_block_size(1024) + { + } template - void swapBlocks(parallel::scalable_vector& vector, - const size_t start_1, - const size_t end_1, - const size_t start_2, - const size_t end_2) { + void swapBlocks(parallel::scalable_vector &vector, const size_t start_1, + const size_t end_1, const size_t start_2, const size_t end_2) + { ASSERT(start_1 <= end_1); ASSERT(start_2 <= end_2); ASSERT(end_1 <= vector.size()); ASSERT(end_2 <= vector.size()); size_t N = std::min(end_1 - start_1, end_2 - start_2); - for ( size_t i = 0; i < N; ++i ) { + for(size_t i = 0; i < N; ++i) + { std::swap(vector[start_1 + i], vector[start_2 + i]); } } template - void localizedShuffleVector(parallel::scalable_vector& vector, const size_t i, const size_t j, const int cpu_id) { + void localizedShuffleVector(parallel::scalable_vector &vector, const size_t i, + const size_t j, const int cpu_id) + { ASSERT(i <= j && j <= vector.size()); - for ( size_t start = i; start < j; start += _localized_random_shuffle_block_size ) { + for(size_t start = i; start < j; start += _localized_random_shuffle_block_size) + { const size_t end = std::min(start + _localized_random_shuffle_block_size, j); - std::shuffle(vector.begin() + start, vector.begin() + end, _rand[cpu_id].getGenerator()); + std::shuffle(vector.begin() + start, vector.begin() + end, + _rand[cpu_id].getGenerator()); } } @@ -278,4 +317,4 @@ class Randomize { size_t _localized_random_shuffle_block_size; }; -} // namespace mt_kahypar::utils +} // namespace mt_kahypar::utils diff --git a/mt-kahypar/utils/range.h b/mt-kahypar/utils/range.h index f2cdc425e..6302f8802 100644 --- a/mt-kahypar/utils/range.h +++ b/mt-kahypar/utils/range.h @@ -27,126 +27,134 @@ #pragma once -#include #include +#include -template -class IteratorRange { +template 
+class IteratorRange +{ public: - IteratorRange(const IteratorT& first, const IteratorT& firstInvalid) : __begin(first), __end(firstInvalid) { } + IteratorRange(const IteratorT &first, const IteratorT &firstInvalid) : + __begin(first), __end(firstInvalid) + { + } using Iterator = IteratorT; // make publicly visible - IteratorT begin() { - return __begin; - } + IteratorT begin() { return __begin; } - IteratorT end() { - return __end; - } + IteratorT end() { return __end; } - bool empty() { - return __begin == __end; - } + bool empty() { return __begin == __end; } private: IteratorT __begin, __end; }; - -template -class ConcatenatedRange { +template +class ConcatenatedRange +{ private: - struct begin_tag {}; - struct end_tag {}; + struct begin_tag + { + }; + struct end_tag + { + }; public: - class Iterator { + class Iterator + { public: - Iterator(std::vector& ranges, begin_tag) : ranges(ranges), currentRange(0), currentRangeIterator(ranges.front().begin()) { + Iterator(std::vector &ranges, begin_tag) : + ranges(ranges), currentRange(0), currentRangeIterator(ranges.front().begin()) + { moveToNextRange(); } - Iterator(std::vector& ranges, end_tag) : ranges(ranges), currentRange(ranges.size() - 1), currentRangeIterator(ranges.back().end()) { } - - - bool operator==(Iterator& o) { - return currentRange == o.currentRange && currentRangeIterator == o.currentRangeIterator; + Iterator(std::vector &ranges, end_tag) : + ranges(ranges), currentRange(ranges.size() - 1), + currentRangeIterator(ranges.back().end()) + { } - bool operator!=(Iterator& o) { - return !operator==(o); + bool operator==(Iterator &o) + { + return currentRange == o.currentRange && + currentRangeIterator == o.currentRangeIterator; } - Iterator& operator++() { + bool operator!=(Iterator &o) { return !operator==(o); } + + Iterator &operator++() + { // if we're at the end of the current range, advance to the next - // restrict currentRange to ranges.size() - 1, since the end() ConcatenatedRange::Iterator has to be initialized somehow - if (++currentRangeIterator == ranges[currentRange].end()) { + // restrict currentRange to ranges.size() - 1, since the end() + // ConcatenatedRange::Iterator has to be initialized somehow + if(++currentRangeIterator == ranges[currentRange].end()) + { moveToNextRange(); } return *this; } - typename RangeT::Iterator::value_type operator*() const { + typename RangeT::Iterator::value_type operator*() const + { return *currentRangeIterator; } private: - std::vector& ranges; + std::vector &ranges; size_t currentRange; typename RangeT::Iterator currentRangeIterator; - void moveToNextRange() { - while (currentRangeIterator == ranges[currentRange].end() && currentRange < ranges.size() - 1) { + void moveToNextRange() + { + while(currentRangeIterator == ranges[currentRange].end() && + currentRange < ranges.size() - 1) + { currentRangeIterator = ranges[++currentRange].begin(); } } - }; - Iterator begin() { + Iterator begin() + { assert(!ranges.empty()); return Iterator(ranges, begin_tag()); } - Iterator end() { + Iterator end() + { assert(!ranges.empty()); return Iterator(ranges, end_tag()); } - void concat(RangeT&& r) { - ranges.push_back(r); - } + void concat(RangeT &&r) { ranges.push_back(r); } - void concat(RangeT& r) { - ranges.push_back(r); - } + void concat(RangeT &r) { ranges.push_back(r); } private: std::vector ranges; }; +template +class IntegerRangeIterator +{ +public: + using const_iterator = typename std::vector::const_iterator; -template -class IntegerRangeIterator { - public: - using const_iterator = 
typename std::vector<T>::const_iterator;
-
-  IntegerRangeIterator() : _range() { }
+  IntegerRangeIterator() : _range() {}

-  IntegerRangeIterator(const T n) :
-    _range(n) {
-    std::iota(_range.begin(), _range.end(), 0);
-  }
+  IntegerRangeIterator(const T n) : _range(n)
+  {
+    std::iota(_range.begin(), _range.end(), 0);
+  }

-  const_iterator cbegin() const {
-    return _range.cbegin();
-  }
+  const_iterator cbegin() const { return _range.cbegin(); }

-  const_iterator cend() const {
-    return _range.cend();
-  }
+  const_iterator cend() const { return _range.cend(); }

- private:
-  std::vector<T> _range;
+private:
+  std::vector<T> _range;
 };
diff --git a/mt-kahypar/utils/reproducible_random.h b/mt-kahypar/utils/reproducible_random.h
index 267572d0a..97ee4db09 100644
--- a/mt-kahypar/utils/reproducible_random.h
+++ b/mt-kahypar/utils/reproducible_random.h
@@ -36,40 +36,48 @@
 namespace mt_kahypar::utils {

 /*!
- * Combines a global seed and an iteration of a loop to initialize an RNG for that iteration
+ * Combines a global seed and an iteration of a loop to initialize an RNG for that
+ * iteration
 */
-inline size_t seed_iteration(size_t seed, size_t iteration) {
+inline size_t seed_iteration(size_t seed, size_t iteration)
+{
   return hashing::integer::combine(seed, hashing::integer::hash(iteration));
 }
-template< template<typename> typename UnqualifiedHashFunction >
-class UniformRandomSelector {
+template