Skip to content

Commit

Permalink
#2201: Merge branch '2201-implement-memory-aware-temperedlb-in-vt' of…
Browse files Browse the repository at this point in the history
… github.com:DARMA-tasking/vt into 2201-implement-memory-aware-temperedlb-in-vt
  • Loading branch information
ppebay committed Nov 29, 2023
2 parents e99614d + 56a07b0 commit ba1f0b0
Show file tree
Hide file tree
Showing 48 changed files with 2,835 additions and 39 deletions.
20 changes: 20 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ set(PROJECT_BIN_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(PROJECT_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PROJECT_LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib)
set(PROJECT_EXAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/examples)
set(PROJECT_TOOLS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tools)

# Import the linking macros for VT-related targets
include(cmake/link_vt.cmake)
Expand Down Expand Up @@ -114,6 +115,25 @@ if (VT_BUILD_TESTS
include(CTest)
endif()

#
# Tools
#
# Tools are built by default; configure with -Dvt_build_tools=OFF to skip them.
option(vt_build_tools "Build VT tools" ON)

if (vt_build_tools)
message(
STATUS
"VT: building tools"
)

# Declare the `tools` target before descending into tools/.
# NOTE(review): presumably the subdirectory attaches its own targets to it --
# confirm against tools/CMakeLists.txt.
add_custom_target(tools)
add_subdirectory(tools)
else()
# Only report the decision; nothing under tools/ is configured in this branch.
message(
STATUS "VT: NOT building tools because vt_build_tools is not set."
)
endif()

#
# Examples
#
Expand Down
5 changes: 2 additions & 3 deletions docs/md/mainpage.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@ supercomputer architectures. The main public repositories are:
| Module | Name | Links |
| ---------------------------------- | -------------------------------------------------------------- | -------------------------- |
| HPC Runtime | @m_span{m-text m-success} DARMA/vt @m_endspan (Virtual Transport) | [Github](https://github.com/DARMA-tasking/vt) |
| HPC Serialization | @m_span{m-text m-success} DARMA/checkpoint @m_endspan (Checkpointing and Serialization Library) | [Github](https://github.com/DARMA-tasking/checkpoint) |
| HPC LB Simulator | @m_span{m-text m-success} DARMA/LBAF @m_endspan (Load Balancing Analysis Framework) | [Github](https://github.com/DARMA-tasking/LB-analysis-framework) |
| HPC Serialization | @m_span{m-text m-success} DARMA/magistrate @m_endspan (Checkpointing and Serialization Library) | [Github](https://github.com/DARMA-tasking/checkpoint), [Docs](https://darma-tasking.github.io/checkpoint_docs/html/index.html) |
| HPC Load Balancing Analysis Framework | @m_span{m-text m-success} DARMA/LBAF @m_endspan | [Github](https://github.com/DARMA-tasking/LB-analysis-framework), [Docs](https://darma-tasking.github.io/lbaf_docs/index.html) |
| HPC Serializer compiler analyzer | @m_span{m-text m-success} DARMA/checkpoint-analyzer @m_endspan (Static verification of serializers) | [Github](https://github.com/DARMA-tasking/checkpoint-member-analyzer) |
| Toolkit documentation | @m_span{m-text m-success} DARMA/docs @m_endspan | [Docs](https://github.com/DARMA-tasking/DARMA-tasking.github.io) |

\section intro-darma-vt Learn about DARMA/vt

Expand Down
44 changes: 36 additions & 8 deletions examples/hello_world/objgroup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,20 +54,48 @@ struct MyObjGroup {
int main(int argc, char** argv) {
vt::initialize(argc, argv);

vt::NodeType this_node = vt::theContext()->getNode();
vt::NodeType num_nodes = vt::theContext()->getNumNodes();
const auto this_node = vt::theContext()->getNode();
const auto num_nodes = vt::theContext()->getNumNodes();

auto proxy = vt::theObjGroup()->makeCollective<MyObjGroup>(
"examples_hello_world"
);
auto proxy =
vt::theObjGroup()->makeCollective<MyObjGroup>("examples_hello_world");

// Create group of odd nodes and multicast to them (from root node)
vt::theGroup()->newGroupCollective(
this_node % 2, [proxy, this_node](::vt::GroupType type) {
if (this_node == 0) {
proxy.multicast<&MyObjGroup::handler>(type, 122, 244);
}
});

vt::theCollective()->barrier();

if (this_node == 0) {
proxy[0].send<&MyObjGroup::handler>(5,10);
// Send to object 0
proxy[0].send<&MyObjGroup::handler>(5, 10);
if (num_nodes > 1) {
proxy[1].send<&MyObjGroup::handler>(10,20);
// Send to object 1
proxy[1].send<&MyObjGroup::handler>(10, 20);
}
proxy.broadcast<&MyObjGroup::handler>(400,500);

// Broadcast to all nodes
proxy.broadcast<&MyObjGroup::handler>(400, 500);

using namespace ::vt::group::region;

// Create list of nodes and multicast to them
List::ListType range;
for (vt::NodeType node = 0; node < num_nodes; ++node) {
if (node % 2 == 0) {
range.push_back(node);
}
}

proxy.multicast<&MyObjGroup::handler>(
std::make_unique<List>(range), 20, 40
);
}
vt::theCollective()->barrier();

vt::finalize();

Expand Down
3 changes: 2 additions & 1 deletion scripts/JSON_data_files_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ def _get_valid_schema(self) -> Schema:
},
'bytes': float
}
]
],
Optional('user_defined'): dict
},
]
}
Expand Down
2 changes: 1 addition & 1 deletion scripts/check_license.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
path_to_vt=${1}
cd "$path_to_vt" || exit 1

for sub_dir in "src" "tests/unit" "tests/perf" "tutorial" "examples"
for sub_dir in "src" "tests/unit" "tests/perf" "tutorial" "examples" "tools"
do
"$path_to_vt/scripts/add-license-perl.pl" "$path_to_vt/$sub_dir" "$path_to_vt/scripts/license-template"
done
Expand Down
21 changes: 21 additions & 0 deletions src/vt/collective/startup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,27 @@

namespace vt {

std::unique_ptr<arguments::ArgvContainer>
preconfigure(int& argc, char**& argv) {
return std::make_unique<arguments::ArgvContainer>(argc, argv);
}

// Initialize the runtime from arguments cached earlier (see preconfigure).
// A null preconfigure_args is treated as an empty argument list.
RuntimePtrType initializePreconfigured(
  MPI_Comm* comm, arguments::AppConfig const* appConfig,
  arguments::ArgvContainer const* preconfigure_args
) {
  // Work on a private copy so the caller's container is never touched.
  arguments::ArgvContainer const cached_args =
    (preconfigure_args != nullptr)
      ? *preconfigure_args
      : arguments::ArgvContainer{};

  // Materialize a mutable argc/argv pair for the runtime to consume.
  int arg_count = cached_args.getArgc();
  auto arg_storage = cached_args.getArgvDeepCopy();
  char** arg_values = arg_storage.get();

  // A user-supplied communicator is what flags the run as interop.
  bool const is_interop = (comm != nullptr);

  return CollectiveOps::initialize(
    arg_count, arg_values, is_interop, comm, appConfig
  );
}

// vt::{initialize,finalize} for main ::vt namespace
RuntimePtrType initialize(
int& argc, char**& argv, MPI_Comm* comm, arguments::AppConfig const* appConfig
Expand Down
8 changes: 8 additions & 0 deletions src/vt/collective/startup.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,17 @@

#include "vt/config.h"
#include "vt/runtime/runtime_headers.h"
#include "vt/configs/arguments/argv_container.h"

namespace vt {

/**
 * \brief Capture the command line ahead of VT initialization
 *
 * Constructs an \c arguments::ArgvContainer from \c argc and \c argv. That
 * constructor caches every argument and rewrites \c argc / \c argv so that
 * arguments starting with "--vt_" or "!--vt_" are no longer present.
 *
 * \param[in,out] argc argument count; updated to the count without VT arguments
 * \param[in,out] argv argument vector; redirected to the filtered array
 *
 * \return the container holding the complete, unfiltered argument list
 */
std::unique_ptr<arguments::ArgvContainer>
preconfigure(int& argc, char**& argv);
/**
 * \brief Initialize VT using arguments cached by \c preconfigure
 *
 * \param[in] comm optional MPI communicator; a non-null value makes the
 * initialization be flagged as interop
 * \param[in] appConfig optional application configuration forwarded to the
 * runtime initialization
 * \param[in] preconfigure_args cached arguments; when null, an empty argument
 * list is used
 *
 * \return the runtime pointer produced by \c CollectiveOps::initialize
 */
RuntimePtrType initializePreconfigured(
MPI_Comm* comm = nullptr,
arguments::AppConfig const* appConfig = nullptr,
arguments::ArgvContainer const* preconfigure_args = nullptr);

RuntimePtrType initialize(
int& argc, char**& argv, MPI_Comm* comm = nullptr,
arguments::AppConfig const* appConfig = nullptr
Expand Down
3 changes: 3 additions & 0 deletions src/vt/configs/arguments/app_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ struct AppConfig {
bool vt_lb_self_migration = false;
bool vt_lb_spec = false;
std::string vt_lb_spec_file = "";
bool vt_lb_run_lb_first_phase = false;


bool vt_no_detect_hang = false;
Expand Down Expand Up @@ -225,6 +226,7 @@ struct AppConfig {
bool vt_debug_phase = false;
bool vt_debug_context = false;
bool vt_debug_epoch = false;
bool vt_debug_replay = false;

bool vt_debug_print_flush = false;

Expand Down Expand Up @@ -386,6 +388,7 @@ struct AppConfig {
| vt_debug_phase
| vt_debug_context
| vt_debug_epoch
| vt_debug_replay

| vt_debug_print_flush

Expand Down
6 changes: 6 additions & 0 deletions src/vt/configs/arguments/args.cc
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ void addDebugPrintArgs(CLI::App& app, AppConfig& appConfig) {
auto dcp = "Enable debug_phase = \"" debug_pp(phase) "\"";
auto ddp = "Enable debug_context = \"" debug_pp(context) "\"";
auto dep = "Enable debug_epoch = \"" debug_pp(epoch) "\"";
auto dfp = "Enable debug_replay = \"" debug_pp(replay) "\"";

auto r1 = app.add_option("--vt_debug_level", appConfig.vt_debug_level, rq);

Expand Down Expand Up @@ -410,6 +411,7 @@ void addDebugPrintArgs(CLI::App& app, AppConfig& appConfig) {
auto dc = app.add_flag("--vt_debug_phase", appConfig.vt_debug_phase, dcp);
auto dd = app.add_flag("--vt_debug_context", appConfig.vt_debug_context, ddp);
auto de = app.add_flag("--vt_debug_epoch", appConfig.vt_debug_epoch, dep);
auto df = app.add_flag("--vt_debug_replay", appConfig.vt_debug_replay, dfp);

auto debugGroup = "Debug Print Configuration (must be compile-time enabled)";
r->group(debugGroup);
Expand Down Expand Up @@ -446,6 +448,7 @@ void addDebugPrintArgs(CLI::App& app, AppConfig& appConfig) {
dc->group(debugGroup);
dd->group(debugGroup);
de->group(debugGroup);
df->group(debugGroup);

auto dbq = "Always flush VT runtime prints";
auto eb = app.add_flag("--vt_debug_print_flush", appConfig.vt_debug_print_flush, dbq);
Expand Down Expand Up @@ -478,6 +481,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) {
auto lb_self_migration = "Allow load balancer to migrate objects to the same node";
auto lb_spec = "Enable LB spec file (defines which phases output LB data)";
auto lb_spec_file = "File containing LB spec; --vt_lb_spec to enable";
auto lb_first_phase_info = "Force LB to run on the first phase (phase 0)";
auto s = app.add_flag("--vt_lb", appConfig.vt_lb, lb);
auto t1 = app.add_flag("--vt_lb_quiet", appConfig.vt_lb_quiet, lb_quiet);
auto u = app.add_option("--vt_lb_file_name", appConfig.vt_lb_file_name, lb_file_name)->capture_default_str()->check(CLI::ExistingFile);
Expand All @@ -500,6 +504,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) {
auto lbasm = app.add_flag("--vt_lb_self_migration", appConfig.vt_lb_self_migration, lb_self_migration);
auto lbspec = app.add_flag("--vt_lb_spec", appConfig.vt_lb_spec, lb_spec);
auto lbspecfile = app.add_option("--vt_lb_spec_file", appConfig.vt_lb_spec_file, lb_spec_file)->capture_default_str()->check(CLI::ExistingFile);
auto lb_first_phase = app.add_flag("--vt_lb_run_lb_first_phase", appConfig.vt_lb_run_lb_first_phase, lb_first_phase_info);

// --vt_lb_name excludes --vt_lb_file_name, and vice versa
v->excludes(u);
Expand Down Expand Up @@ -528,6 +533,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) {
lbasm->group(debugLB);
lbspec->group(debugLB);
lbspecfile->group(debugLB);
lb_first_phase->group(debugLB);

// help options deliberately omitted from the debugLB group above so that
// they appear grouped with --vt_help when --vt_help is used
Expand Down
111 changes: 111 additions & 0 deletions src/vt/configs/arguments/argv_container.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
//@HEADER
// *****************************************************************************
//
// argv_container.h
// DARMA/vt => Virtual Transport
//
// Copyright 2019-2021 National Technology & Engineering Solutions of Sandia, LLC
// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from this
// software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact darma@sandia.gov
//
// *****************************************************************************
//@HEADER
*/

#if !defined INCLUDED_VT_CONFIGS_ARGUMENTS_ARGV_CONTAINER_H
#define INCLUDED_VT_CONFIGS_ARGUMENTS_ARGV_CONTAINER_H

#include <cstring>
#include <memory>
#include <string>
#include <vector>

namespace vt {

namespace arguments {

/**
 * \struct ArgvContainer
 *
 * \brief Caches the full command line and hides VT-specific arguments from
 * the caller's \c argc / \c argv.
 */
struct ArgvContainer {
  /**
   * \brief Cache every argument and strip VT arguments from the caller's view
   *
   * Every argument is copied into the container. Arguments that start with
   * "--vt_" or "!--vt_" are kept only in that cache; \c argc and \c argv are
   * rewritten to expose the remaining arguments, followed by a terminating
   * \c nullptr.
   *
   * \note The rewritten \c argv array lives in function-local static storage
   * that is replaced each time this constructor runs, so only the array
   * produced by the most recent construction stays valid. Its entries still
   * point at the caller's original strings.
   *
   * \param[in,out] argc argument count; set to the number of non-VT arguments
   * \param[in,out] argv argument vector; redirected to the filtered array
   */
  ArgvContainer(int& argc, char**& argv)
  {
    std::vector<char*> non_vt_args;
    if (argc > 0) {
      argv_.reserve(static_cast<std::size_t>(argc));
      non_vt_args.reserve(static_cast<std::size_t>(argc));
    }

    for (int i = 0; i < argc; i++) {
      // std::string makes its own copy; going through strdup() here would
      // leak the intermediate buffer on every argument
      argv_.emplace_back(argv[i]);

      bool const is_vt_arg =
        (0 == std::strncmp(argv[i], "--vt_", 5)) ||
        (0 == std::strncmp(argv[i], "!--vt_", 6));
      if (!is_vt_arg) {
        non_vt_args.push_back(argv[i]);
      }
    }

    // Reconstruct argv without vt related params
    int const new_argc = static_cast<int>(non_vt_args.size());
    static std::unique_ptr<char*[]> new_argv = nullptr;

    new_argv = std::make_unique<char*[]>(non_vt_args.size() + 1);

    std::size_t pos = 0;
    for (char* arg : non_vt_args) {
      new_argv[pos++] = arg;
    }
    new_argv[pos] = nullptr;

    argc = new_argc;
    argv = new_argv.get();
  }

  ArgvContainer() = default;
  ArgvContainer(const ArgvContainer&) = default;

  /**
   * \brief Number of cached arguments (VT arguments included)
   */
  int getArgc() const {
    return static_cast<int>(argv_.size());
  }

  /**
   * \brief Build a fresh, null-terminated argv array from the cache
   *
   * \note Each string is allocated with \c strdup. The returned
   * \c unique_ptr releases only the pointer array, not the strings.
   *
   * \return array of \c getArgc() strings followed by a \c nullptr entry
   */
  std::unique_ptr<char*[]> getArgvDeepCopy() const {
    auto output = std::make_unique<char*[]>(argv_.size() + 1);

    std::size_t pos = 0;
    for (auto const& arg : argv_) {
      output[pos++] = strdup(arg.c_str());
    }
    output[pos] = nullptr;

    return output;
  }

private:
  /// Full original command line, in order
  std::vector<std::string> argv_;
};

} // namespace arguments
} // namespace vt

#endif /*INCLUDED_VT_CONFIGS_ARGUMENTS_ARGV_CONTAINER_H*/
4 changes: 3 additions & 1 deletion src/vt/configs/debug/debug_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ enum CatEnum : uint64_t {
phase = 1ull<<28,
context = 1ull<<29,
epoch = 1ull<<30,
temperedwmin = 1ull<<31
temperedwmin = 1ull<<31,
replay = 1ull<<32
};

enum CtxEnum : uint64_t {
Expand Down Expand Up @@ -138,6 +139,7 @@ vt_option_category_pretty_print(reduce, "reduce")
vt_option_category_pretty_print(rdma, "RDMA")
vt_option_category_pretty_print(rdma_channel, "RDMA Channel")
vt_option_category_pretty_print(rdma_state, "RDMA State")
vt_option_category_pretty_print(replay, "replay")
vt_option_category_pretty_print(runtime, "runtime")
vt_option_category_pretty_print(scatter, "scatter")
vt_option_category_pretty_print(serial_msg, "serialized-msg")
Expand Down
Loading

0 comments on commit ba1f0b0

Please sign in to comment.