Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#2387: Gather hashed trace user events at the end of run #2395

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/vt/trace/trace.cc
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ void insertNewUserEvent(
#endif
}

// Record a hashed user event ID by appending it to the user_hashed_events_
// list of the trace component returned by theTrace(). No de-duplication is
// performed here; every call adds one entry.
void Trace::addHashedEvent(UserEventIDType event_id) {
  auto& hashed_ids = theTrace()->user_hashed_events_;
  hashed_ids.push_back(event_id);
}

void Trace::addUserEvent(UserEventIDType event) {
if (not checkDynamicRuntimeEnabled()) {
return;
Expand Down
8 changes: 8 additions & 0 deletions src/vt/trace/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,13 @@ struct Trace : runtime::component::Component<Trace>, TraceLite {
*/
void registerUserEventManual(std::string const& name, UserSpecEventIDType id);

/**
* \brief Store a hashed event ID
*
* Appends \c event_id to the trace component's list of hashed user event
* IDs (\c user_hashed_events_). The implementation does not check for
* duplicates. It is invoked from \c UserEventRegistry::hash when a newly
* hashed event name has been inserted into the registry.
*
* \param[in] event_id the hashed event ID
*/
void addHashedEvent(UserEventIDType event_id);

/**
* \brief Log a user event
*
Expand Down Expand Up @@ -374,6 +381,7 @@ struct Trace : runtime::component::Component<Trace>, TraceLite {
| idle_begun_
| start_time_
| user_event_
| user_hashed_events_
| prog_name_
| trace_name_
| full_trace_name_
Expand Down
37 changes: 37 additions & 0 deletions src/vt/trace/trace_lite.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#include <sys/stat.h>
#include <zlib.h>
#include <map>
#include <numeric>

namespace vt {
#if vt_check_enabled(trace_only)
Expand Down Expand Up @@ -542,6 +543,42 @@ void TraceLite::flushTracesFile(bool useGlobalSync) {

void TraceLite::writeTracesFile(int flush, bool is_incremental_flush) {
auto const node = theContext()->getNode();
auto const comm = theContext()->getComm();
auto const comm_size = theContext()->getNumNodes();

// Gather all hashed events to rank 0 before writing sts file
using events_t = std::vector<UserEventIDType>;
auto const root = 0;
events_t local_hashed_events = theTrace()->user_hashed_events_;
int local_size = local_hashed_events.size();
std::vector<int> all_sizes(comm_size);
MPI_Gather(&local_size, 1, MPI_INT, all_sizes.data(), 1, MPI_INT, 0, comm);

// Compute displacements
std::vector<int> displs(comm_size, 0);
if (node == 0) {
std::partial_sum(all_sizes.begin(), all_sizes.end() - 1, displs.begin() + 1);
}

// Create vector in which to store all events
events_t all_hashed_events;
if (node == 0) {
int total_size = std::accumulate(all_sizes.begin(), all_sizes.end(), 0);
all_hashed_events.resize(total_size);
}

// Gather events
MPI_Gatherv(
local_hashed_events.data(), // Send buffer
local_size, // Number of elements to send
MPI_UINT32_T, // Data type (adjust to match UserEventIDType)
all_hashed_events.data(), // Receive buffer (on root)
all_sizes.data(), // Number of elements to receive from each rank
displs.data(), // Displacements for each rank
MPI_UINT32_T, // Data type (adjust to match UserEventIDType)
root, // Root node
comm // Communicator
);

size_t to_write = traces_.size();

Expand Down
1 change: 1 addition & 0 deletions src/vt/trace/trace_lite.h
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ struct TraceLite {
int incremental_flush_mode_ = 0;

UserEventRegistry user_event_ = {};
// Hashed user event IDs recorded through Trace::addHashedEvent; read by
// writeTracesFile, which gathers them to rank 0.
std::vector<UserEventIDType> user_hashed_events_;
EventHoldStackType event_holds_;
TraceStackType open_events_;
TraceContainerType traces_;
Expand Down
6 changes: 1 addition & 5 deletions src/vt/trace/trace_user_event.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,7 @@ UserEventIDType UserEventRegistry::hash(std::string const& in_event_name) {
auto id = std::get<0>(ret);
auto inserted = std::get<1>(ret);
if (inserted) {
auto const node = theContext()->getNode();
if (node != 0) {
auto msg = makeMessage<NewUserEventMsg>(false, id, in_event_name);
theMsg()->sendMsg<newEventHan>(0, msg);
}
vt::theTrace()->addHashedEvent(id);
}
return id;
}
Expand Down
Loading