Skip to content

Commit

Permalink
EH: CS-578 Allow to measure thread performance with Google Performanc…
Browse files Browse the repository at this point in the history
…e Tools
  • Loading branch information
ernst-bablick committed Sep 16, 2024
1 parent e7f9bf3 commit 235ea42
Show file tree
Hide file tree
Showing 10 changed files with 330 additions and 3 deletions.
16 changes: 16 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ option(WITH_OPENSSL "Enable use of the openssl library for CSP mode" OFF)
option(WITH_MTMALLOC "Enable use of the mtmalloc memory allocator on Solaris" ON)
option(WITH_QMAKE "Enable build of qmake" ON)
option(WITH_JNI "Add JNI code for libraries like libdrmaa" ON)
# When ON, this file calls find_package(Gperftools) and, if the package is found,
# adds the WITH_GPERF compile definition that guards the profiling code.
# If the package is not found the build continues without profiling support.
option(WITH_GPERF "Enable profiling code with Google Performance Tools" OFF)

# private extensions
set(PROJECT_EXTENSIONS "None" CACHE STRING "directory of private extensions")
Expand Down Expand Up @@ -144,6 +145,21 @@ else ()
set(SPOOLING_LIBS spoolloader spoold spool)
endif ()

# Optional profiling support via Google Performance Tools (gperftools).
#
# With WITH_GPERF=ON the package description in cmake/GperftoolsConfig.cmake is
# evaluated. If gperftools and its profiler library are available, the
# WITH_GPERF compile definition is added and GPERFTOOLS_PROFILER names the
# library to link. In every other case GPERFTOOLS_PROFILER is set to an empty
# string so that referencing it in a link line adds nothing.
if (WITH_GPERF)
   # gperftools can be installed on centos with: yum install google-perftools-devel
   set(Gperftools_DIR "${CMAKE_CURRENT_LIST_DIR}/cmake/")

   # /usr is only the default; a prefix passed with -DGperftools_ROOT_DIR=<dir> is respected
   if (NOT DEFINED Gperftools_ROOT_DIR)
      set(Gperftools_ROOT_DIR "/usr")
   endif ()

   find_package(Gperftools)

   # GPERFTOOLS_PROFILER is the library that gets linked, so it has to be
   # usable too (a "-NOTFOUND" value evaluates to false here)
   if (Gperftools_FOUND AND GPERFTOOLS_PROFILER)
      message(STATUS "Found and enabled build with gperftools")
      add_compile_definitions(WITH_GPERF)
   else ()
      # ignore if gperftools are not found so that build with TS does not fail if a build host is missing gperftools
      set(GPERFTOOLS_PROFILER "") # set to empty string to avoid linking against gperftools
      message(WARNING "Cannot find gperftools although WITH_GPERF is set")
   endif ()
else ()
   # find_library() stores GPERFTOOLS_PROFILER in the cache. Shadow a value left
   # over from an earlier configure run with WITH_GPERF=ON, otherwise the
   # profiler library would still be linked although profiling is disabled.
   set(GPERFTOOLS_PROFILER "")
endif ()

if (WITH_HWLOC)
if (SGE_ARCH MATCHES "darwin-arm64")
set(SGE_TOPO_LIB hwloc CoreFoundation Core)
Expand Down
54 changes: 54 additions & 0 deletions cmake/GperftoolsConfig.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Source: https://github.com/baidu/braft/blob/master/cmake/FindGperftools.cmake
# Distributed under Apache License 2.0

# Tries to find Gperftools.
#
# Usage of this module as follows:
#
#   find_package(Gperftools)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
#   Gperftools_ROOT_DIR               Set this variable to the root installation of
#                                     Gperftools if the module has problems finding
#                                     the proper installation path.
#
# Variables defined by this module:
#
#   Gperftools_FOUND                  System has Gperftools libs/headers
#   GPERFTOOLS_FOUND                  Upper-case spelling of the above, where
#                                     FindPackageHandleStandardArgs provides it
#   GPERFTOOLS_LIBRARIES              The Gperftools libraries (tcmalloc & profiler)
#   GPERFTOOLS_TCMALLOC               The tcmalloc library on its own
#   GPERFTOOLS_PROFILER               The profiler library on its own
#   GPERFTOOLS_TCMALLOC_AND_PROFILER  The combined tcmalloc_and_profiler library
#   GPERFTOOLS_INCLUDE_DIR            The location of Gperftools headers

find_library(GPERFTOOLS_TCMALLOC
   NAMES tcmalloc
   HINTS "${Gperftools_ROOT_DIR}/lib")

find_library(GPERFTOOLS_PROFILER
   NAMES profiler
   HINTS "${Gperftools_ROOT_DIR}/lib")

find_library(GPERFTOOLS_TCMALLOC_AND_PROFILER
   NAMES tcmalloc_and_profiler
   HINTS "${Gperftools_ROOT_DIR}/lib")

find_path(GPERFTOOLS_INCLUDE_DIR
   NAMES gperftools/heap-profiler.h
   HINTS "${Gperftools_ROOT_DIR}/include")

set(GPERFTOOLS_LIBRARIES ${GPERFTOOLS_TCMALLOC_AND_PROFILER})

# GPERFTOOLS_PROFILER is part of the required set so that a positive result
# guarantees the stand-alone profiler library can be put on a link line;
# without it a "GPERFTOOLS_PROFILER-NOTFOUND" value could reach the linker setup.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
   Gperftools
   REQUIRED_VARS
      GPERFTOOLS_LIBRARIES
      GPERFTOOLS_PROFILER
      GPERFTOOLS_INCLUDE_DIR)

# Only cache entries are listed; GPERFTOOLS_LIBRARIES is a plain variable set above.
mark_as_advanced(
   Gperftools_ROOT_DIR
   GPERFTOOLS_TCMALLOC
   GPERFTOOLS_PROFILER
   GPERFTOOLS_TCMALLOC_AND_PROFILER
   GPERFTOOLS_INCLUDE_DIR)
1 change: 1 addition & 0 deletions source/daemons/qmaster/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ target_link_libraries(
PRIVATE
${SGE_LIBS}
${SPOOLING_LIBS}
${GPERFTOOLS_PROFILER}
)

if (INSTALL_SGE_BIN)
Expand Down
30 changes: 30 additions & 0 deletions source/daemons/qmaster/sge_thread_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
#include "msg_common.h"
#include "msg_qmaster.h"

#include <ocs_gperf.h>

#define SCHEDULER_TIMEOUT_S 10
#define SCHEDULER_TIMEOUT_N 0

Expand Down Expand Up @@ -484,6 +486,9 @@ sge_scheduler_main(void *arg) {
sge_evc_class_t *evc = nullptr;
lList *alp = nullptr;
sge_where_what_t where_what;
#ifdef WITH_GPERF
sge_gperf_per_thread_t gperf_data;
#endif

DENTER(TOP_LAYER);
memset(&where_what, 0, sizeof(where_what));
Expand All @@ -510,6 +515,10 @@ sge_scheduler_main(void *arg) {
/* initialize schedd_runlog logging */
schedd_set_schedd_log_file();

#ifdef WITH_GPERF
sge_gperf_per_thread_init(gperf_data);
#endif

/* set profiling parameters */
prof_set_level_name(SGE_PROF_EVENTMASTER, nullptr, nullptr);
prof_set_level_name(SGE_PROF_SPOOLING, nullptr, nullptr);
Expand Down Expand Up @@ -806,7 +815,28 @@ sge_scheduler_main(void *arg) {
double prof_copy = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, nullptr);
PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

#ifdef WITH_GPERF
{
std::string gperf_name = mconf_get_gperf_name();
std::string gperf_threads = mconf_get_gperf_threads();
std::string gperf_thread_name = thread_name;

// do profile only when scheduler is triggered via qconf -tsm
if (evc->monitor_next_run) {
g_scheduler_use_gperftools = sge_gperf_start_profiling(gperf_data, gperf_thread_name, gperf_threads, gperf_name);
}
}
#endif

scheduler_method(evc, &answer_list, &copy, &orders);

#ifdef WITH_GPERF
if (evc->monitor_next_run) {
std::string gperf_thread_name = thread_name;
g_scheduler_use_gperftools = sge_gperf_stop_profiling(gperf_data, gperf_thread_name);
}
#endif

answer_list_output(&answer_list);

PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
Expand Down
25 changes: 25 additions & 0 deletions source/libs/sgeobj/sge_conf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ static bool inherit_env = true;
static bool enable_submit_lib_path = false;
static bool enable_submit_ld_preload = false;

/*
 * gperf_name and gperf_threads
 *    Filled from the GPERF_NAME and GPERF_THREADS parameters in
 *    merge_configuration() and handed out as copies by mconf_get_gperf_name()
 *    and mconf_get_gperf_threads() under the LOCK_MASTER_CONF read lock.
 *    Defaults are "gperf" and "*".
 *    NOTE(review): unlike the neighbouring configuration variables these two
 *    are not declared static - confirm whether external linkage is intended.
 */
std::string gperf_name = "gperf";
std::string gperf_threads = "*";

/*
* notify_kill_default and notify_susp_default
* 0 -> use the signal type stored in notify_kill and notify_susp
Expand Down Expand Up @@ -820,6 +823,12 @@ int merge_configuration(lList **answer_list, u_long32 progid, const char *cell_r
if (parse_bool_param(s, "ENABLE_SUBMIT_LD_PRELOAD", &enable_submit_ld_preload)) {
continue;
}
if (parse_string_param(s, "GPERF_NAME", gperf_name)) {
continue;
}
if (parse_string_param(s, "GPERF_THREADS", gperf_threads)) {
continue;
}
}
SGE_UNLOCK(LOCK_MASTER_CONF, LOCK_WRITE);
sge_free_saved_vars(conf_context);
Expand Down Expand Up @@ -2119,6 +2128,22 @@ bool mconf_get_old_reschedule_behavior() {
DRETURN(ret);
}

/*
 * Returns a copy of the configured GPERF_NAME value.
 *
 * The copy is made while the LOCK_MASTER_CONF read lock is held, so the
 * caller gets its own string and does not reference the shared variable.
 */
std::string mconf_get_gperf_name() {
DENTER(BASIS_LAYER);

std::string name_snapshot;

SGE_LOCK(LOCK_MASTER_CONF, LOCK_READ);
name_snapshot = gperf_name;
SGE_UNLOCK(LOCK_MASTER_CONF, LOCK_READ);

DRETURN(name_snapshot);
}

/*
 * Returns a copy of the configured GPERF_THREADS value.
 *
 * The copy is made while the LOCK_MASTER_CONF read lock is held, so the
 * caller gets its own string and does not reference the shared variable.
 */
std::string mconf_get_gperf_threads() {
DENTER(BASIS_LAYER);

std::string pattern_snapshot;

SGE_LOCK(LOCK_MASTER_CONF, LOCK_READ);
pattern_snapshot = gperf_threads;
SGE_UNLOCK(LOCK_MASTER_CONF, LOCK_READ);

DRETURN(pattern_snapshot);
}

bool mconf_get_old_reschedule_behavior_array_job() {
bool ret;

Expand Down
2 changes: 2 additions & 0 deletions source/libs/sgeobj/sge_conf.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ void mconf_set_new_config(bool new_config);
bool mconf_is_new_config();
bool mconf_get_old_reschedule_behavior();
bool mconf_get_old_reschedule_behavior_array_job();
/* returns a copy of the configured GPERF_NAME value (default "gperf") */
std::string mconf_get_gperf_name();
/* returns a copy of the configured GPERF_THREADS value (default "*") */
std::string mconf_get_gperf_threads();

/* params */
bool mconf_is_monitor_message();
Expand Down
3 changes: 2 additions & 1 deletion source/libs/uti/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ set(LIBRARY_SOURCES
sge_error_class.cc
sge_fgl.cc
sge_getloadavg.cc
ocs_gperf.cc
sge_hostname.cc
sge_htable.cc
sge_io.cc
Expand Down Expand Up @@ -66,7 +67,7 @@ set(LIBRARY_SOURCES
sge_thread_ctrl.cc
sge_time.cc
sge_tmpnam.cc
ocs_topology.cc
ocs_topology.cc
sge_tq.cc
sge_uidgid.cc
sge_unistd.cc)
Expand Down
152 changes: 152 additions & 0 deletions source/libs/uti/ocs_gperf.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*___INFO__MARK_BEGIN_NEW__*/
/***************************************************************************
*
* Copyright 2024 HPC-Gridware GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
***************************************************************************/
/*___INFO__MARK_END_NEW__*/

#include <string>
#include <fnmatch.h>
#include <signal.h>

#include "uti/ocs_gperf.h"

#include "uti/sge_rmon_macros.h"
#include "uti/sge_time.h"

#include <sge_log.h>

#ifdef WITH_GPERF

#include <gperftools/profiler.h>

// Process-wide flag, initially false. The scheduler thread assigns it the
// return values of sge_gperf_start_profiling() and sge_gperf_stop_profiling().
// NOTE(review): sge_gperf_stop_profiling() always returns true, so the flag
// stays true after profiling was stopped - confirm that this is intended.
bool g_scheduler_use_gperftools = false;

/**
 * @brief Reset the per-thread profiling state.
 *
 * Afterwards no profile name is remembered and profiling is marked as not started.
 *
 * @param data per-thread state that will be reset
 */
void
sge_gperf_per_thread_init(sge_gperf_per_thread_t &data) {
   data.gperf_started = false;
   data.gperf_name = std::string();
}

/**
 * @brief Start gperftools profiling for the calling thread if its name matches a pattern.
 *
 * If thread_name matches thread_pattern (fnmatch() syntax) a new profile is
 * started that is written to "/tmp/<thread_name>-<sge_get_gmt64()>-<gperf_name>".
 * A profile that was still marked as running for this thread is flushed and
 * stopped first. If the name does not match, a running profile is stopped.
 *
 * The per-thread state is only marked as started when ProfilerStart() reports
 * success, so a later stop never ends a profile this thread did not start.
 *
 * NOTE(review): the environment is modified via sge_setenv() on every
 * matching call - confirm that no other thread reads or writes the
 * environment concurrently.
 *
 * @param per_thread_data state of the calling thread (remembers name and started flag)
 * @param thread_name     name of the calling thread
 * @param thread_pattern  fnmatch() pattern selecting the threads to be profiled
 * @param gperf_name      suffix used for the profile file name
 * @return true if thread_name matches thread_pattern, otherwise false
 */
bool
sge_gperf_start_profiling(sge_gperf_per_thread_t &per_thread_data, const std::string &thread_name, const std::string &thread_pattern, const std::string &gperf_name) {
   DENTER(TOP_LAYER);

   const bool ret = fnmatch(thread_pattern.c_str(), thread_name.c_str(), 0) == 0;
   if (ret) {
      sge_setenv("HEAP_PROFILE_ALLOCATION_INTERVAL", "104857600");
      sge_setenv("HEAP_PROFILE_INUSE_INTERVAL", "104857600");
      sge_setenv("CPUPROFILE_PER_THREAD_TIMERS", "t");
      sge_setenv("HEAP_PROFILE_ONLY_MMAP", "true");

      // Unblock SIGPROF for this thread. pthread_sigmask() is used because
      // the effect of sigprocmask() is unspecified in a multi-threaded process.
      sigset_t sigset;
      sigemptyset(&sigset);
      sigaddset(&sigset, SIGPROF);
      pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);

      per_thread_data.gperf_name = gperf_name;

      std::string filename = "/tmp/" + thread_name + "-" + std::to_string(sge_get_gmt64())+ "-" + gperf_name;

      // a previous run was not stopped properly: finish it before starting anew
      if (per_thread_data.gperf_started) {
         ERROR("Profiling already started");
         ProfilerFlush();
         ProfilerStop();
         per_thread_data.gperf_started = false;
      }

      INFO("Starting profiling %s", filename.c_str());
      // ProfilerStart() returns zero if profiling could not be started
      if (ProfilerStart(filename.c_str()) != 0) {
         ProfilerRegisterThread();
         per_thread_data.gperf_started = true;
      } else {
         ERROR("Unable to start profiling %s", filename.c_str());
      }
   } else {
      // Profiling has been disabled for this thread? Then stop it if it was active.
      if (per_thread_data.gperf_started) {
         INFO("Stopping profiling");
         ProfilerFlush();
         ProfilerStop();
         per_thread_data.gperf_started = false;
      }
   }

   DRETURN(ret);
}

/**
 * @brief Stop profiling of the calling thread if it is marked as started.
 *
 * A running profile is flushed and stopped and the started flag is cleared.
 * Nothing happens if profiling is not marked as started.
 *
 * @param per_thread_data state of the calling thread
 * @param thread_name     name of the calling thread (not evaluated here)
 * @return always true
 */
bool
sge_gperf_stop_profiling(sge_gperf_per_thread_t &per_thread_data, const std::string &thread_name) {
   DENTER(TOP_LAYER);

   // nothing to stop
   if (!per_thread_data.gperf_started) {
      DRETURN(true);
   }

   INFO("Stopping profiling");
   ProfilerFlush();
   ProfilerStop();
   per_thread_data.gperf_started = false;

   DRETURN(true);
}

/**
 * @brief Keep gperftools profiling of the calling thread in sync with the configuration.
 *
 * If thread_name matches thread_pattern (fnmatch() syntax):
 *    - a new profile "/tmp/<thread_name>-<sge_get_gmt64()>-<gperf_name>" is
 *      started when the profile name changed or no profile is running yet
 *      (a running one is flushed and stopped first)
 *    - otherwise the running profile is only flushed
 * If the name does not match, a running profile is stopped.
 *
 * The per-thread state is only marked as started when ProfilerStart() reports success.
 *
 * NOTE(review): the environment is modified via sge_setenv() on every
 * matching call - confirm that no other thread reads or writes the
 * environment concurrently.
 *
 * @param per_thread_data state of the calling thread (remembers name and started flag)
 * @param thread_name     name of the calling thread
 * @param thread_pattern  fnmatch() pattern selecting the threads to be profiled
 * @param gperf_name      suffix used for the profile file name
 * @return true if thread_name matches thread_pattern, otherwise false
 */
bool
sge_gperf_do_profiling(sge_gperf_per_thread_t &per_thread_data, const std::string &thread_name, const std::string &thread_pattern, const std::string &gperf_name) {
   DENTER(TOP_LAYER);

   const bool ret = fnmatch(thread_pattern.c_str(), thread_name.c_str(), 0) == 0;
   if (ret) {
      sge_setenv("HEAP_PROFILE_ALLOCATION_INTERVAL", "104857600");
      sge_setenv("HEAP_PROFILE_INUSE_INTERVAL", "104857600");
      sge_setenv("CPUPROFILE_PER_THREAD_TIMERS", "t");
      sge_setenv("HEAP_PROFILE_ONLY_MMAP", "true");

      // PROFILESELECTED=1 --
      // if set, cpu-profiler will only profile regions of code
      // surrounded with ProfilerEnable()/ProfilerDisable().

      // Unblock SIGPROF for this thread. pthread_sigmask() is used because
      // the effect of sigprocmask() is unspecified in a multi-threaded process.
      sigset_t sigset;
      sigemptyset(&sigset);
      sigaddset(&sigset, SIGPROF);
      pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);

      // (re)start only if the profile name changes or nothing is running yet,
      // otherwise just flush the data collected so far
      if (gperf_name != per_thread_data.gperf_name || !per_thread_data.gperf_started) {
         per_thread_data.gperf_name = gperf_name;
         std::string filename = "/tmp/" + thread_name + "-" + std::to_string(sge_get_gmt64())+ "-" + gperf_name;

         if (per_thread_data.gperf_started) {
            ProfilerFlush();
            ProfilerStop();
            per_thread_data.gperf_started = false;
         }

         // ProfilerStart() returns zero if profiling could not be started
         if (ProfilerStart(filename.c_str()) != 0) {
            ProfilerRegisterThread();
            per_thread_data.gperf_started = true;
         } else {
            ERROR("Unable to start profiling %s", filename.c_str());
         }
      } else {
         ProfilerFlush();
      }
   } else {
      // thread is not (or no longer) selected: stop a running profile
      if (per_thread_data.gperf_started) {
         ProfilerFlush();
         ProfilerStop();
         per_thread_data.gperf_started = false;
      }
   }

   DRETURN(ret);
}

#endif
Loading

0 comments on commit 235ea42

Please sign in to comment.