Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Instrument Julia's runtime to get thread traces #86

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ WITH_DTRACE := 0
# Enable ITTAPI integration
WITH_ITTAPI := 0

# Enable Perfetto traces
WITH_PERFETTO := 1

# Prevent picking up $ARCH from the environment variables
ARCH:=

Expand Down Expand Up @@ -747,6 +750,12 @@ JCFLAGS += -DUSE_ITTAPI
LIBITTAPI:=-littnotify
endif

ifeq ($(WITH_PERFETTO), 1)
JCXXFLAGS += -DUSE_PERFETTO
JCFLAGS += -DUSE_PERFETTO
endif


# ===========================================================================

# Select the cpu architecture to target, or automatically detects the user's compiler
Expand Down
28 changes: 28 additions & 0 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@
#include <llvm/Bitcode/BitcodeReader.h>
#include <llvm/Linker/Linker.h>

#if 0
#ifdef USE_PERFETTO
extern FILE *tracef;
int tracecg = 1;
#endif
#endif

using namespace llvm;

//Drag some useful type functions into our namespace
Expand Down Expand Up @@ -8469,6 +8476,17 @@ jl_llvm_functions_t jl_emit_code(
jl_codegen_params_t &params)
{
JL_TIMING(CODEGEN);
#if 0
#ifdef USE_PERFETTO
char tbuf[1024];
jl_task_t *ct = jl_current_task;
snprintf(tbuf, 1024, "{\"name\":\"CodeGen\",\"cat\":\"compiler\",\"id\":%-d,"
"\"ph\":\"B\",\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
tracecg, getpid(), jl_get_task_tid(ct), jl_hrtime()/1000);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif
#endif

// caller must hold codegen_lock
jl_llvm_functions_t decls = {};
assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache ||
Expand Down Expand Up @@ -8501,6 +8519,16 @@ jl_llvm_functions_t jl_emit_code(
jlbacktrace(); // written to STDERR_FILENO
}

#if 0
#ifdef USE_PERFETTO
snprintf(tbuf, 1024, "{\"name\":\"CodeGen\",\"cat\":\"compiler\",\"id\":%-d,"
"\"ph\":\"E\",\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
tracecg, getpid(), jl_get_task_tid(ct), jl_hrtime()/1000);
tracecg++;
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif
#endif

return decls;
}

Expand Down
63 changes: 63 additions & 0 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ int gc_first_tid;
uv_mutex_t gc_threads_lock;
uv_cond_t gc_threads_cond;

#ifdef USE_PERFETTO
extern FILE *tracef;
int tracegc = 1;
#endif

// Linked list of callback functions

typedef void (*jl_gc_cb_func_t)(void);
Expand Down Expand Up @@ -3269,6 +3274,14 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
int64_t last_perm_scanned_bytes = perm_scanned_bytes;
JL_PROBE_GC_MARK_BEGIN();
uint64_t start_mark_time = jl_hrtime();
#ifdef USE_PERFETTO
char tbuf[1024];
jl_task_t *ct = jl_current_task;
snprintf(tbuf, 1024, "{\"name\":\"Mark\",\"cat\":\"gc\",\"id\":%-d,\"ph\":\"B\","
"\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
tracegc, getpid(), jl_get_task_tid(ct), start_mark_time/1000);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif

// 1. fix GC bits of objects in the remset.
assert(gc_n_threads);
Expand Down Expand Up @@ -3313,6 +3326,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
gc_settime_premark_end();
gc_time_mark_pause(gc_start_time, scanned_bytes, perm_scanned_bytes);
uint64_t end_mark_time = jl_hrtime();
#ifdef USE_PERFETTO
snprintf(tbuf, 1024, "{\"name\":\"Mark\",\"cat\":\"gc\",\"id\":%-d,\"ph\":\"E\","
"\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
tracegc, getpid(), jl_get_task_tid(ct), end_mark_time/1000);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif

uint64_t mark_time = end_mark_time - start_mark_time;
gc_num.mark_time = mark_time;
gc_num.total_mark_time += mark_time;
Expand Down Expand Up @@ -3439,6 +3459,14 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
uint64_t start_sweep_time = jl_hrtime();
JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
current_sweep_full = sweep_full;

#ifdef USE_PERFETTO
snprintf(tbuf, 1024, "{\"name\":\"Sweep\",\"cat\":\"gc\",\"id\":%-d,\"ph\":\"B\","
"\"pid\":%-d,\"tid\":%-d,\"ts\":%llu,\"args\":{\"full\":%d}},\n",
tracegc, getpid(), jl_get_task_tid(ct), start_sweep_time/1000, sweep_full);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif

sweep_weak_refs();
sweep_stack_pools();
gc_sweep_foreign_objs();
Expand All @@ -3451,6 +3479,12 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
JL_PROBE_GC_SWEEP_END();

uint64_t gc_end_time = jl_hrtime();
#ifdef USE_PERFETTO
snprintf(tbuf, 1024, "{\"name\":\"Sweep\",\"cat\":\"gc\",\"id\":%-d,\"ph\":\"E\","
"\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
tracegc, getpid(), jl_get_task_tid(ct), gc_end_time/1000);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif
uint64_t pause = gc_end_time - gc_start_time;
uint64_t sweep_time = gc_end_time - start_sweep_time;
gc_num.total_sweep_time += sweep_time;
Expand Down Expand Up @@ -3586,6 +3620,14 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
return;
}
JL_TIMING(GC);
#ifdef USE_PERFETTO
char tbuf[1024];
snprintf(tbuf, 1024, "{\"name\":\"GC\",\"cat\":\"gc\",\"id\":%-d,\"ph\":\"B\","
"\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
tracegc, getpid(), jl_get_task_tid(ct), t0/1000);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif

int last_errno = errno;
#ifdef _OS_WINDOWS_
DWORD last_error = GetLastError();
Expand All @@ -3611,6 +3653,13 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
gc_num.max_time_to_safepoint = duration;
gc_num.time_to_safepoint = duration;
gc_num.total_time_to_safepoint += duration;
#ifdef USE_PERFETTO
snprintf(tbuf, 1024, "{\"name\":\"StopTheWorld\",\"cat\":\"gc\",\"ph\":\"i\","
"\"s\":\"t\",\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
getpid(), jl_get_task_tid(ct), t1/1000);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif


gc_invoke_callbacks(jl_gc_cb_pre_gc_t,
gc_cblist_pre_gc, (collection));
Expand All @@ -3631,6 +3680,14 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
jl_safepoint_end_gc();
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
JL_PROBE_GC_END();
#ifdef USE_PERFETTO
snprintf(tbuf, 1024, "{\"name\":\"GC\",\"cat\":\"gc\",\"id\":%-d,\"ph\":\"E\","
"\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
tracegc, getpid(), jl_get_task_tid(ct), jl_hrtime()/1000);
tracegc++;
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif


// Only disable finalizers on current thread
// Doing this on all threads is racy (it's impossible to check
Expand All @@ -3639,6 +3696,12 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
run_finalizers(ct);
}
JL_PROBE_GC_FINALIZER();
#ifdef USE_PERFETTO
snprintf(tbuf, 1024, "{\"name\":\"Finalizer\",\"cat\":\"gc\",\"ph\":\"i\","
"\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
getpid(), jl_get_task_tid(ct), jl_hrtime()/1000);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif

gc_invoke_callbacks(jl_gc_cb_post_gc_t,
gc_cblist_post_gc, (collection));
Expand Down
22 changes: 22 additions & 0 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
extern "C" {
#endif

#ifdef USE_PERFETTO
extern FILE *tracef;
int traceti = 1;
#endif

JL_DLLEXPORT _Atomic(size_t) jl_world_counter = 1; // uses atomic acquire/release
JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT
{
Expand Down Expand Up @@ -3715,6 +3720,14 @@ JL_DLLEXPORT void jl_typeinf_timing_begin(void)
if (!ct->reentrant_timing++) {
ct->inference_start_time = jl_hrtime();
}
#ifdef USE_PERFETTO
char tbuf[1024];
snprintf(tbuf, 1024, "{\"name\":\"TypeInference\",\"cat\":\"compiler\",\"id\":%-d,"
"\"ph\":\"B\",\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
traceti, getpid(), jl_get_task_tid(ct), jl_hrtime()/1000);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif

}

JL_DLLEXPORT void jl_typeinf_timing_end(void)
Expand All @@ -3727,6 +3740,15 @@ JL_DLLEXPORT void jl_typeinf_timing_end(void)
}
ct->inference_start_time = 0;
}
#ifdef USE_PERFETTO
char tbuf[1024];
snprintf(tbuf, 1024, "{\"name\":\"TypeInference\",\"cat\":\"compiler\",\"id\":%-d,"
"\"ph\":\"E\",\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
traceti, getpid(), jl_get_task_tid(ct), jl_hrtime()/1000);
traceti++;
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif

}

JL_DLLEXPORT void jl_typeinf_lock_begin(void)
Expand Down
15 changes: 15 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ extern BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
#include <unistd.h>
#endif

#ifdef USE_PERFETTO
extern FILE *tracef;
#endif

// list of modules being deserialized with __init__ methods
jl_array_t *jl_module_init_order;

Expand Down Expand Up @@ -204,6 +208,10 @@ void jl_task_frame_noreturn(jl_task_t *ct);
// cause this process to exit with WEXITSTATUS(signo), after waiting to finish all julia, C, and C++ cleanup
JL_DLLEXPORT void jl_exit(int exitcode)
{
#ifdef USE_PERFETTO
fprintf(tracef, "]}");
fclose(tracef);
#endif
jl_atexit_hook(exitcode);
exit(exitcode);
}
Expand Down Expand Up @@ -801,6 +809,13 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
#pragma GCC diagnostic pop
JL_GC_PROMISE_ROOTED(ct);
#ifdef USE_PERFETTO
char tfn[64];
sprintf(tfn, "perfetto-trace-%0d.json", getpid());
tracef = fopen(tfn, "w");
//fprintf(stdout, "Writing Perfetto trace to %s\n", tfn);
fprintf(tracef, "{\"traceEvents\":[\n");
#endif
_finish_julia_init(rel, ptls, ct);
}

Expand Down
49 changes: 49 additions & 0 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ using namespace llvm;
# include <llvm/ExecutionEngine/SectionMemoryManager.h>
#endif

#if 0
#ifdef USE_PERFETTO
extern FILE *tracef;
int traceco = 1, tracelo = 1;
#endif
#endif

#define DEBUG_TYPE "julia_jitlayers"

STATISTIC(LinkedGlobals, "Number of globals linked");
Expand Down Expand Up @@ -187,6 +194,17 @@ static jl_callptr_t _jl_compile_codeinst(
if (timed)
start_time = jl_hrtime();

#if 0
#ifdef USE_PERFETTO
char tbuf[1024];
jl_task_t *ct = jl_current_task;
snprintf(tbuf, 1024, "{\"name\":\"Compile\",\"cat\":\"compiler\",\"id\":%-d,"
"\"ph\":\"B\",\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
traceco, getpid(), jl_get_task_tid(ct), jl_hrtime()/1000);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif
#endif

assert(jl_is_code_instance(codeinst));
assert(codeinst->min_world <= world && (codeinst->max_world >= world || codeinst->max_world == 0) &&
"invalid world for method-instance");
Expand Down Expand Up @@ -307,6 +325,17 @@ static jl_callptr_t _jl_compile_codeinst(
jl_printf(stream, "\"\n");
}
}

#if 0
#ifdef USE_PERFETTO
snprintf(tbuf, 1024, "{\"name\":\"Compile\",\"cat\":\"compiler\",\"id\":%-d,"
"\"ph\":\"E\",\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
traceco, getpid(), jl_get_task_tid(ct), jl_hrtime()/1000);
traceco++;
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif
#endif

return fptr;
}

Expand Down Expand Up @@ -1115,6 +1144,17 @@ namespace {

OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) {
TSM.withModuleDo([&](Module &M) {
#if 0
#ifdef USE_PERFETTO
char tbuf[1024];
jl_task_t *ct = jl_current_task;
snprintf(tbuf, 1024, "{\"name\":\"LLVMOpt\",\"cat\":\"compiler\","
"\"id\":%-d,\"ph\":\"B\",\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
tracelo, getpid(), jl_get_task_tid(ct), jl_hrtime()/1000);
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif
#endif

uint64_t start_time = 0;
std::stringstream before_stats_ss;
bool should_dump_opt_stats = false;
Expand Down Expand Up @@ -1173,6 +1213,15 @@ namespace {
}
}
}
#if 0
#ifdef USE_PERFETTO
snprintf(tbuf, 1024, "{\"name\":\"LLVMOpt\",\"cat\":\"compiler\","
"\"id\":%-d,\"ph\":\"E\",\"pid\":%-d,\"tid\":%-d,\"ts\":%llu},\n",
tracelo, getpid(), jl_get_task_tid(ct), jl_hrtime()/1000);
tracelo++;
fwrite(tbuf, strlen(tbuf), sizeof(char), tracef);
#endif
#endif
});
switch (optlevel) {
case 0:
Expand Down
1 change: 1 addition & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1966,6 +1966,7 @@ typedef struct _jl_task_t {

JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t*, jl_value_t*, size_t);
JL_DLLEXPORT void jl_switchto(jl_task_t **pt);
JL_DLLEXPORT int16_t jl_get_task_tid(jl_task_t *t) JL_NOTSAFEPOINT;
JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT;
JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT;
JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED);
Expand Down
Loading
Loading