Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add per-Task cpu time metric (#56320) #194

Draft
wants to merge 7 commits into
base: v1.10.2+RAI
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions base/experimental.jl
Original file line number Diff line number Diff line change
Expand Up @@ -368,4 +368,78 @@ adding them to the global method table.
"""
:@MethodTable

### Task metrics

"""
Base.Experimental.task_metrics(::Bool)

Enable or disable the collection of per-task metrics.
A `Task` created when `Base.task_metrics(true)` is in effect will have
[`Base.Experimental.task_cpu_time_ns`](@ref) and [`Base.Experimental.task_wall_time_ns`](@ref)
timing information available.

!!! note
Task metrics can be enabled at start-up via the `--task-metrics=yes` command line option.
"""
function task_metrics(b::Bool)
if b
ccall(:jl_task_metrics_enable, Cvoid, ())
else
ccall(:jl_task_metrics_disable, Cvoid, ())
end
return nothing
end

"""
Base.Experimental.task_cpu_time_ns(t::Task) -> Union{Int, Nothing}

Return the total nanoseconds that the task `t` has spent running.
This metric only updates when `t` yields or completes, unless `t` is the current task
in which it will update continuously.
See also [`Base.Experimental.task_wall_time_ns`](@ref).

Will be `nothing` if task timings are not enabled.
See [`Base.Experimental.task_metrics`](@ref).

!!! note "This metric is from the Julia scheduler"
A task may be running on an OS thread that is descheduled by the OS
scheduler, this time still counts towards the metric.

!!! compat "Julia 1.12"
This method was added in Julia 1.12.
"""
function task_cpu_time_ns(t::Task=current_task())
t.metrics_enabled || return nothing
if t == current_task()
# These metrics fields can't update while we're running.
# But since we're running we need to include the time since we last started running!
return Int(t.cpu_time_ns + (time_ns() - t.last_started_running_at))
else
return Int(t.cpu_time_ns)
end
end

"""
Base.Experimental.task_wall_time_ns(t::Task) -> Union{Int, Nothing}

Return the total nanoseconds that the task `t` was runnable.
This is the time since the task entered the run queue until the time at which it completed,
or until the current time if the task has not yet completed.
See also [`task_cpu_time_ns`](@ref).

Will be `nothing` if task timings are not enabled.
See [`Base.task_metrics`](@ref).

!!! compat "Julia 1.12"
This method was added in Julia 1.12.
"""
function task_wall_time_ns(t::Task=current_task())
t.metrics_enabled || return nothing
start_at = t.first_enqueued_at
start_at == 0 && return 0
end_at = t.finished_at
end_at == 0 && return Int(time_ns() - start_at)
return Int(end_at - start_at)
end

end # module
1 change: 1 addition & 0 deletions base/options.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct JLOptions
heap_size_hint::UInt64
trace_compile_timing::Int8
safe_crash_log_file::Ptr{UInt8}
task_metrics::Int8
end

# This runs early in the sysimage != is not defined yet
Expand Down
47 changes: 45 additions & 2 deletions base/task.jl
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,11 @@ function enq_work(t::Task)
return t
end

schedule(t::Task) = enq_work(t)
function schedule(t::Task)
# [task] created -scheduled-> wait_time
maybe_record_enqueued!(t)
enq_work(t)
end

"""
schedule(t::Task, [val]; error=false)
Expand Down Expand Up @@ -857,6 +861,8 @@ function schedule(t::Task, @nospecialize(arg); error=false)
t.queue === nothing || Base.error("schedule: Task not runnable")
setfield!(t, :result, arg)
end
# [task] created -scheduled-> wait_time
maybe_record_enqueued!(t)
enq_work(t)
return t
end
Expand Down Expand Up @@ -888,7 +894,13 @@ A fast, unfair-scheduling version of `schedule(t, arg); yield()` which
immediately yields to `t` before calling the scheduler.
"""
function yield(t::Task, @nospecialize(x=nothing))
(t._state === task_state_runnable && t.queue === nothing) || error("yield: Task not runnable")
current = current_task()
# [task] user_time -yield-> wait_time
record_cpu_time!(current)
t === current && throw(ConcurrencyViolationError("Cannot yield to currently running task!"))
(t._state === task_state_runnable && t.queue === nothing) || throw(ConcurrencyViolationError("yield: Task not runnable"))
# [task] created -scheduled-> wait_time
maybe_record_enqueued!(t)
t.result = x
enq_work(current_task())
set_next_task(t)
Expand All @@ -904,13 +916,17 @@ call to `yieldto`. This is a low-level call that only switches tasks, not consid
or scheduling in any way. Its use is discouraged.
"""
function yieldto(t::Task, @nospecialize(x=nothing))
# [task] user_time -yield-> wait_time
record_cpu_time!(current_task())
# TODO: these are legacy behaviors; these should perhaps be a scheduler
# state error instead.
if t._state === task_state_done
return x
elseif t._state === task_state_failed
throw(t.result)
end
# [task] created -scheduled-unfairly-> wait_time
maybe_record_enqueued!(t)
t.result = x
set_next_task(t)
return try_yieldto(identity)
Expand All @@ -924,6 +940,10 @@ function try_yieldto(undo)
rethrow()
end
ct = current_task()
# [task] wait_time -(re)started-> user_time
if ct.metrics_enabled
@atomic :monotonic ct.last_started_running_at = time_ns()
end
if ct._isexception
exc = ct.result
ct.result = nothing
Expand All @@ -937,6 +957,10 @@ end

# yield to a task, throwing an exception in it
function throwto(t::Task, @nospecialize exc)
# [task] user_time -yield-> wait_time
record_cpu_time!(current_task())
# [task] created -scheduled-unfairly-> wait_time
maybe_record_enqueued!(t)
t.result = exc
t._isexception = true
set_next_task(t)
Expand Down Expand Up @@ -989,6 +1013,8 @@ checktaskempty = Partr.multiq_check_empty
end

function wait()
# [task] user_time -yield-or-done-> wait_time
record_cpu_time!(current_task())
GC.safepoint()
W = workqueue_for(Threads.threadid())
poptask(W)
Expand All @@ -1003,3 +1029,20 @@ if Sys.iswindows()
else
pause() = ccall(:pause, Cvoid, ())
end

# update the `cpu_time_ns` field of `t` to include the time since it last started running.
function record_cpu_time!(t::Task)
if t.metrics_enabled && !istaskdone(t)
@atomic :monotonic t.cpu_time_ns += time_ns() - t.last_started_running_at
end
return t
end
# if this is the first time `t` has been added to the run queue
# (or the first time it has been unfairly yielded to without being added to the run queue)
# then set the `first_enqueued_at` field to the current time.
function maybe_record_enqueued!(t::Task)
if t.metrics_enabled && t.first_enqueued_at == 0
@atomic :monotonic t.first_enqueued_at = time_ns()
end
return t
end
4 changes: 3 additions & 1 deletion doc/man/julia.1
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,9 @@ Print precompile statements for methods compiled during execution or save to a p
If --trace-compile is enabled show how long each took to compile in ms

.TP
--task-metrics={yes|no*}
Enable the collection of per-task metrics.

-image-codegen
Force generate code in imaging mode

Expand All @@ -281,6 +284,5 @@ See https://docs.julialang.org/en/v1/manual/environment-variables/
Please report any bugs using the GitHub issue tracker:
https://github.com/julialang/julia/issues?state=open


.SH AUTHORS
Contributors: https://github.com/JuliaLang/julia/graphs/contributors
8 changes: 8 additions & 0 deletions doc/src/base/multi-threading.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,11 @@ These building blocks are used to create the regular synchronization objects.
```@docs
Base.Threads.SpinLock
```

## Task metrics (Experimental)

```@docs
Base.Experimental.task_metrics
Base.Experimental.task_cpu_time_ns
Base.Experimental.task_wall_time_ns
```
1 change: 1 addition & 0 deletions doc/src/manual/command-line-interface.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ The following is a complete list of command-line switches available when launchi
|`--code-coverage=tracefile.info` |Append coverage information to the LCOV tracefile (filename supports format tokens).|
|`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")|
|`--track-allocation=@<path>` |Count bytes but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.|
|`--task-metrics={yes\|no*}` |Enable the collection of per-task metrics|
|`--bug-report=KIND` |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.|
|`--compile={yes*\|no\|all\|min}` |Enable or disable JIT compiler, or request exhaustive or minimal compilation|
|`--output-o <name>` |Generate an object file (including system image data)|
Expand Down
4 changes: 4 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,10 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
#if defined(_COMPILER_GCC_) && __GNUC__ >= 12
#pragma GCC diagnostic ignored "-Wdangling-pointer"
#endif
if (jl_options.task_metrics == JL_OPTIONS_TASK_METRICS_ON) {
// enable before creating the root task so it gets timings too.
jl_atomic_fetch_add(&jl_task_metrics_enabled, 1);
}
// warning: this changes `jl_current_task`, so be careful not to call that from this function
jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
#pragma GCC diagnostic pop
Expand Down
22 changes: 22 additions & 0 deletions src/jlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,28 @@ JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void)
return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
}

/**
* @brief Enable per-task timing.
*/
JL_DLLEXPORT void jl_task_metrics_enable(void)
{
// Increment the flag to allow reentrant callers.
jl_atomic_fetch_add(&jl_task_metrics_enabled, 1);
}

/**
* @brief Disable per-task timing.
*/
JL_DLLEXPORT void jl_task_metrics_disable(void)
{
// Prevent decrementing the counter below zero
uint8_t enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled);
while (enabled > 0) {
if (jl_atomic_cmpswap(&jl_task_metrics_enabled, &enabled, enabled-1))
break;
}
}

JL_DLLEXPORT void jl_get_fenv_consts(int *ret)
{
ret[0] = FE_INEXACT;
Expand Down
14 changes: 13 additions & 1 deletion src/jloptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ JL_DLLEXPORT void jl_init_options(void)
0, // heap-size-hint
0, // trace_compile_timing
NULL, // safe_crash_log_file
0, // task_metrics
};
jl_options_initialized = 1;
}
Expand Down Expand Up @@ -206,7 +207,7 @@ static const char opts_hidden[] =
" --strip-metadata Remove docstrings and source location info from system image\n"
" --strip-ir Remove IR (intermediate representation) of compiled functions\n\n"

// compiler debugging (see the devdocs for tips on using these options)
// compiler debugging and experimental (see the devdocs for tips on using these options)
" --output-unopt-bc <name> Generate unoptimized LLVM bitcode (.bc)\n"
" --output-bc <name> Generate LLVM bitcode (.bc)\n"
" --output-asm <name> Generate an assembly file (.s)\n"
Expand All @@ -215,6 +216,7 @@ static const char opts_hidden[] =
" --trace-compile={stderr,name}\n"
" Print precompile statements for methods compiled during execution or save to a path\n"
" --trace-compile-timing If --trace-compile is enabled show how long each took to compile in ms\n"
" --task-metrics={yes|no*} Enable collection of per-task timing data.\n"
" --image-codegen Force generate code in imaging mode\n"
" --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n"
;
Expand All @@ -239,6 +241,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
opt_trace_compile,
opt_trace_compile_timing,
opt_trace_dispatch,
opt_task_metrics,
opt_math_mode,
opt_worker,
opt_bind_to,
Expand Down Expand Up @@ -316,6 +319,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
{ "polly", required_argument, 0, opt_polly },
{ "trace-compile", required_argument, 0, opt_trace_compile },
{ "trace-dispatch", required_argument, 0, opt_trace_dispatch },
{ "task-metrics", required_argument, 0, opt_task_metrics },
{ "math-mode", required_argument, 0, opt_math_mode },
{ "handle-signals", required_argument, 0, opt_handle_signals },
// hidden command line options
Expand Down Expand Up @@ -872,6 +876,14 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
if (jl_options.safe_crash_log_file == NULL)
jl_error("julia: failed to allocate memory for --safe-crash-log-file");
break;
case opt_task_metrics:
if (!strcmp(optarg, "no"))
jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_OFF;
else if (!strcmp(optarg, "yes"))
jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_ON;
else
jl_errorf("julia: invalid argument to --task-metrics={yes|no} (%s)", optarg);
break;
default:
jl_errorf("julia: unhandled option -- %c\n"
"This is a bug, please report it.", c);
Expand Down
1 change: 1 addition & 0 deletions src/jloptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ typedef struct {
uint64_t heap_size_hint;
int8_t trace_compile_timing;
const char *safe_crash_log_file;
int8_t task_metrics;
} jl_options_t;

#endif
24 changes: 20 additions & 4 deletions src/jltypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -3222,7 +3222,7 @@ void jl_init_types(void) JL_GC_DISABLED
NULL,
jl_any_type,
jl_emptysvec,
jl_perm_symsvec(16,
jl_perm_symsvec(21,
"next",
"queue",
"storage",
Expand All @@ -3238,8 +3238,13 @@ void jl_init_types(void) JL_GC_DISABLED
"_state",
"sticky",
"_isexception",
"priority"),
jl_svec(16,
"priority",
"metrics_enabled",
"first_enqueued_at",
"last_started_running_at",
"cpu_time_ns",
"finished_at"),
jl_svec(21,
jl_any_type,
jl_any_type,
jl_any_type,
Expand All @@ -3255,12 +3260,23 @@ void jl_init_types(void) JL_GC_DISABLED
jl_uint8_type,
jl_bool_type,
jl_bool_type,
jl_uint16_type),
jl_uint16_type,
jl_bool_type,
jl_uint64_type,
jl_uint64_type,
jl_uint64_type,
jl_uint64_type),
jl_emptysvec,
0, 1, 6);
XX(task);
jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type);
jl_svecset(jl_task_type->types, 0, listt);
// Set field 17 (metrics_enabled) as const
// Set fields 13 (_state) and 18-21 (metric counters) as atomic
const static uint32_t task_constfields[1] = { 0b000010000000000000000 };
const static uint32_t task_atomicfields[1] = { 0b111100001000000000000 };
jl_task_type->name->constfields = task_constfields;
jl_task_type->name->atomicfields = task_atomicfields;

jl_binding_type =
jl_new_datatype(jl_symbol("Binding"), core, jl_any_type, jl_emptysvec,
Expand Down
Loading