Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expose metric to report reasons why full GCs were triggered (#55826) #189

Merged
merged 1 commit into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,39 @@ function gc_page_utilization_data()
return Base.unsafe_wrap(Array, page_utilization_raw, JL_GC_N_MAX_POOLS, own=false)
end

# Must be kept in sync with `src/gc.h`.
# The order matters: the entry at (1-based) position `i` names the C counter whose
# `FULL_SWEEP_REASON_*` index in `src/gc.h` is `i - 1`.
const FULL_SWEEP_REASONS = Symbol[
    :FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL,
    :FULL_SWEEP_REASON_FORCED_FULL_SWEEP,
    :FULL_SWEEP_REASON_ALLOCATION_INTERVAL_ABOVE_MAXMEM,
    :FULL_SWEEP_REASON_LIVE_BYTES_ABOVE_MAX_TOTAL_MEMORY,
    :FULL_SWEEP_REASON_LARGE_INTERGEN_FRONTIER,
]

"""
    Base.full_sweep_reasons()

Return a `Dict{Symbol, Int64}` mapping each full sweep reason to the number of times it
has occurred.

The reasons are:
- `:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL`: the full sweep was caused by `always_full` being
  set in the GC debug environment.
- `:FULL_SWEEP_REASON_FORCED_FULL_SWEEP`: the full sweep was forced by `GC.gc(true)`.
- `:FULL_SWEEP_REASON_ALLOCATION_INTERVAL_ABOVE_MAXMEM`: the full sweep was forced by the
  allocation interval being above the total memory in the machine (as returned by LibUV)
  divided by the number of mutator threads.
- `:FULL_SWEEP_REASON_LIVE_BYTES_ABOVE_MAX_TOTAL_MEMORY`: the full sweep was caused by live
  bytes being above the soft heap limit size (which is either automatically computed at
  initialization based on the total memory provided by LibUV, or set by the user via
  `--heap-size-hint`).
- `:FULL_SWEEP_REASON_LARGE_INTERGEN_FRONTIER`: the full sweep was forced by the
  intergenerational frontier being too large (i.e. too many pointers in the remembered
  set).

Note that the set of reasons is not guaranteed to be stable across minor versions of Julia.
"""
function full_sweep_reasons()
    # `jl_full_sweep_reasons` is a C array of UInt64 counters exported by the runtime.
    # Wrap it without taking ownership (`own=false`) so Julia never tries to free it.
    counters_ptr = cglobal(:jl_full_sweep_reasons, UInt64)
    counters = Base.unsafe_wrap(Vector{UInt64}, counters_ptr, length(FULL_SWEEP_REASONS), own=false)
    # Names and counters are matched by position; each UInt64 counter is converted to
    # Int64 when it is stored in the dictionary.
    return Dict{Symbol, Int64}(zip(FULL_SWEEP_REASONS, counters))
end

"""
Base.jit_total_bytes()

Expand Down
7 changes: 7 additions & 0 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ uv_sem_t gc_sweep_assists_needed;
uv_mutex_t gc_queue_observer_lock;
// Tag for sentinel nodes in bigval list
uintptr_t gc_bigval_sentinel_tag;
// Table recording number of full GCs due to each reason.
// Indexed by the `FULL_SWEEP_REASON_*` constants and sized by `FULL_SWEEP_NUM_REASONS`;
// entries are incremented through `gc_count_full_sweep_reason`.
// Exported so that `Base.full_sweep_reasons()` can read it via `cglobal`.
JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];

// Linked list of callback functions

Expand Down Expand Up @@ -3551,6 +3553,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
if (large_frontier) {
sweep_full = 1;
gc_num.interval = last_long_collect_interval;
gc_count_full_sweep_reason(FULL_SWEEP_REASON_LARGE_INTERGEN_FRONTIER);
}
if (not_freed_enough || large_frontier) {
gc_num.interval = gc_num.interval * 2;
Expand All @@ -3566,6 +3569,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
if (gc_num.interval > maxmem) {
sweep_full = 1;
gc_num.interval = maxmem;
gc_count_full_sweep_reason(FULL_SWEEP_REASON_ALLOCATION_INTERVAL_ABOVE_MAXMEM);
}
}

Expand All @@ -3574,13 +3578,16 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
if (live_bytes > max_total_memory) {
under_memory_pressure = 1;
sweep_full = 1;
gc_count_full_sweep_reason(FULL_SWEEP_REASON_LIVE_BYTES_ABOVE_MAX_TOTAL_MEMORY);
}
if (gc_sweep_always_full) {
sweep_full = 1;
gc_count_full_sweep_reason(FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL);
}
if (collection == JL_GC_FULL && !prev_sweep_full) {
sweep_full = 1;
recollect = 1;
gc_count_full_sweep_reason(FULL_SWEEP_REASON_FORCED_FULL_SWEEP);
}
if (sweep_full) {
// these are the difference between the number of gc-perm bytes scanned
Expand Down
15 changes: 15 additions & 0 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,21 @@ FORCE_INLINE void gc_big_object_link(bigval_t *sentinel_node, bigval_t *node) JL
sentinel_node->next = node;
}

// Must be kept in sync with `base/timing.jl`
// Each value below is an index into the `jl_full_sweep_reasons` counter table, and the
// order must match the order of `FULL_SWEEP_REASONS` on the Julia side.
// Counted when `gc_sweep_always_full` is set.
#define FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL (0)
// Counted when a `JL_GC_FULL` collection is requested and the previous sweep was not full.
#define FULL_SWEEP_REASON_FORCED_FULL_SWEEP (1)
// Counted when `gc_num.interval` exceeds `maxmem`.
#define FULL_SWEEP_REASON_ALLOCATION_INTERVAL_ABOVE_MAXMEM (2)
// Counted when `live_bytes` exceeds `max_total_memory`.
#define FULL_SWEEP_REASON_LIVE_BYTES_ABOVE_MAX_TOTAL_MEMORY (3)
// Counted when the intergenerational frontier is deemed too large (`large_frontier`).
#define FULL_SWEEP_REASON_LARGE_INTERGEN_FRONTIER (4)
// Number of reasons above; used as the length of `jl_full_sweep_reasons`.
#define FULL_SWEEP_NUM_REASONS (5)

// Per-reason counters of full sweeps; one slot per `FULL_SWEEP_REASON_*` index.
extern JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];

// Record one more full sweep attributed to `reason`.
// `reason` must be one of the `FULL_SWEEP_REASON_*` indices, i.e. lie in
// [0, FULL_SWEEP_NUM_REASONS); this is only checked when assertions are enabled.
STATIC_INLINE void gc_count_full_sweep_reason(int reason) JL_NOTSAFEPOINT
{
    assert(0 <= reason && reason < FULL_SWEEP_NUM_REASONS);
    uint64_t *counter = &jl_full_sweep_reasons[reason];
    *counter += 1;
}

extern uv_mutex_t gc_threads_lock;
extern uv_cond_t gc_threads_cond;
extern uv_sem_t gc_sweep_assists_needed;
Expand Down
2 changes: 1 addition & 1 deletion src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -1035,7 +1035,7 @@ JL_DLLEXPORT int jl_heartbeat_resume(void)
if (uv_sem_trywait(&heartbeat_off_sem) != 0) {
return -1;
}

// reset state as we've been paused
n_hbs_missed = 0;
n_hbs_recvd = 0;
Expand Down
11 changes: 11 additions & 0 deletions test/gc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ function run_pg_size_test()
@test page_size == (1 << 12) || page_size == (1 << 14)
end

# Check that `Base.full_sweep_reasons()` reports a forced full sweep after an explicit
# full collection, and that it exposes exactly the reasons in `Base.FULL_SWEEP_REASONS`.
function full_sweep_reasons_test()
    # Explicitly request a full collection (`true` is also the default for `GC.gc`).
    GC.gc(true)
    counts = Base.full_sweep_reasons()
    @test counts[:FULL_SWEEP_REASON_FORCED_FULL_SWEEP] >= 1
    # Every known reason must be reported, and nothing else.
    @test keys(counts) == Set(Base.FULL_SWEEP_REASONS)
end

# !!! note:
# Since we run our tests on 32bit OS as well we confine ourselves
# to parameters that allocate about 512MB of objects. Max RSS is lower
Expand All @@ -43,3 +50,7 @@ end
run_nonzero_page_utilization_test()
run_pg_size_test()
end

# Exercises `Base.full_sweep_reasons()` via `full_sweep_reasons_test`.
@testset "Full GC reasons" begin
full_sweep_reasons_test()
end