From 0b183bf2d608bedf118607b1471fbf1e68813a08 Mon Sep 17 00:00:00 2001 From: Kelvin Nilsen Date: Tue, 3 Mar 2026 09:39:06 +0000 Subject: [PATCH] 8312116: GenShen: make instantaneous allocation rate triggers more timely Reviewed-by: wkemper --- .../shenandoahAdaptiveHeuristics.cpp | 559 ++++++++++++++++-- .../shenandoahAdaptiveHeuristics.hpp | 76 +++ .../shenandoahGenerationalHeuristics.cpp | 6 + .../shenandoahGenerationalHeuristics.hpp | 2 + .../heuristics/shenandoahHeuristics.cpp | 30 +- .../heuristics/shenandoahHeuristics.hpp | 33 +- .../heuristics/shenandoahYoungHeuristics.cpp | 3 +- .../gc/shenandoah/shenandoahConcurrentGC.cpp | 1 + .../gc/shenandoah/shenandoahControlThread.cpp | 19 +- .../gc/shenandoah/shenandoahDegeneratedGC.cpp | 1 + .../share/gc/shenandoah/shenandoahFreeSet.cpp | 62 +- .../share/gc/shenandoah/shenandoahFreeSet.hpp | 70 ++- .../share/gc/shenandoah/shenandoahFullGC.cpp | 4 +- .../gc/shenandoah/shenandoahGeneration.cpp | 7 +- .../gc/shenandoah/shenandoahGeneration.hpp | 2 +- .../shenandoahGenerationalControlThread.cpp | 3 +- .../shenandoah/shenandoahGenerationalHeap.cpp | 22 +- .../shenandoah/shenandoahGenerationalHeap.hpp | 3 + .../share/gc/shenandoah/shenandoahHeap.cpp | 27 +- .../share/gc/shenandoah/shenandoahHeap.hpp | 3 + .../gc/shenandoah/shenandoahOldGeneration.cpp | 2 + .../shenandoah/shenandoahRegulatorThread.cpp | 16 +- .../shenandoah/shenandoahRegulatorThread.hpp | 3 + .../gc/shenandoah/shenandoah_globals.hpp | 53 ++ 24 files changed, 899 insertions(+), 108 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp index 7a8bd55c795..ac8b3ebdf37 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp @@ -33,6 +33,7 @@ #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" #include 
"gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" +#include "gc/shenandoah/shenandoahYoungGeneration.hpp" #include "logging/log.hpp" #include "logging/logTag.hpp" #include "runtime/globals.hpp" @@ -59,14 +60,95 @@ const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0 const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25% const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9% + +// To enable detection of GC time trends, we keep separate track of the recent history of gc time. During initialization, +// for example, the amount of live memory may be increasing, which is likely to cause the GC times to increase. This history +// allows us to predict increasing GC times rather than always assuming average recent GC time is the best predictor. +const size_t ShenandoahAdaptiveHeuristics::GC_TIME_SAMPLE_SIZE = 3; + +// We also keep separate track of recently sampled allocation rates for two purposes: +// 1. The number of samples examined to determine acceleration of allocation is represented by +// ShenandoahRateAccelerationSampleSize +// 2. The number of most recent samples averaged to determine a momentary allocation spike is represented by +// ShenandoahMomentaryAllocationRateSpikeSampleSize + +// Allocation rates are sampled by the regulator thread, which typically runs every ms. There may be jitter in the scheduling +// of the regulator thread. To reduce signal noise and synchronization overhead, we do not sample allocation rate with every +// iteration of the regulator. We prefer sample time longer than 1 ms so that there can be a statistically significant number +// of allocations occuring within each sample period. The regulator thread samples allocation rate only if at least +// ShenandoahAccelerationSamplePeriod ms have passed since it previously sampled the allocation rate. 
+// +// This trigger responds much more quickly than the traditional trigger, which monitors 100 ms spans. When acceleration is +// detected, the impact of acceleration on anticipated consumption of available memory is also much more impactful +// than the assumed constant allocation rate consumption of available memory. + ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics(ShenandoahSpaceInfo* space_info) : ShenandoahHeuristics(space_info), _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence), _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold), _last_trigger(OTHER), - _available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor) { } + _available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor), + _free_set(nullptr), + _previous_acceleration_sample_timestamp(0.0), + _gc_time_first_sample_index(0), + _gc_time_num_samples(0), + _gc_time_timestamps(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)), + _gc_time_samples(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)), + _gc_time_xy(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)), + _gc_time_xx(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)), + _gc_time_sum_of_timestamps(0), + _gc_time_sum_of_samples(0), + _gc_time_sum_of_xy(0), + _gc_time_sum_of_xx(0), + _gc_time_m(0.0), + _gc_time_b(0.0), + _gc_time_sd(0.0), + _spike_acceleration_buffer_size(MAX2(ShenandoahRateAccelerationSampleSize, 1+ShenandoahMomentaryAllocationRateSpikeSampleSize)), + _spike_acceleration_first_sample_index(0), + _spike_acceleration_num_samples(0), + _spike_acceleration_rate_samples(NEW_C_HEAP_ARRAY(double, _spike_acceleration_buffer_size, mtGC)), + _spike_acceleration_rate_timestamps(NEW_C_HEAP_ARRAY(double, _spike_acceleration_buffer_size, mtGC)) { + } -ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {} +ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() { + FREE_C_HEAP_ARRAY(double, _spike_acceleration_rate_samples); + FREE_C_HEAP_ARRAY(double, 
_spike_acceleration_rate_timestamps); + FREE_C_HEAP_ARRAY(double, _gc_time_timestamps); + FREE_C_HEAP_ARRAY(double, _gc_time_samples); + FREE_C_HEAP_ARRAY(double, _gc_time_xy); + FREE_C_HEAP_ARRAY(double, _gc_time_xx); +} + +void ShenandoahAdaptiveHeuristics::initialize() { + ShenandoahHeuristics::initialize(); +} + +void ShenandoahAdaptiveHeuristics::post_initialize() { + ShenandoahHeuristics::post_initialize(); + _free_set = ShenandoahHeap::heap()->free_set(); + assert(!ShenandoahHeap::heap()->mode()->is_generational(), "ShenandoahGenerationalHeuristics overrides this method"); + compute_headroom_adjustment(); +} + +void ShenandoahAdaptiveHeuristics::compute_headroom_adjustment() { + // The trigger threshold represents mutator available - "head room". + // We plan for GC to finish before the amount of allocated memory exceeds trigger threshold. This is the same as saying we + // intend to finish GC before the amount of available memory is less than the allocation headroom. Headroom is the planned + // safety buffer to allow a small amount of additional allocation to take place in case we were overly optimistic in delaying + // our trigger. + size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); + size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor; + size_t penalties = capacity / 100 * _gc_time_penalties; + _headroom_adjustment = spike_headroom + penalties; +} + +void ShenandoahAdaptiveHeuristics::start_idle_span() { + compute_headroom_adjustment(); +} + +void ShenandoahAdaptiveHeuristics::adjust_penalty(intx step) { + ShenandoahHeuristics::adjust_penalty(step); +} void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset, RegionData* data, size_t size, @@ -76,8 +158,8 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand // The logic for cset selection in adaptive is as follows: // // 1. We cannot get cset larger than available free space. 
Otherwise we guarantee OOME - // during evacuation, and thus guarantee full GC. In practice, we also want to let - // application to allocate something. This is why we limit CSet to some fraction of + // during evacuation, and thus guarantee full GC. In practice, we also want to let the + // application allocate during concurrent GC. This is why we limit CSet to some fraction of // available space. In non-overloaded heap, max_cset would contain all plausible candidates // over garbage threshold. // @@ -108,6 +190,7 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand size_t cur_cset = 0; size_t cur_garbage = 0; + // Regions are sorted in order of decreasing garbage for (size_t idx = 0; idx < size; idx++) { ShenandoahHeapRegion* r = data[idx].get_region(); @@ -126,6 +209,88 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand } } +void ShenandoahAdaptiveHeuristics::add_degenerated_gc_time(double timestamp, double gc_time) { + // Conservatively add sample into linear model If this time is above the predicted concurrent gc time + if (predict_gc_time(timestamp) < gc_time) { + add_gc_time(timestamp, gc_time); + } +} + +void ShenandoahAdaptiveHeuristics::add_gc_time(double timestamp, double gc_time) { + // Update best-fit linear predictor of GC time + uint index = (_gc_time_first_sample_index + _gc_time_num_samples) % GC_TIME_SAMPLE_SIZE; + if (_gc_time_num_samples == GC_TIME_SAMPLE_SIZE) { + _gc_time_sum_of_timestamps -= _gc_time_timestamps[index]; + _gc_time_sum_of_samples -= _gc_time_samples[index]; + _gc_time_sum_of_xy -= _gc_time_xy[index]; + _gc_time_sum_of_xx -= _gc_time_xx[index]; + } + _gc_time_timestamps[index] = timestamp; + _gc_time_samples[index] = gc_time; + _gc_time_xy[index] = timestamp * gc_time; + _gc_time_xx[index] = timestamp * timestamp; + + _gc_time_sum_of_timestamps += _gc_time_timestamps[index]; + _gc_time_sum_of_samples += _gc_time_samples[index]; + _gc_time_sum_of_xy += 
_gc_time_xy[index]; + _gc_time_sum_of_xx += _gc_time_xx[index]; + + if (_gc_time_num_samples < GC_TIME_SAMPLE_SIZE) { + _gc_time_num_samples++; + } else { + _gc_time_first_sample_index = (_gc_time_first_sample_index + 1) % GC_TIME_SAMPLE_SIZE; + } + + if (_gc_time_num_samples == 1) { + // The predictor is constant (horizontal line) + _gc_time_m = 0; + _gc_time_b = gc_time; + _gc_time_sd = 0.0; + } else if (_gc_time_num_samples == 2) { + // Two points define a line + double delta_y = gc_time - _gc_time_samples[_gc_time_first_sample_index]; + double delta_x = timestamp - _gc_time_timestamps[_gc_time_first_sample_index]; + _gc_time_m = delta_y / delta_x; + + // y = mx + b + // so b = y0 - mx0 + _gc_time_b = gc_time - _gc_time_m * timestamp; + _gc_time_sd = 0.0; + } else { + _gc_time_m = ((_gc_time_num_samples * _gc_time_sum_of_xy - _gc_time_sum_of_timestamps * _gc_time_sum_of_samples) / + (_gc_time_num_samples * _gc_time_sum_of_xx - _gc_time_sum_of_timestamps * _gc_time_sum_of_timestamps)); + _gc_time_b = (_gc_time_sum_of_samples - _gc_time_m * _gc_time_sum_of_timestamps) / _gc_time_num_samples; + double sum_of_squared_deviations = 0.0; + for (size_t i = 0; i < _gc_time_num_samples; i++) { + uint index = (_gc_time_first_sample_index + i) % GC_TIME_SAMPLE_SIZE; + double x = _gc_time_timestamps[index]; + double predicted_y = _gc_time_m * x + _gc_time_b; + double deviation = predicted_y - _gc_time_samples[index]; + sum_of_squared_deviations += deviation * deviation; + } + _gc_time_sd = sqrt(sum_of_squared_deviations / _gc_time_num_samples); + } +} + +double ShenandoahAdaptiveHeuristics::predict_gc_time(double timestamp_at_start) { + return _gc_time_m * timestamp_at_start + _gc_time_b + _gc_time_sd * _margin_of_error_sd; +} + +void ShenandoahAdaptiveHeuristics::add_rate_to_acceleration_history(double timestamp, double rate) { + uint new_sample_index = + (_spike_acceleration_first_sample_index + _spike_acceleration_num_samples) % _spike_acceleration_buffer_size; +
_spike_acceleration_rate_timestamps[new_sample_index] = timestamp; + _spike_acceleration_rate_samples[new_sample_index] = rate; + if (_spike_acceleration_num_samples == _spike_acceleration_buffer_size) { + _spike_acceleration_first_sample_index++; + if (_spike_acceleration_first_sample_index == _spike_acceleration_buffer_size) { + _spike_acceleration_first_sample_index = 0; + } + } else { + _spike_acceleration_num_samples++; + } +} + void ShenandoahAdaptiveHeuristics::record_cycle_start() { ShenandoahHeuristics::record_cycle_start(); _allocation_rate.allocation_counter_reset(); @@ -133,6 +298,10 @@ void ShenandoahAdaptiveHeuristics::record_cycle_start() { void ShenandoahAdaptiveHeuristics::record_success_concurrent() { ShenandoahHeuristics::record_success_concurrent(); + double now = os::elapsedTime(); + + // Should we not add GC time if this was an abbreviated cycle? + add_gc_time(_cycle_start, elapsed_cycle_time()); size_t available = _space_info->available(); @@ -185,6 +354,7 @@ void ShenandoahAdaptiveHeuristics::record_success_concurrent() { void ShenandoahAdaptiveHeuristics::record_degenerated() { ShenandoahHeuristics::record_degenerated(); + add_degenerated_gc_time(_precursor_cycle_start, elapsed_degenerated_cycle_time()); // Adjust both trigger's parameters in the case of a degenerated GC because // either of them should have triggered earlier to avoid this case. 
adjust_margin_of_error(DEGENERATE_PENALTY_SD); @@ -236,6 +406,24 @@ bool ShenandoahAdaptiveHeuristics::should_start_gc() { size_t available = _space_info->soft_mutator_available(); size_t allocated = _space_info->bytes_allocated_since_gc_start(); + double avg_cycle_time = 0; + double avg_alloc_rate = 0; + double now = get_most_recent_wake_time(); + size_t allocatable_words = this->allocatable(available); + double predicted_future_accelerated_gc_time = 0.0; + size_t allocated_bytes_since_last_sample = 0; + double instantaneous_rate_words_per_second = 0.0; + size_t consumption_accelerated = 0; + double acceleration = 0.0; + double current_rate_by_acceleration = 0.0; + size_t min_threshold = min_free_threshold(); + double predicted_future_gc_time = 0; + double future_planned_gc_time = 0; + bool future_planned_gc_time_is_average = false; + double avg_time_to_deplete_available = 0.0; + bool is_spiking = false; + double spike_time_to_deplete_available = 0.0; + log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT ", " "allocated_since_gc_start: " PROPERFMT, PROPERFMTARGS(available), PROPERFMTARGS(capacity), PROPERFMTARGS(allocated)); @@ -250,7 +438,6 @@ bool ShenandoahAdaptiveHeuristics::should_start_gc() { _last_trigger = OTHER; - size_t min_threshold = min_free_threshold(); if (available < min_threshold) { log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")", PROPERFMTARGS(available), PROPERFMTARGS(min_threshold)); @@ -271,55 +458,227 @@ bool ShenandoahAdaptiveHeuristics::should_start_gc() { return true; } } - // Check if allocation headroom is still okay. This also factors in: - // 1. Some space to absorb allocation spikes (ShenandoahAllocSpikeFactor) - // 2. 
Accumulated penalties from Degenerated and Full GC - size_t allocation_headroom = available; - size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor; - size_t penalties = capacity / 100 * _gc_time_penalties; + // The test (3 * allocated > available) below is intended to prevent triggers from firing so quickly that there + // has not been sufficient time to create garbage that can be reclaimed during the triggered GC cycle. If we trigger before + // garbage has been created, the concurrent GC will find no garbage. This has been observed to result in degens which + // experience OOM during evac or that experience "bad progress", both of which escalate to Full GC. Note that garbage that + // was allocated following the start of the current GC cycle cannot be reclaimed in this GC cycle. Here is the derivation + // of the expression: + // + // Let R (runway) represent the total amount of memory that can be allocated following the start of GC(N). The runway + // represents memory available at the start of the current GC plus garbage reclaimed by the current GC. In a balanced, + // fully utilized configuration, we will be starting each new GC cycle immediately following completion of the preceding + // GC cycle. In this configuration, we would expect half of R to be consumed during concurrent cycle GC(N) and half + // to be consumed during concurrent GC(N+1). + // + // Assume we want to delay GC trigger until: A/V > 0.33 + // This is equivalent to enforcing that: A > 0.33V + // which is: 3A > V + // Since A+V equals R, we have: A + 3A > A + V = R + // which is to say that: A > R/4 + // + // Postponing the trigger until at least 1/4 of the runway has been consumed helps to improve the efficiency of the + // triggered GC. 
Under heavy steady state workload, this delay condition generally has no effect: if the allocation + // runway is divided "equally" between the current GC and the next GC, then at any potential trigger point (which cannot + // happen any sooner than completion of the first GC), it is already the case that roughly A > R/2. + if (3 * allocated <= available) { + // Even though we will not issue an adaptive trigger unless a minimum threshold of memory has been allocated, + // we still allow more generic triggers, such as guaranteed GC intervals, to act. + return ShenandoahHeuristics::should_start_gc(); + } - allocation_headroom -= MIN2(allocation_headroom, spike_headroom); - allocation_headroom -= MIN2(allocation_headroom, penalties); + avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd()); + avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd); + if ((now - _previous_acceleration_sample_timestamp) >= (ShenandoahAccelerationSamplePeriod / 1000.0)) { + predicted_future_accelerated_gc_time = + predict_gc_time(now + MAX2(get_planned_sleep_interval(), ShenandoahAccelerationSamplePeriod / 1000.0)); + double future_accelerated_planned_gc_time; + bool future_accelerated_planned_gc_time_is_average; + if (predicted_future_accelerated_gc_time > avg_cycle_time) { + future_accelerated_planned_gc_time = predicted_future_accelerated_gc_time; + future_accelerated_planned_gc_time_is_average = false; + } else { + future_accelerated_planned_gc_time = avg_cycle_time; + future_accelerated_planned_gc_time_is_average = true; + } + allocated_bytes_since_last_sample = _free_set->get_bytes_allocated_since_previous_sample(); + instantaneous_rate_words_per_second = + (allocated_bytes_since_last_sample / HeapWordSize) / (now - _previous_acceleration_sample_timestamp); - double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd()); - double avg_alloc_rate = 
_allocation_rate.upper_bound(_margin_of_error_sd); + _previous_acceleration_sample_timestamp = now; + add_rate_to_acceleration_history(now, instantaneous_rate_words_per_second); + current_rate_by_acceleration = instantaneous_rate_words_per_second; + consumption_accelerated = + accelerated_consumption(acceleration, current_rate_by_acceleration, avg_alloc_rate / HeapWordSize, + (ShenandoahAccelerationSamplePeriod / 1000.0) + future_accelerated_planned_gc_time); - log_debug(gc)("average GC time: %.2f ms, allocation rate: %.0f %s/s", - avg_cycle_time * 1000, byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate)); - if (avg_cycle_time * avg_alloc_rate > allocation_headroom) { - log_trigger("Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s)" - " to deplete free headroom (%zu%s) (margin of error = %.2f)", - avg_cycle_time * 1000, - byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate), - byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom), - _margin_of_error_sd); - log_info(gc, ergo)("Free headroom: %zu%s (free) - %zu%s (spike) - %zu%s (penalties) = %zu%s", - byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), - byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom), - byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties), - byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom)); + // Note that even a single thread that wakes up and begins to allocate excessively can manifest as accelerating allocation + // rate. This thread will initially allocate a TLAB of minimum size. Then it will allocate a TLAB twice as big a bit later, + // and then twice as big again after another short delay. 
When a phase change causes many threads to increase their + // allocation behavior, this effect is multiplied, and compounded by jitter in the times that individual threads experience + // the phase change. + // + // The following trace represents an actual workload, with allocation rates sampled at 10 Hz, the default behavior before + // introduction of accelerated allocation rate detection. Though the allocation rate is seen to be increasing at times + // 101.907 and 102.007 and 102.108, the newly sampled allocation rate is not enough to trigger GC because the headroom is + // still quite large. In fact, GC is not triggered until time 102.409s, and this GC degenerates. + // + // Sample Time (s) Allocation Rate (MB/s) Headroom (GB) + // 101.807 0.0 26.93 + // <--- accelerated spike can trigger here, around time 101.9s + // 101.907 477.6 26.85 + // 102.007 3,206.0 26.35 + // 102.108 23,797.8 24.19 + // 102.208 24,164.5 21.83 + // 102.309 23,965.0 19.47 + // 102.409 24,624.35 17.05 <--- without accelerated rate detection, we trigger here + // + // Though the above measurements are from actual workload, the following details regarding sampled allocation rates at 3ms + // period were not measured directly for this run-time sample. These are hypothetical, though they represent a plausible + // result that correlates with the actual measurements. + // + // For most of the 100 ms time span that precedes the sample at 101.907, the allocation rate still remains at zero. The phase + // change that causes increasing allocations occurs near the end of this time segment. When sampled with a 3 ms period, + // acceleration of allocation can be triggered at approximately time 101.88s. + // + // In the default configuration, accelerated allocation rate is detected by examining a sequence of 8 allocation rate samples. + // + // Even a single allocation rate sample above the norm can be interpreted as acceleration of allocation rate. For example,
For example, the + // the best-fit line for the following samples has an acceleration rate of 3,553.3 MB/s/s. This is not enough to trigger GC, + // especially given the abundance of Headroom at this moment in time. + // + // TimeStamp (s) Alloc rate (MB/s) + // 101.857 0 + // 101.860 0 + // 101.863 0 + // 101.866 0 + // 101.869 53.3 + // + // At the next sample time, we will compute a slightly higher acceration, 9,150 MB/s/s. This is also insufficient to trigger + // GC. + // + // TimeStamp (s) Alloc rate (MB/s) + // 101.860 0 + // 101.863 0 + // 101.866 0 + // 101.869 53.3 + // 101.872 110.6 + // + // Eventually, we will observe a full history of accelerating rate samples, computing acceleration of 18,500 MB/s/s. This will + // trigger GC over 500 ms earlier than was previously possible. + // + // TimeStamp (s) Alloc rate (MB/s) + // 101.866 0 + // 101.869 53.3 + // 101.872 110.6 + // 101.875 165.9 + // 101.878 221.2 + // + // The accelerated rate heuristic is based on the following idea: + // + // Assume allocation rate is accelerating at a constant rate. If we postpone the spike trigger until the subsequent + // sample point, will there be enough memory to satisfy allocations that occur during the anticipated concurrent GC + // cycle? If not, we should trigger right now. + // + // Outline of this heuristic triggering technique: + // + // 1. We remember the N (e.g. N=3) most recent samples of spike allocation rate r0, r1, r2 samples at t0, t1, and t2 + // 2. if r1 < r0 or r2 < r1, approximate Acceleration = 0.0, Rate = Average(r0, r1, r2) + // 3. Otherwise, use least squares method to compute best-fit line of rate vs time + // 4. The slope of this line represents Acceleration. The y-intercept of this line represents "initial rate" + // 5. Use r2 to rrpresent CurrentRate + // 6. Use Consumption = CurrentRate * GCTime + 1/2 * Acceleration * GCTime * GCTime + // (See High School physics discussions on constant acceleration: D = v0 * t + 1/2 * a * t^2) + // 7. 
if Consumption exceeds headroom, trigger now + // + // Though larger sample size may improve quality of predictor, it also delays trigger response. Smaller sample sizes + // are more susceptible to false triggers based on random noise. The default configuration uses a sample size of 8 and + // a sample period of roughly 15 ms, spanning approximately 120 ms of execution. + if (consumption_accelerated > allocatable_words) { + size_t size_t_alloc_rate = (size_t) current_rate_by_acceleration * HeapWordSize; + if (acceleration > 0) { + size_t size_t_acceleration = (size_t) acceleration * HeapWordSize; + log_trigger("Accelerated consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at " + "current rate (" PROPERFMT "/s) with acceleration (" PROPERFMT "/s/s) for planned %s GC time (%.2f ms)", + PROPERFMTARGS(consumption_accelerated * HeapWordSize), + PROPERFMTARGS(allocatable_words * HeapWordSize), + PROPERFMTARGS(size_t_alloc_rate), + PROPERFMTARGS(size_t_acceleration), + future_accelerated_planned_gc_time_is_average? "(from average)": "(by linear prediction)", + future_accelerated_planned_gc_time * 1000); + } else { + log_trigger("Momentary spike consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at " + "current rate (" PROPERFMT "/s) for planned %s GC time (%.2f ms) (spike threshold = %.2f)", + PROPERFMTARGS(consumption_accelerated * HeapWordSize), + PROPERFMTARGS(allocatable_words * HeapWordSize), + PROPERFMTARGS(size_t_alloc_rate), + future_accelerated_planned_gc_time_is_average? "(from average)": "(by linear prediction)", + future_accelerated_planned_gc_time * 1000, _spike_threshold_sd); + + + } + _spike_acceleration_num_samples = 0; + _spike_acceleration_first_sample_index = 0; + + // Count this as a form of RATE trigger for purposes of adjusting heuristic triggering configuration because this + // trigger is influenced more by margin_of_error_sd than by spike_threshold_sd. 
+ accept_trigger_with_type(RATE); + return true; + } + } + + // Suppose we don't trigger now, but decide to trigger in the next regulator cycle. What will be the GC time then? + predicted_future_gc_time = predict_gc_time(now + get_planned_sleep_interval()); + if (predicted_future_gc_time > avg_cycle_time) { + future_planned_gc_time = predicted_future_gc_time; + future_planned_gc_time_is_average = false; + } else { + future_planned_gc_time = avg_cycle_time; + future_planned_gc_time_is_average = true; + } + + log_debug(gc)("%s: average GC time: %.2f ms, predicted GC time: %.2f ms, allocation rate: %.0f %s/s", + _space_info->name(), avg_cycle_time * 1000, predicted_future_gc_time * 1000, + byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate)); + size_t allocatable_bytes = allocatable_words * HeapWordSize; + avg_time_to_deplete_available = allocatable_bytes / avg_alloc_rate; + + if (future_planned_gc_time > avg_time_to_deplete_available) { + log_trigger("%s GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s)" + " to deplete free headroom (%zu%s) (margin of error = %.2f)", + future_planned_gc_time_is_average?
"Average": "Linear prediction of", future_planned_gc_time * 1000, + byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate), + byte_size_in_proper_unit(allocatable_bytes), proper_unit_for_byte_size(allocatable_bytes), + _margin_of_error_sd); + + size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor; + size_t penalties = capacity / 100 * _gc_time_penalties; + size_t allocation_headroom = available; + allocation_headroom -= MIN2(allocation_headroom, spike_headroom); + allocation_headroom -= MIN2(allocation_headroom, penalties); + log_info(gc, ergo)("Free headroom: " PROPERFMT " (free) - " PROPERFMT " (spike) - " PROPERFMT " (penalties) = " PROPERFMT, + PROPERFMTARGS(available), + PROPERFMTARGS(spike_headroom), + PROPERFMTARGS(penalties), + PROPERFMTARGS(allocation_headroom)); accept_trigger_with_type(RATE); return true; } - bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd); - if (is_spiking && avg_cycle_time > allocation_headroom / rate) { - log_trigger("Average GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s) to deplete free headroom (%zu%s) (spike threshold = %.2f)", - avg_cycle_time * 1000, - byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate), - byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom), - _spike_threshold_sd); + is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd); + spike_time_to_deplete_available = (rate == 0)? 0: allocatable_bytes / rate; + if (is_spiking && (rate != 0) && (future_planned_gc_time > spike_time_to_deplete_available)) { + log_trigger("%s GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s)" + " to deplete free headroom (%zu%s) (spike threshold = %.2f)", + future_planned_gc_time_is_average?
"Average": "Linear prediction of", future_planned_gc_time * 1000, + byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate), + byte_size_in_proper_unit(allocatable_bytes), proper_unit_for_byte_size(allocatable_bytes), + _spike_threshold_sd); accept_trigger_with_type(SPIKE); return true; } - - if (ShenandoahHeuristics::should_start_gc()) { - _start_gc_is_pending = true; - return true; - } else { - return false; - } + return ShenandoahHeuristics::should_start_gc(); } void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) { @@ -352,6 +711,112 @@ size_t ShenandoahAdaptiveHeuristics::min_free_threshold() { return ShenandoahHeap::heap()->soft_max_capacity() / 100 * ShenandoahMinFreeThreshold; } +// This is called each time a new rate sample has been gathered, as governed by ShenandoahAccelerationSamplePeriod. +// Unlike traditional calculation of average allocation rate, there is no adjustment for standard deviation of the +// accelerated rate prediction. +size_t ShenandoahAdaptiveHeuristics::accelerated_consumption(double& acceleration, double& current_rate, + double avg_alloc_rate_words_per_second, + double predicted_cycle_time) const +{ + double *x_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double)); + double *y_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double)); + double x_sum = 0.0; + double y_sum = 0.0; + + assert(_spike_acceleration_num_samples > 0, "At minimum, we should have sample from this period"); + + double weighted_average_alloc; + if (_spike_acceleration_num_samples >= ShenandoahRateAccelerationSampleSize) { + double weighted_y_sum = 0; + double total_weight = 0; + double previous_x = 0; + uint delta = _spike_acceleration_num_samples - ShenandoahRateAccelerationSampleSize; + for (uint i = 0; i < ShenandoahRateAccelerationSampleSize; i++) { + uint index = (_spike_acceleration_first_sample_index + delta + i) % _spike_acceleration_buffer_size; + x_array[i] = 
_spike_acceleration_rate_timestamps[index]; + x_sum += x_array[i]; + y_array[i] = _spike_acceleration_rate_samples[index]; + if (i > 0) { + // first sample not included in weighted average because it has no weight. + double sample_weight = x_array[i] - x_array[i-1]; + weighted_y_sum += y_array[i] * sample_weight; + total_weight += sample_weight; + } + y_sum += y_array[i]; + } + weighted_average_alloc = (total_weight > 0)? weighted_y_sum / total_weight: 0; + } else { + weighted_average_alloc = 0; + } + + double momentary_rate; + if (_spike_acceleration_num_samples > ShenandoahMomentaryAllocationRateSpikeSampleSize) { + // Num samples must be strictly greater than sample size, because we need one extra sample to compute rate and weights. + // In this context, the weight of a y value (an allocation rate) is the duration for which this allocation rate was + // active (the time since previous y value was reported). An allocation rate measured over a span of 300 ms (e.g. during + // concurrent GC) has much more "weight" than an allocation rate measured over a span of 15 ms. + double weighted_y_sum = 0; + double total_weight = 0; + double sum_for_average = 0.0; + uint delta = _spike_acceleration_num_samples - ShenandoahMomentaryAllocationRateSpikeSampleSize; + for (uint i = 0; i < ShenandoahMomentaryAllocationRateSpikeSampleSize; i++) { + uint sample_index = (_spike_acceleration_first_sample_index + delta + i) % _spike_acceleration_buffer_size; + uint preceding_index = (sample_index == 0)?
_spike_acceleration_buffer_size - 1: sample_index - 1; + double sample_weight = (_spike_acceleration_rate_timestamps[sample_index] + - _spike_acceleration_rate_timestamps[preceding_index]); + weighted_y_sum += _spike_acceleration_rate_samples[sample_index] * sample_weight; + total_weight += sample_weight; + } + momentary_rate = weighted_y_sum / total_weight; + bool is_spiking = _allocation_rate.is_spiking(momentary_rate, _spike_threshold_sd); + if (!is_spiking) { + // Disable momentary spike trigger unless allocation rate delta from average exceeds sd + momentary_rate = 0.0; + } + } else { + momentary_rate = 0.0; + } + + // By default, use momentary_rate for current rate and zero acceleration. Overwrite iff best-fit line has positive slope. + current_rate = momentary_rate; + acceleration = 0.0; + if ((_spike_acceleration_num_samples >= ShenandoahRateAccelerationSampleSize) + && (weighted_average_alloc >= avg_alloc_rate_words_per_second)) { + // If the average rate across the acceleration samples is below the overall average, this sample is not eligible to + // represent acceleration of allocation rate. We may just be catching up with allocations after a lull. 
+ + double *xy_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double)); + double *x2_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double)); + double xy_sum = 0.0; + double x2_sum = 0.0; + for (uint i = 0; i < ShenandoahRateAccelerationSampleSize; i++) { + xy_array[i] = x_array[i] * y_array[i]; + xy_sum += xy_array[i]; + x2_array[i] = x_array[i] * x_array[i]; + x2_sum += x2_array[i]; + } + // Find the best-fit least-squares linear representation of rate vs time + double m; /* slope */ + double b; /* y-intercept */ + + m = ((ShenandoahRateAccelerationSampleSize * xy_sum - x_sum * y_sum) + / (ShenandoahRateAccelerationSampleSize * x2_sum - x_sum * x_sum)); + b = (y_sum - m * x_sum) / ShenandoahRateAccelerationSampleSize; + + if (m > 0) { + double proposed_current_rate = m * x_array[ShenandoahRateAccelerationSampleSize - 1] + b; + acceleration = m; + current_rate = proposed_current_rate; + } + // else, leave current_rate = momentary_rate, acceleration = 0 + } + // and here also, leave current_rate = momentary_rate, acceleration = 0 + + double time_delta = get_planned_sleep_interval() + predicted_cycle_time; + size_t words_to_be_consumed = (size_t) (current_rate * time_delta + 0.5 * acceleration * time_delta * time_delta); + return words_to_be_consumed; +} + ShenandoahAllocationRate::ShenandoahAllocationRate() : _last_sample_time(os::elapsedTime()), _last_sample_value(0), @@ -363,7 +828,7 @@ ShenandoahAllocationRate::ShenandoahAllocationRate() : double ShenandoahAllocationRate::force_sample(size_t allocated, size_t &unaccounted_bytes_allocated) { const double MinSampleTime = 0.002; // Do not sample if time since last update is less than 2 ms double now = os::elapsedTime(); - double time_since_last_update = now -_last_sample_time; + double time_since_last_update = now - _last_sample_time; if (time_since_last_update < MinSampleTime) { unaccounted_bytes_allocated = allocated - _last_sample_value; _last_sample_value = 0; 
@@ -412,8 +877,10 @@ bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const { double sd = _rate.sd(); if (sd > 0) { - // There is a small chance that that rate has already been sampled, but it - // seems not to matter in practice. + // There is a small chance that that rate has already been sampled, but it seems not to matter in practice. + // Note that z_score reports how close the rate is to the average. A value between -1 and 1 means we are within one + // standard deviation. A value between -3 and +3 means we are within 3 standard deviations. We only check for z_score + // greater than threshold because we are looking for an allocation spike which is greater than the mean. double z_score = (rate - _rate.avg()) / sd; if (z_score > threshold) { return true; diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp index 9b7824a50d7..c761f2a82f3 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp @@ -27,7 +27,9 @@ #define SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHADAPTIVEHEURISTICS_HPP #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" +#include "gc/shenandoah/shenandoahFreeSet.hpp" #include "gc/shenandoah/shenandoahPhaseTimings.hpp" +#include "gc/shenandoah/shenandoahRegulatorThread.hpp" #include "gc/shenandoah/shenandoahSharedVariables.hpp" #include "memory/allocation.hpp" #include "utilities/numberSeq.hpp" @@ -108,6 +110,26 @@ public: virtual ~ShenandoahAdaptiveHeuristics(); + virtual void initialize() override; + + virtual void post_initialize() override; + + virtual void adjust_penalty(intx step) override; + + // At the end of GC(N), we idle GC until necessary to start the next GC. Compute the threshold of memory that can be allocated + // before we need to start the next GC. 
+ void start_idle_span() override; + + // Having observed a new allocation rate sample, add this to the acceleration history so that we can determine if allocation + // rate is accelerating. + void add_rate_to_acceleration_history(double timestamp, double rate); + + // Compute and return the current allocation rate, the current rate of acceleration, and the amount of memory that we expect + // to consume if we start GC right now and gc takes predicted_cycle_time to complete. + size_t accelerated_consumption(double& acceleration, double& current_rate, + double avg_rate_words_per_sec, double predicted_cycle_time) const; + + void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset, RegionData* data, size_t size, size_t actual_free) override; @@ -136,6 +158,8 @@ public: const static double LOWEST_EXPECTED_AVAILABLE_AT_END; const static double HIGHEST_EXPECTED_AVAILABLE_AT_END; + const static size_t GC_TIME_SAMPLE_SIZE; + friend class ShenandoahAllocationRate; // Used to record the last trigger that signaled to start a GC. @@ -150,9 +174,19 @@ public: void adjust_margin_of_error(double amount); void adjust_spike_threshold(double amount); + // Returns number of words that can be allocated before we need to trigger next GC, given available in bytes. + inline size_t allocatable(size_t available) const { + return (available > _headroom_adjustment)? (available - _headroom_adjustment) / HeapWordSize: 0; + } + protected: ShenandoahAllocationRate _allocation_rate; + // Invocations of should_start_gc() happen approximately once per ms. Queries of allocation rate only happen if a + // a certain amount of time has passed since the previous query. + size_t _allocated_at_previous_query; + double _time_of_previous_allocation_query; + // The margin of error expressed in standard deviations to add to our // average cycle time and allocation rate. 
As this value increases we // tend to overestimate the rate at which mutators will deplete the @@ -179,6 +213,48 @@ protected: // source of feedback to adjust trigger parameters. TruncatedSeq _available; + ShenandoahFreeSet* _free_set; + + // This represents the time at which the allocation rate was most recently sampled for the purpose of detecting acceleration. + double _previous_acceleration_sample_timestamp; + size_t _total_allocations_at_start_of_idle; + + // bytes of headroom at which we should trigger GC + size_t _headroom_adjustment; + + // Keep track of GC_TIME_SAMPLE_SIZE most recent concurrent GC cycle times + uint _gc_time_first_sample_index; + uint _gc_time_num_samples; + double* const _gc_time_timestamps; + double* const _gc_time_samples; + double* const _gc_time_xy; // timestamp * sample + double* const _gc_time_xx; // timestamp squared + double _gc_time_sum_of_timestamps; + double _gc_time_sum_of_samples; + double _gc_time_sum_of_xy; + double _gc_time_sum_of_xx; + + double _gc_time_m; // slope + double _gc_time_b; // y-intercept + double _gc_time_sd; // sd on deviance from prediction + + // In preparation for a span during which GC will be idle, compute the headroom adjustment that will be used to + // detect when GC needs to trigger. + void compute_headroom_adjustment() override; + + void add_gc_time(double timestamp_at_start, double duration); + void add_degenerated_gc_time(double timestamp_at_start, double duration); + double predict_gc_time(double timestamp_at_start); + + // Keep track of SPIKE_ACCELERATION_SAMPLE_SIZE most recent spike allocation rate measurements. Note that it is + // typical to experience a small spike following end of GC cycle, as mutator threads refresh their TLABs. But + // there is generally an abundance of memory at this time as well, so this will not generally trigger GC. 
+ uint _spike_acceleration_buffer_size; + uint _spike_acceleration_first_sample_index; + uint _spike_acceleration_num_samples; + double* const _spike_acceleration_rate_samples; // holds rates in words/second + double* const _spike_acceleration_rate_timestamps; + // A conservative minimum threshold of free space that we'll try to maintain when possible. // For example, we might trigger a concurrent gc if we are likely to drop below // this threshold, or we might consider this when dynamically resizing generations diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp index 029b917deab..f3d31b8d0e1 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp @@ -52,6 +52,12 @@ static int compare_by_aged_live(AgedRegionData a, AgedRegionData b) { return 0; } +void ShenandoahGenerationalHeuristics::post_initialize() { + ShenandoahHeuristics::post_initialize(); + _free_set = ShenandoahHeap::heap()->free_set(); + compute_headroom_adjustment(); +} + inline void assert_no_in_place_promotions() { #ifdef ASSERT class ShenandoahNoInPlacePromotions : public ShenandoahHeapRegionClosure { diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp index 74d657feab7..da883d0d26f 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp @@ -49,6 +49,8 @@ public: void choose_collection_set(ShenandoahCollectionSet* collection_set) override; + virtual void post_initialize() override; + private: // Compute evacuation budgets prior to choosing collection set. 
void compute_evacuation_budgets(ShenandoahHeap* const heap); diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp index 8fc744112bf..603e00c401d 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp @@ -46,13 +46,16 @@ int ShenandoahHeuristics::compare_by_garbage(RegionData a, RegionData b) { } ShenandoahHeuristics::ShenandoahHeuristics(ShenandoahSpaceInfo* space_info) : + _most_recent_trigger_evaluation_time(os::elapsedTime()), + _most_recent_planned_sleep_interval(0.0), _start_gc_is_pending(false), _declined_trigger_count(0), _most_recent_declined_trigger_count(0), _space_info(space_info), _region_data(nullptr), _guaranteed_gc_interval(0), - _cycle_start(os::elapsedTime()), + _precursor_cycle_start(os::elapsedTime()), + _cycle_start(_precursor_cycle_start), _last_cycle_end(0), _gc_times_learned(0), _gc_time_penalties(0), @@ -156,6 +159,19 @@ void ShenandoahHeuristics::choose_collection_set(ShenandoahCollectionSet* collec collection_set->summarize(total_garbage, immediate_garbage, immediate_regions); } +void ShenandoahHeuristics::start_idle_span() { + // do nothing +} + +void ShenandoahHeuristics::record_degenerated_cycle_start(bool out_of_cycle) { + if (out_of_cycle) { + _precursor_cycle_start = _cycle_start = os::elapsedTime(); + } else { + _precursor_cycle_start = _cycle_start; + _cycle_start = os::elapsedTime(); + } +} + void ShenandoahHeuristics::record_cycle_start() { _cycle_start = os::elapsedTime(); } @@ -197,7 +213,6 @@ bool ShenandoahHeuristics::should_degenerate_cycle() { void ShenandoahHeuristics::adjust_penalty(intx step) { assert(0 <= _gc_time_penalties && _gc_time_penalties <= 100, "In range before adjustment: %zd", _gc_time_penalties); - if ((_most_recent_declined_trigger_count <= Penalty_Free_Declinations) && (step > 0)) { // Don't penalize if 
heuristics are not responsible for a negative outcome. Allow Penalty_Free_Declinations following // previous GC for self calibration without penalty. @@ -274,6 +289,17 @@ void ShenandoahHeuristics::initialize() { // Nothing to do by default. } +void ShenandoahHeuristics::post_initialize() { + // Nothing to do by default. +} + double ShenandoahHeuristics::elapsed_cycle_time() const { return os::elapsedTime() - _cycle_start; } + + +// Includes the time spent in abandoned concurrent GC cycle that may have triggered this degenerated cycle. +double ShenandoahHeuristics::elapsed_degenerated_cycle_time() const { + double now = os::elapsedTime(); + return now - _precursor_cycle_start; +} diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp index 633c4e87126..5bfba4f52d5 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp @@ -78,6 +78,10 @@ class ShenandoahHeuristics : public CHeapObj { }; #endif +private: + double _most_recent_trigger_evaluation_time; + double _most_recent_planned_sleep_interval; + protected: static const uint Moving_Average_Samples = 10; // Number of samples to store in moving averages @@ -85,14 +89,13 @@ protected: size_t _declined_trigger_count; // This counts how many times since previous GC finished that this // heuristic has answered false to should_start_gc(). size_t _most_recent_declined_trigger_count; - ; // This represents the value of _declined_trigger_count as captured at the + // This represents the value of _declined_trigger_count as captured at the // moment the most recent GC effort was triggered. In case the most recent // concurrent GC effort degenerates, the value of this variable allows us to // differentiate between degeneration because heuristic was overly optimistic // in delaying the trigger vs. 
degeneration for other reasons (such as the // most recent GC triggered "immediately" after previous GC finished, but the // free headroom has already been depleted). - class RegionData { private: ShenandoahHeapRegion* _region; @@ -103,6 +106,7 @@ protected: #ifdef ASSERT UnionTag _union_tag; #endif + public: inline void clear() { @@ -171,6 +175,7 @@ protected: size_t _guaranteed_gc_interval; + double _precursor_cycle_start; double _cycle_start; double _last_cycle_end; @@ -188,7 +193,7 @@ protected: RegionData* data, size_t data_size, size_t free) = 0; - void adjust_penalty(intx step); + virtual void adjust_penalty(intx step); inline void accept_trigger() { _most_recent_declined_trigger_count = _declined_trigger_count; @@ -200,6 +205,14 @@ protected: _declined_trigger_count++; } + inline double get_most_recent_wake_time() const { + return _most_recent_trigger_evaluation_time; + } + + inline double get_planned_sleep_interval() const { + return _most_recent_planned_sleep_interval; + } + public: ShenandoahHeuristics(ShenandoahSpaceInfo* space_info); virtual ~ShenandoahHeuristics(); @@ -212,10 +225,22 @@ public: _guaranteed_gc_interval = guaranteed_gc_interval; } + virtual void start_idle_span(); + virtual void compute_headroom_adjustment() { + // Default implementation does nothing. 
+ } + virtual void record_cycle_start(); + void record_degenerated_cycle_start(bool out_of_cycle); + virtual void record_cycle_end(); + void update_should_start_query_times(double now, double planned_sleep_interval) { + _most_recent_trigger_evaluation_time = now; + _most_recent_planned_sleep_interval = planned_sleep_interval; + } + virtual bool should_start_gc(); inline void cancel_trigger_request() { @@ -248,8 +273,10 @@ public: virtual bool is_diagnostic() = 0; virtual bool is_experimental() = 0; virtual void initialize(); + virtual void post_initialize(); double elapsed_cycle_time() const; + double elapsed_degenerated_cycle_time() const; virtual size_t force_alloc_rate_sample(size_t bytes_allocated) { // do nothing diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp index beff2200d90..09508a1163f 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp @@ -137,6 +137,7 @@ bool ShenandoahYoungHeuristics::should_start_gc() { // inherited triggers have already decided to start a cycle, so no further evaluation is required if (ShenandoahAdaptiveHeuristics::should_start_gc()) { + // ShenandoahAdaptiveHeuristics::should_start_gc() has already accepted trigger, or declined it. return true; } @@ -178,7 +179,7 @@ size_t ShenandoahYoungHeuristics::bytes_of_allocation_runway_before_gc_trigger(s size_t capacity = _space_info->max_capacity(); size_t usage = _space_info->used(); size_t available = (capacity > usage)? 
capacity - usage: 0; - size_t allocated = _space_info->bytes_allocated_since_gc_start(); + size_t allocated = _free_set->get_bytes_allocated_since_gc_start(); size_t available_young_collected = ShenandoahHeap::heap()->collection_set()->get_young_available_bytes_collected(); size_t anticipated_available = diff --git a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp index 5206a0558e8..f0125c38cae 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentGC.cpp @@ -1215,6 +1215,7 @@ void ShenandoahConcurrentGC::op_final_update_refs() { } heap->rebuild_free_set(true /*concurrent*/); + _generation->heuristics()->start_idle_span(); { ShenandoahTimingsTracker timing(ShenandoahPhaseTimings::final_update_refs_propagate_gc_state); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp index bc11659c5e5..c5607421265 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp @@ -59,6 +59,7 @@ void ShenandoahControlThread::run_service() { ShenandoahCollectorPolicy* const policy = heap->shenandoah_policy(); ShenandoahHeuristics* const heuristics = heap->heuristics(); + double most_recent_wake_time = os::elapsedTime(); while (!should_terminate()) { const GCCause::Cause cancelled_cause = heap->cancelled_cause(); if (cancelled_cause == GCCause::_shenandoah_stop_vm) { @@ -222,16 +223,26 @@ void ShenandoahControlThread::run_service() { // Wait before performing the next action. If allocation happened during this wait, // we exit sooner, to let heuristics re-evaluate new conditions. If we are at idle, // back off exponentially. 
- const double current = os::elapsedTime(); + const double before_sleep = most_recent_wake_time; if (heap->has_changed()) { sleep = ShenandoahControlIntervalMin; - } else if ((current - last_sleep_adjust_time) * 1000 > ShenandoahControlIntervalAdjustPeriod){ + } else if ((before_sleep - last_sleep_adjust_time) * 1000 > ShenandoahControlIntervalAdjustPeriod){ sleep = MIN2(ShenandoahControlIntervalMax, MAX2(1, sleep * 2)); - last_sleep_adjust_time = current; + last_sleep_adjust_time = before_sleep; } - MonitorLocker ml(&_control_lock, Mutex::_no_safepoint_check_flag); ml.wait(sleep); + // Record a conservative estimate of the longest anticipated sleep duration (in seconds; sleep is in ms) until we sample again. + double planned_sleep_interval = MIN2(ShenandoahControlIntervalMax, MAX2(1, sleep * 2)) / 1000.0; + most_recent_wake_time = os::elapsedTime(); + heuristics->update_should_start_query_times(most_recent_wake_time, planned_sleep_interval); + if (LogTarget(Debug, gc, thread)::is_enabled()) { + double elapsed = most_recent_wake_time - before_sleep; + double hiccup = elapsed - double(sleep) / 1000.0; + if (hiccup > 0.001) { + log_debug(gc, thread)("Control Thread hiccup time: %.3fs", hiccup); + } + } } } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp index 99776e38bfe..8cd8a390c4a 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahDegeneratedGC.cpp @@ -314,6 +314,7 @@ void ShenandoahDegenGC::op_degenerated() { if (progress) { heap->notify_gc_progress(); _generation->heuristics()->record_degenerated(); + heap->start_idle_span(); } else if (policy->should_upgrade_degenerated_gc()) { // Upgrade to full GC, register full-GC impact on heuristics.
op_degenerated_futile(); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index 961800f20d9..c39e2e7bb79 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -287,9 +287,25 @@ void ShenandoahFreeSet::resize_old_collector_capacity(size_t regions) { // else, old generation is already appropriately sized } + void ShenandoahFreeSet::reset_bytes_allocated_since_gc_start(size_t initial_bytes_allocated) { shenandoah_assert_heaplocked(); + // Future inquiries of get_total_bytes_allocated() will return the sum of + // _total_bytes_previously_allocated and _mutator_bytes_allocated_since_gc_start. + // Since _mutator_bytes_allocated_since_gc_start does not start at zero, we subtract initial_bytes_allocated so as + // to not double count these allocated bytes. + size_t original_mutator_bytes_allocated_since_gc_start = _mutator_bytes_allocated_since_gc_start; + + // Setting _mutator_bytes_allocated_since_gc_start before _total_bytes_previously_allocated reduces the damage + // in the case that the control or regulator thread queries get_bytes_allocated_since_previous_sample() between + // the two assignments. + // + // These are not declared as volatile so the compiler or hardware may reorder the assignments. The implementation of + // get_bytes_allocated_since_previous_sample() is robust to this possibility, as are triggering heuristics. The current + // implementation assumes we are better off to tolerate the very rare race rather than impose a synchronization penalty + // on every update and fetch. (Perhaps it would be better to make the opposite tradeoff for improved maintainability.)
_mutator_bytes_allocated_since_gc_start = initial_bytes_allocated; + _total_bytes_previously_allocated += original_mutator_bytes_allocated_since_gc_start - initial_bytes_allocated; } void ShenandoahFreeSet::increase_bytes_allocated(size_t bytes) { @@ -1211,6 +1227,8 @@ inline void ShenandoahRegionPartitions::assert_bounds_sanity() { ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) : _heap(heap), _partitions(max_regions, this), + _total_bytes_previously_allocated(0), + _mutator_bytes_at_last_sample(0), _total_humongous_waste(0), _alloc_bias_weight(0), _total_young_used(0), @@ -1676,9 +1694,6 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah // Regardless of whether this allocation succeeded, if the remaining memory is less than PLAB:min_size(), retire this region. // Note that retire_from_partition() increases used to account for waste. - // Also, if this allocation request failed and the consumed within this region * ShenandoahEvacWaste > region size, - // then retire the region so that subsequent searches can find available memory more quickly. 
- size_t idx = r->index(); size_t waste_bytes = _partitions.retire_from_partition(orig_partition, idx, r->used()); DEBUG_ONLY(boundary_changed = true;) @@ -1796,7 +1811,6 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req, bo // found the match break; } - end++; } @@ -2036,7 +2050,8 @@ void ShenandoahFreeSet::clear_internal() { _partitions.set_bias_from_left_to_right(ShenandoahFreeSetPartitionId::OldCollector, false); } -void ShenandoahFreeSet::find_regions_with_alloc_capacity(size_t &young_trashed_regions, size_t &old_trashed_regions, +// Returns total allocatable words in Mutator partition +size_t ShenandoahFreeSet::find_regions_with_alloc_capacity(size_t &young_trashed_regions, size_t &old_trashed_regions, size_t &first_old_region, size_t &last_old_region, size_t &old_region_count) { // This resets all state information, removing all regions from all sets. @@ -2054,6 +2069,8 @@ void ShenandoahFreeSet::find_regions_with_alloc_capacity(size_t &young_trashed_r size_t region_size_bytes = _partitions.region_size_bytes(); size_t max_regions = _partitions.max(); + size_t mutator_alloc_capacity_in_words = 0; + size_t mutator_leftmost = max_regions; size_t mutator_rightmost = 0; size_t mutator_leftmost_empty = max_regions; @@ -2123,6 +2140,7 @@ void ShenandoahFreeSet::find_regions_with_alloc_capacity(size_t &young_trashed_r if (region->is_trash() || !region->is_old()) { // Both young and old (possibly immediately) collected regions (trashed) are placed into the Mutator set _partitions.raw_assign_membership(idx, ShenandoahFreeSetPartitionId::Mutator); + mutator_alloc_capacity_in_words += ac / HeapWordSize; if (idx < mutator_leftmost) { mutator_leftmost = idx; } @@ -2279,6 +2297,7 @@ void ShenandoahFreeSet::find_regions_with_alloc_capacity(size_t &young_trashed_r _partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator), _partitions.leftmost(ShenandoahFreeSetPartitionId::OldCollector), 
_partitions.rightmost(ShenandoahFreeSetPartitionId::OldCollector)); + return mutator_alloc_capacity_in_words; } void ShenandoahFreeSet::transfer_humongous_regions_from_mutator_to_old_collector(size_t xfer_regions, @@ -2583,19 +2602,20 @@ void ShenandoahFreeSet::prepare_to_rebuild(size_t &young_trashed_regions, size_t clear(); log_debug(gc, free)("Rebuilding FreeSet"); - // This places regions that have alloc_capacity into the old_collector set if they identify as is_old() or the - // mutator set otherwise. All trashed (cset) regions are affiliated young and placed in mutator set. - find_regions_with_alloc_capacity(young_trashed_regions, old_trashed_regions, - first_old_region, last_old_region, old_region_count); + // Place regions that have alloc_capacity into the old_collector set if they identify as is_old() or the + // mutator set otherwise. All trashed (cset) regions are affiliated young and placed in mutator set. Save the + // allocatable words in mutator partition in state variable. 
+ _prepare_to_rebuild_mutator_free = find_regions_with_alloc_capacity(young_trashed_regions, old_trashed_regions, + first_old_region, last_old_region, old_region_count); } - -void ShenandoahFreeSet::finish_rebuild(size_t young_cset_regions, size_t old_cset_regions, size_t old_region_count) { +// Completes the rebuild begun by prepare_to_rebuild(). Declared void: no value is returned. +void ShenandoahFreeSet::finish_rebuild(size_t young_trashed_regions, size_t old_trashed_regions, size_t old_region_count) { shenandoah_assert_heaplocked(); size_t young_reserve(0), old_reserve(0); if (_heap->mode()->is_generational()) { - compute_young_and_old_reserves(young_cset_regions, old_cset_regions, young_reserve, old_reserve); + compute_young_and_old_reserves(young_trashed_regions, old_trashed_regions, young_reserve, old_reserve); } else { young_reserve = (_heap->max_capacity() / 100) * ShenandoahEvacReserve; old_reserve = 0; @@ -2744,10 +2764,13 @@ void ShenandoahFreeSet::compute_young_and_old_reserves(size_t young_trashed_regi // into the collector set or old collector set in order to assure that the memory available for allocations within // the collector set is at least to_reserve and the memory available for allocations within the old collector set // is at least to_reserve_old. -void ShenandoahFreeSet::reserve_regions(size_t to_reserve, size_t to_reserve_old, size_t &old_region_count, - size_t &young_used_regions, size_t &old_used_regions, - size_t &young_used_bytes, size_t &old_used_bytes) { +// +// Returns total mutator alloc capacity, in words.
+size_t ShenandoahFreeSet::reserve_regions(size_t to_reserve, size_t to_reserve_old, size_t &old_region_count, + size_t &young_used_regions, size_t &old_used_regions, + size_t &young_used_bytes, size_t &old_used_bytes) { const size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes(); + size_t mutator_allocatable_words = _prepare_to_rebuild_mutator_free; young_used_regions = 0; old_used_regions = 0; @@ -2825,6 +2848,8 @@ void ShenandoahFreeSet::reserve_regions(size_t to_reserve, size_t to_reserve_old _partitions.leftmost(ShenandoahFreeSetPartitionId::OldCollector), _partitions.rightmost(ShenandoahFreeSetPartitionId::OldCollector)); old_region_count++; + assert(ac == ShenandoahHeapRegion::region_size_bytes(), "Cannot move to old unless entire region is in alloc capacity"); + mutator_allocatable_words -= ShenandoahHeapRegion::region_size_words(); continue; } } @@ -2868,8 +2893,10 @@ void ShenandoahFreeSet::reserve_regions(size_t to_reserve, size_t to_reserve_old " Collector range [%zd, %zd]", _partitions.leftmost(ShenandoahFreeSetPartitionId::Mutator), _partitions.rightmost(ShenandoahFreeSetPartitionId::Mutator), - _partitions.leftmost(ShenandoahFreeSetPartitionId::Collector), - _partitions.rightmost(ShenandoahFreeSetPartitionId::Collector)); + _partitions.leftmost(ShenandoahFreeSetPartitionId::OldCollector), + _partitions.rightmost(ShenandoahFreeSetPartitionId::OldCollector)); + + mutator_allocatable_words -= ac / HeapWordSize; continue; } @@ -2977,6 +3004,7 @@ void ShenandoahFreeSet::reserve_regions(size_t to_reserve, size_t to_reserve_old PROPERFMTARGS(to_reserve), PROPERFMTARGS(reserve)); } } + return mutator_allocatable_words; } void ShenandoahFreeSet::establish_old_collector_alloc_bias() { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index d55a06d5713..2df06432bd2 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++
b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -437,6 +437,12 @@ private: ShenandoahHeap* const _heap; ShenandoahRegionPartitions _partitions; + size_t _total_bytes_previously_allocated; + size_t _mutator_bytes_at_last_sample; + + // Temporarily holds the Mutator partition's allocatable words between prepare_to_rebuild() and finish_rebuild() + size_t _prepare_to_rebuild_mutator_free; + // This locks the rebuild process (in combination with the global heap lock). Whenever we rebuild the free set, // we first acquire the global heap lock and then we acquire this _rebuild_lock in a nested context. Threads that // need to check available, acquire only the _rebuild_lock to make sure that they are not obtaining the value of @@ -446,10 +452,10 @@ private: // locks will acquire them in the same order: first the global heap lock and then the rebuild lock. ShenandoahRebuildLock _rebuild_lock; - size_t _total_humongous_waste; - HeapWord* allocate_aligned_plab(size_t size, ShenandoahAllocRequest& req, ShenandoahHeapRegion* r); + size_t _total_humongous_waste; + // We re-evaluate the left-to-right allocation bias whenever _alloc_bias_weight is less than zero. Each time // we allocate an object, we decrement the count of this value. Each time we re-evaluate whether to allocate // from right-to-left or left-to-right, we reset the value of this counter to _InitialAllocBiasWeight. @@ -662,10 +668,47 @@ public: void increase_bytes_allocated(size_t bytes); + // Return an approximation of the bytes allocated since GC start. The value returned is monotonically non-decreasing + // in time within each GC cycle. For certain GC cycles, the value returned may include some bytes allocated before + // the start of the current GC cycle.
inline size_t get_bytes_allocated_since_gc_start() const { return _mutator_bytes_allocated_since_gc_start; } + inline size_t get_total_bytes_allocated() { + return _mutator_bytes_allocated_since_gc_start + _total_bytes_previously_allocated; + } + + inline size_t get_bytes_allocated_since_previous_sample() { + size_t total_bytes = get_total_bytes_allocated(); + size_t result; + if (total_bytes < _mutator_bytes_at_last_sample) { + // This rare condition may occur if bytes allocated overflows (wraps around) size_t tally of allocations. + // This may also occur in the very rare situation that get_total_bytes_allocated() is queried in the middle of + // reset_bytes_allocated_since_gc_start(). Note that there is no lock to assure that the two global variables + // it modifies are modified atomically (_total_bytes_previously_allocated and _mutator_bytes_allocated_since_gc_start). + // This has been observed to occur when an out-of-cycle degenerated cycle is starting (and thus calls + // reset_bytes_allocated_since_gc_start()) at the same time that the control (non-generational mode) or + // regulator (generational-mode) thread calls should_start_gc() (which invokes get_bytes_allocated_since_previous_sample()). + // + // Handle this rare situation by responding with the "innocent" value 0 and resetting internal state so that the + // next query can recalibrate. + result = 0; + } else { + // Note: there's always the possibility that the tally of total allocations exceeds the 64-bit capacity of our size_t + // counter. We assume that the difference between relevant samples does not exceed this count.
Example: + // Suppose _mutator_bytes_at_last_sample is 0xffff_ffff_ffff_fff0 (18,446,744,073,709,551,600 Decimal) + // and total_bytes is 0x0000_0000_0000_8000 (32,768 Decimal) + // Then, total_bytes - _mutator_bytes_at_last_sample can be done adding 2's complement of subtrahend: + // 2's complement of _mutator_bytes_at_last_sample is: 0x0000_0000_0000_0010 (16 Decimal) + // plus total_bytes: 0x0000_0000_0000_8000 (32,768 Decimal) + // sum: 0x0000_0000_0000_8010 (32,784 Decimal) + // NOTE(review): in this example total_bytes < _mutator_bytes_at_last_sample, which the branch above answers with 0; + // the subtraction below executes only when total_bytes >= _mutator_bytes_at_last_sample. + result = total_bytes - _mutator_bytes_at_last_sample; + } + _mutator_bytes_at_last_sample = total_bytes; + return result; + } + // Public because ShenandoahRegionPartitions assertions require access. inline size_t alloc_capacity(ShenandoahHeapRegion *r) const; inline size_t alloc_capacity(size_t idx) const; @@ -781,15 +824,15 @@ public: // Acquire heap lock and log status, assuming heap lock is not acquired by the caller. void log_status_under_lock(); - // Note that capacity is the number of regions that had available memory at most recent rebuild. It is not the - // entire size of the young or global generation. (Regions within the generation that were fully utilized at time of - // rebuild are not counted as part of capacity.) - - // All three of the following functions may produce stale data if called without owning the global heap lock. + // All four of the following functions may produce stale data if called without owning the global heap lock. // Changes to the values of these variables are performed with a lock. A change to capacity or used "atomically" // adjusts available with respect to lock holders. However, sequential calls to these three functions may produce // inconsistent data: available may not equal capacity - used because the intermediate states of any "atomic" // locked action can be seen by these unlocked functions. + + // Note that capacity is the number of regions that had available memory at most recent rebuild.
It is not the + // entire size of the young or global generation. (Regions within the generation that were fully utilized at time of + // rebuild are not counted as part of capacity.) inline size_t capacity_holding_lock() const { shenandoah_assert_heaplocked(); return _partitions.capacity_of(ShenandoahFreeSetPartitionId::Mutator); @@ -808,11 +851,14 @@ public: ShenandoahRebuildLocker locker(rebuild_lock()); return _partitions.used_by(ShenandoahFreeSetPartitionId::Mutator); } + inline size_t reserved() const { return _partitions.capacity_of(ShenandoahFreeSetPartitionId::Collector); } inline size_t available() { shenandoah_assert_not_heaplocked(); ShenandoahRebuildLocker locker(rebuild_lock()); return _partitions.available_in_locked_for_rebuild(ShenandoahFreeSetPartitionId::Mutator); } + inline size_t available_holding_lock() const + { return _partitions.available_in(ShenandoahFreeSetPartitionId::Mutator); } // Use this version of available() if the heap lock is held. inline size_t available_locked() const { @@ -880,13 +926,17 @@ public: // first_old_region is the index of the first region that is part of the OldCollector set // last_old_region is the index of the last region that is part of the OldCollector set // old_region_count is the number of regions in the OldCollector set that have memory available to be allocated - void find_regions_with_alloc_capacity(size_t &young_cset_regions, size_t &old_cset_regions, - size_t &first_old_region, size_t &last_old_region, size_t &old_region_count); + // + // Returns allocatable memory within Mutator partition, in words. + size_t find_regions_with_alloc_capacity(size_t &young_cset_regions, size_t &old_cset_regions, + size_t &first_old_region, size_t &last_old_region, size_t &old_region_count); // Ensure that Collector has at least to_reserve bytes of available memory, and OldCollector has at least old_reserve // bytes of available memory. 
On input, old_region_count holds the number of regions already present in the // OldCollector partition. Upon return, old_region_count holds the updated number of regions in the OldCollector partition. - void reserve_regions(size_t to_reserve, size_t old_reserve, size_t &old_region_count, + // + // Returns allocatable memory within Mutator partition, in words. + size_t reserve_regions(size_t to_reserve, size_t old_reserve, size_t &old_region_count, size_t &young_used_regions, size_t &old_used_regions, size_t &young_used_bytes, size_t &old_used_bytes); // Reserve space for evacuations, with regions reserved for old evacuations placed to the right diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp index 3c92750cc0c..750f7e9122d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp @@ -252,6 +252,7 @@ void ShenandoahFullGC::do_it(GCCause::Cause gc_cause) { phase5_epilog(); } + heap->start_idle_span(); // Resize metaspace MetaspaceGC::compute_new_size(); @@ -1124,8 +1125,9 @@ void ShenandoahFullGC::phase5_epilog() { if (heap->mode()->is_generational()) { ShenandoahGenerationalFullGC::compute_balances(); } - free_set->finish_rebuild(young_trashed_regions, old_trashed_regions, num_old); + heap->free_set()->finish_rebuild(young_trashed_regions, old_trashed_regions, num_old); } + // Set mark incomplete because the marking bitmaps have been reset except pinned regions. 
_generation->set_mark_incomplete(); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp index ddb50ee0020..b2d5e5423dd 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahGeneration.cpp @@ -151,6 +151,10 @@ ShenandoahHeuristics* ShenandoahGeneration::initialize_heuristics(ShenandoahMode return _heuristics; } +void ShenandoahGeneration::post_initialize_heuristics() { + _heuristics->post_initialize(); +} + void ShenandoahGeneration::set_evacuation_reserve(size_t new_val) { shenandoah_assert_heaplocked(); _evacuation_reserve = new_val; @@ -358,8 +362,7 @@ void ShenandoahGeneration::cancel_marking() { set_concurrent_mark_in_progress(false); } -ShenandoahGeneration::ShenandoahGeneration(ShenandoahGenerationType type, - uint max_workers) : +ShenandoahGeneration::ShenandoahGeneration(ShenandoahGenerationType type, uint max_workers) : _type(type), _task_queues(new ShenandoahObjToScanQueueSet(max_workers)), _ref_processor(new ShenandoahReferenceProcessor(this, MAX2(max_workers, 1U))), diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGeneration.hpp b/src/hotspot/share/gc/shenandoah/shenandoahGeneration.hpp index 946f2b91520..1a549be8988 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahGeneration.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahGeneration.hpp @@ -83,10 +83,10 @@ private: ShenandoahReferenceProcessor* ref_processor() { return _ref_processor; } virtual ShenandoahHeuristics* initialize_heuristics(ShenandoahMode* gc_mode); + virtual void post_initialize_heuristics(); virtual void post_initialize(ShenandoahHeap* heap); - virtual size_t bytes_allocated_since_gc_start() const override = 0; virtual size_t used() const override = 0; virtual size_t used_regions() const = 0; virtual size_t used_regions_size() const = 0; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp 
b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp index 3b57190cc75..cc7547b8ac1 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp @@ -622,10 +622,11 @@ void ShenandoahGenerationalControlThread::service_stw_full_cycle(GCCause::Cause void ShenandoahGenerationalControlThread::service_stw_degenerated_cycle(const ShenandoahGCRequest& request) { assert(_degen_point != ShenandoahGC::_degenerated_unset, "Degenerated point should be set"); + request.generation->heuristics()->record_degenerated_cycle_start(ShenandoahGC::ShenandoahDegenPoint::_degenerated_outside_cycle + == _degen_point); _heap->increment_total_collections(false); ShenandoahGCSession session(request.cause, request.generation); - ShenandoahDegenGC gc(_degen_point, request.generation); gc.collect(request.cause); _degen_point = ShenandoahGC::_degenerated_unset; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp index 2c2e5533c01..b302bde8510 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.cpp @@ -90,11 +90,23 @@ ShenandoahGenerationalHeap::ShenandoahGenerationalHeap(ShenandoahCollectorPolicy assert(is_aligned(_max_plab_size, CardTable::card_size_in_words()), "max_plab_size must be aligned"); } +void ShenandoahGenerationalHeap::initialize_generations() { + ShenandoahHeap::initialize_generations(); + _young_generation->post_initialize(this); + _old_generation->post_initialize(this); +} + void ShenandoahGenerationalHeap::post_initialize() { ShenandoahHeap::post_initialize(); _age_census = new ShenandoahAgeCensus(); } +void ShenandoahGenerationalHeap::post_initialize_heuristics() { + ShenandoahHeap::post_initialize_heuristics(); + _young_generation->post_initialize_heuristics(); + 
_old_generation->post_initialize_heuristics(); +} + void ShenandoahGenerationalHeap::print_init_logger() const { ShenandoahGenerationalInitLogger logger; logger.print_all(); @@ -110,12 +122,6 @@ void ShenandoahGenerationalHeap::initialize_heuristics() { _old_generation->initialize_heuristics(mode()); } -void ShenandoahGenerationalHeap::post_initialize_heuristics() { - ShenandoahHeap::post_initialize_heuristics(); - _young_generation->post_initialize(this); - _old_generation->post_initialize(this); -} - void ShenandoahGenerationalHeap::initialize_serviceability() { assert(mode()->is_generational(), "Only for the generational mode"); _young_gen_memory_pool = new ShenandoahYoungGenMemoryPool(this); @@ -152,6 +158,10 @@ void ShenandoahGenerationalHeap::stop() { regulator_thread()->stop(); } +void ShenandoahGenerationalHeap::start_idle_span() { + young_generation()->heuristics()->start_idle_span(); +} + bool ShenandoahGenerationalHeap::requires_barriers(stackChunkOop obj) const { if (is_idle()) { return false; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.hpp index 719bae52a83..7fe0362aa3f 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.hpp @@ -40,6 +40,7 @@ class ShenandoahGenerationalHeap : public ShenandoahHeap { public: explicit ShenandoahGenerationalHeap(ShenandoahCollectorPolicy* policy); void post_initialize() override; + void initialize_generations() override; void initialize_heuristics() override; void post_initialize_heuristics() override; @@ -82,6 +83,8 @@ public: inline bool is_tenurable(const ShenandoahHeapRegion* r) const; + void start_idle_span() override; + // Ages regions that haven't been used for allocations in the current cycle. // Resets ages for regions that have been used for allocations. 
void update_region_ages(ShenandoahMarkingContext* ctx); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp index d78bdae6a51..c6889351161 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp @@ -435,7 +435,7 @@ jint ShenandoahHeap::initialize() { } _free_set = new ShenandoahFreeSet(this, _num_regions); - post_initialize_heuristics(); + initialize_generations(); // We are initializing free set. We ignore cset region tallies. size_t young_trashed_regions, old_trashed_regions, first_old, last_old, num_old; @@ -492,16 +492,17 @@ jint ShenandoahHeap::initialize() { _phase_timings = new ShenandoahPhaseTimings(max_workers()); ShenandoahCodeRoots::initialize(); + // Initialization of controller makes use of variables established by initialize_heuristics. initialize_controller(); + // Certain initialization of heuristics must be deferred until after controller is initialized. + post_initialize_heuristics(); + start_idle_span(); if (ShenandoahUncommit) { _uncommit_thread = new ShenandoahUncommitThread(this); } - print_init_logger(); - FullGCForwarding::initialize(_heap_region); - return JNI_OK; } @@ -545,10 +546,6 @@ void ShenandoahHeap::initialize_heuristics() { _global_generation->initialize_heuristics(mode()); } -void ShenandoahHeap::post_initialize_heuristics() { - _global_generation->post_initialize(this); -} - #ifdef _MSC_VER #pragma warning( push ) #pragma warning( disable:4355 ) // 'this' : used in base member initializer list @@ -690,6 +687,11 @@ public: } }; +void ShenandoahHeap::initialize_generations() { + _global_generation->post_initialize(this); +} + +// We do not call this explicitly. It is called by Hotspot infrastructure.
void ShenandoahHeap::post_initialize() { CollectedHeap::post_initialize(); @@ -717,6 +719,10 @@ void ShenandoahHeap::post_initialize() { JFR_ONLY(ShenandoahJFRSupport::register_jfr_type_serializers();) } +void ShenandoahHeap::post_initialize_heuristics() { + _global_generation->post_initialize_heuristics(); +} + ShenandoahHeuristics* ShenandoahHeap::heuristics() { return _global_generation->heuristics(); } @@ -760,6 +766,7 @@ void ShenandoahHeap::set_soft_max_capacity(size_t v) { "Should be in bounds: %zu <= %zu <= %zu", min_capacity(), v, max_capacity()); _soft_max_size.store_relaxed(v); + heuristics()->compute_headroom_adjustment(); } size_t ShenandoahHeap::min_capacity() const { @@ -835,6 +842,10 @@ void ShenandoahHeap::notify_heap_changed() { _heap_changed.try_set(); } +void ShenandoahHeap::start_idle_span() { + heuristics()->start_idle_span(); +} + void ShenandoahHeap::set_forced_counters_update(bool value) { monitoring_support()->set_forced_counters_update(value); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp index 85ad339469d..d4604be0aec 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp @@ -195,6 +195,7 @@ public: ShenandoahHeap(ShenandoahCollectorPolicy* policy); jint initialize() override; void post_initialize() override; + virtual void initialize_generations(); void initialize_mode(); virtual void initialize_heuristics(); virtual void post_initialize_heuristics(); @@ -393,6 +394,8 @@ public: return _heap_changed.try_unset(); } + virtual void start_idle_span(); + void set_concurrent_young_mark_in_progress(bool in_progress); void set_concurrent_old_mark_in_progress(bool in_progress); void set_evacuation_in_progress(bool in_progress); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.cpp index 4fda65b4030..1b12909bcaf 100644 --- 
a/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.cpp @@ -422,6 +422,7 @@ void ShenandoahOldGeneration::prepare_regions_and_collection_set(bool concurrent // At the end of old-gen, we may find that we have reclaimed immediate garbage, allowing a longer allocation runway. // We may also find that we have accumulated canddiate regions for mixed evacuation. If so, we will want to expand // the OldCollector reserve in order to make room for these mixed evacuations. + assert(ShenandoahHeap::heap()->mode()->is_generational(), "sanity"); assert(young_trash_regions == 0, "sanity"); ShenandoahGenerationalHeap* gen_heap = ShenandoahGenerationalHeap::heap(); @@ -765,6 +766,7 @@ size_t ShenandoahOldGeneration::used_regions_size() const { return used_regions * ShenandoahHeapRegion::region_size_bytes(); } +// For the old generation, max_capacity() equals soft_max_capacity() size_t ShenandoahOldGeneration::max_capacity() const { size_t total_regions = _free_set->total_old_regions(); return total_regions * ShenandoahHeapRegion::region_size_bytes(); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRegulatorThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahRegulatorThread.cpp index ec4b7c7217c..fe92a3a3e08 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahRegulatorThread.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahRegulatorThread.cpp @@ -36,7 +36,8 @@ ShenandoahRegulatorThread::ShenandoahRegulatorThread(ShenandoahGenerationalContr _heap(ShenandoahHeap::heap()), _control_thread(control_thread), _sleep(ShenandoahControlIntervalMin), - _last_sleep_adjust_time(os::elapsedTime()) { + _most_recent_wake_time(os::elapsedTime()), + _last_sleep_adjust_time(_most_recent_wake_time) { shenandoah_assert_generational(); _old_heuristics = _heap->old_generation()->heuristics(); _young_heuristics = _heap->young_generation()->heuristics(); @@ -115,19 +116,22 @@ void 
ShenandoahRegulatorThread::regulator_sleep() { // Wait before performing the next action. If allocation happened during this wait, // we exit sooner, to let heuristics re-evaluate new conditions. If we are at idle, // back off exponentially. - double current = os::elapsedTime(); - + double before_sleep_time = _most_recent_wake_time; if (ShenandoahHeap::heap()->has_changed()) { _sleep = ShenandoahControlIntervalMin; - } else if ((current - _last_sleep_adjust_time) * 1000 > ShenandoahControlIntervalAdjustPeriod){ + } else if ((before_sleep_time - _last_sleep_adjust_time) * 1000 > ShenandoahControlIntervalAdjustPeriod){ _sleep = MIN2(ShenandoahControlIntervalMax, MAX2(1u, _sleep * 2)); - _last_sleep_adjust_time = current; + _last_sleep_adjust_time = before_sleep_time; } SuspendibleThreadSetLeaver leaver; os::naked_short_sleep(_sleep); + double wake_time = os::elapsedTime(); + _most_recent_period = wake_time - _most_recent_wake_time; + _most_recent_wake_time = wake_time; + _young_heuristics->update_should_start_query_times(_most_recent_wake_time, double(_sleep) / 1000.0); if (LogTarget(Debug, gc, thread)::is_enabled()) { - double elapsed = os::elapsedTime() - current; + double elapsed = _most_recent_wake_time - before_sleep_time; double hiccup = elapsed - double(_sleep); if (hiccup > 0.001) { log_debug(gc, thread)("Regulator hiccup time: %.3fs", hiccup); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRegulatorThread.hpp b/src/hotspot/share/gc/shenandoah/shenandoahRegulatorThread.hpp index 2519025b6fb..cc41bc2c65b 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahRegulatorThread.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahRegulatorThread.hpp @@ -79,7 +79,10 @@ class ShenandoahRegulatorThread: public ConcurrentGCThread { ShenandoahOldHeuristics* _old_heuristics; ShenandoahHeuristics* _global_heuristics; + // duration of planned regulator sleep period, in ms uint _sleep; + double _most_recent_wake_time; + double _most_recent_period; double 
_last_sleep_adjust_time; }; diff --git a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp index 3eb1a06a911..d3e9a1f9fae 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp @@ -34,6 +34,59 @@ range, \ constraint) \ \ + product(uint, ShenandoahAccelerationSamplePeriod, 15, EXPERIMENTAL, \ + "When at least this much time (measured in ms) has passed " \ + "since the acceleration allocation rate was most recently " \ + "sampled, capture another allocation rate sample for the purpose "\ + "of detecting acceleration or momentary spikes in allocation " \ + "rate. A smaller value allows quicker response to changes in " \ + "allocation rates but is more vulnerable to noise and requires " \ + "more monitoring effort.") \ + range(1, 1000) \ + \ + product(uint, ShenandoahRateAccelerationSampleSize, 8, EXPERIMENTAL, \ + "In selected ShenandoahControlIntervals " \ + "(if ShenandoahAccelerationSamplePeriod ms have passed " \ + "since previous allocation rate sample), " \ + "we compute the allocation rate since the previous rate was " \ + "sampled. This many samples are analyzed to determine whether " \ + "allocation rates are accelerating. Acceleration may occur " \ + "due to increasing client demand or due to phase changes in " \ + "an application. A larger value reduces sensitivity to " \ + "noise and delays recognition of the accelerating trend. A " \ + "larger value may also cause the heuristic to miss detection " \ + "of very quick accelerations. Smaller values may cause random " \ + "noise to be perceived as acceleration of allocation rate, " \ + "triggering excess collections. Note that the acceleration " \ + "need not last the entire span of the sampled duration to be " \ + "detected. 
If the last several of all samples are significantly " \ + "larger than the other samples, the best fit line through all " \ + "sampled values will have an upward slope, manifesting as " \ + "acceleration.") \ + range(1,64) \ + \ + product(uint, ShenandoahMomentaryAllocationRateSpikeSampleSize, \ + 2, EXPERIMENTAL, \ + "In selected ShenandoahControlIntervals " \ + "(if ShenandoahAccelerationSamplePeriod ms have passed " \ + "since previous allocation rate sample), we compute " \ + "the allocation rate since the previous rate was sampled. " \ + "The weighted average of this " \ + "many most recent momentary allocation rate samples is compared " \ + "against current allocation runway and anticipated GC time to " \ + "determine whether a spike in momentary allocation rate " \ + "justifies an early GC trigger. Momentary allocation spike " \ + "detection is in addition to previously implemented " \ + "ShenandoahAdaptiveInitialSpikeThreshold, the latter of which " \ + "is more effective at detecting slower spikes. The latter " \ + "spike detection samples at the rate specified by " \ + "ShenandoahAdaptiveSampleFrequencyHz. The value of this " \ + "parameter must be less than the value of " \ + "ShenandoahRateAccelerationSampleSize. A larger value makes " \ + "momentary spike detection less sensitive. A smaller value " \ + "may result in excessive GC triggers.") \ + range(1,64) \ + \ product(uintx, ShenandoahGenerationalMinPIPUsage, 30, EXPERIMENTAL, \ "(Generational mode only) What percent of a heap region " \ "should be used before we consider promoting a region in " \