/*
 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "gc/shared/gcCause.hpp"
#include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
#include "gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp"
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
#include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
#include "logging/log.hpp"
#include "logging/logTag.hpp"
#include "runtime/globals.hpp"
#include "utilities/quickSort.hpp"

// These constants are used to adjust the margin of error for the moving
// average of the allocation rate and cycle time. The units are standard
// deviations.
const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2;
const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1;

// These are used to decide if we want to make any adjustments at all
// at the end of a successful concurrent cycle.
const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;

// These values are the confidence interval expressed as standard deviations.
// At the minimum confidence level, there is a 25% chance that the true value of
// the estimate (average cycle time or allocation rate) is not more than
// MINIMUM_CONFIDENCE standard deviations away from our estimate. Similarly, the
// MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance
// that the true value of our estimate is outside the interval. These are used
// as bounds on the adjustments applied at the outcome of a GC cycle.
const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25%
const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9%

// To enable detection of GC time trends, we keep separate track of the recent history of gc time. During initialization,
// for example, the amount of live memory may be increasing, which is likely to cause the GC times to increase. This history
// allows us to predict increasing GC times rather than always assuming average recent GC time is the best predictor.
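// For illustration only (hypothetical numbers): with GC_TIME_SAMPLE_SIZE = 3 samples of
// (timestamp, gc_time) = (10.0s, 0.050s), (20.0s, 0.060s) and (30.0s, 0.070s), the best-fit line
// has slope m = 0.001 s/s and intercept b = 0.040 s, so a cycle starting at t = 40.0s would be
// predicted to take 0.080s, rather than the 0.060s that a simple average of the samples suggests.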
const size_t ShenandoahAdaptiveHeuristics::GC_TIME_SAMPLE_SIZE = 3;

// We also keep separate track of recently sampled allocation rates for two purposes:
// 1. The number of samples examined to determine acceleration of allocation is represented by
//    ShenandoahRateAccelerationSampleSize
// 2. The number of most recent samples averaged to determine a momentary allocation spike is represented by
//    ShenandoahMomentaryAllocationRateSpikeSampleSize
// Allocation rates are sampled by the regulator thread, which typically runs every ms. There may be jitter in the scheduling
// of the regulator thread. To reduce signal noise and synchronization overhead, we do not sample allocation rate with every
// iteration of the regulator. We prefer a sample period longer than 1 ms so that a statistically significant number
// of allocations can occur within each sample period. The regulator thread samples allocation rate only if at least
// ShenandoahAccelerationSamplePeriod seconds have passed since it previously sampled the allocation rate.
//
// This trigger responds much more quickly than the traditional trigger, which monitors 100 ms spans. When acceleration is
// detected, its effect on the anticipated consumption of available memory is also much larger than the consumption implied
// by an assumed constant allocation rate.
ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics(ShenandoahSpaceInfo* space_info) :
  ShenandoahHeuristics(space_info),
  _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence),
  _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold),
  _last_trigger(OTHER),
  _available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor),
  _free_set(nullptr),
  _is_generational(ShenandoahHeap::heap()->mode()->is_generational()),
  _regulator_thread(nullptr),
  _previous_allocation_timestamp(0.0),
  _gc_time_first_sample_index(0),
  _gc_time_num_samples(0),
  _gc_time_timestamps(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
  _gc_time_samples(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
  _gc_time_xy(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
  _gc_time_xx(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)),
  _gc_time_sum_of_timestamps(0),
  _gc_time_sum_of_samples(0),
  _gc_time_sum_of_xy(0),
  _gc_time_sum_of_xx(0),
  _gc_time_m(0.0),
  _gc_time_b(0.0),
  _gc_time_sd(0.0),
  _spike_acceleration_buffer_size(MAX2(ShenandoahRateAccelerationSampleSize, 1 + ShenandoahMomentaryAllocationRateSpikeSampleSize)),
  _spike_acceleration_first_sample_index(0),
  _spike_acceleration_num_samples(0),
  _spike_acceleration_rate_samples(NEW_C_HEAP_ARRAY(double, _spike_acceleration_buffer_size, mtGC)),
  _spike_acceleration_rate_timestamps(NEW_C_HEAP_ARRAY(double, _spike_acceleration_buffer_size, mtGC)),
  _most_recent_headroom_at_start_of_idle((size_t) 0) {
}

ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {
  FREE_C_HEAP_ARRAY(double, _spike_acceleration_rate_samples);
  FREE_C_HEAP_ARRAY(double, _spike_acceleration_rate_timestamps);
  FREE_C_HEAP_ARRAY(double, _gc_time_timestamps);
  FREE_C_HEAP_ARRAY(double, _gc_time_samples);
  FREE_C_HEAP_ARRAY(double, _gc_time_xy);
  FREE_C_HEAP_ARRAY(double, _gc_time_xx);
}

void ShenandoahAdaptiveHeuristics::initialize() {
  ShenandoahHeuristics::initialize();
}

void ShenandoahAdaptiveHeuristics::post_initialize() {
  ShenandoahHeuristics::post_initialize();
  _free_set = ShenandoahHeap::heap()->free_set();
  assert(!_is_generational, "ShenandoahGenerationalHeuristics overrides this method");
  _control_thread = ShenandoahHeap::heap()->control_thread();
  size_t global_available = (ShenandoahHeap::heap()->global_generation()->max_capacity() -
                             (ShenandoahHeap::heap()->global_generation()->used() + _free_set->reserved()));
  recalculate_trigger_threshold(global_available);
}

void ShenandoahAdaptiveHeuristics::recalculate_trigger_threshold(size_t mutator_available) {
  // The trigger threshold represents mutator available memory minus "headroom".
  // We plan for GC to finish before the amount of allocated memory exceeds the trigger threshold. This is the same as saying
  // we intend to finish GC before the amount of available memory is less than the allocation headroom. Headroom is the
  // planned safety buffer that allows a small amount of additional allocation to take place in case we were overly
  // optimistic in delaying our trigger.
  size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
  size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
  size_t penalties = capacity / 100 * _gc_time_penalties;
  size_t bytes_allocated_at_start_of_idle_span = _free_set->get_bytes_allocated_since_gc_start();

  // Make headroom adjustments
  _headroom_adjustment = spike_headroom + penalties;
  size_t adjusted_mutator_available;
  if (mutator_available >= _headroom_adjustment) {
    adjusted_mutator_available = mutator_available - _headroom_adjustment;
  } else {
    adjusted_mutator_available = 0;
  }
  assert(!_is_generational || !strcmp(_space_info->name(), "Young") || !strcmp(_space_info->name(), "Global"),
         "Assumed young or global space, but got: %s", _space_info->name());
  assert(_is_generational || !strcmp(_space_info->name(), ""),
         "Assumed global (unnamed) space, but got: %s", _space_info->name());
  log_info(gc)("At start or resumption of idle gc span for %s, mutator available adjusted to: " PROPERFMT
               " after adjusting for spike_headroom: " PROPERFMT " and penalties: " PROPERFMT,
               _is_generational? _space_info->name(): "Global", PROPERFMTARGS(adjusted_mutator_available),
               PROPERFMTARGS(spike_headroom), PROPERFMTARGS(penalties));
  _most_recent_headroom_at_start_of_idle = adjusted_mutator_available;
  // _trigger_threshold is expressed in words
  _trigger_threshold = (bytes_allocated_at_start_of_idle_span + adjusted_mutator_available) / HeapWordSize;
}

void ShenandoahAdaptiveHeuristics::start_idle_span() {
  size_t mutator_available = _free_set->available();
  recalculate_trigger_threshold(mutator_available);
}

void ShenandoahAdaptiveHeuristics::resume_idle_span() {
  size_t mutator_available = _free_set->available_holding_lock();
  recalculate_trigger_threshold(mutator_available);
}

// There is no headroom during evacuation and update refs. This information is not used to trigger the next GC.
// In future implementations, this information may feed into worker surge calculations.
void ShenandoahAdaptiveHeuristics::start_evac_span() {
  size_t mutator_available = _free_set->available_holding_lock();
  _trigger_threshold = mutator_available;
}

void ShenandoahAdaptiveHeuristics::adjust_penalty(intx step) {
  ShenandoahHeuristics::adjust_penalty(step);
}

void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
                                                                         RegionData* data, size_t size,
                                                                         size_t actual_free) {
  size_t garbage_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100;

  // The logic for cset selection in adaptive is as follows:
  //
  // 1. We cannot get cset larger than available free space. Otherwise we guarantee OOME
  //    during evacuation, and thus guarantee full GC.
  //    In practice, we also want to let the
  //    application allocate during concurrent GC. This is why we limit CSet to some fraction of
  //    available space. In a non-overloaded heap, max_cset would contain all plausible candidates
  //    over the garbage threshold.
  //
  // 2. We should not get cset too low so that free threshold would not be met right
  //    after the cycle. Otherwise we get back-to-back cycles for no reason if heap is
  //    too fragmented. In a non-overloaded, non-fragmented heap min_garbage would be around zero.
  //
  // Therefore, we start by sorting the regions by garbage. Then we unconditionally add the best candidates
  // until we meet min_garbage. Then we add all candidates that fit within the garbage threshold before
  // we hit max_cset. When max_cset is hit, we terminate the cset selection. Note that in this scheme,
  // ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit.

  size_t capacity    = ShenandoahHeap::heap()->soft_max_capacity();
  size_t max_cset    = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste);
  size_t free_target = (capacity / 100 * ShenandoahMinFreeThreshold) + max_cset;
  size_t min_garbage = (free_target > actual_free ? (free_target - actual_free) : 0);

  log_info(gc, ergo)("Adaptive CSet Selection. Target Free: %zu%s, Actual Free: "
                     "%zu%s, Max Evacuation: %zu%s, Min Garbage: %zu%s",
                     byte_size_in_proper_unit(free_target), proper_unit_for_byte_size(free_target),
                     byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free),
                     byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset),
                     byte_size_in_proper_unit(min_garbage), proper_unit_for_byte_size(min_garbage));

  // Better select garbage-first regions
  QuickSort::sort(data, size, compare_by_garbage);

  size_t cur_cset = 0;
  size_t cur_garbage = 0;

  // Regions are sorted in order of decreasing garbage
  for (size_t idx = 0; idx < size; idx++) {
    ShenandoahHeapRegion* r = data[idx].get_region();

    size_t new_cset    = cur_cset + r->get_live_data_bytes();
    size_t new_garbage = cur_garbage + r->garbage();

    if (new_cset > max_cset) {
      break;
    }

    if ((new_garbage < min_garbage) || (r->garbage() > garbage_threshold)) {
      cset->add_region(r);
      cur_cset = new_cset;
      cur_garbage = new_garbage;
    }
  }
}

void ShenandoahAdaptiveHeuristics::add_degenerated_gc_time(double timestamp, double gc_time) {
  // Conservatively add the sample to the linear model only if this time is above the predicted concurrent gc time
  if (predict_gc_time(timestamp) < gc_time) {
    add_gc_time(timestamp, gc_time);
  }
}

void ShenandoahAdaptiveHeuristics::add_gc_time(double timestamp, double gc_time) {
  // Update best-fit linear predictor of GC time
  uint index = (_gc_time_first_sample_index + _gc_time_num_samples) % GC_TIME_SAMPLE_SIZE;
  if (_gc_time_num_samples == GC_TIME_SAMPLE_SIZE) {
    _gc_time_sum_of_timestamps -= _gc_time_timestamps[index];
    _gc_time_sum_of_samples    -= _gc_time_samples[index];
    _gc_time_sum_of_xy         -= _gc_time_xy[index];
    _gc_time_sum_of_xx         -= _gc_time_xx[index];
  }
  _gc_time_timestamps[index] = timestamp;
  _gc_time_samples[index]    = gc_time;
  _gc_time_xy[index]         = timestamp * gc_time;
  _gc_time_xx[index]         = timestamp * timestamp;

  _gc_time_sum_of_timestamps += _gc_time_timestamps[index];
  _gc_time_sum_of_samples    += _gc_time_samples[index];
  _gc_time_sum_of_xy         += _gc_time_xy[index];
  _gc_time_sum_of_xx         += _gc_time_xx[index];

  if (_gc_time_num_samples < GC_TIME_SAMPLE_SIZE) {
    _gc_time_num_samples++;
  } else {
    _gc_time_first_sample_index = (_gc_time_first_sample_index + 1) % GC_TIME_SAMPLE_SIZE;
  }
  if (_gc_time_num_samples == 1) {
    // The predictor is constant (horizontal line)
    _gc_time_m = 0;
    _gc_time_b = gc_time;
    _gc_time_sd = 0.0;
  } else if (_gc_time_num_samples == 2) {
    // Two points define a line
    double delta_y = gc_time - _gc_time_samples[_gc_time_first_sample_index];
    double delta_x = timestamp - _gc_time_timestamps[_gc_time_first_sample_index];
    _gc_time_m = delta_y / delta_x;
    // y = mx + b
    // so b = y0 - mx0
    _gc_time_b = gc_time - _gc_time_m * timestamp;
    _gc_time_sd = 0.0;
  } else {
    _gc_time_m = ((_gc_time_num_samples * _gc_time_sum_of_xy - _gc_time_sum_of_timestamps * _gc_time_sum_of_samples) /
                  (_gc_time_num_samples * _gc_time_sum_of_xx - _gc_time_sum_of_timestamps * _gc_time_sum_of_timestamps));
    _gc_time_b = (_gc_time_sum_of_samples - _gc_time_m * _gc_time_sum_of_timestamps) / _gc_time_num_samples;
    double sum_of_squared_deviations = 0.0;
    for (size_t i = 0; i < _gc_time_num_samples; i++) {
      uint index = (_gc_time_first_sample_index + i) % GC_TIME_SAMPLE_SIZE;
      double x = _gc_time_timestamps[index];
      double predicted_y = _gc_time_m * x + _gc_time_b;
      double deviation = predicted_y - _gc_time_samples[index];
      sum_of_squared_deviations += deviation * deviation;
    }
    _gc_time_sd = sqrt(sum_of_squared_deviations / _gc_time_num_samples);
  }
}

double ShenandoahAdaptiveHeuristics::predict_gc_time(double timestamp_at_start) {
  return _gc_time_m * timestamp_at_start + _gc_time_b + _gc_time_sd * _margin_of_error_sd;
}

void ShenandoahAdaptiveHeuristics::add_rate_to_acceleration_history(double timestamp, double rate) {
  uint new_sample_index = (_spike_acceleration_first_sample_index + _spike_acceleration_num_samples) % _spike_acceleration_buffer_size;
  _spike_acceleration_rate_timestamps[new_sample_index] = timestamp;
  _spike_acceleration_rate_samples[new_sample_index] = rate;
  if (_spike_acceleration_num_samples == _spike_acceleration_buffer_size) {
    _spike_acceleration_first_sample_index++;
    if (_spike_acceleration_first_sample_index == _spike_acceleration_buffer_size) {
      _spike_acceleration_first_sample_index = 0;
    }
  } else {
    _spike_acceleration_num_samples++;
  }
}

void ShenandoahAdaptiveHeuristics::record_cycle_start() {
  ShenandoahHeuristics::record_cycle_start();
  _allocation_rate.allocation_counter_reset();
}

void ShenandoahAdaptiveHeuristics::record_success_concurrent() {
  ShenandoahHeuristics::record_success_concurrent();

  double now = os::elapsedTime();
  // Should we not add GC time if this was an abbreviated cycle?
  add_gc_time(_cycle_start, elapsed_cycle_time());

  size_t available = _space_info->available();

  double z_score = 0.0;
  double available_sd = _available.sd();
  if (available_sd > 0) {
    double available_avg = _available.avg();
    z_score = (double(available) - available_avg) / available_sd;
    log_debug(gc, ergo)("Available: %zu %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.",
                        byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
                        z_score,
                        byte_size_in_proper_unit(available_avg), proper_unit_for_byte_size(available_avg),
                        byte_size_in_proper_unit(available_sd), proper_unit_for_byte_size(available_sd));
  }
  _available.add(double(available));

  // In the case when a concurrent GC cycle completes successfully but with an
  // unusually small amount of available memory, we will adjust our trigger
  // parameters so that they are more likely to initiate a new cycle.
  // Conversely, when a GC cycle results in an above average amount of available
  // memory, we will adjust the trigger parameters to be less likely to initiate
  // a GC cycle.
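  // For example (hypothetical numbers): if available is 1.2 GB while the moving average of
  // available memory is 2.0 GB with a standard deviation of 0.4 GB, then z = (1.2 - 2.0) / 0.4 = -2.0.
  // That is below LOWEST_EXPECTED_AVAILABLE_AT_END, so the adjustment applied below would be
  // z / -100 = +0.02 standard deviations, nudging the last trigger to fire slightly earlier.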
  //
  // The z-score we've computed is in no way statistically related to the
  // trigger parameters, but it has the nice property that worse z-scores for
  // available memory indicate making larger adjustments to the trigger
  // parameters. It also results in fewer adjustments as the application
  // stabilizes.
  //
  // In order to avoid making endless and likely unnecessary adjustments to the
  // trigger parameters, the change in available memory (with respect to the
  // average) at the end of a cycle must be beyond these threshold values.
  if (z_score < LOWEST_EXPECTED_AVAILABLE_AT_END ||
      z_score > HIGHEST_EXPECTED_AVAILABLE_AT_END) {
    // The sign is flipped because a negative z-score indicates that the
    // available memory at the end of the cycle is below average. Positive
    // adjustments make the triggers more sensitive (i.e., more likely to fire).
    // The z-score also gives us a measure of just how far below normal. This
    // property allows us to adjust the trigger parameters proportionally.
    //
    // The `100` here is used to attenuate the size of our adjustments. This
    // number was chosen empirically. It also means the adjustments at the end of
    // a concurrent cycle are an order of magnitude smaller than the adjustments
    // made for a degenerated or full GC cycle (which themselves were also
    // chosen empirically).
    adjust_last_trigger_parameters(z_score / -100);
  }
}

void ShenandoahAdaptiveHeuristics::record_degenerated() {
  ShenandoahHeuristics::record_degenerated();
  add_degenerated_gc_time(_precursor_cycle_start, elapsed_degenerated_cycle_time());
  // Adjust both triggers' parameters in the case of a degenerated GC because
  // either of them should have triggered earlier to avoid this case.
  adjust_margin_of_error(DEGENERATE_PENALTY_SD);
  adjust_spike_threshold(DEGENERATE_PENALTY_SD);
}

void ShenandoahAdaptiveHeuristics::record_success_full() {
  ShenandoahHeuristics::record_success_full();
  // Adjust both triggers' parameters in the case of a full GC because
  // either of them should have triggered earlier to avoid this case.
  adjust_margin_of_error(FULL_PENALTY_SD);
  adjust_spike_threshold(FULL_PENALTY_SD);
}

static double saturate(double value, double min, double max) {
  return MAX2(MIN2(value, max), min);
}

#define KELVIN_START_GC
#ifdef KELVIN_START_GC
const size_t MaxRejectedTriggers = 256;

typedef struct gc_start_info {
  double time_stamp;
  size_t capacity;
  size_t available;
  size_t allocated;
  size_t min_threshold;
  size_t learned_steps;
  double avg_alloc_rate;
  size_t allocatable_words;
  double avg_cycle_time;
  double predicted_future_accelerated_gc_time;
  size_t allocated_bytes_since_last_sample;
  double instantaneous_rate_words_per_second;
  double current_rate_by_acceleration;
  size_t consumption_accelerated;
  double acceleration;
  double predicted_future_gc_time;
  double future_planned_gc_time;
  double avg_time_to_deplete_available;
  bool is_spiking;
  double rate;
  double spike_time_to_deplete_available;
} TriggerInfo;

// Most recently logged data represents the accepted trigger
static void dumpTriggerInfo(size_t first_trigger, size_t rejected_triggers, TriggerInfo* trigger_log) {
  static const char* const header[] = {
    "\n",
    " Min Learned Allocatable Predicted Spike Current Acceleration Planned Is Spike",
    " TimeStamp Available Allocated Threshold Steps (bytes) Accelerated Alloc Rate by (MB/s^2) GC Avg Spiking Time",
    " | Capacity (Bytes) (Bytes) (Bytes) | Avg | Avg GC Allocated Rate Accel Accelerated | Future Time Time | Rate to",
    " | (Bytes) | | | | Alloc | Cycle Time Since (MB/s) (MB/s) Consumption | GC (s) to | (MB/s) Deplete",
    " | | | | | | Rate | Time (s) Last | | (bytes) | Time | Deplete | | Available",
    " | | | | | | (MB/s) | (s) | Sample | | | | (s) | Avail | | (s)",
    " | | | | | | | | | | (bytes) | | | | | | (s) | | |",
    " | | | | | | | | | | | | | | | | | | | | |",
    " v v v v v v v v v v v v v v v v v v v v v"
  };
  for (unsigned int i = 0; i < sizeof(header) / sizeof(void*); i++) {
    log_info(gc)("%s", header[i]);
  }
  for (unsigned int i = 0; i < rejected_triggers; i++) {
    size_t __index = (first_trigger + i) % MaxRejectedTriggers;
    log_info(gc)("%8.3f %12zu %12zu %12zu %12zu %4zu "
                 "%9.3f %12zu %8.3f %8.3f %12zu %9.3f %9.3f %12zu %9.3f %8.3f %8.3f %8.3f "
                 "%3s %8.3f %8.3f",
                 trigger_log[__index].time_stamp, trigger_log[__index].capacity,
                 trigger_log[__index].available, trigger_log[__index].allocated,
                 trigger_log[__index].min_threshold, trigger_log[__index].learned_steps,
                 trigger_log[__index].avg_alloc_rate / (1024*1024),
                 trigger_log[__index].allocatable_words * HeapWordSize,
                 trigger_log[__index].avg_cycle_time,
                 trigger_log[__index].predicted_future_accelerated_gc_time,
                 trigger_log[__index].allocated_bytes_since_last_sample,
                 (trigger_log[__index].instantaneous_rate_words_per_second * HeapWordSize) / (1024*1024),
                 (trigger_log[__index].current_rate_by_acceleration * HeapWordSize) / (1024*1024),
                 trigger_log[__index].consumption_accelerated * HeapWordSize,
                 (trigger_log[__index].acceleration * HeapWordSize) / (1024*1024),
                 trigger_log[__index].predicted_future_gc_time,
                 trigger_log[__index].future_planned_gc_time,
                 trigger_log[__index].avg_time_to_deplete_available,
                 trigger_log[__index].is_spiking? "yes": "no",
                 trigger_log[__index].rate / (1024*1024),
                 trigger_log[__index].spike_time_to_deplete_available);
  }
}
#endif

// Rationale:
// The idea is that there is an average allocation rate and there are occasional abnormal bursts (or spikes) of
// allocations that exceed the average allocation rate. What do these spikes look like?
//
// 1. At certain phase changes, we may discard large amounts of data and replace it with large numbers of newly
//    allocated objects. This "spike" looks more like a phase change. We were in steady state at M bytes/sec
//    allocation rate and now we're in a "reinitialization phase" that looks like N bytes/sec. We need the "spike"
//    accommodation to give us enough runway to recalibrate our "average allocation rate".
//
// 2. The typical workload changes. "Suddenly", our typical workload of N TPS increases to N+delta TPS. This means
//    our average allocation rate needs to be adjusted. Once again, we need the "spike" accommodation to give us
//    enough runway to recalibrate our "average allocation rate".
//
// 3. Though there is an "average" allocation rate, a given workload's demand for allocation may be very bursty. We
//    allocate a bunch of LABs during the 5 ms that follow completion of a GC, then we perform no more allocations for
//    the next 150 ms. It seems we want the "spike" to represent the maximum divergence from average within the
//    period of time between consecutive evaluations of the should_start_gc() service. Here's the thinking:
//
//    a) Between now and the next time I ask whether should_start_gc(), we might experience a spike representing
//       the anticipated burst of allocations. If that would put us over budget, then we should start GC immediately.
//    b) Between now and the anticipated depletion of the allocation pool, there may be two or more bursts of allocations.
//       If there is more than one of these bursts, we can "approximate" that these will be separated by spans of
//       time with very little or no allocation, so the "average" allocation rate should be a suitable approximation
//       of how this will behave.
//
// For cases 1 and 2, we need to "quickly" recalibrate the average allocation rate whenever we detect a change
// in operation mode. We want some way to decide that the average rate has changed, while keeping the average
// allocation rate computation independent.
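
// A minimal standalone sketch (excluded from the build) of the spike test that should_start_gc()
// applies below: a sample is "spiking" when its z-score against the moving average of recent
// allocation rates exceeds the spike threshold. The names avg and sd here are illustrative
// stand-ins for the TruncatedSeq statistics consulted by ShenandoahAllocationRate::is_spiking().
#if 0
static bool sketch_is_spiking(double rate, double avg, double sd, double threshold_sd) {
  if (rate <= 0.0 || sd <= 0.0) {
    return false;  // no meaningful deviation to measure
  }
  double z_score = (rate - avg) / sd;
  // Only above-average rates count as spikes; below-average rates never trigger.
  return z_score > threshold_sd;
}
#endif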
bool ShenandoahAdaptiveHeuristics::should_start_gc() {
#ifdef KELVIN_START_GC
  static TriggerInfo rejected_trigger_log[MaxRejectedTriggers];
  static size_t rejected_trigger_count = 0;
  static size_t first_rejected_trigger = 0;

#define ForceAppendTriggerInfo(ts, cap, avail, alloced, mt, ls, aar, aw, act, pfagt, absls,    \
                               irwps, crba, ca, accel, pfgt, fpgt, attda, is, r, sttda)        \
  if (rejected_trigger_count >= MaxRejectedTriggers) {                                         \
    first_rejected_trigger++;                                                                  \
    if (first_rejected_trigger >= MaxRejectedTriggers) {                                       \
      first_rejected_trigger = 0;                                                              \
    }                                                                                          \
  } else {                                                                                     \
    rejected_trigger_count++;                                                                  \
  }                                                                                            \
  {                                                                                            \
    size_t __j;                                                                                \
    __j = (first_rejected_trigger + rejected_trigger_count - 1) % MaxRejectedTriggers;         \
    rejected_trigger_log[__j].time_stamp = ts;                                                 \
    rejected_trigger_log[__j].capacity = cap;                                                  \
    rejected_trigger_log[__j].available = avail;                                               \
    rejected_trigger_log[__j].allocated = alloced;                                             \
    rejected_trigger_log[__j].min_threshold = mt;                                              \
    rejected_trigger_log[__j].learned_steps = ls;                                              \
    rejected_trigger_log[__j].avg_alloc_rate = aar;                                            \
    rejected_trigger_log[__j].allocatable_words = aw;                                          \
    rejected_trigger_log[__j].avg_cycle_time = act;                                            \
    rejected_trigger_log[__j].predicted_future_accelerated_gc_time = pfagt;                    \
    rejected_trigger_log[__j].allocated_bytes_since_last_sample = absls;                       \
    rejected_trigger_log[__j].instantaneous_rate_words_per_second = irwps;                     \
    rejected_trigger_log[__j].current_rate_by_acceleration = crba;                             \
    rejected_trigger_log[__j].consumption_accelerated = ca;                                    \
    rejected_trigger_log[__j].acceleration = accel;                                            \
    rejected_trigger_log[__j].predicted_future_gc_time = pfgt;                                 \
    rejected_trigger_log[__j].future_planned_gc_time = fpgt;                                   \
    rejected_trigger_log[__j].avg_time_to_deplete_available = attda;                           \
    rejected_trigger_log[__j].is_spiking = is;                                                 \
    rejected_trigger_log[__j].rate = r;                                                        \
    rejected_trigger_log[__j].spike_time_to_deplete_available = sttda;                         \
  }

// We do not append trigger info for non-consequential sample periods
#define AppendTriggerInfo(ts, cap, avail, alloced, mt, ls, aar, aw, act, pfagt, absls,         \
                          irwps, crba, ca, accel, pfgt, fpgt, attda, is, r, sttda)             \
  if (((absls) > 0) || ((r) > 0)) {                                                            \
    ForceAppendTriggerInfo(ts, cap, avail, alloced, mt, ls, aar, aw, act, pfagt, absls,        \
                           irwps, crba, ca, accel, pfgt, fpgt, attda, is, r, sttda)            \
  }

#define DumpTriggerInfo(ts, cap, avail, alloced, mt, ls, aar, aw, act, pfagt, absls,           \
                        irwps, crba, ca, accel, pfgt, fpgt, attda, is, r, sttda)               \
  ForceAppendTriggerInfo(ts, cap, avail, alloced, mt, ls,                                      \
                         aar, aw, act, pfagt, absls, irwps, crba, ca, accel,                   \
                         pfgt, fpgt, attda, is, r, sttda);                                     \
  dumpTriggerInfo(first_rejected_trigger, rejected_trigger_count, rejected_trigger_log);       \
  rejected_trigger_count = 0;                                                                  \
  first_rejected_trigger = 0;
#else
#define AppendTriggerInfo(ts, cap, avail, alloced, mt, ls, aar, aw, act, pfagt, absls,         \
                          irwps, crba, ca, accel, pfgt, fpgt, attda, is, r, sttda) ;
#define DumpTriggerInfo(ts, cap, avail, alloced, mt, ls, aar, aw, act, pfagt, absls,           \
                        irwps, crba, ca, accel, pfgt, fpgt, attda, is, r, sttda) ;
#endif

  size_t capacity  = ShenandoahHeap::heap()->soft_max_capacity();
  size_t available = _space_info->soft_mutator_available();
  size_t allocated = _space_info->bytes_allocated_since_gc_start();
  double avg_cycle_time = 0;
  double avg_alloc_rate = 0;
  double now = get_most_recent_wake_time();
  size_t allocatable_words = this->allocatable(available);
  double predicted_future_accelerated_gc_time = 0.0;
  size_t allocated_bytes_since_last_sample = 0;
  double instantaneous_rate_words_per_second = 0.0;
  size_t consumption_accelerated = 0;
  double acceleration = 0.0;
  double current_rate_by_acceleration = 0.0;
  size_t min_threshold = min_free_threshold();
#ifdef KELVIN_START_GC
  size_t learned_steps = _gc_times_learned;
#endif
  double predicted_future_gc_time = 0;
  double future_planned_gc_time = 0;
  bool future_planned_gc_time_is_average = false;
  double avg_time_to_deplete_available = 0.0;
  bool is_spiking = false;
  double spike_time_to_deplete_available = 0.0;

  log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT ", "
                      "allocated_since_gc_start: " PROPERFMT,
                      PROPERFMTARGS(available), PROPERFMTARGS(capacity), PROPERFMTARGS(allocated));

  // Track allocation rate even if we decide to start a cycle for other reasons.
  double rate = _allocation_rate.sample(allocated);
  if (_start_gc_is_pending) {
    log_trigger("GC start is already pending");
    DumpTriggerInfo(now, capacity, available, allocated, min_threshold, learned_steps, avg_alloc_rate,
                    allocatable_words, avg_cycle_time, predicted_future_accelerated_gc_time,
                    allocated_bytes_since_last_sample, instantaneous_rate_words_per_second,
                    current_rate_by_acceleration, consumption_accelerated, acceleration,
                    predicted_future_gc_time, future_planned_gc_time, avg_time_to_deplete_available,
                    is_spiking, rate, spike_time_to_deplete_available);
    return true;
  }
  _last_trigger = OTHER;

  if (available < min_threshold) {
    log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")",
                PROPERFMTARGS(available), PROPERFMTARGS(min_threshold));
    accept_trigger_with_type(OTHER);
    DumpTriggerInfo(now, capacity, available, allocated, min_threshold, learned_steps, avg_alloc_rate,
                    allocatable_words, avg_cycle_time, predicted_future_accelerated_gc_time,
                    allocated_bytes_since_last_sample, instantaneous_rate_words_per_second,
                    current_rate_by_acceleration, consumption_accelerated, acceleration,
                    predicted_future_gc_time, future_planned_gc_time, avg_time_to_deplete_available,
                    is_spiking, rate, spike_time_to_deplete_available);
    return true;
  }

  // Check if we need to learn a bit about the application
  const size_t max_learn = ShenandoahLearningSteps;
  if (_gc_times_learned < max_learn) {
    size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold;
    if (available < init_threshold) {
      log_trigger("Learning %zu of %zu. Free (%zu%s) is below initial threshold (%zu%s)",
                  _gc_times_learned + 1, max_learn,
                  byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
                  byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold));
      accept_trigger_with_type(OTHER);
      DumpTriggerInfo(now, capacity, available, allocated, min_threshold, learned_steps, avg_alloc_rate,
                      allocatable_words, avg_cycle_time, predicted_future_accelerated_gc_time,
                      allocated_bytes_since_last_sample, instantaneous_rate_words_per_second,
                      current_rate_by_acceleration, consumption_accelerated, acceleration,
                      predicted_future_gc_time, future_planned_gc_time, avg_time_to_deplete_available,
                      is_spiking, rate, spike_time_to_deplete_available);
      return true;
    }
  }

  avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd());
  avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
  if ((now - _previous_allocation_timestamp) >= ShenandoahAccelerationSamplePeriod) {
    predicted_future_accelerated_gc_time =
      predict_gc_time(now + MAX2(get_planned_sleep_interval(), ShenandoahAccelerationSamplePeriod));
    double future_accelerated_planned_gc_time;
    bool future_accelerated_planned_gc_time_is_average;
    if (predicted_future_accelerated_gc_time > avg_cycle_time) {
      future_accelerated_planned_gc_time = predicted_future_accelerated_gc_time;
      future_accelerated_planned_gc_time_is_average = false;
    } else {
      future_accelerated_planned_gc_time = avg_cycle_time;
      future_accelerated_planned_gc_time_is_average = true;
    }
    allocated_bytes_since_last_sample = _free_set->get_bytes_allocated_since_previous_sample();
    instantaneous_rate_words_per_second =
      (allocated_bytes_since_last_sample / HeapWordSize) / (now - _previous_allocation_timestamp);
    _previous_allocation_timestamp = now;
    add_rate_to_acceleration_history(now, instantaneous_rate_words_per_second);
    current_rate_by_acceleration = instantaneous_rate_words_per_second;
    consumption_accelerated = accelerated_consumption(acceleration, current_rate_by_acceleration,
                                                      avg_alloc_rate / HeapWordSize,
                                                      ShenandoahAccelerationSamplePeriod + future_accelerated_planned_gc_time);

    // Note that even a single thread that wakes up and begins to allocate excessively can manifest as accelerating allocation
    // rate. This thread will initially allocate a TLAB of minimum size. Then it will allocate a TLAB twice as big a bit later,
    // and then twice as big again after another short delay. When a phase change causes many threads to increase their
    // allocation behavior, this effect is multiplied, and compounded by jitter in the times that individual threads experience
    // the phase change.
    //
    // The following trace represents an actual workload, with allocation rates sampled at 10 Hz, the default behavior before
    // introduction of accelerated allocation rate detection. Though the allocation rate is seen to be increasing at times
    // 101.907 and 102.007 and 102.108, the newly sampled allocation rate is not enough to trigger GC because the headroom is
    // still quite large. In fact, GC is not triggered until time 102.409s, and this GC degenerates.
    //
    //   Sample Time (s)   Allocation Rate (MB/s)   Headroom (GB)
    //   101.807                      0.0              26.93
    //                                     <--- accelerated spike can trigger here, around time 101.9s
    //   101.907                    477.6              26.85
    //   102.007                  3,206.0              26.35
    //   102.108                 23,797.8              24.19
    //   102.208                 24,164.5              21.83
    //   102.309                 23,965.0              19.47
    //   102.409                 24,624.35             17.05    <--- without accelerated rate detection, we trigger here
    //
    // Though the above measurements are from an actual workload, the following details regarding allocation rates sampled
    // at a 3 ms period were not measured directly for this run-time sample. These are hypothetical, though they represent
    // a plausible result that correlates with the actual measurements.
    //
    // For most of the 100 ms time span that precedes the sample at 101.907, the allocation rate still remains at zero. The
    // phase change that causes increasing allocations occurs near the end of this time segment. When sampled with a 3 ms
    // period, acceleration of allocation can trigger at approximately time 101.88s.
    //
    // In the default configuration, accelerated allocation rate is detected by examining a sequence of 5 allocation rate
    // samples.
    //
    // Even a single allocation rate sample above the norm can be interpreted as acceleration of allocation rate. For example,
    // the best-fit line for the following samples has an acceleration rate of 3,553.3 MB/s/s. This is not enough to trigger
    // GC, especially given the abundance of Headroom at this moment in time.
    //
    //   TimeStamp (s)   Alloc rate (MB/s)
    //   101.857                 0
    //   101.860                 0
    //   101.863                 0
    //   101.866                 0
    //   101.869              53.3
    //
    // At the next sample time, we will compute a slightly higher acceleration, 9,150 MB/s/s. This is also insufficient to
    // trigger GC.
    //
    //   TimeStamp (s)   Alloc rate (MB/s)
    //   101.860                 0
    //   101.863                 0
    //   101.866                 0
    //   101.869              53.3
    //   101.872             110.6
    //
    // Eventually, we will observe a full history of accelerating rate samples, computing acceleration of 18,500 MB/s/s. This
    // will trigger GC over 500 ms earlier than was previously possible.
    //
    //   TimeStamp (s)   Alloc rate (MB/s)
    //   101.866                 0
    //   101.869              53.3
    //   101.872             110.6
    //   101.875             165.9
    //   101.878             221.2
    //
    // The accelerated rate heuristic is based on the following idea:
    //
    //   Assume allocation rate is accelerating at a constant rate. If we postpone the spike trigger until the subsequent
    //   sample point, will there be enough memory to satisfy allocations that occur during the anticipated concurrent GC
    //   cycle? If not, we should trigger right now.
    //
    // Outline of this heuristic triggering technique:
    //
    //   1. We remember the N (e.g. N=3) most recent samples of spike allocation rate: r0, r1, r2, sampled at t0, t1, and t2
    //   2. If r1 < r0 or r2 < r1, approximate Acceleration = 0.0, Rate = Average(r0, r1, r2)
    //   3. Otherwise, use the least-squares method to compute the best-fit line of rate vs time
    //   4. The slope of this line represents Acceleration. The y-intercept of this line represents "initial rate"
    //   5. Use r2 to represent CurrentRate
    //   6. Use Consumption = CurrentRate * GCTime + 1/2 * Acceleration * GCTime * GCTime
    //      (See high school physics discussions of constant acceleration: D = v0 * t + 1/2 * a * t^2)
    //   7. If Consumption exceeds headroom, trigger now
    //
    // Though a larger sample size may improve the quality of the predictor, it would delay our trigger response as well.
    // Smaller sample sizes are more susceptible to false triggers based on random noise. The default configuration uses a
    // sample size of 5, spanning 15 ms of execution.
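    //
    // Worked example of step 6 above (hypothetical numbers): with CurrentRate = 200 MB/s,
    // Acceleration = 18,500 MB/s/s, and GCTime = 0.2 s,
    //   Consumption = 200 * 0.2 + 0.5 * 18,500 * 0.2^2 = 40 + 370 = 410 MB.
    // If free headroom is below 410 MB, we trigger now rather than waiting for the next sample point.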
    if (consumption_accelerated > allocatable_words) {
      size_t size_t_alloc_rate = (size_t) current_rate_by_acceleration * HeapWordSize;
      if (acceleration > 0) {
        size_t size_t_acceleration = (size_t) acceleration * HeapWordSize;
        log_trigger("Accelerated consumption (%zu%s) exceeds free headroom (%zu%s) at "
                    "current rate (%zu%s/s) with acceleration (%zu%s/s/s) for planned %s GC time (%.2f ms)",
                    byte_size_in_proper_unit(consumption_accelerated * HeapWordSize),
                    proper_unit_for_byte_size(consumption_accelerated * HeapWordSize),
                    byte_size_in_proper_unit(allocatable_words * HeapWordSize),
                    proper_unit_for_byte_size(allocatable_words * HeapWordSize),
                    byte_size_in_proper_unit(size_t_alloc_rate), proper_unit_for_byte_size(size_t_alloc_rate),
                    byte_size_in_proper_unit(size_t_acceleration), proper_unit_for_byte_size(size_t_acceleration),
                    future_accelerated_planned_gc_time_is_average? "(from average)": "(by linear prediction)",
                    future_accelerated_planned_gc_time * 1000);
      } else {
        log_trigger("Momentary spike consumption (%zu%s) exceeds free headroom (%zu%s) at "
                    "current rate (%zu%s/s) for planned %s GC time (%.2f ms) (spike threshold = %.2f)",
                    byte_size_in_proper_unit(consumption_accelerated * HeapWordSize),
                    proper_unit_for_byte_size(consumption_accelerated * HeapWordSize),
                    byte_size_in_proper_unit(allocatable_words * HeapWordSize),
                    proper_unit_for_byte_size(allocatable_words * HeapWordSize),
                    byte_size_in_proper_unit(size_t_alloc_rate), proper_unit_for_byte_size(size_t_alloc_rate),
                    future_accelerated_planned_gc_time_is_average? "(from average)": "(by linear prediction)",
                    future_accelerated_planned_gc_time * 1000, _spike_threshold_sd);
      }
      _spike_acceleration_num_samples = 0;
      _spike_acceleration_first_sample_index = 0;
      // Count this as a form of RATE trigger for purposes of adjusting heuristic triggering configuration because this
      // trigger is influenced more by margin_of_error_sd than by spike_threshold_sd.
      accept_trigger_with_type(RATE);
      DumpTriggerInfo(now, capacity, available, allocated, min_threshold, learned_steps, avg_alloc_rate,
                      allocatable_words, avg_cycle_time, predicted_future_accelerated_gc_time,
                      allocated_bytes_since_last_sample, instantaneous_rate_words_per_second,
                      current_rate_by_acceleration, consumption_accelerated, acceleration,
                      predicted_future_gc_time, future_planned_gc_time, avg_time_to_deplete_available,
                      is_spiking, rate, spike_time_to_deplete_available);
      return true;
    }
  }

  // Suppose we don't trigger now, but decide to trigger in the next regulator cycle. What will be the GC time then?
  predicted_future_gc_time = predict_gc_time(now + get_planned_sleep_interval());
  if (predicted_future_gc_time > avg_cycle_time) {
    future_planned_gc_time = predicted_future_gc_time;
    future_planned_gc_time_is_average = false;
  } else {
    future_planned_gc_time = avg_cycle_time;
    future_planned_gc_time_is_average = true;
  }
  log_debug(gc)("%s: average GC time: %.2f ms, predicted GC time: %.2f ms, allocation rate: %.0f %s/s",
                _space_info->name(), avg_cycle_time * 1000, predicted_future_gc_time * 1000,
                byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate));
  size_t allocatable_bytes = allocatable_words * HeapWordSize;
  avg_time_to_deplete_available = allocatable_bytes / avg_alloc_rate;
  if (future_planned_gc_time > avg_time_to_deplete_available) {
    log_trigger("%s GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s)"
                " to deplete free headroom (%zu%s) (margin of error = %.2f)",
"Average": "Linear prediction of", future_planned_gc_time * 1000, byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate), byte_size_in_proper_unit(allocatable_bytes), proper_unit_for_byte_size(allocatable_bytes), _margin_of_error_sd); size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor; size_t penalties = capacity / 100 * _gc_time_penalties; size_t allocation_headroom = available; allocation_headroom -= MIN2(allocation_headroom, spike_headroom); allocation_headroom -= MIN2(allocation_headroom, penalties); log_info(gc, ergo)("Free headroom: %zu%s (free) - %zu%s (spike) - %zu%s (penalties) = %zu%s", byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom), byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties), byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom)); accept_trigger_with_type(RATE); DumpTriggerInfo(now, capacity, available, allocated, min_threshold, learned_steps, avg_alloc_rate, allocatable_words, avg_cycle_time, predicted_future_accelerated_gc_time, allocated_bytes_since_last_sample, instantaneous_rate_words_per_second, current_rate_by_acceleration, consumption_accelerated, acceleration, predicted_future_gc_time, future_planned_gc_time, avg_time_to_deplete_available, is_spiking, rate, spike_time_to_deplete_available); return true; } is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd); spike_time_to_deplete_available = (rate == 0)? 0: allocatable_bytes / rate; if (is_spiking && (rate != 0) && (future_planned_gc_time > spike_time_to_deplete_available)) { log_trigger("%s GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s)" " to deplete free headroom (%zu%s) (spike threshold = %.2f)", future_planned_gc_time_is_average? "Average": "Linear prediction of", future_planned_gc_time * 1000, byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate), byte_size_in_proper_unit(allocatable_bytes), proper_unit_for_byte_size(allocatable_bytes), _spike_threshold_sd); accept_trigger_with_type(SPIKE); DumpTriggerInfo(now, capacity, available, allocated, min_threshold, learned_steps, avg_alloc_rate, allocatable_words, avg_cycle_time, predicted_future_accelerated_gc_time, allocated_bytes_since_last_sample, instantaneous_rate_words_per_second, current_rate_by_acceleration, consumption_accelerated, acceleration, predicted_future_gc_time, future_planned_gc_time, avg_time_to_deplete_available, is_spiking, rate, spike_time_to_deplete_available); return true; } #ifdef KELVIN_START_GC if (ShenandoahHeuristics::should_start_gc()) { // ShenandoahHeuristics::should_start_gc() has accepted trigger, or declined it. 
    DumpTriggerInfo(now, capacity, available, allocated, min_threshold, learned_steps, avg_alloc_rate,
                    allocatable_words, avg_cycle_time, predicted_future_accelerated_gc_time,
                    allocated_bytes_since_last_sample, instantaneous_rate_words_per_second,
                    current_rate_by_acceleration, consumption_accelerated, acceleration,
                    predicted_future_gc_time, future_planned_gc_time, avg_time_to_deplete_available,
                    is_spiking, rate, spike_time_to_deplete_available);
    return true;
  } else {
    AppendTriggerInfo(now, capacity, available, allocated, min_threshold, learned_steps, avg_alloc_rate,
                      allocatable_words, avg_cycle_time, predicted_future_accelerated_gc_time,
                      allocated_bytes_since_last_sample, instantaneous_rate_words_per_second,
                      current_rate_by_acceleration, consumption_accelerated, acceleration,
                      predicted_future_gc_time, future_planned_gc_time, avg_time_to_deplete_available,
                      is_spiking, rate, spike_time_to_deplete_available);
    return false;
  }
#else
  return ShenandoahHeuristics::should_start_gc();
#endif
}

void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) {
  switch (_last_trigger) {
    case RATE:
      adjust_margin_of_error(amount);
      break;
    case SPIKE:
      adjust_spike_threshold(amount);
      break;
    case OTHER:
      // nothing to adjust here.
      break;
    default:
      ShouldNotReachHere();
  }
}

void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) {
  _margin_of_error_sd = saturate(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
  log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd);
}

void ShenandoahAdaptiveHeuristics::adjust_spike_threshold(double amount) {
  _spike_threshold_sd = saturate(_spike_threshold_sd - amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
  log_debug(gc, ergo)("Spike threshold now: %.2f", _spike_threshold_sd);
}

size_t ShenandoahAdaptiveHeuristics::min_free_threshold() {
  return ShenandoahHeap::heap()->soft_max_capacity() / 100 * ShenandoahMinFreeThreshold;
}

// This is called each time a new rate sample has been gathered, as governed by MINIMUM_ALLOC_RATE_SAMPLE_INTERVAL.
// There is no adjustment for standard deviation of the accelerated rate prediction.
size_t ShenandoahAdaptiveHeuristics::accelerated_consumption(double& acceleration, double& current_rate,
                                                             double avg_alloc_rate_words_per_second,
                                                             double predicted_cycle_time) const {
  double *x_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
  double *y_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
  double x_sum = 0.0;
  double y_sum = 0.0;
  assert(_spike_acceleration_num_samples > 0, "At minimum, we should have a sample from this period");
  double weighted_average_alloc;
  if (_spike_acceleration_num_samples >= ShenandoahRateAccelerationSampleSize) {
    double weighted_y_sum = 0;
    double total_weight = 0;
    uint delta = _spike_acceleration_num_samples - ShenandoahRateAccelerationSampleSize;
    for (uint i = 0; i < ShenandoahRateAccelerationSampleSize; i++) {
      uint index = (_spike_acceleration_first_sample_index + delta + i) % _spike_acceleration_buffer_size;
      x_array[i] = _spike_acceleration_rate_timestamps[index];
      x_sum += x_array[i];
      y_array[i] = _spike_acceleration_rate_samples[index];
      if (i > 0) {
        // The first sample is not included in the weighted average because it has no weight.
        double sample_weight = x_array[i] - x_array[i-1];
        weighted_y_sum += y_array[i] * sample_weight;
        total_weight += sample_weight;
      }
      y_sum += y_array[i];
    }
    weighted_average_alloc = (total_weight > 0)? weighted_y_sum / total_weight: 0;
  } else {
    weighted_average_alloc = 0;
  }
  double momentary_rate;
  if (_spike_acceleration_num_samples > ShenandoahMomentaryAllocationRateSpikeSampleSize) {
    // Num samples must be strictly greater than the sample size, because we need one extra sample to compute rates and weights
    double weighted_y_sum = 0;
    double total_weight = 0;
    uint delta = _spike_acceleration_num_samples - ShenandoahMomentaryAllocationRateSpikeSampleSize;
    for (uint i = 0; i < ShenandoahMomentaryAllocationRateSpikeSampleSize; i++) {
      uint sample_index = (_spike_acceleration_first_sample_index + delta + i) % _spike_acceleration_buffer_size;
      uint preceding_index = (sample_index == 0)? _spike_acceleration_buffer_size - 1: sample_index - 1;
      double sample_weight = (_spike_acceleration_rate_timestamps[sample_index] -
                              _spike_acceleration_rate_timestamps[preceding_index]);
      weighted_y_sum += _spike_acceleration_rate_samples[sample_index] * sample_weight;
      total_weight += sample_weight;
    }
    momentary_rate = weighted_y_sum / total_weight;
    bool is_spiking = _allocation_rate.is_spiking(momentary_rate, _spike_threshold_sd);
    if (!is_spiking) {
      // Disable the momentary spike trigger unless the allocation rate's delta from average exceeds the spike threshold
      momentary_rate = 0.0;
    }
  } else {
    momentary_rate = 0.0;
  }
  // By default, use momentary_rate for current rate and zero acceleration. Overwrite iff the best-fit line has positive slope.
  current_rate = momentary_rate;
  acceleration = 0.0;
  if ((_spike_acceleration_num_samples >= ShenandoahRateAccelerationSampleSize) &&
      (weighted_average_alloc >= avg_alloc_rate_words_per_second)) {
    // If the average rate across the acceleration samples is below the overall average, this sample is not eligible to
    // represent acceleration of allocation rate. We may just be catching up with allocations after a lull.
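    //
    // Worked example (hypothetical numbers, echoing the trace above): for samples
    // (101.869 s, 53.3 MB/s), (101.872 s, 110.6 MB/s) and (101.875 s, 165.9 MB/s), the least-squares
    // slope m = (n * xy_sum - x_sum * y_sum) / (n * x2_sum - x_sum * x_sum) evaluates to roughly
    // 18,770 MB/s/s, which becomes the Acceleration term in the consumption estimate below.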
    double *xy_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
    double *x2_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double));
    double xy_sum = 0.0;
    double x2_sum = 0.0;
    for (uint i = 0; i < ShenandoahRateAccelerationSampleSize; i++) {
      xy_array[i] = x_array[i] * y_array[i];
      xy_sum += xy_array[i];
      x2_array[i] = x_array[i] * x_array[i];
      x2_sum += x2_array[i];
    }
    // Find the best-fit least-squares linear representation of rate vs time
    double m;                   /* slope */
    double b;                   /* y-intercept */
    m = ((ShenandoahRateAccelerationSampleSize * xy_sum - x_sum * y_sum) /
         (ShenandoahRateAccelerationSampleSize * x2_sum - x_sum * x_sum));
    b = (y_sum - m * x_sum) / ShenandoahRateAccelerationSampleSize;
    if (m > 0) {
      double proposed_current_rate = m * x_array[ShenandoahRateAccelerationSampleSize - 1] + b;
      acceleration = m;
      current_rate = proposed_current_rate;
    }
    // else, leave current_rate = momentary_rate, acceleration = 0
  }
  // and here also, leave current_rate = momentary_rate, acceleration = 0
  double time_delta = get_planned_sleep_interval() + predicted_cycle_time;
  size_t words_to_be_consumed = (size_t) (current_rate * time_delta + 0.5 * acceleration * time_delta * time_delta);
  return words_to_be_consumed;
}

ShenandoahAllocationRate::ShenandoahAllocationRate() :
  _last_sample_time(os::elapsedTime()),
  _last_sample_value(0),
  _interval_sec(1.0 / ShenandoahAdaptiveSampleFrequencyHz),
  _rate(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor),
  _rate_avg(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor) {
}

double ShenandoahAllocationRate::force_sample(size_t allocated, size_t &unaccounted_bytes_allocated) {
  const double MinSampleTime = 0.002;  // Do not sample if time since last update is less than 2 ms
  double now = os::elapsedTime();
  double time_since_last_update = now - _last_sample_time;
  if (time_since_last_update < MinSampleTime) {
    unaccounted_bytes_allocated = allocated - _last_sample_value;
    _last_sample_value = 0;
    return 0.0;
  } else {
    double rate = instantaneous_rate(now, allocated);
    _rate.add(rate);
    _rate_avg.add(_rate.avg());
    _last_sample_time = now;
    _last_sample_value = allocated;
    unaccounted_bytes_allocated = 0;
    return rate;
  }
}

double ShenandoahAllocationRate::sample(size_t allocated) {
  double now = os::elapsedTime();
  double rate = 0.0;
  if (now - _last_sample_time > _interval_sec) {
    rate = instantaneous_rate(now, allocated);
    _rate.add(rate);
    _rate_avg.add(_rate.avg());
    _last_sample_time = now;
    _last_sample_value = allocated;
  }
  return rate;
}

double ShenandoahAllocationRate::upper_bound(double sds) const {
  // Here we are using the standard deviation of the computed running
  // average, rather than the standard deviation of the samples that went
  // into the moving average. This is a much more stable value and is tied
  // to the actual statistic in use (moving average over samples of averages).
  return _rate.davg() + (sds * _rate_avg.dsd());
}

void ShenandoahAllocationRate::allocation_counter_reset() {
  _last_sample_time = os::elapsedTime();
  _last_sample_value = 0;
}

bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const {
  if (rate <= 0.0) {
    return false;
  }

  double sd = _rate.sd();
  if (sd > 0) {
    // There is a small chance that the rate has already been sampled, but it seems not to matter in practice.
    // Note that z_score reports how close the rate is to the average. A value between -1 and 1 means we are within one
    // standard deviation.
    // A value between -3 and +3 means we are within 3 standard deviations. We only check for a z_score
    // greater than the threshold because we are looking for an allocation spike that is greater than the mean.
    double z_score = (rate - _rate.avg()) / sd;
    if (z_score > threshold) {
      return true;
    }
  }
  return false;
}

double ShenandoahAllocationRate::instantaneous_rate(double time, size_t allocated) const {
  size_t last_value = _last_sample_value;
  double last_time = _last_sample_time;
  size_t allocation_delta = (allocated > last_value) ? (allocated - last_value) : 0;
  double time_delta_sec = time - last_time;
  return (time_delta_sec > 0) ? (allocation_delta / time_delta_sec) : 0;
}
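
// A minimal standalone sketch (excluded from the build) of the least-squares fit that both
// add_gc_time() and accelerated_consumption() apply above: given n (x, y) samples, compute the
// slope m and intercept b of the best-fit line. The function name and signature are illustrative
// only, not part of this file's API.
#if 0
static void sketch_least_squares(const double* x, const double* y, int n, double& m, double& b) {
  double x_sum = 0.0, y_sum = 0.0, xy_sum = 0.0, xx_sum = 0.0;
  for (int i = 0; i < n; i++) {
    x_sum  += x[i];
    y_sum  += y[i];
    xy_sum += x[i] * y[i];
    xx_sum += x[i] * x[i];
  }
  m = (n * xy_sum - x_sum * y_sum) / (n * xx_sum - x_sum * x_sum);  // slope
  b = (y_sum - m * x_sum) / n;                                      // y-intercept
}
#endif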